def goToLinkName(self, linkName):
    """Follow the single link on the current page that matches ``linkName``.

    The links are looked up with ``HtmlLibrary.getLinks`` on the current
    page content. When exactly one link is found, its target is opened via
    ``self.goToPage``; targets that do not start with ``http`` are first
    resolved against ``self.getCurrentPageUrl()``.

    Args:
        linkName: Search key handed to ``HtmlLibrary.getLinks``. Either a
            plain string or an object exposing a ``pattern`` attribute
            (e.g. a compiled regular expression).

    Returns:
        The result of ``HtmlLibrary.getLinks`` (holding exactly one entry).

    Raises:
        NameError: If no page has been opened yet, if no link matches, or
            if more than one link matches. In the latter two cases the
            search key is printed before raising.
    """
    if self.__currPage is None:
        raise NameError('no page to open, please go to a page, before calling this')

    res = HtmlLibrary.getLinks(self.__currPageContent, linkName)
    matchCount = len(res)

    if matchCount != 1:
        # Print what was searched for so the caller can see why it failed.
        # Detecting the regex by its ``pattern`` attribute (instead of
        # ``type(linkName) == str``) keeps unicode strings and str
        # subclasses from crashing with AttributeError on ``.pattern``.
        pattern = getattr(linkName, 'pattern', None)
        # Single-argument print(...) produces the same output as the
        # Python 2 print statement.
        if pattern is None:
            print(linkName)
        else:
            print('Regex: ' + pattern)

        if matchCount == 0:
            raise NameError('Didn\'t found the link')
        raise NameError('more then one link for this name, so please choose manually.')

    # list(...) so this works whether items() returns a list or a view.
    link = list(res.items())[0][1]
    if link.startswith('http'):
        # Absolute URL: navigate to the href itself. The previous code used
        # linkName here, i.e. the search key/regex rather than the URL.
        nextUrl = link
    else:
        nextUrl = urlparse.urljoin(self.getCurrentPageUrl(), link)
    self.goToPage(nextUrl)
    return res
# Example script: load http://www.youku.com/ with PythonBrowser and list the
# links matching a sign-in / login / browse pattern.
#
# Statements, in order:
#   1. Create a PythonBrowser, call goToPage() on the URL and print the value
#      of getCurrentPageUrl().
#   2. Read the page content with getCurrentPageContent() and call
#      openHtmlInBrowser() (announced as 'Opening Page in browser for view..').
#   3. Compile a case-insensitive regex for "signin", "sign in", "login",
#      "log in" or "browse" and pass it, together with the page content, to
#      HtmlLibrary.getLinks().
#   4. Iterate over links.items() and print the first element of each pair;
#      linkHref is unpacked but never used.
#   5. Call openHtmlInBrowser() a second time
#      (announced as 'Open page after search..').
#
# Written with Python 2 print statements; `urlparse` is imported but not
# referenced by this script.
# NOTE(review): all statements sit on one physical line, so it cannot be told
# from here which of the prints after the `for` belong to the loop body --
# confirm against the original layout before reformatting.
import HtmlLibrary from PythonBrowser import PythonBrowser import re import urlparse p = PythonBrowser() p.goToPage('http://www.youku.com/') print 'going to "%s"...'%p.getCurrentPageUrl() pageContent = p.getCurrentPageContent() print 'Opening Page in browser for view..' p.openHtmlInBrowser() regexSearchPatternForLinks = re.compile('(signin|sign in|login|log in|browse)', re.IGNORECASE) links = HtmlLibrary.getLinks(pageContent, regexSearchPatternForLinks) print 'Links on page matching pattern:' for link, linkHref in links.items(): print 'Html Link: "%s"'%link print '#############################' print print 'Open page after search..' p.openHtmlInBrowser()
import HtmlLibrary from PythonBrowser import PythonBrowser import re import urlparse p = PythonBrowser() p.goToPage('http://www.youku.com/') print 'going to "%s"...'%p.getCurrentPageUrl() pageContent = p.getCurrentPageContent() print 'Opening Page in browser for view..' p.openHtmlInBrowser() regexSearchPatternForLinks = re.compile('(signin|sign in|login|log in|browse)', re.IGNORECASE) links = HtmlLibrary.getLinks(pageContent, regexSearchPatternForLinks) print 'Links on page matching pattern:' for link, linkHref in links.items(): print 'Html Link: "%s"'%link print '#############################' print print 'Lets search for first 10 images...' allImages, allImagesTitles = HtmlLibrary.findElementNameBegin(pageContent, 'img') i =0; for pic in allImages: print pic print '#######################' i = i +1; if (i>10):