# Demo script: load the Youku home page with PythonBrowser, show it in a
# browser, and list the links that HtmlLibrary.getLinks reports for a
# sign-in / login / browse pattern.
import HtmlLibrary
from PythonBrowser import PythonBrowser
import re
import urlparse

# Navigate to the start page and report the URL the browser object ends up on.
p = PythonBrowser()
p.goToPage('http://www.youku.com/')
print('going to "%s"...' % p.getCurrentPageUrl())

# Keep the page content for the link search, then display the page.
pageContent = p.getCurrentPageContent()
print('Opening Page in browser for view..')
p.openHtmlInBrowser()

# Case-insensitive pattern handed to HtmlLibrary.getLinks as the link filter.
regexSearchPatternForLinks = re.compile(
    '(signin|sign in|login|log in|browse)',
    re.IGNORECASE,
)
links = HtmlLibrary.getLinks(pageContent, regexSearchPatternForLinks)

print('Links on page matching pattern:')
# NOTE(review): the separator and blank line are assumed to belong to the
# loop body (one group per link) -- confirm against the original layout.
for link, linkHref in links.items():
    print('Html Link: "%s"' % link)
    print('#############################')
    # Empty string argument emits just a newline, like a bare print statement.
    print('')

print('Open page after search..')
p.openHtmlInBrowser()
import HtmlLibrary from PythonBrowser import PythonBrowser import re import urlparse p = PythonBrowser() p.goToPage('http://www.youku.com/') print 'going to "%s"...'%p.getCurrentPageUrl() pageContent = p.getCurrentPageContent() print 'Opening Page in browser for view..' p.openHtmlInBrowser() regexSearchPatternForLinks = re.compile('(signin|sign in|login|log in|browse)', re.IGNORECASE) links = HtmlLibrary.getLinks(pageContent, regexSearchPatternForLinks) print 'Links on page matching pattern:' for link, linkHref in links.items(): print 'Html Link: "%s"'%link print '#############################' print print 'Lets search for first 10 images...' allImages, allImagesTitles = HtmlLibrary.findElementNameBegin(pageContent, 'img') i =0; for pic in allImages: print pic print '#######################' i = i +1; if (i>10):