コード例 #1
0
ファイル: PythonBrowser.py プロジェクト: jk983294/Store
 def goToLinkName(self, linkName):
     """Follow the single link on the current page that matches ``linkName``.

     ``linkName`` is handed to ``HtmlLibrary.getLinks`` together with the
     current page content. It may be a plain string or a compiled regex
     (any object exposing ``.pattern``).

     Returns:
         The result of ``HtmlLibrary.getLinks`` (holding exactly one entry),
         after navigating to that entry's target.

     Raises:
         NameError: if no page has been loaded yet, if no link matches, or
             if more than one link matches.
     """
     if self.__currPage is None:
         raise NameError('no page to open, please go to a page, before calling this')

     res = HtmlLibrary.getLinks(self.__currPageContent, linkName)

     if len(res) == 1:
         # Exactly one (name, target) pair: take its target.
         # list(...) keeps this working whether items() returns a list or a view.
         link = list(res.items())[0][1]
         if link.startswith('http'):
             # Absolute URL: navigate to the target itself. (The previous code
             # passed linkName -- the search text/regex -- to goToPage here.)
             nextUrl = link
         else:
             # Relative URL: resolve it against the current page.
             nextUrl = urlparse.urljoin(self.getCurrentPageUrl(), link)
         self.goToPage(nextUrl)
         return res

     # Zero or several matches: show what was searched for, then fail.
     # Checking for .pattern (instead of type(...) == str) keeps non-str
     # strings from hitting an AttributeError before the NameError is raised.
     if hasattr(linkName, 'pattern'):
         print('Regex: ' + linkName.pattern)
     else:
         print(linkName)

     if len(res) == 0:
         raise NameError('Didn\'t found the link')
     raise NameError('more then one link for this name, so please choose manually.')
コード例 #2
0
ファイル: UsageExample1.py プロジェクト: jk983294/Store
import HtmlLibrary
from PythonBrowser import PythonBrowser
import re
import urlparse

# Example: load a page, show it in a browser, and list the links whose name
# matches a sign-in / browse pattern.
# NOTE: every print below uses the single-argument parenthesized form, which
# emits exactly the same text as the bare print statement under Python 2.
browser = PythonBrowser()
browser.goToPage('http://www.youku.com/')

currentUrl = browser.getCurrentPageUrl()
print('going to "%s"...' % currentUrl)
html = browser.getCurrentPageContent()

print('Opening Page in browser for view..')
browser.openHtmlInBrowser()

# Case-insensitive pattern for the link names of interest.
loginLinkPattern = re.compile('(signin|sign in|login|log in|browse)', re.IGNORECASE)
matchingLinks = HtmlLibrary.getLinks(html, loginLinkPattern)

print('Links on page matching pattern:')
for linkText, _href in matchingLinks.items():
    print('Html Link: "%s"' % linkText)
    print('#############################')


# Blank separator line.
print('')
print('Open page after search..')
browser.openHtmlInBrowser()


コード例 #3
0
ファイル: UsageExample.py プロジェクト: jk983294/Store
import HtmlLibrary
from PythonBrowser import PythonBrowser
import re
import urlparse

# Example: load a page, show it in a browser, list the links whose name
# matches a sign-in / browse pattern, then collect the page's <img> elements.
# NOTE: every print below uses the single-argument parenthesized form, which
# emits exactly the same text as the bare print statement under Python 2.
p = PythonBrowser()
p.goToPage('http://www.youku.com/')

print('going to "%s"...' % p.getCurrentPageUrl())
pageContent = p.getCurrentPageContent()

print('Opening Page in browser for view..')
p.openHtmlInBrowser()

# Case-insensitive pattern for the link names of interest.
regexSearchPatternForLinks = re.compile(
    '(signin|sign in|login|log in|browse)',
    re.IGNORECASE,
)
links = HtmlLibrary.getLinks(pageContent, regexSearchPatternForLinks)

print('Links on page matching pattern:')
for link, linkHref in links.items():
    print('Html Link: "%s"' % link)
    print('#############################')

# Blank separator line.
print('')
print('Lets search for first 10 images...')
# findElementNameBegin returns a pair; both halves are kept under their
# original names for the code that follows.
allImages, allImagesTitles = HtmlLibrary.findElementNameBegin(pageContent, 'img')
i  =0;
for pic in allImages:
    print pic
    print '#######################'
    i = i +1;
    if (i>10):