Beispiel #1
0
url = 'c:\\python25\web.html'
ie = PAMIE()
ie.navigate(url)
FormatHtml(ie.outerHTML())

"""

#url = 'http://www.sciencedirect.com/science?_ob=ArticleListURL&_method=tag&refSource=search&_st=13&count=739&_chunk=2&PREV_LIST=1&NEXT_LIST=3&view=c&md5=e528e094dcc02c469caa87884e42fb84&_ArticleListID=987492397&sisr_search=&next=next+page&sisrterm='

# Start page of the ScienceDirect article list that is walked page by page.
url = 'http://www.sciencedirect.com/science?_ob=ArticleListURL&_method=list&_ArticleListID=988190031&_sort=v&_st=17&view=c&_acct=C000050221&_version=1&_urlVersion=0&_userid=10&md5=b0d9363ae004117965dbf00dd37582dc'

# Open the start page through a PAMIE automation object.
ie = PAMIE()
ie.navigate(url)

# Process the first page and keep the counter FormatHtml hands back.
# The original discarded this return value, so the loop below restarted
# from 0 on the second page even though it threads the counter through
# every later call.
# NOTE(review): FormatHtml is defined elsewhere; presumably it returns the
# updated running counter, as the assignment inside the loop relies on.
counter = FormatHtml(ie.outerHTML(), 0)

# Disabled alternative: collect each page URL for the commented-out
# urllib2 download loop further down instead of formatting pages in place.
#urls = [url]
# Keep clicking the 'next' button for as long as buttonClick reports a
# truthy result, formatting each newly loaded page.
while ie.buttonClick('next'):
#	url = ie.locationURL()
#	print "NEXT page: ", url
#	urls.append(url)
	counter = FormatHtml(ie.outerHTML(), counter)
		
#for url in urls:
#	print url
#	req = urllib2.Request(url)
#	req.add_header("User-Agent", 'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11')
#
#	f = urllib2.urlopen(req)
#