Exemple #1
0
 def testScrape1(self):
     # Scrape the saved Half.com search page for one ISBN and print one
     # "condition seller itemid price" line per offer listed on the saved
     # "all brand new items" page.
     #
     # Both pages are read through Browser.open_local_file() from HTML
     # fixtures. Plain comments are used instead of a docstring so that
     # unittest keeps reporting this test by its method name.
     import re

     # Compiled once up front instead of on every loop iteration.
     view_all_re = re.compile(r'View all.*')
     # Group 1 captures just the numeric id of an "itemid=<digits>" link.
     itemid_re = re.compile(r"itemid=(\d+)")

     br = Browser()
     # http://books.half.ebay.com/ws/web/HalfISBNSearch?isbn=9781565920903 -> halfbook-test1.html
     res = br.open_local_file("halfbook-test1.html")
     soup = BeautifulSoup(res.read())
     ratings = soup.findAll('span', {'class': 'Header'})
     for r in ratings:
         rating = r.text
         # The tables that belong to a heading are looked up from its
         # fourth ancestor.
         section = r.parent.parent.parent.parent
         tables = section.findAll('table')
         # NOTE(review): neither value below is used further down. They are
         # kept because the lookups raise if the fixture no longer has the
         # expected layout -- confirm that this implicit check is intended.
         summary_rows = tables[1].findAll('tr')[1:]
         view_all_href = tables[2].find(text=view_all_re).parent['href']
         if rating != 'Brand New':
             continue

         # Presumably this fixture is the page view_all_href points to on
         # the live site; here it is loaded from disk instead.
         detail_res = br.open_local_file("halfbook-test1-allbrandnewitems.html")
         detail_soup = BeautifulSoup(detail_res.read())
         detail_headers = detail_soup.findAll('span', {'class': 'Header'})
         detail_section = detail_headers[0].parent.parent.parent.parent
         # [1:] skips the first row of the table (presumably a header row).
         offer_rows = detail_section.findAll('table')[3].findAll('tr')[1:]
         for row in offer_rows:
             item_link = row.find('a', href=itemid_re)
             # group(1) is the digits only; group(0) would also include
             # the literal "itemid=" prefix.
             itemid = itemid_re.search(item_link['href']).group(1)
             seller = row.find('a', {'class': 'SellerDisplayLink'}).text
             price = row.find('span', {'class': 'ItemPrice'}).text
             # One pre-formatted argument: valid syntax and the same output
             # line under both Python 2 and Python 3.
             print("%s %s %s %s" % (rating, seller, itemid, price))