Esempio n. 1
0
 def getBookAsinList(self):
     """Rebuild ``self.bookAsinList`` from the product-list page at ``self.url``.

     The page is obtained through ``MyHtml.getHtml`` (``self.tag`` is passed
     as ``name`` and ``self.fetchDate`` as ``crawlDate``).  Within the first
     ``div`` of class ``productList``, every other ``tr`` of class ``small``
     is inspected, starting with the first one.  An ASIN is appended when it
     is non-empty, does not start with ``'B'``, and the book loaded for it
     passes ``self.checkBook``.

     Raises:
         IndexError: if the page has no ``productList`` div, or an inspected
             row has no link in its second cell.
     """
     self.bookAsinList = []
     page = MyHtml.getHtml(
         self.url, name=self.tag, crawlDate=self.fetchDate)
     productDiv = page.xpath(".//div[@class='productList']")[0]
     rows = productDiv.xpath("./table/tr[@class='small']")
     for position, row in enumerate(rows):
         # Only rows at even positions (0, 2, 4, ...) are inspected.
         if position % 2 == 0:
             link = row.xpath("./td[2]/a")[0]
             asin = Book.getAsinFromUrl(link.attrib['href'].strip())
             if asin == '' or asin[0] == 'B':
                 continue
             candidate = Book.loadBookByAsin(asin, self.fetchDate)
             if self.checkBook(candidate):
                 self.bookAsinList.append(asin)
Esempio n. 2
0
 def getBookAsinList(self):
     """Collect the ASINs listed on the page at ``self.url``.

     The page is obtained through ``MyHtml.getHtml`` with the fixed name
     ``"NewReleasedBooks"`` and ``self.fetchDate`` as ``crawlDate``.  Every
     ``div`` of class ``zg_itemImmersion`` inside the element with id
     ``zg_centerListWrapper`` contributes its ASIN, provided the ASIN is
     non-empty.  The resulting list is stored in ``self.bookAsinList`` and
     printed.

     Raises:
         IndexError: if the wrapper element, an item's link, or an item's
             rank ``span`` is missing.
     """
     self.bookAsinList = []
     page = MyHtml.getHtml(
         self.url, name="NewReleasedBooks", crawlDate=self.fetchDate)
     wrapper = page.xpath(".//div[@id='zg_centerListWrapper']")[0]

     for entry in wrapper.xpath("./div[@class='zg_itemImmersion']"):
         link = entry.xpath("./div[2]/div[2]/a")[0]
         aUrl = link.attrib['href'].strip()
         # The rank is still parsed (so a missing rank element fails here),
         # but nothing consumes it: the rank-aware checkBook filter is
         # currently disabled.
         rank = entry.xpath("./div[@class='zg_rankDiv']/span")[0].text.strip()
         asin = Book.getAsinFromUrl(aUrl)
         if asin == '':
             continue
         # The book is loaded for every ASIN, but the returned object is
         # not used in this method.
         Book.loadBookByAsin(asin, self.fetchDate)
         self.bookAsinList.append(asin)
     # A parenthesised single argument prints identically on Python 2.
     print(self.bookAsinList)
Esempio n. 3
0
 def getPreviousReviewedBook(self,reviewerId):
     """Walk a reviewer's review pages and record review statistics on ``self``.

     The member-reviews page for ``reviewerId`` is obtained through
     ``MyHtml.getHtml``; the page count is read from its last pagination
     link (one page when there is none).  Every page is then requested with
     ``sortBy=MostRecentReview`` and each ``tr[@valign="top"]`` row of the
     review table is examined.

     Args:
         reviewerId: reviewer identifier, appended verbatim to the
             member-reviews URL (must be a string).

     Attributes written:
         allRevLink: URL of the first member-reviews page.
         counter: reset to 0.
         previousBookReviewDate, previousBookPublishDate: taken from the
             first row whose product note contains ``"Edition"``; left
             untouched when no such row exists.
         lRevTime: date next to the first row of page 1, only when it is
             currently ``''``.
         fRevTime: date next to the last row of the last page, only when
             it is currently ``''``.
         sum: increased by the rating found next to every row.
         avgRate: ``sum / rNum`` rounded to 2 places, only when ``rNum``
             is non-zero.
         duration: days between ``fRevTime`` and ``lRevTime``, or 0 when
             either is still ``''``.

     ``self.lRevTime``, ``self.fRevTime``, ``self.sum`` and ``self.rNum``
     are read, so they must be initialised before this method is called.

     Raises:
         IndexError: if an expected element (second body table, sibling of
             a row, rating image, product link) is missing from a page.
         ValueError: if the page count or a rating cannot be parsed as a
             number.
     """
     def reviewDateNodes(row):
         # Dates are looked up in <nobr> elements below the element that
         # immediately follows a review row.
         return row.xpath('./following-sibling::*')[0].xpath('.//nobr')

     initUrl = "http://www.amazon.com/gp/cdp/member-reviews/" + reviewerId
     self.allRevLink = initUrl
     html = MyHtml.getHtml(initUrl)
     ftable = html.xpath('.//body/table[2]')[0]
     pages = ftable.xpath('./tr/td[2]/table[1]/tr[1]/td[2]/b/a[last()]')
     if pages:
         totalPages = pages[0].text.strip()
     else:
         totalPages = 1
     strPages = str(totalPages)
     print(strPages)
     if "-" in strPages:
         # The link text contains a dash (a range label); the part after
         # the first dash is used as the page count.
         pageRange = totalPages.split('-')
         print("totalPages %s" % (pageRange,))
         totalPages = pageRange[1]
     print("%s totalPages" % (totalPages,))
     sortBy = 'MostRecentReview'
     self.counter = 0
     foundPrevious = False
     # Parsed once; the loop bound and the last-page test both need it.
     lastPage = int(totalPages)
     for j in range(1, lastPage + 1):
         baseUrl = initUrl + '?pageNumber={}&sortBy={}'.format(j, sortBy)
         print("%s %s %s" % (baseUrl, j, totalPages))
         html = MyHtml.getHtml(baseUrl)
         ftable = html.xpath('.//body/table[2]')[0]
         mainTable = ftable.xpath('./tr/td[2]/table[2]/tr[@valign="top"]')
         for row in mainTable:
             if row is None:
                 continue

             # Only the first row whose product note mentions "Edition" is
             # recorded as the previously reviewed book.
             editionNodes = row.xpath('./td[5]/table/tr[2]/td/b')
             if not foundPrevious and editionNodes:
                 edition = editionNodes[0].text
                 if edition is not None and "Edition" in edition.strip():
                     print("got the previous book")
                     foundPrevious = True
                     dateNodes = reviewDateNodes(row)
                     if dateNodes:
                         reviewDate = dateNodes[0].text.strip()
                         print("got the reviewDate %s" % (reviewDate,))
                         self.previousBookReviewDate = CommonTool.strToDate(reviewDate)

                     # Link of the previously reviewed book.
                     url = row.xpath('./td[5]/table/tr[1]/td/b/a')[0].attrib['href']
                     asin = Book.getAsinFromUrl(url)
                     previousBook = Book.loadBookByAsin(asin)
                     print("asinofPrevious %s" % (asin,))
                     print("previousBook %s" % (previousBook,))
                     self.previousBookPublishDate = previousBook.publishDate

             # Pages are requested most-recent-first, so the first row of
             # page 1 is presumably the reviewer's latest review.
             if j == 1 and self.lRevTime == '':
                 dateNodes = reviewDateNodes(mainTable[0])
                 print("%s lRevtime" % (dateNodes,))
                 if dateNodes:
                     self.lRevTime = CommonTool.strToDate(dateNodes[0].text.strip())
             print("value of j %s" % (j,))
             # ...and the last row of the last page presumably the earliest.
             if j == lastPage and self.fRevTime == '':
                 print("inside frevtime loop")
                 dateNodes = reviewDateNodes(mainTable[-1])
                 print("%s fRevtime" % (dateNodes,))
                 if dateNodes:
                     self.fRevTime = CommonTool.strToDate(dateNodes[0].text.strip())

             # The rating is the text before "out" in the title of the
             # first image under the row's following sibling.
             siblings = row.xpath("./following-sibling::*")
             if siblings:
                 title = siblings[0].xpath(".//img")[0].attrib['title']
                 self.sum = self.sum + float(title.split("out")[0].strip())
     print("sum %s" % (self.sum,))
     # Compare by value: "is not 0" tests object identity, which is wrong
     # for zeros that are not the int 0 (e.g. 0.0) and would then divide
     # by zero.
     if self.rNum != 0:
         self.avgRate = round(self.sum / self.rNum, 2)
     if self.lRevTime == '' or self.fRevTime == '':
         duration = 0
     else:
         duration = (self.lRevTime - self.fRevTime).days
     self.duration = int(duration)