Example 1
def getReviewList(asin):
    reviewsResult = []
    baseUrl = 'http://www.amazon.com/product-reviews/'
    html = MyHtml.getHtml(baseUrl + asin, ffhead=True)
    spanPaging = html.xpath('.//span[@class="paging"]')
    if len(spanPaging) > 0:
        totalPage = int(spanPaging[0].xpath('./a')[-2].text.strip())
    else:
        totalPage = 1
    sortBy = 'recent'
    pageNumber = totalPage
    while pageNumber > 0:
        url = baseUrl + asin + \
            '?pageNumber={}&sortBy={}'.format(str(pageNumber), sortBy)
        # print pageNumber, url
        html = MyHtml.getHtml(url, ffhead=True)
        tableProductReviews = html.xpath('.//table[@id="productReviews"]')[0]
        divReviewList = tableProductReviews.xpath('./tr/td/div')[::-1]
        for divReview in divReviewList:
            a = divReview.xpath('./preceding-sibling::a[1]')[0]
            reviewID = a.attrib['name']
            reviewsResult.append(reviewID)
        # end of for
        pageNumber -= 1
    # end of while
    return reviewsResult
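A minimal usage sketch for the snippet above, assuming it lives in a module together with the MyHtml helper used throughout these examples; the ASIN string is a placeholder, not a real product:

# Hypothetical driver for getReviewList(); the ASIN below is a placeholder.
if __name__ == '__main__':
    sampleAsin = '0123456789'  # placeholder ASIN
    reviewIDs = getReviewList(sampleAsin)
    print 'fetched {0} review IDs for {1}'.format(len(reviewIDs), sampleAsin)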
Example 2
def getReviewIndex(asin, targetReviewID):
    baseUrl = 'http://www.amazon.com/product-reviews/'
    html = MyHtml.getHtml(baseUrl + asin, ffhead=True)
    spanPaging = html.xpath('.//span[@class="paging"]')
    if len(spanPaging) > 0:
        totalPage = int(spanPaging[0].xpath('./a')[-2].text.strip())
    else:
        totalPage = 1
    sortBy = 'bySubmissionDateAscending'
    pageNumber = 1
    rank = 0
    while pageNumber <= totalPage:
        url = baseUrl + asin + \
            '?pageNumber={}&sortBy={}'.format(str(pageNumber), sortBy)
        # print pageNumber, url
        html = MyHtml.getHtml(url, ffhead=True)
        tableProductReviews = html.xpath('.//table[@id="productReviews"]')[0]
        divReviewList = tableProductReviews.xpath('./tr/td/div')
        for divReview in divReviewList:
            rank += 1
            a = divReview.xpath('./preceding-sibling::a[1]')[0]
            reviewID = a.attrib['name']
            if reviewID == targetReviewID:
                return rank
        # end of for
        pageNumber += 1
    return -1
Example 3
 def getReviewList(self, asin):
     reviewsResult = []
     totalPage=0
     baseUrl = 'http://www.amazon.com/product-reviews/'
     html = MyHtml.getHtml(baseUrl + asin, ffhead=True)
     pageNumberList = html.xpath(".//ul[@class='a-pagination']//li")
     if len(pageNumberList)>0:
         countOfListItems=len(pageNumberList)
     
         indexOfPageTotal=countOfListItems-2
         listitem=pageNumberList[indexOfPageTotal]
         totalPage=int(listitem.xpath('.//a')[0].text.strip())
     else: 
         totalPage=1
     sortBy = 'recent'
     pageNumber = 1
     foundKnownReview = False
     
     while pageNumber <= totalPage:
         url = baseUrl + asin + \
             '?pageNumber={}&sortBy={}'.format(str(pageNumber), sortBy)
    
         html = MyHtml.getHtml(url, ffhead=True)
         isCount=html.xpath('.//div[@id="cm_cr-product_info"]/div/div[1]/div[2]/span')
         if isCount is not None and len(isCount)>0:
             countOfReviews=int(isCount[0].text.strip())
         else:
             countOfReviews=0
         print countOfReviews,"count of reviews"
         if countOfReviews>0:
             divWholeReviewList = html.xpath('.//div[@id="cm_cr-review_list"]')[0]
             divReviewList = divWholeReviewList.xpath('./div[@id]')
             for divReview in divReviewList:
                 reviewID = divReview.attrib['id']
                 if (reviewID in reviewsResult) or (reviewID in self.reviewList):
                     foundKnownReview = True
                     break
                 aReview = Review.Review()
                 aReview.reviewID = reviewID
                 Review.saveReview(review=aReview)
                 reviewsResult.append(reviewID)
             # end of for
             pageNumber += 1
             if foundKnownReview:
                 break
         else:
             pageNumber+=1
         # end of while
     return reviewsResult[::-1]
Example 4
 def getComments(self):
     self.comment = ''
     if self.numOfComments == 0:
         return
     url = 'http://www.amazon.com/review/{0}/'.format(self.reviewID)
     flag = True
     while flag:
         try:
             html = MyHtml.getHtml(url)
             commentTexts = html.xpath('.//div[@class="postBody"]/div[3]')
             for commentText in commentTexts:
                 self.comment += commentText.text.strip().replace('\n',
                                                                  '<br />')
                 self.comment += '<end />'
             try:
                 url = html.xpath(
                     './/div[@class="cdPageSelectorPagination"]'
                     '/a[text()="Next >"]')[0].attrib['href']
             except LookupError:
                 flag = False
                 break
         except Exception, e:
             sys.stderr.write(
                 'getComments -> getHtml reviewID: {0} errmsg: {1}\n'.format
                 (self.reviewID, str(e)))
             flag = False
             break
Example 5
 def __init__(self, url):
     self.url = url
     self.html = MyHtml.getHtml(url)
     self.revList = []
     self.solveRankingsTable()
     # self.printData()
     self.printReviewers()
Example 6
    def getProductLinkPages(self):
        """
        get the urls of web pages which contains the list of products reviewers made reviews about,
        and save those urls to self.pLinkList. This pLinkList will be used in getProductLinks()
        :return: null
        """
        print "allRevLinks",self.allRevLink
        if self.allRevLink == '':
            self.totalPage = 0
            return
        allLinks = MyHtml.getHtml(self.allRevLink, self.rID + "_AllProductLinks_1")
        if int(self.rNum) % 10 != 0:
            self.totalPage = int(self.rNum) / 10 + 1
        else:
            self.totalPage = int(self.rNum) / 10

        print self.totalPage, 'totalPage'
        linkHead = self.allRevLink.replace('/ref=pdp_new', '').strip()
        self.pLinkList.append(self.allRevLink)
        page = 2
        while page <= self.totalPage:
            tempLink = linkHead + "?ie=UTF8&display=public&page=" + \
                str(page) + "&sort_by=MostRecentReview"
            page = page + 1
            self.pLinkList.append(tempLink)
        del page
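The docstring above says each listing page holds ten reviews, so totalPage is simply a ceiling division of rNum by 10. A minimal sketch of that arithmetic, with pageCount as a hypothetical helper name (not part of the original class):

# Hypothetical helper: ceiling division by the 10-reviews-per-page layout,
# equivalent to the if/else on rNum in the example above.
def pageCount(rNum, perPage=10):
    n = int(rNum)
    return n // perPage + (1 if n % perPage != 0 else 0)

# e.g. pageCount('23') == 3, pageCount('30') == 3, pageCount('0') == 0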
Example 7
def getReviews(asin, numOfReviews=1, fetchDate=date.today(),
               bookPublishDate=date.today()):
    reviewsResult = []
    totalPage=0
    baseUrl = 'http://www.amazon.com/product-reviews/'
    html = MyHtml.getHtml(baseUrl + asin, ffhead=True)
    countOfReviews=int(html.xpath('.//div[@id="cm_cr-product_info"]/div/div[1]/div[2]/span')[0].text.strip())
    if countOfReviews==0:
        return      
    pageNumberList = html.xpath(".//ul[@class='a-pagination']//li")
    if len(pageNumberList)>0:
        countOfListItems=len(pageNumberList)
        print "countOflIstItems",countOfListItems
        indexOfPageTotal=countOfListItems-2
        listitem=pageNumberList[indexOfPageTotal]
        totalPage=int(listitem.xpath('.//a')[0].text.strip())
    else:
        totalPage=1
    print "totalpage",totalPage
    # Most Helpful First
    
    sortBy = 'helpful'
    pageNumber = 1
    rank = 0
    while pageNumber <= totalPage:
        url = baseUrl + asin + \
            '?pageNumber={}&sortBy={}'.format(str(pageNumber), sortBy)
        print pageNumber, url, totalPage
        
        rank = solveReviewPage(asin, rank, url, fetchDate, bookPublishDate)
        sortBy = 'recent'
        maxRank = rank
        rank += 1
        rank = solveReviewPage2(reviewsResult, rank, url,fetchDate)
        pageNumber += 1
Example 8
    def __init__(self, rID, hasPhoto='0', linkFlag=False):
        if rID != '':
            self.url = "http://www.amazon.com/gp/pdp/profile/" + rID
            self.html = MyHtml.getHtml(self.url, rID + '_profile', ffhead=True)
            if self.html is None:
                self.initEmptyObj()
            else:
                self._saveToFile = True
                self.pLinkList = []
                self.allProductLinks = []
                self.avgRate=0.0
                self.duration=0
                self.previousBookPublishDate='N/A'
                self.previousBookReviewDate='N/A'
                self.rRank=''
                self.fRevTime=""
                self.lRevTime = ""
                self.counter = 0
                self.aboutMe='N/A'
                self.sum = 0.0
                self.solveProfileUp()
                self.rID = rID
                
                userName = self.html.xpath(".//div[@class='a-section']/h1")[0].text.strip()
                if len(userName) == 0:
                    self.username = '******'
                    self.rName='N/A'
                else:
                    self.username = userName
                    self.rName=userName

                # getLocation
                #/html/body/div[3]/div[2]/div/div/div/div[1]/div[2]/span[2]/div/div[1]/span
                locSpan = self.html.xpath(".//div[@class='profile-info']/div[1]/span")
                if locSpan:
                    self.location = locSpan[0].text.strip()
                    print self.location
                    if self.location == '':
                        self.location = 'N/A'
                else:
                    self.location = 'N/A'

                self.hasPhoto = hasPhoto
                if rID is not None and len(rID)>0:
                    if self.rNum >0:
                        self.getPreviousReviewedBook(rID)

                del self.html
                saveReviewer(self)
        else:
            self.initEmptyObj()
Example 9
 def __init__(self, asin):
     self._printWithTag = False
     self.asin = asin
     self.url = getURLFromAsin(asin)
     self.html = MyHtml.getHtml(self.url)
     if self.html is None:
         self.asin = None
         sys.stderr.write('getHtml Error: not found\n')
         return
     self.fetchDate = date.today()
     self.tag = []
     self.initAttrib()
     self.reviewList = []
Example 10
 def getBookAsinList(self):
     self.bookAsinList = []
     html = MyHtml.getHtml(
         self.url, name=self.tag, crawlDate=self.fetchDate)
     divProductList = html.xpath(".//div[@class='productList']")[0]
     trListProductList = divProductList.xpath("./table/tr[@class='small']")
     for i, tr in enumerate(trListProductList):
         if i % 2 == 1:
             continue
         aUrl = tr.xpath("./td[2]/a")[0].attrib['href'].strip()
         asin = Book.getAsinFromUrl(aUrl)
         if asin != '' and asin[0] != 'B':
             book = Book.loadBookByAsin(asin, self.fetchDate)
             if self.checkBook(book):
                 self.bookAsinList.append(asin)
Example 11
 def getBookAsinList(self):
     self.bookAsinList=[]
     html=MyHtml.getHtml(self.url,name="NewReleasedBooks",crawlDate=self.fetchDate)
     divBooksList = html.xpath(".//div[@id='zg_centerListWrapper']")[0]
     divItemsList=divBooksList.xpath("./div[@class='zg_itemImmersion']")
     
     for item in divItemsList:
     #item=divItemsList[0]
     #if item:
         aUrl=item.xpath("./div[2]/div[2]/a")[0].attrib['href'].strip()
         rank=item.xpath("./div[@class='zg_rankDiv']/span")[0].text.strip()
         asin=Book.getAsinFromUrl(aUrl)
         if asin!='':
             book=Book.loadBookByAsin(asin,self.fetchDate)
             #if self.checkBook(book,rank):
             self.bookAsinList.append(asin)
     print self.bookAsinList
Example 12
def solveReviewPage2(reviewsResult, rank, url, fetchDate):
    html = MyHtml.getHtml(url, ffhead=True)
    
    divWholeReviewList = html.xpath('.//div[@id="cm_cr-review_list"]')[0]
    divReviewList = divWholeReviewList.xpath('./div[@id]')  # [::-1]
    for divReview in divReviewList:
        reviewID = divReview.attrib['id']
        rank -= 1
        
        aReview = loadReview(reviewID)
        aReview.timeRank=rank
        saveReview(review=aReview)
        reviewsResult.append(aReview.reviewID)
        rank += 1
    # end of for
    return rank
Example 13
    def update(self):
        self.html = MyHtml.getHtml(self.url)
        # update bookRating, numberOfReviews, bookRanking
        self.getReviews()

        divCenterCol = self.html.xpath("//div[@id='centerCol']")
        if divCenterCol is not None and len(divCenterCol)>0:
            divCenterCol=divCenterCol[0]
            divAveReviews = divCenterCol.xpath(
                ".//div[@id='averageCustomerReviews']")
            try:
                self.rate = divAveReviews[0].xpath(
                    "./span")[0].attrib['title'].split(' out of ')[0]
                self.numOfReviews = divAveReviews[0].xpath(
                    ".//span[@id='acrCustomerReviewText']")[0].text.split(' ')[0]
                self.numOfReviews = int(
                    filter(lambda x: x.isdigit(), self.numOfReviews))
            except Exception:
                self.rate = 0
                self.numOfReviews = 0
            del divAveReviews

            self.solveProductDetails()
Example 14
 def getPreviousReviewedBook(self,reviewerId):
     initUrl="http://www.amazon.com/gp/cdp/member-reviews/"+reviewerId
     self.allRevLink=initUrl
     html=MyHtml.getHtml(initUrl)
     ftable=html.xpath('.//body/table[2]')[0]
     pages=ftable.xpath('./tr/td[2]/table[1]/tr[1]/td[2]/b/a[last()]')
     if pages is not None and len(pages)>0:
         totalPages=pages[0].text.strip()
     else:
         totalPages=1
     strPages=str(totalPages)
     print strPages
     if "-" in strPages:
         totalPages=totalPages.split('-')
         print "totalPages",totalPages
         totalPages=totalPages[1]
     print totalPages,"totalPages"
     sortBy='MostRecentReview'
     j=1
     self.counter=0
     flag=0
     for j in range(1,(int(totalPages)+1)):
         baseUrl="http://www.amazon.com/gp/cdp/member-reviews/"+reviewerId
         baseUrl=baseUrl+ \
         '?pageNumber={}&sortBy={}'.format(str(j), sortBy)
         print baseUrl,j,totalPages
         html=MyHtml.getHtml(baseUrl)
         ftable=html.xpath('.//body/table[2]')[0]
         mainTable=ftable.xpath('./tr/td[2]/table[2]/tr[@valign="top"]')
         for row in mainTable:
             if row is not None:
                 isBook=row.xpath('./td[5]/table/tr[2]/td/b')
                 if isBook is not None and len(isBook)>0:
                     if isBook[0].text is not None and flag==0:
                         edition=isBook[0].text.strip()
                         if "Edition" in edition:
                             print "got the previous book"
                             flag=1
                             reviewdate=row.xpath('./following-sibling::*')
                             reviewdate=reviewdate[0].xpath('.//nobr')
                             if reviewdate:
                                 reviewdate=reviewdate[0].text.strip()
                                 print "got the reviewDate",reviewdate
                                 self.previousBookReviewDate=CommonTool.strToDate(reviewdate)
                                 
                             #to get link of the previous reviewedbook 
                             url=row.xpath('./td[5]/table/tr[1]/td/b/a')[0].attrib['href']
                             asin=Book.getAsinFromUrl(url)
                             previousBook=Book.loadBookByAsin(asin)
                             print "asinofPrevious",asin
                             print "previousBook",previousBook
                             self.previousBookPublishDate=previousBook.publishDate
                 
                             
                 if j==1 and self.lRevTime=='':
                     reviewdate=mainTable[0].xpath('./following-sibling::*')
                     reviewdate=reviewdate[0].xpath('.//nobr')
                     print reviewdate,"lRevtime"
                     if reviewdate:
                         reviewdate=reviewdate[0].text.strip()
                         self.lRevTime=CommonTool.strToDate(reviewdate)
                 print "value of j",j
                 if j==int(totalPages) and self.fRevTime=='':
                     print "inside frevtime loop"
                     reviewdate=mainTable[-1].xpath('./following-sibling::*')
                     reviewdate=reviewdate[0].xpath('.//nobr')
                     print reviewdate,"fRevtime"
                     if reviewdate:
                         reviewdate=reviewdate[0].text.strip()
                         self.fRevTime=CommonTool.strToDate(reviewdate)
                     
                 # rate
                 rateObj = row.xpath("./following-sibling::*")
                 rate = 'N/A'
                 if len(rateObj) != 0:
                     rateObj1 = rateObj[0].xpath(".//img")
                     title = rateObj1[0].attrib['title']
                     rate = title.split("out")[0].strip()
                     self.sum = self.sum + float(rate)        
     # end of for loop
     print "sum",self.sum
     if self.rNum != 0:
         self.avgRate=self.sum/self.rNum
         self.avgRate=round(self.avgRate,2)
     if self.lRevTime=='' or self.fRevTime=='':
         duration=0
     else:
         duration = (self.lRevTime-self.fRevTime).days
     self.duration=int(duration)
Example 15
    def getProductLinksUsingThreads(self,productList):
        print "getting products using thread method"
        for link in productList:
            
            linkPage = MyHtml.getHtml(
                link, self.rID + "_AllProductLinks_" + str(self.page))
            trObjs = linkPage.xpath(
                ".//body/table[2]/tr[1]/td[2]/table[2]/tr[@valign='top']")
            if link is self.firstLink:
                try:
                    firstTimeObj = trObjs[-1]
                except LookupError:
                    print link
                    exit(0)
                fTime = firstTimeObj.xpath("./following-sibling::*")
                if len(fTime) == 0:
                    self.fRevTime = 'N/A'
                else:
                    # print etree.tostring(fTime[0])
                    fTime = fTime[0].xpath(".//nobr")
                    if len(fTime) == 0:
                        self.fRevTime = 'N/A'
                    else:
                        self.fRevTime = fTime[0].text.strip()
                        self.fRevTime = CommonTool.strToDate(self.fRevTime)
                del fTime

            if link is self.lastLink:
                lastTimeObj = trObjs[0]
                lTime = lastTimeObj.xpath("./following-sibling::*")
                if len(lTime) == 0:
                    self.lRevTime = 'N/A'
                else:
                    # print etree.tostring(lTime[0])
                    lTime = lTime[0].xpath(".//nobr")
                    if len(lTime) == 0:
                        self.lRevTime = 'N/A'
                    else:
                        self.lRevTime = lTime[0].text.strip()
                        self.lRevTime = CommonTool.strToDate(self.lRevTime)
                del lTime

            for trObj in trObjs:
                tableObj = trObj.xpath(
                    "./td[@class='small'][3]/table[@class='small']")
                # aLink (default to '' so a missing table cannot leave aLink undefined)
                aLink = ''
                if len(tableObj) != 0:
                    aLink = tableObj[0].xpath(".//a")
                    if len(aLink) == 0:
                        aLink = ''
                    else:
                        aLink = aLink[0].attrib['href']

                # rate
                rateObj = trObj.xpath("./following-sibling::*")
                rate = 'N/A'
                if len(rateObj) != 0:
                    try:
                        rateObj1 = rateObj[0].xpath(".//img")
                        title = rateObj1[0].attrib['title']
                        rate = title.split("out")[0].strip()
                        self.sum = self.sum + float(rate)
                        self.counter = self.counter + 1
                    except Exception, e:
                        sys.stderr.write(str(e) + ' rate Exception\n')

                # reviewID
                reviewID = ''
                rIDObj = rateObj[0].xpath(".//a")
                if len(rIDObj) != 0:
                    reviewID = rIDObj[0].attrib['name']

                # label The review is from
                label = ''
                labelObj = rateObj[0].xpath(".//div[@class='tiny']")
                if len(labelObj) != 0:
                    # verified purchase + the review is from
                    aObj = labelObj[-1].xpath(".//a")
                    if len(aObj) != 0:
                        label = filtTag.filter_tags(
                            etree.tostring(aObj[0]).strip())

                # 1---book 0---product
                parLeft = label.find('(')
                parRight = label.find(')')

                if parLeft == -1 and parRight == -1:
                    label = '0'
                elif label[-1] == ')':
                    label = label.split('(')
                    label = label[-1][:-1]
                    if (label.find('Paperback') != -1) or (label.find('Hardcover') != -1):
                        label = '1'
                        bookID = aLink.replace('/ref=cm_cr-mr-title', '')
                        bookID = bookID[-10:]
                        self.reviewedBookList.append(bookID)

                # use a separate name so the productList parameter is not shadowed
                productEntry = [aLink, rate, reviewID, label]
                self.allProductLinks.append(productEntry)
                del productEntry

            self.page = self.page + 1
Example 16
 def solveCustomerReview(self):
     if not hasattr(self, 'html'):
         
         self.html = MyHtml.getHtml(self.url,ffhead=True)
         self.getReviews(self.asin)
         self.solveReviewSummary()
Example 17
def solveReviewPage(asin, rank, url, fetchDate, bookPublishDate):
    hlre = re.compile(
        r'^(\d+) of (\d+) people found the following review helpful')
    html = MyHtml.getHtml(url)
    print "solving Review Page"
    countOfReviews=int(html.xpath('.//div[@id="cm_cr-product_info"]/div/div[1]/div[2]/span')[0].text.strip())
    if countOfReviews>0:
        
        divWholeReviewList = html.xpath('.//div[@id="cm_cr-review_list"]')[0]
        divReviewList = divWholeReviewList.xpath('./div[@id]')     
        print divReviewList
        for divReview in divReviewList:
            
            aReview = Review()
            rank += 1
            aReview.helpfulRank = rank
            aReview.asin = asin
            aReview.reviewID = divReview.attrib['id']

            # helpful line
            parentNode = divReview.xpath('.//span[@class="a-size-base cr-vote"]/span[1]/span[1]')
            print "helpful Match", parentNode
            helpfulMatch = None
            if parentNode is not None:
                match1 = parentNode
                if len(match1) > 0:
                    if match1[0].text is not None:
                        helpfulMatch = hlre.match(match1[0].text.strip())
                else:
                    match2 = divReview.xpath('.//span[@class="a-size-base cr-vote"]/span[1]/span[1]/span[1]')
                    if len(match2) > 0:
                        if match2[0].text is not None:
                            helpfulMatch = hlre.match(match2[0].text.strip())
                print helpfulMatch
                if helpfulMatch:
                    aReview.helpful = int(helpfulMatch.group(1))
                    aReview.total = int(helpfulMatch.group(2))
                    aReview.helpfulness = aReview.helpful * \
                                100 / aReview.total / 100.0
                else:
                    aReview.helpful=0
                    aReview.total=0
                    aReview.helpfulness=0
                del helpfulMatch

                print "getting rate data"
                
                rateData = divReview.xpath('.//span[@class="a-icon-alt"]')[0].text.strip()
                aReview.rate=rateData.split(' ')[0].strip()
                aReview.title = divReview.xpath('.//a[@class="a-size-base a-link-normal review-title a-color-base a-text-bold"]')[0].text.strip()
            
                print "getting reviewerId"
               
                reviewer = divReview.xpath('.//span[@class="a-size-base a-color-secondary review-byline"]/a')
                if reviewer is not None and len(reviewer)>0:
                    aReview.reviewerID=reviewer[0].attrib['href'].split('/')[4].split('?')[0]
                print "reviewerId",aReview.reviewerID
                
            aReview.date = CommonTool.strToDate(divReview.xpath('./div[@class="a-row"]/span[4]')[0].text.strip())
            aReview.elapsedDate = (fetchDate - aReview.date).days
            print bookPublishDate
            if bookPublishDate=='N/A':
                aReview.reviewBookDate='N/A'
            else:
                aReview.reviewBookDate = (aReview.date - (bookPublishDate)).days
                
    
            # format line
            try:
                strFormat = divReview[3].xpath('./a[1]')[0].text.strip()
                aReview.fromFormat = strFormat.split(' ')[1]
            except IndexError:
                aReview.fromFormat = ''
            spanVerifiedPurchase = divReview.xpath('.//span[@class="a-size-mini a-color-state a-text-bold"]')
            if spanVerifiedPurchase:
                spanVerifiedPurchase=spanVerifiedPurchase[0].text.strip()
                if spanVerifiedPurchase=="Verified Purchase":
                    aReview.verified = 1
                else:
                    aReview.verified = 0
    
            # review text line
            divReviewText = divReview.xpath('.//div[@class="a-row review-data"]/span')[0]
            aReview.description = filtTag.filter_tags(
                etree.tostring(divReviewText).strip()).strip()
            aReview.description = aReview.description.replace('\n', '<br />')
            del divReviewText
   
   
            # review comments line
            aReview.numOfComments = CommonTool.strToInt(
                divReview.xpath('.//div[@class="a-row a-spacing-top-small review-comments"]/div/a/span/span[1]')[0].text.strip())
            aReview.getComments()
            saveReview(review=aReview)
    #             try:
    #                 reviewer = Reviewer.loadReviewer(aReview.reviewerID)
    #                 aReview.lastReviewRank = reviewer.getPreBookReviewRanking(
    #                     aReview.reviewID)
    #             except Exception, e:
    #                 sys.stderr.write(str(e) + '\n')
    #                 sys.stderr.write('lastReviewRank not found! url: {0} id: {1} \
    #                 reviewerID: {2}\n'.format(
    #                     url, aReview.reviewID, aReview.reviewerID))
    #                 import traceback
    #                 traceback.print_exc()
            
        # end of for
    # end of else
    return rank
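For reference, the hlre pattern at the top of this example pulls the two vote counts out of the "x of y people found the following review helpful" line; a minimal check with an illustrative sample string:

import re

# Same pattern as in solveReviewPage(); the sample string below is illustrative only.
hlre = re.compile(r'^(\d+) of (\d+) people found the following review helpful')
m = hlre.match('12 of 15 people found the following review helpful')
print m.group(1), m.group(2)  # -> 12 15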