Beispiel #1
0
 def printReviews(self, ct=None):
     """Print every review in self.reviewList and insert it into the table.

     ct -- writer object handed to each review's printData(). When it is
           None, a CommonTool is created here and pointed at
           ../data/<fetchDate>/review/review.txt; the directory is created
           if missing, and Review.Review.tableHead is written first when
           the file did not exist yet. An existing file is appended to.

     A file opened by this method is always closed before returning, even
     if loading or printing a review raises. A caller-supplied ct is never
     closed here.
     """
     fout = None  # stays None unless this method opens the file itself
     try:
         if ct is None:
             reviewDir = "../data/" + self.fetchDate.isoformat() + "/review/"
             reviewPath = reviewDir + "review.txt"
             if not os.path.exists(reviewDir):
                 os.makedirs(reviewDir)
             # Decide before opening: opening would create the file.
             isNewFile = not os.path.exists(reviewPath)
             ct = CommonTool()
             fout = open(reviewPath, "w" if isNewFile else "a")
             ct.setFout(fout)
             if isNewFile:
                 print("writing reviews in new file")
                 ct.writeln(Review.Review.tableHead)
         for reviewID in self.reviewList:
             print("printing reviews")
             review = Review.loadReview(reviewID, self.fetchDate)
             review.printData(ct)
             review.insertReviewDataIntoTable()
     finally:
         if fout is not None:
             fout.close()
Beispiel #2
0
 def solveReview(self):
     """Write all reviews to ../data/<fetchDate>/review.txt.

     The file is truncated, Review.Review.tableHead is written as the first
     line, and each non-empty ID in self.reviewList is loaded and passed to
     its printData() with a CommonTool wrapping the output file.

     For a review whose isNew flag is set, its reviewerID is added to
     self.reviewerList (if not already present) and the flag is cleared on
     the loaded object.
     """
     outPath = "../data/" + self.fetchDate.isoformat() + "/review.txt"
     with open(outPath, "w") as fout:
         ct = CommonTool(fout)
         # The header is written directly to the file, not through ct.
         fout.write(Review.Review.tableHead)
         fout.write('\n')
         fout.flush()
         for index, reviewID in enumerate(self.reviewList):
             if reviewID == '':
                 continue
             progress = 'solve review {0} of {1}: {2}'.format(
                 index, len(self.reviewList), reviewID)
             print(progress)
             review = Review.loadReview(reviewID)
             review.printData(ct)
             if not review.isNew:
                 continue
             print('isNewReview')
             alreadyKnown = review.reviewerID in self.reviewerList
             if not alreadyKnown:
                 self.reviewerList.append(review.reviewerID)
             review.isNew = False
Beispiel #3
0
 def solveReviewSummary(self):
     """Record whether the page has a quotes table and re-save quoted reviews.

     Sets self.hasQuoteTable to 1 when self.html contains a table with
     id 'quotesTable', otherwise to 0. For every matching link in that
     table, the review ID is taken from the fifth '/'-separated segment of
     the link's href; the review is loaded for self.fetchDate and saved
     again.

     A review that cannot be loaded or saved is reported on stderr and
     skipped. The message is written only on failure; previously it was
     emitted for every quote because the surrounding try/except had been
     commented out.
     """
     quotesTable = self.html.xpath(".//table[@id='quotesTable']")
     if len(quotesTable) == 0:
         self.hasQuoteTable = 0
         return
     self.hasQuoteTable = 1
     quotes = quotesTable[0].xpath(
         "./td/a[@class='a-link-normal a-text-normal a-color-base']")
     for quote in quotes:
         words = quote.attrib["href"].split("/")
         reviewID = words[4]
         try:
             review = Review.loadReview(reviewID, self.fetchDate)
             Review.saveReview(review)
         except Exception:
             # NOTE(review): the exception type raised for a missing review
             # is not visible here; narrow this once it is known.
             sys.stderr.write(
                 'quotesTable review not found: {0} {1}\n'.format(
                     self.asin, reviewID))
Beispiel #4
0
    def calcReviewTopPercent(self):
        """Store each review's position and its top-percent bucket.

        Reviews are processed in the order of self.reviewList. Each one is
        loaded, given timeRank = its 0-based position in the list, flagged
        with at most one of top1Percent / top5Percent / top10Percent, and
        saved again.

        Bucket sizes are 1%, 5% and 10% of the list length, rounded up.
        The buckets are exclusive: a review in the top 1% gets only
        top1Percent, one between 1% and 5% gets only top5Percent, and so on.

        The process exits with status -1 if timeRank cannot be assigned on
        a loaded review (AttributeError).
        """
        print('calcReviewTopPercent')
        import math

        maxRank = len(self.reviewList)
        top1Percent = int(math.ceil(maxRank / 100.0))
        top5Percent = int(math.ceil(maxRank / 20.0))
        top10Percent = int(math.ceil(maxRank / 10.0))

        for rank, reviewID in enumerate(self.reviewList):
            aReview = Review.loadReview(reviewID)
            try:
                aReview.timeRank = rank
            except AttributeError as e:
                # Presumably loadReview returned something that cannot take
                # attributes (e.g. None) -- treated as fatal.
                sys.stderr.write(str(e) + '\n')
                sys.stderr.write('reviewID' + reviewID)
                sys.exit(-1)
            # rank is 0-based, so the first k reviews are those with
            # rank < k. Using <= here let one review too many through
            # each cut-off.
            if rank < top1Percent:
                aReview.top1Percent = 1
            elif rank < top5Percent:
                aReview.top5Percent = 1
            elif rank < top10Percent:
                aReview.top10Percent = 1
            Review.saveReview(aReview)
Beispiel #5
0
 def printData(self):
     """Export the combined review / reviewer / book data to dataAll.txt.

     The file ../data/<fetchDate>/dataAll.txt is truncated and starts with
     self.tableHeadList joined by tabs. For every non-empty ID in
     self.reviewList the review, its book (looked up by review.asin) and
     its reviewer (looked up by review.reviewerID) are loaded, and their
     values are passed one by one to ct.write in the fixed order below;
     the last value, book.rank, goes to ct.writeln.

     reviewer.rReal, location, aboutMe and interest are exported as 0 when
     they equal "N/A" and 1 otherwise. reviewer.rRank is exported as 0
     when it equals "N/A" and unchanged otherwise. The top-percent flags
     and the quote-table flags are not exported.
     """
     # Column groups, in output order.
     reviewerBadgeFields = (
         "rName", "tRev1", "tRev10", "tRev50", "tRev100", "tRev500",
         "tRev1000", "tRevHall", "vVoice")
     reviewFields = (
         "verified", "rate", "title", "date", "fetchDate",
         "reviewBookDate", "elapsedDate", "helpful", "total",
         "helpfulness", "helpfulRank", "timeRank", "description",
         "numOfComments", "comment", "lastReviewRank")
     bookFields = (
         "url", "tag", "allowPreview", "binding", "publishDate", "author",
         "authorInfo", "rate", "numOfReviews", "kindlePrice",
         "hardcoverPrice", "paperbackPrice", "bookDsc", "listPrice",
         "pages", "isbn10", "isbn13", "subrank", "hasEditorialReview",
         "editorialReview")
     reviewerProfileFields = (
         "email", "webPage", "hasPhoto", "rNum", "helpRate", "hVote",
         "tVote", "avgRate", "fRevTime", "lRevTime", "duration")
     # Exported only as present (1) / "N/A" (0).
     reviewerPresenceFields = ("rReal", "location", "aboutMe", "interest")

     outPath = "../data/" + self.fetchDate.isoformat() + "/dataAll.txt"
     with open(outPath, "w") as fout:
         ct = CommonTool(fout)
         fout.write("\t".join(self.tableHeadList))
         fout.write('\n')
         fout.flush()
         for index, reviewID in enumerate(self.reviewList):
             if reviewID == '':
                 continue
             progress = 'solve review {0} of {1}: {2}'.format(
                 index, len(self.reviewList), reviewID)
             print(progress)
             review = Review.loadReview(reviewID)
             book = Book.loadBookByAsin(review.asin)
             reviewer = Reviewer.loadReviewer(review.reviewerID)

             ct.write(reviewID)
             ct.write(review.asin)
             ct.write(review.reviewerID)
             for field in reviewerBadgeFields:
                 ct.write(getattr(reviewer, field))
             for field in reviewFields:
                 ct.write(getattr(review, field))
             for field in bookFields:
                 ct.write(getattr(book, field))
             for field in reviewerProfileFields:
                 ct.write(getattr(reviewer, field))
             for field in reviewerPresenceFields:
                 ct.write(0 if getattr(reviewer, field) == "N/A" else 1)
             ct.write(review.fromFormat)
             ct.write(0 if reviewer.rRank == "N/A" else reviewer.rRank)
             ct.writeln(book.rank)