def printReviews(self, ct=None):
    """Print every review in ``self.reviewList`` and insert it into the table.

    Each review is loaded with ``Review.loadReview(reviewID, self.fetchDate)``,
    written with ``review.printData(ct)`` and then passed through
    ``review.insertReviewDataIntoTable()``.

    Args:
        ct: Writer object handed to ``review.printData``.  When omitted, a
            ``CommonTool`` is created here and bound to
            ``../data/<fetchDate>/review/review.txt``: the directory is
            created if missing, a file that does not exist yet is created and
            starts with ``Review.Review.tableHead``, and an existing file is
            opened for appending.  When supplied, it is used as-is and no
            file is opened or closed by this method.

    A file opened by this method is closed before returning, including when
    processing a review raises.
    """
    ownedFile = None  # only set when this method opened the output itself
    isNewFile = False
    if ct is None:
        reviewDir = "../data/" + self.fetchDate.isoformat() + "/review/"
        reviewPath = reviewDir + "review.txt"
        if not os.path.exists(reviewDir):
            os.makedirs(reviewDir)
        ct = CommonTool()
        isNewFile = not os.path.exists(reviewPath)
        ownedFile = open(reviewPath, "w" if isNewFile else "a")

    try:
        if ownedFile is not None:
            ct.setFout(ownedFile)
            if isNewFile:
                # A fresh file needs the header row before any review data.
                print("writing reviews in new file")
                ct.writeln(Review.Review.tableHead)

        for reviewID in self.reviewList:
            print("printing reviews")
            review = Review.loadReview(reviewID, self.fetchDate)
            review.printData(ct)
            review.insertReviewDataIntoTable()
    finally:
        # Close only what was opened here; a caller-supplied ct keeps its file.
        if ownedFile is not None:
            ownedFile.close()
def solveReview(self):
    """Rewrite ``../data/<fetchDate>/review.txt`` from ``self.reviewList``.

    The file is truncated, ``Review.Review.tableHead`` plus a newline is
    written and flushed directly to the file object, and every non-empty
    review ID is then loaded with ``Review.loadReview(reviewID)`` and printed
    through a ``CommonTool`` wrapping that file.

    For a review whose ``isNew`` flag is set, its ``reviewerID`` is appended
    to ``self.reviewerList`` if not already there, and the flag is cleared on
    the loaded object.
    """
    outPath = "../data/" + self.fetchDate.isoformat() + "/review.txt"
    with open(outPath, "w") as fout:
        ct = CommonTool(fout)

        # The header goes straight to the file object instead of via ct.
        fout.write(Review.Review.tableHead)
        fout.write('\n')
        fout.flush()

        for index, reviewID in enumerate(self.reviewList):
            if reviewID != '':
                progress = 'solve review {0} of {1}: {2}'.format(
                    index, len(self.reviewList), reviewID)
                print(progress)

                review = Review.loadReview(reviewID)
                review.printData(ct)

                if not review.isNew:
                    continue

                print('isNewReview')
                knownReviewers = self.reviewerList
                if review.reviewerID not in knownReviewers:
                    knownReviewers.append(review.reviewerID)
                review.isNew = False
def solveReviewSummary(self):
    """Record whether the page has a quotes table and re-save its reviews.

    ``self.hasQuoteTable`` is set to 1 when ``self.html`` contains a
    ``<table id="quotesTable">`` and to 0 otherwise.  For every matching
    quote link in the first such table, the review ID is taken from the fifth
    "/"-separated segment of the link's ``href``; the review is loaded with
    ``Review.loadReview(reviewID, self.fetchDate)`` and handed to
    ``Review.saveReview``.

    Raises:
        Whatever loading or saving a quoted review raises.  Before the
        exception propagates, a ``quotesTable review not found`` line naming
        ``self.asin`` and the review ID is written to ``sys.stderr``.
    """
    quotesTable = self.html.xpath(".//table[@id='quotesTable']")
    if not quotesTable:
        self.hasQuoteTable = 0
        return

    self.hasQuoteTable = 1
    quotes = quotesTable[0].xpath(
        "./td/a[@class='a-link-normal a-text-normal a-color-base']")
    for quote in quotes:
        reviewID = quote.attrib["href"].split("/")[4]
        try:
            review = Review.loadReview(reviewID, self.fetchDate)
            Review.saveReview(review)
        except Exception:
            # Emit the diagnostic only on failure (it used to be written
            # after every successful save as well), then let the error
            # propagate exactly as before.
            sys.stderr.write(
                'quotesTable review not found: {0} {1}\n'.format(
                    self.asin, reviewID))
            raise
def calcReviewTopPercent(self):
    """Store each review's position and its top-1/5/10-percent bucket.

    Reviews are processed in ``self.reviewList`` order.  Each one is loaded
    with ``Review.loadReview(reviewID)``, gets its 0-based list position as
    ``timeRank``, and is saved with ``Review.saveReview``.

    With ``n = len(self.reviewList)``, the bucket sizes are ``ceil(n/100)``,
    ``ceil(n/20)`` and ``ceil(n/10)``.  A review gets exactly one flag set to
    1 -- ``top1Percent``, else ``top5Percent``, else ``top10Percent`` -- for
    the smallest bucket its position falls in; reviews past the 10% cut-off
    get no flag.

    If assigning ``timeRank`` raises ``AttributeError`` (presumably because
    the review could not be loaded -- confirm against ``Review.loadReview``),
    the error and review ID are written to ``sys.stderr`` and the process
    exits with status -1.
    """
    print('calcReviewTopPercent')
    import math

    totalReviews = len(self.reviewList)
    top1Percent = int(math.ceil(totalReviews / 100.0))
    top5Percent = int(math.ceil(totalReviews / 20.0))
    top10Percent = int(math.ceil(totalReviews / 10.0))

    for rank, reviewID in enumerate(self.reviewList):
        aReview = Review.loadReview(reviewID)
        try:
            aReview.timeRank = rank
        except AttributeError as e:
            sys.stderr.write(str(e) + '\n')
            sys.stderr.write('reviewID' + reviewID)
            sys.exit(-1)

        # rank is 0-based, so the first k reviews are those with rank < k.
        # Comparing with <= admitted one review too many into every bucket.
        if rank < top1Percent:
            aReview.top1Percent = 1
        elif rank < top5Percent:
            aReview.top5Percent = 1
        elif rank < top10Percent:
            aReview.top10Percent = 1

        Review.saveReview(aReview)
def printData(self):
    """Export one combined review/book/reviewer row per review to dataAll.txt.

    ``../data/<fetchDate>/dataAll.txt`` is truncated and starts with
    ``self.tableHeadList`` joined by tabs.  For every non-empty ID in
    ``self.reviewList`` the review, its book (looked up by ``review.asin``)
    and its reviewer (looked up by ``review.reviewerID``) are loaded, and
    their fields are written through a ``CommonTool`` in a fixed column
    order, ending the row with ``book.rank``.

    The reviewer fields ``rReal``, ``location``, ``aboutMe`` and ``interest``
    are exported as 0 when they equal ``"N/A"`` and 1 otherwise;
    ``reviewer.rRank`` is exported as 0 when it equals ``"N/A"`` and as its
    own value otherwise.
    """
    # Column layout, in output order.  The review's top1/5/10Percent and
    # isQuoteTable fields and the book's hasQuoteTable field are not exported.
    leadingReviewFields = ('asin', 'reviewerID')
    reviewerBadgeFields = (
        'rName', 'tRev1', 'tRev10', 'tRev50', 'tRev100', 'tRev500',
        'tRev1000', 'tRevHall', 'vVoice',
    )
    reviewFields = (
        'verified', 'rate', 'title', 'date', 'fetchDate', 'reviewBookDate',
        'elapsedDate', 'helpful', 'total', 'helpfulness', 'helpfulRank',
        'timeRank', 'description', 'numOfComments', 'comment',
        'lastReviewRank',
    )
    bookFields = (
        'url', 'tag', 'allowPreview', 'binding', 'publishDate', 'author',
        'authorInfo', 'rate', 'numOfReviews', 'kindlePrice',
        'hardcoverPrice', 'paperbackPrice', 'bookDsc', 'listPrice', 'pages',
        'isbn10', 'isbn13', 'subrank', 'hasEditorialReview',
        'editorialReview',
    )
    reviewerStatFields = (
        'email', 'webPage', 'hasPhoto', 'rNum', 'helpRate', 'hVote', 'tVote',
        'avgRate', 'fRevTime', 'lRevTime', 'duration',
    )
    presenceFields = ('rReal', 'location', 'aboutMe', 'interest')

    plan = (
        ('review', leadingReviewFields),
        ('reviewer', reviewerBadgeFields),
        ('review', reviewFields),
        ('book', bookFields),
        ('reviewer', reviewerStatFields),
    )

    outPath = "../data/" + self.fetchDate.isoformat() + "/dataAll.txt"
    with open(outPath, "w") as fout:
        ct = CommonTool(fout)
        fout.write("\t".join(self.tableHeadList))
        fout.write('\n')
        fout.flush()

        for index, reviewID in enumerate(self.reviewList):
            if reviewID == '':
                continue
            print('solve review {0} of {1}: {2}'.format(
                index, len(self.reviewList), reviewID))

            review = Review.loadReview(reviewID)
            book = Book.loadBookByAsin(review.asin)
            reviewer = Reviewer.loadReviewer(review.reviewerID)
            sources = {'review': review, 'book': book, 'reviewer': reviewer}

            ct.write(reviewID)
            for owner, fieldNames in plan:
                record = sources[owner]
                for fieldName in fieldNames:
                    ct.write(getattr(record, fieldName))

            # Free-text profile fields are reduced to present/absent flags.
            for fieldName in presenceFields:
                ct.write(0 if getattr(reviewer, fieldName) == "N/A" else 1)

            ct.write(review.fromFormat)

            reviewerRank = reviewer.rRank
            ct.write(0 if reviewerRank == "N/A" else reviewerRank)

            ct.writeln(book.rank)