# Review-table construction (Python 2 syntax: print statements, dict.iteritems()).
# Initialises ratingTable, prints the number of entries in loader.reviews, then
# starts a per-review loop that assembles a `ratingStats` dict from the review,
# its paper and its reviewer, with the similarity and common-subject values
# supplied by calcFeatures helpers.
# NOTE(review): this whole statement run sits on one physical line that begins
# with '#', so as written the interpreter reads all of it as a comment; the
# original line breaks appear to have been lost - TODO restore them.
# NOTE(review): the `ratingStats` literal is not closed on this line; its
# remaining entries and whatever consumes it are not visible here.
# NOTE(review): the loop variable `id` shadows the builtin and is not used in
# the visible part of the loop body.
#Some User Statistics ratingTable = [] print "Building Review Table" print "There are %d reviews" % len(loader.reviews) for id, review in loader.reviews.iteritems(): paper = review.paper reviewer = review.user ratingStats = { "rating": review.overallRating, "time": np.datetime64(review.time), "pastPaperSimilarity": calcFeatures.computePaperReviewerSimilarity( tfidf, paper, reviewer), "authorReviewerSimilarity": calcFeatures.getAuthorReviewerSimilarity( tfidf, paper.primaryAuthor, reviewer), "maxAuthorReviewerSimilarity": calcFeatures.getAuthorReviewerSimilarity( tfidf, paper.maxAuthor, reviewer), "specificCommonSubjects": calcFeatures.computeSpecificCommonSubjects( paper, reviewer), "generalCommonSubjects": calcFeatures.computeGeneralCommonSubjects( paper, reviewer), "samePrimaryGeneralSubject": reviewer.primaryGeneralSubjectArea == paper.primaryGeneralSubjectArea, "samePrimarySpecificSubject": reviewer.primarySpecificSubjectArea == paper.primarySpecificSubjectArea,
"paperId": paper.id, "userId": reviewer.id, "rating": review.overallRating, "revPaperCount": len(reviewer.pastPapers), "revTopPaperCount": reviewer.topPastPapers, "revKDDPaperCount": reviewer.topKDDPast, ###Similarity "maxSimilarity": calcFeatures.getAuthorReviewerSimilarity(tfidf, paper.maxAuthor, reviewer), "primarySimilarity": calcFeatures.getAuthorReviewerSimilarity(tfidf, paper.primaryAuthor, reviewer), "maxJacSimilarity": calcFeatures.getAuthorReviewerSimilarity(tfidf, paper.maxAuthor, reviewer, jaccard=True), "primaryJacSimilarity": calcFeatures.getAuthorReviewerSimilarity(tfidf, paper.primaryAuthor, reviewer, jaccard=True), ###Co-Author Distance
# Per-review loop over loader.reviews that appends one feature dict to
# reviewTable. The visible entries are: paper and user ids, the overall rating,
# reviewer paper counts (len(reviewer.pastPapers), topPastPapers, topKDDPast),
# four calcFeatures.getAuthorReviewerSimilarity values (paper.maxAuthor and
# paper.primaryAuthor, each with and without jaccard=True), review.minDist,
# review.avgDist and reviewer.country.
# NOTE(review): the loop header, body statements and dict entries are collapsed
# onto one physical line, which does not parse as written; in this form the
# inline `###Similarity` marker also turns the rest of the line into a comment.
# The original line breaks appear to have been lost - TODO restore them.
# NOTE(review): the dict literal and the append( call are not closed on this
# line; any further entries are not visible here.
# NOTE(review): dict.iteritems() exists only on Python 2; the loop variable
# `id` shadows the builtin and is not used in the visible part of the body.
for id, review in loader.reviews.iteritems(): paper = review.paper reviewer = review.user reviewTable.append({ "paperId": paper.id, "userId": reviewer.id, "rating": review.overallRating, "revPaperCount": len(reviewer.pastPapers), "revTopPaperCount": reviewer.topPastPapers, "revKDDPaperCount": reviewer.topKDDPast, ###Similarity "maxSimilarity": calcFeatures.getAuthorReviewerSimilarity( tfidf, paper.maxAuthor, reviewer), "primarySimilarity": calcFeatures.getAuthorReviewerSimilarity( tfidf, paper.primaryAuthor, reviewer), "maxJacSimilarity": calcFeatures.getAuthorReviewerSimilarity( tfidf, paper.maxAuthor, reviewer, jaccard=True), "primaryJacSimilarity": calcFeatures.getAuthorReviewerSimilarity( tfidf, paper.primaryAuthor, reviewer, jaccard=True), ###Co-Author Distance "minDist": review.minDist, "avgDist": review.avgDist, "revCountry": reviewer.country,