distStats = calcFeatures.getPaperDistStats(paper) simStats = calcFeatures.getSimStats(paper, tfidf) paperTable.append({ "paperId": paper.id, ###true values "avgRating": paper.avgRating, "accepted": paper.accepted, ###nationalities of authors "modeAuthorCountry": calcFeatures.getAuthorCountryMode(paper), "maxAuthorCountry": maxAuthor.country, "primaryAuthorCountry": paper.primaryAuthor.country, ###affiliation of team of authors "isIndustry": int(affiliation == 'industry'), "isAcademic": int(affiliation == 'academic'), "isMixed": int(affiliation == 'mixed'), ###past paper counts of authors "avgPaperCount":
"accepted": paper.accepted, "reviewerAverage": reviewer.avgRating, "paperAverage": paper.avgRating, "reviewerRatingDiff": review.overallRating - reviewer.avgRating, "paperRatingDiff": review.overallRating - paper.avgRating, "paperId": paper.id, "userId": reviewer.id, "reviewLength": len( ("%s %s %s" % ( review.ratings["strengths"], review.ratings["weaknesses"], review.ratings["comments"])).split()), "externalReviewer": review.externalReviewer, "reviewerCountry": reviewer.country, "authorCountryMode": calcFeatures.getAuthorCountryMode(paper), "primaryAuthorCountry": paper.primaryAuthor.country, "reviewerIsAcademic": reviewer.isAcademic, "paperAcademicOrIndustry": calcFeatures.getAcademicOrIndustry(paper), "maxAuthorIsAcademic": paper.maxAuthor.isAcademic, } ratingTable.append(ratingStats) if id % 100 == 0 and id > 0: print id DataFrame(ratingTable).to_pickle("savedFrames/ratingPrediction/ratingTable")