print "Calculating Features" calcFeatures.calcAuthorsPastPapers(loader) calcFeatures.calcTopConfsJoursCount(loader) calcFeatures.computeAverages(loader) calcFeatures.computeDistances(loader.reviews) print "Constructing Paper Table" paperTable = [] for id, paper in loader.papers.iteritems(): maxAuthor = sorted(paper.authors, key=lambda a: len(a.pastPapers))[-1] maxTopAuthor = sorted(paper.authors, key=lambda a: a.topPastPapers)[-1] maxKDDAuthor = sorted(paper.authors, key=lambda a: a.topKDDPast)[-1] numAuthors = len(paper.authors) affiliation = calcFeatures.getAcademicOrIndustry(paper) distStats = calcFeatures.getPaperDistStats(paper) simStats = calcFeatures.getSimStats(paper, tfidf) paperTable.append({ "paperId": paper.id, ###true values "avgRating": paper.avgRating, "accepted": paper.accepted, ###nationalities of authors
calcFeatures.calcAuthorsPastPapers(loader) calcFeatures.calcTopConfsJoursCount(loader) calcFeatures.computeAverages(loader) calcFeatures.computeDistances(loader.reviews) print "Constructing Paper Table" paperTable = [] for id, paper in loader.papers.iteritems(): maxAuthor = sorted(paper.authors, key=lambda a: len(a.pastPapers))[-1] maxTopAuthor = sorted(paper.authors, key=lambda a: a.topPastPapers)[-1] maxKDDAuthor = sorted(paper.authors, key=lambda a: a.topKDDPast)[-1] numAuthors = len(paper.authors) affiliation = calcFeatures.getAcademicOrIndustry(paper) distStats = calcFeatures.getPaperDistStats(paper) simStats = calcFeatures.getSimStats(paper, tfidf) paperTable.append({ "paperId": paper.id, ###true values "avgRating": paper.avgRating, "accepted": paper.accepted, ###nationalities of authors "modeAuthorCountry": calcFeatures.getAuthorCountryMode(paper), "maxAuthorCountry": maxAuthor.country, "primaryAuthorCountry": paper.primaryAuthor.country,
# NOTE(review): newline-stripped fragment that begins INSIDE a dict literal;
# the opening of the dict and the enclosing loop header are not visible here.
# Presumably the dict is bound to `ratingStats` (it is appended right after
# the closing brace) -- confirm against the full script.
#
# Visible entries describe one review/reviewer/paper combination:
#   - "reviewerRatingDiff" / "paperRatingDiff": review.overallRating minus the
#     reviewer's / the paper's avgRating.
#   - "reviewLength": number of whitespace-separated words in the strengths,
#     weaknesses and comments texts joined with single spaces.
#   - the remaining keys copy attributes of review, reviewer and paper, or call
#     calcFeatures.getAuthorCountryMode / getAcademicOrIndustry on the paper.
#
# After the dict closes, the row is appended to ratingTable, `id` is printed
# when it is a positive multiple of 100, and DataFrame(ratingTable) is pickled
# to "savedFrames/ratingPrediction/ratingTable". The original indentation is
# lost, so which of these statements sit inside the loop cannot be determined
# from this line alone.
"accepted": paper.accepted, "reviewerAverage": reviewer.avgRating, "paperAverage": paper.avgRating, "reviewerRatingDiff": review.overallRating - reviewer.avgRating, "paperRatingDiff": review.overallRating - paper.avgRating, "paperId": paper.id, "userId": reviewer.id, "reviewLength": len( ("%s %s %s" % ( review.ratings["strengths"], review.ratings["weaknesses"], review.ratings["comments"])).split()), "externalReviewer": review.externalReviewer, "reviewerCountry": reviewer.country, "authorCountryMode": calcFeatures.getAuthorCountryMode(paper), "primaryAuthorCountry": paper.primaryAuthor.country, "reviewerIsAcademic": reviewer.isAcademic, "paperAcademicOrIndustry": calcFeatures.getAcademicOrIndustry(paper), "maxAuthorIsAcademic": paper.maxAuthor.isAcademic, } ratingTable.append(ratingStats) if id % 100 == 0 and id > 0: print id DataFrame(ratingTable).to_pickle("savedFrames/ratingPrediction/ratingTable")
# NOTE(review): newline-stripped fragment that begins MID-EXPRESSION: the
# leading `review.overallRating - paper.avgRating` is a dict value whose key,
# the dict opening and the enclosing loop header are not visible here.
#
# Visible entries: "reviewLength" is the number of whitespace-separated words
# in the strengths, weaknesses and comments texts joined with single spaces;
# the other keys copy attributes of review, reviewer and paper, or call
# calcFeatures.getAuthorCountryMode / getAcademicOrIndustry on the paper.
#
# After the dict closes, the row (`ratingStats`) is appended to ratingTable,
# `id` is printed when it is a positive multiple of 100, and
# DataFrame(ratingTable) is pickled to
# "savedFrames/ratingPrediction/ratingTable". The original indentation is
# lost, so which of these statements sit inside the loop cannot be determined
# from this line alone.
review.overallRating - paper.avgRating, "paperId": paper.id, "userId": reviewer.id, "reviewLength": len(("%s %s %s" % (review.ratings["strengths"], review.ratings["weaknesses"], review.ratings["comments"])).split()), "externalReviewer": review.externalReviewer, "reviewerCountry": reviewer.country, "authorCountryMode": calcFeatures.getAuthorCountryMode(paper), "primaryAuthorCountry": paper.primaryAuthor.country, "reviewerIsAcademic": reviewer.isAcademic, "paperAcademicOrIndustry": calcFeatures.getAcademicOrIndustry(paper), "maxAuthorIsAcademic": paper.maxAuthor.isAcademic, } ratingTable.append(ratingStats) if id % 100 == 0 and id > 0: print id DataFrame(ratingTable).to_pickle("savedFrames/ratingPrediction/ratingTable")