# Feature-computation phase (Python 2 script: note the print statement).
# Each call hands the loader, or its reviews, to a calcFeatures helper.
# The helpers are defined elsewhere; presumably they populate the attributes
# read further down (pastPapers, topPastPapers, avgRating, ...) and must run
# before the tables are built -- TODO confirm against calcFeatures.
print "Calculating Features"
calcFeatures.calcAuthorsPastPapers(loader)
calcFeatures.calcTopConfsJoursCount(loader)
calcFeatures.computeAverages(loader)
# Unlike the calls above, this one receives loader.reviews, not the loader.
calcFeatures.computeDistances(loader.reviews)

# Build one dict per paper and collect the dicts in paperTable.
# NOTE(review): this copy of the loop is cut off inside the dict literal
# below -- the "{" and "(" opened by paperTable.append({ are never closed
# here, so the file does not parse as written.
print "Constructing Paper Table"
paperTable = []

# iteritems() is the Python 2 dict iterator. Note that the loop variable
# `id` shadows the builtin of the same name.
for id, paper in loader.papers.iteritems():

    # sorted() is ascending, so [-1] selects the author with the largest key:
    # most past papers, largest topPastPapers, largest topKDDPast.
    # Indexing [-1] raises IndexError if paper.authors is empty.
    maxAuthor = sorted(paper.authors, key=lambda a: len(a.pastPapers))[-1]
    maxTopAuthor = sorted(paper.authors, key=lambda a: a.topPastPapers)[-1]
    maxKDDAuthor = sorted(paper.authors, key=lambda a: a.topKDDPast)[-1]
    numAuthors = len(paper.authors)
    affiliation = calcFeatures.getAcademicOrIndustry(paper)

    # NOTE(review): `tfidf` is not defined in the visible code; presumably it
    # is created earlier in the original script -- verify.
    distStats = calcFeatures.getPaperDistStats(paper)
    simStats = calcFeatures.getSimStats(paper, tfidf)

    # None of the locals computed above is referenced in the part of the
    # dict that is visible before the truncation.
    paperTable.append({
        "paperId":
        paper.id,

        ###true values
        "avgRating":
        paper.avgRating,
        "accepted":
        paper.accepted,

        ###nationalities of authors
# NOTE(review): these four calls repeat the feature-computation calls at the
# top of the file. They appear at column 0 while the dict literal opened
# just above is still unterminated -- it looks like a second copy of the
# script was concatenated here; confirm which copy is authoritative.
calcFeatures.calcAuthorsPastPapers(loader)
calcFeatures.calcTopConfsJoursCount(loader)
calcFeatures.computeAverages(loader)
calcFeatures.computeDistances(loader.reviews)


# Build one dict per paper and collect the dicts in paperTable.
# NOTE(review): from the "reviewerAverage" key onward the dict below reads
# `review` and `reviewer`, which this loop never binds, and the code after
# the dict works on ratingTable/ratingStats instead of paperTable. This
# looks like a per-paper table spliced with a separate per-review rating
# table -- TODO recover the two original loops before relying on this.
print "Constructing Paper Table"
paperTable = []

# iteritems() is the Python 2 dict iterator. Note that the loop variable
# `id` shadows the builtin of the same name.
for id, paper in loader.papers.iteritems():

    # sorted() is ascending, so [-1] selects the author with the largest key:
    # most past papers, largest topPastPapers, largest topKDDPast.
    # Indexing [-1] raises IndexError if paper.authors is empty.
    maxAuthor = sorted(paper.authors, key=lambda a: len(a.pastPapers))[-1]
    maxTopAuthor = sorted(paper.authors, key=lambda a: a.topPastPapers)[-1]
    maxKDDAuthor = sorted(paper.authors, key=lambda a: a.topKDDPast)[-1]
    numAuthors = len(paper.authors)
    affiliation = calcFeatures.getAcademicOrIndustry(paper)

    # NOTE(review): `tfidf` is not defined in the visible code; presumably it
    # is created earlier in the original script -- verify.
    distStats = calcFeatures.getPaperDistStats(paper)
    simStats = calcFeatures.getSimStats(paper, tfidf)

    # Of the locals above, only maxAuthor is referenced in the visible dict.
    # NOTE(review): the "(" opened by append( is never closed -- the literal
    # ends with a bare "}" -- so this statement does not parse as written.
    # The keys "accepted", "paperId" and "primaryAuthorCountry" each appear
    # twice; in a dict literal the later occurrence wins.
    paperTable.append({
        "paperId": paper.id,

        ###true values
        "avgRating": paper.avgRating,
        "accepted": paper.accepted,

        ###nationalities of authors
        "modeAuthorCountry": calcFeatures.getAuthorCountryMode(paper),
        "maxAuthorCountry": maxAuthor.country,
        "primaryAuthorCountry": paper.primaryAuthor.country,
        "accepted": paper.accepted,

        # Per-review fields: `reviewer` and `review` are not bound in this loop.
        "reviewerAverage": reviewer.avgRating,
        "paperAverage": paper.avgRating,
        # Deviation of this review's overall rating from the reviewer's and
        # from the paper's avgRating attribute.
        "reviewerRatingDiff": review.overallRating - reviewer.avgRating,
        "paperRatingDiff": review.overallRating - paper.avgRating,
        "paperId": paper.id,
        "userId": reviewer.id,

        # Word count: the three free-text fields are joined with spaces and
        # split on whitespace.
        "reviewLength": len(
            ("%s %s %s" % (
                review.ratings["strengths"],
                review.ratings["weaknesses"],
                review.ratings["comments"])).split()),
        "externalReviewer": review.externalReviewer,

        "reviewerCountry": reviewer.country,
        "authorCountryMode": calcFeatures.getAuthorCountryMode(paper),
        "primaryAuthorCountry": paper.primaryAuthor.country,

        "reviewerIsAcademic": reviewer.isAcademic,
        "paperAcademicOrIndustry": calcFeatures.getAcademicOrIndustry(paper),
        # Reads the paper's own maxAuthor attribute, not the local maxAuthor
        # computed at the top of the loop body.
        "maxAuthorIsAcademic": paper.maxAuthor.isAcademic,
    }
    # NOTE(review): neither ratingStats nor ratingTable is assigned anywhere
    # in the visible code.
    ratingTable.append(ratingStats)

    # Progress output: prints the key for every positive multiple of 100
    # (assumes the keys are integers -- TODO confirm).
    if id % 100 == 0 and id > 0:
        print id

# Pickles a DataFrame built from ratingTable to a path relative to the
# working directory. DataFrame is presumably pandas.DataFrame (import not
# visible). paperTable is never written out in the visible code.
DataFrame(ratingTable).to_pickle("savedFrames/ratingPrediction/ratingTable")
# Example #4  (snippet separator, apparently carried over from the listing this file was copied from)
# 0
        # NOTE(review): this fragment starts in the middle of a dict literal;
        # the opening "{", the key for the first value below, and the
        # enclosing loop header are not visible. Judging by the names used
        # after the dict, the literal is presumably assigned to ratingStats
        # inside a per-review loop -- TODO confirm against the original.
        review.overallRating - paper.avgRating,
        "paperId":
        paper.id,
        "userId":
        reviewer.id,
        # Word count: the three free-text fields are joined with spaces and
        # split on whitespace.
        "reviewLength":
        len(("%s %s %s" %
             (review.ratings["strengths"], review.ratings["weaknesses"],
              review.ratings["comments"])).split()),
        "externalReviewer":
        review.externalReviewer,
        "reviewerCountry":
        reviewer.country,
        "authorCountryMode":
        calcFeatures.getAuthorCountryMode(paper),
        "primaryAuthorCountry":
        paper.primaryAuthor.country,
        "reviewerIsAcademic":
        reviewer.isAcademic,
        "paperAcademicOrIndustry":
        calcFeatures.getAcademicOrIndustry(paper),
        "maxAuthorIsAcademic":
        paper.maxAuthor.isAcademic,
    }
    # Append the row to the accumulating list (ratingTable is created
    # outside the visible fragment).
    ratingTable.append(ratingStats)

    # Progress output: prints the key for every positive multiple of 100
    # (assumes `id` is an integer loop key -- TODO confirm).
    if id % 100 == 0 and id > 0:
        print id

# Pickles a DataFrame built from ratingTable to a path relative to the
# working directory. DataFrame is presumably pandas.DataFrame (import not
# visible).
DataFrame(ratingTable).to_pickle("savedFrames/ratingPrediction/ratingTable")