def plotCondSecond(df, num, color, first):
    """Bar-plot rating against second-closest author distance for one minDist.

    Only the rows of ``df`` whose ``minDist`` column equals ``num`` are handed
    to ``plotBar``; the x values passed are ``range(7)[num:]``.

    Args:
        df: frame providing a ``minDist`` column; the column names
            ``secondMinDist`` and ``rating`` are passed on to ``plotBar``.
        num: ``minDist`` value to select, also the offset into ``range(7)``.
        color: forwarded unchanged as ``plotBar``'s ``color``.
        first: negated and forwarded as ``sameFigure``.

    Returns:
        Whatever ``plotBar`` returns.
    """
    has_min_dist = df["minDist"] == num
    x_values = range(7)[num:]
    return plotBar(
        df[has_min_dist],
        "secondMinDist",
        "rating",
        x_values,
        color=color,
        x_label="Reviewer Distance to Second Closest Author",
        y_label="Avg Rating",
        sameFigure=not first,
        plotMean=False,
    )
# Acceptance proportion per mode author country, drawn as a bar chart.

# Insert a line break into the long country name before it is used as a
# category value (presumably so the label fits under its bar -- confirm).
df.loc[
    df["authorCountryMode"] == "United Kingdom", "authorCountryMode"
] = "United\nKingdom"

countryCounts = df['authorCountryMode'].value_counts().to_dict()
# dict.items() exists on Python 2 and 3; iteritems() is Python 2 only.
countries = [c for c, count in countryCounts.items() if count > 10]
# Order countries by ascending mean of the 'accepted' column ...
countries.sort(
    key=lambda c: df[df['authorCountryMode'] == c]['accepted'].mean())
# ... and drop the first one, i.e. the lowest mean.
countries = countries[1:]

p = plotBar(
    df,
    "authorCountryMode",
    "accepted",
    countries,
    title="Proportion of Papers Accepted by Country",
    x_label="Mode Author Country",
    y_label="Proportion Accepted",
    categorical=True,
    errorBars=True,
)

# Recolour the artists of every container on the current axes by height:
# (height - 0.05) / 0.20 is fed to the reversed coolwarm colormap.
# j counts children across all containers; the first four are left untouched.
j = 0
for container in plt.gca().containers:
    for child in container.get_children():
        if j > 3:
            child.set_color(
                plt.cm.coolwarm_r((child.get_height() - 0.05) / 0.20))
        j += 1

gcf().set_size_inches((40, 10))
# Vertical position just below the top of the current y range.
y = ylim()[1] - 0.025
# Gather the topPastPapers value of each author of `paper`.
# (`paper` and `authorExpList` are bound before this fragment.)
for a in paper.authors:
    authorExpList.append(a.topPastPapers)

# Simulation: for every team size i, draw i entries of authorExpList with
# random.sample, keep the largest, and record getAvg() of it; repeat numRuns
# times and store the mean and its standard error.
numRuns = 500
avgRatings = []
errors = []
for i in range(1, max(numAuthorsList) + 1):
    ratings = np.empty((numRuns, 1))
    for j in range(numRuns):
        sample = random.sample(authorExpList, i)
        maxExp = max(sample)
        ratings[j] = getAvg(maxExp)
    avgRatings.append(ratings.mean())
    # ratings has numRuns rows, so this is std / sqrt(number of runs).
    errors.append(ratings.std() / math.sqrt(numRuns))

# Observed average rating per author count (1..7) ...
p1 = plotBar(
    df,
    "numAuthors",
    "avgRating",
    range(1, 8),
    color="green",
    x_label="Number of Authors",
    y_label="Average Rating",
    marker="o",
    plotMean=False,
)
# ... overlaid with the first seven simulated means and their error bars.
p2 = plt.errorbar(
    np.arange(1, 8),
    avgRatings[:7],
    yerr=errors[:7],
    marker='o',
)
plt.xlim([0, 8])
plt.legend((p1, p2), ('Real', 'Simulated'), loc=4)
plt.show()
# Fragment: the first tokens close a call that begins outside this view.
# After that: df_dist keeps the rows with distDiff >= 0, plotBucket draws
# higherRating against distDiff, and plotBar draws paperRatingDiff against
# minDist for x values 1..8, followed by disabled Python 2 debug prints.
# NOTE(review): df_dist is computed but the plotBucket call is passed df, not
# df_dist -- confirm which frame was intended.
x_percentile=False, xlim=[-.1, .1]) df_dist = df[df["distDiff"] >= 0] plotBucket(df, "distDiff", "higherRating", delta=5, x_label="Difference in Min Distance to Reviewer", y_label="Probability of a Higher Rating", color="Green", x_percentile=False) plotBar(df_original, "minDist", "paperRatingDiff", [1, 2, 3, 4, 5, 6, 7, 8], x_label="Min Distance between Reviewer and Paper Authors", y_label="Deviation from Paper Average Rating", title="Deviation from Paper Average vs. Min Dist to Reviewer", color="Blue") # df["avgRatingDist"] = df["rating"] - df["rating2"] # for i in range(7): # for j in range(7): # if (j >= i): # print "(%d, %d)" % (i,j) # print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].mean() # print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].shape[0] show()
# Plots "accepted" against paperId (labelled "Submission Order") in 10
# buckets, then "rating" against review "order" for x values range(12)[1:].
# df is then rebound to df_review, so the "rating<i>" indicator columns created
# in the loop (rating == i for i in -2, -1, 1, 2) are added to df_review itself.
# NOTE(review): colors has five entries with an empty string in the middle,
# presumably so that index i + 2 matches i in -2..2 -- its use lies past the
# end of this fragment; confirm.
# The fragment stops inside an unfinished plots.append(plotBar(...)) call.
plotBucket( df, "paperId", "accepted", x_label="Submission Order", y_label="P(Accept)", title="Acceptance vs. Submission Order", numBuckets=10 ) #how subsequent reviews influence each other plotBar( df_review, "order", "rating", range(12)[1:], x_label="Review Order", y_label="Rating", title="Rating vs. Order of Review", ) df = df_review plots = [] colors =\ ["red", "pink", "", "cyan", "blue",] for i in [-2, -1, 1, 2]: df["rating%d" % i] = df["rating"] == i plots.append(plotBar( df_review,
# Fragment: the first tokens are trailing arguments of a call that begins
# outside this view.
# For the slices[0] subset of a copy of df, every numAuthors value >= 3 is
# overwritten with the mean of those values rounded to 2 decimals (newValue),
# and plotBar is given the x values [1, 2, newValue].
# slices[1], slices[2] and slices[3] are then drawn with plotBucket in red,
# green and purple; every returned handle is collected in p.
"avgRating", color="white", x_label="Number of Authors", y_label="Average Rating", marker="None", x_percentile=False) p = [] df1 = df.copy()[slices[0]] ofInterest = df1["numAuthors"] >= 3 newValue = np.round(df1[ofInterest]["numAuthors"].mean(), 2) df1.loc[ofInterest, "numAuthors"] = newValue p.append( plotBar(df1, "numAuthors", "avgRating", [1, 2, newValue], color="blue", sameFigure=True, marker="s", plotMean=False)) colors = ["red", "green", "purple"] p.extend([ plotBucket(df[slices[i]], "numAuthors", "avgRating", numBuckets=4, color=colors[i - 1], sameFigure=True, marker="s", plotMean=False) for i in [1, 2, 3] ])
# Plots "accepted" against paperId (labelled "Submission Order") in 10
# buckets, then "rating" against review "order" for x values range(12)[1:].
# df is then rebound to df_review, so the "rating<i>" indicator columns created
# in the loop (rating == i for i in -2, -1, 1, 2) are added to df_review itself.
# NOTE(review): colors has five entries with an empty string in the middle,
# presumably so that index i + 2 matches i in -2..2 -- its use lies past the
# end of this fragment; confirm.
# The fragment stops inside an unfinished plots.append(plotBar(...)) call.
plotBucket(df, "paperId", "accepted", x_label="Submission Order", y_label="P(Accept)", title="Acceptance vs. Submission Order", numBuckets=10) #how subsequent reviews influence each other plotBar( df_review, "order", "rating", range(12)[1:], x_label="Review Order", y_label="Rating", title="Rating vs. Order of Review", ) df = df_review plots = [] colors =\ ["red", "pink", "", "cyan", "blue",] for i in [-2, -1, 1, 2]: df["rating%d" % i] = df["rating"] == i plots.append( plotBar(df_review,
# "higherRating" against "distDiff", bucketed with delta=5.
plotBucket(
    df, "distDiff", "higherRating",
    delta=5,
    x_label="Difference in Min Distance to Reviewer",
    y_label="Probability of a Higher Rating",
    color="Green",
    x_percentile=False,
)

# "paperRatingDiff" against "minDist" for the x values 1..8.
plotBar(
    df_original, "minDist", "paperRatingDiff",
    list(range(1, 9)),
    x_label="Min Distance between Reviewer and Paper Authors",
    y_label="Deviation from Paper Average Rating",
    title="Deviation from Paper Average vs. Min Dist to Reviewer",
    color="Blue",
)

# Disabled exploration code (Python 2 print statements), kept for reference:
# df["avgRatingDist"] = df["rating"] - df["rating2"]
# for i in range(7):
#     for j in range(7):
#         if (j >= i):
#             print "(%d, %d)" % (i,j)
#             print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].mean()
#             print df[(df["minDist"] == i) & (df["minDist2"]==j)]["avgRatingDist"].shape[0]
# Rating against reviewer/author distance: bucketed average distance (p1)
# with the per-value minimum distance (p2) on the same figure.
p1 = plotBucket(
    df_merged, "avgDist", "rating",
    delta=10,
    x_label="Reviewer Author Distance",
    y_label="Avg Rating",
    xlim=[0, 8],
    x_percentile=False,
)
p2 = plotBar(
    df_merged, "minDist", "rating",
    range(1, 7),
    color="red",
    sameFigure=True,
)
legend([p1, p2], ["Average Author Distance", "Min Author Distances"])

# Min co-author distance vs P(Accept): per-paper aggregates side by side.
# NOTE(review): the first two pieces both come from the "minDist" column
# (mean and min), so the combined frame presumably carries two columns with
# that same name -- confirm how later code tells them apart.
g = df_merged.groupby('paperId')
h = pd.concat(
    [
        g["minDist"].mean(),
        g["minDist"].min(),
        g["accepted"].median(),
    ],
    axis=1,
)
# Fragment cut at both ends: it opens with trailing arguments of a call that
# begins outside this view and stops inside an unfinished plotBucket(...) call.
# In between: for the slices[0] subset of a copy of df, every numAuthors value
# >= 3 is overwritten with the mean of those values rounded to 2 decimals
# (newValue), and plotBar is given the x values [1, 2, newValue]; its handle is
# the first entry of p.
color="white", x_label="Number of Authors", y_label="Average Rating", marker="None", x_percentile=False ) p = [] df1 = df.copy()[slices[0]] ofInterest = df1["numAuthors"] >= 3 newValue = np.round(df1[ofInterest]["numAuthors"].mean(), 2) df1.loc[ofInterest, "numAuthors"] = newValue p.append(plotBar( df1, "numAuthors", "avgRating", [1, 2, newValue], color="blue", sameFigure=True, marker="s", plotMean=False)) colors = ["red", "green", "purple"] p.extend([ plotBucket( df[slices[i]], "numAuthors", "avgRating", numBuckets=4, color=colors[i-1], sameFigure=True, marker="s",
# Acceptance proportion per mode author country, drawn as a bar chart.

# Insert a line break into the long country name before it is used as a
# category value (presumably so the label fits under its bar -- confirm).
df.loc[
    df["authorCountryMode"] == "United Kingdom", "authorCountryMode"
] = "United\nKingdom"

countryCounts = df['authorCountryMode'].value_counts().to_dict()
# dict.items() exists on Python 2 and 3; iteritems() is Python 2 only.
countries = [c for c, count in countryCounts.items() if count > 10]
# Order countries by ascending mean of the 'accepted' column ...
countries.sort(
    key=lambda c: df[df['authorCountryMode'] == c]['accepted'].mean())
# ... and drop the first one, i.e. the lowest mean.
countries = countries[1:]

p = plotBar(
    df,
    "authorCountryMode",
    "accepted",
    countries,
    title="Proportion of Papers Accepted by Country",
    x_label="Mode Author Country",
    y_label="Proportion Accepted",
    categorical=True,
    errorBars=True,
)

# Recolour the artists of every container on the current axes by height:
# (height - 0.05) / 0.20 is fed to the reversed coolwarm colormap.
# j counts children across all containers; the first four are left untouched.
j = 0
for container in plt.gca().containers:
    for child in container.get_children():
        if j > 3:
            child.set_color(
                plt.cm.coolwarm_r((child.get_height() - 0.05) / 0.20))
        j += 1

gcf().set_size_inches((40, 10))
# Vertical position just below the top of the current y range.
y = ylim()[1] - 0.025
# Fragment: the first tokens are trailing arguments of a call that begins
# outside this view.
# For the slices[0] subset of a copy of df_paper, every "#Authors" value >= 3
# is overwritten with the mean of those values rounded to 2 decimals
# (newValue), and plotBar (p1) is given the x values [1, 2, newValue].
# p2 draws the slices[1] subset with plotBucket (delta=20) on the same figure.
delta=10, color="white", x_label="Number of Authors", y_label="Average Rating", marker="None", x_percentile=False) df1 = df_paper.copy()[slices[0]] ofInterest = df1["#Authors"] >= 3 newValue = np.round(df1[ofInterest]["#Authors"].mean(), 2) df1.loc[ofInterest, "#Authors"] = newValue p1 = plotBar( df1, "#Authors", "avgRating", [1, 2, newValue], color="blue", sameFigure=True, marker="s", plotMean=False) p2 = plotBucket( df_paper[slices[1]], "#Authors", "avgRating", delta=20, color="red", sameFigure=True, x_percentile=False, marker="s", plotMean=False)
# Fragment: opens with the tail of a commented-out parameter list and stops
# inside an unfinished plotBucket(...) call.
# authorFrame is the subset of userFrame with "#Papers" > 0; acceptanceRate is
# "#Accepted" / "#Papers", with the * 1.0 keeping the ratio floating-point.
# NOTE(review): the new column is assigned on a boolean-filtered slice of
# userFrame, which can raise pandas' SettingWithCopyWarning -- consider taking
# an explicit .copy() of the slice.
# NOTE(review): plotBar is given x value 0 although the "#Papers" > 0 filter
# leaves no such rows -- confirm it is intended.
# ycol, # delta=5, # color="blue", # title="default", # x_label="default", # y_label="default", # x_percentile=True, # xlim=None): authorFrame = userFrame[userFrame["#Papers"] > 0] authorFrame["acceptanceRate"] = authorFrame["#Accepted"] \ * 1.0/authorFrame["#Papers"] plotBar(authorFrame, "#Papers", "acceptanceRate", [0, 1, 2, 3, 4, 5], title="Acceptance Rate vs. Number of Submissions", x_label="Number of Submissions", y_label="Acceptance Rate", xlim=[0, 6]) plotBucket(userFrame, "#PastPapers", "#Papers", x_label="Number of Past Papers", y_label="Number of Submissions", x_percentile=False, xlim=[0, 200]) plotBucket(authorFrame, "#PastPapers", "acceptanceRate",
# (Tail of a commented-out parameter list whose start lies above this
# fragment; kept as found.)
#              delta=5,
#              color="blue",
#              title="default",
#              x_label="default",
#              y_label="default",
#              x_percentile=True,
#              xlim=None):

# Users with at least one submission. The explicit copy makes authorFrame an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning for writing to a slice of userFrame.
authorFrame = userFrame[userFrame["#Papers"] > 0].copy()
# The "* 1.0" keeps the ratio floating-point.
authorFrame["acceptanceRate"] = (
    authorFrame["#Accepted"] * 1.0 / authorFrame["#Papers"]
)

# Acceptance rate against number of submissions.
# NOTE(review): x value 0 is requested although the "#Papers" > 0 filter
# leaves no such rows -- confirm it is intended.
plotBar(
    authorFrame,
    "#Papers",
    "acceptanceRate",
    [0, 1, 2, 3, 4, 5],
    title="Acceptance Rate vs. Number of Submissions",
    x_label="Number of Submissions",
    y_label="Acceptance Rate",
    xlim=[0, 6],
)

# Number of submissions against number of past papers, over all users.
plotBucket(
    userFrame,
    "#PastPapers",
    "#Papers",
    x_label="Number of Past Papers",
    y_label="Number of Submissions",
    x_percentile=False,
    xlim=[0, 200],
)