def f1():
    """Draw the two "number of bids" charts for reviewers.

    Reads the module-level frames ``df_review`` and ``df_reviewer``.
    The mean ``reviewLength`` of each ``userId`` is computed from
    ``df_review`` and merged with ``df_reviewer`` on ``userId``; the merged
    frame is then handed to ``plotBucket`` (``numBids`` against
    ``reviewLength``) and to ``plotFrequencyHistogram`` (``numBids`` alone).

    Takes no arguments and returns nothing; the work is done by the two
    plotting helpers.
    """
    # One row per userId holding that user's mean reviewLength.
    mean_length_by_user = df_review.groupby("userId")["reviewLength"].mean()
    mean_length_frame = mean_length_by_user.reset_index()

    # Attach the reviewer columns (numBids among them) to each mean.
    bids_and_length = pd.merge(mean_length_frame, df_reviewer, on="userId")

    plotBucket(
        bids_and_length,
        "numBids",
        "reviewLength",
        x_label="Number of Bids",
        y_label="Average Review Length",
        x_percentile=False,
        title="Review Quality vs. Number of Bids",
        numBuckets=7,
        xlim=[0, 100],
    )

    # Histogram bin edges: every 5 bids from 0 through 50.
    bid_bin_edges = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    plotFrequencyHistogram(
        bids_and_length,
        'numBids',
        'Number of Bids',
        myBins=bid_bin_edges,
    )
def f1():
    """Plot average review length vs. bid count, then the bid-count histogram.

    Uses the module-level ``df_review`` and ``df_reviewer`` frames: the
    per-``userId`` mean of ``reviewLength`` is merged with ``df_reviewer``
    on ``userId`` and the result is passed to ``plotBucket`` and then to
    ``plotFrequencyHistogram``. Nothing is returned.
    """
    per_user_mean = (
        df_review
        .groupby("userId")["reviewLength"]
        .mean()
        .reset_index()
    )
    merged = pd.merge(per_user_mean, df_reviewer, on="userId")

    # Keyword options for the bucketed plot, gathered in one place.
    bucket_options = {
        "x_label": "Number of Bids",
        "y_label": "Average Review Length",
        "x_percentile": False,
        "title": "Review Quality vs. Number of Bids",
        "numBuckets": 7,
        "xlim": [0, 100],
    }
    plotBucket(merged, "numBids", "reviewLength", **bucket_options)

    # Bin edges 0, 5, ..., 50 (the stop value 55 is exclusive).
    histogram_bins = list(range(0, 55, 5))
    plotFrequencyHistogram(
        merged, 'numBids', 'Number of Bids', myBins=histogram_bins
    )
'4/12', '4/13', '4/14', '4/15', '4/16', '4/17', ] dates_5Days = transformDates(labels_5Days) datesMidDay = [ .5 * (dates_5Days[i] + dates_5Days[i + 1]) for i in range(len(dates_5Days) - 1) ] plotFrequencyHistogram(df, 'time', 'Submission Date', myBins=dates_5Days, plotMean=False) xticks(datesMidDay, labels_5Days[:-1]) plotDeadline() #Plot 2 -- Submissions in the last 12 hours labels_12Hours = [ '4/14 12:00 PM', '4/14 2:00 PM', '4/14 4:00 AM', '4/14 6:00 PM', '4/14 8:00 PM', '4/14 10:00 PM', '4/15 12:00 AM', '4/15 2:00 AM',
more_scramble = set() for id, reviewer in loader.reviewers.iteritems(): revs = reviewer.reviews if len(revs) > 7: sorted_rev = sorted(revs, key=lambda x: int(x.time.strftime('%s'))) paperIds = [r.paper.id for r in sorted_rev] numInv = countInversions(paperIds) inversions[id] = numInv if numInv > 15: more_scramble.add(id) else: less_scramble.add(id) df_order = pd.DataFrame(inversions.values()) df_order.columns = ['order_stat'] plotFrequencyHistogram(df_order, 'order_stat', "# perm inversions", myBins=np.linspace(0, 50, 10)) df_paper = pd.read_pickle("savedFrames/iteration5/paperTable") df_review = pd.read_pickle("savedFrames/iteration5/reviewTable") df = pd.merge(df_review, df_paper, on="paperId") df['time'] = df['time'].values.astype(datetime.datetime) df["agree"] = (df["rating"] > 0) == df["accepted"] df["positive"] = df["rating"] > 0 df["absRating"] = df["rating"].abs() df_less = df[df['userId'].isin(less_scramble)] df_more = df[df['userId'].isin(more_scramble)] def transformDates(dateLabels): return list(
) plot( [0, 0], plt.ylim(), color='red', linewidth=.5, linestyle="--") legend( [p1, p2, p3], ["High Similarity", "Medium Similarity", "Low Similarity"], loc=3) #Basic Frequency Plots plotFrequencyHistogram( df_rating, maxSimCol, "Most Experienced Author / Reviewer Similarity", color="#62FFBB", myBins=None) plotFrequencyHistogram( df_paper, "avgRating", "Average Rating for a Paper", color="#B9E84D", myBins=np.arange(-3, 3.5, .5)) plotFrequencyHistogram( df_paper, "authorsMaxPastPaper", "Past Paper Count of Most Experienced Author", color="#E8634D",
'4/11', '4/12', '4/13', '4/14', '4/15', '4/16', '4/17', ] dates_5Days = transformDates(labels_5Days) datesMidDay = [.5*(dates_5Days[i] + dates_5Days[i+1]) for i in range(len(dates_5Days) - 1)] plotFrequencyHistogram( df, 'time', 'Submission Date', myBins=dates_5Days, plotMean=False ) xticks(datesMidDay, labels_5Days[:-1]) plotDeadline() #Plot 2 -- Submissions in the last 12 hours labels_12Hours = [ '4/14 12:00 PM', '4/14 2:00 PM', '4/14 4:00 AM', '4/14 6:00 PM', '4/14 8:00 PM', '4/14 10:00 PM', '4/15 12:00 AM',
x_label="Similarity", y_label="Rating", delta=15, title= "Average Rating vs. Similarity of Reviewer to Most Experienced Author", marker="s", color="green", xlim=[0, 100] ) setp(p, linewidth=2, alpha=1) #Basic Frequency Plots plotFrequencyHistogram( df, "rating", "Paper Ratings", color="#B9E84D", myBins=[-3.6, -2.4, -1.2, 0, 1.2, 2.4, 3.6], plotMean=False) xticks([-3, -1.8, -.6, .6, 1.8, 3], ["Strong Reject", "Reject", "Weak Reject", "Weak Accept", "Accept", "Strong Accept"]) colors =\ ["#FF1C00", "#FF5C54", "#FFA6A1", "#A1D3FF", "#54B2FF", "#006FFF"] for container in plt.gca().containers: for i, child in enumerate(container.get_children()): child.set_color(colors[i]) plotFrequencyHistogram( df_reviewer[df_reviewer["numReviews"] >= 8],