def f1():
    """Relate each reviewer's mean review length to their number of bids.

    Averages ``reviewLength`` per ``userId`` in the module-level
    ``df_review``, joins that with ``df_reviewer`` on ``userId``, and hands
    the merged frame to ``plotBucket`` and ``plotFrequencyHistogram``.
    """
    mean_length_by_user = df_review.groupby("userId")["reviewLength"].mean()
    df = pd.merge(mean_length_by_user.reset_index(), df_reviewer, on="userId")

    plotBucket(
        df,
        "numBids",
        "reviewLength",
        x_label="Number of Bids",
        y_label="Average Review Length",
        x_percentile=False,
        title="Review Quality vs. Number of Bids",
        numBuckets=7,
        xlim=[0, 100],
    )

    # Bin edges 0, 5, 10, ..., 50 for the bid-count histogram.
    bid_bins = list(range(0, 55, 5))
    plotFrequencyHistogram(df, 'numBids', 'Number of Bids', myBins=bid_bins)
Esempio n. 2
0
def f1():
	"""Plot mean review length per reviewer against the reviewer's bid count.

	The per-user mean of ``reviewLength`` (from ``df_review``) is merged with
	``df_reviewer`` on ``userId``; the result is passed to ``plotBucket`` and
	then to ``plotFrequencyHistogram``.
	"""
	per_user_mean = (
		df_review
		.groupby("userId")["reviewLength"]
		.mean()
		.reset_index()
	)
	df = pd.merge(per_user_mean, df_reviewer, on="userId")

	# Keyword options for the bucketed plot, collected in one place.
	bucket_options = dict(
		x_label="Number of Bids",
		y_label="Average Review Length",
		x_percentile=False,
		title="Review Quality vs. Number of Bids",
		numBuckets=7,
		xlim=[0, 100],
	)
	plotBucket(df, "numBids", "reviewLength", **bucket_options)

	# Histogram bin edges every 5 bids, from 0 up to 50.
	edges = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
	plotFrequencyHistogram(df, 'numBids', 'Number of Bids', myBins=edges)
    '4/12',
    '4/13',
    '4/14',
    '4/15',
    '4/16',
    '4/17',
]
# Histogram bin edges, obtained by converting the day labels.
dates_5Days = transformDates(labels_5Days)
# Midpoint between every pair of neighbouring edges; used as tick positions.
datesMidDay = [
    .5 * (edge_lo + edge_hi)
    for edge_lo, edge_hi in zip(dates_5Days, dates_5Days[1:])
]

plotFrequencyHistogram(
    df, 'time', 'Submission Date', myBins=dates_5Days, plotMean=False)
# One tick per bin (the last label only closes the final bin).
xticks(datesMidDay, labels_5Days[:-1])
plotDeadline()

#Plot 2 -- Submissions in the last 12 hours
labels_12Hours = [
    '4/14 12:00 PM',
    '4/14 2:00 PM',
    '4/14 4:00 AM',
    '4/14 6:00 PM',
    '4/14 8:00 PM',
    '4/14 10:00 PM',
    '4/15 12:00 AM',
    '4/15 2:00 AM',
Esempio n. 4
0
more_scramble = set()
# For every reviewer with more than 7 reviews, order the reviews by
# submission time and pass the resulting paper-id sequence to
# countInversions. Reviewers with a count above 15 go into more_scramble,
# the rest into less_scramble.
# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for reviewer_id, reviewer in loader.reviewers.items():
    revs = reviewer.reviews
    if len(revs) > 7:
        # NOTE(review): strftime('%s') (epoch seconds) is a platform
        # extension, not a documented Python directive -- confirm it is
        # available on every platform this script runs on.
        sorted_rev = sorted(revs, key=lambda x: int(x.time.strftime('%s')))
        paperIds = [r.paper.id for r in sorted_rev]
        numInv = countInversions(paperIds)
        inversions[reviewer_id] = numInv
        if numInv > 15:
            more_scramble.add(reviewer_id)
        else:
            less_scramble.add(reviewer_id)
# Materialise the values so the constructor receives a plain list rather
# than a dict view on Python 3.
df_order = pd.DataFrame(list(inversions.values()))
df_order.columns = ['order_stat']
plotFrequencyHistogram(df_order,
                       'order_stat',
                       "# perm inversions",
                       myBins=np.linspace(0, 50, 10))

# Join every review with its paper and derive per-review helper columns.
df_paper = pd.read_pickle("savedFrames/iteration5/paperTable")
df_review = pd.read_pickle("savedFrames/iteration5/reviewTable")
df = pd.merge(df_review, df_paper, on="paperId")
df['time'] = df['time'].values.astype(datetime.datetime)
# "agree": the sign of the rating matches the "accepted" column.
df["agree"] = (df["rating"] > 0) == df["accepted"]
df["positive"] = df["rating"] > 0
df["absRating"] = df["rating"].abs()
# Split the reviews by which scramble group the reviewer fell into.
df_less = df[df['userId'].isin(less_scramble)]
df_more = df[df['userId'].isin(more_scramble)]


def transformDates(dateLabels):
    return list(
)
# Thin dashed red line from (0, ymin) to (0, ymax) of the current y-limits.
plot([0, 0], plt.ylim(), color='red', linewidth=.5, linestyle="--")
# Label the three plotted series p1, p2, p3.
legend([p1, p2, p3],
       ["High Similarity", "Medium Similarity", "Low Similarity"],
       loc=3)

#Basic Frequency Plots
# Histogram of the df_rating column named by maxSimCol; no explicit bins.
plotFrequencyHistogram(df_rating, maxSimCol,
                       "Most Experienced Author / Reviewer Similarity",
                       color="#62FFBB", myBins=None)

# Histogram of per-paper average rating, bin edges every 0.5 from -3 to 3.
plotFrequencyHistogram(df_paper, "avgRating",
                       "Average Rating for a Paper",
                       color="#B9E84D", myBins=np.arange(-3, 3.5, .5))

plotFrequencyHistogram(
    df_paper,
    "authorsMaxPastPaper",
    "Past Paper Count of Most Experienced Author",
    color="#E8634D",
    '4/11',
    '4/12',
    '4/13',
    '4/14',
    '4/15',
    '4/16',
    '4/17',
]
# Converted day labels double as the histogram's bin edges.
dates_5Days = transformDates(labels_5Days)
# Centre of each bin: the average of two consecutive edges.
datesMidDay = [
    .5 * (start + end)
    for start, end in zip(dates_5Days[:-1], dates_5Days[1:])
]

plotFrequencyHistogram(df, 'time', 'Submission Date',
                       myBins=dates_5Days, plotMean=False)
# Ticks sit at the bin centres; the final label has no bin of its own.
xticks(datesMidDay, labels_5Days[:-1])
plotDeadline()

#Plot 2 -- Submissions in the last 12 hours
labels_12Hours = [
    '4/14 12:00 PM',
    '4/14 2:00 PM',
    '4/14 4:00 AM',
    '4/14 6:00 PM',
    '4/14 8:00 PM',
    '4/14 10:00 PM',
    '4/15 12:00 AM',
    x_label="Similarity",
    y_label="Rating",
    delta=15,
    title=
    "Average Rating vs. Similarity of Reviewer to Most Experienced Author",
    marker="s",
    color="green",
    xlim=[0, 100]
)
setp(p, linewidth=2, alpha=1)

#Basic Frequency Plots
# Six bins of width 1.2 spanning -3.6 .. 3.6.
plotFrequencyHistogram(df, "rating", "Paper Ratings", color="#B9E84D",
                       myBins=[-3.6, -2.4, -1.2, 0, 1.2, 2.4, 3.6],
                       plotMean=False)
# Tick positions are the centres of the six bins above.
xticks(
    [-3, -1.8, -.6, .6, 1.8, 3],
    ["Strong Reject", "Reject", "Weak Reject",
     "Weak Accept", "Accept", "Strong Accept"],
)
# One colour per bin, in the same order as the tick labels.
colors = [
    "#FF1C00", "#FF5C54", "#FFA6A1",
    "#A1D3FF", "#54B2FF", "#006FFF",
]
# Recolour each child of every container on the current axes by position.
for container in plt.gca().containers:
    for i, child in enumerate(container.get_children()):
        child.set_color(colors[i])


plotFrequencyHistogram(
    df_reviewer[df_reviewer["numReviews"] >= 8],