def get_order(answers, metrics, min_answers=50):
    """Collect the final rolling-success value of every sufficiently long session.

    Args:
        answers: DataFrame with a ``'session'`` column. Its index labels are
            used to select the matching rows from each metric.
        metrics: Mapping of metric name to an object that supports ``.loc``
            selection by ``answers`` index labels (presumably a pandas Series
            aligned with ``answers`` -- confirm against callers).
        min_answers: Minimum number of answers a session must contain to be
            included.

    Returns:
        DataFrame with one column per metric name. Each column holds, for
        every qualifying session (in ``groupby`` key order), the last element
        of ``rolling_success`` computed over that session's metric values.
    """
    grouped = answers.groupby('session')
    counts = grouped.size()
    long_sessions = counts[counts >= min_answers].index
    # One pass over the frame instead of a boolean-mask scan per session:
    # maps each session key to the index labels of its rows.
    rows_by_session = grouped.groups

    ends = defaultdict(list)
    for session in long_sessions:
        answer_index = rows_by_session[session]
        for name, metric in metrics.items():
            success = rolling_success(metric.loc[answer_index])
            # NOTE(review): assumes rolling_success returns a positionally
            # indexable sequence (list / ndarray), so [-1] is the last value.
            ends[name].append(success[-1])
    return pd.DataFrame.from_dict(ends)
def master_curves(answers, metrics, min_answers=50, student_count=None, smooth=0):
    """Plot the mean rolling-success curve of each metric, one subplot per group.

    Args:
        answers: DataFrame with a ``'session'`` column. Its index labels are
            used to select the matching rows from each metric.
        metrics: Sequence of mappings ``{metric_name: metric}``. Each mapping
            is drawn in its own subplot; every ``metric`` must support
            ``.mean()`` and ``.loc`` selection by ``answers`` index labels
            (presumably a pandas Series aligned with ``answers`` -- confirm
            against callers).
        min_answers: Minimum number of answers a session must contain to be
            included; also the number of points plotted per curve.
        student_count: If truthy, only the first ``student_count`` qualifying
            sessions are used; otherwise all qualifying sessions are used.
        smooth: If truthy, each session's curve is smoothed with a 20-sample
            rolling mean before averaging (the first 19 points become NaN).

    Returns:
        None. Draws on the current matplotlib figure.
    """
    grouped = answers.groupby('session')
    counts = grouped.size()
    sessions = counts[counts >= min_answers]
    students = sessions.index[:student_count] if student_count else sessions.index

    # Row labels per selected session, computed once rather than re-scanning
    # ``answers`` for every (metric, session) pair.
    rows_by_session = grouped.groups
    student_rows = [rows_by_session[student] for student in students]

    for i, mcs in enumerate(metrics):
        plt.subplot(len(metrics), 1, i + 1)
        for metric_name, metric in mcs.items():
            success_mean = metric.mean()
            total = np.zeros(min_answers)
            for rows in student_rows:
                success = rolling_success(metric.loc[rows], success_mean)
                if smooth:
                    # Hack to make the line smoother. Series.rolling replaces
                    # the removed pd.rolling_mean with the same defaults.
                    success = pd.Series(success).rolling(20).mean()
                # NOTE(review): assumes rolling_success yields at least
                # min_answers values for a qualifying session.
                total += np.asarray(success)[:min_answers]
            # Divide by the number of sessions actually summed; student_count
            # may exceed the number of qualifying sessions.
            plt.plot(range(len(total)), total / len(student_rows), label=metric_name)
        # One legend per subplot so every group of curves is labelled.
        plt.legend()