def _collect_learning_records(model, data):
    """Replay every answer in ``data`` through ``model`` and tabulate the run.

    For each answer the model first predicts, then is updated with the
    observed outcome; the student's skill/speed and the item's
    difficulty/intensity are read *after* that update.

    Returns a DataFrame with one row per answer and the columns
    item, student, skill, speed, enum, correct, response_time_log,
    prediction, time_prediction_log, difficulty, intensity.
    """
    # enum = 1-based position of the answer within its student's history.
    answers_seen = defaultdict(int)
    rows = []
    for answer in data.iter():
        item = answer["item"]
        student = answer["student"]

        prediction, time_prediction = model.predict(student, item, answer)
        model.update(student, item, prediction, time_prediction,
                     answer["correct"], answer["response_time"], answer)

        skill = model._prediction_model.get_skill(student)
        speed = model._time_model.get_skill(student)
        answers_seen[student] += 1

        rows.append([
            item,
            student,
            skill,
            speed,
            answers_seen[student],
            answer["correct"],
            # Times are stored in log space so that their means, once
            # exponentiated for plotting, are geometric means.
            np.log(answer["response_time"]),
            prediction,
            np.log(time_prediction),
            model._prediction_model.get_difficulty(item),
            model._time_model.get_difficulty(item),
        ])

    return pd.DataFrame(rows, columns=(
        'item', 'student', 'skill', 'speed', 'enum', 'correct',
        'response_time_log', 'prediction', 'time_prediction_log',
        'difficulty', 'intensity',
    ))


def _aggregate_by_answer_number(df, points, columns):
    """Return ``(counts, means)`` of ``df`` rows grouped by answer number.

    ``counts[i]`` is the number of rows whose 'enum' equals ``points[i]``;
    ``means[column][i]`` is the mean of ``column`` over those rows (NaN when
    there are none).  The row mask for each point is built once and shared
    by all columns instead of being recomputed for every plotted series.
    """
    counts = []
    means = {column: [] for column in columns}
    for point in points:
        mask = df['enum'] == point
        counts.append(mask.sum())
        selected = df.loc[mask, columns]
        for column in columns:
            means[column].append(selected[column].mean())
    return counts, means


def _plot_learning_curves(df, title, length):
    """Draw the six learning-curve panels for answer numbers 1..``length``."""
    points = range(1, length + 1)
    counts, means = _aggregate_by_answer_number(df, points, [
        'skill', 'speed', 'correct', 'prediction',
        'response_time_log', 'time_prediction_log', 'intensity',
    ])

    # Left column: model state and sample size.
    plt.subplot(321)
    plt.title(title)
    plt.plot(points, means['skill'])
    plt.xlabel('# answer')
    plt.ylabel('skill')

    plt.subplot(323)
    plt.plot(points, means['speed'])
    plt.xlabel('# answer')
    plt.ylabel('speed')

    plt.subplot(325)
    # The last answer number is left out of the bar chart.
    plt.bar(points[:-1], counts[:-1])
    plt.ylabel('User count')
    plt.xlabel('# answer')

    # Right column: observations against predictions.
    plt.subplot(322)
    plt.plot(points, means['correct'], label='observation')
    plt.plot(points, means['prediction'], label='prediction')
    plt.legend(loc=3)
    plt.xlabel('# answer')
    plt.ylabel('success')

    plt.subplot(324)
    plt.plot(points, [np.exp(m) for m in means['response_time_log']],
             label='observation')
    plt.plot(points, [np.exp(m) for m in means['time_prediction_log']],
             label='prediction')
    plt.legend(loc=3)
    plt.xlabel('# answer')
    plt.ylabel('time')

    plt.subplot(326)
    # Only the time model's item parameter is plotted here; the prediction
    # model's 'difficulty' column is recorded in df but not drawn.
    plt.plot(points, means['intensity'])
    plt.ylabel('Intensity')
    plt.xlabel('# answer')


def model_learning(prediction_model, time_model, data, length=200):
    """Run a combined correctness/time model over ``data`` and plot learning curves.

    The two models are wrapped in a ``TimeCombiner``, which pre-processes the
    data and is then replayed over every answer from ``data.iter()``.  Means
    of skill, speed, success rate, response time and intensity are plotted
    against the position of the answer in each student's history.

    Args:
        prediction_model: model predicting answer correctness.
        time_model: model predicting response time.
        data: data set exposing ``iter()``; each answer is indexed by
            "item", "student", "correct" and "response_time".
        length: number of leading answer positions (1..length) to plot.

    Returns:
        The per-answer DataFrame (columns: item, student, skill, speed, enum,
        correct, response_time_log, prediction, time_prediction_log,
        difficulty, intensity).

    Side effects:
        Draws six subplots through ``plt`` and pickles the DataFrame to
        'model_learning.pd' (a path relative to the working directory).
    """
    model = TimeCombiner(prediction_model, time_model)
    model.pre_process_data(data)

    df = _collect_learning_records(model, data)
    _plot_learning_curves(df, str(model), length)

    df.to_pickle('model_learning.pd')
    return df
from utils.runner import Runner
from utils.utils import grid_search, enumerate_df


def moving_average(a, n=3):
    """Return the simple moving average of ``a`` over windows of ``n`` items.

    Computed from the cumulative sum, so each window costs O(1).  When ``a``
    has at least ``n`` items the result has ``n - 1`` fewer entries than
    ``a``: only complete windows are reported.
    """
    cumulative = np.cumsum(a, dtype=float)
    # Removing the cumulative sum from n positions earlier leaves, at each
    # index, the sum of just the last n items.
    window_sums = cumulative[n:] - cumulative[:-n]
    cumulative[n:] = window_sums
    return cumulative[n - 1:] / n


# Load the answers twice: unfiltered, and with filter=(100, 100).
data_all = d.Data("../data/matmat/2016-06-27/answers.pd")
data_all.trim_times()

data_long = d.Data("../data/matmat/2016-06-27/answers.pd", filter=(100, 100))
data_long.trim_times()

# The experiment below runs on the unfiltered data set.
data = data_all

time_model = BasicTimeModel(alpha=0.6, beta=0.1, K=0.25)
model = TimeCombiner(AvgModel(), time_model)

# student -> list of time-model skill values, one per callback invocation.
skills = defaultdict(list)


def update(student, item):
    """Record the student's current time-model skill in ``skills``."""
    skills[student].append(time_model.get_skill(student))


# The callback has to be attached before the run so the history is captured.
model.after_update_callback = update
Runner(data, model).run(force=True)

# Disabled alternative: plot the distribution of skills instead.
# skills = time_model.get_skills(data.get_students())
# sns.distplot(skills)

length = 200