def get_difficulties(answers, data=None, model=None, force=False, name="difficulty"):
    """Return per-item difficulties as a pandas Series, cached on disk.

    When both ``data`` and ``model`` are supplied, they are wrapped in a
    ``Runner`` and the cache file is keyed by ``runner._hash``.  Otherwise
    (including when only one of the two is supplied) both are replaced by
    the built-in matmat dataset and an ``EloPriorCurrentModel(KC=2, KI=0.5)``
    and a fixed cache file is used.

    Args:
        answers: Object indexable by ``"item"`` whose result offers
            ``.unique()`` (presumably an answers DataFrame -- confirm with
            callers).  Only consulted when the difficulties are recomputed.
        data: Optional data object passed to ``Runner``.
        model: Optional model passed to ``Runner``; must provide
            ``get_difficulties(items)``.
        force: If true, ignore an existing cache file and recompute.
        name: Name given to the returned Series.

    Returns:
        The difficulties Series, either loaded from the cache file or
        freshly computed, indexed by the unique items of ``answers``, and
        written to the cache file.

    NOTE(review): on a cache hit ``answers`` is not consulted, so the cached
    Series may cover a different set of items than ``answers`` contains.
    """
    # Test for presence explicitly: relying on truthiness would treat a
    # supplied-but-"empty" object (anything defining __len__/__bool__) as
    # missing and silently switch to the default dataset, or raise for
    # objects whose truth value is ambiguous.
    if data is not None and model is not None:
        runner = Runner(data, model)
        file_name = "../cache/difficulties_{}.pd".format(runner._hash)
    else:
        # Fall back to the default dataset and model; any single argument
        # that was supplied on its own is overridden here.
        data = d.Data("../data/matmat/2016-11-28/answers.pd")
        model = EloPriorCurrentModel(KC=2, KI=0.5)
        runner = Runner(data, model)
        file_name = "../cache/difficulties_matmat.pd"

    if os.path.exists(file_name) and not force:
        # NOTE: unpickling is only safe because the cache file is produced
        # locally by this function; never point it at untrusted files.
        difficulties = pd.read_pickle(file_name)
        # Honour the requested name on a cache hit too, so the result does
        # not depend on which call happened to populate the cache.
        difficulties.name = name
        return difficulties

    items = answers["item"].unique()
    # Always re-run the model here: the difficulties are read from the
    # model's state after the run.
    runner.run(force=True)
    difficulties = pd.Series(data=model.get_difficulties(items), index=items, name=name)
    difficulties.to_pickle(file_name)
    return difficulties
"beta": np.arange(0., 0.2, 0.02), # "KC": np.arange(1.5, 5.0, 0.25), # "KI": np.arange(0, 2.5, 0.25), # }, plot_axes=["KC", "KI"]) }, plot_axes=["alpha", "beta"]) plt.show() items = data.get_items_df() items = items[(items["skill_lvl_2"] == 210) & ~items["skill_lvl_3"].isnull()].loc[:, ("question", "answer", "visualization")] items = items[items["visualization"] == "free_answer"] answers[answers["item"].isin(items.index)].to_pickle("../../data/matmat/2016-01-04/answers-multiplication.pd") data_multiplication = Data("../../data/matmat/2016-01-04/answers-multiplication.pd") model = EloPriorCurrentModel(alpha=1.4, beta=0.1, KC=3, KI=0.5) items = items.join(pd.Series(answers.groupby("item").size(), name="answer_count")) items = items.join(pd.Series(answers.groupby("item").apply(lambda i: i["correct"].sum() / len(i)), name="success_rate")) items = items.join(pd.Series(answers.groupby("item")["response_time"].median(), name="response_time")) Evaluator(data_multiplication, model).get_report(force_run=True) items["model_difficulty"] = model.get_difficulties(items.index) items["model_difficulty"] -= items["model_difficulty"].mean() skills = items.groupby("question").agg({ "answer_count": "sum", "success_rate": "mean", "response_time": "mean", "model_difficulty": "mean", })