def get_difficulties(answers, data=None, model=None, force=False, name="difficulty"):
    """Return item difficulties as a pandas Series, cached on disk.

    When both ``data`` and ``model`` are supplied they are wrapped in a
    ``Runner`` and the cache file name is derived from ``runner._hash``.
    Otherwise (including when only one of the two is supplied) both are
    replaced by the default matmat data set and
    ``EloPriorCurrentModel(KC=2, KI=0.5)``, and a fixed cache file is used.

    Args:
        answers: Table with an ``"item"`` column; its unique values are the
            items whose difficulties are requested from the model.
        data: Optional data object handed to ``Runner``.
        model: Optional model handed to ``Runner``; it must provide
            ``get_difficulties(items)``.
        force: If true, ignore an existing cache file and recompute.
        name: Name given to the returned Series.

    Returns:
        pandas.Series of difficulties. When freshly computed it is indexed
        by the unique items of ``answers``.

    Note:
        The cache file name does not depend on ``answers``: a cache hit
        returns whatever items were stored when the file was written. Pass
        ``force=True`` to recompute for a different set of items.
    """
    # Compare against None explicitly: truth-testing would silently discard
    # a supplied object that defines __len__/__bool__ and evaluates falsy,
    # and raises outright for pandas-like objects.
    if data is not None and model is not None:
        runner = Runner(data, model)
        file_name = "../cache/difficulties_{}.pd".format(runner._hash)
    else:
        data = d.Data("../data/matmat/2016-11-28/answers.pd")
        model = EloPriorCurrentModel(KC=2, KI=0.5)
        runner = Runner(data, model)
        file_name = "../cache/difficulties_matmat.pd"

    if os.path.exists(file_name) and not force:
        difficulties = pd.read_pickle(file_name)
        # Honour the requested name even if the cached Series was stored
        # under a different one.
        difficulties.name = name
        return difficulties

    items = answers["item"].unique()
    # The model has to be (re)trained before its difficulties are read.
    runner.run(force=True)
    difficulties = pd.Series(
        data=model.get_difficulties(items),
        index=items,
        name=name,
    )

    # Make sure the cache directory exists so the first run does not fail
    # while writing the pickle.
    cache_dir = os.path.dirname(file_name)
    if cache_dir and not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    difficulties.to_pickle(file_name)
    return difficulties
items = data.get_items_df() items = items[(items["skill_lvl_2"] == 210) & ~items["skill_lvl_3"].isnull()].loc[:, ("question", "answer", "visualization")] items = items[items["visualization"] == "free_answer"] answers[answers["item"].isin(items.index)].to_pickle("../../data/matmat/2016-01-04/answers-multiplication.pd") data_multiplication = Data("../../data/matmat/2016-01-04/answers-multiplication.pd") model = EloPriorCurrentModel(alpha=1.4, beta=0.1, KC=3, KI=0.5) items = items.join(pd.Series(answers.groupby("item").size(), name="answer_count")) items = items.join(pd.Series(answers.groupby("item").apply(lambda i: i["correct"].sum() / len(i)), name="success_rate")) items = items.join(pd.Series(answers.groupby("item")["response_time"].median(), name="response_time")) Evaluator(data_multiplication, model).get_report(force_run=True) items["model_difficulty"] = model.get_difficulties(items.index) items["model_difficulty"] -= items["model_difficulty"].mean() skills = items.groupby("question").agg({ "answer_count": "sum", "success_rate": "mean", "response_time": "mean", "model_difficulty": "mean", }) dfSR = pd.DataFrame(index=range(1, 11)[::-1], columns=range(1, 11), dtype=float) dfD = pd.DataFrame(index=range(1, 11)[::-1], columns=range(1, 11), dtype=float) dfAC = pd.DataFrame(index=range(1, 11)[::-1], columns=range(1, 11), dtype=float) dfRT = pd.DataFrame(index=range(1, 11)[::-1], columns=range(1, 11), dtype=float) for q, skill in skills.iterrows(): a,b = map(int, q.split("x"))