from utils.data import Data
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

# Minimum number of answers a student must have within one concept to be
# counted for that concept. (Spelling of the name is kept unchanged in case
# other code refers to it.)
TRASHOLD = 25

d = Data("../data/matmat/2015-11-20/answers.pd")
answers = d.get_dataframe_all()
items = d.get_items_df()
skills = d.get_skills_df()

# Attach the row of the level-1 skill to every item; the resulting "name"
# column is used as the concept label below.
# NOTE(review): "name" presumably comes from the skills frame - confirm.
items = items.join(skills, on="skill_lvl_1")
concepts = items["name"].unique()

# concept -> Series indexed by student id whose values are the number of
# answers of that student within the concept (only students with at least
# TRASHOLD answers are kept).
sts = {}
for concept in concepts:
    print(concept)
    its = list(items[items["name"] == concept].index)
    students = answers[answers["item"].isin(its)].groupby("student").size()
    students = students[students >= TRASHOLD]
    print(len(students))
    sts[concept] = students

# Student ids per concept, built once outside the pairwise loop.
# Iterating a Series yields its *values* (here: answer counts), so the ids
# have to be taken from the index explicitly.
student_ids = {
    concept: set(students.index) for concept, students in sts.items()
}

# Square matrix: number of students that qualify for both concepts.
data = pd.DataFrame(index=concepts, columns=concepts, dtype=float)
for concept1 in concepts:
    for concept2 in concepts:
        count = len(student_ids[concept1] & student_ids[concept2])
        print(concept1, concept2, count)
        # Single .loc assignment (row concept2, column concept1) instead of
        # chained indexing, which may silently write into a temporary copy.
        data.loc[concept2, concept1] = count
def grid(data, model):
    """Grid-search ``alpha`` and ``beta`` for *model* on *data* and show the plot.

    ``KC`` and ``KI`` are passed as single values while ``alpha`` and ``beta``
    are passed as ranges; the result is plotted over the ``alpha``/``beta``
    axes.
    NOTE(review): the exact meaning of the two dict arguments is defined by
    ``utils.grid_search`` - confirm there.
    """
    # Alternative setup that was used before: pass alpha=0.25, beta=0.02 as
    # single values, sweep KC over np.arange(1.5, 5.0, 0.25) and KI over
    # np.arange(0, 2.5, 0.25), and plot over ["KC", "KI"].
    single_values = {"KC": 3, "KI": 0.5}
    value_ranges = {
        "alpha": np.arange(0.4, 1.7, 0.2),
        "beta": np.arange(0., 0.2, 0.02),
    }
    utils.grid_search(
        data, model, single_values, value_ranges, plot_axes=["alpha", "beta"]
    )
    plt.show()


# Select free-answer items with skill_lvl_2 == 210 and a level-3 skill,
# keeping only the columns needed further on.
# NOTE(review): 210 is presumably the multiplication skill (judging by the
# output file name) - confirm.
items = data.get_items_df()
items = items.loc[
    (items["skill_lvl_2"] == 210)
    & items["skill_lvl_3"].notnull()
    & (items["visualization"] == "free_answer"),
    ["question", "answer", "visualization"],
]

# Store the answers of the selected items and load them back as a Data
# object for the evaluation.
answers[answers["item"].isin(items.index)].to_pickle(
    "../../data/matmat/2016-01-04/answers-multiplication.pd"
)
data_multiplication = Data(
    "../../data/matmat/2016-01-04/answers-multiplication.pd"
)
model = EloPriorCurrentModel(alpha=1.4, beta=0.1, KC=3, KI=0.5)

# Per-item statistics: number of answers, fraction of correct answers and
# median response time, joined onto the item table by item id.
items = (
    items
    .join(pd.Series(answers.groupby("item").size(), name="answer_count"))
    .join(pd.Series(
        answers.groupby("item").apply(
            lambda group: group["correct"].sum() / len(group)
        ),
        name="success_rate",
    ))
    .join(pd.Series(
        answers.groupby("item")["response_time"].median(),
        name="response_time",
    ))
)

# Run the evaluation (force_run=True is passed to get_report), then read
# the item difficulties from the model and centre them around zero.
Evaluator(data_multiplication, model).get_report(force_run=True)
items["model_difficulty"] = model.get_difficulties(items.index)
items["model_difficulty"] = (
    items["model_difficulty"] - items["model_difficulty"].mean()
)