from utils.data import Data
import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt

# Minimum number of answers a student must have within one concept to be
# counted for that concept (the name is a misspelling of "threshold").
TRASHOLD = 25

# Load the answer log and the item table; each item row is extended with the
# columns of the skill referenced by its "skill_lvl_1" value.
d = Data("../data/matmat/2015-11-20/answers.pd")
answers = d.get_dataframe_all()
items = d.get_items_df()
skills = d.get_skills_df()
items = items.join(skills, on="skill_lvl_1")

# One concept per distinct value of the joined "name" column.
concepts = items["name"].unique()

# concept -> Series indexed by student, holding that student's number of
# answers on the concept's items (only students with >= TRASHOLD answers).
sts = {}

for concept in concepts:
    print(concept)
    belongs_to_concept = items["name"] == concept
    its = list(items[belongs_to_concept].index)
    concept_answers = answers[answers["item"].isin(its)]
    answers_per_student = concept_answers.groupby("student").size()
    students = answers_per_student[answers_per_student >= TRASHOLD]
    print(len(students))
    sts[concept] = students

# Concept co-occurrence matrix: the cell in column concept1 / row concept2 is
# the number of students who passed the answer-count filter in both concepts.
data = pd.DataFrame(index=concepts, columns=concepts, dtype=float)

# Every sts[concept] is a Series indexed by student whose values are answer
# counts. The students are therefore the *index*; iterating the Series itself
# (as set(series) does) would yield the counts instead of the student ids.
student_sets = {concept: set(sts[concept].index) for concept in concepts}

for concept1 in concepts:
    for concept2 in concepts:
        count = len(student_sets[concept1] & student_sets[concept2])
        print(concept1, concept2, count)
        # Write through a single .loc call; chained indexing
        # (data[col][row] = ...) may assign into a temporary copy and is a
        # no-op under pandas copy-on-write.
        data.loc[concept2, concept1] = count
def grid(data, model):
    """Run ``utils.grid_search`` over ``alpha`` and ``beta`` and show the plot.

    ``data`` and ``model`` are forwarded unchanged to ``utils.grid_search``.
    The first dict passed holds single values for ``KC`` and ``KI``; the
    second holds the ranges of ``alpha`` and ``beta``, which are also used as
    the plot axes. (Presumably the first dict is held fixed while the second
    is swept -- confirm against ``utils.grid_search``.)
    """
    # Alternative setup previously kept here as commented-out code: pass
    # {"alpha": 0.25, "beta": 0.02} as the single values, sweep
    # "KC" over np.arange(1.5, 5.0, 0.25) and "KI" over
    # np.arange(0, 2.5, 0.25), and plot on the ["KC", "KI"] axes.
    single_values = {"KC": 3, "KI": 0.5}
    value_ranges = {
        "alpha": np.arange(0.4, 1.7, 0.2),
        "beta": np.arange(0., 0.2, 0.02),
    }
    utils.grid_search(
        data,
        model,
        single_values,
        value_ranges,
        plot_axes=["alpha", "beta"],
    )

    plt.show()


# Per-item statistics and model difficulties for a subset of items
# (presumably multiplication, judging by the output file name -- confirm the
# meaning of skill_lvl_2 == 210).
#
# `data` is bound earlier in this file to a pandas DataFrame (the concept
# co-occurrence matrix), which has no `get_items_df`; the item table has to
# come from the `Data` object `d`, the same source `answers` was loaded from.
# NOTE(review): EloPriorCurrentModel and Evaluator are not imported in the
# visible part of this file -- confirm they are in scope.
items = d.get_items_df()

# Keep items of skill_lvl_2 == 210 that have a skill_lvl_3, restricted to the
# three descriptive columns, and then only the "free_answer" visualization.
# The column selector is a list: a tuple inside .loc is ambiguous (it is the
# MultiIndex key syntax).
is_selected = (items["skill_lvl_2"] == 210) & items["skill_lvl_3"].notnull()
items = items.loc[is_selected, ["question", "answer", "visualization"]]
items = items[items["visualization"] == "free_answer"]

# Persist the answers on the selected items and reload them as a Data object.
multiplication_path = "../../data/matmat/2016-01-04/answers-multiplication.pd"
answers[answers["item"].isin(items.index)].to_pickle(multiplication_path)
data_multiplication = Data(multiplication_path)
model = EloPriorCurrentModel(alpha=1.4, beta=0.1, KC=3, KI=0.5)

# Per-item statistics are computed over all answers; the joins align on the
# item index, so only the selected items receive values.
answers_by_item = answers.groupby("item")
answer_count = answers_by_item.size()
items = items.join(answer_count.rename("answer_count"))
# Sum of "correct" divided by the number of rows in the group, exactly what
# the former per-group lambda computed, but without a Python-level apply.
success_rate = answers_by_item["correct"].sum() / answer_count
items = items.join(success_rate.rename("success_rate"))
items = items.join(
    answers_by_item["response_time"].median().rename("response_time")
)

# The report is generated before difficulties are read from the model
# (presumably this is what fits the model -- confirm in Evaluator).
Evaluator(data_multiplication, model).get_report(force_run=True)
items["model_difficulty"] = model.get_difficulties(items.index)
# Centre the difficulties so that their mean over the selected items is zero.
items["model_difficulty"] -= items["model_difficulty"].mean()