def difficulty_vs_time(data, the_skill, concepts=False):
    """Scatter-plot difficulty against mean log response time for one skill.

    Draws on the current matplotlib figure via ``plt``; nothing is returned.
    Note that ``data`` is modified through ``filter_data``, ``trim_times`` and
    ``add_log_response_times`` before anything is plotted.

    Args:
        data: project data object; must provide ``filter_data``,
            ``get_skill_id``, ``trim_times``, ``add_log_response_times``,
            ``get_items_df``, ``get_dataframe_all`` and ``get_skills_df``.
        the_skill: skill identifier passed to ``data.get_skill_id``; also used
            as the plot title.
        concepts: when true, plot one labelled point per ``skill_lvl_3`` group
            (mean difficulty and mean log time over its items); otherwise plot
            one point per item, coloured by its ``visualization`` value.
    """
    data.filter_data(0, 100)
    pk, level = data.get_skill_id(the_skill)
    data.trim_times()
    data.add_log_response_times()
    m = EloPriorCurrentModel(KC=2, KI=0.5)

    items = data.get_items_df()
    items = items[items["visualization"] != "pairing"]
    # Presumably get_difficulty yields a per-item "difficulty" column (it is
    # read below under that name) -- verify against its definition.
    items = items.join(get_difficulty(data, m))
    mean_log_time = data.get_dataframe_all().groupby(["item"])["log_response_time"].mean()
    items = items.join(pd.Series(mean_log_time, name="log_response_time_mean"))
    # Keep only items whose skill at the resolved level is the requested one.
    items = items[items["skill_lvl_" + str(level)] == pk]

    if concepts:
        skills = data.get_skills_df()
        skills = skills.join(items.groupby("skill_lvl_3")["difficulty"].mean())
        skills = skills.join(items.groupby("skill_lvl_3")["log_response_time_mean"].mean())
        # Drop skills that have no item left after the filtering above.
        skills = skills[skills.index.isin(items["skill_lvl_3"].unique())]
        for _, skill in skills.iterrows():
            plt.plot(skill["difficulty"], skill["log_response_time_mean"], "ok")
            plt.text(skill["difficulty"], skill["log_response_time_mean"], skill["name"])
    else:
        colors = "rgbyk"
        visualizations = list(items["visualization"].unique())
        # Cycle through the palette so that more than len(colors) distinct
        # visualization types no longer raise IndexError; the mapping is built
        # once instead of calling list.index() for every row.
        color_of = {
            vis: colors[i % len(colors)] for i, vis in enumerate(visualizations)
        }
        for _, item in items.iterrows():
            plt.plot(item["difficulty"], item["log_response_time_mean"], "o",
                     color=color_of[item["visualization"]])
            plt.text(item["difficulty"], item["log_response_time_mean"], item["name"])
        # Legend proxies: empty data creates the legend entry without drawing
        # a spurious marker inside the plot or affecting the axis limits.
        for vis in visualizations:
            plt.plot([], [], "o", color=color_of[vis], label=vis)

    plt.xlabel("difficulty according to " + str(m))
    plt.ylabel("mean of log time")
    plt.legend(loc=0)
    plt.title(the_skill)
################################################################################
################################################################################
################################################################################
################################################################################
## MAIN ########################################################################
################################################################################

if __name__ == '__main__':

    ## RANDOM TESTING ##########################################################

    # Read the CSV, then pass both parts through arr().
    data, classes = load_data_from_csv('../data/classifier-data.csv', 4, float)
    data = arr(data)
    classes = arr(classes)

    # filter_data gets a predicate that is true when the class label equals 2;
    # whether matching rows are kept or removed is up to filter_data
    # (TODO confirm against its definition).
    bd1, bc1 = filter_data(data, classes, lambda x, y: y == 2)
    bd1 = np.array(bd1)
    bc1 = np.array(bc1)

    def test(trd, trc, ted, tec):
        """Build a TreeClassify from (trd, trc); print and return err(ted, tec).

        Presumably trd/trc are training data/classes and ted/tec the held-out
        data/classes -- confirm with the callers.
        """
        print('tc', '\n')
        classifier = TreeClassify(trd, trc)
        print(classifier, '\n')
        # Diagnostics that were tried here and are currently disabled:
        # predict(ted), predict_soft(ted), confusion(ted, tec),
        # auc(ted, tec), roc(ted, tec).
        error = classifier.err(ted, tec)
        print(error, '\n')
        return error
################################################################################
################################################################################
## MAIN ########################################################################
################################################################################

if __name__ == '__main__':

    ## RANDOM TESTING ##########################################################

    # Read the CSV, then pass both parts through arr().
    data, classes = load_data_from_csv('../data/classifier-data.csv', 4, float)
    data = arr(data)
    classes = arr(classes)

    # filter_data gets a predicate that is true when the class label equals 2;
    # whether matching rows are kept or removed is up to filter_data
    # (TODO confirm against its definition).
    bd1, bc1 = filter_data(data, classes, lambda x, y: y == 2)
    bd1 = np.array(bd1)
    bc1 = np.array(bc1)

    def test(trd, trc, ted, tec):
        """Build a GaussBayesClassify from (trd, trc); print and return err(ted, tec).

        Presumably trd/trc are training data/classes and ted/tec the held-out
        data/classes -- confirm with the callers.
        """
        print('gbc', '\n')
        classifier = GaussBayesClassify(trd, trc)
        print(classifier, '\n')
        # Diagnostics that were tried here and are currently disabled:
        # predict(ted), predict_soft(ted), confusion(ted, tec),
        # auc(ted, tec), roc(ted, tec).
        error = classifier.err(ted, tec)
        print(error, '\n')
        return error