import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import crossvalidate, accuracy, rmse, auc, check_data, data_helper, ktidem_skills
import copy
np.seterr(divide='ignore', invalid='ignore')

num_fit_initializations = 20
seed, folds = 2020, 5  # can be customized to anything; keep the same seed and number of folds across all trials

results = {}  # dictionary to store the per-skill counts and AUC results
df, skill_list, student_count, data_count, template_count = ktidem_skills.find_skills()

for i in range(10):
    skill_name = skill_list[i]
    results[skill_name] = [student_count[i], data_count[i], template_count[i]]
    data = data_helper.convert_data(df, skill_name)
    check_data.check_data(data)
    # responses are encoded 1 = incorrect / 2 = correct, so sum - N counts the correct answers
    results[skill_name].append((np.sum(data["data"][0]) - len(data["data"][0])) / len(data["data"][0]))
    print("creating simple model")
    results[skill_name].append(crossvalidate.crossvalidate(data, folds=folds, seed=seed)[2])
    data_multiguess = data_helper.convert_data(df, skill_name, multiguess=True)
    check_data.check_data(data_multiguess)
    print("creating kt_idem model")
    results[skill_name].append(crossvalidate.crossvalidate(data_multiguess, folds=folds, seed=seed)[2])

print("Model\tNum Students\tNum Data\tNum Templates\tCorrect Percent\tSimple AUC\tKT_IDEM AUC")
for k, v in results.items():
    print("%s\t%d\t%d\t%d\t%.5f\t%.5f\t%.5f" % (k, v[0], v[1], v[2], v[3], v[4], v[5]))
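# Sanity check for the encoding assumed in the correct-percent line above: with
# pyBKT's 1 = incorrect / 2 = correct coding, sum(responses) - N is the number of
# correct answers. A minimal sketch with made-up responses:
responses = np.array([2, 1, 2, 2])  # three correct, one incorrect
print((np.sum(responses) - len(responses)) / len(responses))  # 0.75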
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
np.seterr(divide='ignore', invalid='ignore')

skill_name = "Table"
data = data_helper.convert_data("as.csv", skill_name, multilearn=True)
check_data.check_data(data)

num_gs = len(data["gs_names"])
num_learns = len(data["resource_names"])

num_fit_initializations = 5
best_likelihood = float("-inf")
for i in range(num_fit_initializations):
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)  # randomly set initial param values
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    print(log_likelihoods[-1])
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel

# report the best of the random restarts
print('')
print('Trained model for %s skill given %d learning rates, %d guess/slip rate' % (skill_name, num_learns, num_gs))
print('\t\tlearned')
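# A hedged continuation of the printout above: it assumes, as in the multipair
# cell below, that best_model["pi_0"][1][0] holds the prior and that each
# resource's learn rate sits at best_model['As'][r][1][0]; the 'As' layout is an
# assumption, not confirmed API.
print('prior\t\t%.4f' % (best_model["pi_0"][1][0]))
for key, value in data["resource_names"].items():
    print('learn %s\t%.4f' % (key, best_model['As'][value - 1][1][0]))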
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import crossvalidate, accuracy, rmse, auc, check_data, data_helper
import copy
np.seterr(divide='ignore', invalid='ignore')

num_fit_initializations = 20
skill_name = "Box and Whisker"
seed, folds = 2020, 5  # can be customized to anything; keep the same seed and number of folds across all trials

results = {}  # dictionary to store accuracy, RMSE, and AUC results

# data!
print("starting simple model data collection")
data, df = data_helper.convert_data("as.csv", skill_name, return_df=True)  # save the dataframe for further trials
check_data.check_data(data)

print("creating simple model")
results["Simple Model"] = crossvalidate.crossvalidate(data, folds=folds, seed=seed)

print("starting majority class calculation")
# responses are encoded 1 = incorrect / 2 = correct, so sum - N counts the correct answers
num_correct = np.sum(data["data"][0]) - len(data["data"][0])
majority = 1 if num_correct > len(data["data"][0]) - num_correct else 0
pred_values = np.full((len(data["data"][0]),), float(majority)).tolist()
true_values = data["data"][0].tolist()
results["Majority Class"] = (accuracy.compute_acc(true_values, pred_values),
                             rmse.compute_rmse(true_values, pred_values),
                             auc.compute_auc(true_values, pred_values))
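# A small comparison printout, assuming each results entry is an
# (accuracy, rmse, auc) tuple as constructed above:
for name, (acc, err, area) in results.items():
    print("%s\tacc=%.5f\trmse=%.5f\tauc=%.5f" % (name, acc, err, area))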
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
np.seterr(divide='ignore', invalid='ignore')

skill_name = "Box and Whisker"

# data!
data = data_helper.convert_data("as.csv", skill_name, multiprior=True)
check_data.check_data(data)

num_learns = len(data["resource_names"])
num_gs = len(data["gs_names"])

num_fit_initializations = 5
best_likelihood = float("-inf")
for i in range(num_fit_initializations):
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)  # randomly set initial param values
    # pin the prior to 0 so that, with multiprior data, it is estimated instead
    # as the learn rate of the artificial first step
    fitmodel["pi_0"] = np.array([[1], [0]])
    fitmodel["prior"] = 0
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel
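# Report the winning restart; log_likelihoods traces EM's per-iteration progress,
# so its last entry is the final log likelihood of that run.
print('best log likelihood over %d random restarts: %.4f' % (num_fit_initializations, best_likelihood))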
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
from copy import deepcopy
np.seterr(divide='ignore', invalid='ignore')

# data!
data = data_helper.convert_data("as.csv", "Box and Whisker")
check_data.check_data(data)

num_learns = len(data["resource_names"])
num_gs = len(data["gs_names"])

num_fit_initializations = 5
best_likelihood = float("-inf")
for i in range(num_fit_initializations):
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)  # randomly set initial param values
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel

print('')
print('Trained model given %d learning rates, %d guess/slip rate' % (num_learns, num_gs))
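# A hedged sketch of the parameter printout this cell leads into, reusing the
# pi_0 convention from the multipair cell below; the 'As', 'guesses', and 'slips'
# keys and their indexing are assumptions about the fit-model dict, not
# confirmed API.
print('\t\tlearned')
print('prior\t\t%.4f' % (best_model["pi_0"][1][0]))
print('learn\t\t%.4f' % (best_model['As'][0][1][0]))
print('guess\t\t%.4f' % (best_model['guesses'][0]))
print('slip\t\t%.4f' % (best_model['slips'][0]))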
# the Cognitive Tutor helper and a fresh results dictionary for the CT skills
from utils import ktidem_skills_ct
results = {}

df, skill_list, student_count, data_count, template_count = ktidem_skills_ct.find_skills()

# map pyBKT's expected column roles onto the Cognitive Tutor CSV's column names
ct_default = {
    'order_id': 'Row',
    'skill_name': 'KC(SubSkills)',
    'correct': 'Correct First Attempt',
    'user_id': 'Anon Student Id',
    'multiguess': 'Problem Name',
}

for i in range(12):
    skill_name = skill_list[i]
    results[skill_name] = [student_count[i], data_count[i], template_count[i]]
    data = data_helper.convert_data(df, skill_name, defaults=ct_default)
    check_data.check_data(data)
    results[skill_name].append((np.sum(data["data"][0]) - len(data["data"][0])) / len(data["data"][0]))
    print("creating simple model")
    results[skill_name].append(crossvalidate.crossvalidate(data, folds=folds, seed=seed)[2])
    data_multiguess = data_helper.convert_data(df, skill_name, defaults=ct_default, multiguess=True)
    check_data.check_data(data_multiguess)
    print("creating kt_idem model")
    results[skill_name].append(crossvalidate.crossvalidate(data_multiguess, folds=folds, seed=seed)[2])

print("Model\tNum Students\tNum Data\tNum Problems\tCorrect Percent\tSimple AUC\tKT_IDEM AUC")
for k, v in results.items():
    print("%s\t%d\t%d\t%d\t%.5f\t%.5f\t%.5f" % (k, v[0], v[1], v[2], v[3], v[4], v[5]))
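# The defaults dict is only a role -> column-name mapping, so adapting the same
# pipeline to another log format just means swapping the values. A hypothetical
# mapping for a CSV with lowercase headers (all column names below are made up
# for illustration):
my_defaults = {
    'order_id': 'row_id',
    'skill_name': 'kc',
    'correct': 'first_attempt_correct',
    'user_id': 'student',
    'multiguess': 'problem',
}
# usage would mirror the cell above:
# data = data_helper.convert_data(my_df, skill_name, defaults=my_defaults)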
# may take a while to run, since the model has to account for all existing item
# pairs (on the order of (# unique questions)^2)
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import data_helper, check_data
np.seterr(divide='ignore', invalid='ignore')

skill_name = "Finding the intersection, Mixed"

# data!
data = data_helper.convert_data("ct.csv", skill_name, multipair=True)
check_data.check_data(data)

num_learns = len(data["resource_names"])
num_gs = len(data["gs_names"])

num_fit_initializations = 5
best_likelihood = float("-inf")
for i in range(num_fit_initializations):
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)  # randomly set initial param values
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel

print('')
print('Trained model for %s skill given %d learning rates, %d guess/slip rate' % (skill_name, num_learns, num_gs))
print('\t\tlearned')
print('prior\t\t%.4f' % (best_model["pi_0"][1][0]))
for key, value in data["resource_names"].items():
    # loop body completed here; the 'As' layout (learn rate at [r][1][0]) is an assumption
    print('learn %s\t%.4f' % (key, best_model['As'][value - 1][1][0]))
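# Scale check for the multipair model above: it builds one learn resource per
# (previous item, current item) pair, which is why fitting can be slow; the
# resource count grows roughly quadratically in the number of unique questions.
print('%d pairwise learn resources, %d guess/slip classes' % (num_learns, num_gs))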
import sys
sys.path.append('../')
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from utils import crossvalidate, data_helper, check_data
from copy import deepcopy
np.seterr(divide='ignore', invalid='ignore')

num_fit_initializations = 20
skill_name = "Range"

# data!
data = data_helper.convert_data("as.csv", skill_name)
check_data.check_data(data)

# specifying verbose=True prints the data from every iteration of the crossvalidation
crossvalidate.crossvalidate(data, verbose=True)
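# The summary can also be captured rather than only printed; this assumes
# crossvalidate returns an (accuracy, rmse, auc) tuple, consistent with the
# [2]-indexing used for AUC in the KT-IDEM cells above.
acc, err, area = crossvalidate.crossvalidate(data, folds=5, seed=2020)
print("accuracy=%.5f  rmse=%.5f  auc=%.5f" % (acc, err, area))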