Example #1
    def _fit(self, data, skill, forgets, preload=False):
        """ Helper function for fitting data. """
        num_learns = len(data["resource_names"])
        num_gs = len(data["gs_names"])
        self._check_manual_param_init(num_learns, num_gs, skill)
        num_fit_initializations = self.num_fits
        best_likelihood = float("-inf")
        best_model = None

        # Best-of-N fitting: each pass restarts EM from a fresh random model and
        # keeps the initialization with the highest final log-likelihood.
        for i in range(num_fit_initializations):
            fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
            if forgets:
                # allow nonzero forgetting by randomizing the forget rates as well
                fitmodel["forgets"] = np.random.uniform(
                    size=fitmodel["forgets"].shape)
            if self.manual_param_init and skill in self.fit_model:
                # seed EM with any manually specified parameters for this skill
                for var in self.fit_model[skill]:
                    if var in fitmodel:
                        fitmodel[var] = self.fit_model[skill][var]
            if not preload:
                fitmodel, log_likelihoods = EM_fit.EM_fit(
                    fitmodel, data, parallel=self.parallel)
                if log_likelihoods[-1] > best_likelihood:
                    best_likelihood = log_likelihoods[-1]
                    best_model = fitmodel
            else:
                # preload: skip EM and keep the initialized model as-is
                best_model = fitmodel
        # expose the fitted parameters under their conventional names
        fit_model = best_model
        fit_model["learns"] = fit_model["As"][:, 1, 0]
        fit_model["forgets"] = fit_model["As"][:, 0, 1]
        fit_model["prior"] = fit_model["pi_0"][1][0]
        fit_model["resource_names"] = data["resource_names"]
        fit_model["gs_names"] = data["gs_names"]
        return fit_model
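
A hypothetical usage sketch for the helper above (not from the source): it assumes a model instance exposing _fit and a data dict carrying "resource_names"/"gs_names", and only reads back the renamed parameters that _fit attaches to the returned dict.

# Hypothetical sketch: `model` and `data` are assumed here, not defined in the source.
fitted = model._fit(data, skill="some_skill", forgets=True)

print("prior  P(L0) :", fitted["prior"])      # taken from pi_0[1][0]
print("learn rates  :", fitted["learns"])     # As[:, 1, 0], one per resource
print("forget rates :", fitted["forgets"])    # As[:, 0, 1], one per resource
print("guess / slip :", fitted["guesses"], fitted["slips"])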
Example #2
truemodel["prior"] = truemodel["pi_0"][1][0]

truemodel["guesses"] = np.full(num_subparts, 0.1, dtype=np.float64)
truemodel["slips"] = np.full(num_subparts, 0.03, dtype=np.float64)

# generate synthetic data from the true model
print("generating data...")
data = synthetic_data.synthetic_data(truemodel, observation_sequence_lengths)

#fit models, starting with random initializations
print('fitting! each dot is a new EM initialization')

best_likelihood = float("-inf")

fitmodel = deepcopy(truemodel) # NOTE: include this line to initialize at the truth
(fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
if log_likelihoods[-1] > best_likelihood:
    best_likelihood = log_likelihoods[-1]
    best_model = fitmodel

# compare the fit model to the true model

print('')
print('\ttruth\tlearned')
for r in range(num_resources):
    print('learn%d\t%.4f\t%.4f' % (r+1, truemodel['As'][r, 1, 0].squeeze(), best_model['As'][r, 1, 0].squeeze()))
for r in range(num_resources):
    print('forget%d\t%.4f\t%.4f' % (r+1, truemodel['As'][r, 0, 1].squeeze(), best_model['As'][r, 0, 1].squeeze()))

for s in range(num_subparts):
    print('guess%d\t%.4f\t%.4f' % (s+1, truemodel['guesses'][s], best_model['guesses'][s]))
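
Example #2 picks up mid-script; the partial sketch below shows the kind of setup it assumes. The import paths and concrete sizes are assumptions (illustrative only); the As/pi_0 layout follows the conventions visible in the snippet (As[r, 1, 0] = learn rate, As[r, 0, 1] = forget rate, pi_0[1][0] = prior).

# Partial, hypothetical setup for Example #2 (not from the source).
import numpy as np
from copy import deepcopy

# assumed pyBKT-style package layout; adjust the import paths to your checkout
from pyBKT.generate import synthetic_data
from pyBKT.fit import EM_fit

num_subparts = 4                                              # illustrative sizes
num_resources = 2
observation_sequence_lengths = np.full(50, 100, dtype=int)    # 50 students x 100 steps

truemodel = {}
truemodel["As"] = np.zeros((num_resources, 2, 2), dtype=np.float64)
for r in range(num_resources):
    learn, forget = 0.2, 0.05
    truemodel["As"][r] = np.array([[1.0 - learn, forget],
                                   [learn, 1.0 - forget]])    # columns sum to 1
truemodel["learns"] = truemodel["As"][:, 1, 0]
truemodel["forgets"] = truemodel["As"][:, 0, 1]
truemodel["pi_0"] = np.array([[0.75], [0.25]])                # prior = pi_0[1][0]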
Example #3
print("All data okay")

total_auc = 0
total_trials = 0
all_true = []
all_pred = []
for skill in range(skill_count):
    num_fit_initializations = 5
    best_likelihood = float("-inf")
    if len(Data[skill]["resources"]) < 1:
        print("No data for skill %s" % skill)
        continue
    else:
        for i in range(num_fit_initializations):
            fitmodel = random_model_uni.random_model_uni(1, 1)
            (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, Data[skill])
            if log_likelihoods[-1] > best_likelihood:
                best_likelihood = log_likelihoods[-1]
                best_model = fitmodel

        #print(" ")
        #print('\tlearned')
        #print('prior\t%.4f' % (best_model["pi_0"][1][0]))
        #for r in range(1):
        #    print('learn%d\t%.4f' % (r+1, best_model['As'][r, 1, 0].squeeze()))
        #for r in range(1):
        #    print('forget%d\t%.4f' % (r+1, best_model['As'][r, 0, 1].squeeze()))

        #for s in range(1):
        #    print('guess%d\t%.4f' % (s+1, best_model['guesses'][s]))
        #for s in range(1):
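
        # --- Hypothetical continuation (not recovered from the source) ---
        # The extract above breaks off before the per-skill prediction step; the
        # lines below only illustrate how the pooled lists could be scored, reusing
        # predict_onestep.run and auc.compute_auc exactly as crossvalidate() does
        # below, and mirroring its flattening of the 1/2-coded observations.
        # (numpy as np, plus the predict_onestep and auc helpers, are assumed.)
        correct_predictions, state_predictions = predict_onestep.run(best_model, Data[skill])

        flat_true = np.zeros(len(Data[skill]["data"][0]), dtype=np.intc)
        for i in range(len(Data[skill]["data"])):
            for j in range(len(Data[skill]["data"][0])):
                if Data[skill]["data"][i][j] != 0:
                    flat_true[j] = Data[skill]["data"][i][j]

        all_true.extend(flat_true.tolist())
        all_pred.extend(correct_predictions)

# after the loop: one pooled AUC across every skill (illustrative)
total_auc = auc.compute_auc(all_true, all_pred)
print("pooled AUC over %d skills: %.4f" % (skill_count, total_auc))
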
def crossvalidate(data, folds=5, verbose=False, seed=0, return_arrays=False):
    """Student-level k-fold cross-validation: fit with EM on each training
    split, score one-step predictions on the held-out split, and return
    (accuracy, RMSE, AUC) pooled over the folds (or the raw true/predicted
    lists when `return_arrays` is True)."""

    if "resource_names" in data:
        num_learns = len(data["resource_names"])
    else:
        num_learns = 1

    if "gs_names" in data:
        num_gs = len(data["gs_names"])
    else:
        num_gs = 1

    total = 0
    acc = 0
    area_under_curve = 0
    split_size = (len(data["starts"]) // folds)
    # create a random permutation to use as fold indices for cross-validation
    shuffle = np.random.RandomState(seed=seed).permutation(len(data["starts"]))
    all_true, all_pred = [], []

    # cross-validate over students, which are identified by the starts array
    for iteration in range(folds):
        # create training/test splits from the random permutation above
        train = np.concatenate(
            (shuffle[0:iteration * split_size],
             shuffle[(iteration + 1) * split_size:len(data["starts"])]))
        test = shuffle[iteration * split_size:(iteration + 1) * split_size]
        training_data = fix_data(data, train)
        num_fit_initializations = 5
        best_likelihood = float("-inf")

        for i in range(num_fit_initializations):
            fitmodel = random_model_uni.random_model_uni(
                num_learns, num_gs
            )  # include this line to randomly set initial param values
            (fitmodel,
             log_likelihoods) = EM_fit.EM_fit(fitmodel, training_data)
            if log_likelihoods[-1] > best_likelihood:
                best_likelihood = log_likelihoods[-1]
                best_model = fitmodel

        if verbose:
            print(" ")
            print('Iteration %d' % (iteration))
            print('\tlearned')
            print('prior\t%.4f' % (best_model["pi_0"][1][0]))
            for r in range(num_learns):
                print('learn%d\t%.4f' %
                      (r + 1, best_model['As'][r, 1, 0].squeeze()))
            for r in range(num_learns):
                print('forget%d\t%.4f' %
                      (r + 1, best_model['As'][r, 0, 1].squeeze()))

            for s in range(num_gs):
                print('guess%d\t%.4f' % (s + 1, best_model['guesses'][s]))
            for s in range(num_gs):
                print('slip%d\t%.4f' % (s + 1, best_model['slips'][s]))

        test_data = fix_data(data, test)

        # run model predictions from training data on test data
        (correct_predictions,
         state_predictions) = predict_onestep.run(best_model, test_data)

        flat_true_values = np.zeros((len(test_data["data"][0]), ),
                                    dtype=np.intc)
        for i in range(len(test_data["data"])):
            for j in range(len(test_data["data"][0])):
                if test_data["data"][i][j] != 0:
                    flat_true_values[j] = test_data["data"][i][j]
        flat_true_values = flat_true_values.tolist()

        # print(len(flat_true_values))
        # print(len(correct_predictions))
        # print(auc.compute_auc(flat_true_values, correct_predictions))
        all_true.extend(flat_true_values)
        all_pred.extend(correct_predictions)

    if return_arrays:
        return (all_true, all_pred)

    # print(len(all_true))
    # print(len(all_pred))
    total += rmse.compute_rmse(all_true, all_pred)
    acc += accuracy.compute_acc(all_true, all_pred)
    area_under_curve += auc.compute_auc(all_true, all_pred)
    if verbose:
        print("Overall RMSE: ", total)
        print("Overall Accuracy: ", acc)
        print("Overall AUC: ", area_under_curve)
    return (acc, total, area_under_curve)
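
A hedged usage sketch for crossvalidate: the loader name and file below are hypothetical, not from the source. The only assumption about `data` is what the function itself reads, i.e. a dict with "data" and "starts" plus optional "resource_names"/"gs_names", in the same format fix_data and EM_fit consume above.

# Hypothetical usage: load_dataset is a placeholder for however you build the
# {"data", "starts", "resource_names", "gs_names", ...} dict in your pipeline.
data = load_dataset("my_responses.csv")

acc, rmse_value, auc_value = crossvalidate(data, folds=5, verbose=True, seed=0)
print("ACC=%.4f  RMSE=%.4f  AUC=%.4f" % (acc, rmse_value, auc_value))

# or collect the pooled ground truth / predictions for custom analysis
y_true, y_pred = crossvalidate(data, folds=5, return_arrays=True)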