l2 += [0.0 for i in range(Q.shape[1])] l2 += [0.0 for i in range(O.shape[1])] bounds = [(None, None) for i in range(S.shape[1])] bounds += [(None, None) for i in range(Q.shape[1])] bounds += [(0, None) for i in range(O.shape[1])] X = X.toarray() X2 = Q.toarray() if args.model == "AFM": m = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False) m.fit(X, y) yHat = 1 - m.predict_proba(X) elif args.model == "AFM+S": m = BoundedLogistic(first_bounds=bounds, first_l2=l2) m.fit(X, X2, y) yHat = 1 - m.predict_proba(X, X2) else: raise ValueError("Model type not supported") headers = original_headers + ["Predicted Error Rate (%s)" % kc_model] outfilePath = args.workingDir + "/output.txt" outfile = open(outfilePath, 'w') outfile.write("\t".join(headers) + "\n") cntRowMissOpp = 0 for i, row in enumerate(original_step_data): oppCell = row[len(row) - 1] if oppCell is None or oppCell == "": cntRowMissOpp += 1 d = row + [""]
def afms(kcs, opps, actuals, stu, student_label, item_label, nfolds=3, seed=None):
    """
    Fit an AFM+S (Additive Factors Model + Slip) to the provided data and
    return cross-validated fits plus parameter estimates.

    Parameters
    ----------
    kcs : list of dict
        One dict per observation mapping KC name -> membership (Q-matrix row).
    opps : list of dict
        One dict per observation mapping KC name -> opportunity count.
    actuals : sequence
        Observed outcome per observation (passed to np.array; used as the
        regression target and for stratified CV).
    stu : list of dict
        One dict per observation identifying the student (intercept features).
    student_label, item_label : sequence
        Group labels for the student-blocked and item-blocked CV schemes.
    nfolds : int, optional
        Number of cross-validation folds (default 3).
    seed : int or None, optional
        random_state for the shuffled CV splitters.

    Returns
    -------
    scores : list of float
        Mean RMSE for each of the four CV schemes, in order:
        unstratified KFold, StratifiedKFold on y, student-blocked LabelKFold,
        item-blocked LabelKFold.
    kc_vals : list of [kc, intercept, invlogit(intercept), slope, slip]
        Per-KC coefficient estimates (0.0 where a KC lacks a coefficient).
        NOTE(review): iteration is over a set, so row order is unspecified.
    coef_s : list of [student, coef, invlogit(coef)]
        Per-student intercept estimates.
    """
    sv = DictVectorizer()
    qv = DictVectorizer()
    ov = DictVectorizer()

    # Design matrices: student intercepts (S), KC intercepts (Q),
    # KC opportunity counts (O).
    S = sv.fit_transform(stu)
    Q = qv.fit_transform(kcs)
    O = ov.fit_transform(opps)
    X = hstack((S, Q, O))
    y = np.array(actuals)

    # L2-regularize only the student intercepts.
    l2 = [1.0] * S.shape[1] + [0.0] * Q.shape[1] + [0.0] * O.shape[1]

    # Bound the learning-rate (opportunity) coefficients to be non-negative;
    # student and KC intercepts are unbounded.
    bounds = ([(None, None)] * S.shape[1]
              + [(None, None)] * Q.shape[1]
              + [(0, None)] * O.shape[1])

    X = X.toarray()
    X2 = Q.toarray()  # second design matrix: per-KC slip terms

    model = BoundedLogistic(first_bounds=bounds, first_l2=l2)
    model.fit(X, X2, y)

    # Student intercepts, with invlogit for interpretability.
    coef_s = model.coef1_[0:S.shape[1]]
    coef_s = [[k, v, invlogit(v)]
              for k, v in sv.inverse_transform([coef_s])[0].items()]

    # Slice per-KC intercepts, learning-rate slopes, and slip coefficients
    # out of the fitted coefficient vectors.
    coef_q = model.coef1_[S.shape[1]:S.shape[1] + Q.shape[1]]
    coef_qint = qv.inverse_transform([coef_q])[0]
    coef_o = model.coef1_[S.shape[1] + Q.shape[1]:
                          S.shape[1] + Q.shape[1] + O.shape[1]]
    coef_qslope = ov.inverse_transform([coef_o])[0]
    coef_qslip = qv.inverse_transform([model.coef2_])[0]

    kc_vals = []
    all_kcs = set(coef_qint).union(coef_qslope).union(coef_qslip)
    for kc in all_kcs:
        # BUGFIX: was .setdefault(kc, 0.0), which mutates the coefficient
        # dicts as a side effect of a read (and looked the intercept up
        # twice). .get() is the intended read-with-default.
        intercept = coef_qint.get(kc, 0.0)
        kc_vals.append([kc,
                        intercept,
                        invlogit(intercept),
                        coef_qslope.get(kc, 0.0),
                        coef_qslip.get(kc, 0.0)])

    # Four CV schemes (legacy sklearn.cross_validation API, which takes the
    # data/labels in the constructor and iterates (train_idx, test_idx)).
    cvs = [
        KFold(len(y), n_folds=nfolds, shuffle=True, random_state=seed),
        StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed),
        LabelKFold(student_label, n_folds=nfolds),
        LabelKFold(item_label, n_folds=nfolds),
    ]

    scores = []
    for cv in cvs:
        fold_mse = []
        for train_index, test_index in cv:
            X_train, X_test = X[train_index], X[test_index]
            X2_train, X2_test = X2[train_index], X2[test_index]
            y_train, y_test = y[train_index], y[test_index]
            model.fit(X_train, X2_train, y_train)
            fold_mse.append(model.mean_squared_error(X_test, X2_test, y_test))
        # Mean RMSE across folds for this CV scheme.
        scores.append(np.mean(np.sqrt(fold_mse)))

    return scores, kc_vals, coef_s
l2 += [0.0 for i in range(Q.shape[1])] l2 += [0.0 for i in range(O.shape[1])] # Bound the learning rates to be positive bounds = [(None, None) for i in range(S.shape[1])] bounds += [(None, None) for i in range(Q.shape[1])] bounds += [(0, None) for i in range(O.shape[1])] X = X.toarray() X2 = Q.toarray() afm = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False) afm.fit(X, y) yAFM = afm.predict_proba(X) afms = BoundedLogistic(first_bounds=bounds, first_l2=l2) afms.fit(X, X2, y) yAFMS = afms.predict_proba(X, X2) #plotkcs = ['All Knowledge Components'] plotkcs = list(set([kc for row in kcs for kc in row])) + ['All Knowledge Components'] #f, subplots = plt.subplots(len(plotkcs)) for plot_id, plotkc in enumerate(plotkcs): plt.figure(plot_id+1) #if len(plotkcs) > 1: # p = subplots[plot_id] #else: # p = subplots