Example #1
    # L2-penalize the student parameters; leave KC and opportunity weights unregularized
    l2 = [1.0 for i in range(S.shape[1])]
    l2 += [0.0 for i in range(Q.shape[1])]
    l2 += [0.0 for i in range(O.shape[1])]

    # Bound the learning rates to be positive
    bounds = [(None, None) for i in range(S.shape[1])]
    bounds += [(None, None) for i in range(Q.shape[1])]
    bounds += [(0, None) for i in range(O.shape[1])]

    # Densify the sparse feature matrices before fitting
    X = X.toarray()
    X2 = Q.toarray()

    if args.model == "AFM":
        m = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
        m.fit(X, y)
        # predict_proba gives P(correct); 1 - p is the predicted error rate
        yHat = 1 - m.predict_proba(X)
    elif args.model == "AFM+S":
        m = BoundedLogistic(first_bounds=bounds, first_l2=l2)
        m.fit(X, X2, y)
        yHat = 1 - m.predict_proba(X, X2)
    else:
        raise ValueError("Model type not supported")

    headers = original_headers + ["Predicted Error Rate (%s)" % kc_model]
    outfilePath = args.workingDir + "/output.txt"
    outfile = open(outfilePath, 'w')
    outfile.write("\t".join(headers) + "\n")
    # Count rows that lack an opportunity value; they get no prediction
    cntRowMissOpp = 0
    for i, row in enumerate(original_step_data):
        oppCell = row[-1]
        if oppCell is None or oppCell == "":
            cntRowMissOpp += 1
            d = row + [""]
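        # The excerpt cuts off here. A plausible completion (a sketch, not
        # the original source) assumes steps without an opportunity value
        # were dropped before fitting, so predictions are offset by the
        # number of rows skipped so far.
        else:
            d = row + [str(yHat[i - cntRowMissOpp])]
        outfile.write("\t".join(d) + "\n")
    outfile.close()
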
Example #2
import numpy as np
from scipy.sparse import hstack
from sklearn.feature_extraction import DictVectorizer
# Pre-0.18 scikit-learn cross-validation API, matching the n_folds usage below
from sklearn.cross_validation import KFold, StratifiedKFold, LabelKFold
# BoundedLogistic and invlogit come from the pyAFM package; the exact module
# paths are assumed here
from pyafm.bounded_logistic import BoundedLogistic
from pyafm.custom_logistic import invlogit


def afms(kcs,
         opps,
         actuals,
         stu,
         student_label,
         item_label,
         nfolds=3,
         seed=None):
    """
    Executes AFM+S on the provided data and returns cross-validated model
    fits (RMSE per fold scheme) and parameter estimates.
    """
    sv = DictVectorizer()
    qv = DictVectorizer()
    ov = DictVectorizer()

    # One-hot student (S) and KC (Q) indicators; O carries opportunity counts
    S = sv.fit_transform(stu)
    Q = qv.fit_transform(kcs)
    O = ov.fit_transform(opps)

    X = hstack((S, Q, O))
    y = np.array(actuals)

    # L2-penalize the student parameters; leave KC and opportunity weights unregularized
    l2 = [1.0 for i in range(S.shape[1])]
    l2 += [0.0 for i in range(Q.shape[1])]
    l2 += [0.0 for i in range(O.shape[1])]

    # Bound the learning rates to be positive
    bounds = [(None, None) for i in range(S.shape[1])]
    bounds += [(None, None) for i in range(Q.shape[1])]
    bounds += [(0, None) for i in range(O.shape[1])]

    # Densify the sparse feature matrices before fitting
    X = X.toarray()
    X2 = Q.toarray()

    model = BoundedLogistic(first_bounds=bounds, first_l2=l2)
    model.fit(X, X2, y)

    # Slice the fitted coefficient vector back into its blocks: student
    # proficiencies, KC intercepts (difficulty), and opportunity slopes
    # (learning rates); coef2_ holds the per-KC slip parameters.
    coef_s = model.coef1_[0:S.shape[1]]
    coef_s = [[k, v, invlogit(v)]
              for k, v in sv.inverse_transform([coef_s])[0].items()]
    coef_q = model.coef1_[S.shape[1]:S.shape[1] + Q.shape[1]]
    coef_qint = qv.inverse_transform([coef_q])[0]
    coef_o = model.coef1_[S.shape[1] + Q.shape[1]:S.shape[1] + Q.shape[1] +
                          O.shape[1]]
    coef_qslope = ov.inverse_transform([coef_o])[0]
    coef_qslip = qv.inverse_transform([model.coef2_])[0]

    kc_vals = []
    all_kcs = set(coef_qint).union(set(coef_qslope)).union(set(coef_qslip))
    for kc in all_kcs:
        kc_vals.append([
            kc,
            coef_qint.get(kc, 0.0),
            invlogit(coef_qint.get(kc, 0.0)),
            coef_qslope.get(kc, 0.0),
            coef_qslip.get(kc, 0.0)
        ])

    # Four CV schemes: random, stratified, student-blocked, and item-blocked
    cvs = [
        KFold(len(y), n_folds=nfolds, shuffle=True, random_state=seed),
        StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed),
        LabelKFold(student_label, n_folds=nfolds),
        LabelKFold(item_label, n_folds=nfolds)
    ]

    scores = []
    for cv in cvs:
        score = []
        for train_index, test_index in cv:
            X_train, X_test = X[train_index], X[test_index]
            X2_train, X2_test = X2[train_index], X2[test_index]
            y_train, y_test = y[train_index], y[test_index]
            model.fit(X_train, X2_train, y_train)
            score.append(model.mean_squared_error(X_test, X2_test, y_test))
        # RMSE for this CV scheme, averaged across folds
        scores.append(np.mean(np.sqrt(score)))

    return scores, kc_vals, coef_s
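
A minimal call sketch with hypothetical toy data (names and values invented for illustration), just to show the shapes the function expects: one dict per observed step for the KC, opportunity-count, and student features, plus parallel label lists for the blocked folds.

kcs = [{'add': 1}] * 6                           # one KC per step
opps = [{'add': o} for o in (0, 1, 2, 0, 1, 2)]  # prior practice counts
stu = [{'s1': 1}] * 3 + [{'s2': 1}] * 3          # student indicators
actuals = [0, 1, 1, 0, 0, 1]                     # 1 = correct on first attempt
student_label = ['s1'] * 3 + ['s2'] * 3
item_label = ['i1', 'i2', 'i3'] * 2

scores, kc_vals, coef_s = afms(kcs, opps, actuals, stu,
                               student_label, item_label,
                               nfolds=2, seed=42)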
Example #4
    # L2-penalize the student parameters; leave KC and opportunity weights unregularized
    l2 = [1.0 for i in range(S.shape[1])]
    l2 += [0.0 for i in range(Q.shape[1])]
    l2 += [0.0 for i in range(O.shape[1])]

    # Bound the learning rates to be positive
    bounds = [(None, None) for i in range(S.shape[1])] 
    bounds += [(None, None) for i in range(Q.shape[1])] 
    bounds += [(0, None) for i in range(O.shape[1])]
    
    # Densify the sparse feature matrices before fitting
    X = X.toarray()
    X2 = Q.toarray()

    # Fit plain AFM; predict_proba returns P(correct) for each step
    afm = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
    afm.fit(X, y)
    yAFM = afm.predict_proba(X)

    # Fit AFM+S (AFM with slipping)
    afms = BoundedLogistic(first_bounds=bounds, first_l2=l2)
    afms.fit(X, X2, y)
    yAFMS = afms.predict_proba(X, X2)

    # One figure per KC, plus an aggregate over all knowledge components
    plotkcs = list(set([kc for row in kcs for kc in row])) + ['All Knowledge Components']

    for plot_id, plotkc in enumerate(plotkcs):

        plt.figure(plot_id + 1)
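
        # The excerpt ends here. A hypothetical continuation (a sketch, not
        # the original source): plot the mean actual and predicted error rate
        # at each opportunity count for the steps involving this KC. Assumes
        # one KC per step (each opps dict holds a single {kc: count} entry)
        # and that numpy (np) and matplotlib.pyplot (plt) are imported.
        idx = [i for i, row in enumerate(kcs)
               if plotkc == 'All Knowledge Components' or plotkc in row]

        def curve(err):
            # Group error values by opportunity count and average them
            by_opp = {}
            for i in idx:
                opp = list(opps[i].values())[0]
                by_opp.setdefault(opp, []).append(err[i])
            xs = sorted(by_opp)
            return xs, [np.mean(by_opp[x]) for x in xs]

        for err, label in [([1 - a for a in actuals], 'Actual'),
                           (1 - yAFM, 'AFM'),
                           (1 - yAFMS, 'AFM+S')]:
            xs, ys = curve(err)
            plt.plot(xs, ys, label=label)

        plt.title(plotkc)
        plt.xlabel('Opportunity')
        plt.ylabel('Error Rate')
        plt.legend()

    plt.show()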