Beispiel #1
0
    # AFM
    X = hstack((S, Q, O))
    y = np.array(y)
    l2 = [1.0 for i in range(S.shape[1])]
    l2 += [0.0 for i in range(Q.shape[1])]
    l2 += [0.0 for i in range(O.shape[1])]

    bounds = [(None, None) for i in range(S.shape[1])]
    bounds += [(None, None) for i in range(Q.shape[1])]
    bounds += [(0, None) for i in range(O.shape[1])]

    X = X.toarray()
    X2 = Q.toarray()

    if args.model == "AFM":
        m = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
        m.fit(X, y)
        yHat = 1 - m.predict_proba(X)
    elif args.model == "AFM+S":
        m = BoundedLogistic(first_bounds=bounds, first_l2=l2)
        m.fit(X, X2, y)
        yHat = 1 - m.predict_proba(X, X2)
    else:
        raise ValueError("Model type not supported")

    headers = original_headers + ["Predicted Error Rate (%s)" % kc_model]
    outfilePath = args.workingDir + "/output.txt"
    outfile = open(outfilePath, 'w')
    outfile.write("\t".join(headers) + "\n")
    cntRowMissOpp = 0
    for i, row in enumerate(original_step_data):
Beispiel #2
0
    # AFM
    X = hstack((S, Q, O))
    y = np.array(y)
    l2 = [1.0 for i in range(S.shape[1])]
    l2 += [0.0 for i in range(Q.shape[1])]
    l2 += [0.0 for i in range(O.shape[1])]

    bounds = [(None, None) for i in range(S.shape[1])]
    bounds += [(None, None) for i in range(Q.shape[1])]
    bounds += [(0, None) for i in range(O.shape[1])]

    X = X.toarray()
    X2 = Q.toarray()

    if args.m == "AFM":
        m = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
        m.fit(X, y)
        coef_s = m.coef_[0:S.shape[1]]
        coef_s = [[k, v, invlogit(v)]
                  for k, v in sv.inverse_transform([coef_s])[0].items()]
        coef_q = m.coef_[S.shape[1]:S.shape[1] + Q.shape[1]]
        coef_qint = qv.inverse_transform([coef_q])[0]
        coef_o = m.coef_[S.shape[1] + Q.shape[1]:S.shape[1] + Q.shape[1] +
                         O.shape[1]]
        coef_qslope = ov.inverse_transform([coef_o])[0]

        kc_vals = []
        all_kcs = set(coef_qint).union(set(coef_qslope))
        for kc in all_kcs:
            kc_vals.append([
                kc,
Beispiel #3
0
def afm(kcs,
        opps,
        actuals,
        stu,
        student_label,
        item_label,
        nfolds=3,
        seed=None):
    """
    Fit AFM on the provided data and return cross-validated fit scores
    together with the estimated parameters.

    The design matrix is the column-wise concatenation of three
    DictVectorizer encodings: students (``stu``), knowledge components
    (``kcs``) and practice opportunities (``opps``). A single
    ``CustomLogistic`` model (no intercept) is fit on all rows to obtain
    the parameter estimates, and is then refit fold by fold under four
    cross-validation schemes.

    Parameters
    ----------
    kcs, opps, stu :
        Per-observation inputs handed to ``DictVectorizer.fit_transform``
        (presumably one mapping per row; confirm against the caller).
    actuals :
        Per-observation outcomes; converted to a numpy array and used as
        the regression target.
    student_label, item_label :
        Per-observation labels passed to ``LabelKFold`` for the student-
        and item-based splits.
    nfolds :
        Number of folds used by every cross-validation scheme.
    seed :
        ``random_state`` for the shuffled ``KFold`` and
        ``StratifiedKFold`` splits.

    Returns
    -------
    scores : list
        One value per scheme, in the order KFold, StratifiedKFold,
        LabelKFold(student_label), LabelKFold(item_label). Each value is
        the mean over folds of the square root of
        ``model.mean_squared_error`` on the held-out rows.
    kc_vals : list
        One ``[kc, intercept, invlogit(intercept), slope]`` entry per
        knowledge component; a missing intercept or slope is reported
        as 0.0.
    coef_s : list
        One ``[name, coefficient, invlogit(coefficient)]`` entry per
        student feature returned by ``sv.inverse_transform``.
    """
    sv = DictVectorizer()
    qv = DictVectorizer()
    ov = DictVectorizer()

    S = sv.fit_transform(stu)
    Q = qv.fit_transform(kcs)
    O = ov.fit_transform(opps)

    # Column counts of the three feature groups, in design-matrix order.
    n_s, n_q, n_o = S.shape[1], Q.shape[1], O.shape[1]

    X = hstack((S, Q, O)).toarray()
    y = np.array(actuals)

    # Regularize only the student columns.
    l2 = [1.0] * n_s + [0.0] * n_q + [0.0] * n_o

    # Student and KC columns are unbounded; opportunity (slope) columns
    # are bounded below at 0.
    bounds = [(None, None)] * (n_s + n_q) + [(0, None)] * n_o

    model = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
    model.fit(X, y)

    # Read the coefficients now: the cross-validation loop below refits
    # this same model object on each training fold.
    q_start = n_s
    o_start = n_s + n_q
    coef_s = model.coef_[0:n_s]
    coef_q = model.coef_[q_start:o_start]
    coef_o = model.coef_[o_start:o_start + n_o]

    coef_s = [[k, v, invlogit(v)]
              for k, v in sv.inverse_transform([coef_s])[0].items()]
    coef_qint = qv.inverse_transform([coef_q])[0]
    coef_qslope = ov.inverse_transform([coef_o])[0]

    # A KC can be absent from either mapping (presumably because
    # inverse_transform omits zero-valued entries; confirm), so take the
    # union of names and default the missing side to 0.0.
    all_kcs = set(coef_qint).union(set(coef_qslope))
    kc_vals = []
    for kc in all_kcs:
        intercept = coef_qint.get(kc, 0.0)
        slope = coef_qslope.get(kc, 0.0)
        kc_vals.append([kc, intercept, invlogit(intercept), slope])

    # NOTE(review): these constructor signatures (n / y as first
    # positional argument, ``n_folds=``) look like the legacy
    # sklearn.cross_validation API; confirm against the file's imports
    # before upgrading scikit-learn.
    cvs = [
        KFold(len(y), n_folds=nfolds, shuffle=True, random_state=seed),
        StratifiedKFold(y, n_folds=nfolds, shuffle=True, random_state=seed),
        LabelKFold(student_label, n_folds=nfolds),
        LabelKFold(item_label, n_folds=nfolds)
    ]

    scores = []
    for cv in cvs:
        fold_mse = []
        for train_index, test_index in cv:
            model.fit(X[train_index], y[train_index])
            fold_mse.append(
                model.mean_squared_error(X[test_index], y[test_index]))
        # Mean of the per-fold RMSE values.
        scores.append(np.mean(np.sqrt(fold_mse)))

    return scores, kc_vals, coef_s
    y = np.array(y)

    # Regularize the student intercepts
    l2 = [1.0 for i in range(S.shape[1])]
    l2 += [0.0 for i in range(Q.shape[1])]
    l2 += [0.0 for i in range(O.shape[1])]

    # Bound the learning rates to be positive
    bounds = [(None, None) for i in range(S.shape[1])]
    bounds += [(None, None) for i in range(Q.shape[1])]
    bounds += [(0, None) for i in range(O.shape[1])]

    X = X.toarray()
    X2 = Q.toarray()

    afm = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
    afm.fit(X, y)
    yAFM = afm.predict_proba(X)

    afms = BoundedLogistic(first_bounds=bounds, first_l2=l2)
    afms.fit(X, X2, y)
    yAFMS = afms.predict_proba(X, X2)

    #plotkcs = ['All Knowledge Components']
    plotkcs = list(set([kc for row in kcs for kc in row])) + ['All Knowledge Components']

    #f, subplots = plt.subplots(len(plotkcs))
    for plot_id, plotkc in enumerate(plotkcs):

        plt.figure(plot_id+1)
Beispiel #5
0
    y = np.array(y)

    # Regularize the student intercepts
    l2 = [1.0 for i in range(S.shape[1])] 
    l2 += [0.0 for i in range(Q.shape[1])] 
    l2 += [0.0 for i in range(O.shape[1])]

    # Bound the learning rates to be positive
    bounds = [(None, None) for i in range(S.shape[1])] 
    bounds += [(None, None) for i in range(Q.shape[1])] 
    bounds += [(0, None) for i in range(O.shape[1])]
    
    X = X.toarray()
    X2 = Q.toarray()

    afm = CustomLogistic(bounds=bounds, l2=l2, fit_intercept=False)
    afm.fit(X, y)
    yAFM = afm.predict_proba(X)

    afms = BoundedLogistic(first_bounds=bounds, first_l2=l2)
    afms.fit(X, X2, y)
    yAFMS = afms.predict_proba(X, X2)

    #plotkcs = ['All Knowledge Components']
    plotkcs = list(set([kc for row in kcs for kc in row])) + ['All Knowledge Components']

    #f, subplots = plt.subplots(len(plotkcs))
    for plot_id, plotkc in enumerate(plotkcs):

        plt.figure(plot_id+1)