Example #1
0
def test_sag():
    """Fit SAG, SAGA and the pure-Python SAG on the binary data set.

    Each fitted estimator must not expose ``predict_proba``, must score
    1.0 on the data it was trained on, and must report ``[-1, 1]`` as its
    classes.
    """
    # Hyper-parameters common to all three estimators.
    shared = dict(eta=1e-3, max_iter=20, random_state=0)
    estimators = [
        SAGClassifier(verbose=0, **shared),
        SAGAClassifier(verbose=0, **shared),
        PySAGClassifier(**shared),
    ]

    for estimator in estimators:
        estimator.fit(X_bin, y_bin)

        assert not hasattr(estimator, 'predict_proba')

        train_accuracy = estimator.score(X_bin, y_bin)
        assert_equal(train_accuracy, 1.0)

        seen_classes = list(estimator.classes_)
        assert_equal(seen_classes, [-1, 1])
Example #2
0
def saga_cv(which, alphas, l1_ratio):
    """Evaluate a grid of regularization strengths on pre-split folds.

    For every fold and every value in ``alphas``, two ``SAGAClassifier``
    models are fitted: one on the link data (with balanced sample weights)
    and one on the proposition data (with label-encoded targets). Each is
    scored with F1 on the matching validation split.

    Parameters
    ----------
    which : str
        Data set selector. ``'cdcp'`` uses 3 folds under
        ``data/process/erule/folds``; ``'ukp'`` uses 5 folds under
        ``data/process/ukp-essays/folds``.
    alphas : sequence of float
        Overall regularization strengths to try. Must support ``len()``.
    l1_ratio : float
        Mixing weight. For each grid value ``a`` the classifiers receive
        ``beta = a * l1_ratio`` and ``alpha = a * (1 - l1_ratio)``.

    Returns
    -------
    link_scores : ndarray of shape (n_folds, len(alphas))
        Binary-averaged F1 of the link classifier on each validation fold.
    prop_scores : ndarray of shape (n_folds, len(alphas))
        Macro-averaged F1 of the proposition classifier on each validation
        fold.

    Raises
    ------
    ValueError
        If ``which`` is neither ``'cdcp'`` nor ``'ukp'``.
    """
    if which == 'cdcp':
        n_folds = 3
        path = os.path.join("data", "process", "erule", "folds", "{}", "{}")
    elif which == 'ukp':
        n_folds = 5
        path = os.path.join("data", "process", "ukp-essays", "folds", "{}",
                            "{}")
    else:
        raise ValueError(
            "unknown dataset {!r}; expected 'cdcp' or 'ukp'".format(which))

    clf_link = SAGAClassifier(loss='smooth_hinge',
                              penalty='l1',
                              tol=1e-4,
                              max_iter=100,
                              random_state=0,
                              verbose=0)
    # Independent, unfitted copy with the same hyper-parameters.
    clf_prop = clone(clf_link)

    link_scores = np.zeros((n_folds, len(alphas)))
    prop_scores = np.zeros_like(link_scores)

    for k in range(n_folds):
        # The template is filled with (fold index, file name).
        X_tr_link, y_tr_link = load_csr(path.format(k, 'train.npz'),
                                        return_y=True)
        X_te_link, y_te_link = load_csr(path.format(k, 'val.npz'),
                                        return_y=True)

        X_tr_prop, y_tr_prop = load_csr(path.format(k, 'prop-train.npz'),
                                        return_y=True)
        X_te_prop, y_te_prop = load_csr(path.format(k, 'prop-val.npz'),
                                        return_y=True)

        # Encoder is fitted on the training labels only and reused for the
        # validation labels.
        le = LabelEncoder()
        y_tr_prop_enc = le.fit_transform(y_tr_prop)
        y_te_prop_enc = le.transform(y_te_prop)

        # Depends only on the fold, so computed once per fold.
        link_sw = compute_sample_weight('balanced', y_tr_link)

        for j, strength in enumerate(alphas):
            # Split the overall strength between the two penalty
            # coefficients without rebinding the loop variable.
            beta = strength * l1_ratio
            scaled_alpha = strength * (1 - l1_ratio)
            clf_link.set_params(alpha=scaled_alpha, beta=beta)
            clf_prop.set_params(alpha=scaled_alpha, beta=beta)

            clf_link.fit(X_tr_link, y_tr_link, sample_weight=link_sw)
            y_pred_link = clf_link.predict(X_te_link)

            clf_prop.fit(X_tr_prop, y_tr_prop_enc)
            y_pred_prop = clf_prop.predict(X_te_prop)

            # Silence any warnings emitted while computing the scores;
            # the previous filters are restored on exit.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                link_f = f1_score(y_te_link, y_pred_link, average='binary')
                prop_f = f1_score(y_te_prop_enc, y_pred_prop, average='macro')

            link_scores[k, j] = link_f
            prop_scores[k, j] = prop_f

    return link_scores, prop_scores