Example #1
def test_l1_regularized_saga():
    beta = 1e-3
    pysaga = PySAGAClassifier(eta=1e-3, alpha=0.0, beta=beta, max_iter=10, penalty="l1", random_state=0)
    saga = SAGAClassifier(eta=1e-3, alpha=0.0, beta=beta, max_iter=10, penalty="l1", random_state=0)
    pysaga.fit(X_bin, y_bin)
    saga.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(pysaga.coef_, saga.coef_)
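Example #1 and the tests that follow assume module-level fixtures X_bin, y_bin and a pure-Python reference implementation PySAGAClassifier, all defined elsewhere in the test module. A minimal sketch of a compatible fixture setup (the exact construction here is an assumption, chosen to match the classes_ == [-1, 1] check further down):

import numpy as np
from sklearn.datasets import make_classification

# Hypothetical setup: a small binary problem with labels mapped to {-1, +1}.
X_bin, y_bin = make_classification(n_samples=20, n_features=5, random_state=0)
y_bin = 2 * y_bin - 1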
Example #2
def test_no_reg_saga():
    # Using no regularisation at all
    pysaga = PySAGAClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10, penalty=None, random_state=0)
    saga = SAGAClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=10, penalty=None, random_state=0)

    pysaga.fit(X_bin, y_bin)
    saga.fit(X_bin, y_bin)
    np.testing.assert_array_almost_equal(pysaga.coef_, saga.coef_)
Example #3
def test_elastic_saga():
    ab = [1e-5, 1e-2, 1e-1, 1.0]
    for alpha, beta in zip(ab, ab):  # alpha and beta are swept together over the same grid
        pysaga = PySAGAClassifier(eta=1e-3, alpha=alpha, beta=beta, max_iter=1, penalty="l1", random_state=0)
        saga = SAGAClassifier(eta=1e-3, alpha=alpha, beta=beta, max_iter=1, penalty="l1", random_state=0, tol=0)
        pysaga.fit(X_bin, y_bin)
        saga.fit(X_bin, y_bin)
        np.testing.assert_array_almost_equal(pysaga.coef_, saga.coef_)
Example #4
def test_saga_score():
    X, y = make_classification(1000, random_state=0)

    pysaga = PySAGAClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=1, penalty=None, random_state=0)
    saga = SAGAClassifier(eta=1e-3, alpha=0.0, beta=0.0, max_iter=1, penalty=None, random_state=0)

    pysaga.fit(X, y)
    saga.fit(X, y)
    assert_equal(pysaga.score(X, y), saga.score(X, y))
Example #5
def test_enet_regularized_saga():
    X_sparse = sparse.rand(100, 50, density=.5, random_state=0).tocsr()
    y_sparse = np.random.randint(0, high=2, size=100)

    eta = 1e-3
    for (X, y) in ((X_bin, y_bin), (X_sparse, y_sparse)):
        for alpha in np.logspace(-3, 0, 5):
            for beta in np.logspace(-3, 3, 5):
                pysaga = PySAGAClassifier(
                    eta=eta, alpha=alpha, beta=beta,
                    max_iter=5, penalty='l1', random_state=0)
                saga = SAGAClassifier(
                    eta=eta, alpha=alpha, beta=beta, max_iter=5,
                    penalty='l1', random_state=0, tol=1e-24)

                pysaga.fit(X, y)
                saga.fit(X, y)
                np.testing.assert_array_almost_equal(pysaga.coef_, saga.coef_)
Example #6
def fit_single(solver, X, y, penalty='l2', single_target=True, C=1,
               max_iter=10, skip_slow=False):
    if skip_slow and solver == 'lightning' and penalty == 'l1':
        print('Skipping l1 logistic regression with solver lightning.')
        return

    print('Solving %s logistic regression with penalty %s, solver %s.'
          % ('binary' if single_target else 'multinomial',
             penalty, solver))

    if solver == 'lightning':
        from lightning.classification import SAGAClassifier

    if single_target or solver not in ['sag', 'saga']:
        multi_class = 'ovr'
    else:
        multi_class = 'multinomial'

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42,
                                                        stratify=y)
    n_samples = X_train.shape[0]
    n_classes = np.unique(y_train).shape[0]
    test_scores = [1]
    train_scores = [1]
    accuracies = [1 / n_classes]
    times = [0]

    if penalty == 'l2':
        alpha = 1. / (C * n_samples)
        beta = 0
        lightning_penalty = None
    else:
        alpha = 0.
        beta = 1. / (C * n_samples)
        lightning_penalty = 'l1'

    for this_max_iter in range(1, max_iter + 1, 2):
        print('[%s, %s, %s] Max iter: %s' %
              ('binary' if single_target else 'multinomial',
               penalty, solver, this_max_iter))
        if solver == 'lightning':
            lr = SAGAClassifier(loss='log', alpha=alpha, beta=beta,
                                penalty=lightning_penalty,
                                tol=-1, max_iter=this_max_iter)
        else:
            lr = LogisticRegression(solver=solver,
                                    multi_class=multi_class,
                                    C=C,
                                    penalty=penalty,
                                    fit_intercept=False, tol=1e-24,
                                    max_iter=this_max_iter,
                                    random_state=42,
                                    )
        t0 = time.perf_counter()
        lr.fit(X_train, y_train)
        train_time = time.perf_counter() - t0

        scores = []
        for (X, y) in [(X_train, y_train), (X_test, y_test)]:
            try:
                y_pred = lr.predict_proba(X)
            except NotImplementedError:
                # Lightning predict_proba is not implemented for n_classes > 2
                y_pred = _predict_proba(lr, X)
            score = log_loss(y, y_pred, normalize=False) / n_samples
            score += (0.5 * alpha * np.sum(lr.coef_ ** 2) +
                      beta * np.sum(np.abs(lr.coef_)))
            scores.append(score)
        train_score, test_score = tuple(scores)

        y_pred = lr.predict(X_test)
        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
        test_scores.append(test_score)
        train_scores.append(train_score)
        accuracies.append(accuracy)
        times.append(train_time)
    return lr, times, train_scores, test_scores, accuracies
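fit_single converts scikit-learn's inverse regularization strength C into lightning's per-sample penalty weights: alpha = 1 / (C * n_samples) for the L2 term and beta = 1 / (C * n_samples) for the L1 term. A hedged usage sketch, assuming X, y and the imports used above are in scope:

# Benchmark scikit-learn's saga solver on an L2-penalized binary problem;
# the returned lists hold per-iteration times, objectives and accuracies.
lr, times, train_scores, test_scores, accuracies = fit_single(
    'saga', X, y, penalty='l2', single_target=True, max_iter=10)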
Example #7
def test_sag_sample_weights():
    clf1 = SAGAClassifier(loss='log', max_iter=5, verbose=0, random_state=0)
    clf2 = SAGAClassifier(loss='log', max_iter=5, verbose=0, random_state=0)
    clf1.fit(X, y)
    sample_weights = [1] * y.size
    clf2.fit(X, y, sample_weight=sample_weights)
    np.testing.assert_array_equal(clf1.coef_.ravel(), clf2.coef_.ravel())

    # same thing but for a regression object
    alpha = 1.0
    clf1 = SAGARegressor(loss='squared', alpha=alpha, max_iter=5, random_state=0)
    clf1.fit(X, y)
    sample_weights = [2] * y.size
    # alpha needs to be multiplied accordingly
    clf2 = SAGARegressor(loss='squared', alpha=2 * alpha, max_iter=5, random_state=0)
    clf2.fit(X, y, sample_weight=sample_weights)
    np.testing.assert_array_equal(clf1.coef_.ravel(), clf2.coef_.ravel())

    # check that samples with a zero weight have no influence on the
    # resulting coefficients, by padding the original samples with noise
    X2 = np.concatenate((X, np.random.randn(*X.shape)), axis=0)   # augment with noise
    y2 = np.concatenate((y, y), axis=0)
    sample_weights = np.ones(y2.size, dtype=np.float64)
    sample_weights[X.shape[0]:] = 0.

    clf1 = SAGARegressor(loss='squared', alpha=alpha,  max_iter=100, random_state=0, tol=1e-24)
    clf1.fit(X, y)
    clf2 = SAGARegressor(loss='squared', alpha=0.5*alpha, max_iter=100, random_state=0, tol=1e-24)
    clf2.fit(X2, y2, sample_weight=sample_weights)
    np.testing.assert_array_almost_equal(clf1.coef_.ravel(), clf2.coef_.ravel(), decimal=6)
Example #8
@pytest.mark.parametrize("l1", [0.5, 1.0, 2.0])  # illustrative values covering both branches
def test_l1_prox(l1):
    x = np.ones(5)
    penalty = L1Penalty(l1=l1)
    if l1 <= 1.:
        np.testing.assert_array_equal(penalty.projection(x, stepsize=1.),
                                      x - l1)
        np.testing.assert_array_equal(penalty.projection(-x, stepsize=1.),
                                      -x + l1)
    else:
        np.testing.assert_array_equal(penalty.projection(x, stepsize=1.), 0)
        np.testing.assert_array_equal(penalty.projection(-x, stepsize=1.), 0)
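The expected arrays encode the standard soft-thresholding operator, i.e. the proximal operator of the L1 penalty. A minimal NumPy sketch of the same operation for reference (soft_threshold is a hypothetical helper, not part of lightning):

import numpy as np

def soft_threshold(x, l1):
    # shrink each entry towards zero by l1; entries with |x| <= l1 become exactly 0
    return np.sign(x) * np.maximum(np.abs(x) - l1, 0.0)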


@pytest.mark.parametrize("clf", [
    SAGClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
    SAGAClassifier(eta=1e-3, max_iter=20, verbose=0, random_state=0),
    PySAGClassifier(eta=1e-3, max_iter=20, random_state=0)
])
def test_sag(clf, bin_train_data):
    X_bin, y_bin = bin_train_data
    clf.fit(X_bin, y_bin)
    assert not hasattr(clf, 'predict_proba')
    assert clf.score(X_bin, y_bin) == 1.0
    assert list(clf.classes_) == [-1, 1]


@pytest.mark.parametrize(
    "SAG_", [SAGAClassifier, SAGClassifier, SAGRegressor, SAGARegressor])
def test_sag_dataset(SAG_, bin_train_data):
    # make sure SAG/SAGA accept a Dataset object as argument
    X_bin, y_bin = bin_train_data
Example #9
def fit_single(
    solver,
    X,
    y,
    penalty="l2",
    single_target=True,
    C=1,
    max_iter=10,
    skip_slow=False,
    dtype=np.float64,
):
    if skip_slow and solver == "lightning" and penalty == "l1":
        print("skip_slowping l1 logistic regression with solver lightning.")
        return

    print(
        "Solving %s logistic regression with penalty %s, solver %s."
        % ("binary" if single_target else "multinomial", penalty, solver)
    )

    if solver == "lightning":
        from lightning.classification import SAGAClassifier

    if single_target or solver not in ["sag", "saga"]:
        multi_class = "ovr"
    else:
        multi_class = "multinomial"
    X = X.astype(dtype)
    y = y.astype(dtype)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, stratify=y
    )
    n_samples = X_train.shape[0]
    n_classes = np.unique(y_train).shape[0]
    test_scores = [1]
    train_scores = [1]
    accuracies = [1 / n_classes]
    times = [0]

    if penalty == "l2":
        alpha = 1.0 / (C * n_samples)
        beta = 0
        lightning_penalty = None
    else:
        alpha = 0.0
        beta = 1.0 / (C * n_samples)
        lightning_penalty = "l1"

    for this_max_iter in range(1, max_iter + 1, 2):
        print(
            "[%s, %s, %s] Max iter: %s"
            % (
                "binary" if single_target else "multinomial",
                penalty,
                solver,
                this_max_iter,
            )
        )
        if solver == "lightning":
            lr = SAGAClassifier(
                loss="log",
                alpha=alpha,
                beta=beta,
                penalty=lightning_penalty,
                tol=-1,
                max_iter=this_max_iter,
            )
        else:
            lr = LogisticRegression(
                solver=solver,
                multi_class=multi_class,
                C=C,
                penalty=penalty,
                fit_intercept=False,
                tol=0,
                max_iter=this_max_iter,
                random_state=42,
            )

        # Touch the data once so the CPU cache is equally warm for every fit call
        X_train.max()
        t0 = time.perf_counter()

        lr.fit(X_train, y_train)
        train_time = time.perf_counter() - t0

        scores = []
        for X, y in [(X_train, y_train), (X_test, y_test)]:
            try:
                y_pred = lr.predict_proba(X)
            except NotImplementedError:
                # Lightning predict_proba is not implemented for n_classes > 2
                y_pred = _predict_proba(lr, X)
            score = log_loss(y, y_pred, normalize=False) / n_samples
            score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum(
                np.abs(lr.coef_)
            )
            scores.append(score)
        train_score, test_score = tuple(scores)

        y_pred = lr.predict(X_test)
        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
        test_scores.append(test_score)
        train_scores.append(train_score)
        accuracies.append(accuracy)
        times.append(train_time)
    return lr, times, train_scores, test_scores, accuracies
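This variant adds a dtype argument so the same benchmark can run in single precision. A hedged usage sketch, assuming X and y are in scope:

# Benchmark lightning's SAGA with an L1 penalty in float32; the casts
# happen inside fit_single before the train/test split.
result = fit_single("lightning", X, y, penalty="l1", max_iter=10, dtype=np.float32)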
Example #10
def saga_cv(which, alphas, l1_ratio):

    if which == 'cdcp':
        n_folds = 3
        path = os.path.join("data", "process", "erule", "folds", "{}", "{}")
    elif which == 'ukp':
        n_folds = 5
        path = os.path.join("data", "process", "ukp-essays", "folds", "{}",
                            "{}")
    else:
        raise ValueError

    clf_link = SAGAClassifier(loss='smooth_hinge',
                              penalty='l1',
                              tol=1e-4,
                              max_iter=100,
                              random_state=0,
                              verbose=0)
    clf_prop = clone(clf_link)

    link_scores = np.zeros((n_folds, len(alphas)))
    prop_scores = np.zeros_like(link_scores)

    for k in range(n_folds):
        X_tr_link, y_tr_link = load_csr(path.format(k, 'train.npz'),
                                        return_y=True)
        X_te_link, y_te_link = load_csr(path.format(k, 'val.npz'),
                                        return_y=True)

        X_tr_prop, y_tr_prop = load_csr(path.format(k, 'prop-train.npz'),
                                        return_y=True)
        X_te_prop, y_te_prop = load_csr(path.format(k, 'prop-val.npz'),
                                        return_y=True)

        le = LabelEncoder()
        y_tr_prop_enc = le.fit_transform(y_tr_prop)
        y_te_prop_enc = le.transform(y_te_prop)

        link_sw = compute_sample_weight('balanced', y_tr_link)

        for j, alpha in enumerate(alphas):

            beta = alpha * l1_ratio
            alpha *= 1 - l1_ratio
            clf_link.set_params(alpha=alpha, beta=beta)
            clf_prop.set_params(alpha=alpha, beta=beta)

            clf_link.fit(X_tr_link, y_tr_link, sample_weight=link_sw)
            y_pred_link = clf_link.predict(X_te_link)

            clf_prop.fit(X_tr_prop, y_tr_prop_enc)
            y_pred_prop = clf_prop.predict(X_te_prop)

            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                link_f = f1_score(y_te_link, y_pred_link, average='binary')
                prop_f = f1_score(y_te_prop_enc, y_pred_prop, average='macro')

            link_scores[k, j] = link_f
            prop_scores[k, j] = prop_f

    return link_scores, prop_scores
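Each grid value alpha is split into lightning's two penalty weights, beta = alpha * l1_ratio for the L1 term and alpha * (1 - l1_ratio) for the L2 term, so the grid sweeps a fixed elastic-net mix. A hedged usage sketch (the grid values are illustrative):

alphas = np.logspace(-5, -1, 9)
link_scores, prop_scores = saga_cv('cdcp', alphas, l1_ratio=0.5)
# average F1 across folds to pick a regularization strength per task
best_link_alpha = alphas[link_scores.mean(axis=0).argmax()]
best_prop_alpha = alphas[prop_scores.mean(axis=0).argmax()]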
Example #11
class BaselineStruct(BaseArgumentMixin):
    def __init__(self, alpha_link, alpha_prop, l1_ratio):
        self.alpha_link = alpha_link
        self.alpha_prop = alpha_prop
        self.l1_ratio = l1_ratio
        self.compat_features = False

    def initialize_labels(self, y_props_flat, y_links_flat):
        self.prop_encoder_ = LabelEncoder().fit(y_props_flat)
        self.link_encoder_ = LabelEncoder().fit(y_links_flat)

        self.n_prop_states = len(self.prop_encoder_.classes_)
        self.n_link_states = len(self.link_encoder_.classes_)

    def fit(self, X_link, y_link, X_prop, y_prop):
        self.initialize_labels(y_prop, y_link)
        y_link = self.link_encoder_.transform(y_link)
        y_prop = self.prop_encoder_.transform(y_prop)

        self.link_clf_ = SAGAClassifier(loss='smooth_hinge',
                                        penalty='l1',
                                        tol=1e-4,
                                        max_iter=500,
                                        random_state=0,
                                        verbose=0)

        self.prop_clf_ = clone(self.link_clf_)

        alpha_link = self.alpha_link * (1 - self.l1_ratio)
        beta_link = self.alpha_link * self.l1_ratio
        sw = compute_sample_weight('balanced', y_link)
        self.link_clf_.set_params(alpha=alpha_link, beta=beta_link)
        self.link_clf_.fit(X_link, y_link, sample_weight=sw)

        alpha_prop = self.alpha_prop * (1 - self.l1_ratio)
        beta_prop = self.alpha_prop * self.l1_ratio
        self.prop_clf_.set_params(alpha=alpha_prop, beta=beta_prop)
        self.prop_clf_.fit(X_prop, y_prop)
        return self

    def decision_function(self, X_link, X_prop, docs):

        link_offsets = np.cumsum([len(doc.features) for doc in docs])
        y_link_flat = self.link_clf_.decision_function(X_link)

        y_link_marg = np.zeros(
            (len(y_link_flat), len(self.link_encoder_.classes_)))
        link_on, = self.link_encoder_.transform([True])
        y_link_marg[:, link_on] = y_link_flat.ravel()

        Y_link = [
            y_link_marg[start:end]
            for start, end in zip(np.append(0, link_offsets), link_offsets)
        ]

        prop_offsets = np.cumsum([len(doc.prop_features) for doc in docs])
        y_prop_marg = self.prop_clf_.decision_function(X_prop)
        Y_prop = [
            y_prop_marg[start:end]
            for start, end in zip(np.append(0, prop_offsets), prop_offsets)
        ]

        Y_pred = []
        for y_link, y_prop in zip(Y_link, Y_prop):
            Y_pred.append(DocLabel(y_prop, y_link))

        assert len(Y_pred) == len(docs)

        return Y_pred
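A hedged usage sketch of BaselineStruct; the feature matrices, labels and docs objects come from the surrounding argumentation-mining pipeline and are assumptions here:

model = BaselineStruct(alpha_link=1e-4, alpha_prop=1e-4, l1_ratio=0.5)
model.fit(X_link_tr, y_link_tr, X_prop_tr, y_prop_tr)
# one DocLabel per document, holding per-proposition and per-link scores
Y_pred = model.decision_function(X_link_te, X_prop_te, docs_te)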
Example #12
def fit_single(solver,
               X,
               y,
               penalty='l2',
               single_target=True,
               C=1,
               max_iter=10,
               skip_slow=False):
    if skip_slow and solver == 'lightning' and penalty == 'l1':
        print('Skipping l1 logistic regression with solver lightning.')
        return

    print('Solving %s logistic regression with penalty %s, solver %s.' %
          ('binary' if single_target else 'multinomial', penalty, solver))

    if solver == 'lightning':
        from lightning.classification import SAGAClassifier

    if single_target or solver not in ['sag', 'saga']:
        multi_class = 'ovr'
    else:
        multi_class = 'multinomial'

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        random_state=42,
                                                        stratify=y)
    n_samples = X_train.shape[0]
    n_classes = np.unique(y_train).shape[0]
    test_scores = [1]
    train_scores = [1]
    accuracies = [1 / n_classes]
    times = [0]

    if penalty == 'l2':
        alpha = 1. / (C * n_samples)
        beta = 0
        lightning_penalty = None
    else:
        alpha = 0.
        beta = 1. / (C * n_samples)
        lightning_penalty = 'l1'

    for this_max_iter in range(1, max_iter + 1, 2):
        print('[%s, %s, %s] Max iter: %s' %
              ('binary' if single_target else 'multinomial', penalty, solver,
               this_max_iter))
        if solver == 'lightning':
            lr = SAGAClassifier(loss='log',
                                alpha=alpha,
                                beta=beta,
                                penalty=lightning_penalty,
                                tol=-1,
                                max_iter=this_max_iter)
        else:
            lr = LogisticRegression(
                solver=solver,
                multi_class=multi_class,
                C=C,
                penalty=penalty,
                fit_intercept=False,
                tol=1e-24,
                max_iter=this_max_iter,
                random_state=42,
            )
        t0 = time.perf_counter()
        lr.fit(X_train, y_train)
        train_time = time.perf_counter() - t0

        scores = []
        for (X, y) in [(X_train, y_train), (X_test, y_test)]:
            try:
                y_pred = lr.predict_proba(X)
            except NotImplementedError:
                # Lightning predict_proba is not implemented for n_classes > 2
                y_pred = _predict_proba(lr, X)
            score = log_loss(y, y_pred, normalize=False) / n_samples
            score += (0.5 * alpha * np.sum(lr.coef_**2) +
                      beta * np.sum(np.abs(lr.coef_)))
            scores.append(score)
        train_score, test_score = tuple(scores)

        y_pred = lr.predict(X_test)
        accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
        test_scores.append(test_score)
        train_scores.append(train_score)
        accuracies.append(accuracy)
        times.append(train_time)
    return lr, times, train_scores, test_scores, accuracies
clf3 = CDClassifier(loss="squared_hinge",
                    alpha=alpha,
                    C=1.0 / X.shape[0],
                    max_iter=50,
                    n_calls=X.shape[1] // 3,
                    random_state=0,
                    tol=tol)
clf4 = AdaGradClassifier(loss="squared_hinge",
                         alpha=alpha,
                         eta=eta_adagrad,
                         n_iter=100,
                         n_calls=X.shape[0] // 2,
                         random_state=0)
clf5 = SAGAClassifier(loss="squared_hinge",
                      alpha=alpha,
                      max_iter=100,
                      random_state=0,
                      tol=tol)
clf6 = SAGClassifier(loss="squared_hinge",
                     alpha=alpha,
                     max_iter=100,
                     random_state=0,
                     tol=tol)

plt.figure()

data = {}
for clf, name in ((clf1, "SVRG"), (clf2, "SDCA"), (clf3, "PCD"),
                  (clf4, "AdaGrad"), (clf5, "SAGA"), (clf6, "SAG")):
    print(name)
    cb = Callback(X, y)
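Callback is defined outside this excerpt; benchmark scripts in this style usually pass a small callable that the solver invokes periodically to record the objective over time. A minimal sketch under that assumption, using the squared-hinge loss plus the L2 term that matches the classifiers above (alpha is the script-level constant):

import time
import numpy as np

class Callback(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.obj = []
        self.times = []
        self.start = time.perf_counter()

    def __call__(self, clf):
        self.times.append(time.perf_counter() - self.start)
        coef = clf.coef_.ravel()
        margins = self.y * self.X.dot(coef)
        # squared-hinge objective with the same L2 regularization as above
        loss = np.mean(np.maximum(1 - margins, 0) ** 2)
        self.obj.append(loss + 0.5 * alpha * np.dot(coef, coef))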