Beispiel #1
0
    def test_LogisticRegression_fit(self):
        """...Check that LogisticRegression reaches a decent AUC for every
        tested combination of solver, penalty and intercept setting
        """
        seed = 179312
        stochastic_solvers = ('sgd', 'svrg', 'sdca')
        bfgs_penalties = ('none', 'l2')
        raw_features, y = Test.get_train_data()

        for fit_intercept in (True, False):
            for penalty in penalties:
                use_binarsity = penalty == 'binarsity'

                # The binarsity penalty is fitted on binarized features,
                # every other penalty on the raw ones
                features = raw_features
                if use_binarsity:
                    binarizer = FeaturesBinarizer(n_cuts=3)
                    features = binarizer.fit_transform(raw_features)

                for solver in solvers:
                    # BFGS only accepts ProxZero and ProxL2sq for now
                    if solver == 'bfgs' and penalty not in bfgs_penalties:
                        continue

                    learner_kwargs = dict(
                        penalty=penalty,
                        tol=1e-5,
                        solver=solver,
                        verbose=False,
                        max_iter=10,
                        fit_intercept=fit_intercept,
                    )

                    # C is only passed when a penalty is actually applied
                    if penalty != 'none':
                        learner_kwargs['C'] = 100

                    # Blocks are taken from the binarizer fitted above
                    if use_binarsity:
                        learner_kwargs['blocks_start'] = \
                            binarizer.feature_indices[:-1, ]
                        learner_kwargs['blocks_length'] = binarizer.n_values

                    if solver == 'sdca':
                        learner_kwargs['sdca_ridge_strength'] = 2e-2

                    # Fix the seed of stochastic solvers
                    if solver in stochastic_solvers:
                        learner_kwargs['random_state'] = seed

                    if solver == 'sgd':
                        learner_kwargs['step'] = 1.

                    learner = LogisticRegression(**learner_kwargs)
                    learner.fit(features, y)

                    # Score the fit on the training data itself, using
                    # column 1 of the predicted probabilities
                    scores = learner.predict_proba(features)[:, 1]
                    failure_msg = ("solver %s with penalty %s and "
                                   "intercept %s reached too low AUC" %
                                   (solver, penalty, fit_intercept))
                    self.assertGreater(roc_auc_score(y, scores), 0.7,
                                       failure_msg)
Beispiel #2
0
    def test_predict_proba(self):
        """...Check LogReg predict_proba against hard-coded reference values
        """
        train_features, train_labels = Test.get_train_data(
            n_features=12, n_samples=300, nnz=0)
        learner = LogisticRegression(random_state=32289, tol=1e-13)
        learner.fit(train_features, train_labels)

        # Only the features of the small test sample are needed
        test_features, _ = Test.get_train_data(
            n_features=12, n_samples=5, nnz=0)

        # One row per test sample; each pair of values sums to one
        expected_rows = [
            [0.35851418, 0.64148582],
            [0.42549328, 0.57450672],
            [0.6749705, 0.3250295],
            [0.39684181, 0.60315819],
            [0.42732443, 0.57267557],
        ]
        expected_probas = np.array(expected_rows)

        actual_probas = learner.predict_proba(test_features)
        np.testing.assert_array_almost_equal(actual_probas, expected_probas,
                                             decimal=3)
Beispiel #3
0
    def test_LogisticRegression_fit(self):
        """...Check that LogisticRegression reaches a decent AUC for every
        tested combination of solver, penalty and intercept setting
        """
        seed = 179312
        stochastic_solvers = ('sgd', 'svrg', 'sdca')
        bfgs_penalties = ('none', 'l2')
        X, y = Test.get_train_data()

        for fit_intercept in (True, False):
            for penalty in penalties:
                for solver in solvers:
                    # BFGS only accepts ProxZero and ProxL2sq for now
                    if solver == 'bfgs' and penalty not in bfgs_penalties:
                        continue

                    learner_kwargs = dict(
                        penalty=penalty,
                        tol=1e-5,
                        solver=solver,
                        verbose=False,
                        max_iter=10,
                        fit_intercept=fit_intercept,
                    )

                    # C is only passed when a penalty is actually applied
                    if penalty != 'none':
                        learner_kwargs['C'] = 50

                    if solver == 'sdca':
                        learner_kwargs['sdca_ridge_strength'] = 2e-2

                    # Fix the seed of stochastic solvers
                    if solver in stochastic_solvers:
                        learner_kwargs['random_state'] = seed

                    if solver == 'sgd':
                        learner_kwargs['step'] = 1.

                    learner = LogisticRegression(**learner_kwargs)
                    learner.fit(X, y)

                    # Score the fit on the training data itself, using
                    # column 1 of the predicted probabilities
                    scores = learner.predict_proba(X)[:, 1]
                    failure_msg = ("solver %s with penalty %s and "
                                   "intercept %s reached too low AUC" %
                                   (solver, penalty, fit_intercept))
                    self.assertGreater(roc_auc_score(y, scores), 0.7,
                                       failure_msg)
Beispiel #4
0
# Compare two logistic regression learners on the adult dataset: fit both,
# time the fits, compute ROC curves and start plotting the learned weights.
# NOTE(review): LogRegTick / LogRegScikit are aliases imported outside this
# view -- presumably tick's and scikit-learn's LogisticRegression; confirm.

# Train and test parts of the adult dataset; below, element 0 is passed as
# the features and element 1 as the labels
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

# Both learners share the L1 penalty and the tolerance; only the first one
# is given an explicit C, the second keeps its default
clf_tick = LogRegTick(C=1e5, penalty='l1', tol=1e-8)
clf_scikit = LogRegScikit(penalty='l1', tol=1e-8)

# Wall-clock duration of each fit on the training data
t1 = time()
clf_tick.fit(train_set[0], train_set[1])
t_tick = time() - t1

t1 = time()
clf_scikit.fit(train_set[0], train_set[1])
t_scikit = time() - t1

# Predicted probabilities on the test features
pred_tick = clf_tick.predict_proba(test_set[0])
pred_scikit = clf_scikit.predict_proba(test_set[0])

# ROC curves use column 1 of the predicted probabilities as the score;
# the thresholds returned by roc_curve are discarded
fpr_tick, tpr_tick, _ = roc_curve(test_set[1], pred_tick[:, 1])
fpr_scikit, tpr_scikit, _ = roc_curve(test_set[1], pred_scikit[:, 1])

plt.figure(figsize=(10, 8))

# Top-left cell of the 2x2 grid: stem plot of the first learner's weights
ax1 = plt.subplot2grid((2, 2), (0, 0))
plt.stem(clf_tick.weights)
plt.title(r'Model-weights in $\mathtt{tick}$', fontsize=16)
plt.ylim((-2, 2.5))

# Top-right cell: stem plot of the second learner's coefficients, flattened
# to one dimension with np.ravel first
ax2 = plt.subplot2grid((2, 2), (0, 1))
plt.stem(np.ravel(clf_scikit.coef_))
# plt.legend()
Beispiel #5
0
==============================================
Binary classification with logistic regression
==============================================

This code performs binary classification on the adult dataset with a logistic
regression learner (`tick.inference.LogisticRegression`).
"""

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

from tick.inference import LogisticRegression
from tick.dataset import fetch_tick_dataset

# Train and test parts of the adult dataset; element 0 is used as the
# features and element 1 as the labels
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')
train_features, train_labels = train_set[0], train_set[1]
test_features, test_labels = test_set[0], test_set[1]

# Fit the learner with its default settings
learner = LogisticRegression()
learner.fit(train_features, train_labels)

# Column 1 of the predicted probabilities is the score fed to the ROC curve
predictions = learner.predict_proba(test_features)
fpr, tpr, _ = roc_curve(test_labels, predictions[:, 1])
roc_area = auc(fpr, tpr)

# Draw the ROC curve, with the area under it reported in the title
figure, axes = plt.subplots(figsize=(6, 5))
axes.plot(fpr, tpr, lw=2)
axes.set_title("ROC curve on adult dataset (area = {:.2f})".format(roc_area))
axes.set_ylabel("True Positive Rate")
axes.set_xlabel("False Positive Rate")

plt.show()