コード例 #1
0
ファイル: logreg_test.py プロジェクト: jayeshchoudhari/tick
    def test_LogisticRegression_solver_step(self):
        """...Test that LogisticRegression forwards its step to the solver
        """
        for solver in solvers:
            if solver in ('sdca', 'bfgs'):
                # No step can be set for these solvers: giving one must emit
                # a warning and the learner must report a step of None
                warning_re = '^Solver "%s" has no settable step$' % solver
                with self.assertWarnsRegex(RuntimeWarning, warning_re):
                    extra_kwargs = Test.specific_solver_kwargs(solver)
                    learner = LogisticRegression(solver=solver, step=1,
                                                 **extra_kwargs)
                    self.assertIsNone(learner.step)
                continue

            extra_kwargs = Test.specific_solver_kwargs(solver)
            learner = LogisticRegression(solver=solver, step=self.float_1,
                                         **extra_kwargs)
            # Step given at construction is visible on learner and solver
            self.assertEqual(learner.step, self.float_1)
            self.assertEqual(learner._solver_obj.step, self.float_1)

            # Reassigning the step on the learner must reach the solver too
            learner.step = self.float_2
            self.assertEqual(learner.step, self.float_2)
            self.assertEqual(learner._solver_obj.step, self.float_2)

            if solver == 'sgd':
                # Fitting SGD without an explicit step must warn the user
                tuning_re = '^SGD step needs to be tuned manually$'
                with self.assertWarnsRegex(RuntimeWarning, tuning_re):
                    learner = LogisticRegression(solver='sgd')
                    learner.fit(self.X, self.y)
コード例 #2
0
ファイル: logreg_test.py プロジェクト: cderemble/tick
    def test_predict(self):
        """...Test LogReg prediction under several encodings of the labels
        """
        # Each mapping renames the original -1 / 1 labels before training
        labels_mappings = [
            {-1: -1., 1: 1.},
            {-1: 1., 1: -1.},
            {-1: 1, 1: 0},
            {-1: 0, 1: 1},
            {-1: 'cat', 1: 'dog'},
        ]
        # Expected predictions, expressed with the original -1 / 1 labels
        reference_prediction = [1., 1., -1., 1., 1.]

        for labels_mapping in labels_mappings:
            relabel = np.vectorize(labels_mapping.get)

            X, y = Test.get_train_data(n_features=12, n_samples=300, nnz=0)
            y = relabel(y)

            learner = LogisticRegression(random_state=32789, tol=1e-9)
            learner.fit(X, y)

            # Labels of the test sample are not needed here
            X_test, _ = Test.get_train_data(n_features=12, n_samples=5,
                                            nnz=0)
            expected = relabel(reference_prediction)
            np.testing.assert_array_equal(learner.predict(X_test), expected)
コード例 #3
0
ファイル: logreg_test.py プロジェクト: tozammel/tick
    def test_LogisticRegression_fit(self):
        """...Test LogisticRegression fit for every solver and penalty pair
        """
        sto_seed = 179312
        raw_features, y = Test.get_train_data()

        for fit_intercept in (True, False):
            for penalty in penalties:
                with_binarsity = penalty == 'binarsity'

                if with_binarsity:
                    # binarsity is fitted on binarized features
                    binarizer = FeaturesBinarizer(n_cuts=3)
                    features = binarizer.fit_transform(raw_features)
                else:
                    features = raw_features

                for solver in solvers:
                    # BFGS only accepts ProxZero and ProxL2sq for now
                    if solver == 'bfgs' and penalty not in ('none', 'l2'):
                        continue

                    solver_kwargs = dict(penalty=penalty, tol=1e-5,
                                         solver=solver, verbose=False,
                                         max_iter=10,
                                         fit_intercept=fit_intercept)

                    if penalty != 'none':
                        solver_kwargs['C'] = 100

                    if with_binarsity:
                        solver_kwargs.update(
                            blocks_start=binarizer.feature_indices[:-1, ],
                            blocks_length=binarizer.n_values)

                    if solver == 'sdca':
                        solver_kwargs['sdca_ridge_strength'] = 2e-2

                    if solver in ('sgd', 'svrg', 'sdca'):
                        # Seed stochastic solvers so that runs are repeatable
                        solver_kwargs['random_state'] = sto_seed

                    if solver == 'sgd':
                        solver_kwargs['step'] = 1.

                    learner = LogisticRegression(**solver_kwargs)
                    learner.fit(features, y)

                    # Score with the second column of predict_proba
                    scores = learner.predict_proba(features)[:, 1]
                    failure_msg = ("solver %s with penalty %s and "
                                   "intercept %s reached too low AUC" %
                                   (solver, penalty, fit_intercept))
                    self.assertGreater(roc_auc_score(y, scores), 0.7,
                                       failure_msg)
コード例 #4
0
ファイル: logreg_test.py プロジェクト: cderemble/tick
    def test_decision_function(self):
        """...Test LogReg decision_function
        """
        X, y = Test.get_train_data(n_features=12, n_samples=300, nnz=0)
        learner = LogisticRegression(random_state=32789, tol=1e-13)
        learner.fit(X, y)

        # Labels of the test sample are not needed here
        X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)

        # Reference values, compared up to 3 decimals
        expected_values = np.array(
            [0.58182, 0.30026, -0.73075, 0.41864, 0.29278])
        computed_values = learner.decision_function(X_test)
        np.testing.assert_array_almost_equal(computed_values,
                                             expected_values, decimal=3)
コード例 #5
0
ファイル: logreg_test.py プロジェクト: cderemble/tick
    def test_LogisticRegression_warm_start(self):
        """...Test that a warm started LogisticRegression keeps improving
        """
        sto_seed = 179312
        X, y = Test.get_train_data()

        def fitted_coeffs(learner, fit_intercept):
            # Weights, followed by the intercept when one is fitted
            if not fit_intercept:
                return learner.weights
            return np.hstack((learner.weights, learner.intercept))

        for solver, fit_intercept in itertools.product(solvers,
                                                       [True, False]):
            solver_kwargs = dict(solver=solver, max_iter=2,
                                 fit_intercept=fit_intercept,
                                 warm_start=True, tol=0)

            if solver == 'sdca':
                # Asking for a warm started SDCA must be rejected
                msg = '^SDCA cannot be warm started$'
                with self.assertRaisesRegex(ValueError, msg):
                    LogisticRegression(**solver_kwargs)
                continue

            if solver in ('sgd', 'svrg'):
                solver_kwargs['random_state'] = sto_seed

            if solver == 'sgd':
                solver_kwargs['step'] = 1.

            learner = LogisticRegression(**solver_kwargs)

            # Fit twice on the same data, recording coefficients each time
            learner.fit(X, y)
            coeffs_1 = fitted_coeffs(learner, fit_intercept)

            learner.fit(X, y)
            coeffs_2 = fitted_coeffs(learner, fit_intercept)

            # Thanks to warm start objective should have decreased
            objective = learner._solver_obj.objective
            self.assertLess(objective(coeffs_2), objective(coeffs_1))
コード例 #6
0
ファイル: logreg_test.py プロジェクト: cderemble/tick
    def test_predict_proba(self):
        """...Test LogReg predict_proba against reference probabilities
        """
        X, y = Test.get_train_data(n_features=12, n_samples=300, nnz=0)
        learner = LogisticRegression(random_state=32289, tol=1e-13)
        learner.fit(X, y)

        # Labels of the test sample are not needed here
        X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)

        # One row per test sample, one column per class; compared up to
        # 3 decimals
        expected_probas = np.array([
            [0.35851418, 0.64148582],
            [0.42549328, 0.57450672],
            [0.6749705, 0.3250295],
            [0.39684181, 0.60315819],
            [0.42732443, 0.57267557],
        ])
        computed_probas = learner.predict_proba(X_test)
        np.testing.assert_array_almost_equal(computed_probas,
                                             expected_probas, decimal=3)
コード例 #7
0
ファイル: logreg_test.py プロジェクト: cderemble/tick
    def test_LogisticRegression_fit(self):
        """...Test LogisticRegression fit for every solver and penalty pair
        """
        sto_seed = 179312
        X, y = Test.get_train_data()

        for fit_intercept in (True, False):
            for penalty in penalties:
                for solver in solvers:
                    # BFGS only accepts ProxZero and ProxL2sq for now
                    if solver == 'bfgs' and penalty not in ('none', 'l2'):
                        continue

                    solver_kwargs = dict(penalty=penalty, tol=1e-5,
                                         solver=solver, verbose=False,
                                         max_iter=10,
                                         fit_intercept=fit_intercept)

                    if penalty != 'none':
                        solver_kwargs['C'] = 50

                    if solver == 'sdca':
                        solver_kwargs['sdca_ridge_strength'] = 2e-2

                    if solver in ('sgd', 'svrg', 'sdca'):
                        # Seed stochastic solvers so that runs are repeatable
                        solver_kwargs['random_state'] = sto_seed

                    if solver == 'sgd':
                        solver_kwargs['step'] = 1.

                    learner = LogisticRegression(**solver_kwargs)
                    learner.fit(X, y)

                    # Score with the second column of predict_proba
                    scores = learner.predict_proba(X)[:, 1]
                    failure_msg = ("solver %s with penalty %s and "
                                   "intercept %s reached too low AUC" %
                                   (solver, penalty, fit_intercept))
                    self.assertGreater(roc_auc_score(y, scores), 0.7,
                                       failure_msg)
コード例 #8
0
ファイル: logreg_test.py プロジェクト: jayeshchoudhari/tick
    def test_labels_encoding(self):
        """...Test that class encoding is well done for LogReg

        The same random features are fitted with three encodings of one
        binary labelling (-1 / 1, 0 / 1 and text labels). After each fit the
        learner's ``classes`` and the output of ``_encode_labels_vector`` are
        checked.
        """
        learner = LogisticRegression(max_iter=1)

        np.random.seed(38027)
        n_features = 3
        n_samples = 5
        X = np.random.rand(n_samples, n_features)

        # Labels already in -1 / 1 form must be left unchanged
        encoded_y = np.array([1., -1., 1., -1., -1.])
        learner.fit(X, encoded_y)
        np.testing.assert_array_equal(learner.classes, [-1., 1.])
        np.testing.assert_array_equal(learner._encode_labels_vector(encoded_y),
                                      encoded_y)

        # 0 / 1 labels must be encoded as -1 / 1
        zero_one_y = np.array([1., 0., 1., 0., 0.])
        learner.fit(X, zero_one_y)
        np.testing.assert_array_equal(learner.classes, [0., 1.])
        np.testing.assert_array_equal(learner._encode_labels_vector(zero_one_y),
                                      encoded_y)

        text_y = np.array(['cat', 'dog', 'cat', 'dog', 'dog'])
        learner.fit(X, text_y)
        # Sets are not array-likes: compare them with the unittest set
        # assertion instead of a numpy array assertion
        self.assertSetEqual(set(learner.classes), {'cat', 'dog'})
        encoded_text_y = learner._encode_labels_vector(text_y)
        # The test does not fix which text label gets which sign, so the
        # expected vector is aligned on the sign of the first encoded sample
        np.testing.assert_array_equal(encoded_text_y,
                                      encoded_y * np.sign(encoded_text_y[0])
                                      * np.sign(encoded_y[0]))
コード例 #9
0
from time import time

import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression as LogRegScikit
from sklearn.metrics import roc_curve, auc

from tick.dataset import fetch_tick_dataset
from tick.inference import LogisticRegression as LogRegTick

# Train and test parts of the adult dataset; index 0 is used as the features
# and index 1 as the labels below
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

clf_tick = LogRegTick(C=1e5, penalty='l1', tol=1e-8)
# The solver is set explicitly: scikit-learn's default solver ('lbfgs' since
# version 0.22) does not support the l1 penalty, whereas 'liblinear' does
clf_scikit = LogRegScikit(penalty='l1', solver='liblinear', tol=1e-8)

# Wall-clock training time of each learner on the same training set
t1 = time()
clf_tick.fit(train_set[0], train_set[1])
t_tick = time() - t1

t1 = time()
clf_scikit.fit(train_set[0], train_set[1])
t_scikit = time() - t1

pred_tick = clf_tick.predict_proba(test_set[0])
pred_scikit = clf_scikit.predict_proba(test_set[0])

# ROC curves are built from the second column of the predicted probabilities
fpr_tick, tpr_tick, _ = roc_curve(test_set[1], pred_tick[:, 1])
fpr_scikit, tpr_scikit, _ = roc_curve(test_set[1], pred_scikit[:, 1])

plt.figure(figsize=(10, 8))

# Top-left cell of a 2 x 2 grid of axes
ax1 = plt.subplot2grid((2, 2), (0, 0))
コード例 #10
0
"""
==============================================
Binary classification with logistic regression
==============================================

This code performs binary classification on the adult dataset with the
logistic regression learner (`tick.inference.LogisticRegression`).
"""

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

from tick.inference import LogisticRegression
from tick.dataset import fetch_tick_dataset

# Fetch the train and test parts of the adult dataset
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')
train_features, train_labels = train_set[0], train_set[1]
test_features, test_labels = test_set[0], test_set[1]

# Train the learner with its default settings
learner = LogisticRegression()
learner.fit(train_features, train_labels)

# The ROC curve is built from the second column of the predicted probabilities
predictions = learner.predict_proba(test_features)
fpr, tpr, _ = roc_curve(test_labels, predictions[:, 1])

plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, lw=2)

# The area under the curve is reported in the title
roc_area = auc(fpr, tpr)
plt.title("ROC curve on adult dataset (area = {:.2f})".format(roc_area))
plt.ylabel("True Positive Rate")
plt.xlabel("False Positive Rate")

plt.show()