def test_LogisticRegression_solver_step(self):
    """...Test LogisticRegression setting of step parameter of solver
    """
    # Solvers for which a user-supplied step is expected to be rejected
    stepless_solvers = ('sdca', 'bfgs')

    for solver in solvers:
        if solver in stepless_solvers:
            # Passing a step must warn and leave the learner's step to None
            expected_warning = '^Solver "%s" has no settable step$' % solver
            with self.assertWarnsRegex(RuntimeWarning, expected_warning):
                learner = LogisticRegression(
                    solver=solver, step=1,
                    **Test.specific_solver_kwargs(solver))
                self.assertIsNone(learner.step)
            continue

        # A step given at construction is visible on the learner and on
        # its underlying solver object
        learner = LogisticRegression(
            solver=solver, step=self.float_1,
            **Test.specific_solver_kwargs(solver))
        self.assertEqual(learner.step, self.float_1)
        self.assertEqual(learner._solver_obj.step, self.float_1)

        # Updating the step afterwards is propagated the same way
        learner.step = self.float_2
        self.assertEqual(learner.step, self.float_2)
        self.assertEqual(learner._solver_obj.step, self.float_2)

        if solver == 'sgd':
            # Fitting SGD without an explicit step must warn
            expected_warning = '^SGD step needs to be tuned manually$'
            with self.assertWarnsRegex(RuntimeWarning, expected_warning):
                learner = LogisticRegression(solver='sgd')
                learner.fit(self.X, self.y)
def test_predict(self):
    """...Test LogReg prediction
    """
    # Every mapping relabels the raw -1 / 1 targets into another pair of
    # classes; predictions must come back expressed in that same pair
    label_encodings = (
        {-1: -1., 1: 1.},
        {-1: 1., 1: -1.},
        {-1: 1, 1: 0},
        {-1: 0, 1: 1},
        {-1: 'cat', 1: 'dog'},
    )
    # Expected predictions on the test samples, in the raw -1 / 1 encoding
    raw_expected = [1., 1., -1., 1., 1.]

    for encoding in label_encodings:
        relabel = np.vectorize(encoding.get)

        X_train, y_train = Test.get_train_data(n_features=12,
                                               n_samples=300, nnz=0)
        y_train = relabel(y_train)

        learner = LogisticRegression(random_state=32789, tol=1e-9)
        learner.fit(X_train, y_train)

        X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)
        expected = relabel(raw_expected)

        np.testing.assert_array_equal(learner.predict(X_test), expected)
def test_LogisticRegression_fit(self):
    """...Test LogisticRegression fit with different solvers
    and penalties
    """
    sto_seed = 179312
    raw_features, y = Test.get_train_data()

    stochastic_solvers = ('sgd', 'svrg', 'sdca')
    bfgs_penalties = ('none', 'l2')

    for fit_intercept in (True, False):
        for penalty in penalties:
            uses_binarsity = penalty == 'binarsity'

            if uses_binarsity:
                # binarize features
                binarizer = FeaturesBinarizer(n_cuts=3)
                features = binarizer.fit_transform(raw_features)
            else:
                features = raw_features

            for solver in solvers:
                # BFGS only accepts ProxZero and ProxL2sq for now
                if solver == 'bfgs' and penalty not in bfgs_penalties:
                    continue

                solver_kwargs = dict(
                    penalty=penalty,
                    tol=1e-5,
                    solver=solver,
                    verbose=False,
                    max_iter=10,
                    fit_intercept=fit_intercept,
                )

                if penalty != 'none':
                    solver_kwargs['C'] = 100

                if uses_binarsity:
                    # Block layout comes from the fitted binarizer
                    solver_kwargs['blocks_start'] = \
                        binarizer.feature_indices[:-1, ]
                    solver_kwargs['blocks_length'] = binarizer.n_values

                if solver == 'sdca':
                    solver_kwargs['sdca_ridge_strength'] = 2e-2

                if solver in stochastic_solvers:
                    solver_kwargs['random_state'] = sto_seed

                if solver == 'sgd':
                    solver_kwargs['step'] = 1.

                learner = LogisticRegression(**solver_kwargs)
                learner.fit(features, y)

                # Score with the probability of the second class
                positive_probas = learner.predict_proba(features)[:, 1]
                auc = roc_auc_score(y, positive_probas)
                self.assertGreater(
                    auc, 0.7, "solver %s with penalty %s and "
                    "intercept %s reached too low AUC" %
                    (solver, penalty, fit_intercept))
def test_decision_function(self):
    """...Test LogReg decision_function
    """
    # Fit on 300 samples, then evaluate on 5 samples drawn by the same helper
    X, y = Test.get_train_data(n_features=12, n_samples=300, nnz=0)
    learner = LogisticRegression(random_state=32789, tol=1e-13)
    learner.fit(X, y)

    # Only the test features are needed; the test labels are discarded
    X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)

    # Reference decision values, compared up to 3 decimals
    decision_function_values = np.array(
        [0.58182, 0.30026, -0.73075, 0.41864, 0.29278])
    np.testing.assert_array_almost_equal(
        learner.decision_function(X_test), decision_function_values,
        decimal=3)
def test_LogisticRegression_warm_start(self):
    """...Test LogisticRegression warm start
    """
    sto_seed = 179312
    X, y = Test.get_train_data()

    def current_coeffs(model, with_intercept):
        # Weights, followed by the intercept when one is fitted
        if with_intercept:
            return np.hstack((model.weights, model.intercept))
        return model.weights

    for solver, fit_intercept in itertools.product(solvers, [True, False]):
        solver_kwargs = dict(
            solver=solver,
            max_iter=2,
            fit_intercept=fit_intercept,
            warm_start=True,
            tol=0,
        )

        if solver == 'sdca':
            # Requesting a warm start with SDCA is refused at construction
            expected_error = '^SDCA cannot be warm started$'
            with self.assertRaisesRegex(ValueError, expected_error):
                LogisticRegression(**solver_kwargs)
            continue

        if solver in ('sgd', 'svrg'):
            solver_kwargs['random_state'] = sto_seed
        if solver == 'sgd':
            solver_kwargs['step'] = 1.

        learner = LogisticRegression(**solver_kwargs)

        learner.fit(X, y)
        coeffs_first_fit = current_coeffs(learner, fit_intercept)

        learner.fit(X, y)
        coeffs_second_fit = current_coeffs(learner, fit_intercept)

        # Thanks to warm start objective should have decreased
        objective_second = learner._solver_obj.objective(coeffs_second_fit)
        objective_first = learner._solver_obj.objective(coeffs_first_fit)
        self.assertLess(objective_second, objective_first)
def test_predict_proba(self):
    """...Test LogReg predict_proba
    """
    X_train, y_train = Test.get_train_data(n_features=12, n_samples=300,
                                           nnz=0)
    learner = LogisticRegression(random_state=32289, tol=1e-13)
    learner.fit(X_train, y_train)

    X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)

    # One row per test sample, one column per class
    expected_probas = np.array([
        [0.35851418, 0.64148582],
        [0.42549328, 0.57450672],
        [0.6749705, 0.3250295],
        [0.39684181, 0.60315819],
        [0.42732443, 0.57267557],
    ])
    actual_probas = learner.predict_proba(X_test)
    np.testing.assert_array_almost_equal(actual_probas, expected_probas,
                                         decimal=3)
def test_LogisticRegression_fit(self):
    """...Test LogisticRegression fit with different solvers
    and penalties
    """
    sto_seed = 179312
    X, y = Test.get_train_data()

    seeded_solvers = ('sgd', 'svrg', 'sdca')

    for fit_intercept in (True, False):
        for penalty in penalties:
            for solver in solvers:
                # BFGS only accepts ProxZero and ProxL2sq for now
                if solver == 'bfgs' and penalty not in ('none', 'l2'):
                    continue

                solver_kwargs = dict(
                    penalty=penalty,
                    tol=1e-5,
                    solver=solver,
                    verbose=False,
                    max_iter=10,
                    fit_intercept=fit_intercept,
                )
                if penalty != 'none':
                    solver_kwargs['C'] = 50
                if solver == 'sdca':
                    solver_kwargs['sdca_ridge_strength'] = 2e-2
                if solver in seeded_solvers:
                    solver_kwargs['random_state'] = sto_seed
                if solver == 'sgd':
                    solver_kwargs['step'] = 1.

                learner = LogisticRegression(**solver_kwargs)
                learner.fit(X, y)

                # Score with the probability of the second class
                scores = learner.predict_proba(X)[:, 1]
                auc = roc_auc_score(y, scores)
                self.assertGreater(
                    auc, 0.7, "solver %s with penalty %s and "
                    "intercept %s reached too low AUC" %
                    (solver, penalty, fit_intercept))
def test_labels_encoding(self):
    """...Test that class encoding is well done for LogReg
    """
    learner = LogisticRegression(max_iter=1)

    np.random.seed(38027)
    n_features = 3
    n_samples = 5
    X = np.random.rand(n_samples, n_features)

    # Labels already given as -1 / 1 must be left untouched
    encoded_y = np.array([1., -1., 1., -1., -1.])
    learner.fit(X, encoded_y)
    np.testing.assert_array_equal(learner.classes, [-1., 1.])
    np.testing.assert_array_equal(
        learner._encode_labels_vector(encoded_y), encoded_y)

    # 0 / 1 labels must be encoded as -1 / 1 respectively
    zero_one_y = np.array([1., 0., 1., 0., 0.])
    learner.fit(X, zero_one_y)
    np.testing.assert_array_equal(learner.classes, [0., 1.])
    np.testing.assert_array_equal(
        learner._encode_labels_vector(zero_one_y), encoded_y)

    # Text labels: only the set of classes is checked, not their order.
    # Sets are compared with assertEqual rather than assert_array_equal,
    # which would wrap each set in a 0-d object array.
    text_y = np.array(['cat', 'dog', 'cat', 'dog', 'dog'])
    learner.fit(X, text_y)
    self.assertEqual(set(learner.classes), {'cat', 'dog'})

    # The sign factors make the check independent of which text label
    # is mapped to +1: the encoding must match encoded_y up to a global
    # sign flip
    encoded_text_y = learner._encode_labels_vector(text_y)
    np.testing.assert_array_equal(
        encoded_text_y,
        encoded_y * np.sign(encoded_text_y[0]) * np.sign(encoded_y[0]))
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.linear_model import LogisticRegression as LogRegScikit

from tick.dataset import fetch_tick_dataset
from tick.inference import LogisticRegression as LogRegTick

# Fetch the two archives used for fitting and for evaluation
# (presumably the train / test splits of the adult dataset, judging by
# the file names)
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

# Both learners use an l1 penalty and the same tolerance
clf_tick = LogRegTick(C=1e5, penalty='l1', tol=1e-8)
clf_scikit = LogRegScikit(penalty='l1', tol=1e-8)

# Time the fit of each learner on the same data.
# NOTE(review): `time` is not imported in the visible part of this
# script -- confirm that `from time import time` exists above.
t1 = time()
clf_tick.fit(train_set[0], train_set[1])
t_tick = time() - t1

t1 = time()
clf_scikit.fit(train_set[0], train_set[1])
t_scikit = time() - t1

# Predicted probabilities on the evaluation set
pred_tick = clf_tick.predict_proba(test_set[0])
pred_scikit = clf_scikit.predict_proba(test_set[0])

# ROC curves are built from the second probability column
fpr_tick, tpr_tick, _ = roc_curve(test_set[1], pred_tick[:, 1])
fpr_scikit, tpr_scikit, _ = roc_curve(test_set[1], pred_scikit[:, 1])

# First cell of a 2 x 2 grid of axes
plt.figure(figsize=(10, 8))
ax1 = plt.subplot2grid((2, 2), (0, 0))
==============================================
Binary classification with logistic regression
==============================================

This code performs binary classification on the adult dataset with the
logistic regression learner (`tick.inference.LogisticRegression`).
"""

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

from tick.inference import LogisticRegression
from tick.dataset import fetch_tick_dataset

# Fetch the archives used for fitting and for evaluation
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

# Fit a learner built with its default parameters
learner = LogisticRegression()
learner.fit(train_set[0], train_set[1])

# The ROC curve is built from the second column of predicted probabilities
predictions = learner.predict_proba(test_set[0])
fpr, tpr, _ = roc_curve(test_set[1], predictions[:, 1])

# Plot the curve and report the area under it in the title
plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, lw=2)
plt.title("ROC curve on adult dataset (area = {:.2f})".format(auc(fpr, tpr)))
plt.ylabel("True Positive Rate")
plt.xlabel("False Positive Rate")
plt.show()