def test_LogisticRegression_fit(self):
    """...Test LogisticRegression fit with different solvers and penalties
    """
    sto_seed = 179312
    raw_features, y = Test.get_train_data()

    for fit_intercept in [True, False]:
        for penalty in penalties:
            # The binarsity penalty works on one-hot encoded (binarized)
            # features, so transform them first; other penalties use the
            # raw features directly.
            if penalty == 'binarsity':
                binarizer = FeaturesBinarizer(n_cuts=3)
                features = binarizer.fit_transform(raw_features)
            else:
                features = raw_features

            for solver in solvers:
                # BFGS only accepts ProxZero and ProxL2sq for now
                if solver == 'bfgs' and penalty not in ['none', 'l2']:
                    continue

                # Base settings shared by every solver/penalty combination
                kwargs = dict(penalty=penalty, tol=1e-5, solver=solver,
                              verbose=False, max_iter=10,
                              fit_intercept=fit_intercept)
                if penalty != 'none':
                    kwargs['C'] = 100
                if penalty == 'binarsity':
                    # Block structure of the binarized features
                    kwargs['blocks_start'] = binarizer.feature_indices[:-1, ]
                    kwargs['blocks_length'] = binarizer.n_values
                if solver == 'sdca':
                    kwargs['sdca_ridge_strength'] = 2e-2
                if solver in ['sgd', 'svrg', 'sdca']:
                    # Stochastic solvers are seeded for reproducibility
                    kwargs['random_state'] = sto_seed
                if solver == 'sgd':
                    kwargs['step'] = 1.

                learner = LogisticRegression(**kwargs)
                learner.fit(features, y)
                probas = learner.predict_proba(features)[:, 1]
                auc = roc_auc_score(y, probas)
                self.assertGreater(
                    auc, 0.7, "solver %s with penalty %s and "
                    "intercept %s reached too low AUC" %
                    (solver, penalty, fit_intercept))
def test_predict_proba(self):
    """...Test LogReg predict_proba
    """
    # Train on a fixed synthetic dataset with a fixed seed so the
    # predicted probabilities below are reproducible.
    X_train, y_train = Test.get_train_data(n_features=12, n_samples=300,
                                           nnz=0)
    clf = LogisticRegression(random_state=32289, tol=1e-13)
    clf.fit(X_train, y_train)

    X_test, _ = Test.get_train_data(n_features=12, n_samples=5, nnz=0)

    # Reference probabilities: one (P(y=0), P(y=1)) row per test sample
    expected = np.array([[0.35851418, 0.64148582],
                         [0.42549328, 0.57450672],
                         [0.6749705, 0.3250295],
                         [0.39684181, 0.60315819],
                         [0.42732443, 0.57267557]])
    np.testing.assert_array_almost_equal(clf.predict_proba(X_test),
                                         expected, decimal=3)
def test_LogisticRegression_fit(self):
    """...Test LogisticRegression fit with different solvers and penalties
    """
    seed = 179312
    X, y = Test.get_train_data()

    for fit_intercept in [True, False]:
        for penalty in penalties:
            for solver in solvers:
                # BFGS only accepts ProxZero and ProxL2sq for now
                if solver == 'bfgs' and penalty not in ['none', 'l2']:
                    continue

                # Common settings for every solver/penalty combination
                params = {
                    'penalty': penalty,
                    'tol': 1e-5,
                    'solver': solver,
                    'verbose': False,
                    'max_iter': 10,
                    'fit_intercept': fit_intercept,
                }
                if penalty != 'none':
                    params['C'] = 50
                if solver == 'sdca':
                    params['sdca_ridge_strength'] = 2e-2
                if solver in ['sgd', 'svrg', 'sdca']:
                    # Stochastic solvers are seeded for reproducibility
                    params['random_state'] = seed
                if solver == 'sgd':
                    params['step'] = 1.

                learner = LogisticRegression(**params)
                learner.fit(X, y)
                probas = learner.predict_proba(X)[:, 1]
                auc = roc_auc_score(y, probas)
                self.assertGreater(
                    auc, 0.7, "solver %s with penalty %s and "
                    "intercept %s reached too low AUC" %
                    (solver, penalty, fit_intercept))
# Benchmark script: compare tick's and scikit-learn's L1-penalized logistic
# regression on the adult dataset (fit times, model weights, ROC curves).
# NOTE(review): this chunk appears to continue past the visible lines — the
# figure built below (and t_tick / t_scikit) is presumably completed/used
# further down; confirm against the full file.
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

# Same penalty and tolerance for both learners; tick uses an explicit
# C=1e5 while scikit keeps its own default — assumed intentional, verify.
clf_tick = LogRegTick(C=1e5, penalty='l1', tol=1e-8)
clf_scikit = LogRegScikit(penalty='l1', tol=1e-8)

# Time both fits on the same training data
t1 = time()
clf_tick.fit(train_set[0], train_set[1])
t_tick = time() - t1
t1 = time()
clf_scikit.fit(train_set[0], train_set[1])
t_scikit = time() - t1

# ROC curves on the held-out test set; column 1 is the positive class
pred_tick = clf_tick.predict_proba(test_set[0])
pred_scikit = clf_scikit.predict_proba(test_set[0])
fpr_tick, tpr_tick, _ = roc_curve(test_set[1], pred_tick[:, 1])
fpr_scikit, tpr_scikit, _ = roc_curve(test_set[1], pred_scikit[:, 1])

# 2x2 figure: top row shows the fitted weights of each model side by side
plt.figure(figsize=(10, 8))
ax1 = plt.subplot2grid((2, 2), (0, 0))
plt.stem(clf_tick.weights)
plt.title(r'Model-weights in $\mathtt{tick}$', fontsize=16)
plt.ylim((-2, 2.5))
ax2 = plt.subplot2grid((2, 2), (0, 1))
plt.stem(np.ravel(clf_scikit.coef_))
# plt.legend()
==============================================
Binary classification with logistic regression
==============================================

This code performs binary classification on the adult dataset with the
logistic regression learner (`tick.inference.LogisticRegression`).
"""

import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, auc

from tick.inference import LogisticRegression
from tick.dataset import fetch_tick_dataset

# Fetch the train and test splits; each set is a (features, labels) pair.
train_set = fetch_tick_dataset('binary/adult/adult.trn.bz2')
test_set = fetch_tick_dataset('binary/adult/adult.tst.bz2')

# Fit the learner with its default settings.
learner = LogisticRegression()
learner.fit(train_set[0], train_set[1])

# Score the held-out test set; column 1 is the positive-class probability.
predictions = learner.predict_proba(test_set[0])

fpr, tpr, _ = roc_curve(test_set[1], predictions[:, 1])

# Plot the ROC curve with its area under the curve in the title.
plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, lw=2)
plt.title("ROC curve on adult dataset (area = {:.2f})".format(auc(fpr, tpr)))
plt.ylabel("True Positive Rate")
plt.xlabel("False Positive Rate")
plt.show()