def test_logistic_cv_mock_scorer():

    class MockScorer(object):
        def __init__(self):
            self.calls = 0
            self.scores = [0.1, 0.4, 0.8, 0.5]

        def __call__(self, model, X, y, sample_weight=None):
            score = self.scores[self.calls % len(self.scores)]
            self.calls += 1
            return score

    mock_scorer = MockScorer()
    Cs = [1, 2, 3, 4]
    cv = 2

    lr = LogisticRegressionCV(Cs=Cs, scoring=mock_scorer, cv=cv)
    lr.fit(X, Y1)

    # Cs[2] has the highest score (0.8) from MockScorer
    assert lr.C_[0] == Cs[2]

    # scorer called 8 times (cv * len(Cs))
    assert mock_scorer.calls == cv * len(Cs)

    # reset mock_scorer
    mock_scorer.calls = 0
    with pytest.warns(ChangedBehaviorWarning):
        custom_score = lr.score(X, lr.predict(X))

    assert custom_score == mock_scorer.scores[0]
    assert mock_scorer.calls == 1
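# A minimal sketch of the same idea with a real scorer instead of a mock:
# LogisticRegressionCV accepts any callable with the signature
# scorer(estimator, X, y), such as one built via sklearn.metrics.make_scorer.
# The dataset and variable names below are illustrative, not from the test
# suite above.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import balanced_accuracy_score, make_scorer

X_demo, y_demo = make_classification(n_samples=100, n_features=5,
                                     random_state=0)
demo_scorer = make_scorer(balanced_accuracy_score)
demo_clf = LogisticRegressionCV(Cs=[0.1, 1.0, 10.0], cv=3,
                                scoring=demo_scorer)
demo_clf.fit(X_demo, y_demo)
print(demo_clf.C_)  # C chosen by cross-validated balanced accuracy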
def test_logistic_cv_score_does_not_warn_by_default():
    lr = LogisticRegressionCV(cv=2)
    lr.fit(X, Y1)

    with pytest.warns(None) as record:
        lr.score(X, lr.predict(X))
    assert len(record) == 0
def test_multinomial_logistic_regression_string_inputs():
    # Test with string labels for LogisticRegression(CV)
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples,
                                   n_features=n_features,
                                   n_classes=n_classes,
                                   n_informative=3,
                                   random_state=0)
    y_str = LabelEncoder().fit(['bar', 'baz', 'foo']).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1

    # Test for string labels
    lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
    lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')

    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)

    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_equal(sorted(lr_cv_str.classes_), ['bar', 'baz', 'foo'])

    # The predictions should be in original labels
    assert_equal(sorted(np.unique(lr_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])

    # Make sure class weights can be given with string labels
    lr_cv_str = LogisticRegression(
        solver='lbfgs',
        class_weight={'bar': 1, 'baz': 2, 'foo': 0},
        multi_class='multinomial').fit(X_ref, y_str)
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz'])
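# Side-note sketch of how the string labels above are produced: LabelEncoder
# sorts its classes alphabetically, mapping ['bar', 'baz', 'foo'] to
# [0, 1, 2], so inverse_transform turns the integer targets from
# make_classification back into strings. Values below are illustrative.
import numpy as np
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(['bar', 'baz', 'foo'])
print(le.inverse_transform(np.array([0, 1, 2, 1])))
# -> ['bar' 'baz' 'foo' 'baz']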
# Assumed import: in the original module, SKLModel presumably aliases the
# wrapped scikit-learn estimator, e.g.:
from sklearn.linear_model import LogisticRegressionCV as SKLModel


class LogisticRegressionCVImpl:
    def __init__(self, Cs=10, fit_intercept=True, cv=3, dual=False,
                 penalty='l2', scoring=None, solver='lbfgs', tol=0.0001,
                 max_iter=100, class_weight='balanced', n_jobs=None,
                 verbose=0, refit=True, intercept_scaling=1.0,
                 multi_class='ovr', random_state=None):
        self._hyperparams = {
            'Cs': Cs,
            'fit_intercept': fit_intercept,
            'cv': cv,
            'dual': dual,
            'penalty': penalty,
            'scoring': scoring,
            'solver': solver,
            'tol': tol,
            'max_iter': max_iter,
            'class_weight': class_weight,
            'n_jobs': n_jobs,
            'verbose': verbose,
            'refit': refit,
            'intercept_scaling': intercept_scaling,
            'multi_class': multi_class,
            'random_state': random_state}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        # Delegate to the wrapped estimator; y is optional to match the
        # generic wrapper interface, though LogisticRegressionCV requires it.
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
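# A minimal usage sketch of the wrapper above, assuming SKLModel is
# LogisticRegressionCV as imported above; the toy data is illustrative.
from sklearn.datasets import make_classification

X_toy, y_toy = make_classification(n_samples=80, n_features=4, random_state=1)
impl = LogisticRegressionCVImpl(Cs=5, cv=3).fit(X_toy, y_toy)
print(impl.predict(X_toy[:5]))
print(impl.predict_proba(X_toy[:5]).shape)  # (5, 2) for binary labels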
# Imports consolidated at the top of the snippet
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import accuracy_score, f1_score

# Random Forest
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
rf_clf.fit(X_train, y_train)
rf_predictions = rf_clf.predict(X_test)
score = accuracy_score(y_test, rf_predictions)
f_score = f1_score(y_test, rf_predictions, average='micro')
print("The accuracy score (Random Forest) is:", score)
print("The F score-Micro (Random Forest) is:", f_score)

# Logistic Regression
lr_classifier = LogisticRegressionCV()
lr_classifier.fit(X_train, y_train)
lr_predictions = lr_classifier.predict(X_test)
score = accuracy_score(y_test, lr_predictions)
f_score = f1_score(y_test, lr_predictions, average='micro')
print("The accuracy score (Logistic Regression) is:", score)
print("The F score-Micro (Logistic Regression) is:", f_score)

# Confusion matrix for the Random Forest predictions
cnf_matrix = metrics.confusion_matrix(y_test, rf_predictions)
class_names = [0, 1]  # names of classes
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
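# The snippet above sets up tick marks but never draws the matrix; a minimal
# completion using the seaborn import it declares might look like this
# (the annot/fmt/cmap choices are assumptions, not from the original):
sns.heatmap(cnf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion matrix (Random Forest)')
plt.show()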