Beispiel #1
0
def test_logreg_cv_penalty():
    # Test that the correct penalty is passed to the final fit.
    X, y = make_classification(n_samples=50, n_features=20, random_state=0)
    lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear')
    lr_cv.fit(X, y)
    lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
    lr.fit(X, y)
    assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_))
def test_logreg_cv_penalty():
    # Test that the correct penalty is passed to the final fit.
    X, y = make_classification(n_samples=50, n_features=20, random_state=0)
    lr_cv = LogisticRegressionCV(penalty="l1", Cs=[1.0], solver='liblinear')
    lr_cv.fit(X, y)
    lr = LogisticRegression(penalty="l1", C=1.0, solver='liblinear')
    lr.fit(X, y)
    assert_equal(np.count_nonzero(lr_cv.coef_), np.count_nonzero(lr.coef_))
Beispiel #3
0
def test_logistic_regression_multinomial():
    # Tests for the multinomial option in logistic regression

    # Some basic attributes of Logistic Regression
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes,
                               random_state=0)

    # 'lbfgs' is used as a referenced
    solver = 'lbfgs'
    ref_i = LogisticRegression(solver=solver, multi_class='multinomial')
    ref_w = LogisticRegression(solver=solver,
                               multi_class='multinomial',
                               fit_intercept=False)
    ref_i.fit(X, y)
    ref_w.fit(X, y)
    assert_array_equal(ref_i.coef_.shape, (n_classes, n_features))
    assert_array_equal(ref_w.coef_.shape, (n_classes, n_features))
    for solver in ['sag', 'newton-cg']:
        clf_i = LogisticRegression(solver=solver,
                                   multi_class='multinomial',
                                   random_state=42,
                                   max_iter=1000,
                                   tol=1e-6)
        clf_w = LogisticRegression(solver=solver,
                                   multi_class='multinomial',
                                   random_state=42,
                                   max_iter=1000,
                                   tol=1e-6,
                                   fit_intercept=False)
        clf_i.fit(X, y)
        clf_w.fit(X, y)
        assert_array_equal(clf_i.coef_.shape, (n_classes, n_features))
        assert_array_equal(clf_w.coef_.shape, (n_classes, n_features))

        # Compare solutions between lbfgs and the other solvers
        assert_almost_equal(ref_i.coef_, clf_i.coef_, decimal=3)
        assert_almost_equal(ref_w.coef_, clf_w.coef_, decimal=3)
        assert_almost_equal(ref_i.intercept_, clf_i.intercept_, decimal=3)

    # Test that the path give almost the same results. However since in this
    # case we take the average of the coefs after fitting across all the
    # folds, it need not be exactly the same.
    for solver in ['lbfgs', 'newton-cg', 'sag']:
        clf_path = LogisticRegressionCV(solver=solver,
                                        max_iter=2000,
                                        tol=1e-6,
                                        multi_class='multinomial',
                                        Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, ref_i.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, ref_i.intercept_, decimal=3)
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    # Test the liblinear fails when class_weight of type dict is
    # provided, when it is multiclass. However it can handle
    # binary problems.
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=auto
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='auto')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='auto')
    clf_lib.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
def test_ova_iris():
    """Test that our OvA implementation is correct using the iris dataset."""
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # Use pre-defined fold as folds generated for different y
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    clf1 = LogisticRegressionCV(cv=cv)
    target[target == 0] = 1
    clf1.fit(train, target)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    assert_equal(len(clf.classes_), 3)

    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))
Beispiel #6
0
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if (y is not None):
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Beispiel #7
0
def func1():
    # n_features 设置多少个特征,多维的特征
    # n_informative 有几个属性是有特别关联的
    # n_targets 有多少个输出
    #X,y = datasets.make_regression(n_samples=100, n_features=100, n_informative=10, n_targets=1, noise=0.0, bias=0.0, random_state=None)
    X, y = datasets.make_regression(n_samples=10, n_features=2)

    print('line X ', X)
    print('line y ', y)

    #plt.scatter(X[:,0],y)
    #plt.show()

    lm = LinearRegression()
    lm.fit(X, y)
    X_test = [[0.69803203, 0.62000084]]
    y_predict = lm.predict(X_test)
    print(y_predict)

    X, y = datasets.make_moons(10, noise=0.2)
    print('logis ', X)
    print('logis ', y)
    #X_train,X_test,y_train,y_test = train_test_split(X,y)
    logis_regre = LogisticRegression()
    #y = [[-140.66643209],[114.7982953],[103.11834249],[-177.27466722],[24.48139711],[-30.44916242],[38.96288527],[-57.62121771],[82.14111136],[90.54966151]]
    logis_regre.fit(X, y)
    print(logis_regre.predict(X_test))

    logis_regre = LogisticRegressionCV()
    logis_regre.fit(X, y)
    print(logis_regre.predict(X_test))
def test_logistic_regression_multinomial():
    # Tests for the multinomial option in logistic regression

    # Some basic attributes of Logistic Regression
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes,
                               random_state=0)
    clf_int = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf_int.fit(X, y)
    assert_array_equal(clf_int.coef_.shape, (n_classes, n_features))

    clf_wint = LogisticRegression(solver='lbfgs',
                                  multi_class='multinomial',
                                  fit_intercept=False)
    clf_wint.fit(X, y)
    assert_array_equal(clf_wint.coef_.shape, (n_classes, n_features))

    # Similar tests for newton-cg solver option
    clf_ncg_int = LogisticRegression(solver='newton-cg',
                                     multi_class='multinomial')
    clf_ncg_int.fit(X, y)
    assert_array_equal(clf_ncg_int.coef_.shape, (n_classes, n_features))

    clf_ncg_wint = LogisticRegression(solver='newton-cg',
                                      fit_intercept=False,
                                      multi_class='multinomial')
    clf_ncg_wint.fit(X, y)
    assert_array_equal(clf_ncg_wint.coef_.shape, (n_classes, n_features))

    # Compare solutions between lbfgs and newton-cg
    assert_almost_equal(clf_int.coef_, clf_ncg_int.coef_, decimal=3)
    assert_almost_equal(clf_wint.coef_, clf_ncg_wint.coef_, decimal=3)
    assert_almost_equal(clf_int.intercept_, clf_ncg_int.intercept_, decimal=3)

    # Test that the path give almost the same results. However since in this
    # case we take the average of the coefs after fitting across all the
    # folds, it need not be exactly the same.
    for solver in ['lbfgs', 'newton-cg']:
        clf_path = LogisticRegressionCV(solver=solver,
                                        multi_class='multinomial',
                                        Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, clf_int.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, clf_int.intercept_, decimal=3)
Beispiel #9
0
def test_logistic_cv_score_does_not_warn_by_default():
    lr = LogisticRegressionCV(cv=2)
    lr.fit(X, Y1)

    with pytest.warns(None) as record:
        lr.score(X, lr.predict(X))
    assert len(record) == 0
Beispiel #10
0
def test_logistic_regression_multinomial():
    # Tests for the multinomial option in logistic regression

    # Some basic attributes of Logistic Regression
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes, random_state=0)

    # 'lbfgs' is used as a referenced
    solver = 'lbfgs'
    ref_i = LogisticRegression(solver=solver, multi_class='multinomial')
    ref_w = LogisticRegression(solver=solver, multi_class='multinomial',
                               fit_intercept=False)
    ref_i.fit(X, y)
    ref_w.fit(X, y)
    assert_array_equal(ref_i.coef_.shape, (n_classes, n_features))
    assert_array_equal(ref_w.coef_.shape, (n_classes, n_features))
    for solver in ['sag', 'saga', 'newton-cg']:
        clf_i = LogisticRegression(solver=solver, multi_class='multinomial',
                                   random_state=42, max_iter=2000, tol=1e-7,
                                   )
        clf_w = LogisticRegression(solver=solver, multi_class='multinomial',
                                   random_state=42, max_iter=2000, tol=1e-7,
                                   fit_intercept=False)
        clf_i.fit(X, y)
        clf_w.fit(X, y)
        assert_array_equal(clf_i.coef_.shape, (n_classes, n_features))
        assert_array_equal(clf_w.coef_.shape, (n_classes, n_features))

        # Compare solutions between lbfgs and the other solvers
        assert_almost_equal(ref_i.coef_, clf_i.coef_, decimal=3)
        assert_almost_equal(ref_w.coef_, clf_w.coef_, decimal=3)
        assert_almost_equal(ref_i.intercept_, clf_i.intercept_, decimal=3)

    # Test that the path give almost the same results. However since in this
    # case we take the average of the coefs after fitting across all the
    # folds, it need not be exactly the same.
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        clf_path = LogisticRegressionCV(solver=solver, max_iter=2000, tol=1e-6,
                                        multi_class='multinomial', Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, ref_i.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, ref_i.intercept_, decimal=3)
def test_n_iter():
    # Test that self.n_iter_ has the correct format.
    X, y = iris.data, iris.target
    y_bin = y.copy()
    y_bin[y_bin == 2] = 0

    n_Cs = 4
    n_cv_fold = 2

    for solver in ['newton-cg', 'liblinear', 'sag', 'lbfgs']:
        # OvR case
        n_classes = 1 if solver == 'liblinear' else np.unique(y).shape[0]
        clf = LogisticRegression(tol=1e-2, multi_class='ovr',
                                 solver=solver, C=1.,
                                 random_state=42, max_iter=100)
        clf.fit(X, y)
        assert_equal(clf.n_iter_.shape, (n_classes,))

        n_classes = np.unique(y).shape[0]
        clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr',
                                   solver=solver, Cs=n_Cs, cv=n_cv_fold,
                                   random_state=42, max_iter=100)
        clf.fit(X, y)
        assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs))
        clf.fit(X, y_bin)
        assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs))

        # multinomial case
        n_classes = 1
        if solver in ('liblinear', 'sag'):
            break

        clf = LogisticRegression(tol=1e-2, multi_class='multinomial',
                                 solver=solver, C=1.,
                                 random_state=42, max_iter=100)
        clf.fit(X, y)
        assert_equal(clf.n_iter_.shape, (n_classes,))

        clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial',
                                   solver=solver, Cs=n_Cs, cv=n_cv_fold,
                                   random_state=42, max_iter=100)
        clf.fit(X, y)
        assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs))
        clf.fit(X, y_bin)
        assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs))
def test_logistic_regression_multinomial():
    # Tests for the multinomial option in logistic regression

    # Some basic attributes of Logistic Regression
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes, random_state=0)
    clf_int = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf_int.fit(X, y)
    assert_array_equal(clf_int.coef_.shape, (n_classes, n_features))

    clf_wint = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                                  fit_intercept=False)
    clf_wint.fit(X, y)
    assert_array_equal(clf_wint.coef_.shape, (n_classes, n_features))

    # Similar tests for newton-cg solver option
    clf_ncg_int = LogisticRegression(solver='newton-cg',
                                     multi_class='multinomial')
    clf_ncg_int.fit(X, y)
    assert_array_equal(clf_ncg_int.coef_.shape, (n_classes, n_features))

    clf_ncg_wint = LogisticRegression(solver='newton-cg', fit_intercept=False,
                                      multi_class='multinomial')
    clf_ncg_wint.fit(X, y)
    assert_array_equal(clf_ncg_wint.coef_.shape, (n_classes, n_features))

    # Compare solutions between lbfgs and newton-cg
    assert_almost_equal(clf_int.coef_, clf_ncg_int.coef_, decimal=3)
    assert_almost_equal(clf_wint.coef_, clf_ncg_wint.coef_, decimal=3)
    assert_almost_equal(clf_int.intercept_, clf_ncg_int.intercept_, decimal=3)

    # Test that the path give almost the same results. However since in this
    # case we take the average of the coefs after fitting across all the
    # folds, it need not be exactly the same.
    for solver in ['lbfgs', 'newton-cg']:
        clf_path = LogisticRegressionCV(solver=solver,
                                        multi_class='multinomial', Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, clf_int.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, clf_int.intercept_, decimal=3)
def test_logistic_cv_sparse():
    X, y = make_classification(n_samples=50, n_features=5, random_state=0)
    X[X < 1.0] = 0.0
    csr = sp.csr_matrix(X)

    clf = LogisticRegressionCV(fit_intercept=True)
    clf.fit(X, y)
    clfs = LogisticRegressionCV(fit_intercept=True)
    clfs.fit(csr, y)
    assert_array_almost_equal(clfs.coef_, clf.coef_)
    assert_array_almost_equal(clfs.intercept_, clf.intercept_)
    assert_equal(clfs.C_, clf.C_)
Beispiel #14
0
def test_ova_iris():
    """Test that our OvA implementation is correct using the iris dataset."""
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # Use pre-defined fold as folds generated for different y
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    clf1 = LogisticRegressionCV(cv=cv)
    target[target == 0] = 1
    clf1.fit(train, target)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    assert_equal(len(clf.classes_), 3)

    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))
Beispiel #15
0
def test_logistic_cv_mock_scorer():

    class MockScorer(object):
        def __init__(self):
            self.calls = 0
            self.scores = [0.1, 0.4, 0.8, 0.5]

        def __call__(self, model, X, y, sample_weight=None):
            score = self.scores[self.calls % len(self.scores)]
            self.calls += 1
            return score

    mock_scorer = MockScorer()
    Cs = [1, 2, 3, 4]
    cv = 2

    lr = LogisticRegressionCV(Cs=Cs, scoring=mock_scorer, cv=cv)
    lr.fit(X, Y1)

    # Cs[2] has the highest score (0.8) from MockScorer
    assert lr.C_[0] == Cs[2]

    # scorer called 8 times (cv*len(Cs))
    assert mock_scorer.calls == cv * len(Cs)

    # reset mock_scorer
    mock_scorer.calls = 0
    with pytest.warns(ChangedBehaviorWarning):
        custom_score = lr.score(X, lr.predict(X))

    assert custom_score == mock_scorer.scores[0]
    assert mock_scorer.calls == 1
Beispiel #16
0
def estimate_weights_logistic_regresssion(X_s, X_t):
    """ estimate a logistic regressor to predict the probability of a sample
    to be generated by one class or the other.
    If one class is over or under represented weights will be adapted.

    Parameters:
        X_s: samples from the source domain
        X_t: samples from the target domain

    Returns:
        weigths for X_s """
    X_all, all_labels = prepare_data_for_weights_estimation(X_s, X_t)

    kf = KFold(X_all.shape[0], 10, shuffle=True)
    best_lr = LogisticRegressionCV(class_weight="auto",
                                   Cs=np.logspace(4, 8, 10),
                                   fit_intercept=False)
    best_lr.fit(X_all, all_labels)

    weights = X_s.shape[0] / X_t.shape[0] * np.exp(
        np.dot(X_s, best_lr.coef_.T) + best_lr.intercept_)
    return weights
Beispiel #17
0
def estimate_weights_logistic_regresssion(X_s, X_t):
    """ estimate a logistic regressor to predict the probability of a sample
    to be generated by one class or the other.
    If one class is over or under represented weights will be adapted.

    Parameters:
        X_s: samples from the source domain
        X_t: samples from the target domain

    Returns:
        weigths for X_s """
    X_all, all_labels = prepare_data_for_weights_estimation(X_s, X_t)

    kf = KFold(X_all.shape[0], 10, shuffle=True)
    best_lr = LogisticRegressionCV(class_weight="auto",
                                   Cs=np.logspace(4, 8, 10),
                                   fit_intercept=False)
    best_lr.fit(X_all, all_labels)

    weights = X_s.shape[0] / X_t.shape[0] * np.exp(np.dot(X_s, best_lr.coef_.T)
                                                   + best_lr.intercept_)
    return weights
Beispiel #18
0
class LogisticRegressionCVImpl():
    def __init__(self,
                 Cs=10,
                 fit_intercept=True,
                 cv=3,
                 dual=False,
                 penalty='l2',
                 scoring=None,
                 solver='lbfgs',
                 tol=0.0001,
                 max_iter=100,
                 class_weight='balanced',
                 n_jobs=None,
                 verbose=0,
                 refit=True,
                 intercept_scaling=1.0,
                 multi_class='ovr',
                 random_state=None):
        self._hyperparams = {
            'Cs': Cs,
            'fit_intercept': fit_intercept,
            'cv': cv,
            'dual': dual,
            'penalty': penalty,
            'scoring': scoring,
            'solver': solver,
            'tol': tol,
            'max_iter': max_iter,
            'class_weight': class_weight,
            'n_jobs': n_jobs,
            'verbose': verbose,
            'refit': refit,
            'intercept_scaling': intercept_scaling,
            'multi_class': multi_class,
            'random_state': random_state
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
Beispiel #19
0
def test_logistic_cv():
    # test for LogisticRegressionCV object
    n_samples, n_features = 50, 5
    rng = np.random.RandomState(0)
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    lr_cv = LogisticRegressionCV(Cs=[1.0], fit_intercept=False, solver="liblinear")
    lr_cv.fit(X_ref, y)
    lr = LogisticRegression(C=1.0, fit_intercept=False)
    lr.fit(X_ref, y)
    assert_array_almost_equal(lr.coef_, lr_cv.coef_)

    assert_array_equal(lr_cv.coef_.shape, (1, n_features))
    assert_array_equal(lr_cv.classes_, [-1, 1])
    assert_equal(len(lr_cv.classes_), 2)

    coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values()))
    assert_array_equal(coefs_paths.shape, (1, 3, 1, n_features))
    assert_array_equal(lr_cv.Cs_.shape, (1,))
    scores = np.asarray(list(lr_cv.scores_.values()))
    assert_array_equal(scores.shape, (1, 3, 1))
Beispiel #20
0
def test_logistic_cv():
    # test for LogisticRegressionCV object
    n_samples, n_features = 50, 5
    rng = np.random.RandomState(0)
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    lr_cv = LogisticRegressionCV(Cs=[1.], fit_intercept=False,
                                 solver='liblinear')
    lr_cv.fit(X_ref, y)
    lr = LogisticRegression(C=1., fit_intercept=False)
    lr.fit(X_ref, y)
    assert_array_almost_equal(lr.coef_, lr_cv.coef_)

    assert_array_equal(lr_cv.coef_.shape, (1, n_features))
    assert_array_equal(lr_cv.classes_, [-1, 1])
    assert_equal(len(lr_cv.classes_), 2)

    coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values()))
    assert_array_equal(coefs_paths.shape, (1, 3, 1, n_features))
    assert_array_equal(lr_cv.Cs_.shape, (1,))
    scores = np.asarray(list(lr_cv.scores_.values()))
    assert_array_equal(scores.shape, (1, 3, 1))
Beispiel #21
0
def test_logistic_cv_sparse():
    X, y = make_classification(n_samples=50, n_features=5, random_state=0)
    X[X < 1.0] = 0.0
    csr = sp.csr_matrix(X)

    clf = LogisticRegressionCV(fit_intercept=True)
    clf.fit(X, y)
    clfs = LogisticRegressionCV(fit_intercept=True)
    clfs.fit(csr, y)
    assert_array_almost_equal(clfs.coef_, clf.coef_)
    assert_array_almost_equal(clfs.intercept_, clf.intercept_)
    assert_equal(clfs.C_, clf.C_)
Beispiel #22
0
def test_error():
    # Test for appropriate exception on errors
    msg = "Penalty term must be positive"
    assert_raise_message(ValueError, msg,
                         LogisticRegression(C=-1).fit, X, Y1)
    assert_raise_message(ValueError, msg,
                         LogisticRegression(C="test").fit, X, Y1)

    msg = "is not a valid scoring value"
    assert_raise_message(ValueError, msg,
                         LogisticRegressionCV(scoring='bad-scorer', cv=2).fit,
                         X, Y1)

    for LR in [LogisticRegression, LogisticRegressionCV]:
        msg = "Tolerance for stopping criteria must be positive"
        assert_raise_message(ValueError, msg, LR(tol=-1).fit, X, Y1)
        assert_raise_message(ValueError, msg, LR(tol="test").fit, X, Y1)

        msg = "Maximum number of iteration must be positive"
        assert_raise_message(ValueError, msg, LR(max_iter=-1).fit, X, Y1)
        assert_raise_message(ValueError, msg, LR(max_iter="test").fit, X, Y1)
Beispiel #23
0
 def __init__(self,
              Cs=10,
              fit_intercept=True,
              cv=3,
              dual=False,
              penalty='l2',
              scoring=None,
              solver='lbfgs',
              tol=0.0001,
              max_iter=100,
              class_weight='balanced',
              n_jobs=None,
              verbose=0,
              refit=True,
              intercept_scaling=1.0,
              multi_class='ovr',
              random_state=None):
     self._hyperparams = {
         'Cs': Cs,
         'fit_intercept': fit_intercept,
         'cv': cv,
         'dual': dual,
         'penalty': penalty,
         'scoring': scoring,
         'solver': solver,
         'tol': tol,
         'max_iter': max_iter,
         'class_weight': class_weight,
         'n_jobs': n_jobs,
         'verbose': verbose,
         'refit': refit,
         'intercept_scaling': intercept_scaling,
         'multi_class': multi_class,
         'random_state': random_state
     }
     self._wrapped_model = SKLModel(**self._hyperparams)
Beispiel #24
0
def test_logistic_regressioncv_class_weights():
    for weight in [{0: 0.1, 1: 0.2}, {0: 0.1, 1: 0.2, 2: 0.5}]:
        n_classes = len(weight)
        for class_weight in (weight, 'balanced'):
            X, y = make_classification(n_samples=30, n_features=3,
                                       n_repeated=0,
                                       n_informative=3, n_redundant=0,
                                       n_classes=n_classes, random_state=0)

            clf_lbf = LogisticRegressionCV(solver='lbfgs', Cs=1,
                                           fit_intercept=False,
                                           class_weight=class_weight)
            clf_ncg = LogisticRegressionCV(solver='newton-cg', Cs=1,
                                           fit_intercept=False,
                                           class_weight=class_weight)
            clf_lib = LogisticRegressionCV(solver='liblinear', Cs=1,
                                           fit_intercept=False,
                                           class_weight=class_weight)
            clf_sag = LogisticRegressionCV(solver='sag', Cs=1,
                                           fit_intercept=False,
                                           class_weight=class_weight,
                                           tol=1e-5, max_iter=10000,
                                           random_state=0)
            clf_saga = LogisticRegressionCV(solver='saga', Cs=1,
                                            fit_intercept=False,
                                            class_weight=class_weight,
                                            tol=1e-5, max_iter=10000,
                                            random_state=0)
            clf_lbf.fit(X, y)
            clf_ncg.fit(X, y)
            clf_lib.fit(X, y)
            clf_sag.fit(X, y)
            clf_saga.fit(X, y)
            assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
            assert_array_almost_equal(clf_ncg.coef_, clf_lbf.coef_, decimal=4)
            assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
            assert_array_almost_equal(clf_saga.coef_, clf_lbf.coef_, decimal=4)
Beispiel #25
0
def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # The cv indices from stratified kfold (where stratification is done based
    # on the fine-grained iris classes, i.e, before the classes 0 and 1 are
    # conflated) is used for both clf and clf1
    n_cv = 2
    cv = StratifiedKFold(n_cv)
    precomputed_folds = list(cv.split(train, target))

    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds)
    clf.fit(train, target)

    # Conflate classes 0 and 1 and train clf1 on this modified dataset
    clf1 = LogisticRegressionCV(cv=precomputed_folds)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    # Ensure that what OvR learns for class2 is same regardless of whether
    # classes 0 and 1 are separated or not
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, n_cv, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10,))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, n_cv, 10))

    # Test that for the iris data multinomial gives a better accuracy than OvR
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        max_iter = 2000 if solver in ['sag', 'saga'] else 15
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=max_iter,
            random_state=42, tol=1e-5 if solver in ['sag', 'saga'] else 1e-2,
            cv=2)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(coefs_paths.shape, (3, n_cv, 10,
                                                      n_features + 1))
        assert_equal(clf_multi.Cs_.shape, (10,))
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(scores.shape, (3, n_cv, 10))
Beispiel #26
0
def test_multinomial_logistic_regression_string_inputs():
    # Test with string labels for LogisticRegression(CV)
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_classes=n_classes, n_informative=3,
                                   random_state=0)
    y_str = LabelEncoder().fit(['bar', 'baz', 'foo']).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1
    # Test for string labels
    lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
    lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')

    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)

    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_equal(sorted(lr_cv_str.classes_), ['bar', 'baz', 'foo'])

    # The predictions should be in original labels
    assert_equal(sorted(np.unique(lr_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])

    # Make sure class weights can be given with string labels
    lr_cv_str = LogisticRegression(
        solver='lbfgs', class_weight={'bar': 1, 'baz': 2, 'foo': 0},
        multi_class='multinomial').fit(X_ref, y_str)
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz'])
def test_ovr_multinomial_iris():
    """Test that OvR and multinomial are correct using the iris dataset."""
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # Use pre-defined fold as folds generated for different y
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    clf1 = LogisticRegressionCV(cv=cv)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))

    # Test that for the iris data multinomial gives a better accuracy than OvR
    clf_multi = LogisticRegressionCV(
        solver='lbfgs', multi_class='multinomial', max_iter=5
        )
    clf_multi.fit(train, target)
    multi_score = clf_multi.score(train, target)
    ovr_score = clf.score(train, target)
    assert_greater(multi_score, ovr_score)

    # Test attributes of LogisticRegressionCV
    assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
    assert_array_equal(clf_multi.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf_multi.Cs_.shape, (10, ))
    scores = np.asarray(list(clf_multi.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))
Beispiel #28
0
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20,
                               n_features=20,
                               n_informative=10,
                               n_classes=3,
                               random_state=0)

    # Test the liblinear fails when class_weight of type dict is
    # provided, when it is multiclass. However it can handle
    # binary problems.
    clf_lib = LogisticRegressionCV(class_weight={
        0: 0.1,
        1: 0.2
    },
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=auto
    X, y = make_classification(n_samples=20,
                               n_features=20,
                               n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs',
                                   fit_intercept=False,
                                   class_weight='auto')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear',
                                   fit_intercept=False,
                                   class_weight='auto')
    clf_lib.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
Beispiel #29
0
def test_ovr_multinomial_iris():
    """Test that OvR and multinomial are correct using the iris dataset."""
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # Use pre-defined fold as folds generated for different y
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    clf1 = LogisticRegressionCV(cv=cv)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))

    # Test that for the iris data multinomial gives a better accuracy than OvR
    clf_multi = LogisticRegressionCV(solver='lbfgs',
                                     multi_class='multinomial',
                                     max_iter=5)
    clf_multi.fit(train, target)
    multi_score = clf_multi.score(train, target)
    ovr_score = clf.score(train, target)
    assert_greater(multi_score, ovr_score)

    # Test attributes of LogisticRegressionCV
    assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
    assert_array_equal(clf_multi.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf_multi.Cs_.shape, (10, ))
    scores = np.asarray(list(clf_multi.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))
Beispiel #30
0
def test_saga_sparse():
    # Test LogRegCV with solver='liblinear' works for sparse matrices

    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver='saga')
    clf.fit(sparse.csr_matrix(X), y)
def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_samples, n_features = train.shape

    # The cv indices from stratified kfold (where stratification is done based
    # on the fine-grained iris classes, i.e, before the classes 0 and 1 are
    # conflated) is used for both clf and clf1
    cv = StratifiedKFold(3)
    precomputed_folds = list(cv.split(train, target))

    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds)
    clf.fit(train, target)

    # Conflate classes 0 and 1 and train clf1 on this modifed dataset
    clf1 = LogisticRegressionCV(cv=precomputed_folds)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    # Ensure that what OvR learns for class2 is same regardless of whether
    # classes 0 and 1 are separated or not
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))

    # Test that for the iris data multinomial gives a better accuracy than OvR
    for solver in ['lbfgs', 'newton-cg']:
        clf_multi = LogisticRegressionCV(solver=solver,
                                         multi_class='multinomial',
                                         max_iter=15)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(coefs_paths.shape,
                                  (3, 3, 10, n_features + 1))
        assert_equal(clf_multi.Cs_.shape, (10, ))
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(scores.shape, (3, 3, 10))
Beispiel #32
0
print("The accuracy score (MultinomialNB) is:", score)
print("The F score-Micro (MultinomialNB) is:", f_score)

# Random Forest
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
rf_clf.fit(X_train, y_train)
rf_predictions = rf_clf.predict(X_test)
score = accuracy_score(y_test, rf_predictions)
f_score = f1_score(y_test, rf_predictions, average='micro')
print("The accuracy score (Random Forest) is:", score)
print("The F score-Micro (Random Forest) is:", f_score)

from sklearn import metrics

# Logistic Regression
lr_classifier = LogisticRegressionCV()
lr_classifier.fit(X_train, y_train)
lt_predictions = lr_classifier.predict(X_test)
score = accuracy_score(y_test, lt_predictions)
f_score = f1_score(y_test, lt_predictions, average='micro')
print("The accuracy score (Logistic Regression) is:", score)
print("The F score-Micro (Logistic Regression) is:", f_score)

cnf_matrix = metrics.confusion_matrix(y_test, rf_predictions)

# import required modules
import seaborn as sns

class_names = [0, 1]  # name  of classes
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
Beispiel #33
0
    AdaBoostClassifier(),
    BaggingClassifier(),
    BernoulliNB(),
    CalibratedClassifierCV(),
    DecisionTreeClassifier(),
    ExtraTreeClassifier(),
    ExtraTreesClassifier(),
    GaussianNB(),
    GaussianProcessClassifier(),
    GradientBoostingClassifier(),
    KNeighborsClassifier(),
    LabelPropagation(),
    LabelSpreading(),
    LinearDiscriminantAnalysis(),
    LogisticRegression(),
    LogisticRegressionCV(),
    MLPClassifier(),
    NuSVC(probability=True),
    QuadraticDiscriminantAnalysis(),
    RandomForestClassifier(),
    SGDClassifier(loss='log'),
    SVC(probability=True),
    XGBClassifier()
]

names = [
    'AdaBoostClassifier', 'BaggingClassifier', 'BernoulliNB',
    'CalibratedClassifierCV', 'DecisionTreeClassifier', 'ExtraTreeClassifier',
    'ExtraTreesClassifier', 'GaussianNB', 'GaussianProcessClassifier',
    'GradientBoostingClassifier', 'KNeighborsClassifier', 'LabelPropagation',
    'LabelSpreading', 'LinearDiscriminantAnalysis', 'LogisticRegression',
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20,
                               n_features=20,
                               n_informative=10,
                               n_classes=3,
                               random_state=0)

    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
           "multiclass with class_weight of type dict. Use the lbfgs, "
           "newton-cg or sag solvers or set class_weight='balanced'")
    clf_lib = LogisticRegressionCV(class_weight={
        0: 0.1,
        1: 0.2
    },
                                   solver='liblinear')
    assert_raise_message(ValueError, msg, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=balanced
    X, y = make_classification(n_samples=20,
                               n_features=20,
                               n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs',
                                   fit_intercept=False,
                                   class_weight='balanced')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear',
                                   fit_intercept=False,
                                   class_weight='balanced')
    clf_lib.fit(X, y)
    clf_sag = LogisticRegressionCV(solver='sag',
                                   fit_intercept=False,
                                   class_weight='balanced',
                                   max_iter=2000)
    clf_sag.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)
def test_liblinear_logregcv_sparse():
    """Test LogRegCV with solver='liblinear' works for sparse matrices"""

    X, y = make_classification(n_samples=10, n_features=5)
    clf = LogisticRegressionCV(solver='liblinear')
    clf.fit(sparse.csr_matrix(X), y)
def test_liblinear_logregcv_sparse():
    # Test LogRegCV with solver='liblinear' works for sparse matrices

    X, y = make_classification(n_samples=10, n_features=5)
    clf = LogisticRegressionCV(solver='liblinear')
    clf.fit(sparse.csr_matrix(X), y)
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
           "multiclass with class_weight of type dict. Use the lbfgs, "
           "newton-cg or sag solvers or set class_weight='balanced'")
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raise_message(ValueError, msg, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=balanced
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='balanced')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='balanced')
    clf_lib.fit(X, y)
    clf_sag = LogisticRegressionCV(solver='sag', fit_intercept=False,
                                   class_weight='balanced', max_iter=2000)
    clf_sag.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)