Python LogisticRegressionCV.fit 예제들, sklearn.linear_model.logistic.LogisticRegressionCV.fit Python 예제들

예제 #1

0

파일 보기

파일: test_logistic.py 프로젝트: xiaolinfu/scikit-learn

def test_ova_iris():
    """Check the one-vs-all fit of LogisticRegressionCV on the iris dataset.

    The estimator is fitted once on the original three-class labels and once
    on labels where class 0 has been merged into class 1. What is learned for
    class 2 must be the same in both cases.
    """
    train, target = iris.data, iris.target
    n_features = train.shape[1]

    # Build the folds once, from the original labels, so that both
    # estimators are cross-validated on identical splits.
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    # Merge class 0 into class 1 on a copy: `target` is the very object held
    # by the module-level `iris`, so editing it in place would leak the
    # merged labels into every other user of `iris.target`.
    clf1 = LogisticRegressionCV(cv=cv)
    merged_target = target.copy()
    merged_target[merged_target == 0] = 1
    clf1.fit(train, merged_target)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    assert_equal(len(clf.classes_), 3)

    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))

예제 #2

0

파일 보기

파일: test_logistic.py 프로젝트: JinguoGao/scikit-learn

def test_ova_iris():
    """Verify the OvA behaviour of LogisticRegressionCV with the iris data.

    Two estimators share the same folds: one sees the original labels, the
    other sees labels in which class 0 is relabelled as class 1. The scores,
    intercept and coefficients for class 2 have to agree between them.
    """
    train, target = iris.data, iris.target
    n_features = train.shape[1]

    # Use pre-defined folds (computed from the unmodified labels), since
    # folds generated for a different y would differ.
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    # Relabel on a copy so the module-level iris.target array, which
    # `target` aliases, is left untouched.
    clf1 = LogisticRegressionCV(cv=cv)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    assert_equal(len(clf.classes_), 3)

    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, (3, 3, 10, n_features + 1))
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, (3, 3, 10))

예제 #3

0

파일 보기

파일: test_logistic.py 프로젝트: zinc-40/scikit-learn

def test_logistic_regression_multinomial():
    """Tests for the multinomial option in logistic regression"""

    # Problem dimensions; used both to generate the data and to check the
    # coefficient shapes, so the two cannot drift apart.
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes,
                               random_state=0)

    # With an intercept.
    clf_int = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf_int.fit(X, y)
    assert_array_equal(clf_int.coef_.shape, (n_classes, n_features))

    # Without an intercept.
    clf_wint = LogisticRegression(solver='lbfgs',
                                  multi_class='multinomial',
                                  fit_intercept=False)
    clf_wint.fit(X, y)
    assert_array_equal(clf_wint.coef_.shape, (n_classes, n_features))

    # Test that the path gives almost the same results. However, since in
    # this case we take the average of the coefs after fitting across all
    # the folds, it need not be exactly the same.
    clf_path = LogisticRegressionCV(solver='lbfgs',
                                    multi_class='multinomial',
                                    Cs=[1.])
    clf_path.fit(X, y)
    assert_array_almost_equal(clf_path.coef_, clf_int.coef_, decimal=3)
    assert_almost_equal(clf_path.intercept_, clf_int.intercept_, decimal=3)

예제 #4

0

파일 보기

파일: test_logistic.py 프로젝트: zhouli01/scikit-learn

def test_logistic_cv_score_does_not_warn_by_default():
    # Scoring a LogisticRegressionCV fitted with default scoring must not
    # emit any warning.
    import warnings  # local import keeps this block self-contained

    lr = LogisticRegressionCV(cv=2)
    lr.fit(X, Y1)

    # pytest.warns(None) is deprecated since pytest 7 and rejected by
    # pytest 8, so the warnings are recorded with the standard library.
    # "always" disables the once-per-location filter so nothing is hidden.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        lr.score(X, lr.predict(X))
    assert len(record) == 0

예제 #5

0

파일 보기

파일: test_logistic.py 프로젝트: zhouli01/scikit-learn

def test_logistic_cv_mock_scorer():
    # A user-supplied scorer must drive both the selection of C during
    # cross-validation and the value returned by lr.score().

    class MockScorer(object):
        # Callable that ignores its arguments and cycles through a fixed
        # list of scores, counting how often it has been invoked.

        def __init__(self):
            self.calls = 0
            self.scores = [0.1, 0.4, 0.8, 0.5]

        def __call__(self, model, X, y, sample_weight=None):
            position = self.calls % len(self.scores)
            self.calls += 1
            return self.scores[position]

    scorer = MockScorer()
    c_grid = [1, 2, 3, 4]
    n_folds = 2

    lr = LogisticRegressionCV(Cs=c_grid, scoring=scorer, cv=n_folds)
    lr.fit(X, Y1)

    # The third score in the cycle (0.8) is the largest, so the third C
    # of the grid has to be the selected one.
    assert lr.C_[0] == c_grid[2]

    # One scorer call per (fold, C) pair.
    assert scorer.calls == n_folds * len(c_grid)

    # Start counting afresh before exercising lr.score().
    scorer.calls = 0
    with pytest.warns(ChangedBehaviorWarning):
        custom_score = lr.score(X, lr.predict(X))

    assert custom_score == scorer.scores[0]
    assert scorer.calls == 1

예제 #6

0

파일 보기

파일: test_logistic.py 프로젝트: dmiruke/aurora_detection

def test_logistic_cv():
    # Test the LogisticRegressionCV object: with a single C it must match a
    # plain LogisticRegression, and its fitted attributes must have the
    # expected shapes.
    n_samples, n_features = 50, 5
    n_folds = 3
    rng = np.random.RandomState(0)
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    # Standardise using the global mean and standard deviation.
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()

    # The solver and the number of folds are given explicitly on both
    # estimators: the comparison below is only meaningful when the two use
    # the same solver, and the shape checks hard-code the fold count, so
    # neither should depend on library defaults.
    lr_cv = LogisticRegressionCV(Cs=[1.],
                                 fit_intercept=False,
                                 solver='liblinear',
                                 cv=n_folds)
    lr_cv.fit(X_ref, y)
    lr = LogisticRegression(C=1., fit_intercept=False, solver='liblinear')
    lr.fit(X_ref, y)
    assert_array_almost_equal(lr.coef_, lr_cv.coef_)

    assert_array_equal(lr_cv.coef_.shape, (1, n_features))
    assert_array_equal(lr_cv.classes_, [-1, 1])
    assert_equal(len(lr_cv.classes_), 2)

    coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values()))
    assert_array_equal(coefs_paths.shape, (1, n_folds, 1, n_features))
    assert_array_equal(lr_cv.Cs_.shape, (1, ))
    scores = np.asarray(list(lr_cv.scores_.values()))
    assert_array_equal(scores.shape, (1, n_folds, 1))

예제 #7

0

파일 보기

파일: test_logistic.py 프로젝트: xiaolinfu/scikit-learn

def test_logistic_regressioncv_class_weights():
    # Part 1: on a three-class problem, the liblinear solver must refuse a
    # class_weight given as a dict.
    multi_X, multi_y = make_classification(n_samples=20,
                                           n_features=20,
                                           n_informative=10,
                                           n_classes=3,
                                           random_state=0)
    dict_weights = {0: 0.1, 1: 0.2}
    clf_lib = LogisticRegressionCV(class_weight=dict_weights,
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, multi_X, multi_y)

    # Part 2: with class_weight='auto' and no intercept, lbfgs and
    # liblinear must reach the same coefficients (to 4 decimals).
    bin_X, bin_y = make_classification(n_samples=20,
                                       n_features=20,
                                       n_informative=10,
                                       random_state=0)
    auto_kwargs = dict(fit_intercept=False, class_weight='auto')
    clf_lbf = LogisticRegressionCV(solver='lbfgs', **auto_kwargs)
    clf_lbf.fit(bin_X, bin_y)
    clf_lib = LogisticRegressionCV(solver='liblinear', **auto_kwargs)
    clf_lib.fit(bin_X, bin_y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)

예제 #8

0

파일 보기

파일: test_logistic.py 프로젝트: huafengw/scikit-learn

def test_logreg_cv_penalty():
    # The penalty requested from LogisticRegressionCV must reach the final
    # fit: with a single C, it has to yield as many non-zero coefficients
    # as a plain LogisticRegression using the same l1 penalty.
    X, y = make_classification(n_samples=50, n_features=20, random_state=0)
    l1_liblinear = dict(penalty="l1", solver='liblinear')

    cv_model = LogisticRegressionCV(Cs=[1.0], **l1_liblinear)
    cv_model.fit(X, y)
    plain_model = LogisticRegression(C=1.0, **l1_liblinear)
    plain_model.fit(X, y)

    n_nonzero_cv = np.count_nonzero(cv_model.coef_)
    n_nonzero_plain = np.count_nonzero(plain_model.coef_)
    assert_equal(n_nonzero_cv, n_nonzero_plain)

예제 #9

0

파일 보기

파일: test_logistic.py 프로젝트: dmiruke/aurora_detection

def test_logreg_cv_penalty():
    # Check that LogisticRegressionCV hands the requested penalty to its
    # final fit, by comparing the number of non-zero coefficients with an
    # equivalent LogisticRegression.
    features, labels = make_classification(n_samples=50,
                                           n_features=20,
                                           random_state=0)

    cross_validated = LogisticRegressionCV(penalty="l1",
                                           Cs=[1.0],
                                           solver='liblinear')
    cross_validated.fit(features, labels)

    reference = LogisticRegression(penalty="l1",
                                   C=1.0,
                                   solver='liblinear')
    reference.fit(features, labels)

    assert_equal(np.count_nonzero(cross_validated.coef_),
                 np.count_nonzero(reference.coef_))

예제 #10

0

파일 보기

파일: test_logistic.py 프로젝트: digimatronics/sci-kit

def test_logistic_regression_multinomial():
    # Multinomial logistic regression: every solver must agree with the
    # lbfgs reference, and the CV variant with a single C must come close
    # to it as well.

    # Problem dimensions and data.
    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes,
                               random_state=0)
    coef_shape = (n_classes, n_features)

    # Reference fits with lbfgs, with (ref_i) and without (ref_w) intercept.
    ref_i = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    ref_w = LogisticRegression(solver='lbfgs',
                               multi_class='multinomial',
                               fit_intercept=False)
    ref_i.fit(X, y)
    ref_w.fit(X, y)
    assert_array_equal(ref_i.coef_.shape, coef_shape)
    assert_array_equal(ref_w.coef_.shape, coef_shape)

    for solver in ['sag', 'newton-cg']:
        settings = dict(solver=solver,
                        multi_class='multinomial',
                        random_state=42,
                        max_iter=1000,
                        tol=1e-6)
        clf_i = LogisticRegression(**settings)
        clf_w = LogisticRegression(fit_intercept=False, **settings)
        clf_i.fit(X, y)
        clf_w.fit(X, y)
        assert_array_equal(clf_i.coef_.shape, coef_shape)
        assert_array_equal(clf_w.coef_.shape, coef_shape)

        # The solution must match the lbfgs reference.
        assert_almost_equal(ref_i.coef_, clf_i.coef_, decimal=3)
        assert_almost_equal(ref_w.coef_, clf_w.coef_, decimal=3)
        assert_almost_equal(ref_i.intercept_, clf_i.intercept_, decimal=3)

    # The CV path should give almost the same result. It averages the
    # coefficients obtained on the individual folds, so exact equality is
    # not expected.
    for solver in ['lbfgs', 'newton-cg', 'sag']:
        clf_path = LogisticRegressionCV(solver=solver,
                                        max_iter=2000,
                                        tol=1e-6,
                                        multi_class='multinomial',
                                        Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, ref_i.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, ref_i.intercept_, decimal=3)

예제 #11

0

파일 보기

파일: test_logistic.py 프로젝트: txuninho/scikit-learn

def test_logistic_cv_sparse():
    # Fitting LogisticRegressionCV on a dense array and on its CSR
    # counterpart must give the same coefficients, intercept and C.
    dense_X, y = make_classification(n_samples=50, n_features=5,
                                     random_state=0)
    # Zero out every entry below 1.0 so the data is actually sparse.
    dense_X[dense_X < 1.0] = 0.0
    sparse_X = sp.csr_matrix(dense_X)

    dense_clf = LogisticRegressionCV(fit_intercept=True)
    dense_clf.fit(dense_X, y)
    sparse_clf = LogisticRegressionCV(fit_intercept=True)
    sparse_clf.fit(sparse_X, y)

    assert_array_almost_equal(sparse_clf.coef_, dense_clf.coef_)
    assert_array_almost_equal(sparse_clf.intercept_, dense_clf.intercept_)
    assert_equal(sparse_clf.C_, dense_clf.C_)

예제 #12

0

파일 보기

파일: test_logistic.py 프로젝트: dmiruke/aurora_detection

def test_logistic_cv_sparse():
    # Dense versus sparse (CSR) input: the fitted LogisticRegressionCV
    # models have to be equivalent.
    data, labels = make_classification(n_samples=50,
                                       n_features=5,
                                       random_state=0)
    below_one = data < 1.0
    data[below_one] = 0.0  # leave only the larger entries non-zero
    data_csr = sp.csr_matrix(data)

    from_dense = LogisticRegressionCV(fit_intercept=True)
    from_dense.fit(data, labels)

    from_sparse = LogisticRegressionCV(fit_intercept=True)
    from_sparse.fit(data_csr, labels)

    assert_array_almost_equal(from_sparse.coef_, from_dense.coef_)
    assert_array_almost_equal(from_sparse.intercept_, from_dense.intercept_)
    assert_equal(from_sparse.C_, from_dense.C_)

예제 #13

0

파일 보기

파일: logistic_regression_cv.py 프로젝트: sreev/lale

class LogisticRegressionCVImpl():
    """Wrapper that records its hyperparameters and delegates to SKLModel.

    The constructor arguments are stored in ``_hyperparams`` and forwarded
    unchanged to ``SKLModel``; the resulting object is kept in
    ``_wrapped_model`` and every other method is a direct delegation to it.
    """

    def __init__(self,
                 Cs=10,
                 fit_intercept=True,
                 cv=3,
                 dual=False,
                 penalty='l2',
                 scoring=None,
                 solver='lbfgs',
                 tol=0.0001,
                 max_iter=100,
                 class_weight='balanced',
                 n_jobs=None,
                 verbose=0,
                 refit=True,
                 intercept_scaling=1.0,
                 multi_class='ovr',
                 random_state=None):
        # Keyword form builds the same mapping, in the same key order, as
        # a dict literal keyed by the parameter names.
        self._hyperparams = dict(
            Cs=Cs,
            fit_intercept=fit_intercept,
            cv=cv,
            dual=dual,
            penalty=penalty,
            scoring=scoring,
            solver=solver,
            tol=tol,
            max_iter=max_iter,
            class_weight=class_weight,
            n_jobs=n_jobs,
            verbose=verbose,
            refit=refit,
            intercept_scaling=intercept_scaling,
            multi_class=multi_class,
            random_state=random_state,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return this wrapper.

        ``y`` is only passed on to the wrapped model when it is not None.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict`` result for ``X``."""
        model = self._wrapped_model
        return model.predict(X)

    def predict_proba(self, X):
        """Return the wrapped model's ``predict_proba`` result for ``X``."""
        model = self._wrapped_model
        return model.predict_proba(X)

    def decision_function(self, X):
        """Return the wrapped model's ``decision_function`` result."""
        model = self._wrapped_model
        return model.decision_function(X)

예제 #14

0

파일 보기

파일: test_logistic.py 프로젝트: Pabrousseau/Scikit-learn

def test_logistic_regressioncv_class_weights():
    # For a binary and a three-class problem, and for both an explicit
    # weight dict and 'balanced', every solver must reach the same
    # coefficients as lbfgs (to 4 decimals).
    for weight in [{0: 0.1, 1: 0.2}, {0: 0.1, 1: 0.2, 2: 0.5}]:
        n_classes = len(weight)
        for class_weight in (weight, 'balanced'):
            X, y = make_classification(n_samples=30, n_features=3,
                                       n_repeated=0,
                                       n_informative=3, n_redundant=0,
                                       n_classes=n_classes, random_state=0)

            # Settings shared by all four solvers.
            shared = dict(Cs=1,
                          fit_intercept=False,
                          class_weight=class_weight)
            clf_lbf = LogisticRegressionCV(solver='lbfgs', **shared)
            clf_ncg = LogisticRegressionCV(solver='newton-cg', **shared)
            clf_lib = LogisticRegressionCV(solver='liblinear', **shared)
            # sag additionally gets a tolerance, an iteration budget and
            # a fixed seed.
            clf_sag = LogisticRegressionCV(solver='sag',
                                           tol=1e-5, max_iter=10000,
                                           random_state=0,
                                           **shared)

            for clf in (clf_lbf, clf_ncg, clf_lib, clf_sag):
                clf.fit(X, y)

            for other in (clf_lib, clf_ncg, clf_sag):
                assert_array_almost_equal(other.coef_, clf_lbf.coef_,
                                          decimal=4)

예제 #15

0

파일 보기

파일: test_logistic.py 프로젝트: vikasgoel2000/lambda-packs

def test_logistic_regressioncv_class_weights():
    # Part 1: liblinear with a dict class_weight must fail, with a specific
    # message, on a multiclass target but work on a binary one.
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    expected_message = (
        "In LogisticRegressionCV the liblinear solver cannot handle "
        "multiclass with class_weight of type dict. Use the lbfgs, "
        "newton-cg or sag solvers or set class_weight='balanced'")
    dict_weighted = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                         solver='liblinear')
    assert_raise_message(ValueError, expected_message,
                         dict_weighted.fit, X, y)

    # Fold class 2 into class 1 to obtain a binary target.
    y_binary = y.copy()
    y_binary[y == 2] = 1
    dict_weighted.fit(X, y_binary)
    assert_array_equal(dict_weighted.classes_, [0, 1])

    # Part 2: with class_weight='balanced' and no intercept, lbfgs,
    # liblinear and sag must all agree on the coefficients.
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    fitted = {}
    solver_options = (('lbfgs', {}),
                      ('liblinear', {}),
                      ('sag', {'max_iter': 2000}))
    for solver, extra in solver_options:
        clf = LogisticRegressionCV(solver=solver, fit_intercept=False,
                                   class_weight='balanced', **extra)
        clf.fit(X, y)
        fitted[solver] = clf

    lbf_coef = fitted['lbfgs'].coef_
    lib_coef = fitted['liblinear'].coef_
    sag_coef = fitted['sag'].coef_
    assert_array_almost_equal(lib_coef, lbf_coef, decimal=4)
    assert_array_almost_equal(sag_coef, lbf_coef, decimal=4)
    assert_array_almost_equal(lib_coef, sag_coef, decimal=4)

예제 #16

0

파일 보기

파일: test_logistic.py 프로젝트: 0664j35t3r/scikit-learn

def test_logistic_regressioncv_class_weights():
    # First half: a dict class_weight combined with the liblinear solver is
    # rejected for three classes and accepted for two.
    multi_X, multi_y = make_classification(n_samples=20, n_features=20,
                                           n_informative=10, n_classes=3,
                                           random_state=0)

    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
           "multiclass with class_weight of type dict. Use the lbfgs, "
           "newton-cg or sag solvers or set class_weight='balanced'")
    weights = {0: 0.1, 1: 0.2}
    clf_lib = LogisticRegressionCV(class_weight=weights, solver='liblinear')
    assert_raise_message(ValueError, msg, clf_lib.fit, multi_X, multi_y)

    # Relabel class 2 as class 1 so only classes 0 and 1 remain.
    two_class_y = multi_y.copy()
    is_class_two = multi_y == 2
    two_class_y[is_class_two] = 1
    clf_lib.fit(multi_X, two_class_y)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Second half: class_weight='balanced' must give matching coefficients
    # across the lbfgs, liblinear and sag solvers.
    bin_X, bin_y = make_classification(n_samples=20, n_features=20,
                                       n_informative=10, random_state=0)
    balanced = dict(fit_intercept=False, class_weight='balanced')

    clf_lbf = LogisticRegressionCV(solver='lbfgs', **balanced)
    clf_lbf.fit(bin_X, bin_y)
    clf_lib = LogisticRegressionCV(solver='liblinear', **balanced)
    clf_lib.fit(bin_X, bin_y)
    clf_sag = LogisticRegressionCV(solver='sag', max_iter=2000, **balanced)
    clf_sag.fit(bin_X, bin_y)

    comparisons = ((clf_lib, clf_lbf), (clf_sag, clf_lbf), (clf_lib, clf_sag))
    for left, right in comparisons:
        assert_array_almost_equal(left.coef_, right.coef_, decimal=4)

예제 #17

0

파일 보기

파일: test_logistic.py 프로젝트: AtonLerin/maya_python_packages

def test_logistic_regressioncv_class_weights():
    # liblinear cannot take a dict class_weight on a multiclass target and
    # must raise; the same estimator must fit a binary target fine.
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)
    dict_weights = {0: 0.1, 1: 0.2}
    clf_lib = LogisticRegressionCV(class_weight=dict_weights,
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, X, y)

    # Collapse class 2 onto class 1 to get the binary problem.
    y_binary = y.copy()
    y_binary[y == 2] = 1
    clf_lib.fit(X, y_binary)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # With class_weight='balanced' the three solvers must agree pairwise.
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    common = dict(fit_intercept=False, class_weight='balanced')
    clf_lbf = LogisticRegressionCV(solver='lbfgs', **common)
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', **common)
    clf_lib.fit(X, y)
    clf_sag = LogisticRegressionCV(solver='sag', max_iter=2000, **common)
    clf_sag.fit(X, y)

    lbf_coef, lib_coef, sag_coef = clf_lbf.coef_, clf_lib.coef_, clf_sag.coef_
    assert_array_almost_equal(lib_coef, lbf_coef, decimal=4)
    assert_array_almost_equal(sag_coef, lbf_coef, decimal=4)
    assert_array_almost_equal(lib_coef, sag_coef, decimal=4)

예제 #18

0

파일 보기

파일: test_logistic.py 프로젝트: huafengw/scikit-learn

def test_logistic_regression_multinomial():
    # Multinomial logistic regression: sag, saga and newton-cg must match
    # the lbfgs reference, and LogisticRegressionCV with a single C must
    # come close to it for every solver.

    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes, random_state=0)
    expected_shape = (n_classes, n_features)

    # Reference models fitted with lbfgs: ref_i has an intercept, ref_w
    # does not.
    ref_i = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    ref_w = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                               fit_intercept=False)
    ref_i.fit(X, y)
    ref_w.fit(X, y)
    assert_array_equal(ref_i.coef_.shape, expected_shape)
    assert_array_equal(ref_w.coef_.shape, expected_shape)

    for solver in ['sag', 'saga', 'newton-cg']:
        options = dict(solver=solver, multi_class='multinomial',
                       random_state=42, max_iter=2000, tol=1e-7)
        clf_i = LogisticRegression(**options)
        clf_w = LogisticRegression(fit_intercept=False, **options)
        clf_i.fit(X, y)
        clf_w.fit(X, y)
        assert_array_equal(clf_i.coef_.shape, expected_shape)
        assert_array_equal(clf_w.coef_.shape, expected_shape)

        # Agreement with the lbfgs reference.
        assert_almost_equal(ref_i.coef_, clf_i.coef_, decimal=3)
        assert_almost_equal(ref_w.coef_, clf_w.coef_, decimal=3)
        assert_almost_equal(ref_i.intercept_, clf_i.intercept_, decimal=3)

    # The CV path averages the per-fold coefficients, so it is only
    # expected to be close to the reference, not identical.
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        clf_path = LogisticRegressionCV(solver=solver, max_iter=2000,
                                        tol=1e-6, multi_class='multinomial',
                                        Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, ref_i.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, ref_i.intercept_, decimal=3)

예제 #19

0

파일 보기

파일: test_logistic.py 프로젝트: digimatronics/sci-kit

def test_multinomial_logistic_regression_string_inputs():
    # Test with string labels for LogisticRegression(CV): fitting on string
    # labels must give the same model as fitting on numerical labels, and
    # predictions must be expressed in the original string labels.
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples,
                                   n_features=n_features,
                                   n_classes=n_classes,
                                   n_informative=3,
                                   random_state=0)
    # The string labels are derived from y *before* it is shifted below.
    y_str = LabelEncoder().fit(['bar', 'baz', 'foo']).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1

    lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
    lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')

    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)

    # Same coefficients for numerical and string labels, and the string
    # classes are reported as given (each estimator checked once).
    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert_equal(sorted(lr_cv_str.classes_), ['bar', 'baz', 'foo'])

    # The predictions should be in original labels
    assert_equal(sorted(np.unique(lr_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])

    # Make sure class weights can be given with string labels. This is a
    # plain LogisticRegression, hence its own name; 'foo' has weight 0 and
    # is expected never to be predicted.
    lr_weighted = LogisticRegression(solver='lbfgs',
                                     class_weight={
                                         'bar': 1,
                                         'baz': 2,
                                         'foo': 0
                                     },
                                     multi_class='multinomial').fit(
                                         X_ref, y_str)
    assert_equal(sorted(np.unique(lr_weighted.predict(X_ref))),
                 ['bar', 'baz'])

예제 #20

0

파일 보기

파일: test_logistic.py 프로젝트: AtonLerin/maya_python_packages

def test_logistic_regression_multinomial():
    # Multinomial logistic regression with the lbfgs and newton-cg solvers:
    # coefficient shapes, agreement between the two solvers, and agreement
    # of the CV variant with the plain estimator.

    n_samples, n_features, n_classes = 50, 20, 3
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_informative=10,
                               n_classes=n_classes, random_state=0)
    expected_shape = (n_classes, n_features)

    # lbfgs, with and without intercept.
    clf_int = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf_int.fit(X, y)
    assert_array_equal(clf_int.coef_.shape, expected_shape)

    clf_wint = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                                  fit_intercept=False)
    clf_wint.fit(X, y)
    assert_array_equal(clf_wint.coef_.shape, expected_shape)

    # newton-cg, with and without intercept.
    clf_ncg_int = LogisticRegression(solver='newton-cg',
                                     multi_class='multinomial')
    clf_ncg_int.fit(X, y)
    assert_array_equal(clf_ncg_int.coef_.shape, expected_shape)

    clf_ncg_wint = LogisticRegression(solver='newton-cg', fit_intercept=False,
                                      multi_class='multinomial')
    clf_ncg_wint.fit(X, y)
    assert_array_equal(clf_ncg_wint.coef_.shape, expected_shape)

    # Both solvers must reach the same solution.
    assert_almost_equal(clf_int.coef_, clf_ncg_int.coef_, decimal=3)
    assert_almost_equal(clf_wint.coef_, clf_ncg_wint.coef_, decimal=3)
    assert_almost_equal(clf_int.intercept_, clf_ncg_int.intercept_, decimal=3)

    # The CV path averages the coefficients fitted on the folds, so it is
    # only required to be close to the plain estimator.
    for solver in ['lbfgs', 'newton-cg']:
        clf_path = LogisticRegressionCV(solver=solver,
                                        multi_class='multinomial', Cs=[1.])
        clf_path.fit(X, y)
        assert_array_almost_equal(clf_path.coef_, clf_int.coef_, decimal=3)
        assert_almost_equal(clf_path.intercept_, clf_int.intercept_, decimal=3)

예제 #21

0

파일 보기

파일: test_logistic.py 프로젝트: JinguoGao/scikit-learn

def test_logistic_regressioncv_class_weights():
    # Multiclass target + dict class_weight + liblinear must raise.
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)
    weights = {0: 0.1, 1: 0.2}
    failing_clf = LogisticRegressionCV(class_weight=weights,
                                       solver='liblinear')
    assert_raises(ValueError, failing_clf.fit, X, y)

    # class_weight='auto': lbfgs and liblinear must give the same
    # coefficients when no intercept is fitted.
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    fitted = []
    for solver in ('lbfgs', 'liblinear'):
        clf = LogisticRegressionCV(solver=solver, fit_intercept=False,
                                   class_weight='auto')
        clf.fit(X, y)
        fitted.append(clf)
    clf_lbf, clf_lib = fitted
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)

예제 #22

0

파일 보기

파일: test_logistic.py 프로젝트: xubenben/scikit-learn

def test_logistic_regressioncv_class_weights():
    # liblinear must fail when given a dict class_weight on a multiclass
    # target, yet handle the same weights on a binary target.
    data, labels = make_classification(n_samples=20,
                                       n_features=20,
                                       n_informative=10,
                                       n_classes=3,
                                       random_state=0)
    dict_weights = {0: 0.1, 1: 0.2}
    clf_lib = LogisticRegressionCV(class_weight=dict_weights,
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, data, labels)

    # Turn the problem into a binary one by relabelling class 2 as 1.
    binary_labels = labels.copy()
    binary_labels[labels == 2] = 1
    clf_lib.fit(data, binary_labels)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # class_weight='balanced': lbfgs and liblinear must agree.
    data, labels = make_classification(n_samples=20,
                                       n_features=20,
                                       n_informative=10,
                                       random_state=0)
    balanced = dict(fit_intercept=False, class_weight='balanced')
    clf_lbf = LogisticRegressionCV(solver='lbfgs', **balanced)
    clf_lbf.fit(data, labels)
    clf_lib = LogisticRegressionCV(solver='liblinear', **balanced)
    clf_lib.fit(data, labels)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)

예제 #23

0

파일 보기

파일: test_logistic.py 프로젝트: digimatronics/sci-kit

def test_ovr_multinomial_iris():
    # Test that OvR and multinomial are correct using the iris dataset.
    train, target = iris.data, iris.target
    n_features = train.shape[1]

    # The cv indices from stratified kfold (where stratification is done based
    # on the fine-grained iris classes, i.e, before the classes 0 and 1 are
    # conflated) is used for both clf and clf1
    n_cv = 2
    cv = StratifiedKFold(n_cv)
    precomputed_folds = list(cv.split(train, target))

    # Train clf on the original dataset where classes 0 and 1 are separated
    clf = LogisticRegressionCV(cv=precomputed_folds)
    clf.fit(train, target)

    # Conflate classes 0 and 1 and train clf1 on this modified dataset
    clf1 = LogisticRegressionCV(cv=precomputed_folds)
    target_copy = target.copy()
    target_copy[target_copy == 0] = 1
    clf1.fit(train, target_copy)

    # Ensure that what OvR learns for class2 is same regardless of whether
    # classes 0 and 1 are separated or not
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    expected_paths_shape = (3, n_cv, 10, n_features + 1)
    expected_scores_shape = (3, n_cv, 10)
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    coefs_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(coefs_paths.shape, expected_paths_shape)
    assert_equal(clf.Cs_.shape, (10, ))
    scores = np.asarray(list(clf.scores_.values()))
    assert_equal(scores.shape, expected_scores_shape)

    # The OvR baseline does not depend on the solver under test, so it is
    # scored once instead of on every loop iteration.
    ovr_score = clf.score(train, target)

    # Test that for the iris data multinomial gives a better accuracy than OvR
    for solver in ['lbfgs', 'newton-cg', 'sag']:
        max_iter = 100 if solver == 'sag' else 15
        # cv=n_cv keeps the fold count in step with the shape checks below.
        clf_multi = LogisticRegressionCV(solver=solver,
                                         multi_class='multinomial',
                                         max_iter=max_iter,
                                         random_state=42,
                                         tol=1e-2,
                                         cv=n_cv)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        coefs_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(coefs_paths.shape, expected_paths_shape)
        assert_equal(clf_multi.Cs_.shape, (10, ))
        scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(scores.shape, expected_scores_shape)

예제 #24

0

파일 보기

def estimate_weights_logistic_regresssion(X_s, X_t):
    """ estimate a logistic regressor to predict the probability of a sample
    to be generated by one class or the other.
    If one class is over or under represented weights will be adapted.

    The regressor is fitted on the output of
    ``prepare_data_for_weights_estimation(X_s, X_t)`` and every source sample
    is weighted by ``n_s / n_t * exp(X_s . coef^T + intercept)``, where
    ``n_s`` and ``n_t`` are the numbers of rows of X_s and X_t.

    Parameters:
        X_s: samples from the source domain
        X_t: samples from the target domain

    Returns:
        weights for X_s """
    X_all, all_labels = prepare_data_for_weights_estimation(X_s, X_t)

    # NOTE(review): this function used to build
    # ``KFold(X_all.shape[0], 10, shuffle=True)`` without ever handing it to
    # the estimator, so the estimator's default CV splitting has always been
    # in effect. The dead local was removed; pass ``cv=`` explicitly if a
    # shuffled 10-fold split was the real intent.
    best_lr = LogisticRegressionCV(class_weight="auto",
                                   Cs=np.logspace(4, 8, 10),
                                   fit_intercept=False)
    best_lr.fit(X_all, all_labels)

    # float() keeps the sample-count ratio a true division even where "/"
    # on two ints truncates (Python 2 without ``from __future__ import
    # division``).
    sample_ratio = float(X_s.shape[0]) / X_t.shape[0]
    weights = sample_ratio * np.exp(
        np.dot(X_s, best_lr.coef_.T) + best_lr.intercept_)
    return weights

예제 #25

0

파일 보기

파일: domain_adaptation.py 프로젝트: 151706061/MITK

def estimate_weights_logistic_regresssion(X_s, X_t):
    """ estimate a logistic regressor to predict the probability of a sample
    to be generated by one class or the other.
    If one class is over or under represented weights will be adapted.

    The regressor is fitted on the output of
    ``prepare_data_for_weights_estimation(X_s, X_t)`` and every source sample
    is weighted by ``n_s / n_t * exp(X_s . coef^T + intercept)``, where
    ``n_s`` and ``n_t`` are the numbers of rows of X_s and X_t.

    Parameters:
        X_s: samples from the source domain
        X_t: samples from the target domain

    Returns:
        weights for X_s """
    X_all, all_labels = prepare_data_for_weights_estimation(X_s, X_t)

    # NOTE(review): a ``KFold(X_all.shape[0], 10, shuffle=True)`` object was
    # previously created here but never passed to the estimator, so the
    # estimator's default CV splitting has always been used. The unused
    # local was dropped; supply ``cv=`` if shuffled 10-fold CV was intended.
    best_lr = LogisticRegressionCV(class_weight="auto",
                                   Cs=np.logspace(4, 8, 10),
                                   fit_intercept=False)
    best_lr.fit(X_all, all_labels)

    # Force a floating-point ratio so integer "/" (Python 2 semantics) cannot
    # truncate the source/target sample-count ratio.
    n_source = float(X_s.shape[0])
    n_target = X_t.shape[0]
    weights = n_source / n_target * np.exp(np.dot(X_s, best_lr.coef_.T)
                                           + best_lr.intercept_)
    return weights

예제 #26

0

파일 보기

파일: test_logistic.py 프로젝트: huafengw/scikit-learn

def test_ovr_multinomial_iris():
    # Check the OvR and multinomial variants of LogisticRegressionCV on iris.
    train, target = iris.data, iris.target
    _, n_features = train.shape

    # Folds come from a stratified k-fold computed on the original
    # three-class target (before classes 0 and 1 are merged) and are shared
    # by both estimators below.
    n_cv = 2
    precomputed_folds = list(StratifiedKFold(n_cv).split(train, target))
    expected_paths_shape = (3, n_cv, 10, n_features + 1)
    expected_scores_shape = (3, n_cv, 10)

    # clf sees the original labels, with classes 0 and 1 kept apart.
    clf = LogisticRegressionCV(cv=precomputed_folds)
    clf.fit(train, target)

    # clf1 sees a copy of the labels in which class 0 is folded into class 1.
    merged_target = target.copy()
    merged_target[merged_target == 0] = 1
    clf1 = LogisticRegressionCV(cv=precomputed_folds)
    clf1.fit(train, merged_target)

    # What is learned for class 2 must not depend on whether classes 0 and 1
    # were separated.
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Shapes of the fitted attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    ovr_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(ovr_paths.shape, expected_paths_shape)
    assert_equal(clf.Cs_.shape, (10,))
    ovr_scores = np.asarray(list(clf.scores_.values()))
    assert_equal(ovr_scores.shape, expected_scores_shape)

    # On iris, multinomial is expected to beat OvR in training accuracy.
    stochastic_solvers = ['sag', 'saga']
    for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
        # The stochastic solvers get more iterations and a tighter tolerance.
        if solver in stochastic_solvers:
            max_iter, tol = 2000, 1e-5
        else:
            max_iter, tol = 15, 1e-2
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=max_iter,
            random_state=42, tol=tol, cv=2)
        clf_multi.fit(train, target)
        assert_greater(clf_multi.score(train, target),
                       clf.score(train, target))

        # Attribute shapes of the multinomial estimator.
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        multi_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(multi_paths.shape, expected_paths_shape)
        assert_equal(clf_multi.Cs_.shape, (10,))
        multi_scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(multi_scores.shape, expected_scores_shape)

예제 #27

0

파일 보기

파일: test_logistic.py 프로젝트: txuninho/scikit-learn

def test_logistic_cv():
    # Test the LogisticRegressionCV object: with a single candidate C and no
    # intercept it must reproduce a plain LogisticRegression fit, and its
    # fitted attributes must have the expected shapes.
    n_samples, n_features = 50, 5
    # Number of CV folds; the shape assertions below depend on it, so it is
    # passed explicitly instead of relying on the estimator's default.
    n_cv = 3
    rng = np.random.RandomState(0)
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    lr_cv = LogisticRegressionCV(Cs=[1.0], fit_intercept=False,
                                 solver="liblinear", cv=n_cv)
    lr_cv.fit(X_ref, y)
    # Same solver as the CV estimator so the coefficient comparison does not
    # depend on whichever solver LogisticRegression defaults to.
    lr = LogisticRegression(C=1.0, fit_intercept=False, solver="liblinear")
    lr.fit(X_ref, y)
    assert_array_almost_equal(lr.coef_, lr_cv.coef_)

    assert_array_equal(lr_cv.coef_.shape, (1, n_features))
    assert_array_equal(lr_cv.classes_, [-1, 1])
    assert_equal(len(lr_cv.classes_), 2)

    coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values()))
    assert_array_equal(coefs_paths.shape, (1, n_cv, 1, n_features))
    assert_array_equal(lr_cv.Cs_.shape, (1,))
    scores = np.asarray(list(lr_cv.scores_.values()))
    assert_array_equal(scores.shape, (1, n_cv, 1))

예제 #28

0

파일 보기

파일: test_logistic.py 프로젝트: huafengw/scikit-learn

def test_multinomial_logistic_regression_string_inputs():
    # Test with string labels for LogisticRegression(CV): fitting on string
    # labels must give the same coefficients as fitting on the corresponding
    # numerical labels, and classes_/predictions must use the string labels.
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_classes=n_classes, n_informative=3,
                                   random_state=0)
    y_str = LabelEncoder().fit(['bar', 'baz', 'foo']).inverse_transform(y)
    # For numerical labels, let y values be taken from set (-1, 0, 1)
    y = np.array(y) - 1
    # Test for string labels
    lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')
    lr_str = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    lr_cv_str = LogisticRegressionCV(solver='lbfgs', multi_class='multinomial')

    lr.fit(X_ref, y)
    lr_cv.fit(X_ref, y)
    lr_str.fit(X_ref, y_str)
    lr_cv_str.fit(X_ref, y_str)

    # Each classes_ check appears exactly once (the lr_str check used to be
    # repeated verbatim).
    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert_equal(sorted(lr_cv_str.classes_), ['bar', 'baz', 'foo'])

    # The predictions should be in original labels
    assert_equal(sorted(np.unique(lr_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])

    # Make sure class weights can be given with string labels. This is a
    # plain LogisticRegression, so it gets its own name rather than reusing
    # lr_cv_str; with weight 0 on 'foo' only 'bar' and 'baz' are expected
    # among the predictions.
    lr_weighted_str = LogisticRegression(
        solver='lbfgs', class_weight={'bar': 1, 'baz': 2, 'foo': 0},
        multi_class='multinomial').fit(X_ref, y_str)
    assert_equal(sorted(np.unique(lr_weighted_str.predict(X_ref))),
                 ['bar', 'baz'])

예제 #29

0

파일 보기

파일: test_logistic.py 프로젝트: AtonLerin/maya_python_packages

def test_ovr_multinomial_iris():
    # Check the OvR and multinomial variants of LogisticRegressionCV on iris.
    train, target = iris.data, iris.target
    _, n_features = train.shape
    expected_paths_shape = (3, 3, 10, n_features + 1)
    expected_scores_shape = (3, 3, 10)

    # The same pre-defined folds are given to both estimators, because folds
    # generated from a different y would differ.
    cv = StratifiedKFold(target, 3)

    # clf is fitted on the original labels.
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    # clf1 is fitted on a copy of the labels where class 0 is relabelled 1.
    merged_target = target.copy()
    merged_target[merged_target == 0] = 1
    clf1 = LogisticRegressionCV(cv=cv)
    clf1.fit(train, merged_target)

    # Results for class 2 must agree between the two fits.
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Shapes of the fitted attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    ovr_paths = np.asarray(list(clf.coefs_paths_.values()))
    assert_array_almost_equal(ovr_paths.shape, expected_paths_shape)
    assert_equal(clf.Cs_.shape, (10, ))
    ovr_scores = np.asarray(list(clf.scores_.values()))
    assert_equal(ovr_scores.shape, expected_scores_shape)

    # On iris, multinomial is expected to beat OvR in training accuracy.
    for solver in ['lbfgs', 'newton-cg']:
        clf_multi = LogisticRegressionCV(solver=solver,
                                         multi_class='multinomial',
                                         max_iter=15)
        clf_multi.fit(train, target)
        assert_greater(clf_multi.score(train, target),
                       clf.score(train, target))

        # Attribute shapes of the multinomial estimator.
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        multi_paths = np.asarray(list(clf_multi.coefs_paths_.values()))
        assert_array_almost_equal(multi_paths.shape, expected_paths_shape)
        assert_equal(clf_multi.Cs_.shape, (10, ))
        multi_scores = np.asarray(list(clf_multi.scores_.values()))
        assert_equal(multi_scores.shape, expected_scores_shape)

예제 #30

0

파일 보기

파일: test_logistic.py 프로젝트: zkuncheva/scikit-learn

def test_ovr_multinomial_iris():
    # Exercise LogisticRegressionCV in OvR and multinomial mode on iris.
    train = iris.data
    target = iris.target
    n_features = train.shape[1]

    def check_path_and_score_shapes(estimator):
        # coefs_paths_ and scores_ are dicts; stack their values to inspect
        # the combined shape.
        paths = np.asarray(list(estimator.coefs_paths_.values()))
        assert_array_almost_equal(paths.shape, (3, 3, 10, n_features + 1))
        assert_equal(estimator.Cs_.shape, (10, ))
        stacked_scores = np.asarray(list(estimator.scores_.values()))
        assert_equal(stacked_scores.shape, (3, 3, 10))

    # Use pre-defined folds for both fits, as folds generated for a
    # different y would not match.
    cv = StratifiedKFold(target, 3)
    clf = LogisticRegressionCV(cv=cv)
    clf.fit(train, target)

    # Second fit: same folds, labels copied with class 0 rewritten as 1.
    clf1 = LogisticRegressionCV(cv=cv)
    target_merged = target.copy()
    target_merged[target_merged == 0] = 1
    clf1.fit(train, target_merged)

    # Class 2 results of both fits have to coincide.
    assert_array_almost_equal(clf.scores_[2], clf1.scores_[2])
    assert_array_almost_equal(clf.intercept_[2:], clf1.intercept_)
    assert_array_almost_equal(clf.coef_[2][np.newaxis, :], clf1.coef_)

    # Test the shape of various attributes.
    assert_equal(clf.coef_.shape, (3, n_features))
    assert_array_equal(clf.classes_, [0, 1, 2])
    check_path_and_score_shapes(clf)

    # Test that for the iris data multinomial gives a better accuracy than
    # OvR.
    for solver in ['lbfgs', 'newton-cg']:
        clf_multi = LogisticRegressionCV(
            solver=solver, multi_class='multinomial', max_iter=15)
        clf_multi.fit(train, target)
        multi_score = clf_multi.score(train, target)
        ovr_score = clf.score(train, target)
        assert_greater(multi_score, ovr_score)

        # Test attributes of LogisticRegressionCV
        assert_equal(clf.coef_.shape, clf_multi.coef_.shape)
        assert_array_equal(clf_multi.classes_, [0, 1, 2])
        check_path_and_score_shapes(clf_multi)

예제 #31

0

파일 보기

파일: test_logistic.py 프로젝트: AngelaGuoguo/scikit-learn

def test_liblinear_logregcv_sparse():
    """Test LogRegCV with solver='liblinear' works for sparse matrices"""

    # Seed the generator: with only 10 samples an unseeded draw makes this
    # smoke test depend on the random data it happens to get.
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver='liblinear')
    # Passes as long as fitting on a CSR matrix does not raise.
    clf.fit(sparse.csr_matrix(X), y)

예제 #32

0

파일 보기

파일: test_logistic.py 프로젝트: yufengg/scikit-learn

def test_saga_sparse():
    # Smoke test: LogisticRegressionCV with solver='saga' can be fitted on a
    # sparse CSR matrix without raising.

    data, labels = make_classification(n_samples=10, n_features=5,
                                       random_state=0)
    sparse_data = sparse.csr_matrix(data)
    LogisticRegressionCV(solver='saga').fit(sparse_data, labels)

예제 #33

0

파일 보기

파일: test_logistic.py 프로젝트: dmiruke/aurora_detection

def test_liblinear_logregcv_sparse():
    # Test LogRegCV with solver='liblinear' works for sparse matrices

    # random_state is fixed so the tiny 10-sample dataset is identical on
    # every run; unseeded data made this smoke test nondeterministic.
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver='liblinear')
    # Passes as long as fitting on a CSR matrix does not raise.
    clf.fit(sparse.csr_matrix(X), y)

예제 #34

0

파일 보기

print("The F score-Micro (MultinomialNB) is:", f_score)

# Random Forest
rf_clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
rf_clf.fit(X_train, y_train)
rf_predictions = rf_clf.predict(X_test)
score = accuracy_score(y_test, rf_predictions)
f_score = f1_score(y_test, rf_predictions, average='micro')
print("The accuracy score (Random Forest) is:", score)
print("The F score-Micro (Random Forest) is:", f_score)

from sklearn import metrics

# Logistic Regression
lr_classifier = LogisticRegressionCV()
lr_classifier.fit(X_train, y_train)
lt_predictions = lr_classifier.predict(X_test)
score = accuracy_score(y_test, lt_predictions)
f_score = f1_score(y_test, lt_predictions, average='micro')
print("The accuracy score (Logistic Regression) is:", score)
print("The F score-Micro (Logistic Regression) is:", f_score)

cnf_matrix = metrics.confusion_matrix(y_test, rf_predictions)

# import required modules
import seaborn as sns

class_names = [0, 1]  # name  of classes
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)