def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
           "multiclass with class_weight of type dict. Use the lbfgs, "
           "newton-cg or sag solvers or set class_weight='balanced'")
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raise_message(ValueError, msg, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=balanced
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='balanced')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='balanced')
    clf_lib.fit(X, y)
    clf_sag = LogisticRegressionCV(solver='sag', fit_intercept=False,
                                   class_weight='balanced', max_iter=2000)
    clf_sag.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    # Test that liblinear fails when a class_weight of type dict is
    # provided for a multiclass problem. It can, however, handle
    # binary problems.
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=auto
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='auto')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='auto')
    clf_lib.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
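# Companion sketch (not from the original test suite): in scikit-learn,
# class_weight='balanced' weights each sample by
# n_samples / (n_classes * np.bincount(y)), so fitting with explicit
# per-sample weights computed that way should recover the same coefficients.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=100, random_state=0)
balanced = LogisticRegression(class_weight='balanced').fit(X, y)
per_class = y.shape[0] / (2 * np.bincount(y))  # one weight per class
manual = LogisticRegression().fit(X, y, sample_weight=per_class[y])
np.testing.assert_allclose(balanced.coef_, manual.coef_, rtol=1e-3)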
def test_make_classification():
    weights = [0.1, 0.25]
    X, y = make_classification(n_samples=100, n_features=20, n_informative=5,
                               n_redundant=1, n_repeated=1, n_classes=3,
                               n_clusters_per_class=1, hypercube=False,
                               shift=None, scale=None, weights=weights,
                               random_state=0)

    assert_equal(weights, [0.1, 0.25])
    assert_equal(X.shape, (100, 20), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(np.unique(y).shape, (3,), "Unexpected number of classes")
    assert_equal(sum(y == 0), 10, "Unexpected number of samples in class #0")
    assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1")
    assert_equal(sum(y == 2), 65, "Unexpected number of samples in class #2")

    # Test for n_features > 30
    X, y = make_classification(n_samples=2000, n_features=31,
                               n_informative=31, n_redundant=0, n_repeated=0,
                               hypercube=True, scale=0.5, random_state=0)
    assert_equal(X.shape, (2000, 31), "X shape mismatch")
    assert_equal(y.shape, (2000,), "y shape mismatch")
    assert_equal(np.unique(X.view([('', X.dtype)] * X.shape[1]))
                 .view(X.dtype).reshape(-1, X.shape[1]).shape[0], 2000,
                 "Unexpected number of unique rows")
def setUp(self):
    np.random.seed(488881)

    # binomial
    x, y = make_classification(n_samples=300, random_state=6601)
    x_sparse = csr_matrix(x)

    x_wide, y_wide = make_classification(n_samples=100, n_features=150,
                                         random_state=8911)
    x_wide_sparse = csr_matrix(x_wide)
    self.binomial = [(x, y), (x_sparse, y), (x_wide, y_wide),
                     (x_wide_sparse, y_wide)]

    # multinomial
    x, y = make_classification(n_samples=400, n_classes=3, n_informative=15,
                               n_features=25, random_state=10585)
    x_sparse = csr_matrix(x)

    x_wide, y_wide = make_classification(n_samples=400, n_classes=3,
                                         n_informative=15, n_features=500,
                                         random_state=15841)
    x_wide_sparse = csr_matrix(x_wide)
    self.multinomial = [(x, y), (x_sparse, y), (x_wide, y_wide),
                        (x_wide_sparse, y_wide)]

    self.alphas = [0., 0.25, 0.50, 0.75, 1.]
    self.n_splits = [-1, 0, 5]
    self.scoring = [
        "accuracy",
        "roc_auc",
        "average_precision",
        "log_loss",
        "precision_macro",
        "precision_micro",
        "precision_weighted",
        "f1_micro",
        "f1_macro",
        "f1_weighted",
    ]
    self.multinomial_scoring = [
        "accuracy",
        "log_loss",
        "precision_macro",
        "precision_micro",
        "precision_weighted",
        "f1_micro",
        "f1_macro",
        "f1_weighted",
    ]
def test_grid_search_precomputed_kernel_error_kernel_function():
    """Test that grid search returns an error when using a kernel_function"""
    X_, y_ = make_classification(n_samples=200, n_features=100,
                                 random_state=0)
    kernel_function = lambda x1, x2: np.dot(x1, x2.T)
    clf = SVC(kernel=kernel_function)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    assert_raises(ValueError, cv.fit, X_, y_)
def test_importances():
    """Check variable importances."""
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    for name, Tree in CLF_TREES.items():
        clf = Tree(random_state=0)
        clf.fit(X, y)
        importances = clf.feature_importances_
        n_important = np.sum(importances > 0.1)

        assert_equal(importances.shape[0], 10, "Failed with {0}".format(name))
        assert_equal(n_important, 3, "Failed with {0}".format(name))

        X_new = clf.transform(X, threshold="mean")
        assert_less(0, X_new.shape[1], "Failed with {0}".format(name))
        assert_less(X_new.shape[1], X.shape[1],
                    "Failed with {0}".format(name))

    # Check on iris that importances are the same for all builders
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(iris.data, iris.target)
    clf2 = DecisionTreeClassifier(random_state=0,
                                  max_leaf_nodes=len(iris.data))
    clf2.fit(iris.data, iris.target)

    assert_array_equal(clf.feature_importances_, clf2.feature_importances_)
def test_deprecated_score_func():
    # test that the old deprecated way of passing a score / loss function is
    # still supported
    X, y = make_classification(n_samples=200, n_features=100, random_state=0)
    clf = LinearSVC(random_state=0)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X[:180], y[:180])
    y_pred = cv.predict(X[180:])
    C = cv.best_estimator_.C

    clf = LinearSVC(random_state=0)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, score_func=f1_score)
    with warnings.catch_warnings(record=True):
        # catch deprecation warning
        cv.fit(X[:180], y[:180])
    y_pred_func = cv.predict(X[180:])
    C_func = cv.best_estimator_.C

    assert_array_equal(y_pred, y_pred_func)
    assert_equal(C, C_func)

    # test loss where greater is worse
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    clf = LinearSVC(random_state=0)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, loss_func=f1_loss)
    with warnings.catch_warnings(record=True):
        # catch deprecation warning
        cv.fit(X[:180], y[:180])
    y_pred_loss = cv.predict(X[180:])
    C_loss = cv.best_estimator_.C

    assert_array_equal(y_pred, y_pred_loss)
    assert_equal(C, C_loss)
def test_grid_search_precomputed_kernel():
    """Test that grid search works when the input features are given in the
    form of a precomputed kernel matrix
    """
    X_, y_ = make_classification(n_samples=200, n_features=100,
                                 random_state=0)

    # compute the training kernel matrix corresponding to the linear kernel
    K_train = np.dot(X_[:180], X_[:180].T)
    y_train = y_[:180]

    clf = SVC(kernel='precomputed')
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)

    assert_true(cv.best_score_ >= 0)

    # compute the test kernel matrix
    K_test = np.dot(X_[180:], X_[:180].T)
    y_test = y_[180:]

    y_pred = cv.predict(K_test)

    assert_true(np.mean(y_pred == y_test) >= 0)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cv.fit, K_train.tolist(), y_train)
def test_partial_dependence_unknown_feature(estimator, features):
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)

    err_msg = 'all features must be in'
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, [features])
def test_grid_search_sparse_scoring():
    X_, y_ = make_classification(n_samples=200, n_features=100,
                                 random_state=0)

    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert_array_equal(y_pred, y_pred2)
    assert_equal(C, C2)
    # Smoke test the score
    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
    #                            cv.score(X_[:180], y[:180]))

    # test loss where greater is worse
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    F1Loss = make_scorer(f1_loss, greater_is_better=False)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]}, scoring=F1Loss)
    cv.fit(X_[:180], y_[:180])
    y_pred3 = cv.predict(X_[180:])
    C3 = cv.best_estimator_.C

    assert_equal(C, C3)
    assert_array_equal(y_pred, y_pred3)
def test_logreg_l1_sparse_data():
    # Because liblinear penalizes the intercept and saga does not, we do not
    # fit the intercept to make it possible to compare the coefficients of
    # the two models at convergence.
    rng = np.random.RandomState(42)
    n_samples = 50
    X, y = make_classification(n_samples=n_samples, n_features=20,
                               random_state=0)
    X_noise = rng.normal(scale=0.1, size=(n_samples, 3))
    X_constant = np.zeros(shape=(n_samples, 2))
    X = np.concatenate((X, X_noise, X_constant), axis=1)
    X[X < 1] = 0
    X = sparse.csr_matrix(X)

    lr_liblinear = LogisticRegression(penalty="l1", C=1.0,
                                      solver='liblinear',
                                      fit_intercept=False, tol=1e-10)
    lr_liblinear.fit(X, y)

    lr_saga = LogisticRegression(penalty="l1", C=1.0, solver='saga',
                                 fit_intercept=False, max_iter=1000,
                                 tol=1e-10)
    lr_saga.fit(X, y)
    assert_array_almost_equal(lr_saga.coef_, lr_liblinear.coef_)

    # Noise and constant features should be regularized to zero by the l1
    # penalty
    assert_array_almost_equal(lr_liblinear.coef_[0, -5:], np.zeros(5))
    assert_array_almost_equal(lr_saga.coef_[0, -5:], np.zeros(5))

    # Check that solving on the sparse and dense data yield the same results
    lr_saga_dense = LogisticRegression(penalty="l1", C=1.0, solver='saga',
                                       fit_intercept=False, max_iter=1000,
                                       tol=1e-10)
    lr_saga_dense.fit(X.toarray(), y)
    assert_array_almost_equal(lr_saga.coef_, lr_saga_dense.coef_)
def test_linearsvc_parameters():
    # Test possible parameter combinations in LinearSVC
    # Generate the list of possible parameter combinations
    losses = ['hinge', 'squared_hinge', 'logistic_regression', 'foo']
    penalties, duals = ['l1', 'l2', 'bar'], [True, False]

    X, y = make_classification(n_samples=5, n_features=5)

    for loss, penalty, dual in itertools.product(losses, penalties, duals):
        clf = svm.LinearSVC(penalty=penalty, loss=loss, dual=dual)
        if ((loss, penalty) == ('hinge', 'l1') or
                (loss, penalty, dual) == ('hinge', 'l2', False) or
                (penalty, dual) == ('l1', True) or
                loss == 'foo' or penalty == 'bar'):
            assert_raises_regexp(ValueError,
                                 "Unsupported set of arguments.*penalty='%s.*"
                                 "loss='%s.*dual=%s" % (penalty, loss, dual),
                                 clf.fit, X, y)
        else:
            clf.fit(X, y)

    # Incorrect loss value - test if explicit error message is raised
    assert_raises_regexp(ValueError, ".*loss='l3' is not supported.*",
                         svm.LinearSVC(loss="l3").fit, X, y)
def test_logistic_regression_solvers():
    X, y = make_classification(n_features=10, n_informative=5,
                               random_state=0)

    ncg = LogisticRegression(solver='newton-cg', fit_intercept=False)
    lbf = LogisticRegression(solver='lbfgs', fit_intercept=False)
    lib = LogisticRegression(fit_intercept=False)
    sag = LogisticRegression(solver='sag', fit_intercept=False,
                             random_state=42)
    saga = LogisticRegression(solver='saga', fit_intercept=False,
                              random_state=42)
    ncg.fit(X, y)
    lbf.fit(X, y)
    sag.fit(X, y)
    saga.fit(X, y)
    lib.fit(X, y)
    assert_array_almost_equal(ncg.coef_, lib.coef_, decimal=3)
    assert_array_almost_equal(lib.coef_, lbf.coef_, decimal=3)
    assert_array_almost_equal(ncg.coef_, lbf.coef_, decimal=3)
    assert_array_almost_equal(sag.coef_, lib.coef_, decimal=3)
    assert_array_almost_equal(sag.coef_, ncg.coef_, decimal=3)
    assert_array_almost_equal(sag.coef_, lbf.coef_, decimal=3)
    assert_array_almost_equal(saga.coef_, sag.coef_, decimal=3)
    assert_array_almost_equal(saga.coef_, lbf.coef_, decimal=3)
    assert_array_almost_equal(saga.coef_, ncg.coef_, decimal=3)
    assert_array_almost_equal(saga.coef_, lib.coef_, decimal=3)
def test_logistic_regression_solvers_multiclass():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)
    tol = 1e-7
    ncg = LogisticRegression(solver='newton-cg', fit_intercept=False,
                             tol=tol)
    lbf = LogisticRegression(solver='lbfgs', fit_intercept=False, tol=tol)
    lib = LogisticRegression(fit_intercept=False, tol=tol)
    sag = LogisticRegression(solver='sag', fit_intercept=False, tol=tol,
                             max_iter=1000, random_state=42)
    saga = LogisticRegression(solver='saga', fit_intercept=False, tol=tol,
                              max_iter=10000, random_state=42)
    ncg.fit(X, y)
    lbf.fit(X, y)
    sag.fit(X, y)
    saga.fit(X, y)
    lib.fit(X, y)
    assert_array_almost_equal(ncg.coef_, lib.coef_, decimal=4)
    assert_array_almost_equal(lib.coef_, lbf.coef_, decimal=4)
    assert_array_almost_equal(ncg.coef_, lbf.coef_, decimal=4)
    assert_array_almost_equal(sag.coef_, lib.coef_, decimal=4)
    assert_array_almost_equal(sag.coef_, ncg.coef_, decimal=4)
    assert_array_almost_equal(sag.coef_, lbf.coef_, decimal=4)
    assert_array_almost_equal(saga.coef_, sag.coef_, decimal=4)
    assert_array_almost_equal(saga.coef_, lbf.coef_, decimal=4)
    assert_array_almost_equal(saga.coef_, ncg.coef_, decimal=4)
    assert_array_almost_equal(saga.coef_, lib.coef_, decimal=4)
def test_logistic_loss_and_grad():
    X_ref, y = make_classification(n_samples=20, random_state=0)
    n_features = X_ref.shape[1]

    X_sp = X_ref.copy()
    X_sp[X_sp < .1] = 0
    X_sp = sp.csr_matrix(X_sp)
    for X in (X_ref, X_sp):
        w = np.zeros(n_features)

        # First check that our derivation of the grad is correct
        loss, grad = _logistic_loss_and_grad(w, X, y, alpha=1.)
        approx_grad = optimize.approx_fprime(
            w, lambda w: _logistic_loss_and_grad(w, X, y, alpha=1.)[0], 1e-3
        )
        assert_array_almost_equal(grad, approx_grad, decimal=2)

        # Second check that our intercept implementation is good
        w = np.zeros(n_features + 1)
        loss_interp, grad_interp = _logistic_loss_and_grad(
            w, X, y, alpha=1.
        )
        assert_array_almost_equal(loss, loss_interp)

        approx_grad = optimize.approx_fprime(
            w, lambda w: _logistic_loss_and_grad(w, X, y, alpha=1.)[0], 1e-3
        )
        assert_array_almost_equal(grad_interp, approx_grad, decimal=2)
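# Companion sketch (not from the original test suite): the same
# finite-difference check can be done with scipy.optimize.check_grad,
# which returns the 2-norm of the difference between an analytic gradient
# and its numerical approximation. The toy loss below is an assumption
# chosen only to illustrate the technique.
import numpy as np
from scipy import optimize


def toy_loss(w):
    return 0.5 * np.dot(w, w) + np.sin(w).sum()


def toy_grad(w):
    return w + np.cos(w)


w0 = np.random.RandomState(0).randn(5)
err = optimize.check_grad(toy_loss, toy_grad, w0)
assert err < 1e-5  # analytic gradient agrees with the numerical one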
def test_intercept_logistic_helper():
    n_samples, n_features = 10, 5
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               random_state=0)

    # Fit intercept case.
    alpha = 1.
    w = np.ones(n_features + 1)
    grad_interp, hess_interp = _logistic_grad_hess(w, X, y, alpha)
    loss_interp = _logistic_loss(w, X, y, alpha)

    # Do not fit intercept. This can be considered equivalent to adding
    # a feature vector of ones, i.e. a column of ones.
    X_ = np.hstack((X, np.ones(10)[:, np.newaxis]))
    grad, hess = _logistic_grad_hess(w, X_, y, alpha)
    loss = _logistic_loss(w, X_, y, alpha)

    # In the fit_intercept=False case, the feature vector of ones is
    # penalized. This should be taken care of.
    assert_almost_equal(loss_interp + 0.5 * (w[-1] ** 2), loss)

    # Check gradient.
    assert_array_almost_equal(grad_interp[:n_features], grad[:n_features])
    assert_almost_equal(grad_interp[-1] + alpha * w[-1], grad[-1])

    rng = np.random.RandomState(0)
    grad = rng.rand(n_features + 1)
    hess_interp = hess_interp(grad)
    hess = hess(grad)
    assert_array_almost_equal(hess_interp[:n_features], hess[:n_features])
    assert_almost_equal(hess_interp[-1] + alpha * grad[-1], hess[-1])
def main():
    (X, y) = skd.make_classification()
    N = X.shape[0]
    X = np.append(X, np.ones((N, 1)), axis=1)
    y = 2 * y - 1
    skf = StratifiedKFold(y, 5)
    for train, test in skf:
        X_train = X[train, :]
        y_train = y[train]
        X_test = X[test, :]
        y_test = y[test]
        C = 0.01

        # dual co-ordinate descent SVM
        clf = SVMCD(C)
        clf.fit(X_train, y_train, w_prior=np.ones(21))
        pred = clf.decision_function(X_test)
        score = clf.score(X_test, y_test)
        fpr, tpr, thresholds = metrics.roc_curve(y_test, pred)
        print score, metrics.auc(fpr, tpr), "//",
        w1 = clf.w

        # standard svm
        clf = SVC(C=C, kernel='linear')
        clf.fit(X_train, y_train)
        pred = clf.decision_function(X_test)
        score = clf.score(X_test, y_test)
        fpr, tpr, thresholds = metrics.roc_curve(y_test, pred)
        print score, metrics.auc(fpr, tpr)
        w2 = clf.coef_
        w2.shape = (21,)
def test_logistic_regression_convergence_warnings():
    """Test that warnings are raised if the model does not converge"""
    X, y = make_classification(n_samples=20, n_features=20)
    clf_lib = LogisticRegression(solver='liblinear', max_iter=2, verbose=1)
    assert_warns(ConvergenceWarning, clf_lib.fit, X, y)
    assert_equal(clf_lib.n_iter_, 2)
def test_gradient_boosting_validation_fraction():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=100,
                                     n_iter_no_change=10,
                                     validation_fraction=0.1,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
    gbc3 = clone(gbc).set_params(n_iter_no_change=20)

    gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    validation_fraction=0.1,
                                    random_state=42)
    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
    gbr3 = clone(gbr).set_params(n_iter_no_change=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)
    # Check if validation_fraction has an effect
    gbc.fit(X_train, y_train)
    gbc2.fit(X_train, y_train)
    assert gbc.n_estimators_ != gbc2.n_estimators_

    gbr.fit(X_train, y_train)
    gbr2.fit(X_train, y_train)
    assert gbr.n_estimators_ != gbr2.n_estimators_

    # Check if n_estimators_ increases monotonically with n_iter_no_change
    gbc3.fit(X_train, y_train)
    gbr3.fit(X_train, y_train)
    assert gbr.n_estimators_ < gbr3.n_estimators_
    assert gbc.n_estimators_ < gbc3.n_estimators_
def test_liblinear_random_state():
    X, y = make_classification(n_samples=20)
    lr1 = LogisticRegression(random_state=0)
    lr1.fit(X, y)
    lr2 = LogisticRegression(random_state=0)
    lr2.fit(X, y)
    assert_array_almost_equal(lr1.coef_, lr2.coef_)
def test_grid_search_failing_classifier():
    # GridSearchCV with error_score != 'raise'
    # Ensures that a warning is raised and the score reset where appropriate.

    X, y = make_classification(n_samples=20, n_features=10, random_state=0)

    clf = FailingClassifier()

    # refit=False because we only want to check that errors caused by fits
    # to individual folds will be caught and warnings raised instead. If
    # refit was done, then an exception would be raised on refit and not
    # caught by grid_search (expected behavior), and this would cause an
    # error in this test.
    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                      refit=False, error_score=0.0)

    assert_warns(FitFailedWarning, gs.fit, X, y)

    # Ensure that grid scores were set to zero as required for those fits
    # that are expected to fail.
    assert all(np.all(this_point.cv_validation_scores == 0.0)
               for this_point in gs.grid_scores_
               if this_point.parameters['parameter'] ==
               FailingClassifier.FAILING_PARAMETER)

    gs = GridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                      refit=False, error_score=float('nan'))
    assert_warns(FitFailedWarning, gs.fit, X, y)
    assert all(np.all(np.isnan(this_point.cv_validation_scores))
               for this_point in gs.grid_scores_
               if this_point.parameters['parameter'] ==
               FailingClassifier.FAILING_PARAMETER)
def test_class_weight_auto_classifiers():
    """Test that class_weight="auto" improves f1-score"""

    # This test is broken; its success depends on:
    # * a rare fortuitous RNG seed for make_classification; and
    # * the use of binary F1 over a seemingly arbitrary positive class for
    #   two datasets, and weighted average F1 for the third.
    # Its expectations need to be clarified and reimplemented.
    raise SkipTest("This test requires redefinition")

    classifiers = all_estimators(type_filter="classifier")

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        classifiers = [c for c in classifiers
                       if "class_weight" in c[1]().get_params().keys()]

    for n_classes, weights in zip([2, 3], [[0.8, 0.2], [0.8, 0.1, 0.1]]):
        # create unbalanced dataset
        X, y = make_classification(n_classes=n_classes, n_samples=200,
                                   n_features=10, weights=weights,
                                   random_state=0, n_informative=n_classes)
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.5,
                                                            random_state=0)
        for name, Classifier in classifiers:
            if (name != "NuSVC"
                    # the sparse version has a parameter that doesn't do
                    # anything
                    and not name.startswith("RidgeClassifier")
                    # RidgeClassifier behaves unexpectedly
                    # FIXME!
                    and not name.endswith("NB")):
                # NaiveBayes classifiers have a somewhat different interface.
                # FIXME SOON!
                yield (check_class_weight_auto_classifiers, name, Classifier,
                       X_train, y_train, X_test, y_test, weights)
def test_gradient_boosting_early_stopping():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=1000,
                                     n_iter_no_change=10,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbr = GradientBoostingRegressor(n_estimators=1000,
                                    n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)
    # Check if early_stopping works as expected
    for est, tol, early_stop_n_estimators in ((gbc, 1e-1, 24),
                                              (gbr, 1e-1, 13),
                                              (gbc, 1e-3, 36),
                                              (gbr, 1e-3, 28)):
        est.set_params(tol=tol)
        est.fit(X_train, y_train)
        assert_equal(est.n_estimators_, early_stop_n_estimators)
        assert est.score(X_test, y_test) > 0.7

    # Without early stopping
    gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                     max_depth=3, random_state=42)
    gbc.fit(X, y)
    gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1,
                                    max_depth=3, random_state=42)
    gbr.fit(X, y)

    assert gbc.n_estimators_ == 100
    assert gbr.n_estimators_ == 200
def test_cvbestsearchrefit_select_k_best(self):
    list_C_value = range(2, 10, 1)
    # print repr(list_C_value)
    for C_value in list_C_value:
        # C_value = 2
        # print C_value
        X, y = datasets.make_classification(n_samples=100,
                                            n_features=500,
                                            n_informative=5)
        n_folds_nested = 2
        # random_state = 0
        k_values = [2, 3, 4, 5, 6]
        key_y_pred = "y" + conf.SEP + conf.PREDICTION

        # With EPAC
        methods = Methods(*[Pipe(SelectKBest(k=k),
                                 SVC(C=C_value, kernel="linear"))
                            for k in k_values])
        wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
        wf.run(X=X, y=y)
        r_epac = wf.reduce().values()[0]

        # - Without EPAC
        from sklearn.pipeline import Pipeline
        r_sklearn = dict()
        clf = Pipeline([("anova", SelectKBest(k=3)),
                        ("svm", SVC(C=C_value, kernel="linear"))])
        parameters = {"anova__k": k_values}
        cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
        gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
        gscv.fit(X, y)
        r_sklearn[key_y_pred] = gscv.predict(X)
        r_sklearn[conf.BEST_PARAMS] = gscv.best_params_
        r_sklearn[conf.BEST_PARAMS]["k"] = \
            r_sklearn[conf.BEST_PARAMS]["anova__k"]

        # - Comparisons
        comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
        self.assertTrue(comp, u"Diff CVBestSearchRefitParallel: prediction")
        for key_param in r_epac[conf.BEST_PARAMS][0]:
            if key_param in r_sklearn[conf.BEST_PARAMS]:
                comp = r_sklearn[conf.BEST_PARAMS][key_param] == \
                    r_epac[conf.BEST_PARAMS][0][key_param]
                self.assertTrue(
                    comp,
                    u"Diff CVBestSearchRefitParallel: best parameters")
def test_cv(self):
    X, y = datasets.make_classification(n_samples=20,
                                        n_features=5,
                                        n_informative=2)
    n_folds = 2

    # = With EPAC
    wf = CV(SVC(kernel="linear"), n_folds=n_folds,
            reducer=ClassificationReport(keep=True))
    r_epac = wf.top_down(X=X, y=y)

    # = With SKLEARN
    clf = SVC(kernel="linear")
    r_sklearn = list()
    for idx_train, idx_test in StratifiedKFold(y=y, n_folds=n_folds):
        # idx_train, idx_test = cv.__iter__().next()
        X_train = X[idx_train, :]
        X_test = X[idx_test, :]
        y_train = y[idx_train]
        clf.fit(X_train, y_train)
        r_sklearn.append(clf.predict(X_test))

    # = Comparison
    key2cmp = "y" + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
    for icv in range(n_folds):
        comp = np.all(np.asarray(r_epac[0][key2cmp]) ==
                      np.asarray(r_sklearn[0]))
        self.assertTrue(comp, u"Diff CV: EPAC vs sklearn")

    # test reduce
    r_epac_reduce = wf.reduce().values()[0][key2cmp]
    comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
    self.assertTrue(comp, u"Diff CV: EPAC reduce")
def test_perm2(self):
    from epac.tests.wfexamples2test import WFExample2

    X, y = datasets.make_classification(n_samples=20,
                                        n_features=5,
                                        n_informative=2)
    wf = WFExample2().get_workflow()
    wf.run(X=X, y=y)
    wf.reduce()
def test_cvbestsearchrefit(self):
    X, y = datasets.make_classification(n_samples=12,
                                        n_features=10,
                                        n_informative=2)
    n_folds_nested = 2
    # random_state = 0
    C_values = [0.1, 0.5, 1, 2, 5]
    kernels = ["linear", "rbf"]
    key_y_pred = "y" + conf.SEP + conf.PREDICTION

    # With EPAC
    methods = Methods(*[SVC(C=C, kernel=kernel)
                        for C in C_values for kernel in kernels])
    wf = CVBestSearchRefitParallel(methods, n_folds=n_folds_nested)
    wf.run(X=X, y=y)
    r_epac = wf.reduce().values()[0]

    # - Without EPAC
    r_sklearn = dict()
    clf = SVC(kernel="linear")
    parameters = {"C": C_values, "kernel": kernels}
    cv_nested = StratifiedKFold(y=y, n_folds=n_folds_nested)
    gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
    gscv.fit(X, y)
    r_sklearn[key_y_pred] = gscv.predict(X)
    r_sklearn[conf.BEST_PARAMS] = gscv.best_params_

    # - Comparisons
    comp = np.all(r_epac[key_y_pred] == r_sklearn[key_y_pred])
    self.assertTrue(comp, u"Diff CVBestSearchRefitParallel: prediction")
    for key_param in r_epac[conf.BEST_PARAMS][0]:
        if key_param in r_sklearn[conf.BEST_PARAMS]:
            comp = r_sklearn[conf.BEST_PARAMS][key_param] == \
                r_epac[conf.BEST_PARAMS][0][key_param]
            self.assertTrue(
                comp, u"Diff CVBestSearchRefitParallel: best parameters")
def test_perm(self):
    X, y = datasets.make_classification(n_samples=20,
                                        n_features=5,
                                        n_informative=2)
    n_perms = 2
    rnd = 0

    # = With EPAC
    wf = Perms(SVC(kernel="linear"), n_perms=n_perms, permute="y",
               random_state=rnd, reducer=None)
    r_epac = wf.top_down(X=X, y=y)

    # = With SKLEARN
    clf = SVC(kernel="linear")
    r_sklearn = list()
    for perm in Permutations(n=y.shape[0], n_perms=n_perms,
                             random_state=rnd):
        y_p = y[perm]
        clf.fit(X, y_p)
        r_sklearn.append(clf.predict(X))

    key2cmp = "y" + conf.SEP + conf.PREDICTION

    # = Comparison
    for iperm in range(n_perms):
        comp = np.all(np.asarray(r_epac[iperm][key2cmp]) ==
                      np.asarray(r_sklearn[iperm]))
        self.assertTrue(comp, u"Diff Perm: EPAC vs sklearn")

    # test reduce
    for iperm in range(n_perms):
        r_epac_reduce = wf.reduce().values()[iperm][key2cmp]
        comp = np.all(np.asarray(r_epac_reduce) ==
                      np.asarray(r_sklearn[iperm]))
        self.assertTrue(comp, u"Diff Perm: EPAC reduce")
def test_engine_info(self):
    n_samples = 20
    n_features = 100
    n_proc = 2
    X, y = datasets.make_classification(n_samples=n_samples,
                                        n_features=n_features,
                                        n_informative=2,
                                        random_state=1)
    Xy = dict(X=X, y=y)
    cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                SVC(kernel="rbf")]),
                      n_folds=3)
    swf_engine = SomaWorkflowEngine(cv_svm_local,
                                    num_processes=n_proc,
                                    resource_id="jl237561@gabriel",
                                    login="******",
                                    remove_finished_wf=False,
                                    remove_local_tree=False,
                                    queue="Global_long")
    swf_engine.run(**Xy)
    print "engine_info ================"
    for job_info in swf_engine.engine_info:
        print "  job_info================="
        print "  mem_cost= ", job_info.mem_cost
        print "  vmem_cost= ", job_info.vmem_cost
        print "  time_cost= ", job_info.time_cost
        self.assertTrue(job_info.time_cost > 0)
def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid,
                                              scoring='roc_auc').fit(X, y)
    search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # ChangedBehaviourWarning occurred previously (prior to #9005)
    score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
    score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
    score_no_score_auc = assert_no_warnings(
        search_no_score_method_auc.score, X, y)
    score_auc = assert_no_warnings(search_auc.score, X, y)

    # ensure the test is sane
    assert_true(score_auc < 1.0)
    assert_true(score_accuracy < 1.0)
    assert_not_equal(score_auc, score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()
]

X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable
]

figure = plt.figure(figsize=(27, 9))
i = 1
# iterate over datasets
for ds_cnt, ds in enumerate(datasets):
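    # (Assumed loop body sketch; the fragment above mirrors the scikit-learn
    # classifier-comparison example, and StandardScaler / train_test_split
    # are assumed to be imported elsewhere in the script.) Preprocess the
    # dataset and split it into training and test parts.
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.4, random_state=42)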
def test_classif_ten_classes(self, NeuralNet):
    X, y = make_classification(n_classes=10, n_informative=10)
    X = X.astype(floatX)
    y = y.astype(np.int32)
    self.classif(NeuralNet, X, y)
# Authors: Christos Aridas
#          Guillaume Lemaitre <*****@*****.**>
# License: MIT

import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.decomposition import PCA

from imblearn.over_sampling import ADASYN

print(__doc__)

# Generate the dataset
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=200, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform X to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply the ADASYN over-sampling
ada = ADASYN()
X_resampled, y_resampled = ada.fit_resample(X, y)
X_res_vis = pca.transform(X_resampled)

# Two subplots, unpack the axes array immediately
f, (ax1, ax2) = plt.subplots(1, 2)
from sklearn.datasets import make_classification

features, target = make_classification(n_samples=100,
                                       n_features=3,
                                       n_informative=3,
                                       n_redundant=0,
                                       n_classes=2,
                                       weights=[.25, .75],
                                       random_state=1)
print('\n Features Matrix: ', features[:3])
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=16, n_features=2, n_informative=2,
                           n_redundant=0, random_state=0)
model = LogisticRegression().fit(X, y)
y_hat = model.predict(X)
f_value = model.decision_function(X)

df = pd.DataFrame(np.vstack([f_value, y_hat, y]).T,
                  columns=["f", "y_hat", "y"])
df.sort_values("f", ascending=False).reset_index(drop=True)

from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y, model.decision_function(X))
fpr, tpr, thresholds

fpr, tpr, thresholds = roc_curve(y, model.predict_proba(X)[:, 1])
# Map each feature name to its importance
for name, value in zip(iris_data.feature_names, dt_clf.feature_importances_):
    print('{0} : {1:.3f}'.format(name, value))

# Visualize the feature importances as a bar plot, one bar per column
sns.barplot(x=dt_clf.feature_importances_, y=iris_data.feature_names)

from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
%matplotlib inline

plt.title("3 Class values with 2 Features Sample data creation")

# Generate classification sample data with 2 features (for 2-D
# visualization) and 3 target classes.
X_features, y_labels = make_classification(n_features=2, n_redundant=0,
                                           n_informative=2, n_classes=3,
                                           n_clusters_per_class=1,
                                           random_state=0)

# Scatter-plot the two features as 2-D coordinates; each class value is
# drawn in a different color.
plt.scatter(X_features[:, 0], X_features[:, 1], marker='o', c=y_labels,
            s=25, cmap='rainbow', edgecolor='k')

import numpy as np


# Function that visualizes a classifier's decision boundary
def visualize_boundary(model, X, y):
    fig, ax = plt.subplots()

    # Show the training data as a scatter plot
    ax.scatter(X[:, 0], X[:, 1], c=y, s=25, cmap='rainbow', edgecolor='k',
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    ax.axis('off')
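    # (Assumed completion; the original snippet is cut off at this point.)
    # The standard way such a helper finishes: evaluate the classifier on a
    # dense mesh over the plot limits and shade the predicted class regions
    # with contourf.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 200),
                         np.linspace(ylim[0], ylim[1], 200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='rainbow', zorder=1)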
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import (RandomTreesEmbedding, RandomForestClassifier,
                              GradientBoostingClassifier)
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.pipeline import make_pipeline

# https://scikit-learn.org/stable/auto_examples/ensemble/plot_feature_transformation.html#example-ensemble-plot-feature-transformation-py

n_estimator = 10
X, y = make_classification(n_samples=80000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# It is important to train the ensemble of trees on a different subset
# of the training data than the linear regression model to avoid
# overfitting, in particular if the total number of leaves is
# similar to the number of training samples
X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.5)

# Unsupervised transformation based on totally random trees
rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator,
                          random_state=0)
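# (Assumed continuation, following the scikit-learn example linked above:
# pipe the random-trees embedding into a logistic regression and compute
# its ROC curve on the held-out set.)
rt_lm = LogisticRegression()
pipeline = make_pipeline(rt, rt_lm)
pipeline.fit(X_train, y_train)
y_pred_rt = pipeline.predict_proba(X_test)[:, 1]
fpr_rt_lm, tpr_rt_lm, _ = roc_curve(y_test, y_pred_rt)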
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.neighbors import NeighborhoodComponentsAnalysis
from matplotlib import cm
from sklearn.utils.fixes import logsumexp

print(__doc__)

n_neighbors = 1
random_state = 0

# Create a tiny data set of 9 samples from 3 classes
X, y = make_classification(n_samples=9, n_features=2, n_informative=2,
                           n_redundant=0, n_classes=3,
                           n_clusters_per_class=1, class_sep=1.0,
                           random_state=random_state)

# Plot the points in the original space
plt.figure()
ax = plt.gca()

# Draw the graph nodes
for i in range(X.shape[0]):
    ax.text(X[i, 0], X[i, 1], str(i), va='center', ha='center')
    ax.scatter(X[i, 0], X[i, 1], s=300, c=cm.Set1(y[[i]]), alpha=0.4)


def p_i(X, i):
def test_random_search_cv_results(params):
    # Make a dataset with a lot of noise to get various kinds of prediction
    # errors across CV folds and parameter settings
    X, y = make_classification(n_samples=200, n_features=100,
                               n_informative=3, random_state=0)

    # scipy.stats dists now support `seed`, but we still support scipy 0.12
    # which doesn't support the seed. Hence the assertions in the test for
    # random_search alone should not depend on randomization.
    n_splits = 3
    n_search_iter = 30
    random_search = dcv.RandomizedSearchCV(
        SVC(),
        n_iter=n_search_iter,
        cv=n_splits,
        iid=False,
        param_distributions=params,
        return_train_score=True,
    )
    random_search.fit(X, y)
    random_search_iid = dcv.RandomizedSearchCV(
        SVC(),
        n_iter=n_search_iter,
        cv=n_splits,
        iid=True,
        param_distributions=params,
        return_train_score=True,
    )
    random_search_iid.fit(X, y)

    param_keys = ("param_C", "param_gamma")
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    n_cand = n_search_iter

    for search, iid in zip((random_search, random_search_iid),
                           (False, True)):
        assert iid == search.iid
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert not (any(cv_results["param_C"].mask) or
                    any(cv_results["param_gamma"].mask))
def test_classif_two_classes(self, NeuralNet):
    X, y = make_classification()
    X = X.astype(floatX)
    y = y.astype(np.int32)
    self.classif(NeuralNet, X, y)
def test_calibration():
    """Test calibration objects with isotonic and sigmoid"""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    # Naive Bayes with calibration
    for this_X_train, this_X_test in [(X_train, X_test),
                                      (sparse.csr_matrix(X_train),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
            # Note that this fit overwrites the fit on the entire training
            # set
            pc_clf.fit(this_X_train, y_train, sample_weight=sw_train)
            prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1]

            # Check that brier score has improved after calibration
            assert_greater(brier_score_loss(y_test, prob_pos_clf),
                           brier_score_loss(y_test, prob_pos_pc_clf))

            # Check invariance against relabeling [0, 1] -> [1, 2]
            pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [-1, 1]
            pc_clf.fit(this_X_train, 2 * y_train - 1,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [1, 0]
            pc_clf.fit(this_X_train, (y_train + 1) % 2,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            if method == "sigmoid":
                assert_array_almost_equal(prob_pos_pc_clf,
                                          1 - prob_pos_pc_clf_relabeled)
            else:
                # Isotonic calibration is not invariant against relabeling
                # but should improve in both cases
                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss((y_test + 1) % 2,
                                                prob_pos_pc_clf_relabeled))

    # check that calibration can also deal with regressors that have
    # a decision_function
    clf_base_regressor = CalibratedClassifierCV(Ridge())
    clf_base_regressor.fit(X_train, y_train)
    clf_base_regressor.predict(X_test)

    # Check failure cases:
    # only "isotonic" and "sigmoid" should be accepted as methods
    clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
    assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train)

    # base-estimators should provide either decision_function or
    # predict_proba (most regressors, for instance, should fail)
    clf_base_regressor = \
        CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid")
    assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
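# A minimal usage sketch of the calibration API exercised above (standard
# scikit-learn calls only): wrap a decision_function-based classifier in
# CalibratedClassifierCV to obtain probability estimates.
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, random_state=42)
calibrated = CalibratedClassifierCV(LinearSVC(random_state=42),
                                    method='sigmoid', cv=3)
calibrated.fit(X, y)
proba = calibrated.predict_proba(X)  # each row sums to 1
assert proba.shape == (200, 2)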
import numpy as np
import torch
from sklearn import datasets


def cls_data():
    X, Y = datasets.make_classification(1000, 10, n_classes=2,
                                        random_state=11)
    X = X.astype(np.float64)
    Y = Y.astype(np.float64).reshape(-1, 1)
    Y[Y == 0] = -1  # map {0, 1} labels to {-1, +1}
    return torch.from_numpy(X), torch.from_numpy(Y)
nb_samples = 100
nb_unlabeled = 75
tolerance = 0.01


def rbf(x1, x2, gamma=10.0):
    n = np.linalg.norm(x1 - x2, ord=1)
    return np.exp(-gamma * np.power(n, 2))


if __name__ == '__main__':
    # Create the dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=2,
                               n_informative=2, n_redundant=0,
                               random_state=1000)
    Y[Y == 0] = -1
    Y[nb_samples - nb_unlabeled:nb_samples] = 0

    # Show the original dataset
    sns.set()
    fig, ax = plt.subplots(figsize=(12, 9))

    ax.scatter(X[Y == -1, 0], X[Y == -1, 1], color='r', marker='s', s=150,
               label="Class 0")
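    # (Assumed continuation, not from the original source): build the dense
    # RBF affinity matrix W over all samples with the rbf() defined above --
    # the usual starting point for graph-based label propagation.
    W = np.array([[rbf(X[i], X[j]) for j in range(nb_samples)]
                  for i in range(nb_samples)])
    assert W.shape == (nb_samples, nb_samples)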
import numpy as np
from sklearn.datasets import make_classification
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from matplotlib.colors import ListedColormap
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report

N = 200

# Generate training data
dat = make_classification(n_samples=N, n_features=2, n_informative=2,
                          n_redundant=0, n_classes=2,
                          n_clusters_per_class=1, weights=[0.7, 0.3],
                          flip_y=0.01, shuffle=True, random_state=200)
X = dat[0]
y = dat[1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

ppn = Perceptron(n_iter=1, eta0=0.1)
ppn.fit(X_train, y_train)
y_pred = ppn.predict(X_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

target_names = ['0', '1']
print(classification_report(y_test, y_pred, target_names=target_names))


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('r', 'b', 'g', 'y', 'm')
import pandas as pd
import numpy as np
# import sklearn
# print('sklearn: %s' % sklearn.__version__)
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# create and configure model
X, y = make_classification(n_samples=16, n_features=2, n_informative=2,
                           n_redundant=0, random_state=0)
# model = LogisticRegression().fit(X, y)
model = LogisticRegression(solver='liblinear').fit(X, y)
# model = LogisticRegression(solver='lbfgs').fit(X, y)
y_hat = model.predict(X)
f_value = model.decision_function(X)

df = pd.DataFrame(np.vstack([f_value, y_hat, y]).T,
                  columns=["f", "y_hat", "y"])
df.sort_values("f", ascending=False).reset_index(drop=True)
print(df)
import numpy as np
import pandas as pd

from sklearn.datasets import make_classification, make_moons
from sklearn.datasets import make_gaussian_quantiles

data = make_classification(n_samples=3000, n_features=200, n_informative=40,
                           n_redundant=10, n_repeated=5, class_sep=1.0,
                           shuffle=False, flip_y=.02, random_state=147)
feat_gt = np.asarray([*([True] * 55), *([False] * 145)])
pd.to_pickle((data[0], data[1], feat_gt), 'hc_bl.pkl')

# MOON
moon_data = make_moons(n_samples=4000)
feat_gt = np.asarray([*([True] * 2), *([False] * 0)])
# pickled tuple: (X, y, feat_ground_truth)
pd.to_pickle((moon_data[0], moon_data[1], feat_gt), 'hcm_1.pkl')

moon_data = make_moons(n_samples=3000, noise=0.32)
moon_data = (np.hstack(
    (moon_data[0],
     moon_data[0] * [np.random.rand(), np.random.randint(1e9)],
     np.vstack(moon_data[0][:, 0] * np.random.rand()))),
    moon_data[1])
fm = 2 * (precision * recall / (precision + recall))
print(fm)


def plot_roc_curve(fpr, tpr):
    plt.plot(fpr, tpr, color='orange', label='ROC')
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()


data_X, class_label = make_classification(n_samples=1000, n_classes=2,
                                          weights=[1, 1], random_state=1)
trainX, testX, trainy, testy = train_test_split(data_X, class_label,
                                                test_size=0.3,
                                                random_state=1)
model = RandomForestClassifier()
model.fit(trainX, trainy)
probs = model.predict_proba(testX)
probs = probs[:, 1]
auc = roc_auc_score(testy, probs)
print('AUC: %.2f' % auc)
fpr, tpr, thresholds = roc_curve(testy, probs)
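# (Assumed next step: draw the curve with the helper defined above.)
plot_roc_curve(fpr, tpr)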
# for ind, i in enumerate(tree_params['criterion']):
#     plt.plot(tree_params['max_depth'], scores[ind], label=str(i))
# plt.legend(loc='best')
# plt.xlabel('max_depth')
# plt.ylabel('Accuracy')
# plt.show()

# clf = DecisionTree(max_depth=9, criterion='entropy')
# clf.fit(X_train, y_train)
# probs = clf.predict_proba(X_test)
# mean_probs = np.mean(probs, axis=0)
# print(mean_probs)

X, y = make_classification(n_features=2, n_redundant=0, n_samples=400,
                           random_state=RANDOM_STATE)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE)
clf = DecisionTree(max_depth=4, criterion='gini')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
prob_pred = clf.predict_proba(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
if sum(np.argmax(prob_pred, axis=1) - y_pred) == 0:
    print('predict_proba works!')
    assert_allclose(pca_full.explained_variance_,
                    pca_other.explained_variance_,
                    rtol=5e-2)
    assert_allclose(
        pca_full.explained_variance_ratio_,
        pca_other.explained_variance_ratio_,
        rtol=5e-2,
    )


@pytest.mark.parametrize(
    "X",
    [
        np.random.RandomState(0).randn(100, 80),
        datasets.make_classification(100, 80, n_informative=78,
                                     random_state=0)[0],
    ],
    ids=["random-data", "correlated-data"],
)
@pytest.mark.parametrize("svd_solver", PCA_SOLVERS)
def test_pca_explained_variance_empirical(X, svd_solver):
    pca = PCA(n_components=2, svd_solver=svd_solver, random_state=0)
    X_pca = pca.fit_transform(X)
    assert_allclose(pca.explained_variance_, np.var(X_pca, ddof=1, axis=0))

    expected_result = np.linalg.eig(np.cov(X, rowvar=False))[0]
    expected_result = sorted(expected_result, reverse=True)[:2]
    assert_allclose(pca.explained_variance_, expected_result, rtol=5e-3)


@pytest.mark.parametrize("svd_solver", ["arpack", "randomized"])
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 12 23:31:18 2020

@author: Jie.Hu
"""

# mean shift clustering
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.cluster import MeanShift

# define dataset
X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_clusters_per_class=1,
                           random_state=4)
# define the model
model = MeanShift()
# fit model and predict clusters
yhat = model.fit_predict(X)
# retrieve unique clusters
clusters = np.unique(yhat)
# create scatter plot for samples from each cluster
for cluster in clusters:
    # get row indexes for samples with this cluster
    row_ix = np.where(yhat == cluster)
    # create scatter of these samples
    plt.scatter(X[row_ix, 0], X[row_ix, 1])
# show the plot
plt.show()
A recursive feature elimination example with automatic tuning of the
number of features selected with cross-validation.
"""
print(__doc__)

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

# Build a classification task using 3 informative features
X, y = make_classification(n_samples=1000, n_features=25, n_informative=3,
                           n_redundant=2, n_repeated=0, n_classes=8,
                           n_clusters_per_class=1, random_state=0)

# Create the RFE object and compute a cross-validated score.
svc = SVC(kernel="linear")
# The "accuracy" scoring is proportional to the number of correct
# classifications
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2),
              scoring='accuracy')
rfecv.fit(X, y)

print("Optimal number of features : %d" % rfecv.n_features_)

# Plot number of features VS. cross-validation scores
plt.figure()
def test_metrics():
    X, y = make_classification(n_samples=500, n_features=5, n_informative=5,
                               n_redundant=0, n_repeated=0, n_classes=2)
    X = StandardScaler().fit(X).transform(X)
    model = SKLogisticRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    y_proba = model.predict_proba(X)[:, 1]
    y_true = y
    print('Confusion Matrix:',
          confusion_matrix(y_true, y_pred) ==
          metrics.confusion_matrix(y_true, y_pred),
          sep='\n')
    print('Delta Accuracy:',
          accuracy(y_true, y_pred) - metrics.accuracy_score(y_true, y_pred))
    print('Delta Micro Recall:',
          recall(y_true, y_pred, kind='micro') -
          metrics.recall_score(y_true, y_pred, average='micro'))
    print('Delta Micro Precision:',
          precision(y_true, y_pred, kind='micro') -
          metrics.precision_score(y_true, y_pred, average='micro'))
    print('Delta Micro F1-score:',
          f1_score(y_true, y_pred, kind='micro') -
          metrics.f1_score(y_true, y_pred, average='micro'))
    print('Delta Macro Recall:',
          recall(y_true, y_pred, kind='macro') -
          metrics.recall_score(y_true, y_pred, average='macro'))
    print('Delta Macro Precision:',
          precision(y_true, y_pred, kind='macro') -
          metrics.precision_score(y_true, y_pred, average='macro'))
    print('Delta Macro F1-score:',
          f1_score(y_true, y_pred, kind='macro') -
          metrics.f1_score(y_true, y_pred, average='macro'))
    print('Delta All Recall:',
          recall(y_true, y_pred, kind='all') -
          metrics.recall_score(y_true, y_pred, average=None))
    print('Delta All Precision:',
          precision(y_true, y_pred, kind='all') -
          metrics.precision_score(y_true, y_pred, average=None))
    print('Delta All F1-score:',
          f1_score(y_true, y_pred, kind='all') -
          metrics.f1_score(y_true, y_pred, average=None))
    print('Delta log loss:',
          log_loss_score(y_true, y_proba) -
          metrics.log_loss(y_true, y_proba))
    print('Delta zero one loss:',
          zero_one_loss(y_true, y_pred) -
          metrics.zero_one_loss(y_true, y_pred))
    print('*' * 80)

    X, y = make_regression(n_samples=500, n_features=5, n_informative=5,
                           n_targets=1)
    model = SKLinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    y_true = y
    print('Delta mean_absolute_error:',
          mean_absolute_error(y_true, y_pred) -
          metrics.mean_absolute_error(y_true, y_pred))
    print('Delta mean_squared_error:',
          mean_squared_error(y_true, y_pred) -
          metrics.mean_squared_error(y_true, y_pred))
    print('Delta r2_score:',
          r2_score(y_true, y_pred) - metrics.r2_score(y_true, y_pred))
def test_logistic_regression_sample_weights():
    X, y = make_classification(n_samples=20, n_features=5, n_informative=3,
                               n_classes=2, random_state=0)
    sample_weight = y + 1

    for LR in [LogisticRegression, LogisticRegressionCV]:

        # Test that passing sample_weight as ones is the same as
        # not passing them at all (default None)
        for solver in ['lbfgs', 'liblinear']:
            clf_sw_none = LR(solver=solver, fit_intercept=False,
                             random_state=42)
            clf_sw_none.fit(X, y)
            clf_sw_ones = LR(solver=solver, fit_intercept=False,
                             random_state=42)
            clf_sw_ones.fit(X, y, sample_weight=np.ones(y.shape[0]))
            assert_array_almost_equal(
                clf_sw_none.coef_, clf_sw_ones.coef_, decimal=4)

        # Test that sample weights work the same with the lbfgs,
        # newton-cg, and 'sag' solvers
        clf_sw_lbfgs = LR(solver='lbfgs', fit_intercept=False,
                          random_state=42)
        clf_sw_lbfgs.fit(X, y, sample_weight=sample_weight)
        clf_sw_n = LR(solver='newton-cg', fit_intercept=False,
                      random_state=42)
        clf_sw_n.fit(X, y, sample_weight=sample_weight)
        clf_sw_sag = LR(solver='sag', fit_intercept=False, tol=1e-10,
                        random_state=42)
        # ignore convergence warning due to small dataset
        with ignore_warnings():
            clf_sw_sag.fit(X, y, sample_weight=sample_weight)
        clf_sw_liblinear = LR(solver='liblinear', fit_intercept=False,
                              random_state=42)
        clf_sw_liblinear.fit(X, y, sample_weight=sample_weight)
        assert_array_almost_equal(
            clf_sw_lbfgs.coef_, clf_sw_n.coef_, decimal=4)
        assert_array_almost_equal(
            clf_sw_lbfgs.coef_, clf_sw_sag.coef_, decimal=4)
        assert_array_almost_equal(
            clf_sw_lbfgs.coef_, clf_sw_liblinear.coef_, decimal=4)

        # Test that passing class_weight as [1,2] is the same as
        # passing class weight = [1,1] but adjusting sample weights
        # to be 2 for all instances of class 2
        for solver in ['lbfgs', 'liblinear']:
            clf_cw_12 = LR(solver=solver, fit_intercept=False,
                           class_weight={0: 1, 1: 2}, random_state=42)
            clf_cw_12.fit(X, y)
            clf_sw_12 = LR(solver=solver, fit_intercept=False,
                           random_state=42)
            clf_sw_12.fit(X, y, sample_weight=sample_weight)
            assert_array_almost_equal(
                clf_cw_12.coef_, clf_sw_12.coef_, decimal=4)

    # Test the above for l1 penalty and l2 penalty with dual=True,
    # since the patched liblinear code is different.
    clf_cw = LogisticRegression(
        solver="liblinear", fit_intercept=False,
        class_weight={0: 1, 1: 2}, penalty="l1", tol=1e-5, random_state=42)
    clf_cw.fit(X, y)
    clf_sw = LogisticRegression(
        solver="liblinear", fit_intercept=False, penalty="l1", tol=1e-5,
        random_state=42)
    clf_sw.fit(X, y, sample_weight)
    assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)

    clf_cw = LogisticRegression(
        solver="liblinear", fit_intercept=False,
        class_weight={0: 1, 1: 2}, penalty="l2", dual=True, random_state=42)
    clf_cw.fit(X, y)
    clf_sw = LogisticRegression(
        solver="liblinear", fit_intercept=False, penalty="l2", dual=True,
        random_state=42)
    clf_sw.fit(X, y, sample_weight)
    assert_array_almost_equal(clf_cw.coef_, clf_sw.coef_, decimal=4)
def test_saga_sparse():
    # Test that LogRegCV with solver='saga' works for sparse matrices
    X, y = make_classification(n_samples=10, n_features=5, random_state=0)
    clf = LogisticRegressionCV(solver='saga')
    clf.fit(sparse.csr_matrix(X), y)
            if train_predictions_1[i] == train_predictions_2[i]:
                newX_l.append(X_unlabeled[i])
                newy_l.append(train_predictions_1[i])
                dele.append(i)
        newX_l = np.array(newX_l)
        newy_l = np.array(newy_l)
        if newX_l.shape[0] == 0:
            break
        X_labeled = np.append(X_labeled, newX_l, axis=0)
        y_labeled = np.append(y_labeled, newy_l)
        X_unlabeled = np.delete(X_unlabeled, dele, axis=0)
    return X_labeled, y_labeled


# Read data
X, y = make_classification(n_samples=2000, n_features=2, n_redundant=0,
                           n_informative=2, n_clusters_per_class=2)
plt.figure()
plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)

# Normalization
X_norm = preprocessing.scale(X)

num_labeled = 25
num_unlabeled = [0, 10, 20, 40, 80, 160, 320, 640, 1280]
err_lda = {}
err_clustering = {}
err_co = {}
log_lda = {}
log_clustering = {}
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.datasets as dat

NFEAT = 8
NINFO = 5
NRED = 0
NREP = 0
NCLASS = 3
NCLUSTCLASS = 1
nUnrelated = NFEAT - NINFO - NRED - NREP

[X, Y] = dat.make_classification(n_samples=100,
                                 n_features=NFEAT,
                                 n_informative=NINFO,
                                 n_redundant=NRED,
                                 n_repeated=NREP,
                                 n_classes=NCLASS,
                                 n_clusters_per_class=NCLUSTCLASS,
                                 class_sep=5,
                                 shuffle=False)

nUseful = NINFO + NRED + NREP
print("Useful features: first {}".format(NINFO))

sns.set()
binwidth = 1
fig1, sub = plt.subplots(2, int(NFEAT / 2))
fig1.subplots_adjust(wspace=0.6, hspace=0.6)
colors = ['b', 'r', 'g', 'y', 'c']
classes = []
import numpy as np
from sklearn.datasets import make_classification


def dataset():
    X, y = make_classification(100, 5, random_state=42)
    X = X.astype(np.float64)
    y = y.astype(np.float64)
    return X, y
        if alpha >= self.H:
            return self.H
        elif alpha <= self.L:
            return self.L
        else:
            return alpha

    def K(self, x_i, x_j):
        return np.dot(x_i, x_j)


if __name__ == '__main__':
    X, Y = make_classification(n_features=2, n_informative=2,
                               n_redundant=0, n_repeated=0, n_classes=2,
                               n_clusters_per_class=1)
    # X, Y = make_blobs(n_samples=50, centers=2, random_state=0,
    #                   cluster_std=0.60)
    Y = [pow(-1, num + 1) for num in Y]
    s = SMO(X, Y, 1, 100)
    w = s.w()
    b = s.b
    x1 = max(X, key=lambda x: x[0])
    x2 = min(X, key=lambda x: x[0])
    slope = -w[0] / w[1]
    intercept = -b / w[1]
    print(slope)
def test_grid_search_cv_results():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_grid_points = 6
    params = [
        dict(kernel=["rbf"], C=[1, 10], gamma=[0.1, 1]),
        dict(kernel=["poly"], degree=[1, 2]),
    ]
    grid_search = dcv.GridSearchCV(
        SVC(gamma="auto"),
        cv=n_splits,
        iid=False,
        param_grid=params,
        return_train_score=True,
    )
    grid_search.fit(X, y)
    grid_search_iid = dcv.GridSearchCV(
        SVC(gamma="auto"),
        cv=n_splits,
        iid=True,
        param_grid=params,
        return_train_score=True,
    )
    grid_search_iid.fit(X, y)

    param_keys = ("param_C", "param_degree", "param_gamma", "param_kernel")
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    n_candidates = n_grid_points

    for search, iid in zip((grid_search, grid_search_iid), (False, True)):
        assert iid == search.iid
        cv_results = search.cv_results_
        # Check if score and timing are reasonable
        assert all(cv_results["rank_test_score"] >= 1)
        assert all(
            all(cv_results[k] >= 0) for k in score_keys
            if k != "rank_test_score")
        assert all(
            all(cv_results[k] <= 1) for k in score_keys
            if "time" not in k and k != "rank_test_score")
        # Check cv_results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys,
                              n_candidates)

    # Check masking
    cv_results = grid_search.cv_results_
    n_candidates = len(grid_search.cv_results_["params"])
    assert all((cv_results["param_C"].mask[i] and
                cv_results["param_gamma"].mask[i] and
                not cv_results["param_degree"].mask[i])
               for i in range(n_candidates)
               if cv_results["param_kernel"][i] == "linear")
    assert all((not cv_results["param_C"].mask[i] and
                not cv_results["param_gamma"].mask[i] and
                cv_results["param_degree"].mask[i])
               for i in range(n_candidates)
               if cv_results["param_kernel"][i] == "rbf")
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (brier_score_loss, precision_score,
                             recall_score, f1_score)
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.cross_validation import train_test_split


# Create dataset of classification task with many redundant and few
# informative features
X, y = datasets.make_classification(n_samples=100000, n_features=20,
                                    n_informative=2, n_redundant=10,
                                    random_state=42)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.99,
                                                    random_state=42)


def plot_calibration_curve(est, name, fig_index):
    """Plot calibration curve for est w/o and with calibration. """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
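    # (Assumed continuation; the original snippet is cut off here. This is
    # a simplified sketch in the spirit of the scikit-learn calibration
    # example the code above mirrors, restricted to estimators that expose
    # predict_proba.)
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Fit each variant and plot its reliability diagram with
    # calibration_curve, which returns the fraction of positives and the
    # mean predicted probability per bin.
    plt.figure(fig_index, figsize=(6, 6))
    for clf, label in [(est, name),
                       (isotonic, name + ' + Isotonic'),
                       (sigmoid, name + ' + Sigmoid')]:
        clf.fit(X_train, y_train)
        prob_pos = clf.predict_proba(X_test)[:, 1]
        frac_pos, mean_pred = calibration_curve(y_test, prob_pos, n_bins=10)
        plt.plot(mean_pred, frac_pos, 's-', label=label)
    plt.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')
    plt.xlabel('Mean predicted probability')
    plt.ylabel('Fraction of positives')
    plt.legend(loc='lower right')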