Beispiel #1
0
def test_path_parameters():
    X, y = make_sparse_data()
    max_iter = 50
    n_alphas = 10
    clf = ElasticNetCV(n_alphas=n_alphas,
                       eps=1e-3,
                       max_iter=max_iter,
                       l1_ratio=0.5,
                       fit_intercept=False)
    ignore_warnings(clf.fit)(X, y)  # new params
    assert_almost_equal(0.5, clf.l1_ratio)
    assert n_alphas == clf.n_alphas
    assert n_alphas == len(clf.alphas_)
    sparse_mse_path = clf.mse_path_
    ignore_warnings(clf.fit)(X.toarray(), y)  # compare with dense area_data
    assert_almost_equal(clf.mse_path_, sparse_mse_path)
def test_selectkbest_tiebreaking():
    # Test whether SelectKBest actually selects k features in case of ties.
    # Prior to 0.11, SelectKBest would return more features than requested.
    Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
    y = [1]
    dummy_score = lambda X, y: (X[0], X[0])
    for X in Xs:
        sel = SelectKBest(dummy_score, k=1)
        X1 = ignore_warnings(sel.fit_transform)([X], y)
        assert X1.shape[1] == 1
        assert_best_scores_kept(sel)

        sel = SelectKBest(dummy_score, k=2)
        X2 = ignore_warnings(sel.fit_transform)([X], y)
        assert X2.shape[1] == 2
        assert_best_scores_kept(sel)
Beispiel #3
0
def test_partial_fit_classification():
    # Test partial_fit on classification.
    # `partial_fit` should yield the same results as 'fit' for binary and
    # multi-class classification.
    for X, y in classification_datasets:
        mlp = MLPClassifier(
            solver="sgd",
            max_iter=100,
            random_state=1,
            tol=0,
            alpha=1e-5,
            learning_rate_init=0.2,
        )

        with ignore_warnings(category=ConvergenceWarning):
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPClassifier(solver="sgd",
                            random_state=1,
                            alpha=1e-5,
                            learning_rate_init=0.2)
        for i in range(100):
            mlp.partial_fit(X, y, classes=np.unique(y))
        pred2 = mlp.predict(X)
        assert_array_equal(pred1, pred2)
        assert mlp.score(X, y) > 0.95
def test_warm_start(solver, warm_start, fit_intercept, multi_class):
    # A 1-iteration second fit on same data should give almost same result
    # with warm starting, and quite different result without warm starting.
    # Warm starting does not work with liblinear solver.
    X, y = iris.data, iris.target

    clf = LogisticRegression(
        tol=1e-4,
        multi_class=multi_class,
        warm_start=warm_start,
        solver=solver,
        random_state=42,
        fit_intercept=fit_intercept,
    )
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
        coef_1 = clf.coef_

        clf.max_iter = 1
        clf.fit(X, y)
    cum_diff = np.sum(np.abs(coef_1 - clf.coef_))
    msg = (
        "Warm starting issue with %s solver in %s mode "
        "with fit_intercept=%s and warm_start=%s"
        % (solver, multi_class, str(fit_intercept), str(warm_start))
    )
    if warm_start:
        assert 2.0 > cum_diff, msg
    else:
        assert cum_diff > 2.0, msg
Beispiel #5
0
def test_estimators(estimator, check, request):
    # Common tests for estimator instances
    with ignore_warnings(
        category=(FutureWarning, ConvergenceWarning, UserWarning, FutureWarning)
    ):
        _set_checking_parameters(estimator)
        check(estimator)
Beispiel #6
0
def test_show_versions(capsys):
    with ignore_warnings():
        show_versions()
        out, err = capsys.readouterr()

    assert "python" in out
    assert "numpy" in out
Beispiel #7
0
def test_1d_input(name):
    X = iris.data[:, 0]
    X_2d = iris.data[:, 0].reshape((-1, 1))
    y = iris.target

    with ignore_warnings():
        check_1d_input(name, X, X_2d, y)
Beispiel #8
0
def test_sparse_encode_error_default_sparsity():
    rng = np.random.RandomState(0)
    X = rng.randn(100, 64)
    D = rng.randn(2, 64)
    code = ignore_warnings(sparse_encode)(X, D, algorithm='omp',
                                          n_nonzero_coefs=None)
    assert code.shape == (100, 2)
def test_pairwise_boolean_distance(metric):
    # test that we convert to boolean arrays for boolean distances
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    # ignore conversion to boolean in pairwise_distances
    with ignore_warnings(category=DataConversionWarning):
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert np.sum(res != 0) == 0

    # non-boolean arrays are converted to boolean for boolean
    # distance metrics with a data conversion warning
    msg = "Data was converted to boolean for metric %s" % metric
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X, metric=metric)

    # Check that the warning is raised if X is boolean by Y is not boolean:
    with pytest.warns(DataConversionWarning, match=msg):
        pairwise_distances(X.astype(bool), Y=Y, metric=metric)

    # Check that no warning is raised if X is already boolean and Y is None:
    with pytest.warns(None) as records:
        pairwise_distances(X.astype(bool), metric=metric)
    assert len(records) == 0
Beispiel #10
0
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameters checking is public function
    nnmf = non_negative_factorization
    msg = re.escape(
        "Number of components must be a positive integer; got (n_components=1.5)"
    )
    with pytest.raises(ValueError, match=msg):
        nnmf(A, A, A, 1.5, init="random")
    msg = re.escape(
        "Number of components must be a positive integer; got (n_components='2')"
    )
    with pytest.raises(ValueError, match=msg):
        nnmf(A, A, A, "2", init="random")
    msg = re.escape("Negative values in data passed to NMF (input H)")
    with pytest.raises(ValueError, match=msg):
        nnmf(A, A, -A, 2, init="custom")
    msg = re.escape("Negative values in data passed to NMF (input W)")
    with pytest.raises(ValueError, match=msg):
        nnmf(A, -A, A, 2, init="custom")
    msg = re.escape("Array passed to NMF (input H) is full of zeros")
    with pytest.raises(ValueError, match=msg):
        nnmf(A, A, 0 * A, 2, init="custom")

    with ignore_warnings(category=FutureWarning):
        # TODO remove in 1.2
        msg = "Invalid regularization parameter: got 'spam' instead of one of"
        with pytest.raises(ValueError, match=msg):
            nnmf(A, A, 0 * A, 2, init="custom", regularization="spam")
Beispiel #11
0
def test_transformers_get_feature_names_out(transformer):
    _set_checking_parameters(transformer)

    with ignore_warnings(category=(FutureWarning)):
        check_transformer_get_feature_names_out(transformer.__class__.__name__,
                                                transformer)
        check_transformer_get_feature_names_out_pandas(
            transformer.__class__.__name__, transformer)
Beispiel #12
0
def test_fit_docstring_attributes(name, Estimator):
    pytest.importorskip("numpydoc")
    from numpydoc import docscrape

    if Estimator.__name__ in _DOCSTRING_IGNORES:
        return

    doc = docscrape.ClassDoc(Estimator)
    attributes = doc["Attributes"]

    if Estimator.__name__ == "Pipeline":
        est = _construct_compose_pipeline_instance(Estimator)
    else:
        est = _construct_instance(Estimator)

    X, y = make_classification(
        n_samples=20,
        n_features=3,
        n_redundant=0,
        n_classes=2,
        random_state=2,
    )

    y = _enforce_estimator_tags_y(est, y)
    X = _enforce_estimator_tags_x(est, X)

    if "oob_score" in est.get_params():
        est.set_params(oob_score=True)

    if is_sampler(est):
        est.fit_resample(X, y)
    else:
        est.fit(X, y)

    skipped_attributes = set([])

    for attr in attributes:
        if attr.name in skipped_attributes:
            continue
        desc = " ".join(attr.desc).lower()
        # As certain attributes are present "only" if a certain parameter is
        # provided, this checks if the word "only" is present in the attribute
        # description, and if not the attribute is required to be present.
        if "only " in desc:
            continue
        # ignore deprecation warnings
        with ignore_warnings(category=FutureWarning):
            assert hasattr(est, attr.name)

    fit_attr = _get_all_fitted_attributes(est)
    fit_attr_names = [attr.name for attr in attributes]
    undocumented_attrs = set(fit_attr).difference(fit_attr_names)
    undocumented_attrs = set(undocumented_attrs).difference(skipped_attributes)
    if undocumented_attrs:
        raise AssertionError(
            f"Undocumented attributes for {Estimator.__name__}: {undocumented_attrs}"
        )
Beispiel #13
0
def test_same_multiple_output_sparse_dense():
    for normalize in [True, False]:
        l = ElasticNet(normalize=normalize)
        X = [[0, 1, 2, 3, 4], [0, 2, 5, 8, 11], [9, 10, 11, 12, 13],
             [10, 11, 12, 13, 14]]
        y = [[1, 2, 3, 4, 5], [1, 3, 6, 9, 12], [10, 11, 12, 13, 14],
             [11, 12, 13, 14, 15]]
        ignore_warnings(l.fit)(X, y)
        sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
        predict_dense = l.predict(sample)

        l_sp = ElasticNet(normalize=normalize)
        X_sp = sp.coo_matrix(X)
        ignore_warnings(l_sp.fit)(X_sp, y)
        sample_sparse = sp.coo_matrix(sample)
        predict_sparse = l_sp.predict(sample_sparse)

        assert_array_almost_equal(predict_sparse, predict_dense)
Beispiel #14
0
def test_pandas_column_name_consistency(estimator):
    _set_checking_parameters(estimator)
    with ignore_warnings(category=(FutureWarning)):
        with pytest.warns(None) as record:
            check_dataframe_column_names_consistency(
                estimator.__class__.__name__, estimator)
        for warning in record:
            assert "was fitted without feature names" not in str(
                warning.message)
Beispiel #15
0
def test_ridge_gcv_sample_weights(gcv_mode, X_constructor, fit_intercept,
                                  n_features, y_shape, noise):
    alphas = [1e-3, .1, 1., 10., 1e3]
    rng = np.random.RandomState(0)
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(n_samples=11,
                                          n_features=n_features,
                                          n_targets=n_targets,
                                          random_state=0,
                                          shuffle=False,
                                          noise=noise)
    y = y.reshape(y_shape)

    sample_weight = 3 * rng.randn(len(X))
    sample_weight = (sample_weight - sample_weight.min() + 1).astype(int)
    indices = np.repeat(np.arange(X.shape[0]), sample_weight)
    sample_weight = sample_weight.astype(float)
    X_tiled, y_tiled = X[indices], y[indices]

    cv = GroupKFold(n_splits=X.shape[0])
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    kfold = RidgeCV(alphas=alphas,
                    cv=splits,
                    scoring='neg_mean_squared_error',
                    fit_intercept=fit_intercept)
    # ignore warning from GridSearchCV: DeprecationWarning: The default of the
    # `iid` parameter will change from True to False in version 0.22 and will
    # be removed in 0.24
    with ignore_warnings(category=DeprecationWarning):
        kfold.fit(X_tiled, y_tiled)

    ridge_reg = Ridge(alpha=kfold.alpha_, fit_intercept=fit_intercept)
    splits = cv.split(X_tiled, y_tiled, groups=indices)
    predictions = cross_val_predict(ridge_reg, X_tiled, y_tiled, cv=splits)
    kfold_errors = (y_tiled - predictions)**2
    kfold_errors = [
        np.sum(kfold_errors[indices == i], axis=0)
        for i in np.arange(X.shape[0])
    ]
    kfold_errors = np.asarray(kfold_errors)

    X_gcv = X_constructor(X)
    gcv_ridge = RidgeCV(alphas=alphas,
                        store_cv_values=True,
                        gcv_mode=gcv_mode,
                        fit_intercept=fit_intercept)
    gcv_ridge.fit(X_gcv, y, sample_weight=sample_weight)
    if len(y_shape) == 2:
        gcv_errors = gcv_ridge.cv_values_[:, :, alphas.index(kfold.alpha_)]
    else:
        gcv_errors = gcv_ridge.cv_values_[:, alphas.index(kfold.alpha_)]

    assert kfold.alpha_ == pytest.approx(gcv_ridge.alpha_)
    assert_allclose(gcv_errors, kfold_errors, rtol=1e-3)
    assert_allclose(gcv_ridge.coef_, kfold.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, kfold.intercept_, rtol=1e-3)
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd',
                                  transform_alpha=0.001, random_state=0,
                                  n_jobs=4)
        with ignore_warnings(category=ConvergenceWarning):
            code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_), X_read_only,
                                  decimal=2)
def test_n_components():
    # Test n_components returned by linkage, average and ward tree
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Connectivity matrix having five components.
    connectivity = np.eye(5)

    for linkage_func in _TREE_BUILDERS.values():
        assert ignore_warnings(linkage_func)(X, connectivity)[1] == 5
Beispiel #18
0
def test_lars_precompute(classifier):
    # Check for different values of precompute
    G = np.dot(X.T, X)

    clf = classifier(precompute=G)
    output_1 = ignore_warnings(clf.fit)(X, y).coef_
    for precompute in [True, False, 'auto', None]:
        clf = classifier(precompute=precompute)
        output_2 = clf.fit(X, y).coef_
        assert_array_almost_equal(output_1, output_2, decimal=8)
Beispiel #19
0
def check_warm_start_oob(name):
    # Test that the warm start computes oob score when asked.
    X, y = hastie_X, hastie_y
    ForestEstimator = FOREST_ESTIMATORS[name]
    # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning.
    est = ForestEstimator(n_estimators=15,
                          max_depth=3,
                          warm_start=False,
                          random_state=1,
                          bootstrap=True,
                          oob_score=True)
    est.fit(X, y)

    est_2 = ForestEstimator(n_estimators=5,
                            max_depth=3,
                            warm_start=False,
                            random_state=1,
                            bootstrap=True,
                            oob_score=False)
    est_2.fit(X, y)

    est_2.set_params(warm_start=True, oob_score=True, n_estimators=15)
    est_2.fit(X, y)

    assert hasattr(est_2, 'oob_score_')
    assert est.oob_score_ == est_2.oob_score_

    # Test that oob_score is computed even if we don't need to train
    # additional trees.
    est_3 = ForestEstimator(n_estimators=15,
                            max_depth=3,
                            warm_start=True,
                            random_state=1,
                            bootstrap=True,
                            oob_score=False)
    est_3.fit(X, y)
    assert not hasattr(est_3, 'oob_score_')

    est_3.set_params(oob_score=True)
    ignore_warnings(est_3.fit)(X, y)

    assert est.oob_score_ == est_3.oob_score_
def test_show_versions(capsys):
    with ignore_warnings():
        show_versions()
        out, err = capsys.readouterr()

    assert "python" in out
    assert "numpy" in out

    info = threadpool_info()
    if info:
        assert "threadpoolctl info:" in out
Beispiel #21
0
def test_search_cv(estimator, check, request):
    # Common tests for SearchCV instances
    # We have a separate test because those meta-estimators can accept a
    # wide range of base estimators (classifiers, regressors, pipelines)
    with ignore_warnings(category=(
            FutureWarning,
            ConvergenceWarning,
            UserWarning,
            FitFailedWarning,
    )):
        check(estimator)
Beispiel #22
0
def test_qda_regularization():
    # the default is reg_param=0. and will cause issues
    # when there is a constant variable
    clf = QuadraticDiscriminantAnalysis()
    with ignore_warnings():
        y_pred = clf.fit(X2, y6).predict(X2)
    assert np.any(y_pred != y6)

    # adding a little regularization fixes the problem
    clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with ignore_warnings():
        clf.fit(X2, y6)
    y_pred = clf.predict(X2)
    assert_array_equal(y_pred, y6)

    # Case n_samples_in_a_class < n_features
    clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with ignore_warnings():
        clf.fit(X5, y5)
    y_pred5 = clf.predict(X5)
    assert_array_equal(y_pred5, y5)
Beispiel #23
0
def test_unstructured_linkage_tree():
    # Check that we obtain the correct solution for unstructured linkage trees.
    rng = np.random.RandomState(0)
    X = rng.randn(50, 100)
    for this_X in (X, X[0]):
        # With specified a number of clusters just for the sake of
        # raising a warning and testing the warning code
        with ignore_warnings():
            children, n_nodes, n_leaves, parent = assert_warns(
                UserWarning, ward_tree, this_X.T, n_clusters=10)
        n_nodes = 2 * X.shape[1] - 1
        assert len(children) + n_leaves == n_nodes

    for tree_builder in _TREE_BUILDERS.values():
        for this_X in (X, X[0]):
            with ignore_warnings():
                children, n_nodes, n_leaves, parent = assert_warns(
                    UserWarning, tree_builder, this_X.T, n_clusters=10)

            n_nodes = 2 * X.shape[1] - 1
            assert len(children) + n_leaves == n_nodes
def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    grid = ParameterGrid({"n_estimators": [3],
                          "max_samples": [0.5, 1.0, 3],
                          "bootstrap": [True, False]})

    with ignore_warnings():
        for params in grid:
            IsolationForest(random_state=rng,
                            **params).fit(X_train).predict(X_test)
Beispiel #25
0
def test_get_deps_info():
    with ignore_warnings():
        deps_info = _get_deps_info()

    assert 'pip' in deps_info
    assert 'setuptools' in deps_info
    assert 'sklearn' in deps_info
    assert 'numpy' in deps_info
    assert 'scipy' in deps_info
    assert 'Cython' in deps_info
    assert 'pandas' in deps_info
    assert 'matplotlib' in deps_info
    assert 'joblib' in deps_info
Beispiel #26
0
def test_estimators(estimator, check, request):
    # Common tests for estimator instances
    with ignore_warnings(category=(FutureWarning, ConvergenceWarning,
                                   UserWarning, FutureWarning)):
        _set_checking_parameters(estimator)

        xfail_checks = _safe_tags(estimator, '_xfail_test')
        check_name = _set_check_estimator_ids(check)
        if xfail_checks:
            if check_name in xfail_checks:
                msg = xfail_checks[check_name]
                request.applymarker(pytest.mark.xfail(reason=msg))
        check(estimator)
def test_get_deps_info():
    with ignore_warnings():
        deps_info = _get_deps_info()

    assert "pip" in deps_info
    assert "setuptools" in deps_info
    assert "sklearn" in deps_info
    assert "numpy" in deps_info
    assert "scipy" in deps_info
    assert "Cython" in deps_info
    assert "pandas" in deps_info
    assert "matplotlib" in deps_info
    assert "joblib" in deps_info
Beispiel #28
0
def test_verbose_sgd():
    # Test verbose.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(solver="sgd", max_iter=2, verbose=10, hidden_layer_sizes=2)
    old_stdout = sys.stdout
    sys.stdout = output = StringIO()

    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
    clf.partial_fit(X, y)

    sys.stdout = old_stdout
    assert "Iteration" in output.getvalue()
Beispiel #29
0
def test_attibutes_shapes(Est):
    # Make sure attributes are of the correct shape depending on n_components
    d = load_linnerud()
    X = d.data
    Y = d.target
    n_components = 2
    pls = Est(n_components=n_components)
    pls.fit(X, Y)
    assert all(attr.shape[1] == n_components
               for attr in (pls.x_weights_, pls.y_weights_))
    # TODO: remove in 1.1
    with ignore_warnings(category=FutureWarning):
        assert all(attr.shape[1] == n_components
                   for attr in (pls.x_scores_, pls.y_scores_))
Beispiel #30
0
def test_warm_start():
    X, y, _, _ = build_dataset()
    clf = ElasticNet(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, y)
    ignore_warnings(clf.fit)(X, y)  # do a second round with 5 iterations

    clf2 = ElasticNet(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)