Example #1
def test_nmf_negative_beta_loss():
    # Test that an error is raised if beta_loss < 0 and X contains zeros.
    # Test that the output has no NaN values when the input contains zeros.
    n_samples = 6
    n_features = 5
    n_components = 3

    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = sp.csr_matrix(X)

    def _assert_nmf_no_nan(X, beta_loss):
        W, H, _ = non_negative_factorization(X,
                                             init='random',
                                             n_components=n_components,
                                             solver='mu',
                                             beta_loss=beta_loss,
                                             random_state=0,
                                             max_iter=1000)
        assert not np.any(np.isnan(W))
        assert not np.any(np.isnan(H))

    msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge."
    for beta_loss in (-0.6, 0.):
        assert_raise_message(ValueError, msg, _assert_nmf_no_nan, X, beta_loss)
        _assert_nmf_no_nan(X + 1e-9, beta_loss)

    for beta_loss in (0.2, 1., 1.2, 2., 2.5):
        _assert_nmf_no_nan(X, beta_loss)
        _assert_nmf_no_nan(X_csr, beta_loss)
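
assert_raise_message is scikit-learn's internal testing helper; the same check written with plain pytest would look roughly like this (a sketch matching a substring of the expected message):

import re

import numpy as np
import pytest
from sklearn.decomposition import non_negative_factorization

X = np.clip(np.random.RandomState(42).randn(6, 5), 0, None)  # contains zeros
with pytest.raises(ValueError, match=re.escape("the solver may diverge")):
    # beta_loss <= 0 together with zeros in X is rejected by the 'mu' solver
    non_negative_factorization(X, init='random', n_components=3,
                               solver='mu', beta_loss=-0.6)
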
Example #2
def test_bayesian_mixture_weights_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 5, 2
    X = rng.rand(n_samples, n_features)

    # Check the error message for a bad value of weight_concentration_prior
    bad_weight_concentration_prior_ = 0.
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=bad_weight_concentration_prior_,
        random_state=0)
    assert_raise_message(
        ValueError, "The parameter 'weight_concentration_prior' "
        "should be greater than 0., but got %.3f." %
        bad_weight_concentration_prior_, bgmm.fit, X)

    # Check correct init for a given value of weight_concentration_prior
    weight_concentration_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=weight_concentration_prior,
        random_state=rng).fit(X)
    assert_almost_equal(weight_concentration_prior,
                        bgmm.weight_concentration_prior_)

    # Check correct init for the default value of weight_concentration_prior
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   random_state=rng).fit(X)
    assert_almost_equal(1. / n_components, bgmm.weight_concentration_prior_)
Example #3
def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           max_iter=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(
        NotFittedError, "This GaussianMixture instance is not fitted "
        "yet. Call 'fit' with appropriate arguments "
        "before using this estimator.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check that the score increases with more EM iterations
    gmm2 = GaussianMixture(n_components=n_components,
                           n_init=1,
                           reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert gmm2.score(X) > gmm1.score(X)
Example #4
def test_column_transformer_invalid_columns(remainder):
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T

    # generally invalid specifications
    for col in [1.5, ['string', 1], slice(1, 's'), np.array([1.])]:
        ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder)
        assert_raise_message(ValueError, "No valid specification", ct.fit,
                             X_array)

    # invalid for arrays
    for col in ['string', ['string', 'other'], slice('a', 'b')]:
        ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder)
        assert_raise_message(ValueError, "Specifying the columns", ct.fit,
                             X_array)

    # transformed n_features does not match fitted n_features
    col = [0, 1]
    ct = ColumnTransformer([('trans', Trans(), col)], remainder=remainder)
    ct.fit(X_array)
    X_array_more = np.array([[0, 1, 2], [2, 4, 6], [3, 6, 9]]).T
    msg = ("Given feature/column names or counts do not match the ones for "
           "the data given during fit.")
    with pytest.warns(FutureWarning, match=msg):
        ct.transform(X_array_more)  # Should accept added columns, for now
    X_array_fewer = np.array([[0, 1, 2]]).T
    err_msg = 'Number of features'
    with pytest.raises(ValueError, match=err_msg):
        ct.transform(X_array_fewer)
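
For contrast, a few specifications that are valid for a plain array (a sketch; StandardScaler stands in for the test-local Trans class):

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

X = np.array([[0, 1, 2], [2, 4, 6]]).T
# a list of ints, an int slice, and a boolean mask all select columns
for col in [[0], slice(0, 1), np.array([True, False])]:
    ColumnTransformer([('trans', StandardScaler(), col)]).fit(X)
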
Example #5
def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type)

            # Check that an error is raised if fit has not been called
            assert_raise_message(
                NotFittedError, "This BayesianGaussianMixture instance"
                " is not fitted yet. Call 'fit' with "
                "appropriate arguments before using "
                "this estimator.", bgmm.predict, X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert adjusted_rand_score(Y, Y_pred) >= .95
Example #6
def test_step_name_validation():
    bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))]
    bad_steps2 = [('a', Mult(2)), ('a', Mult(3))]
    for cls, param in [(Pipeline, 'steps'),
                       (FeatureUnion, 'transformer_list')]:
        # we validate in construction (despite scikit-learn convention)
        bad_steps3 = [('a', Mult(2)), (param, Mult(3))]
        for bad_steps, message in [
            (bad_steps1, "Estimator names must not contain __: got ['a__q']"),
            (bad_steps2, "Names provided are not unique: ['a', 'a']"),
            (bad_steps3, "Estimator names conflict with constructor "
             "arguments: ['%s']" % param),
        ]:
            # three ways to make it invalid:
            # - construction
            assert_raise_message(ValueError, message, cls,
                                 **{param: bad_steps})

            # - setattr
            est = cls(**{param: [('a', Mult(1))]})
            setattr(est, param, bad_steps)
            assert_raise_message(ValueError, message, est.fit, [[1]], [1])
            assert_raise_message(ValueError, message, est.fit_transform, [[1]],
                                 [1])

            # - set_params
            est = cls(**{param: [('a', Mult(1))]})
            est.set_params(**{param: bad_steps})
            assert_raise_message(ValueError, message, est.fit, [[1]], [1])
            assert_raise_message(ValueError, message, est.fit_transform, [[1]],
                                 [1])
Example #7
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test):
    dense_svm.fit(X_train.toarray(), y_train)
    if sparse.isspmatrix(X_test):
        X_test_dense = X_test.toarray()
    else:
        X_test_dense = X_test
    sparse_svm.fit(X_train, y_train)
    assert sparse.issparse(sparse_svm.support_vectors_)
    assert sparse.issparse(sparse_svm.dual_coef_)
    assert_array_almost_equal(dense_svm.support_vectors_,
                              sparse_svm.support_vectors_.toarray())
    assert_array_almost_equal(dense_svm.dual_coef_,
                              sparse_svm.dual_coef_.toarray())
    if dense_svm.kernel == "linear":
        assert sparse.issparse(sparse_svm.coef_)
        assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray())
    assert_array_almost_equal(dense_svm.support_, sparse_svm.support_)
    assert_array_almost_equal(dense_svm.predict(X_test_dense),
                              sparse_svm.predict(X_test))
    assert_array_almost_equal(dense_svm.decision_function(X_test_dense),
                              sparse_svm.decision_function(X_test))
    assert_array_almost_equal(dense_svm.decision_function(X_test_dense),
                              sparse_svm.decision_function(X_test_dense))
    if isinstance(dense_svm, svm.OneClassSVM):
        msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
    else:
        assert_array_almost_equal(dense_svm.predict_proba(X_test_dense),
                                  sparse_svm.predict_proba(X_test), 4)
        msg = "cannot use sparse input in 'SVC' trained on dense data"
    if sparse.isspmatrix(X_test):
        assert_raise_message(ValueError, msg, dense_svm.predict, X_test)
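
A hypothetical invocation of the helper above (the data and estimator settings are illustrative, not taken from the original suite; probability=True is needed because the helper calls predict_proba on non-OneClassSVM estimators):

import numpy as np
from scipy import sparse
from sklearn import svm

rng = np.random.RandomState(0)
X = sparse.csr_matrix(rng.rand(40, 5))   # sparse train/test data
y = rng.randint(0, 2, 40)                # two classes

check_svm_model_equal(
    svm.SVC(kernel='linear', probability=True, random_state=0),
    svm.SVC(kernel='linear', probability=True, random_state=0),
    X[:30], y[:30], X[30:])
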
Example #8
def test_fetch_nonexiting(monkeypatch, gzip_response):
    # there is no active version of glass2
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # Note that we only want to search by name (not data id)
    assert_raise_message(ValueError, "No active dataset glass2 found",
                         fetch_openml, name='glass2', cache=False)
Example #9
def test_zero_cosine_linkage_tree():
    # Check that zero vectors in X produce an error when
    # 'cosine' affinity is used
    X = np.array([[0, 1],
                  [0, 0]])
    msg = 'Cosine affinity cannot be used when X contains zero vectors'
    assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine')
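
The restriction exists because the cosine affinity divides by the row norms, which gives 0/0 for a zero vector; a minimal sketch:

import numpy as np

a, b = np.array([0., 0.]), np.array([0., 1.])
np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))  # nan (0 / 0)
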
Example #10
def test_n_iter():
    """Check value of n_iter."""
    X = np.array([[1], [2], [6], [8], [10]])
    y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(n_iter=0)
    msg = "n_iter should be greater than or equal to 1."
    assert_raise_message(ValueError, msg, clf.fit, X, y)
Example #11
def test_enet_l1_ratio():
    # Test that an error is raised if an estimator that
    # uses _alpha_grid is called with l1_ratio=0
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])

    assert_raise_message(ValueError, msg,
                         ElasticNetCV(l1_ratio=0, random_state=42).fit, X, y)
    assert_raise_message(
        ValueError, msg,
        MultiTaskElasticNetCV(l1_ratio=0, random_state=42).fit, X, y[:, None])

    # Test that l1_ratio=0 is allowed if we supply a grid manually
    alphas = [0.1, 10]
    estkwds = {'alphas': alphas, 'random_state': 42}
    est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = ElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est_desired.fit(X, y)
        est.fit(X, y)
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)

    est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est.fit(X, y[:, None])
        est_desired.fit(X, y[:, None])
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
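
For background, the automatic grid's largest alpha scales roughly like max|X.T y| / (n_samples * l1_ratio); this is a simplification of sklearn's _alpha_grid that ignores centering and sample weights, but it shows why the bound diverges as l1_ratio approaches 0:

import numpy as np

X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]], dtype=float).T
y = np.array([12, 10, 11, 21, 5], dtype=float)
for l1_ratio in (1.0, 1e-2, 1e-5):
    alpha_max = np.max(np.abs(X.T @ y)) / (len(y) * l1_ratio)
    print(l1_ratio, alpha_max)  # grows without bound as l1_ratio -> 0
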
Example #12
def test_n_components():
    rng = np.random.RandomState(42)
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]

    init = rng.rand(X.shape[1] - 1, 3)

    # n_components = X.shape[1] != transformation.shape[0]
    n_components = X.shape[1]
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(ValueError,
                         'The preferred dimensionality of the '
                         'projected space `n_components` ({}) does not match '
                         'the output dimensionality of the given '
                         'linear transformation `init` ({})!'
                         .format(n_components, init.shape[0]),
                         nca.fit, X, y)

    # n_components > X.shape[1]
    n_components = X.shape[1] + 2
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(ValueError,
                         'The preferred dimensionality of the '
                         'projected space `n_components` ({}) cannot '
                         'be greater than the given data '
                         'dimensionality ({})!'
                         .format(n_components, X.shape[1]),
                         nca.fit, X, y)

    # n_components < X.shape[1]
    nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity')
    nca.fit(X, y)
Example #13
def test_ovo_float_y():
    # Test that OvO raises an error on float targets
    X = iris.data
    y = iris.data[:, 0]

    ovo = OneVsOneClassifier(LinearSVC())
    assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y)
Example #14
def test_ovo_one_class():
    # Test error for OvO with one class
    X = np.eye(4)
    y = np.array(['a'] * 4)

    ovo = OneVsOneClassifier(LinearSVC())
    assert_raise_message(ValueError, "when only one class", ovo.fit, X, y)
Example #15
def test_regularisation():
    # We train the GaussianMixture on degenerate data by defining two clusters
    # with zero covariance.
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 5

    X = np.vstack((np.ones(
        (n_samples // 2, n_features)), np.zeros((n_samples // 2, n_features))))

    for covar_type in COVARIANCE_TYPE:
        gmm = GaussianMixture(n_components=n_samples,
                              reg_covar=0,
                              covariance_type=covar_type,
                              random_state=rng)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            assert_raise_message(
                ValueError, "Fitting the mixture model failed because "
                "some components have ill-defined empirical "
                "covariance (for instance caused by "
                "singleton or collapsed samples). Try to "
                "decrease the number of components, or "
                "increase reg_covar.", gmm.fit, X)

            gmm.set_params(reg_covar=1e-6).fit(X)
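
reg_covar works by being added to the diagonal of every estimated covariance matrix in the M-step; a sketch of its effect on a collapsed cluster:

import numpy as np

cov = np.zeros((5, 5))       # collapsed cluster: zero empirical covariance
cov.flat[::5 + 1] += 1e-6    # what reg_covar=1e-6 contributes
assert np.all(np.linalg.eigvalsh(cov) > 0)  # now positive definite
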
Example #16
def test_pls_errors():
    d = load_linnerud()
    X = d.data
    Y = d.target
    for clf in [pls_.PLSCanonical(), pls_.PLSRegression(), pls_.PLSSVD()]:
        clf.n_components = 4
        assert_raise_message(ValueError, "Invalid number of components",
                             clf.fit, X, Y)
Example #17
def test_string_attribute_without_dataframe(monkeypatch, gzip_response):
    data_id = 40945
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_raise_message(ValueError,
                         ('STRING attributes are not supported for '
                          'array representation. Try as_frame=True'),
                         fetch_openml, data_id=data_id, cache=False)
Example #18
def test_raises_illegal_multitarget(monkeypatch, gzip_response):
    data_id = 61
    targets = ['sepalwidth', 'class']
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # Note that we only want to search by name (not data id)
    assert_raise_message(ValueError,
                         "Can only handle homogeneous multi-target datasets,",
                         fetch_openml, data_id=data_id,
                         target_column=targets, cache=False)
Example #19
def test_base_zero_n_estimators():
    # Check that a BaseEnsemble with n_estimators <= 0 raises a ValueError
    # at fit time.
    ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                 n_estimators=0)
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be greater than zero, got 0.",
                         ensemble.fit, iris.data, iris.target)
Example #20
def test_linear_svc_intercept_scaling():
    # Test that the right error message is raised when intercept_scaling <= 0

    for i in [-1, 0]:
        lsvc = svm.LinearSVC(intercept_scaling=i)
        msg = ('Intercept scaling is %r but needs to be greater than 0.'
               ' To disable fitting an intercept,'
               ' set fit_intercept=False.' % lsvc.intercept_scaling)
        assert_raise_message(ValueError, msg, lsvc.fit, X, Y)
Example #21
def test_column_transformer_remainder():
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T

    X_res_first = np.array([0, 1, 2]).reshape(-1, 1)
    X_res_second = np.array([2, 4, 6]).reshape(-1, 1)
    X_res_both = X_array

    # default drop
    ct = ColumnTransformer([('trans1', Trans(), [0])])
    assert_array_equal(ct.fit_transform(X_array), X_res_first)
    assert_array_equal(ct.fit(X_array).transform(X_array), X_res_first)
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] == 'remainder'
    assert ct.transformers_[-1][1] == 'drop'
    assert_array_equal(ct.transformers_[-1][2], [1])

    # specify passthrough
    ct = ColumnTransformer([('trans', Trans(), [0])], remainder='passthrough')
    assert_array_equal(ct.fit_transform(X_array), X_res_both)
    assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both)
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] == 'remainder'
    assert ct.transformers_[-1][1] == 'passthrough'
    assert_array_equal(ct.transformers_[-1][2], [1])

    # column order is not preserved (passed-through columns are appended)
    ct = ColumnTransformer([('trans1', Trans(), [1])], remainder='passthrough')
    assert_array_equal(ct.fit_transform(X_array), X_res_both[:, ::-1])
    assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both[:, ::-1])
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] == 'remainder'
    assert ct.transformers_[-1][1] == 'passthrough'
    assert_array_equal(ct.transformers_[-1][2], [0])

    # passthrough when all actual transformers are skipped
    ct = ColumnTransformer([('trans1', 'drop', [0])], remainder='passthrough')
    assert_array_equal(ct.fit_transform(X_array), X_res_second)
    assert_array_equal(ct.fit(X_array).transform(X_array), X_res_second)
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] == 'remainder'
    assert ct.transformers_[-1][1] == 'passthrough'
    assert_array_equal(ct.transformers_[-1][2], [1])

    # error on invalid arg
    ct = ColumnTransformer([('trans1', Trans(), [0])], remainder=1)
    assert_raise_message(
        ValueError,
        "remainder keyword needs to be one of \'drop\', \'passthrough\', "
        "or estimator.", ct.fit, X_array)
    assert_raise_message(
        ValueError,
        "remainder keyword needs to be one of \'drop\', \'passthrough\', "
        "or estimator.", ct.fit_transform, X_array)

    # check default for make_column_transformer
    ct = make_column_transformer((Trans(), [0]))
    assert ct.remainder == 'drop'
Example #22
def test_classifier_single_class():
    """tests if ValueError is thrown with only one class"""
    X = [[1, 2], [3, 4]]
    y = [1, 1]

    assert_raise_message(
        ValueError, "This solver needs samples of at least 2 classes "
        "in the data",
        LogisticRegression(solver='sag').fit, X, y)
Example #23
def test_check_array_accept_large_sparse_raise_exception(X_64bit):
    # When large sparse matrices are not allowed
    msg = ("Only sparse matrices with 32-bit integer indices "
           "are accepted. Got int64 indices.")
    assert_raise_message(ValueError,
                         msg,
                         check_array,
                         X_64bit,
                         accept_sparse=True,
                         accept_large_sparse=False)
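
The X_64bit fixture is not shown here; one way to construct such input (an assumption modeled on how scikit-learn's test suite builds it) is to cast a sparse matrix's index arrays to int64:

import scipy.sparse as sp

X_64bit = sp.random(20, 10, format='csc', random_state=0)
X_64bit.indices = X_64bit.indices.astype('int64')
X_64bit.indptr = X_64bit.indptr.astype('int64')
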
Example #24
def test_agg_n_clusters():
    # Test that an error is raised when n_clusters <= 0

    rng = np.random.RandomState(0)
    X = rng.rand(20, 10)
    for n_clus in [-1, 0]:
        agc = AgglomerativeClustering(n_clusters=n_clus)
        msg = ("n_clusters should be an integer greater than 0."
               " %s was provided." % str(agc.n_clusters))
        assert_raise_message(ValueError, msg, agc.fit, X)
Example #25
def test_gpr_correct_error_message():
    X = np.arange(12).reshape(6, -1)
    y = np.ones(6)
    kernel = DotProduct()
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0)
    assert_raise_message(
        np.linalg.LinAlgError, "The kernel, %s, is not returning a "
        "positive definite matrix. Try gradually increasing "
        "the 'alpha' parameter of your "
        "GaussianProcessRegressor estimator." % kernel, gpr.fit, X, y)
Example #26
def test_too_many_samples_to_find_a_safe_embedding():
    data, _ = make_sparse_random_data(1000, 100, 1000)

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=0.1)
        expected_msg = (
            'eps=0.100000 and n_samples=1000 lead to a target dimension'
            ' of 5920 which is larger than the original space with'
            ' n_features=100')
        assert_raise_message(ValueError, expected_msg, rp.fit, data)
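
The target dimension of 5920 in the message is the Johnson-Lindenstrauss lower bound on the embedding dimension, which can be checked directly:

from sklearn.random_projection import johnson_lindenstrauss_min_dim

johnson_lindenstrauss_min_dim(n_samples=1000, eps=0.1)  # 5920
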
Example #27
def test_minimum_number_of_sample_check():
    # Test that a minimum number of samples is enforced
    msg = "min_samples must be no greater than"

    # Compute OPTICS
    X = [[1, 1]]
    clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1)

    # Run the fit
    assert_raise_message(ValueError, msg, clust.fit, X)
Example #28
def test_bayesian_mixture_check_is_fitted():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2

    # Check the error message when the model is not fitted
    bgmm = BayesianGaussianMixture(random_state=rng)
    X = rng.rand(n_samples, n_features)
    assert_raise_message(
        ValueError, 'This BayesianGaussianMixture instance is not '
        'fitted yet.', bgmm.score, X)
Example #29
def test_2D_transformer_output():
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T

    # if one transformer is dropped, test that the name is still correct
    ct = ColumnTransformer([('trans1', 'drop', 0), ('trans2', TransNo2D(), 1)])
    assert_raise_message(ValueError, "the 'trans2' transformer should be 2D",
                         ct.fit_transform, X_array)
    # because fit also performs a transform, this already raises on fit
    assert_raise_message(ValueError, "the 'trans2' transformer should be 2D",
                         ct.fit, X_array)
Example #30
def test_lle_init_parameters():
    X = np.random.rand(5, 3)

    clf = manifold.LocallyLinearEmbedding(eigen_solver="error")
    msg = "unrecognized eigen_solver 'error'"
    assert_raise_message(ValueError, msg, clf.fit, X)

    clf = manifold.LocallyLinearEmbedding(method="error")
    msg = "unrecognized method 'error'"
    assert_raise_message(ValueError, msg, clf.fit, X)