Beispiel #1
0
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D

    n_sampless = [2, 3]
    n_featuress = [3, 2]

    rng = np.random.RandomState(42)

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples)**2 + 1
        sample_weights_OK_1 = 1.
        sample_weights_OK_2 = 2.
        sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
        sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]

        ridge = Ridge(alpha=1)

        # make sure the "OK" sample weights actually work
        ridge.fit(X, y, sample_weights_OK)
        ridge.fit(X, y, sample_weights_OK_1)
        ridge.fit(X, y, sample_weights_OK_2)

        def fit_ridge_not_ok():
            ridge.fit(X, y, sample_weights_not_OK)

        def fit_ridge_not_ok_2():
            ridge.fit(X, y, sample_weights_not_OK_2)

        assert_raise_message(ValueError,
                             "Sample weights must be 1D array or scalar",
                             fit_ridge_not_ok)

        assert_raise_message(ValueError,
                             "Sample weights must be 1D array or scalar",
                             fit_ridge_not_ok_2)
Beispiel #2
0
def test_gaussian_mixture_predict_predict_proba():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        Y = rand_data.Y
        g = GaussianMixture(n_components=rand_data.n_components,
                            random_state=rng,
                            weights_init=rand_data.weights,
                            means_init=rand_data.means,
                            precisions_init=rand_data.precisions[covar_type],
                            covariance_type=covar_type)

        # Check a warning message arrive if we don't do fit
        assert_raise_message(
            NotFittedError, "This GaussianMixture instance is not fitted "
            "yet. Call 'fit' with appropriate arguments "
            "before using this method.", g.predict, X)

        g.fit(X)
        Y_pred = g.predict(X)
        Y_pred_proba = g.predict_proba(X).argmax(axis=1)
        assert_array_equal(Y_pred, Y_pred_proba)
        assert adjusted_rand_score(Y, Y_pred) > .95
Beispiel #3
0
def test_warm_start():
    X = X_iris
    y = y_iris

    y_2classes = np.array([0] * 75 + [1] * 75)
    y_3classes = np.array([0] * 40 + [1] * 40 + [2] * 70)
    y_3classes_alt = np.array([0] * 50 + [1] * 50 + [3] * 50)
    y_4classes = np.array([0] * 37 + [1] * 37 + [2] * 38 + [3] * 38)
    y_5classes = np.array([0] * 30 + [1] * 30 + [2] * 30 + [3] * 30 + [4] * 30)

    # No error raised
    clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs',
                        warm_start=True).fit(X, y)
    clf.fit(X, y)
    clf.fit(X, y_3classes)

    for y_i in (y_2classes, y_3classes_alt, y_4classes, y_5classes):
        clf = MLPClassifier(hidden_layer_sizes=2,
                            solver='lbfgs',
                            warm_start=True).fit(X, y)
        message = ('warm_start can only be used where `y` has the same '
                   'classes as in the previous call to fit.'
                   ' Previously got [0 1 2], `y` has %s' % np.unique(y_i))
        assert_raise_message(ValueError, message, clf.fit, X, y_i)
Beispiel #4
0
def test_column_transformer_special_strings():

    # one 'drop' -> ignore
    X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T
    ct = ColumnTransformer([('trans1', Trans(), [0]), ('trans2', 'drop', [1])])
    exp = np.array([[0.], [1.], [2.]])
    assert_array_equal(ct.fit_transform(X_array), exp)
    assert_array_equal(ct.fit(X_array).transform(X_array), exp)
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] != 'remainder'

    # all 'drop' -> return shape 0 array
    ct = ColumnTransformer([('trans1', 'drop', [0]), ('trans2', 'drop', [1])])
    assert_array_equal(ct.fit(X_array).transform(X_array).shape, (3, 0))
    assert_array_equal(ct.fit_transform(X_array).shape, (3, 0))
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] != 'remainder'

    # 'passthrough'
    X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T
    ct = ColumnTransformer([('trans1', Trans(), [0]),
                            ('trans2', 'passthrough', [1])])
    exp = X_array
    assert_array_equal(ct.fit_transform(X_array), exp)
    assert_array_equal(ct.fit(X_array).transform(X_array), exp)
    assert len(ct.transformers_) == 2
    assert ct.transformers_[-1][0] != 'remainder'

    # None itself / other string is not valid
    for val in [None, 'other']:
        ct = ColumnTransformer([('trans1', Trans(), [0]),
                                ('trans2', None, [1])])
        assert_raise_message(TypeError, "All estimators should implement",
                             ct.fit_transform, X_array)
        assert_raise_message(TypeError, "All estimators should implement",
                             ct.fit, X_array)
Beispiel #5
0
def test_column_transformer_error_msg_1D():
    X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T

    col_trans = ColumnTransformer([('trans', StandardScaler(), 0)])
    assert_raise_message(ValueError, "1D data passed to a transformer",
                         col_trans.fit, X_array)
    assert_raise_message(ValueError, "1D data passed to a transformer",
                         col_trans.fit_transform, X_array)

    col_trans = ColumnTransformer([('trans', TransRaise(), 0)])
    for func in [col_trans.fit, col_trans.fit_transform]:
        assert_raise_message(ValueError, "specific message", func, X_array)
Beispiel #6
0
def test_error_messages_on_wrong_input():
    for score_func in score_funcs:
        expected = ('labels_true and labels_pred must have same size,'
                    ' got 2 and 3')
        assert_raise_message(ValueError, expected, score_func,
                             [0, 1], [1, 1, 1])

        expected = "labels_true must be 1D: shape is (2"
        assert_raise_message(ValueError, expected, score_func,
                             [[0, 1], [1, 0]], [1, 1, 1])

        expected = "labels_pred must be 1D: shape is (2"
        assert_raise_message(ValueError, expected, score_func,
                             [0, 1, 0], [[1, 1], [0, 0]])
Beispiel #7
0
def test_params_validation():
    # Test that invalid parameters raise value error
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]
    NCA = NeighborhoodComponentsAnalysis
    rng = np.random.RandomState(42)

    # TypeError
    assert_raises(TypeError, NCA(max_iter='21').fit, X, y)
    assert_raises(TypeError, NCA(verbose='true').fit, X, y)
    assert_raises(TypeError, NCA(tol='1').fit, X, y)
    assert_raises(TypeError, NCA(n_components='invalid').fit, X, y)
    assert_raises(TypeError, NCA(warm_start=1).fit, X, y)

    # ValueError
    assert_raise_message(ValueError,
                         "`init` must be 'auto', 'pca', 'lda', 'identity', "
                         "'random' or a numpy array of shape "
                         "(n_components, n_features).",
                         NCA(init=1).fit, X, y)
    assert_raise_message(ValueError,
                         '`max_iter`= -1, must be >= 1.',
                         NCA(max_iter=-1).fit, X, y)

    init = rng.rand(5, 3)
    assert_raise_message(ValueError,
                         'The output dimensionality ({}) of the given linear '
                         'transformation `init` cannot be greater than its '
                         'input dimensionality ({}).'
                         .format(init.shape[0], init.shape[1]),
                         NCA(init=init).fit, X, y)

    n_components = 10
    assert_raise_message(ValueError,
                         'The preferred dimensionality of the '
                         'projected space `n_components` ({}) cannot '
                         'be greater than the given data '
                         'dimensionality ({})!'
                         .format(n_components, X.shape[1]),
                         NCA(n_components=n_components).fit, X, y)
Beispiel #8
0
def test_column_transformer_get_feature_names():
    X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T
    ct = ColumnTransformer([('trans', Trans(), [0, 1])])
    # raise correct error when not fitted
    with pytest.raises(NotFittedError):
        ct.get_feature_names()
    # raise correct error when no feature names are available
    ct.fit(X_array)
    assert_raise_message(
        AttributeError, "Transformer trans (type Trans) does not provide "
        "get_feature_names", ct.get_feature_names)

    # working example
    X = np.array([[{
        'a': 1,
        'b': 2
    }, {
        'a': 3,
        'b': 4
    }], [{
        'c': 5
    }, {
        'c': 6
    }]],
                 dtype=object).T
    ct = ColumnTransformer([('col' + str(i), DictVectorizer(), i)
                            for i in range(2)])
    ct.fit(X)
    assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c']

    # passthrough transformers not supported
    ct = ColumnTransformer([('trans', 'passthrough', [0, 1])])
    ct.fit(X)
    assert_raise_message(NotImplementedError,
                         'get_feature_names is not yet supported',
                         ct.get_feature_names)

    ct = ColumnTransformer([('trans', DictVectorizer(), 0)],
                           remainder='passthrough')
    ct.fit(X)
    assert_raise_message(NotImplementedError,
                         'get_feature_names is not yet supported',
                         ct.get_feature_names)

    # drop transformer
    ct = ColumnTransformer([('col0', DictVectorizer(), 0),
                            ('col1', 'drop', 1)])
    ct.fit(X)
    assert ct.get_feature_names() == ['col0__a', 'col0__b']
Beispiel #9
0
def test_fetch_openml_raises_illegal_argument():
    assert_raise_message(ValueError,
                         "Dataset data_id=",
                         fetch_openml,
                         data_id=-1,
                         name="name")

    assert_raise_message(ValueError,
                         "Dataset data_id=",
                         fetch_openml,
                         data_id=-1,
                         name=None,
                         version="version")

    assert_raise_message(ValueError,
                         "Dataset data_id=",
                         fetch_openml,
                         data_id=-1,
                         name="name",
                         version="version")

    assert_raise_message(
        ValueError, "Neither name nor data_id are provided. "
        "Please provide name or data_id.", fetch_openml)
Beispiel #10
0
def test_assert_allclose_dense_sparse():
    x = np.arange(9).reshape(3, 3)
    msg = "Not equal to tolerance "
    y = sparse.csc_matrix(x)
    for X in [x, y]:
        # basic compare
        assert_raise_message(AssertionError, msg, assert_allclose_dense_sparse,
                             X, X * 2)
        assert_allclose_dense_sparse(X, X)

    assert_raise_message(ValueError, "Can only compare two sparse",
                         assert_allclose_dense_sparse, x, y)

    A = sparse.diags(np.ones(5), offsets=0).tocsr()
    B = sparse.csr_matrix(np.ones((1, 5)))

    assert_raise_message(AssertionError, "Arrays are not equal",
                         assert_allclose_dense_sparse, B, A)
Beispiel #11
0
def test_check_weights():
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)

    n_components = rand_data.n_components
    X = rand_data.X['full']

    g = GaussianMixture(n_components=n_components)

    # Check bad shape
    weights_bad_shape = rng.rand(n_components, 1)
    g.weights_init = weights_bad_shape
    assert_raise_message(
        ValueError, "The parameter 'weights' should have the shape of "
        "(%d,), but got %s" % (n_components, str(weights_bad_shape.shape)),
        g.fit, X)

    # Check bad range
    weights_bad_range = rng.rand(n_components) + 1
    g.weights_init = weights_bad_range
    assert_raise_message(
        ValueError, "The parameter 'weights' should be in the range "
        "[0, 1], but got max value %.5f, min value %.5f" %
        (np.min(weights_bad_range), np.max(weights_bad_range)), g.fit, X)

    # Check bad normalization
    weights_bad_norm = rng.rand(n_components)
    weights_bad_norm = weights_bad_norm / (weights_bad_norm.sum() + 1)
    g.weights_init = weights_bad_norm
    assert_raise_message(
        ValueError, "The parameter 'weights' should be normalized, "
        "but got sum(weights) = %.5f" % np.sum(weights_bad_norm), g.fit, X)

    # Check good weights matrix
    weights = rand_data.weights
    g = GaussianMixture(weights_init=weights, n_components=n_components)
    g.fit(X)
    assert_array_equal(weights, g.weights_init)
Beispiel #12
0
def test_l1_min_c_l2_loss():
    # loss='l2' should raise ValueError
    assert_raise_message(ValueError, "loss type not in",
                         l1_min_c, dense_X, Y1, "l2")
Beispiel #13
0
def test_bayesian_mixture_precisions_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    # Check raise message for a bad value of degrees_of_freedom_prior
    bad_degrees_of_freedom_prior_ = n_features - 1.
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=bad_degrees_of_freedom_prior_,
        random_state=rng)
    assert_raise_message(
        ValueError, "The parameter 'degrees_of_freedom_prior' should be "
        "greater than %d, but got %.3f." %
        (n_features - 1, bad_degrees_of_freedom_prior_), bgmm.fit, X)

    # Check correct init for a given value of degrees_of_freedom_prior
    degrees_of_freedom_prior = rng.rand() + n_features - 1.
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=degrees_of_freedom_prior,
        random_state=rng).fit(X)
    assert_almost_equal(degrees_of_freedom_prior,
                        bgmm.degrees_of_freedom_prior_)

    # Check correct init for the default value of degrees_of_freedom_prior
    degrees_of_freedom_prior_default = n_features
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=degrees_of_freedom_prior_default,
        random_state=rng).fit(X)
    assert_almost_equal(degrees_of_freedom_prior_default,
                        bgmm.degrees_of_freedom_prior_)

    # Check correct init for a given value of covariance_prior
    covariance_prior = {
        'full': np.cov(X.T, bias=1) + 10,
        'tied': np.cov(X.T, bias=1) + 5,
        'diag': np.diag(np.atleast_2d(np.cov(X.T, bias=1))) + 3,
        'spherical': rng.rand()
    }

    bgmm = BayesianGaussianMixture(random_state=rng)
    for cov_type in ['full', 'tied', 'diag', 'spherical']:
        bgmm.covariance_type = cov_type
        bgmm.covariance_prior = covariance_prior[cov_type]
        bgmm.fit(X)
        assert_almost_equal(covariance_prior[cov_type], bgmm.covariance_prior_)

    # Check raise message for a bad spherical value of covariance_prior
    bad_covariance_prior_ = -1.
    bgmm = BayesianGaussianMixture(covariance_type='spherical',
                                   covariance_prior=bad_covariance_prior_,
                                   random_state=rng)
    assert_raise_message(
        ValueError, "The parameter 'spherical covariance_prior' "
        "should be greater than 0., but got %.3f." % bad_covariance_prior_,
        bgmm.fit, X)

    # Check correct init for the default value of covariance_prior
    covariance_prior_default = {
        'full': np.atleast_2d(np.cov(X.T)),
        'tied': np.atleast_2d(np.cov(X.T)),
        'diag': np.var(X, axis=0, ddof=1),
        'spherical': np.var(X, axis=0, ddof=1).mean()
    }

    bgmm = BayesianGaussianMixture(random_state=0)
    for cov_type in ['full', 'tied', 'diag', 'spherical']:
        bgmm.covariance_type = cov_type
        bgmm.fit(X)
        assert_almost_equal(covariance_prior_default[cov_type],
                            bgmm.covariance_prior_)
Beispiel #14
0
def assert_raises_on_only_one_label(func):
    """Assert message when there is only one label"""
    rng = np.random.RandomState(seed=0)
    assert_raise_message(ValueError, "Number of labels is", func,
                         rng.rand(10, 2), np.zeros(10))
Beispiel #15
0
def assert_raises_on_all_points_same_cluster(func):
    """Assert message when all point are in different clusters"""
    rng = np.random.RandomState(seed=0)
    assert_raise_message(ValueError, "Number of labels is", func,
                         rng.rand(10, 2), np.arange(10))
Beispiel #16
0
def test_fast_mcd_on_invalid_input():
    X = np.arange(100)
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         fast_mcd, X)
Beispiel #17
0
def test_max_iter_error():
    km = KMeans(max_iter=-1)
    assert_raise_message(ValueError, 'Number of iterations should be', km.fit,
                         X)
Beispiel #18
0
def test_check_classification_targets():
    # Test that check_classification_target return correct type. #5782
    y = np.array([0.0, 1.1, 2.0, 3.0])
    msg = type_of_target(y)
    assert_raise_message(ValueError, msg, check_classification_targets, y)
Beispiel #19
0
def test_check_docstring_parameters():
    try:
        import numpydoc  # noqa
    except ImportError:
        raise SkipTest("numpydoc is required to test the docstrings")

    incorrect = check_docstring_parameters(f_ok)
    assert incorrect == []
    incorrect = check_docstring_parameters(f_ok, ignore=['b'])
    assert incorrect == []
    incorrect = check_docstring_parameters(f_missing, ignore=['b'])
    assert incorrect == []
    assert_raise_message(RuntimeError, 'Unknown section Results',
                         check_docstring_parameters, f_bad_sections)
    assert_raise_message(RuntimeError, 'Unknown section Parameter',
                         check_docstring_parameters, Klass.f_bad_sections)

    incorrect = check_docstring_parameters(f_check_param_definition)
    assert (incorrect == [
        "mrex.utils.tests.test_testing.f_check_param_definition There "
        "was no space between the param name and colon ('a: int')",
        "mrex.utils.tests.test_testing.f_check_param_definition There "
        "was no space between the param name and colon ('b:')",
        "mrex.utils.tests.test_testing.f_check_param_definition "
        "Parameter 'c :' has an empty type spec. Remove the colon",
        "mrex.utils.tests.test_testing.f_check_param_definition There "
        "was no space between the param name and colon ('d:int')",
    ])

    messages = [
        [
            "In function: mrex.utils.tests.test_testing.f_bad_order",
            "There's a parameter name mismatch in function docstring w.r.t."
            " function signature, at index 0 diff: 'b' != 'a'", "Full diff:",
            "- ['b', 'a']", "+ ['a', 'b']"
        ],
        [
            "In function: " +
            "mrex.utils.tests.test_testing.f_too_many_param_docstring",
            "Parameters in function docstring have more items w.r.t. function"
            " signature, first extra item: c", "Full diff:", "- ['a', 'b']",
            "+ ['a', 'b', 'c']", "?          +++++"
        ],
        [
            "In function: mrex.utils.tests.test_testing.f_missing",
            "Parameters in function docstring have less items w.r.t. function"
            " signature, first missing item: b", "Full diff:", "- ['a', 'b']",
            "+ ['a']"
        ],
        [
            "In function: mrex.utils.tests.test_testing.Klass.f_missing",
            "Parameters in function docstring have less items w.r.t. function"
            " signature, first missing item: X", "Full diff:", "- ['X', 'y']",
            "+ []"
        ],
        [
            "In function: " +
            "mrex.utils.tests.test_testing.MockMetaEstimator.predict",
            "There's a parameter name mismatch in function docstring w.r.t."
            " function signature, at index 0 diff: 'X' != 'y'", "Full diff:",
            "- ['X']", "?   ^", "+ ['y']", "?   ^"
        ],
        [
            "In function: " +
            "mrex.utils.tests.test_testing.MockMetaEstimator." +
            "predict_proba",
            "Parameters in function docstring have less items w.r.t. function"
            " signature, first missing item: X", "Full diff:", "- ['X']",
            "+ []"
        ],
        [
            "In function: " +
            "mrex.utils.tests.test_testing.MockMetaEstimator.score",
            "Parameters in function docstring have less items w.r.t. function"
            " signature, first missing item: X", "Full diff:", "- ['X']",
            "+ []"
        ],
        [
            "In function: " +
            "mrex.utils.tests.test_testing.MockMetaEstimator.fit",
            "Parameters in function docstring have less items w.r.t. function"
            " signature, first missing item: X", "Full diff:", "- ['X', 'y']",
            "+ []"
        ],
    ]

    mock_meta = MockMetaEstimator(delegate=MockEst())

    for msg, f in zip(messages, [
            f_bad_order, f_too_many_param_docstring, f_missing,
            Klass.f_missing, mock_meta.predict, mock_meta.predict_proba,
            mock_meta.score, mock_meta.fit
    ]):
        incorrect = check_docstring_parameters(f)
        assert msg == incorrect, ('\n"%s"\n not in \n"%s"' % (msg, incorrect))
Beispiel #20
0
def test_meanshift_all_orphans():
    # init away from the data, crash with a sensible warning
    ms = MeanShift(bandwidth=0.1, seeds=[[-9, -9], [-10, -10]])
    msg = "No point was within bandwidth=0.1"
    assert_raise_message(ValueError, msg, ms.fit, X,)
Beispiel #21
0
def test_mcd_class_on_invalid_input():
    X = np.arange(100)
    mcd = MinCovDet()
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         mcd.fit, X)
Beispiel #22
0
def test_gaussian_mixture_attributes():
    # test bad parameters
    rng = np.random.RandomState(0)
    X = rng.rand(10, 2)

    n_components_bad = 0
    gmm = GaussianMixture(n_components=n_components_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'n_components': %d "
        "Estimation requires at least one component" % n_components_bad,
        gmm.fit, X)

    # covariance_type should be in [spherical, diag, tied, full]
    covariance_type_bad = 'bad_covariance_type'
    gmm = GaussianMixture(covariance_type=covariance_type_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'covariance_type': %s "
        "'covariance_type' should be in "
        "['spherical', 'tied', 'diag', 'full']" % covariance_type_bad, gmm.fit,
        X)

    tol_bad = -1
    gmm = GaussianMixture(tol=tol_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'tol': %.5f "
        "Tolerance used by the EM must be non-negative" % tol_bad, gmm.fit, X)

    reg_covar_bad = -1
    gmm = GaussianMixture(reg_covar=reg_covar_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'reg_covar': %.5f "
        "regularization on covariance must be "
        "non-negative" % reg_covar_bad, gmm.fit, X)

    max_iter_bad = 0
    gmm = GaussianMixture(max_iter=max_iter_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'max_iter': %d "
        "Estimation requires at least one iteration" % max_iter_bad, gmm.fit,
        X)

    n_init_bad = 0
    gmm = GaussianMixture(n_init=n_init_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'n_init': %d "
        "Estimation requires at least one run" % n_init_bad, gmm.fit, X)

    init_params_bad = 'bad_method'
    gmm = GaussianMixture(init_params=init_params_bad)
    assert_raise_message(
        ValueError,
        "Unimplemented initialization method '%s'" % init_params_bad, gmm.fit,
        X)

    # test good parameters
    n_components, tol, n_init, max_iter, reg_covar = 2, 1e-4, 3, 30, 1e-1
    covariance_type, init_params = 'full', 'random'
    gmm = GaussianMixture(n_components=n_components,
                          tol=tol,
                          n_init=n_init,
                          max_iter=max_iter,
                          reg_covar=reg_covar,
                          covariance_type=covariance_type,
                          init_params=init_params).fit(X)

    assert gmm.n_components == n_components
    assert gmm.covariance_type == covariance_type
    assert gmm.tol == tol
    assert gmm.reg_covar == reg_covar
    assert gmm.max_iter == max_iter
    assert gmm.n_init == n_init
    assert gmm.init_params == init_params
Beispiel #23
0
def test_set_pipeline_step_passthrough(passthrough):
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=passthrough)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert (pipeline.get_params(deep=True) ==
                      {'steps': pipeline.steps,
                       'm2': mult2,
                       'm3': passthrough,
                       'last': mult5,
                       'memory': None,
                       'm2__mult': 2,
                       'last__mult': 5,
                       'verbose': False
                       })

    pipeline.set_params(m2=passthrough)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeErrors on None:
    other_methods = ['predict_proba', 'predict_log_proba',
                     'decision_function', 'transform', 'score']
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=passthrough)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_raise_message(AttributeError,
                         "'str' object has no attribute 'predict'",
                         getattr, pipeline, 'predict')

    # Check 'passthrough' step at construction time
    exp = 2 * 5
    pipeline = Pipeline(
        [('m2', mult2), ('m3', passthrough), ('last', mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
Beispiel #24
0
def test_check_array():
    # accept_sparse == False
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)
    # ensure_2d=False
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert X_array.ndim == 1
    # ensure_2d=True with 1d array
    assert_raise_message(ValueError,
                         'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2],
                         ensure_2d=True)
    # ensure_2d=True with scalar array
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array,
                         10,
                         ensure_2d=True)
    # don't allow ndim > 3
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # dtype and order enforcement.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(np.int)
    X_float = X_C.astype(np.float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if order == 'C':
            assert X_checked.flags['C_CONTIGUOUS']
            assert not X_checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert X_checked.flags['F_CONTIGUOUS']
            assert not X_checked.flags['C_CONTIGUOUS']
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and X_checked.flags['C_CONTIGUOUS']
                    == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS']
                    == X.flags['F_CONTIGUOUS']):
                assert X is X_checked

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(np.int)
    X_float = X_csc.astype(np.float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X,
                                    dtype=dtype,
                                    accept_sparse=accept_sparse,
                                    copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = [
                "object dtype is not supported by sparse matrices",
                "Can't check dok sparse matrix for nan or inf."
            ]
            assert message in messages
        else:
            assert len(w) == 0
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if X.format in accept_sparse:
            # no change if allowed
            assert X.format == X_checked.format
        else:
            # got converted
            assert X_checked.format == accept_sparse[0]
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if X.dtype == X_checked.dtype and X.format == X_checked.format:
                assert X is X_checked

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert isinstance(X_dense, np.ndarray)
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert isinstance(result, np.ndarray)

    # deprecation warning if string-like array with dtype="numeric"
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    X_str = [['11', '12'], ['13', 'xx']]
    for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")

    # deprecation warning if byte-like array with dtype="numeric"
    X_bytes = [[b'a', b'b'], [b'c', b'd']]
    for X in [X_bytes, np.array(X_bytes, dtype='V1')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")
Beispiel #25
0
def test_check_X_y_informative_error():
    X = np.ones((2, 2))
    y = None
    assert_raise_message(ValueError, "y cannot be None", check_X_y, X, y)
Beispiel #26
0
def test_check_array_min_samples_and_features_messages():
    # empty list is considered 2D by default:
    msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_array, [[]])

    # If considered a 1D collection when ensure_2d=False, then the minimum
    # number of samples will break:
    msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False)

    # Invalid edge case when checking the default minimum sample of a scalar
    msg = "Singleton array array(42) cannot be considered a valid collection."
    assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False)

    # Simulate a model that would need at least 2 samples to be well defined
    X = np.ones((1, 10))
    y = np.ones(1)
    msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required."
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_samples=2)

    # The same message is raised if the data has 2 dimensions even if this is
    # not mandatory
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_samples=2,
                         ensure_2d=False)

    # Simulate a model that would require at least 3 features (e.g. SelectKBest
    # with k=3)
    X = np.ones((10, 2))
    y = np.ones(2)
    msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required."
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_features=3)

    # Only the feature check is enabled whenever the number of dimensions is 2
    # even if allow_nd is enabled:
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_features=3,
                         allow_nd=True)

    # Simulate a case where a pipeline stage as trimmed all the features of a
    # 2D dataset.
    X = np.empty(0).reshape(10, 0)
    y = np.ones(10)
    msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_X_y, X, y)

    # nd-data is not checked for any minimum number of features by default:
    X = np.ones((10, 0, 28, 28))
    y = np.ones(10)
    X_checked, y_checked = check_X_y(X, y, allow_nd=True)
    assert_array_equal(X, X_checked)
    assert_array_equal(y, y_checked)
Beispiel #27
0
def test_estimator_init():
    eclf = VotingClassifier(estimators=[])
    msg = ('Invalid `estimators` attribute, `estimators` should be'
           ' a list of (string, estimator) tuples')
    assert_raise_message(AttributeError, msg, eclf.fit, X, y)

    clf = LogisticRegression(random_state=1)

    eclf = VotingClassifier(estimators=[('lr', clf)], voting='error')
    msg = ('Voting must be \'soft\' or \'hard\'; got (voting=\'error\')')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2])
    msg = ('Number of `estimators` and weights must be equal'
           '; got 2 weights, 1 estimators')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr', clf), ('lr', clf)],
                            weights=[1, 2])
    msg = "Names provided are not unique: ['lr', 'lr']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr__', clf)])
    msg = "Estimator names must not contain __: got ['lr__']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('estimators', clf)])
    msg = "Estimator names conflict with constructor arguments: ['estimators']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
Beispiel #28
0
def test_estimate_bandwidth_with_sparse_matrix():
    # Test estimate_bandwidth with sparse matrix
    X = sparse.lil_matrix((1000, 1000))
    msg = "A sparse matrix was passed, but dense data is required."
    assert_raise_message(TypeError, msg, estimate_bandwidth, X, 200)
Beispiel #29
0
def test_zero_cosine_linkage_tree():
    # Check that zero vectors in X produce an error when
    # 'cosine' affinity is used
    X = np.array([[0, 1], [0, 0]])
    msg = 'Cosine affinity cannot be used when X contains zero vectors'
    assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine')
Beispiel #30
0
def test_rational_quadratic_kernel():
    kernel = RationalQuadratic(length_scale=[1., 1.])
    assert_raise_message(
        AttributeError, "RationalQuadratic kernel only supports isotropic "
        "version, please use a single "
        "scalar for length_scale", kernel, X)