Exemple #1
0
def test_check_class_weight_balanced_linear_classifier():
    """A classifier with ill-computed balanced weights must fail the check."""
    expected_msg = ("Classifier estimator_name is not computing"
                    " class_weight=balanced properly.")
    assert_raises_regex(
        AssertionError,
        expected_msg,
        check_class_weight_balanced_linear_classifier,
        'estimator_name',
        BadBalancedWeightsClassifier,
    )
Exemple #2
0
def test_ridgecv_store_cv_values():
    """Check the shape of ``RidgeCV.cv_values_`` and the cv/store conflict.

    With ``cv=None`` and ``store_cv_values=True`` the fitted estimator must
    expose ``cv_values_`` with one entry per sample (and per target, for a
    2-D ``y``) for each alpha. Asking for stored values together with
    ``cv=3`` must raise a ``ValueError``.
    """
    rng = np.random.RandomState(42)

    n_samples, n_features = 8, 5
    x = rng.randn(n_samples, n_features)
    alphas = [1e-1, 1e0, 1e1]
    n_alphas = len(alphas)

    ridge = RidgeCV(alphas=alphas, cv=None, store_cv_values=True)

    # 1-D target, i.e. len(y.shape) == 1
    y = rng.randn(n_samples)
    ridge.fit(x, y)
    assert ridge.cv_values_.shape == (n_samples, n_alphas)

    # 2-D target, i.e. len(y.shape) == 2
    n_targets = 3
    y = rng.randn(n_samples, n_targets)
    ridge.fit(x, y)
    assert ridge.cv_values_.shape == (n_samples, n_targets, n_alphas)

    # An explicit cv cannot be combined with storing the CV values.
    ridge_with_cv = RidgeCV(cv=3, store_cv_values=True)
    assert_raises_regex(
        ValueError,
        'cv!=None and store_cv_values',
        ridge_with_cv.fit,
        x,
        y,
    )
Exemple #3
0
def test_pca_validation(setup):
    """Check that PCA rejects invalid ``n_components`` for each solver.

    For every solver in ``solver_list`` an out-of-range integer must raise a
    ``ValueError`` naming the valid range, and a float >= 1 must raise a
    ``ValueError`` complaining about the type.
    """
    for solver in solver_list:
        X = mt.array([[0, 1, 0], [1, 0, 0]])
        smallest_d = 2  # The smallest dimension
        lower_limit = {"randomized": 1, "full": 0, "auto": 0}
        # The error message is expected to name "full" when "auto" is used.
        solver_reported = "full" if solver == "auto" else solver

        # Run on X and on X.T so the check does not depend on the axis.
        for data in [X, X.T]:
            for n_components in [-1, 3]:
                range_pattern = (
                    f"n_components={n_components}L? must be between "
                    rf"{lower_limit[solver]}L? and min\(n_samples, n_features\)="
                    f"{smallest_d}L? with svd_solver='{solver_reported}'"
                )
                assert_raises_regex(
                    ValueError,
                    range_pattern,
                    PCA(n_components, svd_solver=solver).fit,
                    data,
                )

        # Float n_components >= 1 is a type error. ``data`` is still bound to
        # the last array of the loop above (X.T).
        n_components = 1.0
        type_ncom = type(n_components)
        type_message = (
            f"n_components={n_components} must be of type int "
            f"when greater than or equal to 1, was of type={type_ncom}"
        )
        assert_raise_message(
            ValueError,
            type_message,
            PCA(n_components, svd_solver=solver).fit,
            data,
        )
Exemple #4
0
    def test_pca_validation(self):
        """Check that PCA rejects invalid ``n_components`` for each solver.

        For every solver in ``self.solver_list`` an out-of-range integer must
        raise a ``ValueError`` naming the valid range, and a float >= 1 must
        raise a ``ValueError`` complaining about the type.
        """
        for solver in self.solver_list:
            X = mt.array([[0, 1, 0], [1, 0, 0]])
            smallest_d = 2  # The smallest dimension
            lower_limit = {'randomized': 1, 'full': 0, 'auto': 0}
            # The error message is expected to name 'full' for 'auto'.
            solver_reported = 'full' if solver == 'auto' else solver

            # Run on X and on X.T so the check does not depend on the axis.
            for data in [X, X.T]:
                for n_components in [-1, 3]:
                    range_pattern = (
                        "n_components={}L? must be between "
                        r"{}L? and min\(n_samples, n_features\)="
                        "{}L? with svd_solver=\'{}\'".format(
                            n_components, lower_limit[solver], smallest_d,
                            solver_reported)
                    )
                    assert_raises_regex(
                        ValueError,
                        range_pattern,
                        PCA(n_components, svd_solver=solver).fit,
                        data,
                    )

            # Float n_components >= 1 is a type error. ``data`` is still
            # bound to the last array of the loop above (X.T).
            n_components = 1.0
            type_ncom = type(n_components)
            type_message = (
                "n_components={} must be of type int "
                "when greater than or equal to 1, was of type={}".format(
                    n_components, type_ncom)
            )
            assert_raise_message(
                ValueError,
                type_message,
                PCA(n_components, svd_solver=solver).fit,
                data,
            )
def test_not_an_array_array_function():
    """``_NotAnArray`` refuses ``np.sum`` but allows ``np.may_share_memory``.

    Skipped on numpy versions older than 1.17.
    """
    if np_version < parse_version('1.17'):
        raise SkipTest("array_function protocol not supported in numpy <1.17")

    wrapped = _NotAnArray(np.ones(10))
    assert_raises_regex(
        TypeError,
        "Don't want to call array_function sum!",
        np.sum,
        wrapped,
    )
    # may_share_memory is expected to always return True for this wrapper
    assert np.may_share_memory(wrapped, None)
Exemple #6
0
def test_bad_pyfunc_metric():
    """A custom metric that returns a string must be rejected by BallTree."""
    def returns_string(x, y):
        # Deliberately not a number: a valid distance must be numeric.
        return "1"

    points = np.ones((5, 2))
    expected_msg = "Custom distance function must accept two vectors"
    assert_raises_regex(TypeError, expected_msg, BallTree, points,
                        metric=returns_string)
Exemple #7
0
def test_fit_predict_on_pipeline_without_fit_predict():
    """A pipeline must not expose ``fit_predict`` when its final step
    does not define it."""
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('pca', PCA(svd_solver='full')),
    ])
    expected_msg = "'PCA' object has no attribute 'fit_predict'"
    assert_raises_regex(AttributeError, expected_msg, getattr, pipe,
                        'fit_predict')
Exemple #8
0
def check_classifiers_cont_target(name, estimator_orig):
    """Check that a classifier raises when fitted on continuous targets.

    Estimators whose ``no_validation`` tag is set are exempt from the check.
    """
    X, _ = _create_small_ts_dataset()
    y = np.random.random(len(X))
    classifier = clone(estimator_orig)
    expected_msg = 'Unknown label type: '

    if classifier._get_tags()["no_validation"]:
        return
    assert_raises_regex(ValueError, expected_msg, classifier.fit, X, y)
Exemple #9
0
def test_check_estimators_unfitted():
    """Exercise ``check_estimators_unfitted`` on a failing and a passing
    estimator."""
    # The check must fail with an AssertionError for NoSparseClassifier.
    assert_raises_regex(
        AssertionError,
        "NotFittedError not raised by predict",
        check_estimators_unfitted,
        "estimator",
        NoSparseClassifier(),
    )

    # CorrectNotFittedErrorClassifier must pass the check without raising.
    check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())
def test_precompute_invalid_argument():
    """Invalid ``precompute`` values must raise a descriptive ValueError."""
    X, y, _, _ = build_dataset()

    cv_estimators = [ElasticNetCV(precompute="invalid"),
                     LassoCV(precompute="invalid")]
    cv_pattern = (".*should be.*True.*False.*auto.*"
                  "array-like.*Got 'invalid'")
    for estimator in cv_estimators:
        assert_raises_regex(ValueError, cv_pattern, estimator.fit, X, y)

    # precompute='auto' is not supported for plain ElasticNet
    enet_pattern = (".*should be.*True.*False.*array-like.*"
                    "Got 'auto'")
    assert_raises_regex(ValueError, enet_pattern,
                        ElasticNet(precompute='auto').fit, X, y)
Exemple #11
0
def test_check_classification_targets():
    """Non-classification target types must be rejected, all others pass."""
    rejected_types = ["unknown", "continuous", 'continuous-multioutput']
    for y_type, examples in EXAMPLES.items():
        must_raise = y_type in rejected_types
        for example in examples:
            if must_raise:
                assert_raises_regex(ValueError, 'Unknown label type: ',
                                    check_classification_targets, example)
            else:
                check_classification_targets(example)
def test_check_non_negative(retype):
    """``check_non_negative`` accepts non-negative data and rejects data
    containing a negative entry, for the container built by ``retype``."""
    A = np.array([[1, 1, 0, 0], [1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]])

    # Non-negative inputs must pass silently.
    for clean in (A, [[0, 0], [0, 0]]):
        check_non_negative(retype(clean), "")

    # A single negative value must trigger the error.
    A[0, 0] = -1
    with_negative = retype(A)
    assert_raises_regex(ValueError, "Negative ", check_non_negative,
                        with_negative, "")
Exemple #13
0
def test_gen_even_slices():
    """``gen_even_slices`` covers every sample and rejects negative packs."""
    # The slices, applied in order, must reproduce the full range.
    some_range = range(10)
    joined_range = list(chain.from_iterable(
        some_range[chunk] for chunk in gen_even_slices(10, 3)))
    assert_array_equal(some_range, joined_range)

    # A negative number of packs must raise on first iteration.
    bad_slices = gen_even_slices(10, -1)
    expected_msg = ("gen_even_slices got n_packs=-1, must be"
                    " >=1")
    assert_raises_regex(ValueError, expected_msg, next, bad_slices)
def test_check_estimator_required_parameters_skip():
    """``check_estimator`` must skip a class that cannot be instantiated
    without arguments."""
    class MyEstimator(BaseEstimator):
        # Declares a constructor argument that has no default value.
        _required_parameters = ["special_parameter"]

        def __init__(self, special_parameter):
            self.special_parameter = special_parameter

    skip_pattern = (r"Can't instantiate estimator MyEstimator "
                    r"which requires parameters "
                    r"\['special_parameter'\]")
    assert_raises_regex(SkipTest, skip_pattern, check_estimator, MyEstimator)
def test_check_fit2d_1feature():
    """``check_fit2d_1feature`` must flag an uninformative error message."""
    class MyEst(SVC):
        def fit(self, X, y):
            # Raise a deliberately unhelpful error for single-feature input.
            if X.shape[1] == 1:
                raise ValueError("non informative error message")

    expected_msg = "The error message should contain one of the following"
    assert_raises_regex(AssertionError, expected_msg, check_fit2d_1feature,
                        'estimator_name', MyEst())
Exemple #16
0
def test_pipeline_with_cache_attribute():
    """A pipeline fits with ``DummyMemory`` as ``memory`` and raises a
    ValueError with ``WrongDummyMemory``."""
    X = np.array([[1, 2]])

    # DummyMemory: fitting must succeed.
    good_pipe = Pipeline([('transf', Transf()), ('clf', Mult())],
                         memory=DummyMemory())
    good_pipe.fit(X, y=None)

    # WrongDummyMemory: fitting must fail with the memory error message.
    dummy = WrongDummyMemory()
    bad_pipe = Pipeline([('transf', Transf()), ('clf', Mult())], memory=dummy)
    expected_msg = ("'memory' should be None, a string or"
                    " have the same interface as joblib.Memory."
                    " Got memory='{}' instead.".format(dummy))
    assert_raises_regex(ValueError, expected_msg, bad_pipe.fit, X)
Exemple #17
0
def test_ridgecv_negative_alphas():
    """``RidgeCV.fit`` must reject negative alphas, integers and floats."""
    X = np.array([
        [-1.0, -1.0],
        [-1.0, 0],
        [-.8, -1.0],
        [1.0, 1.0],
        [1.0, 0.0],
    ])
    y = [1, 1, 1, -1, -1]

    negative_alpha_sets = (
        (-1, -10, -100),       # negative integers
        (-0.1, -1.0, -10.0),   # negative floats
    )
    for alphas in negative_alpha_sets:
        ridge = RidgeCV(alphas=alphas)
        assert_raises_regex(ValueError, "alphas must be positive",
                            ridge.fit, X, y)
def test_pipeline_wrong_memory():
    """Fitting must fail when ``memory`` is neither a string nor a
    Memory-like object (here: the integer 1)."""
    X, y = iris.data, iris.target
    steps = [('transf', DummyTransf()), ('svc', SVC())]
    cached_pipe = Pipeline(steps, memory=1)

    expected_msg = ("'memory' should be None, a string or"
                    " have the same interface as joblib.Memory."
                    " Got memory='1' instead.")
    assert_raises_regex(ValueError, expected_msg, cached_pipe.fit, X, y)
Exemple #19
0
def test_multi_target_sample_weights_api():
    """``MultiOutputRegressor`` forwards sample weights only when the base
    estimator supports them."""
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [0.8, 0.6]

    # Fitting with weights on OrthogonalMatchingPursuit must raise.
    unweighted = MultiOutputRegressor(OrthogonalMatchingPursuit())
    assert_raises_regex(ValueError, "does not support sample weights",
                        unweighted.fit, X, y, w)

    # Fitting with weights on GradientBoostingRegressor must not raise.
    weighted = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    weighted.fit(X, y, w)
def test_check_estimator_required_parameters_skip():
    """``check_estimator`` must skip a class that cannot be instantiated
    without arguments."""
    # TODO: remove whole test in 0.24 since passes classes to check_estimator()
    # isn't supported anymore
    class MyEstimator(BaseEstimator):
        # Declares a constructor argument that has no default value.
        _required_parameters = ["special_parameter"]

        def __init__(self, special_parameter):
            self.special_parameter = special_parameter

    skip_pattern = (r"Can't instantiate estimator MyEstimator "
                    r"which requires parameters "
                    r"\['special_parameter'\]")
    assert_raises_regex(SkipTest, skip_pattern, check_estimator, MyEstimator)
Exemple #21
0
def test_novelty_errors():
    """Methods unavailable for the chosen ``novelty`` mode must raise
    ``AttributeError`` on attribute access."""
    X = iris.data

    # novelty=False (default): predict, decision_function and score_samples
    # must not be accessible on the fitted estimator.
    fitted_lof = neighbors.LocalOutlierFactor()
    fitted_lof.fit(X)
    for method in ('predict', 'decision_function', 'score_samples'):
        expected_msg = '{} is not available when novelty=False'.format(method)
        assert_raises_regex(AttributeError, expected_msg, getattr,
                            fitted_lof, method)

    # novelty=True: fit_predict must not be accessible.
    novelty_lof = neighbors.LocalOutlierFactor(novelty=True)
    assert_raises_regex(AttributeError,
                        'fit_predict is not available when novelty=True',
                        getattr, novelty_lof, 'fit_predict')
Exemple #22
0
def test_ridge_regression_check_arguments_validity(return_intercept,
                                                   sample_weight, arr_type,
                                                   solver):
    """Check ``ridge_regression`` for one combination of arguments.

    The data are noiseless, so the call must recover the generating
    coefficients (and the intercept, when requested). Requesting an intercept
    from a solver other than 'sag' or 'auto' must raise a ``ValueError``.
    """
    # test excludes 'svd' solver because it raises exception for sparse inputs

    rng = check_random_state(42)
    X = rng.rand(1000, 3)
    true_coefs = [1, 2, 0.1]
    y = np.dot(X, true_coefs)
    true_intercept = 10000. if return_intercept else 0.
    y += true_intercept
    X_testing = arr_type(X)

    alpha, atol, tol = 1e-3, 1e-4, 1e-6
    call_kwargs = dict(
        alpha=alpha,
        solver=solver,
        sample_weight=sample_weight,
        return_intercept=return_intercept,
        tol=tol,
    )

    if return_intercept and solver not in ['sag', 'auto']:
        # Unsupported combination: only the error is checked.
        assert_raises_regex(ValueError,
                            "In Ridge, only 'sag' solver",
                            ridge_regression,
                            X_testing,
                            y,
                            **call_kwargs)
        return

    out = ridge_regression(X_testing, y, **call_kwargs)

    if not return_intercept:
        assert_allclose(out, true_coefs, rtol=0, atol=atol)
        return

    coef, intercept = out
    assert_allclose(coef, true_coefs, rtol=0, atol=atol)
    assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
Exemple #23
0
def test_check_estimator_get_tags_default_keys():
    """The tags check fails for ``EstimatorMissingDefaultTags`` and passes
    for ``MinimalTransformer``."""
    incomplete = EstimatorMissingDefaultTags()
    err_msg = (r"EstimatorMissingDefaultTags._get_tags\(\) is missing entries"
               r" for the following default tags: {'allow_nan'}")
    assert_raises_regex(AssertionError, err_msg,
                        check_estimator_get_tags_default_keys,
                        incomplete.__class__.__name__, incomplete)

    # noop check when _get_tags is not available
    minimal = MinimalTransformer()
    check_estimator_get_tags_default_keys(minimal.__class__.__name__, minimal)
Exemple #24
0
def test_type_of_target():
    """Check ``type_of_target`` on valid, non-array-like and legacy inputs."""
    # Every example must be classified as the group it is filed under.
    labelled = ((group, example)
                for group, group_examples in EXAMPLES.items()
                for example in group_examples)
    for group, example in labelled:
        assert type_of_target(example) == group, (
            'type_of_target(%r) should be %r, got %r'
            % (example, group, type_of_target(example)))

    # Inputs that are not array-like must be rejected.
    msg_regex = r'Expected array-like \(array or non-string sequence\).*'
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises_regex(ValueError, msg_regex, type_of_target, example)

    # The legacy sequence-of-sequences multilabel format must be rejected.
    legacy_msg = ('You appear to be using a legacy multi-label data '
                  'representation. Sequence of sequences are no longer supported;'
                  ' use a binary array or sparse matrix instead.')
    for example in MULTILABEL_SEQUENCES:
        assert_raises_regex(ValueError, legacy_msg, type_of_target, example)
def test_feature_union():
    """Basic sanity checks for ``FeatureUnion``.

    Covers the output shape and content on dense input, agreement between
    dense and sparse input, cloning, ``set_params``, transformers without
    ``fit_transform``, rejection of steps lacking ``transform``, and
    construction from a tuple of steps.
    """
    # Centre a private copy: ``X -= ...`` on ``iris.data`` itself would modify
    # the shared module-level dataset in place and leak into other tests.
    X = iris.data.copy()
    X -= X.mean(axis=0)
    y = iris.target
    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("svd", svd), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # 2 SVD components + 1 selected feature
    assert X_transformed.shape == (X.shape[0], 3)

    # check if it does the expected thing: the union is the column-wise
    # concatenation of the individual transformers' outputs
    assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # A fresh union is built around the same svd (random_state=0) and select
    # instances, then compared with the dense result computed above.
    fs = FeatureUnion([("svd", svd), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # Test clone: the cloned union must hold distinct transformer objects
    fs2 = assert_no_warnings(clone, fs)
    assert fs.transformer_list[0][1] is not fs2.transformer_list[0][1]

    # test setting parameters (2 SVD components + 2 selected features)
    fs.set_params(select__k=2)
    assert fs.fit_transform(X, y).shape == (X.shape[0], 4)

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)])
    X_transformed = fs.fit_transform(X, y)
    assert X_transformed.shape == (X.shape[0], 8)

    # test error if some elements do not support transform
    assert_raises_regex(TypeError,
                        'All estimators should implement fit and '
                        'transform.*\\bNoTrans\\b',
                        FeatureUnion,
                        [("transform", Transf()), ("no_transform", NoTrans())])

    # test that init accepts tuples
    fs = FeatureUnion((("svd", svd), ("select", select)))
    fs.fit(X, y)
def test_check_memory():
    """Check ``check_memory`` on a path, ``None``, a valid object and two
    invalid values."""
    # A string yields a Memory whose cachedir is <string>/joblib.
    from_path = check_memory("cache_directory")
    assert from_path.cachedir == os.path.join('cache_directory', 'joblib')

    # None yields a Memory with no cachedir.
    from_none = check_memory(None)
    assert from_none.cachedir is None

    # A DummyMemory instance is returned unchanged.
    dummy = DummyMemory()
    assert check_memory(dummy) is dummy

    # An integer is rejected.
    int_msg = ("'memory' should be None, a string or"
               " have the same interface as joblib.Memory."
               " Got memory='1' instead.")
    assert_raises_regex(ValueError, int_msg, check_memory, 1)

    # A WrongDummyMemory instance is rejected as well.
    dummy = WrongDummyMemory()
    wrong_msg = ("'memory' should be None, a string or"
                 " have the same interface as joblib.Memory."
                 " Got memory='{}' instead.".format(dummy))
    assert_raises_regex(ValueError, wrong_msg, check_memory, dummy)
def test_check_no_attributes_set_in_init():
    """``check_no_attributes_set_in_init`` must flag both kinds of
    non-conformant ``__init__``."""
    class NonConformantEstimatorPrivateSet(BaseEstimator):
        # Sets an attribute that is not a constructor parameter.
        def __init__(self):
            self.you_should_not_set_this_ = None

    class NonConformantEstimatorNoParamSet(BaseEstimator):
        # Accepts a parameter but never stores it on the instance.
        def __init__(self, you_should_set_this_=None):
            pass

    extra_attr_msg = ("Estimator estimator_name should not set any"
                      " attribute apart from parameters during init."
                      r" Found attributes \['you_should_not_set_this_'\].")
    assert_raises_regex(AssertionError, extra_attr_msg,
                        check_no_attributes_set_in_init, 'estimator_name',
                        NonConformantEstimatorPrivateSet())

    missing_param_msg = ("Estimator estimator_name should store all "
                         "parameters as an attribute during init.")
    assert_raises_regex(AttributeError, missing_param_msg,
                        check_no_attributes_set_in_init, 'estimator_name',
                        NonConformantEstimatorNoParamSet())
def test_check_consistent_length():
    """Check ``check_consistent_length`` on matching, mismatching and
    unsized inputs."""
    # Inputs with the same number of samples pass silently.
    check_consistent_length([1], [2], [3], [4], [5])
    check_consistent_length([[1, 2], [[1, 2]]], [1, 2], ['a', 'b'])
    check_consistent_length([1], (2, ), np.array([3]), sp.csr_matrix((1, 2)))

    # Differing lengths raise a ValueError.
    assert_raises_regex(ValueError, 'inconsistent numbers of samples',
                        check_consistent_length, [1, 2], [1])

    # Objects without a usable length raise a TypeError naming their type.
    unsized_cases = (
        (r"got <\w+ 'int'>", 1),
        (r"got <\w+ 'object'>", object()),
    )
    for pattern, unsized in unsized_cases:
        assert_raises_regex(TypeError, pattern, check_consistent_length,
                            [1, 2], unsized)

    # A 0-d array raises a TypeError as well.
    assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1))

    # Despite ensembles having __len__ they must raise TypeError
    assert_raises_regex(TypeError, 'Expected sequence or array-like',
                        check_consistent_length, [1, 2],
                        RandomForestRegressor())
Exemple #29
0
def check_target_type(name, estimator):
    """Check that ``estimator.fit_resample`` rejects unsupported targets.

    Both a continuous 1-D target and a 2-D binary target must raise a
    ``ValueError`` with the corresponding message.
    """
    X = np.random.random((20, 2))

    def _assert_rejected(target, expected_msg):
        # fit_resample must raise ValueError matching ``expected_msg``.
        assert_raises_regex(
            ValueError,
            expected_msg,
            estimator.fit_resample,
            X,
            target,
        )

    # continuous target
    y = np.linspace(0, 1, 20)
    _assert_rejected(y, "Unknown label type: 'continuous'")

    # 2-D binary (multilabel-style) target
    rng = np.random.RandomState(42)
    y = rng.randint(2, size=(20, 3))
    _assert_rejected(y, "Multilabel and multioutput targets are not supported.")
def test_check_array_complex_data_error():
    """``check_array`` must reject complex data in every container type."""
    def _assert_complex_rejected(X):
        # Shared assertion: check_array(X) raises the complex-data error.
        assert_raises_regex(ValueError, "Complex data not supported",
                            check_array, X)

    row_a = [1 + 2j, 3 + 4j, 5 + 7j]
    row_b = [2 + 3j, 4 + 5j, 6 + 7j]

    # 2-D ndarray
    _assert_complex_rejected(np.array([row_a, row_b]))

    # list of lists
    _assert_complex_rejected([list(row_a), list(row_b)])

    # tuple of tuples
    _assert_complex_rejected((tuple(row_a), tuple(row_b)))

    # list of np arrays
    _assert_complex_rejected([np.array(row_a), np.array(row_b)])

    # tuple of np arrays
    _assert_complex_rejected((np.array(row_a), np.array(row_b)))

    # dataframe
    _assert_complex_rejected(MockDataFrame(np.array([row_a, row_b])))

    # sparse matrix
    _assert_complex_rejected(sp.coo_matrix([[0, 1 + 2j], [0, 0]]))