def test_graphviz_errors():
    # Check that export_graphviz raises the expected errors
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)

    # Check not-fitted decision tree error
    out = StringIO()
    assert_raises(NotFittedError, export_graphviz, clf, out)

    clf.fit(X, y)

    # Check that an error is raised when the length of feature_names
    # does not match the number of features
    message = ("Length of feature_names, "
               "1 does not match number of features, 2")
    assert_raise_message(ValueError, message, export_graphviz, clf, None,
                         feature_names=["a"])

    message = ("Length of feature_names, "
               "3 does not match number of features, 2")
    assert_raise_message(ValueError, message, export_graphviz, clf, None,
                         feature_names=["a", "b", "c"])

    # Check class_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, class_names=[])

    # Check precision error
    out = StringIO()
    assert_raises_regex(ValueError, "should be greater or equal",
                        export_graphviz, clf, out, precision=-1)
    assert_raises_regex(ValueError, "should be an integer",
                        export_graphviz, clf, out, precision="1")
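For contrast with the error cases above, a minimal happy-path sketch of export_graphviz (the dataset, feature names and tree settings below are illustrative, not taken from the test module):

from io import StringIO

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Fit a small tree on two features so that len(feature_names) matches.
iris = load_iris()
tree_clf = DecisionTreeClassifier(max_depth=3).fit(iris.data[:, :2], iris.target)

# With out_file=None the DOT source is returned as a string ...
dot_source = export_graphviz(tree_clf, out_file=None,
                             feature_names=["sepal length", "sepal width"])
assert "digraph Tree" in dot_source

# ... while a file-like object such as StringIO receives the same output.
out = StringIO()
export_graphviz(tree_clf, out_file=out,
                feature_names=["sepal length", "sepal width"])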
def test_enet_l1_ratio():
    # Test that an error is raised when an estimator that
    # uses _alpha_grid is called with l1_ratio=0
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])

    assert_raise_message(ValueError, msg, ElasticNetCV(
        l1_ratio=0, random_state=42).fit, X, y)
    assert_raise_message(ValueError, msg, MultiTaskElasticNetCV(
        l1_ratio=0, random_state=42).fit, X, y[:, None])

    # Test that l1_ratio=0 is allowed if we supply a grid manually
    alphas = [0.1, 10]
    estkwds = {'alphas': alphas, 'random_state': 42}
    est_desired = ElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = ElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est_desired.fit(X, y)
        est.fit(X, y)
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)

    est_desired = MultiTaskElasticNetCV(l1_ratio=0.00001, **estkwds)
    est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds)
    with ignore_warnings():
        est.fit(X, y[:, None])
        est_desired.fit(X, y[:, None])
    assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
Example 3
def test_check_solver_option(LR):
    X, y = iris.data, iris.target

    msg = ('Logistic Regression supports only liblinear, newton-cg, '
           'lbfgs, sag and saga solvers, got wrong_name')
    lr = LR(solver="wrong_name")
    assert_raise_message(ValueError, msg, lr.fit, X, y)

    msg = "multi_class should be either multinomial or ovr, got wrong_name"
    lr = LR(solver='newton-cg', multi_class="wrong_name")
    assert_raise_message(ValueError, msg, lr.fit, X, y)

    # only 'liblinear' solver
    msg = "Solver liblinear does not support a multinomial backend."
    lr = LR(solver='liblinear', multi_class='multinomial')
    assert_raise_message(ValueError, msg, lr.fit, X, y)

    # all solvers except 'liblinear'
    for solver in ['newton-cg', 'lbfgs', 'sag']:
        msg = ("Solver %s supports only l2 penalties, got l1 penalty." %
               solver)
        lr = LR(solver=solver, penalty='l1')
        assert_raise_message(ValueError, msg, lr.fit, X, y)
    for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']:
        msg = ("Solver %s supports only dual=False, got dual=True" %
               solver)
        lr = LR(solver=solver, dual=True)
        assert_raise_message(ValueError, msg, lr.fit, X, y)
Example 4
def test_pls_errors():
    d = load_linnerud()
    X = d.data
    Y = d.target
    for clf in [pls_.PLSCanonical(), pls_.PLSRegression(), pls_.PLSSVD()]:
        clf.n_components = 4
        assert_raise_message(ValueError, "Invalid number of components",
                             clf.fit, X, Y)
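assert_raise_message, used throughout these examples, checks both the exception type and that the expected text occurs in the raised message; the next example exercises it directly. A simplified sketch of that behaviour (an illustration, not scikit-learn's actual implementation):

def _assert_raise_message_sketch(exceptions, message, function, *args, **kwargs):
    # Run the callable, require one of the expected exception types to be
    # raised, and require the expected text to appear in its message.
    try:
        function(*args, **kwargs)
    except exceptions as exc:
        if message not in str(exc):
            raise AssertionError("Error message does not include the expected"
                                 " string %r. Got %r instead."
                                 % (message, str(exc)))
    else:
        name = getattr(function, "__name__", str(function))
        raise AssertionError("%r not raised by %s" % (exceptions, name))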
Example 5
def test_assert_raise_message():
    def _raise_ValueError(message):
        raise ValueError(message)

    def _no_raise():
        pass

    assert_raise_message(ValueError, "test",
                         _raise_ValueError, "test")

    assert_raises(AssertionError,
                  assert_raise_message, ValueError, "something else",
                  _raise_ValueError, "test")

    assert_raises(ValueError,
                  assert_raise_message, TypeError, "something else",
                  _raise_ValueError, "test")

    assert_raises(AssertionError,
                  assert_raise_message, ValueError, "test",
                  _no_raise)

    # multiple exceptions in a tuple
    assert_raises(AssertionError,
                  assert_raise_message, (ValueError, AttributeError),
                  "test", _no_raise)
Example 6
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test):
    dense_svm.fit(X_train.toarray(), y_train)
    if sparse.isspmatrix(X_test):
        X_test_dense = X_test.toarray()
    else:
        X_test_dense = X_test
    sparse_svm.fit(X_train, y_train)
    assert sparse.issparse(sparse_svm.support_vectors_)
    assert sparse.issparse(sparse_svm.dual_coef_)
    assert_array_almost_equal(dense_svm.support_vectors_,
                              sparse_svm.support_vectors_.toarray())
    assert_array_almost_equal(dense_svm.dual_coef_,
                              sparse_svm.dual_coef_.toarray())
    if dense_svm.kernel == "linear":
        assert sparse.issparse(sparse_svm.coef_)
        assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray())
    assert_array_almost_equal(dense_svm.support_, sparse_svm.support_)
    assert_array_almost_equal(dense_svm.predict(X_test_dense),
                              sparse_svm.predict(X_test))
    assert_array_almost_equal(dense_svm.decision_function(X_test_dense),
                              sparse_svm.decision_function(X_test))
    assert_array_almost_equal(dense_svm.decision_function(X_test_dense),
                              sparse_svm.decision_function(X_test_dense))
    if isinstance(dense_svm, svm.OneClassSVM):
        msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
    else:
        assert_array_almost_equal(dense_svm.predict_proba(X_test_dense),
                                  sparse_svm.predict_proba(X_test), 4)
        msg = "cannot use sparse input in 'SVC' trained on dense data"
    if sparse.isspmatrix(X_test):
        assert_raise_message(ValueError, msg, dense_svm.predict, X_test)
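check_svm_model_equal is a helper rather than a test, so a caller has to supply paired dense and sparse estimators plus data. A hypothetical invocation (dataset and hyper-parameters here are illustrative only):

from scipy import sparse
from sklearn import datasets, svm

# The helper expects X_train as a sparse matrix: it calls X_train.toarray()
# for the dense estimator and fits the sparse estimator on X_train directly.
iris = datasets.load_iris()
X_train = sparse.csr_matrix(iris.data)
y_train = iris.target
X_test = sparse.csr_matrix(iris.data[:20])

# probability=True because the helper compares predict_proba for anything
# that is not a OneClassSVM; a shared random_state keeps both fits aligned.
dense_clf = svm.SVC(kernel="linear", probability=True, random_state=0)
sparse_clf = svm.SVC(kernel="linear", probability=True, random_state=0)
check_svm_model_equal(dense_clf, sparse_clf, X_train, y_train, X_test)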
def test_multilabel_binarizer_given_classes():
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 0, 1]])
    # fit_transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # fit().transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # ensure works with extra class
    mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp),
                       np.hstack(([[0], [0], [0]], indicator_mat)))
    assert_array_equal(mlb.classes_, [4, 1, 3, 2])

    # ensure fit is no-op as iterable is not consumed
    inp = iter(inp)
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)

    # ensure a ValueError is thrown if given duplicate classes
    err_msg = "The classes argument contains duplicate classes. Remove " \
              "these duplicates before passing them to MultiLabelBinarizer."
    mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3])
    assert_raise_message(ValueError, err_msg, mlb.fit, inp)
Example 8
def test_k_means_function():
    # test calling the k_means function directly
    # catch output
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                           sample_weight=None,
                                           verbose=True)
    finally:
        sys.stdout = old_stdout

    assert_equal(centers.shape, (n_clusters, n_features))
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # check that the label assignment is perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # check warning when centers are passed
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 sample_weight=None, init=centers)

    # too many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1,
                  sample_weight=None)

    # kmeans for algorithm='elkan' raises TypeError on sparse matrix
    assert_raise_message(TypeError, "algorithm='elkan' not supported for "
                         "sparse input X", k_means, X=X_csr, n_clusters=2,
                         sample_weight=None, algorithm="elkan")
Example 9
def check_dtype_object(name, Estimator):
    # check that estimators treat dtype object as numeric if possible
    rng = np.random.RandomState(0)
    X = rng.rand(40, 10).astype(object)
    y = (X[:, 0] * 4).astype(np.int)
    y = multioutput_estimator_convert_y_2d(name, y)
    with warnings.catch_warnings():
        estimator = Estimator()
    set_fast_parameters(estimator)

    estimator.fit(X, y)
    if hasattr(estimator, "predict"):
        estimator.predict(X)

    if hasattr(estimator, "transform"):
        estimator.transform(X)

    try:
        estimator.fit(X, y.astype(object))
    except Exception as e:
        if "Unknown label type" not in str(e):
            raise

    X[0, 0] = {'foo': 'bar'}
    assert_raise_message(TypeError, "string or a number", estimator.fit, X, y)
Example 10
def test_nmf_negative_beta_loss():
    # Test that an error is raised if beta_loss <= 0 and X contains zeros.
    # Test that the output has no NaN values when the input contains zeros.
    n_samples = 6
    n_features = 5
    n_components = 3

    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = sp.csr_matrix(X)

    def _assert_nmf_no_nan(X, beta_loss):
        W, H, _ = non_negative_factorization(
            X, init='random', n_components=n_components, solver='mu',
            beta_loss=beta_loss, random_state=0, max_iter=1000)
        assert not np.any(np.isnan(W))
        assert not np.any(np.isnan(H))

    msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge."
    for beta_loss in (-0.6, 0.):
        assert_raise_message(ValueError, msg, _assert_nmf_no_nan, X, beta_loss)
        _assert_nmf_no_nan(X + 1e-9, beta_loss)

    for beta_loss in (0.2, 1., 1.2, 2., 2.5):
        _assert_nmf_no_nan(X, beta_loss)
        _assert_nmf_no_nan(X_csr, beta_loss)
Example 11
def test_step_name_validation():
    bad_steps1 = [('a__q', Mult(2)), ('b', Mult(3))]
    bad_steps2 = [('a', Mult(2)), ('a', Mult(3))]
    for cls, param in [(Pipeline, 'steps'), (FeatureUnion,
                                             'transformer_list')]:
        # we validate in construction (despite scikit-learn convention)
        bad_steps3 = [('a', Mult(2)), (param, Mult(3))]
        for bad_steps, message in [
            (bad_steps1, "Step names must not contain __: got ['a__q']"),
            (bad_steps2, "Names provided are not unique: ['a', 'a']"),
            (bad_steps3, "Step names conflict with constructor "
             "arguments: ['%s']" % param),
        ]:
            # three ways to make invalid:
            # - construction
            assert_raise_message(ValueError, message, cls,
                                 **{param: bad_steps})

            # - setattr
            est = cls(**{param: [('a', Mult(1))]})
            setattr(est, param, bad_steps)
            assert_raise_message(ValueError, message, est.fit, [[1]], [1])
            assert_raise_message(ValueError, message, est.fit_transform, [[1]],
                                 [1])

            # - set_params
            est = cls(**{param: [('a', Mult(1))]})
            est.set_params(**{param: bad_steps})
            assert_raise_message(ValueError, message, est.fit, [[1]], [1])
            assert_raise_message(ValueError, message, est.fit_transform, [[1]],
                                 [1])
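The Mult helper used here and in test_set_pipeline_step_none below is not part of this listing. A minimal sketch of the kind of estimator those tests assume (the real test helper may differ in detail):

import numpy as np

from sklearn.base import BaseEstimator, TransformerMixin


class Mult(BaseEstimator, TransformerMixin):
    # Toy estimator: multiplies its input by a constant, can invert the
    # transform, and exposes the predict-like methods the pipeline tests call.
    def __init__(self, mult=1):
        self.mult = mult

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return np.asarray(X) * self.mult

    def inverse_transform(self, X):
        return np.asarray(X) / self.mult

    def predict(self, X):
        return (np.asarray(X) * self.mult).sum(axis=1)

    predict_proba = predict_log_proba = decision_function = predict

    def score(self, X, y=None):
        return np.sum(X)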
Example 12
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D

    n_sampless = [2, 3]
    n_featuress = [3, 2]

    rng = np.random.RandomState(42)

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
        sample_weights_OK_1 = 1.0
        sample_weights_OK_2 = 2.0
        sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
        sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]

        ridge = Ridge(alpha=1)

        # make sure the "OK" sample weights actually work
        ridge.fit(X, y, sample_weights_OK)
        ridge.fit(X, y, sample_weights_OK_1)
        ridge.fit(X, y, sample_weights_OK_2)

        def fit_ridge_not_ok():
            ridge.fit(X, y, sample_weights_not_OK)

        def fit_ridge_not_ok_2():
            ridge.fit(X, y, sample_weights_not_OK_2)

        assert_raise_message(ValueError, "Sample weights must be 1D array or scalar", fit_ridge_not_ok)

        assert_raise_message(ValueError, "Sample weights must be 1D array or scalar", fit_ridge_not_ok_2)
Example 13
def test_set_pipeline_step_none():
    # Test setting Pipeline steps to None
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([("m2", mult2), ("m3", mult3), ("last", mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=None)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_dict_equal(
        pipeline.get_params(deep=True),
        {"steps": pipeline.steps, "m2": mult2, "m3": None, "last": mult5, "m2__mult": 2, "last__mult": 5},
    )

    pipeline.set_params(m2=None)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeErrors on None:
    other_methods = ["predict_proba", "predict_log_proba", "decision_function", "transform", "score"]
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=None)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_raise_message(AttributeError, "'NoneType' object has no attribute 'predict'", getattr, pipeline, "predict")

    # Check None step at construction time
    exp = 2 * 5
    pipeline = Pipeline([("m2", mult2), ("m3", None), ("last", mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
Example 14
def test_binary_clf_curve():
    rng = check_random_state(404)
    y_true = rng.randint(0, 3, size=10)
    y_pred = rng.rand(10)
    msg = "multiclass format is not supported"
    assert_raise_message(ValueError, msg, precision_recall_curve,
                         y_true, y_pred)
Example 15
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    msg = ("In LogisticRegressionCV the liblinear solver cannot handle "
           "multiclass with class_weight of type dict. Use the lbfgs, "
           "newton-cg or sag solvers or set class_weight='balanced'")
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raise_message(ValueError, msg, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=balanced
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='balanced')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='balanced')
    clf_lib.fit(X, y)
    clf_sag = LogisticRegressionCV(solver='sag', fit_intercept=False,
                                   class_weight='balanced', max_iter=2000)
    clf_sag.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_sag.coef_, clf_lbf.coef_, decimal=4)
    assert_array_almost_equal(clf_lib.coef_, clf_sag.coef_, decimal=4)
Example 16
def test_ovo_one_class():
    # Test error for OvO with one class
    X = np.eye(4)
    y = np.array(['a'] * 4)

    ovo = OneVsOneClassifier(LinearSVC())
    assert_raise_message(ValueError, "when only one class", ovo.fit, X, y)
Example 17
def test_ovo_float_y():
    # Test that OvO raises an error on float targets
    X = iris.data
    y = iris.data[:, 0]

    ovo = OneVsOneClassifier(LinearSVC())
    assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y)
Example 18
def test_grid_search_groups():
    # Check that the ValueError raised when groups is None propagates to
    # GridSearchCV, and that groups is correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The groups parameter should not be None",
                             gs.fit, X, y)
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
Example 19
def test_n_iter():
    """Check value of n_iter."""
    X = np.array([[1], [2], [6], [8], [10]])
    y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(n_iter=0)
    msg = "n_iter should be greater than or equal to 1."
    assert_raise_message(ValueError, msg, clf.fit, X, y)
def test_sample_weight():
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = SVC(probability=True, random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('svc', clf3)],
        voting='soft').fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))

    sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
    eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
    eclf3.fit(X, y, sample_weight)
    clf1.fit(X, y, sample_weight)
    assert_array_equal(eclf3.predict(X), clf1.predict(X))
    assert_array_equal(eclf3.predict_proba(X), clf1.predict_proba(X))

    clf4 = KNeighborsClassifier()
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('svc', clf3), ('knn', clf4)],
        voting='soft')
    msg = ('Underlying estimator \'knn\' does not support sample weights.')
    assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    msg = ("This VotingClassifier instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this method.")
    assert_raise_message(NotFittedError, msg, eclf.predict_proba, X)
def test_calinski_harabaz_score():
    rng = np.random.RandomState(seed=0)

    # Assert message when there is only one label
    assert_raise_message(ValueError, "Number of labels is",
                         calinski_harabaz_score,
                         rng.rand(10, 2), np.zeros(10))

    # Assert message when all points are in different clusters
    assert_raise_message(ValueError, "Number of labels is",
                         calinski_harabaz_score,
                         rng.rand(10, 2), np.arange(10))

    # Assert the value is 1. when all samples are equal
    assert_equal(1., calinski_harabaz_score(np.ones((10, 2)),
                                            [0] * 5 + [1] * 5))

    # Assert the value is 0. when all cluster means are equal
    assert_equal(0., calinski_harabaz_score([[-1, -1], [1, 1]] * 10,
                                            [0] * 10 + [1] * 10))

    # General case (with non-numpy arrays)
    X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
         [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert_almost_equal(calinski_harabaz_score(X, labels),
                        45 * (40 - 4) / (5 * (4 - 1)))
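The hard-coded expectation in the final assertion is the Calinski-Harabasz definition evaluated by hand (arithmetic worked out from the constructed data, not taken from the test module): each of the k = 4 clusters in X contributes 45 to the between-cluster dispersion and 5 to the within-cluster dispersion, with n = 40 samples, so

\[
s = \frac{\operatorname{tr}(B_k)/(k-1)}{\operatorname{tr}(W_k)/(n-k)}
  = \frac{180/3}{20/36} = 108,
\]

which is what 45 * (40 - 4) / (5 * (4 - 1)) evaluates to once the common per-cluster factor of 4 cancels.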
Example 23
def test_string_attribute(monkeypatch, gzip_response):
    data_id = 40945
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_raise_message(ValueError,
                         'STRING attributes are not yet supported',
                         fetch_openml, data_id=data_id, cache=False)
Example 24
def test_error_on_y_transform():
    data = patsy.demo_data("x1", "x2", "x3", "y")
    est = PatsyTransformer("y ~ x1 + x2")
    msg = ("encountered outcome variables for a model"
           " that does not expect them")
    assert_raise_message(patsy.PatsyError, msg, est.fit, data)
    assert_raise_message(patsy.PatsyError, msg, est.fit_transform, data)
Example 25
def test_fetch_nonexiting(monkeypatch, gzip_response):
    # there is no active version of glass2
    data_id = 40675
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # Note that we only want to search by name (not data id)
    assert_raise_message(ValueError, "No active dataset glass2 found",
                         fetch_openml, name='glass2', cache=False)
Example 26
def test_classification_inf_nan_input(metric):
    # Classification metrics all raise a mixed input exception
    for y_true, y_score in invalids:
        assert_raise_message(ValueError,
                             "Classification metrics can't handle a mix "
                             "of binary and continuous targets",
                             metric, y_true, y_score)
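Both the metric argument and the invalids iterable come from the surrounding test module (typically via pytest parametrization) rather than from this snippet. A hypothetical setup producing inputs of the same shape (metric choices and the invalids list are illustrative):

import numpy as np
import pytest

from sklearn.metrics import accuracy_score, f1_score

# Hypothetical stand-in: valid binary targets paired with NaN / infinite scores.
invalids = [([0, 1], [np.inf, np.inf]),
            ([0, 1], [np.nan, np.nan]),
            ([0, 1], [np.nan, np.inf])]


@pytest.mark.parametrize("metric", [accuracy_score, f1_score])
def test_classification_inf_nan_input_sketch(metric):
    for y_true, y_score in invalids:
        with pytest.raises(ValueError):
            metric(y_true, y_score)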
Example 27
def test_classification_inf_nan_input(metric):
    # Classification metrics all reject NaN and infinite inputs
    for y_true, y_score in invalids:
        assert_raise_message(ValueError,
                             "Input contains NaN, infinity or a "
                             "value too large",
                             metric, y_true, y_score)
def test_score():
    covar_type = 'full'
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components = rand_data.n_components
    X = rand_data.X[covar_type]

    # Check the error message if we don't call fit
    gmm1 = GaussianMixture(n_components=n_components, n_init=1,
                           max_iter=1, reg_covar=0, random_state=rng,
                           covariance_type=covar_type)
    assert_raise_message(NotFittedError,
                         "This GaussianMixture instance is not fitted "
                         "yet. Call 'fit' with appropriate arguments "
                         "before using this method.", gmm1.score, X)

    # Check score value
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        gmm1.fit(X)
    gmm_score = gmm1.score(X)
    gmm_score_proba = gmm1.score_samples(X).mean()
    assert_almost_equal(gmm_score, gmm_score_proba)

    # Check that the score increases
    gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0,
                           random_state=rng,
                           covariance_type=covar_type).fit(X)
    assert_greater(gmm2.score(X), gmm1.score(X))
Example 29
def test_n_components():
    rng = np.random.RandomState(42)
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]

    init = rng.rand(X.shape[1] - 1, 3)

    # n_components = X.shape[1] != transformation.shape[0]
    n_components = X.shape[1]
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(ValueError,
                         'The preferred dimensionality of the '
                         'projected space `n_components` ({}) does not match '
                         'the output dimensionality of the given '
                         'linear transformation `init` ({})!'
                         .format(n_components, init.shape[0]),
                         nca.fit, X, y)

    # n_components > X.shape[1]
    n_components = X.shape[1] + 2
    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
    assert_raise_message(ValueError,
                         'The preferred dimensionality of the '
                         'projected space `n_components` ({}) cannot '
                         'be greater than the given data '
                         'dimensionality ({})!'
                         .format(n_components, X.shape[1]),
                         nca.fit, X, y)

    # n_components < X.shape[1]
    nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity')
    nca.fit(X, y)
Example 30
def test_check_solver_option():
    X, y = iris.data, iris.target
    for LR in [LogisticRegression, LogisticRegressionCV]:

        msg = ("Logistic Regression supports only liblinear, newton-cg and"
               " lbfgs solvers, got wrong_name")
        lr = LR(solver="wrong_name")
        assert_raise_message(ValueError, msg, lr.fit, X, y)

        msg = "multi_class should be either multinomial or ovr, got wrong_name"
        lr = LR(solver='newton-cg', multi_class="wrong_name")
        assert_raise_message(ValueError, msg, lr.fit, X, y)

        # all solvers except 'newton-cg' and 'lbfgs'
        for solver in ['liblinear']:
            msg = ("Solver %s does not support a multinomial backend." %
                   solver)
            lr = LR(solver=solver, multi_class='multinomial')
            assert_raise_message(ValueError, msg, lr.fit, X, y)

        # all solvers except 'liblinear'
        for solver in ['newton-cg', 'lbfgs']:
            msg = ("Solver %s supports only l2 penalties, got l1 penalty." %
                   solver)
            lr = LR(solver=solver, penalty='l1')
            assert_raise_message(ValueError, msg, lr.fit, X, y)

            msg = ("Solver %s supports only dual=False, got dual=True" %
                   solver)
            lr = LR(solver=solver, dual=True)
            assert_raise_message(ValueError, msg, lr.fit, X, y)
Example 31
def assert_raises_on_all_points_same_cluster(func):
    """Assert message when all points are in different clusters"""
    rng = np.random.RandomState(seed=0)
    assert_raise_message(ValueError, "Number of labels is", func,
                         rng.rand(10, 2), np.arange(10))
Example 32
def test_fast_dot():
    """Check fast dot blas wrapper function"""
    rng = np.random.RandomState(42)
    A = rng.random_sample([2, 10])
    B = rng.random_sample([2, 10])

    try:
        linalg.get_blas_funcs(['gemm'])
        has_blas = True
    except (AttributeError, ValueError):
        has_blas = False

    if has_blas:
        # test dispatch to np.dot
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', NonBLASDotWarning)
            # mismatched dtypes
            for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]:
                fast_dot(A.astype(dt1), B.astype(dt2).T)
                assert_true(type(w.pop(-1)) == NonBLASDotWarning)
            # malformed data
            # ndim == 0
            E = np.empty(0)
            fast_dot(E, E)
            assert_true(type(w.pop(-1)) == NonBLASDotWarning)
            ## ndim == 1
            fast_dot(A, A[0])
            assert_true(type(w.pop(-1)) == NonBLASDotWarning)
            ## ndim > 2
            fast_dot(A.T, np.array([A, A]))
            assert_true(type(w.pop(-1)) == NonBLASDotWarning)
            ## min(shape) == 1
            fast_dot(A, A[0, :][None, :])
            assert_true(type(w.pop(-1)) == NonBLASDotWarning)
        # test for matrix mismatch error
        msg = ('Invalid array shapes: A.shape[%d] should be the same as '
               'B.shape[0]. Got A.shape=%r B.shape=%r' %
               (A.ndim - 1, A.shape, A.shape))
        assert_raise_message(ValueError, msg, fast_dot, A, A)

    # test cov-like use case + dtypes
    my_assert = assert_array_almost_equal
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        #  col < row
        C = np.dot(A.T, A)
        C_ = fast_dot(A.T, A)
        my_assert(C, C_)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        my_assert(C, C_)

        C = np.dot(A, B.T)
        C_ = fast_dot(A, B.T)
        my_assert(C, C_)

    # test square matrix * rectangular use case
    A = rng.random_sample([2, 2])
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        C = np.dot(A, B)
        C_ = fast_dot(A, B)
        my_assert(C, C_)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        my_assert(C, C_)

    if has_blas:
        for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]:
            assert_raises(ValueError, fast_dot, x, x.T)
Example 33
def test_auc_score_non_binary_class():
    # Test that the roc_auc_score function raises an error when trying
    # to compute AUC for non-binary class values.
    rng = check_random_state(404)
    y_pred = rng.rand(10)
    # y_true contains only one class value
    y_true = np.zeros(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    y_true = np.ones(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    y_true = -np.ones(10, dtype="int")
    assert_raise_message(ValueError, "ROC AUC score is not defined",
                         roc_auc_score, y_true, y_pred)
    # y_true contains three different class values
    y_true = rng.randint(0, 3, size=10)
    assert_raise_message(ValueError, "multiclass format is not supported",
                         roc_auc_score, y_true, y_pred)

    clean_warning_registry()
    with warnings.catch_warnings(record=True):
        rng = check_random_state(404)
        y_pred = rng.rand(10)
        # y_true contains only one class value
        y_true = np.zeros(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        y_true = np.ones(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)
        y_true = -np.ones(10, dtype="int")
        assert_raise_message(ValueError, "ROC AUC score is not defined",
                             roc_auc_score, y_true, y_pred)

        # y_true contains three different class values
        y_true = rng.randint(0, 3, size=10)
        assert_raise_message(ValueError, "multiclass format is not supported",
                             roc_auc_score, y_true, y_pred)
def test_bayesian_mixture_precisions_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    # Check the error message for a bad value of degrees_of_freedom_prior
    bad_degrees_of_freedom_prior_ = n_features - 1.
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=bad_degrees_of_freedom_prior_,
        random_state=rng)
    assert_raise_message(
        ValueError, "The parameter 'degrees_of_freedom_prior' should be "
        "greater than %d, but got %.3f." %
        (n_features - 1, bad_degrees_of_freedom_prior_), bgmm.fit, X)

    # Check correct init for a given value of degrees_of_freedom_prior
    degrees_of_freedom_prior = rng.rand() + n_features - 1.
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=degrees_of_freedom_prior,
        random_state=rng).fit(X)
    assert_almost_equal(degrees_of_freedom_prior,
                        bgmm.degrees_of_freedom_prior_)

    # Check correct init for the default value of degrees_of_freedom_prior
    degrees_of_freedom_prior_default = n_features
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=degrees_of_freedom_prior_default,
        random_state=rng).fit(X)
    assert_almost_equal(degrees_of_freedom_prior_default,
                        bgmm.degrees_of_freedom_prior_)

    # Check correct init for a given value of covariance_prior
    covariance_prior = {
        'full': np.cov(X.T, bias=1) + 10,
        'tied': np.cov(X.T, bias=1) + 5,
        'diag': np.diag(np.atleast_2d(np.cov(X.T, bias=1))) + 3,
        'spherical': rng.rand()
    }

    bgmm = BayesianGaussianMixture(random_state=rng)
    for cov_type in ['full', 'tied', 'diag', 'spherical']:
        bgmm.covariance_type = cov_type
        bgmm.covariance_prior = covariance_prior[cov_type]
        bgmm.fit(X)
        assert_almost_equal(covariance_prior[cov_type], bgmm.covariance_prior_)

    # Check the error message for a bad spherical value of covariance_prior
    bad_covariance_prior_ = -1.
    bgmm = BayesianGaussianMixture(covariance_type='spherical',
                                   covariance_prior=bad_covariance_prior_,
                                   random_state=rng)
    assert_raise_message(
        ValueError, "The parameter 'spherical covariance_prior' "
        "should be greater than 0., but got %.3f." % bad_covariance_prior_,
        bgmm.fit, X)

    # Check correct init for the default value of covariance_prior
    covariance_prior_default = {
        'full': np.atleast_2d(np.cov(X.T)),
        'tied': np.atleast_2d(np.cov(X.T)),
        'diag': np.var(X, axis=0, ddof=1),
        'spherical': np.var(X, axis=0, ddof=1).mean()
    }

    bgmm = BayesianGaussianMixture(random_state=0)
    for cov_type in ['full', 'tied', 'diag', 'spherical']:
        bgmm.covariance_type = cov_type
        bgmm.fit(X)
        assert_almost_equal(covariance_prior_default[cov_type],
                            bgmm.covariance_prior_)
Example 35
def test_gaussian_mixture_attributes():
    # test bad parameters
    rng = np.random.RandomState(0)
    X = rng.rand(10, 2)

    n_components_bad = 0
    gmm = GaussianMixture(n_components=n_components_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'n_components': %d "
        "Estimation requires at least one component" % n_components_bad,
        gmm.fit, X)

    # covariance_type should be in [spherical, diag, tied, full]
    covariance_type_bad = 'bad_covariance_type'
    gmm = GaussianMixture(covariance_type=covariance_type_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'covariance_type': %s "
        "'covariance_type' should be in "
        "['spherical', 'tied', 'diag', 'full']" % covariance_type_bad, gmm.fit,
        X)

    tol_bad = -1
    gmm = GaussianMixture(tol=tol_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'tol': %.5f "
        "Tolerance used by the EM must be non-negative" % tol_bad, gmm.fit, X)

    reg_covar_bad = -1
    gmm = GaussianMixture(reg_covar=reg_covar_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'reg_covar': %.5f "
        "regularization on covariance must be "
        "non-negative" % reg_covar_bad, gmm.fit, X)

    max_iter_bad = 0
    gmm = GaussianMixture(max_iter=max_iter_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'max_iter': %d "
        "Estimation requires at least one iteration" % max_iter_bad, gmm.fit,
        X)

    n_init_bad = 0
    gmm = GaussianMixture(n_init=n_init_bad)
    assert_raise_message(
        ValueError, "Invalid value for 'n_init': %d "
        "Estimation requires at least one run" % n_init_bad, gmm.fit, X)

    init_params_bad = 'bad_method'
    gmm = GaussianMixture(init_params=init_params_bad)
    assert_raise_message(
        ValueError,
        "Unimplemented initialization method '%s'" % init_params_bad, gmm.fit,
        X)

    # test good parameters
    n_components, tol, n_init, max_iter, reg_covar = 2, 1e-4, 3, 30, 1e-1
    covariance_type, init_params = 'full', 'random'
    gmm = GaussianMixture(n_components=n_components,
                          tol=tol,
                          n_init=n_init,
                          max_iter=max_iter,
                          reg_covar=reg_covar,
                          covariance_type=covariance_type,
                          init_params=init_params).fit(X)

    assert_equal(gmm.n_components, n_components)
    assert_equal(gmm.covariance_type, covariance_type)
    assert_equal(gmm.tol, tol)
    assert_equal(gmm.reg_covar, reg_covar)
    assert_equal(gmm.max_iter, max_iter)
    assert_equal(gmm.n_init, n_init)
    assert_equal(gmm.init_params, init_params)
Example 36
def test_max_iter_error():

    km = KMeans(max_iter=-1)
    assert_raise_message(ValueError, 'Number of iterations should be', km.fit,
                         X)
Example 37
def test_set_pipeline_step_none():
    # Test setting Pipeline steps to None
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=None)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_dict_equal(
        pipeline.get_params(deep=True), {
            'steps': pipeline.steps,
            'm2': mult2,
            'm3': None,
            'last': mult5,
            'memory': None,
            'm2__mult': 2,
            'last__mult': 5,
        })

    pipeline.set_params(m2=None)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeErrors on None:
    other_methods = [
        'predict_proba', 'predict_log_proba', 'decision_function', 'transform',
        'score'
    ]
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=None)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_raise_message(AttributeError,
                         "'NoneType' object has no attribute 'predict'",
                         getattr, pipeline, 'predict')

    # Check None step at construction time
    exp = 2 * 5
    pipeline = Pipeline([('m2', mult2), ('m3', None), ('last', mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
Example 38
def test_error():
    # Test for appropriate exception on errors
    msg = "Penalty term must be positive"
    assert_raise_message(ValueError, msg,
                         LogisticRegression(C=-1).fit, X, Y1)
    assert_raise_message(ValueError, msg,
                         LogisticRegression(C="test").fit, X, Y1)

    for LR in [LogisticRegression, LogisticRegressionCV]:
        msg = "Tolerance for stopping criteria must be positive"
        assert_raise_message(ValueError, msg, LR(tol=-1).fit, X, Y1)
        assert_raise_message(ValueError, msg, LR(tol="test").fit, X, Y1)

        msg = "Maximum number of iteration must be positive"
        assert_raise_message(ValueError, msg, LR(max_iter=-1).fit, X, Y1)
        assert_raise_message(ValueError, msg, LR(max_iter="test").fit, X, Y1)
Example 39
def test_parameter_checking():
    A = np.ones((2, 2))
    name = 'spam'
    msg = "Invalid solver parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, msg, NMF(solver=name).fit, A)
    msg = "Invalid init parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, msg, NMF(init=name).fit, A)
    msg = "Invalid beta_loss parameter: got 'spam' instead of one"
    assert_raise_message(ValueError, msg,
                         NMF(solver='mu', beta_loss=name).fit, A)
    msg = "Invalid beta_loss parameter: solver 'cd' does not handle "
    msg += "beta_loss = 1.0"
    assert_raise_message(ValueError, msg,
                         NMF(solver='cd', beta_loss=1.0).fit, A)

    msg = "Negative values in data passed to"
    assert_raise_message(ValueError, msg, NMF().fit, -A)
    assert_raise_message(ValueError, msg, nmf._initialize_nmf, -A, 2, 'nndsvd')
    clf = NMF(2, tol=0.1).fit(A)
    assert_raise_message(ValueError, msg, clf.transform, -A)
Example 40
def assert_raises_on_only_one_label(func):
    """Assert message when there is only one label"""
    rng = np.random.RandomState(seed=0)
    assert_raise_message(ValueError, "Number of labels is", func,
                         rng.rand(10, 2), np.zeros(10))
Example 41
def test_empty_extract():
    # Test extract where fit() has not yet been run.
    msg = ("This OPTICS instance is not fitted yet. Call 'fit' with "
           "appropriate arguments before using this method.")
    clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10)
    assert_raise_message(ValueError, msg, clust.extract_dbscan, 0.01)
Example 42
def test_check_classification_targets():
    # Test that check_classification_targets raises a ValueError
    # for continuous targets. See #5782
    y = np.array([0.0, 1.1, 2.0, 3.0])
    msg = type_of_target(y)
    assert_raise_message(ValueError, msg, check_classification_targets, y)
Example 43
def test_check_X_y_informative_error():
    X = np.ones((2, 2))
    y = None
    assert_raise_message(ValueError, "y cannot be None", check_X_y, X, y)
Example 44
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameter checking through the public function
    nnmf = non_negative_factorization
    assert_no_warnings(nnmf, A, A, A, np.int64(1))
    msg = ("Number of components must be a positive integer; "
           "got (n_components=1.5)")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5)
    msg = ("Number of components must be a positive integer; "
           "got (n_components='2')")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, '2')
    msg = "Negative values in data passed to NMF (input H)"
    assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, 'custom')
    msg = "Negative values in data passed to NMF (input W)"
    assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, 'custom')
    msg = "Array passed to NMF (input H) is full of zeros"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom')
    msg = "Invalid regularization parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom', True,
                         'cd', 2., 1e-4, 200, 0., 0., 'spam')
Example 45
def test_grid_search_param_grid_includes_sequence_of_a_zero_length():
    clf = MockClassifier()
    assert_raise_message(
        ValueError,
        "Parameter values for parameter (C) need to be a non-empty sequence.",
        GridSearchCV, clf, {'C': []})
Example 46
def test_check_array_min_samples_and_features_messages():
    # empty list is considered 2D by default:
    msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_array, [[]])

    # If considered a 1D collection when ensure_2d=False, then the minimum
    # number of samples check fails:
    msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False)

    # A scalar is an invalid edge case for the default minimum-sample check
    msg = "Singleton array array(42) cannot be considered a valid collection."
    assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False)

    # But this works if the input data is forced to look like a 2D array with
    # one sample and one feature:
    X_checked = assert_warns(DeprecationWarning,
                             check_array, [42],
                             ensure_2d=True)
    assert_array_equal(np.array([[42]]), X_checked)

    # Simulate a model that would need at least 2 samples to be well defined
    X = np.ones((1, 10))
    y = np.ones(1)
    msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required."
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_samples=2)

    # The same message is raised if the data has 2 dimensions even if this is
    # not mandatory
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_samples=2,
                         ensure_2d=False)

    # Simulate a model that would require at least 3 features (e.g. SelectKBest
    # with k=3)
    X = np.ones((10, 2))
    y = np.ones(2)
    msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required."
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_features=3)

    # Only the feature check is enabled whenever the number of dimensions is 2
    # even if allow_nd is enabled:
    assert_raise_message(ValueError,
                         msg,
                         check_X_y,
                         X,
                         y,
                         ensure_min_features=3,
                         allow_nd=True)

    # Simulate a case where a pipeline stage has trimmed all the features of a
    # 2D dataset.
    X = np.empty(0).reshape(10, 0)
    y = np.ones(10)
    msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required."
    assert_raise_message(ValueError, msg, check_X_y, X, y)

    # nd-data is not checked for any minimum number of features by default:
    X = np.ones((10, 0, 28, 28))
    y = np.ones(10)
    X_checked, y_checked = check_X_y(X, y, allow_nd=True)
    assert_array_equal(X, X_checked)
    assert_array_equal(y, y_checked)
def test_predictproba_hardvoting():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='hard')
    msg = "predict_proba is not available when voting='hard'"
    assert_raise_message(AttributeError, msg, eclf.predict_proba, X)
Example 48
def test_check_array():
    # accept_sparse == False
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)
    # ensure_2d=False
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert X_array.ndim == 1
    # ensure_2d=True with 1d array
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2], ensure_2d=True)
    # ensure_2d=True with scalar array
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array, 10, ensure_2d=True)
    # don't allow ndim > 2 unless allow_nd is set
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # dtype and order enforcement.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(np.int)
    X_float = X_C.astype(np.float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if order == 'C':
            assert X_checked.flags['C_CONTIGUOUS']
            assert not X_checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert X_checked.flags['F_CONTIGUOUS']
            assert not X_checked.flags['C_CONTIGUOUS']
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and
                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
                assert X is X_checked

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(np.int)
    X_float = X_csc.astype(np.float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X, dtype=dtype,
                                    accept_sparse=accept_sparse, copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = ["object dtype is not supported by sparse matrices",
                        "Can't check dok sparse matrix for nan or inf."]
            assert message in messages
        else:
            assert len(w) == 0
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if X.format in accept_sparse:
            # no change if allowed
            assert X.format == X_checked.format
        else:
            # got converted
            assert X_checked.format == accept_sparse[0]
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if X.dtype == X_checked.dtype and X.format == X_checked.format:
                assert X is X_checked

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert isinstance(X_dense, np.ndarray)
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert isinstance(result, np.ndarray)

    # deprecation warning if string-like array with dtype="numeric"
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    X_str = [['11', '12'], ['13', 'xx']]
    for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")

    # deprecation warning if byte-like array with dtype="numeric"
    X_bytes = [[b'a', b'b'], [b'c', b'd']]
    for X in [X_bytes, np.array(X_bytes, dtype='V1')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")
Example 49
def check_sample_weight_invariance(name, metric, y1, y2):
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 10, size=len(y1))

    # check that unit weights give the same score as no weights
    unweighted_score = metric(y1, y2, sample_weight=None)
    assert_almost_equal(
        unweighted_score,
        metric(y1, y2, sample_weight=np.ones(shape=len(y1))),
        err_msg="For %s sample_weight=None is not equivalent to "
        "sample_weight=ones" % name)

    # check that the weighted and unweighted scores are unequal
    weighted_score = metric(y1, y2, sample_weight=sample_weight)
    assert_not_equal(unweighted_score,
                     weighted_score,
                     msg="Unweighted and weighted scores are unexpectedly "
                     "equal (%f) for %s" % (weighted_score, name))

    # check that sample_weight can be a list
    weighted_score_list = metric(y1, y2, sample_weight=sample_weight.tolist())
    assert_almost_equal(
        weighted_score,
        weighted_score_list,
        err_msg=("Weighted scores for array and list "
                 "sample_weight input are not equal (%f != %f) for %s") %
        (weighted_score, weighted_score_list, name))

    # check that integer weights are the same as repeated samples
    repeat_weighted_score = metric(np.repeat(y1, sample_weight, axis=0),
                                   np.repeat(y2, sample_weight, axis=0),
                                   sample_weight=None)
    assert_almost_equal(
        weighted_score,
        repeat_weighted_score,
        err_msg="Weighting %s is not equal to repeating samples" % name)

    # check that ignoring a fraction of the samples is equivalent to setting
    # the corresponding weights to zero
    sample_weight_subset = sample_weight[1::2]
    sample_weight_zeroed = np.copy(sample_weight)
    sample_weight_zeroed[::2] = 0
    y1_subset = y1[1::2]
    y2_subset = y2[1::2]
    weighted_score_subset = metric(y1_subset,
                                   y2_subset,
                                   sample_weight=sample_weight_subset)
    weighted_score_zeroed = metric(y1, y2, sample_weight=sample_weight_zeroed)
    assert_almost_equal(
        weighted_score_subset,
        weighted_score_zeroed,
        err_msg=("Zeroing weights does not give the same result as "
                 "removing the corresponding samples (%f != %f) for %s" %
                 (weighted_score_zeroed, weighted_score_subset, name)))

    if not name.startswith('unnormalized'):
        # check that the score is invariant under scaling of the weights by a
        # common factor
        for scaling in [2, 0.3]:
            assert_almost_equal(weighted_score,
                                metric(y1,
                                       y2,
                                       sample_weight=sample_weight * scaling),
                                err_msg="%s sample_weight is not invariant "
                                "under scaling" % name)

    # Check that if the numbers of samples in y_true and sample_weight are
    # not equal, a meaningful error is raised.
    error_message = ("Found input variables with inconsistent numbers of "
                     "samples: [{}, {}, {}]".format(
                         _num_samples(y1), _num_samples(y2),
                         _num_samples(sample_weight) * 2))
    assert_raise_message(ValueError,
                         error_message,
                         metric,
                         y1,
                         y2,
                         sample_weight=np.hstack(
                             [sample_weight, sample_weight]))
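check_sample_weight_invariance is likewise a checker that the test module drives with many (name, metric) pairs. A hypothetical driver (metric choices and data are illustrative; whether every invariance holds depends on the metric and the sampled targets):

import numpy as np

from sklearn.metrics import accuracy_score, mean_squared_error

rng = np.random.RandomState(0)

# A classification metric on integer labels ...
y_true = rng.randint(0, 2, size=50)
y_pred = rng.randint(0, 2, size=50)
check_sample_weight_invariance("accuracy_score", accuracy_score, y_true, y_pred)

# ... and a regression metric on continuous targets.
check_sample_weight_invariance("mean_squared_error", mean_squared_error,
                               rng.rand(50), rng.rand(50))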
Example 50
def test_grid_search_incorrect_param_grid():
    clf = MockClassifier()
    assert_raise_message(
        ValueError,
        "Parameter values for parameter (C) need to be a sequence.",
        GridSearchCV, clf, {'C': 1})
Example 51
def test_check_array_accept_sparse_type_exception():
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    invalid_type = SVR()

    msg = ("A sparse matrix was passed, but dense data is required. "
           "Use X.toarray() to convert to a dense numpy array.")
    assert_raise_message(TypeError, msg,
                         check_array, X_csr, accept_sparse=False)
    with pytest.warns(DeprecationWarning):
        assert_raise_message(TypeError, msg,
                             check_array, X_csr, accept_sparse=None)

    msg = ("Parameter 'accept_sparse' should be a string, "
           "boolean or list of strings. You provided 'accept_sparse={}'.")
    assert_raise_message(ValueError, msg.format(invalid_type),
                         check_array, X_csr, accept_sparse=invalid_type)

    msg = ("When providing 'accept_sparse' as a tuple or list, "
           "it must contain at least one string value.")
    assert_raise_message(ValueError, msg.format([]),
                         check_array, X_csr, accept_sparse=[])
    assert_raise_message(ValueError, msg.format(()),
                         check_array, X_csr, accept_sparse=())

    assert_raise_message(TypeError, "SVR",
                         check_array, X_csr, accept_sparse=[invalid_type])

    # Test deprecation of 'None'
    assert_warns(DeprecationWarning, check_array, X, accept_sparse=None)
def test_estimator_init():
    eclf = VotingClassifier(estimators=[])
    msg = ('Invalid `estimators` attribute, `estimators` should be'
           ' a list of (string, estimator) tuples')
    assert_raise_message(AttributeError, msg, eclf.fit, X, y)

    clf = LogisticRegression(random_state=1)

    eclf = VotingClassifier(estimators=[('lr', clf)], voting='error')
    msg = ('Voting must be \'soft\' or \'hard\'; got (voting=\'error\')')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2])
    msg = ('Number of classifiers and weights must be equal'
           '; got 2 weights, 1 estimators')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr', clf), ('lr', clf)],
                            weights=[1, 2])
    msg = "Names provided are not unique: ['lr', 'lr']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('lr__', clf)])
    msg = "Estimator names must not contain __: got ['lr__']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)

    eclf = VotingClassifier(estimators=[('estimators', clf)])
    msg = "Estimator names conflict with constructor arguments: ['estimators']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
Example 53
def test_regression_thresholded_inf_nan_input(metric):

    for y_true, y_score in invalids:
        assert_raise_message(ValueError, "contains NaN, infinity", metric,
                             y_true, y_score)
def test_label_encoder_str_bad_shape(dtype):
    le = LabelEncoder()
    le.fit(np.array(["apple", "orange"], dtype=dtype))
    msg = "bad input shape"
    assert_raise_message(ValueError, msg, le.transform, "apple")
def test_fast_mcd_on_invalid_input():
    X = np.arange(100)
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         fast_mcd, X)
Example 56
def test_classification_inf_nan_input(metric):
    # Classification metrics all reject NaN and infinite inputs
    for y_true, y_score in invalids:
        assert_raise_message(
            ValueError, "Input contains NaN, infinity or a "
            "value too large", metric, y_true, y_score)
Example 57
def test_get_values_invalid():
    assert_raise_message(
        TypeError, "Expected h5py.Dataset or tables.CArray. "
        "Got <class 'str'>", scprep.io.hdf5.get_values, 'invalid')
def test_mcd_class_on_invalid_input():
    X = np.arange(100)
    mcd = MinCovDet()
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         mcd.fit, X)
Example 59
def test_get_node_invalid():
    assert_raise_message(
        TypeError, "Expected h5py.File, tables.File, h5py.Group or "
        "tables.Group. Got <class 'str'>", scprep.io.hdf5.get_node, 'invalid',
        'node')
Example 60
def test_estimate_bandwidth_with_sparse_matrix():
    # Test estimate_bandwidth with sparse matrix
    X = sparse.lil_matrix((1000, 1000))
    msg = "A sparse matrix was passed, but dense data is required."
    assert_raise_message(TypeError, msg, estimate_bandwidth, X, 200)