Example #1
def test_fetch_openml_iris(monkeypatch, gzip_response):
    # classification dataset with numeric only columns
    data_id = 61
    data_name = 'iris'
    data_version = 1
    target_column = 'class'
    expected_observations = 150
    expected_features = 4
    expected_missing = 0

    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "Multiple active versions of the dataset matching the name"
        " iris exist. Versions may be fundamentally different, "
        "returning version 1.",
        _fetch_dataset_from_openml,
        **{'data_id': data_id, 'data_name': data_name,
           'data_version': data_version,
           'target_column': target_column,
           'expected_observations': expected_observations,
           'expected_features': expected_features,
           'expected_missing': expected_missing,
           'expect_sparse': False,
           'expected_data_dtype': np.float64,
           'expected_target_dtype': object,
           'compare_default_target': True}
    )
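All of these examples rely on the assert_warns_message helper from sklearn.utils.testing (later sklearn.utils._testing, and eventually removed in favor of pytest.warns). As a rough sketch of its contract, using only the standard library (this is an illustration, not sklearn's actual implementation):

import warnings

def assert_warns_message_sketch(warning_class, message, func, *args, **kw):
    # Call func, record every warning it emits, and check that at least
    # one warning of the expected class contains the expected text.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        result = func(*args, **kw)
    assert any(issubclass(w.category, warning_class) and
               message in str(w.message) for w in record), (
        "expected %s containing %r" % (warning_class.__name__, message))
    return result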
Example #2
def test_kfold_valueerrors():
    X1 = np.array([[1, 2], [3, 4], [5, 6]])
    X2 = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
    # Check that errors are raised if there are not enough samples
    assert_raises(ValueError, next, KFold(4).split(X1))

    # Check that a warning is raised if the least populated class has too few
    # members.
    y = np.array([3, 3, -1, -1, 2])

    skf_3 = StratifiedKFold(3)
    assert_warns_message(Warning, "The least populated class",
                         next, skf_3.split(X2, y))

    # Check that despite the warning the folds are still computed even
    # though all the classes are not necessarily represented on each
    # side of the split at each split
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        check_cv_coverage(skf_3, X2, y, labels=None, expected_n_iter=3)

    # Error when number of folds is <= 1
    assert_raises(ValueError, KFold, 0)
    assert_raises(ValueError, KFold, 1)
    assert_raises(ValueError, StratifiedKFold, 0)
    assert_raises(ValueError, StratifiedKFold, 1)

    # When n_folds is not an integer:
    assert_raises(ValueError, KFold, 1.5)
    assert_raises(ValueError, KFold, 2.0)
    assert_raises(ValueError, StratifiedKFold, 1.5)
    assert_raises(ValueError, StratifiedKFold, 2.0)

    # When shuffle is not a bool:
    assert_raises(TypeError, KFold, n_folds=4, shuffle=None)
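For reference, the same first few assertions map onto pytest context managers, the style that eventually replaced these helpers. A sketch, assuming the same imports and the same scikit-learn version as the snippet above:

import pytest

def test_kfold_valueerrors_pytest_style():
    X1 = np.array([[1, 2], [3, 4], [5, 6]])
    X2 = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
    y = np.array([3, 3, -1, -1, 2])

    with pytest.raises(ValueError):        # not enough samples for 4 folds
        next(KFold(4).split(X1))
    with pytest.warns(Warning, match="The least populated class"):
        next(StratifiedKFold(3).split(X2, y))
    with pytest.raises(ValueError):        # number of folds must be > 1
        KFold(0)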
Example #3
def test_lda_dimension_warning(n_classes, n_features):
    # FIXME: Future warning to be removed in 0.23
    rng = check_random_state(0)
    n_samples = 10
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    for n_components in [max_components - 1, None, max_components]:
        # if n_components <= min(n_classes - 1, n_features), no warning
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    for n_components in [max_components + 1,
                         max(n_features, n_classes - 1) + 1]:
        # if n_components > min(n_classes - 1, n_features), raise warning
        # We test one unit higher than max_components, and then something
        # larger than both n_features and n_classes - 1 to ensure the test
        # works for any value of n_components
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        msg = ("n_components cannot be larger than min(n_features, "
               "n_classes - 1). Using min(n_features, "
               "n_classes - 1) = min(%d, %d - 1) = %d components." %
               (n_features, n_classes, max_components))
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
        future_msg = ("In version 0.23, setting n_components > min("
                      "n_features, n_classes - 1) will raise a "
                      "ValueError. You should set n_components to None"
                      " (default), or a value smaller or equal to "
                      "min(n_features, n_classes - 1).")
        assert_warns_message(FutureWarning, future_msg, lda.fit, X, y)
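When porting checks like the one above to pytest.warns, note that match= is interpreted as a regular expression; messages such as msg here contain parentheses and must be escaped. A minimal self-contained illustration (fake_fit is a hypothetical stand-in for lda.fit, not part of scikit-learn):

import re
import warnings
import pytest

def fake_fit():
    # Hypothetical stand-in that emits a message with regex metacharacters.
    warnings.warn("n_components cannot be larger than min(n_features, "
                  "n_classes - 1).", UserWarning)

msg = "n_components cannot be larger than min(n_features, n_classes - 1)."
with pytest.warns(UserWarning, match=re.escape(msg)):
    fake_fit()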
Example #4
def test_load_lfw_pairs_deprecation():
    msg = (
        "Function 'load_lfw_pairs' has been deprecated in 0.17 and will be "
        "removed in 0.19."
        "Use fetch_lfw_pairs(download_if_missing=False) instead."
    )
    assert_warns_message(DeprecationWarning, msg, load_lfw_pairs, data_home=SCIKIT_LEARN_DATA)
Example #5
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df, dtype='object', warn_on_dtype=True)

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    assert_no_warnings(check_array, df_mixed_numeric.astype(int),
                       dtype='numeric', warn_on_dtype=True)
Example #6
def test_raw_values_deprecation():
    X = [[0.0], [1.0]]
    clf = EllipticEnvelope().fit(X)
    assert_warns_message(DeprecationWarning,
                         "raw_values parameter is deprecated in 0.20 and will"
                         " be removed in 0.22.",
                         clf.decision_function, X, raw_values=True)
Example #7
def test_deprecated_auc_reorder():
    depr_message = ("The 'reorder' parameter has been deprecated in version "
                    "0.20 and will be removed in 0.22. It is recommended not "
                    "to set 'reorder' and ensure that x is monotonic "
                    "increasing or monotonic decreasing.")
    assert_warns_message(DeprecationWarning, depr_message, auc,
                         [1, 2], [2, 3], reorder=True)
Example #8
def test_deprecated_calinski_harabaz_score():
    depr_message = ("Function 'calinski_harabaz_score' has been renamed "
                    "to 'calinski_harabasz_score' "
                    "and will be removed in version 0.23.")
    assert_warns_message(DeprecationWarning, depr_message,
                         calinski_harabaz_score,
                         np.ones((10, 2)), [0] * 5 + [1] * 5)
Example #9
def test_sample_weight_warning():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        # LinearSVC does not currently support sample weights but they
        # can still be used for the calibration step (with a warning)
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still yield
        # different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
Example #10
def test_wishart_log_det():
    a = np.array([0.1, 0.8, 0.01, 0.09])
    b = np.array([0.2, 0.7, 0.05, 0.1])
    assert_warns_message(DeprecationWarning, "The function "
                         "wishart_log_det is deprecated in 0.18 and"
                         " will be removed in 0.20.",
                         wishart_log_det, a, b, 2, 4)
Example #11
def test_rfe_deprecation_estimator_params():
    deprecation_message = (
        "The parameter 'estimator_params' is deprecated as "
        "of version 0.16 and will be removed in 0.18. The "
        "parameter is no longer necessary because the "
        "value is set via the estimator initialisation or "
        "set_params method."
    )
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target
    assert_warns_message(
        DeprecationWarning,
        deprecation_message,
        RFE(estimator=SVC(), n_features_to_select=4, step=0.1, estimator_params={"kernel": "linear"}).fit,
        X=X,
        y=y,
    )

    assert_warns_message(
        DeprecationWarning,
        deprecation_message,
        RFECV(estimator=SVC(), step=1, cv=5, estimator_params={"kernel": "linear"}).fit,
        X=X,
        y=y,
    )
Example #12
def test_fetch_openml_australian(monkeypatch, gzip_response):
    # sparse dataset
    # Australian is the only sparse dataset that is reasonably small.
    # As it is inactive, we need to catch the warning. Due to the mocking
    # framework, it is not deactivated in our tests.
    data_id = 292
    data_name = 'Australian'
    data_version = 1
    target_column = 'Y'
    # Not all original instances included for space reasons
    expected_observations = 85
    expected_features = 14
    expected_missing = 0
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "Version 1 of dataset Australian is inactive,",
        _fetch_dataset_from_openml,
        **{'data_id': data_id, 'data_name': data_name,
           'data_version': data_version,
           'target_column': target_column,
           'expected_observations': expected_observations,
           'expected_features': expected_features,
           'expected_missing': expected_missing,
           'expect_sparse': True,
           'expected_data_dtype': np.float64,
           'expected_target_dtype': object,
           'compare_default_target': False}  # numpy specific check
    )
Example #13
def test_redundant_bins(strategy, expected_bin_edges):
    X = [[0], [0], [0], [0], [3], [3]]
    kbd = KBinsDiscretizer(n_bins=3, strategy=strategy)
    msg = ("Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
           "are removed. Consider decreasing the number of bins.")
    assert_warns_message(UserWarning, msg, kbd.fit, X)
    assert_array_almost_equal(kbd.bin_edges_[0], expected_bin_edges)
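The strategy and expected_bin_edges arguments come from a @pytest.mark.parametrize decorator that this listing dropped. A plausible reconstruction (the edge values are illustrative, not taken from the original module):

import pytest

@pytest.mark.parametrize(
    "strategy, expected_bin_edges",
    [("quantile", [0, 1.5, 3]),   # illustrative values only
     ("kmeans", [0, 1.5, 3])])
def test_redundant_bins(strategy, expected_bin_edges):
    ...  # body as above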
Example #14
def test_label_binarize_multilabel():
    y_seq = [(1,), (0, 1, 2), tuple()]
    y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]])
    classes = [0, 1, 2]
    pos_label = 2
    neg_label = 0
    expected = pos_label * y_ind
    y_sparse = [sparse_matrix(y_ind)
                for sparse_matrix in [coo_matrix, csc_matrix, csr_matrix,
                                      dok_matrix, lil_matrix]]

    for y in [y_ind] + y_sparse:
        yield (check_binarized_results, y, classes, pos_label, neg_label,
               expected)

    deprecation_message = ("Direct support for sequence of sequences " +
                           "multilabel representation will be unavailable " +
                           "from version 0.17. Use sklearn.preprocessing." +
                           "MultiLabelBinarizer to convert to a label " +
                           "indicator representation.")

    assert_warns_message(DeprecationWarning, deprecation_message,
                         check_binarized_results, y_seq, classes, pos_label,
                         neg_label, expected)

    assert_raises(ValueError, label_binarize, y, classes, neg_label=-1,
                  pos_label=pos_label, sparse_output=True)
Example #15
def test_threshold_deprecation():
    X = [[0.0], [1.0]]
    clf = EllipticEnvelope().fit(X)
    assert_warns_message(DeprecationWarning,
                         "threshold_ attribute is deprecated in 0.20 and will"
                         " be removed in 0.22.",
                         getattr, clf, "threshold_")
Example #16
def test_mcd_increasing_det_warning():
    # Check that a warning is raised if we observe increasing determinants
    # during the c_step. In theory the sequence of determinants should be
    # decreasing. Increasing determinants are likely due to ill-conditioned
    # covariance matrices that result in poor precision matrices.

    X = [[5.1, 3.5, 1.4, 0.2],
         [4.9, 3.0, 1.4, 0.2],
         [4.7, 3.2, 1.3, 0.2],
         [4.6, 3.1, 1.5, 0.2],
         [5.0, 3.6, 1.4, 0.2],
         [4.6, 3.4, 1.4, 0.3],
         [5.0, 3.4, 1.5, 0.2],
         [4.4, 2.9, 1.4, 0.2],
         [4.9, 3.1, 1.5, 0.1],
         [5.4, 3.7, 1.5, 0.2],
         [4.8, 3.4, 1.6, 0.2],
         [4.8, 3.0, 1.4, 0.1],
         [4.3, 3.0, 1.1, 0.1],
         [5.1, 3.5, 1.4, 0.3],
         [5.7, 3.8, 1.7, 0.3],
         [5.4, 3.4, 1.7, 0.2],
         [4.6, 3.6, 1.0, 0.2],
         [5.0, 3.0, 1.6, 0.2],
         [5.2, 3.5, 1.5, 0.2]]

    mcd = MinCovDet(random_state=1)
    assert_warns_message(RuntimeWarning,
                         "Determinant has increased",
                         mcd.fit, X)
Example #17
def test_nystroem_callable():
    # Test Nystroem on a callable.
    rnd = np.random.RandomState(42)
    n_samples = 10
    X = rnd.uniform(size=(n_samples, 4))

    def logging_histogram_kernel(x, y, log):
        """Histogram kernel that writes to a log."""
        log.append(1)
        return np.minimum(x, y).sum()

    kernel_log = []
    X = list(X)     # test input validation
    Nystroem(kernel=logging_histogram_kernel,
             n_components=(n_samples - 1),
             kernel_params={'log': kernel_log}).fit(X)
    assert_equal(len(kernel_log), n_samples * (n_samples - 1) / 2)

    def linear_kernel(X, Y):
        return np.dot(X, Y.T)

    # if degree, gamma or coef0 is passed, we raise a warning
    msg = "Passing gamma, coef0 or degree to Nystroem"
    params = ({'gamma': 1}, {'coef0': 1}, {'degree': 2})
    for param in params:
        ny = Nystroem(kernel=linear_kernel, **param)
        assert_warns_message(DeprecationWarning, msg, ny.fit, X)
Example #18
def test_convergence_warning(dataset, algo_class):
    X, y = dataset
    model = algo_class(max_iter=2, verbose=True)
    cls_name = model.__class__.__name__
    assert_warns_message(ConvergenceWarning,
                         '[{}] {} did not converge'.format(cls_name, cls_name),
                         model.fit, X, y)
Example #19
def test_pickle_version_warning():
    # check that warnings are raised when unpickling in a different version

    # first, check no warning when in the same version:
    iris = datasets.load_iris()
    tree = DecisionTreeClassifier().fit(iris.data, iris.target)
    tree_pickle = pickle.dumps(tree)
    assert_true(b"version" in tree_pickle)
    assert_no_warnings(pickle.loads, tree_pickle)

    # check that warning is raised on different version
    tree_pickle_other = tree_pickle.replace(sklearn.__version__.encode(),
                                            b"something")
    message = ("Trying to unpickle estimator DecisionTreeClassifier from "
               "version {0} when using version {1}. This might lead to "
               "breaking code or invalid results. "
               "Use at your own risk.".format("something",
                                              sklearn.__version__))
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)

    # check that not including any version also works:
    # TreeNoVersion has no getstate, like pre-0.18
    tree = TreeNoVersion().fit(iris.data, iris.target)

    tree_pickle_noversion = pickle.dumps(tree)
    assert_false(b"version" in tree_pickle_noversion)
    message = message.replace("something", "pre-0.18")
    message = message.replace("DecisionTreeClassifier", "TreeNoVersion")
    # check we got the warning about using pre-0.18 pickle
    assert_warns_message(UserWarning, message, pickle.loads,
                         tree_pickle_noversion)

    # check that no warning is raised for external estimators
    TreeNoVersion.__module__ = "notsklearn"
    assert_no_warnings(pickle.loads, tree_pickle_noversion)
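The behaviour being tested comes from scikit-learn stamping its version into an estimator's pickle state. A minimal sketch of that pattern (hypothetical class and version constant, not sklearn's actual implementation):

import pickle
import warnings

LIB_VERSION = "0.20.0"  # hypothetical stand-in for sklearn.__version__

class VersionedEstimator:
    def __getstate__(self):
        state = self.__dict__.copy()
        state["_lib_version"] = LIB_VERSION  # stamp the pickling version
        return state

    def __setstate__(self, state):
        pickled_version = state.pop("_lib_version", "pre-0.18")
        if pickled_version != LIB_VERSION:
            warnings.warn("Trying to unpickle estimator from version {0} "
                          "when using version {1}.".format(
                              pickled_version, LIB_VERSION), UserWarning)
        self.__dict__.update(state)

# Round-trips silently when the versions agree:
blob = pickle.dumps(VersionedEstimator())
pickle.loads(blob)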
Example #20
def test_vectorizer_stop_words_inconsistent():
    if PY2:
        lstr = "[u'and', u'll', u've']"
    else:
        lstr = "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    for vec in [CountVectorizer(),
                TfidfVectorizer(), HashingVectorizer()]:
        vec.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             ['hello world'])
        # reset stop word validation
        del vec._stop_words_id
        assert _check_stop_words_consistency(vec) is False

    # Only one warning per stop list
    assert_no_warnings(vec.fit_transform, ['hello world'])
    assert _check_stop_words_consistency(vec) is None

    # Test caching of inconsistency assessment
    vec.set_params(stop_words=["you've", "you", "you'll", 'blah', 'AND'])
    assert_warns_message(UserWarning, message, vec.fit_transform,
                         ['hello world'])
Example #21
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)
Example #22
def test_multinomial_logistic_regression_with_classweight_auto():
    X, y = iris.data, iris.target
    model = LogisticRegression(multi_class='multinomial',
                               class_weight='auto', solver='lbfgs')
    # 'auto' is deprecated and will be removed in 0.19
    assert_warns_message(DeprecationWarning,
                         "class_weight='auto' heuristic is deprecated",
                         model.fit, X, y)
Example #23
def test_convergence_warning():
    degree = 4
    y = _lifted_predict(U[:degree], X)

    est = PolynomialNetworkRegressor(degree=degree, n_components=n_components,
                                     beta=1e-10, max_iter=1, tol=1e-5,
                                     random_state=0)
    assert_warns_message(UserWarning, "converge", est.fit, X, y)
Example #24
def test_clone_copy_init_params():
    # test for deprecation warning when copying or casting an init parameter
    est = ModifyInitParams()
    message = ("Estimator ModifyInitParams modifies parameters in __init__. "
               "This behavior is deprecated as of 0.18 and support "
               "for this behavior will be removed in 0.20.")

    assert_warns_message(DeprecationWarning, message, clone, est)
Example #25
def test_pickle_version_warning_is_issued_upon_different_version():
    iris = datasets.load_iris()
    tree = TreeBadVersion().fit(iris.data, iris.target)
    tree_pickle_other = pickle.dumps(tree)
    message = pickle_error_message.format(estimator="TreeBadVersion",
                                          old_version="something",
                                          current_version=sklearn.__version__)
    assert_warns_message(UserWarning, message, pickle.loads, tree_pickle_other)
Example #26
def test_repeated_x(minimizer):
    assert_warns_message(
        UserWarning, "has been evaluated at", minimizer, lambda x: x[0],
        dimensions=[[0, 1]], x0=[[0], [1]], n_random_starts=0, n_calls=3)

    assert_warns_message(
        UserWarning, "has been evaluated at", minimizer, bench4,
        dimensions=[("0", "1")], x0=[["0"], ["1"]], n_calls=3,
        n_random_starts=0)
Example #27
def test_future_warning():
    score_funcs_with_changing_means = [
        normalized_mutual_info_score,
        adjusted_mutual_info_score,
    ]
    warning_msg = "The behavior of "
    args = [0, 0, 0], [0, 0, 0]
    for score_func in score_funcs_with_changing_means:
        assert_warns_message(FutureWarning, warning_msg, score_func, *args)
Example #28
def test_spectral_embeding_import():
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)

    assert_warns_message(DeprecationWarning, "spectral_embedding is deprecated",
                         spectral_embedding, sims)
    assert_warns_message(DeprecationWarning, "SpectralEmbedding is deprecated",
                         SpectralEmbedding)
Example #29
def test_dataset_with_openml_warning(monkeypatch, gzip_response):
    data_id = 3
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML raised a warning on the dataset. It might be unusable. "
        "Warning:",
        fetch_openml, data_id=data_id, cache=False
    )
Example #30
def test_dataset_with_openml_error(monkeypatch, gzip_response):
    data_id = 1
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    assert_warns_message(
        UserWarning,
        "OpenML registered a problem with the dataset. It might be unusable. "
        "Error:",
        fetch_openml, data_id=data_id, cache=False
    )
Example #31
def test_select_kbest_zero():
    # Test whether k=0 correctly returns no features.
    X, y = make_classification(n_samples=20, n_features=10,
                               shuffle=False, random_state=0)

    univariate_filter = SelectKBest(f_classif, k=0)
    univariate_filter.fit(X, y)
    support = univariate_filter.get_support()
    gtruth = np.zeros(10, dtype=bool)
    assert_array_equal(support, gtruth)
    X_selected = assert_warns_message(UserWarning, 'No features were selected',
                                      univariate_filter.transform, X)
    assert X_selected.shape == (20, 0)
Example #32
def check_regressors_no_decision_function(name, Regressor):
    # checks whether regressors have decision_function or predict_proba
    rng = np.random.RandomState(0)
    X = rng.normal(size=(10, 4))
    y = multioutput_estimator_convert_y_2d(name, X[:, 0])
    regressor = Regressor()

    set_fast_parameters(regressor)
    if hasattr(regressor, "n_components"):
        # FIXME CCA, PLS is not robust to rank 1 effects
        regressor.n_components = 1

    regressor.fit(X, y)
    funcs = ["decision_function", "predict_proba", "predict_log_proba"]
    for func_name in funcs:
        func = getattr(regressor, func_name, None)
        if func is None:
            # doesn't have function
            continue
        # has function. Should raise deprecation warning
        msg = func_name
        assert_warns_message(DeprecationWarning, msg, func, X)
Example #33
def test_min_impurity_split():
    # Test if min_impurity_split of base estimators is set
    # Regression test for #8006
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    all_estimators = [RandomForestClassifier, RandomForestRegressor,
                      ExtraTreesClassifier, ExtraTreesRegressor]

    for Estimator in all_estimators:
        est = Estimator(min_impurity_split=0.1)
        est = assert_warns_message(DeprecationWarning, "min_impurity_decrease",
                                   est.fit, X, y)
        for tree in est.estimators_:
            assert_equal(tree.min_impurity_split, 0.1)
Example #34
def test_vectorizer_stop_words_inconsistent():
    lstr = "['and', 'll', 've']"
    message = ('Your stop_words may be inconsistent with your '
               'preprocessing. Tokenizing the stop words generated '
               'tokens %s not in stop_words.' % lstr)
    for vec in [CountVectorizer(),
                TfidfVectorizer(), HashingVectorizer()]:
        vec.set_params(stop_words=["you've", "you", "you'll", 'AND'])
        assert_warns_message(UserWarning, message, vec.fit_transform,
                             ['hello world'])
        # reset stop word validation
        del vec._stop_words_id
        assert _check_stop_words_consistency(vec) is False

    # Only one warning per stop list
    assert_no_warnings(vec.fit_transform, ['hello world'])
    assert _check_stop_words_consistency(vec) is None

    # Test caching of inconsistency assessment
    vec.set_params(stop_words=["you've", "you", "you'll", 'blah', 'AND'])
    assert_warns_message(UserWarning, message, vec.fit_transform,
                         ['hello world'])
Example #35
def test_candidates():
    # Checks whether candidates are sufficient.
    # This should handle the case when the number of candidates is 0.
    # The user should be warned when the number of candidates is less
    # than the requested number of neighbors.
    X_train = np.array([[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1],
                        [6, 10, 2]], dtype=np.float32)
    X_test = np.array([7, 10, 3], dtype=np.float32).reshape(1, -1)

    # For zero candidates
    lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
        min_hash_match=32)
    ignore_warnings(lshf.fit)(X_train)

    message = ("Number of candidates is not sufficient to retrieve"
               " %i neighbors with"
               " min_hash_match = %i. Candidates are filled up"
               " uniformly from unselected"
               " indices." % (3, 32))
    assert_warns_message(UserWarning, message, lshf.kneighbors,
                         X_test, n_neighbors=3)
    distances, neighbors = lshf.kneighbors(X_test, n_neighbors=3)
    assert_equal(distances.shape[1], 3)

    # For candidates less than n_neighbors
    lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
        min_hash_match=31)
    ignore_warnings(lshf.fit)(X_train)

    message = ("Number of candidates is not sufficient to retrieve"
               " %i neighbors with"
               " min_hash_match = %i. Candidates are filled up"
               " uniformly from unselected"
               " indices." % (5, 31))
    assert_warns_message(UserWarning, message, lshf.kneighbors,
                         X_test, n_neighbors=5)
    distances, neighbors = lshf.kneighbors(X_test, n_neighbors=5)
    assert_equal(distances.shape[1], 5)
Example #36
def test_lda_dimension_warning(n_classes, n_features):
    rng = check_random_state(0)
    n_samples = 10
    X = rng.randn(n_samples, n_features)
    # we create n_classes labels by repeating and truncating a
    # range(n_classes) until n_samples
    y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
    max_components = min(n_features, n_classes - 1)

    for n_components in [max_components - 1, None, max_components]:
        # if n_components <= min(n_classes - 1, n_features), no warning
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        assert_no_warnings(lda.fit, X, y)

    for n_components in [max_components + 1,
                         max(n_features, n_classes - 1) + 1]:
        # if n_components > min(n_classes - 1, n_features), raise warning
        lda = LinearDiscriminantAnalysis(n_components=n_components)
        msg = ("n_components cannot be superior to min(n_features, "
               "n_classes - 1). Using min(n_features, "
               "n_classes - 1) = min(%d, %d - 1) = %d components." %
               (n_features, n_classes, max_components))
        assert_warns_message(ChangedBehaviorWarning, msg, lda.fit, X, y)
Example #37
    def test_deprecated_grid_search_iid(self):
        depr_message = ("The default of the `iid` parameter will change from True "
                        "to False in version 0.22")
        X, y = make_blobs(n_samples=54, random_state=0, centers=2)
        grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                            param_grid={'C': [10]}, cv=3)
        # no warning with equally sized test sets
        assert_no_warnings(grid.fit, X, y)

        grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                            param_grid={'C': [10]}, cv=5)
        # warning because 54 % 5 != 0
        assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

        grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                            param_grid={'C': [10]}, cv=2)
        # warning because stratification into two classes and 27 % 2 != 0
        assert_warns_message(DeprecationWarning, depr_message, grid.fit, X, y)

        grid = GridSearchCV(SVC(gamma='scale', random_state=0),
                            param_grid={'C': [10]}, cv=KFold(2))
        # no warning because no stratification and 54 % 2 == 0
        assert_no_warnings(grid.fit, X, y)
Example #38
def test_non_negative_factorization_checking():
    A = np.ones((2, 2))
    # Test parameter checking in the public function
    nnmf = non_negative_factorization
    msg = ("The default value of init will change from "
           "random to None in 0.23 to make it consistent "
           "with decomposition.NMF.")
    assert_warns_message(FutureWarning, msg, nnmf, A, A, A, np.int64(1))
    msg = ("Number of components must be a positive integer; "
           "got (n_components=1.5)")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, 1.5, 'random')
    msg = ("Number of components must be a positive integer; "
           "got (n_components='2')")
    assert_raise_message(ValueError, msg, nnmf, A, A, A, '2', 'random')
    msg = "Negative values in data passed to NMF (input H)"
    assert_raise_message(ValueError, msg, nnmf, A, A, -A, 2, 'custom')
    msg = "Negative values in data passed to NMF (input W)"
    assert_raise_message(ValueError, msg, nnmf, A, -A, A, 2, 'custom')
    msg = "Array passed to NMF (input H) is full of zeros"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom')
    msg = "Invalid regularization parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, msg, nnmf, A, A, 0 * A, 2, 'custom', True,
                         'cd', 2., 1e-4, 200, 0., 0., 'spam')
Example #39
def test_skope_rules_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data
    y = iris.target
    y = (y != 0)

    # Test max_samples
    assert_raises(ValueError, SkopeRules(max_samples=-1).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=0.0).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=2.0).fit, X, y)
    # explicitly setting max_samples > n_samples should result in a warning.
    assert_warns_message(
        UserWarning, "max_samples will be set to n_samples for estimation",
        SkopeRules(max_samples=1000).fit, X, y)
    assert_no_warnings(SkopeRules(max_samples=np.int64(2)).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples='foobar').fit, X, y)
    assert_raises(ValueError, SkopeRules(max_samples=1.5).fit, X, y)
    assert_raises(ValueError, SkopeRules(max_depth_duplication=1.5).fit, X, y)
    assert_raises(ValueError, SkopeRules().fit(X, y).predict, X[:, 1:])
    assert_raises(ValueError,
                  SkopeRules().fit(X, y).decision_function, X[:, 1:])
    assert_raises(ValueError, SkopeRules().fit(X, y).rules_vote, X[:, 1:])
    assert_raises(ValueError, SkopeRules().fit(X, y).score_top_rules, X[:, 1:])
Example #40
def test_one_hot_encoder_deprecationwarnings():
    for X in [[[3, 2, 1], [0, 1, 1]],
              [[3., 2., 1.], [0., 1., 1.]]]:
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, "handling of integer",
                             enc.fit, X)
        enc = OneHotEncoder()
        assert_warns_message(FutureWarning, "handling of integer",
                             enc.fit_transform, X)

        # check it still works correctly as well
        with ignore_warnings(category=FutureWarning):
            X_trans = enc.fit_transform(X).toarray()
        res = [[0., 1., 0., 1., 1.],
               [1., 0., 1., 0., 1.]]
        assert_array_equal(X_trans, res)

        # check deprecated attributes
        assert_warns(DeprecationWarning, lambda: enc.active_features_)
        assert_warns(DeprecationWarning, lambda: enc.feature_indices_)
        assert_warns(DeprecationWarning, lambda: enc.n_values_)

        # check no warning is raised if keyword is specified
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit, X)
        enc = OneHotEncoder(categories='auto')
        assert_no_warnings(enc.fit_transform, X)
        X_trans = enc.fit_transform(X).toarray()
        assert_array_equal(X_trans, res)

        # check there is also a warning if the default is passed
        enc = OneHotEncoder(n_values='auto', handle_unknown='ignore')
        assert_warns(DeprecationWarning, enc.fit, X)

    X = np.array([['cat1', 'cat2']], dtype=object).T
    enc = OneHotEncoder(categorical_features='all')
    assert_warns(DeprecationWarning, enc.fit, X)
Example #41
def test_check_inverse():
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))

    X_list = [X_dense, sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)]

    for X in X_list:
        if sparse.issparse(X):
            accept_sparse = True
        else:
            accept_sparse = False
        trans = FunctionTransformer(func=np.sqrt,
                                    inverse_func=np.around,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True)
        assert_warns_message(
            UserWarning, "The provided functions are not strictly"
            " inverse of each other. If you are sure you"
            " want to proceed regardless, set"
            " 'check_inverse=False'.", trans.fit, X)

        trans = FunctionTransformer(func=np.expm1,
                                    inverse_func=np.log1p,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    trans = FunctionTransformer(func=np.expm1,
                                inverse_func=None,
                                check_inverse=True)
    assert_no_warnings(trans.fit, X_dense)
    trans = FunctionTransformer(func=None,
                                inverse_func=np.expm1,
                                check_inverse=True)
    assert_no_warnings(trans.fit, X_dense)
Example #42
def test_check_dataframe_warns_on_dtype():
    # Check that warn_on_dtype also works for DataFrames.
    # https://github.com/scikit-learn/scikit-learn/issues/10948
    pd = importorskip("pandas")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=object)
    assert_warns_message(DataConversionWarning,
                         "Data with input dtype object were all converted to "
                         "float64.",
                         check_array, df, dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df, dtype='object', warn_on_dtype=True)
    assert len(record) == 0

    # Also check that it raises a warning for mixed dtypes in a DataFrame.
    df_mixed = pd.DataFrame([['1', 2, 3], ['4', 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=np.float64, warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype='numeric', warn_on_dtype=True)
    assert_warns(DataConversionWarning, check_array, df_mixed,
                 dtype=object, warn_on_dtype=True)

    # Even with numerical dtypes, a conversion can be made because dtypes are
    # uniformized throughout the array.
    df_mixed_numeric = pd.DataFrame([[1., 2, 3], [4., 5, 6]])
    assert_warns(DataConversionWarning, check_array, df_mixed_numeric,
                 dtype='numeric', warn_on_dtype=True)
    with pytest.warns(None) as record:
        warnings.simplefilter("ignore", DeprecationWarning)  # 0.23
        check_array(df_mixed_numeric.astype(int),
                    dtype='numeric', warn_on_dtype=True)
    assert len(record) == 0
Example #43
def test_lmvnpdf_spherical():
    n_features, n_components, n_samples = 2, 3, 10

    mu = rng.randint(10) * rng.rand(n_components, n_features)
    spherecv = rng.rand(n_components, 1)**2 + 1
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    cv = np.tile(spherecv, (n_features, 1))
    reference = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = assert_warns_message(
        DeprecationWarning, "The function"
        " log_multivariate_normal_density is "
        "deprecated in 0.18 and will be removed in 0.20.",
        mixture.log_multivariate_normal_density, X, mu, spherecv, 'spherical')
    assert_array_almost_equal(lpr, reference)
Example #44
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # Test max_samples
    assert_raises(ValueError,
                  IsolationForest(max_samples=-1).fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples=0.0).fit, X)
    assert_raises(ValueError,
                  IsolationForest(max_samples=2.0).fit, X)
    # The dataset has less than 256 samples, explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly there should be no warning
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         IsolationForest(max_samples=1000).fit, X)
    # note that assert_no_warnings does not apply since it enables a
    # PendingDeprecationWarning triggered by scipy.sparse's use of
    # np.matrix. See issue #11251.
    with pytest.warns(None) as record:
        IsolationForest(max_samples='auto').fit(X)
    user_warnings = [each for each in record
                     if issubclass(each.category, UserWarning)]
    assert len(user_warnings) == 0
    with pytest.warns(None) as record:
        IsolationForest(max_samples=np.int64(2)).fit(X)
    user_warnings = [each for each in record
                     if issubclass(each.category, UserWarning)]
    assert len(user_warnings) == 0

    assert_raises(ValueError, IsolationForest(max_samples='foobar').fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=1.5).fit, X)

    # test X_test n_features match X_train one:
    assert_raises(ValueError, IsolationForest().fit(X).predict, X[:, 1:])
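The pytest.warns(None) idiom used above is deprecated in pytest 7 and later; an equivalent "no UserWarning was raised" check can be written with the standard library. A sketch reusing the fixtures from this test:

import warnings

with warnings.catch_warnings(record=True) as record:
    warnings.simplefilter("always")
    IsolationForest(max_samples='auto').fit(X)
assert not [w for w in record if issubclass(w.category, UserWarning)]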
Example #45
def test_warning_scaling_integers():
    # Check warning when scaling integer data
    X = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8)

    w = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()
    assert_warns_message(DataConversionWarning, w, scale, X)
    assert_warns_message(DataConversionWarning, w, StandardScaler().fit, X)
    assert_warns_message(DataConversionWarning, w, MinMaxScaler().fit, X)
Example #46
def test_warning_scaling_integers():
    """Check warning when scaling integer data"""
    X = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8)

    w = "assumes floating point values as input, got uint8"

    clean_warning_registry()
    assert_warns_message(UserWarning, w, scale, X)
    assert_warns_message(UserWarning, w, StandardScaler().fit, X)
    assert_warns_message(UserWarning, w, MinMaxScaler().fit, X)
Example #47
def test_lmvnpdf_full():
    n_features, n_components, n_samples = 2, 3, 10

    mu = rng.randint(10) * rng.rand(n_components, n_features)
    cv = (rng.rand(n_components, n_features) + 1.0)**2
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    fullcv = np.array([np.diag(x) for x in cv])

    reference = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = assert_warns_message(
        DeprecationWarning, "The function"
        " log_multivariate_normal_density is "
        "deprecated in 0.18 and will be removed in 0.20.",
        mixture.log_multivariate_normal_density, X, mu, fullcv, 'full')
    assert_array_almost_equal(lpr, reference)
Example #48
def test_lmvnpdf_diag():
    # test a slow and naive implementation of lmvnpdf and
    # compare it to the vectorized version (mixture.lmvnpdf) to test
    # for correctness
    n_features, n_components, n_samples = 2, 3, 10
    mu = rng.randint(10) * rng.rand(n_components, n_features)
    cv = (rng.rand(n_components, n_features) + 1.0)**2
    X = rng.randint(10) * rng.rand(n_samples, n_features)

    ref = _naive_lmvnpdf_diag(X, mu, cv)
    lpr = assert_warns_message(
        DeprecationWarning, "The function"
        " log_multivariate_normal_density is "
        "deprecated in 0.18 and will be removed in 0.20.",
        mixture.log_multivariate_normal_density, X, mu, cv, 'diag')
    assert_array_almost_equal(lpr, ref)
Example #49
def test_gene_expression_filter_warning():
    X = data.load_10X(sparse=True)
    genes = np.arange(10)
    gene_outside_range = 100
    no_genes = 'not_a_gene'
    assert_warns_message(UserWarning,
                         "`percentile` expects values between 0 and 100."
                         "Got 0.9. Did you mean 90.0?",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         genes,
                         percentile=0.90,
                         keep_cells='below')
    assert_raise_message(
        ValueError,
        "Only one of `cutoff` and `percentile` should be given.",
        scprep.filter.filter_gene_set_expression,
        X,
        genes,
        percentile=0.90,
        cutoff=50)
    assert_raise_message(ValueError,
                         "Expected `keep_cells` in ['above', 'below']. "
                         "Got neither",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         genes,
                         percentile=90.0,
                         keep_cells='neither')
    assert_warns_message(UserWarning,
                         "`percentile` expects values between 0 and 100."
                         "Got 0.9. Did you mean 90.0?",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         genes,
                         percentile=0.90,
                         keep_cells='below')
    assert_raise_message(
        ValueError,
        "One of either `cutoff` or `percentile` must be given.",
        scprep.filter.filter_gene_set_expression,
        X,
        genes,
        cutoff=None,
        percentile=None)
    assert_raise_message(KeyError,
                         "the label [not_a_gene] is not in the [columns]",
                         scprep.filter.filter_gene_set_expression,
                         X,
                         no_genes,
                         percentile=90.0,
                         keep_cells='below')
    assert_warns_message(UserWarning, "Selecting 0 columns",
                         scprep.utils.select_cols, X, (X.sum(axis=0) < 0))
Example #50
def test_deprecated_remove(self):
    assert_warns_message(
        DeprecationWarning,
        "`scprep.filter.remove_empty_genes` is deprecated. Use "
        "`scprep.filter.filter_empty_genes` instead.",
        scprep.filter.remove_empty_genes, self.X_dense)
    assert_warns_message(
        DeprecationWarning,
        "`scprep.filter.remove_rare_genes` is deprecated. Use "
        "`scprep.filter.filter_rare_genes` instead.",
        scprep.filter.remove_rare_genes, self.X_dense)
    assert_warns_message(
        DeprecationWarning,
        "`scprep.filter.remove_empty_cells` is deprecated. Use "
        "`scprep.filter.filter_empty_cells` instead.",
        scprep.filter.remove_empty_cells, self.X_dense)
    assert_warns_message(
        DeprecationWarning,
        "`scprep.filter.remove_duplicates` is deprecated. Use "
        "`scprep.filter.filter_duplicates` instead.",
        scprep.filter.remove_duplicates, self.X_dense)
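The scprep aliases tested above follow the usual deprecate-and-forward pattern. A hypothetical sketch of such a wrapper (not scprep's actual implementation):

import functools
import warnings

def deprecated_alias(new_func, old_name, new_name):
    # Hypothetical helper: warn, then delegate to the replacement.
    @functools.wraps(new_func)
    def wrapper(*args, **kwargs):
        warnings.warn("`{0}` is deprecated. Use `{1}` instead.".format(
            old_name, new_name), DeprecationWarning)
        return new_func(*args, **kwargs)
    return wrapper

# e.g. remove_empty_genes = deprecated_alias(
#     filter_empty_genes, "scprep.filter.remove_empty_genes",
#     "scprep.filter.filter_empty_genes")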
Example #51
def test_no_feature_selected():
    rng = np.random.RandomState(0)

    # Generate random uncorrelated data: a strict univariate test should
    # reject all the features
    X = rng.rand(40, 10)
    y = rng.randint(0, 4, size=40)
    strict_selectors = [
        SelectFwe(alpha=0.01).fit(X, y),
        SelectFdr(alpha=0.01).fit(X, y),
        SelectFpr(alpha=0.01).fit(X, y),
        SelectPercentile(percentile=0).fit(X, y),
        SelectKBest(k=0).fit(X, y),
    ]
    for selector in strict_selectors:
        assert_array_equal(selector.get_support(), np.zeros(10))
        X_selected = assert_warns_message(
            UserWarning, 'No features were selected', selector.transform, X)
        assert_equal(X_selected.shape, (40, 0))
Example #52
    def test_return_train_score_warn(self):
        from sklearn.utils.testing import ignore_warnings
        # Test that warnings are raised. Will be removed in 0.21

        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)
        grid = {'C': [1, 2]}

        estimators = [
            GridSearchCV(LinearSVC(random_state=0), grid, iid=False, cv=3),
            RandomizedSearchCV(LinearSVC(random_state=0),
                               grid,
                               n_iter=2,
                               iid=False,
                               cv=3)
        ]

        result = {}
        for estimator in estimators:
            for val in [True, False, 'warn']:
                estimator.set_params(return_train_score=val)
                fit_func = ignore_warnings(estimator.fit,
                                           category=ConvergenceWarning)
                result[val] = assert_no_warnings(fit_func, X, y).cv_results_

        train_keys = [
            'split0_train_score', 'split1_train_score', 'split2_train_score',
            'mean_train_score', 'std_train_score'
        ]
        for key in train_keys:
            msg = ('You are accessing a training score ({!r}), '
                   'which will not be available by default '
                   'any more in 0.21. If you need training scores, '
                   'please set return_train_score=True').format(key)
            train_score = assert_warns_message(FutureWarning, msg,
                                               result['warn'].get, key)
            assert np.allclose(train_score, result[True][key])
            assert key not in result[False]

        for key in result['warn']:
            if key not in train_keys:
                assert_no_warnings(result['warn'].get, key)
Example #53
def test_warn_ignore_attribute(monkeypatch, gzip_response):
    data_id = 40966
    expected_row_id_msg = "target_column={} has flag is_row_identifier."
    expected_ignore_msg = "target_column={} has flag is_ignore."
    _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
    # single column test
    assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'),
                         fetch_openml, data_id=data_id,
                         target_column='MouseID',
                         cache=False)
    assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'),
                         fetch_openml, data_id=data_id,
                         target_column='Genotype',
                         cache=False)
    # multi column test
    assert_warns_message(UserWarning, expected_row_id_msg.format('MouseID'),
                         fetch_openml, data_id=data_id,
                         target_column=['MouseID', 'class'],
                         cache=False)
    assert_warns_message(UserWarning, expected_ignore_msg.format('Genotype'),
                         fetch_openml, data_id=data_id,
                         target_column=['Genotype', 'class'],
                         cache=False)
Example #54
def test_unicode_decode_error():
    # decode_error defaults to 'strict', so this should fail
    # First, encode (as bytes) a unicode string.
    text = "J'ai mang\xe9 du kangourou  ce midi, c'\xe9tait pas tr\xeas bon."
    text_bytes = text.encode('utf-8')

    # Then let the Analyzer try to decode it as ascii. It should fail,
    # because we have given it an incorrect encoding.
    wa = CountVectorizer(ngram_range=(1, 2), encoding='ascii').build_analyzer()
    assert_raises(UnicodeDecodeError, wa, text_bytes)

    ca = CountVectorizer(analyzer='char', ngram_range=(3, 6),
                         encoding='ascii').build_analyzer()
    assert_raises(UnicodeDecodeError, ca, text_bytes)

    # Check the old interface
    in_warning_message = 'charset'
    ca = assert_warns_message(DeprecationWarning, in_warning_message,
                              CountVectorizer, analyzer='char',
                              ngram_range=(3, 6),
                              charset='ascii').build_analyzer()
    assert_raises(UnicodeDecodeError, ca, text_bytes)
Example #55
def test_deprecation():
    X = [[0.0], [1.0]]
    clf = IsolationForest()

    assert_warns_message(
        FutureWarning, 'default contamination parameter 0.1 will change '
        'in version 0.22 to "auto"', clf.fit, X)

    assert_warns_message(
        FutureWarning, 'behaviour="old" is deprecated and will be removed '
        'in version 0.22', clf.fit, X)

    clf = IsolationForest().fit(X)
    assert_warns_message(
        DeprecationWarning,
        "threshold_ attribute is deprecated in 0.20 and will"
        " be removed in 0.22.", getattr, clf, "threshold_")
Example #56
def test_tfidf_no_smoothing():
    X = [[1, 1, 1], [1, 1, 0], [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')
    tfidf = tr.fit_transform(X).toarray()
    assert_true((tfidf >= 0).all())

    # check normalization
    assert_array_almost_equal((tfidf**2).sum(axis=1), [1., 1., 1.])

    # the lack of smoothing makes IDF fragile in the presence of features
    # with only zeros
    X = [[1, 1, 0], [1, 1, 0], [1, 0, 0]]
    tr = TfidfTransformer(smooth_idf=False, norm='l2')

    with warnings.catch_warnings(record=True) as w:
        1. / np.array([0.])
        numpy_provides_div0_warning = len(w) == 1

    in_warning_message = 'divide by zero'
    tfidf = assert_warns_message(RuntimeWarning, in_warning_message,
                                 tr.fit_transform, X).toarray()
    if not numpy_provides_div0_warning:
        raise SkipTest("Numpy does not provide div 0 warnings.")
Example #57
def test_radius_neighbors_regressor(n_samples=40,
                                    n_features=3,
                                    n_test_pts=10,
                                    radius=0.5,
                                    random_state=0):
    # Test radius-based neighbors regression
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X**2).sum(1))
    y /= y.max()

    y_target = y[:n_test_pts]

    weight_func = _weight_func

    for algorithm in ALGORITHMS:
        for weights in ['uniform', 'distance', weight_func]:
            neigh = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                       weights=weights,
                                                       algorithm=algorithm)
            neigh.fit(X, y)
            epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_pred = neigh.predict(X[:n_test_pts] + epsilon)
            assert_true(np.all(abs(y_pred - y_target) < radius / 2))

    # test that nan is returned when no nearby observations
    for weights in ['uniform', 'distance']:
        neigh = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                   weights=weights,
                                                   algorithm='auto')
        neigh.fit(X, y)
        X_test_nan = np.ones((1, n_features)) * -1
        empty_warning_msg = ("One or more samples have no neighbors "
                             "within specified radius; predicting NaN.")
        pred = assert_warns_message(UserWarning, empty_warning_msg,
                                    neigh.predict, X_test_nan)
        assert_true(np.all(np.isnan(pred)))
Example #58
def test_kfold_valueerrors():
    # Check that errors are raised if there are not enough samples
    assert_raises(ValueError, cval.KFold, 3, 4)

    # Check that a warning is raised if the least populated class has too few
    # members.
    y = [3, 3, -1, -1, 3]

    cv = assert_warns_message(Warning, "The least populated class",
                              cval.StratifiedKFold, y, 3)

    # Check that despite the warning the folds are still computed even
    # though all the classes are not necessarily represented on each
    # side of the split at each split
    check_cv_coverage(cv, expected_n_iter=3, n_samples=len(y))

    # Check that errors are raised if all n_labels for individual
    # classes are less than n_folds.
    y = [3, 3, -1, -1, 2]

    assert_raises(ValueError, cval.StratifiedKFold, y, 3)

    # Error when number of folds is <= 1
    assert_raises(ValueError, cval.KFold, 2, 0)
    assert_raises(ValueError, cval.KFold, 2, 1)
    error_string = ("k-fold cross validation requires at least one"
                    " train / test split")
    assert_raise_message(ValueError, error_string, cval.StratifiedKFold, y, 0)
    assert_raise_message(ValueError, error_string, cval.StratifiedKFold, y, 1)

    # When n is not an integer:
    assert_raises(ValueError, cval.KFold, 2.5, 2)

    # When n_folds is not an integer:
    assert_raises(ValueError, cval.KFold, 5, 1.5)
    assert_raises(ValueError, cval.StratifiedKFold, y, 1.5)
Example #59
def test_generate_colorbar_mappable(self):
    im = plt.imshow([np.arange(10), np.arange(10)])
    scprep.plot.tools.generate_colorbar(mappable=im)
    assert_warns_message(
        UserWarning,
        "Cannot set `vmin` or `vmax` when `mappable` is given.",
        scprep.plot.tools.generate_colorbar,
        mappable=im,
        vmin=10,
        vmax=20)
    assert_warns_message(UserWarning,
                         "Cannot set `cmap` when `mappable` is given.",
                         scprep.plot.tools.generate_colorbar,
                         mappable=im,
                         cmap='inferno')
    assert_warns_message(UserWarning,
                         "Cannot set `scale` when `mappable` is given.",
                         scprep.plot.tools.generate_colorbar,
                         mappable=im,
                         scale='log')
Example #60
def test_scatter_invalid_legend(self):
    assert_warns_message(
        UserWarning, "`c` is a color array and cannot be used to create a "
        "legend. To interpret these values as labels instead, "
        "provide a `cmap` dictionary with label-color pairs.",
        scprep.plot.scatter2d,
        self.X_pca,
        legend=True,
        c=np.random.choice(['red', 'blue'],
                           self.X_pca.shape[0],
                           replace=True))
    assert_warns_message(UserWarning,
                         "Cannot create a legend with constant `c=red`",
                         scprep.plot.scatter2d,
                         self.X_pca,
                         legend=True,
                         c='red')
    assert_warns_message(UserWarning,
                         "Cannot create a legend with constant `c=None`",
                         scprep.plot.scatter2d,
                         self.X_pca,
                         legend=True,
                         c=None)