Example #1
def check_scoring_validator_for_single_metric_usecases(scoring_validator):
    # Test all branches of the single-metric use cases
    estimator = EstimatorWithoutFit()
    pattern = (r"estimator should be an estimator implementing 'fit' method,"
               r" .* was passed")
    assert_raises_regexp(TypeError, pattern, scoring_validator, estimator)

    estimator = EstimatorWithFitAndScore()
    estimator.fit([[1]], [1])
    scorer = scoring_validator(estimator)
    assert scorer is _passthrough_scorer
    assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)

    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])
    pattern = (r"If no scoring is specified, the estimator passed should have"
               r" a 'score' method\. The estimator .* does not\.")
    assert_raises_regexp(TypeError, pattern, scoring_validator, estimator)

    scorer = scoring_validator(estimator, "accuracy")
    assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)

    estimator = EstimatorWithFit()
    scorer = scoring_validator(estimator, "accuracy")
    assert isinstance(scorer, _PredictScorer)

    # Test the allow_none parameter for check_scoring alone
    if scoring_validator is check_scoring:
        estimator = EstimatorWithFit()
        scorer = scoring_validator(estimator, allow_none=True)
        assert scorer is None
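
Note: the estimators used above (EstimatorWithoutFit, EstimatorWithFit, EstimatorWithFitAndScore, EstimatorWithFitAndPredict) are small test doubles defined elsewhere in the test module. A minimal sketch of how they could be defined, consistent with the assertions above (the exact definitions are assumed, not shown here):

class EstimatorWithoutFit:
    """Dummy estimator without a fit method."""
    pass


class EstimatorWithFit:
    """Dummy estimator exposing only fit."""
    def fit(self, X, y):
        return self


class EstimatorWithFitAndScore:
    """Dummy estimator whose score method always returns 1.0."""
    def fit(self, X, y):
        return self

    def score(self, X, y):
        return 1.0


class EstimatorWithFitAndPredict:
    """Dummy estimator that predicts the targets it was fit on."""
    def fit(self, X, y):
        self.y = y
        return self

    def predict(self, X):
        return self.y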
Example #2
def test_pairwise_precomputed_non_negative():
    # Test that negative values in a precomputed matrix are rejected
    assert_raises_regexp(ValueError,
                         '.* non-negative values.*',
                         pairwise_distances,
                         np.full((5, 5), -1),
                         metric='precomputed')
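
The same check can also be written with pytest's native context manager instead of the legacy assert_raises_regexp helper. A minimal sketch, assuming pairwise_distances comes from the project's metrics module as in the test above:

import numpy as np
import pytest

from mrex.metrics import pairwise_distances  # assumed import path


def test_pairwise_precomputed_non_negative_pytest_style():
    # match takes a regular expression that is searched in the error message
    with pytest.raises(ValueError, match='.* non-negative values.*'):
        pairwise_distances(np.full((5, 5), -1), metric='precomputed')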
Example #3
def test_base_estimator():
    # Test different base estimators.
    from mrex.ensemble import RandomForestClassifier

    # XXX doesn't work with y_class because RF doesn't support classes_
    # Shouldn't AdaBoost run a LabelBinarizer?
    clf = AdaBoostClassifier(RandomForestClassifier())
    clf.fit(X, y_regr)

    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
    clf.fit(X, y_class)

    from mrex.ensemble import RandomForestRegressor

    clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0)
    clf.fit(X, y_regr)

    clf = AdaBoostRegressor(SVR(), random_state=0)
    clf.fit(X, y_regr)

    # Check that an empty discrete ensemble fails in fit, not predict.
    X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
    y_fail = ["foo", "bar", 1, 2]
    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
    assert_raises_regexp(ValueError, "worse than random", clf.fit, X_fail,
                         y_fail)
Example #4
def test_check_scoring_and_check_multimetric_scoring():
    check_scoring_validator_for_single_metric_usecases(check_scoring)
    # Make sure check_scoring is correctly applied to the constituent
    # scorers
    check_scoring_validator_for_single_metric_usecases(
        check_multimetric_scoring_single_metric_wrapper)

    # For multiple metric use cases
    # Make sure it works for the valid cases
    for scoring in (('accuracy',),
                    ['precision'],
                    {'acc': 'accuracy', 'precision': 'precision'},
                    ('accuracy', 'precision'),
                    ['precision', 'accuracy'],
                    {'accuracy': make_scorer(accuracy_score),
                     'precision': make_scorer(precision_score)}):
        estimator = LinearSVC(random_state=0)
        estimator.fit([[1], [2], [3]], [1, 1, 0])

        scorers, is_multi = _check_multimetric_scoring(estimator, scoring)
        assert is_multi
        assert isinstance(scorers, dict)
        assert sorted(scorers.keys()) == sorted(list(scoring))
        assert all(isinstance(scorer, _PredictScorer)
                   for scorer in scorers.values())

        if 'acc' in scoring:
            assert_almost_equal(
                scorers['acc'](estimator, [[1], [2], [3]], [1, 0, 0]), 2. / 3.)
        if 'accuracy' in scoring:
            assert_almost_equal(
                scorers['accuracy'](estimator, [[1], [2], [3]], [1, 0, 0]),
                2. / 3.)
        if 'precision' in scoring:
            assert_almost_equal(
                scorers['precision'](estimator, [[1], [2], [3]], [1, 0, 0]),
                0.5)

    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])

    # Make sure it raises errors when the scoring parameter is not valid.
    # More unusual corner cases are tested in test_validation.py
    error_message_regexp = ".*must be unique strings.*"
    for scoring in ((make_scorer(precision_score),  # Tuple of callables
                     make_scorer(accuracy_score)),
                    [5],
                    (make_scorer(precision_score),),
                    (),
                    ('f1', 'f1')):
        assert_raises_regexp(ValueError,
                             error_message_regexp,
                             _check_multimetric_scoring,
                             estimator,
                             scoring=scoring)
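
In user code, the multimetric specifications accepted above are typically passed to higher-level tools such as cross_validate. A minimal sketch of that usage (import paths and data are illustrative, assuming the same mrex package layout as these tests):

import numpy as np
from mrex.model_selection import cross_validate  # assumed import path
from mrex.svm import LinearSVC

X_demo = np.array([[1], [2], [3], [4], [5], [6]])
y_demo = np.array([1, 1, 0, 1, 0, 0])
scoring = {'acc': 'accuracy', 'precision': 'precision'}

results = cross_validate(LinearSVC(random_state=0), X_demo, y_demo,
                         scoring=scoring, cv=3)
# one test-score array per scorer, e.g. results['test_acc'] and
# results['test_precision']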
Example #5
def test_pairwise_distances_chunked_reduce_invalid(bad_reduce, err_type,
                                                   message):
    X = np.arange(10).reshape(-1, 1)
    S_chunks = pairwise_distances_chunked(X,
                                          None,
                                          reduce_func=bad_reduce,
                                          working_memory=64)
    assert_raises_regexp(err_type, message, next, S_chunks)
Example #6
def test_ovr_partial_fit_exceptions():
    ovr = OneVsRestClassifier(MultinomialNB())
    X = np.abs(np.random.randn(14, 2))
    y = [1, 1, 1, 1, 2, 3, 3, 0, 0, 2, 3, 1, 2, 3]
    ovr.partial_fit(X[:7], y[:7], np.unique(y))
    # A class value that was not present in the first call to partial_fit
    # should raise a ValueError
    y1 = [5] + y[7:-1]
    assert_raises_regexp(ValueError, r"Mini-batch contains \[.+\] while "
                         r"classes must be subset of \[.+\]",
                         ovr.partial_fit,
                         X=X[7:],
                         y=y1)
Example #7
def test_ransac_no_valid_model():
    def is_model_valid(estimator, X, y):
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       is_model_valid=is_model_valid,
                                       max_trials=5)

    msg = ("RANSAC could not find a valid consensus set")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
Example #8
def test_ovo_partial_fit_predict():
    temp = datasets.load_iris()
    X, y = temp.data, temp.target
    ovo1 = OneVsOneClassifier(MultinomialNB())
    ovo1.partial_fit(X[:100], y[:100], np.unique(y))
    ovo1.partial_fit(X[100:], y[100:])
    pred1 = ovo1.predict(X)

    ovo2 = OneVsOneClassifier(MultinomialNB())
    ovo2.fit(X, y)
    pred2 = ovo2.predict(X)
    assert len(ovo1.estimators_) == n_classes * (n_classes - 1) / 2
    assert np.mean(y == pred1) > 0.65
    assert_almost_equal(pred1, pred2)

    # Test when mini-batches have binary target classes
    ovo1 = OneVsOneClassifier(MultinomialNB())
    ovo1.partial_fit(X[:60], y[:60], np.unique(y))
    ovo1.partial_fit(X[60:], y[60:])
    pred1 = ovo1.predict(X)
    ovo2 = OneVsOneClassifier(MultinomialNB())
    pred2 = ovo2.fit(X, y).predict(X)

    assert_almost_equal(pred1, pred2)
    assert len(ovo1.estimators_) == len(np.unique(y))
    assert np.mean(y == pred1) > 0.65

    ovo = OneVsOneClassifier(MultinomialNB())
    X = np.random.rand(14, 2)
    y = [1, 1, 2, 3, 3, 0, 0, 4, 4, 4, 4, 4, 2, 2]
    ovo.partial_fit(X[:7], y[:7], [0, 1, 2, 3, 4])
    ovo.partial_fit(X[7:], y[7:])
    pred = ovo.predict(X)
    ovo2 = OneVsOneClassifier(MultinomialNB())
    pred2 = ovo2.fit(X, y).predict(X)
    assert_almost_equal(pred, pred2)

    # raises error when the mini-batch contains classes that are not in all_classes
    ovo = OneVsOneClassifier(MultinomialNB())
    error_y = [0, 1, 2, 3, 4, 5, 2]
    message_re = escape("Mini-batch contains {0} while "
                        "it must be subset of {1}".format(
                            np.unique(error_y), np.unique(y)))
    assert_raises_regexp(ValueError, message_re, ovo.partial_fit, X[:7],
                         error_y, np.unique(y))

    # partial_fit should only be exposed if the wrapped estimator implements it
    ovr = OneVsOneClassifier(SVC())
    assert not hasattr(ovr, "partial_fit")
Example #9
def test_ransac_exceed_max_skips():
    def is_data_valid(X, y):
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       is_data_valid=is_data_valid,
                                       max_trials=5,
                                       max_skips=3)

    msg = ("RANSAC skipped more iterations than `max_skips`")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
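
Because the fitted attributes are set before the error is raised, the skip counters can also be inspected after catching the exception. A minimal sketch built on the same configuration (data and import paths are illustrative):

import numpy as np
from mrex.linear_model import LinearRegression, RANSACRegressor  # assumed import path

X_demo = np.arange(10, dtype=float).reshape(-1, 1)
y_demo = X_demo.ravel()

ransac = RANSACRegressor(LinearRegression(),
                         is_data_valid=lambda X, y: False,
                         max_trials=5,
                         max_skips=3)
try:
    ransac.fit(X_demo, y_demo)
except ValueError:
    # max_skips + 1 skipped trials are recorded before the error is raised
    print(ransac.n_skips_invalid_data_)  # 4, matching the assertion above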
Example #10
def test_ransac_resid_thresh_no_inliers():
    # When residual_threshold=0.0 there are no inliers, so fit should raise
    # a ValueError with an informative message
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=0.0,
                                       random_state=0,
                                       max_trials=5)

    msg = ("RANSAC could not find a valid consensus set")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 5
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 0
Example #11
def test_scoring_is_not_metric():
    assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
                         LogisticRegression(), f1_score)
    assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
                         LogisticRegression(), roc_auc_score)
    assert_raises_regexp(ValueError, 'make_scorer', check_scoring, Ridge(),
                         r2_score)
    assert_raises_regexp(ValueError, 'make_scorer', check_scoring, KMeans(),
                         cluster_module.adjusted_rand_score)
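
The error these assertions expect points users toward make_scorer: plain metric functions must be wrapped before being passed as scoring. A minimal sketch of the intended usage (import paths are assumed to mirror the tests):

from mrex.linear_model import LogisticRegression  # assumed import paths
from mrex.metrics import check_scoring, f1_score, make_scorer

# Passing f1_score directly raises ValueError; wrap it in a scorer instead.
f1_scorer = make_scorer(f1_score)
scorer = check_scoring(LogisticRegression(), scoring=f1_scorer)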
Example #12
def test_correct_labelsize():
    # Assert 1 < n_labels < n_samples
    dataset = datasets.load_iris()
    X = dataset.data

    # n_labels = n_samples
    y = np.arange(X.shape[0])
    assert_raises_regexp(
        ValueError, r'Number of labels is %d\. Valid values are 2 '
        r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
        silhouette_score, X, y)

    # n_labels = 1
    y = np.zeros(X.shape[0])
    assert_raises_regexp(
        ValueError, r'Number of labels is %d\. Valid values are 2 '
        r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)),
        silhouette_score, X, y)
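
For contrast, any labelling with 2 to n_samples - 1 distinct labels is accepted. A minimal sketch of a valid call (data and import path are illustrative):

import numpy as np
from mrex.metrics import silhouette_score  # assumed import path

X_demo = np.array([[0.0], [0.1], [5.0], [5.1]])
y_demo = np.array([0, 0, 1, 1])  # 2 labels for 4 samples: within the valid range
score = silhouette_score(X_demo, y_demo)
assert -1.0 <= score <= 1.0  # silhouette scores always lie in [-1, 1]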
Example #13
def test_pairwise_precomputed(func):
    # Test correct shape
    assert_raises_regexp(ValueError,
                         '.* shape .*',
                         func,
                         np.zeros((5, 3)),
                         metric='precomputed')
    # with two args
    assert_raises_regexp(ValueError,
                         '.* shape .*',
                         func,
                         np.zeros((5, 3)),
                         np.zeros((4, 4)),
                         metric='precomputed')
    # even if shape[1] agrees (although the second arg is then spurious)
    assert_raises_regexp(ValueError,
                         '.* shape .*',
                         func,
                         np.zeros((5, 3)),
                         np.zeros((4, 3)),
                         metric='precomputed')

    # Test not copied (if appropriate dtype)
    S = np.zeros((5, 5))
    S2 = func(S, metric="precomputed")
    assert S is S2
    # with two args
    S = np.zeros((5, 3))
    S2 = func(S, np.zeros((3, 3)), metric="precomputed")
    assert S is S2

    # Test always returns float dtype
    S = func(np.array([[1]], dtype='int'), metric='precomputed')
    assert 'f' == S.dtype.kind

    # Test that a list input is converted to an ndarray
    S = func([[1.]], metric='precomputed')
    assert isinstance(S, np.ndarray)