Ejemplo n.º 1
0
def test_deprecated():
    # Test whether the deprecated decorator issues appropriate warnings
    # Copied almost verbatim from http://docs.python.org/library/warnings.html

    # First a function...
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

        @deprecated()
        def ham():
            return "spam"

        spam = ham()

        assert_equal(spam, "spam")  # function must remain usable

        assert_equal(len(w), 1)
        assert_true(issubclass(w[0].category, DeprecationWarning))
        assert_true("deprecated" in str(w[0].message).lower())

    # ... then a class.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

        @deprecated("don't use this")
        class Ham(object):
            SPAM = 1

        ham = Ham()

        assert_true(hasattr(ham, "SPAM"))

        assert_equal(len(w), 1)
        assert_true(issubclass(w[0].category, DeprecationWarning))
        assert_true("deprecated" in str(w[0].message).lower())
Ejemplo n.º 2
0
def test_randomized_svd_sign_flip_with_transpose():
    # Check if the randomized_svd sign flipping is always done based on u
    # irrespective of transpose.
    # See https://github.com/scikit-learn/scikit-learn/issues/5608
    # for more details.
    def max_loading_is_positive(u, v):
        """
        returns bool tuple indicating if the values maximising np.abs
        are positive across all rows for u and across all columns for v.
        """
        u_based = (np.abs(u).max(axis=0) == u.max(axis=0)).all()
        v_based = (np.abs(v).max(axis=1) == v.max(axis=1)).all()
        return u_based, v_based

    mat = np.arange(10 * 8).reshape(10, -1)

    # Without transpose
    u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True)
    u_based, v_based = max_loading_is_positive(u_flipped, v_flipped)
    assert_true(u_based)
    assert_false(v_based)

    # With transpose
    u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd(
        mat, 3, flip_sign=True, transpose=True)
    u_based, v_based = max_loading_is_positive(u_flipped_with_transpose,
                                               v_flipped_with_transpose)
    assert_true(u_based)
    assert_false(v_based)
Ejemplo n.º 3
0
def test_uniform_weights():
    # with uniform weights, results should be identical to stats.mode
    rng = np.random.RandomState(0)
    x = rng.randint(10, size=(10, 5))
    weights = np.ones(x.shape)

    for axis in (None, 0, 1):
        mode, score = stats.mode(x, axis)
        mode2, score2 = weighted_mode(x, weights, axis)

        assert_true(np.all(mode == mode2))
        assert_true(np.all(score == score2))
Ejemplo n.º 4
0
def test_resample():
    # Border case not worth mentioning in doctests
    assert_true(resample() is None)

    # Check that invalid arguments yield ValueError
    assert_raises(ValueError, resample, [0], [0, 1])
    assert_raises(ValueError,
                  resample, [0, 1], [0, 1],
                  replace=False,
                  n_samples=3)
    assert_raises(ValueError, resample, [0, 1], [0, 1], meaning_of_life=42)
    # Issue:6581, n_samples can be more when replace is True (default).
    assert_equal(len(resample([1, 2], n_samples=5)), 5)
Ejemplo n.º 5
0
def test_compute_class_weight():
    # Test (and demo) compute_class_weight.
    y = np.asarray([2, 2, 2, 3, 3, 4])
    classes = np.unique(y)
    cw = assert_warns(DeprecationWarning, compute_class_weight, "auto",
                      classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_true(cw[0] < cw[1] < cw[2])

    cw = compute_class_weight("balanced", classes, y)
    # total effect of samples is preserved
    class_counts = np.bincount(y)[2:]
    assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
    assert_true(cw[0] < cw[1] < cw[2])
Ejemplo n.º 6
0
def test_make_rng():
    # Check the check_random_state utility function behavior
    assert_true(check_random_state(None) is np.random.mtrand._rand)
    assert_true(check_random_state(np.random) is np.random.mtrand._rand)

    rng_42 = np.random.RandomState(42)
    assert_true(check_random_state(42).randint(100) == rng_42.randint(100))

    rng_42 = np.random.RandomState(42)
    assert_true(check_random_state(rng_42) is rng_42)

    rng_42 = np.random.RandomState(42)
    assert_true(check_random_state(43).randint(100) != rng_42.randint(100))

    assert_raises(ValueError, check_random_state, "some invalid seed")
Ejemplo n.º 7
0
def test_max_feature_regression():
    # Test to make sure random state is set properly.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    gbrt = GradientBoostingClassifier(n_estimators=100,
                                      min_samples_split=5,
                                      max_depth=2,
                                      learning_rate=.1,
                                      max_features=2,
                                      random_state=1)
    gbrt.fit(X_train, y_train)
    deviance = gbrt.loss_(y_test, gbrt.decision_function(X_test))
    assert_true(deviance < 0.5, "GB failed with deviance %.4f" % deviance)
Ejemplo n.º 8
0
def check_sample_int(sample_without_replacement):
    # This test is heavily inspired from test_random.py of python-core.
    #
    # For the entire allowable range of 0 <= k <= N, validate that
    # the sample is of the correct length and contains only unique items
    n_population = 100

    for n_samples in range(n_population + 1):
        s = sample_without_replacement(n_population, n_samples)
        assert_equal(len(s), n_samples)
        unique = np.unique(s)
        assert_equal(np.size(unique), n_samples)
        assert_true(np.all(unique < n_population))

    # test edge case n_population == n_samples == 0
    assert_equal(np.size(sample_without_replacement(0, 0)), 0)
Ejemplo n.º 9
0
def test_clone():
    # Tests that clone creates a correct deep copy.
    # We create an estimator, make a copy of its original state
    # (which, in this case, is the current state of the estimator),
    # and check that the obtained copy is a correct deep copy.

    from uplift.feature_selection import SelectFpr, f_classif

    selector = SelectFpr(f_classif, alpha=0.1)
    new_selector = clone(selector)
    assert_true(selector is not new_selector)
    assert_equal(selector.get_params(), new_selector.get_params())

    selector = SelectFpr(f_classif, alpha=np.zeros((10, 2)))
    new_selector = clone(selector)
    assert_true(selector is not new_selector)
Ejemplo n.º 10
0
def test_probability_log():
    # Predict probabilities.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert_true(np.all(y_proba >= 0.0))
    assert_true(np.all(y_proba <= 1.0))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Ejemplo n.º 11
0
def check_classification_toy(presort, loss):
    # Check classification on a toy dataset.
    clf = GradientBoostingClassifier(loss=loss,
                                     n_estimators=10,
                                     random_state=1,
                                     presort=presort)

    assert_raises(ValueError, clf.predict, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf.estimators_))

    deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
    assert_true(np.any(deviance_decrease >= 0.0))

    leaves = clf.apply(X)
    assert_equal(leaves.shape, (6, 10, 1))
Ejemplo n.º 12
0
def test_staged_functions_defensive():
    # test that staged_functions make defensive copies
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(10, 3))
    y = (4 * X[:, 0]).astype(np.int) + 1  # don't predict zeros
    for estimator in [
            GradientBoostingRegressor(),
            GradientBoostingClassifier()
    ]:
        estimator.fit(X, y)
        for func in ['predict', 'decision_function', 'predict_proba']:
            staged_func = getattr(estimator, "staged_" + func, None)
            if staged_func is None:
                # regressor has no staged_predict_proba
                continue
            with warnings.catch_warnings(record=True):
                staged_result = list(staged_func(X))
            staged_result[1][:] = 0
            assert_true(np.all(staged_result[0] != 0))
Ejemplo n.º 13
0
def check_warm_start_oob(name):
    # Test that the warm start computes oob score when asked.
    X, y = hastie_X, hastie_y
    ForestEstimator = FOREST_ESTIMATORS[name]
    # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning.
    clf = ForestEstimator(n_estimators=15,
                          max_depth=3,
                          warm_start=False,
                          random_state=1,
                          bootstrap=True,
                          oob_score=True)
    clf.fit(X, y)

    clf_2 = ForestEstimator(n_estimators=5,
                            max_depth=3,
                            warm_start=False,
                            random_state=1,
                            bootstrap=True,
                            oob_score=False)
    clf_2.fit(X, y)

    clf_2.set_params(warm_start=True, oob_score=True, n_estimators=15)
    clf_2.fit(X, y)

    assert_true(hasattr(clf_2, 'oob_score_'))
    assert_equal(clf.oob_score_, clf_2.oob_score_)

    # Test that oob_score is computed even if we don't need to train
    # additional trees.
    clf_3 = ForestEstimator(n_estimators=15,
                            max_depth=3,
                            warm_start=True,
                            random_state=1,
                            bootstrap=True,
                            oob_score=False)
    clf_3.fit(X, y)
    assert_true(not (hasattr(clf_3, 'oob_score_')))

    clf_3.set_params(oob_score=True)
    ignore_warnings(clf_3.fit)(X, y)

    assert_equal(clf.oob_score_, clf_3.oob_score_)
Ejemplo n.º 14
0
def test_random_choice_csc(n_samples=10000, random_state=24):
    # Explicit class probabilities
    classes = [np.array([0, 1]),  np.array([0, 1, 2])]
    class_probabilites = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]

    got = random_choice_csc(n_samples, classes, class_probabilites,
                            random_state)
    assert_true(sp.issparse(got))

    for k in range(len(classes)):
        p = np.bincount(got.getcol(k).toarray().ravel()) / float(n_samples)
        assert_array_almost_equal(class_probabilites[k], p, decimal=1)

    # Implicit class probabilities
    classes = [[0, 1],  [1, 2]]  # test for array-like support
    class_probabilites = [np.array([0.5, 0.5]), np.array([0, 1/2, 1/2])]

    got = random_choice_csc(n_samples=n_samples,
                            classes=classes,
                            random_state=random_state)
    assert_true(sp.issparse(got))

    for k in range(len(classes)):
        p = np.bincount(got.getcol(k).toarray().ravel()) / float(n_samples)
        assert_array_almost_equal(class_probabilites[k], p, decimal=1)

    # Edge case probabilities 1.0 and 0.0
    classes = [np.array([0, 1]),  np.array([0, 1, 2])]
    class_probabilites = [np.array([1.0, 0.0]), np.array([0.0, 1.0, 0.0])]

    got = random_choice_csc(n_samples, classes, class_probabilites,
                            random_state)
    assert_true(sp.issparse(got))

    for k in range(len(classes)):
        p = np.bincount(got.getcol(k).toarray().ravel(),
                        minlength=len(class_probabilites[k])) / n_samples
        assert_array_almost_equal(class_probabilites[k], p, decimal=1)

    # One class target data
    classes = [[1],  [0]]  # test for array-like support
    class_probabilites = [np.array([0.0, 1.0]), np.array([1.0])]

    got = random_choice_csc(n_samples=n_samples,
                            classes=classes,
                            random_state=random_state)
    assert_true(sp.issparse(got))

    for k in range(len(classes)):
        p = np.bincount(got.getcol(k).toarray().ravel()) / n_samples
        assert_array_almost_equal(class_probabilites[k], p, decimal=1)
Ejemplo n.º 15
0
def test_probability_exponential():
    # Predict probabilities.
    clf = GradientBoostingClassifier(loss='exponential',
                                     n_estimators=100,
                                     random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert_true(np.all(y_proba >= 0.0))
    assert_true(np.all(y_proba <= 1.0))
    score = clf.decision_function(T).ravel()
    assert_array_almost_equal(y_proba[:, 1], 1.0 / (1.0 + np.exp(-2 * score)))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Ejemplo n.º 16
0
def check_importances(name, criterion, X, y):
    ForestEstimator = FOREST_ESTIMATORS[name]

    est = ForestEstimator(n_estimators=20, criterion=criterion, random_state=0)
    est.fit(X, y)
    importances = est.feature_importances_
    n_important = np.sum(importances > 0.1)
    assert_equal(importances.shape[0], 10)
    assert_equal(n_important, 3)

    # XXX: Remove this test in 0.19 after transform support to estimators
    # is removed.
    X_new = assert_warns(DeprecationWarning,
                         est.transform,
                         X,
                         threshold="mean")
    assert_less(0 < X_new.shape[1], X.shape[1])

    # Check with parallel
    importances = est.feature_importances_
    est.set_params(n_jobs=2)
    importances_parrallel = est.feature_importances_
    assert_array_almost_equal(importances, importances_parrallel)

    # Check with sample weights
    sample_weight = check_random_state(0).randint(1, 10, len(X))
    est = ForestEstimator(n_estimators=20, random_state=0, criterion=criterion)
    est.fit(X, y, sample_weight=sample_weight)
    importances = est.feature_importances_
    assert_true(np.all(importances >= 0.0))

    for scale in [0.5, 10, 100]:
        est = ForestEstimator(n_estimators=20,
                              random_state=0,
                              criterion=criterion)
        est.fit(X, y, sample_weight=scale * sample_weight)
        importances_bis = est.feature_importances_
        assert_less(np.abs(importances - importances_bis).mean(), 0.001)
Ejemplo n.º 17
0
def test_feature_importances():
    X = np.array(boston.data, dtype=np.float32)
    y = np.array(boston.target, dtype=np.float32)

    for presort in True, False:
        clf = GradientBoostingRegressor(n_estimators=100,
                                        max_depth=5,
                                        min_samples_split=2,
                                        random_state=1,
                                        presort=presort)
        clf.fit(X, y)
        assert_true(hasattr(clf, 'feature_importances_'))

        # XXX: Remove this test in 0.19 after transform support to estimators
        # is removed.
        X_new = assert_warns(DeprecationWarning,
                             clf.transform,
                             X,
                             threshold="mean")
        assert_less(X_new.shape[1], X.shape[1])
        feature_mask = (clf.feature_importances_ >
                        clf.feature_importances_.mean())
        assert_array_almost_equal(X_new, X[:, feature_mask])
Ejemplo n.º 18
0
def test_get_params():
    test = T(K(), K())

    assert_true('a__d' in test.get_params(deep=True))
    assert_true('a__d' not in test.get_params(deep=False))

    test.set_params(a__d=2)
    assert_true(test.a.d == 2)
    assert_raises(ValueError, test.set_params, a__a=2)
Ejemplo n.º 19
0
def test_clone_nan():
    # Regression test for cloning estimators with default parameter as np.nan
    clf = MyEstimator(empty=np.nan)
    clf2 = clone(clf)

    assert_true(clf.empty is clf2.empty)
Ejemplo n.º 20
0
def test_get_params_deprecated():
    # deprecated attribute should not show up as params
    est = DeprecatedAttributeEstimator(a=1)

    assert_true('a' in est.get_params())
    assert_true('a' in est.get_params(deep=True))
    assert_true('a' in est.get_params(deep=False))

    assert_true('b' not in est.get_params())
    assert_true('b' not in est.get_params(deep=True))
    assert_true('b' not in est.get_params(deep=False))