Example #1
0
def test_dtype_of_classifier_probas(strategy):
    y = [0, 2, 1, 1]
    X = np.zeros(4)
    model = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    probas = model.fit(X, y).predict_proba(X)

    assert probas.dtype == np.float64
Example #2
0
def test_constant_strategy_exceptions():
    X = [[0], [0], [0], [0]]  # ignored
    y = [2, 1, 2, 2]
    clf = DummyClassifier(strategy="constant", random_state=0)
    assert_raises(ValueError, clf.fit, X, y)
    clf = DummyClassifier(strategy="constant", random_state=0,
                          constant=[2, 0])
    assert_raises(ValueError, clf.fit, X, y)
Example #3
0
def test_uniform_strategy():
    X = [[0]] * 4  # ignored
    y = [1, 2, 1, 1]
    clf = DummyClassifier(strategy="uniform", random_state=0)
    clf.fit(X, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)
    p = np.bincount(y_pred) / float(len(X))
    assert_almost_equal(p[1], 0.5, decimal=1)
    assert_almost_equal(p[2], 0.5, decimal=1)
    _check_predict_proba(clf, X, y)
Example #4
0
def test_classifier_prediction_independent_of_X(strategy):
    y = [0, 2, 1, 1]
    X1 = [[0]] * 4
    clf1 = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    clf1.fit(X1, y)
    predictions1 = clf1.predict(X1)

    X2 = [[1]] * 4
    clf2 = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    clf2.fit(X2, y)
    predictions2 = clf2.predict(X2)

    assert_array_equal(predictions1, predictions2)
Example #5
0
def test_most_frequent_and_prior_strategy_with_2d_column_y():
    # non-regression test added in
    # https://github.com/scikit-learn/scikit-learn/pull/13545
    X = [[0], [0], [0], [0]]
    y_1d = [1, 2, 1, 1]
    y_2d = [[1], [2], [1], [1]]

    for strategy in ("most_frequent", "prior"):
        clf_1d = DummyClassifier(strategy=strategy, random_state=0)
        clf_2d = DummyClassifier(strategy=strategy, random_state=0)

        clf_1d.fit(X, y_1d)
        clf_2d.fit(X, y_2d)
        assert_array_equal(clf_1d.predict(X), clf_2d.predict(X))
Example #6
0
def test_constant_strategy():
    X = [[0], [0], [0], [0]]  # ignored
    y = [2, 1, 2, 2]

    clf = DummyClassifier(strategy="constant", random_state=0, constant=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(X), np.ones(len(X)))
    _check_predict_proba(clf, X, y)

    X = [[0], [0], [0], [0]]  # ignored
    y = ['two', 'one', 'two', 'two']
    clf = DummyClassifier(strategy="constant", random_state=0, constant='one')
    clf.fit(X, y)
    assert_array_equal(clf.predict(X), np.array(['one'] * 4))
    _check_predict_proba(clf, X, y)
Example #7
0
def test_classification_sample_weight():
    X = [[0], [0], [1]]
    y = [0, 1, 0]
    sample_weight = [0.1, 1., 0.1]

    clf = DummyClassifier().fit(X, y, sample_weight)
    assert_array_almost_equal(clf.class_prior_, [0.2 / 1.2, 1. / 1.2])
Example #8
0
def test_constant_strategy_multioutput():
    X = [[0], [0], [0], [0]]  # ignored
    y = np.array([[2, 3],
                  [1, 3],
                  [2, 3],
                  [2, 0]])

    n_samples = len(X)

    clf = DummyClassifier(strategy="constant", random_state=0,
                          constant=[1, 0])
    clf.fit(X, y)
    assert_array_equal(clf.predict(X),
                       np.hstack([np.ones((n_samples, 1)),
                                  np.zeros((n_samples, 1))]))
    _check_predict_proba(clf, X, y)
Example #9
0
def test_constant_strategy_sparse_target():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[0, 1],
                                [4, 0],
                                [1, 1],
                                [1, 4],
                                [1, 1]]))

    n_samples = len(X)

    clf = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert sp.issparse(y_pred)
    assert_array_equal(y_pred.toarray(), np.hstack([np.ones((n_samples, 1)),
                                                    np.zeros((n_samples, 1))]))
Example #10
0
def test_most_frequent_and_prior_strategy_sparse_target():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[1, 0],
                                [1, 3],
                                [4, 0],
                                [0, 1],
                                [1, 0]]))

    n_samples = len(X)
    y_expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)

        y_pred = clf.predict(X)
        assert sp.issparse(y_pred)
        assert_array_equal(y_pred.toarray(), y_expected)
Example #11
0
def test_most_frequent_and_prior_strategy_multioutput():
    X = [[0], [0], [0], [0]]  # ignored
    y = np.array([[1, 0],
                  [2, 0],
                  [1, 0],
                  [1, 3]])

    n_samples = len(X)

    for strategy in ("prior", "most_frequent"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X),
                           np.hstack([np.ones((n_samples, 1)),
                                      np.zeros((n_samples, 1))]))
        _check_predict_proba(clf, X, y)
        _check_behavior_2d(clf)
Example #12
0
def test_uniform_strategy_multioutput():
    X = [[0]] * 4  # ignored
    y = np.array([[2, 1],
                  [2, 2],
                  [1, 2],
                  [1, 1]])
    clf = DummyClassifier(strategy="uniform", random_state=0)
    clf.fit(X, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)

    for k in range(y.shape[1]):
        p = np.bincount(y_pred[:, k]) / float(len(X))
        assert_almost_equal(p[1], 0.5, decimal=1)
        assert_almost_equal(p[2], 0.5, decimal=1)
        _check_predict_proba(clf, X, y)

    _check_behavior_2d(clf)
Example #13
0
def test_uniform_strategy_sparse_target_warning():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[2, 1],
                                [2, 2],
                                [1, 4],
                                [4, 2],
                                [1, 1]]))

    clf = DummyClassifier(strategy="uniform", random_state=0)
    assert_warns_message(UserWarning,
                         "the uniform strategy would not save memory",
                         clf.fit, X, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)

    for k in range(y.shape[1]):
        p = np.bincount(y_pred[:, k]) / float(len(X))
        assert_almost_equal(p[1], 1 / 3, decimal=1)
        assert_almost_equal(p[2], 1 / 3, decimal=1)
        assert_almost_equal(p[4], 1 / 3, decimal=1)
Example #14
0
def test_stratified_strategy_sparse_target():
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[4, 1],
                                [0, 0],
                                [1, 1],
                                [1, 4],
                                [1, 1]]))

    clf = DummyClassifier(strategy="stratified", random_state=0)
    clf.fit(X, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)
    assert sp.issparse(y_pred)
    y_pred = y_pred.toarray()

    for k in range(y.shape[1]):
        p = np.bincount(y_pred[:, k]) / float(len(X))
        assert_almost_equal(p[1], 3. / 5, decimal=1)
        assert_almost_equal(p[0], 1. / 5, decimal=1)
        assert_almost_equal(p[4], 1. / 5, decimal=1)
Example #15
0
def test_partial_dependence_pipeline():
    # check that the partial dependence support pipeline
    iris = load_iris()

    scaler = StandardScaler()
    clf = DummyClassifier(random_state=42)
    pipe = make_pipeline(scaler, clf)

    clf.fit(scaler.fit_transform(iris.data), iris.target)
    pipe.fit(iris.data, iris.target)

    features = 0
    pdp_pipe, values_pipe = partial_dependence(pipe,
                                               iris.data,
                                               features=[features])
    pdp_clf, values_clf = partial_dependence(clf,
                                             scaler.transform(iris.data),
                                             features=[features])
    assert_allclose(pdp_pipe, pdp_clf)
    assert_allclose(
        values_pipe[0],
        values_clf[0] * scaler.scale_[features] + scaler.mean_[features])
Example #16
0
def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init parameter to a GBDT and using
    # recursion method yields a warning.

    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init predictor'):
        partial_dependence(gbc, X, [0], method='recursion')

    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init predictor'):
        partial_dependence(gbc, X, [0], method='recursion')
Example #17
0
def test_dummy_classifier_on_3D_array():
    X = np.array([[['foo']], [['bar']], [['baz']]])
    y = [2, 2, 2]
    y_expected = [2, 2, 2]
    y_proba_expected = [[1], [1], [1]]
    cls = DummyClassifier()
    cls.fit(X, y)
    y_pred = cls.predict(X)
    y_pred_proba = cls.predict_proba(X)
    assert_array_equal(y_pred, y_expected)
    assert_array_equal(y_pred_proba, y_proba_expected)
Example #18
0
def test_most_frequent_and_prior_strategy():
    X = [[0], [0], [0], [0]]  # ignored
    y = [1, 2, 1, 1]

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)
        assert_array_equal(clf.predict(X), np.ones(len(X)))
        _check_predict_proba(clf, X, y)

        if strategy == "prior":
            assert_array_almost_equal(clf.predict_proba([X[0]]),
                                      clf.class_prior_.reshape((1, -1)))
        else:
            assert_array_almost_equal(clf.predict_proba([X[0]]),
                                      clf.class_prior_.reshape((1, -1)) > 0.5)
Example #19
0
def test_multidimensional_X():
    """
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    """

    from mrex.dummy import DummyClassifier, DummyRegressor

    rng = np.random.RandomState(0)

    X = rng.randn(50, 3, 3)
    yc = rng.choice([0, 1], 50)
    yr = rng.randn(50)

    boost = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    boost.fit(X, yc)
    boost.predict(X)
    boost.predict_proba(X)

    boost = AdaBoostRegressor(DummyRegressor())
    boost.fit(X, yr)
    boost.predict(X)
Example #20
0
from mrex.dummy import DummyClassifier

from mrex.datasets import fetch_20newsgroups_vectorized
from mrex.metrics import accuracy_score
from mrex.utils.validation import check_array

from mrex.ensemble import RandomForestClassifier
from mrex.ensemble import ExtraTreesClassifier
from mrex.ensemble import AdaBoostClassifier
from mrex.linear_model import LogisticRegression
from mrex.naive_bayes import MultinomialNB

ESTIMATORS = {
    "dummy":
    DummyClassifier(),
    "random_forest":
    RandomForestClassifier(max_features="sqrt", min_samples_split=10),
    "extra_trees":
    ExtraTreesClassifier(max_features="sqrt", min_samples_split=10),
    "logistic_regression":
    LogisticRegression(),
    "naive_bayes":
    MultinomialNB(),
    "adaboost":
    AdaBoostClassifier(n_estimators=10),
}

###############################################################################
# Data
Example #21
0
    # Normalize features
    X = X / 255

    # Create train-test split (as [Joachims, 2006])
    print("Creating train-test split...")
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    return X_train, X_test, y_train, y_test


ESTIMATORS = {
    "dummy": DummyClassifier(),
    'CART': DecisionTreeClassifier(),
    'ExtraTrees': ExtraTreesClassifier(),
    'RandomForest': RandomForestClassifier(),
    'Nystroem-SVM': make_pipeline(
        Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    'SampledRBF-SVM': make_pipeline(
        RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    'LogisticRegression-SAG': LogisticRegression(solver='sag', tol=1e-1,
                                                 C=1e4),
    'LogisticRegression-SAGA': LogisticRegression(solver='saga', tol=1e-1,
                                                  C=1e4),
    'MultilayerPerceptron': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        solver='sgd', learning_rate_init=0.2, momentum=0.9, verbose=1,
        tol=1e-4, random_state=1),
Example #22
0
def test_string_labels():
    X = [[0]] * 5
    y = ["paris", "paris", "tokyo", "amsterdam", "berlin"]
    clf = DummyClassifier(strategy="most_frequent")
    clf.fit(X, y)
    assert_array_equal(clf.predict(X), ["paris"] * 5)
Example #23
0
def test_classifier_score_with_None(y, y_test):
    clf = DummyClassifier(strategy="most_frequent")
    clf.fit(None, y)
    assert clf.score(None, y_test) == 0.5
Example #24
0
def test_classifier_exceptions():
    clf = DummyClassifier(strategy="unknown")
    assert_raises(ValueError, clf.fit, [], [])

    assert_raises(NotFittedError, clf.predict, [])
    assert_raises(NotFittedError, clf.predict_proba, [])