Example #1
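These snippets omit their imports and shared fixtures. A plausible preamble, assuming the older scikit-learn release the tests were written against (they rely on sklearn.utils.testing helpers and the Boston housing dataset, both removed from modern releases; the LazyBagging* import path is a guess):

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix

from sklearn.datasets import (load_boston, load_iris,
                              make_multilabel_classification)
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import Perceptron
from sklearn.model_selection import (GridSearchCV, ParameterGrid,
                                     train_test_split)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils import check_random_state
from sklearn.utils.testing import (assert_almost_equal, assert_array_equal,
                                   assert_array_almost_equal, assert_raises,
                                   assert_true)

# The estimators under test; the module name below is hypothetical.
# from lazy_bagging import LazyBaggingClassifier, LazyBaggingRegressor

iris = load_iris()
boston = load_boston()
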
def test_probability():
    """Predict probabilities."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    with np.errstate(divide="ignore", invalid="ignore"):
        # Normal case
        print("start")
        ensemble = LazyBaggingClassifier(random_state=rng).fit(X_train, y_train)

        assert_array_almost_equal(np.sum(ensemble.predict_proba(X_test),
                                         axis=1),
                                  np.ones(len(X_test)))

        assert_array_almost_equal(ensemble.predict_proba(X_test),
                                  np.exp(ensemble.predict_log_proba(X_test)))
        print("stop")

        # Degenerate case, where some classes are missing
        ensemble = LazyBaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                         random_state=rng,
                                         max_samples=5).fit(X_train, y_train)

        assert_array_almost_equal(np.sum(ensemble.predict_proba(X_test),
                                         axis=1),
                                  np.ones(len(X_test)))

        assert_array_almost_equal(ensemble.predict_proba(X_test),
                                  np.exp(ensemble.predict_log_proba(X_test)))
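With max_samples=5, an estimator's bootstrap draw can miss classes entirely, so its predict_proba columns must be mapped back into the ensemble-wide class set before averaging. A minimal sketch of that alignment (hypothetical helper, not part of the library):

def aligned_proba(estimator, X, all_classes):
    # Scatter this estimator's probability columns into the positions of
    # its classes_ within the sorted, ensemble-wide class array.
    proba = np.zeros((X.shape[0], len(all_classes)))
    cols = np.searchsorted(all_classes, estimator.classes_)
    proba[:, cols] = estimator.predict_proba(X)
    return proba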
Example #2
def test_sparse_classification():
    """Check classification for various parameter settings on sparse input."""

    class CustomSVC(SVC):
        """SVC variant that records the nature of the training set"""

        def fit(self, X, y):
            super(CustomSVC, self).fit(X, y)
            self.data_type_ = type(X)
            return self

    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        for params in parameter_sets:

            # Trained on sparse format
            sparse_classifier = LazyBaggingClassifier(
                base_estimator=CustomSVC(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_results = LazyBaggingClassifier(
                base_estimator=CustomSVC(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            sparse_type = type(X_train_sparse)
            types = [i.data_type_ for i in sparse_classifier.estimators_]

            assert_array_equal(sparse_results, dense_results)
            assert all(t == sparse_type for t in types)
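The final assertion verifies that every sub-estimator saw data of the same sparse type as the input, i.e. the ensemble subsamples without densifying. Both formats support the indexing a bagging implementation needs (plain scipy, for illustration):

Xs = csr_matrix(X_train)
row_sample = Xs[[0, 2, 4]]    # bootstrap samples: row selection stays sparse
col_sample = Xs[:, [1, 3]]    # bootstrap features: column selection stays sparse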
Example #3
def test_base_estimator():
    """Check base_estimator and its default values."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = LazyBaggingClassifier(None,
                                     random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = LazyBaggingClassifier(DecisionTreeClassifier(),
                                     random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeClassifier))

    ensemble = LazyBaggingClassifier(Perceptron(),
                                     random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, Perceptron))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = LazyBaggingRegressor(random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = LazyBaggingRegressor(DecisionTreeRegressor(),
                                    random_state=0).fit(X_train, y_train)

    assert_true(isinstance(ensemble.base_estimator_, DecisionTreeRegressor))

    ensemble = LazyBaggingRegressor(SVR(),
                                    random_state=0).fit(X_train, y_train)
    assert_true(isinstance(ensemble.base_estimator_, SVR))
Example #4
def test_gridsearch():
    """Check that bagging ensembles can be grid-searched."""
    # Transform iris into a binary classification task
    X, y = iris.data, iris.target.copy()  # copy so the shared fixture is not mutated
    y[y == 2] = 1

    # Grid search with scoring based on decision_function
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__C': (1, 2)}

    GridSearchCV(LazyBaggingClassifier(SVC()),
                 parameters,
                 scoring="roc_auc").fit(X, y)
Example #5
def test_classification():
    """Check classification for various parameter settings."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            LazyBaggingClassifier(base_estimator=base_estimator,
                                  random_state=rng,
                                  **params).fit(X_train, y_train).predict(X_test)
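ParameterGrid expands the Cartesian product of the listed values, so this smoke test fits 2 * 3 * 2 * 2 = 24 configurations for each of the six base estimators (144 fits in total). The count can be checked directly:

assert len(ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})) == 24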
Example #6
def test_multioutput():
    """Check multilabel classification and multioutput regression."""
    X, y = make_multilabel_classification(n_samples=100, n_labels=1,
                                          n_classes=5, random_state=0,
                                          return_indicator=True)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    est = LazyBaggingClassifier(random_state=0, n_estimators=10,
                                bootstrap=False)
    est.fit(X_train, y_train)

    assert_almost_equal(est.score(X_train, y_train), 1.)

    y_proba = est.predict_proba(X_test)
    y_log_proba = est.predict_log_proba(X_test)
    for p, log_p in zip(y_proba, y_log_proba):
        assert_array_almost_equal(p, np.exp(log_p))

    est = LazyBaggingRegressor(random_state=0, n_estimators=10,
                               bootstrap=False)
    est.fit(X_train, y_train)
    assert_almost_equal(est.score(X_train, y_train), 1.)
Example #7
def test_reproducibility():
    """Check that a fitted ensemble predicts deterministically."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    ensemble = LazyBaggingClassifier(random_state=rng)
    ensemble.fit(X_train, y_train)

    assert_array_equal(ensemble.predict(X_test), ensemble.predict(X_test))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)
    ensemble = LazyBaggingRegressor(random_state=rng)
    ensemble.fit(X_train, y_train)
    assert_array_equal(ensemble.predict(X_test), ensemble.predict(X_test))
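Note that this only asserts two predict calls on one fitted ensemble agree. A stronger check refits with the same integer seed (a sketch, assuming integer seeds behave as in scikit-learn, where identical seeds yield identical draws):

a = LazyBaggingClassifier(random_state=0).fit(X_train, y_train)
b = LazyBaggingClassifier(random_state=0).fit(X_train, y_train)
assert_array_equal(a.predict(X_test), b.predict(X_test))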
Example #8
def test_error():
    """Test that it gives proper exception on deficient input."""
    X, y = iris.data, iris.target
    base = DecisionTreeClassifier()

    # Test max_samples
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_samples=-1).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_samples=0.0).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_samples=2.0).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_samples=1000).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_samples="foobar").fit, X, y)

    # Test max_features
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_features=-1).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_features=0.0).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_features=2.0).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_features=5).fit, X, y)
    assert_raises(ValueError,
                  LazyBaggingClassifier(base, max_features="foobar").fit, X, y)

    # Test support of decision_function
    assert_raises(NotImplementedError,
                  LazyBaggingClassifier(base).fit(X, y).decision_function, X)
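The final assertion expects NotImplementedError because DecisionTreeClassifier offers no decision_function to aggregate. With a base estimator that does provide one, the call would presumably succeed (an assumption, mirroring sklearn's BaggingClassifier):

# Assumes LazyBaggingClassifier delegates decision_function to its base
# estimators when they define one, as sklearn's BaggingClassifier does.
LazyBaggingClassifier(SVC()).fit(X, y).decision_function(X)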