Example #1
0
def test_sparse_regression():
    """Check regression for various parameter settings on sparse input.

    For each sparse format (CSC and CSR) and each bagging parameter set, an
    ensemble fitted on the sparse training data must predict exactly what an
    identically seeded ensemble fitted on the dense data predicts, and every
    fitted base estimator must have received the sparse container type.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set"""

        def fit(self, X, y):
            super(CustomSVR, self).fit(X, y)
            # Remember the container type passed to fit so the test can
            # verify the ensemble forwarded sparse data to its members.
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        sparse_type = type(X_train_sparse)

        for params in parameter_sets:

            # Trained on sparse format
            sparse_regressor = LazyBaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_regressor.predict(X_test_sparse)

            # Trained on dense format (same seed, so results must match)
            dense_results = LazyBaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            types = [i.data_type_ for i in sparse_regressor.estimators_]

            assert_array_equal(sparse_results, dense_results)
            assert all(t == sparse_type for t in types)
Example #2
0
def test_single_estimator():
    """Check that a one-member ensemble reproduces its base estimator.

    With ``n_estimators=1`` and both sample and feature bootstrapping turned
    off, the bagged k-neighbors regressor must predict exactly what a
    standalone ``KNeighborsRegressor`` fitted on the same data predicts.
    """
    rng = check_random_state(0)
    split = train_test_split(boston.data, boston.target, random_state=rng)
    X_train, X_test, y_train, y_test = split

    # Singleton ensemble: one estimator, no resampling of rows or columns.
    bagged = LazyBaggingRegressor(
        base_estimator=KNeighborsRegressor(),
        n_estimators=1,
        bootstrap=False,
        bootstrap_features=False,
        random_state=rng
    )
    singleton = bagged.fit(X_train, y_train)

    # Plain estimator trained on the very same data.
    reference = KNeighborsRegressor().fit(X_train, y_train)

    singleton_pred = singleton.predict(X_test)
    reference_pred = reference.predict(X_test)
    assert_array_equal(singleton_pred, reference_pred)
Example #3
0
def test_base_estimator():
    """Check base_estimator and its default values.

    After fitting, ``base_estimator_`` must be an instance of the estimator
    type that was supplied; when none is supplied (``None`` for the
    classifier, no argument for the regressor) a decision tree of the
    matching kind is expected.
    """
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng)

    # (estimator handed to the ensemble, type expected in base_estimator_)
    classifier_cases = [
        (None, DecisionTreeClassifier),
        (DecisionTreeClassifier(), DecisionTreeClassifier),
        (Perceptron(), Perceptron),
    ]
    for estimator, expected_type in classifier_cases:
        fitted = LazyBaggingClassifier(estimator, random_state=0)
        fitted = fitted.fit(X_train, y_train)
        assert_true(isinstance(fitted.base_estimator_, expected_type))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # Default case: no base estimator argument at all.
    fitted = LazyBaggingRegressor(random_state=0).fit(X_train, y_train)
    assert_true(isinstance(fitted.base_estimator_, DecisionTreeRegressor))

    regressor_cases = [
        (DecisionTreeRegressor(), DecisionTreeRegressor),
        (SVR(), SVR),
    ]
    for estimator, expected_type in regressor_cases:
        fitted = LazyBaggingRegressor(estimator, random_state=0)
        fitted = fitted.fit(X_train, y_train)
        assert_true(isinstance(fitted.base_estimator_, expected_type))
Example #4
0
def test_bootstrap_samples():
    """Test that bootstrapping samples generates non-perfect base estimators.

    A single decision tree fitted on the training set serves as the
    reference. An ensemble of trees built on the full sample without
    bootstrap must score the same on the training data; with bootstrap its
    training score must be strictly lower.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    single_tree = DecisionTreeRegressor().fit(X_train, y_train)

    def fit_bagged_trees(bootstrap):
        # Bagged decision trees over the whole training sample; only the
        # bootstrap switch differs between the two scenarios below.
        model = LazyBaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                     max_samples=1.0,
                                     bootstrap=bootstrap,
                                     random_state=rng)
        return model.fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    bagged = fit_bagged_trees(False)
    tree_score = single_tree.score(X_train, y_train)
    bagged_score = bagged.score(X_train, y_train)
    assert_equal(tree_score, bagged_score)

    # with bootstrap, trees are no longer perfect on the training set
    bagged = fit_bagged_trees(True)
    tree_score = single_tree.score(X_train, y_train)
    bagged_score = bagged.score(X_train, y_train)
    assert_greater(tree_score, bagged_score)
Example #5
0
def test_reproducibility():
    """Check that repeated predictions of a fitted ensemble are identical.

    For both the classifier (iris) and the regressor (boston), calling
    ``predict`` twice on the same test data must give equal arrays.
    """
    rng = check_random_state(0)

    # Classification first, then regression; both share the same rng.
    scenarios = [
        (LazyBaggingClassifier, iris),
        (LazyBaggingRegressor, boston),
    ]
    for make_ensemble, dataset in scenarios:
        X_train, X_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, random_state=rng)

        model = make_ensemble(random_state=rng)
        model.fit(X_train, y_train)

        first_pass = model.predict(X_test)
        second_pass = model.predict(X_test)
        assert_array_equal(first_pass, second_pass)
Example #6
0
def test_regression():
    """Check regression for various parameter settings.

    Smoke test: every combination of base estimator and bagging parameters
    must fit on the training data and predict on the test data without
    raising. No assertion is made on the predicted values.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    param_grid = ParameterGrid({"max_samples": [0.5, 1.0],
                                "max_features": [0.5, 1.0],
                                "bootstrap": [True, False],
                                "bootstrap_features": [True, False]})

    # None exercises the ensemble's default base estimator.
    candidates = [
        None,
        DummyRegressor(),
        DecisionTreeRegressor(),
        KNeighborsRegressor(),
        SVR(),
    ]

    for estimator in candidates:
        for settings in param_grid:
            model = LazyBaggingRegressor(base_estimator=estimator,
                                         random_state=rng,
                                         **settings)
            fitted = model.fit(X_train, y_train)
            fitted.predict(X_test)
Example #7
0
def test_multioutput():
    """Check the ensembles on a generated multilabel problem.

    Both the classifier and the regressor, built from 10 estimators without
    bootstrap, must score 1.0 on the training data. For the classifier, each
    item of ``predict_proba`` must equal the exponential of the matching
    item of ``predict_log_proba``.
    """
    X, y = make_multilabel_classification(n_samples=100, n_labels=1,
                                          n_classes=5, random_state=0,
                                          return_indicator=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Classifier
    classifier = LazyBaggingClassifier(random_state=0, n_estimators=10,
                                       bootstrap=False)
    classifier.fit(X_train, y_train)
    train_accuracy = classifier.score(X_train, y_train)
    assert_almost_equal(train_accuracy, 1.)

    probabilities = classifier.predict_proba(X_test)
    log_probabilities = classifier.predict_log_proba(X_test)
    for proba, log_proba in zip(probabilities, log_probabilities):
        assert_array_almost_equal(proba, np.exp(log_proba))

    # Regressor
    regressor = LazyBaggingRegressor(random_state=0, n_estimators=10,
                                     bootstrap=False)
    regressor.fit(X_train, y_train)
    train_r2 = regressor.score(X_train, y_train)
    assert_almost_equal(train_r2, 1.)