def test_probability():
    """Check predicted probabilities of the bagging classifier.

    For both a default ensemble and a degenerate one (``max_samples=5``,
    where some classes may be missing from a bootstrap sample), verify that
    each row of ``predict_proba`` sums to one and that ``predict_proba``
    equals ``exp(predict_log_proba)``.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng
    )

    # Floating-point divide/invalid warnings are silenced for the whole
    # check (both cases run under this context).
    with np.errstate(divide="ignore", invalid="ignore"):
        # Normal case
        ensemble = LazyBaggingClassifier(random_state=rng).fit(X_train, y_train)

        assert_array_almost_equal(
            np.sum(ensemble.predict_proba(X_test), axis=1),
            np.ones(len(X_test)),
        )
        assert_array_almost_equal(
            ensemble.predict_proba(X_test),
            np.exp(ensemble.predict_log_proba(X_test)),
        )

        # Degenerate case, where some classes are missing
        ensemble = LazyBaggingClassifier(
            base_estimator=DecisionTreeClassifier(),
            random_state=rng,
            max_samples=5,
        ).fit(X_train, y_train)

        assert_array_almost_equal(
            np.sum(ensemble.predict_proba(X_test), axis=1),
            np.ones(len(X_test)),
        )
        assert_array_almost_equal(
            ensemble.predict_proba(X_test),
            np.exp(ensemble.predict_log_proba(X_test)),
        )
def test_sparse_classification():
    """Verify that sparse and dense training inputs give identical predictions.

    Also checks that every fitted base estimator received the training data
    in the same sparse container type that was passed to the ensemble.
    """

    class CustomSVC(SVC):
        """SVC subclass that remembers the type of the data it was fit on."""

        def fit(self, X, y):
            super(CustomSVC, self).fit(X, y)
            self.data_type_ = type(X)
            return self

    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng
    )

    parameter_sets = [
        {"max_samples": 0.5, "max_features": 2,
         "bootstrap": True, "bootstrap_features": True},
        {"max_samples": 1.0, "max_features": 4,
         "bootstrap": True, "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False, "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True, "bootstrap_features": False},
    ]

    for to_sparse in [csc_matrix, csr_matrix]:
        X_train_sparse = to_sparse(X_train)
        X_test_sparse = to_sparse(X_test)

        for settings in parameter_sets:
            # Ensemble fitted on the sparse representation
            sparse_model = LazyBaggingClassifier(
                base_estimator=CustomSVC(), random_state=1, **settings
            )
            sparse_classifier = sparse_model.fit(X_train_sparse, y_train)
            sparse_predictions = sparse_classifier.predict(X_test_sparse)

            # Ensemble fitted on the dense representation
            dense_model = LazyBaggingClassifier(
                base_estimator=CustomSVC(), random_state=1, **settings
            )
            dense_predictions = dense_model.fit(X_train, y_train).predict(X_test)

            expected_type = type(X_train_sparse)
            seen_types = [
                estimator.data_type_
                for estimator in sparse_classifier.estimators_
            ]

            assert_array_equal(sparse_predictions, dense_predictions)
            assert all(seen == expected_type for seen in seen_types)
def test_base_estimator():
    """Verify the resolved ``base_estimator_`` for default and explicit inputs."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng
    )
    classifier_cases = [
        (None, DecisionTreeClassifier),
        (DecisionTreeClassifier(), DecisionTreeClassifier),
        (Perceptron(), Perceptron),
    ]
    for given, expected_cls in classifier_cases:
        fitted = LazyBaggingClassifier(given, random_state=0).fit(X_train, y_train)
        assert_true(isinstance(fitted.base_estimator_, expected_cls))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng
    )

    # No base estimator passed at all: the default must be a decision tree.
    fitted = LazyBaggingRegressor(random_state=0).fit(X_train, y_train)
    assert_true(isinstance(fitted.base_estimator_, DecisionTreeRegressor))

    regressor_cases = [
        (DecisionTreeRegressor(), DecisionTreeRegressor),
        (SVR(), SVR),
    ]
    for given, expected_cls in regressor_cases:
        fitted = LazyBaggingRegressor(given, random_state=0).fit(X_train, y_train)
        assert_true(isinstance(fitted.base_estimator_, expected_cls))
def test_gridsearch():
    """Check that bagging ensembles can be grid-searched."""
    # Transform iris into a binary classification task. Relabel a copy of
    # the targets: ``iris`` is shared with the other tests in this file, and
    # an in-place edit of ``iris.target`` would silently turn their
    # three-class problem into a two-class one depending on execution order.
    X = iris.data
    y = iris.target.copy()
    y[y == 2] = 1

    # Grid search with scoring based on decision_function
    parameters = {'n_estimators': (1, 2), 'base_estimator__C': (1, 2)}

    GridSearchCV(
        LazyBaggingClassifier(SVC()), parameters, scoring="roc_auc"
    ).fit(X, y)
def test_classification():
    """Smoke-test fit/predict over a grid of settings and base estimators."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng
    )

    grid = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [1, 2, 4],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    })

    candidates = [
        None,
        DummyClassifier(),
        Perceptron(),
        DecisionTreeClassifier(),
        KNeighborsClassifier(),
        SVC(),
    ]

    # The same rng instance is threaded through every ensemble, so the
    # iteration order below is kept as estimator-major, settings-minor.
    for estimator in candidates:
        for settings in grid:
            model = LazyBaggingClassifier(
                base_estimator=estimator, random_state=rng, **settings
            )
            fitted = model.fit(X_train, y_train)
            fitted.predict(X_test)
def test_multioutput():
    """Check bagging classifier and regressor on multilabel indicator targets."""
    X, y = make_multilabel_classification(
        n_samples=100,
        n_labels=1,
        n_classes=5,
        random_state=0,
        return_indicator=True,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Classifier: perfect training score, and log-probabilities consistent
    # with probabilities for every output.
    classifier = LazyBaggingClassifier(
        random_state=0, n_estimators=10, bootstrap=False
    )
    classifier.fit(X_train, y_train)
    assert_almost_equal(classifier.score(X_train, y_train), 1.)

    probabilities = classifier.predict_proba(X_test)
    log_probabilities = classifier.predict_log_proba(X_test)
    for proba, log_proba in zip(probabilities, log_probabilities):
        assert_array_almost_equal(proba, np.exp(log_proba))

    # Regressor: perfect training score on the same targets.
    regressor = LazyBaggingRegressor(
        random_state=0, n_estimators=10, bootstrap=False
    )
    regressor.fit(X_train, y_train)
    assert_almost_equal(regressor.score(X_train, y_train), 1.)
def test_reproducibility():
    """Check that a fitted ensemble returns identical predictions when called twice."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng
    )
    classifier = LazyBaggingClassifier(random_state=rng)
    classifier.fit(X_train, y_train)
    first_pass = classifier.predict(X_test)
    second_pass = classifier.predict(X_test)
    assert_array_equal(first_pass, second_pass)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng
    )
    regressor = LazyBaggingRegressor(random_state=rng)
    regressor.fit(X_train, y_train)
    first_pass = regressor.predict(X_test)
    second_pass = regressor.predict(X_test)
    assert_array_equal(first_pass, second_pass)
def test_error():
    """Verify that invalid settings raise the expected exceptions."""
    X, y = iris.data, iris.target
    base = DecisionTreeClassifier()

    # Test max_samples
    for bad_value in (-1, 0.0, 2.0, 1000, "foobar"):
        assert_raises(
            ValueError,
            LazyBaggingClassifier(base, max_samples=bad_value).fit,
            X, y,
        )

    # Test max_features
    for bad_value in (-1, 0.0, 2.0, 5, "foobar"):
        assert_raises(
            ValueError,
            LazyBaggingClassifier(base, max_features=bad_value).fit,
            X, y,
        )

    # Test support of decision_function
    assert_raises(
        NotImplementedError,
        LazyBaggingClassifier(base).fit(X, y).decision_function,
        X,
    )