def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(
        base_estimator=Perceptron(random_state=None), n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    random_state = np.random.RandomState(3)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(append=False)

    assert 3 == len(ensemble)
    assert 3 == len(ensemble.estimators_)

    assert isinstance(ensemble[0], Perceptron)
    assert ensemble[0].random_state is None
    assert isinstance(ensemble[1].random_state, int)
    assert isinstance(ensemble[2].random_state, int)
    assert ensemble[1].random_state != ensemble[2].random_state

    np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                        n_estimators=np.int32(3))
    np_int_ensemble.fit(iris.data, iris.target)

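# Context for the checks above (assumed behaviour of the private helper):
# BaseEnsemble._make_estimator clones ``base_estimator_``, seeds the clone's
# ``random_state`` when one is supplied, and appends it to ``estimators_``
# unless ``append=False`` -- which is why only three estimators remain even
# though the helper is called four times.
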
def test_perceptron_correctness():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    clf1 = MyPerceptron(n_iter=2)
    clf1.fit(X, y_bin)

    clf2 = Perceptron(max_iter=2, shuffle=False, tol=None)
    clf2.fit(X, y_bin)

    assert_array_almost_equal(clf1.w, clf2.coef_.ravel())

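# MyPerceptron is not defined in this excerpt.  The class below is an
# illustrative sketch of the reference implementation it is assumed to be
# (a plain perceptron update rule exposing weights ``w`` and bias ``b``);
# the original test module may define it differently.
import numpy as np


class MyPerceptron:
    def __init__(self, n_iter=1):
        self.n_iter = n_iter

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        for _ in range(self.n_iter):
            for i in range(n_samples):
                # Classic perceptron rule: update only on misclassified points.
                if self.predict(X[i])[0] != y[i]:
                    self.w += y[i] * X[i]
                    self.b += y[i]

    def project(self, X):
        # Signed distance to the decision boundary (up to a scaling factor).
        return np.dot(X, self.w) + self.b

    def predict(self, X):
        X = np.atleast_2d(X)
        return np.sign(self.project(X))
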
def test_set_random_states():
    # Linear Discriminant Analysis doesn't have random state: smoke test
    _set_random_states(LinearDiscriminantAnalysis(), random_state=17)

    clf1 = Perceptron(random_state=None)
    assert clf1.random_state is None
    # check that random_state=None still sets an int seed
    _set_random_states(clf1, None)
    assert isinstance(clf1.random_state, int)

    # check random_state fixes results in consistent initialisation
    _set_random_states(clf1, 3)
    assert isinstance(clf1.random_state, int)
    clf2 = Perceptron(random_state=None)
    _set_random_states(clf2, 3)
    assert clf1.random_state == clf2.random_state

    # nested random_state
    def make_steps():
        return [('sel', SelectFromModel(Perceptron(random_state=None))),
                ('clf', Perceptron(random_state=None))]

    est1 = Pipeline(make_steps())
    _set_random_states(est1, 3)
    assert isinstance(est1.steps[0][1].estimator.random_state, int)
    assert isinstance(est1.steps[1][1].random_state, int)
    assert (est1.get_params()['sel__estimator__random_state'] !=
            est1.get_params()['clf__random_state'])

    # ensure multiple random_state parameters are invariant to get_params()
    # iteration order
    class AlphaParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            params = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(params))

    class RevParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            params = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(params, reverse=True))

    for cls in [AlphaParamPipeline, RevParamPipeline]:
        est2 = cls(make_steps())
        _set_random_states(est2, 3)
        assert (est1.get_params()['sel__estimator__random_state'] ==
                est2.get_params()['sel__estimator__random_state'])
        assert (est1.get_params()['clf__random_state'] ==
                est2.get_params()['clf__random_state'])

def test_base_not_int_n_estimators():
    # Check that instantiating a BaseEnsemble with a string or a float as
    # n_estimators raises a ValueError demanding that n_estimators be
    # supplied as an integer.
    string_ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                        n_estimators='3')
    iris = load_iris()
    assert_raise_message(ValueError, "n_estimators must be an integer",
                         string_ensemble.fit, iris.data, iris.target)
    float_ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                       n_estimators=3.0)
    assert_raise_message(ValueError, "n_estimators must be an integer",
                         float_ensemble.fit, iris.data, iris.target)

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)

# Note: the parametrization below is assumed (it is not shown in this
# excerpt); the original test enumerates the proba-based multiclass ROC AUC
# scorer names.
@pytest.mark.parametrize("scorer_name",
                         ["roc_auc_ovr", "roc_auc_ovo",
                          "roc_auc_ovr_weighted", "roc_auc_ovo_weighted"])
def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
    # Perceptron has no predict_proba
    scorer = get_scorer(scorer_name)
    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                               random_state=0)
    lr = Perceptron().fit(X, y)
    msg = "'Perceptron' object has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=msg):
        scorer(lr, X, y)

def test_base_zero_n_estimators():
    # Check that instantiating a BaseEnsemble with n_estimators<=0 raises
    # a ValueError.
    ensemble = BaggingClassifier(base_estimator=Perceptron(),
                                 n_estimators=0)
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be greater than zero, got 0.",
                         ensemble.fit, iris.data, iris.target)

def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert grid_search.best_score_ == 1

def test_ovo_ties2():
    # test that ties can not only be won by the first two labels
    X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
    y_ref = np.array([2, 0, 1, 2])

    # cycle through labels so that each label wins once
    for i in range(3):
        y = (y_ref + i) % 3
        multi_clf = OneVsOneClassifier(Perceptron(shuffle=False, max_iter=4,
                                                  tol=None))
        ovo_prediction = multi_clf.fit(X, y).predict(X)
        assert ovo_prediction[0] == i % 3

def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron(max_iter=5).fit(X, y).score(X, y)
    assert train_score < 0.8

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y)
    assert train_score == 1.0

def test_base_estimator():
    # Check base_estimator and its default values.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(None, n_jobs=3,
                                 random_state=0).fit(X_train, y_train)
    assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier)

    ensemble = BaggingClassifier(DecisionTreeClassifier(), n_jobs=3,
                                 random_state=0).fit(X_train, y_train)
    assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier)

    ensemble = BaggingClassifier(Perceptron(), n_jobs=3,
                                 random_state=0).fit(X_train, y_train)
    assert isinstance(ensemble.base_estimator_, Perceptron)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(None, n_jobs=3,
                                random_state=0).fit(X_train, y_train)
    assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor)

    ensemble = BaggingRegressor(DecisionTreeRegressor(), n_jobs=3,
                                random_state=0).fit(X_train, y_train)
    assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor)

    ensemble = BaggingRegressor(SVR(), n_jobs=3,
                                random_state=0).fit(X_train, y_train)
    assert isinstance(ensemble.base_estimator_, SVR)

def test_ovo_ties():
    # Test that ties are broken using the decision function,
    # not defaulting to the smallest label
    X = np.array([[1, 2], [2, 1], [-2, 1], [-2, -1]])
    y = np.array([2, 0, 1, 2])
    multi_clf = OneVsOneClassifier(Perceptron(shuffle=False, max_iter=4,
                                              tol=None))
    ovo_prediction = multi_clf.fit(X, y).predict(X)
    ovo_decision = multi_clf.decision_function(X)

    # Classifiers are in order 0-1, 0-2, 1-2
    # Use decision_function to compute the votes and the normalized
    # sum_of_confidences, which is used to disambiguate when there is a tie
    # in votes.
    votes = np.round(ovo_decision)
    normalized_confidences = ovo_decision - votes

    # For the first point, there is one vote per class
    assert_array_equal(votes[0, :], 1)
    # For the rest, there is no tie and the prediction is the argmax
    assert_array_equal(np.argmax(votes[1:], axis=1), ovo_prediction[1:])
    # For the tie, the prediction is the class with the highest score
    assert ovo_prediction[0] == normalized_confidences[0].argmax()

def test_undefined_methods():
    clf = Perceptron(max_iter=100)
    for meth in ("predict_proba", "predict_log_proba"):
        assert_raises(AttributeError, lambda x: getattr(clf, x), meth)

def test_perceptron_accuracy():
    for data in (X, X_csr):
        clf = Perceptron(max_iter=100, tol=None, shuffle=False)
        clf.fit(data, y)
        score = clf.score(data, y)
        assert score > 0.7

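# The module-level fixtures X, y and X_csr used by the Perceptron tests above
# are not included in this excerpt.  The sketch below shows what they are
# assumed to look like (shuffled iris data plus a sorted CSR copy); the exact
# construction in the original module may differ.
import numpy as np
import scipy.sparse as sp
from sklearn.datasets import load_iris
from sklearn.utils import check_random_state

iris = load_iris()
_rng = check_random_state(12)
_indices = np.arange(iris.data.shape[0])
_rng.shuffle(_indices)
X = iris.data[_indices]
y = iris.target[_indices]
X_csr = sp.csr_matrix(X)
X_csr.sort_indices()
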