def test_stacking_classifier_sample_weight_fit_param():
    """Check that sample_weight is passed to all invocations of fit.

    Both the base estimator and the final estimator are CheckingClassifier
    instances created with ``expected_sample_weight=True``.
    """
    unit_weights = np.ones(X_iris.shape[0])
    base_estimators = [("lr", CheckingClassifier(expected_sample_weight=True))]
    meta_estimator = CheckingClassifier(expected_sample_weight=True)

    stacker = StackingClassifier(
        estimators=base_estimators,
        final_estimator=meta_estimator,
    )
    stacker.fit(X_iris, y_iris, sample_weight=unit_weights)
def test_checking_classifier_fit_params(iris):
    """Check the error raised when the number of samples is not the one expected.

    A ``sample_weight`` array half as long as ``X`` is passed to ``fit``.
    """
    X, y = iris
    half_length_weight = np.ones(len(X) // 2)
    clf = CheckingClassifier(expected_fit_params=["sample_weight"])

    with pytest.raises(AssertionError, match="Fit parameter sample_weight"):
        clf.fit(X, y, sample_weight=half_length_weight)
def test_checking_classifier_fit_params(iris):
    """Check the error raised when the number of samples is not the one expected.

    The full ValueError message is compared for equality, not matched as a
    regular expression.
    """
    X, y = iris
    expected_msg = f"sample_weight.shape == ({len(X) // 2},), expected ({len(X)},)!"
    short_weight = np.ones(len(X) // 2)
    clf = CheckingClassifier(expected_sample_weight=True)

    with pytest.raises(ValueError) as exc_info:
        clf.fit(X, y, sample_weight=short_weight)

    raised_msg = exc_info.value.args[0]
    assert raised_msg == expected_msg
def test_checking_classifier_methods_to_check(iris, methods_to_check, predict_method):
    """Check that methods_to_check allows to bypass checks.

    ``check_X`` is ``sparse.issparse`` and the input comes straight from the
    ``iris`` fixture; only methods listed in ``methods_to_check`` are expected
    to raise.
    """
    X, y = iris
    clf = CheckingClassifier(check_X=sparse.issparse, methods_to_check=methods_to_check)
    clf.fit(X, y)

    if predict_method not in methods_to_check:
        # The method is not checked, so the call must go through.
        getattr(clf, predict_method)(X)
    else:
        with pytest.raises(AssertionError):
            getattr(clf, predict_method)(X)
def test_ecoc_delegate_sparse_base_estimator():
    """Non-regression test for sparse input handling in OutputCodeClassifier.

    See https://github.com/scikit-learn/scikit-learn/issues/17218
    """
    X, y = iris.data, iris.target
    X_sp = sp.csc_matrix(X)
    sparse_error = "A sparse matrix was passed"

    # create an estimator that does not support sparse input
    dense_only_params = {"ensure_2d": True, "accept_sparse": False}
    dense_only_clf = CheckingClassifier(
        check_X=check_array, check_X_params=dense_only_params
    )
    ecoc = OutputCodeClassifier(dense_only_clf, random_state=0)

    with pytest.raises(TypeError, match=sparse_error):
        ecoc.fit(X_sp, y)

    ecoc.fit(X, y)
    with pytest.raises(TypeError, match=sparse_error):
        ecoc.predict(X_sp)

    # smoke test to check when sparse input should be supported
    ecoc = OutputCodeClassifier(LinearSVC(random_state=0))
    ecoc.fit(X_sp, y).predict(X_sp)
    assert len(ecoc.estimators_) == 4
def test_calibration_with_sample_weight_base_estimator(sample_weight, data):
    """Tests that sample_weight is passed to the underlying base estimator.

    The base estimator is a CheckingClassifier created with
    ``expected_sample_weight=True``.
    """
    X, y = data
    calibrated = CalibratedClassifierCV(
        CheckingClassifier(expected_sample_weight=True)
    )
    calibrated.fit(X, y, sample_weight=sample_weight)
def test_gridsearch_feature_extractor():
    """Run a grid search over a FeatureExtractor pipeline and check its results.

    The final step is a CheckingClassifier whose ``check_X`` compares the
    trailing dimensions of its input against ``X.shape[1]``.
    """
    X = data
    y = np.ones((X.shape[0],))  # dummy labels

    def has_expected_trailing_shape(arr):
        return arr.shape[1:] == (X.shape[1],)

    extractor = FeatureExtractor(sfreq=sfreq, selected_funcs=["higuchi_fd"])
    classifier = CheckingClassifier(check_X=has_expected_trailing_shape)
    pipe = Pipeline([("FE", extractor), ("clf", classifier)])

    params_grid = {"FE__higuchi_fd__kmax": [5, 10]}
    gs = GridSearchCV(estimator=pipe, param_grid=params_grid, cv=3)
    gs.fit(X, y)

    assert_equal(hasattr(gs, "cv_results_"), True)
def test_calibration_with_fit_params_inconsistent_length(data):
    """fit_params having different length than data should raise the correct
    error message.

    The fit parameter ``a`` holds only the first five entries of ``y``.
    """
    X, y = data
    short_fit_params = {"a": y[:5]}
    expected_msg = (
        r"Found input variables with inconsistent numbers of "
        r"samples: \[" + str(N_SAMPLES) + r", 5\]"
    )

    base_clf = CheckingClassifier(expected_fit_params=short_fit_params)
    calibrated = CalibratedClassifierCV(base_clf)

    with pytest.raises(ValueError, match=expected_msg):
        calibrated.fit(X, y, **short_fit_params)
def test_calibration_with_fit_params(fit_params_type, data):
    """Tests that fit_params are passed to the underlying base estimator.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12384
    """
    X, y = data
    # Both fit parameters are `y` converted to the requested container type.
    fit_params = {
        name: _convert_container(y, fit_params_type) for name in ("a", "b")
    }

    base_clf = CheckingClassifier(expected_fit_params=["a", "b"])
    calibrated = CalibratedClassifierCV(base_clf)
    calibrated.fit(X, y, **fit_params)
def test_checking_classifier_missing_fit_params(iris):
    """Fitting without a fit parameter listed in expected_fit_params must fail."""
    X, y = iris
    required_params = ["sample_weight"]
    clf = CheckingClassifier(expected_fit_params=required_params)

    with pytest.raises(AssertionError, match="Expected fit parameter"):
        clf.fit(X, y)
def test_checking_classifier(iris, input_type):
    """Check that the CheckingClassifier outputs what we expect.

    Covers fitted attributes, ``predict``, ``score`` before and after setting
    ``foo_param``, and the shapes and values of ``predict_proba`` and
    ``decision_function`` for the three-class and the two-class problem.
    """
    X, y = iris
    X = _convert_container(X, input_type)
    clf = CheckingClassifier()
    clf.fit(X, y)

    # fitted attributes
    assert_array_equal(clf.classes_, np.unique(y))
    assert len(clf.classes_) == 3
    assert clf.n_features_in_ == 4

    # predictions are all zeros
    y_pred = clf.predict(X)
    assert_array_equal(y_pred, np.zeros(y_pred.size, dtype=int))

    # the score is 0 by default and 1 once foo_param is set to 10
    assert clf.score(X) == pytest.approx(0)
    clf.set_params(foo_param=10)
    assert clf.fit(X, y).score(X) == pytest.approx(1)

    # multiclass: both outputs are (150, 3) with ones in the first column only
    for method_name in ("predict_proba", "decision_function"):
        output = getattr(clf, method_name)(X)
        assert output.shape == (150, 3)
        assert_allclose(output[:, 0], 1)
        assert_allclose(output[:, 1:], 0)

    # check the shape in case of binary classification
    binary_mask = np.logical_or(y == 0, y == 1)
    X = _safe_indexing(X, binary_mask)
    y = _safe_indexing(y, binary_mask)
    clf.fit(X, y)

    y_proba = clf.predict_proba(X)
    assert y_proba.shape == (100, 2)
    assert_allclose(y_proba[:, 0], 1)
    assert_allclose(y_proba[:, 1], 0)

    y_decision = clf.decision_function(X)
    assert y_decision.shape == (100,)
    assert_allclose(y_decision, 0)
def test_check_X_on_predict_fail(iris, pred_func):
    """A prediction method must raise once check_X is swapped for ``_fail``.

    The classifier is fitted with ``check_X=_success`` first; the check is
    replaced only afterwards through ``set_params``.
    """
    X, y = iris
    clf = CheckingClassifier(check_X=_success)
    clf = clf.fit(X, y)
    clf.set_params(check_X=_fail)

    with pytest.raises(AssertionError):
        getattr(clf, pred_func)(X)
def test_check_X_on_predict_success(iris, pred_func):
    """A prediction method must run without error when check_X is ``_success``."""
    X, y = iris
    fitted_clf = CheckingClassifier(check_X=_success).fit(X, y)
    predict = getattr(fitted_clf, pred_func)
    predict(X)
def test_check_on_fit_fail(iris, kwargs):
    """Fitting a CheckingClassifier built from ``kwargs`` must raise AssertionError."""
    X, y = iris
    failing_clf = CheckingClassifier(**kwargs)

    with pytest.raises(AssertionError):
        failing_clf.fit(X, y)
def test_check_on_fit_success(iris, kwargs):
    """Fitting a CheckingClassifier built from ``kwargs`` must not raise."""
    X, y = iris
    passing_clf = CheckingClassifier(**kwargs)
    passing_clf.fit(X, y)
def test_checking_classifier_missing_fit_params(iris):
    """Fitting without sample_weight must fail when expected_sample_weight=True."""
    X, y = iris
    clf = CheckingClassifier(expected_sample_weight=True)

    with pytest.raises(AssertionError, match="Expected sample_weight to be passed"):
        clf.fit(X, y)
def test_checking_classifier_with_params(iris):
    """Exercise check_X with a custom callable and with check_X_params.

    The first classifier uses a callable that raises ValueError on non-sparse
    input. The second forwards ``accept_sparse=False`` to ``check_array``.
    """
    X, y = iris
    X_sparse = sparse.csr_matrix(X)

    def require_sparse(X):
        # Raise instead of returning False so the test can match the message.
        if sparse.issparse(X):
            return True
        raise ValueError("X is not sparse")

    def validate_with_check_array(X, **params):
        # check_array's return value is discarded; always report success.
        check_array(X, **params)
        return True

    sparse_only_clf = CheckingClassifier(check_X=require_sparse)
    with pytest.raises(ValueError, match="X is not sparse"):
        sparse_only_clf.fit(X, y)
    sparse_only_clf.fit(X_sparse, y)

    dense_only_clf = CheckingClassifier(
        check_X=validate_with_check_array,
        check_X_params={"accept_sparse": False},
    )
    dense_only_clf.fit(X, y)
    with pytest.raises(TypeError, match="A sparse matrix was passed"):
        dense_only_clf.fit(X_sparse, y)
def test_checking_classifier_with_params(iris):
    """Exercise check_X with ``sparse.issparse`` and with check_X_params.

    The first classifier uses ``sparse.issparse`` as its check. The second
    passes ``accept_sparse=False`` to ``check_array``.
    """
    X, y = iris
    X_sparse = sparse.csr_matrix(X)

    sparse_only_clf = CheckingClassifier(check_X=sparse.issparse)
    with pytest.raises(AssertionError):
        sparse_only_clf.fit(X, y)
    sparse_only_clf.fit(X_sparse, y)

    dense_only_params = {"accept_sparse": False}
    dense_only_clf = CheckingClassifier(
        check_X=check_array, check_X_params=dense_only_params
    )
    dense_only_clf.fit(X, y)
    with pytest.raises(TypeError, match="A sparse matrix was passed"):
        dense_only_clf.fit(X_sparse, y)