def test_decoder_multiclass_classification():
    """Check Decoder and FREMClassifier on a 4-class classification problem.

    The test fits on synthetic data and evaluates predictions on the same
    data, covering: a masker object passed as ``mask``, the
    ``'dummy_classifier'`` estimator with accuracy scoring, several
    ``screening_percentile`` values, FREM with and without clustering, and
    cross-validation schemes with and without ``groups``.
    """
    X, y = make_classification(n_samples=200, n_features=125, scale=3.0,
                               n_informative=5, n_classes=4, random_state=42)
    X, mask = to_niimgs(X, [5, 5, 5])

    # check classification with masker object
    model = Decoder(mask=NiftiMasker())
    model.fit(X, y)
    y_pred = model.predict(X)
    assert accuracy_score(y, y_pred) > 0.95

    # check classification with masker object and dummy classifier
    model = Decoder(estimator='dummy_classifier', mask=NiftiMasker(),
                    scoring="accuracy")
    model.fit(X, y)
    y_pred = model.predict(X)
    assert model.scoring == "accuracy"
    # 4-class classification
    assert accuracy_score(y, y_pred) > 0.2
    assert model.score(X, y) == accuracy_score(y, y_pred)

    # check different screening_percentile value
    for screening_percentile in [100, 20, None]:
        model = Decoder(mask=mask, screening_percentile=screening_percentile)
        model.fit(X, y)
        y_pred = model.predict(X)
        assert accuracy_score(y, y_pred) > 0.95

    # check FREM with clustering or not
    for clustering_percentile in [100, 99]:
        for estimator in ['svc_l2', 'svc_l1']:
            model = FREMClassifier(
                estimator=estimator, mask=mask,
                clustering_percentile=clustering_percentile,
                screening_percentile=90, cv=5)
            model.fit(X, y)
            y_pred = model.predict(X)
            assert model.scoring == "roc_auc"
            assert accuracy_score(y, y_pred) > 0.9

    # check cross-validation scheme and fit attribute with groups enabled
    rand_local = np.random.RandomState(42)
    for cv in [KFold(n_splits=5), LeaveOneGroupOut()]:
        model = Decoder(estimator='svc', mask=mask, standardize=True, cv=cv)
        # Group labels are only drawn for the group-based splitter.
        if isinstance(cv, LeaveOneGroupOut):
            groups = rand_local.binomial(2, 0.3, size=len(y))
        else:
            groups = None
        model.fit(X, y, groups=groups)
        # Predict with the model fitted in this iteration; without this the
        # assertion would silently re-check the stale predictions left over
        # from the FREM loop above.
        y_pred = model.predict(X)
        assert accuracy_score(y, y_pred) > 0.9
def test_decoder_dummy_classifier():
    """Check Decoder behaviour with dummy classifiers on imbalanced labels.

    Labels are built so that 80% of the samples belong to class 1.0. The test
    then covers: the default ``'dummy_classifier'`` estimator, a callable
    scorer, scoring before fitting, the ``prior`` / ``most_frequent`` /
    ``constant`` strategies, an invalid scoring name, and ``scoring=None``.

    NOTE(review): ``dummy_classifier`` is not defined in this function; it is
    presumably a module-level estimator shared between tests, and
    ``set_params`` mutates it in place -- confirm that later tests do not
    rely on its initial strategy.
    """
    n_samples = 400
    # Only the features are used; the labels are rebuilt below.
    X, _ = make_classification(n_samples=n_samples, n_features=125,
                               scale=3.0, n_informative=5, n_classes=2,
                               random_state=42)
    X, mask = to_niimgs(X, [5, 5, 5])

    # We make 80% of y to have value of 1.0 to check whether the stratified
    # strategy returns a proportion prediction value of 1.0 of roughly 80%
    proportion = 0.8
    y = np.zeros(n_samples)
    y[:int(proportion * n_samples)] = 1.0

    model = Decoder(estimator='dummy_classifier', mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    # Two-sided check: without abs() any under-prediction of class 1.0
    # (even 0%) would satisfy the inequality.
    assert abs(np.sum(y_pred == 1.0) / n_samples - proportion) < 0.05

    # Set scoring of decoder with a callable
    accuracy_scorer = get_scorer('accuracy')
    model = Decoder(estimator='dummy_classifier', mask=mask,
                    scoring=accuracy_scorer)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert model.scoring == accuracy_scorer
    assert model.score(X, y) == accuracy_score(y, y_pred)

    # An error should be raised when trying to compute the score without
    # having called fit first.
    model = Decoder(estimator='dummy_classifier', mask=mask)
    with pytest.raises(
            NotFittedError,
            match="This Decoder instance is not fitted yet."):
        model.score(X, y)

    # Decoder object use other strategy for dummy classifier.
    param = dict(strategy='prior')
    dummy_classifier.set_params(**param)
    model = Decoder(estimator=dummy_classifier, mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    # Every prediction must be the majority class (element-wise comparison).
    assert np.all(y_pred == 1.0)
    assert roc_auc_score(y, y_pred) == 0.5

    # Same purpose with the above but for most_frequent strategy
    param = dict(strategy='most_frequent')
    dummy_classifier.set_params(**param)
    model = Decoder(estimator=dummy_classifier, mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert np.all(y_pred == 1.0)

    # Returns model coefficients for dummy estimators as None
    assert model.coef_ is None
    # Dummy output are nothing but the attributes of the dummy estimators
    assert model.dummy_output_ is not None
    assert model.cv_scores_ is not None

    # decoder object use other scoring metric for dummy classifier
    model = Decoder(estimator='dummy_classifier', mask=mask,
                    scoring='roc_auc')
    model.fit(X, y)
    assert np.mean(model.cv_scores_[0]) >= 0.45

    # Raises a not implemented error with strategy constant
    param = dict(strategy='constant')
    dummy_classifier.set_params(**param)
    model = Decoder(estimator=dummy_classifier, mask=mask)
    with pytest.raises(NotImplementedError):
        model.fit(X, y)

    # Raises an error with unknown scoring metrics
    model = Decoder(estimator=dummy_classifier, mask=mask, scoring="foo")
    with pytest.raises(ValueError,
                       match="'foo' is not a valid scoring value"):
        model.fit(X, y)

    # Default scoring
    model = Decoder(estimator='dummy_classifier', scoring=None)
    assert model.scoring is None
    model.fit(X, y)
    assert model.scorer_ == get_scorer("accuracy")
    assert model.score(X, y) > 0.5
def test_decoder_binary_classification():
    """Check Decoder and FREMClassifier on a binary classification problem.

    The test fits on synthetic data and evaluates on the same data,
    covering: a masker object passed as ``mask``, the ``'logistic_l2'``
    estimator, dummy classifiers with several strategies and scorers, error
    cases (scoring before fit, ``constant`` strategy, unknown scoring),
    several ``screening_percentile`` values, FREM with and without
    clustering, and cross-validation schemes with and without ``groups``.

    NOTE(review): ``dummy_classifier`` is not defined in this function; it is
    presumably a module-level estimator shared between tests, and
    ``set_params`` mutates it in place -- confirm that other tests do not
    rely on its initial strategy.
    """
    X, y = make_classification(n_samples=200, n_features=125, scale=3.0,
                               n_informative=5, n_classes=2, random_state=42)
    X, mask = to_niimgs(X, [5, 5, 5])

    # check classification with masker object
    model = Decoder(mask=NiftiMasker())
    model.fit(X, y)
    y_pred = model.predict(X)
    assert model.scoring == "roc_auc"
    assert model.score(X, y) == 1.0
    assert accuracy_score(y, y_pred) > 0.95

    # decoder object use predict_proba for scoring with logistic model
    model = Decoder(estimator='logistic_l2', mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert accuracy_score(y, y_pred) > 0.95

    # decoder object use prior as strategy (default) for dummy classifier
    model = Decoder(estimator='dummy_classifier', mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert model.scoring == "roc_auc"
    assert model.score(X, y) == 0.5
    assert accuracy_score(y, y_pred) == 0.5

    # Set scoring of decoder with a callable
    accuracy_scorer = get_scorer('accuracy')
    model = Decoder(estimator='dummy_classifier', mask=mask,
                    scoring=accuracy_scorer)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert model.scoring == accuracy_scorer
    assert model.score(X, y) == accuracy_score(y, y_pred)

    # An error should be raised when trying to compute
    # the score without having called fit first.
    model = Decoder(estimator='dummy_classifier', mask=mask)
    with pytest.raises(NotFittedError,
                       match="This Decoder instance is not fitted yet."):
        model.score(X, y)

    # decoder object use other strategy for dummy classifier
    param = dict(strategy='stratified')
    dummy_classifier.set_params(**param)
    model = Decoder(estimator=dummy_classifier, mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert accuracy_score(y, y_pred) >= 0.5
    # Returns model coefficients for dummy estimators as None
    assert model.coef_ is None
    # Dummy output are nothing but the attributes of the dummy estimators
    assert model.dummy_output_ is not None
    assert model.cv_scores_ is not None
    # model attribute n_outputs_ depending on target y ndim
    assert model.n_outputs_ == 1

    # decoder object use other scoring metric for dummy classifier
    model = Decoder(estimator='dummy_classifier', mask=mask)
    model.fit(X, y)
    y_pred = model.predict(X)
    assert roc_auc_score(y, y_pred) == 0.5

    model = Decoder(estimator='dummy_classifier', mask=mask,
                    scoring='roc_auc')
    model.fit(X, y)
    assert np.mean(model.cv_scores_[0]) >= 0.5

    # Raises a not implemented error with strategy constant
    param = dict(strategy='constant')
    dummy_classifier.set_params(**param)
    model = Decoder(estimator=dummy_classifier, mask=mask)
    with pytest.raises(NotImplementedError):
        model.fit(X, y)

    # Raises an error with unknown scoring metrics
    model = Decoder(estimator=dummy_classifier, mask=mask, scoring="foo")
    with pytest.raises(ValueError,
                       match="'foo' is not a valid scoring value"):
        model.fit(X, y)

    # Default scoring
    model = Decoder(estimator='dummy_classifier', scoring=None)
    assert model.scoring is None
    model.fit(X, y)
    assert model.scorer_ == get_scorer("accuracy")
    assert model.score(X, y) == 0.5

    # check different screening_percentile value
    for screening_percentile in [100, 20, None]:
        model = Decoder(mask=mask, screening_percentile=screening_percentile)
        model.fit(X, y)
        y_pred = model.predict(X)
        assert accuracy_score(y, y_pred) > 0.95

    # check FREM with clustering or not
    for clustering_percentile in [100, 99]:
        model = FREMClassifier(estimator='logistic_l2', mask=mask,
                               clustering_percentile=clustering_percentile,
                               screening_percentile=90, cv=5)
        model.fit(X, y)
        y_pred = model.predict(X)
        assert accuracy_score(y, y_pred) > 0.9

    # check cross-validation scheme and fit attribute with groups enabled
    rand_local = np.random.RandomState(42)
    for cv in [KFold(n_splits=5), LeaveOneGroupOut()]:
        model = Decoder(estimator='svc', mask=mask, standardize=True, cv=cv)
        # Group labels are only drawn for the group-based splitter.
        if isinstance(cv, LeaveOneGroupOut):
            groups = rand_local.binomial(2, 0.3, size=len(y))
        else:
            groups = None
        model.fit(X, y, groups=groups)
        # Predict with the model fitted in this iteration; without this the
        # assertion would silently re-check the stale predictions left over
        # from the FREM loop above.
        y_pred = model.predict(X)
        assert accuracy_score(y, y_pred) > 0.9