def test_cross_val_predict():
    """Run cross_val_predict on SlidingEstimator, with and without proba."""
    from sklearn.linear_model import LinearRegression
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.base import BaseEstimator, clone
    from sklearn.model_selection import cross_val_predict

    random_state = np.random.RandomState(42)
    X = random_state.randn(10, 1, 3)
    y = random_state.randint(0, 2, 10)

    # Default method (predict) on a regressor
    cross_val_predict(SlidingEstimator(LinearRegression()), X, y, cv=2)

    class Classifier(BaseEstimator):
        """Mock class that does not have classes_ attribute."""

        def __init__(self):
            self.base_estimator = LinearDiscriminantAnalysis()

        def fit(self, X, y):
            fitted = clone(self.base_estimator).fit(X, y)
            self.estimator_ = fitted
            return self

        def predict_proba(self, X):
            return self.estimator_.predict_proba(X)

    # predict_proba on an estimator that never exposes classes_ must fail
    with pytest.raises(AttributeError, match="classes_ attribute"):
        without_classes = SlidingEstimator(Classifier())
        cross_val_predict(without_classes, X, y,
                          method='predict_proba', cv=2)

    # ... while a regular classifier goes through
    with_classes = SlidingEstimator(LinearDiscriminantAnalysis())
    cross_val_predict(with_classes, X, y, method='predict_proba', cv=2)
def test_get_coef_inverse_transform(inverse, Scale, kwargs):
    """Test get_coef with and without inverse_transform.

    Parameters
    ----------
    inverse : bool
        Forwarded to ``get_coef`` as its third argument.
    Scale : callable
        Called as ``Scale(**kwargs)`` to build the first pipeline step. When
        it is ``_Noop`` the per-time-sample filters are additionally compared
        with the stacked filters.
    kwargs : dict
        Keyword arguments passed to ``Scale``.
    """
    from sklearn.linear_model import Ridge
    from sklearn.pipeline import make_pipeline

    lm_regression = LinearModel(Ridge())
    X, y, _ = _make_data(n_samples=1000, n_features=3, n_targets=1)
    # Check with search_light and combination of preprocessing ending with sl:
    # slider = SlidingEstimator(make_pipeline(StandardScaler(), lm_regression))
    # XXX : line above should work but does not as only last step is
    # used in get_coef ...
    slider = SlidingEstimator(make_pipeline(lm_regression))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clf = make_pipeline(Scale(**kwargs), slider)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse)
    filters = get_coef(clf, 'filters_', inverse)
    # The third positional argument of assert_array_equal is ``err_msg``, so
    # the expected shape has to be checked with its own assertion.
    assert_array_equal(filters.shape, patterns.shape)
    assert_array_equal(filters.shape, X.shape[1:])
    # the two time samples get inverted patterns
    assert_equal(patterns[0, 0], -patterns[0, 1])
    for t in [0, 1]:
        filters_t = get_coef(
            clf.named_steps['slidingestimator'].estimators_[t],
            'filters_', False)
        if Scale is _Noop:
            assert_array_equal(filters_t, filters[:, t])
def test_cross_val_multiscore():
    """Test cross_val_multiscore for computing scores on decoding over time.

    Compares ``cross_val_multiscore`` with ``cross_val_score`` on 2D data and
    with manually computed per-fold scores of a ``SlidingEstimator`` on 3D
    data, for both the 'accuracy' and the 'roc_auc' scoring.
    """
    from sklearn.model_selection import KFold, cross_val_score
    from sklearn.linear_model import LogisticRegression

    # compare to cross-val-score
    X = np.random.rand(20, 3)
    y = np.arange(20) % 2
    clf = LogisticRegression()
    # No random_state here: it has no effect unless shuffle=True, and recent
    # scikit-learn raises a ValueError for that combination.
    cv = KFold(2)
    assert_array_equal(cross_val_score(clf, X, y, cv=cv),
                       cross_val_multiscore(clf, X, y, cv=cv))

    # Test with search light
    X = np.random.rand(20, 4, 3)
    y = np.arange(20) % 2
    clf = SlidingEstimator(LogisticRegression(), scoring='accuracy')
    scores_acc = cross_val_multiscore(clf, X, y, cv=cv)
    assert_array_equal(np.shape(scores_acc), [2, 3])

    # check values
    scores_acc_manual = list()
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        scores_acc_manual.append(clf.score(X[test], y[test]))
    assert_array_equal(scores_acc, scores_acc_manual)

    # check scoring metric
    # raise an error if scoring is defined at cross-val-score level and
    # search light, because search light does not return a 1-dimensional
    # prediction.
    assert_raises(ValueError, cross_val_multiscore, clf, X, y, cv=cv,
                  scoring='roc_auc')
    clf = SlidingEstimator(LogisticRegression(), scoring='roc_auc')
    scores_auc = cross_val_multiscore(clf, X, y, cv=cv, n_jobs=1)
    scores_auc_manual = list()
    for train, test in cv.split(X, y):
        clf.fit(X[train], y[train])
        scores_auc_manual.append(clf.score(X[test], y[test]))
    assert_array_equal(scores_auc, scores_auc_manual)
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators.

    Covers, in order: the number of inverse functions returned by
    ``_get_inverse_funcs`` for (nested) pipelines, ``get_coef`` on a bare
    ``LinearModel`` and on a pipeline, ``get_coef`` on a ``SlidingEstimator``
    (alone and behind a ``Scaler``), and the filter/pattern shapes of a
    multi-target ``LinearModel``.
    """
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import Ridge, LinearRegression

    lm = LinearModel()
    assert (is_classifier(lm))
    lm = LinearModel(Ridge())
    assert (is_regressor(lm))

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=2000, n_features=3, n_targets=1)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert (expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(
            Inv(), NoInv()), Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (lm, make_pipeline(StandardScaler(), lm)):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert (filters[0] != patterns[0])
        n_chans = X.shape[1]
        # The third positional argument of assert_array_equal is ``err_msg``;
        # each expected shape needs its own assertion. Filters are 1-D here
        # (they equal ``coefs[0]`` above), one value per channel.
        assert_array_equal(filters.shape, patterns.shape)
        assert_array_equal(filters.shape, [n_chans])

    # Inverse transform linear model. Runs once, after the loop, on the last
    # estimator fitted above (the StandardScaler pipeline).
    # NOTE(review): presumably get_coef only supports inverse_transform on
    # estimators with transform steps -- confirm.
    filters_inv = get_coef(clf, 'filters_', True)
    assert (filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert (patterns[0] != patterns_inv[0])

    # Check with search_light and combination of preprocessing ending with sl:
    slider = SlidingEstimator(make_pipeline(StandardScaler(), lm))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape)
            assert_array_equal(filters.shape, X.shape[1:])
            # the two time samples get inverted patterns
            assert_equal(patterns[0, 0], -patterns[0, 1])
    # ``clf`` is now the bare slider and ``filters`` its non-inverted
    # coefficients: compare them with each per-time-sample estimator.
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])

    # Check patterns with more than 1 regressor
    # NOTE(review): n_features / n_targets are not used inside the loops, so
    # the same (5 features, 3 targets) configuration is checked on each of
    # the four iterations.
    for n_features in [1, 5]:
        for n_targets in [1, 3]:
            X, Y, A = _make_data(n_samples=5000, n_features=5, n_targets=3)
            lm = LinearModel(LinearRegression()).fit(X, Y)
            assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
            assert_array_equal(lm.filters_.shape, [3, 5])
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
            lm = LinearModel(Ridge(alpha=1)).fit(X, Y)
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
def test_cross_val_multiscore():
    """Check cross_val_multiscore against cross_val_score and manual folds."""
    from sklearn.model_selection import (KFold, StratifiedKFold,
                                         cross_val_score)
    from sklearn.linear_model import LogisticRegression, LinearRegression

    # The solver is only spelled out when check_version reports sklearn 0.20
    logreg_params = dict(random_state=0)
    if check_version('sklearn', '0.20'):
        logreg_params['solver'] = 'liblinear'
    logreg = LogisticRegression(**logreg_params)

    # --- 2D data: must agree with scikit-learn's cross_val_score
    X = np.random.rand(20, 3)
    y = np.arange(20) % 2
    cv = KFold(2, random_state=0, shuffle=True)
    reference = cross_val_score(logreg, X, y, cv=cv)
    assert_array_equal(reference, cross_val_multiscore(logreg, X, y, cv=cv))

    # --- 3D data through a search light, accuracy scoring
    X = np.random.rand(20, 4, 3)
    y = np.arange(20) % 2
    sliding = SlidingEstimator(logreg, scoring='accuracy')
    scores_acc = cross_val_multiscore(sliding, X, y, cv=cv)
    assert_array_equal(np.shape(scores_acc), [2, 3])

    # the same scores, fold by fold
    manual_acc = [
        sliding.fit(X[train], y[train]).score(X[test], y[test])
        for train, test in cv.split(X, y)
    ]
    assert_array_equal(scores_acc, manual_acc)

    # --- scoring metric
    # Defining the scoring at the cross-validation level on top of a search
    # light must raise, because the search light does not return a
    # 1-dimensional prediction.
    with pytest.raises(ValueError):
        cross_val_multiscore(sliding, X, y, cv=cv, scoring='roc_auc')

    sliding = SlidingEstimator(logreg, scoring='roc_auc')
    scores_auc = cross_val_multiscore(sliding, X, y, cv=cv, n_jobs=1)
    manual_auc = [
        sliding.fit(X[train], y[train]).score(X[test], y[test])
        for train, test in cv.split(X, y)
    ]
    assert_array_equal(scores_auc, manual_auc)

    # --- integer cv
    # Indirectly test that cross_val_multiscore rightly detects the type of
    # estimator and generates a StratifiedKFold for classifiers and a KFold
    # otherwise.
    X = np.random.randn(1000, 3)
    y = np.arange(1000) % 2  # 0 on even indices, 1 on odd ones
    reg = LinearRegression()
    for cross_val in (cross_val_score, cross_val_multiscore):
        for estimator, explicit_cv in ((logreg, StratifiedKFold(2)),
                                       (reg, KFold(2))):
            manual = cross_val(estimator, X, y, cv=explicit_cv)
            auto = cross_val(estimator, X, y, cv=2)
            assert_array_equal(manual, auto)
def test_search_light():
    """Exercise SlidingEstimator: init, fit, predict, score, n_jobs, nesting."""
    from sklearn.linear_model import Ridge, LogisticRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.metrics import roc_auc_score, make_scorer
    from sklearn.ensemble import BaggingClassifier
    from sklearn.base import is_classifier

    X, y = make_data()
    n_epochs, _, n_time = X.shape

    # ---- construction
    with pytest.raises(ValueError):
        SlidingEstimator('foo')
    assert not is_classifier(SlidingEstimator(Ridge()))
    sl = SlidingEstimator(LogisticRegression())
    assert is_classifier(sl)

    # ---- fitting
    assert_equal(repr(sl)[:18], '<SlidingEstimator(')
    sl.fit(X, y)
    assert_equal(repr(sl)[-28:], ', fitted with 10 estimators>')
    with pytest.raises(ValueError):
        sl.fit(X[1:], y)
    with pytest.raises(ValueError):
        sl.fit(X[:, :, 0], y)
    sl.fit(X, y, sample_weight=np.ones_like(y))

    # ---- predictions
    with pytest.raises(ValueError):
        sl.predict(X[:, :, :2])
    y_pred = sl.predict(X)
    assert y_pred.dtype == int
    assert_array_equal(y_pred.shape, [n_epochs, n_time])
    y_proba = sl.predict_proba(X)
    assert y_proba.dtype == float
    assert_array_equal(y_proba.shape, [n_epochs, n_time, 2])

    # ---- scoring
    score = sl.score(X, y)
    assert_array_equal(score.shape, [n_time])
    assert np.sum(np.abs(score)) != 0
    assert score.dtype == float

    sl = SlidingEstimator(LogisticRegression())
    assert_equal(sl.scoring, None)

    # invalid scoring values only fail once score() is called
    for bad_scoring in ('foo', 999):
        sl = SlidingEstimator(LogisticRegression(), scoring=bad_scoring)
        sl.fit(X, y)
        with pytest.raises((ValueError, TypeError)):
            sl.score(X, y)

    # Check sklearn's roc_auc fix: scikit-learn/scikit-learn#6874
    # -- 3 class problem
    sl = SlidingEstimator(LogisticRegression(random_state=0),
                          scoring='roc_auc')
    y = np.arange(len(X)) % 3
    sl.fit(X, y)
    with pytest.raises(ValueError):
        sl.score(X, y)
    # -- 2 class problem not in [0, 1]
    y = np.arange(len(X)) % 2 + 1
    sl.fit(X, y)
    score = sl.score(X, y)
    expected_auc = [roc_auc_score(y - 1, this_pred - 1)
                    for this_pred in sl.decision_function(X).T]
    assert_array_equal(score, expected_auc)
    y = np.arange(len(X)) % 2

    # A bare metric function is not accepted as scoring
    sl1 = SlidingEstimator(LogisticRegression(), scoring=roc_auc_score)
    sl1.fit(X, y)
    with pytest.raises(ValueError):
        sl1.score(X, y)

    # The scorer name is
    sl1 = SlidingEstimator(LogisticRegression(), scoring='roc_auc')
    sl1.fit(X, y)
    noise = np.random.RandomState(0)
    X = noise.randn(*X.shape)  # score on Gaussian noise of the same shape
    score_sl = sl1.score(X, y)
    assert_array_equal(score_sl.shape, [n_time])
    assert score_sl.dtype == float

    # Same scores when the scorer is applied to each estimator by hand
    scorer = make_scorer(roc_auc_score, needs_threshold=True)
    score_manual = [scorer(est, x, y)
                    for est, x in zip(sl1.estimators_, X.transpose(2, 0, 1))]
    assert_array_equal(score_manual, score_sl)

    # ---- n_jobs
    sl = SlidingEstimator(LogisticRegression(random_state=0), n_jobs=1,
                          scoring='roc_auc')
    score_1job = sl.fit(X, y).score(X, y)
    sl.n_jobs = 2
    score_njobs = sl.fit(X, y).score(X, y)
    assert_array_equal(score_1job, score_njobs)
    sl.predict(X)

    # n_jobs > n_estimators
    X_single = X[..., [0]]
    sl.fit(X_single, y)
    sl.predict(X[..., [0]])

    # ---- SlidingEstimator as a pipeline step
    class _LogRegTransformer(LogisticRegression):
        # XXX needs transformer in pipeline to get first proba only
        def transform(self, X):
            proba = super(_LogRegTransformer, self).predict_proba(X)
            return proba[..., 1]

    pipe = make_pipeline(SlidingEstimator(_LogRegTransformer()),
                         LogisticRegression())
    pipe.fit(X, y)
    pipe.predict(X)

    # ---- n-dimensional feature space
    X = np.random.rand(10, 3, 4, 2)
    y = np.arange(10) % 2
    y_preds = []
    for n_jobs in (1, 2):
        base = make_pipeline(Vectorizer(), LogisticRegression())
        pipe = SlidingEstimator(base, n_jobs=n_jobs)
        y_preds.append(pipe.fit(X, y).predict(X))
        vectorizer = pipe.estimators_[0].steps[0][1]
        assert_array_equal(vectorizer.features_shape_, [3, 4])
    assert_array_equal(y_preds[0], y_preds[1])

    # ---- Bagging classifiers
    X = np.random.rand(10, 3, 4)
    for n_jobs in (1, 2):
        pipe = SlidingEstimator(BaggingClassifier(None, 2), n_jobs=n_jobs)
        pipe.fit(X, y)
        pipe.score(X, y)
        assert isinstance(pipe.estimators_[0], BaggingClassifier)
def test_search_light():
    """Test SlidingEstimator.

    Walks through construction, fitting, prediction and scoring (including
    the 'roc_auc' scorer with non-{0, 1} labels), then checks that results do
    not depend on ``n_jobs`` and that the estimator works inside a pipeline,
    on n-dimensional features and around a ``BaggingClassifier``.
    """
    import warnings
    from sklearn.linear_model import Ridge, LogisticRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.metrics import roc_auc_score, make_scorer
    # Record (and discard) any warning emitted by this import. This replaces
    # ``pytest.warns(None)``, which pytest 7 deprecated and pytest 8 rejects.
    with warnings.catch_warnings(record=True):  # NumPy module import
        warnings.simplefilter('always')
        from sklearn.ensemble import BaggingClassifier
    from sklearn.base import is_classifier

    logreg = LogisticRegression(solver='liblinear', multi_class='ovr',
                                random_state=0)

    X, y = make_data()
    n_epochs, _, n_time = X.shape
    # init
    pytest.raises(ValueError, SlidingEstimator, 'foo')
    sl = SlidingEstimator(Ridge())
    assert (not is_classifier(sl))
    sl = SlidingEstimator(LogisticRegression(solver='liblinear'))
    assert (is_classifier(sl))
    # fit
    assert_equal(sl.__repr__()[:18], '<SlidingEstimator(')
    sl.fit(X, y)
    assert_equal(sl.__repr__()[-28:], ', fitted with 10 estimators>')
    pytest.raises(ValueError, sl.fit, X[1:], y)
    pytest.raises(ValueError, sl.fit, X[:, :, 0], y)
    sl.fit(X, y, sample_weight=np.ones_like(y))

    # transforms
    pytest.raises(ValueError, sl.predict, X[:, :, :2])
    y_pred = sl.predict(X)
    assert (y_pred.dtype == int)
    assert_array_equal(y_pred.shape, [n_epochs, n_time])
    y_proba = sl.predict_proba(X)
    assert (y_proba.dtype == float)
    assert_array_equal(y_proba.shape, [n_epochs, n_time, 2])

    # score
    score = sl.score(X, y)
    assert_array_equal(score.shape, [n_time])
    assert (np.sum(np.abs(score)) != 0)
    assert (score.dtype == float)

    sl = SlidingEstimator(logreg)
    assert_equal(sl.scoring, None)

    # Scoring method
    for scoring in ['foo', 999]:
        sl = SlidingEstimator(logreg, scoring=scoring)
        sl.fit(X, y)
        pytest.raises((ValueError, TypeError), sl.score, X, y)

    # Check sklearn's roc_auc fix: scikit-learn/scikit-learn#6874
    # -- 3 class problem
    sl = SlidingEstimator(logreg, scoring='roc_auc')
    y = np.arange(len(X)) % 3
    sl.fit(X, y)
    pytest.raises(ValueError, sl.score, X, y)
    # -- 2 class problem not in [0, 1]
    y = np.arange(len(X)) % 2 + 1
    sl.fit(X, y)
    score = sl.score(X, y)
    assert_array_equal(score, [roc_auc_score(y - 1, _y_pred - 1)
                               for _y_pred in sl.decision_function(X).T])
    y = np.arange(len(X)) % 2

    # Cannot pass a metric as a scoring parameter
    sl1 = SlidingEstimator(logreg, scoring=roc_auc_score)
    sl1.fit(X, y)
    pytest.raises(ValueError, sl1.score, X, y)

    # Now use string as scoring
    sl1 = SlidingEstimator(logreg, scoring='roc_auc')
    sl1.fit(X, y)
    rng = np.random.RandomState(0)
    X = rng.randn(*X.shape)  # randomize X to avoid AUCs in [0, 1]
    score_sl = sl1.score(X, y)
    assert_array_equal(score_sl.shape, [n_time])
    assert (score_sl.dtype == float)

    # Check that scoring was applied adequately
    scoring = make_scorer(roc_auc_score, needs_threshold=True)
    score_manual = [scoring(est, x, y) for est, x in zip(
                    sl1.estimators_, X.transpose(2, 0, 1))]
    assert_array_equal(score_manual, score_sl)

    # n_jobs
    sl = SlidingEstimator(logreg, n_jobs=1, scoring='roc_auc')
    score_1job = sl.fit(X, y).score(X, y)
    sl.n_jobs = 2
    score_njobs = sl.fit(X, y).score(X, y)
    assert_array_equal(score_1job, score_njobs)
    sl.predict(X)

    # n_jobs > n_estimators
    sl.fit(X[..., [0]], y)
    sl.predict(X[..., [0]])

    # pipeline

    class _LogRegTransformer(LogisticRegression):
        # XXX needs transformer in pipeline to get first proba only
        def __init__(self):
            super(_LogRegTransformer, self).__init__()
            # same settings as ``logreg`` above
            self.multi_class = 'ovr'
            self.random_state = 0
            self.solver = 'liblinear'

        def transform(self, X):
            return super(_LogRegTransformer, self).predict_proba(X)[..., 1]

    pipe = make_pipeline(SlidingEstimator(_LogRegTransformer()),
                         logreg)
    pipe.fit(X, y)
    pipe.predict(X)

    # n-dimensional feature space
    X = np.random.rand(10, 3, 4, 2)
    y = np.arange(10) % 2
    y_preds = list()
    for n_jobs in [1, 2]:
        pipe = SlidingEstimator(
            make_pipeline(Vectorizer(), logreg), n_jobs=n_jobs)
        y_preds.append(pipe.fit(X, y).predict(X))
        features_shape = pipe.estimators_[0].steps[0][1].features_shape_
        assert_array_equal(features_shape, [3, 4])
    assert_array_equal(y_preds[0], y_preds[1])

    # Bagging classifiers
    X = np.random.rand(10, 3, 4)
    for n_jobs in (1, 2):
        pipe = SlidingEstimator(BaggingClassifier(None, 2), n_jobs=n_jobs)
        pipe.fit(X, y)
        pipe.score(X, y)
        assert (isinstance(pipe.estimators_[0], BaggingClassifier))
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators.

    Covers, in order: classifier/regressor detection of ``LinearModel``
    (including when it wraps a ``GridSearchCV``), the number of inverse
    functions returned by ``_get_inverse_funcs`` for (nested) pipelines,
    ``get_coef`` on bare and pipelined linear models, ``get_coef`` on a
    ``SlidingEstimator`` (alone and behind a ``Scaler``), and the
    filter/pattern shapes of a multi-target ``LinearModel``.
    """
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import svm
    from sklearn.linear_model import Ridge, LinearRegression
    from sklearn.model_selection import GridSearchCV

    lm_classification = LinearModel()
    assert (is_classifier(lm_classification))

    lm_regression = LinearModel(Ridge())
    assert (is_regressor(lm_regression))

    parameters = {'kernel': ['linear'], 'C': [1, 10]}
    lm_gs_classification = LinearModel(
        GridSearchCV(svm.SVC(), parameters, cv=2, refit=True, iid=False,
                     n_jobs=1))
    assert (is_classifier(lm_gs_classification))

    lm_gs_regression = LinearModel(
        GridSearchCV(svm.SVR(), parameters, cv=2, refit=True, iid=False,
                     n_jobs=1))
    assert (is_regressor(lm_gs_regression))

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert (expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(
            Inv(), NoInv()), Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for classification/regression estimators and pipelines
    rng = np.random.RandomState(0)
    for clf in (lm_regression,
                lm_gs_classification,
                make_pipeline(StandardScaler(), lm_classification),
                make_pipeline(StandardScaler(), lm_gs_regression)):

        # generate some categorical/continuous data
        # according to the type of estimator.
        if is_classifier(clf):
            n, n_features = 1000, 3
            X = rng.rand(n, n_features)
            y = np.arange(n) % 2
        else:
            X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)
            y = np.ravel(y)

        clf.fit(X, y)

        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        # The wrapped model is the last pipeline step's, or the estimator's
        # own; a GridSearchCV exposes its coefficients via best_estimator_.
        linear_model = clf.steps[-1][-1] if hasattr(clf, 'steps') else clf
        model = linear_model.model
        if hasattr(model, 'best_estimator_'):
            coefs = model.best_estimator_.coef_
        else:
            coefs = model.coef_
        if coefs.ndim == 2 and coefs.shape[0] == 1:
            coefs = coefs[0]
        assert_array_equal(filters, coefs)
        patterns = get_coef(clf, 'patterns_', False)
        assert (filters[0] != patterns[0])
        n_chans = X.shape[1]
        # The third positional argument of assert_array_equal is ``err_msg``;
        # each expected shape needs its own assertion. Filters are 1-D here
        # (see the squeeze of ``coefs`` above), one value per channel.
        assert_array_equal(filters.shape, patterns.shape)
        assert_array_equal(filters.shape, [n_chans])

    # Inverse transform linear model. Runs once, after the loop, on the last
    # estimator fitted above (a StandardScaler pipeline).
    # NOTE(review): presumably get_coef only supports inverse_transform on
    # estimators with transform steps -- confirm.
    filters_inv = get_coef(clf, 'filters_', True)
    assert (filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert (patterns[0] != patterns_inv[0])

    # Check with search_light and combination of preprocessing ending with sl:
    slider = SlidingEstimator(make_pipeline(StandardScaler(), lm_regression))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape)
            assert_array_equal(filters.shape, X.shape[1:])
            # the two time samples get inverted patterns
            assert_equal(patterns[0, 0], -patterns[0, 1])
    # ``clf`` is now the bare slider and ``filters`` its non-inverted
    # coefficients: compare them with each per-time-sample estimator.
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])

    # Check patterns with more than 1 regressor
    # NOTE(review): n_features / n_targets are not used inside the loops, so
    # the same (5 features, 3 targets) configuration is checked on each of
    # the four iterations.
    for n_features in [1, 5]:
        for n_targets in [1, 3]:
            X, Y, A = _make_data(n_samples=3000, n_features=5, n_targets=3)
            lm = LinearModel(LinearRegression()).fit(X, Y)
            assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
            assert_array_equal(lm.filters_.shape, [3, 5])
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
            lm = LinearModel(Ridge(alpha=1)).fit(X, Y)
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
def test_get_coef():
    """Test the retrieval of linear coefficients (filters and patterns) from
    simple and pipeline estimators.

    Covers, in order: the number of inverse functions returned by
    ``_get_inverse_funcs`` for (nested) pipelines, ``get_coef`` on a bare
    ``LinearModel`` and on a pipeline, the pattern values against a manual
    computation, and ``get_coef`` on a ``SlidingEstimator`` (alone and behind
    a ``Scaler``).
    """
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LinearRegression

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    # Keep the seeded generator and draw from it: a bare
    # ``np.random.RandomState(0)`` call does not seed ``np.random``.
    rng = np.random.RandomState(0)
    n_samples, n_features = 20, 3
    y = (np.arange(n_samples) % 2) * 2 - 1
    w = rng.randn(n_features, 1)
    X = w.dot(y[np.newaxis, :]).T + rng.randn(n_samples, n_features)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert_true(expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(
            Inv(), NoInv()), Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (LinearModel(), make_pipeline(StandardScaler(), LinearModel())):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert_true(filters[0] != patterns[0])
        n_chans = X.shape[1]
        # The third positional argument of assert_array_equal is ``err_msg``;
        # each expected shape needs its own assertion. Filters are 1-D here
        # (they equal ``coefs[0]`` above), one value per channel.
        assert_array_equal(filters.shape, patterns.shape)
        assert_array_equal(filters.shape, [n_chans])

    # Inverse transform linear model. Runs once, after the loop, on the last
    # estimator fitted above (the StandardScaler pipeline).
    # NOTE(review): presumably get_coef only supports inverse_transform on
    # estimators with transform steps -- confirm.
    filters_inv = get_coef(clf, 'filters_', True)
    assert_true(filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert_true(patterns[0] != patterns_inv[0])

    # Check patterns values
    clf = make_pipeline(StandardScaler(), LinearModel(LinearRegression()))
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', True)
    # Redo the computation by hand: standardize, least-squares fit,
    # covariance times coefficients, then undo the standardization.
    mean, std = X.mean(0), X.std(0)
    X = (X - mean) / std
    coef = np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
    patterns_manual = np.cov(X.T).dot(coef)
    assert_array_almost_equal(patterns, patterns_manual * std + mean)

    # Check with search_light and combination of preprocessing ending with sl:
    n_samples, n_features, n_times = 20, 3, 5
    y = np.arange(n_samples) % 2
    X = rng.rand(n_samples, n_features, n_times)
    slider = SlidingEstimator(make_pipeline(StandardScaler(), LinearModel()))

    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape)
            assert_array_equal(filters.shape, [n_features, n_times])
    # ``clf`` is now the bare slider and ``filters`` its non-inverted
    # coefficients: compare them with each per-time-sample estimator.
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])
def test_get_coef():
    """Test the retrieval of linear coefficients (filters and patterns) from
    simple and pipeline estimators.

    Covers, in order: the number of inverse functions returned by
    ``_get_inverse_funcs`` for (nested) pipelines, ``get_coef`` on a bare
    ``LinearModel`` and on a pipeline, the pattern values against a manual
    computation, and ``get_coef`` on a ``SlidingEstimator``.
    """
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LinearRegression

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    # Keep the seeded generator and draw from it: a bare
    # ``np.random.RandomState(0)`` call does not seed ``np.random``.
    rng = np.random.RandomState(0)
    n_samples, n_features = 20, 3
    y = (np.arange(n_samples) % 2) * 2 - 1
    w = rng.randn(n_features, 1)
    X = w.dot(y[np.newaxis, :]).T + rng.randn(n_samples, n_features)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert_true(expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(
            Inv(), NoInv()), Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (LinearModel(), make_pipeline(StandardScaler(), LinearModel())):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert_true(filters[0] != patterns[0])
        n_chans = X.shape[1]
        # The third positional argument of assert_array_equal is ``err_msg``;
        # each expected shape needs its own assertion. Filters are 1-D here
        # (they equal ``coefs[0]`` above), one value per channel.
        assert_array_equal(filters.shape, patterns.shape)
        assert_array_equal(filters.shape, [n_chans])

    # Inverse transform linear model. Runs once, after the loop, on the last
    # estimator fitted above (the StandardScaler pipeline).
    # NOTE(review): presumably get_coef only supports inverse_transform on
    # estimators with transform steps -- confirm.
    filters_inv = get_coef(clf, 'filters_', True)
    assert_true(filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert_true(patterns[0] != patterns_inv[0])

    # Check patterns values
    clf = make_pipeline(StandardScaler(), LinearModel(LinearRegression()))
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', True)
    # Redo the computation by hand: standardize, least-squares fit,
    # covariance times coefficients, then undo the standardization.
    mean, std = X.mean(0), X.std(0)
    X = (X - mean) / std
    coef = np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
    patterns_manual = np.cov(X.T).dot(coef)
    assert_array_almost_equal(patterns, patterns_manual * std + mean)

    # Check with search_light:
    n_samples, n_features, n_times = 20, 3, 5
    y = np.arange(n_samples) % 2
    X = rng.rand(n_samples, n_features, n_times)
    clf = SlidingEstimator(make_pipeline(StandardScaler(), LinearModel()))
    clf.fit(X, y)
    for inverse in (True, False):
        patterns = get_coef(clf, 'patterns_', inverse)
        filters = get_coef(clf, 'filters_', inverse)
        assert_array_equal(filters.shape, patterns.shape)
        assert_array_equal(filters.shape, [n_features, n_times])
    # ``filters`` now holds the non-inverted coefficients: compare them with
    # each per-time-sample estimator.
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])