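# ---------------------------------------------------------------------------
# Scaffolding sketch: the tests below rely on module-level imports, a
# ``_make_data`` helper, and a ``_Noop`` pass-through transformer that are not
# part of this excerpt. The block below is a minimal reconstruction under
# assumptions: the exact noise/offset used in ``_make_data`` and the ``_Noop``
# implementation are illustrative, not the original definitions.
# ---------------------------------------------------------------------------
import numpy as np
import pytest
from numpy.testing import (assert_allclose, assert_array_almost_equal,
                           assert_array_equal, assert_array_less,
                           assert_equal)
from sklearn.base import (BaseEstimator, TransformerMixin, is_classifier,
                          is_regressor)

from mne import EpochsArray, create_info
from mne.decoding import (GeneralizingEstimator, LinearModel, Scaler,
                          SlidingEstimator, Vectorizer, cross_val_multiscore,
                          get_coef)
from mne.decoding.base import _get_inverse_funcs


def _make_data(n_samples=1000, n_features=5, n_targets=3):
    """Generate noisy linear data (assumed helper).

    X mixes independent latent targets Y through a matrix A plus noise,
    X = Y @ A.T + noise, so a linear model's patterns should recover A.
    """
    rng = np.random.RandomState(0)
    Y = rng.randn(n_samples, n_targets)  # latent targets
    A = rng.randn(n_features, n_targets)  # mixing matrix
    X = Y.dot(A.T)
    X += rng.randn(n_samples, n_features)  # add noise
    X += rng.rand(n_features)  # add an offset
    return X, Y, A


class _Noop(TransformerMixin, BaseEstimator):
    """Pass-through transformer (assumed helper used for parametrization)."""

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X.copy()

    inverse_transform = transform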
@pytest.mark.parametrize('n_classes, n_channels, n_times', [
    (2, 3, 2),
    (3, 2, 1),
])  # parametrization reconstructed from the signature; values are assumed
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Test a full example with pattern extraction."""
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold
    data = np.zeros((10 * n_classes, n_channels, n_times))
    # Make only the first channel informative
    for ii in range(n_classes):
        data[ii * 10:(ii + 1) * 10, 0] = ii
    events = np.zeros((len(data), 3), int)
    events[:, 0] = np.arange(len(events))
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)
    clf = make_pipeline(
        Scaler(epochs.info), Vectorizer(),
        LinearModel(LogisticRegression(random_state=0, multi_class='ovr')),
    )
    scorer = 'roc_auc_ovr_weighted'
    time_gen = GeneralizingEstimator(clf, scorer, verbose=True)
    X = epochs.get_data()
    y = epochs.events[:, 2]
    n_splits = 3
    cv = StratifiedKFold(n_splits=n_splits)
    scores = cross_val_multiscore(time_gen, X, y, cv=cv, verbose=True)
    want = (n_splits,)
    if n_times > 1:
        want += (n_times, n_times)
    assert scores.shape == want
    assert_array_less(0.8, scores)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)  # no other channels useful
@pytest.mark.parametrize('inverse', (True, False))
@pytest.mark.parametrize('Scale, kwargs', [
    (Scaler, dict(info=None, scalings='mean')),
    (_Noop, dict()),
])  # parametrization reconstructed from the signature; values are assumed
def test_get_coef_inverse_transform(inverse, Scale, kwargs):
    """Test get_coef with and without inverse_transform."""
    from sklearn.linear_model import Ridge
    from sklearn.pipeline import make_pipeline
    lm_regression = LinearModel(Ridge())
    X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)
    # Check with search_light and combination of preprocessing ending with sl:
    # slider = SlidingEstimator(make_pipeline(StandardScaler(), lm_regression))
    # XXX : line above should work but does not as only last step is
    # used in get_coef ...
    slider = SlidingEstimator(make_pipeline(lm_regression))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clf = make_pipeline(Scale(**kwargs), slider)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse)
    filters = get_coef(clf, 'filters_', inverse)
    # filters and patterns should both have shape X.shape[1:]
    assert_array_equal(filters.shape, patterns.shape)
    assert_array_equal(filters.shape, X.shape[1:])
    # the two time samples get inverted patterns
    assert_equal(patterns[0, 0], -patterns[0, 1])
    for t in [0, 1]:
        filters_t = get_coef(
            clf.named_steps['slidingestimator'].estimators_[t],
            'filters_', False)
        if Scale is _Noop:
            assert_array_equal(filters_t, filters[:, t])
@pytest.mark.parametrize('n_features', [1, 5])
@pytest.mark.parametrize('n_targets', [1, 3])  # parametrization assumed
def test_get_coef_multiclass(n_features, n_targets):
    """Test get_coef on multiclass problems."""
    # Check patterns with more than 1 regressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.pipeline import make_pipeline
    X, Y, A = _make_data(
        n_samples=30000, n_features=n_features, n_targets=n_targets)
    lm = LinearModel(LinearRegression()).fit(X, Y)
    assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
    if n_targets == 1:
        want_shape = (n_features,)
    else:
        want_shape = (n_targets, n_features)
    assert_array_equal(lm.filters_.shape, want_shape)
    if n_features > 1 and n_targets > 1:
        assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
    lm = LinearModel(Ridge(alpha=0))
    clf = make_pipeline(lm)
    clf.fit(X, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    assert_allclose(lm.patterns_, coef, atol=1e-5)
    # With epochs, scaler, and vectorizer (typical use case)
    X_epo = X.reshape(X.shape + (1,))
    info = create_info(n_features, 1000., 'eeg')
    lm = LinearModel(Ridge(alpha=1))
    clf = make_pipeline(
        Scaler(info, scalings=dict(eeg=1.)),  # XXX adding this step breaks
        Vectorizer(),
        lm,
    )
    clf.fit(X_epo, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    lm_patterns_ = lm.patterns_[..., np.newaxis]
    assert_allclose(lm_patterns_, coef, atol=1e-5)
    # Check that fit parameters (e.g., sample_weight) can be passed through
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
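# The "typical use case" above stops at asserting values; downstream, the
# inverse-transformed patterns are usually wrapped in an Evoked object for
# topographic inspection. The sketch below is illustrative only and not part
# of the original tests: the function name ``_example_inspect_patterns`` and
# the data sizes are assumptions, though ``EvokedArray`` is the standard MNE
# container for (n_channels, n_times) data.
def _example_inspect_patterns():  # illustrative sketch, not collected
    """Turn inverse-transformed patterns into an Evoked for inspection."""
    from sklearn.linear_model import Ridge
    from sklearn.pipeline import make_pipeline
    from mne import EvokedArray
    X, Y, A = _make_data(n_samples=1000, n_features=5, n_targets=1)
    X_epo = X.reshape(X.shape + (1,))  # (n_epochs, n_channels, n_times=1)
    info = create_info(5, 1000., 'eeg')
    clf = make_pipeline(Scaler(info, scalings=dict(eeg=1.)), Vectorizer(),
                        LinearModel(Ridge()))
    clf.fit(X_epo, np.ravel(Y))
    # back in channel space, patterns has shape (n_channels, n_times)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    evoked = EvokedArray(patterns, info, tmin=0.)
    return evoked  # e.g., evoked.plot_topomap() with a montage set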
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators."""
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import svm
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import GridSearchCV

    lm_classification = LinearModel()
    assert is_classifier(lm_classification)

    lm_regression = LinearModel(Ridge())
    assert is_regressor(lm_regression)

    parameters = {'kernel': ['linear'], 'C': [1, 10]}
    lm_gs_classification = LinearModel(
        GridSearchCV(svm.SVC(), parameters, cv=2, refit=True, n_jobs=1))
    assert is_classifier(lm_gs_classification)

    lm_gs_regression = LinearModel(
        GridSearchCV(svm.SVR(), parameters, cv=2, refit=True, n_jobs=1))
    assert is_regressor(lm_gs_regression)

    # Define a classifier, an invertible transformer and a non-invertible one.
    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)

    # I. Test inverse function
    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]
    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert expected_n == len(_get_inverse_funcs(est))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't a classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for classification/regression estimators and pipelines
    rng = np.random.RandomState(0)
    for clf in (lm_regression,
                lm_gs_classification,
                make_pipeline(StandardScaler(), lm_classification),
                make_pipeline(StandardScaler(), lm_gs_regression)):
        # Generate categorical or continuous data
        # according to the type of estimator.
        if is_classifier(clf):
            n, n_features = 1000, 3
            X = rng.rand(n, n_features)
            y = np.arange(n) % 2
        else:
            X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)
            y = np.ravel(y)
        clf.fit(X, y)

        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            model = clf.steps[-1][-1].model
        else:
            model = clf.model
        if hasattr(model, 'best_estimator_'):
            # Linear model with GridSearchCV
            coefs = model.best_estimator_.coef_
        else:
            # Standard linear model
            coefs = model.coef_
        if coefs.ndim == 2 and coefs.shape[0] == 1:
            coefs = coefs[0]
        assert_array_equal(filters, coefs)

        patterns = get_coef(clf, 'patterns_', False)
        assert filters[0] != patterns[0]
        n_chans = X.shape[1]
        # filters and patterns should both be 1-D of length n_chans
        assert_array_equal(filters.shape, patterns.shape)
        assert_array_equal(filters.shape, (n_chans,))

        # Inverse transform linear model
        filters_inv = get_coef(clf, 'filters_', True)
        assert filters[0] != filters_inv[0]
        patterns_inv = get_coef(clf, 'patterns_', True)
        assert patterns[0] != patterns_inv[0]
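# The test above asserts that filters and patterns differ, which is easiest to
# see with correlated channels: the classic two-channel suppression example,
# where the filter weights a channel that carries no signal at all (to cancel
# noise), while the pattern correctly localizes the signal. This is a hedged,
# purely illustrative sketch of that effect; the function name and thresholds
# are assumptions, not part of the original tests.
def _example_filters_vs_patterns():  # illustrative sketch, not collected
    """Show why filters and patterns differ on correlated channels."""
    rng = np.random.RandomState(0)
    n = 10000
    signal = rng.randn(n)
    noise = rng.randn(n)
    # channel 0 carries signal plus noise; channel 1 carries only the noise
    X = np.c_[signal + noise, noise]
    lm = LinearModel().fit(X, (signal > 0).astype(int))
    # the filter needs substantial weight on channel 1 to cancel the noise
    assert abs(lm.filters_[1]) > 0.5 * abs(lm.filters_[0])
    # but the pattern on channel 1 is comparatively small: no signal there
    assert abs(lm.patterns_[1]) < 0.5 * abs(lm.patterns_[0])
    return lm.filters_, lm.patterns_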
def test_linearmodel():
    """Test LinearModel class for computing filters and patterns."""
    # check categorical target fit in standard linear model
    from sklearn.linear_model import LinearRegression
    rng = np.random.RandomState(0)
    clf = LinearModel()
    n, n_features = 20, 3
    X = rng.rand(n, n_features)
    y = np.arange(n) % 2
    clf.fit(X, y)
    assert_equal(clf.filters_.shape, (n_features,))
    assert_equal(clf.patterns_.shape, (n_features,))
    with pytest.raises(ValueError):
        wrong_X = rng.rand(n, n_features, 99)
        clf.fit(wrong_X, y)

    # check categorical target fit in standard linear model with GridSearchCV
    from sklearn import svm
    from sklearn.model_selection import GridSearchCV
    parameters = {'kernel': ['linear'], 'C': [1, 10]}
    clf = LinearModel(
        GridSearchCV(svm.SVC(), parameters, cv=2, refit=True, n_jobs=1))
    clf.fit(X, y)
    assert_equal(clf.filters_.shape, (n_features,))
    assert_equal(clf.patterns_.shape, (n_features,))
    with pytest.raises(ValueError):
        wrong_X = rng.rand(n, n_features, 99)
        clf.fit(wrong_X, y)

    # check continuous target fit in standard linear model with GridSearchCV
    n_targets = 1
    Y = rng.rand(n, n_targets)
    clf = LinearModel(
        GridSearchCV(svm.SVR(), parameters, cv=2, refit=True, n_jobs=1))
    clf.fit(X, np.ravel(Y))  # fit on the continuous target, not y
    assert_equal(clf.filters_.shape, (n_features,))
    assert_equal(clf.patterns_.shape, (n_features,))
    with pytest.raises(ValueError):
        wrong_y = rng.rand(n, n_features, 99)
        clf.fit(X, wrong_y)

    # check multi-target fit in standard linear model
    n_targets = 5
    Y = rng.rand(n, n_targets)
    clf = LinearModel(LinearRegression())
    clf.fit(X, Y)
    assert_equal(clf.filters_.shape, (n_targets, n_features))
    assert_equal(clf.patterns_.shape, (n_targets, n_features))
    with pytest.raises(ValueError):
        wrong_y = rng.rand(n, n_features, 99)
        clf.fit(X, wrong_y)
def test_get_coef_patterns_values():
    """Test that patterns match a manual covariance-based computation."""
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LinearRegression
    rng = np.random.RandomState(0)
    n_samples, n_features = 20, 3
    y = (np.arange(n_samples) % 2) * 2 - 1
    w = rng.randn(n_features, 1)
    X = w.dot(y[np.newaxis, :]).T + rng.randn(n_samples, n_features)

    # Check patterns values against the closed-form computation
    clf = make_pipeline(StandardScaler(), LinearModel(LinearRegression()))
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', True)
    mean, std = X.mean(0), X.std(0)
    X = (X - mean) / std
    coef = np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
    patterns_manual = np.cov(X.T).dot(coef)
    assert_array_almost_equal(patterns, patterns_manual * std + mean)