def test_rank_deficiency():
    """Fit on a rank-deficient (low-passed) signal and check the prediction."""
    # See GH#4253
    from sklearn.linear_model import Ridge

    n_times, sfreq, alpha = 256, 1., 0.1
    tmin, tmax = -50, 100
    rng = np.random.RandomState(0)

    # Scaled white noise whose upper spectrum is zeroed out, i.e. a
    # rank-deficient lowpass signal.
    spectrum = np.fft.rfft(rng.randn(n_times, 1) * 100, axis=0)
    spectrum[n_times // 4:] = 0
    eeg = np.fft.irfft(spectrum, axis=0)

    # Target: the input convolved with a unit-mean Hann window, plus noise.
    kernel = np.hanning(n_times // 8)
    kernel = kernel / kernel.mean()
    y = np.apply_along_axis(np.convolve, 0, eeg, kernel, mode='same')
    y = y + rng.randn(*y.shape) * 100

    # Both an explicit Ridge estimator and a bare float are exercised.
    for estimator in (Ridge(alpha), alpha):
        model = ReceptiveField(tmin, tmax, sfreq, estimator=estimator,
                               patterns=True)
        model.fit(eeg, y)
        y_hat = model.predict(eeg)
        assert_equal(y.shape, y_hat.shape)
        assert np.corrcoef(y.ravel(), y_hat.ravel())[0, 1] > 0.995
def __init__(self, lag_u, penal_weight=1e3):
    """Store the hyper-parameters and build the wrapped ReceptiveField.

    ``lag_u`` is forwarded to ``ReceptiveField`` as ``tmax`` (with
    ``tmin=0.`` and ``sfreq=1.``); ``penal_weight`` is forwarded as its
    ``estimator`` argument.
    """
    # Input-channel count starts at zero.
    self.n_channels_u = 0
    self.lag_u = lag_u
    self.penal_weight = penal_weight
    model_kwargs = dict(tmin=0., tmax=lag_u, sfreq=1.,
                        estimator=self.penal_weight)
    self.model = ReceptiveField(**model_kwargs)
def test_linalg_warning():
    """Test that warnings are issued when no regularization is applied."""
    from sklearn.linear_model import Ridge
    n_feats, n_targets, n_samples = 5, 60, 50
    # NOTE(review): tmin/tmax are not defined in this function; presumably
    # they are module-level constants -- confirm.
    X, y = _make_data(n_feats, n_targets, n_samples, tmin, tmax)
    # ``match`` is a regular expression.  The former value
    # '[Singular|scipy.linalg.solve]' was a *character class* that matched
    # any message containing one of those letters, so the message was never
    # really checked.  Use a real, case-insensitive alternation instead.
    # NOTE(review): 'ill-conditioned' is included because SciPy's solver
    # warning is presumably worded that way in some releases -- confirm
    # against the SciPy versions under test.
    pattern = r'(?i)singular|scipy\.linalg\.solve|ill-conditioned'
    for estimator in (0., Ridge(alpha=0.)):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator)
        with pytest.warns((RuntimeWarning, UserWarning), match=pattern):
            # X and y are deliberately swapped for this fit
            rf.fit(y, X)
def test_receptive_field_1d(n_jobs):
    """Check that float / TimeDelayingRidge estimators behave like Ridge."""
    from sklearn.linear_model import Ridge

    n_times = 500
    rng = np.random.RandomState(0)
    x = rng.randn(n_times, 1)
    for delay in range(-2, 3):
        # y is x shifted by ``delay`` samples; the uncovered end stays zero
        y = np.zeros(n_times)
        lag_windows = [(-2, 4)]
        if delay < 0:
            y[:delay] = x[-delay:, 0]
            lag_windows.append((-4, -1))
        elif delay > 0:
            y[delay:] = x[:-delay, 0]
            lag_windows.append((1, 2))
        else:
            y[:] = x[:, 0]
        for ndim in (1, 2):
            # same data, once as a vector and once as a single-column matrix
            y = y.reshape((n_times,) + (1,) * (ndim - 1))
            for smin, smax in lag_windows:
                lap = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian',
                                        fit_intercept=False, n_jobs=n_jobs)
                estimators = (Ridge(alpha=0.), Ridge(alpha=0.1), 0., 0.1, lap)
                for estimator in estimators:
                    for offset in (-100, 0, 100):
                        model = ReceptiveField(smin, smax, 1.,
                                               estimator=estimator,
                                               n_jobs=n_jobs)
                        use_x = x + offset
                        model.fit(use_x, y)
                        if estimator is lap:
                            continue  # these checks are too stringent
                        assert_allclose(model.estimator_.intercept_, -offset,
                                        atol=1e-1)
                        assert_array_equal(model.delays_,
                                           np.arange(smin, smax + 1))
                        # One-hot over delays, with a leading features axis
                        # and (for 2D y) a leading outputs axis.
                        one_hot = (model.delays_ == delay).astype(float)
                        expected = one_hot[(np.newaxis,) * y.ndim]
                        assert_equal(model.coef_.ndim, ndim + 1)
                        assert_allclose(model.coef_, expected, atol=1e-3)
                        valid = model.valid_samples_
                        start = valid.start or 0
                        # NOTE(review): if ``valid.stop`` is negative this
                        # subtraction increases ``stop`` -- confirm that is
                        # the intended sample count.
                        stop = len(use_x) - (valid.stop or 0)
                        assert stop - start >= 495
                        assert_allclose(model.predict(use_x)[valid], y[valid],
                                        atol=1e-2)
                        assert np.mean(model.score(use_x, y)) > 0.9999
def test_inverse_coef():
    """Test inverse coefficients computation."""
    from sklearn.linear_model import Ridge
    tmin, tmax = 0., 10.
    n_feats, n_targets, n_samples = 3, 2, 1000
    n_delays = int((tmax - tmin) + 1)

    # Check coefficient dims, for all estimator types
    X, y = _make_data(n_feats, n_targets, n_samples, tmin, tmax)
    tdr = TimeDelayingRidge(tmin, tmax, 1., 0.1, 'laplacian')
    for estimator in (0., 0.01, Ridge(alpha=0.), tdr):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        rf.fit(X, y)
        # Same model with the roles of X and y exchanged
        inv_rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                                patterns=True)
        inv_rf.fit(y, X)

        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected shape has to be compared explicitly against both
        # attributes rather than being passed as a third argument.
        fwd_shape = (n_targets, n_feats, n_delays)
        assert_array_equal(rf.coef_.shape, fwd_shape)
        assert_array_equal(rf.patterns_.shape, fwd_shape)
        inv_shape = (n_feats, n_targets, n_delays)
        assert_array_equal(inv_rf.coef_.shape, inv_shape)
        assert_array_equal(inv_rf.patterns_.shape, inv_shape)

        # we should have np.dot(patterns.T,coef) ~ np.eye(n)
        c0 = rf.coef_.reshape(n_targets, n_feats * n_delays)
        c1 = rf.patterns_.reshape(n_targets, n_feats * n_delays)
        assert_allclose(np.dot(c0, c1.T), np.eye(c0.shape[0]), atol=0.2)
def test_inverse_coef():
    """Test inverse coefficients computation."""
    from sklearn.linear_model import Ridge
    rng = np.random.RandomState(0)
    tmin, tmax = 0., 10.
    n_feats, n_targets, n_samples = 64, 2, 10000
    n_delays = int((tmax - tmin) + 1)

    def make_data(n_feats, n_targets, n_samples, tmin, tmax):
        """Simulate X and a noiseless y built from time-delayed copies of X."""
        X = rng.randn(n_samples, n_feats)
        w = rng.randn(int((tmax - tmin) + 1) * n_feats, n_targets)
        # Delay inputs
        X_del = np.concatenate(
            _delay_time_series(X, tmin, tmax, 1.).transpose(2, 0, 1), axis=1)
        y = np.dot(X_del, w)
        return X, y

    # Check coefficient dims, for all estimator types
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    tdr = TimeDelayingRidge(tmin, tmax, 1., 0.1, 'laplacian')
    for estimator in (0., 0.01, Ridge(alpha=0.), tdr):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        rf.fit(X, y)
        # Same model with the roles of X and y exchanged
        inv_rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                                patterns=True)
        inv_rf.fit(y, X)

        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected shape has to be compared explicitly against both
        # attributes rather than being passed as a third argument.
        fwd_shape = (n_targets, n_feats, n_delays)
        assert_array_equal(rf.coef_.shape, fwd_shape)
        assert_array_equal(rf.patterns_.shape, fwd_shape)
        inv_shape = (n_feats, n_targets, n_delays)
        assert_array_equal(inv_rf.coef_.shape, inv_shape)
        assert_array_equal(inv_rf.patterns_.shape, inv_shape)

        # we should have np.dot(patterns.T,coef) ~ np.eye(n)
        c0 = rf.coef_.reshape(n_targets, n_feats * n_delays)
        c1 = rf.patterns_.reshape(n_targets, n_feats * n_delays)
        assert_allclose(np.dot(c0, c1.T), np.eye(c0.shape[0]), atol=0.1)

    # Check that warnings are issued when no regularization is applied
    n_feats, n_targets, n_samples = 5, 60, 50
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    for estimator in (0., Ridge(alpha=0.)):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        with warnings.catch_warnings(record=True) as w:
            rf.fit(y, X)
            # For some reason there is no warning
            # (skipped only for the Ridge estimator on numpy older than 1.13)
            if estimator and not check_version('numpy', '1.13'):
                continue
            assert_equal(len(w), 1)
            assert_true(any(x in str(w[0].message).lower()
                            for x in ('singular', 'scipy.linalg.solve')),
                        msg=str(w[0].message))
def test_inverse_coef():
    """Test inverse coefficients computation."""
    from sklearn.linear_model import Ridge
    rng = np.random.RandomState(0)
    tmin, tmax = 0., 10.
    n_feats, n_targets, n_samples = 3, 2, 1000
    n_delays = int((tmax - tmin) + 1)

    def make_data(n_feats, n_targets, n_samples, tmin, tmax):
        """Simulate X and a noiseless y built from time-delayed copies of X."""
        X = rng.randn(n_samples, n_feats)
        w = rng.randn(int((tmax - tmin) + 1) * n_feats, n_targets)
        # Delay inputs
        X_del = np.concatenate(
            _delay_time_series(X, tmin, tmax, 1.).transpose(2, 0, 1), axis=1)
        y = np.dot(X_del, w)
        return X, y

    # Check coefficient dims, for all estimator types
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    tdr = TimeDelayingRidge(tmin, tmax, 1., 0.1, 'laplacian')
    for estimator in (0., 0.01, Ridge(alpha=0.), tdr):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        rf.fit(X, y)
        # Same model with the roles of X and y exchanged
        inv_rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                                patterns=True)
        inv_rf.fit(y, X)

        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected shape has to be compared explicitly against both
        # attributes rather than being passed as a third argument.
        fwd_shape = (n_targets, n_feats, n_delays)
        assert_array_equal(rf.coef_.shape, fwd_shape)
        assert_array_equal(rf.patterns_.shape, fwd_shape)
        inv_shape = (n_feats, n_targets, n_delays)
        assert_array_equal(inv_rf.coef_.shape, inv_shape)
        assert_array_equal(inv_rf.patterns_.shape, inv_shape)

        # we should have np.dot(patterns.T,coef) ~ np.eye(n)
        c0 = rf.coef_.reshape(n_targets, n_feats * n_delays)
        c1 = rf.patterns_.reshape(n_targets, n_feats * n_delays)
        assert_allclose(np.dot(c0, c1.T), np.eye(c0.shape[0]), atol=0.2)

    # Check that warnings are issued when no regularization is applied
    n_feats, n_targets, n_samples = 5, 60, 50
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    # ``match`` is a regular expression; '[Singular|scipy.linalg.solve]' was
    # a character class that matched nearly any message.  Use a real,
    # case-insensitive alternation instead.
    # NOTE(review): 'ill-conditioned' is included because SciPy's solver
    # warning is presumably worded that way in some releases -- confirm.
    pattern = r'(?i)singular|scipy\.linalg\.solve|ill-conditioned'
    for estimator in (0., Ridge(alpha=0.)):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        with pytest.warns((RuntimeWarning, UserWarning), match=pattern):
            rf.fit(y, X)
def test_receptive_field_1d(n_jobs):
    """Verify single-feature fits recover a pure delay for every estimator."""
    from sklearn.linear_model import Ridge

    n_times = 500
    rng = np.random.RandomState(0)
    x = rng.randn(n_times, 1)
    for delay in range(-2, 3):
        # Build the target as a shifted copy of the single input column
        target = np.zeros(n_times)
        windows = [(-2, 4)]
        if delay > 0:
            target[delay:] = x[:-delay, 0]
            windows.append((1, 2))
        elif delay < 0:
            target[:delay] = x[-delay:, 0]
            windows.append((-4, -1))
        else:
            target[:] = x[:, 0]
        for ndim in (1, 2):
            # fit against a vector first, then a single-column matrix
            y = target.reshape((n_times,) + (1,) * (ndim - 1))
            for smin, smax in windows:
                lap = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian',
                                        fit_intercept=False, n_jobs=n_jobs)
                for estimator in (Ridge(alpha=0.), Ridge(alpha=0.1), 0., 0.1,
                                  lap):
                    for offset in (-100, 0, 100):
                        model = ReceptiveField(smin, smax, 1.,
                                               estimator=estimator,
                                               n_jobs=n_jobs)
                        use_x = x + offset
                        model.fit(use_x, y)
                        if estimator is lap:
                            continue  # these checks are too stringent
                        assert_allclose(model.estimator_.intercept_, -offset,
                                        atol=1e-1)
                        assert_array_equal(model.delays_,
                                           np.arange(smin, smax + 1))
                        expected = (model.delays_ == delay).astype(float)
                        # add a features axis, plus an outputs axis for 2D y
                        for _ in range(y.ndim):
                            expected = expected[np.newaxis]
                        assert_equal(model.coef_.ndim, ndim + 1)
                        assert_allclose(model.coef_, expected, atol=1e-3)
                        keep = model.valid_samples_
                        start = keep.start or 0
                        # NOTE(review): a negative ``keep.stop`` makes this
                        # larger than len(use_x) -- confirm that is intended.
                        stop = len(use_x) - (keep.stop or 0)
                        assert stop - start >= 495
                        y_hat = model.predict(use_x)
                        assert_allclose(y_hat[keep], y[keep], atol=1e-2)
                        score = np.mean(model.score(use_x, y))
                        assert score > 0.9999
def test_receptive_field_basic(n_jobs):
    """Test model prep and fitting."""
    from sklearn.linear_model import Ridge
    # Make sure estimator pulling works
    mod = Ridge()

    # Test the receptive field model
    # Define parameters for the model and simulate inputs + weights
    tmin, tmax = -10., 0
    n_feats = 3
    rng = np.random.RandomState(0)
    X = rng.randn(10000, n_feats)
    w = rng.randn(int((tmax - tmin) + 1) * n_feats)

    # Build the delayed design matrix and the noiseless target from it
    X_del = np.concatenate(
        _delay_time_series(X, tmin, tmax, 1.).transpose(2, 0, 1), axis=1)
    y = np.dot(X_del, w)

    # Fit the model and test values
    feature_names = ['feature_%i' % ii for ii in [0, 1, 2]]
    rf = ReceptiveField(tmin, tmax, 1, feature_names, estimator=mod,
                        patterns=True)
    rf.fit(X, y)
    assert_array_equal(rf.delays_, np.arange(tmin, tmax + 1))

    y_pred = rf.predict(X)
    assert_allclose(y[rf.valid_samples_], y_pred[rf.valid_samples_],
                    atol=1e-2)
    scores = rf.score(X, y)
    assert scores > .99
    assert_allclose(rf.coef_.T.ravel(), w, atol=1e-3)

    # Make sure different input shapes work.
    # ``X[:, np.newaxis:]`` is the slice ``None:`` (i.e. plain ``X``), which
    # only repeated the 2D case below; insert a real singleton axis so a 3D
    # X with a 2D y is actually exercised.
    rf.fit(X[:, np.newaxis, :], y[:, np.newaxis])
    rf.fit(X, y[:, np.newaxis])
    with pytest.raises(ValueError, match='If X has 3 .* y must have 2 or 3'):
        rf.fit(X[..., np.newaxis], y)
    with pytest.raises(ValueError, match='X must be shape'):
        rf.fit(X[:, 0], y)
    with pytest.raises(ValueError, match='X and y do not have the same n_epo'):
        rf.fit(X[:, np.newaxis],
               np.tile(y[:, np.newaxis, np.newaxis], [1, 2, 1]))
    with pytest.raises(ValueError, match='X and y do not have the same n_tim'):
        rf.fit(X, y[:-2])
    with pytest.raises(ValueError, match='n_features in X does not match'):
        rf.fit(X[:, :1], y)

    # auto-naming features
    feature_names = ['feature_%s' % ii for ii in [0, 1, 2]]
    rf = ReceptiveField(tmin, tmax, 1, estimator=mod,
                        feature_names=feature_names)
    assert_equal(rf.feature_names, feature_names)
    rf = ReceptiveField(tmin, tmax, 1, estimator=mod)
    rf.fit(X, y)
    assert_equal(rf.feature_names, None)

    # Float becomes ridge
    rf = ReceptiveField(tmin, tmax, 1, ['one', 'two', 'three'], estimator=0)
    str(rf)  # repr works before fit
    rf.fit(X, y)
    assert isinstance(rf.estimator_, TimeDelayingRidge)
    str(rf)  # repr works after fit
    rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0)
    rf.fit(X[:, [0]], y)
    str(rf)  # repr with one feature

    # Should only accept estimators or floats
    with pytest.raises(ValueError, match='`estimator` must be a float or'):
        ReceptiveField(tmin, tmax, 1, estimator='foo').fit(X, y)
    with pytest.raises(ValueError, match='`estimator` must be a float or'):
        ReceptiveField(tmin, tmax, 1, estimator=np.array([1, 2, 3])).fit(X, y)
    with pytest.raises(ValueError, match='tmin .* must be at most tmax'):
        ReceptiveField(5, 4, 1).fit(X, y)

    # scorers: each registered scorer must agree with ``rf.score``
    for key, val in _SCORERS.items():
        rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0,
                            scoring=key, patterns=True)
        rf.fit(X[:, [0]], y)
        y_pred = rf.predict(X[:, [0]]).T.ravel()[:, np.newaxis]
        assert_allclose(val(y[:, np.newaxis], y_pred,
                            multioutput='raw_values'),
                        rf.score(X[:, [0]], y), rtol=1e-2)
    # 1D input is rejected (uses y_pred from the last scorer iteration)
    with pytest.raises(ValueError, match='inputs must be shape'):
        _SCORERS['corrcoef'](y.ravel(), y_pred, multioutput='raw_values')

    # Need correct scorers
    with pytest.raises(ValueError, match='scoring must be one of'):
        ReceptiveField(tmin, tmax, 1., scoring='foo').fit(X, y)
def test_inverse_coef():
    """Test inverse coefficients computation."""
    from sklearn.linear_model import Ridge
    rng = np.random.RandomState(0)
    tmin, tmax = 0., 10.
    n_feats, n_targets, n_samples = 64, 2, 10000
    n_delays = int((tmax - tmin) + 1)

    def make_data(n_feats, n_targets, n_samples, tmin, tmax):
        """Simulate X and a noiseless y built from time-delayed copies of X."""
        X = rng.randn(n_samples, n_feats)
        w = rng.randn(int((tmax - tmin) + 1) * n_feats, n_targets)
        # Delay inputs
        X_del = np.concatenate(
            _delay_time_series(X, tmin, tmax, 1.).transpose(2, 0, 1), axis=1)
        y = np.dot(X_del, w)
        return X, y

    # Check coefficient dims, for all estimator types
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    tdr = TimeDelayingRidge(tmin, tmax, 1., 0.1, 'laplacian')
    for estimator in (0., 0.01, Ridge(alpha=0.), tdr):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        rf.fit(X, y)
        # Same model with the roles of X and y exchanged
        inv_rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                                patterns=True)
        inv_rf.fit(y, X)

        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected shape has to be compared explicitly against both
        # attributes rather than being passed as a third argument.
        fwd_shape = (n_targets, n_feats, n_delays)
        assert_array_equal(rf.coef_.shape, fwd_shape)
        assert_array_equal(rf.patterns_.shape, fwd_shape)
        inv_shape = (n_feats, n_targets, n_delays)
        assert_array_equal(inv_rf.coef_.shape, inv_shape)
        assert_array_equal(inv_rf.patterns_.shape, inv_shape)

        # we should have np.dot(patterns.T,coef) ~ np.eye(n)
        c0 = rf.coef_.reshape(n_targets, n_feats * n_delays)
        c1 = rf.patterns_.reshape(n_targets, n_feats * n_delays)
        assert_allclose(np.dot(c0, c1.T), np.eye(c0.shape[0]), atol=0.1)

    # Check that warnings are issued when no regularization is applied
    n_feats, n_targets, n_samples = 5, 60, 50
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    # ``match`` is a regular expression; '[Singular|scipy.linalg.solve]' was
    # a character class that matched nearly any message.  Use a real,
    # case-insensitive alternation instead.
    # NOTE(review): 'ill-conditioned' is included because SciPy's solver
    # warning is presumably worded that way in some releases -- confirm.
    pattern = r'(?i)singular|scipy\.linalg\.solve|ill-conditioned'
    for estimator in (0., Ridge(alpha=0.)):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        with pytest.warns((RuntimeWarning, UserWarning), match=pattern):
            rf.fit(y, X)
def test_receptive_field_fast():
    """Compare the float / TimeDelayingRidge solvers against Ridge."""
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(0)
    x = rng.randn(500)

    # Single feature, single output: y is x shifted by ``delay`` samples
    for delay in range(-2, 3):
        y = np.zeros(500)
        windows = [(-4, 2)]
        if delay < 0:
            y[-delay:] = x[:delay]
            windows.append((-4, -1))
        elif delay > 0:
            y[:-delay] = x[delay:]
            windows.append((1, 2))
        else:
            y = x.copy()
        x_col = x[:, np.newaxis]
        for smin, smax in windows:
            lap = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian',
                                    fit_intercept=False)
            for estimator in (Ridge(alpha=0.), 0., 0.1, lap):
                model = ReceptiveField(smin, smax, 1., estimator=estimator)
                model.fit(x_col, y)
                assert_array_equal(model.delays_, np.arange(smin, smax + 1))
                one_hot = (model.delays_ == delay).astype(float)
                assert_allclose(model.coef_[0], one_hot, atol=1e-2)
                assert_allclose(y, model.predict(x_col)[:, 0], atol=5e-2)

    # multidimensional
    x = rng.randn(1000, 3)
    y = np.zeros((1000, 2))
    smin, smax = -5, 0
    # This is a weird assignment, but it's just a way to distribute some
    # unique values at various delays, and "expected" explains how they
    # should appear in the resulting RF
    for ii in range(1, 5):
        y[ii:, ii % 2] += (-1) ** ii * ii * x[:-ii, ii % 3]
    expected = [
        [[0, 0, 0, 0, 0, 0],
         [0, 4, 0, 0, 0, 0],
         [0, 0, 0, 2, 0, 0]],
        [[0, 0, -3, 0, 0, 0],
         [0, 0, 0, 0, -1, 0],
         [0, 0, 0, 0, 0, 0]],
    ]
    lap = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian')
    for estimator in (Ridge(alpha=0.), 0., 0.01, lap):
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_array_equal(model.delays_, np.arange(smin, smax + 1))
        assert_allclose(model.coef_, expected, atol=1e-1)

    # Invalid ``reg_type`` values must make the fit raise
    for bad_reg_type in ('foo', ['laplacian']):
        bad = TimeDelayingRidge(smin, smax, 1., 0.01, reg_type=bad_reg_type)
        model = ReceptiveField(smin, smax, 1., estimator=bad)
        assert_raises(ValueError, model.fit, x, y)

    # Now check the intercept_
    with_icpt = TimeDelayingRidge(smin, smax, 1., 0.)
    without_icpt = TimeDelayingRidge(smin, smax, 1., 0., fit_intercept=False)
    for estimator in (Ridge(alpha=0.), with_icpt,
                      Ridge(alpha=0., fit_intercept=False), without_icpt):
        # x and y are re-centred in place at the start of every iteration
        x -= np.mean(x, axis=0)
        y -= np.mean(y, axis=0)
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_allclose(model.estimator_.intercept_, 0., atol=1e-2)
        assert_allclose(model.coef_, expected, atol=1e-1)
        # shift the input far from zero and refit
        x += 1e3
        model.fit(x, y)
        if not estimator.fit_intercept:
            val, itol, ctol = 0., 0., 2.  # much worse
        else:
            val, itol, ctol = [-6000, 4000], 20., 1e-1
        assert_allclose(model.estimator_.intercept_, val, atol=itol)
        assert_allclose(model.coef_, expected, atol=ctol)

    model = ReceptiveField(smin, smax, 1., fit_intercept=False)
    model.fit(x, y)
    assert_allclose(model.estimator_.intercept_, 0., atol=1e-7)
def test_receptive_field():
    """Exercise ReceptiveField fitting, input checks, repr and scoring."""
    from sklearn.linear_model import Ridge

    # An explicit scikit-learn estimator instance
    ridge = Ridge()

    # Simulate features and weights.
    # NOTE(review): ``rng`` is not defined in this function; presumably it is
    # a module-level RandomState -- confirm.
    tmin, tmax = 0., 10.
    n_feats = 3
    X = rng.randn(n_feats, 10000)
    w = rng.randn(int((tmax - tmin) + 1) * n_feats)

    # Target is the weighted sum of the stacked, time-delayed inputs
    y = np.dot(w, np.vstack(_delay_time_series(X, tmin, tmax, 1., axis=-1)))
    X = np.rollaxis(X, -1, 0)  # time to first dimension

    # Fit the model and test values
    names = ['feature_%i' % ii for ii in range(3)]
    rf = ReceptiveField(tmin, tmax, 1, names, estimator=ridge)
    rf.fit(X, y)
    assert_array_equal(rf.delays_, np.arange(tmin, tmax + 1))

    y_pred = rf.predict(X)
    keep = rf.keep_samples_
    assert_array_almost_equal(y[keep], y_pred.squeeze()[keep], 2)
    assert_true(rf.score(X, y) > .99)
    assert_array_almost_equal(rf.coef_.reshape(-1, order='F'), w, 2)

    # Make sure different input shapes work.
    # NOTE(review): the first call passes a plain 2-D view of X; if a 3-D
    # (epochs) input was meant here it would need ``X[:, np.newaxis, :]``
    # -- confirm.
    rf.fit(X[:, :], y[:, np.newaxis])
    rf.fit(X, y[:, np.newaxis])

    # Inconsistent shapes must raise; the last case checks that the number
    # of stim features matches the input data.
    bad_inputs = (
        (X[..., np.newaxis], y),
        (X[:, 0], y),
        (X[..., np.newaxis], np.tile(y[..., np.newaxis], [2, 1, 1])),
        (X[:, :1], y),
    )
    for bad_X, bad_y in bad_inputs:
        assert_raises(ValueError, rf.fit, bad_X, bad_y)

    # auto-naming features
    rf = ReceptiveField(tmin, tmax, 1, estimator=ridge)
    rf.fit(X, y)
    assert_equal(rf.feature_names, ['feature_%s' % ii for ii in range(3)])

    # X/y same n timepoints
    assert_raises(ValueError, rf.fit, X, y[:-2])

    # Float becomes ridge
    rf = ReceptiveField(tmin, tmax, 1, ['one', 'two', 'three'], estimator=0)
    str(rf)  # repr works before fit
    rf.fit(X, y)
    assert_true(isinstance(rf.estimator_, TimeDelayingRidge))
    str(rf)  # repr works after fit
    X_one = X[:, [0]]
    rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0)
    rf.fit(X_one, y)
    str(rf)  # repr with one feature

    # Should only accept estimators or floats
    for bad_estimator in ('foo', np.array([1, 2, 3])):
        rf = ReceptiveField(tmin, tmax, 1, estimator=bad_estimator)
        assert_raises(ValueError, rf.fit, X, y)

    # tmin must be <= tmax
    rf = ReceptiveField(5, 4, 1)
    assert_raises(ValueError, rf.fit, X, y)

    # scorers: every registered scorer must agree with ``rf.score``
    for key, val in _SCORERS.items():
        rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0, scoring=key)
        rf.fit(X_one, y)
        y_pred = rf.predict(X_one)
        assert_array_almost_equal(val(y[:, np.newaxis], y_pred),
                                  rf.score(X_one, y), 4)
    # Need 2D input (uses y_pred from the last scorer iteration)
    assert_raises(ValueError, _SCORERS['corrcoef'], y.squeeze(), y_pred)

    # Need correct scorers
    rf = ReceptiveField(tmin, tmax, 1., scoring='foo')
    assert_raises(ValueError, rf.fit, X, y)
ax.set(title="Sample activity", xlabel="Time (s)")
mne.viz.tight_layout()

###############################################################################
# Create and fit a receptive field model
# --------------------------------------
#
# We will construct a model to find the linear relationship between the EEG
# signal and a time-delayed version of the speech envelope. This allows
# us to make predictions about the response to new stimuli.

# Define the delays that we will use in the receptive field
# (presumably in seconds, since they are multiplied by ``sfreq`` below to
# count delays -- confirm against the ReceptiveField documentation)
tmin, tmax = -.4, .2

# Initialize the model: a single input feature named 'envelope', a float
# ``estimator`` value of 1., and correlation-coefficient scoring
rf = ReceptiveField(tmin, tmax, sfreq, feature_names=['envelope'],
                    estimator=1., scoring='corrcoef')
# We'll have (tmax - tmin) * sfreq delays (truncated to an integer)
# and an extra 2 delays since we are inclusive on the beginning / end index
n_delays = int((tmax - tmin) * sfreq) + 2

# Cross-validation with ``n_splits`` folds
n_splits = 3
cv = KFold(n_splits)

# Prepare model data (make time the first dimension)
speech = speech.T
Y, _ = raw[:]  # Outputs for the model (second returned item is discarded)
Y = Y.T

# Iterate through splits, fit the model, and predict/test on held-out data
# Pre-allocate one row of coefficients / scores per split
coefs = np.zeros((n_splits, n_channels, n_delays))
scores = np.zeros((n_splits, n_channels))
ax.set(title="Sample activity", xlabel="Time (s)")
mne.viz.tight_layout()

###############################################################################
# Create and fit a receptive field model
# --------------------------------------
#
# We will construct an encoding model to find the linear relationship between
# a time-delayed version of the speech envelope and the EEG signal. This allows
# us to make predictions about the response to new stimuli.

# Define the delays that we will use in the receptive field
# (presumably in seconds, since they are multiplied by ``sfreq`` below to
# count delays -- confirm against the ReceptiveField documentation)
tmin, tmax = -.2, .4

# Initialize the model: a single input feature named 'envelope', a float
# ``estimator`` value of 1., and correlation-coefficient scoring
rf = ReceptiveField(tmin, tmax, sfreq, feature_names=['envelope'],
                    estimator=1., scoring='corrcoef')
# We'll have (tmax - tmin) * sfreq delays (truncated to an integer)
# and an extra 2 delays since we are inclusive on the beginning / end index
n_delays = int((tmax - tmin) * sfreq) + 2

# Cross-validation with ``n_splits`` folds
n_splits = 3
cv = KFold(n_splits)

# Prepare model data (make time the first dimension)
speech = speech.T
Y, _ = raw[:]  # Outputs for the model (second returned item is discarded)
Y = Y.T

# Iterate through splits, fit the model, and predict/test on held-out data
# Pre-allocate one row of coefficients / scores per split
coefs = np.zeros((n_splits, n_channels, n_delays))
scores = np.zeros((n_splits, n_channels))
class RField:
    """Wrapper around ``ReceptiveField`` for (sample, time, channel) arrays.

    The wrapped model is built with ``tmin=0.``, ``tmax=lag_u``,
    ``sfreq=1.`` and ``estimator=penal_weight``.  ``fit`` and ``predict``
    swap the first two axes of their inputs before handing them to the
    model, and ``predict`` swaps them back on the result.
    """

    def __init__(self, lag_u, penal_weight=1e3):
        """Store the hyper-parameters and build the wrapped model."""
        self.lag_u = lag_u
        self.penal_weight = penal_weight
        self.model = ReceptiveField(tmin=0., tmax=lag_u, sfreq=1.,
                                    estimator=self.penal_weight)
        # Number of input channels; set from the data in ``fit``
        self.n_channels_u = 0

    def fit(self, U, Y):
        """Fit the wrapped model on inputs ``U`` and targets ``Y``.

        ``U`` must have at least three axes; the size of its third axis is
        recorded as the number of input channels.
        """
        self.n_channels_u = U.shape[2]
        # swap 2 first axes for MNE:
        # (n_samples, n_times, n_channels) -> (n_times, n_samples, n_channels)
        self.model.fit(np.swapaxes(U, 0, 1), np.swapaxes(Y, 0, 1))

    def plot_weights(self, summarize=True, names_u=None):
        """Plot the fitted coefficients of every input channel.

        Parameters
        ----------
        summarize : bool
            If True, draw one filled curve per channel (sum of squared
            weights per lag) on a shared axis; otherwise draw the raw
            weights of each channel in its own subplot.
        names_u : sequence of str | None
            Channel labels.  ``None`` or an empty sequence selects the
            default labels ``"U Channel <index>"``.
        """
        # NOTE(review): ``coef_`` is indexed with three axes below, which
        # assumes the model was fit with multiple outputs -- confirm.
        if names_u is None or len(names_u) == 0:
            names_u = [
                "U Channel " + str(channel_u)
                for channel_u in range(self.n_channels_u)
            ]

        if summarize:
            fig, axes = plt.subplots(2, 1, figsize=(10, 5))
            # forcing weights
            for channel_u in range(self.n_channels_u):
                weights = self.model.coef_[:, channel_u, :].T
                axes[0].fill_between(range(weights.shape[0]),
                                     np.sum(weights**2, axis=1),
                                     label=names_u[channel_u],
                                     alpha=0.25)
            axes[0].set_title("Forcing Weights over Lags")
            axes[0].legend()
        else:
            # plot forcing weights
            # squeeze=False keeps ``axes`` two-dimensional, so indexing also
            # works when there is only a single input channel.
            fig, axes = plt.subplots(self.n_channels_u, 1, sharex=True,
                                     squeeze=False)
            for channel_u in range(self.n_channels_u):
                ax = axes[channel_u, 0]
                ax.set_title(names_u[channel_u] + " Weights")
                ax.plot(self.model.coef_[:, channel_u, :].T)
            plt.xlabel("Lags")

        plt.tight_layout()
        plt.show()
        plt.close()

    def predict(self, U, U_ini=np.array([]), Y_ini=np.array([])):
        """Predict from ``U``; ``U_ini`` and ``Y_ini`` are accepted but unused.

        The first two axes of ``U`` are swapped before prediction and the
        first two axes of the result are swapped back.
        """
        return np.swapaxes(self.model.predict(np.swapaxes(U, 0, 1)), 0, 1)
def test_inverse_coef():
    """Test inverse coefficients computation."""
    from sklearn.linear_model import Ridge
    rng = np.random.RandomState(0)
    tmin, tmax = 0., 10.
    n_feats, n_targets, n_samples = 64, 2, 10000
    n_delays = int((tmax - tmin) + 1)

    def make_data(n_feats, n_targets, n_samples, tmin, tmax):
        """Simulate X and a noiseless y built from time-delayed copies of X."""
        X = rng.randn(n_samples, n_feats)
        w = rng.randn(int((tmax - tmin) + 1) * n_feats, n_targets)
        # Delay inputs
        X_del = np.concatenate(
            _delay_time_series(X, tmin, tmax, 1.).transpose(2, 0, 1), axis=1)
        y = np.dot(X_del, w)
        return X, y

    # Check coefficient dims, for all estimator types
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    tdr = TimeDelayingRidge(tmin, tmax, 1., 0.1, 'laplacian')
    for estimator in (0., 0.01, Ridge(alpha=0.), tdr):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        rf.fit(X, y)
        # Same model with the roles of X and y exchanged
        inv_rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                                patterns=True)
        inv_rf.fit(y, X)

        # The third positional argument of assert_array_equal is ``err_msg``,
        # so the expected shape has to be compared explicitly against both
        # attributes rather than being passed as a third argument.
        fwd_shape = (n_targets, n_feats, n_delays)
        assert_array_equal(rf.coef_.shape, fwd_shape)
        assert_array_equal(rf.patterns_.shape, fwd_shape)
        inv_shape = (n_feats, n_targets, n_delays)
        assert_array_equal(inv_rf.coef_.shape, inv_shape)
        assert_array_equal(inv_rf.patterns_.shape, inv_shape)

        # we should have np.dot(patterns.T,coef) ~ np.eye(n)
        c0 = rf.coef_.reshape(n_targets, n_feats * n_delays)
        c1 = rf.patterns_.reshape(n_targets, n_feats * n_delays)
        assert_allclose(np.dot(c0, c1.T), np.eye(c0.shape[0]), atol=0.1)

    # Check that warnings are issued when no regularization is applied
    n_feats, n_targets, n_samples = 5, 60, 50
    X, y = make_data(n_feats, n_targets, n_samples, tmin, tmax)
    for estimator in (0., Ridge(alpha=0.)):
        rf = ReceptiveField(tmin, tmax, 1., estimator=estimator,
                            patterns=True)
        with warnings.catch_warnings(record=True) as w:
            rf.fit(y, X)
            # For some reason there is no warning
            # (skipped only for the Ridge estimator on numpy older than 1.13)
            if estimator and not check_version('numpy', '1.13'):
                continue
            assert_equal(len(w), 1)
            assert any(x in str(w[0].message).lower()
                       for x in ('singular', 'scipy.linalg.solve'))
def cross_validate(data, lambda_config=(2**0, 2**20, 11), t_config=(-.25, .1)):
    """Run two-layer leave-one-trial-out cross-validation per (TA, SNR).

    For every (TA, SNR) combination found in ``data``, each trial is held
    out once (outer fold).  Within the remaining trials an inner
    leave-one-trial-out loop scores every candidate regularisation value;
    the value with the highest summed inner score is used to refit on all
    outer-training trials and predict the held-out trial.

    Parameters
    ----------
    data : pandas.DataFrame
        Data frame loaded with the ``getData`` function of ``data_load``.
        Its first 16 columns are used as model inputs; the columns
        ``"target"``, ``"mask"``, ``"trial"``, ``"TA"`` and ``"SNR"`` are
        also read.
    lambda_config : tuple, optional
        ``(start, stop, num)`` passed to ``np.linspace`` to build the
        candidate regularisation values.  The default is
        ``(2**0, 2**20, 11)``.  Range from Cross et al 2016 publication.
    t_config : tuple, optional
        ``(tmin, tmax)`` lag range passed to ``ReceptiveField``.  The
        default is ``(-.25, .1)``.
        NOTE(review): previously documented as milliseconds; the values are
        used together with ``sfreq=64`` and look like seconds -- confirm.

    Returns
    -------
    pandas.DataFrame
        One row per outer fold with the columns ``corr_true``,
        ``corr_mask``, ``corr_rand``, ``TA`` and ``SNR``.  The accumulated
        frame is also pickled to ``local_data/results`` after every
        (TA, SNR) combination.
    """
    # Define lambda values for training models
    lambdas = np.linspace(lambda_config[0], lambda_config[1],
                          lambda_config[2])

    # Parameters for MNE
    tmin, tmax = t_config
    sfreq = 64

    # Split into train/testing with leave-one-group-out
    logo = LeaveOneGroupOut()

    # Define result DataFrame
    df_cols = ["corr_true", "corr_mask", "corr_rand", "TA", "SNR"]
    df = pd.DataFrame(columns=df_cols)

    # The first 16 columns are the model inputs
    feature_cols = data.columns[:16]
    feature_names = list(feature_cols)

    TAs = np.unique(data["TA"])
    for TA in TAs:
        SNRs = np.unique(data[data["TA"] == TA]["SNR"])
        for SNR in SNRs:
            data_sub = data[(data["TA"] == TA) & (data["SNR"] == SNR)]

            # Assign X, y and group variable (trial, as to do leave-trial-out)
            X = data_sub[feature_cols]
            y = data_sub["target"]
            masks = data_sub["mask"]
            groups = data_sub["trial"]
            n_outer_groups = len(np.unique(groups))

            # Two-layer CV starts here: outer fold
            for i, (out_train_idx, out_test_idx) in enumerate(
                    logo.split(X, y, groups)):
                X_train = X.iloc[out_train_idx]
                y_train = y.iloc[out_train_idx]
                X_test = X.iloc[out_test_idx]
                y_test = y.iloc[out_test_idx]

                # Define inner groups, these are n - 1 of n total groups.
                # The split indices are positions within ``data_sub``, so the
                # labels must come from ``groups`` (not from the full
                # ``data`` frame, whose rows are laid out differently).
                inner_groups = groups.iloc[out_train_idx]
                n_inner_groups = len(np.unique(inner_groups))

                # Initiate scores for inner fold validations
                vals = np.zeros((n_inner_groups, lambda_config[2]))

                # Inner fold
                for j, (inn_train_idx, inn_test_idx) in enumerate(
                        logo.split(X_train, y_train, inner_groups)):
                    # NOTE(review): TA and SNR are printed as value + 1,
                    # which assumes zero-based integer codes -- confirm.
                    print(
                        "TA = %i / %i\tSNR = %i / %i\nOuter fold %i / %i\t Inner fold %i / %i"
                        % (TA + 1, len(TAs), SNR + 1, len(SNRs), i + 1,
                           n_outer_groups, j + 1, n_inner_groups))

                    inn_X_train = X_train.iloc[inn_train_idx]
                    inn_y_train = y_train.iloc[inn_train_idx]
                    inn_X_test = X_train.iloc[inn_test_idx]
                    inn_y_test = y_train.iloc[inn_test_idx]

                    # Validate model with all parameters
                    for k, lam in enumerate(lambdas):
                        # Define model with the candidate parameter
                        model = ReceptiveField(tmin, tmax, sfreq,
                                               feature_names=feature_names,
                                               estimator=lam,
                                               scoring="corrcoef")
                        # Fit model to inner fold training data
                        model.fit(np.asarray(inn_X_train),
                                  np.asarray(inn_y_train))
                        # Correlation score on the inner held-out trial
                        val = model.score(np.asarray(inn_X_test),
                                          np.asarray(inn_y_test))
                        # Add score to matrix
                        vals[j, k] = val

                # Get optimal parameter: highest score summed over inner folds
                param_score = np.sum(vals, axis=0)
                lambda_opt = lambdas[np.argmax(param_score)]
                print("Optimal lambda = %f" % lambda_opt)

                # Train optimal model on all outer-training data
                model_opt = ReceptiveField(tmin, tmax, sfreq,
                                           feature_names=feature_names,
                                           estimator=lambda_opt,
                                           scoring="corrcoef")
                model_opt.fit(np.asarray(X_train), np.asarray(y_train))

                # Predict envelope
                y_pred = model_opt.predict(np.asarray(X_test))

                # Evaluate envelope, compare with mask and a random trial
                trial_test = np.unique(
                    data_sub.iloc[out_test_idx]["trial"])[0]
                y_rand = random_trial(data, TA=TA, trial=trial_test)["target"]

                corr_true = pearsonr(y_pred, np.asarray(y_test))
                corr_mask = pearsonr(y_pred,
                                     np.asarray(masks.iloc[out_test_idx]))
                corr_rand = pearsonr(y_pred, np.asarray(y_rand))

                # Add correlations to dataframe (only the coefficient, i.e.
                # element 0 of each pearsonr result, is kept)
                data_results = np.zeros((1, len(df_cols)))
                data_results[:, 0] = corr_true[0]
                data_results[:, 1] = corr_mask[0]
                data_results[:, 2] = corr_rand[0]
                data_results[:, 3] = TA
                data_results[:, 4] = SNR
                df_ = pd.DataFrame(data=data_results, columns=df_cols)

                # Concatenate
                df = pd.concat([df, df_], ignore_index=True)

            # Checkpoint the accumulated results after each (TA, SNR)
            df.to_pickle("local_data/results/result_%i_%i.pkl" % (TA, SNR))

    return df
# Finally, we'll use the :class:`mne.decoding.ReceptiveField` class to recover # the linear receptive field of this signal. Note that properties of the # receptive field (e.g. smoothness) will depend on the autocorrelation in the # inputs and outputs. # Create training and testing data train, test = np.arange(n_epochs - 1), n_epochs - 1 X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test] X_train, X_test, y_train, y_test = [np.rollaxis(ii, -1, 0) for ii in (X_train, X_test, y_train, y_test)] # Model the simulated data as a function of the spectrogram input alphas = np.logspace(-4, 0, 10) scores = np.zeros_like(alphas) models = [] for ii, alpha in enumerate(alphas): rf = ReceptiveField(tmin, tmax, sfreq, freqs, estimator=alpha) rf.fit(X_train, y_train) # Now make predictions about the model output, given input stimuli. scores[ii] = rf.score(X_test, y_test) models.append(rf) times = rf.delays_ / float(rf.sfreq) # Choose the model that performed best on the held out data ix_best_alpha = np.argmax(scores) best_mod = models[ix_best_alpha] coefs = best_mod.coef_ best_pred = best_mod.predict(X_test)[:, 0] # Plot the original STRF, and the one that we recovered with modeling.
def test_receptive_field(n_jobs):
    """Test model prep and fitting.

    Simulates a noiseless linear receptive field, checks that
    ``ReceptiveField`` recovers it, and then exercises input-shape
    validation, feature naming, ``repr``, estimator validation and every
    registered scorer.

    Parameters
    ----------
    n_jobs : object
        Supplied by the test harness; it is not referenced in this body.
    """
    from sklearn.linear_model import Ridge
    # Make sure estimator pulling works
    mod = Ridge()

    # Test the receptive field model
    # Define parameters for the model and simulate inputs + weights
    tmin, tmax = -10., 0
    n_feats = 3
    rng = np.random.RandomState(0)
    X = rng.randn(10000, n_feats)
    w = rng.randn(int((tmax - tmin) + 1) * n_feats)

    # Build the time-delayed design matrix and the noiseless target
    X_del = np.concatenate(
        _delay_time_series(X, tmin, tmax, 1.).transpose(2, 0, 1), axis=1)
    y = np.dot(X_del, w)

    # Fit the model and test values
    feature_names = ['feature_%i' % ii for ii in [0, 1, 2]]
    rf = ReceptiveField(tmin, tmax, 1, feature_names, estimator=mod,
                        patterns=True)
    rf.fit(X, y)
    assert_array_equal(rf.delays_, np.arange(tmin, tmax + 1))

    y_pred = rf.predict(X)
    assert_allclose(y[rf.valid_samples_], y_pred[rf.valid_samples_],
                    atol=1e-2)
    scores = rf.score(X, y)
    assert scores > .99
    assert_allclose(rf.coef_.T.ravel(), w, atol=1e-3)

    # Make sure different input shapes work
    # NOTE(review): ``np.newaxis:`` is ``None:``, i.e. a full slice, so this
    # passes X unchanged; ``X[:, np.newaxis, :]`` may have been intended.
    rf.fit(X[:, np.newaxis:, ], y[:, np.newaxis])
    rf.fit(X, y[:, np.newaxis])
    pytest.raises(ValueError, rf.fit, X[..., np.newaxis], y)
    pytest.raises(ValueError, rf.fit, X[:, 0], y)
    pytest.raises(ValueError, rf.fit, X[..., np.newaxis],
                  np.tile(y[..., np.newaxis], [2, 1, 1]))
    # stim features must match length of input data
    pytest.raises(ValueError, rf.fit, X[:, :1], y)

    # Explicit feature names are stored as given ...
    rf = ReceptiveField(tmin, tmax, 1, estimator=mod,
                        feature_names=feature_names)
    assert_equal(rf.feature_names, feature_names)
    # ... and stay None when omitted, even after fitting
    rf = ReceptiveField(tmin, tmax, 1, estimator=mod)
    rf.fit(X, y)
    assert_equal(rf.feature_names, None)

    # X/y same n timepoints
    pytest.raises(ValueError, rf.fit, X, y[:-2])

    # Float becomes ridge
    rf = ReceptiveField(tmin, tmax, 1, ['one', 'two', 'three'],
                        estimator=0, patterns=True)
    str(rf)  # repr works before fit
    rf.fit(X, y)
    assert isinstance(rf.estimator_, TimeDelayingRidge)
    str(rf)  # repr works after fit
    rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0, patterns=True)
    rf.fit(X[:, [0]], y)
    str(rf)  # repr with one feature

    # Should only accept estimators or floats
    rf = ReceptiveField(tmin, tmax, 1, estimator='foo', patterns=True)
    pytest.raises(ValueError, rf.fit, X, y)
    rf = ReceptiveField(tmin, tmax, 1, estimator=np.array([1, 2, 3]))
    pytest.raises(ValueError, rf.fit, X, y)

    # tmin must be <= tmax
    rf = ReceptiveField(5, 4, 1, patterns=True)
    pytest.raises(ValueError, rf.fit, X, y)

    # scorers: the model's score must agree with calling the scorer directly
    for key, val in _SCORERS.items():
        rf = ReceptiveField(tmin, tmax, 1, ['one'],
                            estimator=0, scoring=key, patterns=True)
        rf.fit(X[:, [0]], y)
        y_pred = rf.predict(X[:, [0]]).T.ravel()[:, np.newaxis]
        assert_allclose(val(y[:, np.newaxis], y_pred,
                            multioutput='raw_values'),
                        rf.score(X[:, [0]], y), rtol=1e-2)
    # Need 2D input
    pytest.raises(ValueError, _SCORERS['corrcoef'], y.ravel(), y_pred,
                  multioutput='raw_values')
    # Need correct scorers
    rf = ReceptiveField(tmin, tmax, 1., scoring='foo')
    pytest.raises(ValueError, rf.fit, X, y)
def _peak_normalised_mse(targets, reconstruction):
    """Return the mean over test trials of the peak-normalised MSE.

    ``targets`` is indexed as ``targets[:, n]`` and ``reconstruction`` as
    ``reconstruction[:, :, 0].T[n]``; each signal is divided by its own
    maximum absolute value before the squared error is computed.
    """
    errors = [
        mean_squared_error(targets[:, n] / max(abs(targets[:, n])),
                           rec / max(abs(rec)))
        for n, rec in enumerate(reconstruction[:, :, 0].T)
    ]
    return sum(errors) / len(errors)


def decoding(band,regularization,tmin,tmax,n_fold,subject_name, savepath):
    """Decode speech-envelope and lip signals from EEG for several lags.

    For every subject, regularization value and EEG window start, a pair of
    ``RField`` models (speech, lips) is trained and evaluated with k-fold
    cross-validation; the peak-normalised MSE per (regularization, window)
    is stored, then averaged across subjects, saved and plotted.

    Parameters
    ----------
    band : str
        Forwarded to ``use_FreqBand``. ``'original'`` reads the arrays with
        ``get_data()``; any other value averages ``.data`` over axis 2 (EEG)
        or axis 1 of the selected feature.
    regularization : sequence
        Values passed one at a time as ``estimator`` to ``RField``.
    tmin, tmax : float
        Lag limits forwarded to ``RField``.
    n_fold : int
        Number of folds; forwarded to ``k_fold``.
    subject_name : sequence of str
        Subject identifiers, inserted into the input file names.
    savepath : str
        Directory prefix for the ``.npy`` and ``.png`` outputs.

    Returns
    -------
    None
        Results are written under ``savepath``.
    """
    data_path = "./ProcessedData/Final_"
    eeg = "_processed-epo.fif"
    features = "_Features-epo.fif"
    sfreq = 100
    win_len = 200  # samples in every analysis window
    # First sample of each EEG window; only the EEG window is shifted
    T = [51, 61, 71, 81, 91, 101, 111, 121, 131, 141, 151]

    # Each row holds the scores for one regularization parameter
    results_speech = np.zeros((len(regularization), len(T)))
    results_lips = np.zeros((len(regularization), len(T)))
    results_speech_all_sub = {}
    results_lips_all_sub = {}
    predictions_lips_all_sub = {}
    predictions_speech_all_sub = {}

    for s in subject_name:
        print('subject '+str(s))
        # Read each file once; channel names are copied before the epochs
        # are handed to use_FreqBand
        eeg_epochs = mne.read_epochs(data_path + s + eeg)
        channels = list(eeg_epochs.ch_names)
        X_orig = use_FreqBand(eeg_epochs, band)
        Features_orig = use_FreqBand(
            mne.read_epochs(data_path + s + features), band)

        if band == 'original':
            X_orig = X_orig.get_data()
            feature_data = Features_orig.get_data()
            Y_envelope_sp_orig = feature_data[:, 0, :]
            Y_lips_ap_orig = feature_data[:, 2, :]
        else:
            # averaging power across frequencies
            X_orig = np.mean(X_orig.data, 2)
            Y_envelope_sp_orig = np.mean(Features_orig.data[:, 0, :, :], 1)
            Y_lips_ap_orig = np.mean(Features_orig.data[:, 2, :, :], 1)

        predictions_speech = np.zeros(
            (Y_envelope_sp_orig.shape[0], win_len, len(T),
             len(regularization)))
        predictions_lips = np.zeros(
            (Y_lips_ap_orig.shape[0], win_len, len(T), len(regularization)))

        # Train/test indices for each of the k folds
        train_index, test_index = k_fold(Y_envelope_sp_orig, n_fold)

        # data standardizers
        eegScaler = Scaler(scalings='mean')
        speechScaler = Scaler(scalings='mean')
        lipsScaler = Scaler(scalings='mean')

        scores_speech = np.zeros((n_fold,))
        scores_lips = np.zeros((n_fold,))

        # The target window is fixed, so slice it once per subject
        Y_envelope_sp = Y_envelope_sp_orig[:, 101:301]
        Y_lips_ap = Y_lips_ap_orig[:, 101:301]

        for i, r in enumerate(regularization):
            rf_speech = RField(tmin, tmax, sfreq, feature_names=channels,
                               scoring='r2', patterns=True, estimator=r)
            rf_lips = RField(tmin, tmax, sfreq, feature_names=channels,
                             scoring='r2', patterns=True, estimator=r)
            print('reg parameter #'+str(i))

            for j, t_start in enumerate(T):
                X = X_orig[:, :, t_start:t_start + win_len]

                for k in range(n_fold):
                    tr, te = train_index[k], test_index[k]

                    # Standardize with statistics fitted on the training
                    # trials only
                    X_standard = np.zeros(X.shape)
                    Y_lips_ap_standard = np.zeros(Y_lips_ap.shape)
                    Y_envelope_sp_standard = np.zeros(Y_envelope_sp.shape)
                    X_standard[tr, :, :] = eegScaler.fit_transform(
                        X[tr, :, :])
                    X_standard[te, :, :] = eegScaler.transform(X[te, :, :])
                    Y_lips_ap_standard[tr, :] = lipsScaler.fit_transform(
                        Y_lips_ap[tr, :])[:, :, 0]
                    Y_lips_ap_standard[te, :] = lipsScaler.transform(
                        Y_lips_ap[te, :])[:, :, 0]
                    Y_envelope_sp_standard[tr, :] = (
                        speechScaler.fit_transform(
                            Y_envelope_sp[tr, :])[:, :, 0])
                    Y_envelope_sp_standard[te, :] = speechScaler.transform(
                        Y_envelope_sp[te, :])[:, :, 0]

                    # Move the last axis to the front, as the receptive
                    # field model is fed here
                    X_standard = np.rollaxis(X_standard, 2, 0)
                    Y_envelope_sp_standard = np.rollaxis(
                        Y_envelope_sp_standard, 1, 0)
                    Y_lips_ap_standard = np.rollaxis(
                        Y_lips_ap_standard, 1, 0)

                    X_TRAIN = X_standard[:, tr, :]
                    X_TEST = X_standard[:, te, :]
                    Y_envelope_sp_TRAIN = Y_envelope_sp_standard[:, tr]
                    Y_envelope_sp_TEST = Y_envelope_sp_standard[:, te]
                    Y_lips_ap_TRAIN = Y_lips_ap_standard[:, tr]
                    Y_lips_ap_TEST = Y_lips_ap_standard[:, te]

                    # training models and predict
                    rf_speech.fit(X_TRAIN, Y_envelope_sp_TRAIN)
                    rf_lips.fit(X_TRAIN, Y_lips_ap_TRAIN)
                    reconstructed_speech = rf_speech.predict(X_TEST)
                    reconstructed_lips = rf_lips.predict(X_TEST)
                    predictions_speech[te, :, j, i] = reconstructed_speech.T
                    predictions_lips[te, :, j, i] = reconstructed_lips.T

                    # computing scores
                    scores_speech[k] = _peak_normalised_mse(
                        Y_envelope_sp_TEST, reconstructed_speech)
                    scores_lips[k] = _peak_normalised_mse(
                        Y_lips_ap_TEST, reconstructed_lips)

                # saving results for the i-th reg parameter and j-th time lag
                results_speech[i, j] = scores_speech.mean(axis=0)
                results_lips[i, j] = scores_lips.mean(axis=0)

        results_speech_all_sub[s] = results_speech.copy()
        results_lips_all_sub[s] = results_lips.copy()
        predictions_speech_all_sub[s] = predictions_speech.copy()
        predictions_lips_all_sub[s] = predictions_lips.copy()

    np.save(savepath+'/results_speech_all_sub', results_speech_all_sub)
    np.save(savepath+'/results_lips_all_sub', results_lips_all_sub)
    np.save(savepath+'/predictions_speech_all_sub',
            predictions_speech_all_sub)
    np.save(savepath+'/predictions_lips_all_sub', predictions_lips_all_sub)

    # Stack every subject exactly once along a third axis. The previous
    # seed-then-always-stack logic included the first subject twice.
    tmp_results_speech = np.dstack(
        [np.asarray(results_speech_all_sub[s]) for s in subject_name])
    tmp_results_lips = np.dstack(
        [np.asarray(results_lips_all_sub[s]) for s in subject_name])

    # computing grand average and standard deviation for each time lag
    GAVG_sp = np.mean(tmp_results_speech, 2)
    GAVG_lip = np.mean(tmp_results_lips, 2)
    GAVG_sp_std = np.std(tmp_results_speech, 2)
    GAVG_lip_std = np.std(tmp_results_lips, 2)

    np.save(savepath+'/GAVG_sp', GAVG_sp)
    np.save(savepath+'/GAVG_lip', GAVG_lip)
    np.save(savepath+'/GAVG_sp_std', GAVG_sp_std)
    np.save(savepath+'/GAVG_lip_std', GAVG_lip_std)

    # ---- plotting results ----
    x_values = (np.asarray(T) - 100) * 10

    pp.figure(0)
    for n in range(len(regularization)):
        pp.errorbar(x_values, GAVG_sp[n, :], yerr=GAVG_sp_std[n, :])
    pp.legend(regularization)
    pp.title('speech MSE')
    pp.savefig(fname=savepath+'/GAVG_specch.png')

    pp.figure(1)
    for n in range(len(regularization)):
        pp.errorbar(x_values, GAVG_lip[n, :], yerr=GAVG_lip_std[n, :])
    pp.legend(regularization)
    pp.title('lips MSE')
    pp.savefig(fname=savepath +'/GAVG_lips.png')

    print('bla')
def test_receptive_field():
    """Check ReceptiveField fitting, input validation, repr and scorers."""
    from sklearn.linear_model import Ridge

    # An estimator instance must be usable directly
    ridge = Ridge()

    # Simulate inputs and one weight per (delay, feature) pair
    tmin, tmax = -10., 0
    n_feats = 3
    X = rng.randn(10000, n_feats)
    w = rng.randn(int((tmax - tmin) + 1) * n_feats)

    # Time-delayed design matrix and the noiseless target built from it
    delayed = _delay_time_series(X, tmin, tmax, 1.)
    X_del = np.concatenate(delayed.transpose(2, 0, 1), axis=1)
    y = np.dot(X_del, w)

    # Fit and compare against the simulation
    names = ['feature_%i' % ii for ii in [0, 1, 2]]
    rf = ReceptiveField(tmin, tmax, 1, names, estimator=ridge,
                        patterns=True)
    rf.fit(X, y)
    assert_array_equal(rf.delays_, np.arange(tmin, tmax + 1))

    y_pred = rf.predict(X)
    assert_allclose(y[rf.valid_samples_], y_pred[rf.valid_samples_],
                    atol=1e-2)
    assert rf.score(X, y) > .99
    assert_allclose(rf.coef_.T.ravel(), w, atol=1e-2)

    # Accepted input shapes
    rf.fit(X[:, np.newaxis:, ], y[:, np.newaxis])
    rf.fit(X, y[:, np.newaxis])

    # Rejected input shapes
    with pytest.raises(ValueError):
        rf.fit(X[..., np.newaxis], y)
    with pytest.raises(ValueError):
        rf.fit(X[:, 0], y)
    with pytest.raises(ValueError):
        rf.fit(X[..., np.newaxis], np.tile(y[..., np.newaxis], [2, 1, 1]))
    # number of stim features must match the declared feature names
    with pytest.raises(ValueError):
        rf.fit(X[:, :1], y)

    # Features are named automatically when no names are given
    rf = ReceptiveField(tmin, tmax, 1, estimator=ridge)
    rf.fit(X, y)
    assert_equal(rf.feature_names, ['feature_%s' % ii for ii in [0, 1, 2]])

    # X and y need the same number of time points
    with pytest.raises(ValueError):
        rf.fit(X, y[:-2])

    # A numeric estimator is turned into a TimeDelayingRidge
    rf = ReceptiveField(tmin, tmax, 1, ['one', 'two', 'three'],
                        estimator=0, patterns=True)
    str(rf)  # repr before fit
    rf.fit(X, y)
    assert isinstance(rf.estimator_, TimeDelayingRidge)
    str(rf)  # repr after fit
    rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0, patterns=True)
    rf.fit(X[:, [0]], y)
    str(rf)  # repr with a single feature

    # Anything that is neither an estimator nor a float is rejected at fit
    rf = ReceptiveField(tmin, tmax, 1, estimator='foo', patterns=True)
    with pytest.raises(ValueError):
        rf.fit(X, y)
    rf = ReceptiveField(tmin, tmax, 1, estimator=np.array([1, 2, 3]))
    with pytest.raises(ValueError):
        rf.fit(X, y)

    # tmin must be <= tmax
    rf = ReceptiveField(5, 4, 1, patterns=True)
    with pytest.raises(ValueError):
        rf.fit(X, y)

    # Every registered scorer must agree with the model's own score
    x_one = X[:, [0]]
    for key, scorer in _SCORERS.items():
        rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0,
                            scoring=key, patterns=True)
        rf.fit(x_one, y)
        y_pred = rf.predict(x_one).T.ravel()[:, np.newaxis]
        direct = scorer(y[:, np.newaxis], y_pred, multioutput='raw_values')
        assert_allclose(direct, rf.score(x_one, y), rtol=1e-2)

    # Scorers need 2D input
    with pytest.raises(ValueError):
        _SCORERS['corrcoef'](y.ravel(), y_pred, multioutput='raw_values')

    # Unknown scorer names are rejected at fit time
    rf = ReceptiveField(tmin, tmax, 1., scoring='foo')
    with pytest.raises(ValueError):
        rf.fit(X, y)
def cross_validate(data, lambda_config=(2e0, 2e20, 11), TA=0, name="data_0"):
    """Nested leave-one-trial-out CV of a ridge receptive-field model.

    The outer loop holds out one trial for testing; the inner loop picks the
    regularization value with the highest summed validation score. A final
    model is then fitted on an 80/20 split per SNR condition, compared with
    a mean-predicting dummy regressor, plotted, and summarised in a text
    file.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first 16 columns are the EEG features and which also has
        the columns ``'target'``, ``'trial'``, ``'TA'`` and ``'SNR'``.
    lambda_config : tuple, optional
        ``(start, stop, num)`` passed to ``np.linspace`` to build the
        candidate regularization values. The default is (2e0, 2e20, 11).
    TA : int, optional
        Only rows with ``data['TA'] == TA`` are used. The default is 0.
    name : str, optional
        Path handed to ``np.savetxt`` for the performance summary.
        The default is "data_0".

    Returns
    -------
    None
        The summary is written to ``name`` and each figure is saved as a PNG
        in the working directory and then shown.
    """
    # Function-scope import: only needed to concatenate the split frames
    import pandas as pd

    # For reproducibility
    random_state = 9999

    # Train one model per TA
    data = data[data['TA'] == TA]

    # Define lambda values for training models
    lambdas = np.linspace(lambda_config[0], lambda_config[1],
                          lambda_config[2])

    # Split into train/testing with leave-one-group-out
    logo = LeaveOneGroupOut()

    # Shift the EEG with the project's ``shift`` helper and keep only as
    # many rows as the shifted signal has. ``.copy()`` makes the column
    # assignment act on an independent frame, not a slice of the filtered
    # one.
    eeg_cols = data.columns[:16]
    EEG_shifted = shift(data[eeg_cols].T, lag=-150, freq=64)
    data = data.iloc[:len(EEG_shifted.T)].copy()
    data[eeg_cols] = EEG_shifted.T

    # Assign X, y and group variable
    X = data[eeg_cols]
    y = data['target']
    groups = data["trial"]
    n_outer_groups = len(np.unique(groups))

    # Per-outer-fold results
    MSEs = []
    MSEdummies = []
    scores = []
    opt_lambda_list = []

    # Parameters for MNE
    tmin = -.25
    tmax = .1
    sfreq = 64

    ## Leave-trial-out CV ##
    for i, (out_train_idx, out_test_idx) in enumerate(
            logo.split(X, y, groups)):
        print("Outer fold %i / %i" % (i + 1, n_outer_groups))
        X_train = X.iloc[out_train_idx]
        y_train = y.iloc[out_train_idx]
        X_test = X.iloc[out_test_idx]
        y_test = y.iloc[out_test_idx]

        # Define inner groups, these are n - 1 of n total groups
        inner_groups = data["trial"].iloc[out_train_idx]
        n_inner_groups = len(np.unique(inner_groups))

        # One validation score per (inner fold, lambda)
        vals = np.zeros((n_inner_groups, lambda_config[2]))

        for j, (inn_train_idx, inn_test_idx) in enumerate(
                logo.split(X_train, y_train, inner_groups)):
            print("\t Inner fold %i / %i" % (j + 1, n_inner_groups))
            inn_X_train = np.asarray(X_train.iloc[inn_train_idx])
            inn_y_train = np.asarray(y_train.iloc[inn_train_idx])
            inn_X_test = np.asarray(X_train.iloc[inn_test_idx])
            inn_y_test = np.asarray(y_train.iloc[inn_test_idx])

            # Validate model with all parameters
            for k, lam in enumerate(lambdas):
                model = ReceptiveField(tmin, tmax, sfreq,
                                       feature_names=None, estimator=lam,
                                       scoring="corrcoef")
                model.fit(inn_X_train, inn_y_train)
                vals[j, k] = model.score(inn_X_test, inn_y_test)

        # The best lambda has the highest score summed over inner folds
        param_score = np.sum(vals, axis=0)
        lambda_opt = lambdas[np.argmax(param_score)]
        print("Optimal lambda = %f" % lambda_opt)
        opt_lambda_list.append(lambda_opt)

        # Refit on the full outer training set with the selected lambda
        model_opt = ReceptiveField(tmin, tmax, sfreq, feature_names=None,
                                   estimator=lambda_opt, scoring="corrcoef")
        model_opt.fit(np.asarray(X_train), np.asarray(y_train))

        # Score the selected model on the held-out trial
        score = model_opt.score(np.asarray(X_test), np.asarray(y_test))
        print('Score:')
        print(score)
        scores.append(score)

        # Baseline that always predicts the training mean
        dummy_regr = DummyRegressor(strategy="mean")
        dummy_regr.fit(np.asarray(X_train), np.asarray(y_train))

        # Plain MSE is the default, so the deprecated ``squared`` argument
        # is not passed
        MSEs.append(mean_squared_error(
            np.asarray(y_test), model_opt.predict(np.asarray(X_test))))
        MSEdummies.append(mean_squared_error(
            np.asarray(y_test), dummy_regr.predict(np.asarray(X_test))))

    ## Training and testing for optimal model ##
    # Making dataframes for each SNR cond
    data_0 = data[data['SNR'] == 0]
    data_1 = data[data['SNR'] == 1]
    data_2 = data[data['SNR'] == 2]

    # Shuffle data
    data_0 = data_0.sample(frac=1, random_state=random_state)
    data_1 = data_1.sample(frac=1, random_state=random_state)
    data_2 = data_2.sample(frac=1, random_state=random_state)

    # Split data 80/20 for training/testing
    train_0, test_0 = train_test_split(data_0, test_size=0.2,
                                       random_state=random_state)
    train_1, test_1 = train_test_split(data_1, test_size=0.2,
                                       random_state=random_state)
    train_2, test_2 = train_test_split(data_2, test_size=0.2,
                                       random_state=random_state)

    # Combine the per-SNR splits. ``DataFrame.append`` no longer exists in
    # pandas 2, so ``pd.concat`` is used.
    data = pd.concat([train_0, train_1, train_2], ignore_index=True)
    data_test = pd.concat([test_0, test_1, test_2], ignore_index=True)

    # Mean score across all folds
    mu_score = np.mean(scores)
    print("Mean score = %f" % mu_score)
    best_fold = np.argmax(scores) + 1
    print("Best fold = %i" % best_fold)

    # Final model uses the lambda of the best-scoring outer fold
    model_optimal = ReceptiveField(
        tmin, tmax, sfreq, feature_names=None,
        estimator=opt_lambda_list[np.argmax(scores)], scoring="corrcoef")
    model_optimal.fit(np.asarray(data[data.columns[:16]]),
                      np.asarray(data["target"]))

    # Dummy regressor fitted on the same training data
    dummy_regr = DummyRegressor(strategy="mean")
    dummy_regr.fit(np.asarray(data[data.columns[:16]]),
                   np.asarray(data["target"]))

    def features_of(frame):
        """Return the first 16 columns of ``frame`` as an array."""
        return np.asarray(frame[frame.columns[:16]])

    def cross_corr(regressor, frame):
        """Cross-correlate predictions with the target, scaled by length."""
        return signal.correlate(
            regressor.predict(features_of(frame)),
            np.asarray(frame['target']),
            mode='same') / len(frame['target'])

    # Cross correlation on test data for all three SNR conds
    score_0 = cross_corr(model_optimal, test_0)
    score_1 = cross_corr(model_optimal, test_1)
    score_2 = cross_corr(model_optimal, test_2)
    score_0_dummy = cross_corr(dummy_regr, test_0)
    score_1_dummy = cross_corr(dummy_regr, test_1)
    score_2_dummy = cross_corr(dummy_regr, test_2)

    # Cross correlate with a random "speech" signal drawn uniformly within
    # the target's range. A seeded generator keeps this baseline
    # reproducible, in line with ``random_state`` above.
    n_random = np.asarray(test_1).shape[0]
    random_target = np.random.RandomState(random_state).uniform(
        low=min(test_1['target']), high=max(test_1['target']),
        size=(n_random, ))
    random_corr = signal.correlate(
        model_optimal.predict(features_of(test_1)), random_target,
        mode='same') / n_random

    ### SHOW RESULTS IN PLOTS ###
    def plot_envelopes(frame, condition, file_tag):
        """Plot true, predicted and baseline envelopes for one test set."""
        n_samples = len(frame['target'])
        x_axis = np.linspace(0, n_samples, num=n_samples)
        feats = features_of(frame)
        # A fresh figure per plot, so curves never pile up on one axes when
        # ``plt.show()`` does not close the previous figure
        plt.figure()
        plt.plot(x_axis, np.asarray(frame['target']), color='sandybrown',
                 label='True')
        plt.plot(x_axis, model_optimal.predict(feats), color='deepskyblue',
                 label='Predicted')
        plt.plot(x_axis, dummy_regr.predict(feats), color='rebeccapurple',
                 dashes=[6, 2], label='Baseline (mean)')
        plt.grid()
        plt.title(
            f'TA: {TA} · Predicted and True Speech Envelopes · {condition}')
        plt.xlabel('Samples')
        plt.ylabel('Speech Envelope')
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        # Save before showing: after ``show`` the figure may already be
        # closed, which would write an empty image
        plt.savefig(f'Figure {file_tag} - TA {TA}.png')
        plt.show()

    plot_envelopes(data_test, 'All SNRs', 'All')
    plot_envelopes(test_0, '-5 DB SNR', '-5 DB')
    plot_envelopes(test_1, '0 DB SNR', '0 DB')
    plot_envelopes(test_2, '+5 DB SNR', '+5 DB')

    ## Bar chart to compare MSE for L2 and baseline ##
    measure = [np.mean(MSEs), np.mean(MSEdummies)]
    variance = [np.var(MSEs), np.var(MSEdummies)]
    x_labels = ['L2 (MNE)', 'Baseline']
    x_pos = list(range(len(x_labels)))
    plt.figure()
    plt.bar(x_pos, measure, color='sandybrown', yerr=variance)
    plt.grid()
    plt.xlabel("Model")
    plt.ylabel("MSE")
    plt.title("MSEs of L-2 and Baseline Model Compared")
    plt.xticks(x_pos, x_labels)
    plt.savefig(f'MSEs compared - TA {TA}.png')
    plt.show()

    ## Make boxplot of CrossCorrs ##
    ticks = ['-5 DB', '0 DB', '+5 DB', 'Random']

    # Function to set the colors of the boxplots
    def set_box_color(bp, color):
        for part in ('boxes', 'whiskers', 'caps', 'medians'):
            plt.setp(bp[part], color=color)

    plt.figure()
    model_scores = [score_0, score_1, score_2]
    dummy_scores = [score_0_dummy, score_1_dummy, score_2_dummy]
    bpl = plt.boxplot(
        model_scores,
        positions=np.arange(len(model_scores)) * 2.0 - 0.4,
        sym='', widths=0.6)
    bpr = plt.boxplot(
        dummy_scores,
        positions=np.arange(len(dummy_scores)) * 2.0 + 0.4,
        sym='', widths=0.6)
    bpu = plt.boxplot(random_corr, positions=[6], sym='', widths=0.6)
    set_box_color(bpl, 'deepskyblue')
    set_box_color(bpr, 'rebeccapurple')
    set_box_color(bpu, 'sandybrown')

    # Empty lines in the box colors, drawn only to create legend entries
    plt.plot([], c='deepskyblue', label='L2 MNE')
    plt.plot([], c='rebeccapurple', label='Baseline')
    plt.plot([], c='sandybrown', label='Random')

    plt.title(
        "Cross-correlation Between L-2-Predicted and True Envelopes in All SNR Levels & Random"
    )
    plt.legend()
    plt.ylabel("Cross-correlation")
    plt.grid()
    plt.xticks(range(0, len(ticks) * 2, 2), ticks)
    plt.xlim(-2, len(ticks) * 2)
    plt.tight_layout()
    plt.savefig(f'Boxplot over CrossCorr - TA {TA}.png')
    plt.show()

    # Make data matrix
    best = np.argmax(scores)
    data_matrix = np.array([
        'TA:', TA,
        'Optimal Lambda Value:', opt_lambda_list[best],
        'Best Score (Pearson´s R):', scores[best],
        'Mean Score (Pearson´s R):', np.mean(scores),
        'Best MSE:', min(MSEs),
        'Mean MSE:', np.mean(MSEs),
        'Best MSE Dummy:', min(MSEdummies),
        'Mean MSE Dummy', np.mean(MSEdummies),
        'CrossCorr for -5DB SNR:', np.mean(score_0),
        'CrossCorr for 0DB SNR:', np.mean(score_1),
        'CrossCorr for +5DB SNR:', np.mean(score_2),
        'CrossCorr for random:', np.mean(random_corr),
        'Dummy CrossCorr for -5DB SNR:', np.mean(score_0_dummy),
        'Dummy CrossCorr for 0DB SNR:', np.mean(score_1_dummy),
        'Dummy CrossCorr for +5DB SNR:', np.mean(score_2_dummy)
    ]).T

    # Save as CSV in working directory
    np.savetxt(name, data_matrix, delimiter=",", fmt='%s')
def test_receptive_field_nd(n_jobs):
    """Check multi-feature, multi-output receptive fields and intercepts."""
    from sklearn.linear_model import Ridge

    # Three input features, two outputs
    rng = np.random.RandomState(3)
    x = rng.randn(1000, 3)
    y = np.zeros((1000, 2))
    smin, smax = 0, 5

    # Place one uniquely scaled, delayed copy of a feature into an output
    # for each lag 1..4; "expected" lists where those weights must appear
    # in the fitted receptive field
    for lag in range(1, 5):
        gain = (-1) ** lag * lag
        y[lag:, lag % 2] += gain * x[:-lag, lag % 3]
    y -= np.mean(y, axis=0)
    x -= np.mean(x, axis=0)
    x_off = x + 1e3
    expected = [
        [[0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 4, 0],
         [0, 0, 2, 0, 0, 0]],
        [[0, 0, 0, -3, 0, 0],
         [0, -1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0]],
    ]
    delays = np.arange(smin, smax + 1)

    # Each estimator is paired with the tolerance its coefficients must meet
    tdr_l = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian',
                              n_jobs=n_jobs)
    tdr_nc = TimeDelayingRidge(smin, smax, 1., 0.1, n_jobs=n_jobs,
                               edge_correction=False)
    cases = [
        (Ridge(alpha=0.), 1e-3),
        (0., 1e-3),
        (0.01, 1e-3),
        (tdr_l, 5e-3),
        (tdr_nc, 5e-2),
    ]
    for estimator, atol in cases:
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_array_equal(model.delays_, delays)
        assert_allclose(model.coef_, expected, atol=atol)

    # Invalid reg_type values are rejected at fit time
    for bad_reg_type in ('foo', ['laplacian']):
        tdr = TimeDelayingRidge(smin, smax, 1., 0.01,
                                reg_type=bad_reg_type, n_jobs=n_jobs)
        model = ReceptiveField(smin, smax, 1., estimator=tdr)
        with pytest.raises(ValueError):
            model.fit(x, y)

    # Now check the intercept_
    tdr = TimeDelayingRidge(smin, smax, 1., 0., n_jobs=n_jobs)
    tdr_no = TimeDelayingRidge(smin, smax, 1., 0., fit_intercept=False,
                               n_jobs=n_jobs)
    estimators = (Ridge(alpha=0.), tdr,
                  Ridge(alpha=0., fit_intercept=False), tdr_no)
    for estimator in estimators:
        label = repr(estimator)

        # first with no intercept in the data
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_allclose(model.estimator_.intercept_, 0., atol=1e-7,
                        err_msg=label)
        assert_allclose(model.coef_, expected, atol=1e-3, err_msg=label)
        y_pred = model.predict(x)
        assert_allclose(y_pred[model.valid_samples_],
                        y[model.valid_samples_],
                        atol=1e-2, err_msg=label)
        score = np.mean(model.score(x, y))
        assert score > 0.9999

        # now with an intercept in the data; tolerances are tight only
        # when the estimator fits an intercept
        model.fit(x_off, y)
        if estimator.fit_intercept:
            val = [-6000, 4000]
            itol, ctol = 0.5, 5e-4
            ptol, stol = 1e-2, 0.999999
        else:
            val = itol = 0.
            ctol = 2.
            ptol, stol = 10, 0.6
        assert_allclose(model.estimator_.intercept_, val, atol=itol,
                        err_msg=label)
        assert_allclose(model.coef_, expected, atol=ctol, rtol=ctol,
                        err_msg=label)
        y_pred = model.predict(x_off)[model.valid_samples_]
        assert_allclose(y_pred, y[model.valid_samples_],
                        atol=ptol, err_msg=label)
        score = np.mean(model.score(x_off, y))
        assert score > stol, estimator

    # fit_intercept=False on ReceptiveField itself gives a zero intercept
    model = ReceptiveField(smin, smax, 1., fit_intercept=False)
    model.fit(x_off, y)
    assert_allclose(model.estimator_.intercept_, 0., atol=1e-7)
    score = np.mean(model.score(x_off, y))
    assert score > 0.6
# Time delays to use in the receptive field
tmin, tmax = -1, 0

# Interpolate the envelope at the epoch time points; times outside the
# envelope's range are filled with zeros
interpolator = interp1d(
    np.arange(len(envelope)) / sfreq,
    envelope,
    fill_value=0.,
    bounds_error=False,
    assume_sorted=True,
)
envelope_rs = interpolator(speech_epochs.times)
envelope_rs[0] = 0.
assert np.isfinite(envelope_rs).all()

# Initialize the model: a Laplacian-regularized time-delaying ridge scored
# by correlation coefficient
est = TimeDelayingRidge(tmin, tmax, epochs.info['sfreq'], 1.,
                        reg_type='laplacian')
rf = ReceptiveField(tmin, tmax, epochs.info['sfreq'],
                    estimator=est, scoring='corrcoef')
n_delays = int((tmax - tmin) * sfreq) + 2

# Fit on the envelope as a single-feature input, then score on the same data
rf.fit(envelope_rs[:, np.newaxis], virtual_channels)
score = rf.score(envelope_rs[:, np.newaxis], virtual_channels)
coefs = rf.coef_[0, :]
times = rf.delays_ / float(rf.sfreq)

# Plot the coefficients against delay, with a zero reference line
fig, ax = plt.subplots()
ax.plot(times, coefs)
ax.axhline(0, ls='--', color='r')
ax.set_title("WTF is this?")
ax.set_xlabel("time (s)")
ax.set_ylabel("($r$)")
mne.viz.tight_layout()
plt.show()
def test_receptive_field():
    """Check ReceptiveField fitting, input validation, repr and scorers"""
    from sklearn.linear_model import Ridge

    # An estimator instance must be usable directly
    ridge = Ridge()

    # Simulate inputs (features first) and one weight per delay and feature
    tmin, tmax = 0., 10.
    n_feats = 3
    X = rng.randn(n_feats, 10000)
    w = rng.randn(int((tmax - tmin) + 1) * n_feats)

    # Stack the delayed copies of X and build the target from them
    X_del = np.vstack(_delay_time_series(X, tmin, tmax, 1., axis=-1))
    y = np.dot(w, X_del)
    X = np.rollaxis(X, -1, 0)  # time to first dimension

    # Fit and compare against the simulation
    names = ['feature_%i' % ii for ii in [0, 1, 2]]
    rf = ReceptiveField(tmin, tmax, 1, names, estimator=ridge)
    rf.fit(X, y)
    assert_array_equal(rf.delays_, np.arange(tmin, tmax + 1))

    y_pred = rf.predict(X)
    assert_array_almost_equal(y[rf.keep_samples_],
                              y_pred.squeeze()[rf.keep_samples_], 2)
    assert rf.score(X, y) > .99
    assert_array_almost_equal(rf.coef_.reshape(-1, order='F'), w, 2)

    # Accepted input shapes
    rf.fit(X[:, np.newaxis:, ], y[:, np.newaxis])
    rf.fit(X, y[:, np.newaxis])

    # Rejected input shapes
    assert_raises(ValueError, rf.fit, X[..., np.newaxis], y)
    assert_raises(ValueError, rf.fit, X[:, 0], y)
    y_tiled = np.tile(y[..., np.newaxis], [2, 1, 1])
    assert_raises(ValueError, rf.fit, X[..., np.newaxis], y_tiled)
    # number of stim features must match the declared feature names
    assert_raises(ValueError, rf.fit, X[:, :1], y)

    # Features are named automatically when no names are given
    rf = ReceptiveField(tmin, tmax, 1, estimator=ridge)
    rf.fit(X, y)
    assert_equal(rf.feature_names, ['feature_%s' % ii for ii in [0, 1, 2]])

    # X and y need the same number of time points
    assert_raises(ValueError, rf.fit, X, y[:-2])

    # A numeric estimator is turned into a Ridge
    rf = ReceptiveField(tmin, tmax, 1, ['one', 'two', 'three'], estimator=0)
    str(rf)  # repr before fit
    rf.fit(X, y)
    assert isinstance(rf.estimator_, Ridge)
    str(rf)  # repr after fit
    rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0)
    rf.fit(X[:, [0]], y)
    str(rf)  # repr with a single feature

    # Anything that is neither an estimator nor a float is rejected at fit
    rf = ReceptiveField(tmin, tmax, 1, estimator='foo')
    assert_raises(ValueError, rf.fit, X, y)
    rf = ReceptiveField(tmin, tmax, 1, estimator=np.array([1, 2, 3]))
    assert_raises(ValueError, rf.fit, X, y)

    # tmin must be <= tmax
    rf = ReceptiveField(5, 4, 1)
    assert_raises(ValueError, rf.fit, X, y)

    # Every registered scorer must agree with the model's own score
    x_one = X[:, [0]]
    for key, scorer in _SCORERS.items():
        rf = ReceptiveField(tmin, tmax, 1, ['one'], estimator=0,
                            scoring=key)
        rf.fit(x_one, y)
        y_pred = rf.predict(x_one)
        assert_array_almost_equal(scorer(y[:, np.newaxis], y_pred),
                                  rf.score(x_one, y), 4)

    # Scorers need 2D input
    assert_raises(ValueError, _SCORERS['corrcoef'], y.squeeze(), y_pred)

    # An unknown scorer name is rejected at construction
    assert_raises(ValueError, ReceptiveField, tmin, tmax, 1, scoring='foo')
def test_receptive_field_nd():
    """Check multi-feature / multi-output fits, including intercepts."""
    from sklearn.linear_model import Ridge

    smin, smax = 0, 5

    # Three input features, two outputs.
    x = rng.randn(1000, 3)
    y = np.zeros((1000, 2))
    # Distribute a unique gain at each lag over the feature/output pairs;
    # "expected" below spells out where each gain should show up in the RF.
    for lag in range(1, 5):
        gain = (-1) ** lag * lag
        y[lag:, lag % 2] += gain * x[:-lag, lag % 3]
    y -= np.mean(y, axis=0)
    x -= np.mean(x, axis=0)
    x_off = x + 1e3
    expected = [
        [[0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 4, 0],
         [0, 0, 2, 0, 0, 0]],
        [[0, 0, 0, -3, 0, 0],
         [0, -1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0]],
    ]
    delays = np.arange(smin, smax + 1)

    laplacian = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian')
    for estimator in (Ridge(alpha=0.), 0., 0.01, laplacian):
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_array_equal(model.delays_, delays)
        assert_allclose(model.coef_, expected, atol=1e-1)

    # Invalid regularization types must raise when fitting.
    for bad_reg_type in ('foo', ['laplacian']):
        tdr = TimeDelayingRidge(smin, smax, 1., 0.01, reg_type=bad_reg_type)
        model = ReceptiveField(smin, smax, 1., estimator=tdr)
        pytest.raises(ValueError, model.fit, x, y)

    # Now check the intercept_
    tdr = TimeDelayingRidge(smin, smax, 1., 0.)
    tdr_no = TimeDelayingRidge(smin, smax, 1., 0., fit_intercept=False)
    estimators = (Ridge(alpha=0.), tdr,
                  Ridge(alpha=0., fit_intercept=False), tdr_no)
    for estimator in estimators:
        label = repr(estimator)

        # first with no intercept in the data
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_allclose(model.estimator_.intercept_, 0., atol=1e-7,
                        err_msg=label)
        assert_allclose(model.coef_, expected, atol=1e-3, err_msg=label)
        y_pred = model.predict(x)
        valid = model.valid_samples_
        assert_allclose(y_pred[valid], y[valid], atol=1e-2, err_msg=label)
        score = np.mean(model.score(x, y))
        assert score > 0.9999

        # now with an intercept in the data; estimators that do not fit an
        # intercept get much looser tolerances
        model.fit(x_off, y)
        if estimator.fit_intercept:
            val, itol, ctol = [-6000, 4000], 0.5, 5e-4
            ptol, stol = 1e-2, 0.999999
        else:
            val, itol, ctol = 0., 0., 2.
            ptol, stol = 10, 0.6
        assert_allclose(model.estimator_.intercept_, val, atol=itol,
                        err_msg=label)
        assert_allclose(model.coef_, expected, atol=ctol, rtol=ctol,
                        err_msg=label)
        y_pred = model.predict(x_off)[model.valid_samples_]
        assert_allclose(y_pred, y[model.valid_samples_], atol=ptol,
                        err_msg=label)
        score = np.mean(model.score(x_off, y))
        assert score > stol, estimator

    # The fit_intercept flag of ReceptiveField itself disables the intercept.
    model = ReceptiveField(smin, smax, 1., fit_intercept=False)
    model.fit(x_off, y)
    assert_allclose(model.estimator_.intercept_, 0., atol=1e-7)
    score = np.mean(model.score(x_off, y))
    assert score > 0.6
# the linear receptive field of this signal. Note that properties of the # receptive field (e.g. smoothness) will depend on the autocorrelation in the # inputs and outputs. # Create training and testing data train, test = np.arange(n_epochs - 1), n_epochs - 1 X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test] X_train, X_test, y_train, y_test = [ np.rollaxis(ii, -1, 0) for ii in (X_train, X_test, y_train, y_test) ] # Model the simulated data as a function of the spectrogram input alphas = np.logspace(-4, 0, 10) scores = np.zeros_like(alphas) models = [] for ii, alpha in enumerate(alphas): rf = ReceptiveField(tmin, tmax, sfreq, freqs, estimator=alpha) rf.fit(X_train, y_train) # Now make predictions about the model output, given input stimuli. scores[ii] = rf.score(X_test, y_test) models.append(rf) times = rf.delays_ / float(rf.sfreq) # Choose the model that performed best on the held out data ix_best_alpha = np.argmax(scores) best_mod = models[ix_best_alpha] coefs = best_mod.coef_ best_pred = best_mod.predict(X_test)[:, 0] # Plot the original STRF, and the one that we recovered with modeling.
def test_receptive_field_fast():
    """Check that TimeDelayingRidge gives the same answers as Ridge."""
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(0)
    n_samples = 500
    x = rng.randn(n_samples)
    x_col = x[:, np.newaxis]

    # Single feature, single output: y is x shifted by ``delay`` samples, so
    # the receptive field should be a unit impulse at that delay.
    for delay in range(-2, 3):
        y = np.zeros(n_samples)
        windows = [(-4, 2)]
        if delay == 0:
            y = x.copy()
        elif delay < 0:
            y[-delay:] = x[:delay]
            windows.append((-4, -1))
        else:
            y[:-delay] = x[delay:]
            windows.append((1, 2))
        for smin, smax in windows:
            tdr = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian',
                                    fit_intercept=False)
            for estimator in (Ridge(alpha=0.), 0., 0.1, tdr):
                model = ReceptiveField(smin, smax, 1., estimator=estimator)
                model.fit(x_col, y)
                assert_array_equal(model.delays_, np.arange(smin, smax + 1))
                impulse = (model.delays_ == delay).astype(float)
                assert_allclose(model.coef_[0], impulse, atol=1e-2)
                prediction = model.predict(x_col)[:, 0]
                assert_allclose(y, prediction, atol=5e-2)

    # multidimensional
    smin, smax = -5, 0
    x = rng.randn(1000, 3)
    y = np.zeros((1000, 2))
    # Distribute a unique gain at each lag over the feature/output pairs;
    # "expected" below spells out where each gain should show up in the RF.
    for lag in range(1, 5):
        gain = (-1) ** lag * lag
        y[lag:, lag % 2] += gain * x[:-lag, lag % 3]
    expected = [
        [[0, 0, 0, 0, 0, 0],
         [0, 4, 0, 0, 0, 0],
         [0, 0, 0, 2, 0, 0]],
        [[0, 0, -3, 0, 0, 0],
         [0, 0, 0, 0, -1, 0],
         [0, 0, 0, 0, 0, 0]],
    ]
    delays = np.arange(smin, smax + 1)

    tdr = TimeDelayingRidge(smin, smax, 1., 0.1, 'laplacian')
    for estimator in (Ridge(alpha=0.), 0., 0.01, tdr):
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_array_equal(model.delays_, delays)
        assert_allclose(model.coef_, expected, atol=1e-1)

    # Invalid regularization types must raise when fitting.
    for bad_reg_type in ('foo', ['laplacian']):
        tdr = TimeDelayingRidge(smin, smax, 1., 0.01, reg_type=bad_reg_type)
        model = ReceptiveField(smin, smax, 1., estimator=tdr)
        assert_raises(ValueError, model.fit, x, y)

    # Now check the intercept_
    tdr = TimeDelayingRidge(smin, smax, 1., 0.)
    tdr_no = TimeDelayingRidge(smin, smax, 1., 0., fit_intercept=False)
    estimators = (Ridge(alpha=0.), tdr,
                  Ridge(alpha=0., fit_intercept=False), tdr_no)
    for estimator in estimators:
        # Remove the means in place (this also undoes the offset added at
        # the end of the previous iteration), then fit without an offset.
        x -= x.mean(axis=0)
        y -= y.mean(axis=0)
        model = ReceptiveField(smin, smax, 1., estimator=estimator)
        model.fit(x, y)
        assert_allclose(model.estimator_.intercept_, 0., atol=1e-2)
        assert_allclose(model.coef_, expected, atol=1e-1)

        # Add a large offset in place and refit.
        x += 1e3
        model.fit(x, y)
        if estimator.fit_intercept:
            val, itol, ctol = [-6000, 4000], 20., 1e-1
        else:
            val, itol, ctol = 0., 0., 2.  # much worse
        assert_allclose(model.estimator_.intercept_, val, atol=itol)
        assert_allclose(model.coef_, expected, atol=ctol)

    # x still carries the offset here; without an intercept it must stay 0.
    model = ReceptiveField(smin, smax, 1., fit_intercept=False)
    model.fit(x, y)
    assert_allclose(model.estimator_.intercept_, 0., atol=1e-7)
def _hold_out_trials(frame, snr_labels, trial_ids):
    """Split ``frame`` into the rows of the given trials and all other rows.

    For every ``(snr, trial)`` pair the rows whose "SNR" and "trial" columns
    match are held out. Returns ``(held_out, remaining)``: ``held_out`` is
    concatenated in ``snr_labels`` order with a fresh index, ``remaining``
    keeps the index of ``frame``.
    """
    parts = []
    held_out = np.zeros(len(frame), dtype=bool)
    for snr, trial in zip(snr_labels, trial_ids):
        # Both conditions are evaluated on the same frame so the boolean mask
        # always lines up with the rows it selects.
        mask = np.asarray((frame["SNR"] == snr) & (frame["trial"] == trial))
        parts.append(frame[mask])
        held_out |= mask
    return pd.concat(parts, ignore_index=True), frame[~held_out]


def cross_validate(data, TA=None, lambda_config=(2e0, 2e20, 11),
                   t_config=(-.25, .1)):
    """Run nested k-fold cross-validation of ReceptiveField ridge models.

    For every "TA" value, each outer fold holds out one trial per SNR level
    as test data. The inner folds hold out one further trial per SNR level
    to pick the regularization value with the highest summed mean score.
    A model with that value is then refit on the outer training data and its
    prediction is correlated with the test "target", the test "mask" and the
    "target" of a trial returned by ``random_trial``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "TA", "SNR", "trial", "target" and "mask".
        The first 16 columns are used as model features. (The original
        documentation says the frame is loaded with ``getData`` from
        ``data_load``.)
    TA : optional
        Value of the "TA" column to restrict the analysis to. If None, all
        unique "TA" values in ``data`` are processed.
    lambda_config : tuple, optional
        ``(start, stop, num)`` passed to ``np.linspace`` to build the
        candidate regularization values. The default is (2e0, 2e20, 11).
    t_config : tuple, optional
        ``(tmin, tmax)`` lag range passed to ``ReceptiveField``.
        The default is (-.25, .1).

    Returns
    -------
    df : pandas.DataFrame
        One row per (TA, outer fold, SNR level) with the columns
        "corr_true", "corr_mask", "corr_rand", "TA" and "SNR".

    Notes
    -----
    Seeds the global NumPy random generator with 999, prints progress, and
    pickles the accumulated results to
    ``local_data/results/result_<TA>_<fold>.pkl`` after every outer fold.
    """
    np.random.seed(999)

    # Define lambda values for training models
    # NOTE(review): these values are linearly spaced between 2 and 2e20; if a
    # geometric grid (e.g. 2**0 ... 2**20) was intended this needs changing --
    # confirm against the referenced publication.
    lambdas = np.linspace(lambda_config[0], lambda_config[1], lambda_config[2])

    # Parameters for MNE
    # NOTE(review): the original documentation describes t_config as "ms";
    # confirm the unit ReceptiveField expects for tmin/tmax.
    tmin, tmax = t_config
    sfreq = 64

    feature_cols = list(data.columns[:16])

    # Define result DataFrame
    df_cols = ["corr_true", "corr_mask", "corr_rand", "TA", "SNR"]
    df = pd.DataFrame(columns=df_cols)
    result_frames = []

    TAs = np.unique(data["TA"]) if TA is None else np.array([TA])

    for ta in TAs:
        data_sub = data[data["TA"] == ta]
        SNRs = np.unique(data_sub["SNR"])

        # Shuffled unique trial ids, one array per SNR level
        SNR_order = []
        for snr in SNRs:
            trials = np.unique(data_sub[data_sub["SNR"] == snr]["trial"])
            np.random.shuffle(trials)
            SNR_order.append(trials)

        # The SNR level with the fewest trials bounds the number of folds
        K = min([len(order) for order in SNR_order]) if SNR_order else 0

        # Outer fold
        for k in range(K):
            # Hold out the k-th trial of every SNR level as test data
            test_trials = [order[k] for order in SNR_order]
            data_test, data_train = _hold_out_trials(data_sub, SNRs,
                                                     test_trials)

            # Scores of every lambda in every inner fold
            vals = np.zeros((K - 1, len(lambdas)))

            # Trials that remain available for validation
            SNR_valid_order = [np.delete(order, k) for order in SNR_order]

            # Inner fold: find optimal hyperparameter
            for j in range(K - 1):
                print("TA: %i / %i\n\tFold: %i / %i\n\tInner fold: %i / %i" %
                      (ta + 1, len(TAs), k + 1, K, j + 1, K - 1))

                valid_trials = [order[j] for order in SNR_valid_order]
                data_valid_test, data_valid_train = _hold_out_trials(
                    data_train, SNRs, valid_trials)

                for i_lam, lam in enumerate(lambdas):
                    # Define model with the candidate regularization value
                    model = ReceptiveField(tmin, tmax, sfreq,
                                           feature_names=feature_cols,
                                           estimator=lam,
                                           scoring="corrcoef")

                    # Fit model to inner fold training data
                    model.fit(np.asarray(data_valid_train[feature_cols]),
                              np.asarray(data_valid_train["target"]))

                    # Score the held-out validation trial of each SNR level
                    val = np.zeros(len(SNRs))
                    for i_snr, snr in enumerate(SNRs):
                        fold = data_valid_test[data_valid_test["SNR"] == snr]
                        # score() returns an array; reduce it to a scalar
                        val[i_snr] = np.mean(model.score(
                            np.asarray(fold[feature_cols]),
                            np.asarray(fold["target"])))

                    # Add score to matrix
                    vals[j, i_lam] = np.mean(val)

            # Get optimal parameter
            param_score = np.sum(vals, axis=0)
            lambda_opt = lambdas[np.argmax(param_score)]
            print("Optimal lambda = %f" % lambda_opt)

            # Train optimal model on the full outer training data
            model_opt = ReceptiveField(tmin, tmax, sfreq,
                                       feature_names=feature_cols,
                                       estimator=lambda_opt,
                                       scoring="corrcoef")
            model_opt.fit(np.asarray(data_train[feature_cols]),
                          np.asarray(data_train["target"]))

            for snr, order in zip(SNRs, SNR_order):
                # Predict envelope
                data_test_SNR = data_test[data_test["SNR"] == snr]
                y_pred = model_opt.predict(
                    np.asarray(data_test_SNR[feature_cols]))
                y_rand = random_trial(data, TA=ta, trial=order[k])["target"]

                corr_true = pearsonr(y_pred,
                                     np.asarray(data_test_SNR["target"]))
                corr_mask = pearsonr(y_pred,
                                     np.asarray(data_test_SNR["mask"]))
                corr_rand = pearsonr(y_pred, np.asarray(y_rand))

                # Convert to DataFrame
                data_results = np.zeros((1, len(df_cols)))
                data_results[:, 0] = corr_true[0]
                data_results[:, 1] = corr_mask[0]
                data_results[:, 2] = corr_rand[0]
                data_results[:, 3] = ta
                data_results[:, 4] = snr
                result_frames.append(
                    pd.DataFrame(data=data_results, columns=df_cols))

            # Save everything accumulated so far after each outer fold
            df = pd.concat(result_frames, ignore_index=True)
            df.to_pickle("local_data/results/result_%i_%i.pkl" % (ta, k))

    print("Done")
    return df