def test_scaler():
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname, preload=False, add_eeg_ref=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]
    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True, add_eeg_ref=False)
    epochs_data = epochs.get_data()
    scaler = Scaler(epochs.info)
    y = epochs.events[:, -1]

    # np invalid divide value warnings
    with warnings.catch_warnings(record=True):
        X = scaler.fit_transform(epochs_data, y)
        assert_true(X.shape == epochs_data.shape)
        X2 = scaler.fit(epochs_data, y).transform(epochs_data)
    assert_array_equal(X2, X)

    # Test inverse_transform
    with warnings.catch_warnings(record=True):  # invalid value in mult
        Xi = scaler.inverse_transform(X, y)
    assert_array_equal(epochs_data, Xi)

    # Test init exception
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs, y)
def test_scaler():
    """Test methods of Scaler."""
    raw = io.Raw(raw_fname, preload=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]
    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    scaler = Scaler(epochs.info)
    y = epochs.events[:, -1]

    # np invalid divide value warnings
    with warnings.catch_warnings(record=True):
        X = scaler.fit_transform(epochs_data, y)
        assert_true(X.shape == epochs_data.shape)
        X2 = scaler.fit(epochs_data, y).transform(epochs_data)
    assert_array_equal(X2, X)

    # Test inverse_transform
    with warnings.catch_warnings(record=True):  # invalid value in mult
        Xi = scaler.inverse_transform(X, y)
    assert_array_equal(epochs_data, Xi)

    # Test init exception
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs, y)
def standard_scaling(data, scalings="mean", log=False):
    if log:
        data = np.log(data + np.finfo(np.float32).eps)
    if scalings in ["mean", "median"]:
        scaler = Scaler(scalings=scalings)
        data = scaler.fit_transform(data)
    else:
        raise ValueError("scalings should be mean or median")
    return data
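# A minimal usage sketch for the helper above. Assumptions (not part of the
# original source): random data stands in for a real
# (n_epochs, n_channels, n_times) array, which is the shape
# mne.decoding.Scaler expects.
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(10, 32, 500))  # 10 epochs, 32 channels, 500 samples

# Channel-wise z-scoring via Scaler(scalings='mean')
scaled = standard_scaling(data, scalings="mean")

# Robust (median/IQR) variant on log-transformed data; the offset keeps the
# input strictly positive for the log
scaled_robust = standard_scaling(np.abs(data) + 1e-12, scalings="median",
                                 log=True)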
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Test a full example with pattern extraction."""
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold
    data = np.zeros((10 * n_classes, n_channels, n_times))
    # Make only the first channel informative
    for ii in range(n_classes):
        data[ii * 10:(ii + 1) * 10, 0] = ii
    events = np.zeros((len(data), 3), int)
    events[:, 0] = np.arange(len(events))
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)
    clf = make_pipeline(
        Scaler(epochs.info), Vectorizer(),
        LinearModel(LogisticRegression(random_state=0, multi_class='ovr')),
    )
    scorer = 'roc_auc_ovr_weighted'
    time_gen = GeneralizingEstimator(clf, scorer, verbose=True)
    X = epochs.get_data()
    y = epochs.events[:, 2]
    n_splits = 3
    cv = StratifiedKFold(n_splits=n_splits)
    scores = cross_val_multiscore(time_gen, X, y, cv=cv, verbose=True)
    want = (n_splits,)
    if n_times > 1:
        want += (n_times, n_times)
    assert scores.shape == want
    assert_array_less(0.8, scores)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)  # no other channels useful
def test_scaler():
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]
    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    y = epochs.events[:, -1]

    methods = (None, dict(mag=5, grad=10, eeg=20), 'mean', 'median')
    infos = (epochs.info, epochs.info, None, None)
    epochs_data_t = epochs_data.transpose([1, 0, 2])
    for method, info in zip(methods, infos):
        if method == 'median' and not check_version('sklearn', '0.17'):
            assert_raises(ValueError, Scaler, info, method)
            continue
        if method == 'mean' and not check_version('sklearn', ''):
            assert_raises(ImportError, Scaler, info, method)
            continue
        scaler = Scaler(info, method)
        X = scaler.fit_transform(epochs_data, y)
        assert_equal(X.shape, epochs_data.shape)
        if method is None or isinstance(method, dict):
            sd = DEFAULTS['scalings'] if method is None else method
            stds = np.zeros(len(picks))
            for key in ('mag', 'grad'):
                stds[pick_types(epochs.info, meg=key)] = 1. / sd[key]
            stds[pick_types(epochs.info, meg=False, eeg=True)] = \
                1. / sd['eeg']
            means = np.zeros(len(epochs.ch_names))
        elif method == 'mean':
            stds = np.array([np.std(ch_data) for ch_data in epochs_data_t])
            means = np.array([np.mean(ch_data) for ch_data in epochs_data_t])
        else:  # median
            percs = np.array([np.percentile(ch_data, [25, 50, 75])
                              for ch_data in epochs_data_t])
            stds = percs[:, 2] - percs[:, 0]
            means = percs[:, 1]
        assert_allclose(X * stds[:, np.newaxis] + means[:, np.newaxis],
                        epochs_data, rtol=1e-12, atol=1e-20, err_msg=method)

        X2 = scaler.fit(epochs_data, y).transform(epochs_data)
        assert_array_equal(X, X2)

        # inverse_transform
        Xi = scaler.inverse_transform(X)
        assert_array_almost_equal(epochs_data, Xi)

    # Test init exception
    assert_raises(ValueError, Scaler, None, None)
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs)
    epochs_bad = Epochs(raw, events, event_id, 0, 0.01,
                        picks=np.arange(len(raw.ch_names)))  # non-data chs
    scaler = Scaler(epochs_bad.info, None)
    assert_raises(ValueError, scaler.fit, epochs_bad.get_data(), y)
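# The None/dict branch of the test above pins down what Scaler does when an
# info object is supplied: each channel is simply multiplied by the factor
# configured for its channel type (MNE's unit-normalization defaults when
# scalings=None). A small illustrative sketch of that reading, with synthetic
# info and data that are not part of the original test suite:
import numpy as np
import mne
from mne.decoding import Scaler

info = mne.create_info(['EEG 001', 'EEG 002'], 1000., 'eeg')
X = np.random.randn(5, 2, 50)  # n_epochs, n_channels, n_times
Xs = Scaler(info, scalings=dict(eeg=20)).fit_transform(X)
np.testing.assert_allclose(Xs, X * 20)  # dict scalings are plain multipliers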
def test_get_coef_multiclass(n_features, n_targets):
    """Test get_coef on multiclass problems."""
    # Check patterns with more than 1 regressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.pipeline import make_pipeline
    X, Y, A = _make_data(n_samples=30000, n_features=n_features,
                         n_targets=n_targets)
    lm = LinearModel(LinearRegression()).fit(X, Y)
    assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
    if n_targets == 1:
        want_shape = (n_features,)
    else:
        want_shape = (n_targets, n_features)
    assert_array_equal(lm.filters_.shape, want_shape)
    if n_features > 1 and n_targets > 1:
        assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
    lm = LinearModel(Ridge(alpha=0))
    clf = make_pipeline(lm)
    clf.fit(X, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    assert_allclose(lm.patterns_, coef, atol=1e-5)

    # With epochs, scaler, and vectorizer (typical use case)
    X_epo = X.reshape(X.shape + (1,))
    info = create_info(n_features, 1000., 'eeg')
    lm = LinearModel(Ridge(alpha=1))
    clf = make_pipeline(
        Scaler(info, scalings=dict(eeg=1.)),  # XXX adding this step breaks
        Vectorizer(),
        lm,
    )
    clf.fit(X_epo, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    lm_patterns_ = lm.patterns_[..., np.newaxis]
    assert_allclose(lm_patterns_, coef, atol=1e-5)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
def test_scaler():
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]
    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    scaler = Scaler(epochs.info)
    y = epochs.events[:, -1]

    X = scaler.fit_transform(epochs_data, y)
    assert_true(X.shape == epochs_data.shape)
    X2 = scaler.fit(epochs_data, y).transform(epochs_data)
    assert_array_equal(X2, X)
    # these should be across time
    assert_allclose(X.std(axis=-2), 1.)
    assert_allclose(X.mean(axis=-2), 0., atol=1e-12)

    # Test inverse_transform
    Xi = scaler.inverse_transform(X, y)
    assert_array_almost_equal(epochs_data, Xi)

    for kwargs in [{'with_mean': False}, {'with_std': False}]:
        scaler = Scaler(epochs.info, **kwargs)
        scaler.fit(epochs_data, y)
        assert_array_almost_equal(
            X, scaler.inverse_transform(scaler.transform(X)))

    # Test init exception
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs, y)
def get_Xy(self, slicers=None, df=None, dropbad=True, downsample=None,
           crop=None, scaling=None, verbose=None):
    """Get training data and target values.

    Parameters
    ----------
    slicers : dict of slices
        The slicers on which the data is filtered. See self.filter().
        If None, the full dataset will be used. Defaults to None.
    df : instance of pandas.core.frame.DataFrame
        The dataframe to use as input. If None, the dataframe loaded with
        self.load() will be used. Defaults to None.
    dropbad : bool
        Whether artifacts will be rejected or not. Defaults to True.
    downsample : int or None
        The resampling rate. If None, the data will not be downsampled.
        Defaults to None.
    crop : tuple of float (tmin, tmax) or None
        The crop time interval from epochs object, in seconds. If None,
        epochs will not be cropped. Defaults to None.
    scaling : dict, str or None
        The scaling method to be applied to data channel wise. See:
        http://martinos.org/mne/stable/generated/mne.decoding.Scaler.html
        Defaults to None.
    verbose : bool, str, int or None
        The verbosity level, as detailed at:
        https://www.martinos.org/mne/stable/generated/mne.set_log_level.html
        Defaults to None.

    Returns
    -------
    X : instance of numpy.ndarray
        The training data.
    y : instance of numpy.ndarray
        The target values.
    epochs : instance of mne.epochs.Epochs
        The MNE Epochs object.
    """
    if df is None:
        df = self.df
    if slicers:
        df = self.filter(slicers, df)
    raw = self.raw(df, verbose=verbose)
    raw.filter(0.5, 40, method='iir')  # bandpass filter
    events = mne.find_events(raw, verbose=verbose)
    event_id = {'distractor': 1, 'target': 2}
    # Reject epochs where the signal exceeds 100 uV in EEG channels or
    # 200 uV in the EOG channel
    reject = {'eeg': 100e-6, 'eog': 200e-6} if dropbad else None
    # See: http://martinos.org/mne/stable/generated/mne.Epochs.html
    epochs = mne.Epochs(raw, events, event_id=event_id, tmin=self.tmin,
                        tmax=self.tmax, baseline=(self.tmin, 0),
                        reject=reject, verbose=verbose)
    epochs.load_data()
    if dropbad:
        epochs.drop_bad()
    if downsample:
        epochs.resample(downsample, npad='auto')
    if crop:
        epochs.crop(*crop)
    epochs.pick_types(eeg=True)
    X = epochs.get_data()
    y = epochs.events[:, -1] == 2  # binary events
    # See: http://martinos.org/mne/stable/generated/mne.decoding.Scaler.html
    X = Scaler(epochs.info, scaling).fit_transform(X, y)
    return X, y, epochs
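# A hedged usage sketch for get_Xy. Hypothetical throughout: the loader
# instance name, the 'subject' slicer key, and the parameter values are
# illustrative only and do not come from the original source.
X, y, epochs = loader.get_Xy(
    slicers={'subject': slice(0, 1)},  # restrict to one subject
    dropbad=True,                      # reject artifact epochs
    downsample=128,                    # resample epochs to 128 Hz
    crop=(0., 0.8),                    # keep 0-800 ms post-stimulus
    scaling='mean',                    # channel-wise z-scoring via Scaler
)
print(X.shape, y.mean())  # (n_epochs, n_channels, n_times), target rate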
def raw_to_data(raw_edf, training=True, drop_rejects=True, subj=None):
    tmin, tmax = 0, 4.
    stim_code = dict([(32766, 1), (769, 2), (770, 3), (771, 4), (772, 5),
                      (783, 6), (276, 7), (277, 8), (768, 9), (1023, 10),
                      (1072, 11)])
    if training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TrainingSet')
    if not training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TestingSet')
    label_path = op.join('data_i2r', 'BCI_IV_2a', 'true_labels')
    label_files_list = glob.glob(label_path + '/*E.mat')
    label_subj = [int(f.split('A0')[1][0]) for f in label_files_list]
    file_list = glob.glob(path + '/*.gdf')
    subjects = [int(f.split('A0')[1][0]) for f in file_list]
    if not training:
        label_subj = [np.argwhere(np.array(label_subj) == subjects[i])[0][0]
                      for i in range(len(subjects))]

    event_id = dict()
    events_from_edf = []
    sampling_frequency = raw_edf._raw_extras[0]['max_samp']
    original_event = raw_edf.find_edf_events()
    annot_list = list(zip(original_event[1], original_event[4],
                          original_event[2]))

    # Remove rejected trials from events
    if drop_rejects:
        annot_list = pd.DataFrame(annot_list)
        rejected = annot_list[0].isin(annot_list[annot_list[2] == 1023][0])
        accepted_trials_index = [True] * 288
        ind = -1
        for row in annot_list.itertuples():
            if row[3] == 1023:
                rejected.loc[row[0] + 1] = True
                accepted_trials_index[ind] = False
            if row[3] == 768:
                ind = ind + 1
        annot_list = annot_list[~rejected]
        annot_list = list(zip(annot_list[0], annot_list[1], annot_list[2]))
    events_from_edf.extend(annot_list)
    events_from_edf = np.array(events_from_edf)

    events_arr = np.zeros(events_from_edf.shape, dtype=int)
    for (i, i_event) in enumerate(events_from_edf):
        index = int(float(i_event[0]) * sampling_frequency)
        events_arr[i, :] = index, 0, stim_code[int(i_event[2])]

    # strip channel names of "." characters
    raw_edf.rename_channels(lambda x: x.strip('.'))

    # create event dictionary based on the events present in this file
    events_in_edf = [event[2] for event in events_arr[:]]
    if 2 in events_in_edf:
        event_id['LEFT_HAND'] = 2
    if 3 in events_in_edf:
        event_id['RIGHT_HAND'] = 3
    if 4 in events_in_edf:
        event_id['FEET'] = 4
    if 5 in events_in_edf:
        event_id['TONGUE'] = 5
    if 6 in events_in_edf:
        event_id['CUE_UNKNOWN'] = 6

    # Read epochs (train will be done only between -0.5 and 4 s)
    # Testing will be done with a running classifier
    # raw_edf.filter(0., 40., fir_design='firwin',
    #                skip_by_annotation='edge')  # 4-40 Hz
    picks = pick_types(raw_edf.info, meg=False, eeg=True, stim=False,
                       eog=False, exclude='bads')
    epochs = Epochs(raw_edf, events_arr, event_id, tmin, tmax, proj=True,
                    picks=picks, baseline=None, preload=True)
    y = epochs.events[:, 2] - 2

    filter_data = []
    # filter_bank = [(4., 40.)]
    filter_bank = [(4., 8.), (8., 12.), (12., 16.), (16., 20.), (20., 24.),
                   (24., 28.), (28., 32.), (32., 36.), (36., 40.)]
    for _filter in filter_bank:
        # filter_data.append(np.abs(signal.hilbert(
        #     epochs.copy().filter(_filter[0], _filter[1],
        #                          fir_design='firwin').get_data())))
        filter_data.append(epochs.copy().filter(
            _filter[0], _filter[1], fir_design='firwin').get_data())
    filter_data = np.array(filter_data)

    if training:
        oScaler = Scaler(scalings='mean').fit(
            filter_data.flatten().reshape(-1, 1))
        # oScaler = MinMaxScaler(copy=True, feature_range=(-1, 1)).fit(
        #     filter_data.flatten().reshape(-1, 1))
        pk.dump(oScaler, open("./fb/subject{}_filter_oscaler.pk".format(
            subjects[subj]), 'wb'))
    else:
        oScaler = pk.load(open("./fb/subject{}_filter_oscaler.pk".format(
            subjects[subj]), 'rb'))

    shape = filter_data.shape
    filter_data = oScaler.transform(filter_data.flatten().reshape(-1, 1))
    filter_data = filter_data.reshape(shape)
    filter_data = filter_data.transpose(1, 3, 2, 0)  # 273, 1001, 22, 10

    # Augment and reshape data into image
    filter_data = filter_data.transpose(2, 0, 1, 3)  # 22, 273, 1001, 10
    filter_data = np.split(filter_data, [1, 6, 13, 18, 21])
    empty_ch = np.zeros(filter_data[0].shape)
    filter_data = np.vstack([
        empty_ch, empty_ch, empty_ch, filter_data[0], empty_ch, empty_ch,
        empty_ch,
        empty_ch, filter_data[1], empty_ch,
        filter_data[2],
        empty_ch, filter_data[3], empty_ch,
        empty_ch, empty_ch, filter_data[4], empty_ch, empty_ch,
        empty_ch, empty_ch, empty_ch, filter_data[5], empty_ch, empty_ch,
        empty_ch])
    filter_data = filter_data.transpose(1, 2, 0, 3)  # 273, 1001, 42, 10
    filter_data = filter_data.reshape(
        filter_data.shape[0], filter_data.shape[1], 6, 7,
        filter_data.shape[3])  # 273, 1001, 6, 7, 10

    if training:
        return filter_data, y
    else:
        y = sio.loadmat(
            label_files_list[label_subj[subj]])['classlabel'].flatten()
        y = np.array([i - 1 for i in y])
        if drop_rejects:
            y_drop = [i for i in range(288) if not accepted_trials_index[i]]
            y = np.delete(y, y_drop, None)
        return filter_data, y
def decoding_withKfold(X, Y_speech, Y_lips, n_fold, train_index, test_index,
                       examples, feature):
    predictions_speech = np.zeros((Y_speech.shape))
    speech = np.zeros((Y_speech.shape))
    predictions_lips = np.zeros((Y_lips.shape))
    lips = np.zeros((Y_lips.shape))
    scores_speech = np.zeros((n_fold, ))

    for k in range(0, n_fold):
        eegScaler = MultiChannelScaler(scalings='mean')
        speechScaler = MultiChannelScaler(scalings='mean')
        lipsScaler = MultiChannelScaler(scalings='mean')
        speechModel = LReg()
        lipsModel = LReg()

        # copy X and Y variables
        X_standard = np.zeros((X.shape))
        Y_lips_standard = np.zeros((Y_lips.shape))
        Y_speech_standard = np.zeros((Y_speech.shape))

        # standardizing data
        X_standard[train_index[k], :, :] = eegScaler.fit_transform(
            X[train_index[k], :, :])
        X_standard[test_index[k], :, :] = eegScaler.transform(
            X[test_index[k], :, :])
        Y_lips_standard[train_index[k], :] = lipsScaler.fit_transform(
            Y_lips[train_index[k], :]).squeeze()
        Y_lips_standard[test_index[k], :] = lipsScaler.transform(
            Y_lips[test_index[k], :]).squeeze()
        Y_speech_standard[train_index[k], :] = speechScaler.fit_transform(
            Y_speech[train_index[k], :]).squeeze()
        Y_speech_standard[test_index[k], :] = speechScaler.transform(
            Y_speech[test_index[k], :]).squeeze()

        X_TRAIN = X_standard[train_index[k], :, :]
        X_TEST = X_standard[test_index[k], :, :]
        Y_envelope_sp_TRAIN = Y_speech_standard[train_index[k], :]
        Y_envelope_sp_TEST = Y_speech_standard[test_index[k], :]
        Y_lips_ap_TRAIN = Y_lips_standard[train_index[k], :]
        Y_lips_ap_TEST = Y_lips_standard[test_index[k], :]

        # X_TRAIN and X_TEST are now (n_trials, n_channels, n_timepoints)
        n_trial = X_TRAIN.shape[0]
        n_trial_test = X_TEST.shape[0]
        n_ch = X_TRAIN.shape[1]
        trial_length = X_TRAIN.shape[2]

        if examples == 'are_Trials':
            X_TRAIN_tmp = np.zeros((X_TRAIN.shape[0], n_ch * trial_length))
            X_TEST_tmp = np.zeros((X_TEST.shape[0], n_ch * trial_length))
            for i in range(0, n_ch):
                X_TRAIN_tmp[:, i * trial_length:(i + 1) * trial_length] = \
                    X_TRAIN[:, i, :]
                X_TEST_tmp[:, i * trial_length:(i + 1) * trial_length] = \
                    X_TEST[:, i, :]
            X_TRAIN = X_TRAIN_tmp
            X_TEST = X_TEST_tmp
        elif examples == 'are_Time':
            X_TRAIN_tmp = np.zeros((n_trial * trial_length, n_ch))
            X_TEST_tmp = np.zeros((n_trial_test * trial_length, n_ch))
            Y_envelope_sp_TRAIN_tmp = np.zeros((n_trial * trial_length, ))
            Y_envelope_sp_TEST_tmp = np.zeros((n_trial_test * trial_length, ))
            Y_lips_ap_TRAIN_tmp = np.zeros((n_trial * trial_length, ))
            Y_lips_ap_TEST_tmp = np.zeros((n_trial_test * trial_length, ))
            for i in range(0, n_trial):
                X_TRAIN_tmp[i * trial_length:(i + 1) * trial_length, :] = \
                    X_TRAIN[i, :, :].T
                Y_envelope_sp_TRAIN_tmp[
                    i * trial_length:(i + 1) * trial_length] = \
                    Y_envelope_sp_TRAIN[i, :]
                Y_lips_ap_TRAIN_tmp[
                    i * trial_length:(i + 1) * trial_length] = \
                    Y_lips_ap_TRAIN[i, :]
                if i < X_TEST.shape[0]:  # test trials are fewer than train
                    X_TEST_tmp[i * trial_length:(i + 1) * trial_length, :] = \
                        X_TEST[i, :, :].T
                    Y_envelope_sp_TEST_tmp[
                        i * trial_length:(i + 1) * trial_length] = \
                        Y_envelope_sp_TEST[i, :]
                    Y_lips_ap_TEST_tmp[
                        i * trial_length:(i + 1) * trial_length] = \
                        Y_lips_ap_TEST[i, :]
            X_TRAIN = X_TRAIN_tmp
            X_TEST = X_TEST_tmp
            Y_envelope_sp_TRAIN = Y_envelope_sp_TRAIN_tmp
            Y_envelope_sp_TEST = Y_envelope_sp_TEST_tmp
            Y_lips_ap_TRAIN = Y_lips_ap_TRAIN_tmp
            Y_lips_ap_TEST = Y_lips_ap_TEST_tmp

        if feature == 'pca':
            [pca, n_comp] = pca_decomposition(X_TRAIN)
            X_TRAIN = pca.transform(X_TRAIN)[:, :n_comp]
            X_TEST = pca.transform(X_TEST)[:, :n_comp]
        if feature == 'Kpca':
            [pca, n_comp] = kernel_pca_decomposition(X_TRAIN)
            X_TRAIN = pca.transform(X_TRAIN)[:, :n_comp]
            X_TEST = pca.transform(X_TEST)[:, :n_comp]
        if feature == 'ica':
            [ica, selected_comps] = ICA_decomposition(X_TRAIN)
            X_TRAIN = ica.transform(X_TRAIN)[:, selected_comps.astype('int')]
            X_TEST = ica.transform(X_TEST)[:, selected_comps.astype('int')]
        if feature == 'derivative1':
            de1 = np.diff(X_TRAIN, axis=0) / 0.01
            de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1), axis=0)
            for i in range(0, de1.shape[0], trial_length):
                de1[i, :] = np.zeros((1, de1.shape[1]))
            X_TRAIN = np.concatenate((X_TRAIN, de1), 1)
            de1 = np.diff(X_TEST, axis=0) / 0.01
            de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1), axis=0)
            for i in range(0, de1.shape[0], trial_length):
                de1[i, :] = np.zeros((1, de1.shape[1]))
            X_TEST = np.concatenate((X_TEST, de1), 1)
        if feature == 'derivative2':
            de1 = np.diff(X_TRAIN, axis=0) / 0.01
            de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1), axis=0)
            for i in range(0, de1.shape[0], trial_length):
                de1[i, :] = np.zeros((1, de1.shape[1]))
            de2 = np.diff(de1, axis=0)
            de2 = np.concatenate((np.zeros((1, de2.shape[1])), de2), axis=0)
            for i in range(0, de2.shape[0], trial_length):
                de2[i, :] = np.zeros((1, de2.shape[1]))
                de2[i + 1, :] = np.zeros((1, de2.shape[1]))
            X_TRAIN = np.concatenate(
                (np.concatenate((X_TRAIN, de1), 1), de2), 1)
            de1 = np.diff(X_TEST, axis=0) / 0.01
            de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1), axis=0)
            for i in range(0, de1.shape[0], trial_length):
                de1[i, :] = np.zeros((1, de1.shape[1]))
            de2 = np.diff(de1, axis=0)
            de2 = np.concatenate((np.zeros((1, de2.shape[1])), de2), axis=0)
            for i in range(0, de2.shape[0], trial_length):
                de2[i, :] = np.zeros((1, de2.shape[1]))
                de2[i + 1, :] = np.zeros((1, de2.shape[1]))
            X_TEST = np.concatenate(
                (np.concatenate((X_TEST, de1), 1), de2), 1)
        if feature == 'polynomial':
            X_TRAIN = np.concatenate((X_TRAIN, np.power(X_TRAIN, 2)), 1)
            X_TEST = np.concatenate((X_TEST, np.power(X_TEST, 2)), 1)

        # training models and predicting
        speechModel.fit(X_TRAIN, Y_envelope_sp_TRAIN)
        lipsModel.fit(X_TRAIN, Y_lips_ap_TRAIN)
        reconstructed_speech = speechModel.predict(X_TEST)
        reconstructed_lips = lipsModel.predict(X_TEST)

        if examples == 'are_Time':
            reconstructed_speech_tmp = np.zeros((n_trial_test, trial_length))
            reconstructed_lips_tmp = np.zeros((n_trial_test, trial_length))
            Y_envelope_sp_TEST_tmp = np.zeros((n_trial_test, trial_length))
            Y_lips_ap_TEST_tmp = np.zeros((n_trial_test, trial_length))
            t = 0
            for i in range(0, len(reconstructed_speech), trial_length):
                reconstructed_speech_tmp[t, :] = \
                    reconstructed_speech[i:i + trial_length]
                reconstructed_lips_tmp[t, :] = \
                    reconstructed_lips[i:i + trial_length]
                Y_envelope_sp_TEST_tmp[t, :] = \
                    Y_envelope_sp_TEST[i:i + trial_length]
                Y_lips_ap_TEST_tmp[t, :] = Y_lips_ap_TEST[i:i + trial_length]
                t += 1
            reconstructed_speech = reconstructed_speech_tmp
            reconstructed_lips = reconstructed_lips_tmp
            Y_envelope_sp_TEST = Y_envelope_sp_TEST_tmp
            Y_lips_ap_TEST = Y_lips_ap_TEST_tmp

        predictions_speech[test_index[k], :] = reconstructed_speech
        speech[test_index[k], :] = Y_envelope_sp_TEST
        predictions_lips[test_index[k], :] = reconstructed_lips
        lips[test_index[k], :] = Y_lips_ap_TEST

    # computing scores
    speech_score = evaluate(speech.T, predictions_speech.T, 'corrcoeff')
    lips_score = evaluate(lips.T, predictions_lips.T, 'corrcoeff')

    return (speech_score, lips_score, predictions_speech, predictions_lips,
            speech, lips)
###############################################################################
# Vectorizer
# ^^^^^^^^^^
# Scikit-learn API provides functionality to chain transformers and estimators
# by using :class:`sklearn.pipeline.Pipeline`. We can construct decoding
# pipelines and perform cross-validation and grid-search. However scikit-learn
# transformers and estimators generally expect 2D data
# (n_samples * n_features), whereas MNE transformers typically output data
# with a higher dimensionality
# (e.g. n_samples * n_channels * n_frequencies * n_times). A Vectorizer
# therefore needs to be applied between the MNE and the scikit-learn steps
# like:

# Uses all MEG sensors and time points as separate classification
# features, so the resulting filters used are spatio-temporal
clf = make_pipeline(Scaler(epochs.info),
                    Vectorizer(),
                    LogisticRegression(solver='lbfgs'))

scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
score = np.mean(scores, axis=0)
print('Spatio-temporal: %0.1f%%' % (100 * score,))

###############################################################################
# PSDEstimator
# ^^^^^^^^^^^^
# The :class:`mne.decoding.PSDEstimator`
# computes the power spectral density (PSD) using the multitaper
# method. It takes a 3D array as input, converts it into 2D and computes the
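# To make the Vectorizer's dimensionality handling above concrete, a small
# stand-alone sketch (random data; the shapes are assumed purely for
# illustration) showing how it flattens MNE's 3D epochs array into the 2D
# matrix scikit-learn expects, and inverts the mapping:
import numpy as np
from mne.decoding import Vectorizer

X3d = np.random.randn(20, 5, 100)     # n_epochs, n_channels, n_times
vec = Vectorizer()
X2d = vec.fit_transform(X3d)          # -> (20, 500): one row per epoch
assert X2d.shape == (20, 5 * 100)
X_back = vec.inverse_transform(X2d)   # restores (20, 5, 100)
assert X_back.shape == X3d.shape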
def decoding_withKfold(X, Y_speech, Y_lips, n_fold, train_index, test_index,
                       polynomialReg):
    predictions_speech = np.zeros((Y_speech.shape))
    speech = np.zeros((Y_speech.shape))
    predictions_lips = np.zeros((Y_lips.shape))
    lips = np.zeros((Y_lips.shape))
    scores_speech = np.zeros((n_fold, ))

    for k in range(0, n_fold):
        eegScaler = Scaler()
        speechScaler = Scaler()
        lipsScaler = Scaler()
        speechModel = LReg()
        lipsModel = LReg()

        # copy X and Y variables
        X_standard = np.zeros((X.shape))
        Y_lips_standard = np.zeros((Y_lips.shape))
        Y_speech_standard = np.zeros((Y_speech.shape))

        # standardizing data
        X_standard[train_index[k], :] = eegScaler.fit_transform(
            X[train_index[k], :])
        X_standard[test_index[k], :] = eegScaler.transform(
            X[test_index[k], :])
        Y_lips_standard[train_index[k], :] = lipsScaler.fit_transform(
            Y_lips[train_index[k], :])
        Y_lips_standard[test_index[k], :] = lipsScaler.transform(
            Y_lips[test_index[k], :])
        Y_speech_standard[train_index[k], :] = speechScaler.fit_transform(
            Y_speech[train_index[k], :])
        Y_speech_standard[test_index[k], :] = speechScaler.transform(
            Y_speech[test_index[k], :])

        X_TRAIN = X_standard[train_index[k], :]
        X_TEST = X_standard[test_index[k], :]
        Y_envelope_sp_TRAIN = Y_speech_standard[train_index[k], :]
        Y_envelope_sp_TEST = Y_speech_standard[test_index[k], :]
        Y_lips_ap_TRAIN = Y_lips_standard[train_index[k], :]
        Y_lips_ap_TEST = Y_lips_standard[test_index[k], :]

        if polynomialReg:
            X_TRAIN = np.concatenate((X_TRAIN, np.power(X_TRAIN, 2)), 1)
            X_TEST = np.concatenate((X_TEST, np.power(X_TEST, 2)), 1)

        # training models and predicting
        speechModel.fit(X_TRAIN, Y_envelope_sp_TRAIN)
        lipsModel.fit(X_TRAIN, Y_lips_ap_TRAIN)
        reconstructed_speech = speechModel.predict(X_TEST)
        reconstructed_lips = lipsModel.predict(X_TEST)

        predictions_speech[test_index[k], :] = reconstructed_speech
        speech[test_index[k], :] = Y_envelope_sp_TEST
        predictions_lips[test_index[k], :] = reconstructed_lips
        lips[test_index[k], :] = Y_lips_ap_TEST

    # computing scores
    speech_score = evaluate(speech.T, predictions_speech.T, 'corrcoeff')
    lips_score = evaluate(lips.T, predictions_lips.T, 'corrcoeff')

    return (speech_score, lips_score, predictions_speech, predictions_lips,
            speech, lips)
elif "Second" in interval: sl = slice(len(eps) // 3, 2 * len(eps) // 3) elif "Third" in interval: sl = slice(2 * len(eps) // 3, None) elif "235-530" in interval: sl = slice(ix[0], ix[1]) else: assert interval == "All" sl = slice(None) eps = eps[sl] info = eps.info time = eps.times s_ix = slice(ix[0], ix[1]) c1, c2 = list(eps.event_id.keys()) clf = make_pipeline( Scaler(eps.info), Vectorizer(), PCA(0.9999), LinearModel( LogisticRegression( solver=solver, penalty="l1", max_iter=1000, multi_class="auto", random_state=seed, )), ) time_decode = SlidingEstimator(clf, n_jobs=n_jobs, scoring="roc_auc", verbose=False)
    p + '_task-fearcond_cues_singletrials-epo.fif'))

# downsample if necessary
if epo.info['sfreq'] != param['testresampfreq']:
    epo = epo.resample(param['testresampfreq'])

# Drop bad trials and get indices
goodtrials = np.where(df['badtrial'] == 0)[0]

# Get external data for this part
df = df.iloc[goodtrials]
epo = epo[goodtrials]

# Standardize data before regression
scale = Scaler(scalings='mean')  # says 'mean' but it z-scores, see docs
epo_z = mne.EpochsArray(scale.fit_transform(epo.get_data()), epo.info)

betasnp = []
for idx, regvar in enumerate(regvars):
    # Standardize data
    df[regvar + '_z'] = scipy.stats.zscore(df[regvar])
    epo.metadata = df.assign(Intercept=1)  # Add an intercept for later

    # Perform regression
    names = ["Intercept"] + [regvar + '_z']
    res = mne.stats.linear_regression(epo_z, epo.metadata[names],
                                      names=names)
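# A quick check of the comment above: a stand-alone sketch (random data,
# shapes assumed for illustration) showing that Scaler(scalings='mean')
# z-scores each channel using statistics pooled over epochs and time points,
# matching the behavior pinned down in the test_scaler functions earlier in
# this section:
import numpy as np
from mne.decoding import Scaler

X = np.random.randn(30, 4, 200) * 5 + 2  # n_epochs, n_channels, n_times
Xz = Scaler(scalings='mean').fit_transform(X)

# Per channel, pooled over epochs and time: mean ~ 0, std ~ 1
pooled = Xz.transpose(1, 0, 2).reshape(4, -1)
print(pooled.mean(axis=1).round(6), pooled.std(axis=1).round(6))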
def test_scaler(info, method):
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]
    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    y = epochs.events[:, -1]

    epochs_data_t = epochs_data.transpose([1, 0, 2])
    if method in ('mean', 'median'):
        if not check_version('sklearn'):
            with pytest.raises(ImportError, match='No module'):
                Scaler(info, method)
            return
        if check_version('sklearn', '1.0'):
            # 1.0.dev0 is a problem pending
            # https://github.com/scikit-learn/scikit-learn/issues/19726
            pytest.skip('Bug on sklearn main as of 2021/03/19')

    if info:
        info = epochs.info
    scaler = Scaler(info, method)
    X = scaler.fit_transform(epochs_data, y)
    assert_equal(X.shape, epochs_data.shape)
    if method is None or isinstance(method, dict):
        sd = DEFAULTS['scalings'] if method is None else method
        stds = np.zeros(len(picks))
        for key in ('mag', 'grad'):
            stds[pick_types(epochs.info, meg=key)] = 1. / sd[key]
        stds[pick_types(epochs.info, meg=False, eeg=True)] = 1. / sd['eeg']
        means = np.zeros(len(epochs.ch_names))
    elif method == 'mean':
        stds = np.array([np.std(ch_data) for ch_data in epochs_data_t])
        means = np.array([np.mean(ch_data) for ch_data in epochs_data_t])
    else:  # median
        percs = np.array([np.percentile(ch_data, [25, 50, 75])
                          for ch_data in epochs_data_t])
        stds = percs[:, 2] - percs[:, 0]
        means = percs[:, 1]
    assert_allclose(X * stds[:, np.newaxis] + means[:, np.newaxis],
                    epochs_data, rtol=1e-12, atol=1e-20, err_msg=method)

    X2 = scaler.fit(epochs_data, y).transform(epochs_data)
    assert_array_equal(X, X2)

    # inverse_transform
    Xi = scaler.inverse_transform(X)
    assert_array_almost_equal(epochs_data, Xi)

    # Test init exception
    pytest.raises(ValueError, Scaler, None, None)
    pytest.raises(TypeError, scaler.fit, epochs, y)
    pytest.raises(TypeError, scaler.transform, epochs)
    epochs_bad = Epochs(raw, events, event_id, 0, 0.01, baseline=None,
                        picks=np.arange(len(raw.ch_names)))  # non-data chs
    scaler = Scaler(epochs_bad.info, None)
    pytest.raises(ValueError, scaler.fit, epochs_bad.get_data(), y)
def test_get_coef():
    """Test the retrieval of linear coefficients (filters and patterns)
    from simple and pipeline estimators.
    """
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LinearRegression

    # Define a classifier, an invertible transformer and a non-invertible one.
    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    np.random.RandomState(0)
    n_samples, n_features = 20, 3
    y = (np.arange(n_samples) % 2) * 2 - 1
    w = np.random.randn(n_features, 1)
    X = w.dot(y[np.newaxis, :]).T + np.random.randn(n_samples, n_features)

    # I. Test inverse function
    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]
    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert_true(expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (LinearModel(),
                make_pipeline(StandardScaler(), LinearModel())):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert_true(filters[0] != patterns[0])
        n_chans = X.shape[1]
        assert_array_equal(filters.shape, patterns.shape, [n_chans, n_chans])

        # Inverse transform linear model
        filters_inv = get_coef(clf, 'filters_', True)
        assert_true(filters[0] != filters_inv[0])
        patterns_inv = get_coef(clf, 'patterns_', True)
        assert_true(patterns[0] != patterns_inv[0])

    # Check patterns values
    clf = make_pipeline(StandardScaler(), LinearModel(LinearRegression()))
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', True)
    mean, std = X.mean(0), X.std(0)
    X = (X - mean) / std
    coef = np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
    patterns_manual = np.cov(X.T).dot(coef)
    assert_array_almost_equal(patterns, patterns_manual * std + mean)

    # Check with search_light and combination of preprocessing ending with sl:
    n_samples, n_features, n_times = 20, 3, 5
    y = np.arange(n_samples) % 2
    X = np.random.rand(n_samples, n_features, n_times)
    slider = SlidingEstimator(make_pipeline(StandardScaler(), LinearModel()))
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape,
                               [n_features, n_times])
        for t in [0, 1]:
            assert_array_equal(
                get_coef(clf.estimators_[t], 'filters_', False),
                filters[:, t])
def raw_to_data(raw_edf, training=False, drop_rejects=True, subj=None):
    tmin, tmax = -0.5, 4.
    X, y = [], []
    stim_code = dict([(32766, 1), (769, 2), (770, 3), (771, 4), (772, 5),
                      (783, 6), (276, 7), (277, 8), (768, 9), (1023, 10),
                      (1072, 11)])
    if training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TrainingSet')
    if not training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TestingSet')
    label_path = op.join('data_i2r', 'BCI_IV_2a', 'true_labels')
    label_files_list = glob.glob(label_path + '/*E.mat')
    label_subj = [int(f.split('A0')[1][0]) for f in label_files_list]
    file_list = glob.glob(path + '/*.gdf')
    subjects = [int(f.split('A0')[1][0]) for f in file_list]
    if not training:
        label_subj = [
            np.argwhere(np.array(label_subj) == subjects[i])[0][0]
            for i in range(len(subjects))
        ]

    event_id = dict()
    events_from_edf = []
    sampling_frequency = raw_edf._raw_extras[0]['max_samp']
    original_event = raw_edf.find_edf_events()
    annot_list = list(
        zip(original_event[1], original_event[4], original_event[2]))

    # Remove rejected trials from events
    if drop_rejects:
        annot_list = pd.DataFrame(annot_list)
        rejected = annot_list[0].isin(annot_list[annot_list[2] == 1023][0])
        accepted_trials_index = [True] * 288
        ind = -1
        for row in annot_list.itertuples():
            if row[3] == 1023:
                rejected.loc[row[0] + 1] = True
                accepted_trials_index[ind] = False
            if row[3] == 768:
                ind = ind + 1
        annot_list = annot_list[~rejected]
        annot_list = list(zip(annot_list[0], annot_list[1], annot_list[2]))
    events_from_edf.extend(annot_list)
    events_from_edf = np.array(events_from_edf)

    events_arr = np.zeros(events_from_edf.shape, dtype=int)
    for (i, i_event) in enumerate(events_from_edf):
        index = int(float(i_event[0]) * sampling_frequency)
        events_arr[i, :] = index, 0, stim_code[int(i_event[2])]

    # strip channel names of "." characters
    raw_edf.rename_channels(lambda x: x.strip('.'))

    # create event dictionary based on the events present in this file
    events_in_edf = [event[2] for event in events_arr[:]]
    if 2 in events_in_edf:
        event_id['LEFT_HAND'] = 2
    if 3 in events_in_edf:
        event_id['RIGHT_HAND'] = 3
    if 4 in events_in_edf:
        event_id['FEET'] = 4
    if 5 in events_in_edf:
        event_id['TONGUE'] = 5
    if 6 in events_in_edf:
        event_id['CUE_UNKNOWN'] = 6

    # Apply band-pass filter
    raw_edf.filter(0., 38., fir_design='firwin', skip_by_annotation='edge')
    picks = pick_types(raw_edf.info, meg=False, eeg=True, stim=False,
                       eog=False, exclude='bads')

    # Read epochs (train will be done only between -0.5 and 4 s)
    # Testing will be done with a running classifier
    epochs = Epochs(raw_edf, events_arr, event_id, tmin, tmax, proj=True,
                    picks=picks, baseline=None, preload=True)

    X = epochs.get_data().transpose(0, 2, 1)
    X_shape = X.shape
    if training:
        scaler = Scaler(scalings='median').fit(X.flatten().reshape(-1, 1))
        # scaler = MinMaxScaler(copy=True, feature_range=(-1, 1)).fit(
        #     X.flatten().reshape(-1, 1))
        pk.dump(scaler,
                open("./shallow_convnet/subject{}_oscaler.pk".format(
                    subjects[subj]), 'wb'))
    else:
        scaler = pk.load(
            open("./shallow_convnet/subject{}_oscaler.pk".format(
                subjects[subj]), 'rb'))

    y = epochs.events[:, 2] - 2
    X = scaler.transform(X.flatten().reshape(-1, 1))
    X = X.reshape(X_shape)

    if training:
        return X, y, scaler
    else:
        y = sio.loadmat(
            label_files_list[label_subj[subj]])['classlabel'].flatten()
        y = np.array([i - 1 for i in y])
        if drop_rejects:
            y_drop = [i for i in range(288) if not accepted_trials_index[i]]
            y = np.delete(y, y_drop, None)
        return X, y
# Vectorizer
# ^^^^^^^^^^
# Scikit-learn API provides functionality to chain transformers and estimators
# by using :class:`sklearn.pipeline.Pipeline`. We can construct decoding
# pipelines and perform cross-validation and grid-search. However scikit-learn
# transformers and estimators generally expect 2D data
# (n_samples * n_features), whereas MNE transformers typically output data
# with a higher dimensionality
# (e.g. n_samples * n_channels * n_frequencies * n_times). A Vectorizer
# therefore needs to be applied between the MNE and the scikit-learn steps
# like:

# Uses all MEG sensors and time points as separate classification
# features, so the resulting filters used are spatio-temporal
clf = make_pipeline(
    Scaler(epochs.info),
    Vectorizer(),
    LogisticRegression(solver='liblinear')  # liblinear is faster than lbfgs
)

scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=None)

# Mean scores across cross-validation splits
score = np.mean(scores, axis=0)
print('Spatio-temporal: %0.1f%%' % (100 * score,))

# %%
# PSDEstimator
# ^^^^^^^^^^^^
# The :class:`mne.decoding.PSDEstimator`
# computes the power spectral density (PSD) using the multitaper
    d = method(d, axis=-1)
    return d


df = pd.DataFrame()
df['label'] = label
df['session'] = session
df['max'] = crash(data)
df['min'] = crash(data, np.min)
df

# %%
n_jobs = 48

clf = make_pipeline(
    Scaler(info),
    Vectorizer(),
    StandardScaler(),
    LinearModel(LogisticRegression(solver='liblinear')),
)
time_decoder = SlidingEstimator(
    clf,
    scoring='roc_auc',
    n_jobs=n_jobs,
)

y = df['label'].values.copy()
y[y == 2] = 0
scores = cross_val_multiscore(
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators."""
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import svm
    from sklearn.linear_model import Ridge, LinearRegression
    from sklearn.model_selection import GridSearchCV

    lm_classification = LinearModel()
    assert (is_classifier(lm_classification))

    lm_regression = LinearModel(Ridge())
    assert (is_regressor(lm_regression))

    parameters = {'kernel': ['linear'], 'C': [1, 10]}
    lm_gs_classification = LinearModel(
        GridSearchCV(svm.SVC(), parameters, cv=2, refit=True, iid=False,
                     n_jobs=1))
    assert (is_classifier(lm_gs_classification))

    lm_gs_regression = LinearModel(
        GridSearchCV(svm.SVR(), parameters, cv=2, refit=True, iid=False,
                     n_jobs=1))
    assert (is_regressor(lm_gs_regression))

    # Define a classifier, an invertible transformer and a non-invertible one.
    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)

    # I. Test inverse function
    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]
    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert (expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for classification/regression estimators and pipelines
    rng = np.random.RandomState(0)
    for clf in (lm_regression, lm_gs_classification,
                make_pipeline(StandardScaler(), lm_classification),
                make_pipeline(StandardScaler(), lm_gs_regression)):
        # generate some categorical/continuous data
        # according to the type of estimator.
        if is_classifier(clf):
            n, n_features = 1000, 3
            X = rng.rand(n, n_features)
            y = np.arange(n) % 2
        else:
            X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)
            y = np.ravel(y)
        clf.fit(X, y)

        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            if hasattr(clf.steps[-1][-1].model, 'best_estimator_'):
                # Linear Model with GridSearchCV
                coefs = clf.steps[-1][-1].model.best_estimator_.coef_
            else:
                # Standard Linear Model
                coefs = clf.steps[-1][-1].model.coef_
        else:
            if hasattr(clf.model, 'best_estimator_'):
                # Linear Model with GridSearchCV
                coefs = clf.model.best_estimator_.coef_
            else:
                # Standard Linear Model
                coefs = clf.model.coef_
        if coefs.ndim == 2 and coefs.shape[0] == 1:
            coefs = coefs[0]
        assert_array_equal(filters, coefs)
        patterns = get_coef(clf, 'patterns_', False)
        assert (filters[0] != patterns[0])
        n_chans = X.shape[1]
        assert_array_equal(filters.shape, patterns.shape, [n_chans, n_chans])

        # Inverse transform linear model
        filters_inv = get_coef(clf, 'filters_', True)
        assert (filters[0] != filters_inv[0])
        patterns_inv = get_coef(clf, 'patterns_', True)
        assert (patterns[0] != patterns_inv[0])

    # Check with search_light and combination of preprocessing ending with sl:
    slider = SlidingEstimator(make_pipeline(StandardScaler(), lm_regression))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape, X.shape[1:])
            # the two time samples get inverted patterns
            assert_equal(patterns[0, 0], -patterns[0, 1])
        for t in [0, 1]:
            assert_array_equal(
                get_coef(clf.estimators_[t], 'filters_', False),
                filters[:, t])

    # Check patterns with more than 1 regressor
    for n_features in [1, 5]:
        for n_targets in [1, 3]:
            X, Y, A = _make_data(n_samples=3000, n_features=5, n_targets=3)
            lm = LinearModel(LinearRegression()).fit(X, Y)
            assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
            assert_array_equal(lm.filters_.shape, [3, 5])
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
            lm = LinearModel(Ridge(alpha=1)).fit(X, Y)
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
# This approach classifies the data within, rather than across, subjects.
for chroma in ['hbo', 'hbr']:
    st_scores = []
    for sub in subjects:
        bids_path = dataset.update(subject=sub)
        raw_haemo, epochs = epoch_preprocessing(bids_path)

        epochs.pick(chroma)
        X = epochs.get_data()
        y = epochs.events[:, 2]

        clf = make_pipeline(Scaler(epochs.info), Vectorizer(),
                            LogisticRegression(solver='liblinear'))

        scores = 100 * cross_val_multiscore(
            clf, X, y, cv=5, n_jobs=1, scoring='roc_auc')
        st_scores.append(np.mean(scores, axis=0))

    print(f"Average spatio-temporal ROC-AUC performance ({chroma}) = "
          f"{np.round(np.mean(st_scores))} % ({np.round(np.std(st_scores))})")

# %%
# Conclusion
# ----------
#
# Data were epoched then decoding was performed on the hbo signal and the hbr
    allSubs_face.append(face_avg)

# Plotting all the individual evoked arrays (up to 10)
mne.viz.plot_compare_evokeds(allSubs_scene, picks=[6, 7, 12, 13, 22])
mne.viz.plot_compare_evokeds(allSubs_face, picks=[6, 7, 12, 13, 22])
# If using O1, O2, Oz, PO3 and PO4: picks 6, 7, 12, 13, 22. These values are
# not z-scored. Standardize when extracting in offline_analysis.

from mne.decoding import (SlidingEstimator, GeneralizingEstimator, Scaler,
                          cross_val_multiscore, LinearModel, get_coef,
                          Vectorizer, CSP, PSDEstimator)

scale_test = Scaler(scalings='mean').fit_transform(
    allSubs_MNE[0][2].get_data())

# What is going on w. sub 15
# allSubs_MNE[2][2]['face'].average().plot(spatial_colors=True,
#                                          time_unit='s', picks=[7])

# %%
# Plot showing where the correct predictions are located per run
n_it = 5
pred_run = np.reshape(sub13n['RT_correct_NFtest_pred'], [n_it, 200])
for run in range(n_it):
    plt.figure(run)
    plt.bar(np.arange(200), pred_run[run, :])
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators."""
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import Ridge, LinearRegression

    lm = LinearModel()
    assert (is_classifier(lm))

    lm = LinearModel(Ridge())
    assert (is_regressor(lm))

    # Define a classifier, an invertible transformer and a non-invertible one.
    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=2000, n_features=3, n_targets=1)

    # I. Test inverse function
    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]
    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert (expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (lm, make_pipeline(StandardScaler(), lm)):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert (filters[0] != patterns[0])
        n_chans = X.shape[1]
        assert_array_equal(filters.shape, patterns.shape, [n_chans, n_chans])

        # Inverse transform linear model
        filters_inv = get_coef(clf, 'filters_', True)
        assert (filters[0] != filters_inv[0])
        patterns_inv = get_coef(clf, 'patterns_', True)
        assert (patterns[0] != patterns_inv[0])

    # Check with search_light and combination of preprocessing ending with sl:
    slider = SlidingEstimator(make_pipeline(StandardScaler(), lm))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape, X.shape[1:])
            # the two time samples get inverted patterns
            assert_equal(patterns[0, 0], -patterns[0, 1])
        for t in [0, 1]:
            assert_array_equal(
                get_coef(clf.estimators_[t], 'filters_', False),
                filters[:, t])

    # Check patterns with more than 1 regressor
    for n_features in [1, 5]:
        for n_targets in [1, 3]:
            X, Y, A = _make_data(n_samples=5000, n_features=5, n_targets=3)
            lm = LinearModel(LinearRegression()).fit(X, Y)
            assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
            assert_array_equal(lm.filters_.shape, [3, 5])
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
            lm = LinearModel(Ridge(alpha=1)).fit(X, Y)
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
def decoding(band, regularization, tmin, tmax, n_fold, subject_name,
             savepath):
    data_path = "./ProcessedData/Final_"
    eeg = "_processed-epo.fif"
    features = "_Features-epo.fif"
    sfreq = 100
    n_delays = int((tmax - tmin) * sfreq) + 1
    T = [51, 61, 71, 81, 91, 101, 111, 121, 131, 141, 151]
    # each row is the results vector for one regularization parameter
    results_speech = np.zeros((len(regularization), len(T)))
    results_lips = np.zeros((len(regularization), len(T)))
    results_speech_all_sub = {}
    results_lips_all_sub = {}
    predictions_lips_all_sub = {}
    predictions_speech_all_sub = {}

    for s in subject_name:
        print('subject ' + str(s))
        X_orig = use_FreqBand(mne.read_epochs(data_path + s + eeg), band)
        Features_orig = use_FreqBand(
            mne.read_epochs(data_path + s + features), band)
        if band == 'original':
            # 3d array (N_trial, N_channel, N_time)
            X_orig = X_orig.get_data()
            # 2d arrays (N_trial, N_time)
            Y_envelope_sp_orig = Features_orig.get_data()[:, 0, :]
            Y_lips_ap_orig = Features_orig.get_data()[:, 2, :]
        else:
            # averaging power across frequencies
            X_orig = np.mean(X_orig.data, 2)
            Y_envelope_sp_orig = np.mean(Features_orig.data[:, 0, :, :], 1)
            Y_lips_ap_orig = np.mean(Features_orig.data[:, 2, :, :], 1)

        time = mne.read_epochs(data_path + s + features).times
        channels = mne.read_epochs(data_path + s + eeg).ch_names
        predictions_speech = np.zeros(
            (Y_envelope_sp_orig.shape[0], 200, len(T), len(regularization)))
        predictions_lips = np.zeros(
            (Y_lips_ap_orig.shape[0], 200, len(T), len(regularization)))

        # define indices for train and test for each of the k folds
        train_index, test_index = k_fold(Y_envelope_sp_orig, n_fold)

        # data standardizers
        eegScaler = Scaler(scalings='mean')
        speechScaler = Scaler(scalings='mean')
        lipsScaler = Scaler(scalings='mean')

        scores_speech = np.zeros((n_fold, ))
        scores_lips = np.zeros((n_fold, ))
        coefs_speech = np.zeros((n_fold, X_orig.shape[1], n_delays))
        patterns_speech = coefs_speech.copy()
        coefs_lips = np.zeros((n_fold, X_orig.shape[1], n_delays))
        patterns_lips = coefs_lips.copy()

        for i, r in enumerate(regularization):
            rf_speech = RField(tmin, tmax, sfreq, feature_names=channels,
                               scoring='r2', patterns=True, estimator=r)
            rf_lips = RField(tmin, tmax, sfreq, feature_names=channels,
                             scoring='r2', patterns=True, estimator=r)
            print('reg parameter #' + str(i))
            for j, t_start in enumerate(T):
                # extracting the temporal interval of interest
                t_end = t_start + 200
                X = X_orig[:, :, t_start:t_end]  # only the eeg window shifts
                Y_envelope_sp = Y_envelope_sp_orig[:, 101:301]
                Y_lips_ap = Y_lips_ap_orig[:, 101:301]
                for k in range(0, n_fold):
                    # copy X and Y variables
                    X_standard = np.zeros((X.shape))
                    Y_lips_ap_standard = np.zeros((Y_lips_ap.shape))
                    Y_envelope_sp_standard = np.zeros((Y_envelope_sp.shape))

                    # standardizing data
                    X_standard[train_index[k], :, :] = \
                        eegScaler.fit_transform(X[train_index[k], :, :])
                    X_standard[test_index[k], :, :] = \
                        eegScaler.transform(X[test_index[k], :, :])
                    Y_lips_ap_standard[train_index[k], :] = \
                        lipsScaler.fit_transform(
                            Y_lips_ap[train_index[k], :])[:, :, 0]
                    Y_lips_ap_standard[test_index[k], :] = \
                        lipsScaler.transform(
                            Y_lips_ap[test_index[k], :])[:, :, 0]
                    Y_envelope_sp_standard[train_index[k], :] = \
                        speechScaler.fit_transform(
                            Y_envelope_sp[train_index[k], :])[:, :, 0]
                    Y_envelope_sp_standard[test_index[k], :] = \
                        speechScaler.transform(
                            Y_envelope_sp[test_index[k], :])[:, :, 0]

                    # shaping data as desired by the decoding model
                    # (receptive field function)
                    X_standard = np.rollaxis(X_standard, 2, 0)
                    Y_envelope_sp_standard = np.rollaxis(
                        Y_envelope_sp_standard, 1, 0)
                    Y_lips_ap_standard = np.rollaxis(Y_lips_ap_standard, 1, 0)

                    X_TRAIN = X_standard[:, train_index[k], :]
                    X_TEST = X_standard[:, test_index[k], :]
                    Y_envelope_sp_TRAIN = \
                        Y_envelope_sp_standard[:, train_index[k]]
                    Y_envelope_sp_TEST = \
                        Y_envelope_sp_standard[:, test_index[k]]
                    Y_lips_ap_TRAIN = Y_lips_ap_standard[:, train_index[k]]
                    Y_lips_ap_TEST = Y_lips_ap_standard[:, test_index[k]]

                    # training models and predicting
                    rf_speech.fit(X_TRAIN, Y_envelope_sp_TRAIN)
                    rf_lips.fit(X_TRAIN, Y_lips_ap_TRAIN)
                    reconstructed_speech = rf_speech.predict(X_TEST)
                    reconstructed_lips = rf_lips.predict(X_TEST)
                    predictions_speech[test_index[k], :, j, i] = \
                        reconstructed_speech.T
                    predictions_lips[test_index[k], :, j, i] = \
                        reconstructed_lips.T

                    # computing scores
                    tmp_score_speech = 0
                    tmp_score_lips = 0
                    for n, rec in enumerate(reconstructed_speech[:, :, 0].T):
                        tmp_score_speech = tmp_score_speech + \
                            mean_squared_error(
                                Y_envelope_sp_TEST[:, n] /
                                max(abs(Y_envelope_sp_TEST[:, n])),
                                rec / max(abs(rec)))
                    scores_speech[k] = tmp_score_speech / (n + 1)
                    for n, rec in enumerate(reconstructed_lips[:, :, 0].T):
                        tmp_score_lips = tmp_score_lips + \
                            mean_squared_error(
                                Y_lips_ap_TEST[:, n] /
                                max(abs(Y_lips_ap_TEST[:, n])),
                                rec / max(abs(rec)))
                    scores_lips[k] = tmp_score_lips / (n + 1)

                    # scores_speech[k] = rf_speech.score(
                    #     X_TEST, Y_envelope_sp_TEST)[0]
                    # scores_lips[k] = rf_speech.score(
                    #     X_TEST, Y_lips_ap_TEST)[0]
                    # coef_ is shape (n_outputs, n_features, n_delays)
                    # coefs_speech[k] = rf_speech.coef_[0, :, :]
                    # patterns_speech[k] = rf_speech.patterns_[0, :, :]
                    # coefs_lips[k] = rf_lips.coef_[0, :, :]
                    # patterns_lips[k] = rf_lips.patterns_[0, :, :]

                # mean_coefs_lips = coefs_lips.mean(axis=0)
                # mean_patterns_lips = patterns_lips.mean(axis=0)
                mean_scores_lips = scores_lips.mean(axis=0)
                # mean_coefs_speech = coefs_speech.mean(axis=0)
                # mean_patterns_speech = patterns_speech.mean(axis=0)
                mean_scores_speech = scores_speech.mean(axis=0)

                # saving results for the i-th reg parameter and j-th time lag
                results_speech[i, j] = mean_scores_speech
                results_lips[i, j] = mean_scores_lips

        results_speech_all_sub[s] = results_speech.copy()
        results_lips_all_sub[s] = results_lips.copy()
        predictions_speech_all_sub[s] = predictions_speech.copy()
        predictions_lips_all_sub[s] = predictions_lips.copy()

    np.save(savepath + '/results_speech_all_sub', results_speech_all_sub)
    np.save(savepath + '/results_lips_all_sub', results_lips_all_sub)
    np.save(savepath + '/predictions_speech_all_sub',
            predictions_speech_all_sub)
    np.save(savepath + '/predictions_lips_all_sub', predictions_lips_all_sub)

    tmp_results_speech = []
    tmp_results_lips = []
    for N, s in enumerate(subject_name):
        if N == 0:
            tmp_results_speech = np.asarray(results_speech_all_sub[s])
            tmp_results_lips = np.asarray(results_lips_all_sub[s])
        else:
            tmp_results_speech = np.dstack(
                (tmp_results_speech, np.asarray(results_speech_all_sub[s])))
            tmp_results_lips = np.dstack(
                (tmp_results_lips, np.asarray(results_lips_all_sub[s])))

    # computing grand average and standard deviation for each time lag
    GAVG_sp = np.reshape(np.mean(tmp_results_speech, 2),
                         (len(regularization), 11))
    GAVG_lip = np.reshape(np.mean(tmp_results_lips, 2),
                          (len(regularization), 11))
    GAVG_sp_std = np.reshape(np.std(tmp_results_speech, 2),
                             (len(regularization), 11))
    GAVG_lip_std = np.reshape(np.std(tmp_results_lips, 2),
                              (len(regularization), 11))
    np.save(savepath + '/GAVG_sp', GAVG_sp)
    np.save(savepath + '/GAVG_lip', GAVG_lip)
    np.save(savepath + '/GAVG_sp_std', GAVG_sp_std)
    np.save(savepath + '/GAVG_lip_std', GAVG_lip_std)

    # plotting results
    T = np.reshape(T, (1, len(T)))
    pp.figure(0)
    for n, r in enumerate(regularization):
        pp.errorbar((T[0, :] - 100) * 10, GAVG_sp[n, :],
                    yerr=GAVG_sp_std[n, :])
    pp.legend(regularization)
    pp.title('speech MSE')
    sfig = savepath + '/GAVG_speech.png'
    pp.savefig(fname=sfig)

    pp.figure(1)
    for n, r in enumerate(regularization):
        pp.errorbar((T[0, :] - 100) * 10, GAVG_lip[n, :],
                    yerr=GAVG_lip_std[n, :])
    pp.legend(regularization)
    pp.title('lips MSE')
    sfig = savepath + '/GAVG_lips.png'
    pp.savefig(fname=sfig)
    # pp.show()