Example #1
def test_scaler():
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname, preload=False, add_eeg_ref=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]

    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True, add_eeg_ref=False)
    epochs_data = epochs.get_data()
    scaler = Scaler(epochs.info)
    y = epochs.events[:, -1]

    # np invalid divide value warnings
    with warnings.catch_warnings(record=True):
        X = scaler.fit_transform(epochs_data, y)
        assert_true(X.shape == epochs_data.shape)
        X2 = scaler.fit(epochs_data, y).transform(epochs_data)

    assert_array_equal(X2, X)

    # Test inverse_transform
    with warnings.catch_warnings(record=True):  # invalid value in mult
        Xi = scaler.inverse_transform(X, y)
    assert_array_equal(epochs_data, Xi)

    # Test init exception
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs, y)
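
# The round trip exercised above is easy to see on synthetic data.
# A minimal sketch (toy shapes assumed; not part of the original test):
import numpy as np
from mne import create_info
from mne.decoding import Scaler

rng = np.random.RandomState(0)
data = rng.randn(5, 4, 100)                       # (n_epochs, n_channels, n_times)
info = create_info(4, sfreq=100., ch_types='eeg')
scaler = Scaler(info)                             # channel-type scaling taken from info
X = scaler.fit_transform(data)                    # output keeps the input shape
assert X.shape == data.shape
assert np.allclose(scaler.inverse_transform(X), data)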
Example #2
def test_scaler():
    """Test methods of Scaler
    """
    raw = io.Raw(raw_fname, preload=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]

    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    scaler = Scaler(epochs.info)
    y = epochs.events[:, -1]

    # np invalid divide value warnings
    with warnings.catch_warnings(record=True):
        X = scaler.fit_transform(epochs_data, y)
        assert_true(X.shape == epochs_data.shape)
        X2 = scaler.fit(epochs_data, y).transform(epochs_data)

    assert_array_equal(X2, X)

    # Test inverse_transform
    with warnings.catch_warnings(record=True):  # invalid value in mult
        Xi = scaler.inverse_transform(X, y)
    assert_array_equal(epochs_data, Xi)

    # Test init exception
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs, y)
Example #3
def standard_scaling(data, scalings="mean", log=False):

    if log:
        data = np.log(data + np.finfo(np.float32).eps)

    if scalings in ["mean", "median"]:
        scaler = Scaler(scalings=scalings)
        data = scaler.fit_transform(data)
    else:
        raise ValueError("scalings should be mean or median")

    return data
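
# A hedged usage sketch for standard_scaling above (the input shape is assumed
# to be (n_epochs, n_channels, n_times), as mne.decoding.Scaler expects):
import numpy as np

power = np.abs(np.random.randn(10, 3, 50)) + 1.0            # strictly positive toy data
z = standard_scaling(power, scalings="mean")                # channel-wise z-scoring
z_log = standard_scaling(power, scalings="mean", log=True)  # log first, then z-scoring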
Example #4
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Test a full example with pattern extraction."""
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold
    data = np.zeros((10 * n_classes, n_channels, n_times))
    # Make only the first channel informative
    for ii in range(n_classes):
        data[ii * 10:(ii + 1) * 10, 0] = ii
    events = np.zeros((len(data), 3), int)
    events[:, 0] = np.arange(len(events))
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)
    clf = make_pipeline(
        Scaler(epochs.info),
        Vectorizer(),
        LinearModel(LogisticRegression(random_state=0, multi_class='ovr')),
    )
    scorer = 'roc_auc_ovr_weighted'
    time_gen = GeneralizingEstimator(clf, scorer, verbose=True)
    X = epochs.get_data()
    y = epochs.events[:, 2]
    n_splits = 3
    cv = StratifiedKFold(n_splits=n_splits)
    scores = cross_val_multiscore(time_gen, X, y, cv=cv, verbose=True)
    want = (n_splits, )
    if n_times > 1:
        want += (n_times, n_times)
    assert scores.shape == want
    assert_array_less(0.8, scores)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)  # no other channels useful
Example #5
def test_scaler():
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]

    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    y = epochs.events[:, -1]

    methods = (None, dict(mag=5, grad=10, eeg=20), 'mean', 'median')
    infos = (epochs.info, epochs.info, None, None)
    epochs_data_t = epochs_data.transpose([1, 0, 2])
    for method, info in zip(methods, infos):
        if method == 'median' and not check_version('sklearn', '0.17'):
            assert_raises(ValueError, Scaler, info, method)
            continue
        if method == 'mean' and not check_version('sklearn', ''):
            assert_raises(ImportError, Scaler, info, method)
            continue
        scaler = Scaler(info, method)
        X = scaler.fit_transform(epochs_data, y)
        assert_equal(X.shape, epochs_data.shape)
        if method is None or isinstance(method, dict):
            sd = DEFAULTS['scalings'] if method is None else method
            stds = np.zeros(len(picks))
            for key in ('mag', 'grad'):
                stds[pick_types(epochs.info, meg=key)] = 1. / sd[key]
            stds[pick_types(epochs.info, meg=False, eeg=True)] = 1. / sd['eeg']
            means = np.zeros(len(epochs.ch_names))
        elif method == 'mean':
            stds = np.array([np.std(ch_data) for ch_data in epochs_data_t])
            means = np.array([np.mean(ch_data) for ch_data in epochs_data_t])
        else:  # median
            percs = np.array([np.percentile(ch_data, [25, 50, 75])
                              for ch_data in epochs_data_t])
            stds = percs[:, 2] - percs[:, 0]
            means = percs[:, 1]
        assert_allclose(X * stds[:, np.newaxis] + means[:, np.newaxis],
                        epochs_data, rtol=1e-12, atol=1e-20, err_msg=method)

        X2 = scaler.fit(epochs_data, y).transform(epochs_data)
        assert_array_equal(X, X2)

        # inverse_transform
        Xi = scaler.inverse_transform(X)
        assert_array_almost_equal(epochs_data, Xi)

    # Test init exception
    assert_raises(ValueError, Scaler, None, None)
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs)
    epochs_bad = Epochs(raw, events, event_id, 0, 0.01,
                        picks=np.arange(len(raw.ch_names)))  # non-data chs
    scaler = Scaler(epochs_bad.info, None)
    assert_raises(ValueError, scaler.fit, epochs_bad.get_data(), y)
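
# As the assertions above imply, a dict of scalings makes the transform a
# per-channel-type multiplication. A minimal sketch (values assumed):
import numpy as np
from mne import create_info
from mne.decoding import Scaler

info = create_info(['EEG 001'], sfreq=100., ch_types='eeg')
volts = np.full((2, 1, 10), 1e-6)                 # 1 microvolt, expressed in volts
X = Scaler(info, scalings=dict(eeg=1e6)).fit_transform(volts)
assert np.allclose(X, 1.)                         # scaled to microvolts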
Example #6
def test_get_coef_multiclass(n_features, n_targets):
    """Test get_coef on multiclass problems."""
    # Check patterns with more than 1 regressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.pipeline import make_pipeline
    X, Y, A = _make_data(n_samples=30000,
                         n_features=n_features,
                         n_targets=n_targets)
    lm = LinearModel(LinearRegression()).fit(X, Y)
    assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
    if n_targets == 1:
        want_shape = (n_features, )
    else:
        want_shape = (n_targets, n_features)
    assert_array_equal(lm.filters_.shape, want_shape)
    if n_features > 1 and n_targets > 1:
        assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
    lm = LinearModel(Ridge(alpha=0))
    clf = make_pipeline(lm)
    clf.fit(X, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    assert_allclose(lm.patterns_, coef, atol=1e-5)

    # With epochs, scaler, and vectorizer (typical use case)
    X_epo = X.reshape(X.shape + (1, ))
    info = create_info(n_features, 1000., 'eeg')
    lm = LinearModel(Ridge(alpha=1))
    clf = make_pipeline(
        Scaler(info, scalings=dict(eeg=1.)),  # XXX adding this step breaks
        Vectorizer(),
        lm,
    )
    clf.fit(X_epo, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    lm_patterns_ = lm.patterns_[..., np.newaxis]
    assert_allclose(lm_patterns_, coef, atol=1e-5)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
Example #7
def test_scaler():
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg=True, stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[1:13:3]

    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), preload=True)
    epochs_data = epochs.get_data()
    scaler = Scaler(epochs.info)
    y = epochs.events[:, -1]

    X = scaler.fit_transform(epochs_data, y)
    assert_true(X.shape == epochs_data.shape)
    X2 = scaler.fit(epochs_data, y).transform(epochs_data)
    assert_array_equal(X2, X)
    # these should be across time
    assert_allclose(X.std(axis=-2), 1.)
    assert_allclose(X.mean(axis=-2), 0., atol=1e-12)

    # Test inverse_transform
    Xi = scaler.inverse_transform(X, y)
    assert_array_almost_equal(epochs_data, Xi)

    for kwargs in [{'with_mean': False}, {'with_std': False}]:
        scaler = Scaler(epochs.info, **kwargs)
        scaler.fit(epochs_data, y)
        assert_array_almost_equal(
            X, scaler.inverse_transform(scaler.transform(X)))
    # Test init exception
    assert_raises(ValueError, scaler.fit, epochs, y)
    assert_raises(ValueError, scaler.transform, epochs, y)
Example #8
    def get_Xy(self,
               slicers=None,
               df=None,
               dropbad=True,
               downsample=None,
               crop=None,
               scaling=None,
               verbose=None):
        """Get training data and target values.

		Parameters
		----------
		slicers : dict of slices
			The slicers on which the data is filtered. See self.filter(). If None, the full dataset will be used.
			Defaults to None.
		df : instance of pandas.core.frame.DataFrame
			The dataframe to use as input. If None, the dataframe loaded with self.load() will be used.
			Defaults to None.
		dropbad : bool
			Whether artifacts will be rejected or not.
			Defaults to True.
		downsample : int or None.
			The resampling rate. If None, the data will not be downsampled.
			Defaults to None.
		crop : tuple of float (tmin, tmax) or None
			The crop time interval from epochs object, in seconds. If None, epochs will not be cropped.
			Defaults to None.
		scaling : dict, string or None
			The scaling method to be applied to data channel wise. See: http://martinos.org/mne/stable/generated/mne.decoding.Scaler.html
			Defaults to None.
		verbose : bool, string, int or None
			The verbosity level, as detailed at: https://www.martinos.org/mne/stable/generated/mne.set_log_level.html.
			Defaults to None.

		Returns
		-------
		X : instance of numpy.ndarray
			The training data.
		y : instance of numpy.ndarray
			The target values.
		epochs : instance of mne.epochs.Epochs
			The MNE Epochs object.

		"""

        if df is None: df = self.df
        if slicers: df = self.filter(slicers, df)
        raw = self.raw(df, verbose=verbose)
        raw.filter(0.5, 40, method='iir')  # bandpass filter
        events = mne.find_events(raw, verbose=verbose)
        event_id = {'distractor': 1, 'target': 2}
        # Reject epochs where the signal exceeds 100 µV in EEG channels or 200 µV in the EOG channel
        reject = {'eeg': 100e-6, 'eog': 200e-6} if dropbad else None
        # See: http://martinos.org/mne/stable/generated/mne.Epochs.html
        epochs = mne.Epochs(raw,
                            events,
                            event_id=event_id,
                            tmin=self.tmin,
                            tmax=self.tmax,
                            baseline=(self.tmin, 0),
                            reject=reject,
                            verbose=verbose)
        epochs.load_data()
        if dropbad: epochs.drop_bad()
        if downsample: epochs.resample(downsample, npad='auto')
        if crop: epochs.crop(*crop)
        epochs.pick_types(eeg=True)
        X = epochs.get_data()
        y = epochs.events[:, -1] == 2  # binary events
        # See: http://martinos.org/mne/stable/generated/mne.decoding.Scaler.html
        X = Scaler(epochs.info, scaling).fit_transform(X, y)
        return X, y, epochs
Example #9
def raw_to_data(raw_edf, training=True, drop_rejects=True, subj=None):

    tmin, tmax = 0, 4.

    stim_code = dict([(32766, 1), (769, 2), (770, 3), (771, 4), (772, 5),
                      (783, 6), (276, 7), (277, 8), (768, 9),
                      (1023, 10), (1072, 11)])
    
    if training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TrainingSet')
    if not training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TestingSet')
        label_path = op.join('data_i2r', 'BCI_IV_2a', 'true_labels')
        label_files_list = glob.glob(label_path + '/*E.mat')
        label_subj = [int(f.split('A0')[1][0]) for f in label_files_list]
        
    file_list = glob.glob(path + '/*.gdf')
    subjects = [int(f.split('A0')[1][0]) for f in file_list]
    
    if not training:
        label_subj = [np.argwhere(np.array(label_subj) == subjects[i])[0][0]
                      for i in range(len(subjects))]
    
    event_id = dict()
    events_from_edf = []
    sampling_frequency = raw_edf._raw_extras[0]['max_samp']
    original_event = raw_edf.find_edf_events()
    annot_list = list(zip(original_event[1], original_event[4], original_event[2]))
    
    # Remove rejected trials from events
    if drop_rejects:
        annot_list = pd.DataFrame(annot_list)
        rejected = annot_list[0].isin(annot_list[annot_list[2] == 1023][0])
        accepted_trials_index = [True] * 288
        ind = -1
        for row in annot_list.itertuples():
            if row[3] == 1023:
                rejected.loc[row[0] + 1] = True
                accepted_trials_index[ind] = False
            if row[3] == 768:
                ind = ind + 1

        # only filter the annotation list when rejected trials were marked;
        # otherwise `rejected` is undefined
        annot_list = annot_list[~rejected]
        annot_list = list(zip(annot_list[0], annot_list[1], annot_list[2]))
    
    events_from_edf.extend(annot_list)
    events_from_edf = np.array(events_from_edf)
    
    events_arr = np.zeros(events_from_edf.shape, dtype=int)
    for i, i_event in enumerate(events_from_edf):
        index = int(float(i_event[0]) * sampling_frequency)
        events_arr[i, :] = index, 0, stim_code[int(i_event[2])]

    # strip channel names of "." characters
    raw_edf.rename_channels(lambda x: x.strip('.'))
    # create the event dictionary based on the events present in the file
    events_in_edf = [event[2] for event in events_arr]
    if 2 in events_in_edf:
        event_id['LEFT_HAND'] = 2
    if 3 in events_in_edf:
        event_id['RIGHT_HAND'] = 3
    if 4 in events_in_edf:
        event_id['FEET'] = 4
    if 5 in events_in_edf:
        event_id['TONGUE'] = 5
    if 6 in events_in_edf:
        event_id['CUE_UNKNOWN'] = 6

    # Read epochs (train will be done only between -0.5 and 4s)
    # Testing will be done with a running classifier

    # raw_edf.filter(0., 40., fir_design='firwin', skip_by_annotation='edge')   # 4-40Hz
    picks = pick_types(raw_edf.info, meg=False, eeg=True, 
                       stim=False, eog=False, exclude='bads')
    epochs = Epochs(raw_edf, events_arr, event_id, tmin, tmax, proj=True,
                    picks=picks, baseline=None, preload=True)
    y = epochs.events[:, 2] - 2

    filter_data = []
    #filter_bank = [(4.,40.)]
    filter_bank = [(4., 8.), (8., 12.), (12., 16.), (16., 20.), (20., 24.),
                   (24., 28.), (28., 32.), (32., 36.), (36., 40.)]
    for _filter in filter_bank:
        #filter_data.append(np.abs(signal.hilbert(epochs.copy().filter(_filter[0], _filter[1], fir_design='firwin').get_data())))
        filter_data.append(epochs.copy().filter(_filter[0], _filter[1], fir_design='firwin').get_data())
    filter_data = np.array(filter_data)
        
    if training:
        oScaler = Scaler(scalings='mean').fit(filter_data.flatten().reshape(-1,1))
        #oScaler = MinMaxScaler(copy=True, feature_range=(-1, 1)).fit(filter_data.flatten().reshape(-1,1))
        pk.dump(oScaler,open("./fb/subject{}_filter_oscaler.pk".format(subjects[subj]),'wb'))
    else:
        oScaler = pk.load(open("./fb/subject{}_filter_oscaler.pk".format(subjects[subj]),'rb'))
    
    shape = filter_data.shape
    filter_data = oScaler.transform(filter_data.flatten().reshape(-1,1))
    filter_data = filter_data.reshape(shape)
    filter_data = filter_data.transpose(1,3,2,0) # 273, 1001, 22, 10

    # Augment and reshape data into image
    filter_data = filter_data.transpose(2,0,1,3) # 22, 273, 1001, 10
    filter_data = np.split(filter_data,[1,6,13,18,21])
    empty_ch = np.zeros(filter_data[0].shape)
    filter_data = np.vstack([empty_ch,empty_ch,empty_ch,filter_data[0],empty_ch,empty_ch,empty_ch,
                             empty_ch,filter_data[1],empty_ch,
                             filter_data[2],
                             empty_ch,filter_data[3],empty_ch,
                             empty_ch,empty_ch,filter_data[4],empty_ch,empty_ch,
                             empty_ch,empty_ch,empty_ch,filter_data[5],empty_ch,empty_ch,empty_ch])
    
    filter_data = filter_data.transpose(1,2,0,3) # 273, 1001, 42, 10
    filter_data = filter_data.reshape(filter_data.shape[0],filter_data.shape[1],6,7,filter_data.shape[3]) # 273, 1001, 6, 7, 10
    
    if training:
        return filter_data, y
    else:
        y = sio.loadmat(label_files_list[label_subj[subj]])['classlabel'].flatten()
        y = np.array([ i - 1 for i in y ])
        if drop_rejects:
            y_drop = [ i for i in range(288) if not accepted_trials_index[i] ]
            y = np.delete(y, y_drop, None)
        return filter_data, y
Example #10
def decoding_withKfold(X, Y_speech, Y_lips, n_fold, train_index, test_index,
                       examples, feature):

    predictions_speech = np.zeros(Y_speech.shape)
    speech = np.zeros(Y_speech.shape)
    predictions_lips = np.zeros(Y_lips.shape)
    lips = np.zeros(Y_lips.shape)

    scores_speech = np.zeros((n_fold, ))

    for k in range(0, n_fold):

        eegScaler = MultiChannelScaler(scalings='mean')
        speechScaler = MultiChannelScaler(scalings='mean')
        lipsScaler = MultiChannelScaler(scalings='mean')

        speechModel = LReg()
        lipsModel = LReg()

        # allocate standardized copies of the X and Y variables
        X_standard = np.zeros(X.shape)
        Y_lips_standard = np.zeros(Y_lips.shape)
        Y_speech_standard = np.zeros(Y_speech.shape)

        # standardizing data
        X_standard[train_index[k], :, :] = eegScaler.fit_transform(
            X[train_index[k], :, :])
        X_standard[test_index[k], :, :] = eegScaler.transform(
            X[test_index[k], :, :])

        Y_lips_standard[train_index[k], :] = lipsScaler.fit_transform(
            Y_lips[train_index[k], :]).squeeze()
        Y_lips_standard[test_index[k], :] = lipsScaler.transform(
            Y_lips[test_index[k], :]).squeeze()

        Y_speech_standard[train_index[k], :] = speechScaler.fit_transform(
            Y_speech[train_index[k], :]).squeeze()
        Y_speech_standard[test_index[k], :] = speechScaler.transform(
            Y_speech[test_index[k], :]).squeeze()

        X_TRAIN = X_standard[train_index[k], :, :]
        X_TEST = X_standard[test_index[k], :, :]

        Y_envelope_sp_TRAIN = Y_speech_standard[train_index[k], :]
        Y_envelope_sp_TEST = Y_speech_standard[test_index[k], :]

        Y_lips_ap_TRAIN = Y_lips_standard[train_index[k], :]
        Y_lips_ap_TEST = Y_lips_standard[test_index[k], :]

        # X_TRAIN and X_TEST are now (n_trials, n_channels, n_timepoints)
        n_trial = X_TRAIN.shape[0]
        n_trial_test = X_TEST.shape[0]
        n_ch = X_TRAIN.shape[1]
        trial_length = X_TRAIN.shape[2]

        if examples == 'are_Trials':
            X_TRAIN_tmp = np.zeros((X_TRAIN.shape[0], n_ch * trial_length))
            X_TEST_tmp = np.zeros((X_TEST.shape[0], n_ch * trial_length))
            for i in range(0, n_ch):
                X_TRAIN_tmp[:, i * trial_length:(i + 1) *
                            trial_length] = X_TRAIN[:, i, :]
                X_TEST_tmp[:, i * trial_length:(i + 1) *
                           trial_length] = X_TEST[:, i, :]
            X_TRAIN = X_TRAIN_tmp
            X_TEST = X_TEST_tmp

        elif examples == 'are_Time':
            X_TRAIN_tmp = np.zeros((n_trial * trial_length, n_ch))
            X_TEST_tmp = np.zeros((n_trial_test * trial_length, n_ch))
            Y_envelope_sp_TRAIN_tmp = np.zeros((n_trial * trial_length, ))
            Y_envelope_sp_TEST_tmp = np.zeros((n_trial_test * trial_length, ))
            Y_lips_ap_TRAIN_tmp = np.zeros((n_trial * trial_length, ))
            Y_lips_ap_TEST_tmp = np.zeros((n_trial_test * trial_length, ))
            for i in range(0, n_trial):
                X_TRAIN_tmp[i * trial_length:(i + 1) *
                            trial_length, :] = X_TRAIN[i, :, :].T
                Y_envelope_sp_TRAIN_tmp[i * trial_length:(i + 1) *
                                        trial_length] = Y_envelope_sp_TRAIN[
                                            i, :]
                Y_lips_ap_TRAIN_tmp[i * trial_length:(i + 1) *
                                    trial_length] = Y_lips_ap_TRAIN[i, :]
                if i < X_TEST.shape[0]:  # there are fewer test trials than train trials
                    X_TEST_tmp[i * trial_length:(i + 1) *
                               trial_length, :] = X_TEST[i, :, :].T
                    Y_envelope_sp_TEST_tmp[i * trial_length:(i + 1) *
                                           trial_length] = Y_envelope_sp_TEST[
                                               i, :]
                    Y_lips_ap_TEST_tmp[i * trial_length:(i + 1) *
                                       trial_length] = Y_lips_ap_TEST[i, :]
            X_TRAIN = X_TRAIN_tmp
            X_TEST = X_TEST_tmp
            Y_envelope_sp_TRAIN = Y_envelope_sp_TRAIN_tmp
            Y_envelope_sp_TEST = Y_envelope_sp_TEST_tmp
            Y_lips_ap_TRAIN = Y_lips_ap_TRAIN_tmp
            Y_lips_ap_TEST = Y_lips_ap_TEST_tmp

            if feature == 'pca':
                [pca, n_comp] = pca_decomposition(X_TRAIN)
                X_TRAIN = pca.transform(X_TRAIN)[:, :n_comp]
                X_TEST = pca.transform(X_TEST)[:, :n_comp]
            if feature == 'Kpca':
                [pca, n_comp] = kernel_pca_decomposition(X_TRAIN)
                X_TRAIN = pca.transform(X_TRAIN)[:, :n_comp]
                X_TEST = pca.transform(X_TEST)[:, :n_comp]
            if feature == 'ica':
                [ica, selected_comps] = ICA_decomposition(X_TRAIN)
                X_TRAIN = ica.transform(X_TRAIN)[:,
                                                 selected_comps.astype('int')]
                X_TEST = ica.transform(X_TEST)[:, selected_comps.astype('int')]

            if feature == 'derivative1':
                de1 = np.diff(X_TRAIN, axis=0) / 0.01
                de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1),
                                     axis=0)
                for i in range(0, de1.shape[0], trial_length):
                    de1[i, :] = np.zeros((1, de1.shape[1]))
                X_TRAIN = np.concatenate((X_TRAIN, de1), 1)

                de1 = np.diff(X_TEST, axis=0) / 0.01
                de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1),
                                     axis=0)
                for i in range(0, de1.shape[0], trial_length):
                    de1[i, :] = np.zeros((1, de1.shape[1]))
                X_TEST = np.concatenate((X_TEST, de1), 1)

            if feature == 'derivative2':
                de1 = np.diff(X_TRAIN, axis=0) / 0.01
                de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1),
                                     axis=0)
                for i in range(0, de1.shape[0], trial_length):
                    de1[i, :] = np.zeros((1, de1.shape[1]))

                de2 = np.diff(de1, axis=0)
                de2 = np.concatenate((np.zeros((1, de2.shape[1])), de2),
                                     axis=0)
                for i in range(0, de2.shape[0], trial_length):
                    de2[i, :] = np.zeros((1, de2.shape[1]))
                    de2[i + 1, :] = np.zeros((1, de2.shape[1]))

                X_TRAIN = np.concatenate((np.concatenate(
                    (X_TRAIN, de1), 1), de2), 1)

                de1 = np.diff(X_TEST, axis=0) / 0.01
                de1 = np.concatenate((np.zeros((1, de1.shape[1])), de1),
                                     axis=0)
                for i in range(0, de1.shape[0], trial_length):
                    de1[i, :] = np.zeros((1, de1.shape[1]))

                de2 = np.diff(de1, axis=0)
                de2 = np.concatenate((np.zeros((1, de2.shape[1])), de2),
                                     axis=0)
                for i in range(0, de2.shape[0], trial_length):
                    de2[i, :] = np.zeros((1, de2.shape[1]))
                    de2[i + 1, :] = np.zeros((1, de2.shape[1]))

                X_TEST = np.concatenate((np.concatenate(
                    (X_TEST, de1), 1), de2), 1)

        if feature == 'polynomial':
            X_TRAIN = np.concatenate((X_TRAIN, np.power(X_TRAIN, 2)), 1)
            X_TEST = np.concatenate((X_TEST, np.power(X_TEST, 2)), 1)

        # train the models and predict
        speechModel.fit(X_TRAIN, Y_envelope_sp_TRAIN)
        lipsModel.fit(X_TRAIN, Y_lips_ap_TRAIN)

        reconstructed_speech = speechModel.predict(X_TEST)
        reconstructed_lips = lipsModel.predict(X_TEST)

        if examples == 'are_Time':
            reconstructed_speech_tmp = np.zeros((n_trial_test, trial_length))
            reconstructed_lips_tmp = np.zeros((n_trial_test, trial_length))
            Y_envelope_sp_TEST_tmp = np.zeros((n_trial_test, trial_length))
            Y_lips_ap_TEST_tmp = np.zeros((n_trial_test, trial_length))
            t = 0
            for i in range(0, len(reconstructed_speech), trial_length):
                reconstructed_speech_tmp[
                    t, :] = reconstructed_speech[i:i + trial_length]
                reconstructed_lips_tmp[t, :] = reconstructed_lips[i:i +
                                                                  trial_length]
                Y_envelope_sp_TEST_tmp[t, :] = Y_envelope_sp_TEST[i:i +
                                                                  trial_length]
                Y_lips_ap_TEST_tmp[t, :] = Y_lips_ap_TEST[i:i + trial_length]
                t += 1
            reconstructed_speech = reconstructed_speech_tmp
            reconstructed_lips = reconstructed_lips_tmp
            Y_envelope_sp_TEST = Y_envelope_sp_TEST_tmp
            Y_lips_ap_TEST = Y_lips_ap_TEST_tmp

        predictions_speech[test_index[k], :] = reconstructed_speech
        speech[test_index[k], :] = Y_envelope_sp_TEST

        predictions_lips[test_index[k], :] = reconstructed_lips
        lips[test_index[k], :] = Y_lips_ap_TEST

    # computing scores
    speech_score = evaluate(speech.T, predictions_speech.T, 'corrcoeff')
    lips_score = evaluate(lips.T, predictions_lips.T, 'corrcoeff')

    return speech_score, lips_score, predictions_speech, predictions_lips, speech, lips
Example #11
#
# Vectorizer
# ^^^^^^^^^^
# The scikit-learn API provides functionality to chain transformers and
# estimators by using :class:`sklearn.pipeline.Pipeline`, so we can construct
# decoding pipelines and perform cross-validation and grid search. However,
# scikit-learn transformers and estimators generally expect 2D data
# (n_samples * n_features), whereas MNE transformers typically output data
# with a higher dimensionality
# (e.g. n_samples * n_channels * n_frequencies * n_times). A Vectorizer
# therefore needs to be applied between the MNE and the scikit-learn steps,
# like this:

# Uses all MEG sensors and time points as separate classification
# features, so the resulting filters are spatio-temporal
clf = make_pipeline(Scaler(epochs.info),
                    Vectorizer(),
                    LogisticRegression(solver='lbfgs'))

scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=1)

# Mean scores across cross-validation splits
score = np.mean(scores, axis=0)
print('Spatio-temporal: %0.1f%%' % (100 * score,))
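
# A minimal sketch (toy shapes; not part of the tutorial) of the reshape that
# Vectorizer performs between the MNE and scikit-learn steps:
import numpy as np
from mne.decoding import Vectorizer

toy = np.random.randn(8, 5, 20)          # (n_samples, n_channels, n_times)
flat = Vectorizer().fit_transform(toy)   # -> (n_samples, n_channels * n_times)
assert flat.shape == (8, 100)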

###############################################################################
# PSDEstimator
# ^^^^^^^^^^^^
# The :class:`mne.decoding.PSDEstimator`
# computes the power spectral density (PSD) using the multitaper
# method. It takes a 3D array as input, converts it into 2D and computes the
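
# A hedged sketch of typical PSDEstimator usage (parameter values assumed):
import numpy as np
from mne.decoding import PSDEstimator

epochs_data = np.random.randn(8, 5, 200)           # (n_epochs, n_channels, n_times)
psd = PSDEstimator(sfreq=100., fmin=1., fmax=40.)
X_psd = psd.fit_transform(epochs_data)             # power per epoch, channel and frequency
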
Example #12
def decoding_withKfold(X, Y_speech, Y_lips, n_fold, train_index, test_index, polynomialReg):

    predictions_speech = np.zeros(Y_speech.shape)
    speech = np.zeros(Y_speech.shape)
    predictions_lips = np.zeros(Y_lips.shape)
    lips = np.zeros(Y_lips.shape)

    scores_speech = np.zeros((n_fold,))

    for k in range(0, n_fold):

        eegScaler = Scaler()
        speechScaler = Scaler()
        lipsScaler = Scaler()

        speechModel = LReg()
        lipsModel = LReg()

        # allocate standardized copies of the X and Y variables
        X_standard = np.zeros(X.shape)
        Y_lips_standard = np.zeros(Y_lips.shape)
        Y_speech_standard = np.zeros(Y_speech.shape)

        # standardizing data
        X_standard[train_index[k], :] = eegScaler.fit_transform(X[train_index[k], :])
        X_standard[test_index[k], :] = eegScaler.transform(X[test_index[k], :])

        Y_lips_standard[train_index[k], :] = lipsScaler.fit_transform(Y_lips[train_index[k], :])
        Y_lips_standard[test_index[k], :] = lipsScaler.transform(Y_lips[test_index[k], :])

        Y_speech_standard[train_index[k], :] = speechScaler.fit_transform(Y_speech[train_index[k], :])
        Y_speech_standard[test_index[k], :] = speechScaler.transform(Y_speech[test_index[k], :])

        X_TRAIN = X_standard[ train_index[k], :]
        X_TEST = X_standard[ test_index[k], :]

        Y_envelope_sp_TRAIN = Y_speech_standard[train_index[k], :]
        Y_envelope_sp_TEST = Y_speech_standard[test_index[k], :]

        Y_lips_ap_TRAIN = Y_lips_standard[train_index[k], :]
        Y_lips_ap_TEST = Y_lips_standard[test_index[k], :]


        if polynomialReg:
            X_TRAIN = np.concatenate((X_TRAIN, np.power(X_TRAIN, 2)), 1)
            X_TEST = np.concatenate((X_TEST, np.power(X_TEST, 2)), 1)

        # train the models and predict
        speechModel.fit(X_TRAIN, Y_envelope_sp_TRAIN)
        lipsModel.fit(X_TRAIN, Y_lips_ap_TRAIN)

        reconstructed_speech = speechModel.predict(X_TEST)
        reconstructed_lips = lipsModel.predict(X_TEST)

        predictions_speech[test_index[k], :] = reconstructed_speech
        speech[test_index[k], :] = Y_envelope_sp_TEST

        predictions_lips[test_index[k], :] = reconstructed_lips
        lips[test_index[k], :] = Y_lips_ap_TEST

    # computing scores
    speech_score = evaluate(speech.T, predictions_speech.T, 'corrcoeff')
    lips_score = evaluate(lips.T, predictions_lips.T, 'corrcoeff')

    return speech_score, lips_score, predictions_speech, predictions_lips, speech, lips
Example #13
 elif "Second" in interval:
     sl = slice(len(eps) // 3, 2 * len(eps) // 3)
 elif "Third" in interval:
     sl = slice(2 * len(eps) // 3, None)
 elif "235-530" in interval:
     sl = slice(ix[0], ix[1])
 else:
     assert interval == "All"
     sl = slice(None)
 eps = eps[sl]
 info = eps.info
 time = eps.times
 s_ix = slice(ix[0], ix[1])
 c1, c2 = list(eps.event_id.keys())
 clf = make_pipeline(
     Scaler(eps.info),
     Vectorizer(),
     PCA(0.9999),
     LinearModel(
         LogisticRegression(
             solver=solver,
             penalty="l1",
             max_iter=1000,
             multi_class="auto",
             random_state=seed,
         )),
 )
 time_decode = SlidingEstimator(clf,
                                n_jobs=n_jobs,
                                scoring="roc_auc",
                                verbose=False)
Example #14
            p + '_task-fearcond_cues_singletrials-epo.fif'))

    # downsample if necessary
    if epo.info['sfreq'] != param['testresampfreq']:
        epo = epo.resample(param['testresampfreq'])

    # Drop bad trials and get indices
    goodtrials = np.where(df['badtrial'] == 0)[0]

    # Get external data for this part
    df = df.iloc[goodtrials]

    epo = epo[goodtrials]

    # Standardize data before regression
    scale = Scaler(scalings='mean')  # named 'mean' but z-scores the data, see docs
    epo_z = mne.EpochsArray(scale.fit_transform(epo.get_data()), epo.info)

    betasnp = []
    for idx, regvar in enumerate(regvars):
        # Standardize data
        df[regvar + '_z'] = scipy.stats.zscore(df[regvar])

        epo.metadata = df.assign(Intercept=1)  # Add an intercept for later

        # Perform regression
        names = ["Intercept"] + [regvar + '_z']
        res = mne.stats.linear_regression(epo_z,
                                          epo.metadata[names],
                                          names=names)
Example #15
def test_scaler(info, method):
    """Test methods of Scaler."""
    raw = io.read_raw_fif(raw_fname)
    events = read_events(event_name)
    picks = pick_types(raw.info,
                       meg=True,
                       stim=False,
                       ecg=False,
                       eog=False,
                       exclude='bads')
    picks = picks[1:13:3]

    epochs = Epochs(raw,
                    events,
                    event_id,
                    tmin,
                    tmax,
                    picks=picks,
                    baseline=(None, 0),
                    preload=True)
    epochs_data = epochs.get_data()
    y = epochs.events[:, -1]

    epochs_data_t = epochs_data.transpose([1, 0, 2])
    if method in ('mean', 'median'):
        if not check_version('sklearn'):
            with pytest.raises(ImportError, match='No module'):
                Scaler(info, method)
            return
        if check_version('sklearn', '1.0'):
            # 1.0.dev0 is a problem pending
            # https://github.com/scikit-learn/scikit-learn/issues/19726
            pytest.skip('Bug on sklearn main as of 2021/03/19')

    if info:
        info = epochs.info
    scaler = Scaler(info, method)
    X = scaler.fit_transform(epochs_data, y)
    assert_equal(X.shape, epochs_data.shape)
    if method is None or isinstance(method, dict):
        sd = DEFAULTS['scalings'] if method is None else method
        stds = np.zeros(len(picks))
        for key in ('mag', 'grad'):
            stds[pick_types(epochs.info, meg=key)] = 1. / sd[key]
        stds[pick_types(epochs.info, meg=False, eeg=True)] = 1. / sd['eeg']
        means = np.zeros(len(epochs.ch_names))
    elif method == 'mean':
        stds = np.array([np.std(ch_data) for ch_data in epochs_data_t])
        means = np.array([np.mean(ch_data) for ch_data in epochs_data_t])
    else:  # median
        percs = np.array([
            np.percentile(ch_data, [25, 50, 75]) for ch_data in epochs_data_t
        ])
        stds = percs[:, 2] - percs[:, 0]
        means = percs[:, 1]
    assert_allclose(X * stds[:, np.newaxis] + means[:, np.newaxis],
                    epochs_data,
                    rtol=1e-12,
                    atol=1e-20,
                    err_msg=method)

    X2 = scaler.fit(epochs_data, y).transform(epochs_data)
    assert_array_equal(X, X2)

    # inverse_transform
    Xi = scaler.inverse_transform(X)
    assert_array_almost_equal(epochs_data, Xi)

    # Test init exception
    pytest.raises(ValueError, Scaler, None, None)
    pytest.raises(TypeError, scaler.fit, epochs, y)
    pytest.raises(TypeError, scaler.transform, epochs)
    epochs_bad = Epochs(raw,
                        events,
                        event_id,
                        0,
                        0.01,
                        baseline=None,
                        picks=np.arange(len(raw.ch_names)))  # non-data chs
    scaler = Scaler(epochs_bad.info, None)
    pytest.raises(ValueError, scaler.fit, epochs_bad.get_data(), y)
Example #16
def test_get_coef():
    """Test the retrieval of linear coefficients (filters and patterns) from
    simple and pipeline estimators.
    """
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LinearRegression

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    rng = np.random.RandomState(0)  # keep the seeded RNG so the data are reproducible
    n_samples, n_features = 20, 3
    y = (np.arange(n_samples) % 2) * 2 - 1
    w = rng.randn(n_features, 1)
    X = w.dot(y[np.newaxis, :]).T + rng.randn(n_samples, n_features)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert_true(expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't a classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (LinearModel(), make_pipeline(StandardScaler(), LinearModel())):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert_true(filters[0] != patterns[0])
        n_chans = X.shape[1]
        assert_array_equal(filters.shape, patterns.shape, [n_chans, n_chans])

    # Inverse transform linear model
    filters_inv = get_coef(clf, 'filters_', True)
    assert_true(filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert_true(patterns[0] != patterns_inv[0])

    # Check patterns values
    clf = make_pipeline(StandardScaler(), LinearModel(LinearRegression()))
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', True)
    mean, std = X.mean(0), X.std(0)
    X = (X - mean) / std
    coef = np.linalg.pinv(X.T.dot(X)).dot(X.T.dot(y))
    patterns_manual = np.cov(X.T).dot(coef)
    assert_array_almost_equal(patterns, patterns_manual * std + mean)

    # Check with search_light and combination of preprocessing ending with sl:
    n_samples, n_features, n_times = 20, 3, 5
    y = np.arange(n_samples) % 2
    X = np.random.rand(n_samples, n_features, n_times)
    slider = SlidingEstimator(make_pipeline(StandardScaler(), LinearModel()))

    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape,
                               [n_features, n_times])
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])
Example #17
def raw_to_data(raw_edf, training=False, drop_rejects=True, subj=None):

    tmin, tmax = -0.5, 4.
    X, y = [], []

    stim_code = dict([(32766, 1), (769, 2), (770, 3), (771, 4), (772, 5),
                      (783, 6), (276, 7), (277, 8), (768, 9), (1023, 10),
                      (1072, 11)])

    if training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TrainingSet')
    if not training:
        path = op.join('data_i2r', 'BCI_IV_2a', 'TestingSet')
        label_path = op.join('data_i2r', 'BCI_IV_2a', 'true_labels')
        label_files_list = glob.glob(label_path + '/*E.mat')
        label_subj = [int(f.split('A0')[1][0]) for f in label_files_list]

    file_list = glob.glob(path + '/*.gdf')
    subjects = [int(f.split('A0')[1][0]) for f in file_list]

    if not training:
        label_subj = [
            np.argwhere(np.array(label_subj) == subjects[i])[0][0]
            for i in range(len(subjects))
        ]

    event_id = dict()
    events_from_edf = []
    sampling_frequency = raw_edf._raw_extras[0]['max_samp']
    original_event = raw_edf.find_edf_events()
    annot_list = list(
        zip(original_event[1], original_event[4], original_event[2]))

    # Remove rejected trials from events
    if drop_rejects:
        annot_list = pd.DataFrame(annot_list)
        rejected = annot_list[0].isin(annot_list[annot_list[2] == 1023][0])
        accepted_trials_index = [True] * 288
        ind = -1
        for row in annot_list.itertuples():
            if row[3] == 1023:
                rejected.loc[row[0] + 1] = True
                accepted_trials_index[ind] = False
            if row[3] == 768:
                ind = ind + 1

        # only filter the annotation list when rejected trials were marked;
        # otherwise `rejected` is undefined
        annot_list = annot_list[~rejected]
        annot_list = list(zip(annot_list[0], annot_list[1], annot_list[2]))

    events_from_edf.extend(annot_list)
    events_from_edf = np.array(events_from_edf)

    events_arr = np.zeros(events_from_edf.shape, dtype=int)
    for i, i_event in enumerate(events_from_edf):
        index = int(float(i_event[0]) * sampling_frequency)
        events_arr[i, :] = index, 0, stim_code[int(i_event[2])]

    # strip channel names of "." characters
    raw_edf.rename_channels(lambda x: x.strip('.'))
    # create the event dictionary based on the events present in the file
    events_in_edf = [event[2] for event in events_arr]
    if 2 in events_in_edf:
        event_id['LEFT_HAND'] = 2
    if 3 in events_in_edf:
        event_id['RIGHT_HAND'] = 3
    if 4 in events_in_edf:
        event_id['FEET'] = 4
    if 5 in events_in_edf:
        event_id['TONGUE'] = 5
    if 6 in events_in_edf:
        event_id['CUE_UNKNOWN'] = 6

    # Apply band-pass filter
    raw_edf.filter(0., 38., fir_design='firwin',
                   skip_by_annotation='edge')  # low-pass at 38 Hz

    picks = pick_types(raw_edf.info,
                       meg=False,
                       eeg=True,
                       stim=False,
                       eog=False,
                       exclude='bads')

    # Read epochs (train will be done only between -0.5 and 4s)
    # Testing will be done with a running classifier

    epochs = Epochs(raw_edf,
                    events_arr,
                    event_id,
                    tmin,
                    tmax,
                    proj=True,
                    picks=picks,
                    baseline=None,
                    preload=True)

    X = epochs.get_data().transpose(0, 2, 1)
    X_shape = X.shape
    if training:
        scaler = Scaler(scalings='median').fit(X.flatten().reshape(-1, 1))
        #scaler = MinMaxScaler(copy=True, feature_range=(-1, 1)).fit(X.flatten().reshape(-1,1))
        pk.dump(
            scaler,
            open(
                "./shallow_convnet/subject{}_oscaler.pk".format(
                    subjects[subj]), 'wb'))
    else:
        scaler = pk.load(
            open(
                "./shallow_convnet/subject{}_oscaler.pk".format(
                    subjects[subj]), 'rb'))

    y = epochs.events[:, 2] - 2
    X = scaler.transform(X.flatten().reshape(-1, 1))
    X = X.reshape(X_shape)

    if training:
        return X, y, scaler
    else:
        y = sio.loadmat(
            label_files_list[label_subj[subj]])['classlabel'].flatten()
        y = np.array([i - 1 for i in y])
        if drop_rejects:
            y_drop = [i for i in range(288) if not accepted_trials_index[i]]
            y = np.delete(y, y_drop, None)
        return X, y
Example #18
# Vectorizer
# ^^^^^^^^^^
# The scikit-learn API provides functionality to chain transformers and
# estimators by using :class:`sklearn.pipeline.Pipeline`, so we can construct
# decoding pipelines and perform cross-validation and grid search. However,
# scikit-learn transformers and estimators generally expect 2D data
# (n_samples * n_features), whereas MNE transformers typically output data
# with a higher dimensionality
# (e.g. n_samples * n_channels * n_frequencies * n_times). A Vectorizer
# therefore needs to be applied between the MNE and the scikit-learn steps,
# like this:

# Uses all MEG sensors and time points as separate classification
# features, so the resulting filters are spatio-temporal
clf = make_pipeline(
    Scaler(epochs.info),
    Vectorizer(),
    LogisticRegression(solver='liblinear')  # liblinear is faster than lbfgs
)

scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=None)

# Mean scores across cross-validation splits
score = np.mean(scores, axis=0)
print('Spatio-temporal: %0.1f%%' % (100 * score, ))

# %%
# PSDEstimator
# ^^^^^^^^^^^^
# The :class:`mne.decoding.PSDEstimator`
# computes the power spectral density (PSD) using the multitaper
Example #19
    d = method(d, axis=-1)
    return d


df = pd.DataFrame()
df['label'] = label
df['session'] = session
df['max'] = crash(data)
df['min'] = crash(data, np.min)

df

# %%
n_jobs = 48
clf = make_pipeline(
    Scaler(info),
    Vectorizer(),
    StandardScaler(),
    LinearModel(LogisticRegression(solver='liblinear')),
)

time_decoder = SlidingEstimator(
    clf,
    scoring='roc_auc',
    n_jobs=n_jobs,
)

y = df['label'].values.copy()
y[y == 2] = 0

scores = cross_val_multiscore(
Example #20
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators."""
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import svm
    from sklearn.linear_model import Ridge, LinearRegression
    from sklearn.model_selection import GridSearchCV

    lm_classification = LinearModel()
    assert (is_classifier(lm_classification))

    lm_regression = LinearModel(Ridge())
    assert (is_regressor(lm_regression))

    parameters = {'kernel': ['linear'], 'C': [1, 10]}
    lm_gs_classification = LinearModel(
        GridSearchCV(svm.SVC(),
                     parameters,
                     cv=2,
                     refit=True,
                     iid=False,
                     n_jobs=1))
    assert (is_classifier(lm_gs_classification))

    lm_gs_regression = LinearModel(
        GridSearchCV(svm.SVR(),
                     parameters,
                     cv=2,
                     refit=True,
                     iid=False,
                     n_jobs=1))
    assert (is_regressor(lm_gs_regression))

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert (expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't a classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for classification/regression estimators and pipelines
    rng = np.random.RandomState(0)
    for clf in (lm_regression, lm_gs_classification,
                make_pipeline(StandardScaler(), lm_classification),
                make_pipeline(StandardScaler(), lm_gs_regression)):

        # generate some categorical/continuous data
        # according to the type of estimator.
        if is_classifier(clf):
            n, n_features = 1000, 3
            X = rng.rand(n, n_features)
            y = np.arange(n) % 2
        else:
            X, y, A = _make_data(n_samples=1000, n_features=3, n_targets=1)
            y = np.ravel(y)

        clf.fit(X, y)

        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            if hasattr(clf.steps[-1][-1].model, 'best_estimator_'):
                # Linear Model with GridSearchCV
                coefs = clf.steps[-1][-1].model.best_estimator_.coef_
            else:
                # Standard Linear Model
                coefs = clf.steps[-1][-1].model.coef_
        else:
            if hasattr(clf.model, 'best_estimator_'):
                # Linear Model with GridSearchCV
                coefs = clf.model.best_estimator_.coef_
            else:
                # Standard Linear Model
                coefs = clf.model.coef_
        if coefs.ndim == 2 and coefs.shape[0] == 1:
            coefs = coefs[0]
        assert_array_equal(filters, coefs)
        patterns = get_coef(clf, 'patterns_', False)
        assert (filters[0] != patterns[0])
        n_chans = X.shape[1]
        assert_array_equal(filters.shape, patterns.shape, [n_chans, n_chans])

    # Inverse transform linear model
    filters_inv = get_coef(clf, 'filters_', True)
    assert (filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert (patterns[0] != patterns_inv[0])

    # Check with search_light and combination of preprocessing ending with sl:
    slider = SlidingEstimator(make_pipeline(StandardScaler(), lm_regression))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape, X.shape[1:])
            # the two time samples get inverted patterns
            assert_equal(patterns[0, 0], -patterns[0, 1])
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])

    # Check patterns with more than 1 regressor
    for n_features in [1, 5]:
        for n_targets in [1, 3]:
            X, Y, A = _make_data(n_samples=3000, n_features=5, n_targets=3)
            lm = LinearModel(LinearRegression()).fit(X, Y)
            assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
            assert_array_equal(lm.filters_.shape, [3, 5])
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
            lm = LinearModel(Ridge(alpha=1)).fit(X, Y)
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
Example #21
# This approach classifies the data within, rather than across, subjects.

for chroma in ['hbo', 'hbr']:

    st_scores = []
    for sub in subjects:

        bids_path = dataset.update(subject=sub)
        raw_haemo, epochs = epoch_preprocessing(bids_path)

        epochs.pick(chroma)

        X = epochs.get_data()
        y = epochs.events[:, 2]

        clf = make_pipeline(Scaler(epochs.info), Vectorizer(),
                            LogisticRegression(solver='liblinear'))

        scores = 100 * cross_val_multiscore(
            clf, X, y, cv=5, n_jobs=1, scoring='roc_auc')

        st_scores.append(np.mean(scores, axis=0))

    print(f"Average spatio-temporal ROC-AUC performance ({chroma}) = "
          f"{np.round(np.mean(st_scores))} % ({np.round(np.std(st_scores))})")

# %%
# Conclusion
# ----------
#
# Data were epoched then decoding was performed on the hbo signal and the hbr
Example #22
    allSubs_face.append(face_avg)

mne.viz.plot_compare_evokeds(allSubs_scene, picks=[
    6, 7, 12, 13, 22
])  # Plotting all the individual evoked arrays (up to 10)
mne.viz.plot_compare_evokeds(allSubs_face, picks=[
    6, 7, 12, 13, 22
])  # Plotting all the individual evoked arrays (up to 10)

# If using O1, O2, Oz, PO3 and PO4: channels 6, 7, 12, 13, 22. These values are not z-scored; standardize when extracting in offline_analysis.

from mne.decoding import (SlidingEstimator, GeneralizingEstimator, Scaler,
                          cross_val_multiscore, LinearModel, get_coef,
                          Vectorizer, CSP, PSDEstimator)

scale_test = Scaler(scalings='mean').fit_transform(
    allSubs_MNE[0][2].get_data())

# What is going on with subject 15?
# allSubs_MNE[2][2]['face'].average().plot(spatial_colors=True, time_unit='s',picks=[7])

#%%
# Plot showing where the correct predictions are located pr. run.

n_it = 5

pred_run = np.reshape(sub13n['RT_correct_NFtest_pred'], [n_it, 200])

for run in range(n_it):
    plt.figure(run)
    plt.bar(np.arange(200), pred_run[run, :])
Example #23
def test_get_coef():
    """Test getting linear coefficients (filters/patterns) from estimators."""
    from sklearn.base import TransformerMixin, BaseEstimator
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import Ridge, LinearRegression

    lm = LinearModel()
    assert (is_classifier(lm))

    lm = LinearModel(Ridge())
    assert (is_regressor(lm))

    # Define a classifier, an invertible transformer and a non-invertible one.

    class Clf(BaseEstimator):
        def fit(self, X, y):
            return self

    class NoInv(TransformerMixin):
        def fit(self, X, y):
            return self

        def transform(self, X):
            return X

    class Inv(NoInv):
        def inverse_transform(self, X):
            return X

    X, y, A = _make_data(n_samples=2000, n_features=3, n_targets=1)

    # I. Test inverse function

    # Check that we retrieve the right number of inverse functions even if
    # there are nested pipelines
    good_estimators = [
        (1, make_pipeline(Inv(), Clf())),
        (2, make_pipeline(Inv(), Inv(), Clf())),
        (3, make_pipeline(Inv(), make_pipeline(Inv(), Inv()), Clf())),
    ]

    for expected_n, est in good_estimators:
        est.fit(X, y)
        assert (expected_n == len(_get_inverse_funcs(est)))

    bad_estimators = [
        Clf(),  # no preprocessing
        Inv(),  # final estimator isn't a classifier
        make_pipeline(NoInv(), Clf()),  # first step isn't invertible
        make_pipeline(Inv(), make_pipeline(Inv(), NoInv()),
                      Clf()),  # nested step isn't invertible
    ]
    for est in bad_estimators:
        est.fit(X, y)
        invs = _get_inverse_funcs(est)
        assert_equal(invs, list())

    # II. Test get coef for simple estimator and pipelines
    for clf in (lm, make_pipeline(StandardScaler(), lm)):
        clf.fit(X, y)
        # Retrieve final linear model
        filters = get_coef(clf, 'filters_', False)
        if hasattr(clf, 'steps'):
            coefs = clf.steps[-1][-1].model.coef_
        else:
            coefs = clf.model.coef_
        assert_array_equal(filters, coefs[0])
        patterns = get_coef(clf, 'patterns_', False)
        assert (filters[0] != patterns[0])
        assert_array_equal(filters.shape, patterns.shape)

    # Inverse transform linear model
    filters_inv = get_coef(clf, 'filters_', True)
    assert (filters[0] != filters_inv[0])
    patterns_inv = get_coef(clf, 'patterns_', True)
    assert (patterns[0] != patterns_inv[0])

    # Check with search_light and combination of preprocessing ending with sl:
    slider = SlidingEstimator(make_pipeline(StandardScaler(), lm))
    X = np.transpose([X, -X], [1, 2, 0])  # invert X across 2 time samples
    clfs = (make_pipeline(Scaler(None, scalings='mean'), slider), slider)
    for clf in clfs:
        clf.fit(X, y)
        for inverse in (True, False):
            patterns = get_coef(clf, 'patterns_', inverse)
            filters = get_coef(clf, 'filters_', inverse)
            assert_array_equal(filters.shape, patterns.shape)
            assert_array_equal(filters.shape, X.shape[1:])
            # the two time samples get inverted patterns
            assert_equal(patterns[0, 0], -patterns[0, 1])
    for t in [0, 1]:
        assert_array_equal(get_coef(clf.estimators_[t], 'filters_', False),
                           filters[:, t])

    # Check patterns with more than 1 regressor
    for n_features in [1, 5]:
        for n_targets in [1, 3]:
            X, Y, A = _make_data(n_samples=5000, n_features=n_features,
                                 n_targets=n_targets)
            lm = LinearModel(LinearRegression()).fit(X, Y)
            assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
            assert_array_equal(lm.filters_.shape, [n_targets, n_features])
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
            lm = LinearModel(Ridge(alpha=1)).fit(X, Y)
            assert_array_almost_equal(A, lm.patterns_.T, decimal=2)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
Example #24
0
import numpy as np
import matplotlib.pyplot as pp
import mne
from mne.decoding import Scaler, ReceptiveField as RField
from sklearn.metrics import mean_squared_error
# Note: use_FreqBand and k_fold are project-specific helpers assumed to be
# importable from elsewhere in this codebase.

def decoding(band, regularization, tmin, tmax, n_fold, subject_name, savepath):

    data_path = "./ProcessedData/Final_"
    eeg="_processed-epo.fif"
    features="_Features-epo.fif"

    sfreq=100

    n_delays = int((tmax - tmin) * sfreq) + 1
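    # e.g. tmin = 0.0, tmax = 0.5 at sfreq = 100 gives int(0.5 * 100) + 1 = 51 delays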

    T= [51, 61, 71, 81, 91, 101, 111, 121, 131, 141, 151]
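    # start samples of the 200-sample (2 s at 100 Hz) EEG windows; per the
    # plotting code below, the lag in ms relative to feature onset is
    # (t_start - 100) * 10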


    # each row is the results vector for one regularization parameter
    results_speech = np.zeros((len(regularization), len(T)))
    results_lips = np.zeros((len(regularization), len(T)))


    results_speech_all_sub={}
    results_lips_all_sub={}
    predictions_lips_all_sub={}
    predictions_speech_all_sub={}


    for s in subject_name:

        print('subject '+str(s))

        X_orig = use_FreqBand(mne.read_epochs(data_path+s+eeg),band)
        Features_orig = use_FreqBand(mne.read_epochs(data_path + s + features),band)
        if band=='original':
            X_orig=X_orig.get_data() # 3d array (N_trial, N_channel, N_time)
            Y_envelope_sp_orig=Features_orig.get_data()[:,0,:] # 2d array (N_trial,  N_time)
            Y_lips_ap_orig=Features_orig.get_data()[:,2,:] # 2d array (N_trial,  N_time)
        else:
            X_orig= np.mean(X_orig.data,2)          # 3d array (N_trial, N_channel, N_time)  #averaging power across frequencies
            Y_envelope_sp_orig=np.mean(Features_orig.data[:,0,:,:],1)
            Y_lips_ap_orig=np.mean(Features_orig.data[:,2,:,:],1)
        time = mne.read_epochs(data_path + s + features).times # 1d array (N_time)
        channels = mne.read_epochs(data_path + s + eeg).ch_names

        predictions_speech = np.zeros((Y_envelope_sp_orig.shape[0], 200, len(T),len(regularization)))
        predictions_lips = np.zeros((Y_lips_ap_orig.shape[0],200,len(T),len(regularization)))

        train_index, test_index = k_fold(Y_envelope_sp_orig,n_fold) # define index for train and test for each of the k folds
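        # k_fold is a project-specific helper (not shown here); the assumed
        # contract is two lists of length n_fold where train_index[k] and
        # test_index[k] are disjoint arrays of trial indices, roughly:
        #     folds = np.array_split(np.arange(len(y)), n_fold)
        #     test_index = folds
        #     train_index = [np.setdiff1d(np.arange(len(y)), f) for f in folds]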

        # data standardizers
        eegScaler = Scaler(scalings='mean')
        speechScaler = Scaler(scalings='mean')
        lipsScaler = Scaler(scalings='mean')

        scores_speech = np.zeros((n_fold,))
        scores_lips = np.zeros((n_fold,))

        coefs_speech = np.zeros((n_fold, X_orig.shape[1], n_delays))
        patterns_speech = coefs_speech.copy()
        coefs_lips = np.zeros((n_fold, X_orig.shape[1], n_delays))
        patterns_lips = coefs_lips.copy()



        for i, r in enumerate(regularization):

           rf_speech = RField(tmin, tmax, sfreq, feature_names=channels, scoring='r2', patterns=True, estimator=r)
           rf_lips = RField(tmin, tmax, sfreq, feature_names=channels, scoring='r2', patterns=True, estimator=r)

           print('reg parameter #'+str(i))

           for j, t_start in enumerate(T):  # extracting the temporal interval of interest

                t_end= t_start+200
                X = X_orig[:,:,t_start:t_end] #only the eeg window is shifting
                Y_envelope_sp = Y_envelope_sp_orig[:,101:301]
                Y_lips_ap = Y_lips_ap_orig[:,101:301]



                for k in range(n_fold):

                    # containers for the standardized copies of X and Y
                    X_standard = np.zeros(X.shape)
                    Y_lips_ap_standard = np.zeros(Y_lips_ap.shape)
                    Y_envelope_sp_standard = np.zeros(Y_envelope_sp.shape)

                    # standardizing data
                    X_standard[train_index[k], :, :] = eegScaler.fit_transform(X[train_index[k], :, :])
                    X_standard[test_index[k], :, :] = eegScaler.transform(X[test_index[k], :, :])
                    Y_lips_ap_standard[train_index[k], :] = lipsScaler.fit_transform(Y_lips_ap[train_index[k], :])[:,:,0]
                    Y_lips_ap_standard[test_index[k], :] = lipsScaler.transform(Y_lips_ap[test_index[k], :])[:,:,0]
                    Y_envelope_sp_standard[train_index[k], :] = speechScaler.fit_transform(Y_envelope_sp[train_index[k], :])[:,:,0]
                    Y_envelope_sp_standard[test_index[k], :] = speechScaler.transform(Y_envelope_sp[test_index[k], :])[:,:,0]
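                    # MNE's Scaler returns 3D arrays, hence the trailing
                    # [:, :, 0] above to drop the singleton axis added to the
                    # 2D target arrays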

                    # shaping data as expected by the decoding model (receptive field function)
                    X_standard = np.rollaxis(X_standard, 2, 0)
                    Y_envelope_sp_standard = np.rollaxis(Y_envelope_sp_standard, 1, 0)
                    Y_lips_ap_standard = np.rollaxis(Y_lips_ap_standard, 1, 0)
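                    # after rolling, X is (N_time, N_trial, N_channel) and the
                    # targets are (N_time, N_trial), the multi-epoch layout
                    # that ReceptiveField expects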


                    X_TRAIN= X_standard[:,train_index[k],:]
                    X_TEST= X_standard[:,test_index[k],:]
                    Y_envelope_sp_TRAIN = Y_envelope_sp_standard[:,train_index[k]]
                    Y_envelope_sp_TEST = Y_envelope_sp_standard[:,test_index[k]]
                    Y_lips_ap_TRAIN = Y_lips_ap_standard[:,train_index[k]]
                    Y_lips_ap_TEST = Y_lips_ap_standard[:,test_index[k]]

                    # training models and predicting
                    rf_speech.fit(X_TRAIN,Y_envelope_sp_TRAIN)
                    rf_lips.fit(X_TRAIN,Y_lips_ap_TRAIN)

                    reconstructed_speech = rf_speech.predict(X_TEST)
                    reconstructed_lips = rf_lips.predict(X_TEST)

                    predictions_speech[test_index[k],:,j,i]=reconstructed_speech.T
                    predictions_lips[test_index[k],:,j,i]=reconstructed_lips.T


                    #computing scores
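                    # normalized MSE: each trial's target and reconstruction
                    # are scaled by their own peak absolute value before
                    # comparison, then averaged across trials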
                    tmp_score_speech=0
                    tmp_score_lips = 0

                    for n, rec in enumerate(reconstructed_speech[:,:,0].T):
                        tmp_score_speech = tmp_score_speech + mean_squared_error(Y_envelope_sp_TEST[:,n]/max(abs(Y_envelope_sp_TEST[:,n])), rec/max(abs(rec)))
                    scores_speech[k]= tmp_score_speech/(n+1)

                    for n, rec in enumerate(reconstructed_lips[:,:,0].T):
                        tmp_score_lips = tmp_score_lips + mean_squared_error(Y_lips_ap_TEST[:, n]/max(abs(Y_lips_ap_TEST[:, n])), rec/max(abs(rec)))
                    scores_lips[k] = tmp_score_lips / (n+1)

                    # scores_speech[k] = rf_speech.score(X_TEST,Y_envelope_sp_TEST)[0]
                    # scores_lips[k] = rf_speech.score(X_TEST,Y_lips_ap_TEST)[0]


                    ##coef_ is shape (n_outputs, n_features, n_delays).
                    # coefs_speech[k] = rf_speech.coef_[0, :, :]
                    # patterns_speech[k] = rf_speech.patterns_[0, :, :]

                    # coefs_lips[k] = rf_lips.coef_[0, :, :]
                    # patterns_lips[k] = rf_lips.patterns_[0, :, :]

                # mean_coefs_lips = coefs_lips.mean(axis=0)
                # mean_patterns_lips = patterns_lips.mean(axis=0)

                mean_scores_lips = scores_lips.mean(axis=0)


                # mean_coefs_speech = coefs_speech.mean(axis=0)
                # mean_patterns_speech = patterns_speech.mean(axis=0)

                mean_scores_speech = scores_speech.mean(axis=0)

                #saving results for the i-th reg parameter and j-th time lag
                results_speech[i, j] = mean_scores_speech
                results_lips[i, j] = mean_scores_lips


        results_speech_all_sub[s]=results_speech.copy()
        results_lips_all_sub[s]=results_lips.copy()
        predictions_speech_all_sub[s]=predictions_speech.copy()
        predictions_lips_all_sub[s]=predictions_lips.copy()




    np.save(savepath+'/results_speech_all_sub',results_speech_all_sub)
    np.save(savepath+'/results_lips_all_sub',results_lips_all_sub)
    np.save(savepath+'/predictions_speech_all_sub',predictions_speech_all_sub)
    np.save(savepath+'/predictions_lips_all_sub',predictions_lips_all_sub)



    tmp_results_speech = []
    tmp_results_lips = []
    for N, s in enumerate(subject_name):
        if N == 0:
            tmp_results_speech = np.asarray(results_speech_all_sub[s])
            tmp_results_lips = np.asarray(results_lips_all_sub[s])
        else:
            tmp_results_speech = np.dstack((tmp_results_speech,
                                            np.asarray(results_speech_all_sub[s])))
            tmp_results_lips = np.dstack((tmp_results_lips,
                                          np.asarray(results_lips_all_sub[s])))

    # computing grand average and standard deviation for each time lag
    GAVG_sp = np.reshape(np.mean(tmp_results_speech,2),(len(regularization),11))
    GAVG_lip = np.reshape(np.mean(tmp_results_lips,2),(len(regularization),11))
    GAVG_sp_std = np.reshape(np.std(tmp_results_speech,2),(len(regularization),11))
    GAVG_lip_std = np.reshape(np.std(tmp_results_lips,2),(len(regularization),11))

    np.save(savepath+'/GAVG_sp',GAVG_sp)
    np.save(savepath+'/GAVG_lip',GAVG_lip)
    np.save(savepath+'/GAVG_sp_std',GAVG_sp_std)
    np.save(savepath+'/GAVG_lip_std',GAVG_lip_std)



    ####PLOTTING RESULTS#####
    T = np.reshape(T, (1, len(T)))
    pp.figure(0)
    for n, r in enumerate(regularization):
        pp.errorbar((T[0,:] - 100) * 10, GAVG_sp[n,:], yerr=GAVG_sp_std[n,:])
    pp.legend(regularization)
    pp.title('speech MSE')
    sfig = savepath + '/GAVG_speech.png'
    pp.savefig(fname=sfig)

    pp.figure(1)
    for n, r in enumerate(regularization):
        pp.errorbar((T[0, :] - 100) * 10, GAVG_lip[n, :], yerr=GAVG_lip_std[n, :])
    pp.legend(regularization)
    pp.title('lips MSE')
    sfig = savepath +'/GAVG_lips.png'
    pp.savefig(fname=sfig)


    #pp.show()

    print('decoding finished; results saved to ' + savepath)