Example #1
# imports assumed from the original project (scikit-learn, MNE, pyriemann)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.decoding import Vectorizer
from pyriemann.spatialfilters import Xdawn
from pyriemann.estimation import ERPCovariances
from pyriemann.tangentspace import TangentSpace
from pyriemann.classification import MDM


def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Use scikit-learn to fit a model given inputs and targets.

    Args:
        inputs: array of shape (n_trials, n_channels, n_features) holding the data segments.
        targets: array of marker labels (0 or 1), one per trial.
        classifier: pre-trained classifier; if None, a new one is trained from scratch.
        pipeline: name of the pipeline to create when classifier is None.

    Returns:
        classifier: the fitted classifier object.
    """
    pipeline_dict = {
        'vect_lr':
        make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression()),
        'vect_reglda':
        make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen')),
        'xdawn_reglda':
        make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                      LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts':
        make_pipeline(ERPCovariances(), TangentSpace(), LogisticRegression()),
        'erpcov_mdm':
        make_pipeline(ERPCovariances(), MDM())
    }
    if not classifier and pipeline:
        classifier = pipeline_dict[pipeline.lower()]
    classifier.fit(inputs, targets)
    return classifier
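A minimal usage sketch for ml_classifier with synthetic data; the array shapes, the random data, and the 'vect_reglda' key choice are illustrative assumptions rather than part of the original project.

import numpy as np

rng = np.random.default_rng(0)
inputs = rng.standard_normal((100, 8, 64))   # 100 trials x 8 channels x 64 features (assumed)
targets = rng.integers(0, 2, size=100)       # binary markers

clf = ml_classifier(inputs, targets, pipeline='vect_reglda')
preds = clf.predict(inputs)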
Example #2
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i in range(len(markers)):
        conditions[markers[i]] = [i+1]
   
    clfs = OrderedDict()
    
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = ['Vect + LR','Vect + RegLDA','ERPCov + TS','ERPCov + MDM','XdawnCov + TS','XdawnCov + MDM']
    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation 
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, 
                                random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Computation in progress...')
    for m in clfs:
        try:

            res = cross_val_score(clfs[m], X, y==2, scoring='roc_auc', 
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m]*len(res))
        except Exception as err:
            print('Exception while evaluating', m, ':', err)
        
    ## Plot Decoding Results

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    n_row,n_column = results.shape
    auc_means = []
    for method in methods_list:
        auc = []
        for i in range(n_row):
            if results.loc[i,'Method']== method:
                auc.append(results.loc[i,'AUC'])
        auc_means.append(np.mean(auc))
    counter = 0
    for i in range(len(methods_list)):
        color = 'green' if auc_means[i]>=0.7 else 'red'
        counter = counter +1 if auc_means[i]>=0.7 else counter
        
    return counter > 0, counter
Example #3
def test_xdawn_decoding_performance():
    """Test decoding performance and extracted pattern on synthetic data."""
    from sklearn.model_selection import KFold
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import accuracy_score

    n_xdawn_comps = 3
    expected_accuracy = 0.98

    epochs, mixing_mat = _simulate_erplike_mixed_data(n_epochs=100)
    y = epochs.events[:, 2]

    # results of Xdawn and _XdawnTransformer should match
    xdawn_pipe = make_pipeline(
        Xdawn(n_components=n_xdawn_comps),
        Vectorizer(),
        MinMaxScaler(),
        LogisticRegression(solver='liblinear'))
    xdawn_trans_pipe = make_pipeline(
        _XdawnTransformer(n_components=n_xdawn_comps),
        Vectorizer(),
        MinMaxScaler(),
        LogisticRegression(solver='liblinear'))

    cv = KFold(n_splits=3, shuffle=False)
    for pipe, X in (
            (xdawn_pipe, epochs),
            (xdawn_trans_pipe, epochs.get_data())):
        predictions = np.empty_like(y, dtype=float)
        for train, test in cv.split(X, y):
            pipe.fit(X[train], y[train])
            predictions[test] = pipe.predict(X[test])

        cv_accuracy_xdawn = accuracy_score(y, predictions)
        assert_allclose(cv_accuracy_xdawn, expected_accuracy, atol=0.01)

        # for both event types, the first component should "match" the mixing
        fitted_xdawn = pipe.steps[0][1]
        if isinstance(fitted_xdawn, Xdawn):
            relev_patterns = np.concatenate(
                [comps[[0]] for comps in fitted_xdawn.patterns_.values()])
        else:
            relev_patterns = fitted_xdawn.patterns_[::n_xdawn_comps]

        for i in range(len(relev_patterns)):
            r, _ = stats.pearsonr(relev_patterns[i, :], mixing_mat[0, :])
            assert np.abs(r) > 0.99
Example #4
    def svm_proba(train_xx6, train_y, test_xx6):
        selects = []
        for j, y in enumerate(train_y):
            if y == 1:
                selects.extend(j - e for e in (-2, -1, 0, 1, 2))
        train_xx6 = train_xx6[selects]
        train_xx6 = train_xx6[:, :, 40:80]
        test_xx6 = test_xx6[:, :, 40:80]
        train_y = train_y[selects]
        train_y[train_y != 1] = 0

        # s = train_xx6.shape
        # train_xx6 = train_xx6.reshape((s[0], s[1] * s[2]))
        # s = test_xx6.shape
        # test_xx6 = test_xx6.reshape((s[0], s[1] * s[2]))

        # return test_xx6

        clf = make_pipeline(
            Vectorizer(), StandardScaler(), pca.PCA(n_components=.95),
            svm.SVC(gamma='scale',
                    kernel='rbf',
                    class_weight={
                        0: 1,
                        1: 2
                    },
                    probability=True))
        clf.fit(train_xx6, train_y)
        return clf.predict_proba(test_xx6)
Example #5
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Test a full example with pattern extraction."""
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold
    data = np.zeros((10 * n_classes, n_channels, n_times))
    # Make only the first channel informative
    for ii in range(n_classes):
        data[ii * 10:(ii + 1) * 10, 0] = ii
    events = np.zeros((len(data), 3), int)
    events[:, 0] = np.arange(len(events))
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)
    clf = make_pipeline(
        Scaler(epochs.info),
        Vectorizer(),
        LinearModel(LogisticRegression(random_state=0, multi_class='ovr')),
    )
    scorer = 'roc_auc_ovr_weighted'
    time_gen = GeneralizingEstimator(clf, scorer, verbose=True)
    X = epochs.get_data()
    y = epochs.events[:, 2]
    n_splits = 3
    cv = StratifiedKFold(n_splits=n_splits)
    scores = cross_val_multiscore(time_gen, X, y, cv=cv, verbose=True)
    want = (n_splits, )
    if n_times > 1:
        want += (n_times, n_times)
    assert scores.shape == want
    assert_array_less(0.8, scores)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)  # no other channels useful
Example #6
    def __init__(self, subject=None, date=None, mode='train', **kwargs):
        if subject is None:
            subject = cfg.subj_info.subjname
        self._subj_path = os.path.dirname(__file__) + '/../data/' + subject
        if date is None:
            self._date = utils.find_nearest_time(self._subj_path)
        else:
            if isinstance(date, datetime):
                # convert datetime to str
                self._date = date.strftime("%Y-%m-%d-%H-%M-%S")
            else:
                self._date = date

        self.mode = mode.lower()
        assert self.mode in ['train', 'test']
        if self.mode == 'test':
            # loading trained coefficient
            self.data_dict = np.load(os.path.join(self._subj_path, self._date, 'coef.npz'))
            # loading trained model
            self.__cls = joblib.load(os.path.join(self._subj_path, self._date, 'model.pkl'))
            self._ch_ind = self.data_dict['ind_ch_scores']
        else:
            self.data_dict = {}
            C = kwargs.pop('C', 1)
            n_components = kwargs.pop('n_components', 3)
            self.__cls = make_pipeline(
                _XdawnTransformer(n_components=n_components),
                ChannelScaler(),
                Vectorizer(),
                LogisticRegression(C=C, class_weight='balanced', solver='liblinear', multi_class='ovr')
            )
Example #7
def mvpa(name):
    # Perform MVPA
    # Setting
    BASELINE = (None, 0)
    CROP = (0, 0.8)
    EVENTS = ['1', '2']

    # Load epochs
    loader = FileLoader(name)
    loader.load_epochs(recompute=False)
    print(loader.epochs_list)

    # Prepare [predicts] for results
    predicts = []

    # Cross validation
    num_epochs = len(loader.epochs_list)
    for exclude in range(num_epochs):
        # Start on separate training and testing dataset
        print(f'---- {name}: {exclude} | {num_epochs} ----------------------')
        includes = [
            e for e in range(len(loader.epochs_list)) if not e == exclude
        ]
        excludes = [exclude]
        train_epochs, test_epochs = loader.leave_one_session_out(
            includes, excludes)
        print(train_epochs, test_epochs)

        def prepare_epochs(epochs):
            # Helper to prepare the epochs
            epochs = epochs['1', '2']
            epochs.apply_baseline(BASELINE)
            return epochs.crop(CROP[0], CROP[1])

        # print('Xdawn --------------------------------')
        # enhancer = Enhancer(train_epochs=train_epochs,
        #                     test_epochs=test_epochs)
        # train_epochs, test_epochs = enhancer.fit_apply()

        # Prepare epochs
        train_epochs = prepare_epochs(train_epochs)
        test_epochs = prepare_epochs(test_epochs)

        X_train, y_train = get_X_y(train_epochs)
        X_test, y_test = get_X_y(test_epochs)

        print('Training -----------------------------')
        clf = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
        pipeline = make_pipeline(Vectorizer(), StandardScaler(), clf)
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        # y_pred = y_test

        print('Testing ------------------------------')
        predicts.append(dict(y_test=y_test, y_pred=y_pred))

    with open(os.path.join(results_dir, f'{name}.json'), 'wb') as f:
        pickle.dump(predicts, f)

Example #8
def UpdateModel():
    try:
        SetPathsVars()
        userlist = np.load(USER_LIST, allow_pickle='TRUE').item()
        if (len(userlist) < 2):
            return True
        database = np.load(DB_PATH, allow_pickle='TRUE')
        print(database)
        database = database[1:]
        rows, cols = len(database), len(database[0])
        print(rows, cols)
        X = []
        y = []

        for r in database:
            y.append(r[-1])
            arrr = r[:cols - 1]
            arrr = np.reshape(arrr, (15, 113))
            X.append(arrr)

        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.svm import LinearSVC
        from mne.decoding import Vectorizer
        clf = make_pipeline(Vectorizer(), StandardScaler(), LinearSVC())
        clf.fit(X, y)
        pkl.dump(clf, open(getPath('Res/model.pkl'), 'wb'))
        print('Successfully updated model')
        return True
    except Exception as e:
        print(e)
        print("Update Model haga")
        return False
Example #9
    def train_test_lda(self, X_train, y_train, X_test, y_test):
        """Regulated LDA

		Parameters
		----------
		X_train: instance of numpy.ndarray
			The training data.
		y_train: instance of numpy.ndarray
			The training target values.
		X_test: instance of numpy.ndarray
			The testing data.
		y_test: instance of numpy.ndarray
			The testing target values.

		Returns
		-------
		model : instance of sklearn.pipeline.Pipeline
			The final model.
		auc : float
			The AUC score.
		"""

        model = make_pipeline(Vectorizer(),
                              LDA(shrinkage='auto', solver='eigen'))
        model.fit(X_train, y_train)
        auc = roc_auc_score(y_test, model.predict(X_test))
        return model, auc
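A hedged usage sketch for train_test_lda; `decoder` stands in for an instance of the (unshown) class that defines this method, and the synthetic arrays are assumptions.

import numpy as np

rng = np.random.default_rng(1)
X_train = rng.standard_normal((80, 8, 50))   # trials x channels x times (assumed)
y_train = rng.integers(0, 2, size=80)
X_test = rng.standard_normal((20, 8, 50))
y_test = rng.integers(0, 2, size=20)

model, auc = decoder.train_test_lda(X_train, y_train, X_test, y_test)
print(f'AUC: {auc:.2f}')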
def make_clf(pattern=False, vectorized=False):
    clf = []
    from sklearn.svm import SVC
    clf.append(('vectorizer', Vectorizer()))
    # use linear SVM as the estimator
    estimator = SVC(max_iter=-1, kernel='linear', random_state=12345,
                    class_weight='balanced', probability=True)
    clf.append(('estimator', estimator))
    clf = Pipeline(clf)
    return clf
def mvpa(name):
    # Perform MVPA
    # Load epochs
    loader = FileLoader(name)
    loader.load_epochs(recompute=False)
    print(loader.epochs_list)

    # Prepare [predicts] for results
    predicts = []

    # Cross validation
    num_epochs = len(loader.epochs_list)
    for exclude in range(num_epochs):
        # Start on separate training and testing dataset
        print(f'---- {name}: {exclude} | {num_epochs} ----------------------')
        includes = [
            e for e in range(len(loader.epochs_list)) if not e == exclude
        ]
        excludes = [exclude]
        train_epochs, test_epochs = loader.leave_one_session_out(
            includes, excludes)
        print(train_epochs, test_epochs)

        print('Xdawn --------------------------------')
        enhancer = Enhancer(train_epochs=train_epochs, test_epochs=test_epochs)
        train_epochs, test_epochs = enhancer.fit_apply()

        # Prepare epochs
        train_epochs = prepare_epochs(train_epochs)
        test_epochs = prepare_epochs(test_epochs)

        X_train, y_train = get_X_y(train_epochs)
        X_test, y_test = get_X_y(test_epochs)

        print('Preprocess ---------------------------')
        pipeline = make_pipeline(Vectorizer(), StandardScaler())
        X_train = pipeline.fit_transform(X_train)
        X_test = pipeline.transform(X_test)
        y_train[y_train != 1] = 0
        y_test[y_test != 1] = 0

        print('Training -----------------------------')
        eegnet = EEGNet_classifier()
        eegnet.fit(X_train, y_train, quiet=False)
        y_pred = eegnet.predict(X_test)
        # y_pred = y_test

        print('Testing ------------------------------')
        predicts.append(dict(y_test=y_test, y_pred=y_pred))

    with open(os.path.join(results_dir, f'{name}.json'), 'wb') as f:
        pickle.dump(predicts, f)

def make_clf():
    clf = []
    clf.append(('vectorizer', Vectorizer()))
    # hyper-parameters were optimized beforehand and are used directly in the random forest model
    clf.append(('estimator',
                RandomForestClassifier(n_estimators=190,        # number of trees
                                       max_depth=None,          # grow each tree to full depth
                                       random_state=12345,
                                       class_weight='balanced',
                                       max_features=10,         # dimension reduction
                                       min_samples_leaf=4,      # minimum samples per leaf
                                       min_samples_split=4)))   # minimum samples required to split a node
    clf = Pipeline(clf)
    return clf
    def svm_fit_predict(train_xraw, train_y, test_xraw):
        clf = svm.SVC(gamma='scale',
                      kernel='rbf',
                      class_weight='balanced',
                      probability=True)
        selects = []
        for j, y in enumerate(train_y):
            if y == 1:
                selects.extend(j - e for e in (-1, 0, 1))

        pipeline = make_pipeline(Vectorizer(), clf)
        pipeline.fit(train_xraw[selects, :, 40:80], train_y[selects])
        pred = pipeline.predict(test_xraw[:, :, 40:80])
        prob = pipeline.predict_proba(test_xraw[:, :, 40:80])
        return pred, prob
def make_clf(pattern=False, vectorized=False):
    clf = []
    if vectorized:
        clf.append(('vectorizer', Vectorizer()))
    clf.append(('scaler', MinMaxScaler()))
    # use linear SVM as the estimator
    estimator = SVC(max_iter=-1,
                    kernel='linear',
                    random_state=12345,
                    class_weight='balanced',
                    probability=True)
    if pattern:
        estimator = LinearModel(estimator)
    clf.append(('estimator', estimator))
    clf = Pipeline(clf)
    return clf
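A hedged sketch of how the pattern and vectorized flags above might be used for pattern extraction; the random data, shapes, and the call to mne.decoding.get_coef are illustrative assumptions.

import numpy as np
from mne.decoding import get_coef

rng = np.random.default_rng(12345)
X = rng.standard_normal((60, 8, 40))   # trials x channels x times (assumed)
y = rng.integers(0, 2, size=60)

clf = make_clf(pattern=True, vectorized=True)
clf.fit(X, y)

# with pattern=True the LinearModel wrapper exposes patterns_, which can be
# projected back to channel/time space through the earlier pipeline steps
patterns = get_coef(clf, 'patterns_', inverse_transform=True)
print(patterns.shape)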
Example #15
def test_vectorizer():
    """Test Vectorizer."""
    data = np.random.rand(150, 18, 6)
    vect = Vectorizer()
    result = vect.fit_transform(data)
    assert_equal(result.ndim, 2)

    # check inverse_transform
    orig_data = vect.inverse_transform(result)
    assert_equal(orig_data.ndim, 3)
    assert_array_equal(orig_data, data)
    assert_array_equal(vect.inverse_transform(result[1:]), data[1:])

    # check with different shape
    assert_equal(vect.fit_transform(np.random.rand(150, 18, 6, 3)).shape,
                 (150, 324))
    assert_equal(vect.fit_transform(data[1:]).shape, (149, 108))

    # check if raised errors are working correctly
    vect.fit(np.random.rand(105, 12, 3))
    assert_raises(ValueError, vect.transform, np.random.rand(105, 12, 3, 1))
    assert_raises(ValueError, vect.inverse_transform,
                  np.random.rand(102, 12, 12))
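For reference, a minimal standalone sketch of the reshaping behaviour this test exercises; the array sizes are illustrative.

import numpy as np
from mne.decoding import Vectorizer

X = np.random.rand(150, 18, 6)       # trials x channels x times
vect = Vectorizer()
X2d = vect.fit_transform(X)          # flattened to (150, 108)
X3d = vect.inverse_transform(X2d)    # restored to (150, 18, 6)
assert X3d.shape == X.shape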
Example #16
def test_get_coef_multiclass(n_features, n_targets):
    """Test get_coef on multiclass problems."""
    # Check patterns with more than 1 regressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.pipeline import make_pipeline
    X, Y, A = _make_data(n_samples=30000,
                         n_features=n_features,
                         n_targets=n_targets)
    lm = LinearModel(LinearRegression()).fit(X, Y)
    assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
    if n_targets == 1:
        want_shape = (n_features, )
    else:
        want_shape = (n_targets, n_features)
    assert_array_equal(lm.filters_.shape, want_shape)
    if n_features > 1 and n_targets > 1:
        assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
    lm = LinearModel(Ridge(alpha=0))
    clf = make_pipeline(lm)
    clf.fit(X, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    assert_allclose(lm.patterns_, coef, atol=1e-5)

    # With epochs, scaler, and vectorizer (typical use case)
    X_epo = X.reshape(X.shape + (1, ))
    info = create_info(n_features, 1000., 'eeg')
    lm = LinearModel(Ridge(alpha=1))
    clf = make_pipeline(
        Scaler(info, scalings=dict(eeg=1.)),  # XXX adding this step breaks
        Vectorizer(),
        lm,
    )
    clf.fit(X_epo, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    lm_patterns_ = lm.patterns_[..., np.newaxis]
    assert_allclose(lm_patterns_, coef, atol=1e-5)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
Example #17
def test_vectorizer():
    """Test Vectorizer."""
    data = np.random.rand(150, 18, 6)
    vect = Vectorizer()
    result = vect.fit_transform(data)
    assert_equal(result.ndim, 2)

    # check inverse_transform
    orig_data = vect.inverse_transform(result)
    assert_equal(orig_data.ndim, 3)
    assert_array_equal(orig_data, data)
    assert_array_equal(vect.inverse_transform(result[1:]), data[1:])

    # check with different shape
    assert_equal(vect.fit_transform(np.random.rand(150, 18, 6, 3)).shape,
                 (150, 324))
    assert_equal(vect.fit_transform(data[1:]).shape, (149, 108))

    # check if raised errors are working correctly
    vect.fit(np.random.rand(105, 12, 3))
    assert_raises(ValueError, vect.transform, np.random.rand(105, 12, 3, 1))
    assert_raises(ValueError, vect.inverse_transform,
                  np.random.rand(102, 12, 12))
    tmin = i * 1.0
    tmax = (i + 1) * 1.0

    # Create an :class:`Epochs <mne.Epochs>` object
    epochs = mne.Epochs(raw,
                        events=events,
                        event_id=event_id,
                        tmin=tmin,
                        tmax=tmax,
                        baseline=None,
                        verbose=True,
                        preload=True)
    for i in range(0, len(epochs.events)):
        if i % 2 == 0:
            epochs.events[i, 2] = 3
    #epochs.plot(scalings = 'auto',block = True,n_epochs=10)
    X = epochs.pick_types(meg=False, eeg=True).get_data()
    y = epochs.events[:, -1]

    # Define a unique pipeline to sequentially:
    clf = make_pipeline(
        Vectorizer(),  # 1) vectorize across time and channels
        StandardScaler(),  # 2) normalize features across trials
        LinearModel(LogisticRegression()))  # 3) fits a logistic regression
    clf.fit(X, y)

    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    fig = evoked.plot_topomap(title='EEG Patterns', size=3, show=False)
    fig.savefig(title + "_ti_" + str(tmin) + "_tf_" + str(tmax) + '.png')
    return X, y


# %%
for idx in range(1, 11):
    # Loading data ------------------------------------------
    running_name = f'MEG_S{idx:02d}'
    band_name = 'U07'

    worker = MEG_Worker(running_name=running_name)
    worker.pipeline(band_name=band_name)

    # MVPA ----------------------------------------------------------------
    # Prepare classifiers
    _svm = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
    clf = make_pipeline(Vectorizer(), StandardScaler(), _svm)

    # Prepare paired X and y
    # Set crop
    crops = dict(a=(0.2, 0.4),
                 b=(0.4, 0.6),
                 c=(0.6, 0.8),
                 d=(0.2, 0.8),
                 e=(0.0, 1.0))

    output_dict = dict()

    for crop_key in crops:
        crop = crops[crop_key]
        # Get X and y for class 1
        X1, y1 = pair_X_y(worker.clean_epochs, 1, crop)
Example #20
# This approach classifies the data within, rather than across, subjects.

for chroma in ['hbo', 'hbr']:

    st_scores = []
    for sub in subjects:

        bids_path = dataset.update(subject=sub)
        raw_haemo, epochs = epoch_preprocessing(bids_path)

        epochs.pick(chroma)

        X = epochs.get_data()
        y = epochs.events[:, 2]

        clf = make_pipeline(Scaler(epochs.info), Vectorizer(),
                            LogisticRegression(solver='liblinear'))

        scores = 100 * cross_val_multiscore(
            clf, X, y, cv=5, n_jobs=1, scoring='roc_auc')

        st_scores.append(np.mean(scores, axis=0))

    print(f"Average spatio-temporal ROC-AUC performance ({chroma}) = "
          f"{np.round(np.mean(st_scores))} % ({np.round(np.std(st_scores))})")

# %%
# Conclusion
# ----------
#
# Data were epoched, then decoding was performed separately on the hbo and hbr signals.
Example #21
def SVM_decoding_on_full_epochs(X,
                                y,
                                plot_conf_matrix=0,
                                class_names=None,
                                test_size=0.2,
                                n_splits=5):
    """ This function decodes on the full epoch using standard SVM algorithm

    Parameters
    ---------
    X : data extracted from the epochs provided to the decoder
    y : categorical variable (i.e. discrete but it can be more then 2 categories)
    plot_confusion_matrix : set to 1 if you wanna see the confusion matrix
    class_names: needed for the legend if confusion matrices are plotted ['cat1','cat2','cat3']
    test_size : proportion of the data on which you want to test the decoder
    n_splits : when calculating the score, number of cross-validation folds

    Returns:
    -------
    score, y_test, y_pred

    """

    # ------- define the classifier -------
    scaler = preprocessing.StandardScaler()
    vectorizer = Vectorizer()
    clf = SVC(C=1, kernel='linear', decision_function_shape='ovr')
    concat_classifier = Pipeline([('vector', vectorizer), ('scaler', scaler),
                                  ('svm', clf)])

    # This returns the 5 scores calculated for each fold

    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    y = np.asarray(y)
    scores = []
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Train on X_train, y_train
        concat_classifier.fit(X_train, y_train)
        # Test on X_test and then score
        y_pred = concat_classifier.predict(X_test)
        scores.append(accuracy_score(y_true=y_test, y_pred=y_pred))
    scores = np.asarray(scores)

    if plot_conf_matrix == 1:
        print('you chose to plot the confusion matrix')
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=7, stratify=y)
        y_pred = concat_classifier.fit(X_train, y_train).predict(X_test)

        # Compute confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        np.set_printoptions(precision=3)
        print(cnf_matrix)

        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              normalize=True,
                              title='Normalized confusion matrix')

        plt.show()
        return scores, y_test, y_pred, cnf_matrix

    return scores
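A hedged call example for SVM_decoding_on_full_epochs; the synthetic data and shapes below are assumptions.

import numpy as np

rng = np.random.default_rng(7)
X = rng.standard_normal((60, 16, 100))   # trials x channels x times (assumed)
y = np.repeat([0, 1, 2], 20)             # three categories

scores = SVM_decoding_on_full_epochs(X, y, plot_conf_matrix=0,
                                     class_names=['cat1', 'cat2', 'cat3'])
print('mean CV accuracy:', scores.mean())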
Example #22
def run_ica_experiment(_run, method_idx):

    # filepaths = Path(r"C:\Users\paull\Documents\GIT\BCI_MsC\notebooks\BCI_Comp_IV_2a\BCICIV_2a_gdf/").glob("*T.gdf")

    dataset, metadata = BCI_IV_Comp_Dataset.load_dataset(filepaths,
                                                         as_epochs=True,
                                                         concatenate=False,
                                                         drop_bad=True,
                                                         return_metadata=True,
                                                         tmin=-1.,
                                                         tmax=3.)

    all_methods = get_all_methods()
    methods = all_methods if method_idx is None else [all_methods[method_idx]]
    name = "" if method_idx is None else "_{}".format(all_methods[method_idx])
    print("Using methods", methods)

    results = dict()
    for method in methods:
        print("Running for method", method)
        clf = make_pipeline(
            CSP(n_components=CSP_N_COMPONENTS), Vectorizer(), MinMaxScaler(),
            LogisticRegression(penalty='l2', multi_class='auto'))
        results[method] = list()
        for i, (epochs, mdata) in enumerate(zip(dataset, metadata)):
            print("\t", i, mdata["id"])
            ICA = get_ica_instance(method, n_components=ICA_N_COMPONENTS)
            start = time.time()

            epochs = epochs.copy().load_data().filter(l_freq=None,
                                                      h_freq=40).resample(90.)

            transformed_epochs = ICA.fit(epochs).get_sources(epochs)
            duration = time.time() - start

            scores = dict()
            signal = np.hstack(transformed_epochs.get_data())
            for fn_name in SCORING_FN_DICT:
                score = apply_pairwise_parallel(signal,
                                                SCORING_FN_DICT[fn_name])
                scores[fn_name] = score

            X, Y = transformed_epochs.get_data(), transformed_epochs.events[:, 2]

            del epochs, transformed_epochs

            try:
                clf.fit(X, Y)
            except Exception:
                print("\t\tFailed during fit")
                results[method].append({
                    "id": mdata["id"],
                    "score": None,
                    "bas": None,
                    "duration": duration
                })
                continue

            pred = clf.predict(X)
            bas = balanced_accuracy_score(Y, pred)
            results[method].append({
                "id": mdata["id"],
                "score": scores,
                "bas": bas,
                "duration": duration
            })

    results_filepath = f"./results{name}.json"
    with open(results_filepath, "w") as json_file:
        json.dump(results, json_file, indent=4)

    _run.add_artifact(results_filepath, content_type="json")
    if subject_id in exclude:
        continue
    subject = 'S%02d' % subject_id
    data_path = os.path.join('/home/claire/DATA/Data_Face_House/' + subject +
                             '/EEG/Evoked_Lowpass')
    fname_in = os.path.join(data_path, '%s-epo.fif' % subject)
    epochs = mne.read_epochs(fname_in)
    epochs.interpolate_bads()
#    all_epochs.append(epochs)

#epochs = mne.concatenate_epochs(all_epochs)

epochs.pick_types(eeg=True)

# Create classification pipeline
clf = make_pipeline(Xdawn(n_components=3, reg='oas'), Vectorizer(),
                    MinMaxScaler(),
                    LogisticRegression(penalty='l1', solver='liblinear'))

le = LabelEncoder()
labels = le.fit_transform(epochs.events[:, 2])

# Cross validator
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Do cross-validation
preds = np.empty(len(labels))
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

# Classification report
                   exclude='bads')

epochs = Epochs(raw,
                events,
                event_id,
                tmin,
                tmax,
                proj=False,
                picks=picks,
                baseline=None,
                preload=True,
                verbose=False)

# Create classification pipeline
clf = make_pipeline(
    Xdawn(n_components=n_filter), Vectorizer(), MinMaxScaler(),
    LogisticRegression(penalty='l1', solver='liblinear', multi_class='auto'))

# Get the labels
labels = epochs.events[:, -1]

# Cross validator
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Do cross-validation
preds = np.empty(len(labels))
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

# Classification report
Example #25
                tmax=0.8,
                baseline=None,
                reject={'eeg': 75e-6},
                preload=True,
                verbose=False,
                picks=[0, 1, 2, 3])

print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                  LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'),
                                    TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                      TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'),
                                       MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
Example #26
                   eog=False,
                   exclude='bads')

epochs = Epochs(raw,
                events,
                event_id,
                tmin,
                tmax,
                proj=False,
                picks=picks,
                baseline=None,
                preload=True,
                verbose=False)

# Create classification pipeline
clf = make_pipeline(Xdawn(n_components=3), Vectorizer(), MinMaxScaler(),
                    LogisticRegression(penalty='l1', solver='liblinear'))

# Get the labels
labels = epochs.events[:, -1]

# Cross validator
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Do cross-validation
preds = np.empty(len(labels))
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

# Classification report
epochs = Epochs(raw,
                events,
                event_id,
                tmin,
                tmax,
                proj=False,
                picks=picks,
                baseline=None,
                preload=True,
                verbose=False)

X = epochs.get_data()
y = label_binarize(epochs.events[:, 2], classes=[1, 3]).ravel()

clf = make_pipeline(XdawnTransformer(n_components=2), Vectorizer(),
                    StandardScaler(), LogisticRegression())

# Define a monte-carlo cross-validation generator (reduce variance):
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)

scores = cross_val_score(clf, X, y, cv=cv)

class_balance = np.mean(y == y[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" %
      (np.mean(scores), class_balance))

###############################################################################
# plot Xdawn patterns estimated on full data for visualization
# Decoding in sensor space using a linear SVM
n_times = len(rt_epochs.times)

from sklearn import preprocessing  # noqa
from sklearn.svm import SVC  # noqa
from sklearn.pipeline import Pipeline  # noqa
from sklearn.model_selection import cross_val_score, ShuffleSplit  # noqa
from mne.decoding import Vectorizer, FilterEstimator  # noqa

scores_x, scores, std_scores = [], [], []

# don't highpass filter because it's epoched data and the signal length
# is small
filt = FilterEstimator(rt_epochs.info, None, 40, fir_design='firwin')
scaler = preprocessing.StandardScaler()
vectorizer = Vectorizer()
clf = SVC(C=1, kernel='linear')

concat_classifier = Pipeline([('filter', filt), ('vector', vectorizer),
                              ('scaler', scaler), ('svm', clf)])

data_picks = mne.pick_types(rt_epochs.info,
                            meg='grad',
                            eeg=False,
                            eog=True,
                            stim=False,
                            exclude=raw.info['bads'])
ax = plt.subplot(111)
ax.set_xlabel('Trials')
ax.set_ylabel('Classification score (% correct)')
ax.set_title('Real-time decoding')
Example #29
    test_epochs = test_epochs.crop(0.2, 0.8)

    # Select epochs in [train_epochs]
    selects = select_events(train_epochs)
    selected_train_epochs = train_epochs[selects]

    display(train_epochs, selected_train_epochs, test_epochs)

    # Get X and y
    train_X, train_y = get_X_y(selected_train_epochs)
    test_X, test_y = get_X_y(test_epochs)

    # Fit and pred ---------------------------------------------
    # Init
    clf = make_pipeline(
        Vectorizer(), StandardScaler(), pca.PCA(n_components=.95),
        svm.SVC(
            gamma='scale',
            kernel='rbf',
            class_weight='balanced',
        ))

    # Fit
    print('Fitting')
    clf.fit(X=train_X, y=train_y)

    # Predict
    print('Predicting')
    crop_pred_y = clf.predict(X=test_X)

    break
Example #30
    design = metadata[predictors]

    # # dummy code cue variable
    # dummies = pd.get_dummies(design[predictors], drop_first=True)
    # design = pd.concat([design.drop(predictors, axis=1), dummies], axis=1)
    # design.cue_B = design.cue_B - design.cue_B.unique().mean()

    # create design matrix
    design = patsy.dmatrix("cue", design, return_type='dataframe')
    design = design[['cue[T.B]']]

    # 4.2) vectorise channel data for linear regression
    # data to be analysed
    dat = cues[subj].get_data()
    dat = dat[:, :, times_to_use]
    Y = Vectorizer().fit_transform(dat)

    # 4.3) fit linear model with sklearn's LinearRegression
    weights = compute_sample_weight(class_weight='balanced',
                                    y=metadata.cue.to_numpy())
    linear_model = LinearRegression(n_jobs=n_jobs, fit_intercept=True)
    linear_model.fit(design, Y, sample_weight=weights)

    # 4.4) extract the resulting coefficients (i.e., betas)
    # extract betas
    coefs = get_coef(linear_model, 'coef_')
    inter = linear_model.intercept_

    # 4.5) extract model r_squared
    r2 = r2_score(Y, linear_model.predict(design), multioutput='raw_values')
    # save model R-squared
evokeds = [] 
for sbj in sbjs:
    print(sbj)
    if sbj == 'VP12': #No REM here
        continue
    if os.path.exists(os.path.join(save_path, sbj + '.p')):
        continue
    #sleep_epochs = myload(base_path_data, typ='epoch_preprocessed', sbj=sbj, preload=True) # WAKE!
    sleep_epochs = myload(sleep_path_data, typ='epoch_preprocessed', sbj=sbj, preload=True) # SLEEP!
    sleep_epochs.event_id = sleep_event_id # event_id remapping. For wake this step works during preprocessing # SLEEP !

    sleep_epochs = sleep_epochs.crop(tmin=tmin, tmax=tmax)
    
    X1, y1 = get_Xy_balanced(sleep_epochs, contrast1)
    
    clf = make_pipeline(Vectorizer(), StandardScaler(),
                        LinearModel(LogisticRegression(max_iter=4000)))
     
    cv  = StratifiedKFold(n_splits=2, shuffle=True)
    
    coef_folds = [] 
    for train_idx, test_idx in cv.split(X1, y1):
        clf.fit(X1[train_idx], y=y1[train_idx])
        #scores1.append(clf.score(X1[test_idx], y=y1[test_idx]))
        coef_folds.append(get_coef(clf, attr='patterns_', inverse_transform=True))
    coef = np.asarray(coef_folds).mean(0).reshape([173, -1]) #mean folds and reshape
    evoked = EvokedArray(coef, sleep_epochs.info, tmin=tmin)
    evokeds.append(evoked)

ga = mne.grand_average(evokeds)

#SLEEP