Ejemplo n.º 1
0
def test_csp_init(nfilter, metric, log, get_covmats, get_labels):
    """Fit and apply CSP on fixture data, then verify the resulting shapes.

    The transform is expected to be 2-D (matrices x channels) when ``log``
    is truthy and a stack of square matrices otherwise. The fitted filters
    and patterns are both expected to be square (channels x channels).
    """
    n_classes = 2
    n_matrices = 6
    n_channels = 3
    X = get_covmats(n_matrices, n_channels)
    y = get_labels(n_matrices, n_classes)

    spatial_filter = CSP(nfilter=nfilter, metric=metric, log=log)
    spatial_filter.fit(X, y)
    transformed = spatial_filter.transform(X)

    expected_shape = ((n_matrices, n_channels) if log else
                      (n_matrices, n_channels, n_channels))
    assert transformed.shape == expected_shape

    square = (n_channels, n_channels)
    assert spatial_filter.filters_.shape == square
    assert spatial_filter.patterns_.shape == square
Ejemplo n.º 2
0
def test_permutation_distance():
    """Smoke-test PermutationDistance across its modes and options."""
    covset = generate_cov(10, 5)
    labels = np.array([0, 1]).repeat(5)
    groups = np.array([0] * 5 + [1] * 5)

    # An unknown mode must be rejected at construction time.
    with pytest.raises(ValueError):
        PermutationDistance(mode='badmode')

    # Pairwise mode, first without and then with a grouping vector.
    pairwise = PermutationDistance(100, mode='pairwise')
    pairwise.test(covset, labels)
    pairwise.test(covset, labels, groups=groups)

    # t-test and f-test modes share the same call pattern.
    for stat_mode in ('ttest', 'ftest'):
        stat_test = PermutationDistance(100, mode=stat_mode)
        stat_test.test(covset, labels)

    # Pairwise mode with a custom estimator.
    with_estimator = PermutationDistance(10,
                                         mode='pairwise',
                                         estimator=CSP(2, log=False))
    with_estimator.test(covset, labels)

    # Large permutation count ("unique perms" case), followed by plotting.
    many_perms = PermutationDistance(1000)
    many_perms.test(covset, labels)
    many_perms.plot(nbins=2)
Ejemplo n.º 3
0
def _train_raw(df):
    """Train a set of covariance-based classifiers on raw EEG data.

    ``df`` is converted into a signal array ``X`` and a label vector ``y``
    by ``transform.signal_ndarray``. Each pipeline in the local registry is
    then handed to ``_train`` in turn, with a log line announcing its name.
    """
    X, y = transform.signal_ndarray(df)

    # The dual formulation is meant to fix non-convergence for binary
    # classification. The previous check compared the *set* of labels with
    # the integer 2, which is always False, so the flag was never enabled.
    dual = len(set(y)) == 2

    def _logreg():
        # scikit-learn implements the dual formulation only for the
        # liblinear solver (with an l2 penalty), so that solver is selected
        # explicitly whenever dual is requested. The non-binary case keeps
        # the exact construction used before.
        if dual:
            return LogisticRegression(dual=True, solver="liblinear")
        return LogisticRegression(dual=False)

    clfs: Dict[str, Pipeline] = {
        # Pipelines taken from
        # https://neurotechx.github.io/eeg-notebooks/auto_examples/visual_ssvep/02r__ssvep_decoding.html
        "CSP + Cov + TS":
        make_pipeline(
            Covariances(),
            CSP(4, log=False),
            TangentSpace(),
            _logreg(),
        ),
        "Cov + TS":
        make_pipeline(Covariances(), TangentSpace(), _logreg()),
        # Performs meh
        # "CSP + RegLDA": make_pipeline(
        #     Covariances(), CSP(4), LDA(shrinkage="auto", solver="eigen")
        # ),
        # Performs badly
        # "Cov + MDM": make_pipeline(Covariances(), MDM()),
    }

    for name, clf in clfs.items():
        logger.info(f"===== Training with {name} =====")
        _train(X, y, clf)
Ejemplo n.º 4
0
def test_permutation_pairwise_estimator(get_covmats, get_labels):
    """Run a pairwise permutation test that uses a custom CSP estimator."""
    n_matrices = 6
    n_channels = 3
    n_classes = 2
    X = get_covmats(n_matrices, n_channels)
    y = get_labels(n_matrices, n_classes)

    # Pairwise mode with a user-supplied estimator instead of the default.
    spatial_filter = CSP(2, log=False)
    perm_test = PermutationDistance(10,
                                    mode="pairwise",
                                    estimator=spatial_filter)
    perm_test.test(X, y)
def RHvsLH_cross(out_dir, pipelines):
    """Run a cross-subject right-hand vs. left-hand imagery evaluation.

    Searches for imagery datasets that contain both 'right_hand' and
    'left_hand' events, prints them, builds three covariance-based
    pipelines and processes them through a ``LeftRightImagery`` context
    using ``CrossSubjectEvaluation``.

    NOTE(review): the ``pipelines`` argument is replaced by the locally
    built pipelines, and ``out_dir``, ``name`` and ``results`` are not used
    in the visible code -- confirm whether that is intended.
    """
    name = 'RHvsLH_cross'
    search_options = dict(events=['right_hand', 'left_hand'],
                          has_all_events=True,
                          min_subjects=2,
                          multi_session=False)
    datasets = utils.dataset_search('imagery', **search_options)
    print(datasets)

    # Insertion order is kept: TS, then CSP+LDA, then CSP+SVM.
    pipelines = OrderedDict([
        ('TS', make_pipeline(Covariances('oas'), TSclassifier())),
        ('CSP+LDA', make_pipeline(Covariances('oas'), CSP(6), LDA())),
        ('CSP+SVM', make_pipeline(Covariances('oas'), CSP(6), SVC())),
    ])

    evaluation = CrossSubjectEvaluation(n_jobs=10)
    context = LeftRightImagery(pipelines, evaluation, datasets)

    results = context.process()
Ejemplo n.º 6
0
# Distance-based permutation test in 'ftest' mode with the Riemannian metric.
# NOTE(review): `n_perms`, `covmats`, `labels` and `t_init` are defined above
# this fragment and are not visible here.
p_test = PermutationDistance(n_perms, metric='riemann', mode='ftest')
p, F = p_test.test(covmats, labels)
# Elapsed wall-clock time since `t_init` was taken.
duration = time() - t_init

# Plot the result of the test and report the p-value.
fig, axes = plt.subplots(1, 1, figsize=[6, 3], sharey=True)
p_test.plot(nbins=10, axes=axes)
plt.title('F-test distance - %.2f sec.' % duration)
print('p-value: %.3f' % p)
sns.despine()
plt.tight_layout()
plt.show()

###############################################################################
# Classification based permutation test
###############################################################################

# Model whose cross-validated score is used as the test statistic.
clf = make_pipeline(CSP(4), LogisticRegression())

# Restart the timer so that `duration` covers only this test.
t_init = time()
p_test = PermutationModel(n_perms, model=clf, cv=3, scoring='roc_auc')
p, F = p_test.test(covmats, labels)
duration = time() - t_init

# Same plotting and reporting as for the distance-based test above.
fig, axes = plt.subplots(1, 1, figsize=[6, 3], sharey=True)
p_test.plot(nbins=10, axes=axes)
plt.title('Classification - %.2f sec.' % duration)
print('p-value: %.3f' % p)
sns.despine()
plt.tight_layout()
plt.show()
Ejemplo n.º 7
0
def _decode_scoring(decoding_method):
    """Pick the scoring argument for a 'standard*' decoding method.

    Methods without 'reg' in their name are scored with 'accuracy'.
    Regression methods use an r2 scorer when 'r2' appears in the name and a
    mean-squared-error scorer otherwise.
    """
    if 'reg' not in decoding_method:
        return 'accuracy'
    if 'r2' in decoding_method:
        return metrics.make_scorer(metrics.r2_score)
    return metrics.make_scorer(metrics.mean_squared_error)


def _decode_time_estimator(clf, decoding_method, generalize_across_time,
                           n_jobs):
    """Wrap ``clf`` in a GeneralizingEstimator or a SlidingEstimator."""
    wrapper = (GeneralizingEstimator
               if generalize_across_time else SlidingEstimator)
    return wrapper(clf,
                   scoring=_decode_scoring(decoding_method),
                   n_jobs=n_jobs)


def _decode_print_steps(pipeline):
    """Print the estimator of every step of ``pipeline``, numbered from 1."""
    for step_number, step in enumerate(pipeline.steps, start=1):
        print('Step #{:}: {:}'.format(step_number, step[1]))


def decode(epochs,
           get_y_label_func,
           epoch_filter=None,
           decoding_method='standard',
           sliding_window_size=None,
           sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True,
           only_fit=False,
           generalize_across_time=True):
    """
    Basic flow for decoding.

    Parameters
    ----------
    epochs :
        Epochs container. The code indexes it, reads/writes ``events`` and
        ``event_id``, and calls ``get_data()``, ``equalize_event_counts()``,
        ``tmin`` and ``tmax`` (presumably an mne.Epochs -- confirm).
    get_y_label_func : callable
        Called with the (possibly filtered) epochs; returns one label per
        epoch.
    epoch_filter : optional
        When not None, ``epochs[epoch_filter]`` is used instead of ``epochs``.
    decoding_method : str
        Either a name starting with 'standard' (optionally containing 'reg',
        'r2' and/or 'raw'), or one of 'ERP_cov', 'Xdawn_cov', 'Hankel_cov'.
    sliding_window_size, sliding_window_step : optional
        Required for 'standard*' methods whose name does not contain 'raw'.
    n_jobs : int
        Forwarded to the time-resolved estimator of the 'standard*' methods.
    equalize_event_counts : bool
        When true, the labels are written into ``epochs.events[:, 2]``,
        ``epochs.event_id`` is rebuilt from them and the event counts are
        equalized. Note that this modifies ``epochs``.
    only_fit : bool
        When true, the estimator is only fitted; no cross-validation is run.
    generalize_across_time : bool
        For 'standard*' methods: use GeneralizingEstimator when true,
        SlidingEstimator otherwise.

    Returns
    -------
    dict
        Keys: 'procedure' ('only_fit' or 'fit_and_score'), 'estimator',
        'scores' (None when only fitting), 'pipeline', 'preprocess', 'cv'
        (None when only fitting), 'times' and 'config'. For the covariance
        based methods under cross-validation, 'estimator' is the string
        'None'.

    Raises
    ------
    ValueError
        If ``decoding_method`` is not recognised.
    AssertionError
        If a sliding window is required but its size or step is None.
    """

    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        # Store the labels as event codes so that the epochs object can
        # balance the number of epochs per label.
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            len(epochs[cond]) for cond in epochs.event_id.keys())
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(epochs.event_id.keys())
        y_labels = epochs.events[:, 2]

    print("The epochs were classified into %d groups:" % len(set(y_labels)))
    for g in set(y_labels):
        print("Group {:}: {:} epochs".format(g, sum(np.array(y_labels) == g)))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()

    preprocess_pipeline = None
    is_standard = decoding_method.startswith('standard')

    if is_standard:

        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None, \
                'sliding_window_size is required for this decoding method'
            assert sliding_window_step is not None, \
                'sliding_window_step is required for this decoding method'
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(
                    window_size=sliding_window_size,
                    step=sliding_window_step,
                    average=True))

    # NOTE: the LogisticRegression penalty is passed by keyword below;
    # current scikit-learn releases no longer accept it positionally.
    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(
                estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'), LogisticRegression(penalty='l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False), TangentSpace('logeuclid'),
            LogisticRegression(penalty='l2'))

    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception still catch this.
        raise ValueError(
            'Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    _decode_print_steps(clf)

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        _decode_print_steps(preprocess_pipeline)
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:

        #-- Only fit the decoders

        procedure = 'only_fit'
        scores = None
        cv = None

        if is_standard:
            estimator = _decode_time_estimator(clf, decoding_method,
                                               generalize_across_time, n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:

        #-- Classify & score -- cross-validation

        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)
        if is_standard:
            estimator = _decode_time_estimator(clf, decoding_method,
                                               generalize_across_time, n_jobs)
            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            estimator = 'None'  # Estimator is not defined in the case of Riemannian decoding

    # One time point per sample along the last axis of the (possibly
    # pre-processed) data, spread evenly between tmin and tmax.
    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
Ejemplo n.º 8
0
# Decoding
# ----------------------------

# Next, we will use 4 different machine learning pipelines to classify the SSVEP based on the data we collected. The pipelines are:

# - CSP + RegLDA : Common Spatial Patterns + Regularized Linear Discriminant Analysis. This is a very common EEG analysis pipeline.
# - Cov + TS : Covariance + Tangent space mapping. One of the most reliable Riemannian geometry-based pipelines.
# - Cov + MDM: Covariance + MDM. A very simple, yet effective (for low channel count), Riemannian geometry classifier.
# - CSP + Cov + TS: Common Spatial Patterns + Covariance + Tangent space mapping. Riemannian pipeline with the standard CSP procedure beforehand.

# Evaluation is done through cross-validation, with area-under-the-curve (AUC) as the metric (AUC is probably the best metric for binary and unbalanced classification problems).

# Note: because we're doing machine learning here, the following cell may take a while to complete.

# Pipelines are kept in insertion order so that they are processed in the order listed above.
clfs = OrderedDict()
clfs['CSP + RegLDA'] = make_pipeline(Covariances(), CSP(4),
                                     LDA(shrinkage='auto', solver='eigen'))
clfs['Cov + TS'] = make_pipeline(Covariances(), TangentSpace(),
                                 LogisticRegression())
clfs['Cov + MDM'] = make_pipeline(Covariances(), MDM())
clfs['CSP + Cov + TS'] = make_pipeline(Covariances(), CSP(4, log=False),
                                       TangentSpace(), LogisticRegression())

# define cross validation: 20 stratified shuffle splits, 25% held out for testing, fixed seed
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline
# NOTE(review): `auc` and `methods` are not filled in the visible part of the loop.
auc = []
methods = []
for m in clfs:
    print(m)
Ejemplo n.º 9
0
    def __train_predefined_classifier(
            self,
            epochs,
            RG_Pipeline_Num=0,
            estimator='lwf',
            estimate_accuracy=False,
            random_state=44,
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
        """
        Train a predefined Riemannian Geometry pipeline on a single dataset using
        MNE and pyriemann.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
            Labels are read from the last column of `epochs.events` and the
            data from `epochs.get_data()`.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0,1,2,3 (any value other than 1, 2 or 3 selects pipeline 0):
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses elasticnet penalty (l1_ratio 0.5), solver saga and a
                    multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        estimate_accuracy : bool, optional
            Estimate model accuracy roughly using a simple data-hold out train/test split.
            A hold out of 75/25% train, test respectively is used. The accuracy
            is printed and a confusion matrix is plotted.
            The default is False.
        random_state : int, optional
            Seed passed to `train_test_split` for the hold-out split.
            The default is 44.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        Returns
        -------
        clf : Classifier object (sklearn)
            Returns a trained classifier object based on the given epoch data and
            Riemannian Geometry pipeline. It is always fitted on the full data,
            whether or not the hold-out estimate was run.
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.train_test_split
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (riemannian KNN)
        """

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            # The penalty used to be given as the bare (undefined) name `l1`,
            # which raised NameError. The documented elastic-net setup is used
            # instead; saga is the solver that supports that penalty.
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   l1_ratio=0.5,
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()

        # This is NOT a great measure of the model accuracy. This just will give you
        # a rough estimate of how it is performing within its own dataset. This
        # should be used sparingly!
        if estimate_accuracy:
            # Do a simple data-hold out for testing
            x_train, x_test, y_train, y_test = train_test_split(
                X_data, labels, test_size=0.25, random_state=random_state)

            # The same pipeline object is fitted here and refitted on the
            # full data below; the final fit replaces this one.
            clf.fit(x_train, y_train)
            pred_vals = clf.predict(x_test)

            # Report the estimate; it was previously computed but discarded.
            accuracy_val = np.mean(pred_vals == y_test)
            print("Estimated hold-out accuracy: ", accuracy_val)

            plt.figure()
            plot_confusion_matrix(y_test, pred_vals, class_names)

        # Fit the data to the given epoch information
        clf.fit(X_data, labels)

        return clf
Ejemplo n.º 10
0
    def __run_strat_validation_RG(
            self,
            epochs,
            n_strat_folds=5,
            shuffle=True,
            random_state=42,
            RG_Pipeline_Num=0,
            estimator='lwf',
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
            accuracy_threshold=0.7):
        """
        Complete a stratified cross-validation using Riemannian Geometry pipeline.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
            Labels are read from the last column of `epochs.events` and the
            data from `epochs.get_data()`.
        n_strat_folds : int, optional
            Number of folds for the stratified K-Fold cross-validation.
            This value should be chosen carefully to avoid unbalanced classes.
            The default is 5.
        shuffle : bool, optional
            Shuffle the data before splitting into folds. See
            sklearn.model_selection.StratifiedKFold for more details.
            The default is True. This argument used to be ignored and the
            folds were always shuffled, so the default keeps that behaviour
            while an explicit False is now honoured.
        random_state : int, optional
            Seed for the fold shuffling. Only passed to StratifiedKFold when
            `shuffle` is true.
            The default is 42.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0,1,2,3 (any value other than 1, 2 or 3 selects pipeline 0):
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses elasticnet penalty (l1_ratio 0.5), solver saga and a
                    multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For complete lists, see pyriemann.utils.covariance.
            The default is 'lwf'.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        accuracy_threshold : float, optional
            Threshold for determining which folds are 'good' fits. Accuracy found
            at or above the threshold (e.g. 70% or greater) will be reported as
            good fit folds.
            The default is 0.7.
        Returns
        -------
        DICT
            Dictionary of outputs are returned for the user.
            In order:
                Fold accuracy -'Fold Acc'
                Indices for `good` training folds > or = to accuracy_threshold value - 'Good Train Ind'
                Indices for `good` test folds > or = to given accuracy_threshold value -  'Good Test Ind'
                Indices for `bad` train folds < given accuracy_threshold value - 'Bad Train Ind'
                Indices for `bad` test folds < given accuracy_threshold value - 'Bad Test Ind'
                List of predicted classes from the RG Pipeline - 'Prediction List'
                List of true classes from the RG Pipeline - 'True Class List'
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (riemannian KNN)
        """

        # Set the stratified CV model. StratifiedKFold rejects a random_state
        # when shuffling is disabled, so the seed is only forwarded when it
        # can have an effect.
        cv_strat = StratifiedKFold(
            n_splits=n_strat_folds,
            shuffle=shuffle,
            random_state=random_state if shuffle else None)

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   l1_ratio=0.5,
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()

        # Per-fold results collected during the stratified CV
        acc_list = []
        preds_list = []
        true_class_list = []
        good_train_indx = []
        good_test_indx = []
        bad_train_indx = []
        bad_test_indx = []

        # For loop testing each iteration of the stratified cross-validation
        for train_idx, test_idx in cv_strat.split(X_data, labels):
            # Get the train and test data/labels for this fold
            x_train, x_test = X_data[train_idx], X_data[test_idx]
            y_train, y_test = labels[train_idx], labels[test_idx]
            # Fit the classifier (refitted from scratch on every fold)
            clf.fit(x_train, y_train)
            # Find the predicted value on the test data in this fold
            preds = clf.predict(x_test)
            preds_list.append(preds)
            # Save the true class labels in a list for this fold
            true_class_list.append(y_test)
            # Fraction of correct predictions in this fold
            acc_mean = np.average(preds == y_test)
            acc_list.append(acc_mean)
            # Find out where the 'Good' training folds are. (At or above threshold)
            if acc_mean >= accuracy_threshold:
                print(
                    "Train indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", train_idx)
                print(
                    "Test indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", test_idx)
                good_train_indx.append(train_idx)
                good_test_indx.append(test_idx)
            # Find out where the 'Bad' training folds are. (Less than threshold)
            else:
                bad_train_indx.append(train_idx)
                bad_test_indx.append(test_idx)
            # Make a plot for the confusion matrix (one new figure per fold)
            plt.figure()
            plot_confusion_matrix(y_test, preds, class_names)
        # Print out the final results from across all folds on average
        print(
            "The overall accuracy with " + str(n_strat_folds) +
            "-fold stratified CV was: ", np.average(acc_list))

        # Return output vals
        return {
            'Fold Acc': acc_list,
            'Good Train Ind': good_train_indx,
            'Good Test Ind': good_test_indx,
            'Bad Train Ind': bad_train_indx,
            'Bad Test Ind': bad_test_indx,
            'Prediction List': preds_list,
            'True Class List': true_class_list
        }
Ejemplo n.º 11
0
def test_CSP():
    """Exercise CSP construction, fitting and transformation."""
    n_trials = 90
    X = generate_cov(n_trials, 3)
    labels = np.array([0, 1, 2]).repeat(n_trials // 3)

    # Constructor: default arguments first, then explicit positional ones.
    default_csp = CSP()
    assert default_csp.nfilter == 4
    assert default_csp.metric == 'euclid'
    assert default_csp.log
    custom_csp = CSP(3, 'riemann', False)
    assert custom_csp.nfilter == 3
    assert custom_csp.metric == 'riemann'
    assert not custom_csp.log

    # Constructor: invalid arguments, checked in the same order as before.
    invalid_init = (
        (TypeError, ('foo', ), {}),
        (ValueError, (), {'metric': 'foo'}),
        (TypeError, (), {'log': 'foo'}),
    )
    for expected_error, args, kwargs in invalid_init:
        with pytest.raises(expected_error):
            CSP(*args, **kwargs)

    # Fit with two classes, then with three.
    csp = CSP()
    csp.fit(X, labels % 2)
    csp.fit(X, labels)

    # Fit: invalid inputs.
    invalid_fit = (
        (ValueError, X, labels * 0.),  # 1 class
        (ValueError, X, labels[:1]),  # unequal # of samples
        (TypeError, X, 'foo'),  # y must be an array
        (TypeError, 'foo', labels),  # X must be an array
        (ValueError, X[:, 0], labels),
        (ValueError, X, X),
    )
    for expected_error, bad_X, bad_y in invalid_fit:
        with pytest.raises(expected_error):
            csp.fit(bad_X, bad_y)

    n_chan = X.shape[1]
    assert_array_equal(csp.filters_.shape, [n_chan, n_chan])
    assert_array_equal(csp.patterns_.shape, [n_chan, n_chan])

    # Transform with the default (log enabled) setting.
    Xt = csp.transform(X)
    assert_array_equal(Xt.shape, [len(X), n_chan])

    with pytest.raises(TypeError):
        csp.transform('foo')
    with pytest.raises(ValueError):
        csp.transform(X[:, 1:, :])  # unequal # of chans

    # Transform again with log disabled.
    csp.log = False
    Xt = csp.transform(X)
Ejemplo n.º 12
0
import logging
import coloredlogs
# Send DEBUG-level (and above) messages to the root logger, with coloured output.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
coloredlogs.install(level=logging.DEBUG)

# Look up imagery datasets for the 'supination' and 'hand_close' events.
datasets = utils.dataset_search('imagery',
                                events=['supination', 'hand_close'],
                                has_all_events=False,
                                min_subjects=2,
                                multi_session=False)

# Keep at most the first 10 subjects of every dataset.
for d in datasets:
    d.subject_list = d.subject_list[:10]

# Within-session evaluation with a fixed seed.
paradigm = ImageryNClass(2)
context = WithinSessionEvaluation(paradigm=paradigm,
                                  datasets=datasets,
                                  random_state=42)

# Pipelines to compare; both start from 'oas' covariance estimates.
pipelines = OrderedDict()
pipelines['av+TS'] = make_pipeline(Covariances(estimator='oas'),
                                   TSclassifier())
pipelines['av+CSP+LDA'] = make_pipeline(Covariances(estimator='oas'), CSP(8),
                                        LDA())

# Run the evaluation; overwrite=True presumably replaces stored results -- confirm.
results = context.process(pipelines, overwrite=True)

# Analyze the results, passing the current directory as the second argument.
analyze(results, './')
Ejemplo n.º 13
0
# NOTE: every LogisticRegression below passes the penalty by keyword, because
# current scikit-learn releases no longer accept it positionally. The l1
# penalty also needs a solver that supports it; 'liblinear' does, and it was
# the implicit default in older scikit-learn releases.
# NOTE(review): `array_clfs` and `dataframe1` are defined above this fragment.
baseclf = make_pipeline(
    ElectrodeSelection(10, metric=dict(mean='logeuclid', distance='riemann')),
    TangentSpace('riemann'),
    LogisticRegression(penalty='l1', solver='liblinear'))

array_clfs['Cosp'] = make_pipeline(
    CospCovariances(fs=1000, window=32, overlap=0.95, fmax=300, fmin=1),
    CospBoostingClassifier(baseclf))

array_clfs['HankelCov'] = make_pipeline(
    DownSampler(2), HankelCovariances(delays=[2, 4, 8, 12, 16],
                                      estimator='oas'),
    TangentSpace('logeuclid'),
    LogisticRegression(penalty='l1', solver='liblinear'))

array_clfs['CSSP'] = make_pipeline(
    HankelCovariances(delays=[2, 4, 8, 12, 16], estimator='oas'), CSP(30),
    LogisticRegression(penalty='l1', solver='liblinear'))

patients = dataframe1.PatientID.values

# On Python 3, dict.keys() returns a view that cannot be concatenated with a
# list; materialise it first (this also works on Python 2).
index = list(array_clfs.keys()) + ['Ensemble']
columns = ['ca', 'de', 'fp', 'ja', 'mv', 'wc', 'zt']
# One row per classifier (plus 'Ensemble'), one column per entry of `columns`.
res_acc = pd.DataFrame(index=index, columns=columns)
res_auc = pd.DataFrame(index=index, columns=columns)

fnames = glob('./fhpred/data/*/*.mat')

for fname in fnames:
    data = loadmat(fname)
    # Two characters at a fixed offset from the end of the path --
    # presumably the patient code used in `columns`; confirm.
    p = fname[-18:-16]
    # Work on a fresh copy of the classifiers for every file.
    clfs = deepcopy(array_clfs)
Ejemplo n.º 14
0
def pyR_decoding_on_full_epochs(X,
                                y,
                                plot_conf_matrix=0,
                                class_names=None,
                                test_size=0.2,
                                n_splits=5,
                                classifier='ERP_cov'):
    """Decode on the full epoch using a pyRiemann-based pipeline.

    cf https://github.com/Team-BK/Biomag2016/blob/master/Final_Submission.ipynb

    Parameters
    ----------
    X : array
        Data extracted from the epochs provided to the decoder.  It is
        indexed with arrays of trial indices, so it must support NumPy-style
        indexing along its first axis.
    y : array-like
        Categorical variable (i.e. discrete, but it can have more than
        2 categories).  Converted with ``np.asarray``.
    plot_conf_matrix : int
        Set to 1 to additionally fit on a stratified train/test split and
        print and plot the confusion matrix.
    class_names : list of str
        Needed for the legend if confusion matrices are plotted,
        e.g. ``['cat1', 'cat2', 'cat3']``.
    test_size : float
        Proportion of the data held out for the confusion-matrix split.
    n_splits : int
        Number of cross-validation folds used to compute the scores.
    classifier : str
        One of ``'ERP_cov'``, ``'Xdawn_cov'`` or ``'Hankel_cov'``.

    Returns
    -------
    scores : ndarray
        Accuracy on each cross-validation fold.  Returned alone unless
        ``plot_conf_matrix == 1``.
    scores, y_test, y_pred, cnf_matrix : tuple
        Returned when ``plot_conf_matrix == 1``; ``y_test``, ``y_pred`` and
        ``cnf_matrix`` come from the separate train/test split.

    Raises
    ------
    ValueError
        If ``classifier`` is not one of the supported names.
    """

    # ------- define the classifier -------
    if classifier == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(estimator='lwf'),
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))
    elif classifier == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'), LogisticRegression('l2'))
    elif classifier == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False), TangentSpace('logeuclid'),
            LogisticRegression('l2'))
    else:
        # Fail early instead of hitting an unbound ``clf`` further down.
        raise ValueError(
            "Unknown classifier {!r}; expected 'ERP_cov', 'Xdawn_cov' or "
            "'Hankel_cov'".format(classifier))

    # ------- cross-validated accuracy -------
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=4343)
    y = np.asarray(y)
    scores = []
    for train_index, test_index in cv.split(X, y):
        print(train_index)
        print(test_index)
        print('we are in the CV loop')
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Train on X_train, y_train
        clf.fit(X_train, y_train)
        # Predict the category on X_test
        y_pred = clf.predict(X_test)

        scores.append(accuracy_score(y_true=y_test, y_pred=y_pred))
    scores = np.asarray(scores)

    if plot_conf_matrix == 1:
        # The confusion matrix is computed on an independent stratified
        # split, not on the cross-validation folds above.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=7, stratify=y)
        print('train and test have been split')
        y_pred = clf.fit(X_train, y_train).predict(X_test)
        # Compute confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        np.set_printoptions(precision=2)
        print(cnf_matrix)

        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              title='Confusion matrix, without normalization')

        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix,
                              classes=class_names,
                              normalize=True,
                              title='Normalized confusion matrix')

        plt.show()
        return scores, y_test, y_pred, cnf_matrix

    # No confusion matrix requested: only the fold scores are meaningful
    # (``cnf_matrix`` was never computed on this path).
    return scores
Ejemplo n.º 15
0
def test_CSP():
    """Check CSP construction, fitting and transformation."""
    n_trials = 90
    covmats = generate_cov(n_trials, 3)
    labels = np.array([0, 1, 2]).repeat(n_trials // 3)

    # --- construction: defaults, positional arguments, invalid arguments ---
    default_csp = CSP()
    assert_true(default_csp.nfilter == 4)
    assert_true(default_csp.metric == 'euclid')
    assert_true(default_csp.log)
    custom_csp = CSP(3, 'riemann', False)
    assert_true(custom_csp.nfilter == 3)
    assert_true(custom_csp.metric == 'riemann')
    assert_true(not custom_csp.log)
    assert_raises(TypeError, CSP, 'foo')
    assert_raises(ValueError, CSP, metric='foo')
    assert_raises(TypeError, CSP, log='foo')

    # --- fit: valid label sets first, then malformed inputs ---
    fitted = CSP()
    fitted.fit(covmats, labels % 2)  # two classes
    fitted.fit(covmats, labels)  # 3 classes
    assert_raises(ValueError, fitted.fit, covmats, labels * 0.)  # 1 class
    # unequal number of samples
    assert_raises(ValueError, fitted.fit, covmats, labels[:1])
    assert_raises(TypeError, fitted.fit, covmats, 'foo')  # y must be an array
    assert_raises(TypeError, fitted.fit, 'foo', labels)  # X must be an array
    assert_raises(ValueError, fitted.fit, covmats[:, 0], labels)
    assert_raises(ValueError, fitted.fit, covmats, covmats)

    # Filters and patterns are square in the number of channels.
    n_channels = covmats.shape[1]
    square = [n_channels, n_channels]
    assert_array_equal(fitted.filters_.shape, square)
    assert_array_equal(fitted.patterns_.shape, square)

    # --- transform: output shape and malformed inputs ---
    transformed = fitted.transform(covmats)
    assert_array_equal(transformed.shape, [len(covmats), covmats.shape[1]])
    assert_raises(TypeError, fitted.transform, 'foo')
    # unequal number of channels
    assert_raises(ValueError, fitted.transform, covmats[:, 1:, :])
    fitted.log = False
    transformed = fitted.transform(covmats)
Ejemplo n.º 16
0
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from pyriemann.spatialfilters import CSP
from pyriemann.estimation import Covariances
from sklearn.pipeline import make_pipeline

# Grid explored by the inner search: SVM kernel and regularisation strength.
parameters = dict(kernel=('linear', 'rbf'), C=[0.1, 1, 10])
clf = GridSearchCV(estimator=SVC(), param_grid=parameters, cv=3)

# OAS covariances -> 6 CSP filters -> grid-searched SVM.
pipe = make_pipeline(Covariances(estimator='oas'), CSP(nfilter=6), clf)

# this is what will be loaded
PIPELINE = dict(
    name='CSP + optSVM',
    paradigms=['LeftRightImagery'],
    pipeline=pipe,
)
                               channels=['C3', 'C4'])
    results = context.process(suffix='C3C4')


def run_analyses(out_dir):
    """Call ``analyze`` once per (suffix, evaluation class) combination.

    For each suffix ('' and 'C3C4') and each of the cross-subject and
    within-session evaluation classes, ``analyze`` receives the
    ``(evaluation, LeftRightImagery)`` pair, ``out_dir``, the suffix and a
    name of the form ``<EvaluationClassName>_<suffix>``.
    """
    evaluations = (CrossSubjectEvaluation, WithinSessionEvaluation)
    for suffix in ('', 'C3C4'):
        for evaluation in evaluations:
            label = evaluation.__name__ + '_' + suffix
            analyze((evaluation, LeftRightImagery),
                    out_dir,
                    suffix=suffix,
                    name=label)


if __name__ == '__main__':
    import mne

    # Point MNE at the shared data directory.
    mne.utils.set_config(
        'MNE_DATA', '/agbs/bcigroup/Studies/z008_ExternalDatasets/data')

    # Directory containing this script.
    # NOTE(review): neither ``out_dir`` nor ``pipelines`` is used by the one
    # call that is currently active below -- confirm this is intended.
    out_dir = os.path.dirname(os.path.realpath(__file__))

    pipelines = OrderedDict([
        ('TS', make_pipeline(Covariances('oas'), TSclassifier())),
        ('CSP+LDA', make_pipeline(Covariances('oas'), CSP(8), LDA())),
        ('CSP+SVM', make_pipeline(Covariances('oas'), CSP(8), SVC())),
    ])

    # Evaluation runs, currently disabled:
    # OnlyC3C4_cross(out_dir, pipelines)
    # OnlyC3C4_within(out_dir, pipelines)
    # # RHvsLH_cross(out_dir, pipelines)
    # RHvsLH_within(out_dir, pipelines)
    run_analyses('/agbs/bcigroup/_Share/Vinay/MOABB')
Ejemplo n.º 18
0
import os
import os.path as osp
import unittest
from collections import OrderedDict

import numpy as np
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline

from moabb.datasets.fake import FakeDataset
from moabb.evaluations import evaluations as ev
from moabb.paradigms.motor_imagery import FakeImageryParadigm

# Single CSP + LDA pipeline shared by the tests in this module.
pipelines = OrderedDict()
pipelines["C"] = make_pipeline(Covariances("oas"), CSP(8), LDA())
dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=2)

# Make sure ~/mne_data exists before any test runs (presumably where the
# MNE-based preprocessing stores its files -- confirm).  ``os`` itself must be
# imported for ``os.makedirs``: ``import os.path as osp`` only binds ``osp``.
mne_data_dir = osp.join(osp.expanduser("~"), "mne_data")
if not osp.isdir(mne_data_dir):
    os.makedirs(mne_data_dir)


class Test_WithinSess(unittest.TestCase):
    """Tests built around ``WithinSessionEvaluation``.

    Strictly speaking this is integration testing: a paradigm implements
    pre-processing, so it needs files to run the MNE steps on, and checking
    scoring and train/test handling needs data to run on as well.  A leaner
    approach is left for the future.
    """
    def setUp(self):
        # Fresh evaluation over the fake dataset for every test.
        paradigm = FakeImageryParadigm()
        self.eval = ev.WithinSessionEvaluation(paradigm=paradigm,
                                               datasets=[dataset])
Ejemplo n.º 19
0
from sklearn.svm import SVC
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from moabb.pipelines.utils import FilterBank
from sklearn.pipeline import make_pipeline
import numpy as np

# Ten log-spaced values of C between 1e-2 and 1e2 for the linear SVM.
parameters = dict(C=np.logspace(-2, 2, 10))
clf = GridSearchCV(estimator=SVC(kernel='linear'), param_grid=parameters)

# Covariances + CSP wrapped in a FilterBank, then the 10 best features by
# mutual information, then the grid-searched SVM.
fb = FilterBank(
    make_pipeline(Covariances(estimator='oas'), CSP(nfilter=4)))
pipe = make_pipeline(
    fb,
    SelectKBest(score_func=mutual_info_classif, k=10),
    clf,
)

# this is what will be loaded
PIPELINE = dict(
    name='FBCSP + optSVM',
    paradigms=['FilterBankMotorImagery'],
    pipeline=pipe,
)
Ejemplo n.º 20
0
from moabb.evaluations import evaluations as ev
from moabb.datasets.fake import FakeDataset
from moabb.paradigms.motor_imagery import FakeImageryParadigm
import unittest
import os

from pyriemann.spatialfilters import CSP
from pyriemann.estimation import Covariances
from sklearn.pipeline import make_pipeline

from collections import OrderedDict
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Single CSP + LDA pipeline and a two-subject fake dataset shared by the tests.
pipelines = OrderedDict([
    ('C', make_pipeline(Covariances('oas'), CSP(8), LDA())),
])
dataset = FakeDataset(['left_hand', 'right_hand'], n_subjects=2)


class Test_WithinSess(unittest.TestCase):
    '''Tests built around ``WithinSessionEvaluation``.

    This is actually integration testing, but no better approach is known
    yet. A paradigm implements pre-processing, so it needs files to run the
    MNE steps on. Testing the scoring and train/test handling also requires
    data to run on. Improving this is left for the future.

    '''
    def setUp(self):
        # Fresh evaluation over the module-level fake dataset for every test.
        self.eval = ev.WithinSessionEvaluation(paradigm=FakeImageryParadigm(),
                                               datasets=[dataset])

    def tearDown(self):
        # Location of the results file held by the evaluation.
        # NOTE(review): ``path` is not used in the visible part of this
        # method -- the remainder appears to be cut off; presumably the file
        # is cleaned up afterwards. Confirm against the full source.
        path = self.eval.results.filepath
Ejemplo n.º 21
0
from pyriemann.estimation import Covariances
from pyriemann.spatialfilters import CSP
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC


# Grid explored by the inner search: SVM kernel and regularisation strength.
parameters = dict(kernel=("linear", "rbf"), C=[0.1, 1, 10])
clf = GridSearchCV(estimator=SVC(), param_grid=parameters, cv=3)

# OAS covariances -> 6 CSP filters -> grid-searched SVM.
pipe = make_pipeline(Covariances(estimator="oas"), CSP(nfilter=6), clf)

# this is what will be loaded
PIPELINE = dict(
    name="CSP + optSVM",
    paradigms=["LeftRightImagery"],
    pipeline=pipe,
)