Example #1
# Assumed imports for this excerpt (the helper lived in pyriemann.utils.viz
# before its deprecation):
import numpy as np
import pytest
from pyriemann.utils.viz import plot_confusion_matrix


def test_confusion_matrix():
    """Test confusion_matrix"""
    target = np.array([0, 1] * 10)
    preds = np.array([0, 1] * 10)
    with pytest.warns(DeprecationWarning,
                      match="plot_confusion_matrix is deprecated"):
        plot_confusion_matrix(target, preds, ["a", "b"])
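# The warning above reflects pyriemann deferring this plot to scikit-learn.
# A minimal sketch of the non-deprecated path, assuming scikit-learn >= 1.0:
def test_confusion_matrix_sklearn():
    """Same check via sklearn's ConfusionMatrixDisplay."""
    import numpy as np
    from sklearn.metrics import ConfusionMatrixDisplay
    target = np.array([0, 1] * 10)
    preds = np.array([0, 1] * 10)
    ConfusionMatrixDisplay.from_predictions(target, preds,
                                            display_labels=["a", "b"])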
X_train = X_train.reshape(X_train.shape[0], chans, samples)
X_test = X_test.reshape(X_test.shape[0], chans, samples)

# train a classifier with xDAWN spatial filtering + Riemannian Geometry (RG)
# labels need to be back in single-column format
clf.fit(X_train, Y_train.argmax(axis=-1))  # Pipeline.fit returns the fitted pipeline, not a Keras History
preds_rg = clf.predict(X_test)

# Printing the results
acc2 = np.mean(preds_rg == Y_test.argmax(axis=-1))
print("Classification accuracy: %f " % (acc2))

# plot the confusion matrices for both classifiers
names = ['audio left', 'audio right', 'vis left', 'vis right']
plt.figure(0)
plot_confusion_matrix(preds, Y_test.argmax(axis=-1), names, title='EEGNet-8,2')

plt.figure(1)
plot_confusion_matrix(preds_rg,
                      Y_test.argmax(axis=-1),
                      names,
                      title='xDAWN + RG')
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
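# The label= arguments above only show up if a legend is drawn. A small
# finishing sketch, assuming `hist` is the History object returned by an
# earlier Keras model.fit(...) call:
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')
loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')
plt.show()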
def test_confusion_matrix():
    """Test confusion_matrix"""
    target = np.array([0, 1] * 10)
    preds = np.array([0, 1] * 10)
    plot_confusion_matrix(target, preds, ['a', 'b'])
Example #4
print('Multiclass classification with XDAWN + MDM')

clf = make_pipeline(XdawnCovariances(n_components), MDM())

pr = np.zeros(len(labels))  # pre-allocate the prediction vector, filled per fold

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)

evoked.data = xd.Xd_.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components],
    ch_type='grad',
    colorbar=False,
    size=1.5)

###############################################################################
# plot the confusion matrix
names = ['audio left', 'audio right', 'vis left', 'vis right']
plot_confusion_matrix(labels, pr, names)
plt.show()
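# For reference, MDM's decision rule can be sketched with pyriemann's own
# helpers: estimate one geometric mean covariance per class, then assign each
# trial to the nearest mean under the Riemannian distance. Illustrative only,
# not pyriemann's actual implementation:
from pyriemann.utils.distance import distance_riemann
from pyriemann.utils.mean import mean_riemann


def mdm_predict_sketch(cov_train, y_train, cov_test):
    classes = np.unique(y_train)
    # geometric (Riemannian) mean covariance of each class
    means = [mean_riemann(cov_train[y_train == c]) for c in classes]
    # distance of every test covariance to every class mean
    dists = np.array([[distance_riemann(c, m) for m in means]
                      for c in cov_test])
    return classes[dists.argmin(axis=1)]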
epochs_data = epochs.get_data()

print("Multiclass classification with XDAWN + MDM")

clf = make_pipeline(XdawnCovariances(n_components), MDM())

pr = np.zeros(len(labels))  # pre-allocate predictions, filled per fold

# cv.split() replaces direct iteration over the pre-0.18 sklearn CV object
for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)

evoked.data = xd.Xd_.patterns_.T  # note the trailing underscore on the fitted attribute
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components], ch_type="grad", colorbar=False, size=1.5
)

###############################################################################
# plot the confusion matrix
names = ["audio left", "audio right", "vis left", "vis right"]
plot_confusion_matrix(labels, pr, names)
plt.show()
Example #6
    def __train_predefined_classifier(
            self,
            epochs,
            RG_Pipeline_Num=0,
            estimator='lwf',
            estimate_accuracy=False,
            random_state=44,
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz']):
        """
        Train a predefined Riemannian Geometry pipeline on a single dataset using
        MNE and pyriemann.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0, 1, 2, or 3:
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights, l2 penalty.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses an elasticnet penalty, the 'saga' solver and a multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For a complete list, see pyriemann.utils.covariance.
            The default is 'lwf'.
        estimate_accuracy : bool, optional
            Roughly estimate model accuracy with a simple hold-out train/test
            split (75% train / 25% test by default).
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.train_test_split for more details.
            The default is 44.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        Returns
        -------
        clf : Classifier object (sklearn)
            Returns a trained classifier object based on the given epoch data and
            Riemannian Geometry pipeline.
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',  # the bare `l1` was a bug; use elasticnet as documented
                                   l1_ratio=0.5,
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()

        # This is NOT a great measure of the model accuracy. This just will give you
        # a rough estimate of how it is performing within its own dataset. This
        # should be used sparingly!
        if estimate_accuracy:
            # Do a simple data-hold out for testing
            x_train, x_test, y_train, y_test = train_test_split(
                X_data, labels, test_size=0.25, random_state=random_state)

            clf_estimate = clf  # alias, not a copy; the final fit below refits the same object

            clf_estimate.fit(x_train, y_train)

            pred_vals = clf_estimate.predict(x_test)

            accuracy_val = np.mean(pred_vals == y_test)

            fig = plt.figure()
            plot_confusion_matrix(y_test, pred_vals, class_names)

        # Fit the data to the given epoch information
        clf.fit(X_data, labels)

        return clf
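    # Hypothetical usage sketch (names illustrative; the method is
    # name-mangled, so it is called from inside the owning class):
    #
    #     clf = self.__train_predefined_classifier(epochs, RG_Pipeline_Num=2,
    #                                              estimator='oas')
    #     predictions = clf.predict(new_epochs.get_data())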
Example #7
    def __run_strat_validation_RG(
            self,
            epochs,
            n_strat_folds=5,
            shuffle=False,
            random_state=42,
            RG_Pipeline_Num=0,
            estimator='lwf',
            class_names=['Rest', '13 Hz', '17 Hz', '21 Hz'],
            accuracy_threshold=0.7):
        """
        Complete a stratified cross-validation using a Riemannian Geometry pipeline.
        Parameters
        ----------
        epochs : Epoch Object from MNE
            Epoch data held in an appropriate MNE format. This could be derived from
            mne.Epochs, or using the `build_epochs` command included in this script.
        n_strat_folds : int, optional
            Number of folds for the stratified K-Fold cross-validation.
            This value should be chosen carefully to avoid unbalanced classes.
            The default is 5.
        shuffle : bool, optional
            Shuffle training set data. See sklearn.model_selection.StratifiedKFold
            for more details.
            The default is False.
        random_state : int, optional
            The value to be used as the 'seed' for `numpy.random.RandomState`.
            See sklearn.model_selection.StratifiedKFold for more details.
            The default is 42.
        RG_Pipeline_Num : int, optional
            Which pre-defined Riemannian Geometry pipeline to run for analysis.
            Can be 0, 1, 2, or 3:
                Pipeline 0:
                    Covariance w/ estimator -> Riemannian KNN
                Pipeline 1:
                    Covariance w/ estimator -> CSP -> TangentSpace -> LogisticRegression
                    LogReg uses a 'balanced' option for class weights, l2 penalty.
                Pipeline 2:
                    XDawnCovariance w/ estimator -> TangentSpace -> LogisticRegression
                    LogReg uses an elasticnet penalty, the 'saga' solver and a multinomial multi_class flag.
                Pipeline 3:
                    Covariance w/ estimator -> MDM.
                    Minimum distance to mean (MDM) is the main classification scheme.
            The default is 0.
        estimator : str, optional
            Covariance matrix estimator to use. For regularization consider 'lwf'
            or 'oas'. For a complete list, see pyriemann.utils.covariance.
            The default is 'lwf'.
        class_names : List, optional
            List of names for the confusion matrix plot.
            The default is ['Rest','13 Hz','17 Hz','21 Hz'].
        accuracy_threshold : float, optional
            Threshold for determining which folds are 'good' fits. Folds with
            accuracy at or above the threshold (e.g. 70% or greater) are
            reported as good-fit folds.
            The default is 0.7.
        Returns
        -------
        dict
            Dictionary of outputs returned to the user.
            In order:
                Fold accuracy - 'Fold Acc'
                Indices of 'good' training folds (accuracy >= accuracy_threshold) - 'Good Train Ind'
                Indices of 'good' test folds (accuracy >= accuracy_threshold) - 'Good Test Ind'
                Indices of 'bad' training folds (accuracy < accuracy_threshold) - 'Bad Train Ind'
                Indices of 'bad' test folds (accuracy < accuracy_threshold) - 'Bad Test Ind'
                List of predicted classes from the RG pipeline - 'Prediction List'
                List of true classes from the RG pipeline - 'True Class List'
        See Also
        --------
        mne.Epochs
        sklearn.model_selection.StratifiedKFold
        sklearn.linear_model.LogisticRegression
        pyriemann.estimation.Covariances
        pyriemann.estimation.XdawnCovariances
        pyriemann.spatialfilters.CSP
        pyriemann.tangentspace.TangentSpace
        pyriemann.classification.MDM
        pyriemann.classification.KNearestNeighbor (Riemannian KNN)
        """

        # Set up the stratified CV model. Labels are supplied at split() time
        # below; random_state only applies when shuffling, so pass it
        # conditionally to honour the `shuffle` argument.
        cv_strat = StratifiedKFold(
            n_splits=n_strat_folds, shuffle=shuffle,
            random_state=random_state if shuffle else None)

        # Run one of the pre-defined pipelines
        if RG_Pipeline_Num == 1:
            clf = make_pipeline(
                Covariances(estimator=estimator), CSP(log=False),
                TangentSpace(),
                LogisticRegression(class_weight='balanced', max_iter=500))
        elif RG_Pipeline_Num == 2:
            clf = make_pipeline(
                XdawnCovariances(estimator=estimator,
                                 xdawn_estimator=estimator), TangentSpace(),
                LogisticRegression(penalty='elasticnet',
                                   class_weight=None,
                                   solver='saga',
                                   multi_class='multinomial',
                                   l1_ratio=0.5,
                                   max_iter=500))
        elif RG_Pipeline_Num == 3:
            clf = make_pipeline(Covariances(estimator=estimator),
                                MDM())  # This is the best so far
        else:
            print(
                "...Running a default pipeline for RG using Covariance, and KNN..."
            )
            clf = make_pipeline(Covariances(estimator=estimator), riem_KNN())

        # Get the labels for the data
        labels = epochs.events[:, -1]
        # Identify the data itself
        X_data = epochs.get_data()

        # Make empty lists for each item in the stratified CV
        acc_list = []
        preds_list = []
        true_class_list = []
        good_train_indx = []
        good_test_indx = []
        bad_train_indx = []
        bad_test_indx = []

        # For loop testing each iteration of the stratified cross-validation
        for train_idx, test_idx in cv_strat.split(X_data, labels):
            # Get the x_train and x_test data for this fold
            x_train, x_test = X_data[train_idx], X_data[test_idx]
            # Get the y_train and y_test data for this fold
            y_train, y_test = labels[train_idx], labels[test_idx]
            # Fit the classifier
            clf.fit(x_train, y_train)
            # Find the predicted value on the test data in this fold
            preds = clf.predict(x_test)
            # Save in list
            preds_list.append(preds)
            # Save the true class labels in a list for this fold
            true_class_list.append(y_test)
            # Mean prediction accuracy for this fold
            acc_mean = np.average(preds == y_test)
            # Save the accuracy to a list
            acc_list.append(acc_mean)
            # Find out where the 'Good' training folds are. (Greater than threshold)
            if acc_mean >= accuracy_threshold:
                print(
                    "Train indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", train_idx)
                print(
                    "Test indices above accuracy threshold of " +
                    str(accuracy_threshold * 100) + "% are: ", test_idx)
                good_train_indx.append(train_idx)
                good_test_indx.append(test_idx)
            # Find out where the 'Bad' training folds are. (Less than threshold)
            else:
                bad_train_indx.append(train_idx)
                bad_test_indx.append(test_idx)
            # Make a plot for the confusion matrix
            fig = plt.figure()
            plot_confusion_matrix(y_test, preds, class_names)
        # Print out the final results from across all folds on average
        print(
            "The overall accuracy with " + str(n_strat_folds) +
            "-fold stratified CV was: ", np.average(acc_list))

        # Return output vals
        return dict({
            'Fold Acc': acc_list,
            'Good Train Ind': good_train_indx,
            'Good Test Ind': good_test_indx,
            'Bad Train Ind': bad_train_indx,
            'Bad Test Ind': bad_test_indx,
            'Prediction List': preds_list,
            'True Class List': true_class_list
        })
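    # Hypothetical consumption sketch for the returned dictionary
    # (illustrative, called from inside the owning class):
    #
    #     results = self.__run_strat_validation_RG(epochs, n_strat_folds=5,
    #                                              RG_Pipeline_Num=3)
    #     print("Mean fold accuracy:", np.average(results['Fold Acc']))
    #     good_folds = results['Good Test Ind']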
Example #8
#------------------------------------------------------------------------------
## Another method to plot the confusion matrix

#cnf = confusion_matrix(labels,preds)
#plt.figure()
#plot_confusion_matrix_melv(cnf,classes=names,title='confusion_matrix_without_normalization')
#
#plt.figure()
#plot_confusion_matrix_melv(cnf, classes=names, normalize=True,
#                      title='Normalized confusion matrix')

#------------------------------------------------------------------------------
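# The commented-out plot_confusion_matrix_melv above is project-specific; a
# row-normalized confusion matrix needs only sklearn. Sketch, assuming the
# `labels` and `preds` arrays defined in this script:
#
#     from sklearn.metrics import confusion_matrix
#     cnf = confusion_matrix(labels, preds)
#     cnf_norm = cnf.astype(float) / cnf.sum(axis=1, keepdims=True)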

names = ['zero', 'one']
plot_confusion_matrix(preds, labels, names, title='Logistic Regression confusion matrix')

print('Classification report: ')
print(classification_report(labels, preds, target_names=names))

for name in ('patterns_', 'filters_'):
    # The `inverse_transform` parameter will call this method on any estimator
    # contained in the pipeline, in reverse order.
    coef = get_coef(clf, name)
    evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
    evoked.plot_topomap(title='EEG %s' % name[:-1], time_unit='s')

#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
Example #9
clf = make_pipeline(Covariances(), TangentSpace(metric='riemann'),
                    LogisticRegression())

preds = np.zeros(len(labels))  # pre-allocate; filled per CV fold

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    # fit on the training fold; the original hard-coded `epochs_data[:28]`
    # slice could leak test trials into training
    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# clf.fit(epochs.get_data(), labels)
# clf.predict(events)

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f " % (acc))
names = ['resting-state', '13 Hz', '21 Hz', '17 Hz']
plot_confusion_matrix(preds, labels, names)
plt.show()

#from sklearn.linear_model import LogisticRegression
#var = epochs.get_data()
# cov_raw = scikitlearn.pipeline(Covariances(estimator='lwf').transform(var), TangentSpace().transform(epochs.get_data(var), logisticRegression.fit(x,y,weight))

# Tangent

print("stop")
# load your data
# X = ... # your EEG data, in format Ntrials x Nchannels X Nsamples
# y = ... # the labels

# # estimate covariances matrices
# cov = pyriemann.estimation.Covariances().fit_transform(X)
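# # A pipeline sketch continuing the template above (assumes pyriemann and
# # scikit-learn are installed, with X, y in the formats stated above):
# import pyriemann
# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import cross_val_score
# from sklearn.pipeline import make_pipeline
#
# clf = make_pipeline(pyriemann.estimation.Covariances(estimator='lwf'),
#                     pyriemann.tangentspace.TangentSpace(metric='riemann'),
#                     LogisticRegression())
# scores = cross_val_score(clf, X, y, cv=5)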
Example #10
def test_confusion_matrix():
    """Test confusion_matrix"""
    target = np.array([0, 1] * 10)
    preds = np.array([0, 1] * 10)
    plot_confusion_matrix(target, preds, ['a', 'b'])
###############################################################################
# Decoding in tangent space with a logistic regression

n_components = 2  # pick some components

# Define a cross-validation generator (shuffled 10-fold reduces variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
epochs_data = epochs.get_data()


clf = make_pipeline(XdawnCovariances(n_components),
                    TangentSpace(metric='riemann'),
                    LogisticRegression())

preds = np.zeros(len(labels))

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    clf.fit(epochs_data[train_idx], y_train)
    preds[test_idx] = clf.predict(epochs_data[test_idx])

# Printing the results
acc = np.mean(preds == labels)
print("Classification accuracy: %f " % (acc))

names = ['audio left', 'audio right', 'vis left', 'vis right']
plot_confusion_matrix(preds, labels, names)
plt.show()
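# Sketch of the tangent-space idea used above (illustrative, not pyriemann's
# exact code): each SPD covariance C is whitened by the Riemannian mean Cref
# and mapped through the matrix logarithm, yielding Euclidean features that a
# linear model such as LogisticRegression can separate.
from scipy.linalg import inv, logm, sqrtm


def tangent_map_sketch(C, Cref):
    # project one SPD matrix C onto the tangent space at the reference Cref
    whitener = inv(sqrtm(Cref))
    return logm(whitener @ C @ whitener)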