Example #1
def test_TangentSpace_inversetransform_without_fit():
    """Test inverse transform of Tangent Space without fit."""
    Nt = 10
    Ne = 3 * 4 // 2  # tangent-space dimension n*(n+1)/2 for n=3 channels
    tsv = np.random.randn(Nt, Ne)
    ts = TangentSpace(metric='riemann')
    ts.inverse_transform(tsv)
Example #2
def _train_raw(df):
    """Train a classifier on raw EEG data"""
    X, y = transform.signal_ndarray(df)
    # print(X, y)

    # Fixes non-convergence for binary classification
    dual = len(set(y)) == 2

    clfs: Dict[str, Pipeline] = {
        # These four are from https://neurotechx.github.io/eeg-notebooks/auto_examples/visual_ssvep/02r__ssvep_decoding.html
        "CSP + Cov + TS":
        make_pipeline(
            Covariances(),
            CSP(4, log=False),
            TangentSpace(),
            LogisticRegression(dual=dual),
        ),
        "Cov + TS":
        make_pipeline(Covariances(), TangentSpace(),
                      LogisticRegression(dual=dual)),
        # Performs meh
        # "CSP + RegLDA": make_pipeline(
        #     Covariances(), CSP(4), LDA(shrinkage="auto", solver="eigen")
        # ),
        # Performs badly
        # "Cov + MDM": make_pipeline(Covariances(), MDM()),
    }

    for name, clf in clfs.items():
        logger.info(f"===== Training with {name} =====")
        _train(X, y, clf)
def test_TangentSpace_inversetransform():
    """Test inverse transform of Tangent Space."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.fit(covset)
    t = ts.transform(covset)
    cov = ts.inverse_transform(t)
    assert_array_almost_equal(covset, cov)
Example #5
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i in range(len(markers)):
        conditions[markers[i]] = [i+1]
   
    clfs = OrderedDict()
    
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = ['Vect + LR','Vect + RegLDA','ERPCov + TS','ERPCov + MDM','XdawnCov + TS','XdawnCov + MDM']
    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation 
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, 
                                random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Computation in progress...')
    for m in clfs:
        try:

            res = cross_val_score(clfs[m], X, y==2, scoring='roc_auc', 
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m]*len(res))
        except Exception as error:
            print("Exception:", error)
        
    ## Plot Decoding Results

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    n_row, _ = results.shape
    auc_means = []
    for method in methods_list:
        method_auc = [results.loc[i, 'AUC'] for i in range(n_row)
                      if results.loc[i, 'Method'] == method]
        auc_means.append(np.mean(method_auc))
    # Count the pipelines whose mean AUC reaches the 0.7 threshold
    counter = sum(1 for mean in auc_means if mean >= 0.7)

    return counter > 0, counter
def test_TangentSpace_init(fit, tsupdate, metric, get_covmats):
    n_trials, n_channels = 4, 3
    n_ts = (n_channels * (n_channels + 1)) // 2
    covmats = get_covmats(n_trials, n_channels)
    ts = TangentSpace(metric=metric, tsupdate=tsupdate)
    if fit:
        ts.fit(covmats)
    Xtr = ts.transform(covmats)
    assert Xtr.shape == (n_trials, n_ts)
Example #8
    def transform(self, X):
        """Extract tangent-space features from each trial."""
        features = []
        for x in X:
            ts = TangentSpace(metric=self.metric)
            tmp = ts.fit_transform(x.transpose(2, 0, 1))
            features.append(tmp.ravel())
        features = np.array(features)
        return features
Example #9
    def fit_representation(self):
        print(np.array(self.data).shape)
        for k in range(len(self.data)):
            subject_data = np.array(self.data[k])
            print(subject_data.shape)
            subject_labels = self.labels[k]
            model_xDawn_enCours = pyriemann.estimation.XdawnCovariances(
                4, xdawn_estimator='lwf')

            subject_data = model_xDawn_enCours.fit_transform(
                subject_data, subject_labels)
            self.model_xDawn.append(model_xDawn_enCours)
            model_tangentSpace_enCours = TangentSpace(metric='riemann')
            model_tangentSpace_enCours.fit(subject_data, subject_labels)
            self.model_tangentSpace.append(model_tangentSpace_enCours)
Example #10
def proj_covs_ts(covs):
    n_sub, n_fb, p, _ = covs.shape
    covs_ts = np.zeros((n_sub, n_fb, (p * (p + 1)) // 2))
    for fb in range(n_fb):
        covs_ts[:, fb, :] = TangentSpace(metric="wasserstein").fit(
            covs[:, fb, :, :]).transform(covs[:, fb, :, :])
    return covs_ts
def project_tangent_space(subjects,
                          rank=65,
                          picks="all",
                          mode="common",
                          reg=1e-6):
    if mode == "common":
        X, y = project_common_space(subjects, rank, picks)
    elif mode == 'own':
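        # NOTE: falls back to the same projection as mode == "common"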
        X, y = project_common_space(subjects, rank, picks)
    elif mode == 'spoc':
        X, y = spoc(subjects, rank, picks)
    elif mode == "csf":
        X, y = get_covs_and_ages(subjects, picks=picks)
        X = pcs(X, rank, common_f=True)
    elif mode == "cs":
        X, y = get_covs_and_ages(subjects, picks=picks)
        X = pcs(X, rank, common_f=False)
    else:
        X, y = get_covs_and_ages(subjects, picks=picks)
    print("projecting in the tangent space")
    n_subj, n_freqs, p, _ = X.shape
    if reg:
        for i in range(n_subj):
            for f in range(n_freqs):
                X[i, f] += reg * np.eye(p)
    ts = np.zeros((n_subj, n_freqs, int(p * (p + 1) / 2)))
    n_s_train = 100
    for f in range(n_freqs):
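        # pick n_s_train random subjects out of the hard-coded 640 to fit the reference point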
        sl = np.random.permutation(np.arange(640))[:n_s_train]
        ts[:, f, :] = TangentSpace().fit(X[sl, f, :, :]).transform(X[:,
                                                                     f, :, :])
    return ts, y
Example #12
def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Uses sklearn to fit a model given inputs and targets.
    Args:
        inputs: list containing (N trials x M channels) data segments, each of length n_features.
        targets: list containing (N trials x M channels) marker data (0 or 1).
        classifier: pre-trained classifier; if None, train from scratch.
        pipeline: name of the pipeline to create when classifier is None.
    Returns:
        classifier: fitted classifier object
    """
    pipeline_dict = {
        'vect_lr':
        make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression()),
        'vect_reglda':
        make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen')),
        'xdawn_reglda':
        make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                      LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts':
        make_pipeline(ERPCovariances(), TangentSpace(), LogisticRegression()),
        'erpcov_mdm':
        make_pipeline(ERPCovariances(), MDM())
    }
    if not classifier and pipeline:
        classifier = pipeline_dict[pipeline.lower()]
    classifier.fit(inputs, targets)
    return classifier
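# A hedged usage sketch: the random arrays and shapes below are hypothetical
# stand-ins; 'erpcov_ts' is one of the pipeline keys defined above.
X = np.random.randn(50, 4, 128)   # trials x channels x samples
y = np.random.randint(0, 2, 50)   # binary markers
clf = ml_classifier(X, y, pipeline='erpcov_ts')
print(clf.predict(X[:5]))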
def svm_tangent_space_cross_validate(data):
    """A cross validated tangent space classifier with svm.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data

    Returns
    -------
    cross validated scores
        A list of cross validated scores.

    """

    # Combine the dataset
    x = np.concatenate((data['train_x'], data['test_x']), axis=0)
    y = np.concatenate((data['train_y'], data['test_y']), axis=0)

    # Construct sklearn pipeline
    clf = Pipeline([('cov_transform', Covariances(estimator='lwf')),
                    ('tangent_space', TangentSpace(metric='riemann')),
                    ('svm_classify', SVC(kernel='rbf', gamma='auto'))])
    # cross validation
    scores = cross_val_score(clf, x, y, cv=KFold(5, shuffle=True))
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print('\n')

    return scores
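# A minimal synthetic smoke test, assuming nothing beyond the dict layout the
# docstring describes; the shapes (trials x channels x samples) are hypothetical.
rng = np.random.RandomState(42)
data = {'train_x': rng.randn(30, 8, 128), 'train_y': rng.randint(0, 2, 30),
        'test_x': rng.randn(10, 8, 128), 'test_y': rng.randint(0, 2, 10)}
scores = svm_tangent_space_cross_validate(data)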
def xdawn_embedding(data, use_xdawn):
    """Perform embedding of EEG data in a low-dimensional Euclidean space
    with Laplacian Eigenmaps or UMAP.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data
    use_xdawn : bool
        If True, embed Xdawn covariances with Laplacian Eigenmaps;
        otherwise embed tangent-space features with UMAP.

    Returns
    -------
    array
        Embedded data.

    """

    if use_xdawn:
        nfilter = 3
        xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
        covs = xdwn.fit(data['train_x'],
                        data['train_y']).transform(data['test_x'])

        lapl = Embedding(metric='riemann', n_components=3)
        embd = lapl.fit_transform(covs)
    else:
        tangent_space = Pipeline([
            ('cov_transform', Covariances(estimator='lwf')),
            ('tangent_space', TangentSpace(metric='riemann'))
        ])
        t_space = tangent_space.fit(data['train_x'],
                                    data['train_y']).transform(data['test_x'])
        reducer = umap.UMAP(n_neighbors=30, min_dist=1, spread=2)
        embd = reducer.fit_transform(t_space)

    return embd
def test_TangentSpace_transform():
    """Test transform of Tangent Space."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.fit(covset)
    ts.transform(covset)

    X = np.zeros(shape=(10, 9))
    assert_raises(ValueError, ts.transform, X)

    X = np.zeros(shape=(10, 9, 8))
    assert_raises(ValueError, ts.transform, X)

    X = np.zeros(shape=(10))
    assert_raises(ValueError, ts.transform, X)

    X = np.zeros(shape=(12, 8, 8))
    assert_raises(ValueError, ts.transform, X)
def erpcov_ts_lr():
    """Obtains Riemannian features and classifies them with logregression"""
    return make_pipeline(
        ERPCovariances(estimator="oas"),
        TangentSpace(),
        LogisticRegression(solver="liblinear",
                           C=1.0,
                           class_weight="balanced",
                           penalty="l1"),
    )
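# Hypothetical usage of the pipeline factory above; the epochs and labels are
# random placeholders.
pipe = erpcov_ts_lr()
X = np.random.randn(40, 8, 128)   # trials x channels x samples
y = np.random.randint(0, 2, 40)
pipe.fit(X, y)
print(pipe.predict(X[:5]))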
def tangent_space_classifier(features, labels, classifier):
    """A tangent space classifier (SVM or random forest) for 3 classes.

    Parameters
    ----------
    features : array
        An array of features
    labels : array
        True labels
    classifier : string
        Option: Support Vector Machines ('svc') or Random Forest ('rf')

    Returns
    -------
    sklearn classifier
        Learnt classifier.

    """
    # Construct sklearn pipeline

    if classifier == 'svc':
        clf = Pipeline([('covariance_transform', Covariances(estimator='scm')),
                        ('tangent_space', TangentSpace(metric='riemann')),
                        ('classifier',
                         SVC(kernel='rbf',
                             gamma='auto',
                             decision_function_shape='ovr'))])
    elif classifier == 'rf':
        clf = Pipeline([('covariance_transform', Covariances(estimator='scm')),
                        ('tangent_space', TangentSpace(metric='riemann')),
                        ('classifier',
                         RandomForestClassifier(n_estimators=100,
                                                oob_score=True))])
    else:
        print("Please select the appropriate classifier ")
        return

    # Fit the classifier
    clf.fit(features, labels)

    return clf
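# Hypothetical call with random stand-in data and the three integer classes
# mentioned in the docstring.
features = np.random.randn(60, 8, 256)   # trials x channels x samples
labels = np.random.randint(1, 4, 60)     # classes 1..3
clf = tangent_space_classifier(features, labels, 'rf')
print(clf.predict(features[:3]))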
def subject_independent_cov_data(config):
    """Get subject independent covariance data (pooled data).

    Parameters
    ----------
    config : yaml
        The configuration file

    Returns
    -------
    features, labels, leave_tags
        2 arrays of features and labels.
        A tag determines whether the data point is used in training.

    """

    path = str(Path(__file__).parents[2] / config['clean_emg_data'])
    data = dd.io.load(path)

    # Parameters
    subjects = config['subjects']

    # Empty array (list)
    x = []
    y = []
    leave_tags = np.empty((0, 1))

    for subject in subjects:
        cov_temp = Covariances().transform(data['subject_' +
                                                subject]['features'])
        x_temp = TangentSpace(metric='riemann').transform(cov_temp)
        y_temp = data['subject_' + subject]['labels']
        x.append(x_temp)
        y.append(y_temp)
        leave_tags = np.concatenate((leave_tags, y_temp[:, 0:1] * 0 + 1),
                                    axis=0)

    # Convert to array
    x = np.concatenate(x, axis=0)
    y = np.concatenate(y, axis=0)

    # Balance the dataset
    rus = RandomUnderSampler()
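    # fit_resample is called on (y, y) only to populate sample_indices_ below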
    rus.fit_resample(y, y)

    # Store them in dictionary
    features = x[rus.sample_indices_, :]
    labels = y[rus.sample_indices_, :]
    leave_tags = leave_tags[rus.sample_indices_, :]

    return features, labels, leave_tags
def test_TangentSpace_inversetransform():
    """Test inverse transform of Tangent Space."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.fit(covset)
    t = ts.transform(covset)
    cov = ts.inverse_transform(t)
    assert_array_almost_equal(covset, cov)
def test_TangentSpace_inversetransform_without_fit():
    """Test inverse transform of Tangent Space without fit."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='identity')
    tsv = ts.fit_transform(covset)
    ts = TangentSpace(metric='riemann')
    cov = ts.inverse_transform(tsv)
    assert_array_almost_equal(covset, cov)
def test_TS_matdim_error(get_covmats):
    n_trials, n_channels = 4, 3
    ts = TangentSpace()
    with pytest.raises(ValueError):
        not_square_mat = np.empty((n_trials, n_channels, n_channels + 1))
        ts.transform(not_square_mat)
    with pytest.raises(ValueError):
        too_many_dim = np.empty((1, 2, 3, 4))
        ts.transform(too_many_dim)
Example #22
class Riemann(BaseEstimator, TransformerMixin):
    def __init__(self, metric='wasserstein'):
        self.metric = metric

    def fit(self, X, y=None):
        X = np.array(list(np.squeeze(X)))
        self.ts = TangentSpace(metric=self.metric).fit(X)
        return self

    def transform(self, X):
        X = np.array(list(np.squeeze(X)))
        n_sub, p, _ = X.shape
        Xout = self.ts.transform(X)
        return pd.DataFrame({'cov': list(Xout.reshape(n_sub, -1))})
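# A hedged smoke test for the transformer above; the SPD matrices are built
# by construction and all sizes are hypothetical.
rng = np.random.RandomState(0)
A = rng.randn(20, 8, 8)
covs = A @ A.transpose(0, 2, 1) + 8 * np.eye(8)   # SPD by construction
r = Riemann(metric='riemann')
df = r.fit(covs).transform(covs)
print(df.shape)   # (20, 1): a 'cov' column of 8*(8+1)/2 = 36-dim vectors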
class Riemann(BaseEstimator, TransformerMixin):
    def __init__(self, metric='wasserstein', return_data_frame=True):
        self.metric = metric
        self.return_data_frame = return_data_frame

    def fit(self, X, y=None):
        X = _check_data(X)
        self.ts = TangentSpace(metric=self.metric).fit(X)
        return self

    def transform(self, X):
        X = _check_data(X)
        X_out = self.ts.transform(X)
        if self.return_data_frame:
            X_out = pd.DataFrame(X_out)
        return X_out  # (sub, c*(c+1)/2)
def forest_tangent_space_cross_validate(data, cv=False):
    """A tangent space classifier with random forest.

    Parameters
    ----------
    data : dict
        A dictionary containing training and testing data
    cv : bool
        If True, run 5-fold cross-validation and print the scores;
        otherwise plot a confusion matrix on the test set.

    Returns
    -------
    None

    """

    # Construct sklearn pipeline
    clf = Pipeline([('cov_transform', Covariances('lwf')),
                    ('tangent_space', TangentSpace(metric='riemann')),
                    ('random_forest_classify',
                     RandomForestClassifier(n_estimators=20,
                                            max_depth=10,
                                            random_state=43))])
    if cv:
        # Combine the dataset
        x = np.concatenate((data['train_x'], data['test_x']), axis=0)
        y = np.concatenate((data['train_y'], data['test_y']), axis=0)

        # cross validation
        scores = cross_val_score(clf, x, y, cv=KFold(5, shuffle=True))
        print("Accuracy: %0.4f (+/- %0.4f)" %
              (scores.mean(), scores.std() * 2))
        print('\n')
    else:
        clf = RandomForestClassifier(n_estimators=20,
                                     max_depth=10,
                                     random_state=43)
        plt.style.use('clean')
        y_train = np.argmax(data['train_y'], axis=1) + 1
        y_test = np.argmax(data['test_y'], axis=1) + 1
        classifier = clf.fit(data['train_x'], y_train)
        plot_confusion_matrix(classifier,
                              data['test_x'],
                              y_test,
                              normalize='true',
                              cmap=plt.cm.Blues)
    return None
def svm_tangent_space_classifier(features, labels):
    """A tangent space classifier with svm for 3 classes.

    Parameters
    ----------
    features : array
        An array of features
    labels : array
        True labels

    Returns
    -------
    sklearn classifier
        Learnt classifier.

    """
    # Construct sklearn pipeline
    clf = Pipeline([('cov_transform', Covariances('oas')),
                    ('tangent_space', TangentSpace(metric='riemann')),
                    ('svm_classify', SVC(kernel='rbf', gamma='auto'))])
    # Fit the classifier
    clf.fit(features, labels)

    return clf
def classify_tangentSpace_features(clf, emg_data, flag):
    """Extract the tangent space features from the epochs and
    classify them with a trained model.

    Parameters
    ----------
    clf : trained sklearn classifier
        A sklearn classifier model such as SVM or RF previously trained on the user-provided data
    emg_data : numpy array
        epoched emg data with size epochs x channels x samples
    flag : string
        Return the maximum log-likelihood if flag='log_proba', otherwise the predicted label

    Returns
    -------
    numpy array
        maximum log-likelihood or predicted label for each epoch
    """
    cov = Covariances().fit_transform(emg_data)
    ts = TangentSpace().fit_transform(cov)

    if flag == 'log_proba':
        return np.amax(clf.predict_log_proba(ts), axis=1)
    else:
        return clf.predict(ts)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

from utils import (DownSampler, EpochsVectorizer, CospBoostingClassifier,
                   epoch_data)

dataframe1 = pd.read_csv('ecog_train_with_labels.csv')

array_clfs = OrderedDict()

# ERPs models
array_clfs['XdawnCov'] = make_pipeline(XdawnCovariances(6, estimator='oas'),
                                       TangentSpace('riemann'),
                                       LogisticRegression(penalty='l2'))

array_clfs['Xdawn'] = make_pipeline(Xdawn(12, estimator='oas'), DownSampler(5),
                                    EpochsVectorizer(),
                                    LogisticRegression(penalty='l2'))

# Induced activity models

baseclf = make_pipeline(
    ElectrodeSelection(10, metric=dict(mean='logeuclid', distance='riemann')),
    TangentSpace('riemann'),
    LogisticRegression(penalty='l1', solver='liblinear'))

array_clfs['Cosp'] = make_pipeline(
    CospCovariances(fs=1000, window=32, overlap=0.95, fmax=300, fmin=1),
    CospBoostingClassifier(baseclf))
    def __init__(self, n_fb=9, metric='wasserstein'):
        self.n_fb = n_fb
        self.ts = [TangentSpace(metric=metric) for fb in range(n_fb)]
Example #29
def test_TangentSpace_transform_with_ts_update():
    """Test transform of Tangent Space with TSupdate."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann', tsupdate=True)
    ts.fit(covset)
    ts.transform(covset)
Example #30
def test_TangentSpace_transform():
    """Test transform of Tangent Space."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.fit(covset)
    ts.transform(covset)
Example #31
def test_TangentSpace_fit():
    """Test Fit of Tangent Space."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.fit(covset)
def test_TangentSpace_transform_without_fit():
    """Test transform of Tangent Space without fit."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.transform(covset)
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.
#
# The CSP implementation from MNE is used. We selected 8 CSP components, as
# usually done in the literature.
#
# The Riemannian geometry pipeline consists of covariance estimation, tangent
# space mapping and finally a logistic regression for the classification.

pipelines = {}

pipelines['CSP + LDA'] = make_pipeline(CSP(n_components=8), LDA())

pipelines['RG + LR'] = make_pipeline(Covariances(), TangentSpace(),
                                     LogisticRegression())

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014001).
# The evaluation will return a dataframe containing a single AUC score for
# each subject / session of the dataset, and for each pipeline.
#
# Results are saved into the database, so that if you add a new pipeline, it
# will not re-run the evaluation unless a parameter has changed. Results can
# be overwritten if necessary.

paradigm = LeftRightImagery()
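##############################################################################
# The snippet is truncated here. A hedged sketch of how it would typically
# continue, reusing the dataset named above and the WithinSessionEvaluation /
# process API that appears in a later example on this page:

datasets = [BNCI2014001()]
evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=datasets)
results = evaluation.process(pipelines)
print(results.head())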
Example #34
def train_classifiers(data_files, valid_runs_dict_uiuc, valid_runs_dict_whasc):
    '''
    Produces test data and tests whether projecting matrices into the tangent space finds the correct discriminative connection.
    
        Parameters:
            data_files (list of pairs (filename,data)): the input data
            valid_runs_dict_uiuc (dictionary): dictionary containing valid runs for each patient
            valid_runs_dict_whasc (dictionary): dictionary containing valid runs for each patient
            
        Returns:
            accDict (dictionary): mean accuracy on each file's data
            simDict (dictionary): mean cosine similarity of classifier coefficients for each file
            matDict (dictionary): mean confusion matrix for each file
            corrDict (dictionary): before and after projection correlations
            spearDict (dictionary): before and after projection spearman correlations
    '''
    accDict = {}
    simDict = {}
    matDict = {}
    corrDict = {}
    spearDict = {}

    simArr = []
    for fname, data in data_files:
        # get time series data to make covariance matrices
        X = np.array([sample['TimeSeries'] for sample in data['samples']
                      ])  # if data_selector(sample)])
        y = np.array([
            get_label_8(sample['Group'], sample['Location'])
            for sample in data['samples']
        ])  # if data_selector(sample)])

        # gsr seems to produce a rank deficient covariance matrix, so oas regularization is necessary
        covest = Covariances()
        ts = TangentSpace()
        #sym = to_symm_mat(0,33)
        #diag = to_upper_tri(1)
        svc = SVC(kernel='linear')
        clf_riem = make_pipeline(covest, ts, svc)

        rf = RandomForestClassifier(200)
        clf_rf = make_pipeline(covest, ts, rf)

        covest2 = Correlations()

        svc2 = SVC(kernel='linear')
        get_tri_inds = to_upper_tri(0)
        clf_cov = make_pipeline(covest2, get_tri_inds, svc2)

        #Check clustering
        #to_TS = make_pipeline(covest,ts)
        #X_in_TS = to_TS.transform(X)
        #kmeans = KMeans(n_clusters=4,random_state=0).fit(X_in_TS)

        # Monte Carlo, in theory should run this len(y)^2 times, but I need to save my poor computer's memory.
        accRiemList = []
        accCovList = []
        accRfList = []
        coeffArr = []
        matRiemList = []
        corrArrBefore = []
        corrArrAfter = []
        spearArrBefore = []
        spearArrAfter = []

        rs = StratifiedShuffleSplit(n_splits=100, test_size=.3)
        for i, (train_inds, test_inds) in enumerate(rs.split(X, y)):

            X_train, X_test, y_train, y_test = X[train_inds], X[test_inds], y[
                train_inds], y[test_inds]
            X_train_cov, X_test_cov, y_train_cov, y_test_cov = X_train.copy(
            ), X_test.copy(), y_train.copy(), y_test.copy()

            clf_riem.fit(X_train, y_train)
            clf_rf.fit(X_train, y_train)
            clf_cov.fit(X_train_cov, y_train_cov)

            #get riemann svm coefficients
            coeffArr.append(clf_riem[2].coef_)

            #compare correlation
            corr_coeffs_before = np.corrcoef(np.vstack(
                [x[np.triu_indices(33)].flatten() for x in X_train]),
                                             rowvar=False)
            corrArrBefore.append(np.linalg.norm(corr_coeffs_before))
            #spearman correlation
            spearman_coeffs_before, _ = scipy.stats.spearmanr(np.vstack(
                [x[np.triu_indices(33)].flatten() for x in X_train]),
                                                              axis=0)
            spearArrBefore.append(np.linalg.norm(spearman_coeffs_before))

            ref = ts.reference_
            covs = covest.transform(X_train)
            mapped = ts.transform(covs)
            corr_coeffs_after = np.corrcoef(mapped, rowvar=False)
            spearman_coeffs_after, _ = scipy.stats.spearmanr(mapped, axis=0)
            corrArrAfter.append(np.linalg.norm(corr_coeffs_after))
            spearArrAfter.append(np.linalg.norm(spearman_coeffs_after))

            y_pred = clf_riem.predict(X_test)
            y_pred_cov = clf_cov.predict(X_test_cov)
            y_pred_rf = clf_rf.predict(X_test)

            # save accuracy
            accRiemList.append(accuracy_score(y_pred, y_test))
            accCovList.append(accuracy_score(y_pred_cov, y_test_cov))
            accRfList.append(accuracy_score(y_pred_rf, y_test))

            # confusion matrix
            mat = confusion_matrix(y_test,
                                   y_pred,
                                   normalize='true',
                                   labels=[0, 1, 2, 3, 4, 5, 6, 7])
            matRiemList.append(mat)

        for z in range(0, len(coeffArr[0])):
            class_z_coeffs = [x[z] for x in coeffArr]
            cos_sim = cosine_similarity(class_z_coeffs)
            upperTri = cos_sim[np.triu_indices(cos_sim.shape[0], 1)]
            cos_avg = np.mean(upperTri.flatten())
            simArr.append(cos_avg)

        avgMatRiem = sum(matRiemList) / len(matRiemList)
        simDict.update({fname: simArr})
        matDict.update({fname: avgMatRiem})
        riemAcc = np.mean(accRiemList)
        covAcc = np.mean(accCovList)
        rfAcc = np.mean(accRfList)

        accDict.update(
            {fname: {
                'riem': riemAcc,
                'rf': rfAcc,
                'cov': covAcc
            }})
        corrDict.update({
            fname: {
                'before': np.mean(corrArrBefore),
                'after': np.mean(corrArrAfter)
            }
        })
        spearDict.update({
            fname: {
                'before': np.mean(spearArrBefore),
                'after': np.mean(spearArrAfter)
            }
        })
        print("Mean Accuracy w/ Riemann on data " + fname + ": " +
              str(riemAcc))
        print("Mean Accuracy w/ Cov on data " + fname + ": " + str(covAcc))
        print("Mean Accuracy w/ RF on data " + fname + ": " + str(rfAcc))
        print("----------------")

    return accDict, corrDict, spearDict, matDict, simDict
Example #35
def permutation_bootstrap(samples, labels, n_states, rois, p=5):
    '''
    Perform permutation bootstrap to find discriminative connections.

        Parameters:
            samples (ndarray shape (n_samples, n_channels, n_vars)): The input dataset
            labels (ndarray shape (n_samples)): The input labels
            n_states (int): Number of distinct classes
            rois (list): Region labels passed through to get_sig_pairs
            p (int): Percentile of significance (default 5)

        Returns:
            sig_pairs_max (list of tuples): Significant functional positive connections
            sig_pairs_min (list of tuples): Significant functional negative connections
    '''
    # First, make null model
    # Use integer labeling so we can be sure that the one vs one classifiers are
    # in the correct orders
    X = samples
    y = labels

    # Randomly permute labels (only labels, not training input)
    NUM_BOOTSTRAP = 10
    covest = Covariances()
    ts = TangentSpace()
    sym = to_symm_mat(0, X.shape[1])
    diag = to_upper_tri(1)
    svc = SVC(kernel='linear')
    clf_riem = make_pipeline(covest, ts, sym, diag, svc)
    maxcoeffs = []
    mincoeffs = []
    nullcoeffs = []
    nullcos = []
    num_pairs = int(scipy.special.binom(n_states, 2))
    for i in range(0, 100):
        y_permuted = np.random.permutation(y)
        coeffArr = []
        rs = ShuffleSplit(n_splits=NUM_BOOTSTRAP, test_size=.3)
        for train, test in rs.split(X):
            X_train, X_test, y_train, y_test = X[train], X[test], y_permuted[
                train], y_permuted[test]
            clf_riem.fit(X_train, y_train)
            coeffArr.append(
                clf_riem[4].coef_ /
                np.std(clf_riem[4].coef_, axis=-1).reshape(num_pairs, 1))

        meancoeff = sum(coeffArr) / len(coeffArr)
        classcos = []
        for z in range(0, len(coeffArr[0])):
            class_z_coeffs = [x[z] for x in coeffArr]
            cos_sim = cosine_similarity(class_z_coeffs)
            upperTri = cos_sim[np.triu_indices(cos_sim.shape[0], 1)]
            cos_max = np.max(upperTri.flatten())
            classcos.append(cos_max)

        nullcos.append(classcos)
        nullcoeffs.append(meancoeff)
        maxcoeff = np.max(meancoeff, axis=-1)
        mincoeff = np.min(meancoeff, axis=-1)
        maxcoeffs.append(maxcoeff)
        mincoeffs.append(mincoeff)

    coeffArr = []
    rs = ShuffleSplit(n_splits=NUM_BOOTSTRAP, test_size=.3)
    for train, test in rs.split(X):
        X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[test]
        clf_riem.fit(X_train, y_train)
        coeffArr.append(
            clf_riem[4].coef_ /
            np.std(clf_riem[4].coef_, axis=-1).reshape(num_pairs, 1))
    meancoeff = sum(coeffArr) / len(coeffArr)

    sig_pairs_max, boolarr_max = get_sig_pairs(meancoeff, maxcoeffs, num_pairs,
                                               p, rois, "max")
    sig_pairs_min, boolarr_min = get_sig_pairs(meancoeff, mincoeffs, num_pairs,
                                               p, rois, "min")

    combs = list(itertools.combinations(range(0, n_states), 2))
    #discrim_conn_max = [[combs[z] for z,flag in enumerate(one_vs_one_conns) if flag] for one_vs_one_conns in boolarr_max]
    #discrim_conn_min = [[combs[z] for z,flag in enumerate(one_vs_one_conns) if flag] for one_vs_one_conns in boolarr_min]

    return sig_pairs_max, sig_pairs_min
Example #36
# Also, use a specific resampling. In this example, all datasets are
# set to 200 Hz.

paradigm = LeftRightImagery(channels=['C3', 'C4', 'Cz'], resample=200.)

##############################################################################
# Evaluation
# ----------
#
# The evaluation is conducted with CSP+LDA, only on the 3 electrodes, with
# a sampling rate of 200 Hz.

evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=datasets)
csp_lda = make_pipeline(CSP(n_components=2), LDA())
ts_lr = make_pipeline(Covariances(estimator='oas'),
                      TangentSpace(metric='riemann'), LR(C=1.0))
results = evaluation.process({'csp+lda': csp_lda, 'ts+lr': ts_lr})
print(results.head())

##############################################################################
# Electrode selection
# -------------------
#
# It is possible to select the electrodes that are shared by all datasets
# using the `find_intersecting_channels` function. Datasets that have 0
# overlap with others are discarded. It returns the set of common channels,
# as well as the list of datasets with valid channels.

electrodes, datasets = find_intersecting_channels(datasets)
evaluation = WithinSessionEvaluation(paradigm=paradigm,
                                     datasets=datasets,
Example #37
                                     clean=True,
                                     physical=True,
                                     downsample=False)

            data_size = y_train.shape[0]
            shuffle_index = utils.shuffle_data(data_size)
            x_train = x_train[shuffle_index]
            x_train = np.squeeze(x_train)
            y_train = y_train[shuffle_index]

            # Build Model
            xd = XdawnCovariances(nfilter=5,
                                  applyfilters=True,
                                  estimator='lwf')
            # es = ElectrodeSelection(nelec=25, metric='riemann')
            ts = TangentSpace(metric='logeuclid')
            lr = LogisticRegression(solver='liblinear', max_iter=200, C=0.01)

            model = Pipeline([('xDAWN', xd), ('TangentSpace', ts), ('LR', lr)])

            model.fit(x_train, y_train)

            # Test Model
            y_pred = np.argmax(model.predict_proba(np.squeeze(x_test)), axis=1)
            bca = utils.bca(y_test, y_pred)
            acc = np.sum(y_pred == y_test).astype(np.float32) / len(y_pred)
            print('{}: acc-{} bca-{}'.format(data_name, acc, bca))

            # poison performance
            test_asr = []
            for test_param in params:
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()
############################# PyRiemann Portion ##############################

# code is taken from PyRiemann's ERP sample script, which is decoding in
# the tangent space with a logistic regression

n_components = 2  # pick some components

# set up sklearn pipeline
clf = make_pipeline(XdawnCovariances(n_components),
                    TangentSpace(metric='riemann'), LogisticRegression())

preds_rg = np.zeros(len(Y_test))

# reshape back to (trials, channels, samples)
X_train = X_train.reshape(X_train.shape[0], chans, samples)
X_test = X_test.reshape(X_test.shape[0], chans, samples)

# train a classifier with xDAWN spatial filtering + Riemannian Geometry (RG)
# labels need to be back in single-column format
clf.fit(X_train, Y_train.argmax(axis=-1))
preds_rg = clf.predict(X_test)

# Printing the results
acc2 = np.mean(preds_rg == Y_test.argmax(axis=-1))
print("Classification accuracy: %f " % (acc2))