def test_TangentSpace_inversetransform_without_fit():
    """Test inverse transform of Tangent Space without fit."""
    n_trials = 10
    # A 3x3 symmetric matrix maps to 3 * (3 + 1) / 2 = 6 tangent coordinates.
    # Floor division keeps the size an int: a float dimension makes
    # np.random.randn raise TypeError on Python 3.
    n_ts = 3 * 4 // 2
    tsv = np.random.randn(n_trials, n_ts)
    ts = TangentSpace(metric='riemann')
    ts.inverse_transform(tsv)
def _train_raw(df):
    """Train a set of covariance-based classifiers on raw EEG data.

    The signal array and labels are extracted from ``df`` with
    ``transform.signal_ndarray`` and every pipeline in the local table is
    passed to ``_train`` in turn.
    """
    X, y = transform.signal_ndarray(df)

    # The dual formulation is meant to fix non-convergence for binary
    # classification. The previous check compared the label *set* to the
    # integer 2, which is always False, so the option was never enabled.
    binary = len(set(y)) == 2
    lr_kwargs = {"dual": binary}
    if binary:
        # scikit-learn only implements the dual formulation for liblinear.
        lr_kwargs["solver"] = "liblinear"

    clfs: Dict[str, Pipeline] = {
        # These are from https://neurotechx.github.io/eeg-notebooks/auto_examples/visual_ssvep/02r__ssvep_decoding.html
        "CSP + Cov + TS": make_pipeline(
            Covariances(),
            CSP(4, log=False),
            TangentSpace(),
            LogisticRegression(**lr_kwargs),
        ),
        "Cov + TS": make_pipeline(
            Covariances(),
            TangentSpace(),
            LogisticRegression(**lr_kwargs),
        ),
    }

    for name, clf in clfs.items():
        logger.info(f"===== Training with {name} =====")
        _train(X, y, clf)
def test_TangentSpace_inversetransform():
    """Check that inverse_transform undoes transform on the fitted set."""
    covmats = generate_cov(10, 3)
    mapper = TangentSpace(metric='riemann')
    mapper.fit(covmats)
    tangent_vectors = mapper.transform(covmats)
    recovered = mapper.inverse_transform(tangent_vectors)
    assert_array_almost_equal(covmats, recovered)
def N170_test(session_data):
    """Score several ERP decoding pipelines on one session.

    Epochs are built from ``session_data`` with ``N170_MARKERS``, restricted
    to EEG channels and scaled by 1e6. Each pipeline is cross-validated
    (20 stratified shuffle splits, 25% test size, ROC AUC) on the binary
    target ``event code == 2``.

    Returns
    -------
    tuple of (bool, int)
        ``(counter > 0, counter)`` where ``counter`` is the number of
        pipelines whose mean AUC is at least 0.7. A pipeline whose
        cross-validation raised is reported and not counted.
    """
    epochs = get_session_erp_epochs(session_data, N170_MARKERS)

    clfs = OrderedDict()
    clfs['Vect + LR'] = make_pipeline(
        Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(
        Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(
        ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(
        ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(
        XdawnCovariances(estimator='oas'), TangentSpace(),
        LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(
        XdawnCovariances(estimator='oas'), MDM())

    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    y = epochs.events[:, -1]

    # define cross validation
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

    # run cross validation for each pipeline, keeping scores per method so
    # no second pass is needed to regroup them
    scores = {}
    print('Calcul in progress...')
    for name, clf in clfs.items():
        try:
            scores[name] = cross_val_score(clf, X, y == 2, scoring='roc_auc',
                                           cv=cv, n_jobs=-1)
        except Exception as err:
            # Best effort: a failing pipeline must not abort the others, but
            # say which one failed and why.
            print("exception in {}: {!r}".format(name, err))

    counter = sum(
        1 for name in clfs
        if name in scores and len(scores[name]) > 0
        and np.mean(scores[name]) >= 0.7
    )
    return counter > 0, counter
def test_TangentSpace_init(fit, tsupdate, metric, get_covmats):
    """Transform output has one row per trial and n*(n+1)/2 columns."""
    n_trials, n_channels = 4, 3
    expected_shape = (n_trials, (n_channels * (n_channels + 1)) // 2)
    covmats = get_covmats(n_trials, n_channels)
    ts = TangentSpace(metric=metric, tsupdate=tsupdate)
    # The shape contract is checked both with and without a prior fit.
    if fit:
        ts.fit(covmats)
    assert ts.transform(covmats).shape == expected_shape
def transform(self, X):
    """Build one flattened tangent-space feature vector per element of X.

    For every ``x`` in ``X`` a fresh ``TangentSpace(metric=self.metric)`` is
    fitted on ``x.transpose(2, 0, 1)`` and its output is flattened; the
    flattened vectors are stacked into a single array.
    """
    # presumably each x is (n_channels, n_channels, n_matrices) -- TODO confirm
    rows = [
        TangentSpace(metric=self.metric)
        .fit_transform(x.transpose(2, 0, 1))
        .ravel()
        for x in X
    ]
    return np.array(rows)
def fit_representation(self):
    """Fit one Xdawn-covariance model and one tangent space per subject.

    For each entry of ``self.data`` (with the matching entry of
    ``self.labels``) an ``XdawnCovariances`` estimator is fitted, then a
    Riemannian ``TangentSpace`` is fitted on the resulting covariances. The
    fitted objects are appended to ``self.model_xDawn`` and
    ``self.model_tangentSpace``.
    """
    print(np.array(self.data).shape)
    for idx in range(len(self.data)):
        epochs = np.array(self.data[idx])
        print(epochs.shape)
        targets = self.labels[idx]

        xdawn_cov = pyriemann.estimation.XdawnCovariances(
            4, xdawn_estimator='lwf')
        covariances = xdawn_cov.fit_transform(epochs, targets)
        self.model_xDawn.append(xdawn_cov)

        tangent = TangentSpace(metric='riemann')
        tangent.fit(covariances, targets)
        self.model_tangentSpace.append(tangent)
def proj_covs_ts(covs):
    """Project covariances to the tangent space, one frequency band at a time.

    ``covs`` is unpacked as ``(n_sub, n_fb, p, p)``. For each band a
    Wasserstein ``TangentSpace`` is fitted on, and applied to, that band's
    matrices. The result has shape ``(n_sub, n_fb, p * (p + 1) // 2)``.
    """
    n_sub, n_fb, p, _ = covs.shape
    n_features = (p * (p + 1)) // 2
    covs_ts = np.zeros((n_sub, n_fb, n_features))
    for fb in range(n_fb):
        band = covs[:, fb, :, :]
        mapper = TangentSpace(metric="wasserstein").fit(band)
        covs_ts[:, fb, :] = mapper.transform(band)
    return covs_ts
def project_tangent_space(subjects, rank=65, picks="all", mode="common",
                          reg=1e-6):
    """Project per-subject, per-frequency covariances to the tangent space.

    Parameters
    ----------
    subjects
        Forwarded to the covariance-loading helper selected by ``mode``.
    rank
        Forwarded to the projection helpers.
    picks
        Forwarded to the covariance-loading helpers.
    mode : str
        One of "common", "own", "spoc", "csf", "cs"; any other value loads
        the covariances without projection.
    reg : float
        If truthy, ``reg * I`` is added in place to every covariance matrix.

    Returns
    -------
    ts : ndarray, shape (n_subj, n_freqs, p * (p + 1) // 2)
        Tangent-space vectors.
    y
        Targets as returned by the selected helper.
    """
    if mode == "common":
        X, y = project_common_space(subjects, rank, picks)
    elif mode == 'own':
        # NOTE(review): identical to "common" -- confirm whether a dedicated
        # per-subject projection was intended here.
        X, y = project_common_space(subjects, rank, picks)
    elif mode == 'spoc':
        X, y = spoc(subjects, rank, picks)
    elif mode == "csf":
        X, y = get_covs_and_ages(subjects, picks=picks)
        X = pcs(X, rank, common_f=True)
    elif mode == "cs":
        X, y = get_covs_and_ages(subjects, picks=picks)
        X = pcs(X, rank, common_f=False)
    else:
        X, y = get_covs_and_ages(subjects, picks=picks)

    print("projecting in the tangent space")
    n_subj, n_freqs, p, _ = X.shape
    if reg:
        # Broadcasts over the (subject, frequency) axes; still in place.
        X += reg * np.eye(p)

    ts = np.zeros((n_subj, n_freqs, p * (p + 1) // 2))
    n_s_train = 100
    for f in range(n_freqs):
        # Draw the reference subset from the subjects actually present. The
        # previous hard-coded pool of 640 indexed out of bounds for smaller
        # cohorts and ignored subjects beyond 640 for larger ones.
        sl = np.random.permutation(n_subj)[:n_s_train]
        ts[:, f, :] = TangentSpace().fit(X[sl, f, :, :]).transform(
            X[:, f, :, :])
    return ts, y
def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Uses sklearn to fit a model given inputs and targets

    Args:
        inputs: list containing (N trials * M channels) data segments of
            length(number of features).
        targets: list containing (N trials * M channels) of marker data
            (0 or 1).
        classifier: existing classifier object; if None, one is created from
            ``pipeline``. It is (re)fitted on ``inputs``/``targets`` either
            way.
        pipeline: name of pipeline to create if classifier is None
            (case-insensitive). One of 'vect_lr', 'vect_reglda',
            'xdawn_reglda', 'erpcov_ts', 'erpcov_mdm'.

    Returns:
        classifier: the fitted classifier object

    Raises:
        ValueError: if neither ``classifier`` nor ``pipeline`` is given.
        KeyError: if ``pipeline`` is not a known pipeline name.
    """
    def _vect_reglda():
        return make_pipeline(Vectorizer(),
                             LDA(shrinkage='auto', solver='eigen'))

    # Factories rather than instances: only the requested pipeline is built.
    pipeline_factories = {
        'vect_lr': lambda: make_pipeline(
            Vectorizer(), StandardScaler(), LogisticRegression()),
        # Historical misspelling kept so existing callers keep working.
        'vecct_reglda': _vect_reglda,
        'vect_reglda': _vect_reglda,
        'xdawn_reglda': lambda: make_pipeline(
            Xdawn(2, classes=[1]), Vectorizer(),
            LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts': lambda: make_pipeline(
            ERPCovariances(), TangentSpace(), LogisticRegression()),
        'erpcov_mdm': lambda: make_pipeline(ERPCovariances(), MDM()),
    }

    if classifier is None:
        if not pipeline:
            raise ValueError(
                "either a classifier or a pipeline name must be provided")
        classifier = pipeline_factories[pipeline.lower()]()

    classifier.fit(inputs, targets)
    return classifier
def svm_tangent_space_cross_validate(data):
    """Cross-validate a covariance / tangent-space / SVM pipeline.

    The train and test splits in ``data`` are pooled and scored with a
    shuffled 5-fold cross validation.

    Parameters
    ----------
    data : dict
        Must provide 'train_x', 'test_x', 'train_y' and 'test_y'.

    Returns
    -------
    scores
        The array returned by ``cross_val_score``.
    """
    # Pool both splits; the folds below redefine train/test.
    features = np.concatenate((data['train_x'], data['test_x']), axis=0)
    targets = np.concatenate((data['train_y'], data['test_y']), axis=0)

    steps = [
        ('cov_transform', Covariances(estimator='lwf')),
        ('tangent_space', TangentSpace(metric='riemann')),
        ('svm_classify', SVC(kernel='rbf', gamma='auto')),
    ]
    model = Pipeline(steps)

    folds = KFold(5, shuffle=True)
    scores = cross_val_score(model, features, targets, cv=folds)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    print('\n')
    return scores
def xdawn_embedding(data, use_xdawn):
    """Embed the test split of ``data`` in a low-dimensional space.

    Parameters
    ----------
    data : dict
        Must provide 'train_x', 'train_y' and 'test_x'.
    use_xdawn : bool
        If truthy, Xdawn covariances fitted on the training split are
        embedded with a Riemannian ``Embedding``; otherwise tangent-space
        features are reduced with UMAP.

    Returns
    -------
    array
        The embedding of the test split.
    """
    if not use_xdawn:
        tangent_space = Pipeline([
            ('cov_transform', Covariances(estimator='lwf')),
            ('tangent_space', TangentSpace(metric='riemann')),
        ])
        fitted = tangent_space.fit(data['train_x'], data['train_y'])
        t_space = fitted.transform(data['test_x'])
        reducer = umap.UMAP(n_neighbors=30, min_dist=1, spread=2)
        return reducer.fit_transform(t_space)

    nfilter = 3
    xdwn = XdawnCovariances(estimator='scm', nfilter=nfilter)
    fitted = xdwn.fit(data['train_x'], data['train_y'])
    covs = fitted.transform(data['test_x'])
    lapl = Embedding(metric='riemann', n_components=3)
    return lapl.fit_transform(covs)
def test_TangentSpace_transform():
    """Transform works on the fitted set and rejects malformed input."""
    covset = generate_cov(10, 3)
    ts = TangentSpace(metric='riemann')
    ts.fit(covset)
    ts.transform(covset)

    # Each of these shapes must be rejected with ValueError.
    bad_shapes = [(10, 9), (10, 9, 8), (10), (12, 8, 8)]
    for shape in bad_shapes:
        assert_raises(ValueError, ts.transform, np.zeros(shape=shape))
def erpcov_ts_lr():
    """Return an ERP-covariance, tangent-space, logistic-regression pipeline."""
    covariances = ERPCovariances(estimator="oas")
    tangent_space = TangentSpace()
    classifier = LogisticRegression(
        solver="liblinear",
        C=1.0,
        class_weight="balanced",
        penalty="l1",
    )
    return make_pipeline(covariances, tangent_space, classifier)
def tangent_space_classifier(features, labels, classifier):
    """Fit a covariance / tangent-space pipeline with the chosen classifier.

    Parameters
    ----------
    features : array
        Input passed to the pipeline's ``fit``.
    labels : array
        True labels.
    classifier : string
        'svc' for a support vector machine or 'rf' for a random forest.

    Returns
    -------
    sklearn classifier
        The fitted pipeline, or None (after printing a message) when
        ``classifier`` is not one of the supported options.
    """
    if classifier == 'svc':
        final_step = SVC(kernel='rbf', gamma='auto',
                         decision_function_shape='ovr')
    elif classifier == 'rf':
        final_step = RandomForestClassifier(n_estimators=100, oob_score=True)
    else:
        print("Please select the appropriate classifier ")
        return

    # Both options share the same feature-extraction front end.
    clf = Pipeline([
        ('covariance_transform', Covariances(estimator='scm')),
        ('tangent_space', TangentSpace(metric='riemann')),
        ('classifier', final_step),
    ])
    clf.fit(features, labels)
    return clf
def subject_independent_cov_data(config):
    """Get subject independent covariance data (pooled data).

    Parameters
    ----------
    config : yaml
        The configuration; 'clean_emg_data' and 'subjects' are read.

    Returns
    -------
    features, labels, leave_tags
        Pooled tangent-space features and labels after random
        under-sampling, plus a matching column of tags (all set to 1 here).
    """
    data_path = Path(__file__).parents[2] / config['clean_emg_data']
    data = dd.io.load(str(data_path))

    x_parts = []
    y_parts = []
    # Seeded with the same empty float column the tags start from.
    tag_parts = [np.empty((0, 1))]
    for subject in config['subjects']:
        record = data['subject_' + subject]
        cov_temp = Covariances().transform(record['features'])
        x_parts.append(TangentSpace(metric='riemann').transform(cov_temp))
        y_temp = record['labels']
        y_parts.append(y_temp)
        tag_parts.append(y_temp[:, 0:1] * 0 + 1)

    leave_tags = np.concatenate(tag_parts, axis=0)
    x = np.concatenate(x_parts, axis=0)
    y = np.concatenate(y_parts, axis=0)

    # Balance the dataset; only the selected indices are used afterwards.
    rus = RandomUnderSampler()
    rus.fit_resample(y, y)
    keep = rus.sample_indices_

    return x[keep, :], y[keep, :], leave_tags[keep, :]
def test_TangentSpace_inversetransform():
    """Round-trip transform / inverse_transform returns the input set."""
    covmats = generate_cov(10, 3)
    mapper = TangentSpace(metric='riemann')
    mapper.fit(covmats)
    tangent_vectors = mapper.transform(covmats)
    recovered = mapper.inverse_transform(tangent_vectors)
    assert_array_almost_equal(covmats, recovered)
def test_TangentSpace_inversetransform_without_fit():
    """Inverse transform on an unfitted instance recovers the input set."""
    covmats = generate_cov(10, 3)
    # Tangent vectors come from a separate, fitted 'identity' instance.
    tangent_vectors = TangentSpace(metric='identity').fit_transform(covmats)

    unfitted = TangentSpace(metric='riemann')
    recovered = unfitted.inverse_transform(tangent_vectors)
    assert_array_almost_equal(covmats, recovered)
def test_TS_matdim_error(get_covmats):
    """transform rejects non-square and 4-D input with ValueError."""
    n_trials, n_channels = 4, 3
    ts = TangentSpace()

    not_square_mat = np.empty((n_trials, n_channels, n_channels + 1))
    with pytest.raises(ValueError):
        ts.transform(not_square_mat)

    too_many_dim = np.empty((1, 2, 3, 4))
    with pytest.raises(ValueError):
        ts.transform(too_many_dim)
class Riemann(BaseEstimator, TransformerMixin):
    """Tangent-space projection wrapped as a scikit-learn transformer.

    ``fit`` learns a ``TangentSpace`` with the configured metric and
    ``transform`` returns a one-column DataFrame ('cov') holding one
    tangent vector per row.
    """

    def __init__(self, metric='wasserstein'):
        self.metric = metric

    def fit(self, X, y=None):
        """Fit the tangent space on ``X``; ``y`` is ignored."""
        X = np.array(list(np.squeeze(X)))
        self.ts = TangentSpace(metric=self.metric).fit(X)
        return self

    def transform(self, X):
        """Project ``X`` and return the vectors in a 'cov' DataFrame column."""
        X = np.array(list(np.squeeze(X)))
        # Unpacking also rejects input that is not 3-D after squeezing.
        n_sub, _, _ = X.shape
        Xout = self.ts.transform(X)
        return pd.DataFrame({'cov': list(Xout.reshape(n_sub, -1))})
class Riemann(BaseEstimator, TransformerMixin):
    """Tangent-space projection wrapped as a scikit-learn transformer."""

    def __init__(self, metric='wasserstein', return_data_frame=True):
        self.metric = metric
        self.return_data_frame = return_data_frame

    def fit(self, X, y=None):
        """Fit a ``TangentSpace`` on the checked input; ``y`` is ignored."""
        checked = _check_data(X)
        self.ts = TangentSpace(metric=self.metric).fit(checked)
        return self

    def transform(self, X):
        """Project ``X``; wrap the result in a DataFrame when configured."""
        projected = self.ts.transform(_check_data(X))
        if not self.return_data_frame:
            return projected
        return pd.DataFrame(projected)  # (sub, c*(c+1)/2)
def forest_tangent_space_cross_validate(data, cv=False): """A cross validated tangent space classifier with svm. Parameters ---------- data : dict A dictionary containing training and testing data Returns ------- cross validated scores A list of cross validated scores. """ # Construct sklearn pipeline clf = Pipeline([('cov_transform', Covariances('lwf')), ('tangent_space', TangentSpace(metric='riemann')), ('random_forest_classify', RandomForestClassifier(n_estimators=20, max_depth=10, random_state=43))]) if cv: # Combine the dataset x = np.concatenate((data['train_x'], data['test_x']), axis=0) y = np.concatenate((data['train_y'], data['test_y']), axis=0) # cross validation scores = cross_val_score(clf, x, y, cv=KFold(5, shuffle=True)) print("Accuracy: %0.4f (+/- %0.4f)" % (scores.mean(), scores.std() * 2)) print('\n') else: clf = RandomForestClassifier(n_estimators=20, max_depth=10, random_state=43) plt.style.use('clean') y_train = np.argmax(data['train_y'], axis=1) + 1 y_test = np.argmax(data['test_y'], axis=1) + 1 classifier = clf.fit(data['train_x'], y_train) plot_confusion_matrix(classifier, data['test_x'], y_test, normalize='true', cmap=plt.cm.Blues) return None
def svm_tangent_space_classifier(features, labels):
    """Fit a covariance / tangent-space / SVM pipeline.

    Parameters
    ----------
    features : array
        Input passed to the pipeline's ``fit``.
    labels : array
        True labels.

    Returns
    -------
    sklearn classifier
        The fitted pipeline.
    """
    steps = [
        ('cov_transform', Covariances('oas')),
        ('tangent_space', TangentSpace(metric='riemann')),
        ('svm_classify', SVC(kernel='rbf', gamma='auto')),
    ]
    model = Pipeline(steps)
    model.fit(features, labels)
    return model
def classify_tangentSpace_features(clf, emg_data, flag):
    """Extract tangent-space features from the epochs and classify them.

    Parameters
    ----------
    clf : trained sklearn classifier
        A classifier exposing ``predict`` and, for ``flag='log_proba'``,
        ``predict_log_proba``.
    emg_data : numpy array
        epoched emg data with size epochs x channels x samples
    flag : string
        'log_proba' to return the maximum log-probability per epoch; any
        other value returns the predicted label.

    Returns
    -------
    numpy array
        Maximum log-probability per epoch, or the predicted labels.
    """
    covariances = Covariances().fit_transform(emg_data)
    # The tangent space is re-fitted on the data being classified.
    features = TangentSpace().fit_transform(covariances)

    if flag != 'log_proba':
        return clf.predict(features)
    return np.amax(clf.predict_log_proba(features), axis=1)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.cross_validation import KFold
from sklearn.metrics import roc_auc_score

from utils import (DownSampler, EpochsVectorizer, CospBoostingClassifier,
                   epoch_data)

dataframe1 = pd.read_csv('ecog_train_with_labels.csv')

array_clfs = OrderedDict()

# ERPs models
array_clfs['XdawnCov'] = make_pipeline(
    XdawnCovariances(6, estimator='oas'),
    TangentSpace(metric='riemann'),
    LogisticRegression(penalty='l2'),
)
array_clfs['Xdawn'] = make_pipeline(
    Xdawn(12, estimator='oas'),
    DownSampler(5),
    EpochsVectorizer(),
    LogisticRegression(penalty='l2'),
)

# Induced activity models
electrode_metric = dict(mean='logeuclid', distance='riemann')
baseclf = make_pipeline(
    ElectrodeSelection(10, metric=electrode_metric),
    TangentSpace(metric='riemann'),
    LogisticRegression(penalty='l1'),
)
array_clfs['Cosp'] = make_pipeline(
    CospCovariances(fs=1000, window=32, overlap=0.95, fmax=300, fmin=1),
    CospBoostingClassifier(baseclf),
)
def __init__(self, n_fb=9, metric='wasserstein'):
    """Create one unfitted ``TangentSpace`` per frequency band.

    ``n_fb`` is stored on the instance; ``metric`` is only forwarded to
    each ``TangentSpace``.
    """
    self.n_fb = n_fb
    self.ts = [TangentSpace(metric=metric) for _ in range(n_fb)]
def test_TangentSpace_transform_with_ts_update():
    """Fit then transform runs with tsupdate enabled."""
    covmats = generate_cov(10, 3)
    mapper = TangentSpace(metric='riemann', tsupdate=True)
    mapper.fit(covmats)
    mapper.transform(covmats)
def test_TangentSpace_transform():
    """Fit then transform runs on the same covariance set."""
    covmats = generate_cov(10, 3)
    mapper = TangentSpace(metric='riemann')
    mapper.fit(covmats)
    mapper.transform(covmats)
def test_TangentSpace_fit():
    """Fit runs on a generated covariance set."""
    covmats = generate_cov(10, 3)
    mapper = TangentSpace(metric='riemann')
    mapper.fit(covmats)
def test_TangentSpace_transform_without_fit():
    """Transform runs on an instance that was never fitted."""
    covmats = generate_cov(10, 3)
    unfitted = TangentSpace(metric='riemann')
    unfitted.transform(covmats)
# Create pipelines
# ----------------
#
# Pipelines must be a dict of sklearn pipeline transformers.
#
# The CSP implementation from MNE is used. We selected 8 CSP components, as
# is usually done in the literature.
#
# The Riemannian geometry pipeline consists of covariance estimation, tangent
# space mapping and finally a logistic regression for the classification.

pipelines = {
    'CSP + LDA': make_pipeline(CSP(n_components=8), LDA()),
    'RG + LR': make_pipeline(Covariances(),
                             TangentSpace(),
                             LogisticRegression()),
}

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (LeftRightImagery) and the dataset (BNCI2014001).
# The evaluation will return a dataframe containing a single AUC score for
# each subject / session of the dataset, and for each pipeline.
#
# Results are saved into the database, so that if you add a new pipeline, it
# will not run the evaluation again unless a parameter has changed. Results
# can be overwritten if necessary.

paradigm = LeftRightImagery()
def train_classifiers(data_files, valid_runs_dict_uiuc, valid_runs_dict_whasc):
    '''
    Train and compare tangent-space and correlation-based classifiers on
    each data file over 100 stratified shuffle splits.

    Parameters:
        data_files (list of pairs (filename,data)): the input data
        valid_runs_dict_uiuc (dictionary): not used by this function
        valid_runs_dict_whasc (dictionary): not used by this function

    Returns (in this order):
        accDict (dictionary): mean accuracies, stored under 'raw_data'
        corrDict (dictionary): mean norm of the feature correlation matrix
            before and after projection, stored under 'raw_data'
        spearDict (dictionary): same as corrDict for Spearman correlation
        matDict (dictionary): mean confusion matrix for each file
        simDict (dictionary): mean cosine similarity of classifier
            coefficients for each file
    '''
    accDict = {}
    simDict = {}
    matDict = {}
    corrDict = {}
    spearDict = {}
    # NOTE(review): created once, so every file's simDict entry refers to
    # the same growing list -- confirm this is intended.
    simArr = []

    for fname, data in data_files:
        # get time series data to make covariance matrices
        X = np.array([sample['TimeSeries'] for sample in data['samples']])
        y = np.array([
            get_label_8(sample['Group'], sample['Location'])
            for sample in data['samples']
        ])

        # gsr seems to produce a rank deficient covariance matrix, so oas
        # regularization is necessary
        covest = Covariances()
        ts = TangentSpace()
        svc = SVC(kernel='linear')
        clf_riem = make_pipeline(covest, ts, svc)
        rf = RandomForestClassifier(200)
        # Shares covest and ts with clf_riem on purpose: both pipelines are
        # fitted on the same training data, and ts is reused below.
        clf_rf = make_pipeline(covest, ts, rf)
        covest2 = Correlations()
        svc2 = SVC(kernel='linear')
        get_tri_inds = to_upper_tri(0)
        clf_cov = make_pipeline(covest2, get_tri_inds, svc2)

        # Monte Carlo, in theory should run this len(y)^2 times, but I need
        # to save my poor computer's memory.
        accRiemList = []
        accCovList = []
        accRfList = []
        coeffArr = []
        matRiemList = []
        corrArrBefore = []
        corrArrAfter = []
        spearArrBefore = []
        spearArrAfter = []

        rs = StratifiedShuffleSplit(n_splits=100, test_size=.3)
        for train_inds, test_inds in rs.split(X, y):
            X_train, X_test = X[train_inds], X[test_inds]
            y_train, y_test = y[train_inds], y[test_inds]
            X_train_cov, X_test_cov = X_train.copy(), X_test.copy()
            y_train_cov, y_test_cov = y_train.copy(), y_test.copy()

            clf_riem.fit(X_train, y_train)
            clf_rf.fit(X_train, y_train)
            clf_cov.fit(X_train_cov, y_train_cov)

            # get riemann svm coefficients
            coeffArr.append(clf_riem[2].coef_)

            # correlation structure of the raw upper-triangular features
            raw_feats = np.vstack(
                [x[np.triu_indices(33)].flatten() for x in X_train])
            corr_coeffs_before = np.corrcoef(raw_feats, rowvar=False)
            corrArrBefore.append(np.linalg.norm(corr_coeffs_before))
            spearman_coeffs_before, _ = scipy.stats.spearmanr(raw_feats,
                                                              axis=0)
            spearArrBefore.append(np.linalg.norm(spearman_coeffs_before))

            # correlation structure after tangent-space projection
            covs = covest.transform(X_train)
            mapped = ts.transform(covs)
            corr_coeffs_after = np.corrcoef(mapped, rowvar=False)
            # Unpack the result as for the "before" value: taking the norm of
            # the whole result object would mix the p-values into it.
            spearman_coeffs_after, _ = scipy.stats.spearmanr(mapped, axis=0)
            corrArrAfter.append(np.linalg.norm(corr_coeffs_after))
            spearArrAfter.append(np.linalg.norm(spearman_coeffs_after))

            y_pred = clf_riem.predict(X_test)
            y_pred_cov = clf_cov.predict(X_test_cov)
            y_pred_rf = clf_rf.predict(X_test)

            # save accuracy
            accRiemList.append(accuracy_score(y_pred, y_test))
            accCovList.append(accuracy_score(y_pred_cov, y_test_cov))
            accRfList.append(accuracy_score(y_pred_rf, y_test))

            # confusion matrix
            mat = confusion_matrix(y_test,
                                   y_pred,
                                   normalize='true',
                                   labels=[0, 1, 2, 3, 4, 5, 6, 7])
            matRiemList.append(mat)

        # stability of each one-vs-one coefficient vector across splits
        for z in range(0, len(coeffArr[0])):
            class_z_coeffs = [x[z] for x in coeffArr]
            cos_sim = cosine_similarity(class_z_coeffs)
            upperTri = cos_sim[np.triu_indices(cos_sim.shape[0], 1)]
            cos_avg = np.mean(upperTri.flatten())
            simArr.append(cos_avg)

        avgMatRiem = sum(matRiemList) / len(matRiemList)
        simDict.update({fname: simArr})
        matDict.update({fname: avgMatRiem})

        riemAcc = np.mean(accRiemList)
        covAcc = np.mean(accCovList)
        rfAcc = np.mean(accRfList)

        # NOTE(review): keyed by 'raw_data' rather than fname, so with
        # several files only the last one survives -- confirm with callers
        # before changing the key.
        accDict.update(
            {'raw_data': {
                'riem': riemAcc,
                'rf': rfAcc,
                'cov': covAcc
            }})
        corrDict.update({
            'raw_data': {
                'before': np.mean(corrArrBefore),
                'after': np.mean(corrArrAfter)
            }
        })
        spearDict.update({
            'raw_data': {
                'before': np.mean(spearArrBefore),
                'after': np.mean(spearArrAfter)
            }
        })

        print("Mean Accuracy w/ Riemann on data " + fname + ": " +
              str(riemAcc))
        print("Mean Accuracy w/ Cov on data " + fname + ": " + str(covAcc))
        print("Mean Accuracy w/ RF on data " + fname + ": " + str(rfAcc))
        print("----------------")

    return accDict, corrDict, spearDict, matDict, simDict
def _bootstrap_mean_coeffs(clf, X, y, n_splits, num_pairs):
    """Return the mean standardized SVM coefficients over shuffle splits."""
    coeffs = []
    rs = ShuffleSplit(n_splits=n_splits, test_size=.3)
    for train, _ in rs.split(X):
        clf.fit(X[train], y[train])
        coef = clf[4].coef_
        # Scale each one-vs-one coefficient vector by its own std.
        coeffs.append(coef / np.std(coef, axis=-1).reshape(num_pairs, 1))
    return sum(coeffs) / len(coeffs)


def permutation_bootstrap(samples, labels, n_states, rois, p=5,
                          n_permutations=100):
    '''
    Perform permutation bootstrap to find discriminative connections.

    A null distribution of extreme coefficients is built by refitting the
    classifier on randomly permuted labels; the coefficients obtained with
    the true labels are then compared against it by get_sig_pairs.

    Parameters:
        samples (ndarray shape (n_samples, n_channels, n_vars)): The input
            dataset
        labels (ndarray shape (n_samples)): The input labels
        n_states (int): Number of distinct classes
        rois: Forwarded unchanged to get_sig_pairs
        p (int): Percentile of significance (default 5)
        n_permutations (int): Number of label permutations used for the
            null model (default 100)

    Returns:
        sig_pairs_max: First value returned by get_sig_pairs in "max" mode
        sig_pairs_min: First value returned by get_sig_pairs in "min" mode
    '''
    X = samples
    y = labels
    NUM_BOOTSTRAP = 10

    covest = Covariances()
    ts = TangentSpace()
    sym = to_symm_mat(0, X.shape[1])
    diag = to_upper_tri(1)
    svc = SVC(kernel='linear')
    clf_riem = make_pipeline(covest, ts, sym, diag, svc)

    # One coefficient vector per pair of classes (one-vs-one SVM).
    num_pairs = int(scipy.special.binom(n_states, 2))

    # Null model: randomly permute labels (only labels, not training input).
    maxcoeffs = []
    mincoeffs = []
    for _ in range(n_permutations):
        y_permuted = np.random.permutation(y)
        null_mean = _bootstrap_mean_coeffs(clf_riem, X, y_permuted,
                                           NUM_BOOTSTRAP, num_pairs)
        maxcoeffs.append(np.max(null_mean, axis=-1))
        mincoeffs.append(np.min(null_mean, axis=-1))

    # Observed coefficients with the true labels.
    meancoeff = _bootstrap_mean_coeffs(clf_riem, X, y, NUM_BOOTSTRAP,
                                       num_pairs)

    sig_pairs_max, _ = get_sig_pairs(meancoeff, maxcoeffs, num_pairs, p,
                                     rois, "max")
    sig_pairs_min, _ = get_sig_pairs(meancoeff, mincoeffs, num_pairs, p,
                                     rois, "min")
    return sig_pairs_max, sig_pairs_min
# Also, use a specific resampling. In this example, all datasets are # set to 200 Hz. paradigm = LeftRightImagery(channels=['C3', 'C4', 'Cz'], resample=200.) ############################################################################## # Evaluation # ---------- # # The evaluation is conducted on with CSP+LDA, only on the 3 electrodes, with # a sampling rate of 200 Hz. evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=datasets) csp_lda = make_pipeline(CSP(n_components=2), LDA()) ts_lr = make_pipeline(Covariances(estimator='oas'), TangentSpace(metric='riemann'), LR(C=1.0)) results = evaluation.process({'csp+lda': csp_lda, 'ts+lr': ts_lr}) print(results.head()) ############################################################################## # Electrode selection # ------------------- # # It is possible to select the electrodes that are shared by all datasets # using the `find_intersecting_channels` function. Datasets that have 0 # overlap with others are discarded. It returns the set of common channels, # as well as the list of datasets with valid channels. electrodes, datasets = find_intersecting_channels(datasets) evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=datasets,
clean=True, physical=True, downsample=False) data_size = y_train.shape[0] shuffle_index = utils.shuffle_data(data_size) x_train = x_train[shuffle_index] x_train = np.squeeze(x_train) y_train = y_train[shuffle_index] # Build Model xd = XdawnCovariances(nfilter=5, applyfilters=True, estimator='lwf') # es = ElectrodeSelection(nelec=25, metric='riemann') ts = TangentSpace(metric='logeuclid') lr = LogisticRegression(solver='liblinear', max_iter=200, C=0.01) model = Pipeline([('xDAWN', xd), ('TangentSpace', ts), ('LR', lr)]) model.fit(x_train, y_train) # Test Model y_pred = np.argmax(model.predict_proba(np.squeeze(x_test)), axis=1) bca = utils.bca(y_test, y_pred) acc = np.sum(y_pred == y_test).astype(np.float32) / len(y_pred) print('{}: acc-{} bca-{}'.format(data_name, acc, bca)) # poison performance test_asr = [] for test_param in params:
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

############################# PyRiemann Portion ##############################

# code is taken from PyRiemann's ERP sample script, which is decoding in
# the tangent space with a logistic regression

n_components = 2  # pick some components

# set up sklearn pipeline
clf = make_pipeline(XdawnCovariances(n_components),
                    TangentSpace(metric='riemann'),
                    LogisticRegression())

# reshape back to (trials, channels, samples)
X_train = X_train.reshape(X_train.shape[0], chans, samples)
X_test = X_test.reshape(X_test.shape[0], chans, samples)

# train a classifier with xDAWN spatial filtering + Riemannian Geometry (RG)
# labels need to be back in single-column format
# NOTE: `history` now holds the value returned by the pipeline's fit.
history = clf.fit(X_train, Y_train.argmax(axis=-1))
preds_rg = clf.predict(X_test)

# Printing the results
acc2 = np.mean(preds_rg == Y_test.argmax(axis=-1))
print("Classification accuracy: %f " % (acc2))