def test_MDM_transform():
    """Smoke-test MDM.transform on a fitted classifier."""
    n_trials, n_channels = 100, 3
    covset = generate_cov(n_trials, n_channels)
    labels = np.repeat(np.array([0, 1]), n_trials // 2)

    classifier = MDM(metric='riemann')
    classifier.fit(covset, labels)
    classifier.transform(covset)
def test_MDM_predict():
    """Smoke-test MDM.predict on a fitted classifier."""
    covset = generate_cov(100, 3)
    labels = np.repeat(np.array([0, 1]), 50)

    classifier = MDM(metric='riemann')
    classifier.fit(covset, labels)
    classifier.predict(covset)
def test_MDM_transform():
    """Check that a fitted MDM can compute distances via transform."""
    covset = generate_cov(100, 3)
    labels = np.array(50 * [0] + 50 * [1])

    mdm_clf = MDM(metric='riemann')
    mdm_clf.fit(covset, labels)
    mdm_clf.transform(covset)
def test_MDM_predict():
    """Check that a fitted MDM can produce class predictions."""
    covset = generate_cov(100, 3)
    labels = np.array(50 * [0] + 50 * [1])

    mdm_clf = MDM(metric='riemann')
    mdm_clf.fit(covset, labels)
    mdm_clf.predict(covset)
def check_other_classifiers(train_X, train_y, test_X, test_y):
    """Benchmark Riemannian classifiers on covariance features.

    Converts one-hot label matrices back to integer class indices, evaluates
    a tangent-space classifier and an MDM classifier by 10-fold CV and on the
    held-out test set, then plots the two MDM class-mean covariance matrices
    and saves the figure to ``meancovmat.png``.

    Parameters: trial arrays (trials x channels x samples) and one-hot label
    matrices for train and test splits.
    """
    from pyriemann.classification import MDM, TSclassifier
    from sklearn.linear_model import LogisticRegression
    from pyriemann.estimation import Covariances
    from sklearn.pipeline import Pipeline
    from mne.decoding import CSP
    import seaborn as sns
    import pandas as pd

    # one-hot rows -> integer class indices
    train_y = [np.where(i == 1)[0][0] for i in train_y]
    test_y = [np.where(i == 1)[0][0] for i in test_y]
    cov_data_train = Covariances().transform(train_X)
    cov_data_test = Covariances().transform(test_X)

    # FIX: a random_state requires shuffle=True; modern scikit-learn raises
    # ValueError for KFold(n_splits=10, random_state=42) with shuffle=False.
    cv = KFold(n_splits=10, shuffle=True, random_state=42)

    clf = TSclassifier()
    scores = cross_val_score(clf, cov_data_train, train_y, cv=cv, n_jobs=1)
    print("Tangent space Classification accuracy: ", np.mean(scores))
    clf = TSclassifier()
    clf.fit(cov_data_train, train_y)
    print(clf.score(cov_data_test, test_y))

    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    scores = cross_val_score(mdm, cov_data_train, train_y, cv=cv, n_jobs=1)
    print("MDM Classification accuracy: ", np.mean(scores))

    # refit on all training data to plot the per-class mean covariances
    mdm = MDM()
    mdm.fit(cov_data_train, train_y)
    fig, axes = plt.subplots(1, 2)
    ch_names = [ch for ch in range(8)]  # assumes 8 channels — TODO confirm
    df = pd.DataFrame(data=mdm.covmeans_[0], index=ch_names, columns=ch_names)
    g = sns.heatmap(df, ax=axes[0], square=True, cbar=False, xticklabels=2,
                    yticklabels=2)
    # NOTE(review): class 0 labeled 'feet', class 1 'hands' — verify mapping
    g.set_title('Mean covariance - feet')
    df = pd.DataFrame(data=mdm.covmeans_[1], index=ch_names, columns=ch_names)
    g = sns.heatmap(df, ax=axes[1], square=True, cbar=False, xticklabels=2,
                    yticklabels=2)
    plt.xticks(rotation='vertical')
    plt.yticks(rotation='horizontal')
    g.set_title('Mean covariance - hands')

    # dirty fix: re-apply tick rotations on the first axes as well
    plt.sca(axes[0])
    plt.xticks(rotation='vertical')
    plt.yticks(rotation='horizontal')
    plt.savefig("meancovmat.png")
    plt.show()
def create_mdm(raw, event_id):
    """Epoch a raw MNE recording, build covariance features, and fit an MDM.

    Parameters: ``raw`` is an MNE Raw object with a 'STI 014' stim channel;
    ``event_id`` maps event names to event codes. Returns the fitted MDM.
    """
    tmin, tmax = -1., 4.
    events = find_events(raw, shortest_event=0, stim_channel='STI 014')
    epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True,
                    baseline=None, preload=True, verbose=False)
    # event codes from the last events column serve directly as class labels
    labels = epochs.events[:, -1]
    # drops the last channel — presumably the stim channel; TODO confirm
    epochs_data_train = epochs.get_data()[:, :-1]
    cov_data_train = Covariances().transform(epochs_data_train)
    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    mdm.fit(cov_data_train, labels)
    return mdm
def erp_cov_vr_pc(X_training, labels_training, X_test, labels_test,
                  class_name, class_info):
    """Score an MDM classifier on extended ERP covariances.

    Fits ERPCovariances (Ledoit-Wolf estimator) on the training split only,
    transforms both splits, trains MDM, and returns the ROC-AUC of the hard
    predictions on the test split.
    """
    # estimate the extended ERP covariance matrices on training data only
    erp_estimator = ERPCovariances(classes=[class_info[class_name]],
                                   estimator='lwf')
    erp_estimator.fit(X_training, labels_training)
    covs_training = erp_estimator.transform(X_training)
    covs_test = erp_estimator.transform(X_test)

    # classify with minimum distance to mean and score by AUC
    classifier = MDM()
    classifier.fit(covs_training, labels_training)
    predictions = classifier.predict(covs_test)
    return roc_auc_score(labels_test, predictions)
class FgMDM2(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Minimum-distance-to-mean classification after geodesic filtering.

    Pipelines an FGDA spatial filter with an MDM classifier: covariances are
    filtered by FGDA, then classified by distance to class means.
    """

    def __init__(self, metric='riemann', tsupdate=False, n_jobs=1):
        """Init."""
        self.metric = metric
        self.n_jobs = n_jobs
        self.tsupdate = tsupdate
        # resolve the mean metric from either a string or a dict spec
        if isinstance(metric, str):
            self.metric_mean = metric
        elif isinstance(metric, dict):
            # both keys must be present in a dict spec
            for key in ('mean', 'distance'):
                if key not in metric:
                    raise KeyError('metric must contain "mean" and "distance"')
            self.metric_mean = metric['mean']
        else:
            raise TypeError('metric must be dict or str')

    def fit(self, X, y):
        """Fit FGDA on (X, y), then fit MDM on the filtered covariances."""
        self.classes_ = unique_labels(y)
        self._mdm = MDM(metric=self.metric, n_jobs=self.n_jobs)
        self._fgda = FGDA(metric=self.metric_mean, tsupdate=self.tsupdate)
        filtered = self._fgda.fit_transform(X, y)
        self._mdm.fit(filtered, y)
        return self

    def predict(self, X):
        """Predict class labels for geodesically filtered covariances."""
        return self._mdm.predict(self._fgda.transform(X))

    def predict_proba(self, X):
        """Predict class probabilities for filtered covariances."""
        return self._mdm.predict_proba(self._fgda.transform(X))

    def transform(self, X):
        """Return distances to each class mean after filtering."""
        return self._mdm.transform(self._fgda.transform(X))
def test_MDM_predict():
    """Exercise the full MDM prediction API on synthetic covariances."""
    covset = generate_cov(100, 3)
    labels = np.repeat(np.array([0, 1]), 50)

    # plain fit + predict
    classifier = MDM(metric='riemann')
    classifier.fit(covset, labels)
    classifier.predict(covset)

    # fit_predict in a single call
    classifier = MDM(metric='riemann')
    classifier.fit_predict(covset, labels)
    # distances to class means
    classifier.transform(covset)
    # soft (probabilistic) predictions
    classifier.predict_proba(covset)

    # parallel execution path
    classifier = MDM(metric='riemann', n_jobs=2)
    classifier.fit(covset, labels)
    classifier.predict(covset)
class DistanceCalculatorAlex(BaseEstimator, TransformerMixin):
    """Distance Calulator Based on MDM.

    Fits an MDM on subsampled labels, then emits, for each metric in
    ``metric_dist``, the per-class distances relative to class 0 as features.
    """

    def __init__(self, metric_mean='logeuclid', metric_dist=['riemann'],
                 n_jobs=7, subsample=10):
        """Init.

        NOTE(review): metric_dist is a mutable default argument — shared
        across instances; confirm this is intentional before changing.
        """
        self.metric_mean = metric_mean
        self.metric_dist = metric_dist
        self.n_jobs = n_jobs
        self.subsample = subsample

    def fit(self, X, y):
        """Fit.

        Labels are derived from ``create_sequence(y.T)`` (defined elsewhere)
        and subsampled by ``self.subsample`` before fitting the MDM.
        """
        self.mdm = MDM(metric=self.metric_mean, n_jobs=self.n_jobs)
        labels = np.squeeze(create_sequence(y.T)[::self.subsample])
        self.mdm.fit(X, labels)
        return self

    def transform(self, X, y=None):
        """Transform.

        Computes distance features once per metric in ``metric_dist`` by
        mutating the fitted MDM's ``metric_dist`` attribute in place.
        """
        feattr = []
        for metric in self.metric_dist:
            self.mdm.metric_dist = metric
            feat = self.mdm.transform(X)
            # substract distance of the class 0
            feat = feat[:, 1:] - np.atleast_2d(feat[:, 0]).T
            feattr.append(feat)
        feattr = np.concatenate(feattr, axis=1)
        # NaN distances are zeroed rather than propagated
        feattr[np.isnan(feattr)] = 0
        return feattr

    def fit_transform(self, X, y):
        """Fit and transform."""
        self.fit(X, y)
        return self.transform(X)
class wrapper_MDM(machine_learning_method):
    """wrapper for pyriemann MDM

    Adapts pyriemann's MDM to the project's machine_learning_method
    interface; ``method_args['metric']`` selects the Riemannian metric.
    """

    def __init__(self, method_name, method_args):
        super(wrapper_MDM, self).__init__(method_name, method_args)
        self.init_method()

    def init_method(self, n_jobs=1):
        # (re)creates the underlying classifier; any previous fit is lost
        self.classifier = MDM(metric=self.method_args['metric'],
                              n_jobs=n_jobs)

    def set_parallel(self, is_parallel=False, n_jobs=8):
        # re-initializes the classifier when parallelism is enabled, so the
        # wrapper must be fitted again afterwards
        logging.warning(
            'The call to this set_parallel method is reseting the class, and must be fitted again'
        )
        self.parallel = is_parallel
        self.n_jobs = n_jobs
        if self.parallel:
            self.init_method(n_jobs)

    def fit(self, X, y):
        # delegate to the wrapped MDM
        return self.classifier.fit(X, y)

    def predict(self, X):
        # delegate to the wrapped MDM
        return self.classifier.predict(X)
# Script fragment: CSP+LR pipeline scoring, then MDM centroid display.
# NOTE(review): this fragment is truncated mid-call at the final heatmap;
# the remainder of the statement lives outside this view.
csp = CSP(n_components=4, reg='ledoit_wolf', log=True)
clf = Pipeline([('CSP', csp), ('LogisticRegression', lr)])
scores = cross_val_score(clf, epochs_data_train, labels, cv=cv, n_jobs=1)
# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("CSP + LDA Classification accuracy: %f / Chance level: %f" %
      (np.mean(scores), class_balance))
###############################################################################
# Display MDM centroid
mdm = MDM()
mdm.fit(cov_data_train, labels)
fig, axes = plt.subplots(1, 2, figsize=[8, 4])
# strip trailing dots from EEG channel names for tick labels
ch_names = [ch.replace('.', '') for ch in epochs.ch_names]
df = pd.DataFrame(data=mdm.covmeans_[0], index=ch_names, columns=ch_names)
g = sns.heatmap(df, ax=axes[0], square=True, cbar=False, xticklabels=2,
                yticklabels=2)
g.set_title('Mean covariance - hands')
df = pd.DataFrame(data=mdm.covmeans_[1], index=ch_names, columns=ch_names)
g = sns.heatmap(df,
# Script fragment: sliding-window test epochs, MDM training, and a PGA
# (tangent space + PCA) pipeline for visualization.
epochs = make_fixed_length_epochs(
    raw_ext, duration=duration, overlap=duration - interval, verbose=False)
# block covariances over the frequency-extended signal; Ledoit-Wolf estimator
x_test = BlockCovariances(
    estimator='lwf', block_size=ch_count).transform(epochs.get_data())
###############################################################################
# Classification with minimum distance to mean (MDM)
# --------------------------------------------------
#
# Classification for a 4-class SSVEP BCI, including resting-state class.
print("Number of training trials: {}".format(len(x_train)))
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
mdm.fit(x_train, y_train)
###############################################################################
# Projection in tangent space with principal geodesic analysis (PGA)
# ------------------------------------------------------------------
#
# Project covariance matrices from the Riemannian manifold into the Euclidean
# tangent space at the grand average, and apply a principal component analysis
# (PCA) to obtain an unsupervised dimension reduction [1]_.
pga = make_pipeline(
    TangentSpace(metric="riemann", tsupdate=False),
    PCA(n_components=2)
)
def fit(self, X, y):
    """Grid-search over epoch bounds and bandpass filters, ranking
    CSP+LDA, MDM, and tangent-space classifiers into top-N lists.

    NOTE(review): this is Python 2 code (print statements, xrange), and one
    print line contains a redacted literal (`"bandpass:"******`) — it will
    not parse as-is; the original text was lost upstream.
    """
    # validate
    X, y = check_X_y(X, y, allow_nd=True)
    X = check_array(X, allow_nd=True)
    # set internal vars
    self.classes_ = unique_labels(y)
    self.X_ = X
    self.y_ = y
    ##################################################
    # split X into train and test sets, so that
    # grid search can be performed on train set only
    seed = 7
    np.random.seed(seed)
    #X_TRAIN, X_TEST, y_TRAIN, y_TEST = train_test_split(X, y, test_size=0.25, random_state=seed)
    for epoch_trim in self.epoch_bounds:
        for bandpass in self.bandpass_filters:
            # NOTE(review): same seed each iteration, so every setting is
            # evaluated on the identical train/test split
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.25, random_state=seed)
            # X_train = np.copy(X_TRAIN)
            # X_test = np.copy(X_TEST)
            # y_train = np.copy(y_TRAIN)
            # y_test = np.copy(y_TEST)
            # separate out inputs that are tuples
            bandpass_start, bandpass_end = bandpass
            epoch_trim_start, epoch_trim_end = epoch_trim
            # bandpass filter coefficients (5th-order Butterworth)
            b, a = butter(
                5,
                np.array([bandpass_start, bandpass_end]) / (self.sfreq * 0.5),
                'bandpass')
            # filter and crop TRAINING SET
            X_train = self.preprocess_X(X_train, b, a, epoch_trim_start,
                                        epoch_trim_end)
            # validate
            X_train, y_train = check_X_y(X_train, y_train, allow_nd=True)
            X_train = check_array(X_train, allow_nd=True)
            # filter and crop TEST SET
            X_test = self.preprocess_X(X_test, b, a, epoch_trim_start,
                                       epoch_trim_end)
            # validate
            X_test, y_test = check_X_y(X_test, y_test, allow_nd=True)
            X_test = check_array(X_test, allow_nd=True)
            ###########################################################################
            # self-tune CSP to find optimal number of filters to use at these settings
            #[best_num_filters, best_num_filters_score] = self.self_tune(X_train, y_train)
            best_num_filters = 5
            # as an option, we could tune optimal CSP filter num against complete train set
            #X_tune = self.preprocess_X(X, b, a, epoch_trim_start, epoch_trim_end)
            #[best_num_filters, best_num_filters_score] = self.self_tune(X_tune, y)
            # now use this insight to really fit with optimal CSP spatial filters
            """ reg : float | str | None (default None)
            if not None, allow regularization for covariance estimation if
            float, shrinkage covariance is used (0 <= shrinkage <= 1). if str,
            optimal shrinkage using Ledoit-Wolf Shrinkage ('ledoit_wolf') or
            Oracle Approximating Shrinkage ('oas').
            """
            transformer = CSP(n_components=best_num_filters, reg='ledoit_wolf')
            transformer.fit(X_train, y_train)
            # use these CSP spatial filters to transform train and test
            spatial_filters_train = transformer.transform(X_train)
            spatial_filters_test = transformer.transform(X_test)
            # put this back in as failsafe if NaN or inf starts cropping up
            # spatial_filters_train = np.nan_to_num(spatial_filters_train)
            # check_X_y(spatial_filters_train, y_train)
            # spatial_filters_test = np.nan_to_num(spatial_filters_test)
            # check_X_y(spatial_filters_test, y_test)
            # train LDA
            classifier = LinearDiscriminantAnalysis()
            classifier.fit(spatial_filters_train, y_train)
            score = classifier.score(spatial_filters_test, y_test)
            #print "current score",score
            print "bandpass:"******"epoch window:", epoch_trim_start, epoch_trim_end
            #print best_num_filters,"filters chosen"
            # put in ranked order Top 10 list
            idx = bisect(self.ranked_scores, score)
            self.ranked_scores.insert(idx, score)
            self.ranked_scores_opts.insert(
                idx,
                dict(bandpass=bandpass,
                     epoch_trim=epoch_trim,
                     filters=best_num_filters))
            self.ranked_classifiers.insert(idx, classifier)
            self.ranked_transformers.insert(idx, transformer)
            # cap each ranked list at num_votes entries (drop the worst)
            if len(self.ranked_scores) > self.num_votes:
                self.ranked_scores.pop(0)
            if len(self.ranked_scores_opts) > self.num_votes:
                self.ranked_scores_opts.pop(0)
            if len(self.ranked_classifiers) > self.num_votes:
                self.ranked_classifiers.pop(0)
            if len(self.ranked_transformers) > self.num_votes:
                self.ranked_transformers.pop(0)
            """ Covariance computation """
            # compute covariance matrices
            cov_data_train = covariances(X=X_train)
            cov_data_test = covariances(X=X_test)
            clf_mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
            clf_mdm.fit(cov_data_train, y_train)
            score_mdm = clf_mdm.score(cov_data_test, y_test)
            # print "MDM prediction score:",score_mdm
            # put in ranked order Top 10 list
            idx = bisect(self.ranked_scores_mdm, score_mdm)
            self.ranked_scores_mdm.insert(idx, score_mdm)
            self.ranked_scores_opts_mdm.insert(
                idx,
                dict(bandpass=bandpass,
                     epoch_trim=epoch_trim,
                     filters=best_num_filters))
            self.ranked_classifiers_mdm.insert(idx, clf_mdm)
            if len(self.ranked_scores_mdm) > self.num_votes:
                self.ranked_scores_mdm.pop(0)
            if len(self.ranked_scores_opts_mdm) > self.num_votes:
                self.ranked_scores_opts_mdm.pop(0)
            if len(self.ranked_classifiers_mdm) > self.num_votes:
                self.ranked_classifiers_mdm.pop(0)
            # tangent-space classifier on the same covariances
            clf_ts = TSclassifier()
            clf_ts.fit(cov_data_train, y_train)
            score_ts = clf_ts.score(cov_data_test, y_test)
            # put in ranked order Top 10 list
            idx = bisect(self.ranked_scores_ts, score_ts)
            self.ranked_scores_ts.insert(idx, score_ts)
            self.ranked_scores_opts_ts.insert(
                idx,
                dict(bandpass=bandpass,
                     epoch_trim=epoch_trim,
                     filters=best_num_filters))
            self.ranked_classifiers_ts.insert(idx, clf_ts)
            if len(self.ranked_scores_ts) > self.num_votes:
                self.ranked_scores_ts.pop(0)
            if len(self.ranked_scores_opts_ts) > self.num_votes:
                self.ranked_scores_opts_ts.pop(0)
            if len(self.ranked_classifiers_ts) > self.num_votes:
                self.ranked_classifiers_ts.pop(0)
            print "CSP+LDA score:", score, "Tangent space w/LR score:", score_ts
            print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
            print " T O P ", self.num_votes, " C L A S S I F I E R S"
            print
            #j=1
            for i in xrange(len(self.ranked_scores)):
                print i, ",", round(self.ranked_scores[i], 4), ",",
                print self.ranked_scores_opts[i]
            print "-------------------------------------"
            for i in xrange(len(self.ranked_scores_ts)):
                print i, ",", round(self.ranked_scores_ts[i], 4), ",",
                print self.ranked_scores_opts_ts[i]
            print "-------------------------------------"
            for i in xrange(len(self.ranked_scores_mdm)):
                print i, ",", round(self.ranked_scores_mdm[i], 4), ",",
                print self.ranked_scores_opts_mdm[i]
    # finish up, set the flag to indicate "fitted" state
    self.fit_ = True
    # Return the classifier
    return self
def get_score(subject=7):
    """Classify hands-vs-feet motor imagery for one PhysioNet EEGBCI subject.

    Returns ``[subject, mdm_score, ts_score]`` where both scores are 10-fold
    cross-validated accuracies (MDM and tangent-space classifiers) on
    covariance features.
    """
    # Set parameters and read data.
    # avoid classification of evoked responses by using epochs that start 1s
    # after cue onset.
    tmin, tmax = 1., 2.
    event_id = dict(hands=2, feet=3)
    runs = [6, 10, 14]  # motor imagery: hands vs feet
    raw_files = [
        read_raw_edf(f, preload=True) for f in eegbci.load_data(subject, runs)
    ]
    raw = concatenate_raws(raw_files)
    picks = pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False,
                       exclude='bads')
    # subsample elecs
    picks = picks[::2]
    # Apply band-pass filter
    raw.filter(7., 35., method='iir', picks=picks)
    events = find_events(raw, shortest_event=0, stim_channel='STI 014')
    # Read epochs (train will be done only between 1 and 2s)
    # Testing will be done with a running classifier
    epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True, picks=picks,
                    baseline=None, preload=True, verbose=False)
    # map event codes {2, 3} -> {0, 1}
    labels = epochs.events[:, -1] - 2

    # cross validation
    # FIX: use the modern KFold signature; KFold(len(labels), 10, ...) is the
    # pre-0.18 scikit-learn API and no longer constructs.
    cv = KFold(n_splits=10, shuffle=True, random_state=42)
    # get epochs
    epochs_data_train = 1e6 * epochs.get_data()
    # compute covariance matrices
    cov_data_train = Covariances().transform(epochs_data_train)

    ###########################################################################
    # Classification with Minimum distance to mean
    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    # FIX: mdm_score was never computed (the function returned an undefined
    # name); evaluate MDM with the same CV protocol as the tangent-space model.
    mdm_scores = cross_val_score(mdm, cov_data_train, labels, cv=cv, n_jobs=1)
    mdm_score = np.mean(mdm_scores)

    ###########################################################################
    # Classification with Tangent Space Logistic Regression
    clf = TSclassifier()
    scores = cross_val_score(clf, cov_data_train, labels, cv=cv, n_jobs=1)

    # Printing the results
    class_balance = np.mean(labels == labels[0])
    class_balance = max(class_balance, 1. - class_balance)
    ts_score = np.mean(scores)
    print("Tangent space Classification accuracy: %f / Chance level: %f" %
          (ts_score, class_balance))
    return [subject, mdm_score, ts_score]
def score_ensemble_rot(settings, subject_target, ntop):
    """Transfer-learning score via an ensemble of rotation-aligned MDMs.

    Selects the ``ntop`` source subjects with the best intra-subject scores,
    aligns each source's class means to the target's re-centered class means
    with a Procrustes-style rotation, trains one MDM per source, and scores
    majority-vote predictions on the target test split (averaged over 10
    random splits).
    """
    dataset = settings['dataset']
    paradigm = settings['paradigm']
    session = settings['session']
    storage = settings['storage']
    filepath = '../results/' + dataset + '/TL_intra-subject_scores.pkl'
    acc_intra_dict = joblib.load(filepath)
    # rank candidate source subjects by their intra-subject accuracy
    scores = []
    subject_sources = []
    for subject in settings['subject_list']:
        if subject == subject_target:
            continue
        else:
            scores.append(acc_intra_dict[subject])
            subject_sources.append(subject)
    scores = np.array(scores)
    subject_sources = np.array(subject_sources)
    idx_sort = scores.argsort()[::-1]
    scores = scores[idx_sort]
    subject_sources = subject_sources[idx_sort]
    subject_sources_ntop = subject_sources[:ntop]
    # get the geometric means for each subject (each class and also the center)
    filename = '../results/' + dataset + '/subject_means.pkl'
    subj_means = joblib.load(filename)
    # get the data for the target subject
    target_org = GD.get_dataset(dataset, subject_target, session, storage)
    if paradigm == 'MI':
        # things here are only implemented for MI for now
        target_org['covs'] = Covariances(estimator='oas').fit_transform(
            target_org['signals'])
        target_org['labels'] = target_org['labels']
    ncovs = settings['ncovs_list'][0]
    nrzt = 10
    score_rzt = 0.0
    for rzt in range(nrzt):
        # split randomly the target dataset
        target_org_train, target_org_test = get_target_split_motorimagery(
            target_org, ncovs)
        covs_train_target = target_org_train['covs']
        labs_train_target = target_org_train['labels']
        # re-center the target class means at identity:
        # M_rct = C^{-1/2} M C^{-1/2}
        MC_target = mean_riemann(covs_train_target)
        M1_target = mean_riemann(
            covs_train_target[labs_train_target == 'left_hand'])
        M2_target = mean_riemann(
            covs_train_target[labs_train_target == 'right_hand'])
        M1_target_rct = np.dot(invsqrtm(MC_target),
                               np.dot(M1_target, invsqrtm(MC_target)))
        M2_target_rct = np.dot(invsqrtm(MC_target),
                               np.dot(M2_target, invsqrtm(MC_target)))
        # the target training set collapses to its two re-centered class means
        covs_train_target = np.stack([M1_target_rct, M2_target_rct])
        labs_train_target = np.array(['left_hand', 'right_hand'])
        clf = []
        for subj_source in subject_sources_ntop:
            MC_source = subj_means[subj_source]['center']
            M1_source = subj_means[subj_source]['left_hand']
            M2_source = subj_means[subj_source]['right_hand']
            # re-center the source class means as well
            M1_source_rct = np.dot(invsqrtm(MC_source),
                                   np.dot(M1_source, invsqrtm(MC_source)))
            M2_source_rct = np.dot(invsqrtm(MC_source),
                                   np.dot(M2_source, invsqrtm(MC_source)))
            # rotate source means onto the target means
            M = [M1_target_rct, M2_target_rct]
            Mtilde = [M1_source_rct, M2_source_rct]
            R = manifoptim.get_rotation_matrix(M, Mtilde)
            M1_source_rot = np.dot(R, np.dot(M1_source_rct, R.T))
            M2_source_rot = np.dot(R, np.dot(M2_source_rct, R.T))
            covs_train_source = np.stack([M1_source_rot, M2_source_rot])
            labs_train_source = np.array(['left_hand', 'right_hand'])
            covs_train = np.concatenate([covs_train_source, covs_train_target])
            labs_train = np.concatenate([labs_train_source, labs_train_target])
            clfi = MDM()
            # problems here when using integer instead of floats on the
            # sample_weight
            clfi.fit(covs_train, labs_train,
                     sample_weight=np.array(
                         [200.0, 200.0, 2.0 * ncovs, 2.0 * ncovs]))
            clf.append(clfi)
        covs_test = target_org_test['covs']
        labs_test = target_org_test['labels']
        # majority vote across the per-source classifiers
        ypred = []
        for clfi in clf:
            yi = clfi.predict(covs_test)
            ypred.append(yi)
        ypred = np.array(ypred)
        majorvoting = []
        for j in range(ypred.shape[1]):
            ypredj = ypred[:, j]
            values_unique, values_count = np.unique(ypredj,
                                                    return_counts=True)
            majorvoting.append(values_unique[np.argmax(values_count)])
        majorvoting = np.array(majorvoting)
        score_rzt = score_rzt + np.mean(majorvoting == labs_test)
    # average accuracy over the random splits
    score = score_rzt / nrzt
    return score
def get_score(subject=7, runs=[6, 10, 14], event_id=dict(hands=2, feet=3)): tmin, tmax = -1., 4. # learn all suject exclude target subject. ############################# first_sub = 2 if subject == 1 else 1 raw = get_raw(first_sub, runs) for i in range(first_sub + 1, 3): if i != subject and not (i in [88, 89, 92, 100]): # print(i) raw.append(get_raw(i, runs)) raw.append(get_raw(subject, runs)) events = find_events(raw, shortest_event=0, stim_channel='STI 014') epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True, baseline=None, preload=True, verbose=False) labels = epochs.events[:, -1] epochs_data_train = 1e6 * epochs.get_data()[:, :-1] cov_data_train = Covariances().transform(epochs_data_train) weights = np.arange(0.1, 1.0, 0.1) scores = [] for weight in weights: mdm = MDM(metric=dict(mean='riemann', distance='riemann')) others_sample_weight_base = np.ones(len(epochs) - EPOCH_COUNT) * (1. - weight) target_sample_weight_base = np.ones(EPOCH_COUNT) * weight sample_weight = np.hstack( (others_sample_weight_base, target_sample_weight_base)) others_size = others_sample_weight_base.size others_index = np.arange(others_size) cv = KFold(n_splits=5, shuffle=True, random_state=42) train_scores = [] test_scores = [] dumy_array = np.ones(EPOCH_COUNT) for train_index, test_index in cv.split(dumy_array): train_index = np.hstack((others_index, train_index + others_size)) x = cov_data_train[train_index] y = labels[train_index] mdm.fit(x, y, sample_weight=sample_weight[train_index]) score = (mdm.predict(x) == y).sum() / len(train_index) train_scores.append(score) test_index = test_index + others_size y = mdm.predict(cov_data_train[test_index]) score = (y == labels[test_index]).sum() / len(test_index) test_scores.append(score) train_score = np.mean(train_scores) test_score = np.mean(test_scores) scores.append([subject, weight, train_score, test_score]) # print("train:%s test:%s" % (train_score, test_score)) return scores
class ERPDistance(BaseEstimator, TransformerMixin):
    """ERP distance cov estimator.

    This transformer estimates Riemannian distance for ERP covariance
    matrices. After estimation of special form ERP covariance matrices using
    the ERP transformer, a MDM [1] algorithm is used to compute Riemannian
    distance.

    References:
    [1] A. Barachant, S. Bonnet, M. Congedo and C. Jutten, "Multiclass
    Brain-Computer Interface Classification by Riemannian Geometry," in IEEE
    Transactions on Biomedical Engineering, vol. 59, no. 4, p. 920-928, 2012
    """

    def __init__(self, window=500, nfilters=3, subsample=1, metric='riemann',
                 n_jobs=1):
        """Init."""
        self.window = window
        self.nfilters = nfilters
        self.subsample = subsample
        self.metric = metric
        self.n_jobs = n_jobs
        self._fitted = False

    def fit(self, X, y):
        """fit.

        Builds ERP covariances, augments them with sampled "rest" covariances
        labeled 0, and fits the internal MDM.
        """
        # Create ERP and get cov mat
        self.ERP = ERP(self.window, self.nfilters, self.subsample)
        train_cov = self.ERP.fit_transform(X, y)
        labels_train = self.ERP.labels_train
        # Add rest epochs (class 0)
        rest_cov = self._get_rest_cov(X, y)
        train_cov = np.concatenate((train_cov, rest_cov), axis=0)
        labels_train = np.concatenate((labels_train, [0] * len(rest_cov)))
        # fit MDM
        self.MDM = MDM(metric=self.metric, n_jobs=self.n_jobs)
        self.MDM.fit(train_cov, labels_train)
        self._fitted = True
        return self

    def transform(self, X, y=None):
        """Transform.

        Returns distances to each non-rest class relative to the rest
        (class 0) distance.
        """
        test_cov = self.ERP.transform(X)
        dist = self.MDM.transform(test_cov)
        dist = dist[:, 1:] - np.atleast_2d(dist[:, 0]).T
        return dist

    def update_subsample(self, old_sub, new_sub):
        """update subsampling."""
        if self._fitted:
            self.ERP.update_subsample(old_sub, new_sub)

    def _get_rest_cov(self, X, y):
        """Sample rest epochs from data and compute the cov mat.

        Rest windows are taken one window-length before each rising edge of
        y[:, 0].
        """
        ix = np.where(np.diff(y[:, 0]) == 1)[0]
        rest = []
        offset = -self.window
        for i in ix:
            start = i + offset - self.window
            stop = i + offset
            rest.append(self.ERP.erp_cov(X[slice(start, stop)].T))
        return np.array(rest)
# Script fragment: block-wise cross-validation of MDM AUC on extended ERP
# covariances. NOTE(review): loop nesting reconstructed from a single
# collapsed line — the post-loop placement of the score bookkeeping should be
# confirmed against the original script.
blocks = np.arange(1, 12+1)
for train_idx, test_idx in kf.split(np.arange(12)):
    # split in training and testing blocks
    X_training, labels_training, _ = get_block_repetition(
        X, labels, meta, blocks[train_idx], repetitions)
    X_test, labels_test, _ = get_block_repetition(
        X, labels, meta, blocks[test_idx], repetitions)
    # estimate the extended ERP covariance matrices with Xdawn
    dict_labels = {'Target': 1, 'NonTarget': 0}
    erpc = ERPCovariances(classes=[dict_labels['Target']], estimator='lwf')
    erpc.fit(X_training, labels_training)
    covs_training = erpc.transform(X_training)
    covs_test = erpc.transform(X_test)
    # get the AUC for the classification
    clf = MDM()
    clf.fit(covs_training, labels_training)
    labels_pred = clf.predict(covs_test)
    auc.append(roc_auc_score(labels_test, labels_pred))
# stock scores
scores_subject.append(np.mean(auc))
scores.append(scores_subject)
# print results
df[tmax] = pd.DataFrame(scores, columns=['subject', 'VR', 'PC'])
filename = './results.pkl'
joblib.dump(df, filename)
def score_pooling_rct(settings, subject_target, ntop):
    """Transfer-learning score with pooled, re-centered source class means.

    Pools the re-centered class means of the ``ntop`` best source subjects
    with the target's re-centered class means, trains a single MDM on the
    pooled set, and averages its score on the target test split over 10
    random splits.
    """
    dataset = settings['dataset']
    paradigm = settings['paradigm']
    session = settings['session']
    storage = settings['storage']
    filepath = '../results/' + dataset + '/TL_intra-subject_scores.pkl'
    acc_intra_dict = joblib.load(filepath)
    # rank candidate source subjects by intra-subject accuracy
    scores = []
    subject_sources = []
    for subject in settings['subject_list']:
        if subject == subject_target:
            continue
        else:
            scores.append(acc_intra_dict[subject])
            subject_sources.append(subject)
    scores = np.array(scores)
    subject_sources = np.array(subject_sources)
    idx_sort = scores.argsort()[::-1]
    scores = scores[idx_sort]
    subject_sources = subject_sources[idx_sort]
    subject_sources_ntop = subject_sources[:ntop]
    # get the geometric means for each subject (each class and also the center)
    filename = '../results/' + dataset + '/subject_means.pkl'
    subj_means = joblib.load(filename)
    # get the data for the target subject
    target_org = GD.get_dataset(dataset, subject_target, session, storage)
    if paradigm == 'MI':
        # things here are only implemented for MI for now
        target_org['covs'] = Covariances(estimator='oas').fit_transform(
            target_org['signals'])
        target_org['labels'] = target_org['labels']
    ncovs = settings['ncovs_list'][0]
    score_rzt = 0.0
    nrzt = 10
    for rzt in range(nrzt):
        # split randomly the target dataset
        target_org_train, target_org_test = get_target_split_motorimagery(
            target_org, ncovs)
        # get the data from the sources and pool it all together:
        # each source contributes its two re-centered class means
        class_mean_1 = []
        class_mean_2 = []
        for subj_source in subject_sources_ntop:
            MC_source = subj_means[subj_source]['center']
            M1_source = subj_means[subj_source]['left_hand']
            M2_source = subj_means[subj_source]['right_hand']
            M1_source_rct = np.dot(invsqrtm(MC_source),
                                   np.dot(M1_source, invsqrtm(MC_source)))
            class_mean_1.append(M1_source_rct)
            M2_source_rct = np.dot(invsqrtm(MC_source),
                                   np.dot(M2_source, invsqrtm(MC_source)))
            class_mean_2.append(M2_source_rct)
        class_mean_1_source = np.stack(class_mean_1)
        class_mean_2_source = np.stack(class_mean_2)
        covs_train_source = np.concatenate(
            [class_mean_1_source, class_mean_2_source])
        labs_train_source = np.concatenate([
            len(class_mean_1_source) * ['left_hand'],
            len(class_mean_2_source) * ['right_hand']
        ])
        # re-center data for the target
        # NOTE(review): this re-centers using target_org['covs'] (the FULL
        # target dataset, not just target_org_train) — confirm whether the
        # test split should influence the centering.
        covs_train_target = target_org['covs']
        MC_target = mean_riemann(covs_train_target)
        labs_train_target = target_org['labels']
        class_mean_1_target = mean_riemann(
            covs_train_target[labs_train_target == 'left_hand'])
        class_mean_1_target = np.dot(
            invsqrtm(MC_target),
            np.dot(class_mean_1_target, invsqrtm(MC_target)))
        class_mean_2_target = mean_riemann(
            covs_train_target[labs_train_target == 'right_hand'])
        class_mean_2_target = np.dot(
            invsqrtm(MC_target),
            np.dot(class_mean_2_target, invsqrtm(MC_target)))
        covs_train_target = np.stack(
            [class_mean_1_target, class_mean_2_target])
        labs_train_target = np.array(['left_hand', 'right_hand'])
        covs_train = np.concatenate([covs_train_source, covs_train_target])
        labs_train = np.concatenate([labs_train_source, labs_train_target])
        covs_test = target_org_test['covs']
        labs_test = target_org_test['labels']
        # do the classification
        clf = MDM()
        clf.fit(covs_train, labs_train)
        score_rzt = score_rzt + clf.score(covs_test, labs_test)
    # average accuracy over the random splits
    score = score_rzt / nrzt
    return score
# Script fragment: offline MDM training followed by online LSL stream setup.
# NOTE(review): the first line is the tail of an Epochs(...) call whose start
# is outside this view.
off_events, event_id, tmin=2, tmax=5, baseline=None)
# Get Epochs data (signal)
off_epochs_data = off_epochs.get_data()
epochs_data = copy.deepcopy(off_epochs_data)
# Get Labels
labels = off_epochs.events[:, -1]
labels_base = copy.deepcopy(labels)
# Covariance Matrix transform (Ledoit-Wolf estimator)
off_cov_matrix = Covariances(estimator='lwf').transform(off_epochs_data)
# MDM model init and fit
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
mdm.fit(off_cov_matrix, labels)
# End of offline training
# EEG stream on the lab network
print("looking for an EEG stream...")
streams = resolve_stream('name', 'openvibeSignal')
# Create a new inlet to read from the stream
inlet = StreamInlet(streams[0])
kmeans = Kmeans(n_clusters=4)
time_window = timeWindowInit(inlet)
time_window_base = copy.deepcopy(time_window)
timeBase = time.time()
ext_signal = _bandpass_filter(raw, frequencies, frequency_range) ############################################################################### raw_ext = createNewRaw(ext_signal, raw) ############################################################################### event_id = {'13 Hz': 2, '17 Hz': 4, '21 Hz': 3, 'resting-state': 1} epochs = Epochs(raw_ext, events, event_id, tmin=2, tmax=5, baseline=None) t1 = time.time() cov_ext_trials = Covariances(estimator='lwf').transform(epochs.get_data()) ############################################################################### # Get labels labels = epochs.events[:, -1] mdm = MDM(metric=dict(mean='riemann', distance='riemann')) mdm.fit(cov_ext_trials, labels) t2 = time.time() prediction_labeled = mdm.predict(cov_ext_trials) print("predict time = " + str(t2 - t1)) print(labels) print(prediction_labeled)
# Script fragment (Python 2 print statements): end of a CV loop, Xdawn
# spatial-pattern plotting, and 4-class MDM centroid display.
# NOTE(review): the first two statements belong to a CV loop whose `for`
# header is outside this view.
clf.fit(epochs_data[train_idx], y_train)
pr[test_idx] = clf.predict(epochs_data[test_idx])
print classification_report(labels, pr)
print confusion_matrix(labels, pr)
# spatial patterns
xd = XdawnCovariances(n_components)
Cov = xd.fit_transform(epochs_data, labels)
# accesses the private _patterns of the fitted Xdawn for topomap display
evoked.data = xd.Xd._patterns.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, n_components, 2 * n_components, 3 * n_components],
    ch_type='grad', colorbar=False, size=1.5)
# prototyped covariance matrices
mdm = MDM()
mdm.fit(Cov, labels)
fig, axe = plt.subplots(1, 4)
# NOTE(review): `covmeans` (no underscore) is the old pyriemann attribute;
# newer releases expose `covmeans_` — confirm the pinned version.
axe[0].matshow(mdm.covmeans[0])
axe[0].set_title('Class 1 covariance matrix')
axe[1].matshow(mdm.covmeans[1])
axe[1].set_title('Class 2 covariance matrix')
axe[2].matshow(mdm.covmeans[2])
axe[2].set_title('Class 3 covariance matrix')
axe[3].matshow(mdm.covmeans[3])
axe[3].set_title('Class 4 covariance matrix')
plt.show()
# Script fragment: CSP+LDA pipeline scoring, then MDM class-mean covariance
# heatmaps for the two motor-imagery classes.
clf = Pipeline([('CSP', csp), ('LDA', lda)])
scores = cross_val_score(clf, epochs_data_train, labels, cv=cv, n_jobs=1)
# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("CSP + LDA Classification accuracy: %f / Chance level: %f" %
      (np.mean(scores), class_balance))
###############################################################################
# Display MDM centroid
mdm = MDM()
mdm.fit(cov_data_train, labels)
fig, axes = plt.subplots(1, 2, figsize=[8, 4])
# strip trailing dots from EEG channel names for tick labels
ch_names = [ch.replace('.', '') for ch in epochs.ch_names]
# NOTE(review): `covmeans` (no underscore) is the old pyriemann attribute;
# newer releases expose `covmeans_` — confirm the pinned version.
df = pd.DataFrame(data=mdm.covmeans[0], index=ch_names, columns=ch_names)
g = sns.heatmap(df, ax=axes[0], square=True, cbar=False, xticklabels=2,
                yticklabels=2)
g.set_title('Mean covariance - hands')
df = pd.DataFrame(data=mdm.covmeans[1], index=ch_names, columns=ch_names)
g = sns.heatmap(df, ax=axes[1], square=True, cbar=False, xticklabels=2,
                yticklabels=2)
plt.xticks(rotation='vertical')
plt.yticks(rotation='horizontal')
g.set_title('Mean covariance - feets')
def get_score(subject=7, runs=None, event_id=None):
    """Evaluate weighted transfer learning for one target subject.

    For each candidate weight, progressively appends other subjects' runs
    to the target subject's raw data, fits a sample-weighted MDM inside a
    5-fold CV over the target epochs, and writes per-step train/test
    scores to an Excel file.

    Parameters
    ----------
    subject : int
        Target subject number (skipped entirely if in EXCLUDE_SUBJECTS).
    runs : list of int, optional
        Run numbers to load; defaults to [6, 10, 14].
    event_id : dict, optional
        Event name -> code mapping; defaults to dict(hands=2, feet=3).
    """
    # BUGFIX: avoid mutable default arguments — build defaults per call.
    if runs is None:
        runs = [6, 10, 14]
    if event_id is None:
        event_id = dict(hands=2, feet=3)

    if subject in EXCLUDE_SUBJECTS:
        return
    tmin, tmax = -1., 4.
    # Candidate weights for the target subject's samples (others get 1-w).
    weights = np.arange(0.1, 1.0, 0.1)
    for weight in weights:
        first_sub = 2 if subject == 1 else 1
        raw = get_raw(subject, runs)
        scores = []
        for i in range(first_sub, TRANS_SUBJECT_COUNT):
            print(i)
            if i == subject or (i in EXCLUDE_SUBJECTS):
                continue
            # Gradually grow the training pool: each iteration appends one
            # more transfer subject to the accumulated raw.
            raw.append(get_raw(i, runs))
            events = find_events(raw, shortest_event=0,
                                 stim_channel='STI 014')
            epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True,
                            baseline=None, preload=True, verbose=False)
            labels = epochs.events[:, -1]
            # Scale to microvolts and drop the last channel (stim).
            epochs_data_train = 1e6*epochs.get_data()[:, :-1]
            cov_data_train = Covariances().transform(epochs_data_train)
            # Per-sample weights: `weight` for the EPOCH_COUNT target
            # epochs, 1-weight for all transferred epochs.
            target_sample_weight_base = np.ones(EPOCH_COUNT)*weight
            others_sample_weight_base = np.ones(
                len(epochs)-EPOCH_COUNT)*(1.-weight)
            sample_weight = np.hstack(
                (target_sample_weight_base, others_sample_weight_base))
            others_size = others_sample_weight_base.size
            others_index = np.arange(others_size)
            # NOTE(review): sample_weight is ordered target-first, while
            # the index arithmetic below treats the first `others_size`
            # positions as the transferred epochs — verify that the epoch
            # ordering in `cov_data_train` matches this layout.
            mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
            cv = KFold(n_splits=5, shuffle=True, random_state=42)
            train_scores = []
            test_scores = []
            # CV splits only over the EPOCH_COUNT target epochs; all
            # transferred epochs are always in the training fold.
            dumy_array = np.ones(EPOCH_COUNT)
            for train_index, test_index in cv.split(dumy_array):
                train_index = np.hstack(
                    (others_index, train_index+others_size))
                x = cov_data_train[train_index]
                y = labels[train_index]
                mdm.fit(x, y, sample_weight=sample_weight[train_index])
                score = (mdm.predict(x) == y).sum()/len(train_index)
                train_scores.append(score)
                test_index = test_index + others_size
                y = mdm.predict(cov_data_train[test_index])
                score = (y == labels[test_index]).sum()/len(test_index)
                test_scores.append(score)
            train_score = np.mean(train_scores)
            test_score = np.mean(test_scores)
            scores.append([subject, i, train_score, test_score])
        df = pd.DataFrame(
            scores,
            columns=["subject", "transfer_count",
                     "train_score", "test_score"])
        df.to_excel("data/riemann/gradually/test_subject_%d_weight_%e.xlsx"
                    % (subject, weight), index=False)
# Cross-validate the classifier fold by fold.
for train_idx, test_idx in cv:
    y_train = labels[train_idx]
    y_test = labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    scores.append(clf.score(epochs_data[test_idx], y_test))

# Printing the results
# Chance level is the frequency of the majority class.
class_balance = np.mean(labels == labels[0])
if class_balance < 1. - class_balance:
    class_balance = 1. - class_balance
print("Classification accuracy: %f / Chance level: %f"
      % (np.mean(scores), class_balance))

# spatial patterns
xd = XdawnCovariances(n_components)
Cov = xd.fit_transform(epochs_data, labels)

evoked.data = xd.Xd._patterns.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(
    times=[0, 1, n_components, n_components + 1],
    ch_type='grad', colorbar=False, size=1.5)

# prototyped covariance matrices
mdm = MDM()
mdm.fit(Cov, labels)
fig, axe = plt.subplots(1, 2)
axe[0].matshow(mdm.covmeans[0])
axe[0].set_title('Class 1 covariance matrix')
axe[1].matshow(mdm.covmeans[1])
axe[1].set_title('Class 2 covariance matrix')
plt.show()
# NOTE(review): the line below is the tail of a _bandpass_filter(...) call
# whose opening falls outside this chunk.
frequency_range)
# Wrap the filtered signal back into a Raw object; filtered=True marks it
# as already band-passed.
offline_raw = createRaw(filtered_offline_signal, offline_raw, filtered=True)
# Cut 0-5 s epochs around the offline stimulation events.
offline_epochs = Epochs(offline_raw, offline_events, event_id, tmin=0,
                        tmax=5, baseline=None)
offline_epochs_data = offline_epochs.get_data()

# Creating ML model
# One covariance matrix per trial, Ledoit-Wolf shrinkage estimator.
offline_cov_matrix = Covariances(
    estimator='lwf').transform(offline_epochs_data)
# Minimum-distance-to-mean classifier with Riemannian mean and distance.
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
mdm.fit(offline_cov_matrix, labels)

# Evoking trials to simulate online input
iter_evoked = epochs.iter_evoked()
epochs_data = offline_epochs_data
time_array = []
# Sanity check: predictions on the training covariances themselves.
pre_predict = mdm.predict(offline_cov_matrix)
print("Labels: ")
print(labels)
for i, evoked in enumerate(iter_evoked):
    # Rebuild a Raw from the single evoked trial so the online pipeline can
    # re-filter it like live data (filtered=False: not yet band-passed).
    evoked_raw = createRaw(evoked.data, raw, filtered=False)
    ## Start Time Counting
class ERPDistance(BaseEstimator, TransformerMixin):
    """ERP distance cov estimator.

    This transformer estimates Riemannian distance for ERP covariance
    matrices. After estimation of special form ERP covariance matrices using
    the ERP transformer, a MDM [1] algorithm is used to compute Riemannian
    distance.

    References:
    [1] A. Barachant, S. Bonnet, M. Congedo and C. Jutten, "Multiclass
    Brain-Computer Interface Classification by Riemannian Geometry," in IEEE
    Transactions on Biomedical Engineering, vol. 59, no. 4, p. 920-928, 2012
    """

    def __init__(self, window=500, nfilters=3, subsample=1, metric='riemann',
                 n_jobs=1):
        """Store hyper-parameters.

        window: epoch length (samples) used by the ERP transformer.
        nfilters: number of spatial filters for the ERP transformer.
        subsample: subsampling factor passed to the ERP transformer.
        metric / n_jobs: forwarded to the MDM classifier.
        """
        self.window = window
        self.nfilters = nfilters
        self.subsample = subsample
        self.metric = metric
        self.n_jobs = n_jobs
        # Tracks whether fit() has completed; guards update_subsample().
        self._fitted = False

    def fit(self, X, y):
        """Fit the ERP spatial model and the MDM classifier.

        Builds ERP-style covariance matrices from the training data,
        augments them with sampled rest epochs (labeled 0), then fits MDM.
        Returns self.
        """
        # Create ERP and get cov mat
        self.ERP = ERP(self.window, self.nfilters, self.subsample)
        train_cov = self.ERP.fit_transform(X, y)
        labels_train = self.ERP.labels_train

        # Add rest epochs
        # Rest covariances get class label 0 so MDM learns a rest centroid.
        rest_cov = self._get_rest_cov(X, y)
        train_cov = np.concatenate((train_cov, rest_cov), axis=0)
        labels_train = np.concatenate((labels_train, [0] * len(rest_cov)))

        # fit MDM
        self.MDM = MDM(metric=self.metric, n_jobs=self.n_jobs)
        self.MDM.fit(train_cov, labels_train)
        self._fitted = True
        return self

    def transform(self, X, y=None):
        """Return distances to each non-rest centroid, relative to rest.

        Column 0 of MDM.transform is presumably the rest (label 0)
        centroid, so subtracting it yields distances relative to rest —
        TODO confirm against MDM's class ordering.
        """
        test_cov = self.ERP.transform(X)
        dist = self.MDM.transform(test_cov)
        dist = dist[:, 1:] - np.atleast_2d(dist[:, 0]).T
        return dist

    def update_subsample(self, old_sub, new_sub):
        """Forward a subsampling change to the ERP transformer (fit only)."""
        if self._fitted:
            self.ERP.update_subsample(old_sub, new_sub)

    def _get_rest_cov(self, X, y):
        """Sample rest epochs from data and compute the cov mat.

        Detects rising edges in the first label column (presumably target
        onsets — verify against caller) and takes, for each onset, the
        window ending one window-length before it.
        """
        ix = np.where(np.diff(y[:, 0]) == 1)[0]
        rest = []
        offset = - self.window
        for i in ix:
            # start = i - 2*window, stop = i - window.
            # NOTE(review): a negative start would wrap around via Python
            # slicing — assumes onsets occur after at least 2*window samples.
            start = i + offset - self.window
            stop = i + offset
            rest.append(self.ERP.erp_cov(X[slice(start, stop)].T))
        return np.array(rest)
# Train one KNN and one MDM per fold, recording fold accuracies and
# keeping every fitted classifier for later testing.
i = 1
for train_index, test_index in kf.split(X_train):
    logging.info(f'Doing fold {i}')

    X_train_fold = X_train[train_index]
    X_test_fold = X_train[test_index]
    y_train_fold = y_train[train_index]
    y_test_fold = y_train[test_index]

    clf_knn = KNearestNeighbor(n_neighbors, metric, n_jobs)
    clf_knn.fit(X_train_fold, y_train_fold)
    knn_hits = (y_test_fold == clf_knn.predict(X_test_fold)).sum()
    clf_knn_k_fold.append(clf_knn)
    accuracy_list_training_knn.append(knn_hits / len(y_test_fold))

    clf_mdm = MDM(metric, n_jobs)
    clf_mdm.fit(X_train_fold, y_train_fold)
    mdm_hits = (y_test_fold == clf_mdm.predict(X_test_fold)).sum()
    clf_mdm_k_fold.append(clf_mdm)
    accuracy_list_training_mdm.append(mdm_hits / len(y_test_fold))

    i += 1

# Testing on test dataset
logging.info('Doing testing')
accuracy_list_testing_knn = []
accuracy_list_testing_mdm = []
X_test, y_test = shuffle(X_test, y_test, random_state=args.seed)
for clf_knn in clf_knn_k_fold:
    y_predicted = clf_knn.predict(X_test)
# Band-pass filter the offline recording around the stimulation frequencies.
filtered_off_signal = _bandpass_filter(raw_off, frequencies,
                                       frequency_range)
raw_off = createOfflineRaw(filtered_off_signal, raw_off)
# Cut 0-5 s epochs around the offline events.
epochs_off = Epochs(raw_off, events_off, event_id, tmin=0, tmax=5,
                    baseline=None)
epochs_data = epochs_off.get_data()
# One covariance matrix per trial, Ledoit-Wolf shrinkage estimator.
covariance_off = Covariances(estimator='lwf').transform(epochs_data)
# Fit the minimum-distance-to-mean classifier on the offline covariances.
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
mdm.fit(covariance_off, labels)

# Epochs
# Replay single trials as evoked objects to simulate an online stream.
iter_evoked = epochs.iter_evoked()
for evoked_number, evoked in enumerate(iter_evoked):
    evoked_raw = createOnlineRaw(evoked.data, raw)
    # Timestamp before the per-trial online processing starts.
    time_1 = time.time()
    evoked_filtered_signal = _bandpass_filter(evoked_raw, frequencies,
                                              frequency_range)
    evoked_filtered_signal = np.array(evoked_filtered_signal)
    # Add a leading trial axis so the array matches the (n_trials, ...)
    # layout the covariance estimator expects — TODO confirm downstream use.
    evoked_filtered_signal = np.expand_dims(evoked_filtered_signal, axis=0)
# Cross-validate each dimension-reduction strategy on the covariance set.
kf = KFold(n_splits=n_splits)
for train_index, test_index in tqdm(kf.split(covs), total=n_splits):

    # split into training and testing datasets
    covs_train = covs[train_index]
    labs_train = labels[train_index]
    covs_test = covs[test_index]
    labs_test = labels[test_index]

    # reduce the dimensions with ['covpca', 'gpcaRiemann']
    for method_name in ('covpca', 'gpcaRiemann'):
        reducer = DR.RDR(n_components=pred, method=method_name)
        reducer.fit(covs_train)
        reduced_train = reducer.transform(covs_train)
        reduced_test = reducer.transform(covs_test)
        clf.fit(reduced_train, labs_train)
        scores[method_name].append(clf.score(reduced_test, labs_test))

    # reduce the dimensions with [SELg, SELb]
    for method_name, channel_sel in (('SELg', SELg), ('SELb', SELb)):
        reduced_train = covs_train[:, channel_sel, :][:, :, channel_sel]
        reduced_test = covs_test[:, channel_sel, :][:, :, channel_sel]
        clf.fit(reduced_train, labs_train)
        scores[method_name].append(clf.score(reduced_test, labs_test))

print('subject ', subject)

# print the scores
for method_name in scores:
    print(method_name, np.mean(scores[method_name]))
print('')
CSP_svm_record = []
# 5-fold evaluation using a pre-assigned fold index per trial: trials with
# index == fold form the test set, the rest form the training set.
for fold in range(1, 6):
    train = cov_data_bad[index[bad_subject_index] != fold]
    train_CSP = epochs_data_train_bad[index[bad_subject_index] != fold]
    train_label = labels_bad[index[bad_subject_index] != fold]
    test = cov_data_bad[index[bad_subject_index] == fold]
    test_CSP = epochs_data_train_bad[index[bad_subject_index] == fold]
    test_label = labels_bad[index[bad_subject_index] == fold]
    # Number of trials in the held-out fold.
    box_length = np.sum([index[bad_subject_index] == fold])

    # Minimum-distance-to-mean classifier, Riemannian mean and distance.
    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    mdm.fit(train, train_label)
    pred = mdm.predict(test)
    # BUGFIX: '{:4f}' is field-width 4 with the default 6 decimals; the
    # intended spec is '{:.4f}' (4 decimal places). Fixed in both prints.
    print('MDM: {:.4f}'.format(np.sum(pred == test_label) / box_length))
    MDM_record.append(np.sum(pred == test_label) / box_length)
    print('-----------------------------------------')

    # Same evaluation with the geodesic-filtering variant (FgMDM).
    Fgmdm = FgMDM(metric=dict(mean='riemann', distance='riemann'))
    Fgmdm.fit(train, train_label)
    pred = Fgmdm.predict(test)
    print('FGMDM: {:.4f}'.format(
        np.sum(pred == test_label) / box_length))
    FGMDM_record.append(np.sum(pred == test_label) / box_length)
    print('-----------------------------------------')