def fit(self, X, y):
    """Fit an ensemble of classifiers over a grid of preprocessing settings.

    For every (epoch window, bandpass filter) combination the data is split
    into train/test sets, band-pass filtered and cropped, and three
    classifier families are scored against the held-out split:

    * CSP spatial filters + LDA
    * MDM on trial covariance matrices (Riemannian metric)
    * tangent-space classifier on trial covariance matrices

    The top ``self.num_votes`` scoring entries of each family (score,
    settings, fitted model, and for CSP the fitted transformer) are kept in
    the ``ranked_*`` lists, in ascending score order, for later voting.

    Parameters
    ----------
    X : array, allow_nd
        Epoched signal.  NOTE(review): assumed (n_trials, n_channels,
        n_samples) from the CSP / covariance usage -- confirm with caller.
    y : array, shape (n_trials,)
        Class labels.

    Returns
    -------
    self : fitted estimator, with ``self.fit_`` set to True.
    """
    # validate
    X, y = check_X_y(X, y, allow_nd=True)
    X = check_array(X, allow_nd=True)

    # set internal vars
    self.classes_ = unique_labels(y)
    self.X_ = X
    self.y_ = y

    # Fixed seed so the train/test split (and therefore the ranking) is
    # reproducible and identical across parameter combinations.
    seed = 7
    np.random.seed(seed)

    for epoch_trim in self.epoch_bounds:
        for bandpass in self.bandpass_filters:
            # Same random_state every iteration -> the exact same split
            # for every parameter combination, so scores are comparable.
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.25, random_state=seed)

            # separate out inputs that are tuples
            bandpass_start, bandpass_end = bandpass
            epoch_trim_start, epoch_trim_end = epoch_trim

            # 5th-order Butterworth bandpass coefficients; cutoffs are
            # normalized by the Nyquist frequency (sfreq / 2).
            b, a = butter(
                5,
                np.array([bandpass_start, bandpass_end]) / (self.sfreq * 0.5),
                'bandpass')

            # filter and crop TRAINING SET, then re-validate
            X_train = self.preprocess_X(X_train, b, a,
                                        epoch_trim_start, epoch_trim_end)
            X_train, y_train = check_X_y(X_train, y_train, allow_nd=True)
            X_train = check_array(X_train, allow_nd=True)

            # filter and crop TEST SET, then re-validate
            X_test = self.preprocess_X(X_test, b, a,
                                       epoch_trim_start, epoch_trim_end)
            X_test, y_test = check_X_y(X_test, y_test, allow_nd=True)
            X_test = check_array(X_test, allow_nd=True)

            # Number of CSP spatial filters.  Self-tuning via
            # self.self_tune(X_train, y_train) was disabled in favour of
            # a fixed value.
            best_num_filters = 5

            # CSP with Ledoit-Wolf shrinkage for covariance estimation.
            # reg may be None, a float shrinkage in [0, 1], 'ledoit_wolf'
            # or 'oas' (Oracle Approximating Shrinkage).
            transformer = CSP(n_components=best_num_filters,
                              reg='ledoit_wolf')
            transformer.fit(X_train, y_train)

            # use these CSP spatial filters to transform train and test
            spatial_filters_train = transformer.transform(X_train)
            spatial_filters_test = transformer.transform(X_test)

            # train LDA on the CSP features, score on the held-out split
            classifier = LinearDiscriminantAnalysis()
            classifier.fit(spatial_filters_train, y_train)
            score = classifier.score(spatial_filters_test, y_test)

            # NOTE(review): this line was corrupted in the original
            # source ("bandpass:"******"epoch window:"); reconstructed as
            # an echo of the current parameter combination -- confirm.
            print("bandpass:", bandpass_start, bandpass_end,
                  "epoch window:", epoch_trim_start, epoch_trim_end)

            # settings that produced the scores below; a fresh copy is
            # inserted per ranked list so entries stay independent
            opts = dict(bandpass=bandpass, epoch_trim=epoch_trim,
                        filters=best_num_filters)

            # put CSP+LDA result in ranked order Top-N list (ascending;
            # worst entry falls off the front once the list is full)
            idx = bisect(self.ranked_scores, score)
            self.ranked_scores.insert(idx, score)
            self.ranked_scores_opts.insert(idx, dict(opts))
            self.ranked_classifiers.insert(idx, classifier)
            self.ranked_transformers.insert(idx, transformer)
            if len(self.ranked_scores) > self.num_votes:
                self.ranked_scores.pop(0)
            if len(self.ranked_scores_opts) > self.num_votes:
                self.ranked_scores_opts.pop(0)
            if len(self.ranked_classifiers) > self.num_votes:
                self.ranked_classifiers.pop(0)
            if len(self.ranked_transformers) > self.num_votes:
                self.ranked_transformers.pop(0)

            # trial covariance matrices for the Riemannian classifiers
            cov_data_train = covariances(X=X_train)
            cov_data_test = covariances(X=X_test)

            # MDM: minimum distance to Riemannian mean
            clf_mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
            clf_mdm.fit(cov_data_train, y_train)
            score_mdm = clf_mdm.score(cov_data_test, y_test)

            # put MDM result in ranked order Top-N list
            idx = bisect(self.ranked_scores_mdm, score_mdm)
            self.ranked_scores_mdm.insert(idx, score_mdm)
            self.ranked_scores_opts_mdm.insert(idx, dict(opts))
            self.ranked_classifiers_mdm.insert(idx, clf_mdm)
            if len(self.ranked_scores_mdm) > self.num_votes:
                self.ranked_scores_mdm.pop(0)
            if len(self.ranked_scores_opts_mdm) > self.num_votes:
                self.ranked_scores_opts_mdm.pop(0)
            if len(self.ranked_classifiers_mdm) > self.num_votes:
                self.ranked_classifiers_mdm.pop(0)

            # tangent-space classifier on the same covariances
            clf_ts = TSclassifier()
            clf_ts.fit(cov_data_train, y_train)
            score_ts = clf_ts.score(cov_data_test, y_test)

            # put tangent-space result in ranked order Top-N list
            idx = bisect(self.ranked_scores_ts, score_ts)
            self.ranked_scores_ts.insert(idx, score_ts)
            self.ranked_scores_opts_ts.insert(idx, dict(opts))
            self.ranked_classifiers_ts.insert(idx, clf_ts)
            if len(self.ranked_scores_ts) > self.num_votes:
                self.ranked_scores_ts.pop(0)
            if len(self.ranked_scores_opts_ts) > self.num_votes:
                self.ranked_scores_opts_ts.pop(0)
            if len(self.ranked_classifiers_ts) > self.num_votes:
                self.ranked_classifiers_ts.pop(0)

            # progress report: current scores and the Top-N leaderboards
            print("CSP+LDA score:", score,
                  "Tangent space w/LR score:", score_ts)
            print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
            print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
            print(" T O P ", self.num_votes, " C L A S S I F I E R S")
            print()
            for i in range(len(self.ranked_scores)):
                print(i, ",", round(self.ranked_scores[i], 4), ",",
                      self.ranked_scores_opts[i])
            print("-------------------------------------")
            for i in range(len(self.ranked_scores_ts)):
                print(i, ",", round(self.ranked_scores_ts[i], 4), ",",
                      self.ranked_scores_opts_ts[i])
            print("-------------------------------------")
            for i in range(len(self.ranked_scores_mdm)):
                print(i, ",", round(self.ranked_scores_mdm[i], 4), ",",
                      self.ranked_scores_opts_mdm[i])

    # finish up, set the flag to indicate "fitted" state
    self.fit_ = True

    # Return the classifier
    return self
# Cross-validated comparison of four dimensionality-reduction methods on
# covariance matrices: two RDR transforms ('covpca', 'gpcaRiemann') and two
# pre-computed channel-selection index sets (SELg, SELb).
# NOTE(review): this fragment relies on covs, labels, n_splits, pred, clf,
# SELg, SELb, scores and subject being defined earlier in the file.
kf = KFold(n_splits=n_splits)
for train_index, test_index in tqdm(kf.split(covs), total=n_splits):
    # split into training and testing datasets
    covs_train = covs[train_index]
    labs_train = labels[train_index]
    covs_test = covs[test_index]
    labs_test = labels[test_index]
    # reduce the dimensions with ['covpca', 'gpcaRiemann']
    for meth in ['covpca', 'gpcaRiemann']:
        # fit the reducer on the training covariances only, then apply
        # the same fitted transform to both splits before classifying
        trf = DR.RDR(n_components=pred, method=meth)
        trf.fit(covs_train)
        covs_train_red = trf.transform(covs_train)
        covs_test_red = trf.transform(covs_test)
        clf.fit(covs_train_red, labs_train)
        scores[meth].append(clf.score(covs_test_red, labs_test))
    # reduce the dimensions with [SELg, SELb]
    for meth, sel in zip(['SELg', 'SELb'], [SELg, SELb]):
        # channel selection: keep only the rows AND columns indexed by
        # sel, i.e. the sub-covariance over the selected channels
        covs_train_red = covs_train[:, sel, :][:, :, sel]
        covs_test_red = covs_test[:, sel, :][:, :, sel]
        clf.fit(covs_train_red, labs_train)
        scores[meth].append(clf.score(covs_test_red, labs_test))

print('subject ', subject)
# print the scores (mean accuracy across folds, per method)
for meth in scores.keys():
    print(meth, np.mean(scores[meth]))
print('')
def score_pooling_rct(settings, subject_target, ntop):
    """Transfer-learning score for one target subject via re-centered pooling.

    The ``ntop`` source subjects with the best intra-subject scores are
    selected; their per-class geometric means are re-centered to identity
    (congruence by ``invsqrtm`` of each subject's center) and pooled with
    the re-centered class means of the target's training split.  An MDM
    classifier trained on this pool is scored on the target's test split,
    averaged over 10 random splits.

    Parameters
    ----------
    settings : dict
        Needs keys 'dataset', 'paradigm', 'session', 'storage',
        'subject_list', 'ncovs_list'.
    subject_target : hashable
        Subject held out as the transfer target.
    ntop : int
        Number of best-scoring source subjects to pool.

    Returns
    -------
    score : float
        Mean MDM accuracy on the target test split over 10 repetitions.
    """
    dataset = settings['dataset']
    paradigm = settings['paradigm']
    session = settings['session']
    storage = settings['storage']

    # rank the candidate source subjects by their intra-subject accuracy
    filepath = '../results/' + dataset + '/TL_intra-subject_scores.pkl'
    acc_intra_dict = joblib.load(filepath)
    scores = []
    subject_sources = []
    for subject in settings['subject_list']:
        if subject == subject_target:
            continue
        scores.append(acc_intra_dict[subject])
        subject_sources.append(subject)
    scores = np.array(scores)
    subject_sources = np.array(subject_sources)
    idx_sort = scores.argsort()[::-1]  # descending accuracy
    subject_sources = subject_sources[idx_sort]
    subject_sources_ntop = subject_sources[:ntop]

    # get the geometric means for each subject (each class and also the center)
    filename = '../results/' + dataset + '/subject_means.pkl'
    subj_means = joblib.load(filename)

    # get the data for the target subject
    target_org = GD.get_dataset(dataset, subject_target, session, storage)
    if paradigm == 'MI':
        # things here are only implemented for MI for now
        target_org['covs'] = Covariances(estimator='oas').fit_transform(
            target_org['signals'])

    ncovs = settings['ncovs_list'][0]

    # Pool the re-centered class means of the top sources.  This does not
    # depend on the target split, so it is computed once (the original
    # recomputed it in every repetition).
    class_mean_1 = []
    class_mean_2 = []
    for subj_source in subject_sources_ntop:
        MC_source_isqrt = invsqrtm(subj_means[subj_source]['center'])
        M1_source = subj_means[subj_source]['left_hand']
        M2_source = subj_means[subj_source]['right_hand']
        # congruence transform C^{-1/2} M C^{-1/2}: re-center each class
        # mean with respect to the subject's own center
        class_mean_1.append(
            np.dot(MC_source_isqrt, np.dot(M1_source, MC_source_isqrt)))
        class_mean_2.append(
            np.dot(MC_source_isqrt, np.dot(M2_source, MC_source_isqrt)))
    class_mean_1_source = np.stack(class_mean_1)
    class_mean_2_source = np.stack(class_mean_2)
    covs_train_source = np.concatenate(
        [class_mean_1_source, class_mean_2_source])
    labs_train_source = np.concatenate([
        len(class_mean_1_source) * ['left_hand'],
        len(class_mean_2_source) * ['right_hand']
    ])

    score_rzt = 0.0
    nrzt = 10
    for rzt in range(nrzt):
        # split randomly the target dataset
        target_org_train, target_org_test = get_target_split_motorimagery(
            target_org, ncovs)

        # re-center data for the target using ONLY the training split.
        # BUGFIX: the original read target_org['covs'] / ['labels'] (the
        # full target set, test trials included) and never used
        # target_org_train, leaking test labels into training.
        covs_train_target = target_org_train['covs']
        labs_train_target = target_org_train['labels']
        MC_target_isqrt = invsqrtm(mean_riemann(covs_train_target))

        class_mean_1_target = mean_riemann(
            covs_train_target[labs_train_target == 'left_hand'])
        class_mean_1_target = np.dot(
            MC_target_isqrt, np.dot(class_mean_1_target, MC_target_isqrt))

        class_mean_2_target = mean_riemann(
            covs_train_target[labs_train_target == 'right_hand'])
        class_mean_2_target = np.dot(
            MC_target_isqrt, np.dot(class_mean_2_target, MC_target_isqrt))

        covs_train_target = np.stack(
            [class_mean_1_target, class_mean_2_target])
        labs_train_target = np.array(['left_hand', 'right_hand'])

        # pooled training set: source class means + target class means
        covs_train = np.concatenate([covs_train_source, covs_train_target])
        labs_train = np.concatenate([labs_train_source, labs_train_target])
        covs_test = target_org_test['covs']
        labs_test = target_org_test['labels']

        # do the classification
        clf = MDM()
        clf.fit(covs_train, labs_train)
        score_rzt = score_rzt + clf.score(covs_test, labs_test)

    # average over the random splits
    score = score_rzt / nrzt
    return score