def main():
    """Per-sample stacking ensemble over per-frequency pipelines.

    For each of 21 samples and 50 timepoints, fits a StackingClassifier
    (logistic-regression meta-learner over the 15 frequency pipelines)
    with 5-fold CV, saves a per-sample accuracy curve, then an averaged
    curve and CSV across all samples.
    """
    save_dir = "Results/ensembles/stacking/per_sample/15hz"
    # Fix: close the pickle file deterministically instead of leaking it.
    with open(
            "DataTransformed/wavelet_complex/15hz/pca_80/x_train_all_samples.pkl",
            "rb") as f:
        all_x_train = pickle.load(f)
    all_freq_pipelines = get_freq_pipelines(15)
    all_results = np.zeros((21, 50))
    for sample in range(21):
        print("sample {}".format(sample))
        all_y_train = get_y_train(sample + 1)
        sample_x_train = np.array(all_x_train[sample])
        time_results = np.zeros(50)
        for time in range(50):
            # Every 50th epoch starting at `time` shares the same
            # within-trial time position.
            intervals = np.arange(start=time,
                                  stop=all_y_train.shape[0],
                                  step=50)
            y_train = all_y_train[intervals]
            x_train = sample_x_train[:, intervals]
            # (freqs, epochs, features) -> (epochs, freqs * features)
            x_train = x_train.transpose(1, 0, 2).reshape(x_train.shape[1], -1)
            model = StackingClassifier(estimators=all_freq_pipelines,
                                       final_estimator=LogisticRegression(),
                                       cv=5,
                                       stack_method='predict_proba')
            scores = cross_val_score(model, x_train, y_train, cv=5)
            # Single consistent format call (was a mixed .format/% hybrid);
            # printed text is unchanged.
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            time_results[time] = scores.mean()
        sns.set()
        ax = sns.lineplot(data=time_results, dashes=False)
        ax.set(
            ylim=(0, 1),
            xlabel='Timepoints',
            ylabel='Accuracy',
            title='Cross Val Accuracy Stacking Ensemble for Sample {}'.format(
                sample + 1))
        plt.axvline(x=15, color='b', linestyle='--')
        # 0.125 = chance level for the 8-class problem.
        plt.axhline(0.125, color='k', linestyle='--')
        ax.figure.savefig("{}/LOOCV_sample_{}.png".format(
            save_dir, sample + 1), dpi=300)
        plt.clf()
        all_results[sample] = time_results
    sns.set()
    ax = sns.lineplot(data=np.mean(all_results, axis=0), dashes=False)
    ax.set(
        ylim=(0, 1),
        xlabel='Timepoints',
        ylabel='Accuracy',
        title='Average Cross Val Accuracy Stacking Ensemble for All Samples')
    plt.axvline(x=15, color='b', linestyle='--')
    plt.axhline(0.125, color='k', linestyle='--')
    ax.figure.savefig("{}/LOOCV_all_samples.png".format(save_dir), dpi=300)
    plt.clf()
    results_df = pd.DataFrame(np.mean(all_results, axis=0))
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))
def vis_embeddings(dim_red_method, epochs, sample):
    """Visualise low-dimensional embeddings of epoch data.

    Standardises the channel data, subsamples one timepoint per
    50-sample trial window, reduces dimensionality with the requested
    method and scatter-plots the components coloured by label.

    Args:
        dim_red_method: one of 'pca', 'ica', 'se', 'tsne'.
        epochs: MNE epochs object providing ``get_data()``.
        sample: sample index passed to ``get_y_train``.

    Raises:
        ValueError: if ``dim_red_method`` is not implemented.
    """
    n_comp = 2
    x_train = epochs.get_data()
    # (epochs, channels, times) -> (epochs*times, channels)
    x_train = x_train.transpose(0, 2, 1).reshape(-1, x_train.shape[1])
    x_train = StandardScaler().fit_transform(x_train)
    y_train = get_y_train(sample)
    # One timepoint (offset 15) per 50-sample trial window.
    inds = np.arange(15, 8000, 50)
    x_train = x_train[inds]
    y_train = y_train[inds]
    print('fitting {}'.format(dim_red_method))
    if dim_red_method == 'pca':
        pca = PCA(n_components=n_comp)
        reduced_data = pca.fit_transform(x_train)
    elif dim_red_method == 'ica':
        ica = FastICA(n_components=n_comp)
        reduced_data = ica.fit_transform(x_train)
    elif dim_red_method == 'se':
        se = SpectralEmbedding(n_components=n_comp)
        reduced_data = se.fit_transform(x_train)
    elif dim_red_method == 'tsne':
        # PCA down to 50 components first to keep t-SNE tractable.
        pca = PCA(n_components=50)
        pca_data = pca.fit_transform(x_train)
        tsne = TSNE(n_components=n_comp,
                    verbose=1,
                    perplexity=10,
                    learning_rate=200)
        reduced_data = tsne.fit_transform(pca_data)
    else:
        raise ValueError("{} method not implemented".format(dim_red_method))
    print('fitting done')
    # Columns are labelled PC1..PCn regardless of the reduction method.
    if n_comp == 2:
        reduced_data_df = pd.DataFrame(data=reduced_data,
                                       columns=['PC1', 'PC2'])
    elif n_comp == 3:
        reduced_data_df = pd.DataFrame(data=reduced_data,
                                       columns=['PC1', 'PC2', 'PC3'])
    y_train_df = pd.DataFrame(data=y_train, columns=["labels"])
    final_df = pd.concat([reduced_data_df, y_train_df[['labels']]], axis=1)
    if n_comp == 2:
        sns.set()
        palette = sns.color_palette("bright", 8)
        ax = sns.scatterplot(x='PC1',
                             y='PC2',
                             hue='labels',
                             data=final_df,
                             palette=palette,
                             legend='full')
        ax.set(xlabel='PC1',
               ylabel='PC2',
               title='2 component {}'.format(dim_red_method))
        plt.show()
    elif n_comp == 3:
        # add_subplot(projection='3d') replaces Figure.gca(projection=...),
        # which was removed in matplotlib 3.6.
        ax = plt.figure(figsize=(16, 10)).add_subplot(projection='3d')
        # Bug fix: the z axis previously re-plotted PC2 instead of PC3.
        ax.scatter(xs=final_df["PC1"],
                   ys=final_df["PC2"],
                   zs=final_df["PC3"],
                   c=final_df["labels"],
                   cmap='tab10')
        ax.set_xlabel('PC1')
        ax.set_ylabel('PC2')
        ax.set_zlabel('PC3')
        plt.show()
def main():
    """Generate and pickle per-frequency LDA probability predictions.

    For each of 21 samples: Morlet-decompose the epochs (2-25 Hz, 15
    log-spaced frequencies), split each frequency's complex output into
    real+imaginary channels, PCA-reduce to 80 components, and collect
    5-fold cross-validated ``predict_proba`` outputs per timepoint.
    Predictions are pickled per sample and once for all samples.
    """
    model_type = "lda"
    exp_name = "wavelet_class/lsqr/complex"
    save_dir = "Results/{}/{}".format(model_type, exp_name)
    sample_preds = []
    for sample in range(1, 22):
        print("sample {}".format(sample))
        epochs = get_epochs(sample, scale=False)
        freqs = np.logspace(*np.log10([2, 25]), num=15)
        n_cycles = freqs / 4.
        print("applying morlet wavelet")
        # returns (n_epochs, n_channels, n_freqs, n_times)
        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output='complex')
        y_train = get_y_train(sample)
        freq_preds = []
        for freq in range(wavelet_output.shape[2]):
            print("frequency: {}".format(freqs[freq]))
            wavelet_epochs = wavelet_output[:, :, freq, :]
            # Real and imaginary parts become separate channels.
            wavelet_epochs = np.append(wavelet_epochs.real,
                                       wavelet_epochs.imag,
                                       axis=1)
            wavelet_info = mne.create_info(ch_names=wavelet_epochs.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(wavelet_epochs,
                                             info=wavelet_info,
                                             events=epochs.events)
            reduced = pca(80, wavelet_epochs, plot=False)
            # (epochs, components, times) -> (epochs*times, components)
            x_train = reduced.transpose(0, 2, 1).reshape(-1, reduced.shape[1])
            time_preds = []
            for time in range(50):
                print("time {}".format(time))
                intervals = np.arange(start=time,
                                      stop=x_train.shape[0],
                                      step=50)
                x_sample = x_train[intervals, :]
                y_sample = y_train[intervals]
                model = LinearDiscriminantAnalysis(solver='lsqr',
                                                   shrinkage='auto')
                cross_val_preds = cross_val_predict(model,
                                                    x_sample,
                                                    y_sample,
                                                    cv=5,
                                                    method="predict_proba")
                time_preds.append(cross_val_preds)
            freq_preds.append(time_preds)
        sample_preds.append(freq_preds)
        print('saving preds for sample {}'.format(sample))
        # Fix: use a context manager so the dump is flushed and the
        # handle closed even if pickling raises.
        with open(
                "{}/sample_{}/all_freq_proba_preds.pkl".format(
                    save_dir, sample), "wb") as f:
            pickle.dump(freq_preds, f)
        print("preds saved")
    print('saving preds for all samples')
    with open("{}/all_proba_preds.pkl".format(save_dir), "wb") as f:
        pickle.dump(sample_preds, f)
    print("preds saved")
def main():
    """Per-frequency, per-timepoint LDA classification of wavelet features.

    For each sample: Morlet-decompose the epochs (2-15 Hz, 15
    frequencies) into the output type selected by ``exp_name``
    (real / complex / power / phase), build the per-frequency feature
    matrix, PCA-reduce to 80 components and run per-timepoint linear
    models, saving one accuracy plot per frequency and a CSV per sample.
    """
    model_type = "lda"
    exp_name = "wavelet_class/lsqr/complex/15hz"
    # The second-to-last path component selects the wavelet output type;
    # compute it once instead of re-splitting at every check.
    wavelet_type = exp_name.split("/")[-2]
    # 'real' and 'complex' both need the complex wavelet output — the
    # real/imag handling happens per frequency below.
    morlet_outputs = {
        "real": "complex",
        "complex": "complex",
        "power": "power",
        "phase": "phase",
    }
    if wavelet_type not in morlet_outputs:
        raise ValueError(
            "{} not an output of wavelet function".format(wavelet_type))
    for sample in range(1, 22):
        print("sample {}".format(sample))
        # makedirs(exist_ok=True) replaces the isdir/mkdir pair and also
        # creates any missing parent directories.
        os.makedirs("Results/{}/{}/sample_{}".format(model_type, exp_name,
                                                     sample),
                    exist_ok=True)
        epochs = get_epochs(sample, scale=False)
        freqs = np.logspace(*np.log10([2, 15]), num=15)
        n_cycles = freqs / 4.
        print("applying morlet wavelet")
        # returns (n_epochs, n_channels, n_freqs, n_times);
        # one call replaces the four duplicated if/elif branches.
        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output=morlet_outputs[wavelet_type])
        y_train = get_y_train(sample)
        freq_results = np.zeros((wavelet_output.shape[2], 50))
        for freq in range(wavelet_output.shape[2]):
            print("frequency: {}".format(freqs[freq]))
            wavelet_epochs = wavelet_output[:, :, freq, :]
            if wavelet_type == "real":
                wavelet_epochs = wavelet_epochs.real
            if wavelet_type == "complex":
                # Real and imaginary parts become separate channels.
                wavelet_epochs = np.append(wavelet_epochs.real,
                                           wavelet_epochs.imag,
                                           axis=1)
            wavelet_info = mne.create_info(ch_names=wavelet_epochs.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(wavelet_epochs,
                                             info=wavelet_info,
                                             events=epochs.events)
            reduced = pca(80, wavelet_epochs, plot=False)
            # (epochs, components, times) -> (epochs*times, components)
            x_train = reduced.transpose(0, 2, 1).reshape(-1, reduced.shape[1])
            results = linear_models(x_train, y_train, model_type=model_type)
            freq_results[freq] = results
            curr_freq = str(round(freqs[freq], 2))
            sns.set()
            ax = sns.lineplot(data=results, dashes=False)
            ax.set(ylim=(0, 1),
                   xlabel='Time',
                   ylabel='Accuracy',
                   title='Cross Val Accuracy {} for Subject {} for Freq {}'.
                   format(model_type, sample, curr_freq))
            plt.axvline(x=15, color='b', linestyle='--')
            ax.figure.savefig("Results/{}/{}/sample_{}/freq_{}.png".format(
                model_type, exp_name, sample, curr_freq),
                              dpi=300)
            plt.clf()
        all_results_df = pd.DataFrame(freq_results)
        all_results_df.to_csv(
            "Results/{}/{}/sample_{}/all_freq_results.csv".format(
                model_type, exp_name, sample))
def main():
    """Per-sample soft-voting ensemble over per-frequency pipelines.

    For each of 21 samples and 50 timepoints, scores a VotingClassifier
    over the frequency pipelines with 5-fold CV. When ``diff_freqs`` is
    set, later timepoints (> 23) use only the lowest 11 frequency
    pipelines. Saves per-sample and averaged accuracy curves plus a CSV.
    """
    mode = "soft"
    pca_comp = 80
    diff_freqs = True
    save_dir = "Results/ensembles/voting/diff_freqs/15hz/pca_{}/{}".format(
        pca_comp, mode)
    # Fix: close the pickle file deterministically instead of leaking it.
    with open(
            "DataTransformed/wavelet_complex/15hz/pca_{}/x_train_all_samples.pkl"
            .format(pca_comp), "rb") as f:
        all_x_train = pickle.load(f)
    all_freq_pipelines = get_freq_pipelines(15)
    all_results = np.zeros((21, 50))
    for sample in range(21):
        print("sample {}".format(sample))
        all_y_train = get_y_train(sample + 1)
        sample_x_train = np.array(all_x_train[sample])
        time_results = np.zeros(50)
        for time in range(50):
            if diff_freqs:
                # After timepoint 23 only the first 11 (lowest-frequency)
                # pipelines take part in the vote.
                if time <= 23:
                    freq_pipelines = all_freq_pipelines
                else:
                    freq_pipelines = all_freq_pipelines[:11]
            else:
                freq_pipelines = all_freq_pipelines
            # Every 50th epoch starting at `time` shares the same
            # within-trial time position.
            intervals = np.arange(start=time,
                                  stop=all_y_train.shape[0],
                                  step=50)
            y_train = all_y_train[intervals]
            x_train = sample_x_train[:, intervals]
            # (freqs, epochs, features) -> (epochs, freqs * features)
            x_train = x_train.transpose(1, 0, 2).reshape(x_train.shape[1], -1)
            model = VotingClassifier(estimators=freq_pipelines, voting=mode)
            scores = cross_val_score(model, x_train, y_train, cv=5)
            # Single consistent format call (was a mixed .format/% hybrid);
            # printed text is unchanged.
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            time_results[time] = scores.mean()
        sns.set()
        ax = sns.lineplot(data=time_results, dashes=False)
        ax.set(ylim=(0, 1),
               xlabel='Timepoints',
               ylabel='Accuracy',
               title='Cross Val Accuracy Soft Voting Ensemble for Sample {}'.
               format(sample + 1))
        plt.axvline(x=15, color='b', linestyle='--')
        if diff_freqs:
            plt.axvline(x=23, color='g', linestyle='--')
        # 0.125 = chance level for the 8-class problem.
        plt.axhline(0.125, color='k', linestyle='--')
        ax.figure.savefig("{}/LOOCV_sample_{}.png".format(
            save_dir, sample + 1), dpi=300)
        plt.clf()
        all_results[sample] = time_results
    sns.set()
    ax = sns.lineplot(data=np.mean(all_results, axis=0), dashes=False)
    ax.set(
        ylim=(0, 1),
        xlabel='Timepoints',
        ylabel='Accuracy',
        title='Average Cross Val Accuracy Soft Voting Ensemble for All Samples'
    )
    plt.axvline(x=15, color='b', linestyle='--')
    if diff_freqs:
        plt.axvline(x=23, color='g', linestyle='--')
    plt.axhline(0.125, color='k', linestyle='--')
    ax.figure.savefig("{}/LOOCV_all_samples.png".format(save_dir), dpi=300)
    plt.clf()
    results_df = pd.DataFrame(np.mean(all_results, axis=0))
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))
def main():
    """Leave-one-subject-out evaluation of a stacking meta-learner.

    Loads per-frequency base-model probability predictions for all 21
    samples, then for each of 50 timepoints trains a logistic-regression
    meta-model on 20 subjects and validates on the held-out subject,
    averaging accuracy over the 21 folds. Saves the accuracy curve as
    PNG and CSV.
    """
    base_model_type = "lda"
    base_model_dir = "wavelet_class/lsqr/complex"
    save_dir = "Results/ensembles/stacking_ensemble/"
    load_dir = "Results/{}/{}".format(base_model_type, base_model_dir)
    # (21, 15, 50, epochs, 8): sample x freq x time x epoch x class-proba.
    # Fix: close the pickle file deterministically instead of leaking it.
    with open(load_dir + "/all_proba_preds.pkl", "rb") as f:
        all_sample_preds = np.array(pickle.load(f))
    # (21, all_time)
    all_y_train = []
    for sample in range(1, 22):
        all_y_train.append(get_y_train(sample))
    results = np.zeros(50)
    for time in range(50):
        print("time {}".format(time))
        # (21, 15, epochs, 8)
        sample_preds = all_sample_preds[:, :, time]
        # (21, epochs): labels for the epochs at this timepoint.
        sample_y_train = []
        for sample in range(21):
            intervals = np.arange(start=time,
                                  stop=all_y_train[sample].shape[0],
                                  step=50)
            sample_y_train.append(all_y_train[sample][intervals])
        # Fix: np.float/np.int were removed from NumPy (1.24+); the
        # builtins float/int are the documented replacements.
        sample_predictions_proba = [
            np.vstack(sample).astype(float) for sample in sample_preds
        ]
        sample_y_train = np.array(sample_y_train)
        # Repeat each subject's labels once per base-model frequency so
        # they line up with the stacked per-frequency predictions.
        sample_y_train = np.repeat(sample_y_train[:, np.newaxis], 15, axis=1)
        sample_y_train = [
            np.vstack(sample).astype(int) for sample in sample_y_train
        ]
        final = []
        for sample in range(21):
            final.append([
                data for freq_data in sample_y_train[sample]
                for data in freq_data
            ])
        sample_y_train = final
        all_val_acc = []
        for val_sample in range(21):
            print("left out validation subject: {}".format(val_sample + 1))
            # Train on every subject except the held-out one.
            x_train = (sample_predictions_proba[:val_sample] +
                       sample_predictions_proba[val_sample + 1:])
            x_train = [data for freq_data in x_train for data in freq_data]
            y_train = (sample_y_train[:val_sample] +
                       sample_y_train[val_sample + 1:])
            y_train = [data for freq_data in y_train for data in freq_data]
            x_val = sample_predictions_proba[val_sample]
            y_val = sample_y_train[val_sample]
            meta_model = LogisticRegression()
            meta_model.fit(x_train, y_train)
            acc_score = meta_model.score(x_val, y_val)
            all_val_acc.append(acc_score)
        all_val_acc = np.array(all_val_acc)
        avg_val_acc = np.mean(all_val_acc, axis=0)
        print("average cross val score: {}".format(avg_val_acc))
        results[time] = avg_val_acc
    sns.set()
    ax = sns.lineplot(data=results, dashes=False)
    ax.set(ylim=(0, 1),
           xlabel='Timepoints',
           ylabel='Accuracy',
           title='Average Cross Val Accuracy Stacking Ensemble {} Base Models'.
           format(base_model_type))
    plt.axvline(x=15, color='b', linestyle='--')
    ax.figure.savefig("{}/LOOCV.png".format(save_dir), dpi=300)
    plt.clf()
    results_df = pd.DataFrame(results)
    results_df.to_csv("{}/LOOCV.csv".format(save_dir))
def main():
    """Per-sample stacking of per-frequency base-model predictions.

    For each of 21 samples and 50 timepoints, stacks the 15 frequency
    models' probability predictions into one training matrix (labels
    tiled once per frequency) and cross-validates a logistic-regression
    meta-model. Saves per-sample and averaged accuracy curves plus a
    CSV of the averaged results.
    """
    base_model_type = "lda"
    base_model_dir = "wavelet_class/lsqr/complex"
    save_dir = "Results/ensembles/stacking_ensemble/custom/per_sample"
    load_dir = "Results/{}/{}".format(base_model_type, base_model_dir)
    # Fix: close the pickle file deterministically instead of leaking it.
    with open(load_dir + "/all_proba_preds.pkl", "rb") as f:
        all_sample_preds = np.array(pickle.load(f))
    all_sample_results = np.zeros((21, 50))
    for sample in range(21):
        print("sample {}".format(sample))
        sample_y_train = get_y_train(sample + 1)
        freq_preds = all_sample_preds[sample]
        results = np.zeros(50)
        for time in range(50):
            # Every 50th epoch starting at `time` shares the same
            # within-trial time position.
            intervals = np.arange(start=time,
                                  stop=sample_y_train.shape[0],
                                  step=50)
            y_train = sample_y_train[intervals]
            time_preds = freq_preds[:, time]
            # One label copy per frequency, matching the stacked preds.
            y_train = np.tile(y_train, 15)
            x_train = [data for freq_data in time_preds for data in freq_data]
            x_train = np.array(x_train)
            meta_model = LogisticRegression()
            scores = cross_val_score(meta_model, x_train, y_train, cv=5)
            # Single consistent format call (was a mixed .format/% hybrid);
            # printed text is unchanged.
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            results[time] = scores.mean()
        all_sample_results[sample] = results
        sns.set()
        ax = sns.lineplot(data=results, dashes=False)
        ax.set(
            ylim=(0, 0.7),
            xlabel='Timepoints',
            ylabel='Accuracy',
            title=
            'Cross Val Accuracy Stacking Ensemble {} Base Models for Sample {}'
            .format(base_model_type, sample + 1))
        plt.axvline(x=15, color='b', linestyle='--')
        ax.figure.savefig("{}/LOOCV_sample_{}.png".format(
            save_dir, sample + 1), dpi=300)
        plt.clf()
    sns.set()
    ax = sns.lineplot(data=np.mean(all_sample_results, axis=0), dashes=False)
    ax.set(
        ylim=(0, 0.6),
        xlabel='Timepoints',
        ylabel='Accuracy',
        title=
        'Average Cross Val Accuracy Stacking Ensemble {} Base Models for All Samples'
        .format(base_model_type))
    plt.axvline(x=15, color='b', linestyle='--')
    ax.figure.savefig("{}/LOOCV_all_samples.png".format(save_dir), dpi=300)
    plt.clf()
    results_df = pd.DataFrame(np.mean(all_sample_results, axis=0))
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))
def main():
    """Per-sample bagging ensemble of shrinkage-LDA base estimators.

    For each of 21 samples and 50 timepoints, scores a BaggingClassifier
    of 15 LDA estimators (80% sample / 50% feature subsampling) with
    5-fold CV, saving per-sample and averaged accuracy curves and a CSV.
    """
    save_dir = "Results/ensembles/bagging/decision_tree"
    # Fix: close the pickle file deterministically instead of leaking it.
    with open(
            "DataTransformed/wavelet_complex/25hz/pca_80/x_train_all_samples.pkl",
            "rb") as f:
        all_x_train = pickle.load(f)
    all_results = np.zeros((21, 50))
    for sample in range(21):
        print("sample {}".format(sample))
        all_y_train = get_y_train(sample + 1)
        sample_x_train = np.array(all_x_train[sample])
        time_results = np.zeros(50)
        for time in range(50):
            # Every 50th epoch starting at `time` shares the same
            # within-trial time position.
            intervals = np.arange(start=time,
                                  stop=all_y_train.shape[0],
                                  step=50)
            y_train = all_y_train[intervals]
            x_train = sample_x_train[:, intervals]
            # (freqs, epochs, features) -> (epochs, freqs * features)
            x_train = x_train.transpose(1, 0, 2).reshape(x_train.shape[1], -1)
            model = BaggingClassifier(
                base_estimator=LinearDiscriminantAnalysis(solver='lsqr',
                                                          shrinkage='auto'),
                n_estimators=15,
                max_samples=0.8,
                max_features=0.5,
                random_state=0)
            scores = cross_val_score(model, x_train, y_train, cv=5)
            # Single consistent format call (was a mixed .format/% hybrid);
            # printed text is unchanged.
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            time_results[time] = scores.mean()
        sns.set()
        ax = sns.lineplot(data=time_results, dashes=False)
        ax.set(
            ylim=(0, 1),
            xlabel='Timepoints',
            ylabel='Accuracy',
            title='Cross Val Accuracy Bagging Ensemble for Sample {}'.format(
                sample + 1))
        plt.axvline(x=15, color='b', linestyle='--')
        # 0.125 = chance level for the 8-class problem.
        plt.axhline(0.125, color='k', linestyle='--')
        ax.figure.savefig("{}/LOOCV_sample_{}.png".format(
            save_dir, sample + 1), dpi=300)
        plt.clf()
        all_results[sample] = time_results
    sns.set()
    ax = sns.lineplot(data=np.mean(all_results, axis=0), dashes=False)
    ax.set(ylim=(0, 1),
           xlabel='Timepoints',
           ylabel='Accuracy',
           title='Average Cross Val Accuracy Bagging Ensemble for All Samples')
    plt.axvline(x=15, color='b', linestyle='--')
    plt.axhline(0.125, color='k', linestyle='--')
    ax.figure.savefig("{}/LOOCV_all_samples.png".format(save_dir), dpi=300)
    plt.clf()
    results_df = pd.DataFrame(np.mean(all_results, axis=0))
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))