# Cut 1.5 s epochs at every event. Both recordings share the same window;
# the MEG epochs additionally pass detrend=1 and decim=8.
epoch_window = dict(tmin=0., tmax=1.500, baseline=None)
meg_epochs = Epochs(raw, events, detrend=1, decim=8, **epoch_window)
emg_epochs = Epochs(emg, events, **epoch_window)

# Regression inputs: MEG epochs are the features, and the variance of the
# first EMG channel along axis 2 is the target (EMG power).
X = meg_epochs.get_data()
emg_power = emg_epochs.get_data().var(axis=2)
y = emg_power[:, 0]

# Regression pipeline: SPoC spatial filtering followed by Ridge regression.
spoc = SPoC(n_components=2, log=True, reg='oas', rank='full')
clf = make_pipeline(spoc, Ridge())

# Two-fold cross-validation without shuffling.
cv = KFold(n_splits=2, shuffle=False)

# Cross-validated predictions of the EMG power.
y_preds = cross_val_predict(clf, X, y, cv=cv)

# Plot the true EMG power against the power predicted from the MEG data,
# using each epoch's event sample (relative to raw.first_samp) as its time.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[10, 4])
event_samples = meg_epochs.events[:, 0]
times = raw.times[event_samples - raw.first_samp]
ax.plot(times, y_preds, color='b', label='Predicted EMG')
ax.plot(times, y, color='r', label='True EMG')
ax.set(xlabel='Time (s)', ylabel='EMG Power', title='SPoC MEG Predictions')
# Cut 1.5 s epochs at every event. Both recordings share the same window;
# the MEG epochs additionally pass detrend=1 and decim=8.
epoch_window = dict(tmin=0., tmax=1.500, baseline=None)
meg_epochs = Epochs(raw, events, detrend=1, decim=8, **epoch_window)
emg_epochs = Epochs(emg, events, **epoch_window)

# Regression inputs: MEG epochs are the features, and the variance of the
# first EMG channel along axis 2 is the target (EMG power).
X = meg_epochs.get_data()
emg_power = emg_epochs.get_data().var(axis=2)
y = emg_power[:, 0]

# Regression pipeline: SPoC spatial filtering followed by Ridge regression.
clf = make_pipeline(
    SPoC(n_components=2, log=True, reg='oas'),
    Ridge(),
)

# Two-fold cross-validation without shuffling.
cv = KFold(n_splits=2, shuffle=False)

# Cross-validated predictions of the EMG power.
y_preds = cross_val_predict(clf, X, y, cv=cv)

# Plot the true EMG power against the power predicted from the MEG data,
# using each epoch's event sample (relative to raw.first_samp) as its time.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[10, 4])
event_samples = meg_epochs.events[:, 0]
times = raw.times[event_samples - raw.first_samp]
ax.plot(times, y_preds, color='b', label='Predicted EMG')
ax.plot(times, y, color='r', label='True EMG')
ax.set(xlabel='Time (s)', ylabel='EMG Power', title='SPoC MEG Predictions')
def main(args):
    """Train, evaluate and log a SPoC + Ridge regressor on cross-subject MEG data.

    The function loads the cross-subject train and test sets, moves the first
    part of the test set (the "transfer" split) into the training set, tunes
    the number of SPoC components with a 5-fold grid search, evaluates the
    refitted model on the remaining test samples, saves diagnostic figures and
    the model, and logs everything to MLflow.

    Parameters
    ----------
    args : namespace-like object
        Must expose ``data_dir``, ``figure_dir``, ``model_dir``, ``sub``,
        ``hand``, ``duration``, ``overlap``, ``y_measure``, ``alpha`` and
        ``experiment``.
    """
    data_dir = args.data_dir
    figure_path = args.figure_dir
    model_path = args.model_dir
    file_name = "data.hdf5"

    # Generate the parameters class.
    parameters = SPoC_params(
        subject_n=args.sub,
        hand=args.hand,
        duration=args.duration,
        overlap=args.overlap,
        y_measure=args.y_measure,
        alpha=args.alpha,
    )

    X_train, y_train, _ = import_MEG_cross_subject_train(
        data_dir, file_name, parameters.subject_n, parameters.hand)
    X_test, y_test, _ = import_MEG_cross_subject_test(
        data_dir, file_name, parameters.subject_n, parameters.hand)

    # hand == 0 selects the first target column ("sx"), anything else the
    # second one ("dx").
    target_col = 0 if parameters.hand == 0 else 1
    hand_label = "sx" if parameters.hand == 0 else "dx"

    # Required conversion and double float precision.
    X_train = np.array(X_train.squeeze()).astype(np.float64)
    y_train = np.array(y_train[..., target_col].squeeze()).astype(np.float64)
    X_test = np.array(X_test.squeeze()).astype(np.float64)
    y_test = np.array(y_test[..., target_col].squeeze()).astype(np.float64)

    # Add the transfer part of the test set to the training set.
    _, transfer_len = len_split_cross(X_test.shape[0])
    X_train = np.concatenate((X_train, X_test[:transfer_len, ...]), axis=0)
    y_train = np.concatenate((y_train, y_test[:transfer_len, ...]), axis=0)
    X_test = X_test[transfer_len:, ...]
    y_test = y_test[transfer_len:, ...]

    print("Processing hand {}".format(hand_label))
    print(
        "X_train shape {}, y_train shape {} \n X_test shape {}, y_test shape {}"
        .format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))

    pipeline = Pipeline([
        ("Spoc", SPoC(log=True, reg="oas", rank="full")),
        ("Ridge", Ridge(alpha=parameters.alpha)),
    ])

    # Initialize the cross-validation pipeline and grid search.
    cv = KFold(n_splits=5, shuffle=False)
    tuned_parameters = [{"Spoc__n_components": list(range(1, 30, 5))}]
    clf = GridSearchCV(
        pipeline,
        tuned_parameters,
        scoring=["neg_mean_squared_error", "r2"],
        n_jobs=-1,
        cv=cv,
        refit="neg_mean_squared_error",
        verbose=3,
    )

    # Tune the pipeline.
    start = time.time()
    print("Start Fitting model ...")
    clf.fit(X_train, y_train)
    print(clf)
    print(f"Training time : {time.time() - start}s ")
    print("Number of cross-validation splits folds/iteration: {}".format(
        clf.n_splits_))
    print("Best Score and parameter combination: ")
    print(clf.best_score_)
    print(clf.best_params_["Spoc__n_components"])
    print("CV results")
    print(clf.cv_results_)
    print("Number of splits")
    print(clf.n_splits_)

    # Validate the pipeline on the held-out part of the test set.
    y_new = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_new)
    # Take the root explicitly: the `squared=False` keyword is deprecated and
    # removed in recent scikit-learn releases.
    rmse = float(np.sqrt(mse))
    mae = mean_absolute_error(y_test, y_new)
    r2 = r2_score(y_test, y_new)
    print("mean squared error {}".format(mse))
    print("root mean squared error {}".format(rmse))
    print("mean absolute error {}".format(mae))
    print("r2 score {}".format(r2))

    # Plot expected vs predicted on a short window. The x-axis is sized from
    # the window actually available so short test sets do not break the plot.
    focus = slice(100, 200)
    y_new_focus = y_new[focus]
    y_test_focus = y_test[focus]
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(len(y_new_focus))
    ax.plot(times, y_new_focus, color="b", label="Predicted")
    ax.plot(times, y_test_focus, color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("{}".format(parameters.y_measure))
    ax.set_title("SPoC: Sub {}, hand {}, {} prediction".format(
        str(parameters.subject_n), hand_label, parameters.y_measure))
    plt.legend()
    viz.tight_layout()
    plt.savefig(os.path.join(figure_path, "MEG_SPoC_focus.pdf"))
    plt.show()

    # Plot the full prediction against the true value.
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(len(y_new))
    ax.plot(times, y_new, color="b", label="Predicted")
    ax.plot(times, y_test, color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("{}".format(parameters.y_measure))
    ax.set_title("Sub {}, hand {}, {} prediction".format(
        str(parameters.subject_n), hand_label, parameters.y_measure))
    plt.legend()
    plt.savefig(os.path.join(figure_path, "MEG_SPoC.pdf"))
    plt.show()

    # Scatter plot of the predicted against the true value.
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.scatter(np.array(y_test), np.array(y_new), color="b", label="Predicted")
    ax.set_xlabel("True")
    ax.set_ylabel("Predicted")
    plt.savefig(os.path.join(figure_path, "Scatter.pdf"))
    plt.show()

    # Cross-validated MSE for every tested number of components. The scorer
    # returns the negated MSE, so flip the sign before plotting. This figure
    # is logged as an artifact below, so it has to be written here.
    n_components = np.ma.getdata(
        clf.cv_results_["param_Spoc__n_components"]).astype(int)
    cv_mse = -np.asarray(clf.cv_results_["mean_test_neg_mean_squared_error"])
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.plot(n_components, cv_mse, color="b")
    ax.set_xlabel("Number of SPoC components")
    ax.set_ylabel("MSE")
    ax.set_title("SPoC Components Analysis")
    plt.xticks(n_components, n_components)
    viz.tight_layout()
    plt.savefig(os.path.join(figure_path, "MEG_SPoC_Components_Analysis.pdf"))
    plt.show()

    # Validation scores of the selected candidate (not of the first grid
    # entry). RMSE_Valid is the root of the fold-averaged validation MSE.
    best = clf.best_index_
    rmse_valid = float(np.sqrt(cv_mse[best]))
    r2_valid = clf.cv_results_["mean_test_r2"][best]

    # Save the model.
    name = "MEG_SPoC.p"
    save_skl_model(clf, model_path, name)

    # Log parameters, metrics, figures and the model.
    with mlflow.start_run(experiment_id=args.experiment):
        for key, value in vars(parameters).items():
            mlflow.log_param(key, value)

        mlflow.log_metric("MSE", mse)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R2", r2)
        mlflow.log_metric("RMSE_Valid", rmse_valid)
        mlflow.log_metric("R2_Valid", r2_valid)

        mlflow.log_param("n_components",
                         clf.best_params_["Spoc__n_components"])
        mlflow.log_param("alpha", parameters.alpha)

        mlflow.log_artifact(os.path.join(figure_path, "MEG_SPoC_focus.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "MEG_SPoC.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "Scatter.pdf"))
        mlflow.log_artifact(
            os.path.join(figure_path, "MEG_SPoC_Components_Analysis.pdf"))
        mlflow.sklearn.log_model(clf, "models")
# Band-pass the raw recording between 15 and 30 Hz.
raw.filter(15., 30., fir_design='firwin')

# Build epochs as sliding windows over the continuous raw file.
events = mne.make_fixed_length_events(raw, id=1, duration=.250)

# Cut 1.5 s epochs at every event. Both recordings share the same window;
# the MEG epochs additionally pass detrend=1 and decim=8.
epoch_window = dict(tmin=0., tmax=1.500, baseline=None)
meg_epochs = Epochs(raw, events, detrend=1, decim=8, **epoch_window)
emg_epochs = Epochs(emg, events, **epoch_window)

# Regression inputs: MEG epochs are the features, and the variance of the
# first EMG channel along axis 2 is the target (EMG power).
X = meg_epochs.get_data()
emg_power = emg_epochs.get_data().var(axis=2)
y = emg_power[:, 0]

# Regression pipeline: SPoC spatial filtering followed by Ridge regression.
spoc = SPoC(n_components=2, log=True, reg='oas', rank='full')
clf = make_pipeline(spoc, Ridge())

# Two-fold cross-validation without shuffling.
cv = KFold(n_splits=2, shuffle=False)

# Cross-validated predictions of the EMG power.
y_preds = cross_val_predict(clf, X, y, cv=cv)

# Plot the true EMG power against the power predicted from the MEG data,
# using each epoch's event sample (relative to raw.first_samp) as its time.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[10, 4])
event_samples = meg_epochs.events[:, 0]
times = raw.times[event_samples - raw.first_samp]
ax.plot(times, y_preds, color='b', label='Predicted EMG')
ax.plot(times, y, color='r', label='True EMG')
ax.set(xlabel='Time (s)', ylabel='EMG Power', title='SPoC MEG Predictions')
Y_con.append(label_con) Y_ips.append(label_ips) gc.collect() X=np.concatenate(X, axis=0) Y_con=np.concatenate(Y_con, axis=0) Y_ips=np.concatenate(Y_ips, axis=0) nt,nc,ns,nf=np.shape(X) #X in full beta x=np.squeeze(X[:,:,:,7]) spoc= SPoC(n_components=1, log=None, reg='oas', transform_into ='csp_space', rank='full') Ypre_tr= OrderedDict() score_tr= OrderedDict() Ypre_te= OrderedDict() score_te= OrderedDict() lag= OrderedDict() corr= OrderedDict() Patterns= OrderedDict() Filters= OrderedDict() Label_tr= OrderedDict() Label_te= OrderedDict() for l, mov in enumerate(laterality): print("training %s" %mov)
# #train enet # return optimizer.max def append_time_dim(arr, y_, time_stamps): """ apply added time dimension for the data array and label given time_stamps (with downsample_rate=100) in 100ms / need to check with 1375Hz """ time_arr = np.zeros([arr.shape[0]-time_stamps, int(time_stamps*arr.shape[1])]) for time_idx, time_ in enumerate(np.arange(time_stamps, arr.shape[0])): for time_point in range(time_stamps): time_arr[time_idx, time_point*arr.shape[1]:(time_point+1)*arr.shape[1]] = arr[time_-time_point,:] return time_arr, y_[time_stamps:] #%% spoc= SPoC(n_components=1, log=True, reg='oas', transform_into ='average_power', rank='full') laterality=["CON", "IPS"] signal=["STN", "ECOG"] cv = KFold(n_splits=3, shuffle=False) #%% CV split len(settings['num_patients']) for m, eeg in enumerate(signal): for s in range(1,len(settings['num_patients'])): gc.collect() subject_path=settings['BIDS_path'] + 'sub-' + settings['num_patients'][s] subfolder=IO.get_subfolders(subject_path) for ss in range(len(subfolder)):
def main(args):
    """Train, evaluate and log a SPoC + Ridge regressor on one subject's ECoG data.

    The function loads the ECoG recording for the requested subject and
    finger, splits it into train and test sets, tunes the number of SPoC
    components and the Ridge alpha with a 10-fold grid search, evaluates the
    refitted model on the test set, saves diagnostic figures and the model,
    and logs everything to MLflow.

    Parameters
    ----------
    args : namespace-like object
        Must expose ``data_dir``, ``figure_dir``, ``model_dir``, ``sub``,
        ``finger``, ``duration``, ``overlap`` and ``experiment``.
    """
    data_dir = args.data_dir
    figure_path = args.figure_dir
    model_path = args.model_dir

    parameters = SPoC_params(
        subject_n=args.sub,
        finger=args.finger,
        duration=args.duration,
        overlap=args.overlap,
    )

    file_name = "/sub" + str(parameters.subject_n) + "_comp.mat"

    # import ECoG <-- datadir, filename, finger, duration, overlap
    X, y = import_ECoG(
        data_dir,
        file_name,
        parameters.finger,
        parameters.duration,
        parameters.overlap,
    )

    print("X shape {}, y shape {}".format(X.shape, y.shape))

    X_train, X_test, y_train, y_test = split_data(X, y, 0.3)
    print(
        "X_train shape {}, y_train shape {} \n X_test shape {}, y_test shape {}"
        .format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))

    pipeline = Pipeline([
        ("Spoc", SPoC(log=True, reg="oas", rank="full")),
        ("Ridge", Ridge()),
    ])

    # Grid search over the number of SPoC components and the Ridge penalty.
    cv = KFold(n_splits=10, shuffle=False)
    tuned_parameters = [{
        "Spoc__n_components": list(range(2, 30)),
        "Ridge__alpha": [0.8, 1.0, 2, 5, 10, 15],
    }]
    clf = GridSearchCV(
        pipeline,
        tuned_parameters,
        scoring="neg_mean_squared_error",
        n_jobs=4,
        cv=cv,
        verbose=3,
    )

    start = time.time()
    print("Start Fitting model ...")
    clf.fit(X_train, y_train)
    print(f"Training time : {time.time() - start}s ")
    print("Number of cross-validation splits folds/iteration: {}".format(
        clf.n_splits_))
    print("Best Score and parameter combination: ")
    print(clf.best_score_)
    print(clf.best_params_["Spoc__n_components"])

    # Evaluate the refitted best model on the test set.
    y_new = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_new)
    # Take the root explicitly: the `squared=False` keyword is deprecated and
    # removed in recent scikit-learn releases.
    rmse = float(np.sqrt(mse))
    mae = mean_absolute_error(y_test, y_new)
    print("mean squared error {}".format(mse))
    print("root mean squared error {}".format(rmse))
    print("mean absolute error {}".format(mae))

    # Plot expected vs predicted on a short window. The x-axis is sized from
    # the window actually available so short test sets do not break the plot.
    focus = slice(100, 200)
    y_new_focus = y_new[focus]
    y_test_focus = y_test[focus]
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(len(y_new_focus))
    ax.plot(times, y_new_focus, color="b", label="Predicted")
    ax.plot(times, y_test_focus, color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("Finger Movement")
    ax.set_title("Sub {}, finger {} prediction".format(
        str(parameters.subject_n), parameters.finger))
    plt.legend()
    viz.tight_layout()
    plt.savefig(os.path.join(figure_path, "ECoG_SPoC_focus.pdf"))
    plt.show()

    # Plot the full prediction against the true value.
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(len(y_new))
    ax.plot(times, y_new, color="b", label="Predicted")
    ax.plot(times, y_test, color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("Finger Movement")
    ax.set_title("Sub {}, finger {} prediction".format(
        str(parameters.subject_n), parameters.finger))
    plt.legend()
    plt.savefig(os.path.join(figure_path, "ECoG_SPoC.pdf"))
    plt.show()

    # The grid has one entry per (n_components, alpha) pair, so collapse it to
    # one value per component count (best alpha) before plotting. The scorer
    # returns the negated MSE, hence the sign flip.
    cv_components = np.ma.getdata(
        clf.cv_results_["param_Spoc__n_components"]).astype(int)
    cv_mse = -np.asarray(clf.cv_results_["mean_test_score"])
    n_components = np.unique(cv_components)
    MSEs = np.array(
        [cv_mse[cv_components == k].min() for k in n_components])

    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.plot(n_components, MSEs, color="b")
    ax.set_xlabel("Number of SPoC components")
    ax.set_ylabel("MSE")
    ax.set_title("SPoC Components Analysis")
    plt.xticks(n_components, n_components)
    viz.tight_layout()
    plt.savefig(os.path.join(figure_path, "ECoG_SPoC_Components_Analysis.pdf"))
    plt.show()

    # Save the model.
    name = "ECoG_SPoC.p"
    save_skl_model(clf, model_path, name)

    # Log parameters, metrics, figures and the model.
    with mlflow.start_run(experiment_id=args.experiment):
        for key, value in vars(parameters).items():
            mlflow.log_param(key, value)

        mlflow.log_metric("MSE", mse)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)

        mlflow.log_param("n_components",
                         clf.best_params_["Spoc__n_components"])
        # Record the tuned Ridge penalty too; it uses a dedicated key so it
        # cannot clash with any attribute logged from `parameters`.
        mlflow.log_param("ridge_alpha", clf.best_params_["Ridge__alpha"])

        mlflow.log_artifact(os.path.join(figure_path, "ECoG_SPoC_focus.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "ECoG_SPoC.pdf"))
        mlflow.log_artifact(
            os.path.join(figure_path, "ECoG_SPoC_Components_Analysis.pdf"))
        mlflow.sklearn.log_model(clf, "models")
index_bad=np.where(GVV>Thr)[0] if verbose: if len(index_bad)>0 : print('Detected bad trials') X_clean=X[index_good,:,:] Y_clean=y[index_good] return X_clean,Y_clean #%% spoc= SPoC(n_components=1, log=True, reg='oas', transform_into ='average_power', rank='full') laterality=["CON", "IPS"] signal=["ECOG","STN"] # signal=[] #clf=LinearRegression(normalize=True, n_jobs=-1) # clf=LinearRegression() cv = KFold(n_splits=3, shuffle=False) #%% CV split len(settings['num_patients']) for m, eeg in enumerate(signal): for s in range(len(settings['num_patients'])): gc.collect()
# Cut 1.5 s epochs at every event. Both recordings share the same window;
# the MEG epochs additionally pass detrend=1 and decim=8.
epoch_window = dict(tmin=0., tmax=1.500, baseline=None)
meg_epochs = Epochs(raw, events, detrend=1, decim=8, **epoch_window)
emg_epochs = Epochs(emg, events, **epoch_window)

# Regression inputs: MEG epochs are the features, and the variance of the
# first EMG channel along axis 2 is the target (EMG power).
X = meg_epochs.get_data()
emg_power = emg_epochs.get_data().var(axis=2)
y = emg_power[:, 0]

# Regression pipeline: SPoC spatial filtering followed by Ridge regression.
clf = make_pipeline(
    SPoC(n_components=2, log=True, reg='oas', rank='full'),
    Ridge(),
)

# Two-fold cross-validation without shuffling.
cv = KFold(n_splits=2, shuffle=False)

# Cross-validated predictions of the EMG power.
y_preds = cross_val_predict(clf, X, y, cv=cv)

# Plot the true EMG power against the power predicted from the MEG data,
# using each epoch's event sample (relative to raw.first_samp) as its time.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[10, 4])
event_samples = meg_epochs.events[:, 0]
times = raw.times[event_samples - raw.first_samp]
ax.plot(times, y_preds, color='b', label='Predicted EMG')
ax.plot(times, y, color='r', label='True EMG')
ax.set(xlabel='Time (s)', ylabel='EMG Power')