# Set up the experiment to test the above function: fit the pipeline through
# bootstrap_interval, persist the resulting distribution and interval, then
# print a short summary.
output_dir = '/lustre03/project/6010672/yacine08/eeg_pain_result/'
bootstrap_filename = output_dir + 'bootstrap.pickle'

# Classifier for Healthy, MSK and Both
clf = LogisticRegression()
# NOTE(review): the final step is labelled 'SVM' although the estimator is a
# LogisticRegression. The label is kept as-is because other code may refer to
# the step by name -- confirm before renaming.
pipe = Pipeline([
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('scaler', StandardScaler()),
    ('SVM', clf)])

# Training and bootstrap interval generation
X, y, group, df = pre_process(cfg.DF_FILE_PATH, cfg.PARTICIPANT_TYPE)
acc_distribution, acc_interval = bootstrap_interval(
    X, y, group, pipe, num_resample=1000, p_value=0.05)

# Save the data to disk
bootstrap_data = {
    'distribution': acc_distribution,
    'interval': acc_interval
}
# The context manager guarantees the file is closed even if pickling fails.
# NOTE(review): 'ab' appends a new pickle record on every run, so a single
# pickle.load() on this file only returns the record from the first run.
# Switch to 'wb' if each run is meant to replace the previous result.
with open(bootstrap_filename, 'ab') as bootstrap_file:
    pickle.dump(bootstrap_data, bootstrap_file)

# Print out a high-level summary
# NOTE(review): the heading says "F1" while the variables are named acc_*;
# which metric bootstrap_interval returns is not visible here -- confirm.
print("F1 Distribution:")
print(acc_distribution)
print(f"Mean: {np.mean(acc_distribution)} and std: {np.std(acc_distribution)}")
# Model-selection script: builds a grid-search pipeline, runs
# classify_loso_model_selection on the pre-processed features, and pickles
# the fitted grid search, the accuracies and the best parameters.
# (Original note: we have some decent outputs.)
import pickle

from ml_tools.classification import classify_loso_model_selection
from ml_tools.classification import create_gridsearch_pipeline
from ml_tools.classification import save_model
from ml_tools.pre_processing import pre_process

if __name__ == '__main__':

    # Global Experimental Variable
    input_filename = '/lustre03/project/6010672/yacine08/eeg_pain_result/features_all.csv'
    # Alternative path for a local run:
    #input_filename = '/home/yacine/Documents/features_all.csv'

    gs = create_gridsearch_pipeline()

    X, y, group, df = pre_process(input_filename)
    accuracies, best_params = classify_loso_model_selection(X, y, group, gs)

    # Create the files and save them
    save_model(gs, 'trained_gs.pickle')

    # Context managers guarantee both files are closed even if pickling fails.
    # NOTE(review): 'ab' appends a new pickle record on every run, so a single
    # pickle.load() only returns the record from the first run. Switch to
    # 'wb' if each run is meant to replace the previous result.
    with open('accuracies_result.pickle', 'ab') as accuracy_file:
        pickle.dump(accuracies, accuracy_file)

    with open('best_params.pickle', 'ab') as best_params_file:
        pickle.dump(best_params, best_params_file)