# Experiment setup: where the bootstrap results are written
output_dir = '/lustre03/project/6010672/yacine08/eeg_pain_result/'
bootstrap_filename = f"{output_dir}bootstrap.pickle"

# Single classifier used for the Healthy, MSK and Both participant types
clf = LogisticRegression()

# Mean-impute NaNs, standardise the features, then classify.
# NOTE(review): the last step is labelled 'SVM' but wraps the
# LogisticRegression created above -- confirm the label is intended.
pipeline_steps = [
    ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
    ('scaler', StandardScaler()),
    ('SVM', clf),
]
pipe = Pipeline(pipeline_steps)

# Load the data, then train and generate the bootstrap interval
X, y, group, df = pre_process(cfg.DF_FILE_PATH, cfg.PARTICIPANT_TYPE)
acc_distribution, acc_interval = bootstrap_interval(
    X, y, group, pipe,
    num_resample=1000,
    p_value=0.05,
)

# Save the bootstrap distribution and its interval to disk
bootstrap_data = {
    'distribution': acc_distribution,
    'interval': acc_interval,
}
# The context manager closes the file even if pickling raises.
# NOTE(review): 'ab' appends one more pickle record on every run, while a
# single pickle.load() only returns the first record in the file -- confirm
# that appending (rather than overwriting with 'wb') is intended.
with open(bootstrap_filename, 'ab') as bootstrap_file:
    pickle.dump(bootstrap_data, bootstrap_file)

# Print a high-level summary of the bootstrap distribution
# NOTE(review): the header says "F1" while the variables are named acc_* --
# confirm which metric bootstrap_interval actually reports.
print("F1 Distribution:")
print(acc_distribution)
distribution_mean = np.mean(acc_distribution)
distribution_std = np.std(acc_distribution)
print(f"Mean: {distribution_mean} and std: {distribution_std}")
# we have some decent outputs

import pickle

from ml_tools.classification import classify_loso_model_selection
from ml_tools.classification import create_gridsearch_pipeline
from ml_tools.classification import save_model
from ml_tools.pre_processing import pre_process




if __name__ == '__main__':
    # Global experimental variable: the feature table to load
    input_filename = '/lustre03/project/6010672/yacine08/eeg_pain_result/features_all.csv'
    #input_filename = '/home/yacine/Documents/features_all.csv'
    gs = create_gridsearch_pipeline()
    X, y, group, df = pre_process(input_filename)

    # Run the model selection; returns the accuracies and the best parameters
    # (presumably one entry per held-out group, given the "loso" name -- TODO confirm)
    accuracies, best_params = classify_loso_model_selection(X, y, group, gs)

    # Create the files and save them
    save_model(gs, 'trained_gs.pickle')

    # Context managers close each file even if pickling raises.
    # NOTE(review): 'ab' appends a new pickle record on every run, while a
    # single pickle.load() only returns the first record -- confirm that
    # appending (rather than overwriting with 'wb') is intended.
    with open('accuracies_result.pickle', 'ab') as accuracy_file:
        pickle.dump(accuracies, accuracy_file)

    with open('best_params.pickle', 'ab') as best_params_file:
        pickle.dump(best_params, best_params_file)