# Entry script: builds the preprocessing configuration from the default YAML
# template, then loads the pickled configuration that was saved for the chosen
# patient index range and selects the feature to use.
# NOTE(review): the later steps of this script are outside this excerpt.
import os
import pathlib
import pickle

from config.load_config import get_config
from config.config_utils import (
    initialize_config_preproc,
    initialize_config_split,
    initialize_config_training,
)
from preproc.preprocess import generate_labels, correct_data_label
from preproc.train_val_test_split import prepare_data_for_train
from train_single_model.run_training import run_training

if __name__ == '__main__':
    # Configuring the files here for now
    # The template is resolved relative to the parent of the current working
    # directory, so the script's result depends on where it is launched from.
    cfg_template = get_config(
        filename=pathlib.Path(os.getcwd()).parent / 'config' / 'default_config.yml'
    )
    cfg_template.user = '******'
    cfg_template.load_mode = 'csv'
    cfg_template.overwrite = True
    cfg_template = initialize_config_preproc(cfg_template)

    # now load the actual cfg generated from the data
    vec_idx_patient = [1, 310]
    f_cfg_handle = "preproc_cfg_{}_{}.pkl".format(*vec_idx_patient)
    f_cfg = cfg_template.d_preproc / f_cfg_handle
    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only load configuration pickles that come from a trusted source.
    with open(str(f_cfg), 'rb') as handle:
        cfg = pickle.load(handle)

    # name of particular feature that will be used
    # note if want to test for disease label then have to specify this to be 'disease'
    # otherwise it has to be one of ['IRF/SRF', 'Scar', 'GA', 'CNV', 'Large PED']
    cfg.str_feature = 'disease'

    # whether or not to make the training set balanced - note this will give you imbalanced test set
# Preprocessing script: builds the preprocessing configuration from the
# default YAML template, loads the data for the chosen patient index range and
# pickles the loaded data into the configured preprocessing directory.
import os
import pathlib
import pickle

from config.load_config import get_config
from config.config_utils import initialize_config_preproc
from preproc.preprocess import data_loading

# Initialize the configuration
# The template is resolved relative to the current working directory, so the
# script's result depends on where it is launched from.
cfg = get_config(filename=pathlib.Path(os.getcwd()) / 'config' / 'default_config.yml')
cfg.user = '******'

# specify the loading mode: 'csv' vs 'folder'
# if csv, then loading based on a csv file
# if folder, then loading based on existing folder structure
cfg.load_mode = 'csv'
cfg.overwrite = True
cfg = initialize_config_preproc(cfg)

vec_idx_patient = [1, 310]
# Only the first returned value is used here; the second is discarded.
X, _ = data_loading(vec_idx_patient, cfg)

f_data_handle = "preproc_data_{}_{}.pkl".format(*vec_idx_patient)
f_cfg_handle = "preproc_cfg_{}_{}.pkl".format(*vec_idx_patient)

# now actually save the data
f_data = cfg.d_preproc / f_data_handle
# Write when the file is missing, or unconditionally when overwrite is enabled.
if not f_data.exists() or cfg.overwrite:
    with open(str(f_data), 'wb') as handle:
        pickle.dump(X, handle)