コード例 #1
0
import pickle

from config.load_config import get_config
from config.config_utils import initialize_config_preproc, initialize_config_split, initialize_config_training
from preproc.preprocess import generate_labels, correct_data_label
from preproc.train_val_test_split import prepare_data_for_train
from train_single_model.run_training import run_training

if __name__ == '__main__':
    # Imported locally so the script is self-contained: the module-level
    # imports above do not bring pathlib into scope, and the config path
    # below depends on it.
    import pathlib

    # Configuring the files here for now.
    # The default YAML is resolved relative to the parent of the current
    # working directory: <cwd>/../config/default_config.yml
    default_cfg_path = (pathlib.Path.cwd().parent
                        / 'config' / 'default_config.yml')
    cfg_template = get_config(filename=default_cfg_path)
    cfg_template.user = '******'
    cfg_template.load_mode = 'csv'
    cfg_template.overwrite = True
    cfg_template = initialize_config_preproc(cfg_template)

    # Now load the actual cfg generated from the data. It is read from the
    # template's d_preproc directory, under a file name derived from the
    # two patient indices below.
    vec_idx_patient = [1, 310]
    f_cfg_handle = "preproc_cfg_{}_{}.pkl".format(*vec_idx_patient)
    f_cfg = cfg_template.d_preproc / f_cfg_handle
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only point this at config pickles produced by a trusted run.
    with open(str(f_cfg), 'rb') as handle:
        cfg = pickle.load(handle)

    # Name of the particular feature that will be used.
    # To test for the disease label this has to be 'disease'; otherwise it
    # has to be one of ['IRF/SRF', 'Scar', 'GA', 'CNV', 'Large PED'].
    cfg.str_feature = 'disease'

    # whether or not to make the training set balanced - note this will give you imbalanced test set
コード例 #2
0
from config.load_config import get_config
from config.config_utils import initialize_config_preproc
from preproc.preprocess import data_loading

# Imported here so this script is self-contained: the import lines directly
# above cover only the project modules, while the code below needs pathlib
# (config path) and pickle (saving the loaded data).
import pathlib
import pickle

# Initialize the configuration from <cwd>/config/default_config.yml
cfg = get_config(
    filename=pathlib.Path.cwd() / 'config' / 'default_config.yml')
cfg.user = '******'

# Specify the loading mode: 'csv' vs 'folder'
# - 'csv': loading based on a csv file
# - 'folder': loading based on the existing folder structure
cfg.load_mode = 'csv'
cfg.overwrite = True
cfg = initialize_config_preproc(cfg)

# Two patient indices passed to data_loading and reused in the file names.
vec_idx_patient = [1, 310]
# Only the first value returned by data_loading is kept.
X, _ = data_loading(vec_idx_patient, cfg)

f_data_handle = "preproc_data_{}_{}.pkl".format(*vec_idx_patient)
# NOTE(review): f_cfg_handle is not used in the visible part of this script;
# presumably the config is pickled under this name further on - confirm.
f_cfg_handle = "preproc_cfg_{}_{}.pkl".format(*vec_idx_patient)

# Now actually save the data. An existing file is only replaced when
# cfg.overwrite is set.
f_data = cfg.d_preproc / f_data_handle
if not f_data.exists() or cfg.overwrite:
    with open(str(f_data), 'wb') as handle:
        pickle.dump(X, handle)