# name of particular feature that will be used # note if want to test for disease label then have to specify this to be 'disease' # otherwise it has to be one of ['IRF/SRF', 'Scar', 'GA', 'CNV', 'Large PED'] cfg.str_feature = 'disease' # whether or not to make the training set balanced - note this will give you imbalanced test set cfg.balanced = True # specify model architecture and whether to use debug mode cfg.str_arch = 'arch_009' cfg.debug_mode = False # now load the preprocessed data and the label f_data_handle = "preproc_data_{}_{}.pkl".format(vec_idx_patient[0], vec_idx_patient[1]) f_data = cfg_template.d_preproc / f_data_handle with open(str(f_data), 'rb') as handle: X = pickle.load(handle) y = generate_labels(cfg.str_feature, cfg, bool_append_csv_to_cfg=True) # now prepare data for training cfg = initialize_config_split(cfg) X, y = correct_data_label(X, y, cfg) Xs, ys = prepare_data_for_train(X, y, cfg) # finally set the training parameters cfg = initialize_config_training(cfg, bool_debug=cfg.debug_mode) cfg = run_training(Xs, ys, cfg)
def _load_and_prepare_data(cfg):
    """Load the preprocessed data for ``cfg`` and build the data splits.

    Reads ``preproc_data_{a}_{b}.pkl`` (``a``/``b`` being the first two entries
    of ``cfg.vec_idx_patient``) from ``cfg.d_preproc``, generates the labels
    for ``cfg.str_feature``, then runs ``initialize_config_split``,
    ``correct_data_label`` and ``prepare_data_for_train`` in that order.

    Args:
        cfg: configuration object; passed to every helper listed above.

    Returns:
        Tuple ``(cfg, Xs, ys)`` where ``cfg`` is whatever
        ``initialize_config_split`` returned and ``Xs``/``ys`` are the outputs
        of ``prepare_data_for_train``.
    """
    f_data_handle = "preproc_data_{}_{}.pkl".format(
        cfg.vec_idx_patient[0], cfg.vec_idx_patient[1])
    f_data = cfg.d_preproc / f_data_handle

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file - only point cfg.d_preproc at data produced by a trusted pipeline.
    with open(str(f_data), 'rb') as handle:
        X = pickle.load(handle)

    y = generate_labels(cfg.str_feature, cfg, bool_append_csv_to_cfg=True)

    # now prepare data for training
    cfg = initialize_config_split(cfg)
    X, y = correct_data_label(X, y, cfg)
    Xs, ys = prepare_data_for_train(X, y, cfg)

    return cfg, Xs, ys


def run_repeated_training_binary(cfg_in):
    """Train and evaluate a binary classifier ``cfg_in.n_repeats`` times.

    Each repetition works on a deep copy of ``cfg_in``: the data is reloaded
    and re-split, a fresh model is obtained from ``get_model`` and fitted, and
    the model is evaluated on the train / validation / test splits. Test-set
    predictions are thresholded at 0.5. After the last repetition the
    per-repetition results are concatenated, summary accuracies are printed,
    the results are attached to ``cfg_in``, confusion matrices are plotted and
    the configuration is saved through ``save_cfg`` and ``save_mat``.

    NumPy's global random seed is set once, before the first repetition, from
    the ``random_seed`` of a split-initialised copy of ``cfg_in``.

    Args:
        cfg_in: configuration object. Attributes read directly here are
            ``n_repeats``, ``str_arch`` and ``d_model``; ``vec_idx_patient``,
            ``d_preproc`` and ``str_feature`` are read while loading the data.
            Result attributes (``vec_*_acc``, ``y_test_*``, ``vec_y_*``,
            ``vec_idx_absolute_test``, ``mat_vec_idx_absolute_test``) and
            output attributes (``str_model``, ``f_model``, ``p_figure``,
            ``p_cfg``) are written to it.
            NOTE(review): ``cfg_in`` is rebound to the return value of
            ``initialize_config_split`` before those attributes are set, so
            whether the caller's object sees them depends on that helper
            returning the same instance - confirm.

    Returns:
        None. Results are stored on the configuration object and on disk.

    Raises:
        ValueError: if ``cfg_in.n_repeats`` is smaller than 1, if the
            configured number of classes is not two, or if the concatenated
            result arrays disagree in length.
    """
    # without at least one repetition there is nothing to concatenate below
    if cfg_in.n_repeats < 1:
        raise ValueError(
            "cfg_in.n_repeats must be at least 1, got {}".format(
                cfg_in.n_repeats))

    # create the empty lists for holding all the data
    vec_train_acc = []
    vec_valid_acc = []
    vec_test_acc = []

    vec_y_true = []
    vec_y_pred = []
    vec_y_pred_prob = []
    mat_vec_idx_absolute_test = []

    # set the random seed for all the splits
    temp = initialize_config_split(copy.deepcopy(cfg_in))
    np.random.seed(temp.random_seed)

    # initiate the repeated training
    for i in range(cfg_in.n_repeats):
        cfg = copy.deepcopy(cfg_in)
        print("\n\nIteration: {}".format(i + 1))

        # now load the preprocessed data and the label, then split it
        cfg, Xs, ys = _load_and_prepare_data(cfg)

        # report the shapes of the train / valid / test splits
        for idx_split, str_split in enumerate(("train", "valid", "test")):
            print("\nx_{} Angiography cube shape: {}".format(
                str_split, Xs[idx_split][0].shape))
            print("x_{} Structure OCT cube shape: {}".format(
                str_split, Xs[idx_split][1].shape))
            print("x_{} B scan shape: {}".format(
                str_split, Xs[idx_split][2].shape))
            print("y_{} onehot shape: {}".format(
                str_split, ys[idx_split].shape))

        # finally set the training parameters
        cfg = initialize_config_training(cfg, bool_debug=cfg.bool_debug)
        model = get_model(cfg.str_arch, cfg)
        callbacks = get_callbacks(cfg)

        # fail before the expensive fit instead of after it
        if cfg.num_classes != 2:
            raise ValueError("The number of classes should be two")

        # the whole validation split is evaluated as a single batch
        model.fit(Xs[0], ys[0], batch_size=cfg.batch_size,
                  epochs=cfg.n_epoch, verbose=2, callbacks=callbacks,
                  validation_data=(Xs[1], ys[1]), shuffle=False,
                  validation_batch_size=Xs[1][0].shape[0])

        # Now perform prediction
        train_set_score = model.evaluate(Xs[0], ys[0], callbacks=callbacks,
                                         verbose=0)
        valid_set_score = model.evaluate(Xs[1], ys[1], callbacks=callbacks,
                                         verbose=0)
        test_set_score = model.evaluate(Xs[2], ys[2], callbacks=callbacks,
                                        verbose=0)

        # NOTE(review): index 1 is taken to be the accuracy metric; this
        # depends on how get_model compiles the model - confirm.
        vec_train_acc.append(train_set_score[1])
        vec_valid_acc.append(valid_set_score[1])
        vec_test_acc.append(test_set_score[1])

        y_true = ys[-1]
        y_pred_logits = model.predict(Xs[2])

        # threshold a copy at 0.5 so the raw model output stays untouched
        y_pred = y_pred_logits.copy()
        y_pred[y_pred >= 0.5] = 1
        y_pred[y_pred < 0.5] = 0
        y_pred = y_pred.reshape(-1)

        # now transform the model output into a matrix of two class
        # probabilities (column 0: 1 - p, column 1: p) and append
        y_pred_logits_both = np.concatenate(
            [1 - y_pred_logits, y_pred_logits], axis=1)

        vec_y_true.append(y_true)
        vec_y_pred.append(y_pred)
        vec_y_pred_prob.append(y_pred_logits_both)
        mat_vec_idx_absolute_test.append(cfg.vec_idx_absolute[2])

        # drop the references to this repetition's data before the next load
        Xs = []
        ys = []

    print('\n' + '=' * 80)
    print("Average train set accuracy: {} + ".format(np.mean(vec_train_acc)),
          np.std(vec_train_acc))
    print("Average valid set accuracy: {} + ".format(np.mean(vec_valid_acc)),
          np.std(vec_valid_acc))
    print("Average test set accuracy: {} + ".format(np.mean(vec_test_acc)),
          np.std(vec_test_acc))

    # now concatenate the results together
    y_true = np.concatenate(vec_y_true, axis=0)
    y_pred = np.concatenate(vec_y_pred, axis=0)
    y_pred_prob = np.concatenate(vec_y_pred_prob, axis=0)
    vec_idx_absolute_test = np.concatenate(mat_vec_idx_absolute_test, axis=0)

    # sanity check
    if not (y_true.shape[0] == y_pred.shape[0] == y_pred_prob.shape[0]
            == vec_idx_absolute_test.shape[0]):
        raise ValueError("These should be equal")

    # print the overall test set accuracy; y_pred is 1-D, so a single-column
    # y_true is flattened first - otherwise the comparison would broadcast to
    # an (n, n) matrix and the printed value would be wrong
    y_true_flat = y_true
    if y_true.ndim == 2 and y_true.shape[1] == 1:
        y_true_flat = y_true.reshape(-1)
    print("\nOverall test set accuracy: {}".format(
        np.sum(y_true_flat == y_pred) / len(y_true_flat)))

    # run the load / split pipeline once more on cfg_in itself; only the
    # returned configuration is kept, the data is discarded immediately
    cfg_in = _load_and_prepare_data(cfg_in)[0]

    # append the hyperparameters to the cfg structure
    cfg_in.vec_train_acc = vec_train_acc
    cfg_in.vec_valid_acc = vec_valid_acc
    cfg_in.vec_test_acc = vec_test_acc
    cfg_in.y_test_true = y_true
    cfg_in.y_test_pred = y_pred
    cfg_in.y_test_pred_prob = y_pred_prob
    cfg_in.vec_idx_absolute_test = vec_idx_absolute_test

    cfg_in.vec_y_true = vec_y_true
    cfg_in.vec_y_pred = vec_y_pred
    cfg_in.vec_y_pred_prob = vec_y_pred_prob
    cfg_in.mat_vec_idx_absolute_test = mat_vec_idx_absolute_test

    # now get the configuration right for saving the output
    cfg_in.str_model = cfg_in.str_arch
    cfg_in.f_model = "{}_{}".format(cfg_in.str_arch,
                                    time.strftime("%Y%m%d_%H%M%S"))
    cfg_in.p_figure = cfg_in.d_model / cfg_in.str_model / cfg_in.f_model
    cfg_in.p_figure.mkdir(parents=True, exist_ok=True)
    cfg_in.p_cfg = cfg_in.p_figure

    # plot the confusion matrices
    plot_raw_conf_matrix(y_true, y_pred, cfg_in, save=True,
                         f_figure=cfg_in.str_arch)
    plot_norm_conf_matrix(y_true, y_pred, cfg_in, save=True,
                          f_figure=cfg_in.str_arch)

    # save the cfg, which contains configurations and results
    save_cfg(cfg_in, overwrite=True)

    # save the mat file, which contains the binary results
    save_mat(cfg_in, overwrite=True)