def sweep_core(config, graph_path, res_dir):
    # load the data
    data = load_data(config)

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    analyse_model_performance(model, data, history, config, graph_path=graph_path, res_dir=res_dir)

    # evaluate model
    eval_res = trainer.evaluate()

    model = trainer.model_train

    # keep the final-epoch value of every metric in the training history
    res_dict = OrderedDict()
    for key in history.history.keys():
        res_dict[key] = history.history[key][-1]

    return res_dict, history.history
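A driver loop along the following lines could invoke sweep_core once per swept value and collect the final-epoch metrics it returns. This is only a sketch: run_sweep, sweep_param, sweep_values, graph_root, and res_root are hypothetical names, and it assumes the swept option can be set with a plain attribute assignment on config.

def run_sweep(config, sweep_param, sweep_values, graph_root, res_root):
    # hypothetical sweep driver: one sweep_core run per parameter value
    sweep_results = OrderedDict()
    for value in sweep_values:
        setattr(config, sweep_param, value)  # assumption: plain attribute assignment is enough
        run_tag = '{}_{}'.format(sweep_param, value)
        res_dict, _ = sweep_core(config,
                                 graph_path=os.path.join(graph_root, run_tag),
                                 res_dir=os.path.join(res_root, run_tag))
        sweep_results[value] = res_dict  # final-epoch value of every history key
    return sweep_results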
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    config = preprocess_meta_data()

    # load the data
    data = load_data(config)

    if not config.quiet:
        config.print()

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    # visualize training performance
    graph_path = os.path.join(SRC_DIR, 'graphs')
    analyse_model_performance(model, data, graph_path, history)

    # evaluate model
    trainer.evaluate()

    # run on MAFAT test
    model = trainer.model_train
    test_model(model, SRC_DIR, config)
def main():
    # capture the config path from the run arguments
    # then process configuration file
    config = preprocess_meta_data()

    if not config.quiet:
        config.print()

    # load the data
    data, test_segment_id = load_data(config)

    # preprocess data before training
    data = preprocess_data(data, config)

    # create a model
    model = build_model(config)

    # train the model and predict on the test segments
    p_test = train(model, data, config)

    # save the submission file
    submission_save = pd.DataFrame()
    submission_save['segment_id'] = test_segment_id
    submission_save['time_to_eruption'] = p_test
    submission_save.to_csv(f'{config.exp_name}.csv', header=True, index=False)
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data()
    exp_name = config.exp_name

    # timestamp for the graph dir and the log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)

    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    if config.use_mti_improvement:
        config.__setattr__("model_input_dim", [125, 32, 1])
    if bool(re.search('tcn', config.exp_name, re.IGNORECASE)) and config.use_mti_improvement:
        config.__setattr__("model_input_dim", [32, 125, 1])

    # load the data
    data = load_data(config)

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    analyse_model_performance(model, data, history, config, graph_path=graph_path, res_dir=exp_name_time)

    # evaluate model
    eval_res = trainer.evaluate()

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model['train'], sub_path, SRC_DIR, config)

    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('submission file is at: {}'.format(sub_path))
    print('')
def sweep_core(config, graph_path, res_dir, best_preformance_dir, current_sweep, param_value):
    # load the data
    data = load_data(config)

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    if config.learn_background is False:
        result_data = analyse_model_performance(model, data, history, config,
                                                graph_path=graph_path, res_dir=res_dir)
        result_data['Log path'] = res_dir
        result_data['Graph path'] = graph_path
        result_data['Submission path'] = "None - Sweep {} results with value {}".format(current_sweep, param_value)
        result_data['Model name'] = config.model_name
        result_data['Exp name'] = config.exp_name
        result_data['Snr type'] = config.snr_type

        # compare model performance
        if not os.path.exists(best_preformance_dir):
            os.makedirs(best_preformance_dir)
        compare_to_best_model_performance(result_data, model, best_preformance_dir, config)

    # evaluate model
    eval_res = trainer.evaluate()

    model = trainer.model_train

    # predictions for later use in sweep visualizations
    pred_dict = get_predictions_dict_per_model(model, data)

    # keep the final-epoch value of every metric in the training history
    res_dict = OrderedDict()
    for key in history.history.keys():
        res_dict[key] = history.history[key][-1]

    return res_dict, history.history, pred_dict
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # timestamp for the graph dir and the log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)

    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and config.with_rect_augmentation)
    # assert not (config.background_implicit_inference)
    assert not (config.load_complete_model_from_file and config.load_model_weights_from_file)
    assert config.load_complete_model_from_file or config.load_model_weights_from_file

    if config.load_model_weights_from_file:
        # build the model and load its weights
        print('CURRENT DIR: {}'.format(os.getcwd()))
        model_dict = build_model(config)
        model_dict['train'].load_weights(config.model_weights_file)
        model = model_dict['train']
    elif config.load_complete_model_from_file:
        model = tf.keras.models.load_model(config.complete_model_file)
    else:
        raise Exception('Invalid Configuration...')

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model, sub_path, SRC_DIR, config, BEST_RESULT_DIR)

    # if config.save_history_buffer is True:

    print('#' * 70)
    print('submission file is at: {}'.format(sub_path))
    print('')
def main():
    # capture the config path from the run arguments
    # then process configuration file
    config = preprocess_meta_data()

    # load the data
    data = load_data(config)

    if not config.quiet:
        config.print()

    # create a model
    model = build_model(config)

    # create trainer and pass all the previous components to it
    trainer = build_trainer(model, data, config)

    # train the model
    trainer.train()
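If this minimal variant lives in its own script, a standard entry-point guard (added here only as the usual convention, not taken from the original file) lets it run directly:

if __name__ == '__main__':
    main()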
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # timestamp for the graph dir and the log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)

    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    # configure multi-GPU training and scale the batch size per replica
    strategy = tf.distribute.MirroredStrategy()
    if strategy.num_replicas_in_sync != 1:
        config.__setattr__("batch_size", config.batch_size * strategy.num_replicas_in_sync)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and (config.with_rect_augmentation or config.with_preprocess_rect_augmentation))
    # assert not (config.background_implicit_inference)

    # load the data
    data = load_data(config)

    with strategy.scope():
        # create a model
        model = build_model(config)

        # create trainer
        trainer = build_trainer(model, data, config)

        # train the model
        history = trainer.train()

        # evaluate model
        eval_res = trainer.evaluate()

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model['train'], sub_path, SRC_DIR, config, BEST_RESULT_DIR)

    if config.learn_background is False:
        result_data = analyse_model_performance(model, data, history, config,
                                                graph_path=graph_path, res_dir=exp_name_time)
        result_data['Log path'] = log_path
        result_data['Graph path'] = graph_path
        result_data['Submission path'] = sub_path
        result_data['Model name'] = config.model_name
        result_data['Exp name'] = config.exp_name
        result_data['Snr type'] = config.snr_type

        # compare model performance
        if not os.path.exists(BEST_RESULT_DIR):
            os.makedirs(BEST_RESULT_DIR)
        compare_to_best_model_performance(result_data, model, BEST_RESULT_DIR, config)

    PREVIOUS_MODELS_DIR = os.path.join(RADAR_DIR, 'previous_models_files')
    if config.save_model:
        if not os.path.exists(PREVIOUS_MODELS_DIR):
            os.makedirs(PREVIOUS_MODELS_DIR)
        os.chdir(PREVIOUS_MODELS_DIR)
        save_model(name='{}_{}_{}'.format(config.model_name, config.exp_name, exp_name_time),
                   model=model['train'])
    # if config.save_history_buffer is True:

    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('submission file is at: {}'.format(sub_path))
    print('')
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # timestamp for the graph dir and the log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)

    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and config.with_rect_augmentation)
    # assert not (config.background_implicit_inference)
    assert not (config.load_complete_model_from_file and config.load_model_weights_from_file)
    assert config.load_complete_model_from_file or config.load_model_weights_from_file

    if config.load_model_weights_from_file:
        # build the model, load its weights, and compile it for evaluation
        print('CURRENT DIR: {}'.format(os.getcwd()))
        adjust_input_size(config)
        model_dict = build_model(config)
        model_dict['train'].load_weights(config.model_weights_file)
        model = model_dict['train']
        model.compile(optimizer=Adam(learning_rate=config.learning_rate),
                      loss=BinaryCrossentropy(),
                      metrics=['accuracy', AUC()])
        # model_name = 'full_test_auc_95_0168'
        # print('saving model to: {}/{}'.format(os.getcwd(), model_name))
        # model.save(model_name)
    elif config.load_complete_model_from_file:
        model = tf.keras.models.load_model(config.complete_model_file)
    else:
        raise Exception('Invalid Configuration...')

    # evaluate model
    if config.use_public_test_set:
        print(40 * '#')
        print('Model evaluation on FULL public test set:')
        os.chdir(SRC_DIR)
        eval_dataparser = DataSetParser(stable_mode=False, read_validation_only=True, config=config)
        X_valid, labels_valid = eval_dataparser.get_dataset_by_snr(dataset_type='validation',
                                                                   snr_type=config.snr_type)
        y_valid = np.array(labels_valid['target_type'])
        if config.with_rect_augmentation:
            # average the predictions over the rectangles sampled from each segment
            X_augmented_test = expand_test_by_sampling_rect(data=X_valid, config=config)
            y_pred = []
            for sampled_list_x in X_augmented_test:
                sample_result_list = []
                sampled_list_x = np.array(sampled_list_x)
                x = np.expand_dims(sampled_list_x, axis=-1)
                sample_result_list.extend(model.predict(x, batch_size=x.shape[0]).flatten().tolist())
                y_pred.append(np.mean(sample_result_list))
            y_pred = np.array(y_pred)
        else:
            X_valid = np.expand_dims(X_valid, axis=-1)
            y_pred = model.predict(X_valid)
            res = model.evaluate(X_valid, y_valid)
        print('roc_auc_score on FULL public test: {}'.format(roc_auc_score(y_valid, y_pred)))
    else:
        raise Exception('Invalid Configuration..., use config.use_public_test_set = True')

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model, sub_path, SRC_DIR, config, BEST_RESULT_DIR)

    # if config.save_history_buffer is True:

    print('#' * 70)
    print('submission file is at: {}'.format(sub_path))
    print('')
def main(argv):
    # allow GPU memory growth
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    AUTOTUNE = tf.data.experimental.AUTOTUNE

    os.makedirs(FLAGS.ckpt_dir, exist_ok=True)
    os.makedirs(FLAGS.logdir, exist_ok=True)
    os.makedirs(FLAGS.test_result_dir, exist_ok=True)

    # build the training and test input pipelines from the TFRecord files
    tfr_list = os.path.join(FLAGS.tfrecord_dir, "*.tfrecord")
    train_dataset = tf.data.Dataset.list_files(tfr_list)
    train_dataset = train_dataset.interleave(tf.data.TFRecordDataset,
                                             num_parallel_calls=AUTOTUNE,
                                             deterministic=False)
    train_dataset = train_dataset.map(parse_tfrecords)
    train_dataset = train_dataset.map(preprocess_for_training, num_parallel_calls=AUTOTUNE)
    train_dataset = train_dataset.batch(batch_size=FLAGS.batch_size, drop_remainder=True)
    train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)

    test_dataset = tf.data.Dataset.list_files(tfr_list)
    test_dataset = test_dataset.interleave(tf.data.TFRecordDataset, deterministic=True)
    test_dataset = test_dataset.map(parse_tfrecords)
    test_dataset = test_dataset.map(preprocess_for_training)
    test_dataset = test_dataset.batch(batch_size=FLAGS.num_test, drop_remainder=True)

    gen, disc = build_model(FLAGS.num_cond)
    gen_opt = tf.keras.optimizers.Adam(FLAGS.g_lr, FLAGS.beta1, FLAGS.beta2)
    disc_opt = tf.keras.optimizers.Adam(FLAGS.d_lr, FLAGS.beta1, FLAGS.beta2)

    # Set the checkpoint and the checkpoint manager.
    ckpt = tf.train.Checkpoint(epoch=tf.Variable(0, dtype=tf.int64),
                               gen=gen,
                               disc=disc,
                               gen_opt=gen_opt,
                               disc_opt=disc_opt)
    ckpt_manager = tf.train.CheckpointManager(ckpt, FLAGS.ckpt_dir, max_to_keep=5)

    # If a checkpoint exists, restore the latest checkpoint.
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print("Latest checkpoint is restored!")

    # Create a summary writer to track the losses
    summary_writer = tf.summary.create_file_writer(
        os.path.join(FLAGS.logdir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

    d_loss_list, g_loss_list = initialize_loss_trackers()

    test_img, _, test_fin_cond = next(iter(test_dataset.take(1)))

    # Train the discriminator and the generator
    while ckpt.epoch < FLAGS.num_epochs:
        ckpt.epoch.assign_add(1)
        step = tf.constant(0)
        reset_loss_trackers(d_loss_list)
        reset_loss_trackers(g_loss_list)

        start = time.time()
        for x_real, label_ini_cond, label_fin_cond in tqdm(train_dataset):
            step += 1
            # if step.numpy() > FLAGS.num_iters_decay:
            #     update_lr_by_iter(gen_opt, disc_opt, step, diff_iter, FLAGS.g_lr, FLAGS.d_lr)

            d_losses = train_disc(disc, gen, x_real, label_ini_cond, label_fin_cond,
                                  FLAGS.lambda_cond, FLAGS.lambda_gp, disc_opt)
            update_loss_trackers(d_loss_list, d_losses)

            # update the generator once every num_critic_updates discriminator updates
            if step.numpy() % FLAGS.num_critic_updates == 0:
                g_losses = train_gen(disc, gen, x_real, label_ini_cond, label_fin_cond,
                                     FLAGS.lambda_cond, FLAGS.lambda_rec,
                                     FLAGS.lambda_attn, FLAGS.lambda_tv, gen_opt)
                update_loss_trackers(g_loss_list, g_losses)
        end = time.time()

        print_log(ckpt.epoch.numpy(), start, end, d_losses, g_losses)

        # keep the log for the losses
        with summary_writer.as_default():
            tf.summary.scalar("d_loss", d_loss_list[0].result(), step=ckpt.epoch)
            tf.summary.scalar("d_loss_cond", d_loss_list[1].result(), step=ckpt.epoch)
            tf.summary.scalar("d_loss_real", d_loss_list[2].result(), step=ckpt.epoch)
            tf.summary.scalar("d_loss_fake", d_loss_list[3].result(), step=ckpt.epoch)
            tf.summary.scalar("d_loss_gp", d_loss_list[4].result(), step=ckpt.epoch)
            tf.summary.scalar("g_loss", g_loss_list[0].result(), step=ckpt.epoch)
            tf.summary.scalar("g_loss_fake", g_loss_list[1].result(), step=ckpt.epoch)
            tf.summary.scalar("g_loss_cond", g_loss_list[2].result(), step=ckpt.epoch)
            tf.summary.scalar("g_loss_rec", g_loss_list[3].result(), step=ckpt.epoch)
            tf.summary.scalar("g_fake_attn_mask_loss", g_loss_list[0].result(), step=ckpt.epoch)
            tf.summary.scalar("g_rec_attn_mask_loss", g_loss_list[1].result(), step=ckpt.epoch)
            tf.summary.scalar("g_fake_tv_loss", g_loss_list[2].result(), step=ckpt.epoch)
            tf.summary.scalar("g_rec_tv_loss", g_loss_list[3].result(), step=ckpt.epoch)

        # test the generator model and save the results for each epoch
        save_path = os.path.join(FLAGS.test_result_dir,
                                 "{}-images.jpg".format(ckpt.epoch.numpy()))
        save_test_results(gen, test_img, test_fin_cond, save_path)

        if ckpt.epoch % FLAGS.model_save_epoch == 0:
            ckpt_save_path = ckpt_manager.save()
            print("Saving a checkpoint for epoch {} at {}".format(ckpt.epoch.numpy(), ckpt_save_path))

        # decay the learning rates once the epoch passes num_epochs_decay
        if ckpt.epoch > FLAGS.num_epochs_decay:
            update_lr(gen_opt, disc_opt, FLAGS.num_epochs, ckpt.epoch, FLAGS.g_lr, FLAGS.d_lr)
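The implementation of update_lr is not shown in this file; one common choice for this kind of schedule, sketched here purely as an assumption rather than the file's actual code, is to decay both learning rates linearly toward zero over the epochs remaining after FLAGS.num_epochs_decay:

def update_lr(gen_opt, disc_opt, num_epochs, epoch, g_lr, d_lr):
    # assumed linear decay over the epochs left after the decay threshold (sketch only)
    decay_epochs = num_epochs - FLAGS.num_epochs_decay
    frac = float(num_epochs - int(epoch)) / float(decay_epochs)
    gen_opt.learning_rate.assign(g_lr * frac)
    disc_opt.learning_rate.assign(d_lr * frac)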