def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    config = preprocess_meta_data()

    # load the data
    data = load_data(config)

    if not config.quiet:
        config.print()

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    # visualize training performance
    graph_path = os.path.join(SRC_DIR, 'graphs')
    analyse_model_performance(model, data, graph_path, history)

    # evaluate model
    trainer.evaluate()

    # run on MAFAT test
    model = trainer.model_train
    test_model(model, SRC_DIR, config)
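# --- Hedged sketch: none of these entry points show preprocess_meta_data(),
# so the following is only an assumption about its contract: parse a config
# path from the CLI, load it, and return an object with attribute access,
# a get() lookup, and a print() helper, matching the call sites in this file.
import argparse
import json


class Config:
    def __init__(self, entries):
        self.__dict__.update(entries)

    def get(self, name):
        return getattr(self, name)

    def print(self):
        for key, value in sorted(self.__dict__.items()):
            print('{}: {}'.format(key, value))


def preprocess_meta_data(src_dir=None):
    # src_dir is optional because some variants pass SRC_DIR and some do not
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='config.json')
    parser.add_argument('--quiet', action='store_true')
    args = parser.parse_args()
    with open(args.config) as f:
        entries = json.load(f)
    entries['quiet'] = args.quiet
    return Config(entries)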
def main():
    # capture the config path from the run arguments
    # then process configuration file
    config = preprocess_meta_data()

    if not config.quiet:
        config.print()

    # load the data
    data, test_segment_id = load_data(config)

    # preprocess data before training
    data = preprocess_data(data, config)

    # create a model
    model = build_model(config)

    # train the model and collect the test-set predictions
    p_test = train(model, data, config)

    # write the submission file
    submission_save = pd.DataFrame()
    submission_save['segment_id'] = test_segment_id
    submission_save['time_to_eruption'] = p_test
    submission_save.to_csv(f'{config.exp_name}.csv', header=True, index=False)
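# --- Illustration only: the submission frame this variant writes, with
# made-up segment ids and predictions. The header matches the two columns
# assigned above.
import pandas as pd

test_segment_id = [1000015382, 1000034233]   # hypothetical ids
p_test = [1.2e7, 3.4e6]                      # hypothetical predictions

submission_save = pd.DataFrame()
submission_save['segment_id'] = test_segment_id
submission_save['time_to_eruption'] = p_test
print(submission_save.to_csv(header=True, index=False))
# segment_id,time_to_eruption
# 1000015382,12000000.0
# 1000034233,3400000.0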
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data()
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    if config.use_mti_improvement:
        config.__setattr__("model_input_dim", [125, 32, 1])
    if bool(re.search('tcn', config.exp_name, re.IGNORECASE)) and config.use_mti_improvement:
        config.__setattr__("model_input_dim", [32, 125, 1])

    # load the data
    data = load_data(config)

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    analyse_model_performance(model, data, history, config,
                              graph_path=graph_path, res_dir=exp_name_time)

    # evaluate model
    eval_res = trainer.evaluate()

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model['train'], sub_path, SRC_DIR, config)

    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('submission file is at: {}'.format(sub_path))
    print('')
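# --- Hedged sketch of adjust_input_size(), the helper that later variants
# call instead of the inline branching above. Its body is not shown in this
# excerpt; the branch values below mirror the inline logic of main_sweep()
# further down, so this is an assumption, not the real source.
import re


def adjust_input_size(config):
    if bool(re.search('tcn', config.exp_name, re.IGNORECASE)):
        # TCN variants swap the spectrogram axes
        config.__setattr__("model_input_dim", [32, 126, 1])
    elif config.with_preprocess_rect_augmentation or config.with_rect_augmentation:
        config.__setattr__("model_input_dim", [126, config.rect_augment_num_of_cols, 1])
    else:
        config.__setattr__("model_input_dim", [126, 32, 1])
    return config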
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and config.with_rect_augmentation)
    # assert not(config.background_implicit_inference)
    assert not (config.load_complete_model_from_file and config.load_model_weights_from_file)
    assert config.load_complete_model_from_file or config.load_model_weights_from_file

    if config.load_model_weights_from_file:
        # build the model
        print('CURRENT DIR: {}'.format(os.getcwd()))
        model_dict = build_model(config)
        model_dict['train'].load_weights(config.model_weights_file)
        model = model_dict['train']
    elif config.load_complete_model_from_file:
        model = tf.keras.models.load_model(config.complete_model_file)
    else:
        raise Exception('Invalid Configuration...')

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model, sub_path, SRC_DIR, config, BEST_RESULT_DIR)
    # if config.save_history_buffer is True:

    print('#' * 70)
    print('submission file is at: {}'.format(sub_path))
    print('')
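# --- Side note on the directory-creation pattern: the standard library's
# exist_ok flag collapses the repeated "check then makedirs" idiom used in
# every variant above. This is stock os API, not project code.
import os

os.makedirs(os.path.join('..', 'submission_files'), exist_ok=True)
os.makedirs(os.path.join('..', 'logs'), exist_ok=True)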
def split_public_test_to_valid():
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name
    data_parser = DataSetParser(stable_mode=False, config=config, read_validation_only=True)
    test_data, train_data = data_parser.split_public_test_valid()
    print('saving FULL PUBLIC TRAIN AND TEST IN DIR: {}'.format(os.getcwd()))
    np.save('FULL_PUBLIC_TRAIN', train_data)
    np.save('FULL_PUBLIC_TEST', test_data)
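# --- Usage note (assumption: the parsed segments are object arrays, as is
# typical when np.save() is given a list of per-segment records): reloading
# the two files then needs allow_pickle=True, and np.save() appends the
# .npy suffix automatically.
import numpy as np

train_data = np.load('FULL_PUBLIC_TRAIN.npy', allow_pickle=True)
test_data = np.load('FULL_PUBLIC_TEST.npy', allow_pickle=True)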
def main():
    # capture the config path from the run arguments
    # then process configuration file
    config = preprocess_meta_data()

    # load the data
    data = load_data(config)

    if not config.quiet:
        config.print()

    # create a model
    model = build_model(config)

    # create trainer and pass all the previous components to it
    trainer = build_trainer(model, data, config)

    # train the model
    trainer.train()
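# --- Hedged skeleton of the trainer contract these entry points rely on.
# Only the call sites are known: build_trainer(model, data, config) returns
# an object with train() -> history, evaluate() -> metrics, and a
# model_train attribute. Everything below illustrates that contract and is
# not the project's implementation.
class Trainer:
    def __init__(self, model, data, config):
        self.model_train = model
        self.data = data
        self.config = config

    def train(self):
        # fit the model and return a Keras-style History object
        raise NotImplementedError

    def evaluate(self):
        # score the model on the held-out split and return the metrics
        raise NotImplementedError


def build_trainer(model, data, config):
    return Trainer(model, data, config)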
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    # configure distributed training across the available GPUs;
    # scale the batch size so the per-replica batch stays as configured
    strategy = tf.distribute.MirroredStrategy()
    if strategy.num_replicas_in_sync != 1:
        config.__setattr__("batch_size", config.batch_size * strategy.num_replicas_in_sync)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and (config.with_rect_augmentation or config.with_preprocess_rect_augmentation))
    # assert not(config.background_implicit_inference)

    # load the data
    data = load_data(config)

    with strategy.scope():
        # create a model
        model = build_model(config)
        # create trainer
        trainer = build_trainer(model, data, config)
        # train the model
        history = trainer.train()
        # evaluate model
        eval_res = trainer.evaluate()

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model['train'], sub_path, SRC_DIR, config, BEST_RESULT_DIR)

    if config.learn_background is False:
        result_data = analyse_model_performance(model, data, history, config,
                                                graph_path=graph_path, res_dir=exp_name_time)
        result_data['Log path'] = log_path
        result_data['Graph path'] = graph_path
        result_data['Submission path'] = sub_path
        result_data['Model name'] = config.model_name
        result_data['Exp name'] = config.exp_name
        result_data['Snr type'] = config.snr_type

        # compare model performance
        if not os.path.exists(BEST_RESULT_DIR):
            os.makedirs(BEST_RESULT_DIR)
        compare_to_best_model_performance(result_data, model, BEST_RESULT_DIR, config)

    PREVIOUS_MODELS_DIR = os.path.join(RADAR_DIR, 'previous_models_files')
    if config.save_model is True:
        if not os.path.exists(PREVIOUS_MODELS_DIR):
            os.makedirs(PREVIOUS_MODELS_DIR)
        os.chdir(PREVIOUS_MODELS_DIR)
        save_model(name='{}_{}_{}'.format(config.model_name, config.exp_name, exp_name_time),
                   model=model['train'])
    # if config.save_history_buffer is True:

    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('submission file is at: {}'.format(sub_path))
    print('')
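# --- Why the batch size is multiplied by num_replicas_in_sync: under
# tf.distribute, the batch size passed to Keras fit() is the *global*
# batch, split evenly across replicas, so scaling it keeps each GPU at the
# per-replica batch size the config was tuned for. Minimal sketch:
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
per_replica_batch = 32
global_batch = per_replica_batch * strategy.num_replicas_in_sync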
def main_sweep():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)
    # sys.stdout = Unbuffered(stream=sys.stdout, path=log_path)

    sweep_list = config.params_to_sweep
    res_dict = OrderedDict()
    hist_dict = OrderedDict()
    predictions_dict = OrderedDict()
    orig_config = config
    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        res_dict[param_name] = {}
        hist_dict[param_name] = {}
        predictions_dict[param_name] = {}
        # NOTE: this rebinds the same config object (no copy), so swept
        # attribute values persist across iterations
        config = orig_config
        for param in config.get(list_name):
            config.__setattr__(param_name, param)
            if bool(re.search('tcn', config.exp_name, re.IGNORECASE)):
                config.__setattr__("model_input_dim", [32, 126, 1])
            elif config.with_preprocess_rect_augmentation or config.with_rect_augmentation:
                config.__setattr__("model_input_dim", [126, config.rect_augment_num_of_cols, 1])
            else:
                config.__setattr__("model_input_dim", [126, 32, 1])
            res_dir = '{}_{}'.format(param_name, param)
            print('#' * 70)
            print('Sweeping parameter: {}, with value: {}'.format(param_name, param))
            if isinstance(param, list):
                str_param = str(param)
                param = re.sub(' ', '_', re.sub(r'[,\[\]]', '', str_param))
            res_dict[param_name][param], hist_dict[param_name][param], predictions_dict[param_name][param] = sweep_core(
                config, graph_path=graph_path, res_dir=res_dir,
                best_preformance_dir=BEST_RESULT_DIR,
                current_sweep=param_name, param_value=param)
            print('')

    print('#' * 70)
    print('Sweep results summary')
    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('')
    for param_name in res_dict.keys():
        for param in res_dict[param_name].keys():
            print('{} = {}'.format(param_name, param))
            for metric in res_dict[param_name][param].keys():
                print('{}: {}, '.format(metric, res_dict[param_name][param][metric]), end='')
            print('')

    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        print_sweep_by_parameter(hist_dict, param_name=param_name,
                                 metric_list=['val_accuracy', 'accuracy'],
                                 graph_path=graph_path,
                                 title='{} Accuracy'.format(param_name))
        print_sweep_by_parameter(hist_dict, param_name=param_name,
                                 metric_list=['val_auc', 'auc'],
                                 graph_path=graph_path,
                                 title='{} AUC'.format(param_name))
        if not config.learn_background:
            print_roc_auc_by_parameter(predictions_dict, param_name=param_name,
                                       title='{} ROC-AUC'.format(param_name),
                                       graph_path=graph_path)
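# --- Hypothetical config fragment driving main_sweep() above: sweep lists
# follow the "<param>_list" naming convention that the loop strips with
# re.sub('_list', '', ...). The parameter names and values here are
# invented for illustration.
config_fragment = {
    "params_to_sweep": ["learning_rate_list", "batch_size_list"],
    "learning_rate_list": [1e-3, 1e-4],
    "batch_size_list": [32, 64],
}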
def main_sweep():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data()
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)
    sys.stdout = Unbuffered(stream=sys.stdout, path=log_path)

    sweep_list = config.params_to_sweep
    res_dict = OrderedDict()
    hist_dict = OrderedDict()
    orig_config = config
    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        res_dict[param_name] = {}
        hist_dict[param_name] = {}
        for param in config.get(list_name):
            # NOTE: rebinding does not copy, so swept attributes persist
            # on the shared config object across iterations
            config = orig_config
            config.__setattr__(param_name, param)
            res_dir = '{}_{}'.format(param_name, param)
            print('#' * 70)
            print('Sweeping parameter: {}, with value: {}'.format(param_name, param))
            if isinstance(param, list):
                param = '_'.join(param)
            res_dict[param_name][param], hist_dict[param_name][param] = sweep_core(
                config, graph_path=graph_path, res_dir=res_dir)
            print('')

    print('#' * 70)
    print('Sweep results summary')
    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('')
    for param_name in res_dict.keys():
        for param in res_dict[param_name].keys():
            print('{} = {}'.format(param_name, param))
            for metric in res_dict[param_name][param].keys():
                print('{}: {}, '.format(metric, res_dict[param_name][param][metric]), end='')
            print('')

    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        print_sweep_by_parameter(hist_dict, param_name=param_name,
                                 metric_list=['val_accuracy', 'accuracy'],
                                 graph_path=graph_path,
                                 title='{} Accuracy'.format(param_name))
        print_sweep_by_parameter(hist_dict, param_name=param_name,
                                 metric_list=['val_auc', 'auc'],
                                 graph_path=graph_path,
                                 title='{} AUC'.format(param_name))
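# --- Hedged sketch of the Unbuffered helper assumed by the sys.stdout
# swap above: it tees every write to the original stream and a log file,
# flushing both so the log stays complete if the run dies mid-write. The
# project's real implementation may differ.
class Unbuffered:
    def __init__(self, stream, path):
        self.stream = stream
        self.log = open(path, 'a')

    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
        self.log.write(data)
        self.log.flush()

    def flush(self):
        self.stream.flush()
        self.log.flush()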
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)

    # visualize training performance
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and config.with_rect_augmentation)
    # assert not(config.background_implicit_inference)
    assert not (config.load_complete_model_from_file and config.load_model_weights_from_file)
    assert config.load_complete_model_from_file or config.load_model_weights_from_file

    if config.load_model_weights_from_file:
        # build the model
        print('CURRENT DIR: {}'.format(os.getcwd()))
        adjust_input_size(config)
        model_dict = build_model(config)
        model_dict['train'].load_weights(config.model_weights_file)
        model = model_dict['train']
        model.compile(optimizer=Adam(learning_rate=config.learning_rate),
                      loss=BinaryCrossentropy(),
                      metrics=['accuracy', AUC()])
        # model_name = 'full_test_auc_95_0168'
        # print('saving model to: {}/{}'.format(os.getcwd(), model_name))
        # model.save(model_name)
    elif config.load_complete_model_from_file:
        model = tf.keras.models.load_model(config.complete_model_file)
    else:
        raise Exception('Invalid Configuration...')

    # evaluate model
    if config.use_public_test_set:
        print(40 * '#')
        print('Model evaluation on FULL public test set:')
        os.chdir(SRC_DIR)
        eval_dataparser = DataSetParser(stable_mode=False, read_validation_only=True, config=config)
        X_valid, labels_valid = eval_dataparser.get_dataset_by_snr(dataset_type='validation',
                                                                   snr_type=config.snr_type)
        y_valid = np.array(labels_valid['target_type'])
        if config.with_rect_augmentation:
            X_augmented_test = expand_test_by_sampling_rect(data=X_valid, config=config)
            y_pred = []
            for sampled_list_x, test_index in zip(X_augmented_test, range(len(X_augmented_test))):
                sample_result_list = []
                sampled_list_x = np.array(sampled_list_x)
                x = np.expand_dims(sampled_list_x, axis=-1)
                sample_result_list.extend(model.predict(x, batch_size=x.shape[0]).flatten().tolist())
                y_pred.append(np.mean(sample_result_list))
            # raise Exception('Currently not supported')
            y_pred = np.array(y_pred)
        else:
            X_valid = np.expand_dims(X_valid, axis=-1)
            y_pred = model.predict(X_valid)
            res = model.evaluate(X_valid, y_valid)
        print('roc_auc_score on FULL public test: {}'.format(roc_auc_score(y_valid, y_pred)))
    else:
        raise Exception('Invalid Configuration..., use config.use_public_test_set = True')

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model, sub_path, SRC_DIR, config, BEST_RESULT_DIR)
    # if config.save_history_buffer is True:

    print('#' * 70)
    print('submission file is at: {}'.format(sub_path))
    print('')
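# --- The rect-augmentation scoring loop above, restated as a helper for
# readability; behavior is intended to be identical. expand_test_by_sampling_rect
# is the project's own function, assumed to return one list of sampled
# crops per test segment.
import numpy as np


def predict_with_rect_augmentation(model, X, config):
    X_augmented = expand_test_by_sampling_rect(data=X, config=config)
    y_pred = []
    for sampled_crops in X_augmented:
        x = np.expand_dims(np.array(sampled_crops), axis=-1)
        scores = model.predict(x, batch_size=x.shape[0]).flatten()
        # one prediction per segment: the mean over its sampled crops
        y_pred.append(scores.mean())
    return np.array(y_pred)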