Code example #1
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    config = preprocess_meta_data()

    # load the data
    data = load_data(config)

    if not config.quiet:
        config.print()

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    # visualize training performance
    graph_path = os.path.join(SRC_DIR, 'graphs')

    analyse_model_performance(model, data, graph_path, history)

    # evaluate model
    trainer.evaluate()

    # run on MAFAT test
    model = trainer.model_train
    test_model(model, SRC_DIR, config)
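Note: every snippet in this listing assumes a `preprocess_meta_data()` helper (some variants pass it `SRC_DIR`) that parses the run arguments and a configuration file into an attribute-style object: the snippets read fields like `config.quiet`, call `config.print()` and `config.get(...)`, and override values with `setattr`. The helper itself is not part of the listing; the following is only a minimal sketch of the interface the snippets rely on, with illustrative names:

import argparse
import json

class Config:
    # attribute-style view over a JSON config file (illustrative sketch)
    def __init__(self, entries):
        self.__dict__.update(entries)

    def get(self, name):
        return getattr(self, name)

    def print(self):
        for key, value in sorted(self.__dict__.items()):
            print('{}: {}'.format(key, value))

def preprocess_meta_data():
    # capture the config path from the run arguments, then load the file
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='config.json')
    args = parser.parse_args()
    with open(args.config) as f:
        return Config(json.load(f))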
Code example #2
def main():
    # capture the config path from the run arguments
    # then process configuration file
    config = preprocess_meta_data()

    if not config.quiet:
        config.print()

    # load the data
    data, test_segment_id = load_data(config)

    # preprocess data before training
    data = preprocess_data(data, config)

    # create a model
    model = build_model(config)

    # train the model and collect predictions for the test segments
    p_test = train(model, data, config)

    submission_save = pd.DataFrame()
    submission_save['segment_id'] = test_segment_id
    submission_save['time_to_eruption'] = p_test
    submission_save.to_csv(f'{config.exp_name}.csv', header=True, index=False)
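Unlike the other entry points, this variant is a regression pipeline: `train(...)` returns predictions for the held-out test segments, and the result is written as a two-column submission file (`segment_id`, `time_to_eruption`). A quick sanity check of the file produced above, assuming `config.exp_name` was `'my_experiment'`:

import pandas as pd

sub = pd.read_csv('my_experiment.csv')  # written as f'{config.exp_name}.csv' above
assert list(sub.columns) == ['segment_id', 'time_to_eruption']
print(sub.head())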
Code example #3
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data()
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)
    # directory for training-performance graphs
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    if config.use_mti_improvement:
        setattr(config, "model_input_dim", [125, 32, 1])

    # 'tcn' experiments use the transposed input shape
    if re.search('tcn', config.exp_name,
                 re.IGNORECASE) and config.use_mti_improvement:
        setattr(config, "model_input_dim", [32, 125, 1])

    # load the data
    data = load_data(config)

    # create a model
    model = build_model(config)

    # create trainer
    trainer = build_trainer(model, data, config)

    # train the model
    history = trainer.train()

    analyse_model_performance(model,
                              data,
                              history,
                              config,
                              graph_path=graph_path,
                              res_dir=exp_name_time)

    # evaluate model
    eval_res = trainer.evaluate()

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model['train'], sub_path, SRC_DIR, config)

    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('submission file is at: {}'.format(sub_path))
    print('')
Code example #4
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)
    # directory for training-performance graphs
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and config.with_rect_augmentation)
    # assert not(config.background_implicit_inference)
    assert not (config.load_complete_model_from_file
                and config.load_model_weights_from_file)
    assert config.load_complete_model_from_file or config.load_model_weights_from_file

    if config.load_model_weights_from_file:
        # build the model
        print('CURRENT DIR: {}'.format(os.getcwd()))
        model_dict = build_model(config)
        model_dict['train'].load_weights(config.model_weights_file)
        model = model_dict['train']
    elif config.load_complete_model_from_file:
        model = tf.keras.models.load_model(config.complete_model_file)
    else:
        raise Exception('Invalid Configuration...')

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model, sub_path, SRC_DIR, config, BEST_RESULT_DIR)

    #if config.save_history_buffer is True:

    print('#' * 70)
    print('submission file is at: {}'.format(sub_path))
    print('')
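Note on the two loading branches above: `load_weights` restores parameters only, so the architecture must first be rebuilt by `build_model(config)` with a configuration matching the checkpoint, whereas `tf.keras.models.load_model` restores the complete model (architecture, weights and, if saved, optimizer state) from a single file.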
Code example #5
def split_public_test_to_valid():
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name
    data_parser = DataSetParser(stable_mode=False,
                                config=config,
                                read_validation_only=True)
    test_data, train_data = data_parser.split_public_test_valid()
    print('saving FULL PUBLIC TRAIN AND TEST IN DIR: {}'.format(os.getcwd()))
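    # note: np.save appends the .npy suffix, so the files are written as
    # FULL_PUBLIC_TRAIN.npy and FULL_PUBLIC_TEST.npy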
    np.save('FULL_PUBLIC_TRAIN', train_data)
    np.save('FULL_PUBLIC_TEST', test_data)
Code example #6
def main():
    # capture the config path from the run arguments
    # then process configuration file
    config = preprocess_meta_data()

    # load the data
    data = load_data(config)

    if not config.quiet:
        config.print()

    # create a model
    model = build_model(config)

    # create trainer and pass all the previous components to it
    trainer = build_trainer(model, data, config)

    # train the model
    trainer.train()
Code example #7
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name


    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)
    # directory for training-performance graphs
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    # configure multi-GPU data-parallel training
    strategy = tf.distribute.MirroredStrategy()
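    # note: MirroredStrategy replicates the model on every visible GPU and
    # splits each global batch across the replicas, hence the scaling below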

    if strategy.num_replicas_in_sync != 1:
        setattr(config, "batch_size",
                config.batch_size * strategy.num_replicas_in_sync)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and
                (config.with_rect_augmentation or config.with_preprocess_rect_augmentation))
    # assert not(config.background_implicit_inference)
    # load the data
    data = load_data(config)


    with strategy.scope():
        # create a model
        model = build_model(config)

        # create trainer
        trainer = build_trainer(model, data, config)

        # train the model
        history = trainer.train()

    # evaluate model
    eval_res = trainer.evaluate()

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model['train'], sub_path, SRC_DIR, config, BEST_RESULT_DIR)


    if not config.learn_background:
        result_data = analyse_model_performance(model, data, history, config, graph_path=graph_path, res_dir=exp_name_time)
        result_data['Log path'] = log_path
        result_data['Graph path'] = graph_path
        result_data['Submission path'] = sub_path
        result_data['Model name'] = config.model_name
        result_data['Exp name'] = config.exp_name
        result_data['Snr type'] = config.snr_type

        # compare model performance
        if not os.path.exists(BEST_RESULT_DIR):
            os.makedirs(BEST_RESULT_DIR)

        compare_to_best_model_performance(result_data, model, BEST_RESULT_DIR, config)

    PREVIOUS_MODELS_DIR = os.path.join(RADAR_DIR, 'previous_models_files')
    if config.save_model:
        if not os.path.exists(PREVIOUS_MODELS_DIR):
            os.makedirs(PREVIOUS_MODELS_DIR)
        os.chdir(PREVIOUS_MODELS_DIR)
        save_model(name='{}_{}_{}'.format(config.model_name, config.exp_name,
                                          exp_name_time),
                   model=model['train'])

    #if config.save_history_buffer is True:

    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('submission file is at: {}'.format(sub_path))
    print('')
Code example #8
def main_sweep():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')

    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name
    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)
    # directory for training-performance graphs
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    # sys.stdout = Unbuffered(stream=sys.stdout, path=log_path)

    sweep_list = config.params_to_sweep
    res_dict = OrderedDict()
    hist_dict = OrderedDict()
    predictions_dict = OrderedDict()
    orig_config = config
    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        res_dict[param_name] = {}
        hist_dict[param_name] = {}
        predictions_dict[param_name] = {}
        config = copy.deepcopy(orig_config)  # requires `import copy`; plain assignment would alias the same object and keep values set by earlier sweeps
        for param in config.get(list_name):
            setattr(config, param_name, param)
            if re.search('tcn', config.exp_name, re.IGNORECASE):
                setattr(config, "model_input_dim", [32, 126, 1])
            elif config.with_preprocess_rect_augmentation or config.with_rect_augmentation:
                setattr(config, "model_input_dim",
                        [126, config.rect_augment_num_of_cols, 1])
            else:
                setattr(config, "model_input_dim", [126, 32, 1])
            res_dir = '{}_{}'.format(param_name, param)
            print('#' * 70)
            print('Sweeping parameter: {}, with value: {}'.format(
                param_name, param))
            if isinstance(param, list):
                str_param = str(param)
                param = re.sub(' ', '_', re.sub(r'[,\[\]]', '', str_param))
            res_dict[param_name][param], hist_dict[param_name][
                param], predictions_dict[param_name][param] = sweep_core(
                    config,
                    graph_path=graph_path,
                    res_dir=res_dir,
                    best_preformance_dir=BEST_RESULT_DIR,
                    current_sweep=param_name,
                    param_value=param)

    print('')
    print('#' * 70)
    print('Sweep results summary')
    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('')
    for param_name in res_dict.keys():
        for param in res_dict[param_name].keys():
            print('{} = {}'.format(param_name, param))
            for metric in res_dict[param_name][param].keys():
                print('{}: {}, '.format(metric,
                                        res_dict[param_name][param][metric]),
                      end='')
            print('')

    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        print_sweep_by_parameter(hist_dict,
                                 param_name=param_name,
                                 metric_list=['val_accuracy', 'accuracy'],
                                 graph_path=graph_path,
                                 title='{} Accuracy'.format(param_name))
        print_sweep_by_parameter(hist_dict,
                                 param_name=param_name,
                                 metric_list=['val_auc', 'auc'],
                                 graph_path=graph_path,
                                 title='{} AUC'.format(param_name))
        if not config.learn_background:
            print_roc_auc_by_parameter(predictions_dict,
                                       param_name=param_name,
                                       title='{} ROC-AUC'.format(param_name),
                                       graph_path=graph_path)
Code example #9
def main_sweep():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data()
    exp_name = config.exp_name
    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)
    # directory for training-performance graphs
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    sys.stdout = Unbuffered(stream=sys.stdout, path=log_path)

    sweep_list = config.params_to_sweep
    res_dict = OrderedDict()
    hist_dict = OrderedDict()
    orig_config = config
    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        res_dict[param_name] = {}
        hist_dict[param_name] = {}
        for param in config.get(list_name):
            config = copy.deepcopy(orig_config)  # requires `import copy`; plain assignment would alias the same object and keep values set by earlier sweeps
            config.__setattr__(param_name, param)
            res_dir = '{}_{}'.format(param_name, param)
            print('#' * 70)
            print('Sweeping parameter: {}, with value: {}'.format(
                param_name, param))
            if isinstance(param, list):
                param = '_'.join(param)
            res_dict[param_name][param], hist_dict[param_name][
                param] = sweep_core(config,
                                    graph_path=graph_path,
                                    res_dir=res_dir)

    print('')
    print('#' * 70)
    print('Sweep results summary')
    print('#' * 70)
    print('log file is located at {}'.format(log_path))
    print('graphs are located at {}'.format(graph_path))
    print('')
    for param_name in res_dict.keys():
        for param in res_dict[param_name].keys():
            print('{} = {}'.format(param_name, param))
            for metric in res_dict[param_name][param].keys():
                print('{}: {}, '.format(metric,
                                        res_dict[param_name][param][metric]),
                      end='')
            print('')

    for list_name in sweep_list:
        param_name = re.sub('_list', '', list_name)
        print_sweep_by_parameter(hist_dict,
                                 param_name=param_name,
                                 metric_list=['val_accuracy', 'accuracy'],
                                 graph_path=graph_path,
                                 title='{} Accuracy'.format(param_name))
        print_sweep_by_parameter(hist_dict,
                                 param_name=param_name,
                                 metric_list=['val_auc', 'auc'],
                                 graph_path=graph_path,
                                 title='{} AUC'.format(param_name))
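Both sweep drivers follow the same naming convention: each entry in `config.params_to_sweep` names a config list ending in `_list`, and the stripped name is the attribute overridden on each run. A standalone, minimal sketch of just that convention (all names here are hypothetical):

import re
from collections import OrderedDict

class Cfg:
    batch_size = 32
    batch_size_list = [16, 32, 64]
    params_to_sweep = ['batch_size_list']

    def get(self, name):
        return getattr(self, name)

cfg = Cfg()
results = OrderedDict()
for list_name in cfg.params_to_sweep:
    param_name = re.sub('_list$', '', list_name)   # 'batch_size_list' -> 'batch_size'
    for value in cfg.get(list_name):
        setattr(cfg, param_name, value)            # override the value for this run
        results[(param_name, value)] = cfg.batch_size  # stand-in for sweep_core(...)
print(results)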
Code example #10
def main():
    # capture the config path from the run arguments
    # then process configuration file
    SRC_DIR = os.getcwd()
    RADAR_DIR = os.path.join(SRC_DIR, os.pardir)
    config = preprocess_meta_data(SRC_DIR)
    exp_name = config.exp_name

    # for graph_dir and log file
    now = datetime.now()
    date = now.strftime("%Y_%m_%d_%H_%M_%S")
    exp_name_time = '{}_{}'.format(exp_name, date)
    # directory for training-performance graphs
    graph_path = os.path.join(RADAR_DIR, 'graphs', exp_name_time)
    if not os.path.exists(graph_path):
        os.makedirs(graph_path)
    LOG_DIR = os.path.join(RADAR_DIR, 'logs')
    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    log_path = '{}/{}.log'.format(LOG_DIR, exp_name_time)

    config = adjust_input_size(config)

    # assert configurations
    assert not (config.learn_background and config.with_rect_augmentation)
    # assert not(config.background_implicit_inference)
    assert not (config.load_complete_model_from_file
                and config.load_model_weights_from_file)
    assert config.load_complete_model_from_file or config.load_model_weights_from_file

    if config.load_model_weights_from_file:
        # build the model
        print('CURRENT DIR: {}'.format(os.getcwd()))
        model_dict = build_model(config)
        model_dict['train'].load_weights(config.model_weights_file)
        model = model_dict['train']
        model.compile(optimizer=Adam(learning_rate=config.learning_rate),
                      loss=BinaryCrossentropy(),
                      metrics=['accuracy', AUC()])
        # model_name = 'full_test_auc_95_0168'
        # print('saveing model to: {}/{}'.format(os.getcwd(),model_name))
        # model.save(model_name)
    elif config.load_complete_model_from_file:
        model = tf.keras.models.load_model(config.complete_model_file)
    else:
        raise Exception('Invalid Configuration...')

    # evaluate model
    if config.use_public_test_set:
        print(40 * '#')
        print('Model evaluation on FULL public test set:')
        os.chdir(SRC_DIR)
        eval_dataparser = DataSetParser(stable_mode=False,
                                        read_validation_only=True,
                                        config=config)
        X_valid, labels_valid = eval_dataparser.get_dataset_by_snr(
            dataset_type='validation', snr_type=config.snr_type)
        y_valid = np.array(labels_valid['target_type'])
        if config.with_rect_augmentation:
            # test-time augmentation: expand each sample into several
            # rectangular crops, score every crop, and average the scores
            X_augmented_test = expand_test_by_sampling_rect(data=X_valid,
                                                            config=config)
            y_pred = []
            for sampled_list_x in X_augmented_test:
                sampled_list_x = np.array(sampled_list_x)
                x = np.expand_dims(sampled_list_x, axis=-1)
                sample_result_list = model.predict(
                    x, batch_size=x.shape[0]).flatten().tolist()
                y_pred.append(np.mean(sample_result_list))
            y_pred = np.array(y_pred)
        else:
            X_valid = np.expand_dims(X_valid, axis=-1)
            y_pred = model.predict(X_valid)
            res = model.evaluate(X_valid, y_valid)
        print('roc_auc_score on FULL public test: {}'.format(
            roc_auc_score(y_valid, y_pred)))
    else:
        raise Exception(
            'Invalid Configuration..., use config.use_public_test_set = True')

    SUB_DIR = os.path.join(RADAR_DIR, 'submission_files')
    BEST_RESULT_DIR = os.path.join(RADAR_DIR, 'best_preformance_history')
    if not os.path.exists(SUB_DIR):
        os.makedirs(SUB_DIR)
    sub_path = "{}/submission_{}.csv".format(SUB_DIR, exp_name_time)
    test_model(model, sub_path, SRC_DIR, config, BEST_RESULT_DIR)

    # if config.save_history_buffer is True:

    print('#' * 70)
    print('submission file is at: {}'.format(sub_path))
    print('')