Ejemplo n.º 1
0
def run_hyperparam_search(questions_to_run, directory, q_docpartitions, bertmodel, sents_embed_dir, question_gpu_map,
                          attention):
    """Launch the parallel hyperparameter search with this function's fixed settings.

    The output location is whatever ``create_directory('hyperparam_search', directory)``
    returns; it is forwarded to ``hyperparam_model_search_parallel`` together with the
    caller-supplied arguments. Nothing is returned.
    """
    search_dir = create_directory('hyperparam_search', directory)

    # Keyword settings forwarded to the search; everything except `attention` is hard-coded.
    search_settings = {
        'fdtype': torch.float32,
        'num_epochs': 15,
        'prob_interval_truemax': 0.05,
        'prob_estim': 0.95,
        'random_seed': 42,
        'attention': attention,
    }
    hyperparam_model_search_parallel(
        questions_to_run,
        q_docpartitions,
        bertmodel,
        sents_embed_dir,
        search_dir,
        question_gpu_map,
        **search_settings
    )
Ejemplo n.º 2
0
def write_sents_embeddings(directory, bertmodel, sents_embed_dir_name, docs_data_tensor):
    """Generate embeddings for ``docs_data_tensor`` and dump the processed docs to disk.

    Steps, in order:
      1. wrap ``bertmodel`` in a ``BertEmbedder`` configured with
         ``bert_train_flag=False`` and ``bert_all_output=False``;
      2. obtain the output directory from
         ``create_directory(sents_embed_dir_name, directory)``;
      3. call ``generate_sents_embeds_from_docs`` with that embedder, the output
         directory and ``torch.float32``;
      4. hand the value it returns to ``ReaderWriter.dump_data``, targeting
         ``bert_proc_docs.pkl`` inside the output directory.

    Nothing is returned.
    """
    embedder = BertEmbedder(bertmodel, {'bert_train_flag': False,
                                        'bert_all_output': False})
    embed_dir = create_directory(sents_embed_dir_name, directory)

    # NOTE(review): the helper presumably also writes the embeddings under embed_dir — confirm.
    processed_docs = generate_sents_embeds_from_docs(docs_data_tensor, embedder, embed_dir, torch.float32)

    dump_path = os.path.join(embed_dir, 'bert_proc_docs.pkl')
    ReaderWriter.dump_data(processed_docs, dump_path)
Ejemplo n.º 3
0
def run_predict(q_docpartitions,
                q_fold_config_map,
                bertmodel,
                q_state_dict_path_map,
                results_dir,
                sents_embed_dir,
                question_fold_map,
                to_gpu,
                gpu_index,
                num_epochs=1) -> Dict:
    """Run ``predict_neural_discern`` once per question and collect the results.

    For every question key in ``q_fold_config_map`` the stored ``options`` dict is
    updated in place: ``'num_epochs'`` is overridden with ``num_epochs`` and
    ``'fold_num'`` is set from ``question_fold_map``. The matching data partition is
    looked up in ``q_docpartitions`` and a working directory is requested for
    ``<results_dir>/question_<question>/fold_<fold_num>`` via ``create_directory``.

    Returns:
        A dict mapping each question to the value returned by
        ``predict_neural_discern``; empty when ``q_fold_config_map`` is empty.
    """
    predictions = {}
    for question in q_fold_config_map:
        # The third element of the stored tuple is not used here.
        mconfig, options, _unused = q_fold_config_map[question]

        # The user-specified epoch count takes precedence over the stored one.
        options['num_epochs'] = num_epochs

        # Point the options at the fold selected for this question.
        fold_num = question_fold_map[question]
        options['fold_num'] = fold_num
        partition = q_docpartitions[question][fold_num]

        work_dir = create_directory(
            os.path.join(results_dir,
                         'question_{}'.format(question),
                         'fold_{}'.format(fold_num)))

        predictions[question] = predict_neural_discern(
            partition,
            bertmodel,
            mconfig,
            options,
            work_dir,
            sents_embed_dir,
            state_dict_dir=q_state_dict_path_map[question],
            to_gpu=to_gpu,
            gpu_index=gpu_index)
    return predictions
Ejemplo n.º 4
0
def evaluate_on_test_set(directory, q_docpartitions, q_config_map, bertmodel, train_val_dir, sents_embed_dir,
                         gpu_index):
    """Run ``test_run`` for a single epoch and return the directory it was given.

    The directory comes from ``create_directory('test', directory)``; all other
    arguments are forwarded to ``test_run`` unchanged, with ``num_epochs`` fixed to 1.
    """
    output_dir = create_directory('test', directory)
    test_run(
        q_docpartitions,
        q_config_map,
        bertmodel,
        train_val_dir,
        output_dir,
        sents_embed_dir,
        gpu_index,
        num_epochs=1)
    return output_dir
Ejemplo n.º 5
0
def run_training(directory, q_docpartitions, q_config_map, bertmodel, sents_embed_dir, question_gpu_map, num_epochs,
                 max_folds):
    """Run ``train_val_run`` and return the directory it was given.

    The directory comes from ``create_directory('train_validation', directory)``;
    the remaining arguments are forwarded to ``train_val_run`` positionally.
    """
    output_dir = create_directory('train_validation', directory)
    train_val_run(
        q_docpartitions,
        q_config_map,
        bertmodel,
        output_dir,
        sents_embed_dir,
        question_gpu_map,
        num_epochs,
        max_folds)
    return output_dir
Ejemplo n.º 6
0
              "The pre-built experiment_to_rerun will be used. Hyperparam search will not be run.")

    # under test mode (for faster debugging), run a smaller set of partitions and epochs, and no hyper param search
    if config['test_mode']:
        config['max_folds'] = 1  # max number of data partition folds to run (for faster testing)
        config['num_epochs'] = 1
        config['run_hyper_param_search'] = False

    time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if config['experiment_to_rerun']:
        if config['copy_exp_dir']:
            from distutils.dir_util import copy_tree
            rerun_dir_name = '{}_{}_{}'.format(config['experiment_to_rerun'], 'rerun', time_stamp)
            orig_exp_dir = os.path.join(config['base_dir'], 'experiments', config['experiment_to_rerun'])
            exp_dir = os.path.join(config['base_dir'], 'experiments', rerun_dir_name)
            create_directory(exp_dir)
            # copy contents of original exp dir so experiment re-run has everything it needs
            print("copying experiment for re-run in {}...".format(exp_dir))
            copy_tree(orig_exp_dir, exp_dir)
            print("... complete")
        else:
            exp_dir = exp_dir = os.path.join(config['base_dir'], 'experiments', config['experiment_to_rerun'])
    else:
        if config['test_mode']:
            exp_dir = os.path.join(config['base_dir'], 'experiments', 'tests', time_stamp)
        else:
            exp_dir = os.path.join(config['base_dir'], 'experiments', time_stamp)
        create_directory(exp_dir)
    config['exp_dir'] = exp_dir

    if config['biobert']: