Code example #1
0
 def _validate_paths(self, data_path):
     """Resolve every entry of ``self.dataset_files`` under ``data_path``.

     Checks that ``data_path`` is an existing directory, then replaces each
     bare file name in ``self.dataset_files`` with its validated absolute
     path inside that directory.
     """
     validate_existing_directory(data_path)
     for key, file_name in list(self.dataset_files.items()):
         resolved = path.join(data_path, file_name)
         validate_existing_filepath(resolved)
         self.dataset_files[key] = resolved
Code example #2
0
File: train.py  Project: cdj0311/nlp-architect
# Hyper-parameters for the SQuAD training run.
params_dict = {
    'batch_size': args.batch_size_squad,
    'embed_size': 300,
    'pad_idx': 0,
    'hs': hidden_size,
    'glove_dim': 300,
    'iter_interval': 8000,
    'num_iterations': 500000,
    'ax': ax,
}

# Weight initializer shared by the model layers.
init = GlorotInit()
params_dict['init'] = init

# Validate and normalize the data directory. Keep a trailing "/" so any
# downstream code that concatenates file names onto ``path_gen`` still works
# (the original ``os.path.join(path_gen + "/")`` was a single-argument no-op
# join that only achieved this trailing-slash append).
validate_existing_directory(args.data_path)
path_gen = sanitize_path(args.data_path) + "/"

# Logical dataset role -> preprocessed SQuAD file name.
file_name_dict = {
    'train_para_ids': 'train.ids.context',
    'train_ques_ids': 'train.ids.question',
    'train_answer': 'train.span',
    'val_para_ids': 'dev.ids.context',
    'val_ques_ids': 'dev.ids.question',
    'val_ans': 'dev.span',
    'vocab_file': 'vocab.dat',
}


# Proper two-argument join (the original passed one concatenated string,
# which makes os.path.join a no-op); result strings are unchanged.
train_para_ids = os.path.join(path_gen, file_name_dict['train_para_ids'])
train_ques_ids = os.path.join(path_gen, file_name_dict['train_ques_ids'])
Code example #3
0
    # Build ELMo embeddings for the vocabulary of the given mentions
    # (``load_elmo_for_vocab`` is a project helper defined elsewhere).
    elmo_ecb_embeddings = load_elmo_for_vocab(mentions)

    # Persist the embeddings with pickle so later runs can reuse the dump.
    with open(out_file, 'wb') as f:
        pickle.dump(elmo_ecb_embeddings, f)

    logger.info('Saving dump to file-%s', out_file)


if __name__ == '__main__':
    # Command-line entry point: parse arguments, validate the mentions path,
    # then build and persist the ELMo embedding dump.
    parser = argparse.ArgumentParser(
        description='Create Elmo Embedding dataset only dump')
    parser.add_argument('--mentions', type=str, required=True,
                        help='mentions_file file')
    parser.add_argument('--output', type=str, required=True,
                        help='location were to create dump file')

    args = parser.parse_args()

    # Pick the matching project validator for a directory vs. a single file.
    validate = (io.validate_existing_directory
                if os.path.isdir(args.mentions)
                else io.validate_existing_filepath)
    validate(args.mentions)

    elmo_dump()
    print('Done!')
Code example #4
0
File: prepare_data.py  Project: Asteur/NervanaNlpApch
    # Command-line options: where the SQuAD data / GloVe embeddings live and
    # whether to skip GloVe preprocessing.
    parser.add_argument('--data_path',
                        help='enter path where training data and the \
                        glove embeddings were downloaded',
                        type=str)

    parser.add_argument(
        '--no_preprocess_glove',
        action="store_true",
        help='Chose whether or not to preprocess glove embeddings')

    parser.set_defaults()
    args = parser.parse_args()
    # GloVe preprocessing is on by default; --no_preprocess_glove disables it.
    glove_flag = not args.no_preprocess_glove

    validate_existing_directory(args.data_path)
    data_path = sanitize_path(args.data_path)
    # NOTE(review): single-argument os.path.join is a no-op — this line only
    # appends a trailing "/"; os.path.join(data_path, name) below would be
    # the idiomatic form.
    data_path = os.path.join(data_path + "/")
    # Load Train and Dev Data
    train_filename = os.path.join(data_path + "train-v1.1.json")
    dev_filename = os.path.join(data_path + "dev-v1.1.json")
    with open(train_filename) as train_file:
        train_data = json.load(train_file)

    with open(dev_filename) as dev_file:
        dev_data = json.load(dev_file)

    print('Extracting data from json files')
    # Extract training data from raw files
    train_para, train_question, train_ans = extract_data_from_files(train_data)
    # Extract dev data from raw dataset