Example #1
# NOTE: imports and function header reconstructed so this snippet runs;
# read_tag_vocab, gen_embedding_from_file, DataReader, sequence_labeling,
# config and apply_random_seed are project-local helpers defined elsewhere
# in the repo.
import torch
from tqdm import tqdm


def train():  # function name assumed; the original listing starts mid-function
    _config = config()  # mirrors Example #2 below; the listing is cut above here
    apply_random_seed()

    # build the tag vocabulary and load pretrained word/char embeddings
    tag_dict = read_tag_vocab(_config.output_tag_file)
    reversed_tag_dict = {v: k for (k, v) in tag_dict.items()}
    word_embedding, word_dict = gen_embedding_from_file(
        _config.word_embedding_file, _config.word_embedding_dim)
    char_embedding, char_dict = gen_embedding_from_file(
        _config.char_embedding_file, _config.char_embedding_dim)

    _config.nwords = len(word_dict)
    _config.ntags = len(tag_dict)
    _config.nchars = len(char_dict)

    # read training and development data
    train = DataReader(_config,
                       _config.train_file,
                       word_dict,
                       char_dict,
                       tag_dict,
                       _config.batch_size,
                       is_train=True)
    dev = DataReader(_config, _config.dev_file, word_dict, char_dict, tag_dict,
                     _config.batch_size)

    # build the model and optimize all parameters with Adam at default settings
    model = sequence_labeling(_config, word_embedding, char_embedding)
    optimizer = torch.optim.Adam(model.parameters())

    best_f1 = 0.0
    for i in range(_config.nepoch):
        model.train()
        print('EPOCH %d / %d' % (i + 1, _config.nepoch))
        # you can disable pbar if you do not want to show the training progress
        with tqdm(total=len(train)) as pbar:
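            # --- reconstruction: the original listing is truncated here. ---
            # A minimal sketch of a typical loop body, assuming the model's
            # forward pass returns the batch loss (an assumption; the real
            # forward signature is not shown in the listing):
            for batch in train:  # each batch is the 7-tuple yielded by DataReader
                optimizer.zero_grad()
                loss = model(*batch)  # hypothetical call; adapt to the real API
                loss.backward()
                optimizer.step()
                pbar.update(1)
            # (dev evaluation and best_f1 checkpointing are omitted; they are
            # not recoverable from the truncated listing)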
Example #2
# NOTE: imports and function header reconstructed so this snippet runs;
# the same project-local helpers as in Example #1 are assumed.
import torch


def test():  # function name assumed; the original listing starts mid-function
    _config = config()
    apply_random_seed()

    tag_dict = read_tag_vocab(_config.output_tag_file)
    reversed_tag_dict = {v: k for (k, v) in tag_dict.items()}
    word_embedding, word_dict = gen_embedding_from_file(
        _config.word_embedding_file, _config.word_embedding_dim)
    char_embedding, char_dict = gen_embedding_from_file(
        _config.char_embedding_file, _config.char_embedding_dim)

    _config.nwords = len(word_dict)
    _config.ntags = len(tag_dict)
    _config.nchars = len(char_dict)

    # read test data
    test = DataReader(_config, _config.test_file, word_dict, char_dict,
                      tag_dict, _config.batch_size)

    # build the model, restore the trained weights, and switch to eval mode
    model = sequence_labeling(_config, word_embedding, char_embedding)
    model.load_state_dict(torch.load(_config.model_file))
    model.eval()
    # collect predicted and gold tag sequences (the source keeps *_dev_ins
    # names even though this reads the test set)
    pred_dev_ins, golden_dev_ins = [], []
    for (batch_sentence_len_list, batch_word_index_lists, batch_word_mask,
         batch_char_index_matrices, batch_char_mask, batch_word_len_lists,
         batch_tag_index_list) in test:
        pred_batch_tag = model.decode(batch_word_index_lists,
                                      batch_sentence_len_list,
                                      batch_char_index_matrices,
                                      batch_word_len_lists, batch_char_mask)
        # strip padding with each sentence's true length and map tag ids
        # back to tag strings
        pred_dev_ins += [[
            reversed_tag_dict[t] for t in tag[:l]
        ] for tag, l in zip(pred_batch_tag.data.tolist(),
                            batch_sentence_len_list.data.tolist())]
        # (statement completed from the parallel pred_dev_ins expression above;
        # the original listing is truncated mid-statement here)
        golden_dev_ins += [[
            reversed_tag_dict[t] for t in tag[:l]
        ] for tag, l in zip(batch_tag_index_list.data.tolist(),
                            batch_sentence_len_list.data.tolist())]
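    # --- reconstruction: the listing ends above. A minimal sketch of a typical
    # final step, scoring the decoded sequences with the third-party seqeval
    # package (an assumption; the repo's own metric code is not shown):
    from seqeval.metrics import classification_report, f1_score
    print(classification_report(golden_dev_ins, pred_dev_ins))
    print('test F1: %.4f' % f1_score(golden_dev_ins, pred_dev_ins))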