Example #1
def train():
    # Trainer
    trainer = Trainer(**train_args.data)

    # Monkey-patch the trainer: custom optimizer/scheduler setup, update
    # step, loss computation and validator construction.
    def _define_optim(obj):
        obj._optimizer = torch.optim.Adam(obj._model.parameters(),
                                          **optim_args.data)
        obj._scheduler = torch.optim.lr_scheduler.LambdaLR(
            obj._optimizer, lambda ep: .75**(ep / 5e4))

    def _update(obj):
        # Step the optimizer first, then the scheduler (required order
        # since PyTorch 1.1).
        obj._optimizer.step()
        obj._scheduler.step()

    trainer.define_optimizer = lambda: _define_optim(trainer)
    trainer.update = lambda: _update(trainer)
    trainer.get_loss = lambda predict, truth: trainer._loss_func(
        **predict, **truth)
    trainer._create_validator = lambda x: MyTester(**test_args.data)

    # Model
    model = BiaffineParser(**model_args.data)

    # Use pretrained word embeddings
    embed, _ = EmbedLoader.load_embedding(
        model_args['word_emb_dim'], emb_file_name, 'glove', word_v,
        os.path.join(processed_datadir, 'word_emb.pkl'))
    model.word_embedding = torch.nn.Embedding.from_pretrained(embed,
                                                              freeze=False)
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    try:
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        print("No saved model. Continue.")

    # Start training
    trainer.train(model, train_data, dev_data)
    print("Training finished!")

    # Saver
    saver = ModelSaver("./save/saved_model.pkl")
    saver.save_pytorch(model)
    print("Model saved!")
Example #2
def test_train2(self):
    model = BiaffineParser(init_embed=(VOCAB_SIZE, 10),
                           pos_vocab_size=VOCAB_SIZE, pos_emb_dim=10,
                           rnn_hidden_size=16,
                           arc_mlp_size=10,
                           label_mlp_size=10,
                           num_label=NUM_CLS, encoder='transformer')
    ds = prepare_parser_data()
    RUNNER.run_model(model, ds, loss=ParserLoss(), metrics=ParserMetric())
Example #3
def test(path):
    # Tester
    tester = Tester(**test_args.data, evaluator=ParserEvaluator(ignore_label))

    # Model
    model = BiaffineParser(**model_args.data)
    model.eval()
    try:
        ModelLoader.load_pytorch(model, path)
        print('model parameter loaded!')
    except Exception:
        print("No saved model. Abort test.")
        raise

    # Start testing
    print("Testing Train data")
    tester.test(model, train_data)
    print("Testing Dev data")
    tester.test(model, dev_data)
    print("Testing Test data")
    tester.test(model, test_data)
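This entry point expects the path of a previously saved model. A usage sketch, reusing the save location from Example #1 (the path is carried over from that snippet, not confirmed here):

test("./save/saved_model.pkl")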
Example #4
def _load_all(src):
    # src is the path of the saved model file; its directory also holds
    # the pickled vocabularies and the POS-tagging pipeline.
    model_path = src
    src = os.path.dirname(src)

    word_v = _load(os.path.join(src, 'word_v.pkl'))
    pos_v = _load(os.path.join(src, 'pos_v.pkl'))
    tag_v = _load(os.path.join(src, 'tag_v.pkl'))
    pos_pp = torch.load(os.path.join(src, 'pos_pp.pkl'))['pipeline']

    model_args = ConfigSection()
    ConfigLoader.load_config('cfg.cfg', {'model': model_args})
    model_args['word_vocab_size'] = len(word_v)
    model_args['pos_vocab_size'] = len(pos_v)
    model_args['num_label'] = len(tag_v)

    model = BiaffineParser(**model_args.data)
    model.load_state_dict(torch.load(model_path))
    return {
        'word_v': word_v,
        'pos_v': pos_v,
        'tag_v': tag_v,
        'model': model,
        'pos_pp': pos_pp,
    }
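_load_all bundles everything needed for inference into one dict. A minimal usage sketch; the model path is hypothetical:

loaded = _load_all('./save/saved_model.pkl')  # hypothetical path
model, pos_pp = loaded['model'], loaded['pos_pp']
model.eval()  # switch to evaluation mode before running inference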
Example #5
def test_train(self):
    ds, v1, v2, v3 = init_data()
    model = BiaffineParser(word_vocab_size=len(v1),
                           word_emb_dim=30,
                           pos_vocab_size=len(v2),
                           pos_emb_dim=30,
                           num_label=len(v3))
    trainer = fastNLP.Trainer(model=model,
                              train_data=ds,
                              dev_data=ds,
                              loss=ParserLoss(),
                              metrics=ParserMetric(),
                              metric_key='UAS',
                              n_epochs=10,
                              use_cuda=False,
                              use_tqdm=False)
    trainer.train(load_best_model=False)
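After training, the same dataset and metric can be reused for a standalone evaluation with fastNLP's Tester. A minimal sketch, assuming the Tester constructor accepts data, model and metrics keyword arguments in the fastNLP versions this test targets:

# Hedged sketch: Tester keyword names are an assumption.
tester = fastNLP.Tester(data=ds, model=model, metrics=ParserMetric())
tester.test()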
Example #6
def test():
    # Tester
    tester = MyTester(**test_args.data)

    # Model
    model = BiaffineParser(**model_args.data)

    try:
        ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
        print('model parameter loaded!')
    except Exception:
        print("No saved model. Abort test.")
        raise

    # Start testing
    tester.test(model, dev_data)
    print(tester.show_metrics())
    print("Testing finished!")
Example #7
              dev_data=dev_data,
              test_data=test_data)

embed, _ = EmbedLoader.load_embedding(
    model_args['word_emb_dim'], emb_file_name, 'glove', word_v,
    os.path.join(processed_datadir, 'word_emb.pkl'))

print(len(word_v))
print(embed.size())

# Model
model_args['word_vocab_size'] = len(word_v)
model_args['pos_vocab_size'] = len(pos_v)
model_args['num_label'] = len(tag_v)

model = BiaffineParser(**model_args.data)
model.reset_parameters()
datasets = (train_data, dev_data, test_data)
for ds in datasets:
    ds.index_field("word_seq", word_v)
    ds.index_field("pos_seq", pos_v)
    ds.index_field("head_labels", tag_v)
    ds.set_origin_len('word_seq')
if train_args['use_golden_train']:
    train_data.set_target(gold_heads=False)
else:
    train_data.set_target(gold_heads=None)
train_args.data.pop('use_golden_train')
ignore_label = pos_v['P']

print(test_data[0])
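word_v, pos_v and tag_v above are assumed to be fastNLP Vocabulary objects built from the raw, not-yet-indexed fields; Example #8 below builds them through an update_v helper. A minimal sketch, assuming Vocabulary.update() accepts a list of tokens:

from fastNLP.core.vocabulary import Vocabulary

# Hypothetical vocabulary construction over the raw token fields.
word_v, pos_v, tag_v = Vocabulary(), Vocabulary(), Vocabulary()
for ins in train_data:
    word_v.update(ins['word_seq'])
    pos_v.update(ins['pos_seq'])
    tag_v.update(ins['head_labels'])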
Example #8
                         new_added_field_name='words')
for ds in (train_data, dev_data, test_data):
    num_p(ds)
update_v(word_v, train_data, 'words')
update_v(pos_v, train_data, 'pos')
update_v(tag_v, train_data, 'tags')

print('vocab build success {}, {}, {}'.format(len(word_v), len(pos_v),
                                              len(tag_v)))

# Model
model_args['word_vocab_size'] = len(word_v)
model_args['pos_vocab_size'] = len(pos_v)
model_args['num_label'] = len(tag_v)

model = BiaffineParser(**model_args.data)
print(model)

word_idxp = IndexerProcessor(word_v, 'words', 'word_seq')
pos_idxp = IndexerProcessor(pos_v, 'pos', 'pos_seq')
tag_idxp = IndexerProcessor(tag_v, 'tags', 'label_true')
seq_p = SeqLenProcessor('word_seq', 'seq_lens')

set_input_p = SetInputProcessor('word_seq', 'pos_seq', 'seq_lens', flag=True)
set_target_p = SetTargetProcessor('arc_true',
                                  'label_true',
                                  'seq_lens',
                                  flag=True)

label_toword_p = Index2WordProcessor(vocab=tag_v,
                                     field_name='label_pred',