Example 1
def train(path):
    # dry-run the pipeline save before training starts
    save_pipe(path)

    # Trainer
    trainer = Trainer(model=model,
                      train_data=train_data,
                      dev_data=dev_data,
                      loss=ParserLoss(),
                      metrics=ParserMetric(),
                      metric_key='UAS',  # select the best model by unlabeled attachment score
                      **train_args.data,
                      optimizer=fastNLP.Adam(**optim_args.data),
                      save_path=path)

    # model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=False)
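    # zero the <pad> rows so padding tokens contribute nothing to the forward pass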
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    # try:
    #     ModelLoader.load_pytorch(model, "./save/saved_model.pkl")
    #     print('model parameter loaded!')
    # except Exception as _:
    #     print("No saved model. Continue.")
    #     pass

    # Start training
    trainer.train()
    print("Training finished!")

    # save pipeline
    save_pipe(path)
    print('pipe saved')
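
This snippet (and Example 2 below) calls a module-level save_pipe helper that the listing does not show. As a rough sketch of what such a helper might do, assuming the script keeps a fastNLP Pipeline object in a module-level variable named pipe (both the variable name and the file layout are assumptions, not the original code):

import os
import torch

def save_pipe(path):
    # hypothetical sketch: serialize the preprocessing-plus-model pipeline
    # so the trained parser can be reloaded for inference later
    os.makedirs(path, exist_ok=True)
    torch.save({'pipeline': pipe}, os.path.join(path, 'pipe.pkl'))
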
Example 2
def train(path):
    # dry-run the pipeline save before training starts
    save_pipe(path)
    embed = EmbedLoader.fast_load_embedding(model_args['word_emb_dim'],
                                            emb_file_name, word_v)
    embed = torch.tensor(embed, dtype=torch.float32)

    # embed = EmbedLoader.fast_load_embedding(emb_dim=model_args['word_emb_dim'], emb_file=emb_file_name, vocab=word_v)
    # embed = torch.tensor(embed, dtype=torch.float32)
    # model.word_embedding = torch.nn.Embedding.from_pretrained(embed, freeze=True)
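    # as in Example 1, zero the <pad> rows; the callback below re-enables
    # embedding gradients at step 3000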
    model.word_embedding.padding_idx = word_v.padding_idx
    model.word_embedding.weight.data[word_v.padding_idx].fill_(0)
    model.pos_embedding.padding_idx = pos_v.padding_idx
    model.pos_embedding.weight.data[pos_v.padding_idx].fill_(0)

    class MyCallback(Callback):
        def on_step_end(self, optimizer):
            step = self.trainer.step
            # learning rate decay
            if step > 0 and step % 1000 == 0:
                for pg in optimizer.param_groups:
                    pg['lr'] *= 0.93
                print('decay lr to {}'.format(
                    [pg['lr'] for pg in optimizer.param_groups]))

            if step == 3000:
                # un-freeze the embedding layers so they start training from here
                print('start training embedding at {}'.format(step))
                model = self.trainer.model
                for m in model.modules():
                    if isinstance(m, torch.nn.Embedding):
                        m.weight.requires_grad = True

    # Trainer
    trainer = Trainer(model=model,
                      train_data=train_data,
                      dev_data=dev_data,
                      loss=ParserLoss(),
                      metrics=ParserMetric(),
                      metric_key='UAS',
                      **train_args.data,
                      optimizer=fastNLP.Adam(**optim_args.data),
                      save_path=path,
                      callbacks=[MyCallback()])

    # Start training
    try:
        trainer.train()
        print("Training finished!")
    finally:
        # save the pipeline even if training fails or is interrupted
        save_pipe(path)
        print('pipe saved')
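
MyCallback above implements step-based exponential learning-rate decay by hand. Outside fastNLP's callback system, the same schedule (multiply the lr by 0.93 every 1000 steps) can be written with stock PyTorch; a minimal standalone sketch, not part of the original script:

import torch

# toy parameter/optimizer standing in for the parser setup above
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=0.01)

# multiply the learning rate by 0.93 every 1000 optimizer steps,
# matching the manual decay in MyCallback.on_step_end
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.93)

for step in range(5000):
    optimizer.step()
    scheduler.step()  # stepped per iteration here, not per epoch
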
Example 3
import fastNLP  # needed for the fastNLP.Adam call below
from fastNLP import DataSet
from fastNLP import Vocabulary
from fastNLP import Trainer
from fastNLP import Optimizer
from fastNLP import core
import data
import model

CNN = False
RNN = True

if __name__ == '__main__':
    if CNN:
        d_train, d_test = data.getCharDataset()
        m = model.CharacterLevelCNN()
        ADAMOP = fastNLP.Adam(lr=0.001, weight_decay=0, betas=(0.9, 0.999))
        trainer = Trainer(
            train_data=d_train,
            model=m,
            n_epochs=100,
            batch_size=128,
            use_cuda=True,
            check_code_level=0,
            optimizer=ADAMOP,
            dev_data=d_test,
            metrics=core.metrics.AccuracyMetric(target="label")
        )
        trainer.train()
    if RNN:
        d_train, d_test, embedding = data.getWordDataset()
        # positional hyper-parameters for the custom LSTMClassifier defined in the local model module
        m = model.LSTMClassifier(32, 20, 256, 400001, 200, embedding)
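
The listing breaks off here. Judging from the CNN branch, the remainder presumably builds an optimizer and a Trainer around m and calls train(); a hypothetical continuation mirroring the CNN branch (not the original code):

        # hypothetical continuation, mirroring the CNN branch above
        adam_op = fastNLP.Adam(lr=0.001, weight_decay=0, betas=(0.9, 0.999))
        trainer = Trainer(
            train_data=d_train,
            model=m,
            n_epochs=100,
            batch_size=128,
            use_cuda=True,
            check_code_level=0,
            optimizer=adam_op,
            dev_data=d_test,
            metrics=core.metrics.AccuracyMetric(target="label")
        )
        trainer.train()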