def test_fastnlp_1min_tutorial(self):
        # tutorials/fastnlp_1min_tutorial.ipynb
        data_path = "test/data_for_tests/tutorial_sample_dataset.csv"
        ds = DataSet.read_csv(data_path,
                              headers=('raw_sentence', 'label'),
                              sep='\t')
        print(ds[1])

        # convert the raw sentences to lowercase
        ds.apply(lambda x: x['raw_sentence'].lower(),
                 new_field_name='raw_sentence')
        # convert label to int
        ds.apply(lambda x: int(x['label']),
                 new_field_name='target',
                 is_target=True)

        def split_sent(ins):
            return ins['raw_sentence'].split()

        ds.apply(split_sent, new_field_name='words', is_input=True)

        # split into training / dev sets
        train_data, dev_data = ds.split(0.3)
        print("Train size: ", len(train_data))
        print("Test size: ", len(dev_data))

        from fastNLP import Vocabulary
        vocab = Vocabulary(min_freq=2)
        train_data.apply(lambda x: [vocab.add(word) for word in x['words']])

        # index the sentences with Vocabulary.to_index(word)
        train_data.apply(
            lambda x: [vocab.to_index(word) for word in x['words']],
            new_field_name='words',
            is_input=True)
        dev_data.apply(lambda x: [vocab.to_index(word) for word in x['words']],
                       new_field_name='words',
                       is_input=True)

        from fastNLP.models import CNNText
        model = CNNText((len(vocab), 50),
                        num_classes=5,
                        padding=2,
                        dropout=0.1)

        from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric, Adam

        trainer = Trainer(model=model,
                          train_data=train_data,
                          dev_data=dev_data,
                          loss=CrossEntropyLoss(),
                          optimizer=Adam(),
                          metrics=AccuracyMetric(target='target'))
        trainer.train()
        print('Train finished!')
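        # Not part of the original tutorial: a minimal evaluation sketch using fastNLP's
        # Tester on the dev split created above, mirroring the Tester usage in the later
        # examples on this page.
        from fastNLP import Tester
        tester = Tester(data=dev_data,
                        model=model,
                        metrics=AccuracyMetric(target='target'),
                        batch_size=4)
        tester.test()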
Example #2
def train(config):
    train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num, kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                             static=config.static, in_channels=config.in_channels,
                             weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim, 
                          output_dim=config.class_num, hidden_dim=config.hidden_dim, 
                          num_layers=config.num_layers, dropout=config.dropout)
    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=CrossEntropyLoss(),
                      batch_size=config.batch_size, check_code_level=0,
                      metrics=accuracy, n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every, validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # test result
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
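
# TimingCallback is used above (and in later examples) but never defined in this snippet.
# A minimal sketch of such a callback, assuming the on_epoch_begin / on_epoch_end hook
# names of fastNLP's Callback base class (fastNLP 0.4.x):
import time
from fastNLP import Callback

class TimingCallback(Callback):
    def on_epoch_begin(self):
        # record the wall-clock time at the start of each epoch
        self._epoch_start = time.time()

    def on_epoch_end(self):
        # report how long the finished epoch took
        print("epoch finished in %.2fs" % (time.time() - self._epoch_start))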
Example #3
def train_variational_ae():
    n_epochs = 500

    trainer = Trainer(train_data=fast_data.train_data,
                      dev_data=fast_data.test_data,
                      model=vae,
                      loss=LossFunc(VariationalAE.loss),
                      metrics=AccuracyMetric(target='dec_target'),
                      n_epochs=n_epochs,
                      batch_size=batch_size,
                      optimizer=Adam(lr=0.001,
                                     weight_decay=0,
                                     model_params=vae.parameters()))
    trainer.train()
    print('VAE Train finished!')
Example #4
def train_classifier():
    n_epochs = 50

    trainer = Trainer(train_data=fast_data.train_data,
                      dev_data=fast_data.test_data,
                      model=disc,
                      loss=CrossEntropyLoss(target='label_seq'),
                      metrics=AccuracyMetric(target='label_seq'),
                      n_epochs=n_epochs,
                      batch_size=batch_size,
                      optimizer=Adam(lr=0.001,
                                     weight_decay=0,
                                     model_params=disc.parameters()))
    trainer.train()
    print('Disc Train finished!')
def train():
    config = Config()

    train_data, dev_data, vocabulary = get_dataset(config.data_path)

    poetry_model = PoetryModel(vocabulary_size=len(vocabulary),
                               embedding_size=config.embedding_size,
                               hidden_size=config.hidden_size)
    loss = Loss(pred='output', target='target')
    perplexity = Perplexity(pred='output', target='target')

    print("optimizer:", config.optimizer)
    print("momentum:", config.momentum)
    if config.optimizer == 'adam':
        optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = SGD(lr=config.lr, momentum=config.momentum)
    elif config.optimizer == 'adagrad':
        optimizer = Adagrad(lr=config.lr, weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = Adadelta(lr=config.lr,
                             rho=config.rho,
                             eps=config.eps,
                             weight_decay=config.weight_decay)

    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)

    trainer = Trainer(train_data=train_data,
                      model=poetry_model,
                      loss=loss,
                      metrics=perplexity,
                      n_epochs=config.epoch,
                      batch_size=config.batch_size,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      dev_data=dev_data,
                      save_path=config.save_path,
                      optimizer=optimizer,
                      check_code_level=config.check_code_level,
                      metric_key="-PPL",
                      sampler=RandomSampler(),
                      prefetch=False,
                      use_tqdm=True,
                      device=config.device,
                      callbacks=[timing, early_stop])
    trainer.train()
def train(epochs=10, lr=0.001):
    global model
    for i in range(epochs):
        print('----------------- ', str(i + 1), ' ------------------')
        trainer = Trainer(model=model,
                          train_data=train_set,
                          dev_data=test_set,
                          loss=CrossEntropyLoss(pred='output',
                                                target='target'),
                          metrics=AccuracyMetric(pred='pred', target='target'),
                          optimizer=Adam(lr=lr),
                          save_path=None,
                          batch_size=1,
                          n_epochs=1)
        trainer.train()
        model.load_state_dict(copy.deepcopy(trainer.model.state_dict()))
        # save('../model/cnn-' + str(kernel_sizes) + '-' + str(keep_proba) + '-' + str(i+1))
        save('../model/lstm-' + str(input_dim) + '-' + str(hidden_dim) + '-' +
             str(i + 1))
Example #7
# training
device = 0 if torch.cuda.is_available() else 'cpu'
'''
EMBED_DIM = 100
model = CNNText((len(vocab),EMBED_DIM), num_classes=len(vocab_target), dropout=0.1)
metrics=AccuracyMetric()
loss = CrossEntropyLoss()
optimizer=optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
N_EPOCHS = 10
BATCH_SIZE = 16
trainer = Trainer(model=model, train_data=train_data, dev_data=dev_data, loss=loss, metrics=metrics,optimizer=optimizer,n_epochs=N_EPOCHS, batch_size=BATCH_SIZE, device=device)
trainer.train()
'''
embed = BertEmbedding(vocab, model_dir_or_name='en', include_cls_sep=True)
model = BertForSequenceClassification(embed, len(vocab_target))
trainer = Trainer(train_data,
                  model,
                  optimizer=Adam(model_params=model.parameters(), lr=2e-5),
                  loss=CrossEntropyLoss(),
                  device=device,
                  batch_size=8,
                  dev_data=dev_data,
                  metrics=AccuracyMetric(),
                  n_epochs=2,
                  print_every=1)
trainer.train()

saver = ModelSaver("save_model/bert2021.1.19.pkl")
saver.save_pytorch(model)
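
# To restore the weights saved above, fastNLP's ModelLoader can be used; a short sketch
# (the same ModelLoader.load_pytorch call appears, commented out, in a later example):
from fastNLP.io import ModelLoader
ModelLoader.load_pytorch(model, "save_model/bert2021.1.19.pkl")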
Example #8
    def test_Adam(self):
        optim = Adam(model_params=torch.nn.Linear(10, 3).parameters())
        self.assertTrue("lr" in optim.__dict__["settings"])
        self.assertTrue("weight_decay" in optim.__dict__["settings"])
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.Adam))

        optim = Adam(lr=0.001)
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.Adam))

        optim = Adam(lr=0.002, weight_decay=0.989)
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.002)
        self.assertEqual(optim.__dict__["settings"]["weight_decay"], 0.989)

        optim = Adam(0.001)
        self.assertEqual(optim.__dict__["settings"]["lr"], 0.001)
        res = optim.construct_from_pytorch(torch.nn.Linear(10, 3).parameters())
        self.assertTrue(isinstance(res, torch.optim.Adam))
Example #9
               embeddings=embeddings,
               dropout=0.5,
               num_classes=3,
               device=device).to(device)

trainer = Trainer(train_data=train_data,
                  model=model,
                  loss=CrossEntropyLoss(pred='pred', target='label'),
                  metrics=AccuracyMetric(),
                  n_epochs=10,
                  batch_size=32,
                  print_every=-1,
                  validate_every=-1,
                  dev_data=dev_data,
                  use_cuda=True,
                  optimizer=Adam(lr=0.0004, weight_decay=0),
                  check_code_level=-1,
                  metric_key='acc',
                  use_tqdm=False)

trainer.train()
# after training, model holds the parameters that scored best on dev; save them
torch.save(model.state_dict(), '../data/checkpoints/best_model.pkl')

tester = Tester(
    data=test_data,
    model=model,
    metrics=AccuracyMetric(),
    batch_size=32,
)
tester.test()
Example #10
        self.p = []

    def evaluate(self, target, pred):
        self.l.extend(target.tolist())
        pred = pred.argmax(dim=-1).tolist()
        self.p.extend(pred)

    def get_metric(self, reset=True):
        f1_score = m.f1_score(self.l, self.p, average="macro")
        if reset:
            self.l = []
            self.p = []
        return {"f1:": f1_score}


optimizer = Adam(lr=args.lr, weight_decay=0)
acc = AccuracyMetric()
f1 = f1metric()
loss = CrossEntropyLoss()
trainer = Trainer(
    trainset,
    cla,
    optimizer=optimizer,
    loss=loss,
    batch_size=args.batch_size,
    n_epochs=args.num_epoch,
    dev_data=testset,
    metrics=[acc, f1],
    save_path=args.save_dir,
    callbacks=[FitlogCallback(log_loss_every=5)],
)
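
# The f1metric instantiated above is a custom metric whose class header is cut off in this
# snippet. A self-contained sketch of such a metric, assuming fastNLP's MetricBase API
# (_init_param_map / evaluate / get_metric) and sklearn.metrics imported as `m`:
import sklearn.metrics as m
from fastNLP.core.metrics import MetricBase

class MacroF1Metric(MetricBase):
    def __init__(self, pred=None, target=None):
        super().__init__()
        # map the model's prediction field and the dataset's target field onto evaluate()
        self._init_param_map(pred=pred, target=target)
        self.l = []
        self.p = []

    def evaluate(self, pred, target):
        # accumulate gold labels and argmax predictions batch by batch
        self.l.extend(target.tolist())
        self.p.extend(pred.argmax(dim=-1).tolist())

    def get_metric(self, reset=True):
        f1_score = m.f1_score(self.l, self.p, average="macro")
        if reset:
            self.l = []
            self.p = []
        return {"f1": f1_score}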
Example #11
model = SLSTM(nemb=300, nhid=300, num_layer=1, Tar_emb=embedding)

trainer = Trainer(
    train_data=train_dataset,
    model=model,
    loss=CrossEntropyLoss(pred='predict', target='label'),
    metrics=AccuracyMetric(),
    n_epochs=20,
    batch_size=arg.batch_size,
    print_every=1,
    validate_every=-1,
    dev_data=dev_dataset,
    use_cuda=True,
    save_path=save_dir,
    optimizer=Adam(1e-3, weight_decay=0),
    check_code_level=-1,
    metric_key='acc',
    # sampler=default,
    use_tqdm=True,
)

results = trainer.train(load_best_model=True)
print(results)

torch.save(model, os.path.join(save_dir,"best_model.pkl"))


tester = Tester(
    data=test_dataset,
    model=model,
Example #12
    def test_fastnlp_advanced_tutorial(self):
        import os
        os.chdir("tutorials/fastnlp_advanced_tutorial")

        from fastNLP import DataSet
        from fastNLP import Instance
        from fastNLP import Vocabulary
        from fastNLP import Trainer
        from fastNLP import Tester

        # ### Instance
        # An Instance represents one sample and consists of one or more fields (attributes/features); each field has its own name and value.
        # When creating an Instance, its fields are declared with the "field_name=field_value" syntax.

        # In[2]:

        # build an Instance made up of three fields: premise, hypothesis, and label
        instance = Instance(premise='an premise example .',
                            hypothesis='an hypothesis example.',
                            label=1)
        instance

        # In[3]:

        data_set = DataSet([instance] * 5)
        data_set.append(instance)
        data_set[-2:]

        # In[4]:

        # an instance whose field type differs from the dataset's corresponding field type can still be appended
        instance2 = Instance(premise='the second premise example .',
                             hypothesis='the second hypothesis example.',
                             label='1')
        try:
            data_set.append(instance2)
        except:
            pass
        data_set[-2:]

        # In[5]:

        # if a field name does not match, the instance cannot be appended to the dataset
        instance3 = Instance(premises='the third premise example .',
                             hypothesis='the third hypothesis example.',
                             label=1)
        try:
            data_set.append(instance3)
        except:
            print('cannot append instance')
            pass
        data_set[-2:]

        # In[6]:

        # besides text, a tensor can also be the value of a field
        import torch
        tensor_ins = Instance(image=torch.randn(5, 5), label=0)
        ds = DataSet()
        ds.append(tensor_ins)
        ds

        from fastNLP import DataSet
        from fastNLP import Instance

        # read data from a csv file into a DataSet
        # any csv-like file, i.e. one example per line, can be loaded this way
        dataset = DataSet.read_csv('tutorial_sample_dataset.csv',
                                   headers=('raw_sentence', 'label'),
                                   sep='\t')
        # check the size of the DataSet
        len(dataset)

        # In[8]:

        # use an integer index [k] to get the k-th sample
        dataset[0]

        # In[9]:

        # the retrieved sample is an Instance
        type(dataset[0])

        # In[10]:

        # use a slice [a:b] to get samples a through b
        dataset[0:3]

        # In[11]:

        # negative indices also work
        dataset[-1]

        data_path = ['premise', 'hypothesis', 'label']

        # read in the files
        with open(data_path[0]) as f:
            premise = f.readlines()

        with open(data_path[1]) as f:
            hypothesis = f.readlines()

        with open(data_path[2]) as f:
            label = f.readlines()

        assert len(premise) == len(hypothesis) and len(hypothesis) == len(
            label)

        # build the DataSet
        data_set = DataSet()
        for p, h, l in zip(premise, hypothesis, label):
            p = p.strip()  # strip trailing whitespace
            h = h.strip()  # strip trailing whitespace
            data_set.append(Instance(premise=p, hypothesis=h, truth=l))

        data_set[0]

        # ### Other DataSet operations
        # After a DataSet is built, its contents can still be manipulated; the interface for this is DataSet.apply()

        # In[13]:

        # lowercase all text in the premise field
        data_set.apply(lambda x: x['premise'].lower(),
                       new_field_name='premise')
        data_set[-2:]

        # In[14]:

        # convert label to int
        data_set.apply(lambda x: int(x['truth']), new_field_name='truth')
        data_set[-2:]

        # In[15]:

        # split sentences on whitespace
        def split_sent(ins):
            return ins['premise'].split()

        data_set.apply(split_sent, new_field_name='premise')
        data_set.apply(lambda x: x['hypothesis'].split(),
                       new_field_name='hypothesis')
        data_set[-2:]

        # In[16]:

        # filter the data
        origin_data_set_len = len(data_set)
        data_set.drop(lambda x: len(x['premise']) <= 6)
        origin_data_set_len, len(data_set)

        # In[17]:

        # add length information
        data_set.apply(lambda x: [1] * len(x['premise']),
                       new_field_name='premise_len')
        data_set.apply(lambda x: [1] * len(x['hypothesis']),
                       new_field_name='hypothesis_len')
        data_set[-1]

        # In[18]:

        # set the input (feature) fields and the target (label) field
        data_set.set_input("premise", "premise_len", "hypothesis",
                           "hypothesis_len")
        data_set.set_target("truth")

        # In[19]:

        # rename a field
        data_set.rename_field('truth', 'label')
        data_set[-1]

        # In[20]:

        # split into train, dev, and test sets
        train_data, vad_data = data_set.split(0.5)
        dev_data, test_data = vad_data.split(0.4)
        len(train_data), len(dev_data), len(test_data)

        # In[21]:

        # deep-copy a dataset
        import copy
        train_data_2, dev_data_2 = copy.deepcopy(train_data), copy.deepcopy(
            dev_data)
        del copy

        # Initialize the vocabulary: maximum vocab_size 10000, minimum word frequency 2, '<unk>' for unknown words, '<pad>' for padding
        # Vocabulary's default init parameters are max_size=None, min_freq=None, unknown='<unk>', padding='<pad>'
        vocab = Vocabulary(max_size=10000,
                           min_freq=2,
                           unknown='<unk>',
                           padding='<pad>')

        # build the vocabulary
        train_data.apply(lambda x: [vocab.add(word) for word in x['premise']])
        train_data.apply(
            lambda x: [vocab.add(word) for word in x['hypothesis']])
        vocab.build_vocab()

        # In[23]:

        # index the sentences with the vocabulary
        train_data.apply(
            lambda x: [vocab.to_index(word) for word in x['premise']],
            new_field_name='premise')
        train_data.apply(
            lambda x: [vocab.to_index(word) for word in x['hypothesis']],
            new_field_name='hypothesis')
        dev_data.apply(
            lambda x: [vocab.to_index(word) for word in x['premise']],
            new_field_name='premise')
        dev_data.apply(
            lambda x: [vocab.to_index(word) for word in x['hypothesis']],
            new_field_name='hypothesis')
        test_data.apply(
            lambda x: [vocab.to_index(word) for word in x['premise']],
            new_field_name='premise')
        test_data.apply(
            lambda x: [vocab.to_index(word) for word in x['hypothesis']],
            new_field_name='hypothesis')
        train_data[-1], dev_data[-1], test_data[-1]

        # read in the vocab file
        with open('vocab.txt') as f:
            lines = f.readlines()
        vocabs = []
        for line in lines:
            vocabs.append(line.strip())

        # instantiate a Vocabulary
        vocab_bert = Vocabulary(unknown=None, padding=None)
        # add the vocabs list to the Vocabulary
        vocab_bert.add_word_lst(vocabs)
        # build the vocabulary
        vocab_bert.build_vocab()
        # update the unknown and padding token strings
        vocab_bert.unknown = '[UNK]'
        vocab_bert.padding = '[PAD]'

        # In[25]:

        # index the sentences with the BERT vocabulary
        train_data_2.apply(
            lambda x: [vocab_bert.to_index(word) for word in x['premise']],
            new_field_name='premise')
        train_data_2.apply(
            lambda x: [vocab_bert.to_index(word) for word in x['hypothesis']],
            new_field_name='hypothesis')
        dev_data_2.apply(
            lambda x: [vocab_bert.to_index(word) for word in x['premise']],
            new_field_name='premise')
        dev_data_2.apply(
            lambda x: [vocab_bert.to_index(word) for word in x['hypothesis']],
            new_field_name='hypothesis')
        train_data_2[-1], dev_data_2[-1]

        # step 1: load the model configuration (optional)
        from fastNLP.io.config_io import ConfigSection, ConfigLoader
        args = ConfigSection()
        ConfigLoader().load_config("./data/config", {"esim_model": args})
        args["vocab_size"] = len(vocab)
        args.data

        # In[27]:

        # step 2: load the ESIM model
        from fastNLP.models import ESIM
        model = ESIM(**args.data)
        model

        # In[28]:

        # another example: load the CNN text classification model
        from fastNLP.models import CNNText
        cnn_text_model = CNNText(embed_num=len(vocab),
                                 embed_dim=50,
                                 num_classes=5,
                                 padding=2,
                                 dropout=0.1)
        cnn_text_model

        from fastNLP import CrossEntropyLoss
        from fastNLP import Adam
        from fastNLP import AccuracyMetric
        trainer = Trainer(
            train_data=train_data,
            model=model,
            loss=CrossEntropyLoss(pred='pred', target='label'),
            metrics=AccuracyMetric(),
            n_epochs=3,
            batch_size=16,
            print_every=-1,
            validate_every=-1,
            dev_data=dev_data,
            use_cuda=False,
            optimizer=Adam(lr=1e-3, weight_decay=0),
            check_code_level=-1,
            metric_key='acc',
            use_tqdm=False,
        )
        trainer.train()

        tester = Tester(
            data=test_data,
            model=model,
            metrics=AccuracyMetric(),
            batch_size=args["batch_size"],
        )
        tester.test()

        os.chdir("../..")
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:100]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    print(len(train_data), len(dev_data))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    word_vocab = pickle.load(
        open(os.path.join(config.data_path, config.word_vocab_name), "rb"))
    char_vocab = pickle.load(
        open(os.path.join(config.data_path, config.char_vocab_name), "rb"))
    pos_vocab = pickle.load(
        open(os.path.join(config.data_path, config.pos_vocab_name), "rb"))
    # spo_vocab = pickle.load(open(os.path.join(config.data_path, config.spo_vocab_name), "rb"))
    tag_vocab = pickle.load(
        open(os.path.join(config.data_path, config.tag_vocab_name), "rb"))
    print('word vocab', len(word_vocab))
    print('char vocab', len(char_vocab))
    print('pos vocab', len(pos_vocab))
    # print('spo vocab', len(spo_vocab))
    print('tag vocab', len(tag_vocab))

    schema = get_schemas(config.source_path)

    if task_name == 'bilstm_crf':
        model = AdvSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            sentence_length=config.sentence_length,
            hidden_size=config.hidden_dim,
            num_classes=len(tag_vocab),
            dropout=config.dropout,
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type)
    elif task_name == 'trans_crf':
        model = TransformerSeqLabel(
            char_init_embed=(len(char_vocab), config.char_embed_dim),
            word_init_embed=(len(word_vocab), config.word_embed_dim),
            pos_init_embed=(len(pos_vocab), config.pos_embed_dim),
            spo_embed_dim=len(schema),
            num_classes=len(tag_vocab),
            id2words=tag_vocab.idx2word,
            encoding_type=config.encoding_type,
            num_layers=config.num_layers,
            inner_size=config.inner_size,
            key_size=config.key_size,
            value_size=config.value_size,
            num_head=config.num_head,
            dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    # loss = NLLLoss()
    logs = FitlogCallback(dev_data)
    metrics = SpanFPreRecMetric(tag_vocab,
                                pred='pred',
                                seq_len='seq_len',
                                target='tag')

    train_data.set_input('tag')
    dev_data.set_input('tag')
    dev_data.set_target('seq_len')
    #print(train_data.get_field_names())
    trainer = Trainer(
        train_data=train_data,
        model=model,
        # loss=loss,
        metrics=metrics,
        metric_key='f',
        batch_size=config.batch_size,
        n_epochs=config.epoch,
        dev_data=dev_data,
        save_path=config.save_path,
        check_code_level=-1,
        print_every=config.print_every,
        validate_every=config.validate_every,
        optimizer=optimizer,
        use_tqdm=False,
        device=config.device,
        callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(dev_data,
                    model,
                    metrics=metrics,
                    device=config.device,
                    batch_size=config.batch_size)
    tester.test()
def train(config, task_name):
    train_data = pickle.load(
        open(os.path.join(config.data_path, config.train_name), "rb"))
    # debug
    if config.debug:
        train_data = train_data[0:30]
    dev_data = pickle.load(
        open(os.path.join(config.data_path, config.dev_name), "rb"))
    # test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(
        open(os.path.join(config.data_path, config.vocabulary_name), "rb"))

    # load w2v data
    # weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    elif task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary),
                                  embed_dim=config.embed_dim,
                                  output_dim=config.class_num,
                                  hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers,
                                  dropout=config.dropout)
    elif task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         class_num=config.class_num,
                         kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes,
                         dropout=config.dropout,
                         static=config.static,
                         in_channels=config.in_channels)
    elif task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary),
                         embed_dim=config.embed_dim,
                         output_dim=config.class_num,
                         hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers,
                         dropout=config.dropout)
    # elif task_name == "cnn_w2v":
    #     text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
    #                          class_num=config.class_num, kernel_num=config.kernel_num,
    #                          kernel_sizes=config.kernel_sizes, dropout=config.dropout,
    #                          static=config.static, in_channels=config.in_channels,
    #                          weight=weight)
    elif task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary),
                          embed_dim=config.embed_dim,
                          output_dim=config.class_num,
                          hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers,
                          dropout=config.dropout)
    #elif task_name == "bert":
    #    text_model = BertModel.from_pretrained(config.bert_path)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    logs = FitlogCallback(dev_data)
    f1 = F1_score(pred='output', target='target')

    trainer = Trainer(train_data=train_data,
                      model=text_model,
                      loss=BCEWithLogitsLoss(),
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      metrics=f1,
                      metric_key='f1',
                      n_epochs=config.epoch,
                      dev_data=dev_data,
                      save_path=config.save_path,
                      print_every=config.print_every,
                      validate_every=config.validate_every,
                      optimizer=optimizer,
                      use_tqdm=False,
                      device=config.device,
                      callbacks=[timing, early_stop, logs])
    trainer.train()

    # test result
    tester = Tester(
        dev_data,
        text_model,
        metrics=f1,
        device=config.device,
        batch_size=config.batch_size,
    )
    tester.test()
    print("Total Number of Words:", m)

    rnn_text_model = RNN.RNN_Text(vocab_size=m, input_size=50, hidden_layer_size=128, target_size=k, dropout=0.1)
    cnn_text_model = CNN.CNN_Text(vocab_size=m, input_size=50, target_size=k, dropout=0.05)
    model = rnn_text_model
    # ModelLoader.load_pytorch(model, "model_ckpt_large_CNN.pkl")

    trainer = Trainer(
        train_data=train_set,
        model=model,
        loss=CrossEntropyLoss(pred='pred', target='label'),
        n_epochs=50,
        batch_size=16,
        metrics=AccuracyMetric(pred='pred', target='label'),
        dev_data=dev_set,
        optimizer=Adam(lr=1e-3),
        callbacks=[FitlogCallback(data=test_set)]
    )
    trainer.train()

    # saver = ModelSaver("model_ckpt_large_RNN.pkl")
    # saver.save_pytorch(model)

    tester = Tester(
        data=train_set,
        model=model,
        metrics=AccuracyMetric(pred='pred', target='label'),
        batch_size=16,
    )
    tester.test()
Example #16
                    embed_dim=128,
                    hidden_dim=hidden_units,
                    output_dim=8)
# mymodel = load_model(mymodel, './model/best_TextModel_acc_2019-06-28-09-07-50')
trainer = Trainer(
    train_data=train_data,
    model=mymodel,
    loss=CrossEntropyLoss(pred='pred', target='target'),
    # loss=SkipBudgetLoss(pred='pred', target='target', updated_states='updated_states'),
    metrics=[AccuracyMetric(), UsedStepsMetric()],
    n_epochs=30,
    batch_size=batch_size,
    print_every=-1,
    validate_every=-1,
    dev_data=test_data,
    save_path='./model',
    optimizer=Adam(lr=learning_rate, weight_decay=0),
    check_code_level=0,
    device="cuda",
    metric_key='acc',
    use_tqdm=False,
    callbacks=[FitlogCallback(test_data)])
start = time.perf_counter()  # time.clock() was removed in Python 3.8; perf_counter() is the drop-in timer here
trainer.train()
end = time.perf_counter()
training_time = end - start
print('total training time:%fs' % (end - start))
fitlog.add_hyper({'time': training_time})

fitlog.finish()