Example #1
    def _get_trainer(self, models_folder):
        optimizer = optim.SGD(self.parameters(),
                              lr=self.config['lr'],
                              momentum=0.9)

        callbacks = []
        clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
        evaluate_callback = EvaluateCallback(
            self.data_bundle.get_dataset('test'))

        if self.config['warmup_steps'] > 0:
            warmup_callback = WarmupCallback(self.config['warmup_steps'],
                                             schedule='linear')
            callbacks.append(warmup_callback)
        callbacks.extend([clip_callback, evaluate_callback])

        return Trainer(self.data_bundle.get_dataset('train'),
                       self,
                       optimizer,
                       batch_size=self.config['batch_size'],
                       sampler=BucketSampler(),
                       num_workers=2,
                       n_epochs=100,
                       dev_data=self.data_bundle.get_dataset('dev'),
                       metrics=SpanFPreRecMetric(
                           tag_vocab=self.data_bundle.get_vocab('target'),
                           encoding_type=self.config['encoding_type']),
                       dev_batch_size=self.config['batch_size'] * 5,
                       callbacks=callbacks,
                       device=self.config['device'],
                       test_use_tqdm=False,
                       use_tqdm=True,
                       print_every=300,
                       save_path=models_folder)
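
Every example on this page follows the same fastNLP recipe: build an optimizer, assemble a callback list (value-based gradient clipping, optional linear warmup, evaluation on the test split), then hand the model, data, metrics, and callbacks to Trainer. The sketch below condenses that recurring pattern into a self-contained form; build_trainer is a hypothetical name, and model, data_bundle, and config stand in for the objects each example constructs earlier in its own script.

# A minimal sketch of the setup shared by these examples (assumes fastNLP 0.5+).
import torch.optim as optim
from fastNLP import (Trainer, BucketSampler, SpanFPreRecMetric,
                     GradientClipCallback, WarmupCallback, EvaluateCallback)

def build_trainer(model, data_bundle, config):
    # Example #1 uses SGD with momentum; most of the other examples use Adam.
    optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=0.9)

    # Clip each gradient element to [-5, 5] and re-evaluate on the test
    # split whenever validation runs.
    callbacks = [GradientClipCallback(clip_type='value', clip_value=5),
                 EvaluateCallback(data_bundle.get_dataset('test'))]

    # Linear learning-rate warmup is attached only when requested.
    if config['warmup_steps'] > 0:
        callbacks.append(WarmupCallback(config['warmup_steps'],
                                        schedule='linear'))

    # BucketSampler batches similar-length sequences to reduce padding;
    # SpanFPreRecMetric computes span-level P/R/F1 for sequence labeling.
    return Trainer(data_bundle.get_dataset('train'),
                   model,
                   optimizer,
                   batch_size=config['batch_size'],
                   sampler=BucketSampler(),
                   n_epochs=config['n_epochs'],
                   dev_data=data_bundle.get_dataset('dev'),
                   metrics=SpanFPreRecMetric(
                       tag_vocab=data_bundle.get_vocab('target'),
                       encoding_type=config['encoding_type']),
                   callbacks=callbacks,
                   device=config['device'])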
Example #2
              scale=attn_type == 'naive',
              vocab_size=vocab_size,
              feature_vocab_size=feature_vocab_size,
              kv_attn_type=kv_attn_type,
              memory_dropout=memory_dropout,
              fusion_dropout=fusion_dropout,
              fusion_type=fusion_type,
              highway_layer=highway_layer)

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     test_feature_data=test_feature_data,
                                     feature2id=feature2id,
                                     id2feature=id2feature,
                                     context_num=context_num)

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=0,
                  n_epochs=50,
Example #3
    use_knowledge=False,
    # kv_attn_type=kv_attn_type,
    use_ngram=args.use_ngram,
    gram2id=gram2id,
    device=device,
    cat_num=cat_num)

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))
cws_optimizer = None
callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     use_knowledge=False,
                                     use_ngram=args.use_ngram,
                                     zen_model=None,
                                     ngram_test_examlpes=ngram_test_examlpes,  # sic: spelling matches the upstream parameter name
                                     args=args,
                                     gram2id=None,
                                     device=device,
                                     dataset='test')

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
Example #4
              dropout=trans_dropout,
              after_norm=after_norm,
              attn_type=attn_type,
              bi_embed=None,
              fc_dropout=fc_dropout,
              pos_embed=pos_embed,
              scale=attn_type == 'naive')

if optim_type == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=0,
                  n_epochs=100,
                  dev_data=data_bundle.get_dataset('dev'),
                  metrics=SpanFPreRecMetric(
Example #5
              use_zen=args.zen_model != "")

if args.optim_type == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     use_knowledge=True,
                                     knowledge_type=args.knowledge_type,
                                     pos_th=args.pos_th,
                                     dep_th=args.dep_th,
                                     chunk_th=args.chunk_th,
                                     test_feature_data=test_feature_data,
                                     feature2count=feature2count,
                                     feature2id=feature2id,
                                     id2feature=id2feature,
                                     use_zen=args.zen_model != "",
                                     zen_model=zen_model,
                                     zen_dataset=zen_test_dataset)
if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
Example #6
              feature_vocab_size=feature_vocab_size,
              kv_attn_type=kv_attn_type,
              memory_dropout=memory_dropout,
              fusion_dropout=fusion_dropout,
              fusion_type=fusion_type,
              highway_layer=highway_layer,
              use_zen=args.zen_model != "")

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     test_feature_data=test_feature_data,
                                     feature2id=feature2id,
                                     id2feature=id2feature,
                                     context_num=context_num,
                                     use_zen=args.zen_model != "",
                                     zen_model=zen_model,
                                     zen_dataset=zen_test_dataset)

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=0,
Example #7
              highway_layer=highway_layer,
              key_embed_dropout=key_embed_dropout,
              knowledge_type=knowledge_type)

if optim_type == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     use_knowledge=knowledge,
                                     knowledge_type=knowledge_type,
                                     pos_th=pos_th,
                                     dep_th=dep_th,
                                     chunk_th=chunk_th,
                                     test_feature_data=test_feature_data,
                                     feature2count=feature2count,
                                     feature2id=feature2id,
                                     id2feature=id2feature)

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
Example #8
def main():
    if args.do_eval:
        torch.multiprocessing.set_start_method('spawn', force=True)

    if args.model == 'bert':
        model = BertCRF(embed, [data_bundle.get_vocab('target')],
                        encoding_type='bioes')

    else:
        model = StackedTransformersCRF(
            tag_vocabs=[data_bundle.get_vocab('target')],
            embed=embed,
            num_layers=num_layers,
            d_model=d_model,
            n_head=n_heads,
            feedforward_dim=dim_feedforward,
            dropout=trans_dropout,
            after_norm=after_norm,
            attn_type=attn_type,
            bi_embed=None,
            fc_dropout=fc_dropout,
            pos_embed=pos_embed,
            scale=attn_type == 'transformer')
        model = torch.nn.DataParallel(model)

    if args.do_eval:
        if os.path.exists(os.path.expanduser(args.saved_model)):
            print("Load checkpoint from {}".format(
                os.path.expanduser(args.saved_model)))
            model = torch.load(os.path.expanduser(args.saved_model))
            model.to('cuda')
            print('model to CUDA')

    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)

    callbacks = []
    clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
    evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))
    checkpoint_callback = CheckPointCallback(
        os.path.join(directory, 'model.pth'),
        delete_when_train_finish=False,
        recovery_fitlog=True)

    if warmup_steps > 0:
        warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
        callbacks.append(warmup_callback)
    callbacks.extend([clip_callback, checkpoint_callback, evaluate_callback])

    if not args.do_eval:
        trainer = Trainer(data_bundle.get_dataset('train'),
                          model,
                          optimizer,
                          batch_size=batch_size,
                          sampler=BucketSampler(),
                          num_workers=no_cpu,
                          n_epochs=args.n_epochs,
                          dev_data=data_bundle.get_dataset('dev'),
                          metrics=SpanFPreRecMetric(
                              tag_vocab=data_bundle.get_vocab('target'),
                              encoding_type=encoding_type),
                          dev_batch_size=batch_size,
                          callbacks=callbacks,
                          device=args.device,
                          test_use_tqdm=True,
                          use_tqdm=True,
                          print_every=300,
                          save_path=os.path.join(directory, 'best'))

        trainer.train(load_best_model=True)

        predictor = Predictor(model)
        predict(os.path.join(directory, 'predictions_dev.tsv'), data_bundle,
                predictor, 'dev')
        predict(os.path.join(directory, 'predictions_test.tsv'), data_bundle,
                predictor, 'test')

    else:
        print('Predicting')
        # run prediction over multiple files, in parallel when enough are queued
        torch.multiprocessing.freeze_support()
        model.share_memory()
        predictor = Predictor(model)

        if len(files) > multiprocessing.cpu_count():
            with torch.multiprocessing.Pool(processes=no_cpu) as p:
                with tqdm(total=len(files)) as pbar:
                    for i, _ in enumerate(
                            p.imap_unordered(
                                partial(predict,
                                        data_bundle=data_bundle,
                                        predictor=predictor,
                                        predict_on='train',
                                        do_eval=args.do_eval), files)):
                        pbar.update()
        else:
            for file in tqdm(files):
                predict(file, data_bundle, predictor, 'train', args.do_eval)
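
A note on the do_eval branch of Example #8: torch.multiprocessing.set_start_method('spawn', force=True) is required because CUDA cannot be re-initialized in a forked child process, so the workers must start fresh. model.share_memory() places the parameters in shared memory so the pool workers read a single copy rather than each serializing their own, and functools.partial combined with imap_unordered fans the per-file predict calls out across no_cpu workers while the tqdm bar advances as each file completes.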