Example #1
def create_cb():
    lrschedule_callback = LRScheduler(
        lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep)))
    clip_callback = GradientClipCallback(clip_type='value', clip_value=2)
    save_dir = os.path.join(root_path, f'model/{args.data_type}',
                            f'fold{args.fold}')
    save_callback = SaveModelCallback(top=1, save_dir=save_dir)
    # base callbacks shared by both the cross-validation and single-fold settings
    callbacks = [
        lrschedule_callback,
        clip_callback,
        save_callback,
    ]
    # callbacks.append(Unfreeze_Callback(embedding_param ,args.fix_embed_epoch))

    if args.use_bert:
        if args.fix_bert_epoch != 0:
            callbacks.append(
                Unfreeze_Callback(model.lattice_embed, args.fix_bert_epoch))
        else:
            bert_embedding.requires_grad = True

    callbacks.append(EarlyStopCallback(args.early_stop))

    if args.warmup > 0 and args.model == 'transformer':
        callbacks.append(WarmupCallback(warmup=args.warmup))
    return callbacks
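The helper above assembles fastNLP callbacks for per-epoch LR decay, element-wise gradient clipping, checkpointing, optional BERT freezing/unfreezing, early stopping, and warmup. As a rough sketch of what the first two callbacks amount to in plain PyTorch (assuming `clip_type='value'` maps onto `torch.nn.utils.clip_grad_value_`, in line with the `clip_grad_norm_` comment in the last example below; the tiny model is only a stand-in, not part of the original code):

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import LambdaLR

model = nn.Linear(10, 2)                       # stand-in model for illustration only
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep))  # same decay rule as above

for epoch in range(3):
    loss = model(torch.randn(4, 10)).sum()
    loss.backward()
    # clip_type='value', clip_value=2: clamp every gradient element into [-2, 2]
    torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=2)
    optimizer.step()
    optimizer.zero_grad()
    scheduler.step()  # multiplier on the base lr: 1.0, then 1/1.05, 1/1.10, ...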
Example #2
 def test_gradient_clip(self):
     data_set, model = prepare_env()
     trainer = Trainer(data_set, model, optimizer=SGD(lr=0.1), loss=BCELoss(pred="predict", target="y"),
                       batch_size=32, n_epochs=20, print_every=50, dev_data=data_set,
                       metrics=AccuracyMetric(pred="predict", target="y"), use_tqdm=False,
                       callbacks=[GradientClipCallback(model.parameters(), clip_value=2)], check_code_level=2)
     trainer.train()
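Here the callback is handed an explicit parameter list: `GradientClipCallback(model.parameters(), clip_value=2)` clips only those tensors. The other examples on this page omit the first argument, in which case clipping presumably falls back to all trainable parameters of the trainer's model.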
Example #3
 def run3(self):
     # test callbacks, especially clip-norm
     set_rng_seed(100)
     data_set, model = prepare_env()
     trainer = DistTrainer(
         data_set,
         model,
         optimizer=None,
         loss=BCELoss(pred="predict", target="y"),
         n_epochs=3,
         print_every=50,
         callbacks_all=[GradientClipCallback()],
         callbacks_master=[EchoCallback('callbacks_master')])
     trainer.train()
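Judging by the argument names, the distributed trainer splits its callbacks into two groups: `callbacks_all` (here the gradient clipping) run in every worker process, while `callbacks_master` (the `EchoCallback`) runs only on the master, which keeps things like logging from being duplicated across ranks.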
Example #4
    #                       weight_decay=args.weight_decay)
    optimizer = optim.SGD(param_,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

if 'msra' in args.dataset:
    datasets['dev'] = datasets['test']

fitlog_evaluate_dataset = {'test': datasets['test']}
if args.test_train:
    fitlog_evaluate_dataset['train'] = datasets['train']
evaluate_callback = FitlogCallback(fitlog_evaluate_dataset, verbose=1)
lrschedule_callback = LRScheduler(
    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep)))
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)


# model.state_dict()
class CheckWeightCallback(Callback):
    def __init__(self, model):
        super().__init__()
        self.model_ = model

    def on_step_end(self):
        print('parameter weight:', flush=True)
        print(self.model_.state_dict()['encoder.layer_0.attn.w_q.weight'],
              flush=True)


callbacks = [
Example #5
    model = Model(data_bundle.get_vocab(Const.INPUTS(0)), config)
    print(model)

    loss = SoftmaxLoss()

    metric = CRMetric()

    optim = Adam(model.parameters(), lr=config.lr)

    lr_decay_callback = LRCallback(optim.param_groups, config.lr_decay)

    trainer = Trainer(
        model=model,
        train_data=data_bundle.datasets["train"],
        dev_data=data_bundle.datasets["dev"],
        loss=loss,
        metrics=metric,
        check_code_level=-1,
        sampler=None,
        batch_size=1,
        device=torch.device("cuda:" + config.cuda)
        if torch.cuda.is_available() else None,
        metric_key='f',
        n_epochs=config.epoch,
        optimizer=optim,
        save_path=None,
        callbacks=[lr_decay_callback,
                   GradientClipCallback(clip_value=5)])
    print()
    trainer.train()
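This trainer passes only `clip_value=5`. Assuming fastNLP's default `clip_type` is `'norm'` (consistent with the `clip_grad_norm_` comment in the last example on this page), that is roughly equivalent to calling `torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)` after each backward pass, i.e. the global gradient norm is rescaled rather than individual gradient values being clamped.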
Example #6
    print(config)

    def cache():
        bundle = CoReferencePipe(config).process_from_file({'train': config.train_path, 'dev': config.dev_path,
                                                            'test': config.test_path})
        return bundle
    data_bundle = cache()
    print(data_bundle)
    model = Model(data_bundle.get_vocab(Const.INPUTS(0)), config)
    print(model)

    loss = SoftmaxLoss()

    metric = CRMetric()

    optim = Adam(model.parameters(), lr=config.lr)

    lr_decay_callback = LRCallback(optim.param_groups, config.lr_decay)

    trainer = Trainer(model=model, train_data=data_bundle.datasets["train"], dev_data=data_bundle.datasets["dev"],
                      loss=loss, metrics=metric, check_code_level=-1, sampler=None,
                      batch_size=1, device=torch.device("cuda:" + config.cuda) if torch.cuda.is_available() else None,
                      metric_key='f', n_epochs=config.epoch,
                      optimizer=optim,
                      save_path=None,
                      callbacks=[lr_decay_callback, GradientClipCallback(clip_value=5)])
    print()

    trainer.train()
Example #7
 loss = LossInForward()
 optimizer = AdamW(
     [param for param in model.parameters() if param.requires_grad],
     lr=2e-5)
 # metric = AccuracyMetric()
 metric = SpanFPreRecMetric(
     tag_vocab=data_bundle.get_vocab(Const.TARGET),
     only_gross=False)  # with only_gross=False, per-label metric statistics are also returned
 device = 'cuda' if torch.cuda.is_available() else 'cpu'  # run on the GPU if one is available; training is much faster
 logger.info('device:{}'.format(device))
 batch_size = 32
 n_epochs = 10
 early_stopping = 10
 callbacks = [
     GradientClipCallback(clip_type='norm', clip_value=1),
     WarmupCallback(warmup=0.1, schedule='linear'),
     EarlyStopCallback(early_stopping)
 ]
 trainer = Trainer(save_path=model_path,
                   train_data=data_bundle.get_dataset('train'),
                   model=model,
                   loss=loss,
                   optimizer=optimizer,
                   batch_size=batch_size,
                   n_epochs=n_epochs,
                   dev_data=data_bundle.get_dataset('dev'),
                   metrics=metric,
                   metric_key='f',
                   device=device,
                   callbacks=callbacks)
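This sequence-labelling setup combines norm clipping at 1.0, a linear warmup (with `warmup=0.1` presumably meaning the first 10% of training steps), and early stopping after `early_stopping=10` evaluations without improvement on the span-F metric selected via `metric_key='f'`. A minimal sketch of such a linear-warmup multiplier, as an assumption about the schedule rather than fastNLP's exact implementation:

def linear_warmup_factor(step, total_steps, warmup=0.1):
    # assumed behaviour of schedule='linear': ramp the lr multiplier from 0 to 1 over the
    # first `warmup` fraction of steps, then decay it linearly back towards 0
    warmup_steps = max(1, int(total_steps * warmup))
    if step < warmup_steps:
        return step / warmup_steps
    return max(0.0, (total_steps - step) / max(1, total_steps - warmup_steps))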
Example #8
    embedding = StaticEmbedding(data_bundle.vocabs[Const.INPUTS(0)],
                                model_dir_or_name='en-glove-840b-300d',
                                requires_grad=True,
                                normalize=False)
else:
    raise RuntimeError(f'NOT support {arg.embedding} embedding yet!')

# define model
model = ESIM(embedding, num_labels=len(data_bundle.vocabs[Const.TARGET]))

# define optimizer and callback
optimizer = Adamax(lr=arg.lr, params=model.parameters())
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)  # halve the learning rate every 10 epochs

callbacks = [
    GradientClipCallback(
        clip_value=10),  # equivalent to torch.nn.utils.clip_grad_norm_(10)
    LRScheduler(scheduler),
]

if arg.task in ['snli']:
    callbacks.append(
        EvaluateCallback(data=data_bundle.datasets[arg.test_dataset_name]))
    # evaluate test set in every epoch if task is snli.

# define trainer
trainer = Trainer(train_data=data_bundle.datasets[arg.train_dataset_name],
                  model=model,
                  optimizer=optimizer,
                  loss=CrossEntropyLoss(),
                  batch_size=torch.cuda.device_count() *
                  arg.batch_size_per_gpu,