# --- Monolingual training -------------------------------------------------
# Device placement is done here by hand; the Trainer below is created with
# device=None so fastNLP does not move the model again.
if torch.cuda.is_available():
    model.cuda()

optimizer = optim.AdamW(model.parameters(), lr=lr)

# Extra evaluation splits that FitlogCallback evaluates and logs alongside
# the dev metric after each validation round.
data = {}
for name in ['seen', 'unseen', 'desc']:
    data[name] = data_bundle.get_dataset(name)

callbacks = [
    GradientClipCallback(clip_type='value', clip_value=5),
    WarmupCallback(warmup=0.01, schedule='linear'),
]
callbacks.append(FitlogCallback(data=data, verbose=1))

# Sequence lengths are needed by BucketSampler to batch similarly sized
# examples together.
train_data = data_bundle.get_dataset('train')
train_data.add_seq_len('input')

sampler = BucketSampler()
clip_max_length(train_data, data_bundle)

trainer = Trainer(
    train_data=train_data,
    model=model,
    optimizer=optimizer,
    loss=CrossEntropyLoss(),
    batch_size=batch_size,
    sampler=sampler,
    drop_last=False,
    update_every=1,
    num_workers=1,
    n_epochs=n_epochs,
    print_every=5,
    dev_data=data_bundle.get_dataset('dev'),
    metrics=MonoMetric(),
    metric_key='t10',           # model selection keyed on top-10 metric
    validate_every=-1,          # validate once per epoch
    save_path='save_models/',
    use_tqdm=True,
    device=None,                # placement already handled above
    callbacks=callbacks,
    check_code_level=0,
)
trainer.train(load_best_model=False)

# Record the run's start timestamp so the fitlog entry can be matched to
# the checkpoint directory later.
fitlog.add_other(trainer.start_time, name='start_time')
lg_dict = getattr(data_bundle, 'lg_dict') lg_shifts = getattr(data_bundle, 'lg_shift') train_lg_shifts = getattr(data_bundle, 'train_lg_shift') train_data = DataSet() for name, ds in data_bundle.iter_datasets(): if 'train' in name: for ins in ds: train_data.append(ins) train_data.add_seq_len('input') train_data.set_input('input', 'language_ids') train_data.set_target('target') train_data.set_pad_val('input', pad_id) clip_max_length(train_data, data_bundle, max_sent_len=50) model = JointBertReverseDict(pre_name, train_word2bpes, target_word2bpes, pad_id=pad_id, num_languages=3) if torch.cuda.is_available(): model.cuda() optimizer = optim.AdamW(model.parameters(), lr=lr) data = {} summary_ms = [] for name, ds in data_bundle.iter_datasets():