# Assumed imports for these fastNLP training snippets (the EvaluateCallback
# used in several variants below is a project-local callback that takes extra
# feature arguments, so its import path is not shown here):
import torch.optim as optim
from fastNLP import Trainer, BucketSampler, SpanFPreRecMetric
from fastNLP import GradientClipCallback, WarmupCallback


def _get_trainer(self, models_folder):
    optimizer = optim.SGD(self.parameters(), lr=self.config['lr'], momentum=0.9)

    callbacks = []
    clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
    evaluate_callback = EvaluateCallback(self.data_bundle.get_dataset('test'))

    if self.config['warmup_steps'] > 0:
        warmup_callback = WarmupCallback(self.config['warmup_steps'], schedule='linear')
        callbacks.append(warmup_callback)
    callbacks.extend([clip_callback, evaluate_callback])

    return Trainer(self.data_bundle.get_dataset('train'), self, optimizer,
                   batch_size=self.config['batch_size'],
                   sampler=BucketSampler(),
                   num_workers=2,
                   n_epochs=100,
                   dev_data=self.data_bundle.get_dataset('dev'),
                   metrics=SpanFPreRecMetric(
                       tag_vocab=self.data_bundle.get_vocab('target'),
                       encoding_type=self.config['encoding_type']),
                   dev_batch_size=self.config['batch_size'] * 5,
                   callbacks=callbacks,
                   device=self.config['device'],
                   test_use_tqdm=False,
                   use_tqdm=True,
                   print_every=300,
                   save_path=models_folder)
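# Minimal usage sketch (assumption: `model` is an instance of the class that
# defines _get_trainer, with self.config and self.data_bundle already built;
# 'checkpoints/' is a hypothetical save directory):
trainer = model._get_trainer('checkpoints/')
trainer.train(load_best_model=True)  # fastNLP Trainer: reloads the best dev checkpoint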
    scale=attn_type == 'naive',
    vocab_size=vocab_size,
    feature_vocab_size=feature_vocab_size,
    kv_attn_type=kv_attn_type,
    memory_dropout=memory_dropout,
    fusion_dropout=fusion_dropout,
    fusion_type=fusion_type,
    highway_layer=highway_layer)

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     test_feature_data=test_feature_data,
                                     feature2id=feature2id,
                                     id2feature=id2feature,
                                     context_num=context_num)

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=0,
                  n_epochs=50,
    use_knowledge=False,
    # kv_attn_type=kv_attn_type,
    use_ngram=args.use_ngram,
    gram2id=gram2id,
    device=device,
    cat_num=cat_num)

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))
cws_optimizer = None

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     use_knowledge=False,
                                     use_ngram=args.use_ngram,
                                     zen_model=None,
                                     ngram_test_examlpes=ngram_test_examlpes,
                                     args=args,
                                     gram2id=None,
                                     device=device,
                                     dataset='test')

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
    dropout=trans_dropout,
    after_norm=after_norm,
    attn_type=attn_type,
    bi_embed=None,
    fc_dropout=fc_dropout,
    pos_embed=pos_embed,
    scale=attn_type == 'naive')

if optim_type == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=0,
                  n_epochs=100,
                  dev_data=data_bundle.get_dataset('dev'),
                  metrics=SpanFPreRecMetric(
use_zen=args.zen_model != "") if args.optim_type == 'sgd': optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9) else: optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99)) callbacks = [] clip_callback = GradientClipCallback(clip_type='value', clip_value=5) evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'), use_knowledge=True, knowledge_type=args.knowledge_type, pos_th=args.pos_th, dep_th=args.dep_th, chunk_th=args.chunk_th, test_feature_data=test_feature_data, feature2count=feature2count, feature2id=feature2id, id2feature=id2feature, use_zen=args.zen_model != "", zen_model=zen_model, zen_dataset=zen_test_dataset) if warmup_steps > 0: warmup_callback = WarmupCallback(warmup_steps, schedule='linear') callbacks.append(warmup_callback) callbacks.extend([clip_callback, evaluate_callback]) trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer, batch_size=batch_size,
    feature_vocab_size=feature_vocab_size,
    kv_attn_type=kv_attn_type,
    memory_dropout=memory_dropout,
    fusion_dropout=fusion_dropout,
    fusion_type=fusion_type,
    highway_layer=highway_layer,
    use_zen=args.zen_model != "")

optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     test_feature_data=test_feature_data,
                                     feature2id=feature2id,
                                     id2feature=id2feature,
                                     context_num=context_num,
                                     use_zen=args.zen_model != "",
                                     zen_model=zen_model,
                                     zen_dataset=zen_test_dataset)

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=0,
    highway_layer=highway_layer,
    key_embed_dropout=key_embed_dropout,
    knowledge_type=knowledge_type)

if optim_type == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data=data_bundle.get_dataset('test'),
                                     use_knowledge=knowledge,
                                     knowledge_type=knowledge_type,
                                     pos_th=pos_th,
                                     dep_th=dep_th,
                                     chunk_th=chunk_th,
                                     test_feature_data=test_feature_data,
                                     feature2count=feature2count,
                                     feature2id=feature2id,
                                     id2feature=id2feature)

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
def main():
    if args.do_eval:
        torch.multiprocessing.set_start_method('spawn', force=True)

    if args.model == 'bert':
        model = BertCRF(embed, [data_bundle.get_vocab('target')], encoding_type='bioes')
    else:
        model = StackedTransformersCRF(tag_vocabs=[data_bundle.get_vocab('target')],
                                       embed=embed,
                                       num_layers=num_layers,
                                       d_model=d_model,
                                       n_head=n_heads,
                                       feedforward_dim=dim_feedforward,
                                       dropout=trans_dropout,
                                       after_norm=after_norm,
                                       attn_type=attn_type,
                                       bi_embed=None,
                                       fc_dropout=fc_dropout,
                                       pos_embed=pos_embed,
                                       scale=attn_type == 'transformer')
    model = torch.nn.DataParallel(model)

    if args.do_eval:
        if os.path.exists(os.path.expanduser(args.saved_model)):
            print("Load checkpoint from {}".format(os.path.expanduser(args.saved_model)))
            model = torch.load(args.saved_model)
            model.to('cuda')
            print('model to CUDA')

    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)

    callbacks = []
    clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
    evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))
    checkpoint_callback = CheckPointCallback(os.path.join(directory, 'model.pth'),
                                             delete_when_train_finish=False,
                                             recovery_fitlog=True)
    if warmup_steps > 0:
        warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
        callbacks.append(warmup_callback)
    callbacks.extend([clip_callback, checkpoint_callback, evaluate_callback])

    if not args.do_eval:
        trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                          batch_size=batch_size,
                          sampler=BucketSampler(),
                          num_workers=no_cpu,
                          n_epochs=args.n_epochs,
                          dev_data=data_bundle.get_dataset('dev'),
                          metrics=SpanFPreRecMetric(
                              tag_vocab=data_bundle.get_vocab('target'),
                              encoding_type=encoding_type),
                          dev_batch_size=batch_size,
                          callbacks=callbacks,
                          device=args.device,
                          test_use_tqdm=True,
                          use_tqdm=True,
                          print_every=300,
                          save_path=os.path.join(directory, 'best'))
        trainer.train(load_best_model=True)

        predictor = Predictor(model)
        predict(os.path.join(directory, 'predictions_dev.tsv'), data_bundle, predictor, 'dev')
        predict(os.path.join(directory, 'predictions_test.tsv'), data_bundle, predictor, 'test')
    else:
        print('Predicting')
        # predictions of multiple files
        torch.multiprocessing.freeze_support()
        model.share_memory()
        predictor = Predictor(model)
        if len(files) > multiprocessing.cpu_count():
            with torch.multiprocessing.Pool(processes=no_cpu) as p:
                with tqdm(total=len(files)) as pbar:
                    for i, _ in enumerate(p.imap_unordered(
                            partial(predict,
                                    data_bundle=data_bundle,
                                    predictor=predictor,
                                    predict_on='train',
                                    do_eval=args.do_eval),
                            files)):
                        pbar.update()
        else:
            for file in tqdm(files):
                predict(file, data_bundle, predictor, 'train', args.do_eval)
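# Entry-point sketch: the 'spawn' start method and the multiprocessing Pool in
# main() require the standard main guard; assumes `args` is parsed at module
# level, as the references inside main() imply.
if __name__ == '__main__':
    main()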