# Sanity-check / convergence-debug path: train on a tiny slice of the training
# set and evaluate on that same slice, to verify the model can (over)fit.
print_info('see_convergence = True')
print_info('so just test train acc|f1')
datasets['train'] = datasets['train'][:100]  # keep only the first 100 samples

# Choose the optimizer from the CLI flag.
if args.optim == 'adam':
    # NOTE(review): the flag says 'adam' but AdamW (decoupled weight decay)
    # is actually constructed — confirm this is intentional.
    optimizer = optim.AdamW(model.parameters(), lr=args.lr,
                            weight_decay=args.weight_decay)
elif args.optim == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)

# Train and "validate" on the same truncated training data (dev_data is the
# truncated train set on purpose: we only want to see training accuracy/F1).
trainer = Trainer(datasets['train'], model, optimizer, loss, args.batch,
                  n_epochs=args.epoch,
                  dev_data=datasets['train'],
                  metrics=metrics,
                  device=device,
                  dev_batch_size=args.test_batch)
trainer.train()
exit(1208)  # hard stop after the convergence check — everything below is dead
            # code on this path and only runs when this branch is not taken.

# Collect embedding parameters so they can later be given their own optimizer
# settings (their ids are used to filter them out of the other param groups).
bigram_embedding_param = list(model.bigram_embed.parameters())
gaz_embedding_param = list(model.lattice_embed.parameters())
embedding_param = bigram_embedding_param
if args.lattice:
    # NOTE(review): gaz_embedding_param was already computed identically just
    # above — this reassignment is redundant (harmless, but worth cleaning up).
    gaz_embedding_param = list(model.lattice_embed.parameters())
    embedding_param = embedding_param + gaz_embedding_param
embedding_param_ids = list(map(id, embedding_param))  # identity set for later exclusion
# Continuation of a per-task `if/elif` chain that starts before this chunk.
elif arg.task == 'mnli':
    # MNLI ships two dev sets (matched / mismatched); log metrics on both
    # every epoch via fitlog.
    callbacks.append(
        FitlogCallback(
            {
                'dev_matched': data_info.datasets['dev_matched'],
                'dev_mismatched': data_info.datasets['dev_mismatched']
            },
            verbose=1))

# NOTE(review): the trainer/tester below are assumed to sit at the level of the
# enclosing if/elif chain (shared by all tasks) — confirm against the full file.
# Train with accuracy as the model-selection metric, across all visible GPUs.
trainer = Trainer(train_data=data_info.datasets['train'],
                  model=model,
                  optimizer=optimizer,
                  num_workers=0,
                  batch_size=arg.batch_size,
                  n_epochs=arg.n_epochs,
                  print_every=-1,  # disable per-step printing
                  dev_data=data_info.datasets[arg.devset_name],
                  metrics=AccuracyMetric(pred="pred", target="target"),
                  metric_key='acc',
                  device=[i for i in range(torch.cuda.device_count())],
                  check_code_level=-1,  # skip fastNLP's pre-train code check
                  callbacks=callbacks,
                  loss=CrossEntropyLoss(pred="pred", target="target"))
trainer.train(load_best_model=True)  # restore best-on-dev weights after training

# Evaluate the best model on the configured test set.
tester = Tester(
    data=data_info.datasets[arg.testset_name],
    model=model,
    metrics=AccuracyMetric(),
    batch_size=arg.batch_size,
    device=[i for i in range(torch.cuda.device_count())],
)
# Closes a `callbacks = [...]` list literal that begins before this chunk.
]
if arg.task in ['snli']:
    callbacks.append(
        EvaluateCallback(data=data_bundle.datasets[arg.test_dataset_name]))
    # evaluate test set in every epoch if task is snli.

# define trainer; batch size scales with the number of visible GPUs.
trainer = Trainer(train_data=data_bundle.datasets[arg.train_dataset_name],
                  model=model,
                  optimizer=optimizer,
                  loss=CrossEntropyLoss(),
                  batch_size=torch.cuda.device_count() *
                  arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,  # disable per-step printing
                  dev_data=data_bundle.datasets[arg.dev_dataset_name],
                  metrics=AccuracyMetric(),
                  metric_key='acc',  # select best model by dev accuracy
                  device=[i for i in range(torch.cuda.device_count())],
                  check_code_level=-1,  # skip fastNLP's pre-train code check
                  save_path=arg.save_path,
                  callbacks=callbacks)

# train model (best-on-dev weights are restored afterwards)
trainer.train(load_best_model=True)

# define tester — NOTE: this call continues past the end of this chunk
# (its remaining arguments and closing paren are outside the visible window).
tester = Tester(
    data=data_bundle.datasets[arg.test_dataset_name],
    model=model,
# Tail of a data-loading call whose opening (and the if/elif it lives in)
# begins before this chunk — presumably a Quora data pipe; confirm upstream.
paths='path/to/quora/data',  # NOTE(review): placeholder path — must be configured
to_lower=True,
seq_len_type=arg.seq_len_type,
bert_tokenizer=arg.bert_dir,
cut_text=512,  # truncate inputs to BERT's max length budget
get_index=True,
concat='bert',  # concatenate premise/hypothesis in BERT's paired format
)
else:
    # Unknown task name: fail fast rather than training on wrong data.
    raise RuntimeError(f'NOT support {arg.task} task yet!')

# define model: BERT encoder with an NLI classification head sized to the
# target vocabulary.
model = BertForNLI(class_num=len(data_info.vocabs[Const.TARGET]),
                   bert_dir=arg.bert_dir)

# define trainer; batch size scales with the number of visible GPUs.
trainer = Trainer(train_data=data_info.datasets[arg.train_dataset_name],
                  model=model,
                  optimizer=Adam(lr=arg.lr, model_params=model.parameters()),
                  batch_size=torch.cuda.device_count() *
                  arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,  # disable per-step printing
                  dev_data=data_info.datasets[arg.dev_dataset_name],
                  metrics=AccuracyMetric(),
                  metric_key='acc',  # select best model by dev accuracy
                  device=[i for i in range(torch.cuda.device_count())],
                  check_code_level=-1,  # skip fastNLP's pre-train code check
                  save_path=arg.save_path)

# train model (best-on-dev weights are restored afterwards)
trainer.train(load_best_model=True)

# define tester — NOTE: this call continues past the end of this chunk
# (its remaining arguments and closing paren are outside the visible window).
tester = Tester(
    data=data_info.datasets[arg.test_dataset_name],
    model=model,
    metrics=AccuracyMetric(),
    batch_size=torch.cuda.device_count() * arg.batch_size_per_gpu,
    device=[i for i in range(torch.cuda.device_count())],
# Build the CNTN model on top of the pre-trained embedding; hyperparameters
# (tensor slices ns, k-max pooling k_top, conv depth, low-rank r) come from CLI.
model = CNTNModel(embedding,
                  ns=arg.cntn_ns,
                  k_top=arg.cntn_k_top,
                  num_labels=num_labels,
                  depth=arg.cntn_depth,
                  r=arg.cntn_r)
print(model)  # echo the architecture for the run log

# define trainer; batch size scales with the number of visible GPUs.
trainer = Trainer(train_data=data_bundle.datasets['train'],
                  model=model,
                  optimizer=Adam(lr=arg.lr, model_params=model.parameters()),
                  loss=CrossEntropyLoss(),
                  batch_size=torch.cuda.device_count() *
                  arg.batch_size_per_gpu,
                  n_epochs=arg.n_epochs,
                  print_every=-1,  # disable per-step printing
                  dev_data=data_bundle.datasets[dev_dict[arg.dataset]],
                  metrics=AccuracyMetric(),
                  metric_key='acc',  # select best model by dev accuracy
                  device=[i for i in range(torch.cuda.device_count())],
                  check_code_level=-1)  # skip fastNLP's pre-train code check

# train model (best-on-dev weights are restored afterwards)
trainer.train(load_best_model=True)

# define tester — NOTE: this call continues past the end of this chunk
# (its remaining arguments and closing paren are outside the visible window).
tester = Tester(data=data_bundle.datasets[test_dict[arg.dataset]],
                model=model,
                metrics=AccuracyMetric(),
                batch_size=torch.cuda.device_count() *
                arg.batch_size_per_gpu,