def test(args):
    """Evaluate every saved GTTP checkpoint on the dev and test splits.

    For each epoch ``i`` in ``range(args.epoch)``, if a serialized model
    exists at ``<output_path>model/<i>.pkl`` it is loaded and run over the
    dev/test datasets; predictions are written via ``save_result``.

    Args:
        args: parsed CLI namespace; reads data_path, dataset, epoch,
            output_path, embedding_size, hidden_size, max_target_length,
            local_rank, num_gpu, batch_size.
    """
    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()
    data_path = os.path.join(args.data_path, args.dataset + '/')

    # Fix: initialize both datasets so the `if dev_dataset:` / `if
    # test_dataset:` checks below cannot raise UnboundLocalError when a
    # split has 10 or fewer samples and the guarded assignment is skipped.
    dev_dataset = None
    test_dataset = None

    dev_samples = torch.load(os.path.join(data_path, args.dataset + '.dev.pkl'))
    if len(dev_samples) > 10:
        dev_dataset = GTTPDataset(
            dev_samples, None, None, None, None, None, None, None,
            sample_tensor=torch.load(
                os.path.join(data_path,
                             args.dataset + '.dev.GTTP.dataset.pkl')))

    test_samples = torch.load(
        os.path.join(data_path, args.dataset + '.test.pkl'))
    if len(test_samples) > 10:
        test_dataset = GTTPDataset(
            test_samples, None, None, None, None, None, None, None,
            sample_tensor=torch.load(
                os.path.join(data_path,
                             args.dataset + '.test.GTTP.dataset.pkl')))

    for i in range(args.epoch):
        print('epoch', i)
        file = args.output_path + 'model/' + str(i) + '.pkl'
        if os.path.exists(file):
            model = GTTP(args.embedding_size, args.hidden_size, vocab2id,
                         id2vocab, max_dec_len=args.max_target_length,
                         beam_width=1)
            model.load_state_dict(torch.load(file, map_location='cpu'))
            trainer = CumulativeTrainer(model, tokenizer, detokenizer,
                                        args.local_rank, args.num_gpu)
            if dev_dataset:
                predictions = trainer.predict('test', dev_dataset,
                                              collate_fn, args.batch_size)
                save_result(predictions, dev_dataset, model.to_sentence,
                            detokenizer, args.output_path, args.local_rank,
                            i, args.dataset + '_dev')
            if test_dataset:
                predictions = trainer.predict('test', test_dataset,
                                              collate_fn, args.batch_size)
                save_result(predictions, test_dataset, model.to_sentence,
                            detokenizer, args.output_path, args.local_rank,
                            i, args.dataset + '_test')
def train(args):
    """Train a GLKS model on the MARCO training split, checkpointing per epoch.

    Relies on module-level configuration globals (base_output_path,
    min_window_size, num_windows, embedding_size, hidden_size,
    max_target_length, epoch, accumulation_steps).
    """
    batch_size = 16
    output_path = base_output_path
    dataset = args.dataset
    data_path = args.data_path + dataset + '/' + dataset

    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    samples = torch.load(data_path + '.pkl')
    n_train = len(samples)
    train_dataset = GLKSDataset(
        samples, None, None, None, None, None, None, None, None,
        sample_tensor=torch.load(data_path + '.GLKS.dataset.pkl'))

    model = GLKS(min_window_size, num_windows, embedding_size, hidden_size,
                 vocab2id, id2vocab, max_dec_len=max_target_length,
                 beam_width=1, emb_matrix=None)
    init_params(model)

    # Expected total optimizer steps over the whole run (4 GPUs assumed —
    # TODO(review) confirm against launcher).
    total_steps = (epoch * n_train) / (4 * batch_size * accumulation_steps)
    optimizer = optim.Adam(model.parameters(), lr=2.5e-4)
    scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        optimizer, 2000, int(total_steps) + 100)
    trainer = CumulativeTrainer(model, tokenizer, detokenizer,
                                args.local_rank, 4,
                                accumulation_steps=accumulation_steps)

    for epoch_idx in range(epoch):
        trainer.train_epoch('ds_mle_mce_train', train_dataset, collate_fn,
                            batch_size, epoch_idx, optimizer, scheduler)
        trainer.serialize(epoch_idx, output_path=output_path)
def train(args):
    """Train a CaSE model and serialize a checkpoint after every epoch.

    Args:
        args: parsed CLI namespace; reads data_path, dataset, epoch,
            num_gpu, batch_size, accumulation_steps, max_span_size,
            max_target_length, hidden_size, local_rank, output_path.
    """
    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    data_path = os.path.join(args.data_path, args.dataset + '/')
    samples = torch.load(
        os.path.join(data_path, args.dataset + '.train.pkl'))
    num_samples = len(samples)
    tensor_path = os.path.join(data_path,
                               args.dataset + '.train.CaSE.dataset.pkl')
    train_dataset = CaSEDataset(
        samples, None, None, None, None, None, None, None, None, None, None,
        sample_tensor=torch.load(tensor_path))

    model = CaSE(args.max_span_size, args.max_target_length, id2vocab,
                 vocab2id, args.hidden_size)
    init_params(model)

    # Expected total number of optimizer steps across all epochs/devices.
    total_steps = (args.epoch * num_samples) / (
        args.num_gpu * args.batch_size * args.accumulation_steps)
    optimizer = optim.Adam(model.parameters(), lr=2.5e-4)
    scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        optimizer, 2000, int(total_steps) + 100)
    trainer = CumulativeTrainer(
        model, tokenizer, detokenizer, args.local_rank, args.num_gpu,
        accumulation_steps=args.accumulation_steps)

    for epoch_idx in range(args.epoch):
        trainer.train_epoch('train', train_dataset, collate_fn,
                            args.batch_size, epoch_idx, optimizer, scheduler)
        trainer.serialize(epoch_idx, output_path=args.output_path)
def test(args):
    """Evaluate every saved S2SA checkpoint on MARCO dev/test and CAsT test.

    For each epoch checkpoint under ``<output_path>model/``, runs prediction
    over the three evaluation sets in order (marco_dev, marco_test,
    cast_test) and writes results via ``save_result``.
    """
    batch_size = 16
    output_path = base_output_path
    dataset = args.dataset
    data_path = args.data_path + dataset + '/' + dataset
    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    def _make_dataset(sample_file, tensor_file):
        # Build an S2SADataset from a raw sample pickle + precomputed tensors.
        return S2SADataset(
            torch.load(sample_file), None, None, None, None, None, None,
            None, None, sample_tensor=torch.load(tensor_file))

    # (dataset, result tag) pairs, evaluated in this fixed order.
    eval_sets = [
        (_make_dataset(data_path + 'marco/marco.dev.pkl',
                       data_path + 'marco/marco.dev.S2SA.dataset.pkl'),
         'marco_dev'),
        (_make_dataset(data_path + 'marco/marco.test.pkl',
                       data_path + 'marco/marco.test.S2SA.dataset.pkl'),
         'marco_test'),
        (_make_dataset(data_path + '.pkl',
                       data_path + '.S2SA.dataset.pkl'),
         'cast_test'),
    ]

    for i in range(epoch):
        print('epoch', i)
        file = output_path + 'model/' + str(i) + '.pkl'
        if os.path.exists(file):
            model = S2SA(embedding_size, hidden_size, vocab2id, id2vocab,
                         max_dec_len=max_target_length, beam_width=1)
            model.load_state_dict(torch.load(file, map_location='cpu'))
            trainer = CumulativeTrainer(model, tokenizer, detokenizer,
                                        args.local_rank, 4)
            for eval_dataset, tag in eval_sets:
                predictions = trainer.predict('test', eval_dataset,
                                              collate_fn, batch_size)
                save_result(predictions, eval_dataset, model.to_sentence,
                            detokenizer, output_path, args.local_rank, i,
                            tag)
def test(args):
    """Run inference with saved CTDS checkpoints and dump raw outputs.

    For each epoch in ``[args.infer_epoch_start, epoches)`` whose checkpoint
    exists under ``output_model_path``, predicts over the dev and test
    splits and serializes the raw prediction lists to
    ``output_result_path`` — note each GPU process saves a separate file,
    suffixed with ``args.local_rank``.

    Relies on module-level globals: src, task, _keys, cut_data_index,
    epoches, hidden_size, accumulation_steps, save_data_attributes,
    output_model_path, output_result_path, babi_tokenizer.
    """
    batch_size = args.test_batch_size
    ratio = args.profile_dropout_ratio
    policy = args.neighbor_policy
    task_dir = '%s/%s-%s' % (src, task, policy)

    # Suffix encoding which KB attributes were dropped during preprocessing;
    # must match the suffix used when the *.ctds-*.pkl tensors were built.
    drop_attr = ''
    if args.keep_attributes is not None:
        for k in _keys:
            if k not in args.keep_attributes:
                drop_attr += '_%s' % k

    # Fix: removed two dead loads present in the original — kb_vocab (from
    # kbs.pkl) and train_samples (from train.pkl) were loaded but never
    # used, wasting I/O and memory during inference.
    candidates = torch.load('%s/candidates.pkl' % task_dir)
    candidate_tensor = torch.load('%s/candidate.ctds.pkl' % task_dir)
    dev_samples = torch.load('%s/dev.pkl' % task_dir)
    test_samples = torch.load('%s/test.pkl' % task_dir)
    meta_data = torch.load('%s/meta.pkl' % task_dir)
    vocab2id, id2vocab = torch.load('%s/vocab.pkl' % task_dir)
    tokenizer = babi_tokenizer
    print('Item size', len(vocab2id))

    train_sample_tensor = torch.load(
        '%s/train.ctds-%s%s.pkl' % (task_dir, ratio, drop_attr))
    dev_sample_tensor = torch.load(
        '%s/dev.ctds-%s%s.pkl' % (task_dir, ratio, drop_attr))
    test_sample_tensor = torch.load(
        '%s/test.ctds-%s%s.pkl' % (task_dir, ratio, drop_attr))

    dev_dataset = CTDSDataset(
        dev_samples[:cut_data_index], candidates, meta_data, tokenizer,
        vocab2id, id2vocab,
        sample_tensor=dev_sample_tensor[:cut_data_index],
        train_sample_tensor=train_sample_tensor)
    test_dataset = CTDSDataset(
        test_samples[:cut_data_index], candidates, meta_data, tokenizer,
        vocab2id, id2vocab,
        sample_tensor=test_sample_tensor[:cut_data_index],
        train_sample_tensor=train_sample_tensor)

    for i in range(args.infer_epoch_start, epoches):
        print('epoch', i)
        file = os.path.join(output_model_path, str(i) + '.pkl')
        if os.path.exists(file):
            model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor,
                         meta_data)
            model.load_state_dict(torch.load(file, map_location='cpu'))
            model_trainer = CumulativeTrainer(
                model, tokenizer, None, args.local_rank, 4,
                accumulation_steps=accumulation_steps,
                max_grad_norm=args.max_grad_norm,
                save_data_attributes=save_data_attributes)

            # Dev inference; raw output is saved per GPU process.
            dev_list_output = model_trainer.predict(
                'infer', dev_dataset, collate_fn, batch_size)
            torch.save(
                dev_list_output,
                os.path.join(output_result_path,
                             'dev.%s.%s' % (i, args.local_rank)))

            # Test inference.
            test_list_output = model_trainer.predict(
                'infer', test_dataset, collate_fn, batch_size)
            torch.save(
                test_list_output,
                os.path.join(output_result_path,
                             'test.%s.%s' % (i, args.local_rank)))
def train(args):
    """Train a CTDS model, optionally resuming from a serialized checkpoint.

    When ``args.train_epoch_start > 0``, the checkpoint for that epoch is
    loaded from ``output_model_path`` and training resumes from it;
    otherwise a fresh model is initialized. Checkpoints are written after
    every epoch.

    Relies on module-level globals: src, task, _keys, cut_data_index,
    epoches, hidden_size, accumulation_steps, save_data_attributes,
    output_model_path, babi_tokenizer.

    Raises:
        FileNotFoundError: if resuming is requested but the checkpoint for
            ``args.train_epoch_start`` does not exist.
    """
    batch_size = args.train_batch_size
    ratio = args.profile_dropout_ratio
    policy = args.neighbor_policy
    task_dir = '%s/%s-%s' % (src, task, policy)

    # Suffix encoding which KB attributes were dropped during preprocessing.
    drop_attr = ''
    if args.keep_attributes is not None:
        for k in _keys:
            if k not in args.keep_attributes:
                drop_attr += '_%s' % k

    # Fix: removed the dead kbs.pkl load — kb_vocab was never used.
    candidates = torch.load('%s/candidates.pkl' % task_dir)
    candidate_tensor = torch.load('%s/candidate.ctds.pkl' % task_dir)
    train_samples = torch.load('%s/train.pkl' % task_dir)
    train_sample_tensor = torch.load(
        '%s/train.ctds-%s%s.pkl' % (task_dir, ratio, drop_attr))
    meta_data = torch.load('%s/meta.pkl' % task_dir)
    vocab2id, id2vocab = torch.load('%s/vocab.pkl' % task_dir)
    tokenizer = babi_tokenizer
    print('Item size', len(vocab2id))

    train_dataset = CTDSDataset(
        train_samples[:cut_data_index], candidates, meta_data, tokenizer,
        vocab2id, id2vocab,
        sample_tensor=train_sample_tensor[:cut_data_index],
        train_sample_tensor=train_sample_tensor)

    if args.train_epoch_start > 0:
        # Resume: load the checkpoint for the requested start epoch.
        file = os.path.join(output_model_path,
                            str(args.train_epoch_start) + '.pkl')
        if not os.path.exists(file):
            # Fix: the original only printed an error and fell through,
            # leaving `model` unbound and crashing later with NameError.
            # Fail fast with an explicit exception instead.
            raise FileNotFoundError(
                'ERR: do not have %s' % args.train_epoch_start)
        model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor,
                     meta_data)
        model.load_state_dict(torch.load(file, map_location='cpu'))
    else:
        model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor,
                     meta_data)
        init_params(model)

    train_size = len(train_dataset)
    # Expected total optimizer (back-prop) steps across the whole run
    # (4 GPUs assumed — TODO(review) confirm against launcher).
    model_bp_count = (epoches * train_size) / (
        4 * batch_size * accumulation_steps)
    model_optimizer = optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.warmup > 0:
        model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            model_optimizer, round(args.warmup * model_bp_count),
            int(model_bp_count) + 100)
    else:
        model_scheduler = None

    model_trainer = CumulativeTrainer(
        model, tokenizer, None, args.local_rank, 4,
        accumulation_steps=accumulation_steps,
        max_grad_norm=args.max_grad_norm,
        save_data_attributes=save_data_attributes)

    for i in range(args.train_epoch_start, epoches):
        model_trainer.train_epoch('train', train_dataset, collate_fn,
                                  batch_size, i, model_optimizer,
                                  model_scheduler)
        model_trainer.serialize(i, output_path=output_model_path)