logger.info("Load NLG model from path %s" % (opt.read_model_path)) ##### Training and Decoding ##### if not opt.testing: nsentences = max(len(labeled_dataset), len(unlabeled_dataset)) slot_loss_function = set_celoss_function( ignore_index=slu_vocab.slot2id[PAD]) intent_loss_function = set_celoss_function() num_training_steps = ( (nsentences + opt.batchSize - 1) // opt.batchSize) * opt.max_epoch num_warmup_steps = int(num_training_steps * opt.warmup_ratio) slu_optimizer, slu_scheduler = set_optimizer( train_model.slu_model, lr=slu_params['lr'], l2=slu_params['l2'], max_norm=slu_params['max_norm'], layerwise_decay=slu_params['layerwise_decay'], num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps, lr_schedule=opt.lr_schedule) surface_loss_function = set_celoss_function( ignore_index=nlg_vocab.word2id[PAD]) slot_control_function = set_scloss_function( slot_weight=nlg_params['slot_weight']) nlg_optimizer, nlg_scheduler = set_optimizer( train_model.nlg_model, lr=nlg_params['lr'], l2=nlg_params['l2'], max_norm=nlg_params['max_norm'], lr_schedule='constant') logger.info("Training starts at %s" %
##### Model Initialization #####
if not opt.testing:
    word2id = vocab.sfm2id if params['surface_level'] else vocab.word2id
    ratio = load_embeddings(opt.dataset, train_model.word_embed, word2id, device)
    logger.info("%.2f%% word embeddings from pretrained vectors" % (ratio * 100))
else:
    model_path = os.path.join(opt.read_model_path, 'model.pkl')
    ckpt = torch.load(open(model_path, 'rb'), map_location=device)
    train_model.load_state_dict(ckpt)
    logger.info("Load model from path %s" % (model_path))

##### Training and Decoding #####
if not opt.testing:
    word2id = vocab.sfm2id if params['surface_level'] else vocab.word2id
    loss_function = set_celoss_function(ignore_index=word2id[PAD])
    optimizer, scheduler = set_optimizer(train_model, lr=opt.lr, l2=opt.l2,
                                         max_norm=opt.max_norm, lr_schedule='constant')
    logger.info("Training starts at %s" % (time.asctime(time.localtime(time.time()))))
    train_data_index = np.arange(len(train_dataset))
    nsentences = len(train_data_index)
    best_result = {"losses": [], "iter": 0, "dev_ppl": float('inf'), "test_ppl": float('inf')}
    for i in range(opt.max_epoch):
        start_time = time.time()
        np.random.shuffle(train_data_index)
        losses = []
        train_model.train()
        for j in range(0, nsentences, opt.batchSize):
            optimizer.zero_grad()
            inputs, lens, _ = get_minibatch(
                train_dataset, vocab, task=task, data_index=train_data_index,
                index=j, batch_size=opt.batchSize, device=device,
                surface_level=params['surface_level'])
            batch_scores = train_model(inputs, lens)
            batch_loss = loss_function(batch_scores, inputs[:, 1:])
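# Why inputs[:, 1:] above: the model scores each next token given its prefix,
# so the targets are the inputs shifted left by one position. A self-contained
# toy of that shift (the vocab size, shapes, and PAD id here are illustrative,
# not the project's):
import torch
import torch.nn as nn

PAD_ID = 0
logits = torch.randn(2, 5, 100)           # (batch, seq_len, vocab): scores for positions 1..5
inputs = torch.randint(1, 100, (2, 6))    # (batch, seq_len + 1): BOS + 5 tokens
loss_fn = nn.CrossEntropyLoss(ignore_index=PAD_ID)
targets = inputs[:, 1:]                   # drop BOS: token t is predicted from the prefix < t
loss = loss_fn(logits.reshape(-1, 100), targets.reshape(-1))
ppl = torch.exp(loss)                     # perplexity, as tracked in best_result above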
model_path = os.path.join(opt.read_model_path, 'model.pkl')
ckpt = torch.load(open(model_path, 'rb'), map_location=device)
train_model.load_state_dict(ckpt)
logger.info("Load model from path %s" % (model_path))

##### Training and Decoding #####
if not opt.testing:
    slot_loss_function = set_celoss_function(ignore_index=vocab.slot2id[PAD])
    intent_loss_function = set_celoss_function()
    num_training_steps = ((len(train_dataset) + opt.batchSize - 1) // opt.batchSize) * opt.max_epoch
    num_warmup_steps = int(num_training_steps * opt.warmup_ratio)
    optimizer, scheduler = set_optimizer(
        train_model, lr=opt.lr, l2=opt.l2, max_norm=opt.max_norm,
        layerwise_decay=opt.layerwise_decay, num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps, lr_schedule=opt.lr_schedule)
    logger.info("Training starts at %s" % (time.asctime(time.localtime(time.time()))))
    train_data_index = np.arange(len(train_dataset))
    nsentences, coefficient = len(train_data_index), 0.5
    best_result = {"losses": [], "iter": 0, "dev_slot": 0., "dev_intent": 0.,
                   "test_slot": 0., "test_intent": 0.}
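# num_training_steps above counts optimizer steps: ceil(len(dataset)/batch) per
# epoch times max_epoch, with the first warmup_ratio fraction used for warmup.
# A self-contained sketch of the linear-warmup-then-linear-decay schedule that
# lr_schedule commonly selects (implemented here with LambdaLR; whether
# set_optimizer uses this exact form is an assumption):
import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
num_training_steps, warmup_ratio = 1000, 0.1
num_warmup_steps = int(num_training_steps * warmup_ratio)

def lr_lambda(step):
    if step < num_warmup_steps:
        return step / max(1, num_warmup_steps)  # ramp lr from 0 up to its peak
    # then decay linearly back to 0 over the remaining steps
    return max(0.0, (num_training_steps - step) / max(1, num_training_steps - num_warmup_steps))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)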
    all_hyps = []
    with torch.no_grad():
        for i in range(0, len(dataset), args.batch_size):
            current_batch = Batch.from_example_list(
                dataset[i: i + args.batch_size], device, train=False)
            hyps = model.parse(current_batch, args.beam_size)
            all_hyps.extend(hyps)
    acc = evaluator.acc(all_hyps, dataset, output_path, acc_type=acc_type,
                        etype='match', use_checker=use_checker)
    torch.cuda.empty_cache()
    gc.collect()
    return acc

if not args.testing:
    num_training_steps = ((len(train_dataset) + args.batch_size - 1) // args.batch_size) * args.max_epoch
    num_warmup_steps = int(num_training_steps * args.warmup_ratio)
    logger.info('Total training steps: %d;\t Warmup steps: %d' % (num_training_steps, num_warmup_steps))
    optimizer, scheduler = set_optimizer(model, args, num_warmup_steps, num_training_steps)
    start_epoch, nsamples, best_result = 0, len(train_dataset), {'dev_acc': 0.}
    # each optimizer step accumulates gradients over grad_accumulate sub-batches
    train_index, step_size = np.arange(nsamples), args.batch_size // args.grad_accumulate
    if args.read_model_path and args.load_optimizer:
        # resume optimizer/scheduler state and epoch counter from the checkpoint
        optimizer.load_state_dict(check_point['optim'])
        scheduler.load_state_dict(check_point['scheduler'])
        start_epoch = check_point['epoch'] + 1
    logger.info('Start training ......')
    for i in range(start_epoch, args.max_epoch):
        start_time = time.time()
        epoch_loss, epoch_gp_loss, count = 0, 0, 0
        np.random.shuffle(train_index)
        model.train()
        for j in range(0, nsamples, step_size):
            count += 1
            cur_dataset = [train_dataset[k] for k in train_index[j: j + step_size]]
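# step_size above splits each effective batch of batch_size examples into
# grad_accumulate sub-batches; gradients are summed across them before a single
# optimizer step. A self-contained sketch of that pattern (the real loop's loss
# and model are project-specific; everything below is illustrative):
import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
grad_accumulate, step_size = 4, 8            # 4 sub-batches of 8 = effective batch 32
data = torch.randn(64, 8)

for count, j in enumerate(range(0, len(data), step_size), start=1):
    sub_batch = data[j: j + step_size]
    loss = model(sub_batch).pow(2).mean() / grad_accumulate  # scale so grads average
    loss.backward()                          # accumulates into .grad across sub-batches
    if count % grad_accumulate == 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()                     # one update per effective batch
        optimizer.zero_grad()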