import torch
from torch import optim
from torch.optim import lr_scheduler as opt_sched

DEVICE = torch.device('cuda')

dataset = Dataset()
model = Model(dataset).to(DEVICE)
inference = BeamSearch(10, dataset.make_batch, DEVICE)
opt = optim.Adam(model.parameters(), 3e-4)
sched = opt_sched.ReduceLROnPlateau(opt, factor=0.5, verbose=True)

running_loss = 0
for i in range(100000):
    batch = dataset.get_batch().to(DEVICE)
    loss = model(batch)
    opt.zero_grad()
    loss.backward()
    opt.step()
    running_loss += loss.item()
    if (i + 1) % 100 == 0:
        print(i + 1)
        print(running_loss)
        # ReduceLROnPlateau steps on the monitored metric and halves the
        # learning rate when it stops improving
        sched.step(running_loss)
        running_loss = 0
        # Inspect encoder outputs on a single example from the batch
        dec_batch = next(batch.explode())
        enc = model.encoder(dec_batch)
        # The source is truncated here; presumably the encoder is re-run on
        # a flipped copy of the batch (hypothetical completion):
        enc_flipped = model.encoder(dec_batch)
(
    test_tids, test_tokens, test_labels, test_lens
) = test_set.get_dataset(max_seq_len, volatile=True, gpu=use_gpu)

best_dev_fscore = 0.0
best_test_scores = None

for epoch in range(max_epoch):
    epoch_start_time = current_time()
    epoch_loss = 0.0
    # Reshuffle (and class-balance) the training data each epoch
    train_set.shuffle_dataset(target_label, balance=True)
    batch_num = train_set.batch_num(batch_size)
    for batch_idx in range(batch_num):
        optimizer.zero_grad()
        (
            batch_tids, batch_tokens, batch_labels, batch_lens
        ) = train_set.get_batch(batch_size, gpu=use_gpu)
        model_output = model.forward(batch_tokens, batch_lens)
        loss = loss_func.forward(model_output, batch_labels)
        loss.backward()
        optimizer.step()
        # Accumulate the mean per-batch loss over the epoch
        epoch_loss += 1.0 / batch_num * float(loss)
    epoch_elapsed_time = current_time() - epoch_start_time

    # Evaluate the current model on dev and test sets
    dev_preds = model.forward(dev_tokens, dev_lens)
    dev_scores = _calc_scores(dev_preds, dev_labels)
    test_preds = model.forward(test_tokens, test_lens)
    test_scores = _calc_scores(test_preds, test_labels)

    # Output scores. The source is truncated here; the format string
    # presumably continued with recall and F-score fields, and the
    # arguments below are a hypothetical completion:
    logger.info('[{}] Epoch {:<3} [{}ms]: {:.4f} | P: {:<5.2f} '.format(
        target_label, epoch, epoch_elapsed_time, epoch_loss, dev_scores[0]))