def tester(self):
    self.RNN.eval()
    best_paths = []
    real_labels = []
    for data, labels, lengths in self.data_loader:
        data, labels, lengths = util.sort_batch(data, labels, lengths)
        # drop the padding beyond the sequence length
        # (this slicing only works when the batch holds a single sequence)
        data = data[:lengths]
        labels = labels[:lengths]
        path_score, best_path = self.RNN.get_tags(data.to(device),
                                                  lengths.to(device))
        best_paths.append(best_path)
        real_labels.append(labels)
    num_correct = 0
    num_comparison = 0
    for pred, real in zip(best_paths, real_labels):
        real = real.squeeze(1)
        num_correct += np.sum(real.numpy() == np.asarray(pred))
        num_comparison += len(real.numpy())
    print("Accuracy : ", num_correct / float(num_comparison))

def tester(self):
    self.RNN.eval()
    num_correct = 0.
    num_comparison = 0.
    best_paths = []
    real_labels = []
    for data, labels, lengths in self.data_loader:
        data, labels, lengths = util.sort_batch(data, labels, lengths)
        path_score, best_path = self.RNN.get_tags(data.to(device),
                                                  lengths.to(device))
        best_path = np.concatenate(best_path, 1)
        # keep only the unpadded part of each prediction / label sequence
        for i, lens in enumerate(lengths):
            best_paths.append(best_path[i][:lens])
            real_labels.append(labels[:, i][:lens])
    for pred, real in zip(best_paths, real_labels):
        num_correct += np.sum(real.numpy() == np.asarray(pred))
        num_comparison += len(real.numpy())
    print("Test Accuracy : ", num_correct / float(num_comparison))

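Both testers lean on util.sort_batch, whose definition is not included in these snippets. A minimal sketch of what it plausibly does, assuming time-major (seq_len, batch) tensors as implied by the labels[:, i] indexing above; the actual helper may differ:

import torch

def sort_batch(data, labels, lengths):
    # Sort a padded batch by descending sequence length, as required by
    # torch.nn.utils.rnn.pack_padded_sequence. Assumes data and labels are
    # time-major (seq_len, batch); per-sequence labels of shape (batch,)
    # are handled by the one-dimensional branch below.
    lengths, sort_idx = lengths.sort(descending=True)
    data = data[:, sort_idx]
    labels = labels[:, sort_idx] if labels.dim() > 1 else labels[sort_idx]
    return data, labels, lengths

Sorting by descending length is what pack_padded_sequence historically required, which is presumably why every loop here sorts the batch before the forward pass.
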
def trainer(self):
    steps = 0
    loss_deque = deque(maxlen=100)
    train_loss = []
    last_epoch = 0
    if self.opts.resume:
        last_epoch, loss = self.load_progress()
    for e in range(self.opts.epoch - last_epoch):
        # adaptive LR change
        for param_group in self.RNN_optim.param_groups:
            param_group['lr'] = util.linear_LR(e, self.opts)
            print('epoch: {}, RNN_LR: {:.4}'.format(e, param_group['lr']))
        if self.opts.save_progress:
            # save the progress before the LR starts changing
            if e == self.opts.const_epoch:
                self.save_progress(self.opts.const_epoch, np.mean(loss_deque))
            if e % self.opts.save_every == 0:
                self.save_progress(e, np.mean(loss_deque))
        for data, labels, lengths in self.data_loader:
            steps += 1
            data, labels, lengths = util.sort_batch(data, labels, lengths)
            # data = data[:lengths]
            # labels = labels[:lengths]
            self.RNN_optim.zero_grad()
            # the model computes the loss directly from the labels
            loss = self.RNN(data.to(device), labels.to(device),
                            lengths.to(device))
            loss.backward()
            self.RNN_optim.step()
            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))
            if steps % self.opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}'.format(
                    e, steps, loss.item()))
                util.raw_score_plotter(train_loss)
    if self.opts.save_progress:
        # save the final state
        self.save_progress(-1, np.mean(loss_deque))
    util.raw_score_plotter(train_loss)

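util.linear_LR is likewise not shown. A plausible sketch, inferred from opts.const_epoch and the save-before-the-LR-changes comment; the constant-then-linear-decay shape and the opts.lr attribute are assumptions:

def linear_LR(epoch, opts):
    # Hypothetical schedule: hold opts.lr constant for the first
    # opts.const_epoch epochs, then decay it linearly to zero by opts.epoch.
    if epoch < opts.const_epoch:
        return opts.lr
    remaining = max(opts.epoch - opts.const_epoch, 1)
    return opts.lr * max(0.0, 1.0 - (epoch - opts.const_epoch) / float(remaining))
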
def tester(self):
    _, _ = self.load_progress()
    confusion = torch.zeros(self.opts.num_classes, self.opts.num_classes)
    # predictions and labels to be fed into the confusion-matrix plot
    pred_list = []
    labels_list = []
    y_label = np.arange(self.opts.num_classes)
    y_label = [str(e) for e in y_label]
    # eval mode to disable dropout
    self.RNN.eval()
    # used for overall accuracy
    correct = 0
    total = 0
    for data, label, lengths in self.data_loader:
        data, label, lengths = util.sort_batch(data, label, lengths)
        # run the data through the RNN
        pred = self.RNN(data, lengths)
        # pick the argmax
        output = torch.max(pred, 1)[1]
        for out, lab in zip(output, label):
            pred_list.append(out.cpu().item())
            labels_list.append(lab.item())
            if out.cpu().item() == lab.item():
                correct += 1
            total += 1
    # np.int was removed from recent NumPy releases; use np.int64 instead
    confusionMatrix.plot_confusion_matrix(np.array(labels_list, dtype=np.int64),
                                          np.array(pred_list, dtype=np.int64),
                                          np.array(y_label),
                                          title="ConfusionMatrix")
    plt.show()
    print("Test Accuracy", correct / float(total))

def trainer(opts, RNN, RNN_optim, criterion, data_loader):
    steps = 0
    loss_deque = deque(maxlen=100)  # running window used to smooth the loss
    train_loss = []
    for e in range(opts.epoch):
        for data, labels, lengths in data_loader:
            steps += 1
            data, labels, lengths = util.sort_batch(data, labels, lengths)
            RNN_optim.zero_grad()
            pred = RNN(data, lengths)
            loss = criterion(pred, labels.to(device))
            loss.backward()
            RNN_optim.step()
            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))
            if steps % opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}'.format(
                    e, steps, loss.item()))
                util.raw_score_plotter(train_loss)

def train(epoch):
    model.train()
    opt.epoch_best_score = -float("inf")
    opt.epoch_best_name = None
    for batch_idx, batch in enumerate(train_iter, start=1):
        batch = sort_batch(batch)
        src_raw = batch[0]
        trg_raw = batch[1]
        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
        )
        f_trg, f_trg_mask = convert_data(
            trg_raw, trg_vocab, device, False, UNK, PAD, SOS, EOS
        )
        b_trg, b_trg_mask = convert_data(
            trg_raw, trg_vocab, device, True, UNK, PAD, SOS, EOS
        )
        optimizer.zero_grad()
        if opt.cuda and torch.cuda.device_count() > 1 and opt.local_rank is None:
            loss, w_loss = nn.parallel.data_parallel(
                model, (src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask),
                device_ids
            )
        else:
            loss, w_loss = model(src, src_mask, f_trg, f_trg_mask,
                                 b_trg, b_trg_mask)
        # batch_idx is the position within the current epoch
        global_batches = len(train_iter) * epoch + batch_idx
        writer.add_scalar(
            "./loss",
            scalar_value=loss.mean().item(),  # mean() in case loss is per-device
            global_step=global_batches,
        )
        loss.mean().backward()
        torch.nn.utils.clip_grad_norm_(param_list, opt.grad_clip)
        optimizer.step()
        if batch_idx % 10 == 0 or batch_idx == len(train_iter):
            logger.info(
                "Epoch: {} batch: {}/{}({:.3%}), loss: {:.6}, lr: {}".format(
                    epoch,
                    batch_idx,
                    len(train_iter),
                    batch_idx / len(train_iter),
                    loss.mean().item(),
                    opt.cur_lr,
                )
            )
        # validation
        if batch_idx % opt.vfreq == 0:
            logger.info("===========validation / test START===========")
            evaluate(batch_idx, epoch)
            model.train()
            if opt.decay_lr:
                adjust_learningrate(opt.score_list)
            # keep only the best checkpoint seen so far
            if len(opt.score_list) == 1 or opt.score_list[-1][0] > max(
                x[0] for x in opt.score_list[:-1]
            ):
                if opt.best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.best_name))
                opt.best_name = save_model(model, batch_idx, epoch, "best")
            # keep the best checkpoint within the current epoch
            if opt.epoch_best and opt.score_list[-1][0] > opt.epoch_best_score:
                opt.epoch_best_score = opt.score_list[-1][0]
                if opt.epoch_best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.epoch_best_name))
                opt.epoch_best_name = save_model(model, batch_idx, epoch,
                                                 "epoch-best")
            logger.info("===========validation / test DONE===========")
        # sampling
        if batch_idx % opt.sfreq == 0:
            ix = np.random.randint(0, len(src_raw))
            samp_src_raw = [src_raw[ix]]
            samp_trg_raw = [trg_raw[ix]]
            samp_src, samp_src_mask = convert_data(
                samp_src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
            )
            model.eval()
            with torch.no_grad():
                output = model.beamsearch(samp_src, samp_src_mask, opt.beam_size)
                best_hyp, best_score = output[0]
                best_hyp = convert_str([best_hyp], trg_vocab)
            sampling_result = [
                ["Key", "Value"],
                ["Source", " ".join(samp_src_raw[0])],
                ["Target", " ".join(samp_trg_raw[0])],
                ["Predict", " ".join(best_hyp[0])],
                ["Best Score", str(round(best_score, 5))],
            ]
            sampling_table = AsciiTable(sampling_result)
            logger.info("===========sampling START===========")
            logger.info("\n" + str(sampling_table.table))
            logger.info("===========sampling DONE===========")
            model.train()
        # saving model
        if opt.freq and batch_idx % opt.freq == 0:
            if opt.tmp_name is not None:
                os.remove(os.path.join(opt.checkpoint, opt.tmp_name))
            opt.tmp_name = save_model(model, batch_idx, epoch, "tmp")

def trainer(self):
    steps = 0
    correct = 0
    total = 0
    loss_deque = deque(maxlen=100)
    train_loss = []
    last_epoch = 0
    if self.opts.resume:
        last_epoch, loss = self.load_progress()
    for e in range(self.opts.epoch - last_epoch):
        # adaptive LR change
        for param_group in self.RNN_optim.param_groups:
            param_group['lr'] = util.linear_LR(e, self.opts)
            print('epoch: {}, RNN_LR: {:.4}'.format(e, param_group['lr']))
        if self.opts.save_progress:
            # save the progress before the LR starts changing
            if e == self.opts.const_epoch:
                self.save_progress(self.opts.const_epoch, np.mean(loss_deque))
            if e % self.opts.save_every == 0:
                self.save_progress(e, np.mean(loss_deque))
        for data, labels, lengths in self.data_loader:
            steps += 1
            data, labels, lengths = util.sort_batch(data, labels, lengths)
            self.RNN_optim.zero_grad()
            pred = self.RNN(data, lengths)
            loss = self.criterion(pred, labels.to(device))
            loss.backward()
            self.RNN_optim.step()
            # pick the argmax
            output = torch.max(pred, 1)[1]
            for out, lab in zip(output, labels):
                if out.cpu().item() == lab.item():
                    correct += 1
                total += 1
            loss_deque.append(loss.cpu().item())
            train_loss.append(np.mean(loss_deque))
            if steps % self.opts.print_every == 0:
                print('Epoch: {}, Steps: {}, Loss: {:.4}, Train Accuracy: {:.4}'
                      .format(e, steps, loss.item(), correct / float(total)))
                correct = 0
                total = 0
                util.raw_score_plotter(train_loss)
    if self.opts.save_progress:
        # save the final state
        self.save_progress(-1, np.mean(loss_deque))
    util.raw_score_plotter(train_loss)

# print(val_dataset.batch_size)
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.set_device(0)

model = emoModel.EmoGRU(vocab_inp_size, embedding_dim, units, BATCH_SIZE,
                        target_size)
# model.to(device)

# obtain one sample batch from the data iterator
it = iter(train_dataset)
x, y, x_len = next(it)

# sort the batch first so it can be used with pack_padded_sequence
xs, ys, lens = util.sort_batch(x, y, x_len)
print("Input size: ", xs.size())
output, _ = model(xs)
print(output.size())

# re-create the model so training starts from fresh parameters
model = emoModel.EmoGRU(vocab_inp_size, embedding_dim, units, BATCH_SIZE,
                        target_size)
# model.to(device)

# loss criterion and optimizer for training
criterion = nn.CrossEntropyLoss()  # the same as log_softmax + NLLLoss
optimizer = torch.optim.Adam(model.parameters())
EPOCHS = 1

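The setup above stops just before the loop itself (EPOCHS = 1). A minimal sketch of the training loop it implies; this is an assumption rather than the original code, and it presumes train_dataset yields (x, y, x_len) batches and that emoModel.EmoGRU returns (logits, hidden):

for epoch in range(EPOCHS):
    total_loss = 0.0
    for x, y, x_len in train_dataset:
        # sort by length so the model can pack the padded batch internally
        xs, ys, lens = util.sort_batch(x, y, x_len)
        optimizer.zero_grad()
        logits, _ = model(xs)  # EmoGRU is assumed to return (logits, hidden)
        loss = criterion(logits, ys)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print("Epoch {}: mean loss {:.4f}".format(epoch + 1,
                                              total_loss / len(train_dataset)))
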
def train(epoch):
    model.train()
    opt.epoch_best_score = -float('inf')
    opt.epoch_best_name = None
    for batch_idx, batch in enumerate(train_iter, start=1):
        start_time = time.time()
        batch = sort_batch(batch)
        src_raw = batch[0]
        trg_raw = batch[1]
        src, src_mask = convert_data(src_raw, src_vocab, device, True,
                                     UNK, PAD, SOS, EOS)
        f_trg, f_trg_mask = convert_data(trg_raw, trg_vocab, device, False,
                                         UNK, PAD, SOS, EOS)
        b_trg, b_trg_mask = convert_data(trg_raw, trg_vocab, device, True,
                                         UNK, PAD, SOS, EOS)
        optimizer.zero_grad()
        if opt.cuda and torch.cuda.device_count() > 1 and opt.local_rank is None:
            R = nn.parallel.data_parallel(
                model, (src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask),
                device_ids)
        else:
            R = model(src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask)
        R[0].mean().backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(param_list, opt.grad_clip)
        optimizer.step()
        elapsed = time.time() - start_time
        R = [str(x.mean().item()) for x in R]
        print(epoch, batch_idx, len(train_iter),
              100. * batch_idx / len(train_iter), ' '.join(R),
              grad_norm.item(), opt.cur_lr, elapsed)
        # validation
        if batch_idx % opt.vfreq == 0:
            evaluate(batch_idx, epoch)
            model.train()
            if opt.decay_lr:
                adjust_learningrate(opt.score_list)
            if len(opt.score_list) == 1 or \
                    opt.score_list[-1][0] > max(x[0] for x in opt.score_list[:-1]):
                if opt.best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.best_name))
                opt.best_name = save_model(model, batch_idx, epoch, 'best')
            if opt.epoch_best and opt.score_list[-1][0] > opt.epoch_best_score:
                opt.epoch_best_score = opt.score_list[-1][0]
                if opt.epoch_best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.epoch_best_name))
                opt.epoch_best_name = save_model(model, batch_idx, epoch,
                                                 'epoch-best')
        # sampling
        if batch_idx % opt.sfreq == 0:
            ix = np.random.randint(0, len(src_raw))
            samp_src_raw = [src_raw[ix]]
            samp_trg_raw = [trg_raw[ix]]
            samp_src, samp_src_mask = convert_data(samp_src_raw, src_vocab,
                                                   device, True, UNK, PAD,
                                                   SOS, EOS)
            model.eval()
            with torch.no_grad():
                output = model.beamsearch(samp_src, samp_src_mask,
                                          opt.beam_size)
                best_hyp, best_score = output[0]
                best_hyp = convert_str([best_hyp], trg_vocab)
            print('--', ' '.join(samp_src_raw[0]))
            print('--', ' '.join(samp_trg_raw[0]))
            print('--', ' '.join(best_hyp[0]))
            print('--', best_score)
            model.train()
        # saving model
        if opt.freq and batch_idx % opt.freq == 0:
            if opt.tmp_name is not None:
                os.remove(os.path.join(opt.checkpoint, opt.tmp_name))
            opt.tmp_name = save_model(model, batch_idx, epoch, 'tmp')

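The two NMT train functions call a different sort_batch, one that receives the whole (source, target) batch of raw token lists. A minimal sketch under that assumption (the real helper may return extra fields); it sorts both sides by descending source length so the padded source batch can be packed:

def sort_batch(batch):
    # batch is assumed to be (src_sentences, trg_sentences), where each side
    # is a list of token lists; sort both by descending source length.
    src, trg = batch[0], batch[1]
    order = sorted(range(len(src)), key=lambda i: len(src[i]), reverse=True)
    return [src[i] for i in order], [trg[i] for i in order]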