def valid(valid_loader, model, logger):
    """Run one validation pass over `valid_loader` and return the average loss.

    Args:
        valid_loader: iterable yielding (padded_input, padded_target, input_lengths).
        model: model whose forward is model(padded_input, input_lengths, padded_target)
            and returns (pred, gold).
        logger: logger used to report the final validation loss.

    Returns:
        float: average loss over all non-NaN batches.
    """
    model.eval()  # disable dropout / freeze batchnorm statistics
    losses = AverageMeter()

    for data in valid_loader:
        # Move the batch to the configured device.
        padded_input, padded_target, input_lengths = data
        padded_input = padded_input.to(Config.device)
        padded_target = padded_target.to(Config.device)
        input_lengths = input_lengths.to(Config.device)

        with torch.no_grad():
            # Forward prop.
            pred, gold = model(padded_input, input_lengths, padded_target)
            loss, n_correct = cal_performance(pred, gold,
                                              smoothing=args.label_smoothing)

        # Skip NaN batches instead of poisoning the running average.
        # (Was an assert wrapped in try/except AssertionError — asserts are
        # stripped under `python -O`, so an explicit check is used instead.)
        if math.isnan(loss.item()):
            print('n_correct: ' + str(n_correct))
            # Bug fix: original printed n_correct here while labeling it 'data'.
            print('data: ' + str(data))
            continue

        # Keep track of metrics.
        losses.update(loss.item())

    # Print status once after the full pass.
    logger.info('\nValidation Loss {loss.val:.5f} ({loss.avg:.5f})\n'.format(
        loss=losses))

    return losses.avg
def valid(valid_loader, model, logger):
    """Evaluate `model` on `valid_loader`; log and return the mean loss.

    Expects each batch to be (padded_input, padded_target, input_lengths);
    gradients are disabled for the whole forward pass.
    """
    model.eval()
    losses = AverageMeter()

    # Unpack each batch directly in the loop header.
    for padded_input, padded_target, input_lengths in tqdm(valid_loader):
        # Transfer the whole batch onto the compute device.
        padded_input = padded_input.to(device)
        padded_target = padded_target.to(device)
        input_lengths = input_lengths.to(device)

        with torch.no_grad():
            # Forward pass and loss computation only — no backprop here.
            pred, gold = model(padded_input, input_lengths, padded_target)
            loss, n_correct = cal_performance(
                pred, gold, smoothing=args.label_smoothing)

        # Track the running loss statistics.
        losses.update(loss.item())

    # Report once after the full validation sweep.
    logger.info('\nValidation Loss {loss.val:.5f} ({loss.avg:.5f})\n'.format(
        loss=losses))

    return losses.avg
def train(train_loader, model, optimizer, epoch, logger, writer):
    """Train `model` for one epoch and return the epoch's average loss.

    Args:
        train_loader: iterable yielding (padded_input, padded_target, input_lengths).
        model: model whose forward returns (pred, gold).
        optimizer: wrapper exposing .optimizer, .step_num and .lr
            (presumably a Noam/Transformer LR scheduler wrapper — confirm).
        epoch: current epoch index, used only for logging.
        logger: progress logger.
        writer: TensorBoard-style summary writer.

    Returns:
        float: average loss over all non-NaN batches.
    """
    model.train()  # train mode (dropout and batchnorm are active)
    losses = AverageMeter()
    times = AverageMeter()
    start = time.time()

    # Batches
    for i, data in enumerate(train_loader):
        # Move to GPU, if available.
        padded_input, padded_target, input_lengths = data
        padded_input = padded_input.to(Config.device)
        padded_target = padded_target.to(Config.device)
        input_lengths = input_lengths.to(Config.device)

        # Forward prop.
        pred, gold = model(padded_input, input_lengths, padded_target)
        loss, n_correct = cal_performance(pred, gold,
                                          smoothing=args.label_smoothing)

        # Skip NaN losses explicitly — the original used assert/try/except
        # AssertionError, which silently disappears under `python -O`.
        if math.isnan(loss.item()):
            print('n_correct: ' + str(n_correct))
            # Bug fix: original printed n_correct here while labeling it 'data'.
            print('data: ' + str(data))
            continue

        # Back prop.
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients to avoid exploding updates.
        clip_gradient(optimizer.optimizer, Config.grad_clip)

        # Update weights.
        optimizer.step()

        # Keep track of metrics.
        elapsed = time.time() - start
        start = time.time()
        losses.update(loss.item())
        times.update(elapsed)

        # Print status periodically.
        if i % Config.print_freq == 0:
            logger.info('Epoch: [{0}][{1}/{2}]\t'
                        'Batch time {time.val:.5f} ({time.avg:.5f})\t'
                        'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(
                            epoch, i, len(train_loader),
                            time=times, loss=losses))
            writer.add_scalar('step_num/train_loss',
                              losses.avg, optimizer.step_num)
            writer.add_scalar('step_num/learning_rate',
                              optimizer.lr, optimizer.step_num)

    return losses.avg
def _run_one_epoch(self, cross_valid=False):
    """Run one epoch over the training loader (or the CV loader) and return the mean loss.

    Args:
        cross_valid: when True, iterate self.cv_loader and skip the
            backward pass / optimizer step; otherwise train on self.tr_loader.

    Returns:
        float: total loss divided by the number of batches processed.
    """
    total_loss = 0.0
    num_batches = 0
    data_loader = self.cv_loader if cross_valid else self.tr_loader

    # NOTE: this loader yields (input, lengths, target) — a different field
    # order than the other loops in this file; confirmed against the unpack.
    for padded_input, input_lengths, padded_target in data_loader:
        padded_input = padded_input.cuda()
        input_lengths = input_lengths.cuda()
        padded_target = padded_target.cuda()

        pred, gold = self.model(padded_input, input_lengths, padded_target)
        loss, n_correct = cal_performance(pred, gold,
                                          smoothing=self.label_smoothing)

        # Only back-propagate and update weights during training.
        if not cross_valid:
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # .item() detaches the scalar so the autograd graph is freed per batch.
        total_loss += loss.item()
        num_batches += 1
        # Removed dead code: non_pad_mask / n_word were computed from
        # gold.ne(IGNORE_ID) but never used.

    # Guard against an empty loader (original raised NameError on `i`).
    return total_loss / max(num_batches, 1)
def train(train_loader, model, optimizer, epoch, logger):
    """Train `model` for one epoch over `train_loader` and return the average loss.

    Logs progress every `print_freq` batches; batches are expected as
    (padded_input, padded_target, input_lengths).
    """
    model.train()  # train mode: dropout and batchnorm are active
    losses = AverageMeter()

    # Unpack each batch directly in the loop header.
    for i, (padded_input, padded_target, input_lengths) in enumerate(train_loader):
        # Move the batch onto the training device.
        padded_input = padded_input.to(device)
        padded_target = padded_target.to(device)
        input_lengths = input_lengths.to(device)

        # Forward pass and loss.
        pred, gold = model(padded_input, input_lengths, padded_target)
        loss, n_correct = cal_performance(
            pred, gold, smoothing=args.label_smoothing)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the running loss statistics.
        losses.update(loss.item())

        # Periodic progress report.
        if i % print_freq == 0:
            logger.info('Epoch: [{0}][{1}/{2}]\t'
                        'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(
                            epoch, i, len(train_loader), loss=losses))

    return losses.avg
def train(model, total_batch_size, queue, optimizer, device, train_begin,
          train_loader_count, print_batch=5, teacher_forcing_ratio=1):
    """Consume batches from `queue` until all loaders close; return (avg loss, CER).

    Args:
        model: model whose forward is model(feats, feat_lengths, scripts)
            and returns (pred, gold).
        total_batch_size: total number of batches, used only for logging.
        queue: multiprocessing-style queue yielding
            (feats, scripts, feat_lengths, script_lengths); an empty `feats`
            (shape[0] == 0) signals one producer/loader has finished.
        optimizer: wrapper exposing .lr (presumably a scheduler wrapper — confirm).
        device: target device for the batch tensors.
        train_begin: wall-clock start time of the whole training run.
        train_loader_count: number of producer loaders feeding the queue.
        print_batch: log every `print_batch` batches.
        teacher_forcing_ratio: unused here; kept for interface compatibility.

    Returns:
        tuple: (total_loss / total_num, total_dist / total_length).
    """
    total_loss = 0.0
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    batch = 0

    model.train()
    logger.info('train() start')
    begin = epoch_begin = time.time()

    while True:
        if queue.empty():
            logger.debug('queue is empty')

        feats, scripts, feat_lengths, script_lengths = queue.get()

        if feats.shape[0] == 0:
            # Empty feats means one loader has closed.
            train_loader_count -= 1
            logger.debug('left train_loader: %d' % (train_loader_count))
            if train_loader_count == 0:
                break
            else:
                continue

        optimizer.zero_grad()

        feats = feats.to(device)
        scripts = scripts.to(device)

        # Shift the target script left by one position in place
        # (drop the leading token so targets align with predictions).
        sh = scripts.shape
        scripts[:, :sh[1] - 1] = scripts[:, 1:]

        # pred: pre-softmax logits, gold: label data.
        pred, gold = model(feats, feat_lengths, scripts)
        y_hat = pred.max(-1)[1]

        loss, n_correct = cal_performance(pred, gold, smoothing=0.1)

        # Bug fix: accumulate Python floats, not live tensors — `total_loss
        # += loss` kept every batch's autograd graph alive (memory leak).
        total_loss += loss.item()
        total_num += feat_lengths.sum().item()

        # Occasionally (~1% of batches) print a decoded sample for inspection.
        display = random.randrange(0, 100) == 0
        dist, length = get_distance(scripts, y_hat, display=display)
        total_dist += dist
        total_length += length
        total_sent_num += scripts.size(0)

        loss.backward()
        optimizer.step()

        if batch % print_batch == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info(
                'batch: {:4d}/{:4d}, loss: {:.6f}, cer: {:.2f}, '
                'elapsed: {:.2f}s {:.2f}m {:.2f}h lr:{:.6f}'.format(
                    batch,
                    total_batch_size,
                    total_loss / total_num,
                    total_dist / total_length,
                    elapsed, epoch_elapsed, train_elapsed,
                    optimizer.lr))
            begin = time.time()

            # Accumulators are plain floats now, so no .item() calls here.
            nsml.report(False,
                        step=train.cumulative_batch_count,
                        train_step__loss=total_loss / total_num,
                        train_step__cer=total_dist / total_length)

        batch += 1
        train.cumulative_batch_count += 1

    logger.info('train() completed')
    return total_loss / total_num, total_dist / total_length