import torch

# get_grad_norm() and get_parameter_norm() are assumed to be provided by a local
# utility module; the module name here is a guess.
from utils import get_grad_norm, get_parameter_norm


def train(engine, mini_batch):
    # You have to reset the gradients of all model parameters
    # before taking another step of gradient descent.
    engine.model.train()  # Because the model is assigned as a class variable, we can easily access it.
    engine.optimizer.zero_grad()

    x, y = mini_batch.text, mini_batch.label
    x, y = x.to(engine.device), y.to(engine.device)

    # Take the feed-forward pass.
    y_hat = engine.model(x)

    loss = engine.crit(y_hat, y)
    loss.backward()

    # Calculate accuracy only if 'y' is a LongTensor,
    # which means that 'y' holds class indices.
    if isinstance(y, torch.LongTensor) or isinstance(y, torch.cuda.LongTensor):
        accuracy = (torch.argmax(y_hat, dim=-1) == y).sum() / float(y.size(0))
    else:
        accuracy = 0

    p_norm = float(get_parameter_norm(engine.model.parameters()))
    g_norm = float(get_grad_norm(engine.model.parameters()))

    # Take a step of gradient descent.
    engine.optimizer.step()

    return {
        'loss': float(loss),
        'accuracy': float(accuracy),
        '|param|': p_norm,
        '|g_param|': g_norm,
    }
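# A minimal usage sketch (not from the original code) showing how train() above
# could be wired into an ignite Engine. The engine attributes (model, crit,
# optimizer, device) and the batch fields (.text, .label) are exactly what
# train() expects; the toy model and synthetic batches are hypothetical
# placeholders, and the sketch assumes get_grad_norm / get_parameter_norm
# (imported above) are available.
from collections import namedtuple

import torch.nn as nn
from ignite.engine import Engine

Batch = namedtuple('Batch', ['text', 'label'])

# Toy classifier over token indices: (batch, 8) -> (batch, 2) logits.
model = nn.Sequential(nn.Embedding(100, 16), nn.Flatten(), nn.Linear(16 * 8, 2))

trainer = Engine(train)
trainer.model = model
trainer.crit = nn.CrossEntropyLoss()
trainer.optimizer = torch.optim.Adam(model.parameters())
trainer.device = torch.device('cpu')

# Two synthetic mini-batches (batch_size=4, seq_len=8) of random token indices.
data = [Batch(torch.randint(0, 100, (4, 8)), torch.randint(0, 2, (4,)))
        for _ in range(2)]
trainer.run(data, max_epochs=1)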
# Variant of train() for a model that takes a pair of text inputs
# (mini_batch.text1 and mini_batch.text2), e.g. for sentence-pair classification.
def train(engine, mini_batch):
    # You have to reset the gradients of all model parameters
    # before taking another step of gradient descent.
    engine.model.train()  # Because the model is assigned as a class variable, we can easily access it.
    engine.optimizer.zero_grad()

    x, x2, y = mini_batch.text1, mini_batch.text2, mini_batch.label
    x, x2, y = x.to(engine.device), x2.to(engine.device), y.to(engine.device)

    # Truncate both inputs to the configured maximum length,
    # then pad them up to that length so they share the same shape.
    x = x[:, :engine.config.max_length]
    x2 = x2[:, :engine.config.max_length]
    x = pad_to_maxseq_to_batch(x, engine.config.max_length, engine.device)
    x2 = pad_to_maxseq_to_batch(x2, engine.config.max_length, engine.device)

    # Take the feed-forward pass with both inputs.
    y_hat = engine.model(x, x2)

    loss = engine.crit(y_hat, y)
    loss.backward()

    # Calculate accuracy only if 'y' is a LongTensor,
    # which means that 'y' holds class indices.
    if isinstance(y, torch.LongTensor) or isinstance(y, torch.cuda.LongTensor):
        accuracy = (torch.argmax(y_hat, dim=-1) == y).sum() / float(y.size(0))
    else:
        accuracy = 0

    p_norm = float(get_parameter_norm(engine.model.parameters()))
    g_norm = float(get_grad_norm(engine.model.parameters()))

    # Take a step of gradient descent.
    engine.optimizer.step()

    return {
        'loss': float(loss),
        'accuracy': float(accuracy),
        '|param|': p_norm,
        '|g_param|': g_norm,
    }
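# pad_to_maxseq_to_batch() is not defined in this snippet. Below is a
# hypothetical sketch of what such a helper might do, assuming it right-pads a
# (batch, seq_len) LongTensor of token indices with a pad index (assumed here to
# be 0) up to max_length, so every batch ends up with a fixed time dimension.
def pad_to_maxseq_to_batch(x, max_length, device, pad_index=0):
    batch_size, seq_len = x.size()
    if seq_len >= max_length:
        # Already at (or truncated to) the target length; nothing to pad.
        return x

    # Pad block of shape (batch_size, max_length - seq_len) filled with pad_index.
    pad = torch.full((batch_size, max_length - seq_len),
                     pad_index,
                     dtype=x.dtype,
                     device=device)
    return torch.cat([x, pad], dim=-1)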