Example #1
    def train(engine, mini_batch):
        # You have to reset the gradients of all model parameters
        # before taking another step of gradient descent.
        engine.model.train()  # Because the model is assigned as a class attribute, we can easily access it.
        engine.optimizer.zero_grad()

        x, y = mini_batch.text, mini_batch.label
        x, y = x.to(engine.device), y.to(engine.device)

        # Feed-forward pass.
        y_hat = engine.model(x)

        loss = engine.crit(y_hat, y)
        loss.backward()

        # Calculate accuracy only if 'y' is a LongTensor,
        # which means it holds class indices rather than continuous targets.
        if isinstance(y, (torch.LongTensor, torch.cuda.LongTensor)):
            accuracy = (torch.argmax(y_hat, dim=-1) == y).sum() / float(y.size(0))
        else:
            accuracy = 0

        p_norm = float(get_parameter_norm(engine.model.parameters()))
        g_norm = float(get_grad_norm(engine.model.parameters()))

        # Take a step of gradient descent.
        engine.optimizer.step()

        return {
            'loss': float(loss),
            'accuracy': float(accuracy),
            '|param|': p_norm,
            '|g_param|': g_norm,
        }
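
Both examples call `get_parameter_norm` and `get_grad_norm` without defining them. A minimal sketch of what such helpers could look like, assuming a total L2 norm over all parameters (the actual utilities in the source project may differ):

    import torch

    def get_parameter_norm(parameters, norm_type=2):
        # Accumulate the norm of every parameter tensor and combine
        # them into one total norm (L2 by default).
        total = 0.0
        for p in parameters:
            total += float(p.data.norm(norm_type) ** norm_type)
        return total ** (1.0 / norm_type)

    def get_grad_norm(parameters, norm_type=2):
        # Same idea, but over gradients; parameters that have no
        # gradient yet are skipped.
        total = 0.0
        for p in parameters:
            if p.grad is not None:
                total += float(p.grad.data.norm(norm_type) ** norm_type)
        return total ** (1.0 / norm_type)

The `engine.model` / `engine.crit` / `engine.optimizer` attributes suggest an ignite-style `Engine` subclass that stores these objects at construction time, which is what the comment about the model being a class attribute refers to. One plausible wiring (the class name `MyEngine` and its constructor arguments are assumptions, not confirmed by the snippets):

    from ignite.engine import Engine

    class MyEngine(Engine):
        # Attach everything train() needs as attributes, so the process
        # function can reach them through its 'engine' argument.
        def __init__(self, func, model, crit, optimizer, device, config):
            self.model = model
            self.crit = crit
            self.optimizer = optimizer
            self.device = device
            self.config = config

            super().__init__(func)
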
Example #2
    def train(engine, mini_batch):
        # You have to reset the gradients of all model parameters
        # before taking another step of gradient descent.
        engine.model.train()  # Because the model is assigned as a class attribute, we can easily access it.
        engine.optimizer.zero_grad()

        # text2 is also needed. (TODO: needs fixing!)
        x, x2, y = mini_batch.text1, mini_batch.text2, mini_batch.label
        x = x.to(engine.device)
        x2 = x2.to(engine.device)
        y = y.to(engine.device)

        # Truncate both inputs to 'max_length', then pad each batch
        # back up to exactly that length so shapes are fixed.
        x = x[:, :engine.config.max_length]
        x2 = x2[:, :engine.config.max_length]

        x = pad_to_maxseq_to_batch(x, engine.config.max_length, engine.device)
        x2 = pad_to_maxseq_to_batch(x2, engine.config.max_length, engine.device)

        #print("엔진", engine.config.max_length)
        #print(x2.shape)

        # Feed-forward pass.
        y_hat = engine.model(x, x2)

        loss = engine.crit(y_hat, y)
        loss.backward()

        # Calculate accuracy only if 'y' is a LongTensor,
        # which means it holds class indices rather than continuous targets.
        if isinstance(y, (torch.LongTensor, torch.cuda.LongTensor)):
            accuracy = (torch.argmax(y_hat, dim=-1) == y).sum() / float(y.size(0))
        else:
            accuracy = 0

        p_norm = float(get_parameter_norm(engine.model.parameters()))
        g_norm = float(get_grad_norm(engine.model.parameters()))

        # Take a step of gradient descent.
        engine.optimizer.step()

        return {
            'loss': float(loss),
            'accuracy': float(accuracy),
            '|param|': p_norm,
            '|g_param|': g_norm,
        }
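
Example #2 also calls `pad_to_maxseq_to_batch`, which is not shown. A plausible sketch, assuming it right-pads each batch along the time dimension to exactly `max_length` so both inputs reach the model with a fixed shape (the function name comes from the example; the `pad_index` default of 1 is an assumption, matching torchtext's default `<pad>` index):

    import torch

    def pad_to_maxseq_to_batch(x, max_length, device, pad_index=1):
        # x: (batch_size, seq_len) LongTensor with seq_len <= max_length
        # after the x[:, :max_length] truncation above.
        batch_size, seq_len = x.size()
        if seq_len < max_length:
            # Append pad tokens on the right until the sequence
            # dimension reaches max_length.
            pad = x.new_full((batch_size, max_length - seq_len), pad_index)
            x = torch.cat([x, pad], dim=-1)
        return x.to(device)

With a helper like this, a batch truncated by `x[:, :engine.config.max_length]` always arrives at the model as a `(batch_size, max_length)` tensor, regardless of the longest sequence in the mini-batch.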