Example #1
def train_epoch_ch8(model, train_iter, loss, updater, device,  #@save
                    use_random_iter):
    """Train a model within one epoch (defined in Chapter 8)."""
    state, timer = None, d2l.Timer()
    metric = d2l.Accumulator(2)  # Sum of training loss, no. of tokens
    for X, Y in train_iter:
        if state is None or use_random_iter:
            # Initialize `state` when either it is the first iteration or
            # using random sampling
            state = model.begin_state(batch_size=X.shape[0], device=device)
        else:
            if isinstance(model, nn.Module) and not isinstance(state, tuple):
                # `state` is a tensor for `nn.GRU`
                state.detach_()
            else:
                # `state` is a tuple of tensors for `nn.LSTM` and
                # for our custom scratch implementation
                for s in state:
                    s.detach_()
        y = Y.T.reshape(-1)
        X, y = X.to(device), y.to(device)
        y_hat, state = model(X, state)
        l = loss(y_hat, y.long()).mean()
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            grad_clipping(model, 1)
            updater.step()
        else:
            l.backward()
            grad_clipping(model, 1)
            # Since the `mean` function has been invoked
            updater(batch_size=1)
        metric.add(l * d2l.size(y), d2l.size(y))
    return math.exp(metric[0] / metric[1]), metric[1] / timer.stop()
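Note: train_epoch_ch8 above calls grad_clipping, which is not defined in any of these examples. A minimal sketch along the lines of the d2l implementation (assuming the convention that a scratch model exposes its parameters as net.params, while nn.Module models use net.parameters()) could look like the following; it rescales all gradients so that their global L2 norm never exceeds theta:

import torch
from torch import nn

def grad_clipping(net, theta):
    """Clip the gradient so its global L2 norm is at most `theta`."""
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params  # assumed convention for scratch models
    norm = torch.sqrt(sum(torch.sum(p.grad ** 2) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm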
Example #2
def train_epoch_ch8(model, train_iter, loss, updater, device, use_random_iter):
    """Train a model within one epoch"""
    state, timer = None, d2l.Timer()
    metric = d2l.Accumulator(2)
    for X, Y in train_iter:
        if state is None or use_random_iter:
            state = model.begin_state(batch_size=X.shape[0], device=device)
        else:
            if isinstance(model, nn.Module) and not isinstance(state, tuple):
                state.detach_()
            else:
                for s in state:
                    s.detach_()
        y = Y.T.reshape(-1)
        X, y = X.to(device), y.to(device)
        y_hat, state = model(X, state)
        l = loss(y_hat, y.long()).mean()
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            grad_clipping(model, 1)
            updater.step()
        else:
            l.backward()
            grad_clipping(model, 1)
            updater(batch_size=1)
        metric.add(l * d2l.size(y), d2l.size(y))
    return math.exp(metric[0] / metric[1]), metric[1] / timer.stop()
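The detach_() branch in the two training loops implements truncated backpropagation through time: with sequential partitioning the hidden state is carried over between minibatches, but its history is cut so gradients never flow past the current minibatch. A small self-contained sketch of the same pattern with nn.GRU (all names and sizes below are illustrative, not taken from the examples above):

import torch
from torch import nn

gru = nn.GRU(input_size=8, hidden_size=16)
state = None
for step in range(3):                   # three consecutive minibatches
    X = torch.randn(5, 2, 8)            # (seq_len, batch_size, input_size)
    if state is None:
        state = torch.zeros(1, 2, 16)   # initial hidden state
    else:
        state.detach_()                 # keep the value, drop the graph
    gru.zero_grad()
    Y, state = gru(X, state)
    Y.sum().backward()                  # gradients stop at the detached state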
Example #3
def evaluate_accuracy(net, data_iter):  #@save
    """Compute the accuracy for a model on a dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # Set the model to evaluation mode
    metric = Accumulator(2)  # No. of correct predictions, no. of predictions
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), d2l.size(y))
    return metric[0] / metric[1]
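evaluate_accuracy relies on Accumulator, accuracy, and d2l.size from the surrounding d2l chapter. A minimal usage sketch, assuming the d2l package is installed and that the bare names simply refer to the d2l versions (the toy model and random data are illustrative only):

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from d2l import torch as d2l

Accumulator, accuracy = d2l.Accumulator, d2l.accuracy  # assumed to match the chapter's definitions

net = nn.Linear(20, 3)                        # toy classifier
X = torch.randn(100, 20)
y = torch.randint(0, 3, (100,))
data_iter = DataLoader(TensorDataset(X, y), batch_size=32)
print(evaluate_accuracy(net, data_iter))      # roughly 1/3 with random weights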
Example #4
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy for a model on a dataset using a GPU."""
    net.eval()  # Set the model to evaluation mode
    if not device:
        # Default to the device where the model's parameters live
        device = next(iter(net.parameters())).device
    metric = d2l.Accumulator(2)  # No. of correct predictions, no. of predictions
    for X, y in data_iter:
        X, y = X.to(device), y.to(device)
        metric.add(d2l.accuracy(net(X), y), d2l.size(y))
    return metric[0] / metric[1]
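This variant never disables gradient tracking during evaluation. Wrapping the call in torch.no_grad() (a small suggested tweak, not part of the original; net and test_iter below are placeholders) avoids building an autograd graph while computing accuracy:

with torch.no_grad():
    test_acc = evaluate_accuracy_gpu(net, test_iter)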
Example #5
def train_epoch(self, net, train_iter, loss, updater, device,
                use_random_iter):
    """Train a net within one epoch (defined in Chapter 8)."""
    state, timer = None, d2l.Timer()
    metric = d2l.Accumulator(2)  # Sum of training loss, no. of tokens
    for X, Y in train_iter:
        X = X.to(torch.float32)  # TODO is this necessary?
        Y = Y.to(torch.float32)
        X = X.reshape(-1, *X.shape)  # [direction, batch_size, seq_len]
        if state is None or use_random_iter:
            # Initialize `state` when either it is the first iteration or
            # using random sampling
            state = net.begin_state(batch_size=X.shape[1], device=device)
        else:
            if isinstance(net, nn.Module) and not isinstance(state, tuple):
                # `state` is a tensor for `nn.GRU`
                state.detach_()
            else:
                # `state` is a tuple of tensors for `nn.LSTM` and
                # for our custom scratch implementation
                for s in state:
                    s.detach_()
        # y = Y.T.reshape(-1)
        X, Y = X.to(device), Y.to(device)
        y_hat, state = net(X, state)
        l = loss(y_hat, Y).mean()
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            self.grad_clipping(net, 1)
            updater.step()
        else:
            l.backward()
            self.grad_clipping(net, 1)
            # Since the `mean` function has been invoked
            updater(batch_size=1)
        metric.add(l * d2l.size(Y), d2l.size(Y))
    return math.exp(metric[0] / metric[1]), metric[1] / timer.stop()
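In all three training loops, metric[0] accumulates the loss summed over tokens and metric[1] the token count, so math.exp(metric[0] / metric[1]) is the per-token perplexity (assuming the loss is cross-entropy) and metric[1] / timer.stop() is the throughput in tokens per second.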