import math
import torch
from torch import nn
from d2l import torch as d2l


def train_epoch_ch8(model, train_iter, loss, updater, device,  #@save
                    use_random_iter):
    """Train a model within one epoch (defined in Chapter 8)."""
    state, timer = None, d2l.Timer()
    metric = d2l.Accumulator(2)  # Sum of training loss, no. of tokens
    for X, Y in train_iter:
        if state is None or use_random_iter:
            # Initialize `state` when either it is the first iteration or
            # using random sampling
            state = model.begin_state(batch_size=X.shape[0], device=device)
        else:
            if isinstance(model, nn.Module) and not isinstance(state, tuple):
                # `state` is a tensor for `nn.GRU`
                state.detach_()
            else:
                # `state` is a tuple of tensors for `nn.LSTM` and
                # for our custom scratch implementation
                for s in state:
                    s.detach_()
        y = Y.T.reshape(-1)
        X, y = X.to(device), y.to(device)
        y_hat, state = model(X, state)
        l = loss(y_hat, y.long()).mean()
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            grad_clipping(model, 1)
            updater.step()
        else:
            l.backward()
            grad_clipping(model, 1)
            # Since the `mean` function has been invoked
            updater(batch_size=1)
        metric.add(l * d2l.size(y), d2l.size(y))
    return math.exp(metric[0] / metric[1]), metric[1] / timer.stop()
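# A minimal sketch of how `train_epoch_ch8` might be driven across epochs,
# loosely following d2l's `train_ch8`. The loss choice, learning rate, and
# the logging interval below are illustrative assumptions, not part of the
# function above.
def train_ch8_sketch(model, train_iter, lr, num_epochs, device,
                     use_random_iter=False):
    loss = nn.CrossEntropyLoss()
    if isinstance(model, nn.Module):
        updater = torch.optim.SGD(model.parameters(), lr)
    else:
        # Fall back to d2l's hand-rolled SGD step for scratch implementations
        updater = lambda batch_size: d2l.sgd(model.params, lr, batch_size)
    for epoch in range(num_epochs):
        ppl, speed = train_epoch_ch8(model, train_iter, loss, updater,
                                     device, use_random_iter)
        if (epoch + 1) % 10 == 0:
            print(f'epoch {epoch + 1}: perplexity {ppl:.1f}, '
                  f'{speed:.1f} tokens/sec on {str(device)}')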
def evaluate_accuracy(net, data_iter):  #@save
    """Compute the accuracy for a model on a dataset."""
    if isinstance(net, torch.nn.Module):
        net.eval()  # Set the model to evaluation mode
    metric = Accumulator(2)  # No. of correct predictions, no. of predictions
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), d2l.size(y))
    return metric[0] / metric[1]
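# `evaluate_accuracy` relies on an `Accumulator` class and an `accuracy`
# helper that are not defined in this file. The sketches below follow d2l's
# standard definitions and are included here as assumptions so the function
# above is runnable on its own.
class Accumulator:
    """For accumulating sums over `n` variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


def accuracy(y_hat, y):
    """Compute the number of correct predictions (d2l-style sketch)."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        # Convert class scores to predicted class indices
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())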
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy for a model on a dataset using a GPU."""
    net.eval()  # Set the model to evaluation mode
    if not device:
        # Default to the device the model parameters live on
        device = next(iter(net.parameters())).device
    metric = d2l.Accumulator(2)  # No. of correct predictions, no. of predictions
    for X, y in data_iter:
        X, y = X.to(device), y.to(device)
        metric.add(d2l.accuracy(net(X), y), d2l.size(y))
    return metric[0] / metric[1]
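# A quick self-contained smoke test for `evaluate_accuracy_gpu`. The tiny
# linear classifier and the random data are illustrative assumptions only;
# an untrained 4-class model should score roughly 0.25 by chance.
def _smoke_test_evaluate_accuracy_gpu():
    net = nn.Linear(20, 4)
    X = torch.randn(64, 20)
    y = torch.randint(0, 4, (64,))
    data_iter = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X, y), batch_size=16)
    acc = evaluate_accuracy_gpu(net, data_iter)
    print(f'accuracy of an untrained net: {acc:.3f}')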
def train_epoch(self, net, train_iter, loss, updater, device,
                use_random_iter):
    """Train a net within one epoch (defined in Chapter 8)."""
    state, timer = None, d2l.Timer()
    metric = d2l.Accumulator(2)  # Sum of training loss, no. of tokens
    i = 0
    for X, Y in train_iter:
        X = X.to(torch.float32)  # TODO is this necessary?
        Y = Y.to(torch.float32)
        X = X.reshape(-1, *X.shape)  # [direction, batch_size, seq_len]
        if state is None or use_random_iter:
            # Initialize `state` when either it is the first iteration or
            # using random sampling
            state = net.begin_state(batch_size=X.shape[1], device=device)
        else:
            if isinstance(net, nn.Module) and not isinstance(state, tuple):
                # `state` is a tensor for `nn.GRU`
                state.detach_()
            else:
                # `state` is a tuple of tensors for `nn.LSTM` and
                # for our custom scratch implementation
                for s in state:
                    s.detach_()
        # y = Y.T.reshape(-1)
        X, Y = X.to(device), Y.to(device)
        y_hat, state = net(X, state)
        l = loss(y_hat, Y).mean()
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            self.grad_clipping(net, 1)
            updater.step()
        else:
            l.backward()
            self.grad_clipping(net, 1)
            # Since the `mean` function has been invoked
            updater(batch_size=1)
        metric.add(l * d2l.size(Y), d2l.size(Y))
    return math.exp(metric[0] / metric[1]), metric[1] / timer.stop()
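# Both training loops above clip gradients before each optimizer step
# (`grad_clipping` at module level, `self.grad_clipping` as a method), but
# no implementation appears in this file. The sketch below follows d2l's
# standard standalone version; the method variant would presumably wrap the
# same logic. Rescaling by `theta / norm` caps the global gradient norm at
# `theta`, which keeps RNN training stable when gradients explode.
def grad_clipping(net, theta):
    """Clip the global gradient norm to `theta` (d2l-style sketch)."""
    if isinstance(net, nn.Module):
        params = [p for p in net.parameters() if p.requires_grad]
    else:
        params = net.params
    norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm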