def train_epoch(self):
    """
    Train the network for one epoch and return the average loss.

    * This will be a pessimistic approximation of the true loss
      of the network, as the loss of the first batches will be higher
      than the true loss.

    Returns:
        loss (float, list(float)): list of mean losses
    """
    self.model.train()
    losses = []
    self.epoch += 1
    epoch_start = time.time()

    # a tuple/list of loaders means multiple parallel data sources
    if isinstance(self.train_loader, (tuple, list)):
        iterator = zip(*self.train_loader)
    else:
        iterator = self.train_loader

    for i_batch, batch in enumerate(iterator, 1):
        self.step += 1

        # zero gradients
        for optimizer in self.optimizers:
            optimizer.zero_grad()

        # move every tensor of the batch to the target device
        if isinstance(self.train_loader, (tuple, list)):
            batch = list(map(lambda x: list(map(lambda y: y.to(self.device), x)),
                             batch))
        else:
            batch = list(map(lambda x: x.to(self.device), batch))

        batch_losses = self.process_batch(*batch)

        # aggregate the losses into a single loss value
        loss_sum, loss_list = self.aggregate_losses(batch_losses)
        losses.append(loss_list)

        # back-propagate
        loss_sum.backward()

        if self.clip is not None:
            # clip_grad_norm_(self.model.parameters(), self.clip)
            for optimizer in self.optimizers:
                clip_grad_norm_((p for group in optimizer.param_groups
                                 for p in group['params']), self.clip)

        # update weights
        for optimizer in self.optimizers:
            optimizer.step()

        if self.step % self.log_interval == 0:
            self.progress_log = epoch_progress(self.epoch, i_batch,
                                               self.batch_size,
                                               self.train_set_size,
                                               epoch_start)

        for c in self.batch_end_callbacks:
            if callable(c):
                c(i_batch, loss_list)

    return np.array(losses).mean(axis=0)
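
# NOTE: aggregate_losses is called above but not defined in this snippet.
# The sketch below is a hypothetical minimal version, assuming process_batch
# returns a list of scalar loss tensors; it is illustrative only and not the
# project's actual implementation.
def aggregate_losses(self, batch_losses):
    # accept a single tensor as well as a list of tensors
    if torch.is_tensor(batch_losses):
        batch_losses = [batch_losses]
    # keep the summed tensor for backward(), plain floats for logging
    loss_sum = sum(batch_losses)
    loss_list = [loss.item() for loss in batch_losses]
    return loss_sum, loss_list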

def train_epoch(self):
    """
    Train the network for one epoch and return the average loss.

    * This will be a pessimistic approximation of the true loss
      of the network, as the loss of the first batches will be higher
      than the true loss.

    Returns:
        loss (float, list(float)): list of mean losses
    """
    self.model.train()
    losses = []
    self.epoch += 1
    epoch_start = time.time()

    if isinstance(self.train_loader, (tuple, list)):
        iterator = zip(*self.train_loader)
    else:
        iterator = self.train_loader

    for i_batch, batch in enumerate(iterator, 1):
        self.step += 1

        # zero gradients
        for optimizer in self.optimizers:
            optimizer.zero_grad()

        # move the inputs, labels and lengths to the target device
        if isinstance(batch.text[0], list):
            X = []
            for item in batch.text[0]:
                item_tensor = torch.from_numpy(numpy.array(item))
                X.append(to_device(item_tensor, device=self.device,
                                   dtype=item_tensor.dtype))
        else:
            X = to_device(batch.text[0], device=self.device,
                          dtype=batch.text[0].dtype)

        y = to_device(batch.label, device=self.device, dtype=torch.long)
        lengths = to_device(batch.text[1], device=self.device, dtype=torch.long)

        batch_loss, _, _ = self.process_batch(X, lengths, y)

        # aggregate the losses into a single loss value
        loss_sum, loss_list = self.return_tensor_and_list(batch_loss)
        losses.append(loss_list)

        # back-propagate
        loss_sum.backward()

        # if self.clip is not None:
        #     for optimizer in self.optimizers:
        #         clip_grad_norm_((p for group in optimizer.param_groups
        #                          for p in group['params']), self.clip)

        # update weights
        for optimizer in self.optimizers:
            optimizer.step()

        if self.step % self.log_interval == 0:
            self.progress_log = epoch_progress(self.epoch, i_batch,
                                               self.train_batch_size,
                                               self.train_set_size,
                                               epoch_start)

        for c in self.batch_end_callbacks:
            if callable(c):
                c(i_batch, batch_loss)

    return numpy.array(losses).mean(axis=0)
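
# Example of how a trainer exposing train_epoch might be driven from an outer
# loop. The Trainer constructor, its arguments and num_epochs below are
# assumptions for illustration only; they are not defined in this snippet.
#
#     trainer = Trainer(model=model,
#                       train_loader=train_loader,
#                       optimizers=[torch.optim.Adam(model.parameters(), lr=1e-3)],
#                       device="cuda" if torch.cuda.is_available() else "cpu")
#
#     for epoch in range(num_epochs):
#         avg_losses = trainer.train_epoch()  # mean of every tracked loss term
#         print(f"epoch {epoch + 1}: train losses = {avg_losses}")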