def train_epoch(self, epoch, dataloader):
    """Run one training epoch with gradient accumulation.

    Gradients are accumulated over ``self.backward_step`` consecutive
    batches before ``self.optimizer`` is stepped. The optimizer is also
    stepped on the final batch of the epoch so that a trailing group
    smaller than ``self.backward_step`` still contributes an update.

    For every batch the loss is added to an epoch-wide meter, every metric
    in ``self.metric`` is updated, the progress bar description is
    refreshed, and the batch loss is sent to ``self.tsboard`` under
    ``'Loss/train'``. At the end the epoch loss is printed and every
    metric is summarised and reset.

    Args:
        epoch: Index of the current epoch; only used to compute the global
            step ``epoch * len(dataloader) + i`` for the tensorboard writer.
        dataloader: Sized iterable yielding ``(inp, lbl)`` pairs.

    Returns:
        None.
    """
    total_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()

    self.model.train()
    print("Training..........")
    progress_bar = tqdm(dataloader)
    max_iter = len(dataloader)

    # Start from clean gradients; they are accumulated across batches below.
    self.optimizer.zero_grad()
    for i, (inp, lbl) in enumerate(progress_bar):
        # 1: Load inputs and labels
        inp = move_to(inp, self.device)
        lbl = move_to(lbl, self.device)

        # 2: Get network outputs
        outs = self.model(inp)

        # 3: Calculate the loss
        # NOTE(review): the loss is not divided by `backward_step`, so the
        # applied gradient is the sum over the accumulated batches --
        # confirm this is intended.
        loss = self.criterion(outs, lbl)

        # 4: Calculate gradients (added to those of earlier batches)
        loss.backward()

        # 5: Apply the accumulated gradients every `backward_step` batches,
        # and on the last batch so a trailing partial group is not thrown
        # away by the next epoch's zero_grad().
        if (i + 1) % self.backward_step == 0 or (i + 1) == max_iter:
            self.optimizer.step()
            self.optimizer.zero_grad()

        # 6: Record the loss exactly once per batch
        loss_value = loss.item()
        total_loss.add(loss_value)

        # 7: Update metrics on detached outputs and labels
        outs = detach(outs)
        lbl = detach(lbl)
        for m in self.metric.values():
            value = m.calculate(outs, lbl)
            m.update(value)

        # 8: Report progress
        with torch.no_grad():
            desc = 'Iteration: {}/{}. Total loss: {:.5f}. '.format(
                i + 1, max_iter, loss_value)
            for m in self.metric.values():
                value = m.value()
                metric = m.__class__.__name__
                desc += f'{metric}: {value:.5f}, '
            progress_bar.set_description(desc)
            # Log the plain float rather than the tensor itself.
            self.tsboard.update_scalar(
                'Loss/train', loss_value, epoch * max_iter + i)

        # if (i + 1) % self.config['trainer']['checkpoint_mini_step'] == 0:
        #     self.save_current_checkpoint(epoch)

    print("+ Train result")
    avg_loss = total_loss.value()[0]
    print("Loss:", avg_loss)
    for m in self.metric.values():
        m.summary()
        m.reset()
def train_epoch(self, epoch, dataloader):
    """Train the model for one epoch, stepping the optimizer on every batch.

    Two loss meters are kept: one covering the whole epoch (shown in the
    progress bar and printed at the end) and one covering the batches since
    the last tensorboard write. The latter is flushed to ``self.tsboard``
    every ``self.log_step`` batches and on the last batch, then reset.
    Every metric in ``self.metric`` is reset at the start, updated per
    batch and summarised at the end.

    Args:
        epoch: Index of the current epoch; only used to compute the global
            step ``epoch * len(dataloader) + i`` for the tensorboard writer.
        dataloader: Sized iterable yielding ``(inp, lbl)`` pairs.

    Returns:
        None.
    """
    # Loss since the last tensorboard write, and loss over the whole epoch.
    window_loss = meter.AverageValueMeter()
    epoch_loss = meter.AverageValueMeter()

    for metric in self.metric.values():
        metric.reset()

    self.model.train()
    print('Training........')
    progress_bar = tqdm(dataloader)

    for step, (inp, lbl) in enumerate(progress_bar):
        seen = step + 1

        # Move the batch to the training device.
        inp = move_to(inp, self.device)
        lbl = move_to(lbl, self.device)

        # Standard optimisation step: clear, forward, loss, backward, update.
        self.optimizer.zero_grad()
        outs = self.model(inp)
        loss = self.criterion(outs, lbl)
        loss.backward()
        self.optimizer.step()

        with torch.no_grad():
            # Feed both meters with the same batch loss.
            batch_loss = loss.item()
            window_loss.add(batch_loss)
            epoch_loss.add(batch_loss)

            n_batches = len(dataloader)
            progress_bar.set_description(
                'Iteration: {}/{}. Loss: {:.5f}'.format(
                    seen, n_batches, epoch_loss.value()[0]))

            # Flush the windowed loss periodically and on the final batch.
            if seen % self.log_step == 0 or seen == n_batches:
                self.tsboard.update_loss(
                    'train', window_loss.value()[0],
                    epoch * n_batches + step)
                window_loss.reset()

            # Metrics are computed on detached outputs and labels.
            outs = detach(outs)
            lbl = detach(lbl)
            for metric in self.metric.values():
                metric.update(metric.calculate(outs, lbl))

    print('+ Training result')
    print('Loss:', epoch_loss.value()[0])
    for metric in self.metric.values():
        metric.summary()
def val_epoch(self, epoch, dataloader):
    """Evaluate the model on ``dataloader`` and record loss and metrics.

    The model is switched to eval mode and the whole pass runs under
    ``torch.no_grad()`` so no autograd graph is built. After the loop the
    first element of the loss meter's ``value()`` (named ``avg_loss`` here)
    is printed, appended to ``self.val_loss`` and written to
    ``self.tsboard`` under ``'Loss/val'``. Each metric's value is then
    summarised, appended to ``self.val_metric[k]`` and written to
    ``self.tsboard``.

    Args:
        epoch: Index of the current epoch; used for the tensorboard steps.
        dataloader: Sized iterable yielding ``(inp, lbl)`` pairs.

    Returns:
        None.
    """
    # Function-scope import: this block needs torch only for no_grad().
    import torch

    total_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()

    self.model.eval()
    print("Evaluating.....")
    progress_bar = tqdm(dataloader)
    n_batches = len(dataloader)

    # Evaluation never calls backward(), so skip autograd bookkeeping.
    with torch.no_grad():
        for i, (inp, lbl) in enumerate(progress_bar):
            # 1: Load inputs and labels
            inp = move_to(inp, self.device)
            lbl = move_to(lbl, self.device)

            # 2: Get network outputs
            outs = self.model(inp)

            # 3: Calculate the loss
            loss = self.criterion(outs, lbl)

            # 4: Update loss
            loss_value = loss.item()
            total_loss.add(loss_value)

            # 5: Update metric (these metrics take raw outputs and labels)
            outs = detach(outs)
            lbl = detach(lbl)
            for m in self.metric.values():
                m.update(outs, lbl)

            # 6: Report progress
            desc = 'Iteration: {}/{}. Total loss: {:.5f}. '.format(
                i + 1, n_batches, loss_value)
            for m in self.metric.values():
                value = m.value()
                metric = m.__class__.__name__
                desc += f'{metric}: {value:.5f}, '
            progress_bar.set_description(desc)

    print("+ Evaluation result")
    avg_loss = total_loss.value()[0]
    print("Loss: ", avg_loss)
    self.val_loss.append(avg_loss)

    # Global step of the last batch of this epoch. Derived from the batch
    # count instead of the loop variable so an empty dataloader does not
    # raise NameError; identical to `epoch * len(dataloader) + i` otherwise.
    last_step = epoch * n_batches + max(n_batches - 1, 0)
    self.tsboard.update_scalar('Loss/val', avg_loss, last_step)

    # Calculate metric here
    for k, m in self.metric.items():
        value = m.value()
        m.summary()
        self.val_metric[k].append(value)
        self.tsboard.update_metric('val', k, value, epoch)
def train_epoch(self, epoch, dataloader):
    """Train the model for one epoch on dict-style batches.

    Each batch is read through the keys ``'input_ids'``,
    ``'attention_mask'`` and ``'labels'``. After the backward pass the
    gradient norm is clipped to 1.0, then the optimizer and the scheduler
    are both stepped, once per batch. A windowed loss is written to
    ``self.tsboard`` every ``self.log_step`` batches and on the last batch;
    the epoch-wide loss is printed at the end together with a summary of
    every metric in ``self.metric``.

    Args:
        epoch: Index of the current epoch; only used to compute the global
            step ``epoch * len(dataloader) + i`` for the tensorboard writer.
        dataloader: Sized iterable yielding mappings with the keys above.

    Returns:
        None.
    """
    # Loss since the last tensorboard write, and loss over the whole epoch.
    window_loss = meter.AverageValueMeter()
    epoch_loss = meter.AverageValueMeter()

    for metric in self.metric.values():
        metric.reset()

    self.model.train()
    print('Training........')
    progress_bar = tqdm(dataloader)

    for step, batch in enumerate(progress_bar):
        seen = step + 1

        # Move token ids, attention mask and labels to the training device.
        input_ids = move_to(batch['input_ids'], self.device)
        attention_mask = move_to(batch['attention_mask'], self.device)
        lbl = move_to(batch['labels'], self.device)

        # NOTE(review): the attribute is spelled `optimier` (not
        # `optimizer`) -- confirm it matches the name assigned on the
        # trainer instance.
        self.optimier.zero_grad()

        # Forward pass, loss, backward pass.
        outs = self.model(input_ids, attention_mask)
        loss = self.criterion(outs, lbl)
        loss.backward()

        # Clip the gradient norm before applying the update.
        nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)

        # The scheduler is advanced once per batch, right after the optimizer.
        self.optimier.step()
        self.scheduler.step()

        with torch.no_grad():
            # Feed both meters with the same batch loss.
            batch_loss = loss.item()
            window_loss.add(batch_loss)
            epoch_loss.add(batch_loss)

            # Flush the windowed loss periodically and on the final batch.
            if seen % self.log_step == 0 or seen == len(dataloader):
                self.tsboard.update_loss(
                    'train', window_loss.value()[0],
                    epoch * len(dataloader) + step)
                window_loss.reset()

            # Metrics are computed on detached outputs and labels.
            outs = detach(outs)
            lbl = detach(lbl)
            for metric in self.metric.values():
                metric.update(metric.calculate(outs, lbl))

    print('+ Training result')
    print('Loss:', epoch_loss.value()[0])
    for metric in self.metric.values():
        metric.summary()
def val_epoch(self, epoch, dataloader):
    """Evaluate the model on dict-style batches and record loss and metrics.

    Each batch is read through the keys ``'input_ids'``,
    ``'attention_mask'`` and ``'targets'``. The model is switched to eval
    mode and the whole pass runs under ``torch.no_grad()``. After the loop
    the first element of the loss meter's ``value()`` (named ``avg_loss``
    here) is printed, appended to ``self.val_loss`` and written to
    ``self.tsboard``. Each metric's value is then summarised, appended to
    ``self.val_metric[k]`` and written to ``self.tsboard``.

    Args:
        epoch: Index of the current epoch; used as the tensorboard step.
        dataloader: Iterable yielding mappings with the keys above.

    Returns:
        None.
    """
    # Function-scope import: this block needs torch only for no_grad().
    import torch

    running_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()

    self.model.eval()
    print('Evaluating........')
    progress_bar = tqdm(dataloader)

    # Evaluation never calls backward(), so skip autograd bookkeeping.
    with torch.no_grad():
        for x in progress_bar:
            # 1: Load inputs and labels
            input_ids = move_to(x['input_ids'], self.device)
            # Move the attention mask itself; passing the token ids here
            # would hand the model `input_ids` as its mask.
            attention_mask = move_to(x['attention_mask'], self.device)
            lbl = move_to(x['targets'], self.device)

            # 2: Get network outputs
            outs = self.model(input_ids, attention_mask)

            # 3: Calculate the loss
            loss = self.criterion(outs, lbl)

            # 4: Update loss
            running_loss.add(loss.item())

            # 5: Update metric
            outs = detach(outs)
            lbl = detach(lbl)
            for m in self.metric.values():
                value = m.calculate(outs, lbl)
                m.update(value)

    print('+ Evaluation result')
    avg_loss = running_loss.value()[0]
    print('Loss:', avg_loss)
    self.val_loss.append(avg_loss)
    self.tsboard.update_loss('val', avg_loss, epoch)

    for k, m in self.metric.items():
        value = m.value()
        m.summary()
        self.val_metric[k].append(value)
        self.tsboard.update_metric('val', k, value, epoch)