def train_epoch(self, epoch, dataloader):
    """Train the model for one epoch using gradient accumulation.

    Gradients are accumulated over ``self.backward_step`` batches before
    each optimizer step, which emulates a larger effective batch size.

    Args:
        epoch: current epoch index, used to compute the TensorBoard
            global step.
        dataloader: iterable yielding ``(input, label)`` batches.
    """
    total_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()
    self.model.train()
    print("Training..........")
    progress_bar = tqdm(dataloader)
    max_iter = len(dataloader)
    # Start the epoch with clean gradients.
    self.optimizer.zero_grad()
    for i, (inp, lbl) in enumerate(progress_bar):
        # 1: Load img_inputs and labels onto the training device
        inp = move_to(inp, self.device)
        lbl = move_to(lbl, self.device)
        # 2: Get network outputs
        outs = self.model(inp)
        # 3: Calculate the loss
        loss = self.criterion(outs, lbl)
        # 4: Accumulate gradients
        loss.backward()
        # 5: Step the optimizer only every `backward_step` batches
        #    (gradient accumulation), then reset gradients.
        if (i + 1) % self.backward_step == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()
        # 6: Update metrics on detached tensors (no autograd graph kept)
        outs = detach(outs)
        lbl = detach(lbl)
        for m in self.metric.values():
            value = m.calculate(outs, lbl)
            m.update(value)
        with torch.no_grad():
            # BUGFIX: the loss was previously added to total_loss twice
            # per iteration (once before and once inside this block),
            # inflating the reported epoch average by a factor of two.
            total_loss.add(loss.item())
            desc = 'Iteration: {}/{}. Total loss: {:.5f}. '.format(
                i + 1, len(dataloader), loss.item())
            for m in self.metric.values():
                value = m.value()
                metric = m.__class__.__name__
                desc += f'{metric}: {value:.5f}, '
            progress_bar.set_description(desc)
            self.tsboard.update_scalar('Loss/train', loss,
                                       epoch * len(dataloader) + i)
    # BUGFIX: when the batch count is not divisible by backward_step the
    # trailing accumulated gradients were silently discarded; flush them.
    if max_iter % self.backward_step != 0:
        self.optimizer.step()
        self.optimizer.zero_grad()
    print("+ Train result")
    avg_loss = total_loss.value()[0]
    print("Loss:", avg_loss)
    for m in self.metric.values():
        m.summary()
        m.reset()
def train_epoch(self, epoch, dataloader):
    """Run one training epoch, logging the loss every ``log_step`` batches.

    Args:
        epoch: current epoch index (used for the TensorBoard global step).
        dataloader: iterable yielding ``(input, label)`` batches.
    """
    # Two meters: one reset at every log flush, one kept for the epoch
    # average that is printed at the end.
    running_loss = meter.AverageValueMeter()
    total_loss = meter.AverageValueMeter()
    for metric in self.metric.values():
        metric.reset()
    self.model.train()
    print('Training........')
    progress_bar = tqdm(dataloader)
    num_batches = len(dataloader)
    for batch_idx, (inputs, targets) in enumerate(progress_bar):
        # Move the batch onto the training device.
        inputs = move_to(inputs, self.device)
        targets = move_to(targets, self.device)
        # Standard optimization step: clear grads, forward, loss,
        # backward, optimizer step.
        self.optimizer.zero_grad()
        outputs = self.model(inputs)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        with torch.no_grad():
            # Book-keeping only from here on — no graph needed.
            batch_loss = loss.item()
            running_loss.add(batch_loss)
            total_loss.add(batch_loss)
            progress_bar.set_description(
                'Iteration: {}/{}. Loss: {:.5f}'.format(
                    batch_idx + 1, num_batches, total_loss.value()[0]))
            at_log_step = (batch_idx + 1) % self.log_step == 0
            at_last_batch = (batch_idx + 1) == num_batches
            if at_log_step or at_last_batch:
                self.tsboard.update_loss(
                    'train', running_loss.value()[0],
                    epoch * num_batches + batch_idx)
                running_loss.reset()
        # Metrics are computed on detached tensors.
        outputs = detach(outputs)
        targets = detach(targets)
        for metric in self.metric.values():
            metric.update(metric.calculate(outputs, targets))
    print('+ Training result')
    avg_loss = total_loss.value()[0]
    print('Loss:', avg_loss)
    for metric in self.metric.values():
        metric.summary()
def val_epoch(self, epoch, dataloader):
    """Evaluate the model for one epoch on the validation set.

    Appends the average loss to ``self.val_loss``, each metric value to
    ``self.val_metric``, and logs both to TensorBoard.

    Args:
        epoch: current epoch index (used for the TensorBoard step).
        dataloader: iterable yielding ``(input, label)`` batches.
    """
    total_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()
    self.model.eval()
    print("Evaluating.....")
    progress_bar = tqdm(dataloader)
    # BUGFIX: evaluation previously ran without torch.no_grad(), so the
    # whole forward pass was tracked by autograd — wasted memory and time.
    with torch.no_grad():
        for i, (inp, lbl) in enumerate(progress_bar):
            # 1: Load inputs and labels onto the device
            inp = move_to(inp, self.device)
            lbl = move_to(lbl, self.device)
            # 2: Get network outputs
            outs = self.model(inp)
            # 3: Calculate the loss
            loss = self.criterion(outs, lbl)
            # 4: Update metrics on detached tensors
            outs = detach(outs)
            lbl = detach(lbl)
            for m in self.metric.values():
                # NOTE(review): this loop passes raw (outs, lbl) to
                # m.update(), unlike the train loop which passes
                # m.calculate(outs, lbl) — confirm the metric API
                # supports both call forms.
                m.update(outs, lbl)
            # 5: Update loss and the progress-bar description
            total_loss.add(loss.item())
            desc = 'Iteration: {}/{}. Total loss: {:.5f}. '.format(
                i + 1, len(dataloader), loss.item())
            for m in self.metric.values():
                value = m.value()
                metric = m.__class__.__name__
                desc += f'{metric}: {value:.5f}, '
            progress_bar.set_description(desc)
    print("+ Evaluation result")
    avg_loss = total_loss.value()[0]
    print("Loss: ", avg_loss)
    self.val_loss.append(avg_loss)
    # NOTE(review): `i` is the last loop index leaking out of the loop;
    # this raises NameError on an empty dataloader (pre-existing).
    self.tsboard.update_scalar(
        'Loss/val', total_loss.value()[0], epoch * len(dataloader) + i
    )
    # Record and log each metric's epoch value
    for k in self.metric.keys():
        m = self.metric[k].value()
        self.metric[k].summary()
        self.val_metric[k].append(m)
        self.tsboard.update_metric('val', k, m, epoch)
def train_epoch(self, epoch, dataloader):
    """Train a transformer-style model for one epoch.

    Each batch is a dict with 'input_ids', 'attention_mask' and 'labels';
    gradients are clipped to max-norm 1.0 and an LR scheduler is stepped
    after every optimizer step.
    """
    # 0: Record loss during training process
    running_loss = meter.AverageValueMeter()
    total_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()
    self.model.train()
    print('Training........')
    progress_bar = tqdm(dataloader)
    for i, x in enumerate(progress_bar):
        # 1: Load img_inputs and labels
        input_ids = move_to(x['input_ids'], self.device)
        attention_mask = move_to(x['attention_mask'], self.device)
        lbl = move_to(x['labels'], self.device)
        # 2: Clear gradients from previous iteration
        # NOTE(review): `optimier` looks like a typo for `optimizer`
        # (sibling trainers use self.optimizer) — confirm against this
        # class's __init__ before renaming, the attribute may genuinely
        # be spelled this way there.
        self.optimier.zero_grad()
        # 3: Get network outputs
        outs = self.model(input_ids, attention_mask)
        # 4: Calculate the loss
        loss = self.criterion(outs, lbl)
        # 5: Calculate gradients
        loss.backward()
        # Clip gradients to stabilize transformer training.
        nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
        # 6: Performing backpropagation
        self.optimier.step()
        # Per-step LR schedule (e.g. warmup) — stepped every batch.
        self.scheduler.step()
        with torch.no_grad():
            # 7: Update loss
            running_loss.add(loss.item())
            total_loss.add(loss.item())
            # Flush the running loss to TensorBoard every log_step
            # batches and at the final batch.
            if (i + 1) % self.log_step == 0 or (i + 1) == len(dataloader):
                self.tsboard.update_loss('train', running_loss.value()[0],
                                         epoch * len(dataloader) + i)
                running_loss.reset()
        # 8: Update metric
        outs = detach(outs)
        lbl = detach(lbl)
        for m in self.metric.values():
            value = m.calculate(outs, lbl)
            m.update(value)
    print('+ Training result')
    avg_loss = total_loss.value()[0]
    print('Loss:', avg_loss)
    for m in self.metric.values():
        m.summary()
def val_epoch(self, epoch, dataloader):
    """Evaluate a transformer-style model for one epoch.

    Each batch is a dict with 'input_ids', 'attention_mask' and
    'targets'. Appends the average loss to ``self.val_loss``, metric
    values to ``self.val_metric``, and logs both to TensorBoard.
    """
    running_loss = meter.AverageValueMeter()
    for m in self.metric.values():
        m.reset()
    self.model.eval()
    print('Evaluating........')
    progress_bar = tqdm(dataloader)
    # BUGFIX: evaluation previously ran without torch.no_grad(), so the
    # forward pass was tracked by autograd — wasted memory and time.
    with torch.no_grad():
        for i, x in enumerate(progress_bar):
            # 1: Load inputs and labels
            # NOTE(review): the train loop reads x['labels'] while this
            # reads x['targets'] — confirm the val dataloader's schema.
            img = x['input_ids']
            mask = x['attention_mask']
            lbl = x['targets']
            img = move_to(img, self.device)
            # BUGFIX: the attention mask was overwritten with the input
            # ids (`mask = move_to(img, ...)`), so the model never saw
            # the real mask during evaluation.
            mask = move_to(mask, self.device)
            lbl = move_to(lbl, self.device)
            # 2: Get network outputs
            outs = self.model(img, mask)
            # 3: Calculate the loss
            loss = self.criterion(outs, lbl)
            # 4: Update loss
            running_loss.add(loss.item())
            # 5: Update metric on detached tensors
            outs = detach(outs)
            lbl = detach(lbl)
            for m in self.metric.values():
                value = m.calculate(outs, lbl)
                m.update(value)
    print('+ Evaluation result')
    avg_loss = running_loss.value()[0]
    print('Loss:', avg_loss)
    self.val_loss.append(avg_loss)
    self.tsboard.update_loss('val', avg_loss, epoch)
    # Record and log each metric's epoch value
    for k in self.metric.keys():
        m = self.metric[k].value()
        self.metric[k].summary()
        self.val_metric[k].append(m)
        self.tsboard.update_metric('val', k, m, epoch)
# Restore the trained model: the checkpoint bundles both the model config
# and the learned weights.
config = torch.load(args.w, map_location=dev_id)
model = get_instance(config['config']['model']).to(device)
model.load_state_dict(config['model_state_dict'])

# Build the evaluation dataset/loader (no training-time augmentation).
dataset = ShopeeDataset(img_dir=args.d, csv_path=args.c, is_train=False)
dataloader = DataLoader(dataset, batch_size=args.b)

# Metrics to accumulate over the whole dataset.
metrics = {
    'Accuracy': Accuracy(),
    'ConfusionMatrix': ConfusionMatrix(nclasses=42),
}

with torch.no_grad():
    for metric in metrics.values():
        metric.reset()
    model.eval()
    # Stream batches through the frozen model, feeding every prediction
    # into each metric.
    for images, labels in tqdm(dataloader):
        images = move_to(images, device)
        labels = move_to(labels, device)
        outputs = model(images)
        for metric in metrics.values():
            metric.update(metric.calculate(outputs, labels))
    print('+ Evaluation result')
    for metric in metrics.values():
        metric.summary()
# Select the device: a specific GPU when requested and available,
# otherwise fall back to CPU.
dev_id = 'cuda:{}'.format(args.g) \
    if torch.cuda.is_available() and args.g is not None \
    else 'cpu'
device = torch.device(dev_id)

# Load model: the checkpoint bundles the model config and its weights.
config = torch.load(args.w, map_location=dev_id)
model = get_instance(config['config']['model']).to(device)
model.load_state_dict(config['model_state_dict'])

# Load data with standard ImageNet preprocessing (resize + normalize).
tfs = tvtf.Compose([
    tvtf.Resize((224, 224)),
    tvtf.ToTensor(),
    tvtf.Normalize(mean=[0.485, 0.456, 0.406],
                   std=[0.229, 0.224, 0.225]),
])
dataset = ImageFolderDataset(args.d, tfs)
dataloader = DataLoader(dataset, batch_size=args.b)

with torch.no_grad():
    # CSV header row followed by one row per image.
    out = [('filename', 'prediction', 'confidence')]
    model.eval()
    for i, (imgs, fns) in enumerate(tqdm(dataloader)):
        imgs = move_to(imgs, device)
        logits = model(imgs)
        # Softmax turns logits into per-class probabilities; the max
        # gives the predicted class and its confidence.
        probs = F.softmax(logits, dim=1)
        confs, preds = torch.max(probs, dim=1)
        out.extend([(fn, pred.item(), conf.item())
                    for fn, pred, conf in zip(fns, preds, confs)])

# BUGFIX: the output file was opened without ever being closed and
# without newline='' (the csv module requires it to avoid blank lines
# on Windows); use a context manager instead.
with open(args.o, 'w', newline='') as f:
    csv.writer(f).writerows(out)