def train_one_epoch(self):
    """Run one training pass over ``self.train_loader``.

    For every batch the method runs the forward pass, computes the loss,
    aborts on a NaN loss, back-propagates and steps the optimizer. It then
    updates four meters (loss, pixel accuracy, IoU, IoU after
    ``remove_small_mask_batch``), passing the batch size as the second
    argument to ``update``. A one-line summary is logged when the loader is
    exhausted.

    Raises:
        ValueError: if the loss of any batch is NaN.
    """
    # Set the model to be in training mode
    self.net.train()

    # Initialize average meters
    epoch_loss = AverageMeter()
    epoch_acc = AverageMeter()
    epoch_iou = AverageMeter()
    epoch_filtered_iou = AverageMeter()

    # The context manager closes the progress bar even when the NaN check
    # below raises.
    with tqdm(self.train_loader, f'Epoch-{self.current_epoch}-') as tqdm_batch:
        for x in tqdm_batch:
            # prepare data: as_tensor accepts tensors and arrays alike and
            # avoids the extra copy (and warning) torch.tensor() makes when
            # the loader already yields tensors.
            imgs = torch.as_tensor(x['img'], dtype=torch.float, device=self.device)
            masks = torch.as_tensor(x['mask'], dtype=torch.float, device=self.device)

            # model: only the first output is used
            pred, *_ = self.net(imgs)

            # loss: read the scalar once and reuse it for the meter
            cur_loss = self.loss(pred, masks)
            loss_value = cur_loss.item()
            if np.isnan(float(loss_value)):
                raise ValueError('Loss is nan during training...')

            # optimizer
            self.optimizer.zero_grad()
            cur_loss.backward()
            self.optimizer.step()

            # metrics: detach so thresholding does not extend the autograd graph
            pred_t = torch.sigmoid(pred.detach()) > 0.5
            masks_t = masks > 0.5

            cur_acc = torch.sum(pred_t == masks_t).item() / masks.numel()
            cur_iou = iou_pytorch(pred_t, masks_t)
            cur_filtered_iou = iou_pytorch(remove_small_mask_batch(pred_t), masks_t)

            batch_size = imgs.shape[0]
            epoch_loss.update(loss_value, batch_size)
            epoch_acc.update(cur_acc, batch_size)
            epoch_iou.update(cur_iou.item(), batch_size)
            epoch_filtered_iou.update(cur_filtered_iou.item(), batch_size)

    logging.info(
        f'Training at epoch- {self.current_epoch} |'
        f'loss: {epoch_loss.val:.5} - Acc: {epoch_acc.val:.5}'
        f'- IOU: {epoch_iou.val:.5} - Filtered IOU: {epoch_filtered_iou.val:.5}'
    )
def validate(self):
    """Evaluate the model on ``self.valid_loader`` without gradient tracking.

    For every batch the method computes the loss, aborts on a NaN loss, and
    updates meters for the loss, the IoU and the IoU after
    ``remove_small_mask_batch``, passing the batch size as the second
    argument to ``update``. A one-line summary is logged at the end.

    Returns:
        The ``val`` attribute of the filtered-IoU meter.

    Raises:
        ValueError: if the loss of any batch is NaN.
    """
    # set the model in eval mode
    self.net.eval()

    # Initialize average meters
    epoch_loss = AverageMeter()
    epoch_iou = AverageMeter()
    epoch_filtered_iou = AverageMeter()

    # The context manager closes the progress bar even when the NaN check
    # below raises.
    with tqdm(self.valid_loader, f'Epoch-{self.current_epoch}-') as tqdm_batch:
        with torch.no_grad():
            for x in tqdm_batch:
                # prepare data: as_tensor accepts tensors and arrays alike
                # and avoids the extra copy (and warning) torch.tensor()
                # makes when the loader already yields tensors.
                imgs = torch.as_tensor(x['img'], dtype=torch.float, device=self.device)
                masks = torch.as_tensor(x['mask'], dtype=torch.float, device=self.device)

                # model: only the first output is used
                pred, *_ = self.net(imgs)

                # loss: read the scalar once and reuse it for the meter
                cur_loss = self.loss(pred, masks)
                loss_value = cur_loss.item()
                if np.isnan(float(loss_value)):
                    raise ValueError('Loss is nan during validation...')

                # metrics
                pred_t = torch.sigmoid(pred) > 0.5
                masks_t = masks > 0.5

                cur_iou = iou_pytorch(pred_t, masks_t)
                cur_filtered_iou = iou_pytorch(remove_small_mask_batch(pred_t), masks_t)

                batch_size = imgs.shape[0]
                epoch_loss.update(loss_value, batch_size)
                epoch_iou.update(cur_iou.item(), batch_size)
                epoch_filtered_iou.update(cur_filtered_iou.item(), batch_size)

    logging.info(f'Validation at epoch- {self.current_epoch} |'
                 f'loss: {epoch_loss.val:.5} - IOU: {epoch_iou.val:.5}'
                 f' - Filtered IOU: {epoch_filtered_iou.val:.5}')

    return epoch_filtered_iou.val
def find_best_thres(self):
    """Search for the binarisation threshold that maximises the filtered IoU.

    Predictions over ``self.valid_loader`` are averaged with a flipped-input
    pass (flip along dim 3 — presumably the width axis of NCHW batches;
    confirm against the dataset). The averaged probabilities and the
    binarised masks are gathered on the CPU. Then 50 evenly spaced thresholds
    in ``[0.3, 0.7]`` are scored with ``iou_pytorch`` after
    ``remove_small_mask_batch``.

    Returns:
        The threshold with the highest score (the first one on ties, per
        ``np.argmax``).
    """
    # set the model in eval mode
    self.net.eval()

    pred_TTA_list = []
    masks_t_list = []

    # The context manager closes the progress bar even if a batch raises.
    with tqdm(self.valid_loader, f'Epoch-{self.current_epoch}-') as tqdm_batch:
        with torch.no_grad():
            for x in tqdm_batch:
                # prepare data: as_tensor accepts tensors and arrays alike
                # and avoids the extra copy (and warning) torch.tensor()
                # makes when the loader already yields tensors.
                imgs = torch.as_tensor(x['img'], dtype=torch.float, device=self.device)
                masks = torch.as_tensor(x['mask'], dtype=torch.float, device=self.device)

                # model: plain pass plus a pass on the flipped input, whose
                # output is flipped back before averaging the probabilities
                pred, *_ = self.net(imgs)
                pred_flip, *_ = self.net(imgs.flip(dims=[3]))
                pred_TTA = (torch.sigmoid(pred) + torch.sigmoid(pred_flip.flip(dims=[3]))) / 2

                # metrics
                masks_t = masks > 0.5

                pred_TTA_list.append(pred_TTA.cpu())
                masks_t_list.append(masks_t.cpu())

    pred_TTA = torch.cat(pred_TTA_list, dim=0)
    masks_t = torch.cat(masks_t_list, dim=0)

    thresholds = np.linspace(0.3, 0.7, 50)
    # Convert each score with .item(), as the sibling methods do, so the
    # array is built from plain Python floats rather than tensor objects.
    filtered_ious = np.array([
        iou_pytorch(remove_small_mask_batch(pred_TTA > t), masks_t).item()
        for t in thresholds
    ])
    best_thres_idx = np.argmax(filtered_ious)
    best_thres = thresholds[best_thres_idx]
    logging.info(f'Best Threshold- {best_thres}')

    return best_thres
outputs = model(inputs) outputs = outputs.view(-1, args.image_dimension, args.image_dimension) labels = labels.view(-1, args.image_dimension, args.image_dimension) val_loss = loss_function(labels, outputs) # compute the metrics # print(outputs.shape, labels.shape) outputs = (outputs >= args.threshold) * 1 f1, precision, recall = pixel_segementation_evaluation( labels.cpu().detach().numpy().reshape(-1), outputs.cpu().detach().numpy().reshape(-1)) iou = iou_pytorch(outputs, labels) # print training/validation statistics valid_running_loss.append(val_loss.detach().item()) valid_running_accuracy.append(f1) valid_running_precision.append(precision) valid_running_recall.append(recall) valid_running_dice.append(iou.detach().item()) # clear variables from memory del inputs, labels, outputs torch.cuda.empty_cache() train_epoch_loss.append(np.mean(train_running_loss)) valid_epoch_loss.append(np.mean(valid_running_loss)) valid_epoch_dice.append(np.mean(valid_running_dice))