def validate(val_loader, net, criterion, optim, curr_epoch, writer):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion: loss fn
    optim: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    """
    net.eval()
    val_loss = AverageMeter()
    iou_acc = 0
    dump_images = []

    for val_idx, data in enumerate(val_loader):
        inputs, gt_image, img_names = data
        assert len(inputs.size()) == 4 and len(gt_image.size()) == 3
        assert inputs.size()[2:] == gt_image.size()[1:]
        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)

        inputs, gt_cuda = inputs.cuda(), gt_image.cuda()

        with torch.no_grad():
            output = net(inputs)  # output = (1, 19, 713, 713)

        assert output.size()[2:] == gt_image.size()[1:]
        assert output.size()[1] == args.dataset_cls.num_classes

        val_loss.update(criterion(output, gt_cuda).item(), batch_pixel_size)
        predictions = output.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            if args.local_rank == 0:
                logging.info("validating: %d / %d", val_idx + 1, len(val_loader))
        if val_idx > 10 and args.test_mode:
            break

        # Image Dumps
        if val_idx < 10:
            dump_images.append([gt_image, predictions, img_names])

        iou_acc += fast_hist(predictions.numpy().flatten(),
                             gt_image.numpy().flatten(),
                             args.dataset_cls.num_classes)
        del output, val_idx, data

    if args.apex:
        iou_acc_tensor = torch.cuda.FloatTensor(iou_acc)
        torch.distributed.all_reduce(iou_acc_tensor,
                                     op=torch.distributed.ReduceOp.SUM)
        iou_acc = iou_acc_tensor.cpu().numpy()

    if args.local_rank == 0:
        evaluate_eval(args, net, optim, val_loss, iou_acc, dump_images,
                      writer, curr_epoch, args.dataset_cls)

    return val_loss.avg
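# NOTE: `AverageMeter`, `fast_hist` and `evaluate_eval` are imported from the
# surrounding codebase and are not defined in this file. For reference, a minimal
# sketch of the two small helpers is given below, assuming the usual
# PyTorch-examples / Cityscapes-style implementations; the actual code in this
# repo may differ.
import numpy as np


class AverageMeter(object):
    """Tracks a running average; update(val, n) adds n samples at value val."""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def fast_hist(pred, gtruth, num_classes):
    # Accumulate a num_classes x num_classes confusion matrix: rows index the
    # ground-truth class, columns the predicted class. Labels outside
    # [0, num_classes), e.g. the ignore label 255, are masked out.
    mask = (gtruth >= 0) & (gtruth < num_classes)
    hist = np.bincount(num_classes * gtruth[mask].astype(int) + pred[mask],
                       minlength=num_classes ** 2)
    return hist.reshape(num_classes, num_classes)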
def validate(val_loader, net, criterion, optim, scheduler, curr_epoch, curr_iter):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion: loss fn
    optim: optimizer
    curr_epoch: current epoch
    return: val_avg for step function if required
    """
    net.eval()
    val_loss = AverageMeter()
    iou_acc = 0
    error_acc = 0

    for val_idx, data in enumerate(val_loader):
        inputs, gts = data
        assert len(inputs.size()) == 4 and len(gts.size()) == 3
        assert inputs.size()[2:] == gts.size()[1:]
        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)

        inputs, gts = inputs.cuda(), gts.cuda()

        with torch.no_grad():
            output = net(inputs)
        del inputs

        assert output.size()[2:] == gts.size()[1:]
        assert output.size()[1] == args.num_classes

        val_loss.update(criterion(output, gts).item(), batch_pixel_size)
        predictions = output.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            logging.info("validating: %d / %d", val_idx + 1, len(val_loader))

        iou_acc += fast_hist(predictions.numpy().flatten(),
                             gts.cpu().numpy().flatten(),
                             args.num_classes)
        del gts, output, val_idx, data

    per_cls_iou = evaluate_eval(args, net, optim, scheduler, val_loss, iou_acc,
                                curr_epoch, args.dataset, curr_iter)
    return val_loss.avg, per_cls_iou
def inf(self, imgs, img_names, gt, inference, net, scales, pbar, base_img):
    ######################################################################
    # Run inference
    ######################################################################
    self.img_name = img_names[0]
    col_img_name = '{}/{}_color.png'.format(self.rgb_path, self.img_name)
    pred_img_name = '{}/{}.png'.format(self.pred_path, self.img_name)
    diff_img_name = '{}/{}_diff.png'.format(self.diff_path, self.img_name)
    compose_img_name = '{}/{}_compose.png'.format(self.compose_path, self.img_name)

    to_pil = transforms.ToPILImage()
    if self.inference_mode == 'pooling':
        img = imgs
        pool_base_img = to_pil(base_img[0])
    else:
        img = to_pil(imgs[0])

    prediction_pre_argmax_collection = inference(net, img, scales)

    if self.inference_mode == 'pooling':
        prediction = prediction_pre_argmax_collection
        prediction = np.concatenate(prediction, axis=0)[0]
    else:
        prediction_pre_argmax = np.mean(prediction_pre_argmax_collection, axis=0)
        prediction = np.argmax(prediction_pre_argmax, axis=0)

    if args.dataset == 'kitti' and args.split == 'test':
        origin_h, origin_w = 375, 1242
        pred_pil = Image.fromarray(prediction.astype('uint8'))
        pred_pil = pred_pil.resize((origin_w, origin_h), Image.NEAREST)
        small_img = img.copy()
        small_img = small_img.resize((origin_w, origin_h), Image.BICUBIC)
        prediction = np.array(pred_pil)

    if self.metrics:  # this flag controls whether the mIoU computation is skipped
        self.hist += fast_hist(prediction.flatten(),
                               gt.cpu().numpy().flatten(),
                               self.dataset_cls.num_classes)
        iou = round(np.nanmean(per_class_iu(self.hist)) * 100, 2)
        pbar.set_description("Mean IOU: %s" % (str(iou)))

    ######################################################################
    # Dump Images
    ######################################################################
    if self.write_image:
        if self.inference_mode == 'pooling':
            img = pool_base_img

        colorized = self.dataset_cls.colorize_mask(prediction)
        colorized.save(col_img_name)
        if args.dataset == 'kitti' and args.split == 'test':
            blend = Image.blend(small_img.convert("RGBA"),
                                colorized.convert("RGBA"), 0.5)
        else:
            blend = Image.blend(img.convert("RGBA"),
                                colorized.convert("RGBA"), 0.5)
        blend.save(compose_img_name)

        if gt is not None and args.split != 'test':
            gt = gt[0].cpu().numpy()
            # only write diff image if gt is valid
            diff = (prediction != gt)
            diff[gt == 255] = 0
            diffimg = Image.fromarray(diff.astype('uint8') * 255)
            PIL.ImageChops.lighter(
                blend,
                PIL.ImageOps.invert(diffimg).convert("RGBA")).save(diff_img_name)

        label_out = np.zeros_like(prediction)
        for label_id, train_id in self.dataset_cls.id_to_trainid.items():
            label_out[np.where(prediction == train_id)] = label_id
        cv2.imwrite(pred_img_name, label_out)  # converted back to label ids here
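# NOTE: `per_class_iu` is used above but not defined in this file. A minimal
# sketch of the standard Cityscapes-style helper is shown below for reference;
# the actual implementation in this codebase may differ. Per-class IoU is
# TP / (TP + FP + FN), read directly off the confusion matrix.
import numpy as np


def per_class_iu(hist):
    # diag = true positives, row sums = ground-truth pixels per class,
    # column sums = predicted pixels per class.
    return np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))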
def validate(val_loader, dataset, net, criterion, optim, scheduler, curr_epoch,
             writer, curr_iter, save_pth=True):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    dataset: dataset name (str)
    net: the network
    criterion: loss fn
    optim: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    """
    net.eval()
    val_loss = AverageMeter()
    iou_acc = 0
    error_acc = 0
    dump_images = []

    for val_idx, data in enumerate(val_loader):
        # input = torch.Size([1, 3, 713, 713])
        # gt_image = torch.Size([1, 713, 713])
        inputs, gt_image, img_names, _ = data

        if len(inputs.shape) == 5:
            B, D, C, H, W = inputs.shape
            inputs = inputs.view(-1, C, H, W)
            gt_image = gt_image.view(-1, 1, H, W)

        assert len(inputs.size()) == 4 and len(gt_image.size()) == 3
        assert inputs.size()[2:] == gt_image.size()[1:]
        batch_pixel_size = inputs.size(0) * inputs.size(2) * inputs.size(3)

        inputs, gt_cuda = inputs.cuda(), gt_image.cuda()

        with torch.no_grad():
            if args.use_wtloss:
                output, f_cor_arr = net(inputs, visualize=True)
            else:
                output = net(inputs)

        del inputs

        assert output.size()[2:] == gt_image.size()[1:]
        assert output.size()[1] == datasets.num_classes

        val_loss.update(criterion(output, gt_cuda).item(), batch_pixel_size)

        del gt_cuda

        # Collect data from different GPU to a single GPU since
        # encoding.parallel.criterionparallel function calculates distributed loss
        # functions
        predictions = output.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            if args.local_rank == 0:
                logging.info("validating: %d / %d", val_idx + 1, len(val_loader))
        if val_idx > 10 and args.test_mode:
            break

        # Image Dumps
        if val_idx < 10:
            dump_images.append([gt_image, predictions, img_names])

        iou_acc += fast_hist(predictions.numpy().flatten(),
                             gt_image.numpy().flatten(),
                             datasets.num_classes)
        del output, val_idx, data

    iou_acc_tensor = torch.cuda.FloatTensor(iou_acc)
    torch.distributed.all_reduce(iou_acc_tensor,
                                 op=torch.distributed.ReduceOp.SUM)
    iou_acc = iou_acc_tensor.cpu().numpy()

    if args.local_rank == 0:
        evaluate_eval(args, net, optim, scheduler, val_loss, iou_acc,
                      dump_images, writer, curr_epoch, dataset, None,
                      curr_iter, save_pth=save_pth)

    return val_loss.avg
def validate(val_loader, net, criterion, optimizer, curr_epoch, writer):
    '''
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion: loss fn
    optimizer: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    '''
    net.eval()
    val_loss = AverageMeter()
    mf_score = AverageMeter()
    IOU_acc = 0
    dump_images = []
    heatmap_images = []

    for vi, data in enumerate(val_loader):
        input, mask, edge, img_names = data
        assert len(input.size()) == 4 and len(mask.size()) == 3
        assert input.size()[2:] == mask.size()[1:]
        h, w = mask.size()[1:]
        batch_pixel_size = input.size(0) * input.size(2) * input.size(3)

        input, mask_cuda, edge_cuda = input.cuda(), mask.cuda(), edge.cuda()

        with torch.no_grad():
            seg_out, edge_out = net(input)  # output = (1, 19, 713, 713)

        if args.joint_edgeseg_loss:
            loss_dict = criterion((seg_out, edge_out), (mask_cuda, edge_cuda))
            val_loss.update(sum(loss_dict.values()).item(), batch_pixel_size)
        else:
            val_loss.update(criterion(seg_out, mask_cuda).item(), batch_pixel_size)

        # Collect data from different GPU to a single GPU since
        # encoding.parallel.criterionparallel function calculates distributed loss
        # functions
        seg_predictions = seg_out.data.max(1)[1].cpu()
        edge_predictions = edge_out.max(1)[0].cpu()

        # Logging
        if vi % 20 == 0:
            if args.local_rank == 0:
                logging.info('validating: %d / %d' % (vi + 1, len(val_loader)))
        if vi > 10 and args.test_mode:
            break

        _edge = edge.max(1)[0]

        # Image Dumps
        if vi < 10:
            dump_images.append([mask, seg_predictions, img_names])
            heatmap_images.append([_edge, edge_predictions, img_names])

        IOU_acc += fast_hist(seg_predictions.numpy().flatten(),
                             mask.numpy().flatten(),
                             args.dataset_cls.num_classes)
        del seg_out, edge_out, vi, data

    if args.local_rank == 0:
        evaluate_eval(args, net, optimizer, val_loss, mf_score, IOU_acc,
                      dump_images, heatmap_images, writer, curr_epoch,
                      args.dataset_cls)

    return val_loss.avg
def evaluate(val_loader, net):
    '''
    Runs the evaluation loop and prints F score
    val_loader: Data loader for validation
    net: the network
    return: results dict (mean_iu, acc, acc_cls, fwavacc)
    '''
    net.eval()

    # 0.0005    13.0 it/sec
    # 0.001875   4.80 it/sec
    # 0.00375    1.70 it/sec
    # 0.005      1.03 it/sec
    thresh = 0.0001
    mf_score1 = AverageMeter()
    mf_pc_score1 = AverageMeter()
    ap_score1 = AverageMeter()
    ap_pc_score1 = AverageMeter()
    IOU_acc = 0
    Fpc = np.zeros((args.dataset_cls.num_classes))
    Fc = np.zeros((args.dataset_cls.num_classes))

    for vi, data in enumerate(val_loader):
        input, mask, edge, img_names = data
        assert len(input.size()) == 4 and len(mask.size()) == 3
        assert input.size()[2:] == mask.size()[1:]
        h, w = mask.size()[1:]
        batch_pixel_size = input.size(0) * input.size(2) * input.size(3)

        input, mask_cuda, edge_cuda = input.cuda(), mask.cuda(), edge.cuda()

        with torch.no_grad():
            seg_out, edge_out = net(input)

        seg_predictions = seg_out.data.max(1)[1].cpu()
        edge_predictions = edge_out.max(1)[0].cpu()

        logging.info('evaluating: %d / %d' % (vi + 1, len(val_loader)))

        '''
        _Fpc, _Fc = eval_mask_boundary(seg_predictions.numpy(), mask.numpy(),
                                       args.dataset_cls.num_classes,
                                       bound_th=float(thresh))
        Fc += _Fc
        Fpc += _Fpc
        logging.info('F_Score: ' + str(np.sum(Fpc / Fc) / args.dataset_cls.num_classes))
        '''

        IOU_acc += fast_hist(seg_predictions.numpy().flatten(),
                             mask.numpy().flatten(),
                             args.dataset_cls.num_classes)
        del seg_out, edge_out, vi, data

    acc = np.diag(IOU_acc).sum() / IOU_acc.sum()
    acc_cls = np.diag(IOU_acc) / IOU_acc.sum(axis=1)
    acc_cls = np.nanmean(acc_cls)
    iu = np.diag(IOU_acc) / (IOU_acc.sum(axis=1) + IOU_acc.sum(axis=0) - np.diag(IOU_acc))
    freq = IOU_acc.sum(axis=1) / IOU_acc.sum()
    mean_iu = np.nanmean(iu)
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()

    # logging.info('F_Score: ' + str(np.sum(Fpc / Fc) / args.dataset_cls.num_classes))
    # logging.info('F_Score (Classwise): ' + str(Fpc / Fc))

    results = {
        "mean_iu": mean_iu,
        "acc": acc,
        "acc_cls": acc_cls,
        "fwavacc": fwavacc
    }
    return results
def eval_minibatch(data, net, criterion, val_loss, calc_metrics, args, val_idx):
    """
    Evaluate a single minibatch of images.
     * calculate metrics
     * dump images

    There are two primary multi-scale inference types:
      1. 'MSCALE', or in-model multi-scale: where the multi-scale iteration loop is
         handled within the model itself (see networks/mscale.py -> nscale_forward())
      2. 'multi_scale_inference', where we use Averaging to combine scales
    """
    torch.cuda.empty_cache()

    scales = [args.default_scale]
    if args.multi_scale_inference:
        scales.extend([float(x) for x in args.extra_scales.split(',')])
        if val_idx == 0:
            logx.msg(f'Using multi-scale inference (AVGPOOL) with scales {scales}')

    # input = torch.Size([1, 3, h, w])
    # gt_image = torch.Size([1, h, w])
    images, gt_image, img_names, scale_float = data
    assert len(images.size()) == 4 and len(gt_image.size()) == 3
    assert images.size()[2:] == gt_image.size()[1:]
    batch_pixel_size = images.size(0) * images.size(2) * images.size(3)
    input_size = images.size(2), images.size(3)

    if args.do_flip:
        # By ending with flip=0, we ensure that the images that are dumped
        # out correspond to the unflipped versions. A bit hacky.
        flips = [1, 0]
    else:
        flips = [0]

    with torch.no_grad():
        output = 0.0
        for flip in flips:
            for scale in scales:
                if flip == 1:
                    inputs = flip_tensor(images, 3)
                else:
                    inputs = images

                infer_size = [round(sz * scale) for sz in input_size]
                if scale != 1.0:
                    inputs = resize_tensor(inputs, infer_size)

                inputs = {'images': inputs, 'gts': gt_image}
                inputs = {k: v.cuda() for k, v in inputs.items()}

                # Expected Model outputs:
                #   required:
                #     'pred'   the network prediction, shape (1, 19, h, w)
                #
                #   optional:
                #     'pred_*' - multi-scale predictions from mscale model
                #     'attn_*' - multi-scale attentions from mscale model
                output_dict = net(inputs)

                _pred = output_dict['pred']

                # save AVGPOOL style multi-scale output for visualizing
                if not cfg.MODEL.MSCALE:
                    scale_name = fmt_scale('pred', scale)
                    output_dict[scale_name] = _pred

                # resize tensor down to 1.0x scale in order to combine
                # with other scales of prediction
                if scale != 1.0:
                    _pred = resize_tensor(_pred, input_size)

                if flip == 1:
                    output = output + flip_tensor(_pred, 3)
                else:
                    output = output + _pred

    output = output / len(scales) / len(flips)

    assert_msg = 'output_size {} gt_cuda size {}'
    gt_cuda = gt_image.cuda()
    assert_msg = assert_msg.format(output.size()[2:], gt_cuda.size()[1:])
    assert output.size()[2:] == gt_cuda.size()[1:], assert_msg
    assert output.size()[1] == cfg.DATASET.NUM_CLASSES, assert_msg

    # Update loss and scoring datastructure
    if calc_metrics:
        val_loss.update(criterion(output, gt_image.cuda()).item(), batch_pixel_size)

    output_data = torch.nn.functional.softmax(output, dim=1).cpu().data
    max_probs, predictions = output_data.max(1)

    # Assemble assets to visualize
    assets = {}
    for item in output_dict:
        if 'attn_' in item:
            assets[item] = output_dict[item]
        if 'pred_' in item:
            smax = torch.nn.functional.softmax(output_dict[item], dim=1)
            _, pred = smax.data.max(1)
            assets[item] = pred.cpu().numpy()

    predictions = predictions.numpy()
    assets['predictions'] = predictions
    assets['prob_mask'] = max_probs
    if calc_metrics:
        assets['err_mask'] = calc_err_mask_all(predictions,
                                               gt_image.numpy(),
                                               cfg.DATASET.NUM_CLASSES)

    _iou_acc = fast_hist(predictions.flatten(),
                         gt_image.numpy().flatten(),
                         cfg.DATASET.NUM_CLASSES)

    return assets, _iou_acc
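# NOTE: `flip_tensor`, `resize_tensor` and `fmt_scale` are helpers from the
# surrounding codebase and are not defined here. The sketches below are
# plausible stand-ins (assumptions, not this repo's actual implementations):
# a flip along one dimension, a bilinear resize to a target (h, w), and a
# string key used to tag per-scale predictions.
import torch
from torch.nn import functional as F


def flip_tensor(x, dim):
    # Flip tensor x along dimension dim (dim=3 flips an NCHW batch horizontally).
    return torch.flip(x, dims=[dim])


def resize_tensor(inputs, target_size):
    # Bilinearly resize an NCHW tensor to target_size = (h, w).
    return F.interpolate(inputs, size=tuple(target_size), mode='bilinear',
                         align_corners=False)


def fmt_scale(prefix, scale):
    # e.g. fmt_scale('pred', 0.5) -> 'pred_0.5x'; only used as a dict key above.
    return '{}_{}x'.format(prefix, scale)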
def inf(self, imgs, img_names, gt, inference, net, scales, pbar, base_img, pos):
    ######################################################################
    # Run inference
    ######################################################################
    self.img_name = img_names[0]
    col_img_name = '{}/{}_color.png'.format(self.rgb_path, self.img_name)
    pred_img_name = '{}/{}.png'.format(self.pred_path, self.img_name)
    diff_img_name = '{}/{}_diff.png'.format(self.diff_path, self.img_name)
    compose_img_name = '{}/{}_compose.png'.format(self.compose_path, self.img_name)

    to_pil = transforms.ToPILImage()
    if self.inference_mode == 'pooling':
        img = imgs
        pool_base_img = to_pil(base_img[0])
    else:
        img = to_pil(imgs[0])

    prediction_pre_argmax_collection = inference(net, img, scales, pos)
    # print(len(prediction_pre_argmax_collection))
    # print(prediction_pre_argmax_collection[0].shape)

    if self.inference_mode == 'pooling':
        prediction = prediction_pre_argmax_collection
        prediction = np.concatenate(prediction, axis=0)
    else:
        prediction_pre_argmax = np.mean(prediction_pre_argmax_collection, axis=0)
        prediction = np.argmax(prediction_pre_argmax, axis=0)

    if self.metrics:
        self.hist += fast_hist(prediction.flatten(),
                               gt.cpu().numpy().flatten(),
                               self.dataset_cls.num_classes)
        iou_w = round(np.nanmean(per_class_iu(self.hist)) * 100, 2)
        # acc_w = np.diag(self.hist).sum() / self.hist.sum()

        H, W = prediction.shape
        pred_split = np.split(prediction,
                              [H // 4, (H // 4) * 2, (H // 4) * 3], axis=0)
        gt_split = np.split(gt.cpu().numpy(),
                            [H // 4, (H // 4) * 2, (H // 4) * 3], axis=1)

        self.hist_up += fast_hist(pred_split[0].flatten(),
                                  gt_split[0].flatten(),
                                  self.dataset_cls.num_classes)
        iou_u = round(np.nanmean(per_class_iu(self.hist_up)) * 100, 2)
        # acc_u = np.diag(self.hist_up).sum() / self.hist_up.sum()

        self.hist_mid1 += fast_hist(pred_split[1].flatten(),
                                    gt_split[1].flatten(),
                                    self.dataset_cls.num_classes)
        iou_m1 = round(np.nanmean(per_class_iu(self.hist_mid1)) * 100, 2)
        # acc_m1 = np.diag(self.hist_mid1).sum() / self.hist_mid1.sum()

        self.hist_mid2 += fast_hist(pred_split[2].flatten(),
                                    gt_split[2].flatten(),
                                    self.dataset_cls.num_classes)
        iou_m2 = round(np.nanmean(per_class_iu(self.hist_mid2)) * 100, 2)
        # acc_m2 = np.diag(self.hist_mid2).sum() / self.hist_mid2.sum()

        self.hist_down += fast_hist(pred_split[3].flatten(),
                                    gt_split[3].flatten(),
                                    self.dataset_cls.num_classes)
        iou_d = round(np.nanmean(per_class_iu(self.hist_down)) * 100, 2)
        # acc_d = np.diag(self.hist_down).sum() / self.hist_down.sum()

        pbar.set_description(
            "Mean IOU (Whole,Up,Mid1,Mid2,DOWN): %s %s %s %s %s" %
            (str(iou_w), str(iou_u), str(iou_m1), str(iou_m2), str(iou_d)))
        # pbar.set_description("ACC (Whole,Up,Mid,DOWN): %s %s %s %s" %
        #                      (str(acc_w), str(acc_u), str(acc_m), str(acc_d)))

    ######################################################################
    # Dump Images
    ######################################################################
    if self.write_image:
        if self.inference_mode == 'pooling':
            img = pool_base_img

        colorized = self.dataset_cls.colorize_mask(prediction)
        colorized.save(col_img_name)
        blend = Image.blend(img.convert("RGBA"), colorized.convert("RGBA"), 0.5)
        blend.save(compose_img_name)

        if gt is not None:
            gt = gt[0].cpu().numpy()
            # only write diff image if gt is valid
            diff = (prediction != gt)
            diff[gt == 255] = 0
            diffimg = Image.fromarray(diff.astype('uint8') * 255)
            PIL.ImageChops.lighter(
                blend,
                PIL.ImageOps.invert(diffimg).convert("RGBA")).save(diff_img_name)

        label_out = np.zeros_like(prediction)
        for label_id, train_id in self.dataset_cls.id_to_trainid.items():
            label_out[np.where(prediction == train_id)] = label_id
        cv2.imwrite(pred_img_name, label_out)
def validate(val_loader, net, criterion1, criterion2, optim, curr_epoch, writer):
    """
    Runs the validation loop after each training epoch
    val_loader: Data loader for validation
    net: the network
    criterion1, criterion2: loss fns for the two tasks
    optim: optimizer
    curr_epoch: current epoch
    writer: tensorboard writer
    return: val_avg for step function if required
    """
    net.eval()
    val_loss1 = AverageMeter()
    val_loss2 = AverageMeter()
    iou_acc1 = 0
    iou_acc2 = 0
    dump_images = []

    for val_idx, data in enumerate(val_loader):
        inputs1, gt_image1, img_names1, inputs2, gt_image2, img_names2 = data
        assert len(inputs1.size()) == 4 and len(gt_image1.size()) == 3
        assert inputs1.size()[2:] == gt_image1.size()[1:]
        assert len(inputs2.size()) == 4 and len(gt_image2.size()) == 3
        assert inputs2.size()[2:] == gt_image2.size()[1:]

        batch_pixel_size1 = inputs1.size(0) * inputs1.size(2) * inputs1.size(3)
        batch_pixel_size2 = inputs2.size(0) * inputs2.size(2) * inputs2.size(3)

        inputs1, gt_cuda1 = inputs1.cuda(), gt_image1.cuda()
        inputs2, gt_cuda2 = inputs2.cuda(), gt_image2.cuda()

        with torch.no_grad():
            output1 = net(inputs1, task='semantic')        # output = (1, 19, 713, 713)
            output2 = net(inputs2, task='traversability')  # output = (1, 19, 713, 713)

        assert output1.size()[2:] == gt_image1.size()[1:]
        assert output1.size()[1] == args.dataset_cls.num_classes1
        assert output2.size()[2:] == gt_image2.size()[1:]
        assert output2.size()[1] == args.dataset_cls.num_classes2

        val_loss1.update(criterion1(output1, gt_cuda1).item(), batch_pixel_size1)
        val_loss2.update(criterion2(output2, gt_cuda2).item(), batch_pixel_size2)

        predictions1 = output1.data.max(1)[1].cpu()
        predictions2 = output2.data.max(1)[1].cpu()

        # Logging
        if val_idx % 20 == 0:
            if args.local_rank == 0:
                logging.info("validating: %d / %d", val_idx + 1, len(val_loader))
        if val_idx > 10 and args.test_mode:
            break

        # Image Dumps
        # if val_idx < 30:
        #     dump_images.append([gt_image, predictions1, predictions2, img_names])

        iou_acc1 += fast_hist(predictions1.numpy().flatten(),
                              gt_image1.numpy().flatten(),
                              args.dataset_cls.num_classes1)
        iou_acc2 += fast_hist(predictions2.numpy().flatten(),
                              gt_image2.numpy().flatten(),
                              args.dataset_cls.num_classes2)
        del output1, output2, val_idx, data

    if args.apex:
        iou_acc_tensor1 = torch.cuda.FloatTensor(iou_acc1)
        torch.distributed.all_reduce(iou_acc_tensor1,
                                     op=torch.distributed.ReduceOp.SUM)
        iou_acc1 = iou_acc_tensor1.cpu().numpy()
        iou_acc_tensor2 = torch.cuda.FloatTensor(iou_acc2)
        torch.distributed.all_reduce(iou_acc_tensor2,
                                     op=torch.distributed.ReduceOp.SUM)
        iou_acc2 = iou_acc_tensor2.cpu().numpy()

    if args.local_rank == 0:
        evaluate_eval(args, net, optim, val_loss1, val_loss2, iou_acc1, iou_acc2,
                      dump_images, writer, curr_epoch, args.dataset_cls)

    return val_loss1.avg
def eval_minibatch(data, net, criterion, val_loss, calc_metrics, args, val_idx):
    """
    Evaluate a single minibatch of images.
     * calculate metrics
     * dump images

    There are two primary multi-scale inference types:
      1. 'MSCALE', or in-model multi-scale: where the multi-scale iteration loop is
         handled within the model itself (see networks/mscale.py -> nscale_forward())
      2. 'multi_scale_inference', where we use Averaging to combine scales
    """
    torch.cuda.empty_cache()

    scales = [args.default_scale]
    if args.multi_scale_inference:
        scales.extend([float(x) for x in args.extra_scales.split(',')])
        if val_idx == 0:
            logx.msg(f'Using multi-scale inference (AVGPOOL) with scales {scales}')

    # input = torch.Size([1, 3, h, w])
    # gt_image = torch.Size([1, h, w])
    ori_images, gt_image, img_names, scale_float = data
    if len(gt_image.size()) == 4:
        # If the input is the image itself, construct a zero gt for it.
        # (This should only happen in test mode, where there is no gt.)
        gt_image = gt_image.new_zeros(gt_image.size()[:-1])

    assert len(ori_images.size()) == 4 and len(gt_image.size()) == 3
    assert ori_images.size()[2:] == gt_image.size()[1:]
    batch_pixel_size = ori_images.size(0) * ori_images.size(2) * ori_images.size(3)
    input_size = ori_images.size(2), ori_images.size(3)

    if args.do_flip:
        # By ending with flip=0, we ensure that the images that are dumped
        # out correspond to the unflipped versions. A bit hacky.
        flips = [1, 0]
    else:
        flips = [0]

    # TODO: add to config.
    max_crop_size = (args.crop_size[0], args.crop_size[1])
    m_h, m_w = max_crop_size
    crop_overlaps = (args.crop_overlap[0], args.crop_overlap[1])
    h_sp, w_sp = max_crop_size[0] - crop_overlaps[0], max_crop_size[1] - crop_overlaps[1]
    assert h_sp > 0 and w_sp > 0, "crop size should be larger than crop overlaps."

    output = 0.0
    with torch.no_grad():
        for flip in flips:
            for scale in scales:
                if flip == 1:
                    inputs = flip_tensor(ori_images, 3)
                else:
                    inputs = ori_images

                infer_size = [round(sz * scale) for sz in input_size]
                if scale != 1.0:
                    inputs = resize_tensor(inputs, infer_size)

                n, c, h, w = inputs.size(0), inputs.size(1), inputs.size(2), inputs.size(3)

                if crop_overlaps[0] > 0:
                    h_n = (h - max_crop_size[0] - 1) // h_sp + 2
                else:
                    h_n = (h - 1) // max_crop_size[0] + 1
                if crop_overlaps[1] > 0:
                    w_n = (w - max_crop_size[1] - 1) // w_sp + 2
                else:
                    w_n = (w - 1) // max_crop_size[1] + 1
                if h_n > 1 and crop_overlaps[0] == 0:
                    h_sp = (h - max_crop_size[0]) // (h_n - 1)
                if w_n > 1 and crop_overlaps[1] == 0:
                    w_sp = (w - max_crop_size[1]) // (w_n - 1)

                full_output_dict = None
                weights = None
                for i in range(h_n):
                    for j in range(w_n):
                        if i != h_n - 1 and j != w_n - 1:
                            h0, h1 = [i * h_sp, i * h_sp + m_h]
                            w0, w1 = [j * w_sp, j * w_sp + m_w]
                        elif i != h_n - 1 and j == w_n - 1:
                            h0, h1 = [i * h_sp, i * h_sp + m_h]
                            w0, w1 = [w - m_w, w]
                        elif i == h_n - 1 and j != w_n - 1:
                            h0, h1 = [h - m_h, h]
                            w0, w1 = [j * w_sp, j * w_sp + m_w]
                        else:
                            h0, h1 = [h - m_h, h]
                            w0, w1 = [w - m_w, w]

                        temp_inputs = inputs[:, :, h0:h1, w0:w1]
                        temp_gt_image = gt_image[:, h0:h1, w0:w1]
                        temp_inputs = {'images': temp_inputs, 'gts': temp_gt_image}
                        temp_inputs = {k: v.cuda() for k, v in temp_inputs.items()}

                        # Expected Model outputs:
                        #   required:
                        #     'pred'   the network prediction, shape (1, 19, h, w)
                        #
                        #   optional:
                        #     'pred_*' - multi-scale predictions from mscale model
                        #     'attn_*' - multi-scale attentions from mscale model
                        output_dict = net(temp_inputs)

                        _pred = output_dict['pred']
                        if full_output_dict is None:
                            full_output_dict = {}
                            weights = _pred.new_zeros((n, 1, h, w))
                            for k, v in output_dict.items():
                                # TODO: need to finish the rest of the keys?
                                if k != 'pred':
                                    continue
                                full_output_dict[k] = v.new_zeros(*(v.shape[:-2]), h, w)

                        weights[:, :, h0:h1, w0:w1] += _pred.new_ones(1)
                        for k, v in output_dict.items():
                            # TODO: need to finish the rest of the keys?
                            if k != 'pred':
                                continue
                            full_output_dict[k][:, :, h0:h1, w0:w1] += output_dict[k]

                for k, v in full_output_dict.items():
                    full_output_dict[k] = v / weights
                _pred = full_output_dict['pred']

                # save AVGPOOL style multi-scale output for visualizing
                if not cfg.MODEL.MSCALE:
                    scale_name = fmt_scale('pred', scale)
                    output_dict[scale_name] = _pred

                # resize tensor down to 1.0x scale in order to combine
                # with other scales of prediction
                if scale != 1.0:
                    _pred = resize_tensor(_pred, input_size)

                if flip == 1:
                    output = output + flip_tensor(_pred, 3)
                else:
                    output = output + _pred

    output = output / len(scales) / len(flips)

    assert_msg = 'output_size {} gt_cuda size {}'
    gt_cuda = gt_image.cuda()
    assert_msg = assert_msg.format(output.size()[2:], gt_cuda.size()[1:])
    assert output.size()[2:] == gt_cuda.size()[1:], assert_msg
    assert output.size()[1] == cfg.DATASET.NUM_CLASSES, assert_msg

    # Update loss and scoring datastructure
    if calc_metrics:
        val_loss.update(criterion(output, gt_image.cuda()).item(), batch_pixel_size)

    output_data = torch.nn.functional.softmax(output, dim=1).cpu().data
    max_probs, predictions = output_data.max(1)

    # Assemble assets to visualize
    assets = {}
    for item in output_dict:
        if 'attn_' in item:
            assets[item] = output_dict[item]
        if 'pred_' in item:
            smax = torch.nn.functional.softmax(output_dict[item], dim=1)
            _, pred = smax.data.max(1)
            assets[item] = pred.cpu().numpy()

    predictions = predictions.numpy()
    assets['predictions'] = predictions
    assets['prob_mask'] = max_probs
    if calc_metrics:
        assets['err_mask'] = calc_err_mask_all(predictions,
                                               gt_image.numpy(),
                                               cfg.DATASET.NUM_CLASSES)

    _iou_acc = fast_hist(predictions.flatten(),
                         gt_image.numpy().flatten(),
                         cfg.DATASET.NUM_CLASSES)

    return assets, _iou_acc
def main():
    if args.dataset == 'robotic_instrument':
        from datasets.robotic_instrument import get_testloader, RoboticInstrument
        if args.task == 'binary':
            num_classes = 2
        elif args.task == 'parts':
            num_classes = 5
        elif args.task == 'type':
            num_classes = 8
        dataset = RoboticInstrument(args.task, 'test')
        test_loader = get_testloader(args.task, batch_size=args.batch_size)
        net_param = {"class_num": num_classes,
                     "in_chns": 3,
                     "bilinear": True,
                     "feature_chns": [16, 32, 64, 128, 256],
                     "dropout": [0.0, 0.0, 0.3, 0.4, 0.5]}
    elif args.dataset == 'covid19_lesion':
        from datasets.covid19_lesion import get_testloader, Covid19Dataset
        dataset = Covid19Dataset(args.task, 'test')
        test_loader = get_testloader(args.task, batch_size=args.batch_size)
        num_classes = 2
        net_param = {"class_num": num_classes,
                     "in_chns": 1,
                     "bilinear": True,
                     "feature_chns": [16, 32, 64, 128, 256],
                     "dropout": [0.0, 0.0, 0.3, 0.4, 0.5]}
    else:
        raise NotImplementedError('The dataset is not supported.')

    net = COPLENet(net_param).cuda()
    optimizer.load_weights(net, None, None, args.snapshot, False)
    torch.cuda.empty_cache()
    net.eval()

    hist = 0
    predictions = []
    groundtruths = []
    for test_idx, data in enumerate(test_loader):
        inputs, gts = data
        assert len(inputs.size()) == 4 and len(gts.size()) == 3
        assert inputs.size()[2:] == gts.size()[1:]
        inputs, gts = inputs.cuda(), gts.cuda()

        with torch.no_grad():
            output = net(inputs)
        del inputs

        assert output.size()[2:] == gts.size()[1:]
        assert output.size()[1] == num_classes

        prediction = output.data.max(1)[1].cpu()
        predictions.append(output.data.cpu().numpy())
        groundtruths.append(gts.cpu().numpy())
        hist += fast_hist(prediction.numpy().flatten(),
                          gts.cpu().numpy().flatten(),
                          num_classes)
        del gts, output, test_idx, data

    predictions = np.concatenate(predictions, axis=0)
    groundtruths = np.concatenate(groundtruths, axis=0)

    if args.dump_imgs:
        assert len(dataset) == predictions.shape[0]
        dump_dir = './dump_' + args.dataset + '_' + args.task + '_' + args.method
        os.makedirs(dump_dir, exist_ok=True)
        for i in range(len(dataset)):
            img = skimage.io.imread(dataset.img_paths[i])
            if len(img.shape) == 2:
                img = np.stack((img, img, img), axis=2)
            img = skimage.transform.resize(img, (224, 336))
            cm = np.argmax(predictions[i, :, :, :], axis=0)
            color_cm = add_color(cm)
            color_cm = skimage.transform.resize(color_cm, (224, 336))
            gt = np.asarray(groundtruths[i, :, :], np.uint8)
            color_gt = add_color(gt)
            color_gt = skimage.transform.resize(color_gt, (224, 336))
            blend_pred = 0.5 * img + 0.5 * color_cm
            blend_gt = 0.5 * img + 0.5 * color_gt
            blend_pred = np.asarray(blend_pred * 255, np.uint8)
            blend_gt = np.asarray(blend_gt * 255, np.uint8)
            # skimage.io.imsave(os.path.join(dump_dir, 'img_{:03d}.png'.format(i)), img)
            skimage.io.imsave(os.path.join(dump_dir, 'pred_{:03d}.png'.format(i)), blend_pred)
            skimage.io.imsave(os.path.join(dump_dir, 'gt_{:03d}.png'.format(i)), blend_gt)
            if i > 20:
                break

    acc = np.diag(hist).sum() / hist.sum()
    acc_cls = np.diag(hist) / hist.sum(axis=1)
    iou = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    id2cat = {i: i for i in range(len(iou))}
    iou_false_positive = hist.sum(axis=1) - np.diag(hist)
    iou_false_negative = hist.sum(axis=0) - np.diag(hist)
    iou_true_positive = np.diag(hist)

    print('IoU:')
    print('label_id label IoU Precision Recall TP FP FN Pixel Acc.')
    for idx, i in enumerate(iou):
        idx_string = "{:2d}".format(idx)
        class_name = "{:>13}".format(id2cat[idx]) if idx in id2cat else ''
        iou_string = '{:5.1f}'.format(i * 100)
        total_pixels = hist.sum()
        tp = '{:5.1f}'.format(100 * iou_true_positive[idx] / total_pixels)
        fp = '{:5.1f}'.format(100 * iou_false_positive[idx] / total_pixels)
        fn = '{:5.1f}'.format(100 * iou_false_negative[idx] / total_pixels)
        precision = '{:5.1f}'.format(
            iou_true_positive[idx] / (iou_true_positive[idx] + iou_false_positive[idx]))
        recall = '{:5.1f}'.format(
            iou_true_positive[idx] / (iou_true_positive[idx] + iou_false_negative[idx]))
        pixel_acc = '{:5.1f}'.format(100 * acc_cls[idx])
        print('{} {} {} {} {} {} {} {} {}'.format(
            idx_string, class_name, iou_string, precision, recall,
            tp, fp, fn, pixel_acc))