def run_inference(self, imgs_minibatch): with torch.no_grad(): if self.cfg.TEST.BBOX_AUG.ENABLED: output = im_detect_bbox_aug(self.model, imgs_minibatch, self.device) else: output = self.model(imgs_minibatch.to(self.device)) if len(output): output = [o.to("cpu") for o in output] return output
def compute_on_dataset(model, data_loader, device, timer=None): model.eval() results_dict = {} cpu_device = torch.device("cpu") for _, batch in enumerate(tqdm(data_loader)): images, targets, image_ids = batch with torch.no_grad(): if timer: timer.tic() if cfg.TEST.BBOX_AUG.ENABLED: output = im_detect_bbox_aug(model, images, device) else: output = model(images.to(device)) if timer: if not cfg.MODEL.DEVICE == 'cpu': torch.cuda.synchronize() timer.toc() output = [o.to(cpu_device) for o in output] results_dict.update( {img_id: result for img_id, result in zip(image_ids, output)} ) return results_dict
def next_data(self, skip_num=0): while skip_num > 0: idx, batch = next(self.progress) skip_num -= 1 idx, batch = next(self.progress) images, targets, image_ids = batch # compability kwargs = dict() if isinstance(images, dict): tmp = images.pop("images") for k in images: images[k] = (images[k]).to(self.device) kwargs.update(images) images = tmp images = images.to(self.device) if isinstance(targets, dict): tmp = targets.pop("targets") # for k in targets: # targets[k] = [target.to(self.device) for target in targets[k]] kwargs.update(targets) targets = tmp # targets = [target.to(self.device) for target in targets] # process image with torch.no_grad(): if self.cfg.TEST.BBOX_AUG.ENABLED: result = im_detect_bbox_aug(self.model, images, self.device) else: result = self.model(images, targets, **kwargs) predictions = [o.to(torch.device("cpu")) for o in result["result"]] # always single image is passed at a time prediction = predictions[0] target, image_id = targets[0], image_ids[0] img_info = self.data_loader.dataset.get_img_info(image_id) image = cv2.imread( os.path.join(self.data_loader.dataset.root, img_info['file_name'])) if len(target) == 0: return image, image # reshape prediction (a BoxList) into the original image size width = img_info["width"] height = img_info["height"] focal_length = img_info["camera_params"]["intrinsic"]["fx"] baseline = img_info["camera_params"]["extrinsic"]["baseline"] # if img_info.get("calib"): # kitti_calib = img_info["calib"] # else: # kitti_calib = None kitti_calib = img_info["calib"] if img_info.get("calib") else None prediction = prediction.resize((width, height)) target = target.resize((width, height)) depth_mode = self.data_loader.dataset.output_depth_mode if hasattr( self.data_loader.dataset, "output_depth_mode") else "depth" # when lr head is on, use union # if self.cfg.MODEL.USE_LR_ROI_HEADS: # target = target.convert("xyxy") # disps = target.get_field("depths").convert("disp").depths # target.bbox[:,0] -= disps # prediction = prediction.convert("xyxy") # depths = PointDepth(prediction.get_field("depths")[0], prediction.size, focal_length=target.get_field("depths").focal_length, baseline=target.get_field("depths").baseline, mode="depth") # disps = depths.convert("disp").depths # prediction.bbox[:,0] -= disps if self.cfg.MODEL.DEPTHNET_ON: # get disparity disp_output = result["disparity"][0] disp_output *= width / 256 # to numpy disp_img = disp_output.cpu().numpy().transpose(1, 2, 0) cv2.imshow("disparity", disp_img) # disp_gt = cv2.imread(os.path.join(self.data_loader.dataset.root, img_info['disp_file_name']), -cv2.IMREAD_ANYDEPTH) # disp_gt = (disp_gt-1)/256./256. # cv2.imshow("disparity_gt", disp_gt) # cv2.waitKey() if self.cfg.MODEL.RPN_ONLY: return None, None if prediction.has_field("mask"): # if we have masks, paste the masks in the right position # in the image, as defined by the bounding boxes masks = prediction.get_field("mask") # always single image is passed at a time masks = self.masker([masks], [prediction])[0] prediction.add_field("mask", masks) top_predictions = self.select_top_predictions(prediction) # print(top_predictions, target) # top_predictions = _height_to_depth(top_predictions, img_info) # if self.cfg.MODEL.DEPTH_ON or self.cfg.MODEL.BOX3D_ON: # et_match, gt_match = self.match_datas(top_predictions, target) # et_depths = top_predictions.get_field("depths").tolist() # gt_depths = target.get_field("depths") # if isinstance(gt_depths, PointDepth): # gt_depths = gt_depths.depths.tolist() # else: # gt_depths = gt_depths.tolist() # for i,j in zip(et_match, gt_match): # # print(et_depths[i], gt_depths[j]) # # abs_err = math.fabs(1/et_depths[i][0]- 1/gt_depths[j]) * focal_length # 1 / depth # abs_err = math.fabs(et_depths[i][0]- gt_depths[j]) #/ width * focal_length # disp_unity # rel_err = math.fabs(et_depths[i][0]- gt_depths[j]) / gt_depths[j] # disp_unity # self.error_stats.append({ # "abs_err": abs_err, # "rel_err": rel_err, # "gt": gt_depths[j] # }) # self.depth_abs_errors.append(abs_err) # self.depth_rel_errors.append(rel_err) # if len(self.depth_abs_errors)>0: # print(sum(self.depth_abs_errors) / len(self.depth_abs_errors)) # print(sum(self.depth_rel_errors) / len(self.depth_rel_errors)) if not self.test_only: result = image.copy() if self.show_mask_heatmaps: return self.create_mask_montage(result, top_predictions) result = self.overlay_boxes(result, top_predictions) if self.cfg.MODEL.MASK_ON: result = self.overlay_mask(result, top_predictions) # if self.cfg.MODEL.KEYPOINT_ON: # result = self.overlay_keypoints(result, top_predictions) if self.cfg.MODEL.DEPTH_ON: result = self.overlay_depth(result, top_predictions, img_info=img_info, depth_mode=depth_mode) if self.cfg.MODEL.BOX3D_ON: result = self.overlay_box3d(result, top_predictions, calib=kitti_calib) result = self.overlay_class_names(result, top_predictions) # process ground truth # gt_pred = [] # for obj in target: boxes = target.bbox if target.has_field("masks"): masks = target.get_field("masks").get_mask_tensor() # masks = self.masker(masks, boxes) target.add_field("mask", masks) if target.has_field("keypoints"): # add logits keypoints = target.get_field("keypoints") keypoints.add_field( "logits", torch.tensor([[1.] * len(keypoints)] * len(boxes))) target.add_field("keypoints", keypoints) if target.has_field("depths"): depths = target.get_field("depths") if isinstance(depths, PointDepth): depths = depths.depths target.add_field("depths", depths) target.add_field("scores", torch.tensor([1.] * len(boxes))) # target = _height_to_depth(target, img_info) if not self.test_only: result_gt = image.copy() if self.show_mask_heatmaps: return self.create_mask_montage(result_gt, target) result_gt = self.overlay_boxes(result_gt, target) if self.cfg.MODEL.MASK_ON: result_gt = self.overlay_mask(result_gt, target) # if self.cfg.MODEL.KEYPOINT_ON: # result_gt = self.overlay_keypoints(result_gt, target) if self.cfg.MODEL.DEPTH_ON: result_gt = self.overlay_depth(result_gt, target, img_info=img_info, depth_mode=depth_mode) if self.cfg.MODEL.BOX3D_ON: result_gt = self.overlay_box3d(result_gt, target, calib=kitti_calib) result_gt = self.overlay_class_names(result_gt, target) return result, result_gt return None, None
def do_coco_compute_and_evalute( model, data_loader, device, output_folder, ): model.eval() dataset = data_loader.dataset masker = Masker(threshold=0.5, padding=1) cpu_device = torch.device("cpu") logger = logging.getLogger("maskrcnn_benchmark.inference") logger.info("Preparing results for COCO format") coco_results = {"bbox": [], "segm": []} for _, batch in enumerate(tqdm(data_loader)): images, targets, image_ids = batch with torch.no_grad(): if cfg.TEST.BBOX_AUG.ENABLED: predictions = im_detect_bbox_aug(model, images, device) else: predictions = model(images.to(device)) predictions = [p.to(cpu_device) for p in predictions] for image_id, prediction, target in zip(image_ids, predictions, targets): original_id = dataset.id_to_img_map[image_id] img_info = dataset.get_img_info(image_id) w = img_info["width"] h = img_info["height"] prediction = prediction.resize((w, h)) prediction = prediction.convert("xywh") boxes = prediction.bbox.tolist() scores = prediction.get_field("scores").tolist() classes = prediction.get_field("labels").tolist() masks = prediction.get_field("masks") if isinstance(masks, SegmentationMask): masks = masks.get_mask_tensor(do_squeeze=False)[:, None] # Masker is necessary only if masks haven't been already resized. if list(masks.shape[-2:]) != [h, w]: masks = masker(masks.expand(1, -1, -1, -1, -1), prediction) masks = masks[0] mapped_labels = [ dataset.contiguous_category_id_to_json_id[i] for i in classes ] rles = convert_binary_to_rle(masks.cpu()) coco_results["bbox"].extend([{ "image_id": original_id, "category_id": mapped_labels[k], "bbox": box, "score": scores[k], } for k, box in enumerate(boxes)]) coco_results["segm"].extend([{ "image_id": original_id, "category_id": mapped_labels[k], "segmentation": rle, "score": scores[k], } for k, rle in enumerate(rles)]) iou_types = ["bbox", "segm"] results = COCOResults(*iou_types) logger.info("Evaluating predictions") for iou_type in iou_types: with tempfile.NamedTemporaryFile() as f: file_path = f.name if output_folder: file_path = os.path.join(output_folder, iou_type + ".json") res = evaluate_predictions_on_coco(dataset.coco, coco_results[iou_type], file_path, dataset.ids, iou_type) results.update(res) logger.info(results)