def test_vis():
    """Show every sample of the dataset named by ``sys.argv[1]`` with ``grid_show``.

    Only the first annotation of each dataset dict is used. Five panels are
    shown per sample: the image, the mask/bbox overlay, the projected 3D box,
    the depth map (raw value divided by 1000.0) and the embedding view of the
    xyz crop pasted into a full-size zero canvas.
    """

    def _swap_rb(image):
        # Reorder the channel axis; images are requested as BGR below.
        return image[:, :, [2, 1, 0]]

    dataset_name = sys.argv[1]
    assert dataset_name in DatasetCatalog.list()

    metadata = MetadataCatalog.get(dataset_name)
    dprint("MetadataCatalog: ", metadata)
    obj_names = metadata.objs

    tic = time.perf_counter()
    dataset_dicts = DatasetCatalog.get(dataset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dataset_dicts), time.perf_counter() - tic))

    # The output directory is created up front; nothing in this function
    # writes into it.
    os.makedirs("output/{}-data-vis".format(dataset_name), exist_ok=True)

    for record in dataset_dicts:
        image_bgr = read_image_cv2(record["file_name"], format="BGR")
        depth_map = mmcv.imread(record["depth_file"], "unchanged") / 1000.0

        # only one instance per image
        first_anno = record["annotations"][0]
        height, width = image_bgr.shape[:2]

        instance_mask = cocosegm2mask(first_anno["segmentation"], height, width)
        box_xyxy = np.array(BoxMode.convert(first_anno["bbox"], first_anno["bbox_mode"], BoxMode.XYXY_ABS))

        box3d_pts = first_anno["bbox3d_and_center"]
        rotation = quat2mat(first_anno["quat"])
        translation = first_anno["trans"]

        # 0-based label
        category = first_anno["category_id"]
        projected = misc.project_pts(box3d_pts, record["cam"], rotation, translation)
        obj_label = obj_names[category]

        overlay = vis_image_mask_bbox_cv2(image_bgr, [instance_mask], bboxes=[box_xyxy], labels=[obj_label])
        box3d_view = misc.draw_projected_box3d(
            image_bgr.copy(),
            projected,
            middle_color=None,
            bottom_color=(128, 128, 128),
        )

        # Paste the stored crop back into a zero canvas of the image size.
        stored_xyz = mmcv.load(first_anno["xyz_path"])
        canvas = np.zeros((height, width, 3), dtype=np.float32)
        crop = stored_xyz["xyz_crop"].astype(np.float32)
        left, top, right, bottom = stored_xyz["xyxy"]
        canvas[top : bottom + 1, left : right + 1, :] = crop
        emb_view = get_emb_show(canvas)

        panels = [_swap_rb(image_bgr), _swap_rb(overlay), _swap_rb(box3d_view), depth_map, emb_view]
        titles = ["img", "vis_img", "img_vis_kpts2d", "depth", "emb_show"]
        grid_show(panels, titles, row=2, col=3)
def test_vis():
    """Visualize each annotation of the dataset named by ``sys.argv[1]``.

    For every dataset dict and every annotation in it, one ``grid_show``
    figure is produced. When the dataset name does not contain ``"test"`` the
    xyz file referenced by the annotation is loaded as well and a 3x3 grid is
    shown (image, overlay, projected 3D box, depth, mask difference, xyz crop,
    xyz painted over the image, and two crops); otherwise a 2x2 grid with the
    first four panels is shown.
    """
    dset_name = sys.argv[1]
    assert dset_name in DatasetCatalog.list()

    meta = MetadataCatalog.get(dset_name)
    dprint("MetadataCatalog: ", meta)
    objs = meta.objs

    t_start = time.perf_counter()
    dicts = DatasetCatalog.get(dset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dicts), time.perf_counter() - t_start))

    dirname = "output/{}-data-vis".format(dset_name)
    os.makedirs(dirname, exist_ok=True)

    # Loop-invariant: the xyz panels are only built for non-"test" datasets.
    show_xyz = "test" not in dset_name

    for d in dicts:
        img = read_image_cv2(d["file_name"], format="BGR")
        depth = mmcv.imread(d["depth_file"], "unchanged") / 1000.0

        imH, imW = img.shape[:2]
        annos = d["annotations"]
        masks = [cocosegm2mask(anno["segmentation"], imH, imW) for anno in annos]
        bboxes = [anno["bbox"] for anno in annos]
        bbox_modes = [anno["bbox_mode"] for anno in annos]
        bboxes_xyxy = np.array(
            [BoxMode.convert(box, box_mode, BoxMode.XYXY_ABS) for box, box_mode in zip(bboxes, bbox_modes)]
        )
        kpts_3d_list = [anno["bbox3d_and_center"] for anno in annos]
        quats = [anno["quat"] for anno in annos]
        transes = [anno["trans"] for anno in annos]
        Rs = [quat2mat(quat) for quat in quats]
        # 0-based label
        cat_ids = [anno["category_id"] for anno in annos]
        K = d["cam"]
        kpts_2d = [misc.project_pts(kpt3d, K, R, t) for kpt3d, R, t in zip(kpts_3d_list, Rs, transes)]
        # TODO: visualize pose and keypoints
        labels = [objs[cat_id] for cat_id in cat_ids]

        for _i, anno in enumerate(annos):
            # One-element slices keep the list/array containers the
            # visualizer is called with.
            img_vis = vis_image_mask_bbox_cv2(
                img,
                masks[_i : _i + 1],
                bboxes=bboxes_xyxy[_i : _i + 1],
                labels=labels[_i : _i + 1],
            )
            img_vis_kpts2d = misc.draw_projected_box3d(img_vis.copy(), kpts_2d[_i])

            if not show_xyz:
                grid_show(
                    [
                        img[:, :, [2, 1, 0]],
                        img_vis[:, :, [2, 1, 0]],
                        img_vis_kpts2d[:, :, [2, 1, 0]],
                        depth,
                    ],
                    ["img", "vis_img", "img_vis_kpts2d", "depth"],
                    row=2,
                    col=2,
                )
                continue

            xyz_info = mmcv.load(anno["xyz_path"])
            x1, y1, x2, y2 = xyz_info["xyxy"]
            xyz_crop = xyz_info["xyz_crop"].astype(np.float32)
            # Paste the crop into a zero canvas of the full image size;
            # the stored box is used with inclusive end coordinates.
            xyz = np.zeros((imH, imW, 3), dtype=np.float32)
            xyz[y1 : y2 + 1, x1 : x2 + 1, :] = xyz_crop
            xyz_show = get_emb_show(xyz)
            xyz_crop_show = get_emb_show(xyz_crop)

            img_xyz = img.copy() / 255.0
            # Foreground = any xyz channel is non-zero.
            mask_xyz = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype("uint8")
            fg_rows, fg_cols = np.where(mask_xyz != 0)
            img_xyz[fg_rows, fg_cols, :] = xyz_show[fg_rows, fg_cols, :3]
            img_xyz_crop = img_xyz[y1 : y2 + 1, x1 : x2 + 1, :]
            img_vis_crop = img_vis[y1 : y2 + 1, x1 : x2 + 1, :]

            # diff mask: subtract in a signed dtype. mask_xyz is uint8, so an
            # unsigned subtraction would wrap 0 - 1 to 255 instead of 1
            # (assumes the annotation mask is an integer/bool array -- TODO confirm).
            diff_mask_xyz = np.abs(masks[_i].astype(np.int16) - mask_xyz.astype(np.int16))[
                y1 : y2 + 1, x1 : x2 + 1
            ]

            grid_show(
                [
                    img[:, :, [2, 1, 0]],
                    img_vis[:, :, [2, 1, 0]],
                    img_vis_kpts2d[:, :, [2, 1, 0]],
                    depth,
                    # xyz_show,
                    diff_mask_xyz,
                    xyz_crop_show,
                    img_xyz[:, :, [2, 1, 0]],
                    img_xyz_crop[:, :, [2, 1, 0]],
                    # Same channel swap as the uncropped overlay above, so
                    # both panels show the same colors.
                    img_vis_crop[:, :, [2, 1, 0]],
                ],
                [
                    "img",
                    "vis_img",
                    "img_vis_kpts2d",
                    "depth",
                    "diff_mask_xyz",
                    "xyz_crop_show",
                    "img_xyz",
                    "img_xyz_crop",
                    "img_vis_crop",
                ],
                row=3,
                col=3,
            )
def test_vis():
    """Show all annotations of each sample of the dataset named by ``sys.argv[1]``.

    Per dataset dict a single 2x2 ``grid_show`` figure is produced: the image,
    an overlay of every annotation's mask/bbox/label, the image with every
    annotation's projected 3D box drawn onto it, and the depth map (raw value
    divided by 1000.0).
    """

    def _swap_rb(image):
        # Reorder the channel axis; images are requested as BGR below.
        return image[:, :, [2, 1, 0]]

    dataset_name = sys.argv[1]
    assert dataset_name in DatasetCatalog.list()

    metadata = MetadataCatalog.get(dataset_name)
    dprint("MetadataCatalog: ", metadata)
    obj_names = metadata.objs

    tic = time.perf_counter()
    dataset_dicts = DatasetCatalog.get(dataset_name)
    logger.info("Done loading {} samples with {:.3f}s.".format(len(dataset_dicts), time.perf_counter() - tic))

    # The output directory is created up front; nothing in this function
    # writes into it.
    os.makedirs("output/{}-data-vis".format(dataset_name), exist_ok=True)

    for record in dataset_dicts:
        image_bgr = read_image_cv2(record["file_name"], format="BGR")
        depth_map = mmcv.imread(record["depth_file"], "unchanged") / 1000.0

        height, width = image_bgr.shape[:2]
        instances = record["annotations"]

        instance_masks = [cocosegm2mask(inst["segmentation"], height, width) for inst in instances]
        boxes_xyxy = np.array(
            [BoxMode.convert(inst["bbox"], inst["bbox_mode"], BoxMode.XYXY_ABS) for inst in instances]
        )
        rotations = [quat2mat(inst["quat"]) for inst in instances]

        # 0-based label
        categories = [inst["category_id"] for inst in instances]
        intrinsics = record["cam"]
        projected_boxes = [
            misc.project_pts(inst["bbox3d_and_center"], intrinsics, rot, inst["trans"])
            for inst, rot in zip(instances, rotations)
        ]
        inst_labels = [obj_names[category] for category in categories]

        overlay = vis_image_mask_bbox_cv2(image_bgr, instance_masks, bboxes=boxes_xyxy, labels=inst_labels)

        # Draw each projected box onto the result of the previous draw call.
        box3d_view = image_bgr.copy()
        for projected in projected_boxes:
            box3d_view = misc.draw_projected_box3d(box3d_view, projected)

        panels = [_swap_rb(image_bgr), _swap_rb(overlay), _swap_rb(box3d_view), depth_map]
        titles = [f"img:{record['file_name']}", "vis_img", "img_vis_kpts2d", "depth"]
        grid_show(panels, titles, row=2, col=2)
def save_result_of_dataset(cfg, model, data_loader, output_dir, dataset_name):
    """Run model (in eval mode) on the data_loader and save predictions.

    Args:
        cfg: config. Not read by this function.
        model (nn.Module): a module which accepts an object from `data_loader`
            and returns some outputs. It will be temporarily set to `eval`
            mode. If you wish to evaluate a model in `training` mode instead,
            you can wrap the given model and override its behavior of
            `.eval()` and `.train()`.
        data_loader: an iterable object with a length. The elements it
            generates will be the inputs to the model.
        output_dir: directory handed to ``mmcv.mkdir_or_exist``; the
            predictions are dumped to ``results.pkl`` inside it.
        dataset_name: name looked up in ``MetadataCatalog``; its metadata
            supplies ``ref_key`` and ``objs``.

    Returns:
        None. The per-image results, keyed by each input's ``scene_im_id``,
        are written to ``<output_dir>/results.pkl``.
    """
    cpu_device = torch.device("cpu")
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    # NOTE: dataset name should be the same as TRAIN to get the correct meta
    _metadata = MetadataCatalog.get(dataset_name)
    data_ref = ref.__dict__[_metadata.ref_key]
    obj_names = _metadata.objs
    # Object id for each label index, resolved once instead of per prediction.
    label_to_obj_id = [data_ref.obj2id[obj_name] for obj_name in obj_names]

    result_name = "results.pkl"
    mmcv.mkdir_or_exist(output_dir)
    result_path = osp.join(output_dir, result_name)

    total = len(data_loader)  # inference data loader must have a fixed length
    results = OrderedDict()
    VIS = False

    # Only synchronize when CUDA exists, so a CPU-only host does not fail.
    use_cuda_sync = torch.cuda.is_available()

    logging_interval = 50
    num_warmup = min(5, logging_interval - 1, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                # Restart both timers once the warm-up iterations are done.
                start_time = time.perf_counter()
                total_compute_time = 0

            if VIS:
                images_ori = [_input["image"].clone() for _input in inputs]

            start_compute_time = time.perf_counter()
            outputs = model(inputs)  # NOTE: do model inference
            if use_cuda_sync:
                torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time

            # NOTE: process results
            for i, (_input, output) in enumerate(zip(inputs, outputs)):
                instances = output["instances"]

                has_mask = instances.has("pred_masks")
                if has_mask:
                    pred_masks = instances.pred_masks  # (#objs, imH, imW)
                    pred_masks = pred_masks.detach().cpu().numpy()
                    # NOTE: time consuming step
                    rles = [binary_mask_to_rle(pred_mask) for pred_mask in pred_masks]

                instances = instances.to(cpu_device)
                boxes = instances.pred_boxes.tensor.clone().detach().cpu().numpy()  # xyxy
                scores = instances.scores.tolist()
                labels = instances.pred_classes.detach().cpu().numpy()
                obj_ids = [label_to_obj_id[int(label)] for label in labels]

                ego_quats = instances.pred_ego_quats.detach().cpu().numpy()
                ego_rots = [quat2mat(ego_quat) for ego_quat in ego_quats]
                transes = instances.pred_transes.detach().cpu().numpy()

                cur_results = {
                    # Batch compute time split evenly over the batch items.
                    "time": cur_compute_time / len(inputs),
                    "obj_ids": obj_ids,
                    "scores": scores,
                    "boxes": boxes,  # xyxy
                    "Rs": ego_rots,
                    "ts": transes,  # m
                }
                if has_mask:
                    cur_results["masks"] = rles

                if VIS:
                    import cv2
                    from lib.vis_utils.image import vis_image_mask_bbox_cv2

                    # NOTE(review): pred_masks is only bound above when the
                    # instances carry "pred_masks"; this debug path relies
                    # on that being the case.
                    image = (images_ori[i].detach().cpu().numpy().transpose(1, 2, 0) + 0.5).astype("uint8")
                    img_vis = vis_image_mask_bbox_cv2(
                        image,
                        pred_masks,
                        boxes,
                        labels=[obj_names[int(label)] for label in labels],
                    )
                    cv2.imshow("img", img_vis.astype("uint8"))
                    cv2.waitKey()

                results[_input["scene_im_id"]] = cur_results

            if (idx + 1) % logging_interval == 0:
                duration = time.perf_counter() - start_time
                seconds_per_img = duration / (idx + 1 - num_warmup)
                eta = datetime.timedelta(seconds=int(seconds_per_img * (total - num_warmup) - duration))
                logger.info(
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(idx + 1, total, seconds_per_img, str(eta))
                )

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = int(time.perf_counter() - start_time)
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
        )
    )

    mmcv.dump(results, result_path)
    logger.info("Results saved to {}".format(result_path))