def __call__(self):
    """Load light-weight instance annotations of all images into a list of
    dicts in Detectron2 format.

    Do not load heavy data into memory in this file, since we will load the
    annotations of all images into memory.

    The result is pickled into ``self.dataset_root`` under a name derived
    from an md5 of the dataset settings. When ``self.use_cache`` is set and
    that file already exists, it is loaded and returned directly.

    Side effects: resets and updates
    ``self.num_instances_without_valid_segmentation`` and
    ``self.num_instances_without_valid_box``; clamps ``self.num_to_load`` to
    the number of loaded images when it is positive.

    Returns:
        list[dict]: one record per image that kept at least one instance;
        the instances are stored under the ``"annotations"`` key.
    """
    # cache the dataset_dicts to avoid loading masks from files
    hashed_file_name = hashlib.md5(
        (
            "".join([str(fn) for fn in self.objs])
            + "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
                self.name,
                self.dataset_root,
                self.with_masks,
                self.with_depth,
                self.with_xyz,
                osp.abspath(__file__),
            )
        ).encode("utf-8")
    ).hexdigest()
    cache_path = osp.join(
        self.dataset_root,
        "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
    )

    if osp.exists(cache_path) and self.use_cache:
        logger.info("load cached dataset dicts from {}".format(cache_path))
        return mmcv.load(cache_path)

    t_start = time.perf_counter()
    dataset_dicts = []
    self.num_instances_without_valid_segmentation = 0
    self.num_instances_without_valid_box = 0
    logger.info("loading dataset dicts: {}".format(self.name))
    # it is slow because of loading and converting masks to rle
    for scene in self.scenes:
        scene_id = int(scene)
        scene_root = osp.join(self.dataset_root, scene)

        gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
        gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))
        cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))

        for str_im_id in tqdm(gt_dict, postfix=f"{scene_id}"):
            int_im_id = int(str_im_id)
            # Format only the file-name template, then join: formatting the
            # joined path would misinterpret any brace inside scene_root.
            rgb_path = osp.join(scene_root, "rgb/{:06d}.jpg".format(int_im_id))
            assert osp.exists(rgb_path), rgb_path

            depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))

            K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
            depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]

            record = {
                "dataset_name": self.name,
                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
                "height": self.height,
                "width": self.width,
                "image_id": int_im_id,
                "scene_im_id": "{}/{}".format(scene_id, int_im_id),  # for evaluation
                "cam": K,
                "depth_factor": depth_factor,
                "img_type": "syn_pbr",  # NOTE: has background
            }

            insts = []
            for anno_i, anno in enumerate(gt_dict[str_im_id]):
                obj_id = anno["obj_id"]
                if obj_id not in self.cat_ids:
                    continue
                cur_label = self.cat2label[obj_id]  # 0-based label

                R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
                t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
                pose = np.hstack([R, t.reshape(3, 1)])
                quat = mat2quat(R).astype("float32")
                allo_q = mat2quat(egocentric_to_allocentric(pose)[:3, :3]).astype("float32")

                # project the translation with the camera matrix -> 2D centroid
                proj = (record["cam"] @ t.T).T
                proj = proj[:2] / proj[2]

                bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
                x1, y1, w, h = bbox_visib
                if self.filter_invalid and (h <= 1 or w <= 1):
                    self.num_instances_without_valid_box += 1
                    continue

                mask_file = osp.join(
                    scene_root, "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i)
                )
                mask_visib_file = osp.join(
                    scene_root, "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i)
                )
                assert osp.exists(mask_file), mask_file
                assert osp.exists(mask_visib_file), mask_visib_file
                # load mask visib  TODO: load both mask_visib and mask_full
                mask_single = mmcv.imread(mask_visib_file, "unchanged")
                area = mask_single.sum()
                if area < 3:  # filter out too small or nearly invisible instances
                    self.num_instances_without_valid_segmentation += 1
                    continue
                mask_rle = binary_mask_to_rle(mask_single, compressed=True)

                inst = {
                    "category_id": cur_label,  # 0-based label
                    "bbox": bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                    "bbox_mode": BoxMode.XYWH_ABS,
                    "pose": pose,
                    "quat": quat,
                    "trans": t,
                    "allo_quat": allo_q,
                    "centroid_2d": proj,  # absolute (cx, cy)
                    "segmentation": mask_rle,
                    "mask_full_file": mask_file,  # TODO: load as mask_full, rle
                }
                if self.with_xyz:
                    # the xyz crop path is derived from the full-mask path
                    xyz_crop_path = mask_file.replace("/mask/", "/xyz_crop/").replace(".png", ".pkl")
                    assert osp.exists(xyz_crop_path), xyz_crop_path
                    inst["xyz_crop_path"] = xyz_crop_path
                insts.append(inst)

            if not insts:  # filter im without anno
                continue
            record["annotations"] = insts
            dataset_dicts.append(record)

    if self.num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_segmentation
            )
        )
    if self.num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_box
            )
        )
    ##########################
    if self.num_to_load > 0:
        # keep only the first num_to_load images
        self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
        dataset_dicts = dataset_dicts[: self.num_to_load]
    logger.info(
        "loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
    )

    mkdir_p(osp.dirname(cache_path))
    mmcv.dump(dataset_dicts, cache_path, protocol=4)
    logger.info("Dumped dataset_dicts to {}".format(cache_path))
    return dataset_dicts
def __call__(self):
    """Load light-weight instance annotations of all images into a list of
    dicts in Detectron2 format.

    Do not load heavy data into memory in this file, since we will load the
    annotations of all images into memory.

    The (scene_id, im_id) pairs to load come from ``self.ann_file``. The
    result is pickled into ``self.dataset_root`` under a name derived from
    an md5 of the dataset settings. When ``self.use_cache`` is set and that
    file already exists, it is loaded and returned directly.

    Side effects: resets and updates
    ``self.num_instances_without_valid_segmentation`` and
    ``self.num_instances_without_valid_box``; clamps ``self.num_to_load`` to
    the number of loaded images when it is positive.

    Returns:
        list[dict]: one record per image that kept at least one instance;
        the instances are stored under the ``"annotations"`` key.
    """
    # cache the dataset_dicts to avoid loading masks from files
    hashed_file_name = hashlib.md5(
        (
            "".join([str(fn) for fn in self.objs])
            + "dataset_dicts_{}_{}_{}_{}_{}".format(
                self.name,
                self.dataset_root,
                self.with_masks,
                self.with_depth,
                osp.abspath(__file__),
            )
        ).encode("utf-8")
    ).hexdigest()
    cache_path = osp.join(
        self.dataset_root,
        "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
    )

    if osp.exists(cache_path) and self.use_cache:
        logger.info("load cached dataset dicts from {}".format(cache_path))
        return mmcv.load(cache_path)

    t_start = time.perf_counter()

    logger.info("loading dataset dicts: {}".format(self.name))
    self.num_instances_without_valid_segmentation = 0
    self.num_instances_without_valid_box = 0
    dataset_dicts = []
    #######################################################
    im_id_global = 0

    targets = mmcv.load(self.ann_file)
    # unique (scene_id, im_id) pairs in a deterministic order
    scene_im_ids = sorted({(item["scene_id"], item["im_id"]) for item in targets})

    # load infos for each scene (once per scene)
    gt_dicts = {}
    gt_info_dicts = {}
    cam_dicts = {}
    for scene_id in sorted({sid for sid, _ in scene_im_ids}):
        scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
        gt_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt.json"))
        # bbox_obj, bbox_visib
        gt_info_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))
        cam_dicts[scene_id] = mmcv.load(osp.join(scene_root, "scene_camera.json"))

    for scene_id, im_id in tqdm(scene_im_ids):
        str_im_id = str(im_id)
        scene_root = osp.join(self.dataset_root, f"{scene_id:06d}")
        # Format only the file-name template, then join: formatting the
        # joined path would misinterpret any brace inside scene_root.
        rgb_path = osp.join(scene_root, "rgb/{:06d}.png".format(im_id))
        assert osp.exists(rgb_path), rgb_path

        depth_path = osp.join(scene_root, "depth/{:06d}.png".format(im_id))

        # scene_id is used as-is: re-parsing it from rgb_path yields the same
        # integer and depends on the path separator.
        cam = np.array(cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
        depth_factor = 1000.0 / cam_dicts[scene_id][str_im_id]["depth_scale"]

        record = {
            "dataset_name": self.name,
            "file_name": osp.relpath(rgb_path, PROJ_ROOT),
            "depth_file": osp.relpath(depth_path, PROJ_ROOT),
            "depth_factor": depth_factor,
            "height": self.height,
            "width": self.width,
            "image_id": im_id_global,  # unique image_id in the dataset, for coco evaluation
            "scene_im_id": "{}/{}".format(scene_id, im_id),  # for evaluation
            "cam": cam,
            "img_type": "real",
        }
        # incremented for every visited image, even if it is filtered below
        im_id_global += 1

        insts = []
        for anno_i, anno in enumerate(gt_dicts[scene_id][str_im_id]):
            obj_id = anno["obj_id"]
            if ref.tudl.id2obj[obj_id] not in self.select_objs:
                continue
            cur_label = self.cat2label[obj_id]  # 0-based label

            R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
            t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
            pose = np.hstack([R, t.reshape(3, 1)])
            quat = mat2quat(R).astype("float32")
            allo_q = mat2quat(egocentric_to_allocentric(pose)[:3, :3]).astype("float32")

            # project the translation with the camera matrix -> 2D centroid
            proj = (record["cam"] @ t.T).T
            proj = proj[:2] / proj[2]

            bbox_visib = gt_info_dicts[scene_id][str_im_id][anno_i]["bbox_visib"]
            x1, y1, w, h = bbox_visib
            if self.filter_invalid and (h <= 1 or w <= 1):
                self.num_instances_without_valid_box += 1
                continue

            mask_file = osp.join(scene_root, "mask/{:06d}_{:06d}.png".format(im_id, anno_i))
            mask_visib_file = osp.join(
                scene_root, "mask_visib/{:06d}_{:06d}.png".format(im_id, anno_i)
            )
            assert osp.exists(mask_file), mask_file
            assert osp.exists(mask_visib_file), mask_visib_file
            # load mask visib  TODO: load both mask_visib and mask_full
            mask_single = mmcv.imread(mask_visib_file, "unchanged")
            area = mask_single.sum()
            if area < 3:  # filter out too small or nearly invisible instances
                self.num_instances_without_valid_segmentation += 1
                continue
            mask_rle = binary_mask_to_rle(mask_single, compressed=True)

            inst = {
                "category_id": cur_label,  # 0-based label
                "bbox": bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                "bbox_mode": BoxMode.XYWH_ABS,
                "pose": pose,
                "quat": quat,
                "trans": t,
                "allo_quat": allo_q,
                "centroid_2d": proj,  # absolute (cx, cy)
                "segmentation": mask_rle,
                "mask_full_file": mask_file,  # TODO: load as mask_full, rle
            }
            insts.append(inst)

        if not insts:  # filter im without anno
            continue
        record["annotations"] = insts
        dataset_dicts.append(record)

    if self.num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_segmentation
            )
        )
    if self.num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_box
            )
        )
    ##########################################################################
    if self.num_to_load > 0:
        # keep only the first num_to_load images
        self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
        dataset_dicts = dataset_dicts[: self.num_to_load]
    logger.info(
        "loaded dataset dicts, num_images: {}, using {}s".format(
            len(dataset_dicts), time.perf_counter() - t_start
        )
    )

    mmcv.dump(dataset_dicts, cache_path, protocol=4)
    logger.info("Dumped dataset_dicts to {}".format(cache_path))
    return dataset_dicts
def __call__(self):  # LM_Dataset
    """Load light-weight instance annotations of all images into a list of
    dicts in Detectron2 format.

    Do not load heavy data into memory in this file, since we will load the
    annotations of all images into memory.

    ``self.ann_files``, ``self.image_prefixes`` and ``self.xyz_prefixes`` are
    walked in lockstep; each annotation file lists one image id per line.
    The result is pickled into ``self.cache_dir`` under a name derived from
    an md5 of the dataset settings. When ``self.use_cache`` is set and that
    file already exists, it is loaded and returned directly.

    Side effects: resets and updates
    ``self.num_instances_without_valid_segmentation`` and
    ``self.num_instances_without_valid_box``; clamps ``self.num_to_load`` to
    the number of loaded images when it is positive.

    Returns:
        list[dict]: one record per image that kept at least one instance;
        the instances are stored under the ``"annotations"`` key.
    """
    # cache the dataset_dicts to avoid loading masks from files
    hashed_file_name = hashlib.md5(
        (
            "".join([str(fn) for fn in self.objs])
            + "dataset_dicts_{}_{}_{}_{}_{}".format(
                self.name,
                self.dataset_root,
                self.with_masks,
                self.with_depth,
                __name__,
            )
        ).encode("utf-8")
    ).hexdigest()
    cache_path = osp.join(
        self.cache_dir,
        "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
    )

    if osp.exists(cache_path) and self.use_cache:
        logger.info("load cached dataset dicts from {}".format(cache_path))
        return mmcv.load(cache_path)

    t_start = time.perf_counter()

    logger.info("loading dataset dicts: {}".format(self.name))
    self.num_instances_without_valid_segmentation = 0
    self.num_instances_without_valid_box = 0
    dataset_dicts = []
    # ######################################################
    assert len(self.ann_files) == len(
        self.image_prefixes
    ), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
    assert len(self.ann_files) == len(
        self.xyz_prefixes
    ), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
    for ann_file, scene_root, xyz_root in zip(
        tqdm(self.ann_files), self.image_prefixes, self.xyz_prefixes
    ):
        # linemod each scene is an object
        with open(ann_file, "r") as f_ann:
            # string ids; blank lines (e.g. a trailing newline) are skipped
            # because they cannot be converted to an image id
            indices = [line.strip("\r\n") for line in f_ann if line.strip()]
        gt_dict = mmcv.load(osp.join(scene_root, "scene_gt.json"))
        # bbox_obj, bbox_visib
        gt_info_dict = mmcv.load(osp.join(scene_root, "scene_gt_info.json"))
        cam_dict = mmcv.load(osp.join(scene_root, "scene_camera.json"))

        for im_id in tqdm(indices):
            int_im_id = int(im_id)
            str_im_id = str(int_im_id)
            # Format only the file-name template, then join: formatting the
            # joined path would misinterpret any brace inside scene_root.
            rgb_path = osp.join(scene_root, "rgb/{:06d}.png".format(int_im_id))
            assert osp.exists(rgb_path), rgb_path

            depth_path = osp.join(scene_root, "depth/{:06d}.png".format(int_im_id))

            # the scene id is the directory two levels above the image file
            scene_id = int(rgb_path.split("/")[-3])
            scene_im_id = f"{scene_id}/{int_im_id}"

            if self.debug_im_id is not None and self.debug_im_id != scene_im_id:
                continue

            K = np.array(cam_dict[str_im_id]["cam_K"], dtype=np.float32).reshape(3, 3)
            depth_factor = 1000.0 / cam_dict[str_im_id]["depth_scale"]
            if self.filter_scene and scene_id not in self.cat_ids:
                continue
            record = {
                "dataset_name": self.name,
                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
                "height": self.height,
                "width": self.width,
                "image_id": int_im_id,
                "scene_im_id": scene_im_id,  # for evaluation
                "cam": K,
                "depth_factor": depth_factor,
                "img_type": "real",
            }
            insts = []
            for anno_i, anno in enumerate(gt_dict[str_im_id]):
                obj_id = anno["obj_id"]
                if obj_id not in self.cat_ids:
                    continue
                cur_label = self.cat2label[obj_id]  # 0-based label
                R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
                t = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0
                pose = np.hstack([R, t.reshape(3, 1)])
                quat = mat2quat(R).astype("float32")

                # project the translation with the camera matrix -> 2D centroid
                proj = (record["cam"] @ t.T).T
                proj = proj[:2] / proj[2]

                bbox_visib = gt_info_dict[str_im_id][anno_i]["bbox_visib"]
                x1, y1, w, h = bbox_visib
                if self.filter_invalid and (h <= 1 or w <= 1):
                    self.num_instances_without_valid_box += 1
                    continue

                mask_file = osp.join(
                    scene_root, "mask/{:06d}_{:06d}.png".format(int_im_id, anno_i)
                )
                mask_visib_file = osp.join(
                    scene_root, "mask_visib/{:06d}_{:06d}.png".format(int_im_id, anno_i)
                )
                assert osp.exists(mask_file), mask_file
                assert osp.exists(mask_visib_file), mask_visib_file
                # load mask visib  TODO: load both mask_visib and mask_full
                mask_single = mmcv.imread(mask_visib_file, "unchanged")
                area = mask_single.sum()
                if area < 3:  # filter out too small or nearly invisible instances
                    self.num_instances_without_valid_segmentation += 1
                    continue
                mask_rle = binary_mask_to_rle(mask_single, compressed=True)

                inst = {
                    "category_id": cur_label,  # 0-based label
                    "bbox": bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                    "bbox_mode": BoxMode.XYWH_ABS,
                    "pose": pose,
                    "quat": quat,
                    "trans": t,
                    "centroid_2d": proj,  # absolute (cx, cy)
                    "segmentation": mask_rle,
                    "mask_full_file": mask_file,  # TODO: load as mask_full, rle
                }
                # xyz files are only attached when the dataset name does not
                # contain "test"
                if "test" not in self.name:
                    xyz_path = osp.join(xyz_root, f"{int_im_id:06d}_{anno_i:06d}.pkl")
                    assert osp.exists(xyz_path), xyz_path
                    inst["xyz_path"] = xyz_path

                model_info = self.models_info[str(obj_id)]
                inst["model_info"] = model_info
                # TODO: using full mask and full xyz
                for key in ["bbox3d_and_center"]:
                    inst[key] = self.models[cur_label][key]
                insts.append(inst)

            if not insts:  # filter im without anno
                continue
            record["annotations"] = insts
            dataset_dicts.append(record)

    if self.num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_segmentation
            )
        )
    if self.num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_box
            )
        )
    ##########################################################################
    if self.num_to_load > 0:
        # keep only the first num_to_load images
        self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
        dataset_dicts = dataset_dicts[: self.num_to_load]
    logger.info(
        "loaded {} dataset dicts, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
    )

    mmcv.mkdir_or_exist(osp.dirname(cache_path))
    mmcv.dump(dataset_dicts, cache_path, protocol=4)
    logger.info("Dumped dataset_dicts to {}".format(cache_path))
    return dataset_dicts
def _load_from_idx_file(self, idx_file, image_root):
    """Build Detectron2-style records for the images listed in ``idx_file``.

    Args:
        idx_file: text file with one ``scene_id/im_id`` entry per line.
        image_root: directory whose ``{scene_id:06d}`` sub-directories
            contain ``scene_gt.json``, ``scene_gt_info.json`` and
            ``scene_camera.json``.

    Returns:
        list[dict]: one record per image that kept at least one instance;
        the instances are stored under the ``"annotations"`` key. Instances
        carry ``"segmentation"`` and ``"mask_full_file"`` only when
        ``self.with_masks`` is set.

    Side effects: ``self._unique_im_id`` is incremented once per returned
    record.
    """
    xyz_root = osp.join(image_root, "xyz_crop")

    scene_im_ids = []  # store tuples of (scene_id, im_id)
    with open(idx_file, "r") as f:
        for line in f:
            line = line.strip("\r\n")
            if not line.strip():
                # a blank line (e.g. trailing newline) carries no ids
                continue
            line_split = line.split("/")
            scene_im_ids.append((int(line_split[0]), int(line_split[1])))

    # load the per-scene annotation files once per scene
    scene_gt_dicts = {}
    scene_gt_info_dicts = {}
    scene_cam_dicts = {}
    for scene_id in sorted({sid for sid, _ in scene_im_ids}):
        scene_gt_file = osp.join(image_root, f"{scene_id:06d}/scene_gt.json")
        assert osp.exists(scene_gt_file), scene_gt_file
        scene_gt_dicts[scene_id] = mmcv.load(scene_gt_file)

        scene_gt_info_file = osp.join(image_root, f"{scene_id:06d}/scene_gt_info.json")
        assert osp.exists(scene_gt_info_file), scene_gt_info_file
        scene_gt_info_dicts[scene_id] = mmcv.load(scene_gt_info_file)

        scene_cam_file = osp.join(image_root, f"{scene_id:06d}/scene_camera.json")
        assert osp.exists(scene_cam_file), scene_cam_file
        scene_cam_dicts[scene_id] = mmcv.load(scene_cam_file)
    ######################################################
    scene_im_ids = sorted(scene_im_ids)  # sort to make it reproducible
    dataset_dicts = []

    num_instances_without_valid_segmentation = 0
    num_instances_without_valid_box = 0

    for scene_id, im_id in tqdm(scene_im_ids):
        rgb_path = osp.join(image_root, f"{scene_id:06d}/rgb/{im_id:06d}.png")
        assert osp.exists(rgb_path), rgb_path
        str_im_id = str(im_id)

        scene_im_id = f"{scene_id}/{im_id}"

        # for ycbv/tless, load cam K from image infos
        cam_anno = np.array(
            scene_cam_dicts[scene_id][str_im_id]["cam_K"], dtype=np.float32
        ).reshape(3, 3)
        depth_factor = 1000.0 / scene_cam_dicts[scene_id][str_im_id]["depth_scale"]
        img_type = "syn" if "/train_synt/" in rgb_path else "real"
        record = {
            "dataset_name": self.name,
            "file_name": osp.relpath(rgb_path, PROJ_ROOT),
            "height": self.height,
            "width": self.width,
            "image_id": self._unique_im_id,
            "scene_im_id": scene_im_id,  # for evaluation
            "cam": cam_anno,  # self.cam,
            "depth_factor": depth_factor,
            "img_type": img_type,
        }

        if self.with_depth:
            depth_file = osp.join(image_root, f"{scene_id:06d}/depth/{im_id:06d}.png")
            assert osp.exists(depth_file), depth_file
            record["depth_file"] = osp.relpath(depth_file, PROJ_ROOT)

        insts = []
        anno_dict_list = scene_gt_dicts[scene_id][str_im_id]
        info_dict_list = scene_gt_info_dicts[scene_id][str_im_id]
        for anno_i, anno in enumerate(anno_dict_list):
            info = info_dict_list[anno_i]
            obj_id = anno["obj_id"]
            if obj_id not in self.cat_ids:
                continue
            # 0-based label now
            cur_label = self.cat2label[obj_id]
            ################ pose ###########################
            R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
            trans = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0  # mm->m
            pose = np.hstack([R, trans.reshape(3, 1)])
            quat = mat2quat(pose[:3, :3])

            ############# bbox ############################
            # `record` is built above without a "cam_old" key, so the box
            # always comes from the annotation info; it is converted from
            # xywh to xyxy and clipped to the image extent.
            x1, y1, w, h = info["bbox_obj"]
            x2 = x1 + w
            y2 = y1 + h
            x1 = max(min(x1, self.width), 0)
            y1 = max(min(y1, self.height), 0)
            x2 = max(min(x2, self.width), 0)
            y2 = max(min(y2, self.height), 0)
            bbox = [x1, y1, x2, y2]
            if self.filter_invalid:
                bw = bbox[2] - bbox[0]
                bh = bbox[3] - bbox[1]
                if bh <= 1 or bw <= 1:
                    num_instances_without_valid_box += 1
                    continue

            ############## mask #######################
            mask_rle = None
            mask_full_file = None
            if self.with_masks:  # either list[list[float]] or dict(RLE)
                mask_visib_file = osp.join(
                    image_root,
                    f"{scene_id:06d}/mask_visib/{im_id:06d}_{anno_i:06d}.png",
                )
                assert osp.exists(mask_visib_file), mask_visib_file
                mask = mmcv.imread(mask_visib_file, "unchanged")
                area = mask.sum()
                if area < 3 and self.filter_invalid:
                    num_instances_without_valid_segmentation += 1
                    continue
                mask_rle = binary_mask_to_rle(mask)

                mask_full_file = osp.join(
                    image_root,
                    f"{scene_id:06d}/mask/{im_id:06d}_{anno_i:06d}.png",
                )
                assert osp.exists(mask_full_file), mask_full_file

            proj = (self.cam @ trans.T).T  # NOTE: use self.cam here
            proj = proj[:2] / proj[2]

            inst = {
                "category_id": cur_label,  # 0-based label
                "bbox": bbox,  # TODO: load both bbox_obj and bbox_visib
                "bbox_mode": BoxMode.XYXY_ABS,
                "pose": pose,
                "quat": quat,
                "trans": trans,
                "centroid_2d": proj,  # absolute (cx, cy)
            }
            if self.with_masks:
                # Only attach mask entries that were actually loaded for this
                # instance; without this guard the names would be undefined
                # when masks are disabled.
                inst["segmentation"] = mask_rle
                inst["mask_full_file"] = mask_full_file  # TODO: load as mask_full, rle

            if self.with_xyz:
                xyz_path = osp.join(xyz_root, f"{scene_id:06d}/{im_id:06d}_{anno_i:06d}-xyz.pkl")
                assert osp.exists(xyz_path), xyz_path
                inst["xyz_path"] = xyz_path

            model_info = self.models_info[str(obj_id)]
            inst["model_info"] = model_info
            # TODO: using full mask and full xyz
            for key in ["bbox3d_and_center"]:
                inst[key] = self.models[cur_label][key]
            insts.append(inst)

        if not insts:  # and self.filter_invalid:
            continue
        record["annotations"] = insts
        dataset_dicts.append(record)
        self._unique_im_id += 1

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation
            )
        )
    if num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_box
            )
        )
    return dataset_dicts
def __call__(self):  # LM_SYN_IMGN_Dataset
    """Load light-weight instance annotations of all images into a list of
    dicts in Detectron2 format.

    Do not load heavy data into memory in this file, since we will load the
    annotations of all images into memory.

    ``self.ann_files``, ``self.image_prefixes`` and ``self.xyz_prefixes`` are
    walked in lockstep; the last whitespace-separated token of each
    annotation line is used as the image id. When ``self.n_per_obj`` is
    positive, that many ids are sampled at equal spacing per annotation
    file. Every record holds exactly one instance, whose mask and box are
    derived from the non-zero pixels of the depth image.

    The result is pickled into ``self.dataset_root`` under a name derived
    from an md5 of the dataset settings. When ``self.use_cache`` is set and
    that file already exists, it is loaded and returned directly.

    Side effects: resets and updates
    ``self.num_instances_without_valid_segmentation`` and
    ``self.num_instances_without_valid_box``.

    Returns:
        list[dict]: one record per kept image, with a single-element
        ``"annotations"`` list.
    """
    # cache the dataset_dicts to avoid loading masks from files
    hashed_file_name = hashlib.md5(
        (
            "".join([str(fn) for fn in self.objs])
            + "dataset_dicts_{}_{}_{}_{}_{}_{}".format(
                self.name,
                self.dataset_root,
                self.with_masks,
                self.with_depth,
                self.n_per_obj,
                __name__,
            )
        ).encode("utf-8")
    ).hexdigest()
    cache_path = osp.join(
        self.dataset_root,
        "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name),
    )

    if osp.exists(cache_path) and self.use_cache:
        logger.info("load cached dataset dicts from {}".format(cache_path))
        return mmcv.load(cache_path)

    t_start = time.perf_counter()

    logger.info("loading dataset dicts: {}".format(self.name))
    self.num_instances_without_valid_segmentation = 0
    self.num_instances_without_valid_box = 0
    dataset_dicts = []
    #######################################################
    assert len(self.ann_files) == len(
        self.image_prefixes
    ), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
    assert len(self.ann_files) == len(
        self.xyz_prefixes
    ), f"{len(self.ann_files)} != {len(self.xyz_prefixes)}"
    for ann_file, scene_root, xyz_root in zip(
        self.ann_files, self.image_prefixes, self.xyz_prefixes
    ):
        # linemod each scene is an object
        with open(ann_file, "r") as f_ann:
            # string ids; blank lines (e.g. a trailing newline) are skipped
            # because they contain no token to use as an id
            indices = [line.strip("\r\n").split()[-1] for line in f_ann if line.strip()]

        # sample uniformly (equal space)
        if self.n_per_obj > 0:
            sample_num = min(self.n_per_obj, len(indices))
            sel_indices_idx = np.linspace(0, len(indices) - 1, sample_num, dtype=np.int32)
            sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
        else:
            sel_indices = indices

        for im_id in tqdm(sel_indices):
            # Format only the file-name template, then join: formatting the
            # joined path would misinterpret any brace inside scene_root.
            rgb_path = osp.join(scene_root, "{}-color.png".format(im_id))
            assert osp.exists(rgb_path), rgb_path

            depth_path = osp.join(scene_root, "{}-depth.png".format(im_id))

            # the object name is the first path component of the image id
            obj_name = im_id.split("/")[0]
            if obj_name == "benchviseblue":
                obj_name = "benchvise"
            if self.filter_scene and obj_name not in self.objs:
                continue
            obj_id = ref.lm_full.obj2id[obj_name]

            record = {
                "dataset_name": self.name,
                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
                "height": self.height,
                "width": self.width,
                "image_id": im_id.split("/")[-1],
                "scene_im_id": im_id,
                "cam": self.cam,
                "img_type": "syn",
            }

            cur_label = self.obj2label[obj_name]  # 0-based label
            pose_path = osp.join(scene_root, "{}-pose.txt".format(im_id))
            pose = np.loadtxt(pose_path, skiprows=1)
            R = pose[:3, :3]
            t = pose[:3, 3]
            quat = mat2quat(R).astype("float32")

            # project the translation with the camera matrix -> 2D centroid
            proj = (record["cam"] @ t.T).T
            proj = proj[:2] / proj[2]

            # The object mask is every pixel with non-zero depth; the raw
            # depth values are only compared against zero, so no rescaling
            # is needed.
            depth = mmcv.imread(depth_path, "unchanged")
            mask = (depth > 0).astype(np.uint8)

            bbox_obj = mask2bbox_xywh(mask)
            x1, y1, w, h = bbox_obj
            if self.filter_invalid and (h <= 1 or w <= 1):
                self.num_instances_without_valid_box += 1
                continue
            area = mask.sum()
            if area < 3:  # filter out too small or nearly invisible instances
                self.num_instances_without_valid_segmentation += 1
                continue
            mask_rle = binary_mask_to_rle(mask, compressed=True)

            xyz_path = osp.join(xyz_root, f"{im_id}-xyz.pkl")
            assert osp.exists(xyz_path), xyz_path
            inst = {
                "category_id": cur_label,  # 0-based label
                "bbox": bbox_obj,  # TODO: load both bbox_obj and bbox_visib
                "bbox_mode": BoxMode.XYWH_ABS,
                "pose": pose,
                "quat": quat,
                "trans": t,
                "centroid_2d": proj,  # absolute (cx, cy)
                "segmentation": mask_rle,
                "xyz_path": xyz_path,
            }

            model_info = self.models_info[str(obj_id)]
            inst["model_info"] = model_info
            # TODO: using full mask and full xyz
            for key in ["bbox3d_and_center"]:
                inst[key] = self.models[cur_label][key]

            record["annotations"] = [inst]
            dataset_dicts.append(record)

    if self.num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_segmentation
            )
        )
    if self.num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_box
            )
        )
    ##########################################################################
    # NOTE: no num_to_load truncation is applied in this loader; the amount
    # of data is controlled by n_per_obj above.
    logger.info(
        "loaded dataset dicts, num_images: {}, using {}s".format(
            len(dataset_dicts), time.perf_counter() - t_start
        )
    )
    mmcv.dump(dataset_dicts, cache_path, protocol=4)
    logger.info("Dumped dataset_dicts to {}".format(cache_path))
    return dataset_dicts
def save_result_of_dataset(cfg, model, data_loader, output_dir, dataset_name):
    """Run model (in eval mode) on the data_loader and save predictions.

    Predictions are collected per input image, keyed by the input's
    ``"scene_im_id"``, and dumped to ``<output_dir>/results.pkl``.

    Args:
        cfg: config (not used inside this function).
        model (nn.Module): a module which accepts an object from
            `data_loader` and returns some outputs. It will be temporarily
            set to `eval` mode. If you wish to evaluate a model in
            `training` mode instead, you can wrap the given model and
            override its behavior of `.eval()` and `.train()`.
        data_loader: an iterable object with a length. The elements it
            generates will be the inputs to the model.
        output_dir: directory the result file is written to; created if it
            does not exist.
        dataset_name: name used to look up the dataset metadata
            (``ref_key`` and ``objs``).

    Returns:
        None. The results are written to disk.
    """
    cpu_device = torch.device("cpu")
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} images".format(len(data_loader)))

    # NOTE: dataset name should be the same as TRAIN to get the correct meta
    _metadata = MetadataCatalog.get(dataset_name)
    data_ref = ref.__dict__[_metadata.ref_key]
    obj_names = _metadata.objs

    result_name = "results.pkl"
    mmcv.mkdir_or_exist(output_dir)
    result_path = osp.join(output_dir, result_name)

    total = len(data_loader)  # inference data loader must have a fixed length
    results = OrderedDict()
    VIS = False  # debugging switch: show predictions with OpenCV

    logging_interval = 50
    # the first num_warmup iterations are excluded from the timing statistics
    num_warmup = min(5, logging_interval - 1, total - 1)
    start_time = time.perf_counter()
    total_compute_time = 0
    with inference_context(model), torch.no_grad():
        for idx, inputs in enumerate(data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
            if VIS:
                images_ori = [_input["image"].clone() for _input in inputs]
            start_compute_time = time.perf_counter()
            outputs = model(inputs)  # NOTE: do model inference
            # Wait for pending GPU work so the measured time covers the whole
            # forward pass; skipped when CUDA is unavailable, where the call
            # would raise.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            cur_compute_time = time.perf_counter() - start_compute_time
            total_compute_time += cur_compute_time

            # NOTE: process results
            for i, (_input, output) in enumerate(zip(inputs, outputs)):
                instances = output["instances"]
                HAS_MASK = False
                if instances.has("pred_masks"):
                    HAS_MASK = True
                    pred_masks = instances.pred_masks  # (#objs, imH, imW)
                    pred_masks = pred_masks.detach().cpu().numpy()
                    # NOTE: time consuming step
                    rles = [binary_mask_to_rle(pred_mask) for pred_mask in pred_masks]

                instances = instances.to(cpu_device)
                boxes = instances.pred_boxes.tensor.clone().detach().cpu().numpy()  # xyxy

                scores = instances.scores.tolist()
                labels = instances.pred_classes.detach().cpu().numpy()

                # map predicted 0-based labels back to dataset object ids
                obj_ids = [data_ref.obj2id[obj_names[int(label)]] for label in labels]
                ego_quats = instances.pred_ego_quats.detach().cpu().numpy()
                ego_rots = [quat2mat(ego_quat) for ego_quat in ego_quats]
                transes = instances.pred_transes.detach().cpu().numpy()

                cur_results = {
                    # batch compute time split evenly over the batch
                    "time": cur_compute_time / len(inputs),
                    "obj_ids": obj_ids,
                    "scores": scores,
                    "boxes": boxes,  # xyxy
                    "Rs": ego_rots,
                    "ts": transes,  # m
                }
                if HAS_MASK:
                    cur_results["masks"] = rles

                if VIS:
                    import cv2
                    from lib.vis_utils.image import vis_image_mask_bbox_cv2

                    image = (
                        images_ori[i].detach().cpu().numpy().transpose(1, 2, 0) + 0.5
                    ).astype("uint8")
                    img_vis = vis_image_mask_bbox_cv2(
                        image,
                        pred_masks,
                        boxes,
                        labels=[obj_names[int(label)] for label in labels],
                    )
                    cv2.imshow("img", img_vis.astype("uint8"))
                    cv2.waitKey()
                results[_input["scene_im_id"]] = cur_results

            if (idx + 1) % logging_interval == 0:
                duration = time.perf_counter() - start_time
                seconds_per_img = duration / (idx + 1 - num_warmup)
                eta = datetime.timedelta(
                    seconds=int(seconds_per_img * (total - num_warmup) - duration)
                )
                logger.info(
                    "Inference done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    )
                )

    # Measure the time only for this worker (before the synchronization barrier)
    total_time = int(time.perf_counter() - start_time)
    total_time_str = str(datetime.timedelta(seconds=total_time))
    # NOTE this format is parsed by grep
    logger.info(
        "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format(
            total_compute_time_str,
            total_compute_time / (total - num_warmup),
            num_devices,
        )
    )

    mmcv.dump(results, result_path)
    logger.info("Results saved to {}".format(result_path))
def __call__(self):  # LM_BLENDER
    """Load light-weight instance annotations of all images into a list of
    dicts in Detectron2 format.

    Do not load heavy data into memory in this file, since we will load the
    annotations of all images into memory.

    The result is cached as a pickle under ``self.cache_dir``; the cache file
    name embeds an md5 hash of the object list and of the loader settings
    (name, dataset root, mask/depth/xyz flags, ``n_per_obj``, module name).
    When ``self.use_cache`` is set and the cache file exists, it is returned
    directly.

    Side effects:
        Resets and updates ``self.num_instances_without_valid_segmentation``
        and ``self.num_instances_without_valid_box``, and writes the cache
        file.

    Returns:
        list[dict]: one record per kept image. Each record holds the image
        meta data ("file_name", "depth_file", "cam", ...) and a
        single-element "annotations" list (each image contains one object).
    """
    # cache the dataset_dicts to avoid loading masks from files
    hashed_file_name = hashlib.md5(
        (
            "".join([str(fn) for fn in self.objs])
            + "dataset_dicts_{}_{}_{}_{}_{}_{}_{}".format(
                self.name,
                self.dataset_root,
                self.with_masks,
                self.with_depth,
                self.with_xyz,
                self.n_per_obj,
                __name__,
            )
        ).encode("utf-8")
    ).hexdigest()
    cache_path = osp.join(self.cache_dir, "dataset_dicts_{}_{}.pkl".format(self.name, hashed_file_name))

    if osp.exists(cache_path) and self.use_cache:
        logger.info("load cached dataset dicts from {}".format(cache_path))
        return mmcv.load(cache_path)

    t_start = time.perf_counter()
    logger.info("loading dataset dicts: {}".format(self.name))
    self.num_instances_without_valid_segmentation = 0
    self.num_instances_without_valid_box = 0
    dataset_dicts = []
    #######################################################
    assert len(self.ann_files) == len(self.image_prefixes), f"{len(self.ann_files)} != {len(self.image_prefixes)}"
    for ann_file, scene_root in zip(tqdm(self.ann_files), self.image_prefixes):
        # each scene is an object
        assert osp.exists(ann_file), ann_file

        # The object is determined by the annotation file name, so resolve it
        # (and decide whether to skip it) once per file instead of per image.
        obj_name = osp.basename(ann_file).split("_")[0]  # obj_gt.json
        if obj_name not in self.objs:
            continue
        obj_id = ref.lm_full.obj2id[obj_name]
        cur_label = self.obj2label[obj_name]  # 0-based label

        scene_gt_dict = mmcv.load(ann_file)
        # sample uniformly (equal space)
        indices = list(scene_gt_dict.keys())
        if self.n_per_obj > 0:
            sample_num = min(self.n_per_obj, len(scene_gt_dict))
            sel_indices_idx = np.linspace(0, len(scene_gt_dict) - 1, sample_num, dtype=np.int32)
            sel_indices = [indices[int(_i)] for _i in sel_indices_idx]
        else:
            # non-positive n_per_obj: keep every image
            sel_indices = indices

        for str_im_id in tqdm(sel_indices):
            int_im_id = int(str_im_id)
            # Format only the file name, so that braces in scene_root cannot
            # corrupt the path.
            rgb_path = osp.join(scene_root, "{}.jpg".format(str_im_id))
            assert osp.exists(rgb_path), rgb_path

            depth_path = osp.join(scene_root, "{}_depth_opengl.png".format(str_im_id))

            record = {
                "dataset_name": self.name,
                "file_name": osp.relpath(rgb_path, PROJ_ROOT),
                "depth_file": osp.relpath(depth_path, PROJ_ROOT),
                "height": self.height,
                "width": self.width,
                "image_id": int_im_id,
                "scene_im_id": f"{obj_id}/{int_im_id}",
                "cam": self.cam,
                "img_type": "syn_blender",  # has bg
            }

            anno = scene_gt_dict[str_im_id][0]  # only one object
            R = np.array(anno["cam_R_m2c"]).reshape(3, 3)
            # presumably mm -> m (BOP convention) -- TODO confirm
            t = np.array(anno["cam_t_m2c"]).reshape(-1) / 1000
            pose = np.hstack([R, t.reshape(3, 1)])
            quat = mat2quat(R).astype("float32")

            # project the object translation with the camera matrix to get
            # the 2D centroid
            proj = (record["cam"] @ t.T).T
            proj = proj[:2] / proj[2]

            bbox_visib = anno["bbox_visib"]
            _, _, w, h = bbox_visib
            if self.filter_invalid:
                if h <= 1 or w <= 1:
                    self.num_instances_without_valid_box += 1
                    continue

            mask_path = osp.join(scene_root, "{}_mask_opengl.png".format(str_im_id))
            mask = mmcv.imread(mask_path, "unchanged")
            mask = (mask > 0).astype(np.uint8)
            area = mask.sum()
            if area < 3:  # filter out too small or nearly invisible instances
                self.num_instances_without_valid_segmentation += 1
                continue
            mask_rle = binary_mask_to_rle(mask, compressed=True)

            # only the path is stored; the xyz data itself is not loaded here
            xyz_path = osp.join(scene_root, "{}_xyz_bop.pkl".format(str_im_id))
            assert osp.exists(xyz_path), xyz_path

            visib_fract = anno.get("visib_fract", 1.0)

            inst = {
                "category_id": cur_label,  # 0-based label
                "bbox": bbox_visib,  # TODO: load both bbox_obj and bbox_visib
                "bbox_mode": BoxMode.XYWH_ABS,
                "pose": pose,
                "quat": quat,
                "trans": t,
                "centroid_2d": proj,  # absolute (cx, cy)
                "segmentation": mask_rle,
                "xyz_path": xyz_path,
                "visib_fract": visib_fract,
            }
            model_info = self.models_info[str(obj_id)]
            inst["model_info"] = model_info
            for key in ["bbox3d_and_center"]:
                inst[key] = self.models[cur_label][key]

            record["annotations"] = [inst]
            dataset_dicts.append(record)

    if self.num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                self.num_instances_without_valid_segmentation
            )
        )
    if self.num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(self.num_instances_without_valid_box)
        )
    ##########################################################################
    # if self.num_to_load > 0:
    #     self.num_to_load = min(int(self.num_to_load), len(dataset_dicts))
    #     random.shuffle(dataset_dicts)
    #     dataset_dicts = dataset_dicts[: self.num_to_load]
    logger.info(
        "loaded dataset dicts, num_images: {}, using {}s".format(len(dataset_dicts), time.perf_counter() - t_start)
    )

    # make sure the cache directory exists before writing the cache file
    mmcv.mkdir_or_exist(osp.dirname(cache_path))
    mmcv.dump(dataset_dicts, cache_path, protocol=4)
    logger.info("Dumped dataset_dicts to {}".format(cache_path))
    return dataset_dicts
def _load_from_idx_file(self, idx_file, image_root):
    """Build Detectron2-style dataset dicts for the images listed in an index file.

    idx_file: the scene/image ids, one ``scene_id/im_id`` entry per line
        (blank lines are ignored)
    image_root/scene contains:
        scene_gt.json
        scene_gt_info.json
        scene_camera.json

    Reads and increments ``self._unique_im_id`` (one id per kept image), so
    it must be initialized before this method is called.

    Returns:
        list[dict]: one record per image that has at least one kept
        instance. The "segmentation" and "mask_full_file" entries of an
        instance are only present when ``self.with_masks`` is set.
    """
    scene_gt_dicts = {}
    scene_gt_info_dicts = {}
    scene_cam_dicts = {}
    scene_im_ids = []  # store tuples of (scene_id, im_id)

    # per-scene annotation files and the dict each one is cached in
    scene_files = (
        ("scene_gt.json", scene_gt_dicts),
        ("scene_gt_info.json", scene_gt_info_dicts),
        ("scene_camera.json", scene_cam_dicts),
    )

    with open(idx_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            line_split = line.split("/")
            scene_id = int(line_split[0])
            im_id = int(line_split[1])
            scene_im_ids.append((scene_id, im_id))
            # load every per-scene json only once
            for file_name, store in scene_files:
                if scene_id not in store:
                    scene_file = osp.join(image_root, f"{scene_id:06d}/{file_name}")
                    assert osp.exists(scene_file), scene_file
                    store[scene_id] = mmcv.load(scene_file)
    ######################################################
    scene_im_ids = sorted(scene_im_ids)  # sort to make it reproducible
    dataset_dicts = []

    num_instances_without_valid_segmentation = 0
    num_instances_without_valid_box = 0

    for scene_id, im_id in tqdm(scene_im_ids):
        rgb_path = osp.join(image_root, f"{scene_id:06d}/rgb/{im_id:06d}.png")
        assert osp.exists(rgb_path), rgb_path
        # for ycbv/tless, load cam K from image infos
        cam_anno = np.array(scene_cam_dicts[scene_id][str(im_id)]["cam_K"], dtype="float32").reshape(3, 3)
        img_type = "syn" if "/train_synt/" in rgb_path else "real"
        record = {
            "dataset_name": self.name,
            "file_name": osp.relpath(rgb_path, PROJ_ROOT),
            "height": self.height,
            "width": self.width,
            "image_id": self._unique_im_id,
            "scene_im_id": "{}/{}".format(scene_id, im_id),  # for evaluation
            "cam": cam_anno,  # self.cam,
            "img_type": img_type,
        }

        if self.with_depth:
            depth_file = osp.join(image_root, f"{scene_id:06d}/depth/{im_id:06d}.png")
            assert osp.exists(depth_file), depth_file
            record["depth_file"] = osp.relpath(depth_file, PROJ_ROOT)

        insts = []
        anno_dict_list = scene_gt_dicts[scene_id][str(im_id)]
        info_dict_list = scene_gt_info_dicts[scene_id][str(im_id)]
        for anno_i, anno in enumerate(anno_dict_list):
            info = info_dict_list[anno_i]
            obj_id = anno["obj_id"]
            if obj_id not in self.cat_ids:
                continue
            # 0-based label now
            cur_label = self.cat2label[obj_id]
            ################ pose ###########################
            R = np.array(anno["cam_R_m2c"], dtype="float32").reshape(3, 3)
            trans = np.array(anno["cam_t_m2c"], dtype="float32") / 1000.0  # mm->m
            pose = np.hstack([R, trans.reshape(3, 1)])
            quat = mat2quat(pose[:3, :3])
            allo_q = mat2quat(egocentric_to_allocentric(pose)[:3, :3])

            ############# bbox ############################
            # convert XYWH to XYXY and clamp the corners to the image bounds
            x1, y1, w, h = info["bbox_obj"]
            x2 = x1 + w
            y2 = y1 + h
            x1 = max(min(x1, self.width), 0)
            y1 = max(min(y1, self.height), 0)
            x2 = max(min(x2, self.width), 0)
            y2 = max(min(y2, self.height), 0)
            bbox = [x1, y1, x2, y2]
            if self.filter_invalid:
                bw = bbox[2] - bbox[0]
                bh = bbox[3] - bbox[1]
                if bh <= 1 or bw <= 1:
                    num_instances_without_valid_box += 1
                    continue

            ############## mask #######################
            # Collected separately so that instances built without masks do
            # not reference undefined (or stale) mask variables.
            mask_fields = {}
            if self.with_masks:  # either list[list[float]] or dict(RLE)
                mask_visib_file = osp.join(image_root, f"{scene_id:06d}/mask_visib/{im_id:06d}_{anno_i:06d}.png")
                assert osp.exists(mask_visib_file), mask_visib_file
                mask = mmcv.imread(mask_visib_file, "unchanged")
                if mask.sum() < 1 and self.filter_invalid:
                    num_instances_without_valid_segmentation += 1
                    continue
                mask_rle = binary_mask_to_rle(mask)

                mask_full_file = osp.join(image_root, f"{scene_id:06d}/mask/{im_id:06d}_{anno_i:06d}.png")
                assert osp.exists(mask_full_file), mask_full_file
                mask_fields = {
                    "segmentation": mask_rle,
                    "mask_full_file": mask_full_file,  # TODO: load as mask_full, rle
                }

            # NOTE(review): record["cam"] holds the per-image K, while the
            # centroid is projected with self.cam -- confirm this is intended.
            proj = (self.cam @ trans.T).T  # NOTE: use self.cam here
            proj = proj[:2] / proj[2]

            inst = {
                "category_id": cur_label,  # 0-based label
                "bbox": bbox,  # TODO: load both bbox_obj and bbox_visib
                "bbox_mode": BoxMode.XYXY_ABS,
                "quat": quat,
                "trans": trans,
                "allo_quat": allo_q,
                "centroid_2d": proj,  # absolute (cx, cy)
            }
            inst.update(mask_fields)
            insts.append(inst)
        if len(insts) == 0:  # and self.filter_invalid:
            # skipped images do not consume a unique image id
            continue
        record["annotations"] = insts
        dataset_dicts.append(record)
        self._unique_im_id += 1

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation
            )
        )
    if num_instances_without_valid_box > 0:
        logger.warning(
            "Filtered out {} instances without valid box. "
            "There might be issues in your dataset generation process.".format(num_instances_without_valid_box)
        )
    return dataset_dicts