def get_depth(self, scene_id, im_id):
    """Load the depth image of one test frame, or a blank one if it is missing.

    The file is looked up at
    ``<cfg.LM_PATH>/test/<scene_id:06d>/depth/<im_id:06d>.png``.

    Args:
        scene_id: Integer scene identifier (zero-padded to six digits).
        im_id: Integer image identifier (zero-padded to six digits).

    Returns:
        The result of ``bop_inout.load_depth`` when the file exists,
        otherwise a ``(480, 640)`` ``float32`` array of zeros.
    """
    depth_file = os.path.join(
        cfg.LM_PATH, "test", f"{scene_id:06d}", f"depth/{im_id:06d}.png")

    if not os.path.exists(depth_file):
        # Best effort: report the gap and hand back an all-zero depth map.
        print(f"missing file: {depth_file}")
        return np.zeros((480, 640), dtype=np.float32)

    return bop_inout.load_depth(depth_file)
def create_patch_pair(depth_path, mask_path, im_cam, gt, save_name, md_pcd_pts):
    """Create matching image/model local patches for one object instance.

    The depth image is masked to the object, back-projected to a point cloud
    and voxel-downsampled.  When the number of remaining points is in
    ``PCD_PTS_RANGE`` (or ``IS_TARGET`` is set), reference points are picked on
    the image cloud with farthest point sampling, mapped into the model frame
    with the ground-truth pose, and local patches are produced for both clouds
    through ``create_local_patches``.

    Args:
        depth_path: Path handed to ``io.load_depth``.
        mask_path: Path handed to ``io.load_im``; depth pixels where the mask
            equals 0 are set to 0 before back-projection.
        im_cam: Mapping providing ``'cam_K'`` and ``'depth_scale'``.
        gt: Mapping providing ``'cam_R_m2c'`` and ``'cam_t_m2c'``
            (presumably a 3x3 rotation and a 3x1 translation -- confirm
            against the caller).
        save_name: Name appended to ``image/`` and ``model/`` for the outputs.
        md_pcd_pts: Array of model points; the computed model reference points
            are prepended to it (the caller's array is not modified).

    Returns:
        ``[save_name, number_of_reference_points]`` when patches were created,
        otherwise an empty list.
    """
    raw_depth = io.load_depth(depth_path)
    mask = io.load_im(mask_path)

    # Keep only the depth values that fall on the object mask.
    object_depth = masked_where(mask == 0.0, raw_depth).filled(0.0)
    img_pcd = PointCloud.create_from_depth_image(
        depth=Image(object_depth),
        intrinsic=PHCamIntrinsic(*IM_SIZE, *[im_cam['cam_K'][i] for i in K]),
        depth_scale=im_cam['depth_scale'],
        depth_trunc=150000)

    # voxel_down_sample() returns a NEW point cloud and leaves the receiver
    # untouched, so the result has to be rebound; the original code discarded
    # it, which silently skipped the downsampling step.
    img_pcd = img_pcd.voxel_down_sample(VOXEL_SIZE)

    n_img_pts = np.asarray(img_pcd.points).shape[0]
    if not (n_img_pts in PCD_PTS_RANGE or IS_TARGET):
        return []

    cam_R, cam_t = gt['cam_R_m2c'], gt['cam_t_m2c']

    # Select reference points on image using farthest point sampling
    img_pcd_pts_fps = torch.as_tensor(img_pcd.points).to(DEVICE)
    img_ref_idxs = fps(img_pcd_pts_fps, ratio=FPS_RATIO).to('cpu').numpy()

    # Calculate model reference points: undo the model-to-camera pose on the
    # (row-vector) image reference points.
    img_ref_pts = np.asarray(img_pcd.points)[img_ref_idxs]
    md_ref_pts = (img_ref_pts - cam_t.T) @ np.linalg.inv(cam_R).T

    # Recreate model point cloud; the reference points come first, so their
    # indices in the combined cloud are simply 0..n_ref-1.
    md_ref_idxs = np.arange(md_ref_pts.shape[0])
    md_pcd_pts = np.concatenate([md_ref_pts, md_pcd_pts], axis=0)
    md_pcd = PointCloud()
    md_pcd.points = Vector3dVector(md_pcd_pts)

    # Calculate and save PPFs
    img_save_path = f'image/{save_name}'
    create_local_patches(img_pcd, img_ref_idxs, img_save_path)
    md_save_path = f'model/{save_name}'
    create_local_patches(md_pcd, md_ref_idxs, md_save_path)

    return [save_name, img_ref_idxs.shape[0]]
if im_ind % 10 == 0: misc.log( 'Calculating error {} - method: {}, dataset: {}{}, scene: {}, ' 'im: {}'.format(p['error_type'], method, dataset, split_type_str, scene_id, im_ind)) # Intrinsic camera matrix. K = scene_camera[im_id]['cam_K'] # Load the depth image if VSD is selected as the pose error function. depth_im = None if p['error_type'] == 'vsd': depth_path = dp_split['depth_tpath'].format(scene_id=scene_id, im_id=im_id) depth_im = inout.load_depth(depth_path) depth_im *= scene_camera[im_id][ 'depth_scale'] # Convert to [mm]. for obj_id, target in im_targets.items(): # The required number of top estimated poses. if p['n_top'] == 0: # All estimates are considered. n_top_curr = None elif p['n_top'] == -1: # Given by the number of GT poses. # n_top_curr = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]]) n_top_curr = target['inst_count'] else: n_top_curr = p['n_top'] # Get the estimates.
"/{:06d}.tif".format(im_id) image_gray = inout.load_im(rgb_path) image_t = np.zeros((image_gray.shape[0], image_gray.shape[1], 3), dtype=np.uint8) image_t[:, :, :] = np.expand_dims(image_gray, axis=2) else: rgb_path = test_dir+"/{:06d}/".format(scene_id)+img_type+\ "/{:06d}.png".format(im_id) image_t = inout.load_im(rgb_path) if (dataset == "itodd"): depth_path = test_dir + "/{:06d}/depth/{:06d}.tif".format( scene_id, im_id) else: depth_path = test_dir + "/{:06d}/depth/{:06d}.png".format( scene_id, im_id) depth_t = inout.load_depth(depth_path) / 1000 * depth_scale depth_t_zero_nan = np.nan_to_num(depth_t) t1 = time.time() inst_count_est = np.zeros((len(inst_counts))) inst_count_pred = np.zeros((len(inst_counts))) inst_count_good = np.zeros((len(inst_counts))) depth_valid = np.logical_and(depth_t > 0.2, depth_t < 2.2) rgb_valid = np.logical_or(depth_valid, depth_t_zero_nan == 0) image_t = image_t.astype(np.float32) image_t[np.invert(rgb_valid)] = 0.1 * image_t[np.invert(rgb_valid)] points_tgt = np.zeros((depth_t.shape[0], depth_t.shape[1], 6), np.float32) points_tgt[:, :, :3] = getXYZ(depth_t, fx=cam_K[0, 0], fy=cam_K[1, 1],
scene_gt_info = {} im_ids = sorted(scene_gt.keys()) for im_counter, im_id in enumerate(im_ids): if im_counter % 100 == 0: misc.log( 'Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}' .format(p['dataset'], p['dataset_split'], p['dataset_split_type'], scene_id, im_id)) # Load depth image. depth_fpath = dp_split['depth_tpath'].format(scene_id=scene_id, im_id=im_id) if not os.path.exists(depth_fpath): depth_fpath = depth_fpath.replace('.tif', '.png') depth = inout.load_depth(depth_fpath) depth *= scene_camera[im_id]['depth_scale'] # Convert to [mm]. K = scene_camera[im_id]['cam_K'] fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] im_size = (depth.shape[1], depth.shape[0]) scene_gt_info[im_id] = [] for gt_id, gt in enumerate(scene_gt[im_id]): # Render depth image of the object model in the ground-truth pose. depth_gt_large = ren.render_object(gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'], fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth'] depth_gt = depth_gt_large[ren_cy_offset:(ren_cy_offset +
rgb = None if p['vis_rgb']: if 'rgb' in dp_split['im_modalities']: rgb = inout.load_im(dp_split['rgb_tpath'].format( scene_id=scene_id, im_id=im_id))[:, :, :3] elif 'gray' in dp_split['im_modalities']: gray = inout.load_im(dp_split['gray_tpath'].format( scene_id=scene_id, im_id=im_id)) rgb = np.dstack([gray, gray, gray]) else: raise ValueError('RGB nor gray images are available.') depth = None if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']): depth = inout.load_depth(dp_split['depth_tpath'].format( scene_id=scene_id, im_id=im_id)) depth *= scene_camera[im_id][ 'depth_scale'] # Convert to [mm]. # Visualization name. if p['vis_per_obj_id']: vis_name = '{im_id:06d}_{obj_id:06d}'.format( im_id=im_id, obj_id=im_ests_vis_obj_ids[ests_vis_id]) else: vis_name = '{im_id:06d}'.format(im_id=im_id) # Path to the output RGB visualization. vis_rgb_path = None if p['vis_rgb']: vis_rgb_path = p['vis_rgb_tpath'].format( vis_path=p['vis_path'],
# Build the COCO image entry and one annotation per ground-truth instance for
# the current image.
# NOTE(review): the block structure below is reconstructed from flattened
# text -- confirm that `segmentation_id` is meant to advance for every
# instance (also when no annotation is produced) and `image_id` once per image.

# Directory holding the visible-instance masks of the current scene.
mask_paths = os.path.join(dp_split['base_path'], complete_split,
                          '{:06d}/mask_visib'.format(scene_id))

# The image path is stored relative to the directory of the COCO file.
img_path = dp_split['rgb_tpath'].format(scene_id=scene_id, im_id=im_id)
relative_img_path = os.path.relpath(img_path, os.path.dirname(coco_gt_path))
image_info = pycoco_utils.create_image_info(image_id, relative_img_path,
                                            dp_split['im_size'])
coco_scene_output["images"].append(image_info)

for idx, inst in enumerate(inst_list):
    category_info = inst['obj_id']
    mask_p = os.path.join(mask_paths,
                          '{:06d}_{:06d}.png'.format(im_id, idx))
    # The `np.bool` alias was removed in NumPy 1.24 (AttributeError); the
    # builtin `bool` selects the same dtype on every NumPy version.
    binary_inst_mask = (inout.load_depth(mask_p) / 255.).astype(bool)
    annotation_info = pycoco_utils.create_annotation_info(
        segmentation_id, image_id, category_info, binary_inst_mask,
        tolerance=2)
    if annotation_info is not None:
        coco_scene_output["annotations"].append(annotation_info)
    segmentation_id = segmentation_id + 1
image_id = image_id + 1
def __getitem__(self, frame_id):
    """Return the image, instance mask and annotations of one frame.

    Args:
        frame_id: Positional row index into ``self.frame_index``.

    Returns:
        A ``(rgb, mask, obs)`` tuple.  ``rgb`` and ``mask`` are tensors made
        with ``torch.as_tensor``; ``rgb`` keeps at most three channels
        (single-channel images are replicated to three).  ``obs`` is a dict
        with the keys ``objects``, ``camera`` and ``frame_info``.
    """
    row = self.frame_index.iloc[frame_id]
    scene_key = f'{int(row.scene_id):06d}'
    view_id = int(row.view_id)
    view_key = f'{view_id:06d}'
    scene_dir = self.base_dir / scene_key

    # Locate the image: 'rgb' folder first, then 'gray'; '.png' first, then
    # '.jpg', and '.tif' as the last resort (not checked for existence).
    image_dir = scene_dir / 'rgb'
    if not image_dir.exists():
        image_dir = scene_dir / 'gray'
    image_path = image_dir / f'{view_key}.png'
    for fallback_suffix in ('.jpg', '.tif'):
        if image_path.exists():
            break
        image_path = image_path.with_suffix(fallback_suffix)

    rgb = np.array(Image.open(image_path))
    if rgb.ndim == 2:
        # Replicate a single-channel image into three identical channels.
        rgb = np.repeat(rgb[..., None], 3, axis=-1)
    rgb = torch.as_tensor(rgb[..., :3])
    height, width = rgb.shape[:2]

    # Camera pose and intrinsics.  Translations are scaled by 0.001
    # (presumably millimetres to metres -- confirm against the dataset spec).
    scene_annotations = self.annotations[scene_key]
    cam_annotation = scene_annotations['scene_camera'][str(view_id)]
    if 'cam_R_w2c' in cam_annotation:
        TC0 = Transform(
            np.array(cam_annotation['cam_R_w2c']).reshape(3, 3),
            np.array(cam_annotation['cam_t_w2c']) * 0.001)
    else:
        # No world-to-camera pose stored: fall back to the identity.
        TC0 = Transform(np.eye(3), np.zeros(3))
    K = np.array(cam_annotation['cam_K']).reshape(3, 3)
    T0C = TC0.inverse()
    T0C_matrix = T0C.toHomogeneousMatrix()
    camera = dict(T0C=T0C_matrix, K=K, TWC=T0C_matrix,
                  resolution=rgb.shape[:2])

    objects = []
    mask = np.zeros((height, width), dtype=np.uint8)
    if 'scene_gt_info' in scene_annotations:
        annotation = scene_annotations['scene_gt'][str(view_id)]
        visib = scene_annotations['scene_gt_info'][str(view_id)]
        n_objects = len(annotation)

        for n, gt in enumerate(annotation):
            TCO = Transform(
                np.array(gt['cam_R_m2c']).reshape(3, 3),
                np.array(gt['cam_t_m2c']) * 0.001)
            T0O = (T0C * TCO).toHomogeneousMatrix()
            name = f'obj_{int(gt["obj_id"]):06d}'
            # Stored box is (x, y, width, height); emit corner coordinates.
            left, top, box_w, box_h = np.array(visib[n]['bbox_visib'])
            objects.append(dict(
                label=name,
                name=name,
                TWO=T0O,
                T0O=T0O,
                visib_fract=visib[n]['visib_fract'],
                id_in_segm=n + 1,
                bbox=[left, top, left + box_w, top + box_h],
            ))

        # Use the merged mask file when present; otherwise paint every
        # per-instance mask into one image using the 1-based instance index.
        mask_dir = scene_dir / 'mask_visib'
        merged_mask_path = mask_dir / f'{view_key}_all.png'
        if merged_mask_path.exists():
            mask = np.array(Image.open(merged_mask_path))
        else:
            for n in range(n_objects):
                instance_mask = np.array(
                    Image.open(mask_dir / f'{view_key}_{n:06d}.png'))
                mask[instance_mask == 255] = n + 1
    mask = torch.as_tensor(mask)

    if self.load_depth:
        depth_path = scene_dir / 'depth' / f'{view_key}.png'
        if not depth_path.exists():
            depth_path = depth_path.with_suffix('.tif')
        depth = np.array(inout.load_depth(depth_path))
        camera['depth'] = depth * cam_annotation['depth_scale'] / 1000

    obs = dict(objects=objects, camera=camera, frame_info=row.to_dict())
    return rgb, mask, obs
print((mask_annot[im_id])) score = mask_annot[im_id][obj_id][inst]['score'] if score < 0: continue obj_bb_est = mask_annot[im_id][obj_id][inst]['obj_bb'] obj_id_est = mask_annot[im_id][obj_id][inst]['obj_id'] chan_id = mask_annot[im_id][obj_id][inst]['np_channel_id'] mask = maskrcnn_scene_masks[im_id] inst_mask = mask[:, :, chan_id] else: obj_bb_est = scene_gt_info[str(im_id)][inst]['bbox_obj'] obj_id_est = obj_id score = 1.0 mask_p = os.path.join(mask_paths, '{:06d}_{:06d}.png'.format(im_id, inst)) inst_mask = inout.load_depth(mask_p) / 255. print((img.shape)) img_masked = img * inst_mask[..., None].astype(np.uint8) x, y, w, h = obj_bb_est xmin = float(x) / dp_split['im_size'][0] ymin = float(y) / dp_split['im_size'][1] xmax = float((x + w)) / dp_split['im_size'][0] ymax = float((y + h)) / dp_split['im_size'][1] det = [ BoundingBox(xmin, ymin, xmax, ymax, classes={obj_id_est: score}) ] if pose_refiner_method: depth_masked = depth_img * inst_mask