def get_rgb(self, scene_id, im_id):
    """Loads the RGB test image of the given scene, normalized to [0, 1]."""
    dataset_path = os.path.join(cfg.LM_PATH, "test")
    scene_path = os.path.join(dataset_path, f"{scene_id:06d}")
    file_path = os.path.join(scene_path, f"rgb/{im_id:06d}.png")
    if os.path.exists(file_path):
        # Drop a potential alpha channel; cast to float32 for consistency
        # with the fallback below.
        return (bop_inout.load_im(file_path)[..., :3] / 255.0).astype(np.float32)
    else:
        print(f"missing file: {file_path}")
        return np.zeros((480, 640, 3), dtype=np.float32)
def get_seg(self, scene_id, im_id, gt_id):
    """Loads the visible-part segmentation mask of one GT instance."""
    dataset_path = os.path.join(cfg.LM_PATH, "test")
    scene_path = os.path.join(dataset_path, f"{scene_id:06d}")
    file_path = os.path.join(scene_path, f"mask_visib/{im_id:06d}_{gt_id:06d}.png")
    if os.path.exists(file_path):
        return bop_inout.load_im(file_path)
    else:
        print(f"missing file: {file_path}")
        return np.zeros((480, 640), dtype=np.uint8)
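# Hedged usage sketch (the `loader` instance and the ids below are hypothetical;
# it assumes cfg.LM_PATH points at a BOP-style LINEMOD root):
#
#   rgb = loader.get_rgb(scene_id=1, im_id=0)           # float32 in [0, 1], (480, 640, 3)
#   seg = loader.get_seg(scene_id=1, im_id=0, gt_id=0)  # uint8 visib mask, (480, 640)
#   obj_rgb = rgb * (seg[..., None] > 0)                # keep only visible object pixels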
def create_patch_pair(depth_path, mask_path, im_cam, gt, save_name, md_pcd_pts):
    # Back-project the masked depth image into a point cloud.
    raw_depth = io.load_depth(depth_path)
    mask = io.load_im(mask_path)
    img_pcd = PointCloud.create_from_depth_image(
        depth=Image(masked_where(mask == 0.0, raw_depth).filled(0.0)),
        intrinsic=PHCamIntrinsic(*IM_SIZE, *[im_cam['cam_K'][i] for i in K]),
        depth_scale=im_cam['depth_scale'],
        depth_trunc=150000)
    # voxel_down_sample returns a new point cloud; it does not work in place.
    img_pcd = img_pcd.voxel_down_sample(VOXEL_SIZE)

    if np.asarray(img_pcd.points).shape[0] in PCD_PTS_RANGE or IS_TARGET:
        cam_R, cam_t = gt['cam_R_m2c'], gt['cam_t_m2c']

        # Select reference points on the image using farthest point sampling.
        img_pcd_pts_fps = torch.as_tensor(np.asarray(img_pcd.points)).to(DEVICE)
        img_ref_idxs = fps(img_pcd_pts_fps, ratio=FPS_RATIO).to('cpu').numpy()

        # Transform the image reference points into the model frame
        # (inverse of the camera pose: p_m = R^-1 (p_c - t)).
        img_ref_pts = np.asarray(img_pcd.points)[img_ref_idxs]
        md_ref_pts = (img_ref_pts - cam_t.T) @ np.linalg.inv(cam_R).T

        # Recreate the model point cloud with the reference points prepended.
        md_ref_idxs = np.arange(md_ref_pts.shape[0])
        md_pcd_pts = np.concatenate([md_ref_pts, md_pcd_pts], axis=0)
        md_pcd = PointCloud()
        md_pcd.points = Vector3dVector(md_pcd_pts)

        # Calculate and save PPFs.
        img_save_path = f'image/{save_name}'
        create_local_patches(img_pcd, img_ref_idxs, img_save_path)
        md_save_path = f'model/{save_name}'
        create_local_patches(md_pcd, md_ref_idxs, md_save_path)

        entry = [save_name, img_ref_idxs.shape[0]]
    else:
        entry = []
    return entry
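# Hedged usage sketch (paths, ids and `model_pts` are hypothetical; `im_cam` and
# `gt` follow the BOP scene_camera.json / scene_gt.json conventions used above):
#
#   im_cam = scene_camera[im_id]      # provides 'cam_K' and 'depth_scale'
#   gt = scene_gt[im_id][0]           # provides 'cam_R_m2c' and 'cam_t_m2c'
#   entry = create_patch_pair('depth/000000.png', 'mask_visib/000000_000000.png',
#                             im_cam, gt, 'sample_000000', model_pts)
#   # `entry` is [save_name, n_ref_pts] on success and [] if the downsampled
#   # cloud fell outside PCD_PTS_RANGE.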
cam_path = test_dir + "/{:06d}/scene_camera.json".format(scene_id)
cam_info = inout.load_scene_camera(cam_path)
if dummy_run:
    # Refresh the estimators with a dummy image once per target object.
    image_t = np.zeros((im_height, im_width, 3), np.uint8)
    for obj_id_target in obj_id_targets:
        _, _, _, _, _, _ = obj_pix2pose[model_ids_list.index(obj_id_target)].est_pose(
            # np.int was removed in NumPy 1.24; use an explicit dtype.
            image_t, np.array([0, 0, 128, 128], np.int64))
prev_sid = scene_id  # to avoid re-loading scene_camera.json

cam_param = cam_info[im_id]
cam_K = cam_param['cam_K']
depth_scale = cam_param['depth_scale']  # depth / 1000 * depth_scale

if img_type == 'gray':
    rgb_path = test_dir + "/{:06d}/".format(scene_id) + img_type + \
        "/{:06d}.tif".format(im_id)
    image_gray = inout.load_im(rgb_path)
    # Copy the gray values to all three channels.
    image_t = np.zeros((image_gray.shape[0], image_gray.shape[1], 3), dtype=np.uint8)
    image_t[:, :, :] = np.expand_dims(image_gray, axis=2)
else:
    rgb_path = test_dir + "/{:06d}/".format(scene_id) + img_type + \
        "/{:06d}.png".format(im_id)
    image_t = inout.load_im(rgb_path)

t1 = time.time()
inst_count_est = np.zeros((len(inst_counts)))
inst_count_pred = np.zeros((len(inst_counts)))
if detect_type == 'rcnn':
    rois, obj_orders, obj_ids, scores, masks = get_rcnn_detection(image_t, model)
elif detect_type == 'retinanet':
def add_object(self, obj_id, model_path, **kwargs):
    """See base class."""
    # Color of the object model (the original color saved with the object model
    # will be used if None).
    surf_color = None
    if 'surf_color' in kwargs:
        surf_color = kwargs['surf_color']

    # Load the object model.
    model = inout.load_ply(model_path)
    self.models[obj_id] = model

    # Calculate the 3D bounding box of the model (will be used to set the near
    # and far clipping plane).
    bb = misc.calc_3d_bbox(
        model['pts'][:, 0], model['pts'][:, 1], model['pts'][:, 2])
    self.model_bbox_corners[obj_id] = np.array([
        [bb[0], bb[1], bb[2]],
        [bb[0], bb[1], bb[2] + bb[5]],
        [bb[0], bb[1] + bb[4], bb[2]],
        [bb[0], bb[1] + bb[4], bb[2] + bb[5]],
        [bb[0] + bb[3], bb[1], bb[2]],
        [bb[0] + bb[3], bb[1], bb[2] + bb[5]],
        [bb[0] + bb[3], bb[1] + bb[4], bb[2]],
        [bb[0] + bb[3], bb[1] + bb[4], bb[2] + bb[5]],
    ])

    # Set texture/color of vertices.
    self.model_textures[obj_id] = None

    # Use the specified uniform surface color.
    if surf_color is not None:
        colors = np.tile(list(surf_color) + [1.0], [model['pts'].shape[0], 1])

        # Set UV texture coordinates to dummy values.
        texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32)

    # Use the model texture.
    elif 'texture_file' in self.models[obj_id].keys():
        model_texture_path = os.path.join(
            os.path.dirname(model_path), self.models[obj_id]['texture_file'])
        model_texture = inout.load_im(model_texture_path)

        # Normalize the texture image.
        if model_texture.max() > 1.0:
            model_texture = model_texture.astype(np.float32) / 255.0
        model_texture = np.flipud(model_texture)
        self.model_textures[obj_id] = model_texture

        # UV texture coordinates.
        texture_uv = model['texture_uv']

        # Set the per-vertex color to dummy values.
        colors = np.zeros((model['pts'].shape[0], 3), np.float32)

    # Use the original model color.
    elif 'colors' in model.keys():
        assert model['pts'].shape[0] == model['colors'].shape[0]
        colors = model['colors']
        if colors.max() > 1.0:
            colors /= 255.0  # Color values are expected in range [0, 1].

        # Set UV texture coordinates to dummy values.
        texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32)

    # Set the model color to gray.
    else:
        colors = np.ones((model['pts'].shape[0], 3), np.float32) * 0.5

        # Set UV texture coordinates to dummy values.
        texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32)

    # Set the vertex data.
    if self.mode == 'depth':
        vertices_type = [
            ('a_position', np.float32, 3),
            ('a_color', np.float32, colors.shape[1])]
        vertices = np.array(list(zip(model['pts'], colors)), vertices_type)
    else:
        if self.shading == 'flat':
            vertices_type = [
                ('a_position', np.float32, 3),
                ('a_color', np.float32, colors.shape[1]),
                ('a_texcoord', np.float32, 2)]
            vertices = np.array(
                list(zip(model['pts'], colors, texture_uv)), vertices_type)
        elif self.shading == 'phong':
            vertices_type = [
                ('a_position', np.float32, 3),
                ('a_normal', np.float32, 3),
                ('a_color', np.float32, colors.shape[1]),
                ('a_texcoord', np.float32, 2)]
            vertices = np.array(
                list(zip(model['pts'], model['normals'], colors, texture_uv)),
                vertices_type)
        else:
            raise ValueError('Unknown shading type.')

    # Create vertex and index buffer for the loaded object model.
    self.vertex_buffers[obj_id] = vertices.view(gloo.VertexBuffer)
    self.index_buffers[obj_id] = \
        model['faces'].flatten().astype(np.uint32).view(gloo.IndexBuffer)

    # Set shader for the selected shading.
    if self.shading == 'flat':
        rgb_fragment_code = _rgb_fragment_flat_code
    elif self.shading == 'phong':
        rgb_fragment_code = _rgb_fragment_phong_code
    else:
        raise ValueError('Unknown shading type.')

    # Prepare the RGB OpenGL program.
    rgb_program = gloo.Program(_rgb_vertex_code, rgb_fragment_code)
    rgb_program.bind(self.vertex_buffers[obj_id])
    if self.model_textures[obj_id] is not None:
        rgb_program['u_use_texture'] = int(True)
        rgb_program['u_texture'] = self.model_textures[obj_id]
    else:
        rgb_program['u_use_texture'] = int(False)
        rgb_program['u_texture'] = np.zeros((1, 1, 4), np.float32)
    self.rgb_programs[obj_id] = rgb_program

    # Prepare the depth OpenGL program.
    depth_program = gloo.Program(_depth_vertex_code, _depth_fragment_code)
    depth_program.bind(self.vertex_buffers[obj_id])
    self.depth_programs[obj_id] = depth_program
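# Hedged usage sketch (names are assumptions: `RendererPython` stands in for the
# concrete renderer class this method belongs to, and the paths are examples):
#
#   ren = RendererPython(width=640, height=480, mode='rgb+depth', shading='phong')
#   ren.add_object(obj_id=1, model_path='models/obj_000001.ply')
#   ren.add_object(obj_id=2, model_path='models/obj_000002.ply',
#                  surf_color=(0.0, 1.0, 0.0))  # uniform green surface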
        im_ests_vis.append(obj_ests_sorted)
        im_ests_vis_obj_ids.append(obj_id)

    # Join the per-object estimates if only one visualization is to be made.
    if not p['vis_per_obj_id']:
        im_ests_vis = [list(itertools.chain.from_iterable(im_ests_vis))]

    for ests_vis_id, ests_vis in enumerate(im_ests_vis):
        # Load the color and depth images and prepare images for rendering.
        rgb = None
        if p['vis_rgb']:
            if 'rgb' in dp_split['im_modalities']:
                rgb = inout.load_im(dp_split['rgb_tpath'].format(
                    scene_id=scene_id, im_id=im_id))[:, :, :3]
            elif 'gray' in dp_split['im_modalities']:
                gray = inout.load_im(dp_split['gray_tpath'].format(
                    scene_id=scene_id, im_id=im_id))
                rgb = np.dstack([gray, gray, gray])
            else:
                raise ValueError('Neither RGB nor gray images are available.')

        depth = None
        if p['vis_depth_diff'] or (p['vis_rgb'] and p['vis_rgb_resolve_visib']):
            depth = inout.load_depth(dp_split['depth_tpath'].format(
                scene_id=scene_id, im_id=im_id))
            depth *= scene_camera[im_id]['depth_scale']  # Convert to [mm].
misc.ensure_dir(os.path.dirname(out_scene_camera_tpath.format(
    out_path=out_path, obj_id=obj_id)))
misc.ensure_dir(os.path.dirname(out_scene_gt_tpath.format(
    out_path=out_path, obj_id=obj_id)))

# Load model.
model_path = dp_model['model_tpath'].format(obj_id=obj_id)
model = inout.load_ply(model_path)
model_uv_path = dp_model['model_uv_tpath'].format(obj_id=obj_id)
model_uv = inout.load_ply(model_uv_path)

# Load model texture.
if 'texture_file' in model:
    model_texture_path = os.path.join(
        os.path.dirname(model_path), model['texture_file'])
    model_texture = inout.load_im(model_texture_path)
else:
    model_texture = None
model_uv_texture = None

scene_camera = {}
scene_gt = {}
im_id = 0
for radius in radii:
    # Sample viewpoints.
    view_sampler_mode = 'hinterstoisser'  # 'hinterstoisser' or 'fibonacci'.
    views, views_level = view_sampler.sample_views(
        min_n_views, radius, dp_split_test['azimuth_range'],
        dp_split_test['elev_range'], view_sampler_mode)
    misc.log('Sampled views: ' + str(len(views)))
for img_id in range(len(rgb_files)):
    rgb_fn = rgb_files[img_id]
    gt = gts[img_id][0]
    obj_id = int(gt['obj_id'])

    # Object distance from the camera in meters (z component of the translation).
    z_tra = (gt['cam_t_m2c'] / 1000)[2, 0]
    z_tras.append(z_tra)

    filename = rgb_fn.split("/")[-1]
    if not os.path.exists(crop_dir + "/{:02d}".format(obj_id)):
        os.makedirs(crop_dir + "/{:02d}".format(obj_id))
    if not os.path.exists(cropmask_dir + "/{:02d}".format(obj_id)):
        os.makedirs(cropmask_dir + "/{:02d}".format(obj_id))
    crop_fn = os.path.join(crop_dir + "/{:02d}".format(obj_id), filename)
    cropmask_fn = os.path.join(cropmask_dir + "/{:02d}".format(obj_id), filename)

    if not os.path.exists(crop_fn):
        img = inout.load_im(rgb_fn)
        mask = inout.load_im(mask_files[img_id]) > 0

        # Tight bounding box around the valid mask pixels: (v_min, u_min, v_max, u_max).
        vu_valid = np.where(mask)
        bbox = np.array([np.min(vu_valid[0]), np.min(vu_valid[1]),
                         np.max(vu_valid[0]), np.max(vu_valid[1])])

        # Crop the image and keep only the masked pixels.
        crop_img = np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3), np.uint8)
        img = img[bbox[0]:bbox[2], bbox[1]:bbox[3]]
        crop_img[mask[bbox[0]:bbox[2], bbox[1]:bbox[3]]] = \
            img[mask[bbox[0]:bbox[2], bbox[1]:bbox[3]]]
        inout.save_im(crop_fn, crop_img)
)
else:
    cfg_fn = sys.argv[1]  # e.g. "cfg/cfg_bop2019.json"
    cfg = inout.load_json(cfg_fn)
    dataset = sys.argv[2]

    bop_dir, source_dir, model_plys, model_info, model_ids, rgb_files, \
        depth_files, mask_files, gts, cam_param_global, scene_cam = \
        bop_io.get_dataset(cfg, dataset, incl_param=True)

    xyz_target_dir = bop_dir + "/train_xyz"
    im_width, im_height = cam_param_global['im_size']
    cam_K = cam_param_global['K']

    # Check if the training images have the same dimensions as the test images.
    rgb_fn = rgb_files[0]
    img_temp = inout.load_im(rgb_fn)
    if img_temp.shape[0] != im_height or img_temp.shape[1] != im_width:
        print("The size of the training images differs from the test images.")
        im_height = img_temp.shape[0]
        im_width = img_temp.shape[1]

    ren = Renderer((im_width, im_height), cam_K)

    # Optionally restrict rendering to a single object id given as a third
    # argument (sys.argv[2] is the dataset name, so the id must be argv[3]).
    t_model = -1
    if len(sys.argv) == 4:
        t_model = int(sys.argv[3])
        print("render only this obj:", t_model)

    for m_id, model_id in enumerate(model_ids):
        if t_model != -1 and model_id != t_model:
            continue
        m_info = model_info['{}'.format(model_id)]
def create_tf_example(example, dp_split, scene_camera, scene_gt=None,
                      scene_gt_info=None):
    scene_id = example['scene_id']
    im_id = example['im_id']
    width = dp_split['im_size'][0]
    height = dp_split['im_size'][1]
    K = scene_camera[scene_id][im_id]['cam_K']

    gts = None
    gts_info = None
    mask_visib_fpaths = None
    if FLAGS.add_gt:
        gts = scene_gt[scene_id][im_id]
        gts_info = scene_gt_info[scene_id][im_id]

        # Collect paths to object masks.
        mask_visib_fpaths = []
        for gt_id in range(len(gts)):
            mask_visib_fpaths.append(dp_split['mask_visib_tpath'].format(
                scene_id=scene_id, im_id=im_id, gt_id=gt_id))

    # RGB image.
    im_path = None
    rgb_encoded = None
    if 'rgb' in dp_split['im_modalities']:
        # Absolute path to the RGB image.
        im_path = dp_split['rgb_tpath'].format(scene_id=scene_id, im_id=im_id)

        # Determine the format of the RGB image.
        rgb_format_in = im_path.split('.')[-1]
        if rgb_format_in in ['jpg', 'jpeg']:
            rgb_format_in = 'jpg'

        # Load the RGB image.
        if rgb_format_in == FLAGS.rgb_format:
            with tf.gfile.GFile(im_path, 'rb') as fid:
                rgb_encoded = fid.read()
        else:
            rgb = inout.load_im(im_path)
            rgb_encoded = encode_image(rgb, FLAGS.rgb_format)

    # Grayscale image.
    elif 'gray' in dp_split['im_modalities']:
        # Absolute path to the grayscale image.
        im_path = dp_split['gray_tpath'].format(scene_id=scene_id, im_id=im_id)

        # Load the grayscale image and duplicate the channel.
        gray = inout.load_im(im_path)
        rgb = np.dstack([gray, gray, gray])
        rgb_encoded = encode_image(rgb, FLAGS.rgb_format)

    # Path of the image relative to BOP_PATH.
    im_path_rel = im_path.split(config.BOP_PATH)[1]
    im_path_rel_encoded = im_path_rel.encode('utf8')

    # Collect ground-truth information about the annotated object instances.
    pose_q1, pose_q2, pose_q3, pose_q4 = [], [], [], []
    pose_t1, pose_t2, pose_t3 = [], [], []
    obj_ids = []
    obj_ids_txt = []
    obj_visibilities = []
    masks_visib_encoded = []
    if FLAGS.add_gt:
        for gt_id, gt in enumerate(gts):
            # Orientation of the object instance as a quaternion.
            R = np.eye(4)
            R[:3, :3] = gt['cam_R_m2c']
            q = transform.quaternion_from_matrix(R)
            pose_q1.append(q[0])
            pose_q2.append(q[1])
            pose_q3.append(q[2])
            pose_q4.append(q[3])

            # Translation of the object instance.
            t = gt['cam_t_m2c'].flatten()
            pose_t1.append(t[0])
            pose_t2.append(t[1])
            pose_t3.append(t[2])

            obj_ids_txt.append(str(gt['obj_id']).encode('utf8'))
            obj_ids.append(int(gt['obj_id']))
            obj_visibilities.append(float(gts_info[gt_id]['visib_fract']))

            # Mask of the visible part of the object instance.
            with tf.gfile.GFile(mask_visib_fpaths[gt_id], 'rb') as fid:
                mask_visib_encoded_png = fid.read()
            masks_visib_encoded.append(mask_visib_encoded_png)

    # Intrinsic camera parameters.
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # TF Example.
    feature = {
        'image/scene_id': tfrecord.int64_list_feature(scene_id),
        'image/im_id': tfrecord.int64_list_feature(im_id),
        'image/path': tfrecord.bytes_list_feature(im_path_rel_encoded),
        'image/encoded': tfrecord.bytes_list_feature(rgb_encoded),
        'image/width': tfrecord.int64_list_feature(width),
        'image/height': tfrecord.int64_list_feature(height),
        'image/channels': tfrecord.int64_list_feature(3),
        'image/camera/fx': tfrecord.float_list_feature([fx]),
        'image/camera/fy': tfrecord.float_list_feature([fy]),
        'image/camera/cx': tfrecord.float_list_feature([cx]),
        'image/camera/cy': tfrecord.float_list_feature([cy]),
        'image/object/id': tfrecord.int64_list_feature(obj_ids),
        'image/object/visibility': tfrecord.float_list_feature(obj_visibilities),
        'image/object/pose/q1': tfrecord.float_list_feature(pose_q1),
        'image/object/pose/q2': tfrecord.float_list_feature(pose_q2),
        'image/object/pose/q3': tfrecord.float_list_feature(pose_q3),
        'image/object/pose/q4': tfrecord.float_list_feature(pose_q4),
        'image/object/pose/t1': tfrecord.float_list_feature(pose_t1),
        'image/object/pose/t2': tfrecord.float_list_feature(pose_t2),
        'image/object/pose/t3': tfrecord.float_list_feature(pose_t3),
        'image/object/mask': tfrecord.bytes_list_feature(masks_visib_encoded),
    }
    tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
    res = tf_example.SerializeToString()
    return res, example
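# Hedged follow-up sketch (the output path and the `examples` iterable are
# hypothetical; tf.python_io.TFRecordWriter matches the TF1-style tf.gfile
# usage above):
#
#   with tf.python_io.TFRecordWriter('train.tfrecord') as writer:
#       for example in examples:
#           serialized, _ = create_tf_example(example, dp_split, scene_camera,
#                                             scene_gt, scene_gt_info)
#           writer.write(serialized)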