Example #1
File: dataset.py Project: btut/reagent
 def get_rgb(self, scene_id, im_id):
     dataset_path = os.path.join(cfg.LM_PATH, "test")
     scene_path = os.path.join(dataset_path, f"{scene_id:06d}")
     file_path = os.path.join(scene_path, f"rgb/{im_id:06d}.png")
     if os.path.exists(file_path):
         return bop_inout.load_im(file_path)[..., :3] / 255
     else:
         print(f"missing file: {file_path}")
         return np.zeros((480, 640, 3), dtype=np.float32)
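A minimal usage sketch (the LinemodDataset name and the IDs below are assumptions for illustration, not part of the original project): the loader returns an H x W x 3 image scaled to [0, 1], but the success path produces float64 from the division while the fallback is float32, so a cast keeps the dtype consistent.

import numpy as np

ds = LinemodDataset()                    # hypothetical class exposing get_rgb
rgb = ds.get_rgb(scene_id=1, im_id=0)    # values in [0, 1], shape (480, 640, 3)
rgb = np.asarray(rgb, dtype=np.float32)  # unify dtype with the zero-image fallback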
Example #2
File: dataset.py Project: btut/reagent
 def get_seg(self, scene_id, im_id, gt_id):
     dataset_path = os.path.join(cfg.LM_PATH, "test")
     scene_path = os.path.join(dataset_path, f"{scene_id:06d}")
     file_path = os.path.join(scene_path,
                              f"mask_visib/{im_id:06d}_{gt_id:06d}.png")
     if os.path.exists(file_path):
         return bop_inout.load_im(file_path)
     else:
         print(f"missing file: {file_path}")
         return np.zeros((480, 640), dtype=np.uint8)
Example #3
def create_patch_pair(depth_path, mask_path, im_cam, gt, save_name,
                      md_pcd_pts):
    raw_depth = io.load_depth(depth_path)
    mask = io.load_im(mask_path)

    img_pcd = PointCloud.create_from_depth_image(
        depth=Image(masked_where(mask == 0.0, raw_depth).filled(0.0)),
        intrinsic=PHCamIntrinsic(*IM_SIZE, *[im_cam['cam_K'][i] for i in K]),
        depth_scale=im_cam['depth_scale'],
        depth_trunc=150000)
    # voxel_down_sample returns a new point cloud, so the result must be re-assigned
    img_pcd = img_pcd.voxel_down_sample(VOXEL_SIZE)

    if np.asarray(img_pcd.points).shape[0] in PCD_PTS_RANGE or IS_TARGET:
        cam_R, cam_t = gt['cam_R_m2c'], gt['cam_t_m2c']

        # Select reference points on image using farthest point sampling
        img_pcd_pts_fps = torch.as_tensor(np.asarray(img_pcd.points)).to(DEVICE)  # convert Vector3dVector via NumPy before building the tensor
        img_ref_idxs = fps(img_pcd_pts_fps, ratio=FPS_RATIO).to('cpu').numpy()

        # Calculate model reference points
        img_ref_pts = np.asarray(img_pcd.points)[img_ref_idxs]
        md_ref_pts = (img_ref_pts - cam_t.T) @ np.linalg.inv(cam_R).T

        # Recreate model point cloud
        md_ref_idxs = np.arange(md_ref_pts.shape[0])
        md_pcd_pts = np.concatenate([md_ref_pts, md_pcd_pts], axis=0)
        md_pcd = PointCloud()
        md_pcd.points = Vector3dVector(md_pcd_pts)

        # Calculate and save PPFs
        img_save_path = f'image/{save_name}'
        create_local_patches(img_pcd, img_ref_idxs, img_save_path)

        md_save_path = f'model/{save_name}'
        create_local_patches(md_pcd, md_ref_idxs, md_save_path)

        entry = [save_name, img_ref_idxs.shape[0]]
    else:
        entry = []

    return entry
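The reference-point transform above inverts the model-to-camera pose: with points stored as rows, p_model = R^{-1}(p_cam - t) becomes (pts - t.T) @ inv(R).T. A small self-contained check of that identity (the pose and points below are illustrative only):

import numpy as np

theta = 0.3
R = np.array([[np.cos(theta), -np.sin(theta), 0.0],   # model-to-camera rotation
              [np.sin(theta),  np.cos(theta), 0.0],
              [0.0, 0.0, 1.0]])
t = np.array([[10.0], [20.0], [500.0]])                # 3x1, like gt['cam_t_m2c']
pts_model = np.random.rand(8, 3)                       # model-frame points (rows)
pts_cam = pts_model @ R.T + t.T                        # p_cam = R @ p_model + t
recovered = (pts_cam - t.T) @ np.linalg.inv(R).T       # same form as md_ref_pts above
assert np.allclose(recovered, pts_model)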
Example #4
        cam_path = test_dir+"/{:06d}/scene_camera.json".format(scene_id)
        cam_info = inout.load_scene_camera(cam_path)
        if(dummy_run):
            image_t = np.zeros((im_height,im_width,3),np.uint8)        
            for obj_id_target in obj_id_targets: #refreshing
                _,_,_,_,_,_ = obj_pix2pose[model_ids_list.index(obj_id_target)].est_pose(image_t,np.array([0,0,128,128],int))  # np.int was removed in NumPy 1.24; use the builtin int
    
    prev_sid=scene_id #to avoid re-load scene_camera.json
    cam_param = cam_info[im_id]
    cam_K = cam_param['cam_K']
    depth_scale = cam_param['depth_scale'] #depth/1000 * depth_scale
    
    if(img_type=='gray'):
        rgb_path = test_dir+"/{:06d}/".format(scene_id)+img_type+\
                        "/{:06d}.tif".format(im_id)
        image_gray = inout.load_im(rgb_path)
        #copy gray values to three channels    
        image_t = np.zeros((image_gray.shape[0],image_gray.shape[1],3),dtype=np.uint8)
        image_t[:,:,:]= np.expand_dims(image_gray,axis=2)
    else:
        rgb_path = test_dir+"/{:06d}/".format(scene_id)+img_type+\
                        "/{:06d}.png".format(im_id)
        image_t = inout.load_im(rgb_path)            

    t1=time.time()
    inst_count_est=np.zeros((len(inst_counts)))
    inst_count_pred = np.zeros((len(inst_counts)))
    
    if(detect_type=='rcnn'):
        rois,obj_orders,obj_ids,scores,masks = get_rcnn_detection(image_t,model)
    elif(detect_type=='retinanet'):
Example #5
    def add_object(self, obj_id, model_path, **kwargs):
        """See base class."""
        # Color of the object model (the original color saved with the object model
        # will be used if None).
        surf_color = None
        if 'surf_color' in kwargs:
            surf_color = kwargs['surf_color']

        # Load the object model.
        model = inout.load_ply(model_path)
        self.models[obj_id] = model

        # Calculate the 3D bounding box of the model (will be used to set the near
        # and far clipping plane).
        bb = misc.calc_3d_bbox(model['pts'][:, 0], model['pts'][:, 1],
                               model['pts'][:, 2])
        self.model_bbox_corners[obj_id] = np.array([
            [bb[0], bb[1], bb[2]],
            [bb[0], bb[1], bb[2] + bb[5]],
            [bb[0], bb[1] + bb[4], bb[2]],
            [bb[0], bb[1] + bb[4], bb[2] + bb[5]],
            [bb[0] + bb[3], bb[1], bb[2]],
            [bb[0] + bb[3], bb[1], bb[2] + bb[5]],
            [bb[0] + bb[3], bb[1] + bb[4], bb[2]],
            [bb[0] + bb[3], bb[1] + bb[4], bb[2] + bb[5]],
        ])

        # Set texture/color of vertices.
        self.model_textures[obj_id] = None

        # Use the specified uniform surface color.
        if surf_color is not None:
            colors = np.tile(
                list(surf_color) + [1.0], [model['pts'].shape[0], 1])

            # Set UV texture coordinates to dummy values.
            texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32)

        # Use the model texture.
        elif 'texture_file' in self.models[obj_id].keys():
            model_texture_path = os.path.join(
                os.path.dirname(model_path),
                self.models[obj_id]['texture_file'])
            model_texture = inout.load_im(model_texture_path)

            # Normalize the texture image.
            if model_texture.max() > 1.0:
                model_texture = model_texture.astype(np.float32) / 255.0
            model_texture = np.flipud(model_texture)
            self.model_textures[obj_id] = model_texture

            # UV texture coordinates.
            texture_uv = model['texture_uv']

            # Set the per-vertex color to dummy values.
            colors = np.zeros((model['pts'].shape[0], 3), np.float32)

        # Use the original model color.
        elif 'colors' in model.keys():
            assert (model['pts'].shape[0] == model['colors'].shape[0])
            colors = model['colors']
            if colors.max() > 1.0:
                colors /= 255.0  # Color values are expected in range [0, 1].

            # Set UV texture coordinates to dummy values.
            texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32)

        # Set the model color to gray.
        else:
            colors = np.ones((model['pts'].shape[0], 3), np.float32) * 0.5

            # Set UV texture coordinates to dummy values.
            texture_uv = np.zeros((model['pts'].shape[0], 2), np.float32)

        # Set the vertex data.
        if self.mode == 'depth':
            vertices_type = [('a_position', np.float32, 3),
                             ('a_color', np.float32, colors.shape[1])]
            vertices = np.array(list(zip(model['pts'], colors)), vertices_type)
        else:
            if self.shading == 'flat':
                vertices_type = [('a_position', np.float32, 3),
                                 ('a_color', np.float32, colors.shape[1]),
                                 ('a_texcoord', np.float32, 2)]
                vertices = np.array(
                    list(zip(model['pts'], colors, texture_uv)), vertices_type)
            elif self.shading == 'phong':
                vertices_type = [('a_position', np.float32, 3),
                                 ('a_normal', np.float32, 3),
                                 ('a_color', np.float32, colors.shape[1]),
                                 ('a_texcoord', np.float32, 2)]
                vertices = np.array(
                    list(
                        zip(model['pts'], model['normals'], colors,
                            texture_uv)), vertices_type)
            else:
                raise ValueError('Unknown shading type.')

        # Create vertex and index buffer for the loaded object model.
        self.vertex_buffers[obj_id] = vertices.view(gloo.VertexBuffer)
        self.index_buffers[obj_id] = \
          model['faces'].flatten().astype(np.uint32).view(gloo.IndexBuffer)

        # Set shader for the selected shading.
        if self.shading == 'flat':
            rgb_fragment_code = _rgb_fragment_flat_code
        elif self.shading == 'phong':
            rgb_fragment_code = _rgb_fragment_phong_code
        else:
            raise ValueError('Unknown shading type.')

        # Prepare the RGB OpenGL program.
        rgb_program = gloo.Program(_rgb_vertex_code, rgb_fragment_code)
        rgb_program.bind(self.vertex_buffers[obj_id])
        if self.model_textures[obj_id] is not None:
            rgb_program['u_use_texture'] = int(True)
            rgb_program['u_texture'] = self.model_textures[obj_id]
        else:
            rgb_program['u_use_texture'] = int(False)
            rgb_program['u_texture'] = np.zeros((1, 1, 4), np.float32)
        self.rgb_programs[obj_id] = rgb_program

        # Prepare the depth OpenGL program.
        depth_program = gloo.Program(_depth_vertex_code, _depth_fragment_code)
        depth_program.bind(self.vertex_buffers[obj_id])
        self.depth_programs[obj_id] = depth_program
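The eight corners stored in model_bbox_corners enumerate every combination of the minimum corner bb[0:3] and the minimum corner plus the extents bb[3:6]; the explicit table above can be reproduced compactly with a comprehension (a sketch equivalent in output order, not the toolkit's own code):

import numpy as np

def bbox_corners(bb):
    # bb = [x, y, z, size_x, size_y, size_z]: min corner followed by extents.
    mins = np.asarray(bb[:3], dtype=np.float64)
    maxs = mins + np.asarray(bb[3:6], dtype=np.float64)
    return np.array([[x, y, z]
                     for x in (mins[0], maxs[0])
                     for y in (mins[1], maxs[1])
                     for z in (mins[2], maxs[2])])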
Example #6
                im_ests_vis.append(obj_ests_sorted)
                im_ests_vis_obj_ids.append(obj_id)

            # Join the per-object estimates if only one visualization is to be made.
            if not p['vis_per_obj_id']:
                im_ests_vis = [
                    list(itertools.chain.from_iterable(im_ests_vis))
                ]

            for ests_vis_id, ests_vis in enumerate(im_ests_vis):

                # Load the color and depth images and prepare images for rendering.
                rgb = None
                if p['vis_rgb']:
                    if 'rgb' in dp_split['im_modalities']:
                        rgb = inout.load_im(dp_split['rgb_tpath'].format(
                            scene_id=scene_id, im_id=im_id))[:, :, :3]
                    elif 'gray' in dp_split['im_modalities']:
                        gray = inout.load_im(dp_split['gray_tpath'].format(
                            scene_id=scene_id, im_id=im_id))
                        rgb = np.dstack([gray, gray, gray])
                    else:
                        raise ValueError('Neither RGB nor gray images are available.')

                depth = None
                if p['vis_depth_diff'] or (p['vis_rgb']
                                           and p['vis_rgb_resolve_visib']):
                    depth = inout.load_depth(dp_split['depth_tpath'].format(
                        scene_id=scene_id, im_id=im_id))
                    depth *= scene_camera[im_id][
                        'depth_scale']  # Convert to [mm].
Example #7
  misc.ensure_dir(os.path.dirname(out_scene_camera_tpath.format(
    out_path=out_path, obj_id=obj_id)))
  misc.ensure_dir(os.path.dirname(out_scene_gt_tpath.format(
    out_path=out_path, obj_id=obj_id)))

  # Load model.
  model_path = dp_model['model_tpath'].format(obj_id=obj_id)
  model = inout.load_ply(model_path)
  model_uv_path = dp_model['model_uv_tpath'].format(obj_id=obj_id)
  model_uv = inout.load_ply(model_uv_path)

  # Load model texture.
  if 'texture_file' in model:
    model_texture_path =\
      os.path.join(os.path.dirname(model_path), model['texture_file'])
    model_texture = inout.load_im(model_texture_path)
  else:
    model_texture = None
  model_uv_texture = None

  scene_camera = {}
  scene_gt = {}
  im_id = 0
  for radius in radii:
    # Sample viewpoints.
    view_sampler_mode = 'hinterstoisser'  # 'hinterstoisser' or 'fibonacci'.
    views, views_level = view_sampler.sample_views(
      min_n_views, radius, dp_split_test['azimuth_range'],
      dp_split_test['elev_range'], view_sampler_mode)

    misc.log('Sampled views: ' + str(len(views)))
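A hedged sketch of how the sampled views are typically consumed afterwards; only view['R'] and view['t'] come from the sampler, while the ground-truth bookkeeping below is an assumption for illustration (scene_camera would be filled analogously with the camera intrinsics):

    for view in views:
        # Each sampled view carries a model-to-camera rotation and translation.
        scene_gt[im_id] = [{'cam_R_m2c': view['R'],
                            'cam_t_m2c': view['t'],
                            'obj_id': obj_id}]
        im_id += 1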
Example #8
 for img_id in range(len(rgb_files)):
     rgb_fn = rgb_files[img_id]
     gt = gts[img_id][0]
     obj_id = int(gt['obj_id'])
     z_tra = (gt['cam_t_m2c'] / 1000)[2, 0]
     z_tras.append(z_tra)
     filename = rgb_fn.split("/")[-1]
     if not (os.path.exists(crop_dir + "/{:02d}".format(obj_id))):
         os.makedirs(crop_dir + "/{:02d}".format(obj_id))
     if not (os.path.exists(cropmask_dir + "/{:02d}".format(obj_id))):
         os.makedirs(cropmask_dir + "/{:02d}".format(obj_id))
     crop_fn = os.path.join(crop_dir + "/{:02d}".format(obj_id), filename)
     cropmask_fn = os.path.join(cropmask_dir + "/{:02d}".format(obj_id),
                                filename)
     if not (os.path.exists(crop_fn)):
         img = inout.load_im(rgb_fn)
         mask = inout.load_im(mask_files[img_id]) > 0
         vu_valid = np.where(mask)
         bbox = np.array([
             np.min(vu_valid[0]),
             np.min(vu_valid[1]),
             np.max(vu_valid[0]),
             np.max(vu_valid[1])
         ])
         crop_img = np.zeros((bbox[2] - bbox[0], bbox[3] - bbox[1], 3),
                             np.uint8)
         img = img[bbox[0]:bbox[2], bbox[1]:bbox[3]]
         crop_img[mask[bbox[0]:bbox[2],
                       bbox[1]:bbox[3]]] = img[mask[bbox[0]:bbox[2],
                                                    bbox[1]:bbox[3]]]
         inout.save_im(crop_fn, crop_img)
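One detail worth noting in the crop above: np.max over the mask indices yields an inclusive index while Python slicing is exclusive, so the last masked row and column fall outside the crop. A tight variant (a sketch, not the original code) adds 1 to the upper bounds:

vu_valid = np.where(mask)
top, left = np.min(vu_valid[0]), np.min(vu_valid[1])
bottom, right = np.max(vu_valid[0]) + 1, np.max(vu_valid[1]) + 1  # inclusive -> exclusive
m = mask[top:bottom, left:right]
patch = img[top:bottom, left:right]
crop_img = np.zeros((bottom - top, right - left, 3), np.uint8)
crop_img[m] = patch[m]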
Example #9
    )
else:
    cfg_fn = sys.argv[1]  #"cfg/cfg_bop2019.json"
    cfg = inout.load_json(cfg_fn)

    dataset = sys.argv[2]
    bop_dir,source_dir,model_plys,model_info,model_ids,rgb_files,\
        depth_files,mask_files,gts,cam_param_global,scene_cam =\
             bop_io.get_dataset(cfg,dataset,incl_param=True)

    xyz_target_dir = bop_dir + "/train_xyz"
    im_width, im_height = cam_param_global['im_size']
    cam_K = cam_param_global['K']
    #check if the image dimension is the same
    rgb_fn = rgb_files[0]
    img_temp = inout.load_im(rgb_fn)
    if (img_temp.shape[0] != im_height or img_temp.shape[1] != im_width):
        print("the size of training images is different from test images")
        im_height = img_temp.shape[0]
        im_width = img_temp.shape[1]

    ren = Renderer((im_width, im_height), cam_K)
t_model = -1
if (len(sys.argv) == 3):
    print("render only this obj:", sys.argv[2])
    t_model = int(sys.argv[2])

for m_id, model_id in enumerate(model_ids):
    if (t_model != -1 and model_id != t_model):
        continue
    m_info = model_info['{}'.format(model_id)]
Example #10
def create_tf_example(example,
                      dp_split,
                      scene_camera,
                      scene_gt=None,
                      scene_gt_info=None):

    scene_id = example['scene_id']
    im_id = example['im_id']
    width = dp_split['im_size'][0]
    height = dp_split['im_size'][1]
    K = scene_camera[scene_id][im_id]['cam_K']

    gts = None
    gts_info = None
    mask_visib_fpaths = None
    if FLAGS.add_gt:
        gts = scene_gt[scene_id][im_id]
        gts_info = scene_gt_info[scene_id][im_id]

        # Collect paths to object masks.
        mask_visib_fpaths = []
        for gt_id in range(len(gts)):
            mask_visib_fpaths.append(dp_split['mask_visib_tpath'].format(
                scene_id=scene_id, im_id=im_id, gt_id=gt_id))

    # RGB image.
    im_path = None
    rgb_encoded = None
    if 'rgb' in dp_split['im_modalities']:

        # Absolute path to the RGB image.
        im_path = dp_split['rgb_tpath'].format(scene_id=scene_id, im_id=im_id)

        # Determine the format of the RGB image.
        rgb_format_in = im_path.split('.')[-1]
        if rgb_format_in in ['jpg', 'jpeg']:
            rgb_format_in = 'jpg'

        # Load the RGB image.
        if rgb_format_in == FLAGS.rgb_format:
            with tf.gfile.GFile(im_path, 'rb') as fid:
                rgb_encoded = fid.read()
        else:
            rgb = inout.load_im(im_path)
            rgb_encoded = encode_image(rgb, FLAGS.rgb_format)

    # Grayscale image.
    elif 'gray' in dp_split['im_modalities']:

        # Absolute path to the grayscale image.
        im_path = dp_split['gray_tpath'].format(scene_id=scene_id, im_id=im_id)

        # Load the grayscale image and duplicate the channel.
        gray = inout.load_im(im_path)
        rgb = np.dstack([gray, gray, gray])
        rgb_encoded = encode_image(rgb, FLAGS.rgb_format)

    # Path of the image relative to BOP_PATH.
    im_path_rel = im_path.split(config.BOP_PATH)[1]
    im_path_rel_encoded = im_path_rel.encode('utf8')

    # Collect ground-truth information about the annotated object instances.
    pose_q1, pose_q2, pose_q3, pose_q4 = [], [], [], []
    pose_t1, pose_t2, pose_t3 = [], [], []
    obj_ids = []
    obj_ids_txt = []
    obj_visibilities = []
    masks_visib_encoded = []
    if FLAGS.add_gt:
        for gt_id, gt in enumerate(gts):

            # Orientation of the object instance.
            R = np.eye(4)
            R[:3, :3] = gt['cam_R_m2c']
            q = transform.quaternion_from_matrix(R)
            pose_q1.append(q[0])
            pose_q2.append(q[1])
            pose_q3.append(q[2])
            pose_q4.append(q[3])

            # Translation of the object instance.
            t = gt['cam_t_m2c'].flatten()
            pose_t1.append(t[0])
            pose_t2.append(t[1])
            pose_t3.append(t[2])

            obj_ids_txt.append(str(gt['obj_id']).encode('utf8'))
            obj_ids.append(int(gt['obj_id']))
            obj_visibilities.append(float(gts_info[gt_id]['visib_fract']))

            # Mask of the visible part of the object instance.
            with tf.gfile.GFile(mask_visib_fpaths[gt_id], 'rb') as fid:
                mask_visib_encoded_png = fid.read()
                masks_visib_encoded.append(mask_visib_encoded_png)

    # Intrinsic camera parameters.
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # TF Example.
    feature = {
        'image/scene_id': tfrecord.int64_list_feature(scene_id),
        'image/im_id': tfrecord.int64_list_feature(im_id),
        'image/path': tfrecord.bytes_list_feature(im_path_rel_encoded),
        'image/encoded': tfrecord.bytes_list_feature(rgb_encoded),
        'image/width': tfrecord.int64_list_feature(width),
        'image/height': tfrecord.int64_list_feature(height),
        'image/channels': tfrecord.int64_list_feature(3),
        'image/camera/fx': tfrecord.float_list_feature([fx]),
        'image/camera/fy': tfrecord.float_list_feature([fy]),
        'image/camera/cx': tfrecord.float_list_feature([cx]),
        'image/camera/cy': tfrecord.float_list_feature([cy]),
        'image/object/id': tfrecord.int64_list_feature(obj_ids),
        'image/object/visibility':
        tfrecord.float_list_feature(obj_visibilities),
        'image/object/pose/q1': tfrecord.float_list_feature(pose_q1),
        'image/object/pose/q2': tfrecord.float_list_feature(pose_q2),
        'image/object/pose/q3': tfrecord.float_list_feature(pose_q3),
        'image/object/pose/q4': tfrecord.float_list_feature(pose_q4),
        'image/object/pose/t1': tfrecord.float_list_feature(pose_t1),
        'image/object/pose/t2': tfrecord.float_list_feature(pose_t2),
        'image/object/pose/t3': tfrecord.float_list_feature(pose_t3),
        'image/object/mask': tfrecord.bytes_list_feature(masks_visib_encoded),
    }
    tf_example = tf.train.Example(features=tf.train.Features(feature=feature))

    res = tf_example.SerializeToString()
    return res, example
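The serialized string returned here is what a TFRecord writer consumes; a hypothetical driver loop (the output path and the examples iterable are assumptions, not part of the snippet) could look like:

with tf.io.TFRecordWriter('/tmp/bop_split.tfrecord') as writer:
    for example in examples:
        serialized, _ = create_tf_example(
            example, dp_split, scene_camera, scene_gt, scene_gt_info)
        writer.write(serialized)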