Code Example #1
def estimate_camera_poses(TC1Oa, TC2Ob, labels_ab, TC1Og, TC2Od, labels_gd,
                          mesh_db):
    # Assume (TC1Oa, TC2Ob) and (TC1Og, TC2Od) are each poses of one object seen in cameras 1 and 2.
    # Notation differs from the paper, paper(code):
    # we have 1(a), 2(b), a(alpha), b(beta), g(gamma), d(delta)
    bsz = TC1Oa.shape[0]
    assert TC1Oa.shape == (bsz, 4, 4)
    assert TC2Ob.shape == (bsz, 4, 4)
    assert TC1Og.shape == (bsz, 4, 4)
    assert TC2Od.shape == (bsz, 4, 4)
    assert len(labels_ab) == bsz
    assert len(labels_gd) == bsz
    TObC2 = invert_T(TC2Ob)

    meshes_ab = mesh_db.select(labels_ab)
    ids_expand, sym_ids = expand_ids_for_symmetry(labels_ab,
                                                  mesh_db.n_sym_mapping)
    sym_expand = meshes_ab.symmetries[ids_expand, sym_ids]

    dist_fn = symmetric_distance_batched_fast
    dists, _ = dist_fn(TC1Og[ids_expand],
                       (TC1Oa[ids_expand] @ sym_expand @ TObC2[ids_expand])
                       @ TC2Od[ids_expand], labels_gd[ids_expand], mesh_db)
    min_ids = scatter_argmin(dists, ids_expand)
    S_Oa_star = meshes_ab.symmetries[torch.arange(len(min_ids)),
                                     sym_ids[min_ids]]
    TC1C2 = TC1Oa @ S_Oa_star @ TObC2
    return TC1C2
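Every example on this page exercises invert_T, which inverts a batch of 4x4 homogeneous rigid transforms (camera and object poses). The cosypose implementation is not reproduced here; the sketch below only illustrates how such a batched SE(3) inverse can be written under that assumption, using the fact that the inverse of [R | t] is [R^T | -R^T t]. invert_T_sketch is a hypothetical name, not a cosypose function.

import torch

def invert_T_sketch(T):
    # Minimal sketch of a batched rigid-transform inverse, assuming T is a
    # (bsz, 4, 4) tensor of [[R, t], [0, 1]] matrices as in the examples here.
    # Illustration only; not the cosypose implementation of invert_T.
    R = T[..., :3, :3]
    t = T[..., :3, [-1]]
    R_inv = R.transpose(-2, -1)
    T_inv = T.clone()
    T_inv[..., :3, :3] = R_inv
    T_inv[..., :3, [-1]] = -R_inv @ t
    return T_inv

For valid rigid transforms this agrees with torch.linalg.inv(T), while only transposing the rotation block instead of computing a general matrix inverse.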
Code Example #2
File: data_utils.py Project: greatwallet/cosypose
def parse_obs_data(obs):
    data = defaultdict(list)
    frame_info = obs['frame_info']
    TWC = torch.as_tensor(obs['camera']['TWC']).float()
    for n, obj in enumerate(obs['objects']):
        info = dict(frame_obj_id=n,
                    label=obj['name'],
                    visib_fract=obj.get('visib_fract', 1),
                    scene_id=frame_info['scene_id'],
                    view_id=frame_info['view_id'])
        data['infos'].append(info)
        data['TWO'].append(obj['TWO'])
        data['bboxes'].append(obj['bbox'])

    for k, v in data.items():
        if k != 'infos':
            data[k] = torch.stack([torch.as_tensor(x).float() for x in v])

    data['infos'] = pd.DataFrame(data['infos'])
    TCO = invert_T(TWC).unsqueeze(0) @ data['TWO']

    data = tc.PandasTensorCollection(
        infos=data['infos'],
        TCO=TCO,
        bboxes=data['bboxes'],
        poses=TCO,
    )
    return data
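parse_obs_data maps each world-frame object pose TWO into the camera frame with TCO = invert_T(TWC) @ TWO. Below is a small numerical sanity check of that composition with made-up poses, using torch.linalg.inv in place of cosypose's invert_T:

import torch

# Hypothetical poses: a camera 1 m above the world origin and an object in front of it.
TWC = torch.eye(4)
TWC[:3, 3] = torch.tensor([0.0, 0.0, 1.0])
TWO = torch.eye(4)
TWO[:3, 3] = torch.tensor([0.2, 0.0, 0.5])

# Object pose in the camera frame, as in parse_obs_data (invert_T replaced by torch.linalg.inv).
TCO = torch.linalg.inv(TWC) @ TWO

# Mapping back through the camera pose must recover the world-frame object pose.
assert torch.allclose(TWC @ TCO, TWO)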
Code Example #3
    def render(self,
               obj_infos,
               TCO,
               K,
               resolution=(240, 320),
               render_depth=False):
        TCO = torch.as_tensor(TCO).detach()
        TOC = invert_T(TCO).cpu().numpy()
        K = torch.as_tensor(K).cpu().numpy()
        bsz = len(TCO)
        assert TCO.shape == (bsz, 4, 4)
        assert K.shape == (bsz, 3, 3)

        # NOTE: Could be faster with Python 3.8's multiprocessing.shared_memory
        for n in np.arange(bsz):
            obj_info = dict(name=obj_infos[n]['name'], TWO=np.eye(4))
            cam_info = dict(
                resolution=resolution,
                K=K[n],
                TWC=TOC[n],
            )
            kwargs = dict(cam_infos=[cam_info],
                          obj_infos=[obj_info],
                          render_depth=render_depth)
            if self.n_workers > 0:
                kwargs['data_id'] = n
                self.in_queue.put(kwargs)
            else:
                cam_obs = self.plotters[0].render_scene(**kwargs)
                images = np.stack([d['rgb'] for d in cam_obs])
                depth = np.stack([d['depth']
                                  for d in cam_obs]) if render_depth else None
                self.out_queue.put((n, images, depth))

        images = [None for _ in np.arange(bsz)]
        depths = [None for _ in np.arange(bsz)]
        for n in np.arange(bsz):
            data_id, im, depth = self.out_queue.get()
            images[data_id] = im[0]
            if render_depth:
                depths[data_id] = depth[0]
        images = torch.as_tensor(np.stack(
            images, axis=0)).pin_memory().cuda(non_blocking=True)
        images = images.float().permute(0, 3, 1, 2) / 255

        if render_depth:
            depths = torch.as_tensor(np.stack(
                depths, axis=0)).pin_memory().cuda(non_blocking=True)
            depths = depths.float()
            return images, depths
        else:
            return images
Code Example #4
    def make_scene_infos(self, TWO_9d, TCW_9d):
        TWO = compute_transform_from_pose9d(TWO_9d)
        TCW = compute_transform_from_pose9d(TCW_9d)
        TWC = invert_T(TCW)
        objects = tc.PandasTensorCollection(
            infos=self.obj_infos,
            TWO=TWO,
        )
        cameras = tc.PandasTensorCollection(
            infos=self.cam_infos,
            TWC=TWC,
            K=self.K
        )
        return objects, cameras
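make_scene_infos turns 9D pose vectors back into 4x4 transforms via compute_transform_from_pose9d, whose implementation is not shown here. A common choice for such a parameterization is a continuous 6D rotation representation (two 3D vectors orthonormalized by Gram-Schmidt) plus a 3D translation; the sketch below assumes that layout purely for illustration and is not the project's code.

import torch
import torch.nn.functional as F

def transform_from_pose9d_sketch(pose9d):
    # Hypothetical (bsz, 9) -> (bsz, 4, 4) conversion: the first 6 values are
    # assumed to be a 6D rotation representation, the last 3 the translation.
    a1, a2, t = pose9d[:, 0:3], pose9d[:, 3:6], pose9d[:, 6:9]
    b1 = F.normalize(a1, dim=-1)
    b2 = F.normalize(a2 - (b1 * a2).sum(-1, keepdim=True) * b1, dim=-1)
    b3 = torch.cross(b1, b2, dim=-1)
    R = torch.stack([b1, b2, b3], dim=-1)  # columns are the orthonormal basis vectors
    T = torch.eye(4, dtype=pose9d.dtype).expand(len(pose9d), 4, 4).clone()
    T[:, :3, :3] = R
    T[:, :3, 3] = t
    return T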
Code Example #5
    def robust_initialization_TWO_TCW(self, n_init=1):
        TWO_9d_init = []
        TCW_9d_init = []
        dists = []
        for n in range(n_init):
            TWO, TWC = self.sample_initial_TWO_TWC(n)
            TCW = invert_T(TWC)
            TWO_9d, TCW_9d = self.extract_pose9d(TWO), self.extract_pose9d(TCW)
            dists_, _ = self.align_TCO_cand(TWO_9d, TCW_9d)
            TWO_9d_init.append(TWO_9d)
            TCW_9d_init.append(TCW_9d)
            dists.append(dists_.mean())
        best_iter = torch.tensor(dists).argmin()
        return TWO_9d_init[best_iter], TCW_9d_init[best_iter]
Code Example #6
    def __init__(self, candidates, cameras, pairs_TC1C2, mesh_db):

        self.device, self.dtype = candidates.device, candidates.poses.dtype
        self.mesh_db = mesh_db
        cameras = cameras.to(self.device).to(self.dtype)
        pairs_TC1C2 = pairs_TC1C2.to(self.device).to(self.dtype)

        view_ids = np.unique(candidates.infos['view_id'])
        keep_ids = np.logical_and(
            np.isin(pairs_TC1C2.infos['view1'], view_ids),
            np.isin(pairs_TC1C2.infos['view2'], view_ids),
        )
        pairs_TC1C2 = pairs_TC1C2[np.where(keep_ids)[0]]

        keep_ids = np.where(np.isin(cameras.infos['view_id'], view_ids))[0]
        cameras = cameras[keep_ids]

        self.cam_infos = cameras.infos
        self.view_to_id = {view_id: n for n, view_id in enumerate(self.cam_infos['view_id'])}
        self.K = cameras.K
        self.n_views = len(self.cam_infos)

        self.obj_infos = make_obj_infos(candidates)
        self.obj_to_id = {obj_id: n for n, obj_id in enumerate(self.obj_infos['obj_id'])}
        self.obj_points = self.mesh_db.select(self.obj_infos['label'].values).points
        self.n_points = self.obj_points.shape[1]
        self.n_objects = len(self.obj_infos)

        self.cand = candidates
        self.cand_TCO = candidates.poses
        self.cand_labels = candidates.infos['label']
        self.cand_view_ids = [self.view_to_id[view_id] for view_id in candidates.infos['view_id']]
        self.cand_obj_ids = [self.obj_to_id[obj_id] for obj_id in candidates.infos['obj_id']]
        self.n_candidates = len(self.cand_TCO)
        self.visibility_matrix = self.make_visibility_matrix(self.cand_view_ids, self.cand_obj_ids)

        self.v2v1_TC2C1_map = {(self.view_to_id[v2], self.view_to_id[v1]): invert_T(TC1C2) for
                               (v1, v2, TC1C2) in zip(pairs_TC1C2.infos['view1'],
                                                      pairs_TC1C2.infos['view2'],
                                                      pairs_TC1C2.TC1C2)}
        self.ov_TCO_cand_map = {(o, v): TCO for (o, v, TCO) in zip(self.cand_obj_ids,
                                                                   self.cand_view_ids,
                                                                   self.cand_TCO)}
        self.residuals_ids = self.make_residuals_ids()
Code Example #7
    def reproject_scene(self, objects, cameras):
        TCO_data = []
        for o in range(len(objects)):
            for v in range(len(cameras)):
                obj = objects[[o]]
                cam = cameras[[v]]
                infos = dict(
                    scene_id=cam.infos['scene_id'].values,
                    view_id=cam.infos['view_id'].values,
                    score=obj.infos['score'].values + 1.0,
                    view_group=obj.infos['view_group'].values,
                    label=obj.infos['label'].values,
                    batch_im_id=cam.infos['batch_im_id'].values,
                    obj_id=obj.infos['obj_id'].values,
                    from_ba=[True],
                )
                data_ = tc.PandasTensorCollection(
                    infos=pd.DataFrame(infos),
                    poses=invert_T(cam.TWC) @ obj.TWO,
                )
                TCO_data.append(data_)
        return tc.concatenate(TCO_data)
Code Example #8
def make_scene_renderings(objects, cameras, urdf_ds_name, distance=1.5, theta=np.pi/4, angles=[0],
                          object_scale=1.0, camera_scale=1.5, background_color=(242, 231, 191),
                          show_cameras=False,
                          resolution=(640, 480), colormap_rgb=None, object_id_ref=0,
                          gui=False,
                          use_nms3d=True,
                          camera_color=(0.2, 0.2, 0.2, 1.0)):

    renderer = BulletSceneRenderer([urdf_ds_name, 'camera'], background_color=background_color, gui=gui)
    urdf_ds = renderer.body_cache.urdf_ds

    # Patch the scales for visualization
    is_camera = np.array(['camera' in label for label in urdf_ds.index['label']])
    urdf_ds.index.loc[~is_camera, 'scale'] = object_scale * 0.001
    urdf_ds.index.loc[is_camera, 'scale'] = camera_scale

    if use_nms3d:
        objects = nms3d(objects, poses_attr='TWO', th=0.04)
    objects = objects.cpu()
    objects.TWO = objects.poses

    if colormap_rgb is None:
        colormap_rgb, _ = make_colormaps(objects.infos['label'])
    objects.infos['color'] = objects.infos['label'].apply(lambda k: colormap_rgb[k])

    cameras = cameras.cpu()
    TWWB = objects.poses[object_id_ref]

    cam = cameras[[0]]
    TCWB = invert_T(cam.TWC.squeeze(0)) @ TWWB
    TWBC = invert_T(TCWB)
    if TWBC[2, -1] < 0:
        quat = euler2quat([np.pi, 0, 0])
        TWWB = Transform(TWWB.numpy()) * Transform(quat, np.zeros(3))
        TWWB = TWWB.toHomogeneousMatrix()
    TWWB = np.asarray(TWWB)

    list_objects = []
    for obj_id in range(len(objects)):
        TWO = np.linalg.inv(TWWB) @ objects.TWO[obj_id].numpy()
        TWO[:3, -1] *= object_scale
        obj = dict(
            name=objects.infos.loc[obj_id, 'label'],
            color=objects.infos.loc[obj_id, 'color'],
            TWO=TWO,
        )
        list_objects.append(obj)
    target = np.mean(np.stack([obj['TWO'][:3, -1] for obj in list_objects]), axis=0)

    if show_cameras:
        for cam_id in range(len(cameras)):
            obj = dict(
                name='camera',
                color=camera_color,
                TWO=np.linalg.inv(TWWB) @ cameras.TWC[cam_id].numpy()
            )
            list_objects.append(obj)

    fx, fy = 515, 515
    w, h = resolution
    K = np.array([
        [fx, 0, w/2],
        [0, fy, h/2],
        [0, 0, 1]
    ])
    list_cameras = []
    for phi in angles:
        x = distance * np.sin(theta) * np.cos(phi)
        y = distance * np.sin(theta) * np.sin(phi)
        z = distance * np.cos(theta)
        t = np.array([x, y, z])
        R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
        R = R @ transforms3d.euler.euler2mat(0, 0, -np.pi/2, axes='sxyz')
        t += np.array(target)
        TWC = Transform(R, t).toHomogeneousMatrix()
        TWBC = TWWB @ TWC
        list_cameras.append(
            dict(K=K, TWC=TWC, resolution=(w, h))
        )
    renders = renderer.render_scene(list_objects, list_cameras)
    images = np.stack([render['rgb'] for render in renders])
    if gui:
        time.sleep(100)
    renderer.disconnect()
    return images
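The camera loop at the end of make_scene_renderings places viewpoints on a sphere of radius distance around the mean object position, at elevation theta and azimuth phi. A self-contained sketch of that placement, written directly with numpy instead of cosypose's Transform class, could look like the following (spherical_camera_pose is a hypothetical helper, not part of cosypose):

import numpy as np
import transforms3d

def spherical_camera_pose(distance, theta, phi, target):
    # Camera position on a sphere of radius `distance` centered on `target`,
    # mirroring the loop in make_scene_renderings above (illustration only).
    t = np.array([distance * np.sin(theta) * np.cos(phi),
                  distance * np.sin(theta) * np.sin(phi),
                  distance * np.cos(theta)]) + np.asarray(target)
    # Same orientation construction as the example: flip, tilt by theta,
    # rotate by phi, then roll the camera by -90 degrees about its z axis.
    R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
    R = R @ transforms3d.euler.euler2mat(0, 0, -np.pi / 2, axes='sxyz')
    TWC = np.eye(4)
    TWC[:3, :3] = R
    TWC[:3, 3] = t
    return TWC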
Code Example #9
def multiview_candidate_matching(candidates,
                                 mesh_db,
                                 model_bsz=1e3,
                                 score_bsz=1e5,
                                 dist_threshold=0.02,
                                 cameras=None,
                                 n_ransac_iter=20,
                                 n_min_inliers=3):
    timer_models = Timer()
    timer_score = Timer()
    timer_misc = Timer()

    known_poses = cameras is not None
    if known_poses:
        logger.debug('Using known camera poses.')
        n_ransac_iter = 1
    else:
        logger.debug('Estimating camera poses using RANSAC.')

    timer_misc.start()
    candidates.infos['cand_id'] = np.arange(len(candidates))
    timer_misc.pause()

    timer_models.start()
    seeds, tmatches = cosypose_cext.make_ransac_infos(
        candidates.infos['view_id'].values.tolist(),
        candidates.infos['label'].values.tolist(),
        n_ransac_iter,
        0,
    )

    if not known_poses:
        TC1C2 = estimate_camera_poses_batch(candidates,
                                            seeds,
                                            mesh_db,
                                            bsz=model_bsz)
    else:
        cameras.infos['idx'] = np.arange(len(cameras))
        view_map = cameras.infos.set_index('view_id')
        TWC1 = cameras.TWC[view_map.loc[seeds['view1'], 'idx'].values]
        TWC2 = cameras.TWC[view_map.loc[seeds['view2'], 'idx'].values]
        TC1C2 = invert_T(TWC1) @ TWC2
    timer_models.pause()

    timer_score.start()
    dists = score_tmaches_batch(candidates,
                                tmatches,
                                TC1C2,
                                mesh_db,
                                bsz=score_bsz)

    inliers = cosypose_cext.find_ransac_inliers(
        seeds['view1'],
        seeds['view2'],
        tmatches['hypothesis_id'],
        tmatches['cand1'],
        tmatches['cand2'],
        dists.cpu().numpy(),
        dist_threshold,
        n_min_inliers,
    )
    timer_score.pause()

    timer_misc.start()
    pairs_TC1C2 = get_best_viewpair_pose_est(TC1C2, seeds, inliers)
    filtered_candidates = scene_level_matching(candidates, inliers)
    scene_infos = make_obj_infos(filtered_candidates)
    timer_misc.pause()

    outputs = dict(
        filtered_candidates=filtered_candidates,
        scene_infos=scene_infos,
        pairs_TC1C2=pairs_TC1C2,
        time_models=timer_models.stop(),
        time_score=timer_score.stop(),
        time_misc=timer_misc.stop(),
    )
    return outputs