Beispiel #1
0
    def __init__(self, resolution=(320, 240), near=0.01, far=10, client_id=0):
        assert client_id >= 0, 'Please provide a client id (0 by default)'
        h, w = min(resolution), max(resolution)

        self._client_id = client_id
        self._near = near
        self._far = far
        self._shape = (h, w)
        self._rgba = np.zeros(self._shape + (4, ), dtype=np.uint8)
        self._mask = np.zeros(self._shape, dtype=np.uint8)
        self._depth = np.zeros(self._shape, dtype=np.float32)
        self._render_options = dict()
        self._render_flags = 0

        self.mask_link_index(True)
        self.casts_shadow(True)

        # Transform between standard camera coordinate (z forward) and OPENGL camera coordinate system
        wxyz = transforms3d.euler.euler2quat(np.pi / 2, 0, 0, axes='rxyz')
        xyzw = [*wxyz[1:], wxyz[0]]
        self.TCCGL = Transform(xyzw, (0, 0, 0))

        # Set some default parameters
        self.set_extrinsic_bullet(target=(0, 0, 0),
                                  distance=1.6,
                                  yaw=90,
                                  pitch=-35,
                                  roll=0)
        self.set_intrinsic_fov(90)
Beispiel #2
0
    def get_state(self):
        obs = dict()
        # Get images
        rgba, mask, depth = self._shot()
        rgb = rgba[..., :3]
        obs.update(rgb=rgb, mask=mask, depth=depth)

        # Get intrinsic, extrinsic parameters with standard conventions.
        h, w = self._shape
        if self._K is not None:
            K = self._K
        else:
            K = K_from_fov(self._proj_params['fov'])

        trans = self._view_params['target']
        orn = euler2quat([
            self._view_params[k] * np.pi / 180
            for k in ('pitch', 'roll', 'yaw')
        ],
                         axes='sxyz')
        TWCGL = Transform(orn, trans)
        TWC = TWCGL * self.TCCGL.inverse()
        obs.update(TWC=TWC.toHomogeneousMatrix(),
                   K=K,
                   resolution=(self._shape[1], self._shape[0]),
                   proj_mat=self._proj_mat,
                   near=self._near,
                   far=self._far)
        return obs
Beispiel #3
0
 def setup_scene(self, obj_infos):
     labels = [obj['name'] for obj in obj_infos]
     bodies = self.body_cache.get_bodies_by_labels(labels)
     for (obj_info, body) in zip(obj_infos, bodies):
         TWO = Transform(obj_info['TWO'])
         body.pose = TWO
         color = obj_info.get('color', None)
         if color is not None:
             pb.changeVisualShape(body.body_id, -1, physicsClientId=0, rgbaColor=color)
     return bodies
Beispiel #4
0
 def set_extrinsic_T(self, TWC):
     TWC = Transform(TWC)
     TWCGL = TWC * self.TCCGL
     xyzw = TWCGL.quaternion.coeffs()
     wxyz = [xyzw[-1], *xyzw[:-1]]
     pitch, roll, yaw = transforms3d.euler.quat2euler(wxyz, axes='sxyz')
     yaw = yaw * 180 / np.pi
     pitch = pitch * 180 / np.pi
     roll = roll * 180 / np.pi
     yaw = (yaw % 360 + 360) % 360
     distance = 0.0001
     self.set_extrinsic_bullet(target=TWCGL.translation, distance=distance,
                               pitch=pitch, roll=roll, yaw=yaw)
Beispiel #5
0
 def set_extrinsic_spherical(self, target=(0, 0, 0), rho=0.6, theta=np.pi/4, phi=0, roll=0):
     """
     Angles in *radians*.
     https://fr.wikipedia.org/wiki/Coordonn%C3%A9es_sph%C3%A9riques#/media/Fichier:Spherical_Coordinates_(Colatitude,_Longitude)_(b).svg
     """
     x = rho * np.sin(theta) * np.cos(phi)
     y = rho * np.sin(theta) * np.sin(phi)
     z = rho * np.cos(theta)
     t = np.array([x, y, z])
     R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
     R = R @ transforms3d.euler.euler2mat(0, 0, -np.pi/2 + roll, axes='sxyz')
     t += np.array(target)
     TWC = Transform(R, t)
     self.set_extrinsic_T(TWC)
def load_posecnn_results():
    results_path = LOCAL_DATA_DIR / 'saved_detections' / 'ycbv_posecnn.pkl'
    results = pkl.loads(results_path.read_bytes())
    infos, poses, bboxes = [], [], []

    l_offsets = (LOCAL_DATA_DIR / 'bop_datasets/ycbv' /
                 'offsets.txt').read_text().strip().split('\n')
    ycb_offsets = dict()
    for l_n in l_offsets:
        obj_id, offset = l_n[:2], l_n[3:]
        obj_id = int(obj_id)
        offset = np.array(json.loads(offset)) * 0.001
        ycb_offsets[obj_id] = offset

    def mat_from_qt(qt):
        wxyz = qt[:4].copy().tolist()
        xyzw = [*wxyz[1:], wxyz[0]]
        t = qt[4:].copy()
        return Transform(xyzw, t)

    for scene_view_str, result in results.items():
        scene_id, view_id = scene_view_str.split('/')
        scene_id, view_id = int(scene_id), int(view_id)
        n_dets = result['rois'].shape[0]
        for n in range(n_dets):
            obj_id = result['rois'][:, 1].astype(np.int)[n]
            label = f'obj_{obj_id:06d}'
            infos.append(
                dict(
                    scene_id=scene_id,
                    view_id=view_id,
                    score=result['rois'][n, 1],
                    label=label,
                ))
            bboxes.append(result['rois'][n, 2:6])
            pose = mat_from_qt(result['poses'][n])
            offset = ycb_offsets[obj_id]
            pose = pose * Transform((0, 0, 0, 1), offset).inverse()
            poses.append(pose.toHomogeneousMatrix())

    data = tc.PandasTensorCollection(
        infos=pd.DataFrame(infos),
        poses=torch.as_tensor(np.stack(poses)).float(),
        bboxes=torch.as_tensor(np.stack(bboxes)).float(),
    ).cpu()
    return data
Beispiel #7
0
 def render_images(self, cam_infos, render_depth=False):
     cam_obs = []
     for cam_info in cam_infos:
         K = cam_info['K']
         TWC = Transform(cam_info['TWC'])
         resolution = cam_info['resolution']
         cam = Camera(resolution=resolution, client_id=self.client_id)
         cam.set_intrinsic_K(K)
         cam.set_extrinsic_T(TWC)
         cam_obs_ = cam.get_state()
         if self.background_color is not None:
             im = cam_obs_['rgb']
             mask = cam_obs_['mask']
             im[np.logical_or(mask < 0, mask == 255)] = self.background_color
             if render_depth:
                 depth = cam_obs_['depth']
                 near, far = cam_obs_['near'], cam_obs_['far']
                 z_n = 2 * depth - 1
                 z_e = 2 * near * far / (far + near - z_n * (far - near))
                 z_e[np.logical_or(mask < 0, mask == 255)] = 0.
                 cam_obs_['depth'] = z_e
         cam_obs.append(cam_obs_)
     return cam_obs
Beispiel #8
0
 def mat_from_qt(qt):
     wxyz = qt[:4].copy().tolist()
     xyzw = [*wxyz[1:], wxyz[0]]
     t = qt[4:].copy()
     return Transform(xyzw, t)
Beispiel #9
0
class Camera:
    def __init__(self, resolution=(320, 240), near=0.01, far=10, client_id=0):
        assert client_id >= 0, 'Please provide a client id (0 by default)'
        h, w = min(resolution), max(resolution)

        self._client_id = client_id
        self._near = near
        self._far = far
        self._shape = (h, w)
        self._rgba = np.zeros(self._shape + (4, ), dtype=np.uint8)
        self._mask = np.zeros(self._shape, dtype=np.uint8)
        self._depth = np.zeros(self._shape, dtype=np.float32)
        self._render_options = dict()
        self._render_flags = 0

        self.mask_link_index(True)
        self.casts_shadow(True)

        # Transform between standard camera coordinate (z forward) and OPENGL camera coordinate system
        wxyz = transforms3d.euler.euler2quat(np.pi / 2, 0, 0, axes='rxyz')
        xyzw = [*wxyz[1:], wxyz[0]]
        self.TCCGL = Transform(xyzw, (0, 0, 0))

        # Set some default parameters
        self.set_extrinsic_bullet(target=(0, 0, 0),
                                  distance=1.6,
                                  yaw=90,
                                  pitch=-35,
                                  roll=0)
        self.set_intrinsic_fov(90)

    def set_extrinsic_bullet(self, target, distance, yaw, pitch, roll):
        """
        Angles in *degrees*.
        """
        up = 'z'
        self._view_params = dict(yaw=yaw,
                                 pitch=pitch,
                                 roll=roll,
                                 target=target,
                                 distance=distance)
        self._view_mat = pb.computeViewMatrixFromYawPitchRoll(
            target, distance, yaw, pitch, roll, 'xyz'.index(up))

    def set_extrinsic_T(self, TWC):
        TWC = Transform(TWC)
        TWCGL = TWC * self.TCCGL
        xyzw = TWCGL.quaternion.coeffs()
        wxyz = [xyzw[-1], *xyzw[:-1]]
        pitch, roll, yaw = transforms3d.euler.quat2euler(wxyz, axes='sxyz')
        yaw = yaw * 180 / np.pi
        pitch = pitch * 180 / np.pi
        roll = roll * 180 / np.pi
        yaw = (yaw % 360 + 360) % 360
        distance = 0.0001
        self.set_extrinsic_bullet(target=TWCGL.translation,
                                  distance=distance,
                                  pitch=pitch,
                                  roll=roll,
                                  yaw=yaw)

    def set_extrinsic_spherical(self,
                                target=(0, 0, 0),
                                rho=0.6,
                                theta=np.pi / 4,
                                phi=0,
                                roll=0):
        """
        Angles in *radians*.
        https://fr.wikipedia.org/wiki/Coordonn%C3%A9es_sph%C3%A9riques#/media/Fichier:Spherical_Coordinates_(Colatitude,_Longitude)_(b).svg
        """
        x = rho * np.sin(theta) * np.cos(phi)
        y = rho * np.sin(theta) * np.sin(phi)
        z = rho * np.cos(theta)
        t = np.array([x, y, z])
        R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
        R = R @ transforms3d.euler.euler2mat(
            0, 0, -np.pi / 2 + roll, axes='sxyz')
        t += np.array(target)
        TWC = Transform(R, t)
        self.set_extrinsic_T(TWC)

    def set_intrinsic_K(self, K):
        h, w = self._shape
        proj_mat = proj_from_K(K, near=self._near, far=self._far, h=h,
                               w=w).flatten()
        assert np.allclose(proj_mat[11], -1)
        self._proj_mat = proj_mat
        self._K = K
        self._proj_params = None

    def set_intrinsic_fov(self, fov):
        h, w = self._shape
        self._proj_params = dict(fov=fov)
        self._proj_mat = pb.computeProjectionMatrixFOV(fov=fov,
                                                       aspect=w / h,
                                                       nearVal=self._near,
                                                       farVal=self._far)
        self._K = None

    def set_intrinsic_f(self, *args):
        if len(args) == 2:
            fx, fy = args
            raise NotImplementedError
        else:
            assert len(args) == 1
            fy = args[0]
        h, w = self._shape
        fov_y = np.arctan(h * 0.5 / fy) * 180 / np.pi
        fov = fov_y * 2
        self.set_intrinsic_fov(fov)

    def get_state(self):
        obs = dict()
        # Get images
        rgba, mask, depth = self._shot()
        rgb = rgba[..., :3]
        obs.update(rgb=rgb, mask=mask, depth=depth)

        # Get intrinsic, extrinsic parameters with standard conventions.
        h, w = self._shape
        if self._K is not None:
            K = self._K
        else:
            K = K_from_fov(self._proj_params['fov'])

        trans = self._view_params['target']
        orn = euler2quat([
            self._view_params[k] * np.pi / 180
            for k in ('pitch', 'roll', 'yaw')
        ],
                         axes='sxyz')
        TWCGL = Transform(orn, trans)
        TWC = TWCGL * self.TCCGL.inverse()
        obs.update(TWC=TWC.toHomogeneousMatrix(),
                   K=K,
                   resolution=(self._shape[1], self._shape[0]),
                   proj_mat=self._proj_mat,
                   near=self._near,
                   far=self._far)
        return obs

    def _shot(self):
        """ Computes a RGB image, a depth buffer and a segmentation mask buffer
        with body unique ids of visible objects for each pixel.
        """
        h, w = self._shape
        renderer = pb.ER_BULLET_HARDWARE_OPENGL

        w, h, rgba, depth, mask = pb.getCameraImage(
            width=w,
            height=h,
            projectionMatrix=self._proj_mat,
            viewMatrix=self._view_mat,
            renderer=renderer,
            flags=self._render_flags,
            **self._render_options,
            physicsClientId=self._client_id)

        rgba = np.asarray(rgba, dtype=np.uint8).reshape((h, w, 4))
        depth = np.asarray(depth, dtype=np.float32).reshape((h, w))
        mask = np.asarray(mask, dtype=np.uint8).reshape((h, w))
        return rgba, mask, depth

    def _project(self, fov, near, far):
        """ Apply camera projection matrix.
            Args:
             fov (float): Field of view.
             near float): Near plane distance.
             far (float): Far plane distance.
        """
        self.near = near
        self.far = far
        h, w = self._shape
        self._proj_mat = pb.computeProjectionMatrixFOV(fov=fov,
                                                       aspect=w / h,
                                                       nearVal=near,
                                                       farVal=far)

    def mask_link_index(self, flag):
        """ If is enabled, the mask combines the object unique id and link index
        as follows: value = objectUniqueId + ((linkIndex+1)<<24).
        """
        if flag:
            self._render_flags |= pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX
        else:
            self._render_flags &= ~pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX

    def casts_shadow(self, flag):
        """ 1 for shadows, 0 for no shadows. """
        self._render_options['shadow'] = 1 if flag else 0
Beispiel #10
0
    def __getitem__(self, frame_id):
        row = self.frame_index.iloc[frame_id]
        scene_id, view_id = row.scene_id, row.view_id
        view_id = int(view_id)
        view_id_str = f'{view_id:06d}'
        scene_id_str = f'{int(scene_id):06d}'
        scene_dir = self.base_dir / scene_id_str

        rgb_dir = scene_dir / 'rgb'
        if not rgb_dir.exists():
            rgb_dir = scene_dir / 'gray'
        rgb_path = rgb_dir / f'{view_id_str}.png'
        if not rgb_path.exists():
            rgb_path = rgb_path.with_suffix('.jpg')
        if not rgb_path.exists():
            rgb_path = rgb_path.with_suffix('.tif')

        rgb = np.array(Image.open(rgb_path))
        if rgb.ndim == 2:
            rgb = np.repeat(rgb[..., None], 3, axis=-1)
        rgb = rgb[..., :3]
        h, w = rgb.shape[:2]
        rgb = torch.as_tensor(rgb)

        cam_annotation = self.annotations[scene_id_str]['scene_camera'][str(
            view_id)]
        if 'cam_R_w2c' in cam_annotation:
            RC0 = np.array(cam_annotation['cam_R_w2c']).reshape(3, 3)
            tC0 = np.array(cam_annotation['cam_t_w2c']) * 0.001
            TC0 = Transform(RC0, tC0)
        else:
            TC0 = Transform(np.eye(3), np.zeros(3))
        K = np.array(cam_annotation['cam_K']).reshape(3, 3)
        T0C = TC0.inverse()
        T0C = T0C.toHomogeneousMatrix()
        camera = dict(T0C=T0C, K=K, TWC=T0C, resolution=rgb.shape[:2])

        T0C = TC0.inverse()

        objects = []
        mask = np.zeros((h, w), dtype=np.uint8)
        if 'scene_gt_info' in self.annotations[scene_id_str]:
            annotation = self.annotations[scene_id_str]['scene_gt'][str(
                view_id)]
            n_objects = len(annotation)
            visib = self.annotations[scene_id_str]['scene_gt_info'][str(
                view_id)]
            for n in range(n_objects):
                RCO = np.array(annotation[n]['cam_R_m2c']).reshape(3, 3)
                tCO = np.array(annotation[n]['cam_t_m2c']) * 0.001
                TCO = Transform(RCO, tCO)
                T0O = T0C * TCO
                T0O = T0O.toHomogeneousMatrix()
                obj_id = annotation[n]['obj_id']
                name = f'obj_{int(obj_id):06d}'
                bbox_visib = np.array(visib[n]['bbox_visib'])
                x, y, w, h = bbox_visib
                x1 = x
                y1 = y
                x2 = x + w
                y2 = y + h
                obj = dict(label=name,
                           name=name,
                           TWO=T0O,
                           T0O=T0O,
                           visib_fract=visib[n]['visib_fract'],
                           id_in_segm=n + 1,
                           bbox=[x1, y1, x2, y2])
                objects.append(obj)

            mask_path = scene_dir / 'mask_visib' / f'{view_id_str}_all.png'
            if mask_path.exists():
                mask = np.array(Image.open(mask_path))
            else:
                for n in range(n_objects):
                    mask_n = np.array(
                        Image.open(scene_dir / 'mask_visib' /
                                   f'{view_id_str}_{n:06d}.png'))
                    mask[mask_n == 255] = n + 1

        mask = torch.as_tensor(mask)

        if self.load_depth:
            depth_path = scene_dir / 'depth' / f'{view_id_str}.png'
            if not depth_path.exists():
                depth_path = depth_path.with_suffix('.tif')
            depth = np.array(inout.load_depth(depth_path))
            camera['depth'] = depth * cam_annotation['depth_scale'] / 1000

        obs = dict(
            objects=objects,
            camera=camera,
            frame_info=row.to_dict(),
        )
        return rgb, mask, obs
Beispiel #11
0
 def pose(self):
     pos, orn = self._client.getBasePositionAndOrientation(self._body_id)
     return Transform(orn, pos).toHomogeneousMatrix()