Esempio n. 1
0
    def __init__(self, resolution=(320, 240), near=0.01, far=10, client_id=0):
        assert client_id >= 0, 'Please provide a client id (0 by default)'
        h, w = min(resolution), max(resolution)

        self._client_id = client_id
        self._near = near
        self._far = far
        self._shape = (h, w)
        self._rgba = np.zeros(self._shape + (4, ), dtype=np.uint8)
        self._mask = np.zeros(self._shape, dtype=np.uint8)
        self._depth = np.zeros(self._shape, dtype=np.float32)
        self._render_options = dict()
        self._render_flags = 0

        self.mask_link_index(True)
        self.casts_shadow(True)

        # Transform between standard camera coordinate (z forward) and OPENGL camera coordinate system
        wxyz = transforms3d.euler.euler2quat(np.pi / 2, 0, 0, axes='rxyz')
        xyzw = [*wxyz[1:], wxyz[0]]
        self.TCCGL = Transform(xyzw, (0, 0, 0))

        # Set some default parameters
        self.set_extrinsic_bullet(target=(0, 0, 0),
                                  distance=1.6,
                                  yaw=90,
                                  pitch=-35,
                                  roll=0)
        self.set_intrinsic_fov(90)
Esempio n. 2
0
    def get_state(self):
        obs = dict()
        # Get images
        rgba, mask_uint8, mask_int, depth = self._shot()
        rgb = rgba[..., :3]
        obs.update(rgb=rgb, mask=mask_uint8, depth=depth, mask_int=mask_int)

        # Get intrinsic, extrinsic parameters with standard conventions.
        h, w = self._shape
        if self._K is not None:
            K = self._K
        else:
            K = K_from_fov(self._proj_params['fov'])

        trans = self._view_params['target']
        orn = euler2quat([
            self._view_params[k] * np.pi / 180
            for k in ('pitch', 'roll', 'yaw')
        ],
                         axes='sxyz')
        TWCGL = Transform(orn, trans)
        TWC = TWCGL * self.TCCGL.inverse()
        obs.update(TWC=TWC.toHomogeneousMatrix(),
                   K=K,
                   resolution=(self._shape[1], self._shape[0]),
                   proj_mat=self._proj_mat,
                   near=self._near,
                   far=self._far)
        return obs
Esempio n. 3
0
 def setup_scene(self, obj_infos):
     labels = [obj['name'] for obj in obj_infos]
     bodies = self.body_cache.get_bodies_by_labels(labels)
     for (obj_info, body) in zip(obj_infos, bodies):
         TWO = Transform(obj_info['TWO'])
         body.pose = TWO
         q = obj_info.get('joints', None)
         if q is not None:
             body.q = q
         color = obj_info.get('color', None)
         if color is not None:
             pb.changeVisualShape(body.body_id,
                                  -1,
                                  physicsClientId=0,
                                  rgbaColor=color)
     return bodies
Esempio n. 4
0
 def set_extrinsic_T(self, TWC):
     TWC = Transform(TWC)
     TWCGL = TWC * self.TCCGL
     xyzw = TWCGL.quaternion.coeffs()
     wxyz = [xyzw[-1], *xyzw[:-1]]
     pitch, roll, yaw = transforms3d.euler.quat2euler(wxyz, axes='sxyz')
     yaw = yaw * 180 / np.pi
     pitch = pitch * 180 / np.pi
     roll = roll * 180 / np.pi
     yaw = (yaw % 360 + 360) % 360
     distance = 0.0001
     self.set_extrinsic_bullet(target=TWCGL.translation,
                               distance=distance,
                               pitch=pitch,
                               roll=roll,
                               yaw=yaw)
Esempio n. 5
0
 def set_extrinsic_spherical(self,
                             target=(0, 0, 0),
                             rho=0.6,
                             theta=np.pi / 4,
                             phi=0,
                             roll=0):
     """
     Angles in *radians*.
     https://fr.wikipedia.org/wiki/Coordonn%C3%A9es_sph%C3%A9riques#/media/Fichier:Spherical_Coordinates_(Colatitude,_Longitude)_(b).svg
     """
     x = rho * np.sin(theta) * np.cos(phi)
     y = rho * np.sin(theta) * np.sin(phi)
     z = rho * np.cos(theta)
     t = np.array([x, y, z])
     R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
     R = R @ transforms3d.euler.euler2mat(
         0, 0, -np.pi / 2 + roll, axes='sxyz')
     t += np.array(target)
     TWC = Transform(R, t)
     self.set_extrinsic_T(TWC)
Esempio n. 6
0
 def render_images(self, cam_infos, render_depth=False, render_mask=True):
     cam_obs = []
     for cam_info in cam_infos:
         K = cam_info['K']
         TWC = Transform(cam_info['TWC'])
         resolution = cam_info['resolution']
         cam = Camera(resolution=resolution, client_id=self.client_id)
         cam.set_intrinsic_K(K)
         cam.set_extrinsic_T(TWC)
         cam_obs_ = cam.get_state()
         if self.background_color is not None:
             im = cam_obs_['rgb']
             mask = cam_obs_['mask']
             im[np.logical_or(mask < 0,
                              mask == 255)] = self.background_color
             if render_depth:
                 depth = cam_obs_['depth']
                 near, far = cam_obs_['near'], cam_obs_['far']
                 z_n = 2 * depth - 1
                 z_e = 2 * near * far / (far + near - z_n * (far - near))
                 z_e[np.logical_or(mask < 0, mask == 255)] = 0.
                 cam_obs_['depth'] = z_e
         cam_obs.append(cam_obs_)
     return cam_obs
Esempio n. 7
0
    def __getitem__(self, frame_id):
        row = self.frame_index.iloc[frame_id]
        rgb_path = self.ds_dir / row.rgb_path
        rgb = np.asarray(Image.open(rgb_path))[..., :3]
        h, w = rgb.shape[:2]

        mask = None
        bbox = None
        keypoints_2d = np.zeros((17, 2), dtype=np.float) * float('nan')
        if self.annotation_types['segmentation']:
            mask = np.asarray(
                Image.open(rgb_path.parent.parent / 'seg' /
                           rgb_path.with_suffix('.png').name))
            mask = (mask[..., 0] == 0).astype(np.uint8) * 255
            ids = np.where(mask)
            x1, y1, x2, y2 = np.min(ids[1]), np.min(ids[0]), np.max(
                ids[1]), np.max(ids[0])
            bbox = np.array([x1, y1, x2, y2])

        if self.annotation_types['keypoints_2d']:
            infos = json.loads(
                (rgb_path.parent.parent / 'd3_preds' /
                 rgb_path.with_suffix('.json').name).read_text())
            x1, x2, y1, y2 = np.array(infos['bbox']).flatten()
            bbox = np.array([x1, y1, x2, y2])

            mask = make_masks_from_det(bbox[None], h, w).numpy().astype(
                np.uint8)[0] * 255

            pts = np.transpose(np.array(infos['reprojection']))
            if 'visibility' in infos:
                visibility = infos['visibility'][:-2]
                pts[np.invert(visibility), :] = -1.0
            keypoints_2d = pts

        elif self.annotation_types['keypoints_3d']:
            cam_infos = json.loads(
                (rgb_path.parent.parent / 'caminfo' /
                 rgb_path.with_suffix('.json').name).read_text())
            vertex_infos = json.loads(
                (rgb_path.parent.parent.parent / 'vertex' /
                 rgb_path.with_suffix('.json').name).read_text())
            joint_infos = json.loads(
                (rgb_path.parent.parent.parent / 'joint' /
                 rgb_path.with_suffix('.json').name).read_text())
            keypoints_2d = make_2d_keypoints(vertex_infos, joint_infos,
                                             cam_infos)

        if self.annotation_types['cam_info']:
            infos = json.loads(
                (rgb_path.parent.parent / 'caminfo' /
                 rgb_path.with_suffix('.json').name).read_text())
            H, W = infos['FilmHeight'], infos['FilmWidth']
            assert rgb.shape[:2] == (H, W)
            fov = infos['Fov'] * np.pi / 180
            f = W / (2 * np.tan(fov / 2))  # From UE, differs from pybullet.
            trans = np.array([infos['Location'][k]
                              for k in ('X', 'Y', 'Z')]) * 0.001
            pitch, yaw, roll = np.array(
                [infos['Rotation'][k] for k in ('Pitch', 'Yaw', 'Roll')])
            trans[1] *= -1
            yaw = -180 + yaw
            roll = -roll
            pitch = -pitch
            pitch, yaw, roll = np.array([pitch, yaw, roll]) * np.pi / 180
            R = np.asarray(make_rotation(pitch, -yaw, -roll))
            R_NORMAL_UE = np.array([
                [0, -1, 0],
                [0, 0, -1],
                [1, 0, 0],
            ])
            R = R @ R_NORMAL_UE.T
            T0C = Transform(R, trans).toHomogeneousMatrix()
        else:
            f = self.focal_length_when_unknown
            T0C = np.zeros((4, 4))
        K = np.array([[f, 0, w / 2], [0, f, h / 2], [0, 0, 1]])
        cam = dict(K=K, T0C=T0C, TWC=T0C, resolution=(w, h))

        angles = None
        if self.annotation_types['angles']:
            angles = json.loads(
                (rgb_path.parent.parent.parent / 'angles' /
                 rgb_path.with_suffix('.json').name).read_text())
            q = np.array(angles)[:4] * np.pi / 180
            q[0] *= -1
        else:
            q = np.array([np.nan for _ in range(4)])
        q = {
            joint_name: q[n]
            for n, joint_name in enumerate([
                'Model_Rotation', 'Rotation_Base', 'Base_Elbow', 'Elbow_Wrist'
            ])
        }

        obs = dict()
        TWO = Transform((0, 0, 0, 1), (0, 0, 0)).toHomogeneousMatrix()
        robot = dict(joints=q,
                     name='owi535',
                     id_in_segm=255,
                     bbox=bbox,
                     TWO=TWO,
                     keypoints_2d=keypoints_2d)
        obs['objects'] = [robot]
        obs['camera'] = cam
        obs['frame_info'] = row.to_dict()
        return rgb, mask, obs
Esempio n. 8
0
class Camera:
    def __init__(self, resolution=(320, 240), near=0.01, far=10, client_id=0):
        assert client_id >= 0, 'Please provide a client id (0 by default)'
        h, w = min(resolution), max(resolution)

        self._client_id = client_id
        self._near = near
        self._far = far
        self._shape = (h, w)
        self._rgba = np.zeros(self._shape + (4, ), dtype=np.uint8)
        self._mask = np.zeros(self._shape, dtype=np.uint8)
        self._depth = np.zeros(self._shape, dtype=np.float32)
        self._render_options = dict()
        self._render_flags = 0

        self.mask_link_index(True)
        self.casts_shadow(True)

        # Transform between standard camera coordinate (z forward) and OPENGL camera coordinate system
        wxyz = transforms3d.euler.euler2quat(np.pi / 2, 0, 0, axes='rxyz')
        xyzw = [*wxyz[1:], wxyz[0]]
        self.TCCGL = Transform(xyzw, (0, 0, 0))

        # Set some default parameters
        self.set_extrinsic_bullet(target=(0, 0, 0),
                                  distance=1.6,
                                  yaw=90,
                                  pitch=-35,
                                  roll=0)
        self.set_intrinsic_fov(90)

    def set_extrinsic_bullet(self, target, distance, yaw, pitch, roll):
        """
        Angles in *degrees*.
        """
        up = 'z'
        self._view_params = dict(yaw=yaw,
                                 pitch=pitch,
                                 roll=roll,
                                 target=target,
                                 distance=distance)
        self._view_mat = pb.computeViewMatrixFromYawPitchRoll(
            target, distance, yaw, pitch, roll, 'xyz'.index(up))

    def set_extrinsic_T(self, TWC):
        TWC = Transform(TWC)
        TWCGL = TWC * self.TCCGL
        xyzw = TWCGL.quaternion.coeffs()
        wxyz = [xyzw[-1], *xyzw[:-1]]
        pitch, roll, yaw = transforms3d.euler.quat2euler(wxyz, axes='sxyz')
        yaw = yaw * 180 / np.pi
        pitch = pitch * 180 / np.pi
        roll = roll * 180 / np.pi
        yaw = (yaw % 360 + 360) % 360
        distance = 0.0001
        self.set_extrinsic_bullet(target=TWCGL.translation,
                                  distance=distance,
                                  pitch=pitch,
                                  roll=roll,
                                  yaw=yaw)

    def set_extrinsic_spherical(self,
                                target=(0, 0, 0),
                                rho=0.6,
                                theta=np.pi / 4,
                                phi=0,
                                roll=0):
        """
        Angles in *radians*.
        https://fr.wikipedia.org/wiki/Coordonn%C3%A9es_sph%C3%A9riques#/media/Fichier:Spherical_Coordinates_(Colatitude,_Longitude)_(b).svg
        """
        x = rho * np.sin(theta) * np.cos(phi)
        y = rho * np.sin(theta) * np.sin(phi)
        z = rho * np.cos(theta)
        t = np.array([x, y, z])
        R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
        R = R @ transforms3d.euler.euler2mat(
            0, 0, -np.pi / 2 + roll, axes='sxyz')
        t += np.array(target)
        TWC = Transform(R, t)
        self.set_extrinsic_T(TWC)

    def set_intrinsic_K(self, K):
        h, w = self._shape
        proj_mat = proj_from_K(K, near=self._near, far=self._far, h=h,
                               w=w).flatten()
        assert np.allclose(proj_mat[11], -1)
        self._proj_mat = proj_mat
        self._K = K
        self._proj_params = None

    def set_intrinsic_fov(self, fov):
        h, w = self._shape
        self._proj_params = dict(fov=fov)
        self._proj_mat = pb.computeProjectionMatrixFOV(fov=fov,
                                                       aspect=w / h,
                                                       nearVal=self._near,
                                                       farVal=self._far)
        self._K = None

    def set_intrinsic_f(self, *args):
        if len(args) == 2:
            fx, fy = args
            raise NotImplementedError
        else:
            assert len(args) == 1
            fy = args[0]
        h, w = self._shape
        fov_y = np.arctan(h * 0.5 / fy) * 180 / np.pi
        fov = fov_y * 2
        self.set_intrinsic_fov(fov)

    def get_state(self):
        obs = dict()
        # Get images
        rgba, mask_uint8, mask_int, depth = self._shot()
        rgb = rgba[..., :3]
        obs.update(rgb=rgb, mask=mask_uint8, depth=depth, mask_int=mask_int)

        # Get intrinsic, extrinsic parameters with standard conventions.
        h, w = self._shape
        if self._K is not None:
            K = self._K
        else:
            K = K_from_fov(self._proj_params['fov'])

        trans = self._view_params['target']
        orn = euler2quat([
            self._view_params[k] * np.pi / 180
            for k in ('pitch', 'roll', 'yaw')
        ],
                         axes='sxyz')
        TWCGL = Transform(orn, trans)
        TWC = TWCGL * self.TCCGL.inverse()
        obs.update(TWC=TWC.toHomogeneousMatrix(),
                   K=K,
                   resolution=(self._shape[1], self._shape[0]),
                   proj_mat=self._proj_mat,
                   near=self._near,
                   far=self._far)
        return obs

    def _shot(self):
        """ Computes a RGB image, a depth buffer and a segmentation mask buffer
        with body unique ids of visible objects for each pixel.
        """
        h, w = self._shape
        renderer = pb.ER_BULLET_HARDWARE_OPENGL

        w, h, rgba, depth, mask = pb.getCameraImage(
            width=w,
            height=h,
            projectionMatrix=self._proj_mat,
            viewMatrix=self._view_mat,
            renderer=renderer,
            flags=self._render_flags,
            **self._render_options,
            physicsClientId=self._client_id)

        rgba = np.asarray(rgba, dtype=np.uint8).reshape((h, w, 4))
        depth = np.asarray(depth, dtype=np.float32).reshape((h, w))
        mask_uint8 = np.asarray(mask, dtype=np.uint8).reshape((h, w))
        mask_int = np.asarray(mask, dtype=np.int).reshape((h, w))
        return rgba, mask_uint8, mask_int, depth

    def _project(self, fov, near, far):
        """ Apply camera projection matrix.
            Args:
             fov (float): Field of view.
             near float): Near plane distance.
             far (float): Far plane distance.
        """
        self.near = near
        self.far = far
        h, w = self._shape
        self._proj_mat = pb.computeProjectionMatrixFOV(fov=fov,
                                                       aspect=w / h,
                                                       nearVal=near,
                                                       farVal=far)

    def mask_link_index(self, flag):
        """ If is enabled, the mask combines the object unique id and link index
        as follows: value = objectUniqueId + ((linkIndex+1)<<24).
        """
        if flag:
            self._render_flags |= pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX
        else:
            self._render_flags &= ~pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX

    def casts_shadow(self, flag):
        """ 1 for shadows, 0 for no shadows. """
        self._render_options['shadow'] = 1 if flag else 0
Esempio n. 9
0
    def __getitem__(self, idx):
        row = self.frame_index.iloc[idx]
        rgb_path = Path(row.rgb_path)
        rgb = np.asarray(Image.open(rgb_path))
        assert rgb.shape[-1] == 3, rgb_path

        mask = None
        annotations = json.loads(
            rgb_path.with_suffix('').with_suffix('.json').read_text())

        # Camera
        TWC = np.eye(4)
        camera_infos_path = self.base_dir / '_camera_settings.json'
        h, w = rgb.shape[0], rgb.shape[1]
        if camera_infos_path.exists():
            cam_infos = json.loads(camera_infos_path.read_text())
            assert len(cam_infos['camera_settings']) == 1
            cam_infos = cam_infos['camera_settings'][0]['intrinsic_settings']
            fx, fy, cx, cy = [cam_infos[k] for k in ('fx', 'fy', 'cx', 'cy')]
        else:
            fx, fy = 320, 320
            cx, cy = w / 2, h / 2

        K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
        camera = dict(
            TWC=TWC,
            resolution=(w, h),
            K=K,
        )
        label = self.label

        # Objects
        obj_data = annotations['objects'][0]
        if 'quaternion_xyzw' in obj_data:
            TWO = Transform(np.array(obj_data['quaternion_xyzw']),
                            np.array(obj_data['location']) *
                            self.scale).toHomogeneousMatrix()
            R_NORMAL_UE = np.array([
                [0, -1, 0],
                [0, 0, -1],
                [1, 0, 0],
            ])
            TWO[:3, :3] = TWO[:3, :3] @ R_NORMAL_UE
        else:
            TWO = np.eye(4) * float('nan')
        TWO = Transform(TWO)

        joints = annotations['sim_state']['joints']
        joints = OrderedDict(
            {d['name'].split('/')[-1]: d['position']
             for d in joints})
        if self.label == 'iiwa7':
            joints = {
                k.replace('iiwa7_', 'iiwa_'): v
                for k, v in joints.items()
            }

        keypoints_2d = obj_data['keypoints']
        keypoints_2d = np.concatenate(
            [np.array(kp['projected_location'])[None] for kp in keypoints_2d],
            axis=0)
        keypoints_2d = np.unique(keypoints_2d, axis=0)
        valid = np.logical_and(
            np.logical_and(keypoints_2d[:, 0] >= 0,
                           keypoints_2d[:, 0] <= w - 1),
            np.logical_and(keypoints_2d[:, 1] >= 0,
                           keypoints_2d[:, 1] <= h - 1))
        keypoints_2d = keypoints_2d[valid]
        det_valid = len(keypoints_2d) >= 2
        if det_valid:
            bbox = np.concatenate(
                [np.min(keypoints_2d, axis=0),
                 np.max(keypoints_2d, axis=0)])
            dx, dy = bbox[[2, 3]] - bbox[[0, 1]]
            if dx <= 20 or dy <= 20:
                det_valid = False

        if not det_valid or self.image_bbox:
            m = 1 / 5
            keypoints_2d = np.array([[w * m, h * m], [w - w * m, h - h * m]])
        bbox = np.concatenate(
            [np.min(keypoints_2d, axis=0),
             np.max(keypoints_2d, axis=0)])
        mask = make_masks_from_det(bbox[None], h, w).numpy().astype(
            np.uint8)[0] * 1

        keypoints = obj_data['keypoints']
        TCO_keypoints_3d = {
            kp['name']: np.array(kp['location']) * self.scale
            for kp in keypoints
        }
        TCO_keypoints_3d = np.array(
            [TCO_keypoints_3d.get(k, np.nan) for k in self.keypoint_names])
        keypoints_2d = {
            kp['name']: kp['projected_location']
            for kp in keypoints
        }
        keypoints_2d = np.array(
            [keypoints_2d.get(k, np.nan) for k in self.keypoint_names])

        robot = dict(label=label,
                     name=label,
                     joints=joints,
                     TWO=TWO.toHomogeneousMatrix(),
                     bbox=bbox,
                     id_in_segm=1,
                     keypoints_2d=keypoints_2d,
                     TCO_keypoints_3d=TCO_keypoints_3d)

        state = dict(objects=[robot], camera=camera, frame_info=row.to_dict())
        return rgb, mask, state
Esempio n. 10
0
 def pose(self):
     pos, orn = self._client.getBasePositionAndOrientation(self._body_id)
     return Transform(orn, pos).toHomogeneousMatrix()