Example #1
def analyze_image_depths(path, bbox, out_name):
    """
    path should lead to a .npy file
    """
    img = np.load(path)
    img = np.reshape(img, img.shape[:2])
    img_slice = img[bbox[0]:bbox[2], bbox[1]:bbox[3]]
    vec = np.ndarray.flatten(img_slice)
    # vec = reject_outliers(vec)

    var = np.var(vec)
    mean = np.mean(vec)
    print("State for {}: Mean: {}, Standard Deviation: {}\n".format(
        out_name, mean, np.sqrt(var)))

    n, bins, patches = plt.hist(vec, vec.size // 10, facecolor="blue")

    plt.xlabel("depth value")
    plt.ylabel("count")
    plt.title("depth within region")
    plt.grid(True)
    plt.show()

    plt.savefig(os.path.join(out_path, "graph_" + out_name),
                bbox_inches="tight")
    plt.close()
    depth_img = DepthImage(img_slice)
    depth_img.save(os.path.join(out_path, out_name))
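The snippet above relies on module-level imports and an out_path output directory that are not shown. A plausible header for such a script, with placeholder values marked as such; the DepthImage import path is an assumption (newer releases expose it from autolab_core instead of perception):

import os

import matplotlib.pyplot as plt
import numpy as np
from perception import DepthImage  # assumption: may be autolab_core in newer releases

out_path = "depth_analysis_output"  # hypothetical output directory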
Example #2
    def test_depth_init(self):
        # valid data
        random_valid_data = np.random.rand(IM_HEIGHT,
                                           IM_WIDTH).astype(np.float32)
        im = DepthImage(random_valid_data)
        self.assertEqual(im.height, IM_HEIGHT)
        self.assertEqual(im.width, IM_WIDTH)
        self.assertEqual(im.channels, 1)
        self.assertEqual(im.type, np.float32)
        self.assertTrue(np.allclose(im.data, random_valid_data))

        # invalid channels
        random_data = np.random.rand(IM_HEIGHT, IM_WIDTH, 3).astype(np.float32)
        caught_bad_channels = False
        try:
            im = DepthImage(random_data)
        except ValueError:
            caught_bad_channels = True
        self.assertTrue(caught_bad_channels)

        # invalid type
        random_data = np.random.rand(IM_HEIGHT, IM_WIDTH).astype(np.uint8)
        caught_bad_dtype = False
        try:
            im = DepthImage(random_data)
        except ValueError:
            caught_bad_dtype = True
        self.assertTrue(caught_bad_dtype)
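The caught_bad_channels / caught_bad_dtype bookkeeping above can be written more compactly with unittest's assertRaises context manager. A minimal standalone sketch of the same checks, assuming DepthImage raises ValueError exactly as the test above expects and is importable from perception (autolab_core in newer releases):

import unittest

import numpy as np
from perception import DepthImage  # assumption: may be autolab_core in newer releases

IM_HEIGHT, IM_WIDTH = 100, 100  # placeholder sizes; the original test module defines its own


class DepthImageInvalidInputTest(unittest.TestCase):
    def test_invalid_inputs(self):
        # a 3-channel array should be rejected: depth images are single-channel
        bad_channels = np.random.rand(IM_HEIGHT, IM_WIDTH, 3).astype(np.float32)
        with self.assertRaises(ValueError):
            DepthImage(bad_channels)

        # integer data should be rejected: depth values are expected to be floats
        bad_dtype = np.random.rand(IM_HEIGHT, IM_WIDTH).astype(np.uint8)
        with self.assertRaises(ValueError):
            DepthImage(bad_dtype)


if __name__ == "__main__":
    unittest.main()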
Example #3
def inpaint(img):
    """
    Inpaint the image
    """
    # create DepthImage from gray version of img
    gray_img = skimage.color.rgb2gray(img)
    depth_img = DepthImage(gray_img)

    # zero out high-gradient areas and inpaint
    thresh_img = depth_img.threshold_gradients_pctile(0.95)
    inpaint_img = thresh_img.inpaint()
    return inpaint_img.data
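A minimal driver for the inpaint() helper above, assuming the same module also imports skimage.color and the DepthImage class the helper uses; the input file name is hypothetical:

import skimage.io

# hypothetical input file; any RGB image readable by scikit-image will do
rgb = skimage.io.imread("scene.png")
smoothed = inpaint(rgb)  # the helper defined above
print(smoothed.shape, smoothed.dtype)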
Example #4
    def _depth_im_from_pointcloud(self, msg):
        """Convert a pointcloud2 message to a depth image."""
        # set format
        if self._format is None:
            self._set_format(msg)

        # rescale camera intr in case binning is turned on
        if msg.height != self._camera_intr.height:
            rescale_factor = float(msg.height) / self._camera_intr.height
            self._camera_intr = self._camera_intr.resize(rescale_factor)

        # read num points
        num_points = msg.height * msg.width

        # read buffer
        raw_tup = struct.Struct(self._format).unpack_from(msg.data, 0)
        raw_arr = np.array(raw_tup)

        # subsample depth values and reshape
        depth_ind = 2 + 4 * np.arange(num_points)
        depth_buf = raw_arr[depth_ind]
        depth_arr = depth_buf.reshape(msg.height, msg.width)
        depth_im = DepthImage(depth_arr, frame=self._frame)

        return depth_im
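The depth_ind arithmetic above assumes four packed float fields per point with depth stored at offset 2. A small self-contained check of that stride trick on synthetic data, independent of ROS:

import numpy as np

height, width = 2, 3
num_points = height * width
# synthetic packed buffer: 4 floats per point, depth (z) stored at offset 2
raw_arr = np.arange(num_points * 4, dtype=np.float32)
depth_ind = 2 + 4 * np.arange(num_points)
depth_arr = raw_arr[depth_ind].reshape(height, width)
print(depth_arr)  # picks entries 2, 6, 10, 14, 18, 22 in row-major order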
Example #5
    def _read_color_and_depth_image(self):
        """Read a color and depth image from the device."""
        frames = self._pipe.wait_for_frames()
        if self._depth_align:
            frames = self._align.process(frames)

        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()

        if not depth_frame or not color_frame:
            logging.warning("Could not retrieve frames.")
            return None, None

        if self._filter_depth:
            depth_frame = self._filter_depth_frame(depth_frame)

        # convert to numpy arrays
        depth_image = self._to_numpy(depth_frame, np.float32)
        color_image = self._to_numpy(color_frame, np.uint8)

        # convert depth to meters
        depth_image *= self._depth_scale

        # bgr to rgb
        color_image = color_image[..., ::-1]

        depth = DepthImage(depth_image, frame=self._frame)
        color = ColorImage(color_image, frame=self._frame)
        return color, depth
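The color_image[..., ::-1] slice above reverses the channel axis, which is what converts OpenCV-style BGR data to RGB; a one-pixel sanity check:

import numpy as np

# one BGR "pixel" with B=10, G=20, R=30; reversing the last axis yields RGB
bgr = np.array([[[10, 20, 30]]], dtype=np.uint8)
print(bgr[..., ::-1])  # [[[30 20 10]]]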
Example #6
    def rendered_images(data, render_mode=None):
        if render_mode is None:
            render_mode = RenderMode.SEGMASK
        rendered_images = []
        num_images = data.attrs[NUM_IMAGES_KEY]

        for i in range(num_images):
            # get the image data
            image_key = IMAGE_KEY + '_' + str(i)
            image_data = data[image_key]
            image_arr = np.array(image_data[IMAGE_DATA_KEY])
            frame = image_data.attrs[IMAGE_FRAME_KEY]
            if render_mode == RenderMode.SEGMASK:
                image = BinaryImage(image_arr, frame)
            elif render_mode == RenderMode.DEPTH:
                image = DepthImage(image_arr, frame)
            elif render_mode == RenderMode.SCALED_DEPTH:
                image = ColorImage(image_arr, frame)
            R_camera_table = image_data.attrs[CAM_ROT_KEY]
            t_camera_table = image_data.attrs[CAM_POS_KEY]
            frame = image_data.attrs[CAM_FRAME_KEY]
            T_camera_world = RigidTransform(R_camera_table,
                                            t_camera_table,
                                            from_frame=frame,
                                            to_frame='world')

            rendered_images.append(ObjectRender(image, T_camera_world))
        return rendered_images
Example #7
def visualize_predictions(
    run_dir,
    test_config,
    pred_mask_dir,
    pred_info_dir,
    show_bbox=True,
    show_class=True,
):
    """Visualizes predictions."""
    # Create subdirectory for prediction visualizations
    vis_dir = os.path.join(run_dir, "vis")
    depth_dir = os.path.join(test_config["path"], test_config["images"])
    if not os.path.exists(vis_dir):
        os.makedirs(vis_dir)

    indices_arr = np.load(
        os.path.join(test_config["path"], test_config["indices"]))
    image_ids = np.arange(indices_arr.size)
    ##################################################################
    # Process each image
    ##################################################################
    print("VISUALIZING PREDICTIONS")
    for image_id in tqdm(image_ids):
        depth_image_fn = "image_{:06d}.npy".format(indices_arr[image_id])
        # depth_image_fn = 'image_{:06d}.png'.format(indices_arr[image_id])

        # Load image and ground truth data and resize for net
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        # image = ColorImage.open(os.path.join(depth_dir, '..', 'depth_ims', depth_image_fn)).data
        image = DepthImage(depth_data).to_color().data

        # load mask and info
        r = np.load(
            os.path.join(pred_info_dir,
                         "image_{:06}.npy".format(image_id))).item()
        r_masks = np.load(
            os.path.join(pred_mask_dir, "image_{:06}.npy".format(image_id)))
        # Must transpose from (n, h, w) to (h, w, n)
        if r_masks.any():
            r["masks"] = np.transpose(r_masks, (1, 2, 0))
        else:
            r["masks"] = r_masks
        # Visualize
        fig = plt.figure(figsize=(1.7067, 1.7067), dpi=300, frameon=False)
        ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
        fig.add_axes(ax)
        visualize.display_instances(
            image,
            r["rois"],
            r["masks"],
            r["class_ids"],
            ["bg", "obj"],
            ax=ax,
            show_bbox=show_bbox,
            show_class=show_class,
        )
        file_name = os.path.join(vis_dir, "vis_{:06d}".format(image_id))
        fig.savefig(file_name, transparent=True, dpi=300)
        plt.close()
Example #8
    def _depth_im_callback(self, msg):
        """Callback for handling depth images."""
        try:
            self._cur_depth_im = DepthImage(
                self._bridge.imgmsg_to_cv2(msg) / 1000.0, frame=self._frame)
        except (CvBridgeError, ValueError):
            self._cur_depth_im = None
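A sketch of how a callback like the one above is typically wired up, assuming rospy, cv_bridge, and the perception package providing DepthImage are available; the topic name and node name are hypothetical:

import rospy
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image
from perception import DepthImage  # assumption: may be autolab_core in newer releases


class DepthTopicListener:
    """Keeps the latest depth frame from a ROS topic, converted to meters."""

    def __init__(self, topic="/camera/depth/image_raw", frame="camera"):
        self._bridge = CvBridge()
        self._frame = frame
        self._cur_depth_im = None
        rospy.Subscriber(topic, Image, self._depth_im_callback, queue_size=1)

    def _depth_im_callback(self, msg):
        try:
            self._cur_depth_im = DepthImage(
                self._bridge.imgmsg_to_cv2(msg) / 1000.0, frame=self._frame)
        except (CvBridgeError, ValueError):
            self._cur_depth_im = None


if __name__ == "__main__":
    rospy.init_node("depth_listener", anonymous=True)
    listener = DepthTopicListener()
    rospy.spin()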
Example #9
    def frames(self):
        """Retrieve the next frame from the tensor dataset and convert it to a
        ColorImage and a DepthImage.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`
            The ColorImage and DepthImage of the current frame.

        Raises
        ------
        RuntimeError
            If the stream is not running or if all images in the
            directory have been used.
        """
        if not self._running:
            raise RuntimeError(
                "Device pointing to %s not runnning. Cannot read frames" %
                (self._path_to_images))

        if self._im_index >= self._num_images:
            raise RuntimeError("Device is out of images")

        # read images
        datapoint = self._dataset.datapoint(
            self._im_index, TensorDatasetVirtualSensor.IMAGE_FIELDS)
        color_im = ColorImage(
            datapoint[TensorDatasetVirtualSensor.COLOR_IM_FIELD],
            frame=self._frame,
        )
        depth_im = DepthImage(
            datapoint[TensorDatasetVirtualSensor.DEPTH_IM_FIELD],
            frame=self._frame,
        )
        if self._image_rescale_factor != 1.0:
            color_im = color_im.resize(self._image_rescale_factor)
            depth_im = depth_im.resize(self._image_rescale_factor,
                                       interp="nearest")
        self._im_index = (self._im_index + 1) % self._num_images
        return color_im, depth_im
Example #10
    def _depth_image_callback(self, image_msg):
        """Subscribe to the depth image topic and keep it up to date."""
        encoding = image_msg.encoding
        try:
            depth_arr = self._bridge.imgmsg_to_cv2(image_msg, encoding)
        except CvBridgeError as e:
            rospy.logerr(e)
            return
        depth = np.array(depth_arr * MM_TO_METERS, np.float32)
        self._cur_depth_im = DepthImage(depth, self._frame)
Example #11
    def _read_depth_images(self, num_images):
        """Reads depth images from the device."""
        depth_images = self._ros_read_images(self._depth_image_buffer,
                                             num_images, self.staleness_limit)
        for i in range(0, num_images):
            # convert to meters
            depth_images[i] = depth_images[i] * MM_TO_METERS
            if self._flip_images:
                depth_images[i] = np.flipud(depth_images[i])
                depth_images[i] = np.fliplr(depth_images[i])
            depth_images[i] = DepthImage(depth_images[i], frame=self._frame)
        return depth_images
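The flipud followed by fliplr above amounts to a 180 degree rotation of each image; a quick self-contained check of that equivalence:

import numpy as np

a = np.arange(6).reshape(2, 3)
# flipping up-down and then left-right equals a 180 degree rotation
assert np.array_equal(np.fliplr(np.flipud(a)), np.rot90(a, 2))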
Example #12
    def frames(self):
        """Retrieve the next frame from the image directory and convert it to a
        ColorImage and a DepthImage.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`
            The ColorImage and DepthImage of the current frame.

        Raises
        ------
        RuntimeError
            If the stream is not running or if all images in the
            directory have been used.
        """
        if not self._running:
            raise RuntimeError(
                "Device pointing to %s not runnning. Cannot read frames" %
                (self._path_to_images))

        if (self._im_index >= self._num_images) and not self._loop:
            raise RuntimeError("Device is out of images")
        elif (self._im_index >= self._num_images) and self._loop:
            self._im_index = 0

        # read images
        color_filename = os.path.join(
            self._path_to_images,
            "color_%d%s" % (self._im_index, self._color_ext),
        )
        color_im = ColorImage.open(color_filename, frame=self._frame)
        depth_filename = os.path.join(self._path_to_images,
                                      "depth_%d.npy" % (self._im_index))
        depth_im = DepthImage.open(depth_filename, frame=self._frame)
        self._im_index += 1
        return color_im, depth_im
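One iteration of the loop above boils down to opening a color/depth file pair directly; a sketch with hypothetical paths that follow the same color_<i>/depth_<i>.npy naming convention (the import path is an assumption):

from perception import ColorImage, DepthImage  # may be autolab_core in newer releases

# hypothetical files laid out as the sensor above expects
color_im = ColorImage.open("images/color_0.png", frame="camera")
depth_im = DepthImage.open("images/depth_0.npy", frame="camera")
print(color_im.shape, depth_im.shape)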
Example #13
    def _frames_and_index_map(self, skip_registration=False):
        """Retrieve a new frame from the Kinect and return a ColorImage,
        DepthImage, IrImage, and a map from depth pixels to color
        pixel indices.

        Parameters
        ----------
        skip_registration : bool
            If True, the registration step is skipped.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`,
          :obj:`IrImage`, :obj:`numpy.ndarray`
            The ColorImage, DepthImage, and IrImage of the
            current frame, and an ndarray that maps pixels
            of the depth image to the index of the
            corresponding pixel in the color image.

        Raises
        ------
        RuntimeError
            If the Kinect stream is not running.
        """
        if not self._running:
            raise RuntimeError(
                "Kinect2 device %s not runnning. Cannot read frames" %
                (self._device_num))

        # read frames
        frames = self._listener.waitForNewFrame()
        unregistered_color = frames["color"]
        distorted_depth = frames["depth"]
        ir = frames["ir"]

        # apply color to depth registration
        color_frame = self._color_frame
        color = unregistered_color
        depth = distorted_depth
        color_depth_map = (np.zeros([depth.height,
                                     depth.width]).astype(np.int32).ravel())
        if (not skip_registration and self._registration_mode
                == Kinect2RegistrationMode.COLOR_TO_DEPTH):
            color_frame = self._ir_frame
            depth = lf2.Frame(depth.width, depth.height, 4,
                              lf2.FrameType.Depth)
            color = lf2.Frame(depth.width, depth.height, 4,
                              lf2.FrameType.Color)
            self._registration.apply(
                unregistered_color,
                distorted_depth,
                depth,
                color,
                color_depth_map=color_depth_map,
            )

        # convert to array (copy needed to prevent referencing deleted data)
        color_arr = np.copy(color.asarray())
        color_arr[:, :, [0, 2]] = color_arr[:, :, [2, 0]]  # convert BGR to RGB
        color_arr[:, :, 0] = np.fliplr(color_arr[:, :, 0])
        color_arr[:, :, 1] = np.fliplr(color_arr[:, :, 1])
        color_arr[:, :, 2] = np.fliplr(color_arr[:, :, 2])
        color_arr[:, :, 3] = np.fliplr(color_arr[:, :, 3])
        depth_arr = np.fliplr(np.copy(depth.asarray()))
        ir_arr = np.fliplr(np.copy(ir.asarray()))

        # convert from mm to meters
        if self._depth_mode == Kinect2DepthMode.METERS:
            depth_arr = depth_arr * MM_TO_METERS

        # Release and return
        self._listener.release(frames)
        return (
            ColorImage(color_arr[:, :, :3], color_frame),
            DepthImage(depth_arr, self._ir_frame),
            IrImage(ir_arr.astype(np.uint16), self._ir_frame),
            color_depth_map,
        )
Example #14
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks, pred_info,
        and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information
    """

    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data (optional)
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()
        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1], depth_im.shape[0])).astype(
                                 np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'), pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
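A hypothetical invocation of detect(), shown only to make the expected config keys concrete; every path and numeric value below is a placeholder, not taken from the source:

# placeholder PCL parameters; keys mirror those read for the 'euclidean' branch
pcl_config = {
    'tolerance': 0.002,
    'min_cluster_size': 100,
    'max_cluster_size': 1000000,
}

# placeholder dataset description; the directory must also contain
# camera_intrinsics.intr as detect() expects
test_config = {
    'path': '/path/to/dataset',
    'images': 'depth_ims_numpy',
    'masks': 'modal_segmasks',
    'indices': 'test_indices.npy',
}

pred_dir, pred_info_dir, gt_dir = detect('euclidean', pcl_config,
                                         '/path/to/run_dir', test_config)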
Example #15
    def create_camera_data(self,
                           rgb: np.ndarray,
                           depth: np.ndarray,
                           cam_intr_frame: str,
                           fx: float,
                           fy: float,
                           cx: float,
                           cy: float,
                           skew: float,
                           w: int,
                           h: int,
                           seg_mask: np.ndarray = np.empty(shape=(0, )),
                           bbox: tuple = ()):
        """Create the CameraData object in the correct format expected by gqcnn

        Parameters
        ---------
        req: rgb: np.ndarray: rgb image
             depth: np.ndarray: depth image
             cam_intr_frame: str: the reference frame of the images. Grasp poses are computed wrt this frame
             fx: float: focal length (x)
             fy: float: focal length (y)
             cx: float: principal point (x)
             cy: float: principal point (y)
             skew: float: skew coefficient
             w: int: width
             h: int: height
        opt: seg_mask: np.ndarray: segmentation mask
             bbox: tuple: a tuple of 4 values that define the mask bounding box = (x_min, y_min, x_max, y_max)

        Returns:
            CameraData: object that stores the input data required by plan_grasp()
        """

        camera_data = CameraData()

        # Create images
        camera_data.rgb_img = ColorImage(rgb, frame=cam_intr_frame)
        camera_data.depth_img = DepthImage(depth, frame=cam_intr_frame)

        if seg_mask.size > 0:
            camera_data.seg_img = BinaryImage(seg_mask, cam_intr_frame)

        # Check image sizes
        if camera_data.rgb_img.height != camera_data.depth_img.height or \
           camera_data.rgb_img.width != camera_data.depth_img.width:

            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       camera_data.rgb_img.height, camera_data.rgb_img.width,
                       camera_data.depth_img.height,
                       camera_data.depth_img.width)

            raise AssertionError(msg)

        if (camera_data.rgb_img.height < self.min_height
                or camera_data.rgb_img.width < self.min_width):

            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width,
                       camera_data.rgb_img.height, camera_data.rgb_img.width)

            raise AssertionError(msg)

        if camera_data.rgb_img.height != camera_data.seg_img.height or \
            camera_data.rgb_img.width != camera_data.seg_img.width:

            msg = ("Images and segmask must be the same shape! Color image is"
                   " %d x %d but segmask is %d x %d") % (
                       camera_data.rgb_img.height, camera_data.rgb_img.width,
                       camera_data.seg_img.height, camera_data.seg_img.width)

            raise AssertionError(msg)

        # set intrinsic params
        camera_data.intrinsic_params = CameraIntrinsics(
            cam_intr_frame, fx, fy, cx, cy, skew, h, w)

        # set mask bounding box
        if len(bbox) == 4:
            camera_data.bounding_box = {
                'min_x': bbox[0],
                'min_y': bbox[1],
                'max_x': bbox[2],
                'max_y': bbox[3]
            }

        return camera_data
Example #16
    def read_images(self, req):
        """Reads images from a ROS service request.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS ServiceRequest for grasp planner service.
        """
        # Get the raw depth and color images as ROS `Image` objects.
        raw_color = req.color_image
        raw_depth = req.depth_image

        # Get the raw camera info as ROS `CameraInfo`.
        raw_camera_info = req.camera_info

        # Wrap the camera info in a BerkeleyAutomation/perception
        # `CameraIntrinsics` object.
        camera_intr = CameraIntrinsics(
            frame=raw_camera_info.header.frame_id,
            fx=raw_camera_info.K[0],
            fy=raw_camera_info.K[4],
            cx=raw_camera_info.K[2],
            cy=raw_camera_info.K[5],
            width=raw_camera_info.width,
            height=raw_camera_info.height,
        )

        # Create wrapped BerkeleyAutomation/perception RGB and depth images by
        # unpacking the ROS images using ROS `CvBridge`
        try:
            color_im = ColorImage(self.cv_bridge.imgmsg_to_cv2(
                raw_color, "rgb8"),
                                  frame=camera_intr.frame)

            cv2_depth = self.cv_bridge.imgmsg_to_cv2(
                raw_depth, desired_encoding="passthrough")
            cv2_depth = np.array(cv2_depth, dtype=np.float32)

            cv2_depth *= 0.001

            nan_idxs = np.isnan(cv2_depth)
            cv2_depth[nan_idxs] = 1000.0

            depth_im = DepthImage(cv2_depth, frame=camera_intr.frame)

        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)
            raise

        # Check image sizes.
        if color_im.height != depth_im.height or color_im.width != depth_im.width:
            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       color_im.height, color_im.width, depth_im.height,
                       depth_im.width)

            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        if (color_im.height < self.min_height
                or color_im.width < self.min_width):
            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width, color_im.height,
                       color_im.width)

            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        # --- create CameraData struct --- #
        camera_data = CameraData()
        camera_data.rgb_img = color_im
        camera_data.depth_img = depth_im
        camera_data.intrinsic_params = camera_intr

        return camera_data
Example #17
def generate_segmask_dataset(output_dataset_path,
                             config,
                             save_tensors=True,
                             warm_start=False):
    """Generate a segmentation training dataset

    Parameters
    ----------
    output_dataset_path : str
        path to store the dataset
    config : dict
        dictionary-like object containing parameters of the simulator and visualization
    save_tensors : bool
        save tensor datasets (for recreating state)
    warm_start : bool
        restart dataset generation from a previous state
    """

    # read subconfigs
    dataset_config = config["dataset"]
    image_config = config["images"]
    vis_config = config["vis"]

    # debugging
    debug = config["debug"]
    if debug:
        np.random.seed(SEED)

    # read general parameters
    num_states = config["num_states"]
    num_images_per_state = config["num_images_per_state"]

    states_per_flush = config["states_per_flush"]
    states_per_garbage_collect = config["states_per_garbage_collect"]

    # set max obj per state
    max_objs_per_state = config["state_space"]["heap"]["max_objs"]

    # read image parameters
    im_height = config["state_space"]["camera"]["im_height"]
    im_width = config["state_space"]["camera"]["im_width"]
    segmask_channels = max_objs_per_state + 1

    # create the dataset path and all subfolders if they don't exist
    if not os.path.exists(output_dataset_path):
        os.mkdir(output_dataset_path)

    image_dir = os.path.join(output_dataset_path, "images")
    if not os.path.exists(image_dir):
        os.mkdir(image_dir)
    color_dir = os.path.join(image_dir, "color_ims")
    if image_config["color"] and not os.path.exists(color_dir):
        os.mkdir(color_dir)
    depth_dir = os.path.join(image_dir, "depth_ims")
    if image_config["depth"] and not os.path.exists(depth_dir):
        os.mkdir(depth_dir)
    amodal_dir = os.path.join(image_dir, "amodal_masks")
    if image_config["amodal"] and not os.path.exists(amodal_dir):
        os.mkdir(amodal_dir)
    modal_dir = os.path.join(image_dir, "modal_masks")
    if image_config["modal"] and not os.path.exists(modal_dir):
        os.mkdir(modal_dir)
    semantic_dir = os.path.join(image_dir, "semantic_masks")
    if image_config["semantic"] and not os.path.exists(semantic_dir):
        os.mkdir(semantic_dir)

    # setup logging
    experiment_log_filename = os.path.join(output_dataset_path,
                                           "dataset_generation.log")
    if os.path.exists(experiment_log_filename) and not warm_start:
        os.remove(experiment_log_filename)
    Logger.add_log_file(logger, experiment_log_filename, global_log_file=True)
    config.save(
        os.path.join(output_dataset_path, "dataset_generation_params.yaml"))
    metadata = {}
    num_prev_states = 0

    # set dataset params
    if save_tensors:

        # read dataset subconfigs
        state_dataset_config = dataset_config["states"]
        image_dataset_config = dataset_config["images"]
        state_tensor_config = state_dataset_config["tensors"]
        image_tensor_config = image_dataset_config["tensors"]

        obj_pose_dim = POSE_DIM * max_objs_per_state
        obj_com_dim = POINT_DIM * max_objs_per_state
        state_tensor_config["fields"]["obj_poses"]["height"] = obj_pose_dim
        state_tensor_config["fields"]["obj_coms"]["height"] = obj_com_dim
        state_tensor_config["fields"]["obj_ids"]["height"] = max_objs_per_state

        image_tensor_config["fields"]["camera_pose"]["height"] = POSE_DIM

        if image_config["color"]:
            image_tensor_config["fields"]["color_im"] = {
                "dtype": "uint8",
                "channels": 3,
                "height": im_height,
                "width": im_width,
            }

        if image_config["depth"]:
            image_tensor_config["fields"]["depth_im"] = {
                "dtype": "float32",
                "channels": 1,
                "height": im_height,
                "width": im_width,
            }

        if image_config["modal"]:
            image_tensor_config["fields"]["modal_segmasks"] = {
                "dtype": "uint8",
                "channels": segmask_channels,
                "height": im_height,
                "width": im_width,
            }

        if image_config["amodal"]:
            image_tensor_config["fields"]["amodal_segmasks"] = {
                "dtype": "uint8",
                "channels": segmask_channels,
                "height": im_height,
                "width": im_width,
            }

        if image_config["semantic"]:
            image_tensor_config["fields"]["semantic_segmasks"] = {
                "dtype": "uint8",
                "channels": 1,
                "height": im_height,
                "width": im_width,
            }

        # create dataset filenames
        state_dataset_path = os.path.join(output_dataset_path, "state_tensors")
        image_dataset_path = os.path.join(output_dataset_path, "image_tensors")

        if warm_start:

            if not os.path.exists(state_dataset_path) or not os.path.exists(
                    image_dataset_path):
                logger.error(
                    "Attempting to warm start without saved tensor dataset")
                exit(1)

            # open datasets
            logger.info("Opening state dataset")
            state_dataset = TensorDataset.open(state_dataset_path,
                                               access_mode="READ_WRITE")
            logger.info("Opening image dataset")
            image_dataset = TensorDataset.open(image_dataset_path,
                                               access_mode="READ_WRITE")

            # read configs
            state_tensor_config = state_dataset.config
            image_tensor_config = image_dataset.config

            # clean up datasets (there may be datapoints with indices corresponding to non-existent data)
            num_state_datapoints = state_dataset.num_datapoints
            num_image_datapoints = image_dataset.num_datapoints
            num_prev_states = num_state_datapoints

            # clean up images
            image_ind = num_image_datapoints - 1
            image_datapoint = image_dataset[image_ind]
            while (image_ind > 0
                   and image_datapoint["state_ind"] >= num_state_datapoints):
                image_ind -= 1
                image_datapoint = image_dataset[image_ind]
            images_to_remove = num_image_datapoints - 1 - image_ind
            logger.info("Deleting last %d image tensors" % (images_to_remove))
            if images_to_remove > 0:
                image_dataset.delete_last(images_to_remove)
                num_image_datapoints = image_dataset.num_datapoints
        else:
            # create datasets from scratch
            logger.info("Creating datasets")

            state_dataset = TensorDataset(state_dataset_path,
                                          state_tensor_config)
            image_dataset = TensorDataset(image_dataset_path,
                                          image_tensor_config)

        # read templates
        state_datapoint = state_dataset.datapoint_template
        image_datapoint = image_dataset.datapoint_template

    if warm_start:

        if not os.path.exists(
                os.path.join(output_dataset_path, "metadata.json")):
            logger.error(
                "Attempting to warm start without previously created dataset")
            exit(1)

        # Read metadata and indices
        metadata = json.load(
            open(os.path.join(output_dataset_path, "metadata.json"), "r"))
        test_inds = np.load(os.path.join(image_dir,
                                         "test_indices.npy")).tolist()
        train_inds = np.load(os.path.join(image_dir,
                                          "train_indices.npy")).tolist()

        # set obj ids and splits
        reverse_obj_ids = metadata["obj_ids"]
        obj_id_map = utils.reverse_dictionary(reverse_obj_ids)
        obj_splits = metadata["obj_splits"]
        obj_keys = obj_splits.keys()
        mesh_filenames = metadata["meshes"]

        # Get list of images generated so far
        generated_images = (sorted(os.listdir(color_dir))
                            if image_config["color"] else sorted(
                                os.listdir(depth_dir)))
        num_total_images = len(generated_images)

        # Do our own calculation if no saved tensors
        if num_prev_states == 0:
            num_prev_states = num_total_images // num_images_per_state

        # Find images to remove and remove them from all relevant places if they exist
        num_images_to_remove = num_total_images - (num_prev_states *
                                                   num_images_per_state)
        logger.info(
            "Deleting last {} invalid images".format(num_images_to_remove))
        for k in range(num_images_to_remove):
            im_name = generated_images[-(k + 1)]
            im_basename = os.path.splitext(im_name)[0]
            im_ind = int(im_basename.split("_")[1])
            if os.path.exists(os.path.join(depth_dir, im_name)):
                os.remove(os.path.join(depth_dir, im_name))
            if os.path.exists(os.path.join(color_dir, im_name)):
                os.remove(os.path.join(color_dir, im_name))
            if os.path.exists(os.path.join(semantic_dir, im_name)):
                os.remove(os.path.join(semantic_dir, im_name))
            if os.path.exists(os.path.join(modal_dir, im_basename)):
                shutil.rmtree(os.path.join(modal_dir, im_basename))
            if os.path.exists(os.path.join(amodal_dir, im_basename)):
                shutil.rmtree(os.path.join(amodal_dir, im_basename))
            if im_ind in train_inds:
                train_inds.remove(im_ind)
            elif im_ind in test_inds:
                test_inds.remove(im_ind)

    else:

        # Create initial env to generate metadata
        env = BinHeapEnv(config)
        obj_id_map = env.state_space.obj_id_map
        obj_keys = env.state_space.obj_keys
        obj_splits = env.state_space.obj_splits
        mesh_filenames = env.state_space.mesh_filenames
        save_obj_id_map = obj_id_map.copy()
        save_obj_id_map[ENVIRONMENT_KEY] = np.iinfo(np.uint32).max
        reverse_obj_ids = utils.reverse_dictionary(save_obj_id_map)
        metadata["obj_ids"] = reverse_obj_ids
        metadata["obj_splits"] = obj_splits
        metadata["meshes"] = mesh_filenames
        json.dump(
            metadata,
            open(os.path.join(output_dataset_path, "metadata.json"), "w"),
            indent=JSON_INDENT,
            sort_keys=True,
        )
        train_inds = []
        test_inds = []

    # generate states and images
    state_id = num_prev_states
    while state_id < num_states:

        # create env and set objects
        create_start = time.time()
        env = BinHeapEnv(config)
        env.state_space.obj_id_map = obj_id_map
        env.state_space.obj_keys = obj_keys
        env.state_space.set_splits(obj_splits)
        env.state_space.mesh_filenames = mesh_filenames
        create_stop = time.time()
        logger.info("Creating env took %.3f sec" %
                    (create_stop - create_start))

        # sample states
        states_remaining = num_states - state_id
        for i in range(min(states_per_garbage_collect, states_remaining)):

            # log current rollout
            if state_id % config["log_rate"] == 0:
                logger.info("State: %06d" % (state_id))

            try:
                # reset env
                env.reset()
                state = env.state
                split = state.metadata["split"]

                # render state
                if vis_config["state"]:
                    env.view_3d_scene()

                # Save state if desired
                if save_tensors:

                    # set obj state variables
                    obj_pose_vec = np.zeros(obj_pose_dim)
                    obj_com_vec = np.zeros(obj_com_dim)
                    obj_id_vec = np.iinfo(
                        np.uint32).max * np.ones(max_objs_per_state)
                    j = 0
                    for obj_state in state.obj_states:
                        obj_pose_vec[j * POSE_DIM:(j + 1) *
                                     POSE_DIM] = obj_state.pose.vec
                        obj_com_vec[j * POINT_DIM:(j + 1) *
                                    POINT_DIM] = obj_state.center_of_mass
                        obj_id_vec[j] = int(obj_id_map[obj_state.key])
                        j += 1

                    # store datapoint env params
                    state_datapoint["state_id"] = state_id
                    state_datapoint["obj_poses"] = obj_pose_vec
                    state_datapoint["obj_coms"] = obj_com_vec
                    state_datapoint["obj_ids"] = obj_id_vec
                    state_datapoint["split"] = split

                    # store state datapoint
                    image_start_ind = image_dataset.num_datapoints
                    image_end_ind = image_start_ind + num_images_per_state
                    state_datapoint["image_start_ind"] = image_start_ind
                    state_datapoint["image_end_ind"] = image_end_ind

                    # clean up
                    del obj_pose_vec
                    del obj_com_vec
                    del obj_id_vec

                    # add state
                    state_dataset.add(state_datapoint)

                # render images
                for k in range(num_images_per_state):

                    # reset the camera
                    if num_images_per_state > 1:
                        env.reset_camera()

                    obs = env.render_camera_image(color=image_config["color"])
                    if image_config["color"]:
                        color_obs, depth_obs = obs
                    else:
                        depth_obs = obs

                    # vis obs
                    if vis_config["obs"]:
                        if image_config["depth"]:
                            plt.figure()
                            plt.imshow(depth_obs)
                            plt.title("Depth Observation")
                        if image_config["color"]:
                            plt.figure()
                            plt.imshow(color_obs)
                            plt.title("Color Observation")
                        plt.show()

                    if (image_config["modal"] or image_config["amodal"]
                            or image_config["semantic"]):

                        # render segmasks
                        (
                            amodal_segmasks,
                            modal_segmasks,
                        ) = env.render_segmentation_images()

                        # retrieve segmask data
                        modal_segmask_arr = np.iinfo(np.uint8).max * np.ones(
                            [im_height, im_width, segmask_channels],
                            dtype=np.uint8,
                        )
                        amodal_segmask_arr = np.iinfo(np.uint8).max * np.ones(
                            [im_height, im_width, segmask_channels],
                            dtype=np.uint8,
                        )
                        stacked_segmask_arr = np.zeros(
                            [im_height, im_width, 1], dtype=np.uint8)

                        modal_segmask_arr[:, :, :env.num_objects] = (
                            modal_segmasks)
                        amodal_segmask_arr[:, :, :env.num_objects] = (
                            amodal_segmasks)

                        if image_config["semantic"]:
                            for j in range(env.num_objects):
                                this_obj_px = np.where(
                                    modal_segmasks[:, :, j] > 0)
                                stacked_segmask_arr[this_obj_px[0],
                                                    this_obj_px[1],
                                                    0] = (j + 1)

                    # visualize
                    if vis_config["semantic"]:
                        plt.figure()
                        plt.imshow(stacked_segmask_arr.squeeze())
                        plt.show()

                    if save_tensors:
                        # save image data as tensors
                        if image_config["color"]:
                            image_datapoint["color_im"] = color_obs
                        if image_config["depth"]:
                            image_datapoint["depth_im"] = depth_obs[:, :, None]
                        if image_config["modal"]:
                            image_datapoint[
                                "modal_segmasks"] = modal_segmask_arr
                        if image_config["amodal"]:
                            image_datapoint[
                                "amodal_segmasks"] = amodal_segmask_arr
                        if image_config["semantic"]:
                            image_datapoint[
                                "semantic_segmasks"] = stacked_segmask_arr

                        image_datapoint["camera_pose"] = env.camera.pose.vec
                        image_datapoint[
                            "camera_intrs"] = env.camera.intrinsics.vec
                        image_datapoint["state_ind"] = state_id
                        image_datapoint["split"] = split

                        # add image
                        image_dataset.add(image_datapoint)

                    # Save depth image and semantic masks
                    if image_config["color"]:
                        ColorImage(color_obs).save(
                            os.path.join(
                                color_dir,
                                "image_{:06d}.png".format(
                                    num_images_per_state * state_id + k),
                            ))
                    if image_config["depth"]:
                        DepthImage(depth_obs).save(
                            os.path.join(
                                depth_dir,
                                "image_{:06d}.png".format(
                                    num_images_per_state * state_id + k),
                            ))
                    if image_config["modal"]:
                        modal_id_dir = os.path.join(
                            modal_dir,
                            "image_{:06d}".format(num_images_per_state *
                                                  state_id + k),
                        )
                        if not os.path.exists(modal_id_dir):
                            os.mkdir(modal_id_dir)
                        for i in range(env.num_objects):
                            BinaryImage(modal_segmask_arr[:, :, i]).save(
                                os.path.join(
                                    modal_id_dir,
                                    "channel_{:03d}.png".format(i),
                                ))
                    if image_config["amodal"]:
                        amodal_id_dir = os.path.join(
                            amodal_dir,
                            "image_{:06d}".format(num_images_per_state *
                                                  state_id + k),
                        )
                        if not os.path.exists(amodal_id_dir):
                            os.mkdir(amodal_id_dir)
                        for i in range(env.num_objects):
                            BinaryImage(amodal_segmask_arr[:, :, i]).save(
                                os.path.join(
                                    amodal_id_dir,
                                    "channel_{:03d}.png".format(i),
                                ))
                    if image_config["semantic"]:
                        GrayscaleImage(stacked_segmask_arr.squeeze()).save(
                            os.path.join(
                                semantic_dir,
                                "image_{:06d}.png".format(
                                    num_images_per_state * state_id + k),
                            ))

                    # Save split
                    if split == TRAIN_ID:
                        train_inds.append(num_images_per_state * state_id + k)
                    else:
                        test_inds.append(num_images_per_state * state_id + k)

                # auto-flush after every so many timesteps
                if state_id % states_per_flush == 0:
                    np.save(
                        os.path.join(image_dir, "train_indices.npy"),
                        train_inds,
                    )
                    np.save(os.path.join(image_dir, "test_indices.npy"),
                            test_inds)
                    if save_tensors:
                        state_dataset.flush()
                        image_dataset.flush()

                # delete action objects
                for obj_state in state.obj_states:
                    del obj_state
                del state
                gc.collect()

                # update state id
                state_id += 1

            except Exception as e:
                # log an error
                logger.warning("Heap failed!")
                logger.warning("%s" % (str(e)))
                logger.warning(traceback.format_exc())
                if debug:
                    raise

                del env
                gc.collect()
                env = BinHeapEnv(config)
                env.state_space.obj_id_map = obj_id_map
                env.state_space.obj_keys = obj_keys
                env.state_space.set_splits(obj_splits)
                env.state_space.mesh_filenames = mesh_filenames

        # garbage collect
        del env
        gc.collect()

    # write all datasets to file, save indices
    np.save(os.path.join(image_dir, "train_indices.npy"), train_inds)
    np.save(os.path.join(image_dir, "test_indices.npy"), test_inds)
    if save_tensors:
        state_dataset.flush()
        image_dataset.flush()

    logger.info("Generated %d image datapoints" %
                (state_id * num_images_per_state))
Example #18
def analyze_image_depths(path, bbox, out_name):
    """
    path should lead to a .npy file
    """
    img = np.load(path)
    img = np.reshape(img, img.shape[:2])
    img_slice = img[bbox[0]:bbox[2], bbox[1]:bbox[3]]
    vec = np.ndarray.flatten(img_slice)
    # vec = reject_outliers(vec)

    var = np.var(vec)
    mean = np.mean(vec)
    print("State for {}: Mean: {}, Standard Deviation: {}\n".format(
        out_name, mean, np.sqrt(var)))

    n, bins, patches = plt.hist(vec, vec.size // 10, facecolor="blue")

    plt.xlabel("depth value")
    plt.ylabel("count")
    plt.title("depth within region")
    plt.grid(True)
    plt.show()

    plt.savefig(os.path.join(out_path, "graph_" + out_name),
                bbox_inches="tight")
    plt.close()
    depth_img = DepthImage(img_slice)
    depth_img.save(os.path.join(out_path, out_name))


if __name__ == "__main__":
    for img_name in images:
        img_path = os.path.join(base_path, img_name)
        img = np.load(img_path)
        img = np.reshape(img, img.shape[:2])
        DepthImage(img).save(os.path.join(base_path, "depth_0.png"))

    for img_name, boxes in zip(images, boxes_list):
        img_path = os.path.join(base_path, img_name)
        for box in boxes:
            out_name = "img_{}_box_{}_{}_{}_{}.png".format(
                img_name, box[0], box[1], box[2], box[3])
            analyze_image_depths(img_path, box, out_name)
Example #19
    def test_virtual(self, height=100, width=100):
        # Generate folder of color and depth images
        if not os.path.exists(IM_FILEROOT):
            os.makedirs(IM_FILEROOT)
        cam_intr = CameraIntrinsics(
            "a",
            fx=0.0,
            fy=0.0,
            cx=0.0,
            cy=0.0,
            skew=0.0,
            height=100,
            width=100,
        )
        cam_intr.save(os.path.join(IM_FILEROOT, "a.intr"))
        color_data = (255 * np.random.rand(10, height, width, 3)).astype(
            np.uint8)
        depth_data = np.random.rand(10, height, width).astype(np.float32)
        for i in range(10):
            im = ColorImage(color_data[i], frame="a")
            im.save(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))

            im = DepthImage(depth_data[i], frame="a")
            im.save(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))

        # Create virtual camera
        virtual_cam = RgbdSensorFactory.sensor("virtual",
                                               cfg={
                                                   "image_dir": IM_FILEROOT,
                                                   "frame": "a"
                                               })
        self.assertTrue(
            virtual_cam.path_to_images == IM_FILEROOT,
            msg="img path changed after init",
        )

        # Start virtual camera and read frames
        virtual_cam.start()
        self.assertTrue(virtual_cam.is_running,
                        msg="camera not running after start")
        for i in range(10):
            color, depth = virtual_cam.frames()
            self.assertTrue(
                np.all(color.data == color_data[i]),
                msg="color data for img {:d} changed".format(i),
            )
            self.assertTrue(
                color.frame == virtual_cam.frame,
                msg="frame mismatch between color im and camera",
            )
            self.assertTrue(
                np.all(depth.data == depth_data[i]),
                msg="depth data for img {:d} changed".format(i),
            )
            self.assertTrue(
                depth.frame == virtual_cam.frame,
                msg="frame mismatch between depth im and camera",
            )

        # Make sure camera is stopped
        virtual_cam.stop()
        self.assertFalse(virtual_cam.is_running,
                         msg="camera running after stop")

        # Cleanup images
        for i in range(10):
            os.remove(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))
            os.remove(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))
        os.remove(os.path.join(IM_FILEROOT, "a.intr"))
        os.rmdir(IM_FILEROOT)
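Outside the test, the same virtual sensor is driven through the factory in just a few lines; a sketch assuming a directory laid out like the one the test creates (color_<i>.png, depth_<i>.npy, a.intr) and that RgbdSensorFactory is importable from perception:

from perception import RgbdSensorFactory  # may be autolab_core in newer releases

# hypothetical directory prepared like IM_FILEROOT in the test above
sensor = RgbdSensorFactory.sensor("virtual",
                                  cfg={"image_dir": "virtual_cam_images",
                                       "frame": "a"})
sensor.start()
color_im, depth_im = sensor.frames()
sensor.stop()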
Example #20
    def wrapped_images(self,
                       mesh,
                       object_to_camera_poses,
                       render_mode,
                       stable_pose=None,
                       mat_props=None,
                       light_props=None,
                       debug=False):
        """Create ObjectRender objects of the given mesh at the list of object to camera poses.

        Parameters
        ----------
        mesh : :obj:`Mesh3D`
            The mesh to be rendered.
        object_to_camera_poses : :obj:`list` of :obj:`RigidTransform`
            A list of object to camera transforms to render from.
        render_mode : int
            One of the RenderMode constants (e.g. RenderMode.SEGMASK,
            RenderMode.COLOR, RenderMode.DEPTH, RenderMode.RGBD, or their
            _SCENE and SCALED_DEPTH variants).
        stable_pose : :obj:`StablePose`
            A stable pose to render the object in.
        mat_props : :obj:`MaterialProperties`
            Material properties for the mesh
        light_props : :obj:`MaterialProperties`
            Lighting properties for the scene
        debug : bool
            Whether or not to debug the C++ meshrendering code.

        Returns
        -------
        :obj:`list` of :obj:`ObjectRender`
            A list of ObjectRender objects generated from the given parameters.
        """
        # pre-multiply the stable pose
        world_to_camera_poses = [
            T_obj_camera.as_frames('obj', 'camera')
            for T_obj_camera in object_to_camera_poses
        ]
        if stable_pose is not None:
            t_obj_stp = np.array([0, 0, -stable_pose.r.dot(stable_pose.x0)[2]])
            T_obj_stp = RigidTransform(rotation=stable_pose.r,
                                       translation=t_obj_stp,
                                       from_frame='obj',
                                       to_frame='stp')
            stp_to_camera_poses = copy.copy(object_to_camera_poses)
            object_to_camera_poses = []
            for T_stp_camera in stp_to_camera_poses:
                T_stp_camera.from_frame = 'stp'
                object_to_camera_poses.append(T_stp_camera.dot(T_obj_stp))

        # set lighting mode
        enable_lighting = True
        if render_mode == RenderMode.SEGMASK or render_mode == RenderMode.DEPTH or \
           render_mode == RenderMode.DEPTH_SCENE:
            enable_lighting = False

        # render both image types (doesn't really cost any time)
        color_ims, depth_ims = self.images(mesh,
                                           object_to_camera_poses,
                                           mat_props=mat_props,
                                           light_props=light_props,
                                           enable_lighting=enable_lighting,
                                           debug=debug)

        # convert to image wrapper classes
        images = []
        if render_mode == RenderMode.SEGMASK:
            # wrap binary images
            for binary_im in color_ims:
                images.append(
                    BinaryImage(binary_im[:, :, 0],
                                frame=self._camera_intr.frame,
                                threshold=0))

        elif render_mode == RenderMode.COLOR:
            # wrap color images
            for color_im in color_ims:
                images.append(
                    ColorImage(color_im, frame=self._camera_intr.frame))

        elif render_mode == RenderMode.COLOR_SCENE:
            # wrap color and depth images
            for color_im in color_ims:
                images.append(
                    ColorImage(color_im, frame=self._camera_intr.frame))

            # render images of scene objects
            color_scene_ims = {}
            for name, scene_obj in self._scene.items():
                scene_object_to_camera_poses = []
                for world_to_camera_pose in world_to_camera_poses:
                    scene_object_to_camera_poses.append(world_to_camera_pose *
                                                        scene_obj.T_mesh_world)

                color_scene_ims[name] = self.wrapped_images(
                    scene_obj.mesh,
                    scene_object_to_camera_poses,
                    RenderMode.COLOR,
                    mat_props=scene_obj.mat_props,
                    light_props=light_props,
                    debug=debug)

            # combine with scene images
            # TODO: re-add farther
            for i in range(len(images)):
                for j, name in enumerate(color_scene_ims.keys()):
                    zero_px = images[i].zero_pixels()
                    images[i].data[zero_px[:, 0],
                                   zero_px[:, 1], :] = color_scene_ims[name][
                                       i].image.data[zero_px[:, 0],
                                                     zero_px[:, 1], :]

        elif render_mode == RenderMode.DEPTH:
            # render depth image
            for depth_im in depth_ims:
                images.append(
                    DepthImage(depth_im, frame=self._camera_intr.frame))

        elif render_mode == RenderMode.DEPTH_SCENE:
            # create empty depth images
            for depth_im in depth_ims:
                images.append(
                    DepthImage(depth_im, frame=self._camera_intr.frame))

            # render images of scene objects
            depth_scene_ims = {}
            for name, scene_obj in self._scene.items():
                scene_object_to_camera_poses = []
                for world_to_camera_pose in world_to_camera_poses:
                    scene_object_to_camera_poses.append(world_to_camera_pose *
                                                        scene_obj.T_mesh_world)
                depth_scene_ims[name] = self.wrapped_images(
                    scene_obj.mesh,
                    scene_object_to_camera_poses,
                    RenderMode.DEPTH,
                    mat_props=scene_obj.mat_props,
                    light_props=light_props)

            # combine with scene images
            for i in range(len(images)):
                for j, name in enumerate(depth_scene_ims.keys()):
                    images[i] = images[i].combine_with(
                        depth_scene_ims[name][i].image)

        elif render_mode == RenderMode.RGBD:
            # create RGB-D images
            for color_im, depth_im in zip(color_ims, depth_ims):
                c = ColorImage(color_im, frame=self._camera_intr.frame)
                d = DepthImage(depth_im, frame=self._camera_intr.frame)
                images.append(RgbdImage.from_color_and_depth(c, d))

        elif render_mode == RenderMode.RGBD_SCENE:
            # create RGB-D images
            for color_im, depth_im in zip(color_ims, depth_ims):
                c = ColorImage(color_im, frame=self._camera_intr.frame)
                d = DepthImage(depth_im, frame=self._camera_intr.frame)
                images.append(RgbdImage.from_color_and_depth(c, d))

            # render images of scene objects
            rgbd_scene_ims = {}
            for name, scene_obj in self._scene.items():
                scene_object_to_camera_poses = []
                for world_to_camera_pose in world_to_camera_poses:
                    scene_object_to_camera_poses.append(world_to_camera_pose *
                                                        scene_obj.T_mesh_world)

                rgbd_scene_ims[name] = self.wrapped_images(
                    scene_obj.mesh,
                    scene_object_to_camera_poses,
                    RenderMode.RGBD,
                    mat_props=scene_obj.mat_props,
                    light_props=light_props,
                    debug=debug)

            # combine with scene images
            for i in range(len(images)):
                for j, name in enumerate(rgbd_scene_ims.keys()):
                    images[i] = images[i].combine_with(
                        rgbd_scene_ims[name][i].image)

        elif render_mode == RenderMode.SCALED_DEPTH:
            # convert to color image
            for depth_im in depth_ims:
                d = DepthImage(depth_im, frame=self._camera_intr.frame)
                images.append(d.to_color())
        else:
            raise ValueError('Render mode %s not supported' % (render_mode))

        # create object renders
        if stable_pose is not None:
            object_to_camera_poses = copy.copy(stp_to_camera_poses)
        rendered_images = []
        for image, T_obj_camera in zip(images, object_to_camera_poses):
            T_camera_obj = T_obj_camera.inverse()
            rendered_images.append(ObjectRender(image, T_camera_obj))

        return rendered_images