    def __init__(self, dataset_path, frame=None, loop=True):
        self._dataset_path = dataset_path
        self._frame = frame
        self._color_frame = frame
        self._ir_frame = frame
        self._im_index = 0
        self._running = False

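        # Imported lazily, presumably so autolab_core is only required
        # when this sensor type is actually used.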
        from autolab_core import TensorDataset

        self._dataset = TensorDataset.open(self._dataset_path)
        self._num_images = self._dataset.num_datapoints
        self._image_rescale_factor = 1.0
        if "image_rescale_factor" in self._dataset.metadata:
            self._image_rescale_factor = (
                1.0 / self._dataset.metadata["image_rescale_factor"])

        datapoint = self._dataset.datapoint(
            0, [TensorDatasetVirtualSensor.CAMERA_INTR_FIELD])
        camera_intr_vec = datapoint[
            TensorDatasetVirtualSensor.CAMERA_INTR_FIELD]
        self._color_intr = CameraIntrinsics.from_vec(
            camera_intr_vec,
            frame=self._color_frame).resize(self._image_rescale_factor)
        self._ir_intr = CameraIntrinsics.from_vec(
            camera_intr_vec,
            frame=self._ir_frame).resize(self._image_rescale_factor)
    def __init__(self, frame="webcam", device_id=0):
        """Initialize a Logitech webcam sensor.

        Parameters
        ----------
        frame : str
            A name for the frame in which RGB images are returned.
        device_id : int
            The device ID for the webcam (by default, zero).
        """
        self._frame = frame
        self._camera_intr = None
        self._device_id = device_id
        self._cap = None
        self._running = False
        self._adjust_exposure = True

        # Set up camera intrinsics for the sensor
        width, height = 1280, 960
        focal_x, focal_y = 1430.0, 1420.0
        center_x, center_y = width / 2, height / 2
        self._camera_intr = CameraIntrinsics(
            self._frame,
            focal_x,
            focal_y,
            center_x,
            center_y,
            height=height,
            width=width,
        )
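The hard-coded values above encode a standard pinhole model: a point (x, y, z) in the camera frame lands at pixel (fx * x / z + cx, fy * y / z + cy). A minimal sketch of that mapping in plain numpy, using the webcam parameters from this example (the 3D point is made up for illustration):

    import numpy as np

    fx, fy = 1430.0, 1420.0
    cx, cy = 640.0, 480.0

    # Hypothetical point in the camera frame, in meters.
    x, y, z = 0.05, -0.02, 0.60

    # Pinhole projection: scale by focal length over depth, shift to center.
    u = fx * x / z + cx
    v = fy * y / z + cy
    print(u, v)  # pixel coordinates in the 1280 x 960 image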
    def __init__(self,
                 frame="phoxi",
                 device_name="2018-02-020-LC3",
                 size="small"):
        """Initialize a PhoXi Sensor.

        Parameters
        ----------
        frame : str
            A name for the frame in which depth images, normal maps,
            and RGB images are returned.
        device_name : str
            The string name of the PhoXi device
            (SN listed on sticker on back sensor).
            Old PhoXi: 1703005
            New PhoXi: 2018-02-020-LC3
        size : str
            An indicator for which size of image is desired.
            Either 'large' (2064x1544) or 'small' (1032x772).
        """

        self._frame = frame
        self._device_name = str(device_name)
        self._camera_intr = None
        self._running = False
        self._bridge = CvBridge()

        self._cur_color_im = None
        self._cur_depth_im = None
        self._cur_normal_map = None

        # Set up camera intrinsics for the sensor
        width, height = 2064, 1544
        focal_x, focal_y = 2244.0, 2244.0
        center_x, center_y = 1023.0, 768.0
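        # At half resolution, every intrinsic parameter scales by 1/2.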
        if size == "small":
            width = 1032
            height = 772
            focal_x = focal_x / 2
            focal_y = focal_y / 2
            center_x = center_x / 2
            center_y = center_y / 2

            if str(device_name) == "1703005":
                focal_x = focal_y = 1105.0

        self._camera_intr = CameraIntrinsics(
            self._frame,
            focal_x,
            focal_y,
            center_x,
            center_y,
            height=height,
            width=width,
        )
Example #5
    @property
    def color_intrinsics(self):
        """:obj:`CameraIntrinsics` : RealSense color camera intrinsics."""
        return CameraIntrinsics(
            self._frame,
            self._intrinsics[0, 0],
            self._intrinsics[1, 1],
            self._intrinsics[0, 2],
            self._intrinsics[1, 2],
            height=RealSenseSensor.COLOR_IM_HEIGHT,
            width=RealSenseSensor.COLOR_IM_WIDTH,
        )
    @property
    def color_intrinsics(self):
        """:obj:`CameraIntrinsics` : Color camera intrinsics of Kinect."""
        if self._device is None:
            raise RuntimeError(
                "Kinect2 device not running. Cannot return color intrinsics")
        camera_params = self._device.getColorCameraParams()
        return CameraIntrinsics(
            self._color_frame,
            camera_params.fx,
            camera_params.fy,
            camera_params.cx,
            camera_params.cy,
        )
    @property
    def ir_intrinsics(self):
        """:obj:`CameraIntrinsics` : IR camera intrinsics for the Kinect."""
        if self._device is None:
            raise RuntimeError(
                "Kinect2 device not running. Cannot return IR intrinsics")
        camera_params = self._device.getIrCameraParams()
        return CameraIntrinsics(
            self._ir_frame,
            camera_params.fx,
            camera_params.fy,
            camera_params.cx,
            camera_params.cy,
            height=Kinect2Sensor.DEPTH_IM_HEIGHT,
            width=Kinect2Sensor.DEPTH_IM_WIDTH,
        )
    def sample(self, size=1):
        """Sample random variables from the model.
        Parameters
        ----------
        size : int
            number of sample to take
        Returns
        -------
        :obj:`list` of :obj:`CameraSample`
            sampled camera intrinsics and poses
        """
        samples = []
        for i in range(size):
            # sample camera params
            focal = self.focal_rv.rvs(size=1)[0]
            cx = self.cx_rv.rvs(size=1)[0]
            cy = self.cy_rv.rvs(size=1)[0]

            # sample viewsphere params
            radius = self.rad_rv.rvs(size=1)[0]
            elev = self.elev_rv.rvs(size=1)[0]
            az = self.az_rv.rvs(size=1)[0]
            roll = self.roll_rv.rvs(size=1)[0]

            # sample plane translation
            tx = self.tx_rv.rvs(size=1)[0]
            ty = self.ty_rv.rvs(size=1)[0]

            # convert to pose and intrinsics
            pose = self.camera_to_world_pose(radius, elev, az, roll, tx, ty)
            intrinsics = CameraIntrinsics(
                self.frame,
                fx=focal,
                fy=focal,
                cx=cx,
                cy=cy,
                skew=0.0,
                height=self.im_height,
                width=self.im_width,
            )

            # store the sampled pose and intrinsics
            samples.append((pose, intrinsics))

        # not a list if only 1 sample
        if size == 1:
            return samples[0]
        return samples
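A short usage sketch for this method (`sampler` names a hypothetical instance of the random-variable class that defines sample(), with its distributions already configured):

    # One draw returns a single (pose, intrinsics) tuple.
    pose, intrinsics = sampler.sample()

    # A batch draw returns a list of such tuples.
    for pose, intrinsics in sampler.sample(size=10):
        print(intrinsics.fx, intrinsics.cx)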
    def read_images(self, req):
        """Reads images from a ROS service request.

        Parameters
        ----------
        req : :obj:`ROS ServiceRequest`
            ROS ServiceRequest for the grasp planner service.

        Returns
        -------
        :obj:`CameraData`
            Object wrapping the color image, depth image, and camera
            intrinsics from the request.
        """
        # Get the raw depth and color images as ROS `Image` objects.
        raw_color = req.color_image
        raw_depth = req.depth_image

        # Get the raw camera info as ROS `CameraInfo`.
        raw_camera_info = req.camera_info

        # Wrap the camera info in a BerkeleyAutomation/perception
        # `CameraIntrinsics` object.
        camera_intr = CameraIntrinsics(
            frame=raw_camera_info.header.frame_id,
            fx=raw_camera_info.K[0],
            fy=raw_camera_info.K[4],
            cx=raw_camera_info.K[2],
            cy=raw_camera_info.K[5],
            width=raw_camera_info.width,
            height=raw_camera_info.height,
        )

        # Create wrapped BerkeleyAutomation/perception RGB and depth images by
        # unpacking the ROS images using ROS `CvBridge`
        try:
            color_im = ColorImage(self.cv_bridge.imgmsg_to_cv2(
                raw_color, "rgb8"),
                                  frame=camera_intr.frame)

            cv2_depth = self.cv_bridge.imgmsg_to_cv2(
                raw_depth, desired_encoding="passthrough")
            cv2_depth = np.array(cv2_depth, dtype=np.float32)

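            # Depth from the ROS message is assumed to be encoded in
            # millimeters (the common convention); convert to meters.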
            cv2_depth *= 0.001

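            # Replace NaN holes with a large sentinel depth so they read
            # as far away instead of propagating NaNs downstream.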
            nan_idxs = np.isnan(cv2_depth)
            cv2_depth[nan_idxs] = 1000.0

            depth_im = DepthImage(cv2_depth, frame=camera_intr.frame)

        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)
            raise rospy.ServiceException(str(cv_bridge_exception))

        # Check image sizes.
        if (color_im.height != depth_im.height
                or color_im.width != depth_im.width):
            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       color_im.height, color_im.width, depth_im.height,
                       depth_im.width)

            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        if (color_im.height < self.min_height
                or color_im.width < self.min_width):
            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width, color_im.height,
                       color_im.width)

            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        # --- create CameraData struct --- #
        camera_data = CameraData()
        camera_data.rgb_img = color_im
        camera_data.depth_img = depth_im
        camera_data.intrinsic_params = camera_intr

        return camera_data
Example #10
    def test_virtual(self, height=100, width=100):
        # Generate folder of color and depth images
        if not os.path.exists(IM_FILEROOT):
            os.makedirs(IM_FILEROOT)
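        # Placeholder intrinsics: this test only reads images back, so the
        # zero focal lengths and principal point are never used.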
        cam_intr = CameraIntrinsics(
            "a",
            fx=0.0,
            fy=0.0,
            cx=0.0,
            cy=0.0,
            skew=0.0,
            height=100,
            width=100,
        )
        cam_intr.save(os.path.join(IM_FILEROOT, "a.intr"))
        color_data = (255 * np.random.rand(10, height, width, 3)).astype(
            np.uint8)
        depth_data = np.random.rand(10, height, width).astype(np.float32)
        for i in range(10):
            im = ColorImage(color_data[i], frame="a")
            im.save(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))

            im = DepthImage(depth_data[i], frame="a")
            im.save(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))

        # Create virtual camera
        virtual_cam = RgbdSensorFactory.sensor("virtual",
                                               cfg={
                                                   "image_dir": IM_FILEROOT,
                                                   "frame": "a"
                                               })
        self.assertTrue(
            virtual_cam.path_to_images == IM_FILEROOT,
            msg="img path changed after init",
        )

        # Start virtual camera and read frames
        virtual_cam.start()
        self.assertTrue(virtual_cam.is_running,
                        msg="camera not running after start")
        for i in range(10):
            color, depth = virtual_cam.frames()
            self.assertTrue(
                np.all(color.data == color_data[i]),
                msg="color data for img {:d} changed".format(i),
            )
            self.assertTrue(
                color.frame == virtual_cam.frame,
                msg="frame mismatch between color im and camera",
            )
            self.assertTrue(
                np.all(depth.data == depth_data[i]),
                msg="depth data for img {:d} changed".format(i),
            )
            self.assertTrue(
                depth.frame == virtual_cam.frame,
                msg="frame mismatch between depth im and camera",
            )

        # Make sure camera is stopped
        virtual_cam.stop()
        self.assertFalse(virtual_cam.is_running,
                         msg="camera running after stop")

        # Cleanup images
        for i in range(10):
            os.remove(os.path.join(IM_FILEROOT, "color_{:d}.png".format(i)))
            os.remove(os.path.join(IM_FILEROOT, "depth_{:d}.npy".format(i)))
        os.remove(os.path.join(IM_FILEROOT, "a.intr"))
        os.rmdir(IM_FILEROOT)
    def __init__(self, path_to_images, frame=None, loop=True):
        """Create a new virtualized Primesense sensor.

        This requires a directory containing a specific set of files.

        First, the directory must contain a set of images, where each
        image has three files:
        - color_{#}.png
        - depth_{#}.npy
        - ir_{#}.npy
        In these, the {#} is replaced with the integer index for the
        image. These indices should start at zero and increase
        consecutively.

        Second, the directory must contain CameraIntrinsics files
        for the color and ir cameras:
        - {frame}_color.intr
        - {frame}_ir.intr
        In these, the {frame} is replaced with the reference frame
        name that is passed as a parameter to this function.

        Parameters
        ----------
        path_to_images : :obj:`str`
            The path to a directory containing images that the virtualized
            sensor will return.
        frame : :obj:`str`
            The name of the frame of reference in which the sensor resides.
            If None, this will be discovered from the files in the directory.
        loop : :obj:`bool`
            Whether or not to loop back to the first image after running
            out of images.
        """
        self._running = False
        self._path_to_images = path_to_images
        self._im_index = 0
        self._num_images = 0
        self._frame = frame
        self._loop = loop
        filenames = os.listdir(self._path_to_images)

        # get number of images
        for filename in filenames:
            if filename.find("depth") != -1 and filename.endswith(".npy"):
                self._num_images += 1

        # set the frame dynamically
        self._color_ext = ".png"
        for filename in filenames:
            file_root, file_ext = os.path.splitext(filename)
            color_ind = file_root.rfind("color")

            if (file_ext in VirtualSensor.SUPPORTED_FILE_EXTS
                    and color_ind != -1):
                self._color_ext = file_ext

            if (self._frame is None and file_ext == INTR_EXTENSION
                    and color_ind != -1):
                self._frame = file_root[:color_ind - 1]
                self._color_frame = file_root
                self._ir_frame = file_root

        # load color intrinsics
        color_intr_filename = os.path.join(self._path_to_images,
                                           "%s_color.intr" % (self._frame))
        ir_intr_filename = os.path.join(self._path_to_images,
                                        "%s_ir.intr" % (self._frame))
        generic_intr_filename = os.path.join(self._path_to_images,
                                             "%s.intr" % (self._frame))
        if os.path.exists(color_intr_filename):
            self._color_intr = CameraIntrinsics.load(color_intr_filename)
        else:
            self._color_intr = CameraIntrinsics.load(generic_intr_filename)
        if os.path.exists(ir_intr_filename):
            self._ir_intr = CameraIntrinsics.load(ir_intr_filename)
        else:
            self._ir_intr = CameraIntrinsics.load(generic_intr_filename)
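A minimal sketch of the directory this constructor expects, and how it might be instantiated (the directory and frame names are illustrative):

    images/
        color_0.png
        depth_0.npy
        ir_0.npy
        color_1.png
        depth_1.npy
        ir_1.npy
        cam_color.intr
        cam_ir.intr

    sensor = VirtualSensor("images/", frame="cam")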
Example #12
class EnsensoSensor(CameraSensor):
    """Class for interfacing with an Ensenso N* sensor."""
    def __init__(self, frame="ensenso"):
        # set member vars
        self._frame = frame
        self._initialized = False
        self._format = None
        self._camera_intr = None
        self._cur_depth_im = None
        self._running = False

    def __del__(self):
        """Automatically stop the sensor for safety."""
        if self.is_running:
            self.stop()

    def _set_format(self, msg):
        """Set the buffer formatting."""
        num_points = msg.height * msg.width
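        # Little-endian, four 32-bit floats per packed point (x, y, z plus
        # one extra channel).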
        self._format = "<" + num_points * "ffff"

    def _set_camera_properties(self, msg):
        """Set the camera intrinsics from an info msg."""
        focal_x = msg.K[0]
        focal_y = msg.K[4]
        center_x = msg.K[2]
        center_y = msg.K[5]
        im_height = msg.height
        im_width = msg.width
        self._camera_intr = CameraIntrinsics(
            self._frame,
            focal_x,
            focal_y,
            center_x,
            center_y,
            height=im_height,
            width=im_width,
        )

    def _depth_im_from_pointcloud(self, msg):
        """Convert a pointcloud2 message to a depth image."""
        # set format
        if self._format is None:
            self._set_format(msg)

        # rescale camera intr in case binning is turned on
        if msg.height != self._camera_intr.height:
            rescale_factor = float(msg.height) / self._camera_intr.height
            self._camera_intr = self._camera_intr.resize(rescale_factor)

        # read num points
        num_points = msg.height * msg.width

        # read buffer
        raw_tup = struct.Struct(self._format).unpack_from(msg.data, 0)
        raw_arr = np.array(raw_tup)

        # subsample depth values and reshape
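        # offset 2 within each packed 4-float point selects the z value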
        depth_ind = 2 + 4 * np.arange(num_points)
        depth_buf = raw_arr[depth_ind]
        depth_arr = depth_buf.reshape(msg.height, msg.width)
        depth_im = DepthImage(depth_arr, frame=self._frame)

        return depth_im

    def _pointcloud_callback(self, msg):
        """Callback for handling point clouds."""
        self._cur_depth_im = self._depth_im_from_pointcloud(msg)

    def _camera_info_callback(self, msg):
        """Callback for reading camera info."""
        self._camera_info_sub.unregister()
        self._set_camera_properties(msg)

    @property
    def ir_intrinsics(self):
        """:obj:`CameraIntrinsics` : IR Camera intrinsics for Ensenso."""
        return self._camera_intr

    @property
    def is_running(self):
        """bool : True if the stream is running, or false otherwise."""
        return self._running

    @property
    def frame(self):
        """:obj:`str` : The reference frame of the sensor."""
        return self._frame

    def start(self):
        """Start the sensor"""
        # initialize subscribers
        self._pointcloud_sub = rospy.Subscriber(
            "/%s/depth/points" % (self.frame),
            PointCloud2,
            self._pointcloud_callback,
        )
        self._camera_info_sub = rospy.Subscriber(
            "/%s/left/camera_info" % (self.frame),
            CameraInfo,
            self._camera_info_callback,
        )

        while self._camera_intr is None:
            time.sleep(0.1)

        self._running = True

    def stop(self):
        """Stop the sensor"""
        # check that everything is running
        if not self._running:
            logging.warning("Ensenso not running. Aborting stop")
            return False

        # stop subs
        self._pointcloud_sub.unregister()
        self._camera_info_sub.unregister()
        self._running = False
        return True

    def frames(self):
        """Retrieve a new frame from the Ensenso and convert it to a
        ColorImage and a DepthImage.

        Returns
        -------
        :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, :obj:`None`
            The ColorImage and DepthImage of the current frame, plus a None
            placeholder where other sensors return an IR image.

        Raises
        ------
        RuntimeError
            If the Ensenso stream is not running.
        """
        # wait for a new image
        while self._cur_depth_im is None:
            time.sleep(0.01)

        # read next image
        depth_im = self._cur_depth_im
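        # The Ensenso provides no color stream, so pair the depth image
        # with an all-black placeholder.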
        color_im = ColorImage(
            np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8),
            frame=self._frame,
        )
        self._cur_depth_im = None
        return color_im, depth_im, None

    def median_depth_img(self, num_img=1, fill_depth=0.0):
        """Collect a series of depth images and return the median of the set.

        Parameters
        ----------
        num_img : int
            The number of consecutive frames to process.
        fill_depth : float
            Value substituted for pixels whose median depth is zero.

        Returns
        -------
        :obj:`DepthImage`
            The median DepthImage collected from the frames.
        """
        depths = []

        for _ in range(num_img):
            _, depth, _ = self.frames()
            depths.append(depth)

        median_depth = Image.median_images(depths)
        median_depth.data[median_depth.data == 0.0] = fill_depth
        return median_depth
    def create_camera_data(self,
                           rgb: np.ndarray,
                           depth: np.ndarray,
                           cam_intr_frame: str,
                           fx: float,
                           fy: float,
                           cx: float,
                           cy: float,
                           skew: float,
                           w: int,
                           h: int,
                           seg_mask: np.ndarray = np.empty(shape=(0, )),
                           bbox: tuple = ()):
        """Create the CameraData object in the correct format expected by gqcnn

        Parameters
        ----------
        rgb : np.ndarray
            RGB image.
        depth : np.ndarray
            Depth image.
        cam_intr_frame : str
            Reference frame of the images. Grasp poses are computed with
            respect to this frame.
        fx : float
            Focal length (x).
        fy : float
            Focal length (y).
        cx : float
            Principal point (x).
        cy : float
            Principal point (y).
        skew : float
            Skew coefficient.
        w : int
            Image width.
        h : int
            Image height.
        seg_mask : np.ndarray, optional
            Segmentation mask.
        bbox : tuple, optional
            Four values defining the mask bounding box
            (x_min, y_min, x_max, y_max).

        Returns
        -------
        CameraData
            Object that stores the input data required by plan_grasp().
        """

        camera_data = CameraData()

        # Create images
        camera_data.rgb_img = ColorImage(rgb, frame=cam_intr_frame)
        camera_data.depth_img = DepthImage(depth, frame=cam_intr_frame)

        if seg_mask.size > 0:
            camera_data.seg_img = BinaryImage(seg_mask, cam_intr_frame)

        # Check image sizes
        if camera_data.rgb_img.height != camera_data.depth_img.height or \
           camera_data.rgb_img.width != camera_data.depth_img.width:

            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       camera_data.rgb_img.height, camera_data.rgb_img.width,
                       camera_data.depth_img.height,
                       camera_data.depth_img.width)

            raise AssertionError(msg)

        if (camera_data.rgb_img.height < self.min_height
                or camera_data.rgb_img.width < self.min_width):

            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width,
                       camera_data.rgb_img.height, camera_data.rgb_img.width)

            raise AssertionError(msg)

        # Only validate the segmask if one was provided.
        if seg_mask.size > 0 and \
            (camera_data.rgb_img.height != camera_data.seg_img.height or
             camera_data.rgb_img.width != camera_data.seg_img.width):

            msg = ("Images and segmask must be the same shape! Color image is"
                   " %d x %d but segmask is %d x %d") % (
                       camera_data.rgb_img.height, camera_data.rgb_img.width,
                       camera_data.seg_img.height, camera_data.seg_img.width)

            raise AssertionError(msg)

        # set intrinsic params
        camera_data.intrinsic_params = CameraIntrinsics(
            cam_intr_frame, fx, fy, cx, cy, skew, h, w)

        # set mask bounding box
        if len(bbox) == 4:
            camera_data.bounding_box = {
                'min_x': bbox[0],
                'min_y': bbox[1],
                'max_x': bbox[2],
                'max_y': bbox[3]
            }

        return camera_data
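A short usage sketch with synthetic inputs (the array shapes, intrinsic values, and the `planner` instance that provides this method are all illustrative assumptions):

    import numpy as np

    h, w = 480, 640
    rgb = np.zeros((h, w, 3), dtype=np.uint8)
    depth = np.ones((h, w), dtype=np.float32)  # flat scene 1 m away

    camera_data = planner.create_camera_data(
        rgb, depth, cam_intr_frame="camera",
        fx=525.0, fy=525.0, cx=w / 2, cy=h / 2,
        skew=0.0, w=w, h=h)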
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        Type of PCL detector to run, either 'euclidean' or 'region_growing'.
    config : dict
        Config for a PCL detector.
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks,
        pred_info, and modal_segmasks_processed subdirectories.
    test_config : dict
        Config containing dataset information.
    """

    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data
    bin_mask_dir = None
    if 'bin_masks' in test_config:
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

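        # Deproject the masked depth image into a 3D point cloud in the
        # camera frame; zero-depth pixels are dropped before clustering.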
        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()
        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            raise ValueError(
                'PCL detector type not supported: {}'.format(detector_type))

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1], depth_im.shape[0])).astype(
                                 np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'), pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir