def get_segmented_point_cloud_world(self, cfg, point_cloud_world):
        # read params

        min_pt_box = np.array(cfg['min_pt'])
        max_pt_box = np.array(cfg['max_pt'])

        box = Box(min_pt_box, max_pt_box, self.WORLD_FRAME)
        # debug: print the box bounds
        print min_pt_box
        print max_pt_box

        # debug check: count the points that fall strictly inside the box
        # (sanity check against the box_mask call below)
        ch, num = point_cloud_world.data.shape
        print num
        A = point_cloud_world.data
        count = 0
        for i in range(num):
            if min_pt_box[0] < A[0][i] < max_pt_box[0] and \
               min_pt_box[1] < A[1][i] < max_pt_box[1] and \
               min_pt_box[2] < A[2][i] < max_pt_box[2]:
                count += 1
        print count

        # mask the cloud to the workspace box and return the segmented cloud
        seg_point_cloud_world, _ = point_cloud_world.box_mask(box)
        return seg_point_cloud_world
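A minimal usage sketch for the method above; the `segmenter` instance, the config values, and the `depth_im`, `camera_intr`, and `T_camera_world` objects are assumptions for illustration, following the deprojection pattern used in the detectors below.

# hypothetical usage: build a world-frame cloud and segment it to the box
seg_cfg = {'min_pt': [0.2, -0.2, 0.0], 'max_pt': [0.6, 0.2, 0.3]}
point_cloud_cam = camera_intr.deproject(depth_im)       # camera-frame cloud
point_cloud_world = T_camera_world * point_cloud_cam    # world-frame cloud
seg_cloud = segmenter.get_segmented_point_cloud_world(seg_cfg,
                                                      point_cloud_world)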
Example 2
    save_raw = config['save_raw']
    vis = config['vis']

    # open gui
    gui = plt.figure(0, figsize=(8,8))
    plt.ion()
    plt.title('INITIALIZING')
    plt.imshow(np.zeros([100,100]),
               cmap=plt.cm.gray_r)
    plt.axis('off')
    plt.draw()
    plt.pause(GUI_PAUSE)

    # read workspace bounds
    workspace_box = Box(np.array(workspace_config['min_pt']),
                        np.array(workspace_config['max_pt']),
                        frame='world')

    # read workspace objects
    workspace_objects = {}
    for obj_key, obj_config in workspace_config['objects'].iteritems():
        mesh_filename = obj_config['mesh_filename']
        pose_filename = obj_config['pose_filename']
        obj_mesh = trimesh.load_mesh(mesh_filename)
        obj_pose = RigidTransform.load(pose_filename)
        obj_mat_props = MaterialProperties(smooth=True,
                                           wireframe=False)
        scene_obj = SceneObject(obj_mesh, obj_pose, obj_mat_props)
        workspace_objects[obj_key] = scene_obj

    # setup each sensor
Example 3
    def detect(self, color_im, depth_im, cfg, camera_intr,
               T_camera_world,
               vis_foreground=False, vis_segmentation=False):
        """
        Detects all relevant objects in an rgbd image pair using foreground masking.

        Parameters
        ----------
        color_im : :obj:`ColorImage`
            color image for detection
        depth_im : :obj:`DepthImage`
            depth image for detection (corresponds to color image)
        cfg : :obj:`YamlConfig`
            parameters of detection function
        camera_intr : :obj:`CameraIntrinsics`
            intrinsics of the camera
        T_camera_world : :obj:`autolab_core.RigidTransform`
            registration of the camera to world frame

        Returns
        -------
        :obj:`list` of :obj:`RgbdDetection`
            all detections in the image
        """
        # read params
        min_pt_box = np.array(cfg['min_pt'])
        max_pt_box = np.array(cfg['max_pt'])
        min_contour_area = cfg['min_contour_area']
        max_contour_area = cfg['max_contour_area']
        min_box_area = cfg['min_box_area']
        max_box_area = cfg['max_box_area']
        box_padding_px = cfg['box_padding_px']
        crop_height = cfg['image_height']
        crop_width = cfg['image_width']
        depth_grad_thresh = cfg['depth_grad_thresh']
        point_cloud_mask_only = cfg['point_cloud_mask_only']

        w = cfg['filter_dim']

        half_crop_height = float(crop_height) / 2
        half_crop_width = float(crop_width) / 2
        half_crop_dims = np.array([half_crop_height, half_crop_width])

        fill_depth = np.max(depth_im.data[depth_im.data > 0])

        kinect2_denoising = False
        if 'kinect2_denoising' in cfg.keys() and cfg['kinect2_denoising']:
            kinect2_denoising = True
            depth_offset = cfg['kinect2_noise_offset']
            max_depth = cfg['kinect2_noise_max_depth']

        box = Box(min_pt_box, max_pt_box, 'world')

        # project into 3D
        point_cloud_cam = camera_intr.deproject(depth_im)
        point_cloud_world = T_camera_world * point_cloud_cam
        seg_point_cloud_world, _ = point_cloud_world.box_mask(box)
        seg_point_cloud_cam = T_camera_world.inverse() * seg_point_cloud_world
        depth_im_seg = camera_intr.project_to_image(seg_point_cloud_cam)

        # mask image using background detection
        bgmodel = color_im.background_model()
        binary_im = depth_im_seg.to_binary()

        # filter the image
        y, x = np.ogrid[-w/2+1:w/2+1, -w/2+1:w/2+1]
        mask = x*x + y*y <= w/2*w/2
        filter_struct = np.zeros([w,w]).astype(np.uint8)
        filter_struct[mask] = 1
        binary_im_filtered_data = snm.binary_dilation(binary_im.data, structure=filter_struct)
        binary_im_filtered = BinaryImage(binary_im_filtered_data.astype(np.uint8),
                                         frame=binary_im.frame,
                                         threshold=0)

        # find all contours
        contours = binary_im_filtered.find_contours(min_area=min_contour_area, max_area=max_contour_area)

        if vis_foreground:
            plt.figure()
            plt.subplot(1,3,1)
            plt.imshow(color_im.data)
            plt.axis('off')
            plt.subplot(1,3,2)
            plt.imshow(binary_im.data, cmap=plt.cm.gray)
            plt.axis('off')
            plt.subplot(1,3,3)
            plt.imshow(binary_im_filtered.data, cmap=plt.cm.gray)
            plt.axis('off')
            plt.show()

        # switch to just return the mean of nonzero_px
        if point_cloud_mask_only == 1:
            center_px = np.mean(binary_im_filtered.nonzero_pixels(), axis=0)
            ci = center_px[0]
            cj = center_px[1]
            binary_thumbnail = binary_im_filtered.crop(crop_height, crop_width, ci, cj)
            color_thumbnail = color_im.crop(crop_height, crop_width, ci, cj)
            depth_thumbnail = depth_im.crop(crop_height, crop_width, ci, cj)
            thumbnail_intr = camera_intr
            if camera_intr is not None:
                thumbnail_intr = camera_intr.crop(crop_height, crop_width, ci, cj)

            query_box = Box(center_px - half_crop_dims, center_px + half_crop_dims)
            return [RgbdDetection(color_thumbnail,
                                  depth_thumbnail,
                                  query_box,
                                  binary_thumbnail=binary_thumbnail,
                                  contour=None,
                                  camera_intr=thumbnail_intr)]

        # convert contours to detections
        detections = []
        for i, contour in enumerate(contours):
            orig_box = contour.bounding_box
            logging.debug('Orig box %d area: %.3f' %(i, orig_box.area))
            if orig_box.area > min_box_area and orig_box.area < max_box_area:
                # convert orig bounding box to query bounding box
                min_pt = orig_box.center - half_crop_dims
                max_pt = orig_box.center + half_crop_dims
                query_box = Box(min_pt, max_pt, frame=orig_box.frame)

                # segment color to get refined detection
                contour_mask = binary_im_filtered.contour_mask(contour)
                binary_thumbnail = contour_mask.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)
            else:
                # otherwise take original bounding box
                query_box = Box(contour.bounding_box.min_pt - box_padding_px,
                                contour.bounding_box.max_pt + box_padding_px,
                                frame = contour.bounding_box.frame)

                binary_thumbnail = binary_im_filtered.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)

            # crop to get thumbnails
            color_thumbnail = color_im.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)
            depth_thumbnail = depth_im.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)
            thumbnail_intr = camera_intr
            if camera_intr is not None:
                thumbnail_intr = camera_intr.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)

            # fix depth thumbnail
            depth_thumbnail = depth_thumbnail.replace_zeros(fill_depth)
            if kinect2_denoising:
                depth_data = depth_thumbnail.data
                min_depth = np.min(depth_data)
                binary_mask_data = binary_thumbnail.data
                depth_mask_data = depth_thumbnail.mask_binary(binary_thumbnail).data
                depth_mask_data += depth_offset
                depth_data[binary_mask_data > 0] = depth_mask_data[binary_mask_data > 0]
                depth_thumbnail = DepthImage(depth_data, depth_thumbnail.frame)

            # append to detections
            detections.append(RgbdDetection(color_thumbnail,
                                            depth_thumbnail,
                                            query_box,
                                            binary_thumbnail=binary_thumbnail,
                                            contour=contour,
                                            camera_intr=thumbnail_intr))

        return detections
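A minimal call sketch for this detector; the `detector` instance, the image and camera objects, the config contents, and the attribute names on the returned detections are assumptions based on the signature and constructor arguments above.

# hypothetical usage of the point-cloud-based detector defined above
detections = detector.detect(color_im, depth_im, cfg,
                             camera_intr, T_camera_world,
                             vis_foreground=False)
for detection in detections:
    # each RgbdDetection wraps the cropped thumbnails and the query box used
    # to produce them (attribute names assumed from the constructor call)
    color_thumb = detection.color_thumbnail
    depth_thumb = detection.depth_thumbnail
    query_box = detection.query_box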
Example 4
    def detect(self, color_im, depth_im, cfg, camera_intr=None,
               T_camera_world=None,
               vis_foreground=False, vis_segmentation=False):
        """
        Detects all relevant objects in an rgbd image pair using foreground masking.

        Parameters
        ----------
        color_im : :obj:`ColorImage`
            color image for detection
        depth_im : :obj:`DepthImage`
            depth image for detection (corresponds to color image)
        cfg : :obj:`YamlConfig`
            parameters of detection function
        camera_intr : :obj:`CameraIntrinsics`
            intrinsics of the camera
        T_camera_world : :obj:`autolab_core.RigidTransform`
            registration of the camera to world frame

        Returns
        -------
        :obj:`list` of :obj:`RgbdDetection`
            all detections in the image
        """
        # read params
        foreground_mask_tolerance = cfg['foreground_mask_tolerance']
        min_contour_area = cfg['min_contour_area']
        max_contour_area = cfg['max_contour_area']
        min_box_area = cfg['min_box_area']
        max_box_area = cfg['max_box_area']
        box_padding_px = cfg['box_padding_px']
        crop_height = cfg['image_height']
        crop_width = cfg['image_width']
        depth_grad_thresh = cfg['depth_grad_thresh']

        w = cfg['filter_dim']

        half_crop_height = float(crop_height) / 2
        half_crop_width = float(crop_width) / 2
        half_crop_dims = np.array([half_crop_height, half_crop_width])

        fill_depth = np.max(depth_im.data[depth_im.data > 0])

        kinect2_denoising = False
        if 'kinect2_denoising' in cfg.keys() and cfg['kinect2_denoising']:
            kinect2_denoising = True
            depth_offset = cfg['kinect2_noise_offset']
            max_depth = cfg['kinect2_noise_max_depth']

        # mask image using background detection
        bgmodel = color_im.background_model()
        binary_im = color_im.foreground_mask(foreground_mask_tolerance, bgmodel=bgmodel)

        # filter the image
        y, x = np.ogrid[-w/2+1:w/2+1, -w/2+1:w/2+1]
        mask = x*x + y*y <= w/2*w/2
        filter_struct = np.zeros([w,w]).astype(np.uint8)
        filter_struct[mask] = 1
        binary_im_filtered = binary_im.apply(snm.grey_closing, structure=filter_struct)

        # find all contours
        contours = binary_im_filtered.find_contours(min_area=min_contour_area, max_area=max_contour_area)

        if vis_foreground:
            plt.figure()
            plt.subplot(1,2,1)
            plt.imshow(color_im.data)
            plt.axis('off')
            plt.subplot(1,2,2)
            plt.imshow(binary_im_filtered.data, cmap=plt.cm.gray)
            plt.axis('off')
            plt.show()

        # threshold gradients of depth
        depth_im = depth_im.threshold_gradients(depth_grad_thresh)

        # convert contours to detections
        detections = []
        for contour in contours:
            orig_box = contour.bounding_box
            if orig_box.area > min_box_area and orig_box.area < max_box_area:
                # convert orig bounding box to query bounding box
                min_pt = orig_box.center - half_crop_dims
                max_pt = orig_box.center + half_crop_dims
                query_box = Box(min_pt, max_pt, frame=orig_box.frame)

                # segment color to get refined detection
                color_thumbnail = color_im.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)
                binary_thumbnail, segment_thumbnail, query_box = self._segment_color(color_thumbnail, query_box, bgmodel, cfg, vis_segmentation=vis_segmentation)
                if binary_thumbnail is None:
                    continue
            else:
                # otherwise take original bounding box
                query_box = Box(contour.bounding_box.min_pt - box_padding_px,
                                contour.bounding_box.max_pt + box_padding_px,
                                frame = contour.bounding_box.frame)

                binary_thumbnail = binary_im_filtered.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)

            # crop to get thumbnails
            color_thumbnail = color_im.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)
            depth_thumbnail = depth_im.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)
            thumbnail_intr = camera_intr
            if camera_intr is not None:
                thumbnail_intr = camera_intr.crop(query_box.height, query_box.width, query_box.ci, query_box.cj)

            # fix depth thumbnail
            depth_thumbnail = depth_thumbnail.replace_zeros(fill_depth)
            if kinect2_denoising:
                depth_data = depth_thumbnail.data
                min_depth = np.min(depth_data)
                binary_mask_data = binary_thumbnail.data
                depth_mask_data = depth_thumbnail.mask_binary(binary_thumbnail).data
                depth_mask_data += depth_offset
                depth_data[binary_mask_data > 0] = depth_mask_data[binary_mask_data > 0]
                depth_thumbnail = DepthImage(depth_data, depth_thumbnail.frame)

            # append to detections
            detections.append(RgbdDetection(color_thumbnail,
                                            depth_thumbnail,
                                            query_box,
                                            binary_thumbnail=binary_thumbnail,
                                            contour=contour,
                                            camera_intr=thumbnail_intr))

        return detections
Example 5
    def _segment_color(self, color_im, bounding_box, bgmodel, cfg, vis_segmentation=False):
        """ Re-segments a color image to isolate an object of interest using foreground masking and kmeans """
        # read params
        foreground_mask_tolerance = cfg['foreground_mask_tolerance']
        color_seg_rgb_weight = cfg['color_seg_rgb_weight']
        color_seg_num_clusters = cfg['color_seg_num_clusters']
        color_seg_hsv_weight = cfg['color_seg_hsv_weight']
        color_seg_dist_pctile = cfg['color_seg_dist_pctile']
        color_seg_dist_thresh = cfg['color_seg_dist_thresh']
        color_seg_min_bg_dist = cfg['color_seg_min_bg_dist']
        min_contour_area = cfg['min_contour_area']
        contour_dist_thresh = cfg['contour_dist_thresh']

        # foreground masking
        binary_im = color_im.foreground_mask(foreground_mask_tolerance, bgmodel=bgmodel)
        binary_im = binary_im.prune_contours(area_thresh=min_contour_area, dist_thresh=contour_dist_thresh)
        if binary_im is None:
            return None, None, None

        color_im = color_im.mask_binary(binary_im)

        # kmeans segmentation
        segment_im = color_im.segment_kmeans(color_seg_rgb_weight,
                                             color_seg_num_clusters,
                                             hue_weight=color_seg_hsv_weight)
        
        # keep the segment that is farthest from the background
        bg_dists = []
        hsv_bgmodel = 255 * np.array(colorsys.rgb_to_hsv(float(bgmodel[0]) / 255,
                                                         float(bgmodel[1]) / 255,
                                                         float(bgmodel[2]) / 255))
        hsv_bgmodel = np.r_[color_seg_rgb_weight * np.array(bgmodel), color_seg_hsv_weight * hsv_bgmodel[:1]]

        for k in range(segment_im.num_segments-1):
            seg_mask = segment_im.segment_mask(k)
            color_im_segment = color_im.mask_binary(seg_mask)
            color_im_segment_data = color_im_segment.nonzero_data()
            color_im_segment_data = np.c_[color_seg_rgb_weight * color_im_segment_data, color_seg_hsv_weight * color_im_segment.nonzero_hsv_data()[:,:1]]

            # take the median distance from the background
            bg_dist = np.median(np.linalg.norm(color_im_segment_data - hsv_bgmodel, axis=1))
            if vis_segmentation:
                logging.info('BG Dist for segment %d: %.4f' %(k, bg_dist))
            bg_dists.append(bg_dist)

        # sort by distance
        dists_and_indices = zip(np.arange(len(bg_dists)), bg_dists)
        dists_and_indices.sort(key = lambda x: x[1], reverse=True)
        
        # mask out the segment in the binary image
        if color_seg_num_clusters > 1 and abs(dists_and_indices[0][1] - dists_and_indices[1][1]) > color_seg_dist_thresh and dists_and_indices[1][1] < color_seg_min_bg_dist:
            obj_segment = dists_and_indices[0][0]
            obj_seg_mask = segment_im.segment_mask(obj_segment)
            binary_im = binary_im.mask_binary(obj_seg_mask)
            binary_im, diff_px = binary_im.center_nonzero()
            bounding_box = Box(bounding_box.min_pt.astype(np.float32) - diff_px,
                               bounding_box.max_pt.astype(np.float32) - diff_px,
                               bounding_box.frame)

        if vis_segmentation:
            plt.figure()
            plt.subplot(1,3,1)
            plt.imshow(color_im.data)
            plt.axis('off')
            plt.subplot(1,3,2)
            plt.imshow(segment_im.data)
            plt.colorbar()
            plt.axis('off')
            plt.subplot(1,3,3)
            plt.imshow(binary_im.data, cmap=plt.cm.gray)
            plt.axis('off')
            plt.show()

        return binary_im, segment_im, bounding_box
Example 6
    save_raw = config["save_raw"]
    vis = config["vis"]

    # open gui
    gui = plt.figure(0, figsize=(8, 8))
    plt.ion()
    plt.title("INITIALIZING")
    plt.imshow(np.zeros([100, 100]), cmap=plt.cm.gray_r)
    plt.axis("off")
    plt.draw()
    plt.pause(GUI_PAUSE)

    # read workspace bounds
    workspace_box = Box(
        np.array(workspace_config["min_pt"]),
        np.array(workspace_config["max_pt"]),
        frame="world",
    )

    # read workspace objects
    workspace_objects = {}
    for obj_key, obj_config in workspace_config["objects"].iteritems():
        mesh_filename = obj_config["mesh_filename"]
        pose_filename = obj_config["pose_filename"]
        obj_mesh = trimesh.load_mesh(mesh_filename)
        obj_pose = RigidTransform.load(pose_filename)
        obj_mat_props = MaterialProperties(smooth=True, wireframe=False)
        scene_obj = SceneObject(obj_mesh, obj_pose, obj_mat_props)
        workspace_objects[obj_key] = scene_obj

    # setup each sensor
Example 7
    vis = config['vis']

    # make output dir if needed
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # read rescale factor
    rescale_factor = 1.0
    if 'rescale_factor' in config.keys():
        rescale_factor = config['rescale_factor']

    # read workspace bounds
    workspace = None
    if 'workspace' in config.keys():
        workspace = Box(np.array(config['workspace']['min_pt']),
                        np.array(config['workspace']['max_pt']),
                        frame='world')
        
    # init ros node
    rospy.init_node('capture_test_images') #NOTE: this is required by the camera sensor classes
    Logger.reconfigure_root()

    for sensor_name, sensor_config in config['sensors'].iteritems():
        logger.info('Capturing images from sensor %s' %(sensor_name))
        save_dir = os.path.join(output_dir, sensor_name)
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)        
        
        # read params
        sensor_type = sensor_config['type']
        sensor_frame = sensor_config['frame']
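        # Hypothetical continuation of this loop (not from the source):
        # construct and start the sensor with RgbdSensorFactory, as in the
        # Phoxi example further below, then save one frame pair, assuming the
        # usual (color, depth, ir) return from frames().
        sensor = RgbdSensorFactory.sensor(sensor_type, sensor_config)
        sensor.start()
        color_im, depth_im, _ = sensor.frames()
        color_im.save(os.path.join(save_dir, 'color_0.png'))
        depth_im.save(os.path.join(save_dir, 'depth_0.npy'))
        sensor.stop()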
Example 8
    # make output dir if needed
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # read rescale factor
    rescale_factor = 1.0
    if "rescale_factor" in config.keys():
        rescale_factor = config["rescale_factor"]

    # read workspace bounds
    workspace = None
    if "workspace" in config.keys():
        workspace = Box(
            np.array(config["workspace"]["min_pt"]),
            np.array(config["workspace"]["max_pt"]),
            frame="world",
        )

    # init ros node
    rospy.init_node(
        "capture_test_images"
    )  # NOTE: this is required by the camera sensor classes
    Logger.reconfigure_root()

    for sensor_name, sensor_config in config["sensors"].iteritems():
        logger.info("Capturing images from sensor %s" % (sensor_name))
        save_dir = os.path.join(output_dir, sensor_name)
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
Example 9
                     scale=0.001,
                     color=(1, 0, 0),
                     subsample=10)
        vis3d.points(point_cloud_filtered,
                     scale=0.001,
                     color=(0, 0, 1),
                     subsample=10)
        vis3d.show()

    pcl_start = time.time()

    # subsample point cloud
    #rate = int(1.0 / rescale_factor)**2
    #point_cloud_filtered = point_cloud_filtered.subsample(rate, random=False)
    box = Box(np.array([0.2, -0.24, min_height]),
              np.array([0.56, 0.21, max_height]),
              frame='world')
    point_cloud_masked, valid_indices = point_cloud_filtered.box_mask(box)
    invalid_indices = np.setdiff1d(np.arange(point_cloud_filtered.num_points),
                                   valid_indices)

    # apply PCL filters
    pcl_cloud = pcl.PointCloud(point_cloud_masked.data.T.astype(np.float32))
    tree = pcl_cloud.make_kdtree()
    ec = pcl_cloud.make_EuclideanClusterExtraction()
    ec.set_ClusterTolerance(0.005)
    #ec.set_MinClusterSize(1)
    #ec.set_MaxClusterSize(250)
    ec.set_MinClusterSize(250)
    ec.set_MaxClusterSize(1000000)
    ec.set_SearchMethod(tree)
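    # Hypothetical continuation (standard python-pcl clustering pattern, not
    # from the source): run the extraction and wrap each cluster in an
    # autolab_core PointCloud for downstream use.
    cluster_indices = ec.Extract()
    clusters = []
    for indices in cluster_indices:
        cluster_data = pcl_cloud.to_array()[indices].T
        clusters.append(PointCloud(cluster_data, frame='world'))
    logging.info('Found %d clusters in %.3f sec' % (len(clusters),
                                                    time.time() - pcl_start))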
Example 10
        # Create ICP objects
        feature_matcher = PointToPlaneFeatureMatcher()
        solver = PointToPlaneICPSolver()

        # Start physical depth and color cameras
        logging.info('Creating Phoxi sensor')
        sensor_config = {'frame': 'phoxi', 'device_name': 1703005, 'size': 'small'}
        phoxi_sensor = RgbdSensorFactory.sensor('phoxi', sensor_config)
        logging.info('Starting Phoxi sensor')
        phoxi_sensor.start()
        logging.info('Phoxi sensor initialized')

        # Create box for filtering point cloud
        min_pt = np.array([0.25, -0.12, 0])
        max_pt = np.array([0.5, 0.12, 0.25])
        mask_box = Box(min_pt, max_pt, 'world')

        env = GraspingEnv(config, config['vis'])
        dataset = get_dataset(config, args)
        datapoint = dataset.datapoint_template

        obj_id, obj_id_to_key = 0, {}
        while not rospy.is_shutdown():
            try:
                env.reset()
            except NoRemainingSamplesException:
                break
            obj_id_to_key[obj_id] = env.state.obj.key
            print '\n\n\n\n\n\n\n\n\n\nSampled object', env.state.obj.key

            env.state.obj.T_obj_world.translation[0] += .4 # put object in bin, not outside it
Example 11
import numpy as np

from sensor_msgs.msg import PointCloud2
from sensor_msgs import point_cloud2
from std_msgs.msg import Header, Int64
import rospy

from autolab_core import Box, PointCloud, RigidTransform
from perception import CameraIntrinsics
from visualization import Visualizer3D as vis3d

CAMERA_INTR_FILENAME = '/nfs/diskstation/calib/primesense_overhead/primesense_overhead.intr'
CAMERA_POSE_FILENAME = '/nfs/diskstation/calib/primesense_overhead/primesense_overhead_to_world.tf'
camera_intr = CameraIntrinsics.load(CAMERA_INTR_FILENAME)
T_camera_world = RigidTransform.load(CAMERA_POSE_FILENAME).as_frames(
    'camera', 'world')
workspace = Box(min_pt=np.array([0.22, -0.22, 0.005]),
                max_pt=np.array([0.56, 0.22, 0.2]),
                frame='world')
pub = None


def cloudCallback(msg):
    global T_camera_world

    # read the cloud
    cloud_array = []
    for p in point_cloud2.read_points(msg):
        cloud_array.append([p[0], p[1], p[2]])
    cloud_array = np.array(cloud_array).T

    # form a point cloud
    point_cloud_camera = PointCloud(cloud_array, frame='camera')
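    # Hypothetical continuation of the callback (not from the source):
    # transform the cloud into the world frame, keep only the points inside
    # the workspace box defined above, and publish the remaining point count
    # on the Int64 publisher `pub` (assumed to be set up elsewhere).
    point_cloud_world = T_camera_world * point_cloud_camera
    point_cloud_masked, _ = point_cloud_world.box_mask(workspace)
    if pub is not None:
        pub.publish(Int64(point_cloud_masked.num_points))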