Example #1
    def test_binary_init(self):
        # valid data
        random_valid_data = (255.0 *
                             np.random.rand(IM_HEIGHT, IM_WIDTH)).astype(
                                 np.uint8)
        binary_data = 255 * (random_valid_data > 128)
        im = BinaryImage(random_valid_data)
        self.assertEqual(im.height, IM_HEIGHT)
        self.assertEqual(im.width, IM_WIDTH)
        self.assertEqual(im.channels, 1)
        self.assertTrue(np.allclose(im.data, binary_data))

        # invalid channels
        random_data = np.random.rand(IM_HEIGHT, IM_WIDTH, 3).astype(np.uint8)
        caught_bad_channels = False
        try:
            im = BinaryImage(random_data)
        except Exception:
            caught_bad_channels = True
        self.assertTrue(caught_bad_channels)

        # invalid type
        random_data = np.random.rand(IM_HEIGHT, IM_WIDTH).astype(np.float32)
        caught_bad_dtype = False
        try:
            im = BinaryImage(random_data)
        except Exception:
            caught_bad_dtype = True
        self.assertTrue(caught_bad_dtype)
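
Taken together, the assertions above pin down the BinaryImage contract: the input must be a single-channel uint8 array, and values are binarized to {0, 255} (the test compares against a 128 threshold). A minimal standalone sketch, assuming the BerkeleyAutomation perception package (these image classes also live in autolab_core in newer releases):

import numpy as np
from perception import BinaryImage

# uint8, single channel: the only layout the constructor accepts
data = (255 * (np.random.rand(48, 64) > 0.5)).astype(np.uint8)
im = BinaryImage(data, frame='camera')
assert im.channels == 1 and im.data.dtype == np.uint8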
Example #2
def register_example(reg_example, aligner, model):
    ses = reg_example.salient_edge_set

    # Pre-process mesh
    mesh = ses.mesh
    edge_mask = ses.edge_mask
    edge_pc_obj = PointCloud(generate_canonical_pc(mesh, edge_mask).T,
                             frame='obj')

    # Process Depth Image
    ci = reg_example.camera_intrs
    depth_im = reg_example.depth_im
    point_cloud_im = ci.deproject_to_image(depth_im)
    normal_cloud_im = point_cloud_im.normal_cloud_im()
    joined = np.dstack((depth_im.data, normal_cloud_im.data))
    mask = model.predict(joined[np.newaxis, :, :, :])[0]
    mask *= 255.0
    mask = BinaryImage(mask.astype(np.uint8))

    depth_im_edges = depth_im.mask_binary(mask)
    edge_pc_cam = ci.deproject(depth_im_edges)
    edge_pc_cam.remove_zero_points()

    # Align the two point sets with Super4PCS
    T_obj_camera_est = aligner.align(edge_pc_cam, edge_pc_obj)

    return T_obj_camera_est
Example #3
    def _action_callback(self, action):
        # Update image with binary mask
        binary_data = np.frombuffer(action.mask_data, dtype=np.uint8).reshape(action.height, action.width)
        binary_image = BinaryImage(binary_data)
        mask = binary_image.nonzero_pixels()
        self._current_image.data[mask[:,0], mask[:,1], :] = (0.3 * self._current_image.data[mask[:,0], mask[:,1], :] +
                                                             0.7 * np.array([200, 30, 30], dtype=np.uint8))

        # Paint the appropriate action type over the current image
        action_type = action.action_type
        action_data = action.action_data

        if action_type == 'parallel_jaw':
            location = np.array([action_data[0], action_data[1]], dtype=np.uint32)
            axis = np.array([action_data[2], action_data[3]])
            if axis[0] != 0:
                angle = np.rad2deg(np.arctan(axis[1] / axis[0]))
            else:
                angle = 90
            jaw_image = ColorImage(imutils.rotate_bound(self._jaw_image.data, angle))
            self._current_image = self._overlay_image(self._current_image, jaw_image, location)
        elif action_type == 'suction':
            location = np.array([action_data[0], action_data[1]], dtype=np.uint32)
            self._current_image = self._overlay_image(self._current_image, self._suction_image, location)
        elif action_type == 'push':
            start = np.array([action_data[0], action_data[1]], dtype=np.int32)
            end = np.array([action_data[2], action_data[3]], dtype=np.int32)
            axis = (end - start).astype(np.float32)
            axis_len = np.linalg.norm(axis)
            if axis_len == 0:
                axis = np.array([0.0, 1.0])
                axis_len = 1.0
            axis = axis / axis_len

            if axis[0] != 0:
                angle = np.rad2deg(np.arctan2(axis[1], axis[0]))
            else:
                angle = (90.0 if axis[1] > 0 else -90.0)
            start_image = ColorImage(imutils.rotate_bound(self._push_image.data, angle))
            end_image = ColorImage(imutils.rotate_bound(self._push_end_image.data, angle))

            start = np.array(start - axis * 0.5 * self._push_image.width, dtype=np.int32)
            end = np.array(end + axis * 0.5 * self._push_end_image.width, dtype=np.int32)

            self._current_image = self._overlay_image(self._current_image, start_image, start)
            self._current_image = self._overlay_image(self._current_image, end_image, end)

        # Update display
        self.image_signal.emit()
        self.conf_signal.emit(action.confidence)
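
Note the asymmetry above: the parallel_jaw branch uses np.arctan, which folds directions 180 degrees apart onto one angle (harmless for a symmetric jaw overlay), while the push branch uses np.arctan2, which preserves the full direction of the push arrow. A quick standalone illustration:

import numpy as np

axis = np.array([-1.0, 1.0])
print(np.rad2deg(np.arctan(axis[1] / axis[0])))   # -45.0: folded into (-90, 90)
print(np.rad2deg(np.arctan2(axis[1], axis[0])))   # 135.0: full direction kept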
Example #4
def rendered_images(data, render_mode=RenderMode.SEGMASK):
    rendered_images = []
    num_images = data.attrs[NUM_IMAGES_KEY]

    for i in range(num_images):
        # get the image data
        image_key = IMAGE_KEY + '_' + str(i)
        image_data = data[image_key]
        image_arr = np.array(image_data[IMAGE_DATA_KEY])
        frame = image_data.attrs[IMAGE_FRAME_KEY]
        if render_mode == RenderMode.SEGMASK:
            image = BinaryImage(image_arr, frame)
        elif render_mode == RenderMode.DEPTH:
            image = DepthImage(image_arr, frame)
        elif render_mode == RenderMode.SCALED_DEPTH:
            image = ColorImage(image_arr, frame)
        R_camera_table = image_data.attrs[CAM_ROT_KEY]
        t_camera_table = image_data.attrs[CAM_POS_KEY]
        frame = image_data.attrs[CAM_FRAME_KEY]
        T_camera_world = RigidTransform(R_camera_table, t_camera_table,
                                        from_frame=frame,
                                        to_frame='world')

        rendered_images.append(ObjectRender(image, T_camera_world))
    return rendered_images
Example #5
    def plan_grasp_segmask(self, req):
        """Grasp planner request handler.

        Parameters
        ----------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """
        color_im, depth_im, camera_intr = self.read_images(req)
        raw_segmask = req.segmask
        try:
            segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
                raw_segmask, desired_encoding="passthrough"),
                                  frame=camera_intr.frame)
        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)
        if color_im.height != segmask.height or \
           color_im.width != segmask.width:
            msg = ("Images and segmask must be the same shape! Color image is"
                   " %d x %d but segmask is %d x %d") % (
                       color_im.height, color_im.width, segmask.height,
                       segmask.width)
            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        return self._plan_grasp(color_im,
                                depth_im,
                                camera_intr,
                                segmask=segmask)
Example #6
def load(save_dir):
    if not os.path.exists(save_dir):
        raise ValueError('Directory %s does not exist!' % (save_dir))
    color_image_filename = os.path.join(save_dir, 'color.png')
    depth_image_filename = os.path.join(save_dir, 'depth.npy')
    camera_intr_filename = os.path.join(save_dir, 'camera.intr')
    segmask_filename = os.path.join(save_dir, 'segmask.npy')
    obj_segmask_filename = os.path.join(save_dir, 'obj_segmask.npy')
    state_filename = os.path.join(save_dir, 'state.pkl')
    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    color = ColorImage.open(color_image_filename, frame=camera_intr.frame)
    depth = DepthImage.open(depth_image_filename, frame=camera_intr.frame)
    segmask = None
    if os.path.exists(segmask_filename):
        segmask = BinaryImage.open(segmask_filename,
                                   frame=camera_intr.frame)
    obj_segmask = None
    if os.path.exists(obj_segmask_filename):
        obj_segmask = SegmentationImage.open(obj_segmask_filename,
                                             frame=camera_intr.frame)
    fully_observed = None
    if os.path.exists(state_filename):
        fully_observed = pkl.load(open(state_filename, 'rb'))
    return RgbdImageState(RgbdImage.from_color_and_depth(color, depth),
                          camera_intr,
                          segmask=segmask,
                          obj_segmask=obj_segmask,
                          fully_observed=fully_observed)
Example #7
    def get_state(self, depth, segmask):
        # Read images.
        depth_im = DepthImage(depth, frame=self.camera_intr.frame)
        color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                        3]).astype(np.uint8),
                              frame=self.camera_intr.frame)
        segmask = BinaryImage(segmask.astype(np.uint8) * 255,
                              frame=self.camera_intr.frame)

        # Inpaint.
        depth_im = depth_im.inpaint(rescale_factor=self.inpaint_rescale_factor)

        # Create state.
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
        state = RgbdImageState(rgbd_im, self.camera_intr, segmask=segmask)
        return state, rgbd_im
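
get_state assumes the incoming segmask is boolean (or {0, 1}); the astype(np.uint8) * 255 rescale is what converts it to the {0, 255} pixel convention BinaryImage expects. A standalone sketch of just that conversion:

import numpy as np

segmask = np.zeros((480, 640), dtype=bool)  # assumed boolean object mask
segmask[100:200, 150:300] = True
segmask_px = segmask.astype(np.uint8) * 255  # 0 = background, 255 = object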
Example #8
def bbox_to_mask(bbox, c_img):
    loX, loY, hiX, hiY = bbox
    bin_img = np.zeros(c_img.shape[0:2])
    # don't include workspace points
    for x in range(loX, hiX):
        for y in range(loY, hiY):
            r, g, b = c_img[y][x]
            if r < 240 or g < 240 or b < 240:
                bin_img[y][x] = 255

    return BinaryImage(bin_img.astype(np.uint8))
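
The per-pixel loop above is slow in pure Python; the same near-white test can be written as a single vectorized comparison. A sketch with the same semantics, assuming a uint8 RGB c_img as in the original:

import numpy as np
from perception import BinaryImage  # as in the surrounding examples

def bbox_to_mask_vectorized(bbox, c_img):
    loX, loY, hiX, hiY = bbox
    bin_img = np.zeros(c_img.shape[:2], dtype=np.uint8)
    crop = c_img[loY:hiY, loX:hiX]
    # keep any pixel that is not near-white in every channel
    not_workspace = np.any(crop < 240, axis=2)
    bin_img[loY:hiY, loX:hiX][not_workspace] = 255
    return BinaryImage(bin_img)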
Example #9
    def plan_grasp_segmask_handler(self, req):
        """Grasp planner request handler.

        Parameters
        ----------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """

        camera_data = self.read_images(req)
        raw_segmask = req.segmask

        # create segmentation mask
        try:
            camera_data.seg_img = BinaryImage(
                self.cv_bridge.imgmsg_to_cv2(raw_segmask,
                                             desired_encoding="passthrough"),
                frame=camera_data.intrinsic_params.frame)

        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)

        if camera_data.rgb_img.height != camera_data.seg_img.height or \
           camera_data.rgb_img.width != camera_data.seg_img.width:

            msg = ("Images and segmask must be the same shape! Color image is"
                   " %d x %d but segmask is %d x %d") % (
                       camera_data.rgb_img.height, camera_data.rgb_img.width,
                       camera_data.seg_img.height, camera_data.seg_img.width)

            rospy.logerr(msg)
            raise rospy.ServiceException(msg)

        self.grasp_poses = []
        ok = self.plan_grasp(camera_data, n_candidates=1)

        if ok:
            return self._create_grasp_planner_srv_msg()
        else:
            return None
Example #10
def run_gqcnn(depth, seg_mask):
    best_angle = 0
    best_point = [0, 0]
    best_dist = 0
    depth_im = DepthImage(depth.astype("float32"), frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([imageWidth, imageHeight, 3]).astype(np.uint8),
                          frame=camera_intr.frame)
    print(seg_mask)
    segmask = BinaryImage(seg_mask)
    print(segmask)
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state_gqcnn = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)
    policy_start = time.time()
    try:
        action = policy(state_gqcnn)
        logger.info("Planning took %.3f sec" % (time.time() - policy_start))
        best_point = [action.grasp.center[0], action.grasp.center[1]]
        best_angle = float(action.grasp.angle) * 180.0 / np.pi
    except Exception as inst:
        print(inst)

    return best_angle, best_point, best_dist
Example #11
def run_gqcnn(depth, seg_mask):
    best_angle = 0
    best_point = [0, 0]
    best_dist = 0
    depth_im = DepthImage(depth.astype("float32"), frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([imageWidth, imageHeight,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)
    segmask = BinaryImage(seg_mask)
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state_gqcnn = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)
    policy_start = time.time()
    q_value = -1
    try:
        action = policy(state_gqcnn)
        best_point = [action.grasp.center[0], action.grasp.center[1]]
        best_angle = float(action.grasp.angle) * 180.0 / np.pi
        q_value = action.q_value
        print("inner :       ", action.q_value)

    except Exception as inst:
        print(inst)

    return q_value
Example #12
        'gqcnn', config['metric'])

    for ind in range(num_image_tensors):
        print("File: " + str(ind))
        image_arr = np.load(image_tensors[ind])['arr_0']
        binary_arr = np.load(binary_image_tensors[ind])['arr_0']
        start_ind_arr = np.load(start_ind_tensors[ind])['arr_0']
        end_ind_arr = np.load(end_ind_tensors[ind])['arr_0']

        datapoints_in_file = min(len(start_ind_arr), DATAPOINTS_PER_FILE)
        for i in range(datapoints_in_file):
            start_ind = start_ind_arr[i]
            end_ind = end_ind_arr[i]

            # depth_im = DepthImage(image_arr[i,...])
            binary_im = BinaryImage(binary_arr[i, ...])
            grasps, metrics = load_grasps_for_image(start_ind, end_ind,
                                                    grasp_tensors,
                                                    metric_tensors)

            # # read pixel offsets
            # cx = depth_im.center[1]
            # cy = depth_im.center[0]

            # for g, grasp in enumerate(grasps):
            #     current_index = start_ind + g
            #     grasp_x = grasp[1]
            #     grasp_y = grasp[0]
            #     grasp_angle = grasp[3]

            #     # center images on the grasp, rotate to image x axis
Example #13
    # Read images.
    depth_data = np.load(depth_im_filename)
    depth_data = depth_data.astype(np.float32) / 1000.0
    depth_im = DepthImage(depth_data, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask.
    mask = np.zeros((camera_intr.height, camera_intr.width, 1), dtype=np.uint8)
    c = np.array([165, 460, 500, 135])
    r = np.array([165, 165, 370, 370])
    rr, cc = skimage.draw.polygon(r, c, shape=mask.shape)
    mask[rr, cc, 0] = 255
    segmask = BinaryImage(mask)
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename)
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Create new cloud.
    point_cloud = camera_intr.deproject(depth_im)
    point_cloud.remove_zero_points()
    pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
    tree = pcl_cloud.make_kdtree()

    # Find large clusters (likely to be real objects instead of noise).
Example #14
def run_experiment():
    """ Run the experiment """
    # get the images from the sensor
    previous_grasp = None
    while True:
        rospy.loginfo("Waiting for images")
        start_time = time.time()
        raw_color = rospy.wait_for_message("/camera/rgb/image_color", Image)
        raw_depth = rospy.wait_for_message("/camera/depth_registered/image",
                                           Image)
        image_load_time = time.time() - start_time
        rospy.loginfo('Images loaded in: ' + str(image_load_time) + ' secs.')

        ### Create wrapped Perception RGB and Depth Images by unpacking the ROS Images using CVBridge ###
        try:
            color_image = perception.ColorImage(
                cv_bridge.imgmsg_to_cv2(raw_color, "rgb8"),
                frame=T_camera_world.from_frame)
            depth_image = perception.DepthImage(
                cv_bridge.imgmsg_to_cv2(raw_depth,
                                        desired_encoding="passthrough"),
                frame=T_camera_world.from_frame)
        except CvBridgeError as cv_bridge_exception:
            rospy.logerr(cv_bridge_exception)

        # inpaint to remove holes
        inpainted_color_image = color_image.inpaint(
            rescale_factor=INPAINT_RESCALE_FACTOR)
        inpainted_depth_image = depth_image.inpaint(
            rescale_factor=INPAINT_RESCALE_FACTOR)

        if DETECT_OBJECTS:
            detector = RgbdDetectorFactory.detector('point_cloud_box')
            detections = detector.detect(inpainted_color_image,
                                         inpainted_depth_image,
                                         detector_cfg,
                                         camera_intrinsics,
                                         T_camera_world,
                                         vis_foreground=False,
                                         vis_segmentation=False)

        detected_obj = None
        if previous_grasp is not None:
            position = previous_grasp.pose.position
            position = np.array([position.x, position.y, position.z])
            center_point = Point(position, camera_intrinsics.frame)
            center_pixel = camera_intrinsics.project(center_point,
                                                     camera_intrinsics.frame)
            i, j = center_pixel.y, center_pixel.x
            if DETECT_OBJECTS:
                for detection in detections:
                    binaryIm = detection.binary_im
                    if binaryIm[i, j]:
                        segmask = binaryIm
                        detected_obj = detection
                        break
            else:
                # Generate an inverse segmask with a circle masked out around the previous grasp
                y, x = np.ogrid[-i:IMAGE_HEIGHT - i, -j:IMAGE_WIDTH - j]
                circlemask = x * x + y * y <= CIRCLE_RAD * CIRCLE_RAD
                segmask_data = np.ones(
                    (IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.uint8) * 255
                segmask_data[circlemask] = 0
                segmask = BinaryImage(segmask_data, camera_intrinsics.frame)
        else:
            segmask = BinaryImage(
                np.ones((IMAGE_HEIGHT, IMAGE_WIDTH), dtype=np.uint8) * 255,
                camera_intrinsics.frame)
        segmask._encoding = 'mono8'

        if VISUALIZE_DETECTOR_OUTPUT and detected_obj is not None:
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(detected_obj.color_thumbnail)
            vis.subplot(1, 2, 2)
            vis.imshow(detected_obj.depth_thumbnail)
            vis.show()

        try:
            rospy.loginfo('Planning Grasp')
            start_time = time.time()
            planned_grasp_data = plan_grasp(inpainted_color_image.rosmsg,
                                            inpainted_depth_image.rosmsg,
                                            segmask.rosmsg, raw_camera_info,
                                            boundingBox)
            grasp_plan_time = time.time() - start_time
            rospy.loginfo('Total grasp planning time: ' +
                          str(grasp_plan_time) + ' secs.')

            rospy.loginfo('Queueing Grasp')
            previous_grasp = planned_grasp_data.grasp
            execute_grasp(previous_grasp)
            # raw_input("Press ENTER to resume")
        except rospy.ServiceException as e:
            rospy.logerr(e)
            previous_grasp = None
            raw_input("Press ENTER to resume")
Example #15
    def visualize(self):
        """ Visualize predictions """

        logging.info('Visualizing ' + self.datapoint_type)

        # iterate through shuffled file indices
        for i in self.indices:
            im_filename = self.im_filenames[i]
            pose_filename = self.pose_filenames[i]
            label_filename = self.label_filenames[i]

            logging.info('Loading Image File: ' + im_filename +
                         ' Pose File: ' + pose_filename + ' Label File: ' +
                         label_filename)

            # load tensors from files
            metric_tensor = np.load(os.path.join(self.data_dir,
                                                 label_filename))['arr_0']
            label_tensor = 1 * (metric_tensor > self.metric_thresh)
            image_tensor = np.load(os.path.join(self.data_dir,
                                                im_filename))['arr_0']
            hand_poses_tensor = np.load(
                os.path.join(self.data_dir, pose_filename))['arr_0']

            pose_tensor = self._read_pose_data(hand_poses_tensor,
                                               self.input_data_mode)

            # score with neural network
            pred_p_success_tensor = self._gqcnn.predict(
                image_tensor, pose_tensor)

            # compute results
            classification_result = ClassificationResult(
                [pred_p_success_tensor], [label_tensor])

            logging.info('Error rate on files: %.3f' %
                         (classification_result.error_rate))
            logging.info('Precision on files: %.3f' %
                         (classification_result.precision))
            logging.info('Recall on files: %.3f' %
                         (classification_result.recall))
            mispred_ind = classification_result.mispredicted_indices()
            correct_ind = classification_result.correct_indices()
            # IPython.embed()

            if self.datapoint_type in ('true_positive', 'true_negative'):
                vis_ind = correct_ind
            else:
                vis_ind = mispred_ind
            num_visualized = 0
            # visualize
            for ind in vis_ind:
                # limit the number of sampled datapoints displayed per object
                if num_visualized >= self.samples_per_object:
                    break
                num_visualized += 1

                # don't visualize the datapoints that we don't want
                if self.datapoint_type == 'true_positive':
                    if classification_result.labels[ind] == 0:
                        continue
                elif self.datapoint_type == 'true_negative':
                    if classification_result.labels[ind] == 1:
                        continue
                elif self.datapoint_type == 'false_positive':
                    if classification_result.labels[ind] == 0:
                        continue
                elif self.datapoint_type == 'false_negative':
                    if classification_result.labels[ind] == 1:
                        continue

                logging.info('Datapoint %d of files for %s' %
                             (ind, im_filename))
                logging.info('Depth: %.3f' % (hand_poses_tensor[ind, 2]))

                data = image_tensor[ind, ...]
                if self.display_image_type == RenderMode.SEGMASK:
                    image = BinaryImage(data)
                elif self.display_image_type == RenderMode.GRAYSCALE:
                    image = GrayscaleImage(data)
                elif self.display_image_type == RenderMode.COLOR:
                    image = ColorImage(data)
                elif self.display_image_type == RenderMode.DEPTH:
                    image = DepthImage(data)
                elif self.display_image_type == RenderMode.RGBD:
                    image = RgbdImage(data)
                elif self.display_image_type == RenderMode.GD:
                    image = GdImage(data)

                vis2d.figure()

                if self.display_image_type == RenderMode.RGBD:
                    vis2d.subplot(1, 2, 1)
                    vis2d.imshow(image.color)
                    grasp = Grasp2D(Point(image.center, 'img'), 0,
                                    hand_poses_tensor[ind, 2],
                                    self.gripper_width_m)
                    grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0)
                    vis2d.grasp(grasp)
                    vis2d.subplot(1, 2, 2)
                    vis2d.imshow(image.depth)
                    vis2d.grasp(grasp)
                elif self.display_image_type == RenderMode.GD:
                    vis2d.subplot(1, 2, 1)
                    vis2d.imshow(image.gray)
                    grasp = Grasp2D(Point(image.center, 'img'), 0,
                                    hand_poses_tensor[ind, 2],
                                    self.gripper_width_m)
                    grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0)
                    vis2d.grasp(grasp)
                    vis2d.subplot(1, 2, 2)
                    vis2d.imshow(image.depth)
                    vis2d.grasp(grasp)
                else:
                    vis2d.imshow(image)
                    grasp = Grasp2D(Point(image.center, 'img'), 0,
                                    hand_poses_tensor[ind, 2],
                                    self.gripper_width_m)
                    grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0)
                    vis2d.grasp(grasp)
                vis2d.title('Datapoint %d: Pred: %.3f Label: %.3f' %
                            (ind, classification_result.pred_probs[ind, 1],
                             classification_result.labels[ind]))
                vis2d.show()

        # cleanup
        self._cleanup()
Example #16
    def _plan_grasp(self,
                    color_im,
                    depth_im,
                    camera_intr,
                    bounding_box=None,
                    segmask=None):
        """ Grasp planner request handler .
        
        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS ServiceRequest for grasp planner service
        """
        rospy.loginfo('Planning Grasp')

        # inpaint images
        color_im = color_im.inpaint(
            rescale_factor=self.cfg['inpaint_rescale_factor'])
        depth_im = depth_im.inpaint(
            rescale_factor=self.cfg['inpaint_rescale_factor'])

        # init segmask
        if segmask is None:
            segmask = BinaryImage(255 *
                                  np.ones(depth_im.shape).astype(np.uint8),
                                  frame=color_im.frame)

        # visualize
        if self.cfg['vis']['color_image']:
            vis.imshow(color_im)
            vis.show()
        if self.cfg['vis']['depth_image']:
            vis.imshow(depth_im)
            vis.show()
        if self.cfg['vis']['segmask'] and segmask is not None:
            vis.imshow(segmask)
            vis.show()

        # aggregate color and depth images into a single perception rgbdimage
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

        # mask bounding box
        if bounding_box is not None:
            # calc bb parameters
            min_x = bounding_box.minX
            min_y = bounding_box.minY
            max_x = bounding_box.maxX
            max_y = bounding_box.maxY

            # clip the box to the image bounds
            if min_x < 0:
                min_x = 0
            if min_y < 0:
                min_y = 0
            if max_x > rgbd_im.width:
                max_x = rgbd_im.width
            if max_y > rgbd_im.height:
                max_y = rgbd_im.height

            # mask
            bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
            bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
            bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                     segmask.frame)
            segmask = segmask.mask_binary(bb_segmask)

        # visualize
        if self.cfg['vis']['rgbd_state']:
            masked_rgbd_im = rgbd_im.mask_binary(segmask)
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(masked_rgbd_im.color)
            vis.subplot(1, 2, 2)
            vis.imshow(masked_rgbd_im.depth)
            vis.show()

        # create an RGBDImageState with the cropped RGBDImage and CameraIntrinsics
        rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

        # execute policy
        try:
            return self.execute_policy(rgbd_state, self.grasping_policy,
                                       self.grasp_pose_publisher,
                                       camera_intr.frame)
        except NoValidGraspsException:
            rospy.logerr(
                'While executing policy found no valid grasps from sampled antipodal point pairs. Aborting Policy!'
            )
            raise rospy.ServiceException(
                'While executing policy found no valid grasps from sampled antipodal point pairs. Aborting Policy!'
            )
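
The bounding-box handling above (clamp the box to the image, rasterize it to a {0, 255} mask, then intersect with the existing segmask via mask_binary) also appears in Example #17. A condensed standalone sketch of the rasterize-and-intersect step, with assumed sizes and an already-clamped box:

import numpy as np
from perception import BinaryImage

h, w = 480, 640  # assumed image size
min_x, min_y, max_x, max_y = 100, 80, 400, 360  # already clamped

segmask = BinaryImage(np.full((h, w), 255, dtype=np.uint8), frame='camera')
bb_segmask_arr = np.zeros((h, w), dtype=np.uint8)
bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
bb_segmask = BinaryImage(bb_segmask_arr, frame='camera')
segmask = segmask.mask_binary(bb_segmask)  # keep only pixels inside the box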
Example #17
    def plan_grasp(self, camera_data, n_candidates=1):
        """Grasp planner.

        Parameters
        ----------
        camera_data : obj:`CameraData`
            the images and intrinsics to plan grasps on
        n_candidates : int
            number of grasp candidates to return
        """

        self._camera_data = camera_data

        # --- Inpaint images --- #
        color_im = camera_data.rgb_img.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])

        depth_im = camera_data.depth_img.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])

        # --- Init segmask --- #
        if camera_data.seg_img is None:
            segmask = BinaryImage(255 *
                                  np.ones(depth_im.shape).astype(np.uint8),
                                  frame=color_im.frame)
        else:
            segmask = camera_data.seg_img

        # --- Aggregate color and depth images into a single
        # BerkeleyAutomation/perception `RgbdImage` --- #
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

        # --- Mask bounding box --- #
        if camera_data.bounding_box is not None:
            # Calc bb parameters.
            min_x = camera_data.bounding_box['min_x']
            min_y = camera_data.bounding_box['min_y']
            max_x = camera_data.bounding_box['max_x']
            max_y = camera_data.bounding_box['max_y']

            # Clip the box so it does not exceed the image height/width
            # bounds.
            if min_x < 0:
                min_x = 0
            if min_y < 0:
                min_y = 0
            if max_x > rgbd_im.width:
                max_x = rgbd_im.width
            if max_y > rgbd_im.height:
                max_y = rgbd_im.height

            # Mask.
            bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
            bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
            bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                     segmask.frame)
            segmask = segmask.mask_binary(bb_segmask)

        # --- Create an `RgbdImageState` with the cropped `RgbdImage` and `CameraIntrinsics` --- #
        rgbd_state = RgbdImageState(rgbd_im, camera_data.intrinsic_params, segmask=segmask)

        # --- Execute policy --- #
        try:
            grasps_and_predictions = self.execute_policy(
                rgbd_state, self.grasping_policy,
                camera_data.intrinsic_params.frame, n_candidates)

            self._dexnet_gp = grasps_and_predictions

            # --- project planar grasps to 3D space --- #
            grasps_6d = []
            for gp in grasps_and_predictions:

                # my method
                pose_6d = self.transform_grasp_to_6D(gp[0], camera_data.intrinsic_params)
                pos = pose_6d[:3, 3]
                rot = pose_6d[:3, :3]
                grasp_6D = Grasp6D(position=pos, rotation=rot,
                                   width=gp[0].width, score=gp[1],
                                   ref_frame=camera_data.intrinsic_params.frame)

                grasps_6d.append(grasp_6D)

                # dexnet method --> needs autolab_core installed as catkin package
                # 6D_gp = gp[0].pose()

            self.grasp_poses = grasps_6d
            self.best_grasp = grasps_6d[0]

            self.visualize()

            return True

        except NoValidGraspsException:
            warnings.warn("While executing policy found no valid grasps from sampled antipodal point pairs!")

            return False
Example #18
    def create_camera_data(self, rgb: np.ndarray, depth: np.ndarray,
                           cam_intr_frame: str, fx: float, fy: float,
                           cx: float, cy: float, skew: float, w: int, h: int,
                           seg_mask: np.ndarray = np.empty(shape=(0,)), bbox: tuple = ()):

        """Create the CameraData object in the correct format expected by gqcnn

        Parameters
        ----------
        rgb : np.ndarray
            rgb image
        depth : np.ndarray
            depth image
        cam_intr_frame : str
            the reference frame of the images; grasp poses are computed wrt this frame
        fx : float
            focal length (x)
        fy : float
            focal length (y)
        cx : float
            principal point (x)
        cy : float
            principal point (y)
        skew : float
            skew coefficient
        w : int
            width
        h : int
            height
        seg_mask : np.ndarray, optional
            segmentation mask
        bbox : tuple, optional
            a tuple of 4 values that define the mask bounding box = (x_min, y_min, x_max, y_max)

        Returns
        -------
        CameraData
            object that stores the input data required by plan_grasp()
        """

        camera_data = CameraData()

        # Create images
        camera_data.rgb_img = ColorImage(rgb, frame=cam_intr_frame)
        camera_data.depth_img = DepthImage(depth, frame=cam_intr_frame)

        if seg_mask.size > 0:
            camera_data.seg_img = BinaryImage(seg_mask, cam_intr_frame)

        # Check image sizes
        if camera_data.rgb_img.height != camera_data.depth_img.height or \
           camera_data.rgb_img.width != camera_data.depth_img.width:

            msg = ("Color image and depth image must be the same shape! Color"
                   " is %d x %d but depth is %d x %d") % (
                       camera_data.rgb_img.height, camera_data.rgb_img.width,
                       camera_data.depth_img.height, camera_data.depth_img.width)

            raise AssertionError(msg)

        if (camera_data.rgb_img.height < self.min_height
                or camera_data.rgb_img.width < self.min_width):

            msg = ("Color image is too small! Must be at least %d x %d"
                   " resolution but the requested image is only %d x %d") % (
                       self.min_height, self.min_width,
                       camera_data.rgb_img.height, camera_data.rgb_img.width)

            raise AssertionError(msg)

        # Only validate the segmask shape when one was provided.
        if seg_mask.size > 0 and \
           (camera_data.rgb_img.height != camera_data.seg_img.height or
                camera_data.rgb_img.width != camera_data.seg_img.width):

            msg = ("Images and segmask must be the same shape! Color image is"
                " %d x %d but segmask is %d x %d") % (
                    camera_data.rgb_img.height, camera_data.rgb_img.width,
                    camera_data.seg_img.height, camera_data.seg_img.width)

            raise AssertionError(msg)

        # set intrinsic params
        camera_data.intrinsic_params = CameraIntrinsics(cam_intr_frame, fx, fy, cx, cy, skew, h, w)

        # set mask bounding box
        if len(bbox) == 4:
            camera_data.bounding_box = {'min_x': bbox[0], 'min_y': bbox[1], 'max_x': bbox[2], 'max_y': bbox[3]}

        return camera_data
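
For reference, the intrinsics construction at the end passes (frame, fx, fy, cx, cy, skew, height, width) positionally, matching the perception.CameraIntrinsics signature. A minimal sketch with made-up pinhole parameters:

from perception import CameraIntrinsics

# made-up parameters for a 640x480 sensor (assumed values)
intr = CameraIntrinsics('camera', fx=525.0, fy=525.0, cx=319.5, cy=239.5,
                        skew=0.0, height=480, width=640)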
Example #19
def detect_img():
    global color_img
    global mask

    global depth_im
    global color_im
    global segmask
    global center
    global angle
    global best_angle
    global prev_q_value
    global best_center
    global depth_frame
    global frames

    def nothing(x):
        pass

    mask = np.ones((640, 480), dtype=np.uint8) * 255
    cv2.namedWindow("rgb", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("rgb", 1280, 720)
    cv2.createTrackbar('W', "rgb", 0, 100, nothing)
    cv2.namedWindow("mask", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("mask", 640, 480)
    cv2.namedWindow("depth", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("depth", 1000, 1000)
    line_arr1 = [0, 1, 3, 2, 0, 4, 6, 2]
    line_arr2 = [5, 4, 6, 7, 5, 1, 3, 7]

    divide_value = 65535
    pipeline.start(config)
    frames = pipeline.wait_for_frames()
    while True:
        try:
            frames = pipeline.wait_for_frames()
            w = cv2.getTrackbarPos('W', "rgb")
            depth_frame = frames.get_depth_frame()
            color_frame = frames.get_color_frame()

            if not depth_frame or not color_frame:
                continue
            depth_to_color_img = np.asanyarray(
                depth_frame.get_data()) / divide_value
            color_img = np.asanyarray(color_frame.get_data())
            #print(depth_to_color_img)
            depth_im = DepthImage(depth_to_color_img.astype("float32"),
                                  frame=camera_intr.frame)
            color_im = ColorImage(np.zeros([480, 640, 3]).astype(np.uint8),
                                  frame=camera_intr.frame)
            seg_mask = cv2.resize(mask.copy(), (640, 480),
                                  interpolation=cv2.INTER_CUBIC)
            segmask = BinaryImage(seg_mask)
            show_img = cv2.resize(color_img.copy(), (640, 480),
                                  interpolation=cv2.INTER_CUBIC)
            try:
                ret, img_binary = cv2.threshold(mask, 127, 255, 0)
                contours, color_hierarchy = cv2.findContours(
                    img_binary.copy(), cv2.RETR_EXTERNAL,
                    cv2.CHAIN_APPROX_SIMPLE)
                c0 = np.reshape(contours[0], (-1, 2))
                show_img = cv2.drawContours(np.uint8(show_img), contours, -1,
                                            (0, 255, 0), 1)
            except Exception:
                pass
            show_img_resize = cv2.resize(show_img.copy(), (640, 480),
                                         interpolation=cv2.INTER_CUBIC)
            try:
                r = 50
                show_img_resize = cv2.line(
                    show_img_resize,
                    (int(best_center[0] -
                         r * math.cos(best_angle / 180 * math.pi)),
                     int(best_center[1] -
                         r * math.sin(best_angle / 180 * math.pi))),
                    (int(best_center[0] +
                         r * math.cos(best_angle / 180 * math.pi)),
                     int(best_center[1] +
                         r * math.sin(best_angle / 180 * math.pi))),
                    (0, 0, 255), 5)
                show_img_resize = cv2.line(
                    show_img_resize,
                    (int(best_center[0]), int(best_center[1])),
                    (int(best_center[0] + 1), int(best_center[1])),
                    (0, 136, 255), 7)
                show_img_resize = cv2.line(
                    show_img_resize,
                    (int(center[0] - r * math.cos(angle / 180 * math.pi)),
                     int(center[1] - r * math.sin(angle / 180 * math.pi))),
                    (int(center[0] + r * math.cos(angle / 180 * math.pi)),
                     int(center[1] + r * math.sin(angle / 180 * math.pi))),
                    (255, 100, 255), 5)
                show_img_resize = cv2.line(
                    show_img_resize, (int(center[0]), int(center[1])),
                    (int(center[0] + 1), int(center[1])), (100, 136, 255), 7)

            except Exception:
                pass
            depth_colormap = cv2.applyColorMap(
                cv2.convertScaleAbs(depth_to_color_img * 1000, alpha=2), 1)
            cv2.imshow("rgb", show_img_resize)
            cv2.imshow("depth", depth_colormap)
            cv2.imshow("mask", seg_mask)
            k = cv2.waitKey(5) & 0xFF
            if k == ord('s'):
                cv2.destroyWindow("rgb")
        except Exception as inst:
            print(inst)
            pass
    end()
Example #20
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        type of PCL detector to run ('euclidean' or 'region_growing')
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks, pred_info,
        and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information
    """

    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################

    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()
        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1], depth_im.shape[0])).astype(
                                 np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'), pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
Example #21
def generate_segmask_dataset(output_dataset_path,
                             config,
                             save_tensors=True,
                             warm_start=False):
    """ Generate a segmentation training dataset

    Parameters
    ----------
    output_dataset_path : str
        path to store the dataset
    config : dict
        dictionary-like object containing parameters of the simulator and visualization
    save_tensors : bool
        save tensor datasets (for recreating state)
    warm_start : bool
        restart dataset generation from a previous state
    """

    # read subconfigs
    dataset_config = config['dataset']
    image_config = config['images']
    vis_config = config['vis']

    # debugging
    debug = config['debug']
    if debug:
        np.random.seed(SEED)

    # read general parameters
    num_states = config['num_states']
    num_images_per_state = config['num_images_per_state']

    states_per_flush = config['states_per_flush']
    states_per_garbage_collect = config['states_per_garbage_collect']

    # set max obj per state
    max_objs_per_state = config['state_space']['heap']['max_objs']

    # read image parameters
    im_height = config['state_space']['camera']['im_height']
    im_width = config['state_space']['camera']['im_width']
    segmask_channels = max_objs_per_state + 1

    # create the dataset path and all subfolders if they don't exist
    if not os.path.exists(output_dataset_path):
        os.mkdir(output_dataset_path)

    image_dir = os.path.join(output_dataset_path, 'images')
    if not os.path.exists(image_dir):
        os.mkdir(image_dir)
    color_dir = os.path.join(image_dir, 'color_ims')
    if image_config['color'] and not os.path.exists(color_dir):
        os.mkdir(color_dir)
    depth_dir = os.path.join(image_dir, 'depth_ims')
    if image_config['depth'] and not os.path.exists(depth_dir):
        os.mkdir(depth_dir)
    amodal_dir = os.path.join(image_dir, 'amodal_masks')
    if image_config['amodal'] and not os.path.exists(amodal_dir):
        os.mkdir(amodal_dir)
    modal_dir = os.path.join(image_dir, 'modal_masks')
    if image_config['modal'] and not os.path.exists(modal_dir):
        os.mkdir(modal_dir)
    semantic_dir = os.path.join(image_dir, 'semantic_masks')
    if image_config['semantic'] and not os.path.exists(semantic_dir):
        os.mkdir(semantic_dir)

    # setup logging
    experiment_log_filename = os.path.join(output_dataset_path,
                                           'dataset_generation.log')
    if os.path.exists(experiment_log_filename) and not warm_start:
        os.remove(experiment_log_filename)
    Logger.add_log_file(logger, experiment_log_filename, global_log_file=True)
    config.save(
        os.path.join(output_dataset_path, 'dataset_generation_params.yaml'))
    metadata = {}
    num_prev_states = 0

    # set dataset params
    if save_tensors:

        # read dataset subconfigs
        state_dataset_config = dataset_config['states']
        image_dataset_config = dataset_config['images']
        state_tensor_config = state_dataset_config['tensors']
        image_tensor_config = image_dataset_config['tensors']

        obj_pose_dim = POSE_DIM * max_objs_per_state
        obj_com_dim = POINT_DIM * max_objs_per_state
        state_tensor_config['fields']['obj_poses']['height'] = obj_pose_dim
        state_tensor_config['fields']['obj_coms']['height'] = obj_com_dim
        state_tensor_config['fields']['obj_ids']['height'] = max_objs_per_state

        image_tensor_config['fields']['camera_pose']['height'] = POSE_DIM

        if image_config['color']:
            image_tensor_config['fields']['color_im'] = {
                'dtype': 'uint8',
                'channels': 3,
                'height': im_height,
                'width': im_width
            }

        if image_config['depth']:
            image_tensor_config['fields']['depth_im'] = {
                'dtype': 'float32',
                'channels': 1,
                'height': im_height,
                'width': im_width
            }

        if image_config['modal']:
            image_tensor_config['fields']['modal_segmasks'] = {
                'dtype': 'uint8',
                'channels': segmask_channels,
                'height': im_height,
                'width': im_width
            }

        if image_config['amodal']:
            image_tensor_config['fields']['amodal_segmasks'] = {
                'dtype': 'uint8',
                'channels': segmask_channels,
                'height': im_height,
                'width': im_width
            }

        if image_config['semantic']:
            image_tensor_config['fields']['semantic_segmasks'] = {
                'dtype': 'uint8',
                'channels': 1,
                'height': im_height,
                'width': im_width
            }

        # create dataset filenames
        state_dataset_path = os.path.join(output_dataset_path, 'state_tensors')
        image_dataset_path = os.path.join(output_dataset_path, 'image_tensors')

        if warm_start:

            if not os.path.exists(state_dataset_path) or not os.path.exists(
                    image_dataset_path):
                logger.error(
                    'Attempting to warm start without saved tensor dataset')
                exit(1)

            # open datasets
            logger.info('Opening state dataset')
            state_dataset = TensorDataset.open(state_dataset_path,
                                               access_mode='READ_WRITE')
            logger.info('Opening image dataset')
            image_dataset = TensorDataset.open(image_dataset_path,
                                               access_mode='READ_WRITE')

            # read configs
            state_tensor_config = state_dataset.config
            image_tensor_config = image_dataset.config

            # clean up datasets (there may be datapoints with indices corresponding to non-existent data)
            num_state_datapoints = state_dataset.num_datapoints
            num_image_datapoints = image_dataset.num_datapoints
            num_prev_states = num_state_datapoints

            # clean up images
            image_ind = num_image_datapoints - 1
            image_datapoint = image_dataset[image_ind]
            while image_ind > 0 and image_datapoint[
                    'state_ind'] >= num_state_datapoints:
                image_ind -= 1
                image_datapoint = image_dataset[image_ind]
            images_to_remove = num_image_datapoints - 1 - image_ind
            logger.info('Deleting last %d image tensors' % (images_to_remove))
            if images_to_remove > 0:
                image_dataset.delete_last(images_to_remove)
                num_image_datapoints = image_dataset.num_datapoints
        else:
            # create datasets from scratch
            logger.info('Creating datasets')

            state_dataset = TensorDataset(state_dataset_path,
                                          state_tensor_config)
            image_dataset = TensorDataset(image_dataset_path,
                                          image_tensor_config)

        # read templates
        state_datapoint = state_dataset.datapoint_template
        image_datapoint = image_dataset.datapoint_template

    if warm_start:

        if not os.path.exists(
                os.path.join(output_dataset_path, 'metadata.json')):
            logger.error(
                'Attempting to warm start without previously created dataset')
            exit(1)

        # Read metadata and indices
        metadata = json.load(
            open(os.path.join(output_dataset_path, 'metadata.json'), 'r'))
        test_inds = np.load(os.path.join(image_dir,
                                         'test_indices.npy')).tolist()
        train_inds = np.load(os.path.join(image_dir,
                                          'train_indices.npy')).tolist()

        # set obj ids and splits
        reverse_obj_ids = metadata['obj_ids']
        obj_id_map = utils.reverse_dictionary(reverse_obj_ids)
        obj_splits = metadata['obj_splits']
        obj_keys = obj_splits.keys()
        mesh_filenames = metadata['meshes']

        # Get list of images generated so far
        generated_images = sorted(
            os.listdir(color_dir)) if image_config['color'] else sorted(
                os.listdir(depth_dir))
        num_total_images = len(generated_images)

        # Do our own calculation if no saved tensors
        if num_prev_states == 0:
            num_prev_states = num_total_images // num_images_per_state

        # Find images to remove and remove them from all relevant places if they exist
        num_images_to_remove = num_total_images - (num_prev_states *
                                                   num_images_per_state)
        logger.info(
            'Deleting last {} invalid images'.format(num_images_to_remove))
        for k in range(num_images_to_remove):
            im_name = generated_images[-(k + 1)]
            im_basename = os.path.splitext(im_name)[0]
            im_ind = int(im_basename.split('_')[1])
            if os.path.exists(os.path.join(depth_dir, im_name)):
                os.remove(os.path.join(depth_dir, im_name))
            if os.path.exists(os.path.join(color_dir, im_name)):
                os.remove(os.path.join(color_dir, im_name))
            if os.path.exists(os.path.join(semantic_dir, im_name)):
                os.remove(os.path.join(semantic_dir, im_name))
            if os.path.exists(os.path.join(modal_dir, im_basename)):
                shutil.rmtree(os.path.join(modal_dir, im_basename))
            if os.path.exists(os.path.join(amodal_dir, im_basename)):
                shutil.rmtree(os.path.join(amodal_dir, im_basename))
            if im_ind in train_inds:
                train_inds.remove(im_ind)
            elif im_ind in test_inds:
                test_inds.remove(im_ind)

    else:

        # Create initial env to generate metadata
        env = BinHeapEnv(config)
        obj_id_map = env.state_space.obj_id_map
        obj_keys = env.state_space.obj_keys
        obj_splits = env.state_space.obj_splits
        mesh_filenames = env.state_space.mesh_filenames
        save_obj_id_map = obj_id_map.copy()
        save_obj_id_map[ENVIRONMENT_KEY] = np.iinfo(np.uint32).max
        reverse_obj_ids = utils.reverse_dictionary(save_obj_id_map)
        metadata['obj_ids'] = reverse_obj_ids
        metadata['obj_splits'] = obj_splits
        metadata['meshes'] = mesh_filenames
        json.dump(metadata,
                  open(os.path.join(output_dataset_path, 'metadata.json'),
                       'w'),
                  indent=JSON_INDENT,
                  sort_keys=True)
        train_inds = []
        test_inds = []

    # generate states and images
    state_id = num_prev_states
    while state_id < num_states:

        # create env and set objects
        create_start = time.time()
        env = BinHeapEnv(config)
        env.state_space.obj_id_map = obj_id_map
        env.state_space.obj_keys = obj_keys
        env.state_space.set_splits(obj_splits)
        env.state_space.mesh_filenames = mesh_filenames
        create_stop = time.time()
        logger.info('Creating env took %.3f sec' %
                    (create_stop - create_start))

        # sample states
        states_remaining = num_states - state_id
        for i in range(min(states_per_garbage_collect, states_remaining)):

            # log current rollout
            if state_id % config['log_rate'] == 0:
                logger.info('State: %06d' % (state_id))

            try:
                # reset env
                env.reset()
                state = env.state
                split = state.metadata['split']

                # render state
                if vis_config['state']:
                    env.view_3d_scene()

                # Save state if desired
                if save_tensors:

                    # set obj state variables
                    obj_pose_vec = np.zeros(obj_pose_dim)
                    obj_com_vec = np.zeros(obj_com_dim)
                    obj_id_vec = np.iinfo(
                        np.uint32).max * np.ones(max_objs_per_state)
                    j = 0
                    for obj_state in state.obj_states:
                        obj_pose_vec[j * POSE_DIM:(j + 1) *
                                     POSE_DIM] = obj_state.pose.vec
                        obj_com_vec[j * POINT_DIM:(j + 1) *
                                    POINT_DIM] = obj_state.center_of_mass
                        obj_id_vec[j] = int(obj_id_map[obj_state.key])
                        j += 1

                    # store datapoint env params
                    state_datapoint['state_id'] = state_id
                    state_datapoint['obj_poses'] = obj_pose_vec
                    state_datapoint['obj_coms'] = obj_com_vec
                    state_datapoint['obj_ids'] = obj_id_vec
                    state_datapoint['split'] = split

                    # store state datapoint
                    image_start_ind = image_dataset.num_datapoints
                    image_end_ind = image_start_ind + num_images_per_state
                    state_datapoint['image_start_ind'] = image_start_ind
                    state_datapoint['image_end_ind'] = image_end_ind

                    # clean up
                    del obj_pose_vec
                    del obj_com_vec
                    del obj_id_vec

                    # add state
                    state_dataset.add(state_datapoint)

                # render images
                for k in range(num_images_per_state):

                    # reset the camera
                    if num_images_per_state > 1:
                        env.reset_camera()

                    obs = env.render_camera_image(color=image_config['color'])
                    if image_config['color']:
                        color_obs, depth_obs = obs
                    else:
                        depth_obs = obs

                    # vis obs
                    if vis_config['obs']:
                        if image_config['depth']:
                            plt.figure()
                            plt.imshow(depth_obs)
                            plt.title('Depth Observation')
                        if image_config['color']:
                            plt.figure()
                            plt.imshow(color_obs)
                            plt.title('Color Observation')
                        plt.show()

                    if image_config['modal'] or image_config[
                            'amodal'] or image_config['semantic']:

                        # render segmasks
                        amodal_segmasks, modal_segmasks = env.render_segmentation_images(
                        )

                        # retrieve segmask data
                        modal_segmask_arr = np.iinfo(np.uint8).max * np.ones(
                            [im_height, im_width, segmask_channels],
                            dtype=np.uint8)
                        amodal_segmask_arr = np.iinfo(np.uint8).max * np.ones(
                            [im_height, im_width, segmask_channels],
                            dtype=np.uint8)
                        stacked_segmask_arr = np.zeros(
                            [im_height, im_width, 1], dtype=np.uint8)

                        modal_segmask_arr[:, :, :env.
                                          num_objects] = modal_segmasks
                        amodal_segmask_arr[:, :, :env.
                                           num_objects] = amodal_segmasks

                        if image_config['semantic']:
                            for j in range(env.num_objects):
                                this_obj_px = np.where(
                                    modal_segmasks[:, :, j] > 0)
                                stacked_segmask_arr[this_obj_px[0],
                                                    this_obj_px[1], 0] = j + 1

                    # visualize
                    if vis_config['semantic']:
                        plt.figure()
                        plt.imshow(stacked_segmask_arr.squeeze())
                        plt.show()

                    if save_tensors:
                        # save image data as tensors
                        if image_config['color']:
                            image_datapoint['color_im'] = color_obs
                        if image_config['depth']:
                            image_datapoint['depth_im'] = depth_obs[:, :, None]
                        if image_config['modal']:
                            image_datapoint[
                                'modal_segmasks'] = modal_segmask_arr
                        if image_config['amodal']:
                            image_datapoint[
                                'amodal_segmasks'] = amodal_segmask_arr
                        if image_config['semantic']:
                            image_datapoint[
                                'semantic_segmasks'] = stacked_segmask_arr

                        image_datapoint['camera_pose'] = env.camera.pose.vec
                        image_datapoint[
                            'camera_intrs'] = env.camera.intrinsics.vec
                        image_datapoint['state_ind'] = state_id
                        image_datapoint['split'] = split

                        # add image
                        image_dataset.add(image_datapoint)

                    # Save depth image and semantic masks
                    if image_config['color']:
                        ColorImage(color_obs).save(
                            os.path.join(
                                color_dir, 'image_{:06d}.png'.format(
                                    num_images_per_state * state_id + k)))
                    if image_config['depth']:
                        DepthImage(depth_obs).save(
                            os.path.join(
                                depth_dir, 'image_{:06d}.png'.format(
                                    num_images_per_state * state_id + k)))
                    if image_config['modal']:
                        modal_id_dir = os.path.join(
                            modal_dir,
                            'image_{:06d}'.format(num_images_per_state *
                                                  state_id + k))
                        if not os.path.exists(modal_id_dir):
                            os.mkdir(modal_id_dir)
                        for i in range(env.num_objects):
                            BinaryImage(modal_segmask_arr[:, :, i]).save(
                                os.path.join(modal_id_dir,
                                             'channel_{:03d}.png'.format(i)))
                    if image_config['amodal']:
                        amodal_id_dir = os.path.join(
                            amodal_dir,
                            'image_{:06d}'.format(num_images_per_state *
                                                  state_id + k))
                        if not os.path.exists(amodal_id_dir):
                            os.mkdir(amodal_id_dir)
                        for i in range(env.num_objects):
                            BinaryImage(amodal_segmask_arr[:, :, i]).save(
                                os.path.join(amodal_id_dir,
                                             'channel_{:03d}.png'.format(i)))
                    if image_config['semantic']:
                        GrayscaleImage(stacked_segmask_arr.squeeze()).save(
                            os.path.join(
                                semantic_dir, 'image_{:06d}.png'.format(
                                    num_images_per_state * state_id + k)))

                    # Save split
                    if split == TRAIN_ID:
                        train_inds.append(num_images_per_state * state_id + k)
                    else:
                        test_inds.append(num_images_per_state * state_id + k)

                # auto-flush after every so many timesteps
                if state_id % states_per_flush == 0:
                    np.save(os.path.join(image_dir, 'train_indices.npy'),
                            train_inds)
                    np.save(os.path.join(image_dir, 'test_indices.npy'),
                            test_inds)
                    if save_tensors:
                        state_dataset.flush()
                        image_dataset.flush()

                # delete state objects to free memory
                for obj_state in state.obj_states:
                    del obj_state
                del state
                gc.collect()

                # update state id
                state_id += 1

            except Exception as e:
                # log an error
                logger.warning('Heap failed!')
                logger.warning('%s' % (str(e)))
                logger.warning(traceback.format_exc())
                if debug:
                    raise

                del env
                gc.collect()
                env = BinHeapEnv(config)
                env.state_space.obj_id_map = obj_id_map
                env.state_space.obj_keys = obj_keys
                env.state_space.set_splits(obj_splits)
                env.state_space.mesh_filenames = mesh_filenames

        # garbage collect
        del env
        gc.collect()

    # write all datasets to file, save indices
    np.save(os.path.join(image_dir, 'train_indices.npy'), train_inds)
    np.save(os.path.join(image_dir, 'test_indices.npy'), test_inds)
    if save_tensors:
        state_dataset.flush()
        image_dataset.flush()

    logger.info('Generated %d image datapoints' %
                (state_id * num_images_per_state))
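Since each image above is saved as image_{num_images_per_state * state_id + k:06d}.png and the split indices are flushed to train_indices.npy / test_indices.npy, the originating state of any saved image can be recovered after the fact. A minimal sketch, assuming the same image_dir layout and a known num_images_per_state (both placeholders here):

import os

import numpy as np

image_dir = '/path/to/dataset/images'  # hypothetical output location
num_images_per_state = 3               # must match the generation config

train_inds = np.load(os.path.join(image_dir, 'train_indices.npy')).tolist()
for im_ind in train_inds[:5]:
    # image index = num_images_per_state * state_id + k (see the save calls above)
    state_id, k = divmod(im_ind, num_images_per_state)
    print('image_%06d.png -> state %06d, camera sample %d' % (im_ind, state_id, k))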
Exemple #22
0
    def wrapped_images(self, mesh, object_to_camera_poses,
                       render_mode, stable_pose=None, mat_props=None,
                       light_props=None, debug=False):
        """Create ObjectRender objects of the given mesh at the list of object to camera poses.

        Parameters
        ----------
        mesh : :obj:`Mesh3D`
            The mesh to be rendered.
        object_to_camera_poses : :obj:`list` of :obj:`RigidTransform`
            A list of object to camera transforms to render from.
        render_mode : int
            One of RenderMode.COLOR, RenderMode.DEPTH, or
            RenderMode.SCALED_DEPTH.
        stable_pose : :obj:`StablePose`
            A stable pose to render the object in.
        mat_props : :obj:`MaterialProperties`
            Material properties for the mesh
        light_props : :obj:`MaterialProperties`
            Lighting properties for the scene
        debug : bool
            Whether or not to debug the C++ mesh-rendering code.

        Returns
        -------
        :obj:`list` of :obj:`ObjectRender`
            A list of ObjectRender objects generated from the given parameters.
        """
        # pre-multiply the stable pose
        world_to_camera_poses = [T_obj_camera.as_frames('obj', 'camera') for T_obj_camera in object_to_camera_poses]
        if stable_pose is not None:
            t_obj_stp = np.array([0,0,-stable_pose.r.dot(stable_pose.x0)[2]])
            T_obj_stp = RigidTransform(rotation=stable_pose.r,
                                       translation=t_obj_stp,
                                       from_frame='obj',
                                       to_frame='stp')            
            stp_to_camera_poses = copy.copy(object_to_camera_poses)
            object_to_camera_poses = []
            for T_stp_camera in stp_to_camera_poses:
                T_stp_camera.from_frame = 'stp'
                object_to_camera_poses.append(T_stp_camera.dot(T_obj_stp))

        # set lighting mode
        enable_lighting = True
        if render_mode == RenderMode.SEGMASK or render_mode == RenderMode.DEPTH or \
           render_mode == RenderMode.DEPTH_SCENE:
            enable_lighting = False

        # render both image types (doesn't really cost any time)
        color_ims, depth_ims = self.images(mesh, object_to_camera_poses,
                                           mat_props=mat_props,
                                           light_props=light_props,
                                           enable_lighting=enable_lighting,
                                           debug=debug)

        # convert to image wrapper classes
        images = []
        if render_mode == RenderMode.SEGMASK:
            # wrap binary images
            for binary_im in color_ims:
                images.append(BinaryImage(binary_im[:,:,0], frame=self._camera_intr.frame, threshold=0))

        elif render_mode == RenderMode.COLOR:
            # wrap color images
            for color_im in color_ims:
                images.append(ColorImage(color_im, frame=self._camera_intr.frame))

        elif render_mode == RenderMode.COLOR_SCENE:
            # wrap color and depth images
            for color_im in color_ims:
                images.append(ColorImage(color_im, frame=self._camera_intr.frame))

            # render images of scene objects
            color_scene_ims = {}
            for name, scene_obj in self._scene.items():
                scene_object_to_camera_poses = []
                for world_to_camera_pose in world_to_camera_poses:
                    scene_object_to_camera_poses.append(world_to_camera_pose * scene_obj.T_mesh_world)

                color_scene_ims[name] = self.wrapped_images(scene_obj.mesh, scene_object_to_camera_poses, RenderMode.COLOR, mat_props=scene_obj.mat_props, light_props=light_props, debug=debug)

            # combine with scene images
            # TODO: re-add farther
            for i in range(len(images)):
                for j, name in enumerate(color_scene_ims.keys()):
                    zero_px = images[i].zero_pixels()
                    images[i].data[zero_px[:,0], zero_px[:,1], :] = color_scene_ims[name][i].image.data[zero_px[:,0], zero_px[:,1], :]

        elif render_mode == RenderMode.DEPTH:
            # render depth image
            for depth_im in depth_ims:
                images.append(DepthImage(depth_im, frame=self._camera_intr.frame))

        elif render_mode == RenderMode.DEPTH_SCENE:
            # create empty depth images
            for depth_im in depth_ims:
                images.append(DepthImage(depth_im, frame=self._camera_intr.frame))

            # render images of scene objects
            depth_scene_ims = {}
            for name, scene_obj in self._scene.items():
                scene_object_to_camera_poses = []
                for world_to_camera_pose in world_to_camera_poses:
                    scene_object_to_camera_poses.append(world_to_camera_pose * scene_obj.T_mesh_world)
                depth_scene_ims[name] = self.wrapped_images(scene_obj.mesh, scene_object_to_camera_poses, RenderMode.DEPTH, mat_props=scene_obj.mat_props, light_props=light_props)

            # combine with scene images
            for i in range(len(images)):
                for j, name in enumerate(depth_scene_ims.keys()):
                    images[i] = images[i].combine_with(depth_scene_ims[name][i].image)

        elif render_mode == RenderMode.RGBD:
            # create RGB-D images
            for color_im, depth_im in zip(color_ims, depth_ims):
                c = ColorImage(color_im, frame=self._camera_intr.frame)
                d = DepthImage(depth_im, frame=self._camera_intr.frame)
                images.append(RgbdImage.from_color_and_depth(c, d))

        elif render_mode == RenderMode.RGBD_SCENE:
            # create RGB-D images
            for color_im, depth_im in zip(color_ims, depth_ims):
                c = ColorImage(color_im, frame=self._camera_intr.frame)
                d = DepthImage(depth_im, frame=self._camera_intr.frame)
                images.append(RgbdImage.from_color_and_depth(c, d))

            # render images of scene objects
            rgbd_scene_ims = {}
            for name, scene_obj in self._scene.items():
                scene_object_to_camera_poses = []
                for world_to_camera_pose in world_to_camera_poses:
                    scene_object_to_camera_poses.append(world_to_camera_pose * scene_obj.T_mesh_world)

                rgbd_scene_ims[name] = self.wrapped_images(scene_obj.mesh, scene_object_to_camera_poses, RenderMode.RGBD, mat_props=scene_obj.mat_props, light_props=light_props, debug=debug)

            # combine with scene images
            for i in range(len(images)):
                for j, name in enumerate(rgbd_scene_ims.keys()):
                    images[i] = images[i].combine_with(rgbd_scene_ims[name][i].image)

        elif render_mode == RenderMode.SCALED_DEPTH:
            # convert to color image
            for depth_im in depth_ims:
                d = DepthImage(depth_im, frame=self._camera_intr.frame)
                images.append(d.to_color())
        else:
            raise ValueError('Render mode %s not supported' % (render_mode))

        # create object renders
        if stable_pose is not None:
            object_to_camera_poses = copy.copy(stp_to_camera_poses)
        rendered_images = []
        for image, T_obj_camera in zip(images, object_to_camera_poses):
            T_camera_obj = T_obj_camera.inverse()
            rendered_images.append(ObjectRender(image, T_camera_obj))

        return rendered_images
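A minimal usage sketch for wrapped_images; the renderer and mesh objects and the single hand-built pose are illustrative assumptions, not part of the example above:

import numpy as np
from autolab_core import RigidTransform

# one object-to-camera pose, half a meter in front of the camera (illustrative)
T_obj_camera = RigidTransform(rotation=np.eye(3),
                              translation=np.array([0.0, 0.0, 0.5]),
                              from_frame='obj',
                              to_frame='camera')

renders = renderer.wrapped_images(mesh, [T_obj_camera], RenderMode.DEPTH)
depth_im = renders[0].image  # a DepthImage, unwrapped from the ObjectRender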
Exemple #23
0
def visualize_tensor_dataset(dataset, config):
    """
    Visualizes a Tensor dataset.

    Parameters
    ----------
    dataset : :obj:`TensorDataset`
        dataset to visualize
    config : :obj:`autolab_core.YamlConfig`
        parameters for visualization

    Notes
    -----
    Required parameters of config are specified in Other Parameters

    Other Parameters
    ----------------
    field_name : str
        name of the field in the TensorDataset to visualize (defaults to depth_ims_tf_table, which is a single-view point cloud of the object on a table)
    field_type : str
        type of image that the field name corresponds to (defaults to depth, can also be `segmask` if using the field `object_masks`)

    print_fields : :obj:`list` of str
        names of additional fields to print to the command line
    filter : :obj:`dict` mapping str to :obj:`dict`
        constraints that all displayed datapoints must satisfy (supports any univariate field name as a key and numeric thresholds)

    gripper_width_px : float
        width of the gripper to plot in pixels
    font_size : int
        size of font on the rendered images
    """
    # shuffle the tensor indices
    indices = dataset.datapoint_indices
    np.random.shuffle(indices)

    # read config
    field_name = config['field_name']
    field_type = config['field_type']
    font_size = config['font_size']
    print_fields = config['print_fields']
    gripper_width_px = config['gripper_width_px']

    num = 0
    for i, ind in enumerate(indices):
        datapoint = dataset[ind]
        data = datapoint[field_name]
        if field_type == RenderMode.SEGMASK:
            image = BinaryImage(data)
        elif field_type == RenderMode.DEPTH:
            image = DepthImage(data)
        else:
            raise ValueError('Field type %s not supported!' % (field_type))

        skip_datapoint = False
        for f, filter_cfg in config['filter'].items():
            data = datapoint[f]
            if 'greater_than' in filter_cfg and data < filter_cfg['greater_than']:
                skip_datapoint = True
                break
            elif 'less_than' in filter_cfg and data > filter_cfg['less_than']:
                skip_datapoint = True
                break
        if skip_datapoint:
            continue

        logging.info('DATAPOINT %d' % (num))
        for f in print_fields:
            data = datapoint[f]
            logging.info('Field %s:' % (f))
            print(data)

        grasp_2d = Grasp2D(Point(image.center), 0, datapoint['hand_poses'][2])

        vis2d.figure()
        if field_type == RenderMode.RGBD:
            vis2d.subplot(1, 2, 1)
            vis2d.imshow(image.color)
            vis2d.grasp(grasp_2d, width=gripper_width_px)
            vis2d.subplot(1, 2, 2)
            vis2d.imshow(image.depth)
            vis2d.grasp(grasp_2d, width=gripper_width_px)
        elif field_type == RenderMode.GD:
            vis2d.subplot(1, 2, 1)
            vis2d.imshow(image.gray)
            vis2d.grasp(grasp_2d, width=gripper_width_px)
            vis2d.subplot(1, 2, 2)
            vis2d.imshow(image.depth)
            vis2d.grasp(grasp_2d, width=gripper_width_px)
        else:
            vis2d.imshow(image)
            vis2d.grasp(grasp_2d, width=gripper_width_px)
        vis2d.title('Datapoint %d: %s' % (ind, field_type))
        vis2d.show()

        num += 1
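A hedged sketch of a config for visualize_tensor_dataset, assuming perception's RenderMode.DEPTH and RenderMode.SEGMASK constants are the strings 'depth' and 'segmask'; the dataset path and the filter example are placeholders:

from autolab_core import TensorDataset

config = {
    'field_name': 'depth_ims_tf_table',
    'field_type': 'depth',   # or 'segmask' for binary object masks
    'font_size': 12,
    'print_fields': ['hand_poses'],
    'filter': {},            # e.g. {'robust_ferrari_canny': {'greater_than': 0.002}}
    'gripper_width_px': 30.0,
}

dataset = TensorDataset.open('/path/to/tensor/dataset')  # placeholder path
visualize_tensor_dataset(dataset, config)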
Exemple #24
0
    depth_im_filename = args.depth_im_filename
    camera_intr_filename = args.camera_intr_filename
    config_filename = args.config_filename

    # read config
    config = YamlConfig(config_filename)
    policy_config = config['policy']

    # read image
    depth_im_arr = pkl.load(open(depth_im_filename, 'rb'))
    depth_im = DepthImage(depth_im_arr)
    color_im = ColorImage(
        np.zeros([depth_im.height, depth_im.width, 3]).astype(np.uint8))
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    segmask_arr = 255 * (depth_im_arr < 1.0)
    segmask = BinaryImage(segmask_arr.astype(np.uint8))
    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # init policy
    policy = UniformRandomGraspingPolicy(policy_config)
    policy_start = time.time()
    action = policy(state)
    logging.info('Planning took %.3f sec' % (time.time() - policy_start))

    # vis final grasp
    if policy_config['vis']['final_grasp']:
        vis.figure(size=(10, 10))
        vis.imshow(rgbd_im.depth,
                   vmin=policy_config['vis']['vmin'],
                   vmax=policy_config['vis']['vmax'])
Exemple #25
0
    def wrapped_render(self, render_modes, front_and_back=False):
        """Render images of the scene and wrap them with Image wrapper classes
        from the Berkeley AUTOLab's perception module.

        Parameters
        ----------
        render_modes : list of perception.RenderMode 
            A list of the desired image types to return, from the perception
            module's RenderMode enum.

        front_and_back : bool
            If True, all surface normals are treated as if they're facing the camera.

        Returns
        -------
        list of perception.Image
            A list containing a corresponding Image sub-class for each type listed
            in render_modes.
        """

        # Render raw images
        render_color = False
        for mode in render_modes:
            if mode != RenderMode.DEPTH and mode != RenderMode.SCALED_DEPTH:
                render_color = True
                break

        color_im, depth_im = None, None
        if render_color:
            color_im, depth_im = self.render(render_color,
                                             front_and_back=front_and_back)
        else:
            depth_im = self.render(render_color)

        # For each specified render mode, add an Image object of the appropriate type
        images = []
        for render_mode in render_modes:
            # Then, convert them to an image wrapper class
            if render_mode == RenderMode.SEGMASK:
                images.append(
                    BinaryImage((depth_im > 0.0).astype(np.uint8),
                                frame=self.camera.intrinsics.frame,
                                threshold=0))

            elif render_mode == RenderMode.COLOR:
                images.append(
                    ColorImage(color_im, frame=self.camera.intrinsics.frame))

            elif render_mode == RenderMode.GRAYSCALE:
                images.append(
                    ColorImage(
                        color_im,
                        frame=self.camera.intrinsics.frame).to_grayscale())

            elif render_mode == RenderMode.DEPTH:
                images.append(
                    DepthImage(depth_im, frame=self.camera.intrinsics.frame))

            elif render_mode == RenderMode.SCALED_DEPTH:
                images.append(
                    DepthImage(depth_im,
                               frame=self.camera.intrinsics.frame).to_color())

            elif render_mode == RenderMode.RGBD:
                c = ColorImage(color_im, frame=self.camera.intrinsics.frame)
                d = DepthImage(depth_im, frame=self.camera.intrinsics.frame)
                images.append(RgbdImage.from_color_and_depth(c, d))

            elif render_mode == RenderMode.GD:
                g = ColorImage(
                    color_im,
                    frame=self.camera.intrinsics.frame).to_grayscale()
                d = DepthImage(depth_im, frame=self.camera.intrinsics.frame)
                images.append(GdImage.from_grayscale_and_depth(g, d))
            else:
                raise ValueError(
                    'Render mode {} not supported'.format(render_mode))

        return images
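A minimal usage sketch for wrapped_render; the scene object is assumed to be an already-populated scene with a camera attached:

from perception import RenderMode

# request color and depth in one pass; the returned list matches render_modes order
color_im, depth_im = scene.wrapped_render([RenderMode.COLOR, RenderMode.DEPTH])

# a binary foreground mask, treating all surface normals as camera-facing
segmask = scene.wrapped_render([RenderMode.SEGMASK], front_and_back=True)[0]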
Exemple #26
0
    plan_grasp_segmask = rospy.ServiceProxy(
        '%s/grasp_planner_segmask' % (namespace), GQCNNGraspPlannerSegmask)
    cv_bridge = CvBridge()

    # setup sensor
    camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # read images
    depth_im = DepthImage.open(depth_im_filename, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # read segmask
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename, frame=camera_intr.frame)
        grasp_resp = plan_grasp_segmask(color_im.rosmsg, depth_im.rosmsg,
                                        camera_intr.rosmsg, segmask.rosmsg)
    else:
        grasp_resp = plan_grasp(color_im.rosmsg, depth_im.rosmsg,
                                camera_intr.rosmsg)
    grasp = grasp_resp.grasp

    # convert to a grasp action
    grasp_type = grasp.grasp_type
    if grasp_type == GQCNNGrasp.PARALLEL_JAW:
        center = Point(np.array([grasp.center_px[0], grasp.center_px[1]]),
                       frame=camera_intr.frame)
        grasp_2d = Grasp2D(center,
                           grasp.angle,
                           grasp.depth,
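The no-segmask branch above also calls a plain plan_grasp proxy that was created before this snippet starts; a hedged sketch of how both proxies might be set up (service and type names mirror the segmask proxy above and are assumptions):

import rospy
from gqcnn.srv import GQCNNGraspPlanner, GQCNNGraspPlannerSegmask

namespace = 'gqcnn'  # illustrative node namespace
rospy.wait_for_service('%s/grasp_planner' % (namespace))
rospy.wait_for_service('%s/grasp_planner_segmask' % (namespace))
plan_grasp = rospy.ServiceProxy('%s/grasp_planner' % (namespace),
                                GQCNNGraspPlanner)
plan_grasp_segmask = rospy.ServiceProxy('%s/grasp_planner_segmask' % (namespace),
                                        GQCNNGraspPlannerSegmask)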
Exemple #27
0
    def _plan_grasp(self,
                    color_im,
                    depth_im,
                    camera_intr,
                    bounding_box=None,
                    segmask=None):
        """Grasp planner request handler.

        Parameters
        ---------
        req: :obj:`ROS ServiceRequest`
            ROS `ServiceRequest` for grasp planner service.
        """
        rospy.loginfo("Planning Grasp")

        # Inpaint images.
        color_im = color_im.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])
        depth_im = depth_im.inpaint(
            rescale_factor=self.cfg["inpaint_rescale_factor"])

        # Init segmask.
        if segmask is None:
            segmask = BinaryImage(255 *
                                  np.ones(depth_im.shape).astype(np.uint8),
                                  frame=color_im.frame)

        # Visualize.
        if self.cfg["vis"]["color_image"]:
            vis.imshow(color_im)
            vis.show()
        if self.cfg["vis"]["depth_image"]:
            vis.imshow(depth_im)
            vis.show()
        if self.cfg["vis"]["segmask"] and segmask is not None:
            vis.imshow(segmask)
            vis.show()

        # Aggregate color and depth images into a single
        # BerkeleyAutomation/perception `RgbdImage`.
        rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

        # Mask bounding box.
        if bounding_box is not None:
            # Calc bb parameters.
            min_x = bounding_box.minX
            min_y = bounding_box.minY
            max_x = bounding_box.maxX
            max_y = bounding_box.maxY

            # Clamp the box to the image so it doesn't exceed the image
            # height/width bounds.
            if min_x < 0:
                min_x = 0
            if min_y < 0:
                min_y = 0
            if max_x > rgbd_im.width:
                max_x = rgbd_im.width
            if max_y > rgbd_im.height:
                max_y = rgbd_im.height

            # Mask.
            bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
            bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
            bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                     segmask.frame)
            segmask = segmask.mask_binary(bb_segmask)

        # Visualize.
        if self.cfg["vis"]["rgbd_state"]:
            masked_rgbd_im = rgbd_im.mask_binary(segmask)
            vis.figure()
            vis.subplot(1, 2, 1)
            vis.imshow(masked_rgbd_im.color)
            vis.subplot(1, 2, 2)
            vis.imshow(masked_rgbd_im.depth)
            vis.show()

        # Create an `RgbdImageState` with the cropped `RgbdImage` and
        # `CameraIntrinsics`.
        rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

        # Execute policy.
        try:
            return self.execute_policy(rgbd_state, self.grasping_policy,
                                       self.grasp_pose_publisher,
                                       camera_intr.frame)
        except NoValidGraspsException:
            rospy.logerr(
                ("While executing policy found no valid grasps from sampled"
                 " antipodal point pairs. Aborting Policy!"))
            raise rospy.ServiceException(
                ("While executing policy found no valid grasps from sampled"
                 " antipodal point pairs. Aborting Policy!"))
Exemple #28
0
                          frame=camera_intr.frame,
                          encoding="bgr8")
    depth_im = DepthImage(depth_cvmat, frame=camera_intr.frame)

    # check image sizes.
    if (color_im.height != depth_im.height
            or color_im.width != depth_im.width):
        msg = ("Color image and depth image must be the same shape! Color"
               " is %d x %d but depth is %d x %d") % (
                   color_im.height, color_im.width, depth_im.height,
                   depth_im.width)
        print(msg)

    # assume no mask is provided
    # segmask = depth_im.invalid_pixel_mask().inverse()
    segmask = BinaryImage(255 * np.ones(depth_im.shape).astype(np.uint8),
                          frame=color_im.frame)

    # inpaint images.
    # color_im = color_im.inpaint(
    #     rescale_factor=config["inpaint_rescale_factor"])
    # depth_im = depth_im.inpaint(
    #     rescale_factor=config["inpaint_rescale_factor"])

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # vis.imshow(segmask)
    # vis.imshow(rgbd_im)
Exemple #29
0
                policy_config["metric"]["gqcnn_model"])

    # Setup sensor.
    camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # Read images.
    depth_data = np.load(depth_im_filename)
    depth_im = DepthImage(depth_data, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask.
    segmask = None
    if segmask_filename is not None:
        segmask = BinaryImage.open(segmask_filename)
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

    if "input_images" in policy_config["vis"] and policy_config["vis"][
            "input_images"]:
        vis.figure(size=(10, 10))
        num_plot = 1
        if segmask is not None:
            num_plot = 2
Exemple #30
0
    def _compute_edge_mask(self, salient_edge_set, depth_im, ci, T_obj_camera):
        """Compute the edge mask for a given salient edge set, depth image, camera intrinsics,
        and object-to-camera transform.
        """
        mesh = salient_edge_set.mesh
        edge_mask = salient_edge_set.edge_mask

        # Allocate array for storing mask
        di_mask = np.zeros(depth_im.data.shape, dtype=np.uint8)

        # ---------------
        # Compute edge endpoint coordinates in camera frame and up/down biases for each
        # ---------------
        m = mesh.copy().apply_transform(T_obj_camera.matrix)
        vertex_inds = m.face_adjacency_edges[edge_mask]
        endpoints_3d = m.vertices[vertex_inds]

        # For each edge, compute the face that we should be sampling most closely against
        vecs = (endpoints_3d[:,0] + endpoints_3d[:,1]) / 2.0
        face_inds = m.face_adjacency[edge_mask]
        normals = m.face_normals[face_inds]
        dots = np.einsum('ijk,ik->ij', normals, vecs)
        valid = np.where(np.logical_not((dots > 0).all(1)))[0]
        vertex_inds = vertex_inds[valid]

        # Find "other" vertex per edge of interest
        for i in range(len(vertex_inds)):
            # Extract other vertex end
            endpoint_inds = vertex_inds[i]

            # Project vertices down to 2D
            endpoints = m.vertices[endpoint_inds]

            points = endpoints.T
            points_proj = ci._K.dot(points)
            point_depths = np.tile(points_proj[2,:], [3, 1])
            points_proj = np.divide(points_proj, point_depths).T[:,:2]
            point_depths = point_depths[0][:2]
            endpoints = points_proj[:2]

            delta = endpoints[1] - endpoints[0]
            dy = np.abs(delta[1])
            dx = np.abs(delta[0])

            depth_offset = 5e-3

            # Move along y
            if dy > dx:
                order = np.argsort(endpoints[:,1])
                endpoints = endpoints[order]
                depths = point_depths[order]

                # x = my + b
                delta = endpoints[1] - endpoints[0]
                slope = delta[0] / delta[1]
                intercept = endpoints[0][0] - slope * endpoints[0][1]

                # Move along y axis
                y = int(endpoints[0][1]) + 1
                y_end = int(endpoints[1][1])

                while y <= y_end:
                    if y < 0 or y >= di_mask.shape[0]:
                        y += 1
                        continue

                    exp_x = slope * y + intercept
                    x = int(exp_x)
                    xs_to_attempt = [x, x+1, x-1, x+2, x-2]
                    exp_depth = ((y - endpoints[0][1]) / delta[1]) * (depths[1] - depths[0]) + depths[0]

                    for x in xs_to_attempt:
                        if x < 0 or x >= di_mask.shape[1]:
                            continue
                        depth = depth_im.data[y,x]
                        if np.abs(depth - exp_depth) < depth_offset:
                            di_mask[y][x] = 0xFF
                            break
                    y += 1
            else:
                order = np.argsort(endpoints[:,0])
                endpoints = endpoints[order]
                depths = point_depths[order]

                # y = mx + b
                delta = endpoints[1] - endpoints[0]
                slope = delta[1] / delta[0]
                intercept = endpoints[0][1] - slope * endpoints[0][0]

                # Move along x axis
                x = int(endpoints[0][0]) + 1
                x_end = int(endpoints[1][0])

                while x <= x_end:
                    if x < 0 or x >= di_mask.shape[1]:
                        x += 1
                        continue

                    exp_y = slope * x + intercept
                    y = int(exp_y)
                    ys_to_attempt = [y, y+1, y-1, y+2, y-2]
                    exp_depth = ((x - endpoints[0][0]) / delta[0]) * (depths[1] - depths[0]) + depths[0]

                    for y in ys_to_attempt:
                        if y < 0 or y >= di_mask.shape[0]:
                            continue
                        depth = depth_im.data[y,x]
                        if np.abs(depth - exp_depth) < depth_offset:
                            di_mask[y][x] = 0xFF
                            break
                    x += 1

        return BinaryImage(di_mask)
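The loop above rasterizes each projected edge one pixel row (or column) at a time, keeping pixels whose observed depth is within depth_offset of the interpolated edge depth. The projection it relies on is standard pinhole projection through the intrinsics matrix; a small self-contained sketch with an illustrative K:

import numpy as np

K = np.array([[525.0, 0.0, 320.0],
              [0.0, 525.0, 240.0],
              [0.0, 0.0, 1.0]])                # illustrative camera intrinsics

points = np.array([[0.10, -0.05, 0.60],
                   [0.12, -0.02, 0.61]]).T     # 3 x N camera-frame points
points_proj = K.dot(points)                    # homogeneous pixel coordinates
pixels = (points_proj[:2] / points_proj[2]).T  # N x 2 (u, v) pixel locations
depths = points_proj[2]                        # per-point depths in meters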