def load(save_dir):
    if not os.path.exists(save_dir):
        raise ValueError('Directory %s does not exist!' % (save_dir))
    color_image_filename = os.path.join(save_dir, 'color.png')
    depth_image_filename = os.path.join(save_dir, 'depth.npy')
    camera_intr_filename = os.path.join(save_dir, 'camera.intr')
    segmask_filename = os.path.join(save_dir, 'segmask.npy')
    obj_segmask_filename = os.path.join(save_dir, 'obj_segmask.npy')
    state_filename = os.path.join(save_dir, 'state.pkl')

    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    color = ColorImage.open(color_image_filename, frame=camera_intr.frame)
    depth = DepthImage.open(depth_image_filename, frame=camera_intr.frame)

    segmask = None
    if os.path.exists(segmask_filename):
        segmask = BinaryImage.open(segmask_filename, frame=camera_intr.frame)
    obj_segmask = None
    if os.path.exists(obj_segmask_filename):
        obj_segmask = SegmentationImage.open(obj_segmask_filename,
                                             frame=camera_intr.frame)
    fully_observed = None
    if os.path.exists(state_filename):
        fully_observed = pkl.load(open(state_filename, 'rb'))

    return RgbdImageState(RgbdImage.from_color_and_depth(color, depth),
                          camera_intr,
                          segmask=segmask,
                          obj_segmask=obj_segmask,
                          fully_observed=fully_observed)
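# A minimal usage sketch of load() above (not part of the original source).
# The directory layout follows the filenames hard-coded in load(): color.png,
# depth.npy, camera.intr, plus optional segmask.npy, obj_segmask.npy and
# state.pkl. The 'saved_states/example' path is a hypothetical placeholder.
if __name__ == '__main__':
    state = load('saved_states/example')
    print(state.rgbd_im.depth.shape)       # DepthImage wrapped in the RgbdImageState
    print(state.segmask is not None)       # True only if segmask.npy was present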
def plan_grasp(self, depth, rgb, resetting=False, camera_intr=None,
               segmask=None):
    """Computes possible grasps.

    Parameters
    ----------
    depth: type `numpy`
        depth image
    rgb: type `numpy`
        rgb image
    camera_intr: type `perception.CameraIntrinsics`
        Intrinsic camera object.
    segmask: type `perception.BinaryImage`
        Binary segmask of detected object

    Returns
    -------
    type `GQCNNGrasp`
        Computed optimal grasp.
    """
    if "FC" in self.model:
        assert not (segmask is None), \
            "Fully-Convolutional policy expects a segmask."
    if camera_intr is None:
        camera_intr_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "gqcnn/data/calib/primesense/primesense.intr")
        camera_intr = CameraIntrinsics.load(camera_intr_filename)

    depth_im = DepthImage(depth, frame=camera_intr.frame)
    color_im = ColorImage(rgb, frame=camera_intr.frame)

    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Inpaint.
    depth_im = depth_im.inpaint(
        rescale_factor=self.config["inpaint_rescale_factor"])

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`.
    self.rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # Create an `RgbdImageState` with the `RgbdImage` and `CameraIntrinsics`.
    state = RgbdImageState(self.rgbd_im, camera_intr, segmask=segmask)

    # Set input sizes for fully-convolutional policy.
    if "FC" in self.model:
        self.policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_height"] = depth_im.shape[0]
        self.policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_width"] = depth_im.shape[1]

    return self.execute_policy(state, resetting)
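# A minimal calling sketch for plan_grasp() above (not from the original
# source). `planner` is a hypothetical instance of the surrounding class; the
# arrays mirror the dtypes the method wraps into DepthImage (float depth in
# meters) and ColorImage (uint8 RGB).
import numpy as np

depth = np.full((480, 640), 0.7, dtype=np.float32)   # flat plane 0.7 m from the camera
rgb = np.zeros((480, 640, 3), dtype=np.uint8)
grasp = planner.plan_grasp(depth, rgb)                # falls back to the bundled Primesense intrinsics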
def get_state(self, depth, segmask):
    # Read images.
    depth_im = DepthImage(depth, frame=self.camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=self.camera_intr.frame)
    segmask = BinaryImage(segmask.astype(np.uint8) * 255,
                          frame=self.camera_intr.frame)

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=self.inpaint_rescale_factor)

    # Create state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, self.camera_intr, segmask=segmask)

    return state, rgbd_im
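# A usage note for get_state() above (not from the original source): the
# segmask argument is expected as a 0/1 (or boolean) array, which the method
# scales to the 0/255 range that BinaryImage expects. `grasp_env` and `depth`
# (a float depth array in meters) are hypothetical placeholders.
segmask = (depth > 0).astype("uint8")                 # keep pixels with valid depth
state, rgbd_im = grasp_env.get_state(depth, segmask)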
def run_gqcnn(depth, seg_mask):
    best_angle = 0
    best_point = [0, 0]
    best_dist = 0
    depth_im = DepthImage(depth.astype("float32"), frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([imageWidth, imageHeight,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)
    print(seg_mask)
    segmask = BinaryImage(seg_mask)
    print(segmask)
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state_gqcnn = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)
    policy_start = time.time()
    try:
        action = policy(state_gqcnn)
        logger.info("Planning took %.3f sec" % (time.time() - policy_start))
        best_point = [action.grasp.center[0], action.grasp.center[1]]
        best_angle = float(action.grasp.angle) * 180 / 3.141592
    except Exception as inst:
        print(inst)
    return best_angle, best_point, best_dist
def run_gqcnn(depth, seg_mask):
    best_angle = 0
    best_point = [0, 0]
    best_dist = 0
    depth_im = DepthImage(depth.astype("float32"), frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([imageWidth, imageHeight,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)
    segmask = BinaryImage(seg_mask)
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state_gqcnn = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)
    policy_start = time.time()
    q_value = -1
    try:
        action = policy(state_gqcnn)
        best_point = [action.grasp.center[0], action.grasp.center[1]]
        best_angle = float(action.grasp.angle) * 180 / 3.141592
        q_value = action.q_value
        print("inner : ", action.q_value)
    except Exception as inst:
        print(inst)
    return q_value
def _plan_grasp(self,
                color_im,
                depth_im,
                camera_intr,
                bounding_box=None,
                segmask=None):
    """Grasp planner request handler.

    Parameters
    ----------
    color_im: :obj:`perception.ColorImage`
        Color image of the scene.
    depth_im: :obj:`perception.DepthImage`
        Depth image of the scene.
    camera_intr: :obj:`perception.CameraIntrinsics`
        Intrinsics of the camera that captured the images.
    bounding_box: optional
        Bounding box to restrict the grasp search to.
    segmask: :obj:`perception.BinaryImage`, optional
        Segmask of the target object.
    """
    rospy.loginfo("Planning Grasp")

    # Inpaint images.
    color_im = color_im.inpaint(
        rescale_factor=self.cfg["inpaint_rescale_factor"])
    depth_im = depth_im.inpaint(
        rescale_factor=self.cfg["inpaint_rescale_factor"])

    # Init segmask.
    if segmask is None:
        segmask = BinaryImage(255 *
                              np.ones(depth_im.shape).astype(np.uint8),
                              frame=color_im.frame)

    # Visualize.
    if self.cfg["vis"]["color_image"]:
        vis.imshow(color_im)
        vis.show()
    if self.cfg["vis"]["depth_image"]:
        vis.imshow(depth_im)
        vis.show()
    if self.cfg["vis"]["segmask"] and segmask is not None:
        vis.imshow(segmask)
        vis.show()

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # Mask bounding box.
    if bounding_box is not None:
        # Calc bb parameters.
        min_x = bounding_box.minX
        min_y = bounding_box.minY
        max_x = bounding_box.maxX
        max_y = bounding_box.maxY

        # Contain box to image->don't let it exceed image height/width
        # bounds.
        if min_x < 0:
            min_x = 0
        if min_y < 0:
            min_y = 0
        if max_x > rgbd_im.width:
            max_x = rgbd_im.width
        if max_y > rgbd_im.height:
            max_y = rgbd_im.height

        # Mask.
        bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
        bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
        bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                 segmask.frame)
        segmask = segmask.mask_binary(bb_segmask)

    # Visualize.
    if self.cfg["vis"]["rgbd_state"]:
        masked_rgbd_im = rgbd_im.mask_binary(segmask)
        vis.figure()
        vis.subplot(1, 2, 1)
        vis.imshow(masked_rgbd_im.color)
        vis.subplot(1, 2, 2)
        vis.imshow(masked_rgbd_im.depth)
        vis.show()

    # Create an `RgbdImageState` with the cropped `RgbdImage` and
    # `CameraIntrinsics`.
    rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # Execute policy.
    try:
        return self.execute_policy(rgbd_state, self.grasping_policy,
                                   self.grasp_pose_publisher,
                                   camera_intr.frame)
    except NoValidGraspsException:
        rospy.logerr(
            ("While executing policy found no valid grasps from sampled"
             " antipodal point pairs. Aborting Policy!"))
        raise rospy.ServiceException(
            ("While executing policy found no valid grasps from sampled"
             " antipodal point pairs. Aborting Policy!"))
def wrapped_render(self, render_modes, front_and_back=False):
    """Render images of the scene and wrap them with Image wrapper classes
    from the Berkeley AUTOLab's perception module.

    Parameters
    ----------
    render_modes : list of perception.RenderMode
        A list of the desired image types to return, from the perception
        module's RenderMode enum.
    front_and_back : bool
        If True, all surface normals are treated as if they're facing the
        camera.

    Returns
    -------
    list of perception.Image
        A list containing a corresponding Image sub-class for each type
        listed in render_modes.
    """
    # Render raw images
    render_color = False
    for mode in render_modes:
        if mode != RenderMode.DEPTH and mode != RenderMode.SCALED_DEPTH:
            render_color = True
            break

    color_im, depth_im = None, None
    if render_color:
        color_im, depth_im = self.render(render_color,
                                         front_and_back=front_and_back)
    else:
        depth_im = self.render(render_color)

    # For each specified render mode, add an Image object of the
    # appropriate type
    images = []
    for render_mode in render_modes:
        # Then, convert them to an image wrapper class
        if render_mode == RenderMode.SEGMASK:
            images.append(
                BinaryImage((depth_im > 0.0).astype(np.uint8),
                            frame=self.camera.intrinsics.frame,
                            threshold=0))
        elif render_mode == RenderMode.COLOR:
            images.append(
                ColorImage(color_im, frame=self.camera.intrinsics.frame))
        elif render_mode == RenderMode.GRAYSCALE:
            images.append(
                ColorImage(
                    color_im,
                    frame=self.camera.intrinsics.frame).to_grayscale())
        elif render_mode == RenderMode.DEPTH:
            images.append(
                DepthImage(depth_im, frame=self.camera.intrinsics.frame))
        elif render_mode == RenderMode.SCALED_DEPTH:
            images.append(
                DepthImage(depth_im,
                           frame=self.camera.intrinsics.frame).to_color())
        elif render_mode == RenderMode.RGBD:
            c = ColorImage(color_im, frame=self.camera.intrinsics.frame)
            d = DepthImage(depth_im, frame=self.camera.intrinsics.frame)
            images.append(RgbdImage.from_color_and_depth(c, d))
        elif render_mode == RenderMode.GD:
            g = ColorImage(
                color_im,
                frame=self.camera.intrinsics.frame).to_grayscale()
            d = DepthImage(depth_im, frame=self.camera.intrinsics.frame)
            images.append(GdImage.from_grayscale_and_depth(g, d))
        else:
            raise ValueError(
                'Render mode {} not supported'.format(render_mode))

    return images
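# A short usage sketch for wrapped_render() above (not from the original
# source). `scene` is a hypothetical instance of the rendering class; the
# returned list is ordered like the requested render modes.
color, depth, mask = scene.wrapped_render(
    [RenderMode.COLOR, RenderMode.DEPTH, RenderMode.SEGMASK])
rgbd = RgbdImage.from_color_and_depth(color, depth)   # same aggregation used by the other snippets here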
def wrapped_images(self,
                   mesh,
                   object_to_camera_poses,
                   render_mode,
                   stable_pose=None,
                   mat_props=None,
                   light_props=None,
                   debug=False):
    """Create ObjectRender objects of the given mesh at the list of object to
    camera poses.

    Parameters
    ----------
    mesh : :obj:`Mesh3D`
        The mesh to be rendered.
    object_to_camera_poses : :obj:`list` of :obj:`RigidTransform`
        A list of object to camera transforms to render from.
    render_mode : int
        One of RenderMode.COLOR, RenderMode.DEPTH, or
        RenderMode.SCALED_DEPTH.
    stable_pose : :obj:`StablePose`
        A stable pose to render the object in.
    mat_props : :obj:`MaterialProperties`
        Material properties for the mesh
    light_props : :obj:`MaterialProperties`
        Lighting properties for the scene
    debug : bool
        Whether or not to debug the C++ meshrendering code.

    Returns
    -------
    :obj:`list` of :obj:`ObjectRender`
        A list of ObjectRender objects generated from the given parameters.
    """
    # pre-multiply the stable pose
    world_to_camera_poses = [
        T_obj_camera.as_frames('obj', 'camera')
        for T_obj_camera in object_to_camera_poses
    ]
    if stable_pose is not None:
        t_obj_stp = np.array([0, 0, -stable_pose.r.dot(stable_pose.x0)[2]])
        T_obj_stp = RigidTransform(rotation=stable_pose.r,
                                   translation=t_obj_stp,
                                   from_frame='obj',
                                   to_frame='stp')
        stp_to_camera_poses = copy.copy(object_to_camera_poses)
        object_to_camera_poses = []
        for T_stp_camera in stp_to_camera_poses:
            T_stp_camera.from_frame = 'stp'
            object_to_camera_poses.append(T_stp_camera.dot(T_obj_stp))

    # set lighting mode
    enable_lighting = True
    if render_mode == RenderMode.SEGMASK or render_mode == RenderMode.DEPTH or \
       render_mode == RenderMode.DEPTH_SCENE:
        enable_lighting = False

    # render both image types (doesn't really cost any time)
    color_ims, depth_ims = self.images(mesh,
                                       object_to_camera_poses,
                                       mat_props=mat_props,
                                       light_props=light_props,
                                       enable_lighting=enable_lighting,
                                       debug=debug)

    # convert to image wrapper classes
    images = []
    if render_mode == RenderMode.SEGMASK:
        # wrap binary images
        for binary_im in color_ims:
            images.append(BinaryImage(binary_im[:, :, 0],
                                      frame=self._camera_intr.frame,
                                      threshold=0))

    elif render_mode == RenderMode.COLOR:
        # wrap color images
        for color_im in color_ims:
            images.append(ColorImage(color_im,
                                     frame=self._camera_intr.frame))

    elif render_mode == RenderMode.COLOR_SCENE:
        # wrap color and depth images
        for color_im in color_ims:
            images.append(ColorImage(color_im,
                                     frame=self._camera_intr.frame))

        # render images of scene objects
        color_scene_ims = {}
        for name, scene_obj in self._scene.iteritems():
            scene_object_to_camera_poses = []
            for world_to_camera_pose in world_to_camera_poses:
                scene_object_to_camera_poses.append(
                    world_to_camera_pose * scene_obj.T_mesh_world)

            color_scene_ims[name] = self.wrapped_images(
                scene_obj.mesh,
                scene_object_to_camera_poses,
                RenderMode.COLOR,
                mat_props=scene_obj.mat_props,
                light_props=light_props,
                debug=debug)

        # combine with scene images
        # TODO: re-add farther
        for i in range(len(images)):
            for j, name in enumerate(color_scene_ims.keys()):
                zero_px = images[i].zero_pixels()
                images[i].data[zero_px[:, 0], zero_px[:, 1], :] = \
                    color_scene_ims[name][i].image.data[zero_px[:, 0],
                                                        zero_px[:, 1], :]

    elif render_mode == RenderMode.DEPTH:
        # render depth image
        for depth_im in depth_ims:
            images.append(DepthImage(depth_im,
                                     frame=self._camera_intr.frame))

    elif render_mode == RenderMode.DEPTH_SCENE:
        # create empty depth images
        for depth_im in depth_ims:
            images.append(DepthImage(depth_im,
                                     frame=self._camera_intr.frame))

        # render images of scene objects
        depth_scene_ims = {}
        for name, scene_obj in self._scene.iteritems():
            scene_object_to_camera_poses = []
            for world_to_camera_pose in world_to_camera_poses:
                scene_object_to_camera_poses.append(
                    world_to_camera_pose * scene_obj.T_mesh_world)
            depth_scene_ims[name] = self.wrapped_images(
                scene_obj.mesh,
                scene_object_to_camera_poses,
                RenderMode.DEPTH,
                mat_props=scene_obj.mat_props,
                light_props=light_props)

        # combine with scene images
        for i in range(len(images)):
            for j, name in enumerate(depth_scene_ims.keys()):
                images[i] = images[i].combine_with(
                    depth_scene_ims[name][i].image)

    elif render_mode == RenderMode.RGBD:
        # create RGB-D images
        for color_im, depth_im in zip(color_ims, depth_ims):
            c = ColorImage(color_im, frame=self._camera_intr.frame)
            d = DepthImage(depth_im, frame=self._camera_intr.frame)
            images.append(RgbdImage.from_color_and_depth(c, d))

    elif render_mode == RenderMode.RGBD_SCENE:
        # create RGB-D images
        for color_im, depth_im in zip(color_ims, depth_ims):
            c = ColorImage(color_im, frame=self._camera_intr.frame)
            d = DepthImage(depth_im, frame=self._camera_intr.frame)
            images.append(RgbdImage.from_color_and_depth(c, d))

        # render images of scene objects
        rgbd_scene_ims = {}
        for name, scene_obj in self._scene.iteritems():
            scene_object_to_camera_poses = []
            for world_to_camera_pose in world_to_camera_poses:
                scene_object_to_camera_poses.append(
                    world_to_camera_pose * scene_obj.T_mesh_world)
            rgbd_scene_ims[name] = self.wrapped_images(
                scene_obj.mesh,
                scene_object_to_camera_poses,
                RenderMode.RGBD,
                mat_props=scene_obj.mat_props,
                light_props=light_props,
                debug=debug)

        # combine with scene images
        for i in range(len(images)):
            for j, name in enumerate(rgbd_scene_ims.keys()):
                images[i] = images[i].combine_with(
                    rgbd_scene_ims[name][i].image)

    elif render_mode == RenderMode.SCALED_DEPTH:
        # convert to color image
        for depth_im in depth_ims:
            d = DepthImage(depth_im, frame=self._camera_intr.frame)
            images.append(d.to_color())
    else:
        raise ValueError('Render mode %s not supported' % (render_mode))

    # create object renders
    if stable_pose is not None:
        object_to_camera_poses = copy.copy(stp_to_camera_poses)
    rendered_images = []
    for image, T_obj_camera in zip(images, object_to_camera_poses):
        T_camera_obj = T_obj_camera.inverse()
        rendered_images.append(ObjectRender(image, T_camera_obj))

    return rendered_images
def plan_grasp(self, camera_data, n_candidates=1):
    """Grasp planner.

    Parameters
    ----------
    camera_data:
        Camera data object carrying the rgb_img, depth_img, seg_img,
        bounding_box and intrinsic_params used below.
    n_candidates: int
        Number of grasp candidates to plan.
    """
    self._camera_data = camera_data

    # --- Inpaint images --- #
    color_im = camera_data.rgb_img.inpaint(
        rescale_factor=self.cfg["inpaint_rescale_factor"])
    depth_im = camera_data.depth_img.inpaint(
        rescale_factor=self.cfg["inpaint_rescale_factor"])

    # --- Init segmask --- #
    if camera_data.seg_img is None:
        segmask = BinaryImage(255 *
                              np.ones(depth_im.shape).astype(np.uint8),
                              frame=color_im.frame)
    else:
        segmask = camera_data.seg_mask

    # --- Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage` --- #
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # --- Mask bounding box --- #
    if camera_data.bounding_box is not None:
        # Calc bb parameters.
        min_x = camera_data.bounding_box['min_x']
        min_y = camera_data.bounding_box['min_y']
        max_x = camera_data.bounding_box['max_x']
        max_y = camera_data.bounding_box['max_y']

        # Contain box to image->don't let it exceed image height/width
        # bounds.
        if min_x < 0:
            min_x = 0
        if min_y < 0:
            min_y = 0
        if max_x > rgbd_im.width:
            max_x = rgbd_im.width
        if max_y > rgbd_im.height:
            max_y = rgbd_im.height

        # Mask.
        bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
        bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
        bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                 segmask.frame)
        segmask = segmask.mask_binary(bb_segmask)

    # --- Create an `RgbdImageState` with the cropped `RgbdImage` and
    # `CameraIntrinsics` --- #
    rgbd_state = RgbdImageState(rgbd_im,
                                camera_data.intrinsic_params,
                                segmask=segmask)

    # --- Execute policy --- #
    try:
        grasps_and_predictions = self.execute_policy(
            rgbd_state, self.grasping_policy,
            camera_data.intrinsic_params.frame, n_candidates)
        self._dexnet_gp = grasps_and_predictions

        # --- Project planar grasps to 3D space --- #
        l = []
        for gp in grasps_and_predictions:
            # my method
            pose_6d = self.transform_grasp_to_6D(
                gp[0], camera_data.intrinsic_params)
            pos = pose_6d[:3, 3]
            rot = pose_6d[:3, :3]
            grasp_6D = Grasp6D(position=pos,
                               rotation=rot,
                               width=gp[0].width,
                               score=gp[1],
                               ref_frame=camera_data.intrinsic_params.frame)
            l.append(grasp_6D)
            # dexnet method --> needs autolab_core installed as catkin package
            # 6D_gp = gp[0].pose()

        self.grasp_poses = l
        self.best_grasp = l[0]

        self.visualize()
        return True

    except NoValidGraspsException:
        warnings.warn("While executing policy found no valid grasps from "
                      "sampled antipodal point pairs!")
        return False
camera_intr = sensor.ir_intrinsics

# read images
color_im, depth_im, _ = sensor.frames()
color_im = color_im.inpaint(rescale_factor=inpaint_rescale_factor)
depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

# detect the objects
detector = PointCloudBoxDetector()
detections = detector.detect(color_im,
                             depth_im,
                             detection_config,
                             camera_intr=camera_intr,
                             T_camera_world=T_camera_world)
detection = detections[0]

# form RGB-D image state
rgbd_im = RgbdImage.from_color_and_depth(detection.color_thumbnail,
                                         detection.depth_thumbnail)
state = RgbdImageState(rgbd_im, detection.camera_intr,
                       detection.binary_thumbnail)

# init policy
policy = CrossEntropyRobustGraspingPolicy(policy_config)
policy_start = time.time()
action = policy(state)
logging.info('Planning took %.3f sec' % (time.time() - policy_start))

# vis final grasp
if policy_config['vis']['final_grasp']:
    vis.figure(size=(10, 10))
    vis.subplot(1, 2, 1)
    vis.imshow(rgbd_im.color)
    vis.grasp(action.grasp, scale=1.5, show_center=False, show_axis=True)
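# A minimal sketch (not from the original source) of how the policy_config and
# inpaint_rescale_factor used above might be obtained: values read from a YAML
# file via autolab_core's YamlConfig. The config path and key layout are
# assumptions, not taken from this script.
from autolab_core import YamlConfig

config = YamlConfig('cfg/examples/policy.yaml')            # hypothetical path
inpaint_rescale_factor = config['inpaint_rescale_factor']  # assumed key
policy_config = config['policy']                           # assumed key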
def run_gqcnn():
    global depth_im
    global color_im
    global segmask
    global policy
    global prev_q_value
    global prev_depth
    global toggle
    global t
    global x
    global y
    global z
    global best_angle
    global depth_raw
    global num_find_loc
    global state
    global loc_count
    global no_find
    global center
    global q_value
    global angle
    global no_valid_grasp_count
    global no_valid_move_y

    no_valid_grasp_count = 0
    num_find_loc = 0
    best_angle = 0
    x = 0.0
    y = 0.5
    z = 0
    time.sleep(5)

    while 1:
        try:
            print("GQCNN IS RUNNING")
            #depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
            if "input_images" in policy_config["vis"] and policy_config["vis"][
                    "input_images"]:
                plt.pause(0.001)
                #pass

            # Create state.
            print("GQCNN IS RUNNING2")
            rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
            state_gqcnn = RgbdImageState(rgbd_im, camera_intr,
                                         segmask=segmask)

            # Query policy.
            policy_start = time.time()
            try:
                action = policy(state_gqcnn)
                logger.info("Planning took %.3f sec" %
                            (time.time() - policy_start))
                no_valid_grasp_count = 0
            except Exception as inst:
                print(inst)
                no_valid_grasp_count = no_valid_grasp_count + 1
                time.sleep(0.3)

            print("GQCNN IS RUNNING3")
            if policy_config["vis"]["final_grasp"]:
                print("GQCNN IS RUNNING4")
                center[0] = action.grasp.center[0]
                center[1] = action.grasp.center[1]
                q_value = action.q_value
                angle = float(action.grasp.angle) * 180 / 3.141592

                print("center : \t" + str(action.grasp.center))
                print("angle : \t" + str(action.grasp.angle))
                print("Depth : \t" + str(action.grasp.depth))
                print("XYZ : \t" + str(x) + " , " + str(y) + " , " + str(z))
                print("\n\n\n\n\n\n\nQ_value : \t" + str(action.q_value))

                if (prev_q_value < action.q_value):
                    prev_q_value = action.q_value
                    prev_depth = action.grasp.depth
                    best_center[0] = action.grasp.center[0]
                    best_center[1] = action.grasp.center[1]
                    best_angle = action.grasp.angle
                    best_angle = float(best_angle) * 180 / 3.141592

                    print("gqcnn_best_center : \t" + str(x) + "," + str(y))
                    print("best_angle : \t" + str(best_angle))
                    print("\n\n\n\n\n\n\nbest_Q_value : \t" +
                          str(action.q_value))
                    print("XYZ : \t" + str(x) + str(y) + str(z))

                num_find_loc = num_find_loc + 1
                if num_find_loc > 5:
                    prev_q_value = 0
                    x = 0
                    y = 0
                    z = 0
                    num_find_loc = 0
                    best_angle = 0
                    best_center[0] = 0
                    best_center[1] = 0

            time.sleep(0.01)
        except Exception as inst:
            print(inst)
            time.sleep(1)
            pass
def _plan_grasp(self,
                color_im,
                depth_im,
                camera_intr,
                bounding_box=None,
                segmask=None):
    """Grasp planner request handler.

    Parameters
    ----------
    color_im: :obj:`perception.ColorImage`
        Color image of the scene.
    depth_im: :obj:`perception.DepthImage`
        Depth image of the scene.
    camera_intr: :obj:`perception.CameraIntrinsics`
        Intrinsics of the camera that captured the images.
    bounding_box: optional
        Bounding box to restrict the grasp search to.
    segmask: :obj:`perception.BinaryImage`, optional
        Segmask of the target object.
    """
    rospy.loginfo('Planning Grasp')

    # inpaint images
    color_im = color_im.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])
    depth_im = depth_im.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])

    # init segmask
    if segmask is None:
        segmask = BinaryImage(255 *
                              np.ones(depth_im.shape).astype(np.uint8),
                              frame=color_im.frame)

    # visualize
    if self.cfg['vis']['color_image']:
        vis.imshow(color_im)
        vis.show()
    if self.cfg['vis']['depth_image']:
        vis.imshow(depth_im)
        vis.show()
    if self.cfg['vis']['segmask'] and segmask is not None:
        vis.imshow(segmask)
        vis.show()

    # aggregate color and depth images into a single perception RgbdImage
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # mask bounding box
    if bounding_box is not None:
        # calc bb parameters
        min_x = bounding_box.minX
        min_y = bounding_box.minY
        max_x = bounding_box.maxX
        max_y = bounding_box.maxY

        # contain box to image->don't let it exceed image height/width bounds
        if min_x < 0:
            min_x = 0
        if min_y < 0:
            min_y = 0
        if max_x > rgbd_im.width:
            max_x = rgbd_im.width
        if max_y > rgbd_im.height:
            max_y = rgbd_im.height

        # mask
        bb_segmask_arr = np.zeros([rgbd_im.height, rgbd_im.width])
        bb_segmask_arr[min_y:max_y, min_x:max_x] = 255
        bb_segmask = BinaryImage(bb_segmask_arr.astype(np.uint8),
                                 segmask.frame)
        segmask = segmask.mask_binary(bb_segmask)

    # visualize
    if self.cfg['vis']['rgbd_state']:
        masked_rgbd_im = rgbd_im.mask_binary(segmask)
        vis.figure()
        vis.subplot(1, 2, 1)
        vis.imshow(masked_rgbd_im.color)
        vis.subplot(1, 2, 2)
        vis.imshow(masked_rgbd_im.depth)
        vis.show()

    # create an RgbdImageState with the cropped RgbdImage and CameraIntrinsics
    rgbd_state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # execute policy
    try:
        return self.execute_policy(rgbd_state, self.grasping_policy,
                                   self.grasp_pose_publisher,
                                   camera_intr.frame)
    except NoValidGraspsException:
        rospy.logerr(
            'While executing policy found no valid grasps from sampled '
            'antipodal point pairs. Aborting Policy!')
        raise rospy.ServiceException(
            'While executing policy found no valid grasps from sampled '
            'antipodal point pairs. Aborting Policy!')
def plan_grasp(self, req):
    """Grasp planner request handler.

    Parameters
    ----------
    req: :obj:`ROS ServiceRequest`
        ROS ServiceRequest for grasp planner service.
    """
    rospy.loginfo('Planning Grasp')

    # set min dimensions
    pad = max(
        math.ceil(
            np.sqrt(2) *
            (float(self.cfg['policy']['metric']['crop_width']) / 2)),
        math.ceil(
            np.sqrt(2) *
            (float(self.cfg['policy']['metric']['crop_height']) / 2)))
    min_width = 2 * pad + self.cfg['policy']['metric']['crop_width']
    min_height = 2 * pad + self.cfg['policy']['metric']['crop_height']

    # get the raw depth and color images as ROS Image objects
    raw_color = req.color_image
    raw_depth = req.depth_image
    segmask = None
    raw_segmask = req.segmask

    # get the raw camera info as ROS CameraInfo object
    raw_camera_info = req.camera_info

    # get the bounding box as a custom ROS BoundingBox msg
    bounding_box = req.bounding_box

    # wrap the camera info in a perception CameraIntrinsics object
    camera_intrinsics = CameraIntrinsics(
        raw_camera_info.header.frame_id, raw_camera_info.K[0],
        raw_camera_info.K[4], raw_camera_info.K[2], raw_camera_info.K[5],
        raw_camera_info.K[1], raw_camera_info.height,
        raw_camera_info.width)

    # create wrapped perception RGB and depth images by unpacking the ROS
    # images using CvBridge
    try:
        color_image = ColorImage(self.cv_bridge.imgmsg_to_cv2(
            raw_color, "rgb8"),
                                 frame=camera_intrinsics.frame)
        depth_image = DepthImage(self.cv_bridge.imgmsg_to_cv2(
            raw_depth, desired_encoding="passthrough"),
                                 frame=camera_intrinsics.frame)
        segmask = BinaryImage(self.cv_bridge.imgmsg_to_cv2(
            raw_segmask, desired_encoding="passthrough"),
                              frame=camera_intrinsics.frame)
    except CvBridgeError as cv_bridge_exception:
        rospy.logerr(cv_bridge_exception)

    # check image sizes
    if color_image.height != depth_image.height or \
       color_image.width != depth_image.width:
        rospy.logerr(
            'Color image and depth image must be the same shape! Color is '
            '%d x %d but depth is %d x %d' %
            (color_image.height, color_image.width, depth_image.height,
             depth_image.width))
        raise rospy.ServiceException(
            'Color image and depth image must be the same shape! Color is '
            '%d x %d but depth is %d x %d' %
            (color_image.height, color_image.width, depth_image.height,
             depth_image.width))

    if color_image.height < min_height or color_image.width < min_width:
        rospy.logerr(
            'Color image is too small! Must be at least %d x %d resolution '
            'but the requested image is only %d x %d' %
            (min_height, min_width, color_image.height, color_image.width))
        raise rospy.ServiceException(
            'Color image is too small! Must be at least %d x %d resolution '
            'but the requested image is only %d x %d' %
            (min_height, min_width, color_image.height, color_image.width))

    # inpaint images
    color_image = color_image.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])
    depth_image = depth_image.inpaint(
        rescale_factor=self.cfg['inpaint_rescale_factor'])

    # visualize
    if self.cfg['vis']['color_image']:
        vis.imshow(color_image)
        vis.show()
    if self.cfg['vis']['depth_image']:
        vis.imshow(depth_image)
        vis.show()
    if self.cfg['vis']['segmask'] and segmask is not None:
        vis.imshow(segmask)
        vis.show()

    # aggregate color and depth images into a single perception RgbdImage
    rgbd_image = RgbdImage.from_color_and_depth(color_image, depth_image)

    # calc crop parameters
    minX = bounding_box.minX - pad
    minY = bounding_box.minY - pad
    maxX = bounding_box.maxX + pad
    maxY = bounding_box.maxY + pad

    # contain box to image->don't let it exceed image height/width bounds
    if minX < 0:
        minX = 0
    if minY < 0:
        minY = 0
    if maxX > rgbd_image.width:
        maxX = rgbd_image.width
    if maxY > rgbd_image.height:
        maxY = rgbd_image.height
    centroidX = (maxX + minX) / 2
    centroidY = (maxY + minY) / 2

    # compute width and height
    width = maxX - minX
    height = maxY - minY

    # crop camera intrinsics and rgbd image
    cropped_camera_intrinsics = camera_intrinsics.crop(
        height, width, centroidY, centroidX)
    cropped_rgbd_image = rgbd_image.crop(height, width, centroidY,
                                         centroidX)
    cropped_segmask = None
    if segmask is not None:
        cropped_segmask = segmask.crop(height, width, centroidY, centroidX)

    # visualize
    if self.cfg['vis']['cropped_rgbd_image']:
        vis.imshow(cropped_rgbd_image)
        vis.show()

    # create an RgbdImageState with the cropped RgbdImage and CameraIntrinsics
    image_state = RgbdImageState(cropped_rgbd_image,
                                 cropped_camera_intrinsics,
                                 segmask=cropped_segmask)

    # execute policy
    try:
        return self.execute_policy(image_state, self.grasping_policy,
                                   self.grasp_pose_publisher,
                                   cropped_camera_intrinsics.frame)
    except NoValidGraspsException:
        rospy.logerr(
            'While executing policy found no valid grasps from sampled '
            'antipodal point pairs. Aborting Policy!')
        raise rospy.ServiceException(
            'While executing policy found no valid grasps from sampled '
            'antipodal point pairs. Aborting Policy!')
def sample_grasp(self, env, mesh_obj, vis=True):
    # Setup sensor.
    #camera_intr = CameraIntrinsics.load(camera_intr_filename)

    # Read images: get depth image from camera.
    inpaint_rescale_factor = 1.0
    segmask_filename = None

    _, center, extents, obj_xquat, bbox_xpos = self.get_bbox_properties(
        env, mesh_obj)
    camera_pos = copy.deepcopy(center)
    camera_pos[2] += 0.5 * extents[2] + 0.2

    env.set_camera(camera_pos,
                   np.array([0.7071068, 0, 0, -0.7071068]),
                   camera_name=f"ext_camera_0")
    rgb, depth = env.render_from_camera(int(self.config.camera_img_height),
                                        int(self.config.camera_img_width),
                                        camera_name=f"ext_camera_0")

    # enforce zoom out
    from scipy.interpolate import interp2d

    center_x = self.config.camera_img_height / 2 + 1
    center_y = self.config.camera_img_width / 2 + 1
    img_height = self.config.camera_img_height
    img_width = self.config.camera_img_width
    xdense = np.linspace(0, img_height - 1, img_height)
    ydense = np.linspace(0, img_width - 1, img_width)
    xintr = (xdense - center_x) * (1.0 / self.rescale_factor) + center_x
    yintr = (ydense - center_y) * (1.0 / self.rescale_factor) + center_y
    xintr[xintr < 0] = 0
    xintr[xintr > (img_height - 1)] = img_height - 1
    yintr[yintr < 0] = 0
    yintr[yintr > (img_width - 1)] = img_width - 1

    fr = interp2d(xdense, ydense, rgb[:, :, 0], kind="linear")
    rgb_r_new = fr(xintr, yintr)
    fg = interp2d(xdense, ydense, rgb[:, :, 1], kind="linear")
    rgb_g_new = fg(xintr, yintr)
    fb = interp2d(xdense, ydense, rgb[:, :, 2], kind="linear")
    rgb_b_new = fb(xintr, yintr)
    rgb_new = np.stack([rgb_r_new, rgb_g_new, rgb_b_new], axis=2)

    fd = interp2d(xdense, ydense, depth, kind="linear")
    depth_new = fd(xintr, yintr)

    #from skimage.transform import resize
    #rgb22, depth2 = env.render_from_camera(int(self.config.camera_img_height), int(self.config.camera_img_width), camera_name=f"ext_camera_0")
    #import ipdb; ipdb.set_trace()

    # visualize the interpolation
    #import imageio
    #imageio.imwrite(f"tmp/rgb_{self.iter_id}.png", rgb)
    #imageio.imwrite(f"tmp/rgb2_{self.iter_id}.png", rgb_new)
    #imageio.imwrite(f"tmp/depth_{self.iter_id}.png", depth)
    #imageio.imwrite(f"tmp/depth2_{self.iter_id}.png", depth_new)
    #import ipdb; ipdb.set_trace()

    rgb = rgb_new
    depth = depth_new
    depth = depth * self.rescale_factor

    # rgb: 128 x 128 x 1
    # depth: 128 x 128 x 1
    scaled_camera_fov_y = self.config.camera_fov_y
    aspect = 1
    scaled_fovx = 2 * np.arctan(
        np.tan(np.deg2rad(scaled_camera_fov_y) * 0.5) * aspect)
    scaled_fovx = np.rad2deg(scaled_fovx)
    scaled_fovy = scaled_camera_fov_y

    cx = self.config.camera_img_width * 0.5
    cy = self.config.camera_img_height * 0.5
    scaled_fx = cx / np.tan(np.deg2rad(
        scaled_fovx / 2.)) * (self.rescale_factor)
    scaled_fy = cy / np.tan(np.deg2rad(
        scaled_fovy / 2.)) * (self.rescale_factor)

    camera_intr = CameraIntrinsics(frame='phoxi',
                                   fx=scaled_fx,
                                   fy=scaled_fy,
                                   cx=self.config.camera_img_width * 0.5,
                                   cy=self.config.camera_img_height * 0.5,
                                   height=self.config.camera_img_height,
                                   width=self.config.camera_img_width)
    depth_im = DepthImage(depth, frame=camera_intr.frame)
    color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                    3]).astype(np.uint8),
                          frame=camera_intr.frame)

    # Optionally read a segmask.
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    segmask = valid_px_mask

    # Inpaint.
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

    # Create state.
    rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
    state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)

    # Query policy.
    policy_start = time.time()
    action = self.policy(state)
    print("Planning took %.3f sec" % (time.time() - policy_start))

    # numpy array with 2 values
    grasp_center = action.grasp.center._data[:, 0]  # (width, depth)
    grasp_depth = action.grasp.depth * (1 / self.rescale_factor)
    grasp_angle = action.grasp.angle  # np.pi*0.3

    if self.config.data_collection_mode:
        self.current_grasp = action.grasp

        depth_im = state.rgbd_im.depth
        scale = 1.0
        depth_im_scaled = depth_im.resize(scale)
        translation = scale * np.array([
            depth_im.center[0] - grasp_center[1],
            depth_im.center[1] - grasp_center[0]
        ])
        im_tf = depth_im_scaled
        im_tf = depth_im_scaled.transform(translation, grasp_angle)
        im_tf = im_tf.crop(self.gqcnn_image_size, self.gqcnn_image_size)

        # get the patch
        self.current_patch = im_tf.raw_data

    XYZ_origin, gripper_quat = self.compute_grasp_pts_from_grasp_sample(
        grasp_center, grasp_depth, grasp_angle, env)

    return XYZ_origin[:, 0], gripper_quat

    # Vis final grasp (dead code: unreachable after the return above).
    if vis:
        from visualization import Visualizer2D as vis
        vis.figure(size=(10, 10))
        vis.imshow(rgbd_im.depth,
                   vmin=self.policy_config["vis"]["vmin"],
                   vmax=self.policy_config["vis"]["vmax"])
        vis.grasp(action.grasp, scale=2.5, show_center=False,
                  show_axis=True)
        vis.title("Planned grasp at depth {0:.3f}m with Q={1:.3f}".format(
            action.grasp.depth, action.q_value))
        vis.show(f"tmp/grasp2_{mesh_obj.name}_{self.iter_id}.png")

        vis.figure(size=(10, 10))
        vis.imshow(im_tf,
                   vmin=self.policy_config["vis"]["vmin"],
                   vmax=self.policy_config["vis"]["vmax"])
        vis.show(f"tmp/cropd_{mesh_obj.name}_{self.iter_id}.png")

        import ipdb
        ipdb.set_trace()

    return XYZ_origin[:, 0], gripper_quat
    import ipdb
    ipdb.set_trace()
def visualize(self):
    """Visualize predictions."""
    logging.info('Visualizing ' + self.datapoint_type)

    # iterate through shuffled file indices
    for i in self.indices:
        im_filename = self.im_filenames[i]
        pose_filename = self.pose_filenames[i]
        label_filename = self.label_filenames[i]

        logging.info('Loading Image File: ' + im_filename +
                     ' Pose File: ' + pose_filename + ' Label File: ' +
                     label_filename)

        # load tensors from files
        metric_tensor = np.load(os.path.join(self.data_dir,
                                             label_filename))['arr_0']
        label_tensor = 1 * (metric_tensor > self.metric_thresh)
        image_tensor = np.load(os.path.join(self.data_dir,
                                            im_filename))['arr_0']
        hand_poses_tensor = np.load(
            os.path.join(self.data_dir, pose_filename))['arr_0']

        pose_tensor = self._read_pose_data(hand_poses_tensor,
                                           self.input_data_mode)

        # score with neural network
        pred_p_success_tensor = self._gqcnn.predict(image_tensor,
                                                    pose_tensor)

        # compute results
        classification_result = ClassificationResult(
            [pred_p_success_tensor], [label_tensor])

        logging.info('Error rate on files: %.3f' %
                     (classification_result.error_rate))
        logging.info('Precision on files: %.3f' %
                     (classification_result.precision))
        logging.info('Recall on files: %.3f' %
                     (classification_result.recall))

        mispred_ind = classification_result.mispredicted_indices()
        correct_ind = classification_result.correct_indices()
        # IPython.embed()

        if self.datapoint_type == 'true_positive' or \
           self.datapoint_type == 'true_negative':
            vis_ind = correct_ind
        else:
            vis_ind = mispred_ind
        num_visualized = 0

        # visualize
        for ind in vis_ind:
            # limit the number of sampled datapoints displayed per object
            if num_visualized >= self.samples_per_object:
                break
            num_visualized += 1

            # don't visualize the datapoints that we don't want
            if self.datapoint_type == 'true_positive':
                if classification_result.labels[ind] == 0:
                    continue
            elif self.datapoint_type == 'true_negative':
                if classification_result.labels[ind] == 1:
                    continue
            elif self.datapoint_type == 'false_positive':
                if classification_result.labels[ind] == 0:
                    continue
            elif self.datapoint_type == 'false_negative':
                if classification_result.labels[ind] == 1:
                    continue

            logging.info('Datapoint %d of files for %s' %
                         (ind, im_filename))
            logging.info('Depth: %.3f' % (hand_poses_tensor[ind, 2]))

            data = image_tensor[ind, ...]
            if self.display_image_type == RenderMode.SEGMASK:
                image = BinaryImage(data)
            elif self.display_image_type == RenderMode.GRAYSCALE:
                image = GrayscaleImage(data)
            elif self.display_image_type == RenderMode.COLOR:
                image = ColorImage(data)
            elif self.display_image_type == RenderMode.DEPTH:
                image = DepthImage(data)
            elif self.display_image_type == RenderMode.RGBD:
                image = RgbdImage(data)
            elif self.display_image_type == RenderMode.GD:
                image = GdImage(data)

            vis2d.figure()

            if self.display_image_type == RenderMode.RGBD:
                vis2d.subplot(1, 2, 1)
                vis2d.imshow(image.color)
                grasp = Grasp2D(Point(image.center, 'img'), 0,
                                hand_poses_tensor[ind, 2],
                                self.gripper_width_m)
                grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0)
                vis2d.grasp(grasp)
                vis2d.subplot(1, 2, 2)
                vis2d.imshow(image.depth)
                vis2d.grasp(grasp)
            elif self.display_image_type == RenderMode.GD:
                vis2d.subplot(1, 2, 1)
                vis2d.imshow(image.gray)
                grasp = Grasp2D(Point(image.center, 'img'), 0,
                                hand_poses_tensor[ind, 2],
                                self.gripper_width_m)
                grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0)
                vis2d.grasp(grasp)
                vis2d.subplot(1, 2, 2)
                vis2d.imshow(image.depth)
                vis2d.grasp(grasp)
            else:
                vis2d.imshow(image)
                grasp = Grasp2D(Point(image.center, 'img'), 0,
                                hand_poses_tensor[ind, 2],
                                self.gripper_width_m)
                grasp.camera_intr = grasp.camera_intr.resize(1.0 / 3.0)
                vis2d.grasp(grasp)
            vis2d.title('Datapoint %d: Pred: %.3f Label: %.3f' %
                        (ind, classification_result.pred_probs[ind, 1],
                         classification_result.labels[ind]))
            vis2d.show()

    # cleanup
    self._cleanup()
def run_gqcnn():
    global depth_im
    global color_im
    global segmask
    global policy
    global prev_q_value
    global prev_depth
    global toggle
    global t
    global x
    global y
    global z
    global best_angle
    global depth_raw
    global num_find_loc
    global state
    global loc_count
    global no_find
    global center
    global q_value
    global angle
    global no_valid_grasp_count
    global no_valid_move_y
    global depth_frame
    global frames

    no_valid_grasp_count = 0
    time.sleep(3)

    depth_frame = frames.get_depth_frame()
    depth_intrin = depth_frame.profile.as_video_stream_profile().intrinsics

    best_angle = 0
    x = 0.0
    y = 0.5
    action = 0
    num_find_loc = 0

    while 1:
        try:
            #depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
            if "input_images" in policy_config["vis"] and policy_config["vis"][
                    "input_images"]:
                plt.pause(0.001)
                #pass

            # Create state.
            rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
            state_gqcnn = RgbdImageState(rgbd_im, camera_intr,
                                         segmask=segmask)

            # Query policy.
            policy_start = time.time()
            try:
                action = policy(state_gqcnn)
                logger.info("Planning took %.3f sec" %
                            (time.time() - policy_start))
                no_valid_grasp_count = 0
            except Exception as inst:
                print(inst)
                no_valid_grasp_count = no_valid_grasp_count + 1
                time.sleep(0.3)

            if policy_config["vis"]["final_grasp"]:
                center[0] = action.grasp.center[0]
                center[1] = action.grasp.center[1]
                depth = depth_frame.get_distance(int(center[0]),
                                                 int(center[1]))
                #depth_point = rs.rs2_deproject_pixel_to_point(depth_intrin, [int(center[0]), int(center[1])], depth)
                #print(depth_point)
                q_value = action.q_value
                angle = float(action.grasp.angle) * 180 / 3.141592
                x, y, z = convert_depth_frame_to_pointcloud(
                    int(center[0]), int(center[1]), depth)

                print("center : \t" + str(action.grasp.center))
                print("angle : \t" + str(action.grasp.angle))
                print("Depth : \t" + str(action.grasp.depth))
                print("\n\n\n\n\n\n\nQ_value : \t" + str(action.q_value))

                if (prev_q_value < action.q_value):
                    prev_q_value = action.q_value
                    prev_depth = action.grasp.depth
                    best_center[0] = action.grasp.center[0]
                    best_center[1] = action.grasp.center[1]
                    depth = depth_frame.get_distance(int(best_center[0]),
                                                     int(best_center[1]))
                    #depth_point = rs.rs2_deproject_pixel_to_point(depth_intrin, [int(best_center[0]), int(best_center[1])], depth)
                    #print(depth_point)
                    #convert_data_ = convert_2d_2d(int(best_center[0]),int(best_center[1]))
                    #convert_data=convert_2d_3d(int(convert_data_[0]),int(convert_data_[1]))
                    x, y, z = convert_depth_frame_to_pointcloud(
                        int(best_center[0]), int(best_center[1]), depth)
                    best_angle = action.grasp.angle
                    best_angle = float(best_angle) * 180 / 3.141592

                    print("gqcnn_best_center : \t" + str(x) + "," + str(y))
                    print("best_angle : \t" + str(best_angle))
                    print("\n\n\n\n\n\n\nbest_Q_value : \t" +
                          str(action.q_value))
                    print("XYZ : \t" + str(x) + "\t" + str(y) + "\t" +
                          str(z))

                num_find_loc = num_find_loc + 1
                if num_find_loc > 5:
                    prev_q_value = 0
                    x = 0
                    y = 0
                    z = 0
                    num_find_loc = 0
                    best_angle = 0
                    best_center[0] = 0
                    best_center[1] = 0

            time.sleep(0.01)
        except Exception as inst:
            print(inst)
            time.sleep(1)
            pass
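# A minimal sketch (not from the original source) of the pinhole deprojection
# that convert_depth_frame_to_pointcloud() above is assumed to perform: given
# a pixel (u, v), its measured depth in meters, and intrinsics (fx, fy, cx,
# cy), recover the 3D point in the camera frame.
def deproject_pixel(u, v, depth, fx, fy, cx, cy):
    # Standard pinhole model: x = (u - cx) * z / fx, y = (v - cy) * z / fy.
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    z = depth
    return x, y, z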
if "input_images" in policy_config["vis"] and policy_config["vis"][ "input_images"]: vis.figure(size=(10, 10)) num_plot = 1 if segmask is not None: num_plot = 2 vis.subplot(1, num_plot, 1) vis.imshow(depth_im) if segmask is not None: vis.subplot(1, num_plot, 2) vis.imshow(segmask) vis.show() # Create state. rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im) state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask) # Set input sizes for fully-convolutional policy. if fully_conv: policy_config["metric"]["fully_conv_gqcnn_config"][ "im_height"] = depth_im.shape[0] policy_config["metric"]["fully_conv_gqcnn_config"][ "im_width"] = depth_im.shape[1] # Init policy. if fully_conv: # TODO(vsatish): We should really be doing this in some factory policy. if policy_config["type"] == "fully_conv_suction": policy = FullyConvolutionalGraspingPolicySuction(policy_config) elif policy_config["type"] == "fully_conv_pj":
def run_gqcnn():
    global depth_im
    global color_im
    global segmask
    global policy
    global im3
    global ax3
    global prev_q_value
    global prev_depth
    global toggle
    global t
    global x
    global y
    global z
    global best_angle
    global depth_raw
    global im1
    global ax1
    global num_find_loc
    global state
    global loc_count
    global no_find
    global center
    global q_value
    global angle
    global no_valid_grasp_count
    global no_valid_move_y

    no_valid_grasp_count = 0
    best_angle = 0
    x = 0.0
    y = 0.5

    while 1:
        try:
            print("GQCNN IS RUNNING")
            #depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
            if "input_images" in policy_config["vis"] and policy_config["vis"][
                    "input_images"]:
                im1.set_data(depth_im)
                plt.pause(0.001)
                #pass

            # Create state.
            print("GQCNN IS RUNNING2")
            rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
            state_gqcnn = RgbdImageState(rgbd_im, camera_intr,
                                         segmask=segmask)

            # Query policy.
            policy_start = time.time()
            try:
                action = policy(state_gqcnn)
                logger.info("Planning took %.3f sec" %
                            (time.time() - policy_start))
                no_valid_grasp_count = 0
            except:
                no_valid_grasp_count = no_valid_grasp_count + 1
                time.sleep(0.3)

            # Vis final grasp.
            if policy_config["vis"]["final_grasp"]:
                # vis.imshow(segmask,
                #            vmin=policy_config["vis"]["vmin"],
                #            vmax=policy_config["vis"]["vmax"])
                #im3.set_data(rgbd_im.depth)
                center[0] = action.grasp.center[0] + 96
                center[1] = action.grasp.center[1] + 96
                q_value = action.q_value
                angle = float(action.grasp.angle) * 180 / 3.141592

                print("center : \t" + str(action.grasp.center))
                print("angle : \t" + str(action.grasp.angle))
                print("Depth : \t" + str(action.grasp.depth))

                if (prev_q_value < action.q_value):
                    #vis.grasp(action.grasp, scale=1, show_center=True, show_axis=True)
                    #vis.title("Planned grasp at depth {0:.3f}m with Q={1:.3f}".format(
                    #    action.grasp.depth, action.q_value))
                    prev_q_value = action.q_value
                    prev_depth = action.grasp.depth
                    best_center[0] = action.grasp.center[0] + 96
                    best_center[1] = action.grasp.center[1] + 96

                    convert_data = convert_2d_3d(int(best_center[0]),
                                                 int(best_center[1]))
                    x = -1 * convert_data[0] / 1000 + no_valid_move_y
                    y = (-1 * convert_data[1] / 1000) + 0.41
                    z = convert_data[2] / 1000
                    # x=-(best_center[0]-592)*0.00065625 +0.00  #592
                    # y=-(best_center[1]-361)*0.000673611+0.46  #361

                    best_angle = action.grasp.angle
                    best_angle = float(best_angle) * 180 / 3.141592

                    print("gqcnn_best_center : \t" + str(x) + "," + str(y))
                    print("best_angle : \t" + str(best_angle))
                    print("Q_value : \t" + str(action.q_value))

                    num_find_loc = num_find_loc + 1

            # vis.show()
            #plt.show()
            time.sleep(0.3)
            #plt.close()
        except:
            time.sleep(1)
            pass