def get_segmented_point_cloud_world(self, cfg, point_cloud_world):
    # read params
    min_pt_box = np.array(cfg['min_pt'])
    max_pt_box = np.array(cfg['max_pt'])
    box = Box(min_pt_box, max_pt_box, self.WORLD_FRAME)
    print min_pt_box
    print max_pt_box

    ch, num = point_cloud_world.data.shape
    print num
    A = point_cloud_world.data
    count = 0
    for i in range(num):
        if min_pt_box[0] < A[0][i] < max_pt_box[0] and \
           min_pt_box[1] < A[1][i] < max_pt_box[1] and \
           min_pt_box[2] < A[2][i] < max_pt_box[2]:
            count += 1
    print count

    seg_point_cloud_world, _ = point_cloud_world.box_mask(box)
    #seg_point_cloud_world = point_cloud_world
    return seg_point_cloud_world
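# --- Hedged usage sketch (not part of the original file) ---
# Minimal example of the workspace cropping used above, on a tiny synthetic
# cloud. Box and PointCloud come from autolab_core; the bounds and point data
# below are made up for illustration.
import numpy as np
from autolab_core import Box, PointCloud

min_pt = np.array([0.2, -0.2, 0.0])
max_pt = np.array([0.6, 0.2, 0.3])
box = Box(min_pt, max_pt, frame='world')

# 3xN array of world-frame points: first column inside the box, second outside
pts = np.array([[0.4, 1.0],
                [0.0, 1.0],
                [0.1, 1.0]])
cloud_world = PointCloud(pts, frame='world')

# box_mask returns the cropped cloud and the indices of the retained points
seg_cloud, valid_indices = cloud_world.box_mask(box)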
save_raw = config['save_raw']
vis = config['vis']

# open gui
gui = plt.figure(0, figsize=(8,8))
plt.ion()
plt.title('INITIALIZING')
plt.imshow(np.zeros([100,100]), cmap=plt.cm.gray_r)
plt.axis('off')
plt.draw()
plt.pause(GUI_PAUSE)

# read workspace bounds
workspace_box = Box(np.array(workspace_config['min_pt']),
                    np.array(workspace_config['max_pt']),
                    frame='world')

# read workspace objects
workspace_objects = {}
for obj_key, obj_config in workspace_config['objects'].iteritems():
    mesh_filename = obj_config['mesh_filename']
    pose_filename = obj_config['pose_filename']
    obj_mesh = trimesh.load_mesh(mesh_filename)
    obj_pose = RigidTransform.load(pose_filename)
    obj_mat_props = MaterialProperties(smooth=True, wireframe=False)
    scene_obj = SceneObject(obj_mesh, obj_pose, obj_mat_props)
    workspace_objects[obj_key] = scene_obj

# setup each sensor
def detect(self, color_im, depth_im, cfg, camera_intr,
           T_camera_world,
           vis_foreground=False, vis_segmentation=False):
    """Detects all relevant objects in an rgbd image pair using foreground masking.

    Parameters
    ----------
    color_im : :obj:`ColorImage`
        color image for detection
    depth_im : :obj:`DepthImage`
        depth image for detection (corresponds to color image)
    cfg : :obj:`YamlConfig`
        parameters of detection function
    camera_intr : :obj:`CameraIntrinsics`
        intrinsics of the camera
    T_camera_world : :obj:`autolab_core.RigidTransform`
        registration of the camera to world frame

    Returns
    -------
    :obj:`list` of :obj:`RgbdDetection`
        all detections in the image
    """
    # read params
    min_pt_box = np.array(cfg['min_pt'])
    max_pt_box = np.array(cfg['max_pt'])
    min_contour_area = cfg['min_contour_area']
    max_contour_area = cfg['max_contour_area']
    min_box_area = cfg['min_box_area']
    max_box_area = cfg['max_box_area']
    box_padding_px = cfg['box_padding_px']
    crop_height = cfg['image_height']
    crop_width = cfg['image_width']
    depth_grad_thresh = cfg['depth_grad_thresh']
    point_cloud_mask_only = cfg['point_cloud_mask_only']
    w = cfg['filter_dim']

    half_crop_height = float(crop_height) / 2
    half_crop_width = float(crop_width) / 2
    half_crop_dims = np.array([half_crop_height, half_crop_width])

    fill_depth = np.max(depth_im.data[depth_im.data > 0])

    kinect2_denoising = False
    if 'kinect2_denoising' in cfg.keys() and cfg['kinect2_denoising']:
        kinect2_denoising = True
        depth_offset = cfg['kinect2_noise_offset']
        max_depth = cfg['kinect2_noise_max_depth']

    box = Box(min_pt_box, max_pt_box, 'world')

    # project into 3D
    point_cloud_cam = camera_intr.deproject(depth_im)
    point_cloud_world = T_camera_world * point_cloud_cam
    seg_point_cloud_world, _ = point_cloud_world.box_mask(box)
    seg_point_cloud_cam = T_camera_world.inverse() * seg_point_cloud_world
    depth_im_seg = camera_intr.project_to_image(seg_point_cloud_cam)

    # mask image using background detection
    bgmodel = color_im.background_model()
    binary_im = depth_im_seg.to_binary()

    # filter the image
    y, x = np.ogrid[-w/2+1:w/2+1, -w/2+1:w/2+1]
    mask = x*x + y*y <= w/2*w/2
    filter_struct = np.zeros([w,w]).astype(np.uint8)
    filter_struct[mask] = 1
    binary_im_filtered_data = snm.binary_dilation(binary_im.data, structure=filter_struct)
    binary_im_filtered = BinaryImage(binary_im_filtered_data.astype(np.uint8),
                                     frame=binary_im.frame,
                                     threshold=0)

    # find all contours
    contours = binary_im_filtered.find_contours(min_area=min_contour_area, max_area=max_contour_area)

    if vis_foreground:
        plt.figure()
        plt.subplot(1,3,1)
        plt.imshow(color_im.data)
        plt.axis('off')
        plt.subplot(1,3,2)
        plt.imshow(binary_im.data, cmap=plt.cm.gray)
        plt.axis('off')
        plt.subplot(1,3,3)
        plt.imshow(binary_im_filtered.data, cmap=plt.cm.gray)
        plt.axis('off')
        plt.show()

    # switch to just return the mean of nonzero_px
    if point_cloud_mask_only == 1:
        center_px = np.mean(binary_im_filtered.nonzero_pixels(), axis=0)
        ci = center_px[0]
        cj = center_px[1]
        binary_thumbnail = binary_im_filtered.crop(crop_height, crop_width, ci, cj)
        color_thumbnail = color_im.crop(crop_height, crop_width, ci, cj)
        depth_thumbnail = depth_im.crop(crop_height, crop_width, ci, cj)
        thumbnail_intr = camera_intr
        if camera_intr is not None:
            thumbnail_intr = camera_intr.crop(crop_height, crop_width, ci, cj)
        query_box = Box(center_px - half_crop_dims, center_px + half_crop_dims)
        return [RgbdDetection(color_thumbnail,
                              depth_thumbnail,
                              query_box,
                              binary_thumbnail=binary_thumbnail,
                              contour=None,
                              camera_intr=thumbnail_intr)]

    # convert contours to detections
    detections = []
    for i, contour in enumerate(contours):
        orig_box = contour.bounding_box
        logging.debug('Orig box %d area: %.3f' %(i, orig_box.area))
        if orig_box.area > min_box_area and orig_box.area < max_box_area:
            # convert orig bounding box to query bounding box
            min_pt = orig_box.center - half_crop_dims
            max_pt = orig_box.center + half_crop_dims
            query_box = Box(min_pt, max_pt, frame=orig_box.frame)

            # segment color to get refined detection
            contour_mask = binary_im_filtered.contour_mask(contour)
            binary_thumbnail = contour_mask.crop(query_box.height, query_box.width,
                                                 query_box.ci, query_box.cj)
        else:
            # otherwise take original bounding box
            query_box = Box(contour.bounding_box.min_pt - box_padding_px,
                            contour.bounding_box.max_pt + box_padding_px,
                            frame=contour.bounding_box.frame)
            binary_thumbnail = binary_im_filtered.crop(query_box.height, query_box.width,
                                                       query_box.ci, query_box.cj)

        # crop to get thumbnails
        color_thumbnail = color_im.crop(query_box.height, query_box.width,
                                        query_box.ci, query_box.cj)
        depth_thumbnail = depth_im.crop(query_box.height, query_box.width,
                                        query_box.ci, query_box.cj)
        thumbnail_intr = camera_intr
        if camera_intr is not None:
            thumbnail_intr = camera_intr.crop(query_box.height, query_box.width,
                                              query_box.ci, query_box.cj)

        # fix depth thumbnail
        depth_thumbnail = depth_thumbnail.replace_zeros(fill_depth)
        if kinect2_denoising:
            depth_data = depth_thumbnail.data
            min_depth = np.min(depth_data)
            binary_mask_data = binary_thumbnail.data
            depth_mask_data = depth_thumbnail.mask_binary(binary_thumbnail).data
            depth_mask_data += depth_offset
            depth_data[binary_mask_data > 0] = depth_mask_data[binary_mask_data > 0]
            depth_thumbnail = DepthImage(depth_data, depth_thumbnail.frame)

        # append to detections
        detections.append(RgbdDetection(color_thumbnail,
                                        depth_thumbnail,
                                        query_box,
                                        binary_thumbnail=binary_thumbnail,
                                        contour=contour,
                                        camera_intr=thumbnail_intr))

    return detections
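# --- Hedged usage sketch (not part of the original file) ---
# Rough illustration of calling the point-cloud-box detect() above. The config
# values are made up; only the keys are taken from the code. It assumes a
# detector instance plus color_im, depth_im, camera_intr, and T_camera_world
# are already in scope, as in the surrounding pipeline.
cfg = {
    'min_pt': [0.2, -0.24, 0.0],
    'max_pt': [0.56, 0.21, 0.3],
    'min_contour_area': 50.0,
    'max_contour_area': 1e6,
    'min_box_area': 100.0,
    'max_box_area': 1e6,
    'box_padding_px': 15,
    'image_height': 227,
    'image_width': 227,
    'depth_grad_thresh': 0.025,
    'point_cloud_mask_only': 0,
    'filter_dim': 5,
}
detections = detector.detect(color_im, depth_im, cfg,
                             camera_intr, T_camera_world,
                             vis_foreground=False)
# each RgbdDetection carries color, depth, and binary thumbnails for one object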
def detect(self, color_im, depth_im, cfg,
           camera_intr=None,
           T_camera_world=None,
           vis_foreground=False, vis_segmentation=False):
    """Detects all relevant objects in an rgbd image pair using foreground masking.

    Parameters
    ----------
    color_im : :obj:`ColorImage`
        color image for detection
    depth_im : :obj:`DepthImage`
        depth image for detection (corresponds to color image)
    cfg : :obj:`YamlConfig`
        parameters of detection function
    camera_intr : :obj:`CameraIntrinsics`
        intrinsics of the camera
    T_camera_world : :obj:`autolab_core.RigidTransform`
        registration of the camera to world frame

    Returns
    -------
    :obj:`list` of :obj:`RgbdDetection`
        all detections in the image
    """
    # read params
    foreground_mask_tolerance = cfg['foreground_mask_tolerance']
    min_contour_area = cfg['min_contour_area']
    max_contour_area = cfg['max_contour_area']
    min_box_area = cfg['min_box_area']
    max_box_area = cfg['max_box_area']
    box_padding_px = cfg['box_padding_px']
    crop_height = cfg['image_height']
    crop_width = cfg['image_width']
    depth_grad_thresh = cfg['depth_grad_thresh']
    w = cfg['filter_dim']

    half_crop_height = float(crop_height) / 2
    half_crop_width = float(crop_width) / 2
    half_crop_dims = np.array([half_crop_height, half_crop_width])

    fill_depth = np.max(depth_im.data[depth_im.data > 0])

    kinect2_denoising = False
    if 'kinect2_denoising' in cfg.keys() and cfg['kinect2_denoising']:
        kinect2_denoising = True
        depth_offset = cfg['kinect2_noise_offset']
        max_depth = cfg['kinect2_noise_max_depth']

    # mask image using background detection
    bgmodel = color_im.background_model()
    binary_im = color_im.foreground_mask(foreground_mask_tolerance, bgmodel=bgmodel)

    # filter the image
    y, x = np.ogrid[-w/2+1:w/2+1, -w/2+1:w/2+1]
    mask = x*x + y*y <= w/2*w/2
    filter_struct = np.zeros([w,w]).astype(np.uint8)
    filter_struct[mask] = 1
    binary_im_filtered = binary_im.apply(snm.grey_closing, structure=filter_struct)

    # find all contours
    contours = binary_im_filtered.find_contours(min_area=min_contour_area, max_area=max_contour_area)

    if vis_foreground:
        plt.figure()
        plt.subplot(1,2,1)
        plt.imshow(color_im.data)
        plt.axis('off')
        plt.subplot(1,2,2)
        plt.imshow(binary_im_filtered.data, cmap=plt.cm.gray)
        plt.axis('off')
        plt.show()

    # threshold gradients of depth
    depth_im = depth_im.threshold_gradients(depth_grad_thresh)

    # convert contours to detections
    detections = []
    for contour in contours:
        orig_box = contour.bounding_box
        if orig_box.area > min_box_area and orig_box.area < max_box_area:
            # convert orig bounding box to query bounding box
            min_pt = orig_box.center - half_crop_dims
            max_pt = orig_box.center + half_crop_dims
            query_box = Box(min_pt, max_pt, frame=orig_box.frame)

            # segment color to get refined detection
            color_thumbnail = color_im.crop(query_box.height, query_box.width,
                                            query_box.ci, query_box.cj)
            binary_thumbnail, segment_thumbnail, query_box = self._segment_color(color_thumbnail,
                                                                                 query_box,
                                                                                 bgmodel,
                                                                                 cfg,
                                                                                 vis_segmentation=vis_segmentation)
            if binary_thumbnail is None:
                continue
        else:
            # otherwise take original bounding box
            query_box = Box(contour.bounding_box.min_pt - box_padding_px,
                            contour.bounding_box.max_pt + box_padding_px,
                            frame=contour.bounding_box.frame)
            binary_thumbnail = binary_im_filtered.crop(query_box.height, query_box.width,
                                                       query_box.ci, query_box.cj)

        # crop to get thumbnails
        color_thumbnail = color_im.crop(query_box.height, query_box.width,
                                        query_box.ci, query_box.cj)
        depth_thumbnail = depth_im.crop(query_box.height, query_box.width,
                                        query_box.ci, query_box.cj)
        thumbnail_intr = camera_intr
        if camera_intr is not None:
            thumbnail_intr = camera_intr.crop(query_box.height, query_box.width,
                                              query_box.ci, query_box.cj)

        # fix depth thumbnail
        depth_thumbnail = depth_thumbnail.replace_zeros(fill_depth)
        if kinect2_denoising:
            depth_data = depth_thumbnail.data
            min_depth = np.min(depth_data)
            binary_mask_data = binary_thumbnail.data
            depth_mask_data = depth_thumbnail.mask_binary(binary_thumbnail).data
            depth_mask_data += depth_offset
            depth_data[binary_mask_data > 0] = depth_mask_data[binary_mask_data > 0]
            depth_thumbnail = DepthImage(depth_data, depth_thumbnail.frame)

        # append to detections
        detections.append(RgbdDetection(color_thumbnail,
                                        depth_thumbnail,
                                        query_box,
                                        binary_thumbnail=binary_thumbnail,
                                        contour=contour,
                                        camera_intr=thumbnail_intr))

    return detections
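# --- Hedged config sketch (not part of the original file) ---
# Keys this foreground-mask variant reads from cfg; the values below are made
# up for illustration. The color_seg_* and contour_dist_thresh keys are
# consumed by _segment_color, shown next.
cfg = {
    'foreground_mask_tolerance': 40,
    'min_contour_area': 50.0,
    'max_contour_area': 1e6,
    'min_box_area': 100.0,
    'max_box_area': 1e6,
    'box_padding_px': 15,
    'image_height': 227,
    'image_width': 227,
    'depth_grad_thresh': 0.025,
    'filter_dim': 5,
    # used by _segment_color
    'color_seg_rgb_weight': 0.5,
    'color_seg_hsv_weight': 0.5,
    'color_seg_num_clusters': 3,
    'color_seg_dist_pctile': 0.9,
    'color_seg_dist_thresh': 5.0,
    'color_seg_min_bg_dist': 10.0,
    'contour_dist_thresh': 50.0,
}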
def _segment_color(self, color_im, bounding_box, bgmodel, cfg, vis_segmentation=False):
    """Re-segments a color image to isolate an object of interest
    using foreground masking and kmeans.
    """
    # read params
    foreground_mask_tolerance = cfg['foreground_mask_tolerance']
    color_seg_rgb_weight = cfg['color_seg_rgb_weight']
    color_seg_num_clusters = cfg['color_seg_num_clusters']
    color_seg_hsv_weight = cfg['color_seg_hsv_weight']
    color_seg_dist_pctile = cfg['color_seg_dist_pctile']
    color_seg_dist_thresh = cfg['color_seg_dist_thresh']
    color_seg_min_bg_dist = cfg['color_seg_min_bg_dist']
    min_contour_area = cfg['min_contour_area']
    contour_dist_thresh = cfg['contour_dist_thresh']

    # foreground masking
    binary_im = color_im.foreground_mask(foreground_mask_tolerance, bgmodel=bgmodel)
    binary_im = binary_im.prune_contours(area_thresh=min_contour_area, dist_thresh=contour_dist_thresh)
    if binary_im is None:
        return None, None, None
    color_im = color_im.mask_binary(binary_im)

    # kmeans segmentation
    segment_im = color_im.segment_kmeans(color_seg_rgb_weight,
                                         color_seg_num_clusters,
                                         hue_weight=color_seg_hsv_weight)

    # keep the segment that is farthest from the background
    bg_dists = []
    hsv_bgmodel = 255 * np.array(colorsys.rgb_to_hsv(float(bgmodel[0]) / 255,
                                                     float(bgmodel[1]) / 255,
                                                     float(bgmodel[2]) / 255))
    hsv_bgmodel = np.r_[color_seg_rgb_weight * np.array(bgmodel),
                        color_seg_hsv_weight * hsv_bgmodel[:1]]

    for k in range(segment_im.num_segments-1):
        seg_mask = segment_im.segment_mask(k)
        color_im_segment = color_im.mask_binary(seg_mask)
        color_im_segment_data = color_im_segment.nonzero_data()
        color_im_segment_data = np.c_[color_seg_rgb_weight * color_im_segment_data,
                                      color_seg_hsv_weight * color_im_segment.nonzero_hsv_data()[:,:1]]

        # take the median distance from the background
        bg_dist = np.median(np.linalg.norm(color_im_segment_data - hsv_bgmodel, axis=1))
        if vis_segmentation:
            logging.info('BG Dist for segment %d: %.4f' %(k, bg_dist))
        bg_dists.append(bg_dist)

    # sort by distance
    dists_and_indices = zip(np.arange(len(bg_dists)), bg_dists)
    dists_and_indices.sort(key=lambda x: x[1], reverse=True)

    # mask out the segment in the binary image
    if color_seg_num_clusters > 1 and \
       abs(dists_and_indices[0][1] - dists_and_indices[1][1]) > color_seg_dist_thresh and \
       dists_and_indices[1][1] < color_seg_min_bg_dist:
        obj_segment = dists_and_indices[0][0]
        obj_seg_mask = segment_im.segment_mask(obj_segment)
        binary_im = binary_im.mask_binary(obj_seg_mask)
        binary_im, diff_px = binary_im.center_nonzero()
        bounding_box = Box(bounding_box.min_pt.astype(np.float32) - diff_px,
                           bounding_box.max_pt.astype(np.float32) - diff_px,
                           bounding_box.frame)

    if vis_segmentation:
        plt.figure()
        plt.subplot(1,3,1)
        plt.imshow(color_im.data)
        plt.axis('off')
        plt.subplot(1,3,2)
        plt.imshow(segment_im.data)
        plt.colorbar()
        plt.axis('off')
        plt.subplot(1,3,3)
        plt.imshow(binary_im.data, cmap=plt.cm.gray)
        plt.axis('off')
        plt.show()

    return binary_im, segment_im, bounding_box
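# --- Hedged illustration (not part of the original file) ---
# The background-distance test above compares each kmeans segment to the
# background model in a weighted RGB + hue space. This standalone snippet
# builds that feature for a single background color; the color and the two
# weights are made-up stand-ins for bgmodel, color_seg_rgb_weight, and
# color_seg_hsv_weight.
import colorsys
import numpy as np

bgmodel = np.array([198, 198, 198])   # assumed mean background RGB
rgb_weight = 0.5                      # hypothetical color_seg_rgb_weight
hsv_weight = 0.5                      # hypothetical color_seg_hsv_weight

# convert to HSV on a 0-255 scale, keep only the hue channel
hsv_bg = 255 * np.array(colorsys.rgb_to_hsv(bgmodel[0] / 255.0,
                                            bgmodel[1] / 255.0,
                                            bgmodel[2] / 255.0))
# feature = [w_rgb * R, w_rgb * G, w_rgb * B, w_hsv * H]
feature_bg = np.r_[rgb_weight * bgmodel, hsv_weight * hsv_bg[:1]]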
save_raw = config["save_raw"] vis = config["vis"] # open gui gui = plt.figure(0, figsize=(8, 8)) plt.ion() plt.title("INITIALIZING") plt.imshow(np.zeros([100, 100]), cmap=plt.cm.gray_r) plt.axis("off") plt.draw() plt.pause(GUI_PAUSE) # read workspace bounds workspace_box = Box( np.array(workspace_config["min_pt"]), np.array(workspace_config["max_pt"]), frame="world", ) # read workspace objects workspace_objects = {} for obj_key, obj_config in workspace_config["objects"].iteritems(): mesh_filename = obj_config["mesh_filename"] pose_filename = obj_config["pose_filename"] obj_mesh = trimesh.load_mesh(mesh_filename) obj_pose = RigidTransform.load(pose_filename) obj_mat_props = MaterialProperties(smooth=True, wireframe=False) scene_obj = SceneObject(obj_mesh, obj_pose, obj_mat_props) workspace_objects[obj_key] = scene_obj # setup each sensor
vis = config['vis']

# make output dir if needed
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# read rescale factor
rescale_factor = 1.0
if 'rescale_factor' in config.keys():
    rescale_factor = config['rescale_factor']

# read workspace bounds
workspace = None
if 'workspace' in config.keys():
    workspace = Box(np.array(config['workspace']['min_pt']),
                    np.array(config['workspace']['max_pt']),
                    frame='world')

# init ros node
rospy.init_node('capture_test_images') #NOTE: this is required by the camera sensor classes
Logger.reconfigure_root()

for sensor_name, sensor_config in config['sensors'].iteritems():
    logger.info('Capturing images from sensor %s' %(sensor_name))
    save_dir = os.path.join(output_dir, sensor_name)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # read params
    sensor_type = sensor_config['type']
    sensor_frame = sensor_config['frame']
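# --- Hedged config sketch (not part of the original file) ---
# Shape of the config the capture script above expects, written out as a
# Python dict for illustration. The sensor entry and all values are made up;
# only the key names ('vis', 'rescale_factor', 'workspace', 'sensors',
# 'type', 'frame') come from the code above.
config = {
    'vis': 1,
    'rescale_factor': 0.5,           # optional
    'workspace': {                   # optional world-frame crop
        'min_pt': [0.22, -0.22, 0.005],
        'max_pt': [0.56, 0.22, 0.2],
    },
    'sensors': {
        'primesense_overhead': {
            'type': 'primesense',
            'frame': 'primesense_overhead',
        },
    },
}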
             scale=0.001, color=(1, 0, 0), subsample=10)
vis3d.points(point_cloud_filtered, scale=0.001, color=(0, 0, 1), subsample=10)
vis3d.show()

pcl_start = time.time()

# subsample point cloud
#rate = int(1.0 / rescale_factor)**2
#point_cloud_filtered = point_cloud_filtered.subsample(rate, random=False)
box = Box(np.array([0.2, -0.24, min_height]),
          np.array([0.56, 0.21, max_height]),
          frame='world')
point_cloud_masked, valid_indices = point_cloud_filtered.box_mask(box)
invalid_indices = np.setdiff1d(np.arange(point_cloud_filtered.num_points), valid_indices)

# apply PCL filters
pcl_cloud = pcl.PointCloud(point_cloud_masked.data.T.astype(np.float32))
tree = pcl_cloud.make_kdtree()
ec = pcl_cloud.make_EuclideanClusterExtraction()
ec.set_ClusterTolerance(0.005)
#ec.set_MinClusterSize(1)
#ec.set_MaxClusterSize(250)
ec.set_MinClusterSize(250)
ec.set_MaxClusterSize(1000000)
ec.set_SearchMethod(tree)
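# --- Hedged continuation sketch (not part of the original file) ---
# python-pcl's EuclideanClusterExtraction returns per-cluster point indices
# from Extract(); the loop below shows one way the clusters configured above
# could be pulled back into autolab_core PointClouds. Written from memory of
# the python-pcl API, so treat the call name as an assumption.
cluster_indices = ec.Extract()
clusters = []
for indices in cluster_indices:
    # index back into the masked world-frame cloud (3xN data array)
    cluster_data = point_cloud_masked.data[:, indices]
    clusters.append(PointCloud(cluster_data, frame='world'))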
# Create ICP objects
feature_matcher = PointToPlaneFeatureMatcher()
solver = PointToPlaneICPSolver()

# Start physical depth and color cameras
logging.info('Creating Phoxi sensor')
sensor_config = {'frame': 'phoxi', 'device_name': 1703005, 'size': 'small'}
phoxi_sensor = RgbdSensorFactory.sensor('phoxi', sensor_config)
logging.info('Starting Phoxi sensor')
phoxi_sensor.start()
logging.info('Phoxi sensor initialized')

# Create box for filtering point cloud
min_pt = np.array([0.25, -0.12, 0])
max_pt = np.array([0.5, 0.12, 0.25])
mask_box = Box(min_pt, max_pt, 'world')

env = GraspingEnv(config, config['vis'])
dataset = get_dataset(config, args)
datapoint = dataset.datapoint_template

obj_id, obj_id_to_key = 0, {}
while not rospy.is_shutdown():
    try:
        env.reset()
    except NoRemainingSamplesException:
        break
    obj_id_to_key[obj_id] = env.state.obj.key
    print '\n\n\n\n\n\n\n\n\n\nSampled object', env.state.obj.key
    env.state.obj.T_obj_world.translation[0] += .4 # put object in bin, not outside it
import numpy as np

from sensor_msgs.msg import PointCloud2
from sensor_msgs import point_cloud2
from std_msgs.msg import Header, Int64
import rospy

from autolab_core import Box, PointCloud, RigidTransform
from perception import CameraIntrinsics
from visualization import Visualizer3D as vis3d

CAMERA_INTR_FILENAME = '/nfs/diskstation/calib/primesense_overhead/primesense_overhead.intr'
CAMERA_POSE_FILENAME = '/nfs/diskstation/calib/primesense_overhead/primesense_overhead_to_world.tf'

camera_intr = CameraIntrinsics.load(CAMERA_INTR_FILENAME)
T_camera_world = RigidTransform.load(CAMERA_POSE_FILENAME).as_frames('camera', 'world')
workspace = Box(min_pt=np.array([0.22, -0.22, 0.005]),
                max_pt=np.array([0.56, 0.22, 0.2]),
                frame='world')
pub = None

def cloudCallback(msg):
    global T_camera_world

    # read the cloud
    cloud_array = []
    for p in point_cloud2.read_points(msg):
        cloud_array.append([p[0], p[1], p[2]])
    cloud_array = np.array(cloud_array).T

    # form a point cloud
    point_cloud_camera = PointCloud(cloud_array, frame='camera')
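# --- Hedged usage sketch (not part of the original file) ---
# The callback above is truncated in this excerpt. Separately from its body,
# this is how such a callback is typically wired into a node; the node name
# and point cloud topic are assumptions, not taken from the original script.
if __name__ == '__main__':
    rospy.init_node('cloud_listener')
    rospy.Subscriber('/camera/depth/points', PointCloud2, cloudCallback)
    rospy.spin()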