def __init__(self, path_to_images, frame=None):
    """Create a new virtualized Primesense sensor.

    This requires a directory containing a specific set of files.

    First, the directory must contain a set of images, where each
    image has three files:
        - color_{#}.png
        - depth_{#}.npy
        - ir_{#}.npy
    In these, the {#} is replaced with the integer index for the image.
    These indices should start at zero and increase consecutively.

    Second, the directory must contain CameraIntrinsics files for the
    color and ir cameras:
        - {frame}_color.intr
        - {frame}_ir.intr
    In these, the {frame} is replaced with the reference frame name
    that is passed as a parameter to this function.

    Parameters
    ----------
    path_to_images : :obj:`str`
        The path to a directory containing images that the virtualized
        sensor will return.
    frame : :obj:`str`
        The name of the frame of reference in which the sensor resides.
        If None, this will be discovered from the files in the directory.
    """
    self._running = False
    self._path_to_images = path_to_images
    self._im_index = 0
    self._num_images = 0
    self._frame = frame
    filenames = os.listdir(self._path_to_images)

    # Count the images by counting the depth .npy files.
    for filename in filenames:
        if filename.find('depth') != -1 and filename.endswith('.npy'):
            self._num_images += 1

    # Set the frame dynamically from the color intrinsics filename.
    if self._frame is None:
        for filename in filenames:
            file_root, file_ext = os.path.splitext(filename)
            color_ind = file_root.rfind('color')
            if file_ext == INTR_EXTENSION and color_ind != -1:
                self._frame = file_root[:color_ind - 1]
                self._color_frame = file_root
                self._ir_frame = file_root
                break

    # Load color and IR camera intrinsics.
    color_intr_filename = os.path.join(self._path_to_images,
                                       '%s_color.intr' % self._frame)
    self._color_intr = CameraIntrinsics.load(color_intr_filename)
    ir_intr_filename = os.path.join(self._path_to_images,
                                    '%s_ir.intr' % self._frame)
    self._ir_intr = CameraIntrinsics.load(ir_intr_filename)
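# Usage sketch for the virtualized sensor above. Assumptions: the
# enclosing class is perception's VirtualSensor (only __init__ is shown
# here) and it follows the usual sensor interface of start()/frames()/
# stop(), with frames() returning the color, depth, and ir images that
# __init__ indexes. The directory path is hypothetical.
from perception import VirtualSensor

sensor = VirtualSensor('/path/to/recorded_images')
sensor.start()
color_im, depth_im, ir_im = sensor.frames()  # replayed in index order
sensor.stop()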
def load(save_dir):
    if not os.path.exists(save_dir):
        raise ValueError('Directory %s does not exist!' % save_dir)
    color_image_filename = os.path.join(save_dir, 'color.png')
    depth_image_filename = os.path.join(save_dir, 'depth.npy')
    camera_intr_filename = os.path.join(save_dir, 'camera.intr')
    segmask_filename = os.path.join(save_dir, 'segmask.npy')
    obj_segmask_filename = os.path.join(save_dir, 'obj_segmask.npy')
    state_filename = os.path.join(save_dir, 'state.pkl')

    # Load the intrinsics and the color/depth images in the camera frame.
    camera_intr = CameraIntrinsics.load(camera_intr_filename)
    color = ColorImage.open(color_image_filename, frame=camera_intr.frame)
    depth = DepthImage.open(depth_image_filename, frame=camera_intr.frame)

    # Optional segmentation masks and fully-observed state.
    segmask = None
    if os.path.exists(segmask_filename):
        segmask = BinaryImage.open(segmask_filename,
                                   frame=camera_intr.frame)
    obj_segmask = None
    if os.path.exists(obj_segmask_filename):
        obj_segmask = SegmentationImage.open(obj_segmask_filename,
                                             frame=camera_intr.frame)
    fully_observed = None
    if os.path.exists(state_filename):
        with open(state_filename, 'rb') as f:
            fully_observed = pkl.load(f)

    return RgbdImageState(RgbdImage.from_color_and_depth(color, depth),
                          camera_intr,
                          segmask=segmask,
                          obj_segmask=obj_segmask,
                          fully_observed=fully_observed)
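# Usage sketch, assuming the loader above is exposed as the static
# method RgbdImageState.load and that save_dir was written by the
# matching save routine (the path below is hypothetical).
state = RgbdImageState.load('/path/to/saved_state')
print(state.camera_intr.frame, state.rgbd_im.depth.shape)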
def plan_grasp(self, depth, rgb, resetting=False, camera_intr=None,
               segmask=None):
    """Compute possible grasps.

    Parameters
    ----------
    depth : :obj:`numpy.ndarray`
        Depth image.
    rgb : :obj:`numpy.ndarray`
        RGB image.
    resetting : bool
        Passed through to `execute_policy`.
    camera_intr : :obj:`perception.CameraIntrinsics`
        Intrinsic camera object.
    segmask : :obj:`perception.BinaryImage`
        Binary segmask of detected object.

    Returns
    -------
    :obj:`GQCNNGrasp`
        Computed optimal grasp.
    """
    if "FC" in self.model:
        assert segmask is not None, \
            "Fully-Convolutional policy expects a segmask."
    if camera_intr is None:
        camera_intr_filename = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "gqcnn/data/calib/primesense/primesense.intr")
        camera_intr = CameraIntrinsics.load(camera_intr_filename)

    depth_im = DepthImage(depth, frame=camera_intr.frame)
    color_im = ColorImage(rgb, frame=camera_intr.frame)

    # Restrict the segmask to pixels with valid depth.
    valid_px_mask = depth_im.invalid_pixel_mask().inverse()
    if segmask is None:
        segmask = valid_px_mask
    else:
        segmask = segmask.mask_binary(valid_px_mask)

    # Inpaint.
    depth_im = depth_im.inpaint(
        rescale_factor=self.config["inpaint_rescale_factor"])

    # Aggregate color and depth images into a single
    # BerkeleyAutomation/perception `RgbdImage`.
    self.rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)

    # Create an `RgbdImageState` with the `RgbdImage` and `CameraIntrinsics`.
    state = RgbdImageState(self.rgbd_im, camera_intr, segmask=segmask)

    # Set input sizes for fully-convolutional policy.
    if "FC" in self.model:
        self.policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_height"] = depth_im.shape[0]
        self.policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_width"] = depth_im.shape[1]

    return self.execute_policy(state, resetting)
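# Usage sketch. The `planner` object and its construction are
# assumptions for illustration; only plan_grasp itself is shown above,
# and the depth file path is hypothetical.
import numpy as np

depth = np.load('depth_0.npy')                # HxW float depth in meters
rgb = np.zeros(depth.shape + (3,), np.uint8)  # dummy color image
grasp = planner.plan_grasp(depth, rgb)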
def largest_planar_surface(filename, cam_int_file):
    # Load the image as a numpy array and the camera intrinsics
    image = np.load(filename)
    ci = CameraIntrinsics.load(cam_int_file)

    # Create and deproject a depth image of the data using the camera
    # intrinsics
    di = DepthImage(image, frame=ci.frame)
    di = di.inpaint()
    pc = ci.deproject(di)

    # Make a PCL type point cloud from the image
    p = pcl.PointCloud(pc.data.T.astype(np.float32))

    # Make a segmenter and segment the point cloud.
    seg = p.make_segmenter()
    seg.set_model_type(pcl.SACMODEL_PARALLEL_PLANE)
    seg.set_method_type(pcl.SAC_RANSAC)
    seg.set_distance_threshold(0.005)
    indices, model = seg.segment()

    return indices, model, image, pc
def find_clutter(pc, indices, cam_int_file):
    """Find clutter points, i.e. points that do not lie on the largest
    planar surface.

    `pc` and `indices` are the point cloud and plane indices returned
    by `largest_planar_surface`; the plane points are `pc.data.T[indices]`.
    """
    plane_pc_data = pc.data.T[indices]
    ci = CameraIntrinsics.load(cam_int_file)

    # Project the plane-only point cloud back into a depth image.
    plane_pc = PointCloud(plane_pc_data.T, pc.frame)
    di = ci.project_to_image(plane_pc)
    vis2d.figure()
    vis2d.imshow(di)
    vis2d.show()

    # Pixels with no valid plane depth are candidate clutter pixels.
    inv_pixel_mask = di.invalid_pixel_mask()
    vis2d.figure()
    vis2d.imshow(inv_pixel_mask)
    vis2d.show()

    # Collect the flattened indices of all invalid (non-plane) pixels.
    clutter = []
    for r in range(len(inv_pixel_mask.data)):
        for c in range(len(inv_pixel_mask.data[r])):
            if inv_pixel_mask.data[r][c] > 0:
                i = r * len(inv_pixel_mask.data[r]) + c
                clutter.append(i)
    return pc.data.T[clutter]
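# Usage sketch chaining the two helpers above (file paths are
# hypothetical).
indices, model, image, pc = largest_planar_surface('depth_image.npy',
                                                   'camera.intr')
clutter_points = find_clutter(pc, indices, 'camera.intr')
print('%d clutter points found' % len(clutter_points))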
# Read config.
config = YamlConfig(config_filename)
inpaint_rescale_factor = config["inpaint_rescale_factor"]
policy_config = config["policy"]

# Make relative paths absolute.
if "gqcnn_model" in policy_config["metric"]:
    policy_config["metric"]["gqcnn_model"] = model_path
    if not os.path.isabs(policy_config["metric"]["gqcnn_model"]):
        policy_config["metric"]["gqcnn_model"] = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..",
            policy_config["metric"]["gqcnn_model"])

# Setup sensor.
camera_intr = CameraIntrinsics.load(camera_intr_filename)

# Read images.
depth_data = np.load(depth_im_filename)
depth_im = DepthImage(depth_data, frame=camera_intr.frame)
color_im = ColorImage(np.zeros([depth_im.height, depth_im.width,
                                3]).astype(np.uint8),
                      frame=camera_intr.frame)

# Optionally read a segmask.
segmask = None
if segmask_filename is not None:
    segmask = BinaryImage.open(segmask_filename)
valid_px_mask = depth_im.invalid_pixel_mask().inverse()
if segmask is None:
    segmask = valid_px_mask
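# Continuation sketch: the remaining steps are assumptions that mirror
# gqcnn's example policy flow built from the objects above (inpaint,
# bundle into an RgbdImage, wrap in a state for the policy).
depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
rgbd_im = RgbdImage.from_color_and_depth(color_im, depth_im)
state = RgbdImageState(rgbd_im, camera_intr, segmask=segmask)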
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        Type of PCL detector, either 'euclidean' or 'region_growing'.
    config : dict
        Config for a PCL detector.
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks,
        pred_info, and modal_segmasks_processed subdirectories.
    test_config : dict
        Config containing dataset information.
    """
    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################
    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data; initialize to None so the later check is
    # safe when 'bin_masks' is absent from the test config.
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir, 'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()

        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(gt_mask,
                             (depth_im.shape[1], depth_im.shape[0])).astype(
                                 np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'),
                pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(
        resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir
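# Usage sketch for detect(). The directory layout and config values
# below are hypothetical; the keys mirror those read above.
test_config = {'path': '/data/bin_dataset',
               'indices': 'test_indices.npy',
               'images': 'depth_ims',
               'masks': 'modal_segmasks'}
euclidean_config = {'tolerance': 0.002,
                    'min_cluster_size': 100,
                    'max_cluster_size': 1000000}
detect('euclidean', euclidean_config, '/tmp/pcl_run', test_config)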
from meshpy import VirtualCamera, ObjFile, SceneObject
from perception import CameraIntrinsics
from visualization import Visualizer2D
from core import RigidTransform
import numpy as np
from matplotlib import pyplot as plt
from perception.object_render import RenderMode

dexnet_path = "/home/chris/optimal_manipulation_simulator/dex-net-new-api"

if __name__ == "__main__":
    print("\n\n\n\n")
    camera_intr = CameraIntrinsics.load("../config/primesense_overhead.intr")
    camera = VirtualCamera(camera_intr)

    of = ObjFile(dexnet_path + "/data/meshes/chess_pieces/WizRook.obj")
    rook1 = of.read()

    # Overhead camera looking straight down at the world origin.
    T_world_camera = RigidTransform(rotation=np.array([[-1, 0, 0],
                                                       [0, 1, 0],
                                                       [0, 0, -1]]),
                                    translation=np.array([0, 0, .2]).T,
                                    from_frame="world",
                                    to_frame="primesense_overhead")
    # T_world_camera = RigidTransform(rotation=np.array([[1, 0, 0],
    #                                                    [0, 1, 0],
    #                                                    [0, 0, 1]]),
    #                                 translation=np.array([-.2, 0, 0]).T,
    #                                 from_frame="world",
    #                                 to_frame="primesense_overhead")

    # squares = [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1),
    #            (1, 0), (1, 1)]
    # for i, square in enumerate(squares):
def prepare_dexnet(self):
    # Get configs.
    with open(os.path.join(self.model_path, "config.json"), "r") as f:
        model_config = json.load(f)
    # self.model_config = model_config
    # try:
    #     gqcnn_config = model_config["gqcnn"]
    #     gripper_mode = gqcnn_config["gripper_mode"]
    # except KeyError:
    #     gqcnn_config = model_config["gqcnn_config"]
    #     input_data_mode = gqcnn_config["input_data_mode"]
    #     if input_data_mode == "tf_image":
    #         gripper_mode = GripperMode.LEGACY_PARALLEL_JAW
    #     elif input_data_mode == "tf_image_suction":
    #         gripper_mode = GripperMode.LEGACY_SUCTION
    #     elif input_data_mode == "suction":
    #         gripper_mode = GripperMode.SUCTION
    #     elif input_data_mode == "multi_suction":
    #         gripper_mode = GripperMode.MULTI_SUCTION
    #     elif input_data_mode == "parallel_jaw":
    #         gripper_mode = GripperMode.PARALLEL_JAW
    #     else:
    #         raise ValueError(
    #             "Input data mode {} not supported!".format(input_data_mode))

    # Read config.
    config = YamlConfig(self.config_filename)
    self.inpaint_rescale_factor = config["inpaint_rescale_factor"]
    policy_config = config["policy"]
    self.policy_config = policy_config

    # Make relative paths absolute.
    if "gqcnn_model" in policy_config["metric"]:
        policy_config["metric"]["gqcnn_model"] = self.model_path
        if not os.path.isabs(policy_config["metric"]["gqcnn_model"]):
            policy_config["metric"]["gqcnn_model"] = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                policy_config["metric"]["gqcnn_model"])

    # Setup sensor.
    self.camera_intr = CameraIntrinsics.load(self.camera_intr_filename)

    # Init policy.
    # Set input sizes for fully-convolutional policy.
    if self.fully_conv:
        policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_height"] = self.height
        policy_config["metric"]["fully_conv_gqcnn_config"][
            "im_width"] = self.width

    if self.fully_conv:
        # TODO(vsatish): We should really be doing this in some factory
        # policy.
        if policy_config["type"] == "fully_conv_suction":
            self.policy = FullyConvolutionalGraspingPolicySuction(
                policy_config)
        elif policy_config["type"] == "fully_conv_pj":
            self.policy = FullyConvolutionalGraspingPolicyParallelJaw(
                policy_config)
        else:
            raise ValueError(
                "Invalid fully-convolutional policy type: {}".format(
                    policy_config["type"]))
    else:
        policy_type = "cem"
        if "type" in policy_config:
            policy_type = policy_config["type"]
        if policy_type == "ranking":
            self.policy = RobustGraspingPolicy(policy_config)
        elif policy_type == "cem":
            self.policy = CrossEntropyRobustGraspingPolicy(policy_config)
        else:
            raise ValueError("Invalid policy type: {}".format(policy_type))
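# Dispatch sketch: with fully_conv False and no "type" key in the
# policy config, prepare_dexnet falls through to the CEM branch. The
# `planner` object and attribute values are hypothetical.
planner.fully_conv = False
planner.model_path = '/models/GQCNN-4.0-PJ'
planner.config_filename = 'cfg/gqcnn_pj.yaml'
planner.prepare_dexnet()  # -> CrossEntropyRobustGraspingPolicy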
parser = argparse.ArgumentParser()
parser.add_argument('--intrinsics_file_path', type=str,
                    default=AZURE_KINECT_CALIB_DATA)
parser.add_argument('--transform_file_path', type=str,
                    default=AZURE_KINECT_EXTRINSICS)
parser.add_argument('--filename', type=str, default=None)
args = parser.parse_args()

# rospy.init_node("Test azure kinect calibration")
print('Starting robot')
fa = FrankaArm()
cv_bridge = CvBridge()
ir_intrinsics = CameraIntrinsics.load(args.intrinsics_file_path)
kinect2_left_to_world_transform = RigidTransform.load(
    args.transform_file_path)

# print('Opening Grippers')
# fa.open_gripper()

print('Reset with pose')
fa.reset_pose()
print('Reset with joints')
fa.reset_joints()
fa.close_gripper()
import numpy as np  # needed for np.load below; assumed missing from this excerpt
import pcl.pcl_visualization

# Autolab imports
from autolab_core import PointCloud
from perception import DepthImage, CameraIntrinsics
from visualization import Visualizer3D as vis3d
from visualization import Visualizer2D as vis2d
from clustering import *

# Load the image into a numpy array
image = np.load(
    '/nfs/diskstation/projects/dex-net/placing/datasets/real/sample_ims_05_22/depth_ims_numpy/image_000001.npy'
)

# Create a DepthImage using the array
ci = CameraIntrinsics.load(
    '/nfs/diskstation/projects/dex-net/placing/datasets/real/sample_ims_05_22/camera_intrinsics.intr'
)
di = DepthImage(image, frame=ci.frame)
pc = ci.deproject(di)

## Visualize the depth image
#vis2d.figure()
#vis2d.imshow(di)
#vis2d.show()

# Make and display a PCL type point cloud from the image
p = pcl.PointCloud(pc.data.T.astype(np.float32))

# Make a segmenter and segment the point cloud.
seg = p.make_segmenter()
seg.set_model_type(pcl.SACMODEL_PARALLEL_PLANE)
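# Continuation sketch: the excerpt above ends mid-setup; these are the
# remaining python-pcl RANSAC plane-fit calls, mirroring
# largest_planar_surface elsewhere in this codebase.
seg.set_method_type(pcl.SAC_RANSAC)
seg.set_distance_threshold(0.005)
indices, model = seg.segment()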
import numpy as np  # needed for np.load below; assumed missing from this excerpt

# Autolab imports
from autolab_core import PointCloud, RigidTransform
from perception import DepthImage, CameraIntrinsics, RenderMode
from visualization import Visualizer2D as vis2d, Visualizer3D as vis3d
from meshrender import Scene, MaterialProperties, AmbientLight, PointLight, \
    SceneObject, VirtualCamera
"""
This library uses an object mesh file and a depth image of a bin for
object placement to infer the best placement location for the given
object in the bin from the depth image.

@author: Ruta Joshi
"""

# Globals
#ci_file = '/nfs/diskstation/projects/dex-net/placing/datasets/real/sample_ims_05_22/camera_intrinsics.intr'
ci_file = '/nfs/diskstation/projects/dex-net/placing/datasets/sim/sim_06_22/image_dataset/depth_ims/intrinsics_000002.intr'
ci = CameraIntrinsics.load(ci_file)
cp_file = '/nfs/diskstation/projects/dex-net/placing/datasets/sim/sim_06_22/image_dataset/depth_ims/pose_000002.tf'
cp = RigidTransform.load(cp_file)


""" 1. Given depth image of bin, retrieve largest planar surface """
@profile
def largest_planar_surface(filename):
    # Load the image as a numpy array and the camera intrinsics
    image = np.load(filename)
    # Create and deproject a depth image of the data using the camera
    # intrinsics
    di = DepthImage(image, frame=ci.frame)
    di = di.inpaint()
    pc = ci.deproject(di)
    # Make a PCL type point cloud from the image
#from lib.networks import make_network
#from lib.datasets import make_data_loader
#from lib.utils.net_utils import load_network
from numpy.ctypeslib import ndpointer

best_center = [0, 0]
center = [0, 0]
prev_q_value = 0
q_value = 0
prev_depth = 0

#-------------------------------------gqcnn-----------------------------------
config_filename = os.path.join("gqcnn_pj_kinect.yaml")
config = YamlConfig(config_filename)
policy_config = config["policy"]
policy_type = "cem"
policy = CrossEntropyRobustGraspingPolicy(policy_config)
camera_intr = CameraIntrinsics.load("kinect.intr")

WIDTH = 512
HEIGHT = 512


def run_gqcnn():
    global depth_im
    global color_im
    global segmask
    global policy
    global prev_q_value
    global prev_depth
    global toggle
    global t
    global x
    global y
table_mesh.compute_vertex_normals()
table_mat_props = MaterialProperties(color=(0, 255, 0),
                                     ambient=0.5,
                                     diffuse=1.0,
                                     specular=1,
                                     shininess=0)

for k, stable_pose in enumerate(stable_poses):
    logging.info('Rendering stable pose %d' % k)

    # set resting pose
    T_obj_world = mesh.get_T_surface_obj(
        stable_pose.T_obj_table).as_frames('obj', 'world')

    # load camera intrinsics
    camera_intr = CameraIntrinsics.load(
        'data/camera_intr/primesense_carmine_108.intr')
    #camera_intr = camera_intr.resize(4)

    # create virtual camera
    virtual_camera = VirtualCamera(camera_intr)

    # create lighting props
    T_light_camera = RigidTransform(translation=[0, 0, 0],
                                    from_frame='light',
                                    to_frame=camera_intr.frame)
    light_props = LightingProperties(ambient=-0.25,
                                     diffuse=1,
                                     specular=0.25,
                                     T_light_camera=T_light_camera,
                                     cutoff=180)
Cornell = True
if Cornell:
    array = 72
    dset = 'Cornell'
    tensor = '00000'
else:
    array = 650
    dset = 'dexnet_2_tensor'
    tensor = '02536'

filename = ("/media/psf/Home/Documents/Grasping_Research/docker/pcb_" +
            dset + "_" + tensor + "_" + str(array) + ".pcd")
depth_data = np.load("./data/training/" + dset +
                     "/tensors/depth_ims_tf_table_" + tensor +
                     ".npz")['arr_0'][array]

camera_intr = CameraIntrinsics.load("./data/calib/primesense/primesense.intr")
#camera_intr = CameraIntrinsics.load("./data/calib/phoxi/phoxi.intr")
depth_im = DepthImage(depth_data, frame=camera_intr.frame)

# Crop and resize camera intrinsics as in DexNet dataset generation,
# ln 439 in tool/generate_gqcnn_dataset.py
camera_intr_scale = 32.0 / 96.0
cropped_camera_intr = camera_intr.crop(32, 32, 16, 16)
final_camera_intr = cropped_camera_intr.resize(camera_intr_scale)
final_camera_intr.cx = 16
final_camera_intr.cy = 16
print(final_camera_intr.cx)

PC = final_camera_intr.deproject(depth_im)
print(min(PC.data[2]))
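# Sanity-check sketch, assuming perception's CameraIntrinsics.resize
# scales fx, fy, cx, and cy by the given factor: the final focal
# length should be the original focal length scaled by 32/96.
print(camera_intr.fx * camera_intr_scale, final_camera_intr.fx)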
from gpd.msg import CloudIndexed
import IPython
import numpy as np
from scipy.linalg import lstsq
from sensor_msgs.msg import PointCloud2
from sensor_msgs import point_cloud2
from std_msgs.msg import Header, Int64
import rospy

from autolab_core import Box, PointCloud, RigidTransform
from perception import CameraIntrinsics
from visualization import Visualizer3D as vis3d

CAMERA_INTR_FILENAME = '/nfs/diskstation/calib/primesense_overhead/primesense_overhead.intr'
CAMERA_POSE_FILENAME = '/nfs/diskstation/calib/primesense_overhead/primesense_overhead_to_world.tf'
camera_intr = CameraIntrinsics.load(CAMERA_INTR_FILENAME)
T_camera_world = RigidTransform.load(CAMERA_POSE_FILENAME).as_frames(
    'camera', 'world')
workspace = Box(min_pt=np.array([0.22, -0.22, 0.005]),
                max_pt=np.array([0.56, 0.22, 0.2]),
                frame='world')
pub = None


def cloudCallback(msg):
    global T_camera_world

    # read the cloud
    cloud_array = []
    for p in point_cloud2.read_points(msg):
        cloud_array.append([p[0], p[1], p[2]])
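    # Continuation sketch (not part of the original callback): one
    # plausible use of `workspace` is to crop the cloud to the box in
    # the world frame, assuming autolab_core's PointCloud.box_mask.
    cloud = PointCloud(np.array(cloud_array).T, frame='camera')
    cloud_world = T_camera_world * cloud
    cropped_cloud, valid_inds = cloud_world.box_mask(workspace)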