def predict(self):
    rot, trans = RigidTransform.rotation_and_translation_from_matrix(
        self.datapoint['obj_pose'])
    dataset_pose = RigidTransform(rot, trans, 'obj', 'world')
    dataset_final_poses = self.datapoint['final_poses']
    num_vertices = self.datapoint['num_vertices']

    # Unpack the stacked 4x4 pose matrices; an all-zero matrix marks the
    # end of the valid poses.
    final_poses = []
    for i in range(20):
        final_pose_mat = dataset_final_poses[i * 4:(i + 1) * 4]
        if np.array_equal(final_pose_mat, np.zeros((4, 4))):
            break
        rot, trans = RigidTransform.rotation_and_translation_from_matrix(
            final_pose_mat)
        final_poses.append(RigidTransform(rot, trans, 'obj', 'world'))

    # Re-express the stored vertices and normals in the current object pose.
    vertices = self.datapoint['vertices'][:num_vertices]
    normals = self.datapoint['normals'][:num_vertices]
    vertices = (self.obj.T_obj_world * dataset_pose.inverse() *
                PointCloud(vertices.T, 'world')).data.T
    normals = (self.obj.T_obj_world * dataset_pose.inverse() *
               PointCloud(normals.T, 'world')).data.T

    vertex_probs = self.datapoint[
        'vertex_probs'][:num_vertices, :len(final_poses)]
    required_forces = self.datapoint['min_required_forces'][:num_vertices]
    return vertices, normals, final_poses, vertex_probs, required_forces
def score_plane_intersection(pc, cell_pc, cell_centroid, shadow, clutter):
    """The score of this cell is the number of points that intersect with
    the plane.

    Assumes that cell_pc is the set of all points in both the plane and
    the cell. The score is higher if more points intersect (since clutter
    and box edges are not in the flat plane, they don't intersect).
    """
    length, width, height = shadow.extents
    minx, maxx = cell_centroid[0] - (length / 2), cell_centroid[0] + (length / 2)
    miny, maxy = cell_centroid[1] - (width / 2), cell_centroid[1] + (width / 2)

    # Keep only the points that fall inside the cell's xy-bounds.
    intersecting_pts = cell_pc[minx < cell_pc[:, 0]]
    intersecting_pts = intersecting_pts[intersecting_pts[:, 0] < maxx]
    intersecting_pts = intersecting_pts[miny < intersecting_pts[:, 1]]
    intersecting_pts = intersecting_pts[intersecting_pts[:, 1] < maxy]

    # Crop the clutter points to the same cell.
    clutter = clutter[minx < clutter[:, 0]]
    clutter = clutter[clutter[:, 0] < maxx]
    clutter = clutter[miny < clutter[:, 1]]
    clutter = clutter[clutter[:, 1] < maxy]
    if len(clutter) == 0:
        return intersecting_pts, len(intersecting_pts)

    clutter_pts = np.in1d(intersecting_pts, clutter)
    if len(clutter_pts) % 3 != 0:
        print("Int pts shape = " + str(intersecting_pts.shape))
        print("clutter shape = " + str(clutter.shape))
        print("Clutter pts shape = " + str(clutter_pts.shape))
    clutter_pts.shape = (len(clutter_pts) // 3, 3)
    if len(clutter_pts) == 0:
        return intersecting_pts, len(intersecting_pts)
    clutter_pts = np.apply_along_axis(np.all, 1, clutter_pts)

    int_pts_pc = PointCloud(intersecting_pts.T, pc.frame)
    clutter_pc = PointCloud(clutter.T, pc.frame)
    di = ci.project_to_image(int_pts_pc)
    clutter_di = ci.project_to_image(clutter_pc)
    bi = di.invalid_pixel_mask()
    diff = bi.diff_with_target(clutter_di)
    vis2d.figure()
    vis2d.imshow(clutter)
    vis2d.imshow(diff)
    vis2d.show()
    return intersecting_pts, -1 * len(clutter_pts)
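# Aside: the four chained comparisons used above for the cell bounds can be
# collapsed into a single boolean mask. A minimal standalone sketch (the
# array and bounds below are illustrative stand-ins, not values from the
# pipeline):
import numpy as np

pts = np.random.rand(100, 3)
minx, maxx, miny, maxy = 0.2, 0.8, 0.2, 0.8
in_cell = ((minx < pts[:, 0]) & (pts[:, 0] < maxx) &
           (miny < pts[:, 1]) & (pts[:, 1] < maxy))
intersecting_pts = pts[in_cell]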
def alignment_error(self, T_obj_camera_est, n_samples=10000):
    mesh = self.salient_edge_set.mesh
    sampled_points = mesh.sample(n_samples)

    # Map the sampled points through the true and estimated poses.
    true_points = self.T_obj_camera.apply(
        PointCloud(sampled_points.T, frame='obj')).data.T
    est_points = T_obj_camera_est.apply(
        PointCloud(sampled_points.T, frame='obj')).data.T

    # Sum of squared nearest-neighbor distances from the estimated points
    # to the true points.
    lookup_tree = KDTree(true_points)
    _, indices = lookup_tree.query(est_points)
    squared_err = np.linalg.norm(est_points - true_points[indices], axis=1)**2
    err = np.sum(squared_err)
    return err
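# Aside: alignment_error is a one-directional sum of squared
# nearest-neighbor distances (a chamfer-style error). A minimal standalone
# sketch of the same computation on synthetic data, assuming scipy's
# KDTree, which matches the query signature used above:
import numpy as np
from scipy.spatial import KDTree

true_points = np.random.rand(1000, 3)
est_points = true_points + 0.01 * np.random.randn(1000, 3)
_, indices = KDTree(true_points).query(est_points)
err = np.sum(np.linalg.norm(est_points - true_points[indices], axis=1)**2)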
def get_cuboid(object_pose, cuboid_dimensions, camera_intrinsics):
    # cuboid centroid
    cuboid_centroid = object_pose.translation
    result_json = {'cuboid_centroid': cuboid_centroid.tolist()}

    # cuboid
    x = cuboid_dimensions[0] / 2
    y = cuboid_dimensions[1] / 2
    z = cuboid_dimensions[2] / 2
    # colors in nvdu_viz: b b m m g g y y -- (b)lue, (m)agenta, (g)reen, (y)ellow
    cuboid_corners = np.array([[x, -x, -x, x, x, -x, -x, x],
                               [-y, -y, y, y, -y, -y, y, y],
                               [z, z, z, z, -z, -z, -z, -z]])
    cuboid_points_model = PointCloud(cuboid_corners, 'target_model')
    cuboid_points_camera = object_pose.apply(cuboid_points_model)
    result_json['cuboid'] = np.transpose(cuboid_points_camera.data).tolist()

    # projected_cuboid_centroid
    cuboid_centroid_image_coords = project_subpixel(
        camera_intrinsics, Point(cuboid_centroid, 'camera'))
    result_json['projected_cuboid_centroid'] = \
        cuboid_centroid_image_coords.tolist()

    # projected_cuboid
    cuboid_image_coords = project_subpixel(camera_intrinsics,
                                           cuboid_points_camera)
    result_json['projected_cuboid'] = np.transpose(
        cuboid_image_coords).tolist()
    return result_json
def plot_grasp(grasp, obj, mag, transform=None, color=(1, 0, 0)):
    _, c = grasp.close_fingers(obj)
    c1, c2 = c
    c1_start = c1.point
    c2_start = c2.point

    # Extend each contact outward from the grasp center by mag.
    d1 = c1_start - grasp.center
    d1 = mag * d1 / np.linalg.norm(d1)
    d2 = c2_start - grasp.center
    d2 = mag * d2 / np.linalg.norm(d2)
    c1_end = c1_start + d1
    c2_end = c2_start + d2

    if transform is not None:
        points = PointCloud(np.asarray(
            [c1_start, c1_end, c2_start, c2_end]).T,
                            frame=transform.from_frame)
        points = transform.apply(points).data.T
        c1_start, c1_end, c2_start, c2_end = points

    x = np.linspace(c1_start[0], c1_end[0], num=10)
    y = np.linspace(c1_start[1], c1_end[1], num=10)
    z = np.linspace(c1_start[2], c1_end[2], num=10)
    mlab.plot3d(x, y, z, color=color, tube_radius=mag / 40)

    x = np.linspace(c2_start[0], c2_end[0], num=10)
    y = np.linspace(c2_start[1], c2_end[1], num=10)
    z = np.linspace(c2_start[2], c2_end[2], num=10)
    mlab.plot3d(x, y, z, color=color, tube_radius=mag / 40)
def do_stuff(pc, indices, model, rotated_shadow, img_file):
    scene = Scene()
    camera = VirtualCamera(ci, cp)
    scene.camera = camera

    # Works
    shadow_obj = SceneObject(rotated_shadow)
    scene.add_object('shadow', shadow_obj)
    wd = scene.wrapped_render([RenderMode.DEPTH])[0]
    wd_bi = wd.to_binary()
    vis2d.figure()
    vis2d.imshow(wd_bi)
    vis2d.show()

    # Doesn't work yet
    plane = pc.data.T[indices]
    plane_pc = PointCloud(plane.T, pc.frame)
    di = ci.project_to_image(plane_pc)
    bi = di.to_binary()
    vis2d.figure()
    vis2d.imshow(bi)
    vis2d.show()

    # Works
    both = bi.mask_binary(wd_bi)
    vis2d.figure()
    vis2d.imshow(both)
    vis2d.show()
def register_example(reg_example, aligner, model):
    ses = reg_example.salient_edge_set

    # Pre-process mesh
    mesh = ses.mesh
    edge_mask = ses.edge_mask
    edge_pc_obj = PointCloud(generate_canonical_pc(mesh, edge_mask).T,
                             frame='obj')

    # Process depth image
    ci = reg_example.camera_intrs
    depth_im = reg_example.depth_im
    point_cloud_im = ci.deproject_to_image(depth_im)
    normal_cloud_im = point_cloud_im.normal_cloud_im()
    joined = np.dstack((depth_im.data, normal_cloud_im.data))
    mask = model.predict(joined[np.newaxis, :, :, :])[0]
    mask *= 255.0
    mask = BinaryImage(mask.astype(np.uint8))
    depth_im_edges = depth_im.mask_binary(mask)
    edge_pc_cam = ci.deproject(depth_im_edges)
    edge_pc_cam.remove_zero_points()

    # Align the two point sets with Super4PCS
    T_obj_camera_est = aligner.align(edge_pc_cam, edge_pc_obj)
    return T_obj_camera_est
def fine_grid_search(pc, indices, model, shadow, splits):
    length, width, height = shadow.extents
    split_size = max(length, width)
    pc_data, ind = get_pc_data(pc, indices)
    maxes = np.max(pc_data, axis=0)
    mins = np.min(pc_data, axis=0)
    bin_base = mins[2]
    plane_normal = model[0:3]
    step_size = split_size / splits

    # Render the plane to a binary image for shadow scoring.
    plane_data = get_plane_data(pc, indices)
    plane_pc = PointCloud(plane_data.T, pc.frame)
    plane_pc = cp.inverse().apply(plane_pc)
    di = ci.project_to_image(plane_pc)
    bi = di.to_binary()
    bi = bi.inverse()

    scene = Scene()
    camera = VirtualCamera(ci, cp)
    scene.camera = camera
    shadow_obj = SceneObject(shadow)
    scene.add_object('shadow', shadow_obj)
    orig_tow = shadow_obj.T_obj_world

    # Score every cell in a fine grid of candidate placements.
    numx = (int(np.round((maxes[0] - mins[0]) / split_size)) - 1) * splits + 1
    numy = (int(np.round((maxes[1] - mins[1]) / split_size)) - 1) * splits + 1
    scores = np.zeros((numx, numy))
    for i in range(numx):
        x = mins[0] + i * step_size
        for j in range(numy):
            y = mins[1] + j * step_size
            for tow in transforms(pc, pc_data, shadow, x, y,
                                  x + split_size, y + split_size, 8, orig_tow):
                shadow_obj.T_obj_world = tow
                scores[i][j] = under_shadow(scene, bi)
                shadow_obj.T_obj_world = orig_tow

    print("\nScores: \n" + str(scores))
    best = best_cell(scores)
    print("\nBest Cell: " + str(best) + ", with score = " +
          str(scores[best[0]][best[1]]))

    # ------- Visualize best placement
    vis3d.figure()
    x = mins[0] + best[0] * step_size
    y = mins[1] + best[1] * step_size
    cell_indices = np.where(
        (x < pc_data[:, 0]) & (pc_data[:, 0] < x + split_size) &
        (y < pc_data[:, 1]) & (pc_data[:, 1] < y + split_size))[0]
    points = pc_data[cell_indices]
    rest = pc_data[np.setdiff1d(np.arange(len(pc_data)), cell_indices)]
    vis3d.points(points, color=(0, 1, 1))
    vis3d.points(rest, color=(1, 0, 1))
    vis3d.show()
    # --------

    return best, scene
def test_registration(self):
    np.random.seed(101)

    source_points = np.random.rand(3, NUM_POINTS).astype(np.float32)
    source_normals = np.random.rand(3, NUM_POINTS).astype(np.float32)
    source_normals = source_normals / np.tile(
        np.linalg.norm(source_normals, axis=0)[np.newaxis, :], [3, 1])

    source_point_cloud = PointCloud(source_points, frame='world')
    source_normal_cloud = NormalCloud(source_normals, frame='world')

    matcher = PointToPlaneFeatureMatcher()
    solver = PointToPlaneICPSolver(sample_size=NUM_POINTS)

    # 3d registration
    tf = RigidTransform(rotation=RigidTransform.random_rotation(),
                        translation=RigidTransform.random_translation(),
                        from_frame='world',
                        to_frame='world')
    tf = RigidTransform(from_frame='world',
                        to_frame='world').interpolate_with(tf, 0.01)
    target_point_cloud = tf * source_point_cloud
    target_normal_cloud = tf * source_normal_cloud
    result = solver.register(source_point_cloud,
                             target_point_cloud,
                             source_normal_cloud,
                             target_normal_cloud,
                             matcher,
                             num_iterations=NUM_ITERS)
    self.assertTrue(
        np.allclose(tf.matrix, result.T_source_target.matrix, atol=1e-3))

    # 2d registration
    theta = 0.1 * np.random.rand()
    t = 0.005 * np.random.rand(3, 1)
    t[2] = 0
    R = np.array([[np.cos(theta), -np.sin(theta), 0],
                  [np.sin(theta), np.cos(theta), 0],
                  [0, 0, 1]])
    tf = RigidTransform(R, t, from_frame='world', to_frame='world')
    target_point_cloud = tf * source_point_cloud
    target_normal_cloud = tf * source_normal_cloud
    result = solver.register_2d(source_point_cloud,
                                target_point_cloud,
                                source_normal_cloud,
                                target_normal_cloud,
                                matcher,
                                num_iterations=NUM_ITERS)
    self.assertTrue(
        np.allclose(tf.matrix, result.T_source_target.matrix, atol=1e-3))
def _colorize(self, depth_im, color_im):
    """Colorize a depth image from the PhoXi using a color image from the
    webcam.

    Parameters
    ----------
    depth_im : DepthImage
        The PhoXi depth image.
    color_im : ColorImage
        Corresponding color image.

    Returns
    -------
    ColorImage
        A colorized image corresponding to the PhoXi depth image.
    """
    # Project the point cloud into the webcam's frame
    target_shape = (depth_im.data.shape[0], depth_im.data.shape[1], 3)
    pc_depth = self._phoxi.ir_intrinsics.deproject(depth_im)
    pc_color = self._T_webcam_world.inverse().dot(
        self._T_phoxi_world).apply(pc_depth)

    # Sort the points by their distance from the webcam's aperture
    pc_data = pc_color.data.T
    dists = np.linalg.norm(pc_data, axis=1)
    order = np.argsort(dists)
    pc_data = pc_data[order]
    pc_color = PointCloud(pc_data.T,
                          frame=self._webcam.color_intrinsics.frame)
    sorted_dists = dists[order]
    sorted_depths = depth_im.data.flatten()[order]

    # Generate image coordinates for each sorted point
    icds = self._webcam.color_intrinsics.project(pc_color).data.T

    # Create mask for points that are occluded by others: keep only points
    # whose distance is close to the nearest point that projects to the
    # same (coarsened) pixel.
    rounded_icds = np.array(icds / 3.0, dtype=np.uint32)
    unique_icds, unique_inds, unique_inv = np.unique(rounded_icds,
                                                     axis=0,
                                                     return_index=True,
                                                     return_inverse=True)
    icd_depths = sorted_dists[unique_inds]
    min_depths_pp = icd_depths[unique_inv]
    depth_delta_mask = np.abs(min_depths_pp - sorted_dists) < 5e-3

    # Create mask for points with missing depth or that lie outside the image
    valid_mask = np.logical_and(
        np.logical_and(icds[:, 0] >= 0,
                       icds[:, 0] < self._webcam.color_intrinsics.width),
        np.logical_and(icds[:, 1] >= 0,
                       icds[:, 1] < self._webcam.color_intrinsics.height))
    valid_mask = np.logical_and(valid_mask, sorted_depths != 0.0)
    valid_mask = np.logical_and(valid_mask, depth_delta_mask)

    # Look up colors for the valid points and scatter them back into the
    # original (unsorted) pixel order.
    valid_icds = icds[valid_mask]
    colors = color_im.data[valid_icds[:, 1], valid_icds[:, 0], :]
    color_im_data = np.zeros(
        (target_shape[0] * target_shape[1], target_shape[2]), dtype=np.uint8)
    color_im_data[valid_mask] = colors
    color_im_data[order] = color_im_data.copy()
    color_im_data = color_im_data.reshape(target_shape)
    return ColorImage(color_im_data, frame=self._frame)
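# Aside: a minimal sketch of the visibility trick used in _colorize. With
# points sorted by ascending distance, np.unique(..., return_index=True)
# returns the first occurrence per coarsened pixel, i.e. the nearest point;
# points much farther than that minimum are treated as occluded. All values
# below are illustrative stand-ins:
import numpy as np

dists = np.sort(np.random.rand(1000))                 # ascending distances
icds = np.random.randint(0, 300, (1000, 2))           # pixel coords per point
rounded = icds // 3                                   # coarse pixel bins
_, first_idx, inv = np.unique(rounded, axis=0,
                              return_index=True, return_inverse=True)
nearest_per_bin = dists[first_idx][inv]               # min distance in each bin
visible = np.abs(nearest_per_bin - dists) < 5e-3      # near-front points only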
def process_raw_point_cloud(self, point_cloud_cam_raw):
    points = pcl2.read_points(point_cloud_cam_raw,
                              field_names=('x', 'y', 'z'),
                              skip_nans=True)
    point_cloud_raw_points = [p for p in points]
    print "Num raw points in point cloud: " + repr(len(point_cloud_raw_points)) + " points"
    point_cloud_cam = PointCloud(
        np.transpose(np.array(point_cloud_raw_points)),
        self.pointCloudProcessor.CAM_FRAME)
    point_cloud_world = self.T_cam_world * point_cloud_cam
    return point_cloud_world
def plot_frame(length=1, transform=None):
    # x-axis (red)
    x = np.linspace(0, length, num=10)
    y = np.zeros(10)
    z = np.zeros(10)
    if transform is not None:
        points = PointCloud(np.stack([x, y, z]), frame=transform.from_frame)
        points = transform.apply(points).data
        x = points[0, :]
        y = points[1, :]
        z = points[2, :]
    mlab.plot3d(x, y, z, color=(1, 0, 0), tube_radius=length / 40)

    # y-axis (green)
    x = np.zeros(10)
    y = np.linspace(0, length, num=10)
    z = np.zeros(10)
    if transform is not None:
        points = PointCloud(np.stack([x, y, z]), frame=transform.from_frame)
        points = transform.apply(points).data
        x = points[0, :]
        y = points[1, :]
        z = points[2, :]
    mlab.plot3d(x, y, z, color=(0, 1, 0), tube_radius=length / 40)

    # z-axis (blue)
    x = np.zeros(10)
    y = np.zeros(10)
    z = np.linspace(0, length, num=10)
    if transform is not None:
        points = PointCloud(np.stack([x, y, z]), frame=transform.from_frame)
        points = transform.apply(points).data
        x = points[0, :]
        y = points[1, :]
        z = points[2, :]
    mlab.plot3d(x, y, z, color=(0, 0, 1), tube_radius=length / 40)
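# Aside: the three axis segments in plot_frame differ only in which
# coordinate varies and in color, so the body can be collapsed into a loop.
# A sketch of an equivalent refactor (assumes the same numpy/mlab/PointCloud
# imports as plot_frame above):
def plot_frame_compact(length=1, transform=None):
    colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]   # x: red, y: green, z: blue
    for axis, color in enumerate(colors):
        pts = np.zeros((3, 10))
        pts[axis] = np.linspace(0, length, num=10)
        if transform is not None:
            pc = PointCloud(pts, frame=transform.from_frame)
            pts = transform.apply(pc).data
        mlab.plot3d(pts[0], pts[1], pts[2], color=color,
                    tube_radius=length / 40)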
def cloudCallback(msg):
    global T_camera_world

    # read the cloud
    cloud_array = []
    for p in point_cloud2.read_points(msg):
        cloud_array.append([p[0], p[1], p[2]])
    cloud_array = np.array(cloud_array).T

    # form a point cloud
    point_cloud_camera = PointCloud(cloud_array, frame='camera')

    # segment the point cloud
    point_cloud_world = T_camera_world * point_cloud_camera
    seg_point_cloud_world, valid_indices = point_cloud_world.box_mask(
        workspace)
    """
    point_cloud_world.remove_infinite_points()
    vis3d.figure()
    vis3d.points(point_cloud_world, random=True, subsample=10,
                 scale=0.001, color=(0,0,1))
    vis3d.points(seg_point_cloud_world, random=True, subsample=10,
                 scale=0.0015, color=(0,1,0))
    vis3d.pose(T_camera_world)
    vis3d.show()
    IPython.embed()
    """

    # convert to indexed cloud frame
    frame_id = msg.header.frame_id
    msg = CloudIndexed()
    header = Header()
    header.frame_id = frame_id
    header.stamp = rospy.Time.now()
    msg.cloud_sources.cloud = point_cloud2.create_cloud_xyz32(
        header, cloud_array.T.tolist())
    msg.cloud_sources.view_points.append(Point(0, 0, 0))
    for i in xrange(cloud_array.shape[1]):
        msg.cloud_sources.camera_source.append(Int64(0))
    for i in valid_indices:
        msg.indices.append(Int64(i))

    # publish
    global pub
    pub.publish(msg)
    print 'Published cloud with', len(msg.indices), 'indices'
def deproject(self, depth_image):
    """Deprojects a DepthImage into a PointCloud.

    Parameters
    ----------
    depth_image : :obj:`DepthImage`
        The 2D depth image to project into a point cloud.

    Returns
    -------
    :obj:`autolab_core.PointCloud`
        A 3D point cloud created from the depth image.

    Raises
    ------
    ValueError
        If depth_image is not a valid DepthImage in the same reference
        frame as the camera.
    """
    # check valid input
    if not isinstance(depth_image, DepthImage):
        raise ValueError('Must provide DepthImage object for projection')
    if depth_image.frame != self._frame:
        raise ValueError(
            'Cannot deproject points in frame %s from camera with frame %s'
            % (depth_image.frame, self._frame))

    # create homogeneous pixels
    row_indices = np.arange(depth_image.height)
    col_indices = np.arange(depth_image.width)
    pixel_grid = np.meshgrid(col_indices, row_indices)
    pixels = np.c_[pixel_grid[0].flatten(), pixel_grid[1].flatten()].T
    depth_data = depth_image.data.flatten()
    pixels_homog = np.r_[pixels, depth_data.reshape(1, depth_data.shape[0])]

    # deproject
    points_3d = np.linalg.inv(self.S).dot(
        pixels_homog -
        np.tile(self.t.reshape(3, 1), [1, pixels_homog.shape[1]]))
    return PointCloud(data=points_3d, frame=self._frame)
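# Aside: deproject inverts a linear camera model that maps a 3D point x to
# [u, v, z] via S.dot(x) + t, so a single pixel deprojects as
# inv(S).dot([u, v, z] - t). A minimal numpy sketch with illustrative
# (hypothetical) S and t:
import numpy as np

S = np.diag([500.0, 500.0, 1.0])               # hypothetical projection matrix
t = np.array([320.0, 240.0, 0.0])              # hypothetical offset
pixel_homog = np.array([100.0, 200.0, 0.75])   # [u, v, depth]
point_3d = np.linalg.inv(S).dot(pixel_homog - t)
# Round trip: S.dot(point_3d) + t recovers [u, v, depth].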
def transform_pt_obj_to_grid(self, x_sdf, direction=False):
    """Converts a point in sdf coords to the grid basis. If direction is
    True then don't translate.

    Parameters
    ----------
    x_sdf : numpy 3xN ndarray or numeric scalar
        points to transform from sdf basis in meters to grid basis

    Returns
    -------
    x_grid : numpy 3xN ndarray or scalar
        points in grid basis
    """
    if isinstance(x_sdf, Number):
        return self.T_world_grid_.scale * x_sdf
    if direction:
        points_sdf = NormalCloud(x_sdf.astype(np.float32), frame='world')
    else:
        points_sdf = PointCloud(x_sdf.astype(np.float32), frame='world')
    x_grid = self.T_world_grid_ * points_sdf
    return x_grid.data
def transform_pt_grid_to_obj(self, x_grid, direction=False):
    """Converts a point in grid coords to the world basis. If direction is
    True then don't translate.

    Parameters
    ----------
    x_grid : numpy 3xN ndarray or numeric scalar
        points to transform from grid basis to sdf basis in meters

    Returns
    -------
    x_sdf : numpy 3xN ndarray
        points in sdf basis (meters)
    """
    if isinstance(x_grid, Number):
        return self.T_grid_world_.scale * x_grid
    if direction:
        points_grid = NormalCloud(x_grid.astype(np.float32), frame='grid')
    else:
        points_grid = PointCloud(x_grid.astype(np.float32), frame='grid')
    x_sdf = self.T_grid_world_ * points_grid
    return x_sdf.data
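# Aside: the direction flag in the two transforms above exists because
# directions (normals) must not be translated, only rotated, while points
# get the full transform. A minimal numpy sketch with illustrative R, t, s:
import numpy as np

R = np.array([[0.0, -1.0, 0.0],
              [1.0, 0.0, 0.0],
              [0.0, 0.0, 1.0]])       # 90-degree rotation about z
t = np.array([[0.1], [0.2], [0.0]])
s = 2.0                               # similarity-transform scale
x = np.random.rand(3, 5)
x_point = s * R.dot(x) + t            # points: scale, rotate, translate
x_direction = R.dot(x)                # directions: rotate only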
def find_clutter(pc, indices, cam_int_file):
    """Finds clutter given a plane's indices into pc (e.g. from
    largest_planar_surface, where plane_pc = pc.data.T[indices])."""
    plane_pc_data = pc.data.T[indices]
    ci = CameraIntrinsics.load(cam_int_file)
    plane_pc = PointCloud(plane_pc_data.T, pc.frame)
    di = ci.project_to_image(plane_pc)
    vis2d.figure()
    vis2d.imshow(di)
    vis2d.show()

    # Pixels with no valid plane depth are candidate clutter.
    inv_pixel_mask = di.invalid_pixel_mask()
    vis2d.figure()
    vis2d.imshow(inv_pixel_mask)
    vis2d.show()

    clutter = []
    for r in range(len(inv_pixel_mask.data)):
        for c in range(len(inv_pixel_mask.data[r])):
            if inv_pixel_mask.data[r][c] > 0:
                i = r * len(inv_pixel_mask.data[r]) + c
                clutter.append(i)
    return pc.data.T[clutter]
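# Aside: the nested pixel loop in find_clutter can be vectorized; for a 2D
# mask, np.flatnonzero returns exactly the linear indices r * width + c.
# A minimal sketch with an illustrative random mask:
import numpy as np

mask = np.random.rand(480, 640) > 0.95     # stand-in for inv_pixel_mask.data
clutter_indices = np.flatnonzero(mask)     # linear indices of invalid pixels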
def test_bad_transformation(self, num_points=10):
    R_a_b = RigidTransform.random_rotation()
    t_a_b = RigidTransform.random_translation()
    T_a_b = RigidTransform(R_a_b, t_a_b, "a", "b")

    # bad point frame
    caught_bad_frame = False
    try:
        x_c = np.random.rand(3)
        p_c = Point(x_c, "c")
        T_a_b * p_c
    except ValueError:
        caught_bad_frame = True
    self.assertTrue(caught_bad_frame,
                    msg="Failed to catch bad point frame")

    # bad point cloud frame
    caught_bad_frame = False
    try:
        x_c = np.random.rand(3, num_points)
        pc_c = PointCloud(x_c, "c")
        T_a_b * pc_c
    except ValueError:
        caught_bad_frame = True
    self.assertTrue(caught_bad_frame,
                    msg="Failed to catch bad point cloud frame")

    # illegal input
    caught_bad_input = False
    try:
        x_a = np.random.rand(3, num_points)
        T_a_b * x_a
    except ValueError:
        caught_bad_input = True
    self.assertTrue(caught_bad_input,
                    msg="Failed to catch numpy array input")
def test_point_cloud_transformation(self, num_points=10):
    R_a_b = RigidTransform.random_rotation()
    t_a_b = RigidTransform.random_translation()
    T_a_b = RigidTransform(R_a_b, t_a_b, "a", "b")
    x_a = np.random.rand(3, num_points)
    pc_a = PointCloud(x_a, "a")

    # compute the expected result with raw numpy arrays
    x_b = R_a_b.dot(x_a) + np.tile(t_a_b.reshape(3, 1), [1, num_points])

    # use multiplication operator
    pc_b = T_a_b * pc_a
    self.assertTrue(
        np.sum(np.abs(pc_b.data - x_b)) < 1e-5,
        msg="Point cloud transformation incorrect:\n"
        "Expected:\n{}\nGot:\n{}".format(x_b, pc_b.data),
    )

    # check frames
    self.assertEqual(
        pc_b.frame, "b",
        msg="Transformed point cloud has incorrect frame")
def test_divs(self, num_points=10):
    data = np.random.rand(3, num_points)
    p_a = PointCloud(data, 'a')
    p_b = Point(np.random.rand(3), 'b')

    # div on left
    p_a_int = p_a / 5
    assert np.allclose(p_a_int._data, p_a._data / 5)
    p_a_float = p_a / 2.5
    assert np.allclose(p_a_float._data, p_a._data / 2.5)
    p_b_int = p_b / 5
    assert np.allclose(p_b_int._data, p_b._data / 5)
    p_b_float = p_b / 2.5
    assert np.allclose(p_b_float._data, p_b._data / 2.5)

    # div on right
    p_a_int = 5 / p_a
    assert np.allclose(p_a_int._data, 5 / p_a._data)
    p_a_float = 2.5 / p_a
    assert np.allclose(p_a_float._data, 2.5 / p_a._data)
    p_b_int = 5 / p_b
    assert np.allclose(p_b_int._data, 5 / p_b._data)
    p_b_float = 2.5 / p_b
    assert np.allclose(p_b_float._data, 2.5 / p_b._data)
grasp_indices, best_metric_indices = sorted_contacts(vertices, normals,
                                                     T_ar_object)

for indices in best_metric_indices[0:5]:
    a = grasp_indices[indices][0]
    b = grasp_indices[indices][1]
    normal1, normal2 = normals[a], normals[b]
    contact1, contact2 = vertices[a], vertices[b]

    # visualize the mesh and contacts
    vis.figure()
    vis.mesh(mesh)
    vis.normals(
        NormalCloud(np.hstack((normal1.reshape(-1, 1),
                               normal2.reshape(-1, 1))),
                    frame='test'),
        PointCloud(np.hstack((contact1.reshape(-1, 1),
                              contact2.reshape(-1, 1))),
                   frame='test'))
    # vis.pose(T_obj_gripper, alpha=0.05)
    vis.show()

    if BAXTER_CONNECTED:
        repeat = True
        while repeat:
            # come in from the top...
            T_obj_gripper = contacts_to_baxter_hand_pose(
                contact1, contact2, np.array([0, 0, -1]))
            execute_grasp(T_obj_gripper, T_ar_object, ar_tag)
            repeat = bool(raw_input("repeat?"))

exit()
def get_camera_chessboard(sensor_frame, sensor_type, device_num, config):
    # load cfg
    num_transform_avg = config['num_transform_avg']
    num_images = config['num_images']
    sx = config['corners_x']
    sy = config['corners_y']
    color_image_upsample_rate = config['color_image_upsample_rate']
    vis = config['vis']

    # open sensor
    if sensor_type.lower() in CameraChessboardRegister._SENSOR_TYPES:
        sensor = Kinect2Sensor(
            device_num=device_num,
            frame=sensor_frame,
            packet_pipeline_mode=Kinect2PacketPipelineMode.CPU)
    else:
        logging.warning(
            'Could not register device at %s. Sensor type %s not supported'
            % (sensor_frame, sensor_type))
        return None  # unsupported sensor; nothing to register
    logging.info('Registering camera %s' % (sensor_frame))
    sensor.start()

    # repeat registration multiple times and average results
    R = np.zeros([3, 3])
    t = np.zeros([3, 1])
    points_3d_plane = PointCloud(np.zeros([3, sx * sy]),
                                 frame=sensor.ir_frame)
    k = 0
    while k < num_transform_avg:
        # average a bunch of depth images together
        depth_ims = np.zeros([
            Kinect2Sensor.DEPTH_IM_HEIGHT, Kinect2Sensor.DEPTH_IM_WIDTH,
            num_images
        ])
        for i in range(num_images):
            small_color_im, new_depth_im, _ = sensor.frames()
            depth_ims[:, :, i] = new_depth_im.data
        med_depth_im = np.median(depth_ims, axis=2)
        depth_im = DepthImage(med_depth_im, sensor.ir_frame)

        # find the corner pixels in an upsampled version of the color image
        big_color_im = small_color_im.resize(color_image_upsample_rate)
        corner_px = CameraChessboardRegister._find_chessboard(big_color_im,
                                                              sx=sx,
                                                              sy=sy,
                                                              vis=vis)
        if corner_px is None:
            logging.error('No chessboard detected')
            continue

        # convert back to original image
        small_corner_px = corner_px / color_image_upsample_rate

        if vis:
            plt.figure()
            plt.imshow(small_color_im.data)
            for i in range(sx):
                plt.scatter(small_corner_px[i, 0], small_corner_px[i, 1],
                            s=25, c='b')
            plt.axis('off')
            plt.show()

        # project points into 3D
        camera_intr = sensor.ir_intrinsics
        points_3d = camera_intr.deproject(depth_im)

        # get round chessboard ind
        corner_px_round = np.round(small_corner_px).astype(np.uint16)
        corner_ind = depth_im.ij_to_linear(corner_px_round[:, 0],
                                           corner_px_round[:, 1])
        if corner_ind.shape[0] != sx * sy:
            logging.warning('Did not find all corners. Discarding...')
            continue

        # average 3d points
        points_3d_plane = (k * points_3d_plane +
                           points_3d[corner_ind]) / (k + 1)
        logging.info('Registration iteration %d of %d' %
                     (k + 1, config['num_transform_avg']))
        k += 1

    # fit a plane to the chessboard corners
    X = np.c_[points_3d_plane.x_coords, points_3d_plane.y_coords,
              np.ones(points_3d_plane.num_points)]
    y = points_3d_plane.z_coords
    A = X.T.dot(X)
    b = X.T.dot(y)
    w = np.linalg.inv(A).dot(b)
    n = np.array([w[0], w[1], -1])
    n = n / np.linalg.norm(n)
    mean_point_plane = points_3d_plane.mean()

    # find x-axis of the chessboard coordinates on the fitted plane
    T_camera_table = RigidTransform(
        translation=-points_3d_plane.mean().data,
        from_frame=points_3d_plane.frame,
        to_frame='table')
    points_3d_centered = T_camera_table * points_3d_plane

    # get points along y
    coord_pos_y = int(math.floor(sx * (sy - 1) / 2.0))
    coord_neg_y = int(math.ceil(sx * (sy + 1) / 2.0))
    points_pos_y = points_3d_centered[:coord_pos_y]
    points_neg_y = points_3d_centered[coord_neg_y:]
    y_axis = np.mean(points_pos_y.data, axis=1) - np.mean(
        points_neg_y.data, axis=1)
    y_axis = y_axis - np.vdot(y_axis, n) * n
    y_axis = y_axis / np.linalg.norm(y_axis)
    x_axis = np.cross(-n, y_axis)

    # WARNING! May need symmetry breaking, but it appears the points are
    # ordered consistently.

    # produce translation and rotation from plane center and chessboard basis
    rotation_cb_camera = rotation_from_axes(x_axis, y_axis, n)
    translation_cb_camera = mean_point_plane.data
    T_cb_camera = RigidTransform(rotation=rotation_cb_camera,
                                 translation=translation_cb_camera,
                                 from_frame='cb',
                                 to_frame=sensor.frame)
    T_camera_cb = T_cb_camera.inverse()
    cb_points_cam = points_3d[corner_ind]

    # optionally display cb corners with detected pose in 3d space
    if config['debug']:
        # display image with axes overlayed
        cb_center_im = camera_intr.project(
            Point(T_cb_camera.translation, frame=sensor.ir_frame))
        cb_x_im = camera_intr.project(
            Point(T_cb_camera.translation +
                  T_cb_camera.x_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        cb_y_im = camera_intr.project(
            Point(T_cb_camera.translation +
                  T_cb_camera.y_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        cb_z_im = camera_intr.project(
            Point(T_cb_camera.translation +
                  T_cb_camera.z_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        x_line = np.array([cb_center_im.data, cb_x_im.data])
        y_line = np.array([cb_center_im.data, cb_y_im.data])
        z_line = np.array([cb_center_im.data, cb_z_im.data])

        plt.figure()
        plt.imshow(small_color_im.data)
        plt.scatter(cb_center_im.data[0], cb_center_im.data[1])
        plt.plot(x_line[:, 0], x_line[:, 1], c='r', linewidth=3)
        plt.plot(y_line[:, 0], y_line[:, 1], c='g', linewidth=3)
        plt.plot(z_line[:, 0], z_line[:, 1], c='b', linewidth=3)
        plt.axis('off')
        plt.title('Chessboard frame in camera %s' % (sensor.frame))
        plt.show()

    return T_camera_cb, cb_points_cam, points_3d_plane
num_clusters = len(cluster_indices)
obj_segmask_data = np.zeros(depth_im.shape)

# Read out all points in large clusters.
cur_i = 0
for j, indices in enumerate(cluster_indices):
    num_points = len(indices)
    points = np.zeros([3, num_points])
    for i, index in enumerate(indices):
        points[0, i] = pcl_cloud[index][0]
        points[1, i] = pcl_cloud[index][1]
        points[2, i] = pcl_cloud[index][2]

    # Project each cluster into the image and label its pixels j + 1.
    segment = PointCloud(points, frame=camera_intr.frame)
    depth_segment = camera_intr.project_to_image(segment)
    obj_segmask_data[depth_segment.data > 0] = j + 1
obj_segmask = SegmentationImage(obj_segmask_data.astype(np.uint8))
obj_segmask = obj_segmask.mask_binary(segmask)

# Inpaint.
depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)

if "input_images" in policy_config["vis"] and policy_config["vis"][
        "input_images"]:
    vis.figure(size=(10, 10))
    num_plot = 3
    vis.subplot(1, num_plot, 1)
    vis.imshow(depth_im)
    vis.subplot(1, num_plot, 2)
def preprocess_images(raw_color_im,
                      raw_depth_im,
                      camera_intr,
                      T_camera_world,
                      workspace_box,
                      workspace_im,
                      image_proc_config):
    """Preprocess a set of color and depth images."""
    # read params
    inpaint_rescale_factor = image_proc_config['inpaint_rescale_factor']
    cluster = image_proc_config['cluster']
    cluster_tolerance = image_proc_config['cluster_tolerance']
    min_cluster_size = image_proc_config['min_cluster_size']
    max_cluster_size = image_proc_config['max_cluster_size']

    # deproject into 3D world coordinates
    point_cloud_cam = camera_intr.deproject(raw_depth_im)
    point_cloud_cam.remove_zero_points()
    point_cloud_world = T_camera_world * point_cloud_cam

    # compute the segmask for points above the box
    seg_point_cloud_world, _ = point_cloud_world.box_mask(workspace_box)
    seg_point_cloud_cam = T_camera_world.inverse() * seg_point_cloud_world
    depth_im_seg = camera_intr.project_to_image(seg_point_cloud_cam)

    # mask out objects in the known workspace
    env_pixels = depth_im_seg.pixels_farther_than(workspace_im)
    depth_im_seg._data[env_pixels[:, 0], env_pixels[:, 1]] = 0

    # REMOVE NOISE
    # clip low points
    low_indices = np.where(
        point_cloud_world.data[2, :] < workspace_box.min_pt[2])[0]
    point_cloud_world.data[2, low_indices] = workspace_box.min_pt[2]

    # clip high points
    high_indices = np.where(
        point_cloud_world.data[2, :] > workspace_box.max_pt[2])[0]
    point_cloud_world.data[2, high_indices] = workspace_box.max_pt[2]

    # segment out the region in the workspace (including the table)
    workspace_point_cloud_world, valid_indices = \
        point_cloud_world.box_mask(workspace_box)
    invalid_indices = np.setdiff1d(
        np.arange(point_cloud_world.num_points), valid_indices)

    if cluster:
        # create new cloud
        pcl_cloud = pcl.PointCloud(
            workspace_point_cloud_world.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()

        # find large clusters (likely to be real objects instead of noise)
        ec = pcl_cloud.make_EuclideanClusterExtraction()
        ec.set_ClusterTolerance(cluster_tolerance)
        ec.set_MinClusterSize(min_cluster_size)
        ec.set_MaxClusterSize(max_cluster_size)
        ec.set_SearchMethod(tree)
        cluster_indices = ec.Extract()
        num_clusters = len(cluster_indices)

        # read out all points in large clusters
        filtered_points = np.zeros(
            [3, workspace_point_cloud_world.num_points])
        cur_i = 0
        for j, indices in enumerate(cluster_indices):
            num_points = len(indices)
            points = np.zeros([3, num_points])
            for i, index in enumerate(indices):
                points[0, i] = pcl_cloud[index][0]
                points[1, i] = pcl_cloud[index][1]
                points[2, i] = pcl_cloud[index][2]
            filtered_points[:, cur_i:cur_i + num_points] = points.copy()
            cur_i = cur_i + num_points

        # reconstruct the point cloud
        all_points = np.c_[filtered_points[:, :cur_i],
                           point_cloud_world.data[:, invalid_indices]]
    else:
        all_points = point_cloud_world.data
    filtered_point_cloud_world = PointCloud(all_points, frame='world')

    # compute the filtered depth image
    filtered_point_cloud_cam = \
        T_camera_world.inverse() * filtered_point_cloud_world
    depth_im = camera_intr.project_to_image(filtered_point_cloud_cam)

    # form segmask
    segmask = depth_im_seg.to_binary()
    valid_px_segmask = depth_im.invalid_pixel_mask().inverse()
    segmask = segmask.mask_binary(valid_px_segmask)

    # inpaint
    color_im = raw_color_im.inpaint(rescale_factor=inpaint_rescale_factor)
    depth_im = depth_im.inpaint(rescale_factor=inpaint_rescale_factor)
    return color_im, depth_im, segmask
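# Aside: a minimal standalone sketch of the workspace crop used above, with
# autolab_core's Box and PointCloud.box_mask. The bounds and the random
# cloud are illustrative assumptions:
import numpy as np
from autolab_core import Box, PointCloud

workspace_box = Box(np.array([-0.2, -0.2, 0.0]),
                    np.array([0.2, 0.2, 0.3]), frame='world')
cloud = PointCloud(np.random.rand(3, 1000) - 0.5, frame='world')
seg_cloud, valid_indices = cloud.box_mask(workspace_box)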
def points(points,
           T_points_world=None,
           color=np.array([0, 1, 0]),
           scale=0.01,
           n_cuts=20,
           subsample=None,
           random=False,
           name=None):
    """Scatter a point cloud in pose T_points_world.

    Parameters
    ----------
    points : autolab_core.BagOfPoints or (n,3) float
        The point set to visualize.
    T_points_world : autolab_core.RigidTransform
        Pose of points, specified as a transformation from point frame to
        world frame.
    color : (3,) or (n,3) float
        Color of whole cloud or per-point colors.
    scale : float
        Radius of each point.
    n_cuts : int
        Number of longitude/latitude lines on sphere points.
    subsample : int
        Parameter of subsampling to display fewer points.
    name : str
        A name for the object to be added.
    """
    if isinstance(points, BagOfPoints):
        if points.dim != 3:
            raise ValueError('BagOfPoints must have dimension 3xN!')
    else:
        if type(points) is not np.ndarray:
            raise ValueError(
                'Points visualizer expects BagOfPoints or numpy array!')
        if len(points.shape) == 1:
            points = points[:, np.newaxis].T
        if len(points.shape) != 2 or points.shape[1] != 3:
            raise ValueError(
                'Numpy array of points must have dimension (N,3)')
        frame = 'points'
        if T_points_world is not None:
            frame = T_points_world.from_frame
        points = PointCloud(points.T, frame=frame)

    color = np.array(color)
    if subsample is not None:
        num_points = points.num_points
        points, inds = points.subsample(subsample, random=random)
        if color.shape[0] == num_points and color.shape[1] == 3:
            color = color[inds, :]

    # transform into world frame
    if points.frame != 'world':
        if T_points_world is None:
            T_points_world = RigidTransform(from_frame=points.frame,
                                            to_frame='world')
        points_world = T_points_world * points
    else:
        points_world = points

    point_data = points_world.data
    if len(point_data.shape) == 1:
        point_data = point_data[:, np.newaxis]
    point_data = point_data.T

    mpcolor = color
    if len(color.shape) > 1:
        mpcolor = color[0]
    mp = MaterialProperties(color=np.array(mpcolor),
                            k_a=0.5,
                            k_d=0.3,
                            k_s=0.0,
                            alpha=10.0,
                            smooth=True)

    # For each point, create a sphere of the specified color and size.
    sphere = trimesh.creation.uv_sphere(scale, [n_cuts, n_cuts])
    raw_pose_data = np.tile(np.eye(4), (points.num_points, 1))
    raw_pose_data[3::4, :3] = points.data.T
    instcolor = None
    if color.shape[0] == points.num_points and color.shape[1] == 3:
        instcolor = color
    obj = InstancedSceneObject(sphere,
                               raw_pose_data=raw_pose_data,
                               colors=instcolor,
                               material=mp)
    if name is None:
        name = str(uuid.uuid4())
    Visualizer3D._scene.add_object(name, obj)
            break

        # store clicked pt in 3D
        logging.info('Point collection complete')
        depth = depth_im[clicked_pt[1], clicked_pt[0]]
        p = ir_intrinsics.deproject_pixel(
            depth, Point(clicked_pt, frame=ir_intrinsics.frame))
        robot_points_camera.append(p.data)

    # reset
    y.reset_home()
    y.stop()

    # collect
    true_robot_points_world = PointCloud(
        np.array([T.translation for T in robot_poses]).T,
        frame=ir_intrinsics.frame)
    est_robot_points_world = T_camera_world * PointCloud(
        np.array(robot_points_camera).T, frame=ir_intrinsics.frame)
    mean_true_robot_point = np.mean(true_robot_points_world.data,
                                    axis=1).reshape(3, 1)
    mean_est_robot_point = np.mean(est_robot_points_world.data,
                                   axis=1).reshape(3, 1)

    # fit a plane
    best_R_cb_world = None
    best_dist = np.inf
    k = 0
    K = 25
    num_poses = len(robot_poses)
    sample_size = int(num_poses * 0.3)
    min_inliers = int(num_poses * 0.6)
    dist_thresh = 0.0015
    true_robot_points_world._data = \
        true_robot_points_world._data - mean_true_robot_point
def register(sensor, config):
    """Registers a camera to a chessboard.

    Parameters
    ----------
    sensor : :obj:`perception.RgbdSensor`
        the sensor to register
    config : :obj:`autolab_core.YamlConfig` or :obj:`dict`
        configuration file for registration

    Returns
    -------
    :obj:`ChessboardRegistrationResult`
        the result of registration

    Notes
    -----
    The config must have the parameters specified in the Other Parameters
    section.

    Other Parameters
    ----------------
    num_transform_avg : int
        the number of independent registrations to average together
    num_images : int
        the number of images to read for each independent registration
    corners_x : int
        the number of chessboard corners in the x-direction
    corners_y : int
        the number of chessboard corners in the y-direction
    color_image_rescale_factor : float
        amount to rescale the color image for detection (numbers around
        4-8 are useful)
    vis : bool
        whether or not to visualize the registration
    """
    # read config
    num_transform_avg = config['num_transform_avg']
    num_images = config['num_images']
    sx = config['corners_x']
    sy = config['corners_y']
    point_order = config['point_order']
    color_image_rescale_factor = config['color_image_rescale_factor']
    flip_normal = config['flip_normal']
    y_points_left = False
    if 'y_points_left' in config.keys() and sx == sy:
        y_points_left = config['y_points_left']
        num_images = 1
    vis = config['vis']

    # read params from sensor
    logging.info('Registering camera %s' % (sensor.frame))
    ir_intrinsics = sensor.ir_intrinsics

    # repeat registration multiple times and average results
    R = np.zeros([3, 3])
    t = np.zeros([3, 1])
    points_3d_plane = PointCloud(np.zeros([3, sx * sy]),
                                 frame=sensor.ir_frame)

    k = 0
    while k < num_transform_avg:
        # average a bunch of depth images together
        depth_ims = None
        for i in range(num_images):
            start = time.time()
            small_color_im, new_depth_im, _ = sensor.frames()
            end = time.time()
            logging.info('Frames Runtime: %.3f' % (end - start))
            if depth_ims is None:
                depth_ims = np.zeros(
                    [new_depth_im.height, new_depth_im.width, num_images])
            depth_ims[:, :, i] = new_depth_im.data
        med_depth_im = np.median(depth_ims, axis=2)
        depth_im = DepthImage(med_depth_im, sensor.ir_frame)

        # find the corner pixels in an upsampled version of the color image
        big_color_im = small_color_im.resize(color_image_rescale_factor)
        corner_px = big_color_im.find_chessboard(sx=sx, sy=sy)
        if corner_px is None:
            logging.error(
                'No chessboard detected! Check camera exposure settings')
            continue
        if vis:
            plt.figure()
            plt.imshow(big_color_im.data)
            for i in range(sx):
                plt.scatter(corner_px[i, 0], corner_px[i, 1], s=25, c='b')
            plt.show()

        # convert back to original image
        small_corner_px = corner_px / color_image_rescale_factor
        if vis:
            plt.figure()
            plt.imshow(small_color_im.data)
            for i in range(sx):
                plt.scatter(small_corner_px[i, 0], small_corner_px[i, 1],
                            s=25, c='b')
            plt.axis('off')
            plt.show()

        # project points into 3D
        camera_intr = sensor.ir_intrinsics
        points_3d = camera_intr.deproject(depth_im)

        # get round chessboard ind
        corner_px_round = np.round(small_corner_px).astype(np.uint16)
        corner_ind = depth_im.ij_to_linear(corner_px_round[:, 0],
                                           corner_px_round[:, 1])
        if corner_ind.shape[0] != sx * sy:
            logging.warning('Did not find all corners. Discarding...')
            continue

        # average 3d points
        points_3d_plane = (k * points_3d_plane +
                           points_3d[corner_ind]) / (k + 1)
        logging.info('Registration iteration %d of %d' %
                     (k + 1, config['num_transform_avg']))
        k += 1

    # fit a plane to the chessboard corners
    X = np.c_[points_3d_plane.x_coords, points_3d_plane.y_coords,
              np.ones(points_3d_plane.num_points)]
    y = points_3d_plane.z_coords
    A = X.T.dot(X)
    b = X.T.dot(y)
    w = np.linalg.inv(A).dot(b)
    n = np.array([w[0], w[1], -1])
    n = n / np.linalg.norm(n)
    if flip_normal:
        n = -n
    mean_point_plane = points_3d_plane.mean()

    # find x-axis of the chessboard coordinates on the fitted plane
    T_camera_table = RigidTransform(
        translation=-points_3d_plane.mean().data,
        from_frame=points_3d_plane.frame,
        to_frame='table')
    points_3d_centered = T_camera_table * points_3d_plane

    # get points along y
    if point_order == 'row_major':
        coord_pos_x = int(math.floor(sx * sy / 2.0))
        coord_neg_x = int(math.ceil(sx * sy / 2.0))
        points_pos_x = points_3d_centered[coord_pos_x:]
        points_neg_x = points_3d_centered[:coord_neg_x]
        x_axis = np.mean(points_pos_x.data, axis=1) - np.mean(
            points_neg_x.data, axis=1)
        x_axis = x_axis - np.vdot(x_axis, n) * n
        x_axis = x_axis / np.linalg.norm(x_axis)
        y_axis = np.cross(n, x_axis)
    else:
        coord_pos_y = int(math.floor(sx * (sy - 1) / 2.0))
        coord_neg_y = int(math.ceil(sx * (sy + 1) / 2.0))
        points_pos_y = points_3d_centered[:coord_pos_y]
        points_neg_y = points_3d_centered[coord_neg_y:]
        y_axis = np.mean(points_pos_y.data, axis=1) - np.mean(
            points_neg_y.data, axis=1)
        y_axis = y_axis - np.vdot(y_axis, n) * n
        y_axis = y_axis / np.linalg.norm(y_axis)
        x_axis = np.cross(-n, y_axis)

    # produce translation and rotation from plane center and chessboard basis
    rotation_cb_camera = RigidTransform.rotation_from_axes(x_axis, y_axis, n)
    translation_cb_camera = mean_point_plane.data
    T_cb_camera = RigidTransform(rotation=rotation_cb_camera,
                                 translation=translation_cb_camera,
                                 from_frame='cb',
                                 to_frame=sensor.frame)
    if y_points_left and np.abs(T_cb_camera.y_axis[1]) > 0.1:
        if T_cb_camera.x_axis[0] > 0:
            T_cb_camera.rotation = T_cb_camera.rotation.dot(
                RigidTransform.z_axis_rotation(-np.pi / 2).T)
        else:
            T_cb_camera.rotation = T_cb_camera.rotation.dot(
                RigidTransform.z_axis_rotation(np.pi / 2).T)
    T_camera_cb = T_cb_camera.inverse()

    # optionally display cb corners with detected pose in 3d space
    if config['debug']:
        # display image with axes overlayed
        cb_center_im = camera_intr.project(
            Point(T_cb_camera.translation, frame=sensor.ir_frame))
        cb_x_im = camera_intr.project(
            Point(T_cb_camera.translation +
                  T_cb_camera.x_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        cb_y_im = camera_intr.project(
            Point(T_cb_camera.translation +
                  T_cb_camera.y_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        cb_z_im = camera_intr.project(
            Point(T_cb_camera.translation +
                  T_cb_camera.z_axis * config['scale_amt'],
                  frame=sensor.ir_frame))
        x_line = np.array([cb_center_im.data, cb_x_im.data])
        y_line = np.array([cb_center_im.data, cb_y_im.data])
        z_line = np.array([cb_center_im.data, cb_z_im.data])

        plt.figure()
        plt.imshow(small_color_im.data)
        plt.scatter(cb_center_im.data[0], cb_center_im.data[1])
        plt.plot(x_line[:, 0], x_line[:, 1], c='r', linewidth=3)
        plt.plot(y_line[:, 0], y_line[:, 1], c='g', linewidth=3)
        plt.plot(z_line[:, 0], z_line[:, 1], c='b', linewidth=3)
        plt.axis('off')
        plt.title('Chessboard frame in camera %s' % (sensor.frame))
        plt.show()

    return ChessboardRegistrationResult(T_camera_cb, points_3d_plane)
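# Aside: the least-squares plane fit above solves z = a*x + b*y + c via the
# normal equations and takes [a, b, -1] as the (unnormalized) plane normal.
# A minimal numpy sketch on stand-in points, using lstsq instead of an
# explicit inverse:
import numpy as np

pts = np.random.rand(100, 3)               # stand-in for the corner points
X = np.c_[pts[:, 0], pts[:, 1], np.ones(len(pts))]
w, _, _, _ = np.linalg.lstsq(X, pts[:, 2], rcond=None)
n = np.array([w[0], w[1], -1.0])
n /= np.linalg.norm(n)                     # unit normal of the fitted plane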
def transform_dense(self, delta_T, detailed=False):
    """Transform the grid by pose T and scale with canonical reference
    frame at the SDF center with axis alignment.

    Parameters
    ----------
    delta_T : SimilarityTransform
        the transformation from the current frame of reference to the new
        frame of reference
    detailed : bool
        whether or not to use interpolation

    Returns
    -------
    :obj:`Sdf3D`
        new sdf with grid warped by T
    """
    # map all surface points to their new location
    start_t = time.clock()

    # form points array
    if self.pts_ is None:
        [x_ind, y_ind, z_ind] = np.indices(self.dims_)
        self.pts_ = np.c_[x_ind.flatten().T,
                          np.c_[y_ind.flatten().T,
                                z_ind.flatten().T]].astype(np.float32)

    # transform points
    num_pts = self.pts_.shape[0]
    pts_sdf = self.T_grid_sdf_ * PointCloud(self.pts_.T, frame='grid')
    pts_sdf_tf = delta_T.as_frames('sdf', 'sdf') * pts_sdf
    pts_grid_tf = self.T_sdf_grid_ * pts_sdf_tf
    pts_tf = pts_grid_tf.data.T
    all_points_t = time.clock()

    # transform the center
    origin_sdf = self.T_grid_sdf_ * Point(self.origin_, frame='grid')
    origin_sdf_tf = delta_T.as_frames('sdf', 'sdf') * origin_sdf
    origin_tf = self.T_sdf_grid_ * origin_sdf_tf
    origin_tf = origin_tf.data

    # use same resolution (since indices will be rescaled)
    resolution_tf = self.resolution_
    origin_res_t = time.clock()

    # add each point to the new pose
    if detailed:
        sdf_data_tf = np.zeros([num_pts, 1])
        for i in range(num_pts):
            sdf_data_tf[i] = self[pts_tf[i, 0], pts_tf[i, 1], pts_tf[i, 2]]
    else:
        pts_tf_round = np.round(pts_tf).astype(np.int64)

        # snap to closest boundary
        pts_tf_round[:, 0] = np.max(np.c_[np.zeros([num_pts, 1]),
                                          pts_tf_round[:, 0]], axis=1)
        pts_tf_round[:, 0] = np.min(
            np.c_[(self.dims_[0] - 1) * np.ones([num_pts, 1]),
                  pts_tf_round[:, 0]], axis=1)
        pts_tf_round[:, 1] = np.max(np.c_[np.zeros([num_pts, 1]),
                                          pts_tf_round[:, 1]], axis=1)
        pts_tf_round[:, 1] = np.min(
            np.c_[(self.dims_[1] - 1) * np.ones([num_pts, 1]),
                  pts_tf_round[:, 1]], axis=1)
        pts_tf_round[:, 2] = np.max(np.c_[np.zeros([num_pts, 1]),
                                          pts_tf_round[:, 2]], axis=1)
        pts_tf_round[:, 2] = np.min(
            np.c_[(self.dims_[2] - 1) * np.ones([num_pts, 1]),
                  pts_tf_round[:, 2]], axis=1)

        sdf_data_tf = self.data_[pts_tf_round[:, 0], pts_tf_round[:, 1],
                                 pts_tf_round[:, 2]]

    sdf_data_tf_grid = sdf_data_tf.reshape(self.dims_)
    tf_t = time.clock()

    logging.debug('Sdf3D: Time to transform coords: %f' %
                  (all_points_t - start_t))
    logging.debug('Sdf3D: Time to transform origin: %f' %
                  (origin_res_t - all_points_t))
    logging.debug('Sdf3D: Time to transfer sd: %f' % (tf_t - origin_res_t))

    return Sdf3D(sdf_data_tf_grid,
                 origin_tf,
                 resolution_tf,
                 use_abs=self._use_abs_,
                 T_sdf_world=self.T_sdf_world_)
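# Aside: the six max/min lines that snap rounded indices to the grid bounds
# in transform_dense are equivalent to one np.clip with per-axis upper
# bounds. A minimal sketch with illustrative dims:
import numpy as np

dims = (32, 32, 32)
pts = np.random.randn(100, 3) * 40              # some indices land outside
pts_round = np.round(pts).astype(np.int64)
pts_round = np.clip(pts_round, 0, np.array(dims) - 1)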
def test_inits(self, num_points=10):
    # basic init
    data = np.random.rand(3, num_points)
    p_a = PointCloud(data, 'a')
    self.assertTrue(np.abs(data.shape[0] - p_a.shape[0]) < 1e-5,
                    msg='BagOfPoints has incorrect shape')
    self.assertTrue(np.abs(data.shape[1] - p_a.shape[1]) < 1e-5,
                    msg='BagOfPoints has incorrect shape')
    self.assertTrue(np.sum(np.abs(data - p_a.data)) < 1e-5,
                    msg='BagOfPoints has incorrect data')
    self.assertTrue(np.abs(data.shape[0] - p_a.dim) < 1e-5,
                    msg='BagOfPoints has incorrect dim')
    self.assertTrue(np.abs(data.shape[1] - p_a.num_points) < 1e-5,
                    msg='BagOfPoints has incorrect num points')
    self.assertEqual('a', p_a.frame, msg='BagOfPoints has incorrect frame')

    # point init with multiple points
    caught_bad_init = False
    try:
        data = np.random.rand(3, num_points)
        p_a = Point(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(
        caught_bad_init,
        msg='Failed to catch point init with more than one point')

    # point init with bad dim
    caught_bad_init = False
    try:
        data = np.random.rand(3, 3)
        p_a = Point(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch point init with 3x3')

    # point cloud with bad shape
    caught_bad_init = False
    try:
        data = np.random.rand(3, 3, 3)
        p_a = PointCloud(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch point cloud init with 3x3x3')

    # point cloud with bad dim
    caught_bad_init = False
    try:
        data = np.random.rand(4, num_points)
        p_a = PointCloud(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch point cloud init with 4x%d' %
                    (num_points))

    # point cloud with bad type
    caught_bad_init = False
    try:
        data = 100 * np.random.rand(3, num_points).astype(np.uint8)
        p_a = PointCloud(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch point cloud init with uint type')

    # image coords with bad type
    caught_bad_init = False
    try:
        data = np.random.rand(2, num_points)
        p_a = ImageCoords(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(
        caught_bad_init,
        msg='Failed to catch image coords init with float type')

    # image coords with bad dim
    caught_bad_init = False
    try:
        data = 100 * np.random.rand(3, num_points).astype(np.uint16)
        p_a = ImageCoords(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch image coords init with 3xN array')

    # rgb cloud with bad type
    caught_bad_init = False
    try:
        data = np.random.rand(3, num_points)
        p_a = RgbCloud(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch rgb cloud init with float type')

    # rgb cloud with bad dim
    caught_bad_init = False
    try:
        data = 100 * np.random.rand(4, num_points).astype(np.uint16)
        p_a = RgbCloud(data, 'a')
    except ValueError:
        caught_bad_init = True
    self.assertTrue(caught_bad_init,
                    msg='Failed to catch rgb cloud init with 4xN array')
def detect(detector_type, config, run_dir, test_config):
    """Run PCL-based detection on a depth-image-based dataset.

    Parameters
    ----------
    detector_type : str
        type of PCL detector ('euclidean' or 'region_growing')
    config : dict
        config for a PCL detector
    run_dir : str
        Directory to save outputs in. Output will be saved in pred_masks,
        pred_info, and modal_segmasks_processed subdirectories.
    test_config : dict
        config containing dataset information
    """
    ##################################################################
    # Set up output directories
    ##################################################################

    # Create subdirectory for prediction masks
    pred_dir = os.path.join(run_dir, 'pred_masks')
    mkdir_if_missing(pred_dir)

    # Create subdirectory for prediction scores & bboxes
    pred_info_dir = os.path.join(run_dir, 'pred_info')
    mkdir_if_missing(pred_info_dir)

    # Create subdirectory for transformed GT segmasks
    resized_segmask_dir = os.path.join(run_dir, 'modal_segmasks_processed')
    mkdir_if_missing(resized_segmask_dir)

    ##################################################################
    # Set up input directories
    ##################################################################
    dataset_dir = test_config['path']
    indices_arr = np.load(os.path.join(dataset_dir, test_config['indices']))

    # Input depth image data (numpy files, not .pngs)
    depth_dir = os.path.join(dataset_dir, test_config['images'])

    # Input GT binary masks dir
    gt_mask_dir = os.path.join(dataset_dir, test_config['masks'])

    # Input binary mask data
    bin_mask_dir = None
    if 'bin_masks' in test_config.keys():
        bin_mask_dir = os.path.join(dataset_dir, test_config['bin_masks'])

    # Input camera intrinsics
    camera_intrinsics_fn = os.path.join(dataset_dir,
                                        'camera_intrinsics.intr')
    camera_intrs = CameraIntrinsics.load(camera_intrinsics_fn)

    image_ids = np.arange(indices_arr.size)

    ##################################################################
    # Process each image
    ##################################################################
    for image_id in tqdm(image_ids):
        base_name = 'image_{:06d}'.format(indices_arr[image_id])
        output_name = 'image_{:06d}'.format(image_id)
        depth_image_fn = base_name + '.npy'

        # Extract depth image
        depth_data = np.load(os.path.join(depth_dir, depth_image_fn))
        depth_im = DepthImage(depth_data, camera_intrs.frame)
        depth_im = depth_im.inpaint(0.25)

        # Mask out bin pixels if appropriate/necessary
        if bin_mask_dir:
            mask_im = BinaryImage.open(
                os.path.join(bin_mask_dir, base_name + '.png'),
                camera_intrs.frame)
            mask_im = mask_im.resize(depth_im.shape[:2])
            depth_im = depth_im.mask_binary(mask_im)

        point_cloud = camera_intrs.deproject(depth_im)
        point_cloud.remove_zero_points()
        pcl_cloud = pcl.PointCloud(point_cloud.data.T.astype(np.float32))
        tree = pcl_cloud.make_kdtree()

        if detector_type == 'euclidean':
            segmentor = pcl_cloud.make_EuclideanClusterExtraction()
            segmentor.set_ClusterTolerance(config['tolerance'])
        elif detector_type == 'region_growing':
            segmentor = pcl_cloud.make_RegionGrowing(ksearch=50)
            segmentor.set_NumberOfNeighbours(config['n_neighbors'])
            segmentor.set_CurvatureThreshold(config['curvature'])
            segmentor.set_SmoothnessThreshold(config['smoothness'])
        else:
            print('PCL detector type not supported')
            exit()

        segmentor.set_MinClusterSize(config['min_cluster_size'])
        segmentor.set_MaxClusterSize(config['max_cluster_size'])
        segmentor.set_SearchMethod(tree)
        cluster_indices = segmentor.Extract()

        # Set up predicted masks and metadata
        indiv_pred_masks = []
        r_info = {
            'rois': [],
            'scores': [],
            'class_ids': [],
        }

        for i, cluster in enumerate(cluster_indices):
            points = pcl_cloud.to_array()[cluster]
            indiv_pred_mask = camera_intrs.project_to_image(
                PointCloud(points.T, frame=camera_intrs.frame)).to_binary()
            indiv_pred_mask.data[indiv_pred_mask.data > 0] = 1
            indiv_pred_masks.append(indiv_pred_mask.data)

            # Compute bounding box, score, class_id
            nonzero_pix = np.nonzero(indiv_pred_mask.data)
            min_x, max_x = np.min(nonzero_pix[1]), np.max(nonzero_pix[1])
            min_y, max_y = np.min(nonzero_pix[0]), np.max(nonzero_pix[0])
            r_info['rois'].append([min_y, min_x, max_y, max_x])
            r_info['scores'].append(1.0)
            r_info['class_ids'].append(1)

        r_info['rois'] = np.array(r_info['rois'])
        r_info['scores'] = np.array(r_info['scores'])
        r_info['class_ids'] = np.array(r_info['class_ids'])

        # Write the predicted masks and metadata
        if indiv_pred_masks:
            pred_mask_output = np.stack(indiv_pred_masks).astype(np.uint8)
        else:
            pred_mask_output = np.array(indiv_pred_masks).astype(np.uint8)

        # Save out ground-truth mask as array of shape (n, h, w)
        indiv_gt_masks = []
        gt_mask = cv2.imread(os.path.join(gt_mask_dir, base_name + '.png'))
        gt_mask = cv2.resize(
            gt_mask,
            (depth_im.shape[1], depth_im.shape[0])).astype(np.uint8)[:, :, 0]
        num_gt_masks = np.max(gt_mask)
        for i in range(1, num_gt_masks + 1):
            indiv_gt_mask = (gt_mask == i)
            if np.any(indiv_gt_mask):
                indiv_gt_masks.append(gt_mask == i)
        gt_mask_output = np.stack(indiv_gt_masks)

        np.save(os.path.join(resized_segmask_dir, output_name + '.npy'),
                gt_mask_output)
        np.save(os.path.join(pred_dir, output_name + '.npy'),
                pred_mask_output)
        np.save(os.path.join(pred_info_dir, output_name + '.npy'), r_info)

    print('Saved prediction masks to:\t {}'.format(pred_dir))
    print('Saved prediction info (bboxes, scores, classes) to:\t {}'.format(
        pred_info_dir))
    print('Saved transformed GT segmasks to:\t {}'.format(
        resized_segmask_dir))

    return pred_dir, pred_info_dir, resized_segmask_dir