def generate_tsdf_3d_ewa_image(depth_image, camera, camera_extrinsic_matrix=np.eye( 4, dtype=np.float32), field_shape=np.array([128, 128, 128]), default_value=1, voxel_size=0.004, array_offset=np.array([-64, -64, 64]), narrow_band_width_voxels=20, back_cutoff_voxels=np.inf, gaussian_covariance_scale=1.0): """ Generate 3D TSDF field based on elliptical Gaussian averages (EWA) of depth values from the provided image. Elliptical Gaussian filters are projected from spherical 3D Gaussian functions onto the depth image and convolved with a circular 2D Gaussain filter before averaging the depth values. :type depth_image: np.ndarray :param depth_image: depth image to use :type camera: calib.camera.DepthCamera :param camera: camera used to generate the depth image :param voxel_size: voxel size, in meters :param array_offset: offset of the TSDF grid from the world origin :param camera_extrinsic_matrix: matrix representing transformation of the camera (incl. rotation and translation) [ R | T] [ 0 | 1] :param default_value: default initial TSDF value :param field_shape: shape of the TSDF grid to generate :param narrow_band_width_voxels: span (in voxels) where signed distance is between -1 and 1 :param back_cutoff_voxels: where to truncate the negative voxel values (currently not supported!) :param gaussian_covariance_scale: scale of elliptical gaussians (relative to voxel size) :return: resulting 3D TSDF """ # TODO: use back_cutoff_voxels for additional limit on # "if signed_distance < -narrow_band_half_width" (maybe?) if default_value == 1: field = np.ones(field_shape, dtype=np.float32) elif default_value == 0: field = np.zeros(field_shape, dtype=np.float32) else: field = np.ndarray(field_shape, dtype=np.float32) field.fill(default_value) camera_intrinsic_matrix = camera.intrinsics.intrinsic_matrix depth_ratio = camera.depth_unit_ratio narrow_band_half_width = narrow_band_width_voxels / 2 * voxel_size # in metric units w_voxel = 1.0 camera_rotation_matrix = camera_extrinsic_matrix[0:3, 0:3] covariance_voxel_sphere_world_space = np.eye(3) * ( gaussian_covariance_scale * voxel_size) covariance_camera_space = camera_rotation_matrix.dot(covariance_voxel_sphere_world_space) \ .dot(camera_rotation_matrix.T) image_space_scaling_matrix = camera.intrinsics.intrinsic_matrix[0:2, 0:2] squared_radius_threshold = 4.0 * gaussian_covariance_scale * voxel_size for z_field in range(field_shape[2]): for y_field in range(field_shape[1]): for x_field in range(field_shape[0]): # coordinates deliberately flipped here to maintain consistency between Python & C++ implementations # Eigen Tensors being used are column-major, whereas here we use row-major layout by default x_voxel = (z_field + array_offset[0]) * voxel_size y_voxel = (y_field + array_offset[1]) * voxel_size z_voxel = (x_field + array_offset[2]) * voxel_size voxel_world = np.array([[x_voxel, y_voxel, z_voxel, w_voxel]], dtype=np.float32).T voxel_camera = camera_extrinsic_matrix.dot( voxel_world).flatten()[:3] if voxel_camera[2] <= near_clipping_distance: continue # distance along ray from camera to voxel center ray_distance = np.linalg.norm(voxel_camera) # squared distance along optical axis from camera to voxel z_cam_squared = voxel_camera[2]**2 inv_z_cam = 1 / voxel_camera[2] projection_jacobian = \ np.array([[inv_z_cam, 0, -voxel_camera[0] / z_cam_squared], [0, inv_z_cam, -voxel_camera[1] / z_cam_squared], [voxel_camera[0] / ray_distance, voxel_camera[1] / ray_distance, voxel_camera[2] / ray_distance]]) remapped_covariance = projection_jacobian.dot(covariance_camera_space) \ .dot(projection_jacobian.T) final_covariance = image_space_scaling_matrix.dot( remapped_covariance[0:2, 0:2]).dot( image_space_scaling_matrix.T) + np.eye(2) Q = np.linalg.inv(final_covariance) gaussian = eg.EllipticalGaussian( eg.ImplicitEllipse(Q=Q, F=squared_radius_threshold)) voxel_image = (camera_intrinsic_matrix.dot(voxel_camera) / voxel_camera[2])[:2] voxel_image = voxel_image.reshape(-1, 1) bounds_max = gaussian.ellipse.get_bounds() result = find_sampling_bounds_helper(bounds_max, depth_image, voxel_image) if result is None: continue else: (start_x, end_x, start_y, end_y) = result weights_sum = 0.0 depth_sum = 0 for y_sample in range(start_y, end_y): for x_sample in range(start_x, end_x): sample_centered = np.array( [[x_sample], [y_sample]], dtype=np.float64) - voxel_image dist_sq = gaussian.get_distance_from_center_squared( sample_centered) if dist_sq > squared_radius_threshold: continue weight = gaussian.compute(dist_sq) surface_depth = depth_image[y_sample, x_sample] * depth_ratio if surface_depth <= 0.0: continue depth_sum += weight * surface_depth weights_sum += weight if depth_sum <= 0.0: continue final_depth = depth_sum / weights_sum signed_distance = final_depth - voxel_camera[2] field[z_field, y_field, x_field] = common.compute_tsdf_value( signed_distance, narrow_band_half_width) return field
def generate_tsdf_2d_ewa_tsdf_inclusive(depth_image, camera, image_y_coordinate, camera_extrinsic_matrix=np.eye( 4, dtype=np.float32), field_size=128, default_value=1, voxel_size=0.004, array_offset=np.array([-64, -64, 64]), narrow_band_width_voxels=20, back_cutoff_voxels=np.inf, gaussian_covariance_scale=1.0): """ Generate 2D TSDF field based on elliptical Gaussian averages (EWA) of TSDF values based on corresponding depth values from the provided image. When the sampling range for a particular voxel partially falls outside the image, tsdf value of 1.0 is used during averaging for points that are outside. Elliptical Gaussian filters are projected from spherical 3D Gaussian functions onto the depth image and convolved with a circular 2D Gaussain filter before averaging the depth values. :param narrow_band_width_voxels: desired width, in voxels, of the narrow band (non-truncated region) of the TSDF :param array_offset: assumes camera is at array_offset voxels relative to TSDF grid :param camera_extrinsic_matrix: matrix representing transformation of the camera (incl. rotation and translation) [ R | T] [ 0 | 1] :param voxel_size: voxel size, in meters :param default_value: default initial TSDF value :param field_size: :param depth_image: :type depth_image: np.ndarray :param camera: :type camera: calib.camera.DepthCamera :param image_y_coordinate: pixel row in the depth image to use for TSDF generation :type image_y_coordinate: int :param gaussian_covariance_scale: scaling of the 3D gaussian, which controls the amount of smoothing :type gaussian_covariance_scale: float :return: resulting 2D TSDF """ # TODO: use back_cutoff_voxels for additional limit if default_value == 1: field = np.ones((field_size, field_size), dtype=np.float32) elif default_value == 0: field = np.zeros((field_size, field_size), dtype=np.float32) else: field = np.ndarray((field_size, field_size), dtype=np.float32) field.fill(default_value) camera_intrinsic_matrix = camera.intrinsics.intrinsic_matrix depth_ratio = camera.depth_unit_ratio narrow_band_half_width = narrow_band_width_voxels / 2 * voxel_size # in metric units w_voxel = 1.0 y_voxel = 0 camera_rotation_matrix = camera_extrinsic_matrix[0:3, 0:3] covariance_voxel_sphere_world_space = np.eye(3) * ( gaussian_covariance_scale * voxel_size) covariance_camera_space = camera_rotation_matrix.dot(covariance_voxel_sphere_world_space) \ .dot(camera_rotation_matrix.T) image_space_scaling_matrix = camera_intrinsic_matrix[0:2, 0:2].copy() squared_radius_threshold = 4.0 * gaussian_covariance_scale * voxel_size for y_field in range(field_size): for x_field in range(field_size): x_voxel = (x_field + array_offset[0]) * voxel_size z_voxel = (y_field + array_offset[2]) * voxel_size voxel_world = np.array([[x_voxel, y_voxel, z_voxel, w_voxel]], dtype=np.float32).T voxel_camera = camera_extrinsic_matrix.dot( voxel_world).flatten()[:3] if voxel_camera[2] <= near_clipping_distance: continue voxel_image = (camera_intrinsic_matrix.dot(voxel_camera) / voxel_camera[2])[:2] voxel_image[1] = image_y_coordinate voxel_image = voxel_image.reshape(-1, 1) x_image = voxel_image[0] y_image = voxel_image[1] margin = 3 if y_image < -margin or y_image >= depth_image.shape[0] + margin \ or x_image < -margin or x_image >= depth_image.shape[1] + margin: continue # distance along ray from camera to voxel ray_distance = np.linalg.norm(voxel_camera) # squared distance along optical axis from camera to voxel z_cam_squared = voxel_camera[2]**2 projection_jacobian = \ np.array([[1 / voxel_camera[2], 0, -voxel_camera[0] / z_cam_squared], [0, 1 / voxel_camera[2], -voxel_camera[1] / z_cam_squared], [voxel_camera[0] / ray_distance, voxel_camera[1] / ray_distance, voxel_camera[2] / ray_distance]]) remapped_covariance = projection_jacobian.dot(covariance_camera_space) \ .dot(projection_jacobian.T) final_covariance = image_space_scaling_matrix.dot( remapped_covariance[0:2, 0:2]).dot( image_space_scaling_matrix.T) + np.eye(2) Q = np.linalg.inv(final_covariance) gaussian = eg.EllipticalGaussian( eg.ImplicitEllipse(Q=Q, F=squared_radius_threshold)) bounds_max = gaussian.ellipse.get_bounds() result = find_sampling_bounds_inclusive_helper( bounds_max, depth_image, voxel_image) if result is None: continue else: (start_x, end_x, start_y, end_y) = result weights_sum = 0.0 tsdf_sum = 0.0 for y_sample in range(start_y, end_y): for x_sample in range(start_x, end_x): sample_centered = np.array([[x_sample], [y_sample]], dtype=np.float64) - voxel_image dist_sq = gaussian.get_distance_from_center_squared( sample_centered) if dist_sq > squared_radius_threshold: continue weight = gaussian.compute(dist_sq) if y_sample < 0 or y_sample >= depth_image.shape[0] \ or x_sample < 0 or x_sample >= depth_image.shape[1]: tsdf_sum += weight * 1.0 else: surface_depth = depth_image[y_sample, x_sample] * depth_ratio if surface_depth <= 0.0: continue # signed distance from surface to voxel along camera axis signed_distance = surface_depth - voxel_camera[2] tsdf_value = common.compute_tsdf_value( signed_distance, narrow_band_half_width) tsdf_sum += weight * tsdf_value weights_sum += weight if weights_sum == 0.0: continue field[y_field, x_field] = tsdf_sum / weights_sum return field