def object_to_world(voxels,
                    euler_angles_x,
                    euler_angles_y,
                    translation_vector,
                    target_volume_size=(128, 128, 128)):
  """Apply the transformations to the voxels and place them in world coords."""
  scale_factor = 1.82  # object to world voxel space scale factor
  translation_vector = tf.expand_dims(translation_vector, axis=-1)

  sampling_points = tf.cast(sampling_points_from_3d_grid(target_volume_size),
                            tf.float32)  # 128^3 X 3
  transf_matrix_x = rotation_matrix_3d.from_euler(euler_angles_x)  # [B, 3, 3]
  transf_matrix_y = rotation_matrix_3d.from_euler(euler_angles_y)  # [B, 3, 3]
  transf_matrix = tf.matmul(transf_matrix_x, transf_matrix_y)  # [B, 3, 3]
  transf_matrix = transf_matrix * scale_factor  # [B, 3, 3]
  sampling_points = tf.matmul(transf_matrix,
                              tf.transpose(sampling_points))  # [B, 3, N]
  translation_vector = tf.matmul(transf_matrix, translation_vector)  # [B, 3, 1]
  sampling_points = sampling_points - translation_vector
  sampling_points = tf.linalg.matrix_transpose(sampling_points)
  sampling_points = sampling_points_to_voxel_index(sampling_points,
                                                   target_volume_size)
  sampling_points = tf.cast(sampling_points, tf.float32)
  interpolated_points = trilinear.interpolate(voxels, sampling_points)
  interpolated_voxels = tf.reshape(interpolated_points,
                                   [-1] + list(target_volume_size) + [4])
  return interpolated_voxels
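# A minimal call sketch for object_to_world (all values are placeholders),
# assuming sampling_points_from_3d_grid and sampling_points_to_voxel_index
# from this module are in scope and that the grid carries RGBA channels, as
# the final reshape implies.
import tensorflow as tf

voxels = tf.zeros([1, 128, 128, 128, 4])       # [B, H, W, D, 4]
euler_x = tf.constant([[0.3, 0.0, 0.0]])       # [B, 3]
euler_y = tf.constant([[0.0, 0.5, 0.0]])       # [B, 3]
translation = tf.constant([[0.1, 0.0, -0.2]])  # [B, 3]

world_voxels = object_to_world(voxels, euler_x, euler_y, translation)
print(world_voxels.shape)  # (1, 128, 128, 128, 4)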
def __call__(self, prediction, gt, sample):
  sdfs, pointclouds, sizes_3d, translations_3d, rotations_3d = prediction

  # Override the predicted poses of the first two objects with fixed values.
  translations_3d = translations_3d.numpy()
  translations_3d[0, 0, :] = [0.0, 0.0, 0.3]
  translations_3d[0, 1, :] = [0.0, -0.25, 0.0]
  translations_3d = tf.constant(translations_3d)

  rotations_3d = rotations_3d.numpy()
  rotations_3d[0, 0, :] = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]
  rotations_3d[0, 1, :] = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]
  rotations_3d = tf.constant(rotations_3d)

  sizes_3d = sizes_3d.numpy()
  sizes_3d[0, 0, :] = [0.5, 1.0, 0.5]
  sizes_3d[0, 1, :] = [0.5, 0.5, 0.5]
  sizes_3d = tf.constant(sizes_3d)

  pointclouds_world = centernet_utils.transform_pointcloud(
      pointclouds / 2.0, sizes_3d, rotations_3d, translations_3d)

  all_sdf_values = [[], [], []]
  total_collision_loss = tf.constant(0.0)
  num_objects = sdfs.shape[1]
  for i in range(num_objects):  # iterate over all objects
    object_loss = 0
    for j in range(num_objects):  # iterate over other objects
      if i == j:
        continue
      # Transform into coordinate system of other object
      size_3d_object_j = sizes_3d[:, j:j + 1]
      translation_3d_object_j = translations_3d[:, j:j + 1]
      rotations_3d_object_j = rotations_3d[:, j:j + 1]
      pointcloud_i_object_j = centernet_utils.transform_pointcloud(
          pointclouds_world[:, i:i + 1],
          size_3d_object_j,
          rotations_3d_object_j,
          translation_3d_object_j,
          inverse=True) * 2.0
      # Map into SDF
      sdf_j = tf.expand_dims(sdfs[:, j:j + 1], -1) * -1.0  # inside positive
      pointcloud_i_sdf_j = (
          pointcloud_i_object_j * (29.0 / 32.0) / 2.0 + 0.5) * 32.0 - 0.5
      sdf_values = trilinear.interpolate(sdf_j, pointcloud_i_sdf_j)
      sdf_values = tf.nn.relu(sdf_values + self.tol)
      all_sdf_values[i].append(sdf_values)
      object_loss += tf.reduce_sum(sdf_values, axis=[1, 2, 3])
    robust_loss = 0.5 * (object_loss * object_loss) / (
        1 + object_loss * object_loss)
    total_collision_loss += robust_loss

  if self.reduce_batch:
    loss = tf.reduce_mean(total_collision_loss)
  else:
    loss = total_collision_loss
  if self.return_values:
    return loss, all_sdf_values
  return loss
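# The per-object penetration sum above is squashed with a Geman-McClure-style
# robust kernel, rho(x) = 0.5 * x^2 / (1 + x^2), which stays near-quadratic
# for small penetrations and saturates below 0.5 for large ones. A
# self-contained sketch of just the kernel:
import tensorflow as tf

def geman_mcclure(x):
  """Robust kernel applied to the summed penetration loss."""
  x_sq = x * x
  return 0.5 * x_sq / (1.0 + x_sq)

print(geman_mcclure(tf.constant([0.1, 1.0, 10.0])).numpy())
# ~[0.005 0.25 0.495]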
def ray_sample_voxel_grid(ray_points, voxels, w2v_alpha, w2v_beta):
  """Estimates the voxel value at the ray points using trilinear interpolation.

  Args:
    ray_points: A tensor of shape `[B, M, N, 3]`, where B is the batch size, M
      is the number of rays and N the number of samples along the ray.
    voxels: A tensor of shape `[B, H, W, D, C]`, where B is the batch size, H,
      W, D are the height, width and depth of the voxel grid and C is the
      number of channels.
    w2v_alpha: A tensor of shape `[B, 3]`, where B is the batch size.
    w2v_beta: A tensor of shape `[B, 3]`, where B is the batch size.

  Returns:
    A tensor of shape `[B, M, N, C]`.
  """
  w2v_alpha = utils.match_intermediate_batch_dimensions(w2v_alpha, ray_points)
  w2v_beta = utils.match_intermediate_batch_dimensions(w2v_beta, ray_points)
  rays = w2v_alpha * ray_points + w2v_beta
  batch_size = tf.shape(voxels)[0]
  channels = tf.shape(voxels)[-1]
  target_shape = tf.concat([tf.shape(rays)[:-1], [channels]], axis=-1)
  rays = tf.reshape(rays, [batch_size, -1, 3])
  features_alpha = trilinear.interpolate(voxels, rays)
  return tf.reshape(features_alpha, target_shape)
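# All of these snippets bottom out in the same call; a self-contained sketch
# of trilinear.interpolate on dummy data, with shapes following the public
# tensorflow_graphics API:
import tensorflow as tf
from tensorflow_graphics.math.interpolation import trilinear

grid_3d = tf.random.uniform([1, 16, 16, 16, 2])  # [B, H, W, D, C]
sampling_points = tf.random.uniform([1, 8, 3], maxval=15.0)  # voxel coords

features = trilinear.interpolate(grid_3d, sampling_points)
print(features.shape)  # (1, 8, 2)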
def test_interpolate_jacobian_random(self, bsize, height, width, depth,
                                     channels):
  """Tests whether jacobian is correct."""
  grid_3d_np = np.random.uniform(size=(bsize, height, width, depth, channels))
  sampling_points_np = np.zeros((bsize, height * width * depth, 3))
  sampling_points_np[:, :, 0] = np.arange(0, height * width * depth)

  self.assert_jacobian_is_correct_fn(
      lambda grid_3d: trilinear.interpolate(grid_3d, sampling_points_np),
      [grid_3d_np])
def transform_volume(voxels, transformation_matrix, voxel_size=(128, 128, 128)):
  """Apply a transformation to the input voxels."""
  voxels = tf.convert_to_tensor(voxels)
  volume_sampling = sampling_points_from_3d_grid(voxel_size)
  volume_sampling = tf.matmul(transformation_matrix,
                              tf.transpose(a=volume_sampling))
  volume_sampling = tf.cast(tf.linalg.matrix_transpose(volume_sampling),
                            tf.float32)
  volume_sampling = sampling_points_to_voxel_index(volume_sampling, voxel_size)
  interpolated_voxels = trilinear.interpolate(voxels, volume_sampling)
  return tf.reshape(interpolated_voxels, tf.shape(voxels))
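# A hedged usage sketch for transform_volume, again assuming the module's
# sampling_points_from_3d_grid and sampling_points_to_voxel_index helpers are
# in scope; the rotation matrix comes from the tensorflow_graphics API already
# used elsewhere in these snippets.
import tensorflow as tf
from tensorflow_graphics.geometry.transformation import rotation_matrix_3d

voxels = tf.random.uniform([2, 32, 32, 32, 1])  # dummy [B, H, W, D, C] grids
rotation = rotation_matrix_3d.from_euler(
    tf.constant([[0.0, 1.5707963, 0.0]] * 2))  # 90 deg around y, [B, 3, 3]

rotated = transform_volume(voxels, rotation, voxel_size=(32, 32, 32))
print(rotated.shape)  # (2, 32, 32, 32, 1)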
def render_voxels_from_blender_camera(voxels,
                                      object_rotation,
                                      object_translation,
                                      height,
                                      width,
                                      focal,
                                      principal_point,
                                      camera_rotation_matrix,
                                      camera_translation_vector,
                                      frustum_size=(256, 256, 512),
                                      absorption_factor=0.1,
                                      cell_size=1.0,
                                      depth_min=0.0,
                                      depth_max=5.0):
  """Renders the voxels according to their position in the world."""
  batch_size = voxels.shape[0]
  voxel_size = voxels.shape[1]
  sampling_volume = sampling_points_from_frustum(height,
                                                 width,
                                                 focal,
                                                 principal_point,
                                                 depth_min=depth_min,
                                                 depth_max=depth_max,
                                                 frustum_size=frustum_size)
  sampling_volume = place_frustum_sampling_points_at_blender_camera(
      sampling_volume, camera_rotation_matrix, camera_translation_vector)
  interpolated_voxels = object_rotation_in_blender_world(voxels,
                                                         object_rotation)

  # Adjust the camera (translate the camera instead of the object)
  sampling_volume = sampling_volume - object_translation
  sampling_volume = sampling_volume / CUBE_BOX_DIM
  sampling_volume = sampling_points_to_voxel_index(sampling_volume, voxel_size)

  camera_voxels = trilinear.interpolate(interpolated_voxels, sampling_volume)
  camera_voxels = tf.reshape(camera_voxels,
                             [batch_size] + list(frustum_size) + [4])
  voxel_image = emission_absorption.render(
      camera_voxels, absorption_factor=absorption_factor, cell_size=cell_size)
  return voxel_image
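# The final compositing step delegates to the volumetric emission-absorption
# renderer from tensorflow_graphics; a self-contained sketch of just that call
# on a dummy RGBA frustum (all sizes are placeholders):
import tensorflow as tf
from tensorflow_graphics.rendering.volumetric import emission_absorption

camera_voxels = tf.random.uniform([1, 64, 64, 128, 4])  # [B, H, W, D, 4]
image = emission_absorption.render(
    camera_voxels, absorption_factor=0.1, cell_size=1.0)
print(image.shape)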
def generate_ground_image(height,
                          width,
                          focal,
                          principal_point,
                          camera_rotation_matrix,
                          camera_translation_vector,
                          ground_color=(0.43, 0.43, 0.8)):
  """Generate an image depicting only the ground."""
  batch_size = camera_rotation_matrix.shape[0]
  background_image = np.ones((batch_size, height, width, 1, 1),
                             dtype=np.float32)
  background_image[:, -1, ...] = 0  # Zero the bottom line for proper sampling

  # The projection of the ground depends on the top right corner
  # (approximation).
  plane_point_np = np.tile(
      np.array([[3.077984, 2.905388, 0.]], dtype=np.float32), (batch_size, 1))
  plane_point_rotated = rotation_matrix_3d.rotate(plane_point_np,
                                                  camera_rotation_matrix)
  plane_point_translated = plane_point_rotated + camera_translation_vector
  plane_point2d = perspective.project(plane_point_translated, focal,
                                      principal_point)
  _, y = tf.split(plane_point2d, [1, 1], axis=-1)
  sfactor = height / y

  helper_matrix1 = np.tile(
      np.array([[[1, 0, 0], [0, 0, 0], [0, 0, 0]]]), (batch_size, 1, 1))
  helper_matrix2 = np.tile(
      np.array([[[0, 0, 0], [0, 1, 0], [0, 0, 1]]]), (batch_size, 1, 1))
  transformation_matrix = tf.multiply(tf.expand_dims(sfactor, -1),
                                      helper_matrix1) + helper_matrix2
  plane_points = grid.generate((0., 0., 0.),
                               (float(height), float(width), 0.),
                               (height, width, 1))
  plane_points = tf.reshape(plane_points, [-1, 3])
  transf_plane_points = tf.matmul(transformation_matrix,
                                  plane_points,
                                  transpose_b=True)
  interpolated_points = trilinear.interpolate(
      background_image, tf.linalg.matrix_transpose(transf_plane_points))
  ground_alpha = 1 - tf.reshape(interpolated_points,
                                [batch_size, height, width, 1])
  ground_image = tf.ones((batch_size, height, width, 3)) * ground_color
  return ground_image, ground_alpha
def test_interpolate_jacobian_random(self, bsize, height, width, depth,
                                     channels):
  """Tests whether jacobian is correct."""
  grid_3d_np = np.random.uniform(size=(bsize, height, width, depth, channels))
  sampling_points_np = np.zeros((bsize, height * width * depth, 3))
  sampling_points_np[:, :, 0] = np.arange(0, height * width * depth)

  # Wrap these in identities because some assert_* ops look at the constant
  # tensor value and mark it as unfeedable.
  grid_3d = tf.identity(tf.convert_to_tensor(value=grid_3d_np))
  sampling_points = tf.identity(tf.convert_to_tensor(value=sampling_points_np))
  interpolated_points = trilinear.interpolate(
      grid_3d=grid_3d, sampling_points=sampling_points)

  self.assert_jacobian_is_correct(grid_3d, grid_3d_np, interpolated_points)
def generate_ground_image(height,
                          width,
                          focal,
                          principal_point,
                          camera_rotation_matrix,
                          camera_translation_vector,
                          ground_color=(0.43, 0.43, 0.8)):
  """Generate an image depicting only the ground."""
  background_image = np.ones((height, width, 1, 1), dtype=np.float32)
  background_image[-1, ...] = 0  # Set the bottom line to 0 for proper sampling

  # The projection of the ground depends on the top right corner
  # (approximation).
  plane_point_np = np.array([[3.077984, 2.905388, 0.]], dtype=np.float32)
  plane_point2d = perspective.project(
      rotation_matrix_3d.rotate(plane_point_np, camera_rotation_matrix) +
      camera_translation_vector.T, focal, principal_point)
  _, y = tf.split(plane_point2d, [1, 1], axis=-1)
  sfactor = y / float(height)
  transformation_matrix = (1 / sfactor) * np.array(
      [[1, 0, 0], [0, 0, 0], [0, 0, 0]]) + np.array(
          [[0, 0, 0], [0, 1, 0], [0, 0, 1]])
  plane_points = grid.generate((0., 0., 0.),
                               (float(height), float(width), 0.),
                               (height, width, 1))
  plane_points = tf.reshape(plane_points, [-1, 3])
  transf_plane_points = tf.matmul(transformation_matrix,
                                  plane_points,
                                  transpose_b=True)
  interpolated_points = trilinear.interpolate(
      background_image, tf.transpose(transf_plane_points))
  ground_alpha = 1 - tf.reshape(interpolated_points, [height, width, 1])
  ground_image = tf.ones((height, width, 3)) * ground_color
  return ground_image, ground_alpha
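# Both generate_ground_image variants hinge on perspective.project to find
# where the fixed plane point lands in the image; a self-contained sanity
# check of that call with dummy intrinsics:
import tensorflow as tf
from tensorflow_graphics.rendering.camera import perspective

# A point on the optical axis at depth 2 projects onto the principal point.
point_3d = tf.constant([[0.0, 0.0, 2.0]])
focal = tf.constant([[100.0, 100.0]])
principal_point = tf.constant([[128.0, 128.0]])

print(perspective.project(point_3d, focal, principal_point).numpy())
# [[128. 128.]]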
def update(self, labeled_sdfs, labeled_classes, labeled_poses,
           predicted_sdfs, predicted_classes, predicted_poses):
  """Update."""
  if labeled_sdfs or labeled_classes:
    print(labeled_sdfs)

  mean_x = tf.reduce_mean(labeled_poses[1][:, 0])
  mean_z = tf.reduce_mean(labeled_poses[1][:, 2])
  samples_world = grid.generate(
      (mean_x - 0.5, 0.0, mean_z - 0.5),
      (mean_x + 0.5, 1.0, mean_z + 0.5),
      [self.resolution, self.resolution, self.resolution])
  samples_world = tf.reshape(samples_world, [-1, 3])

  status = False
  if status:
    _, axs = plt.subplots(3, 3)
    fig_obj_count = 0

  # Do the same for the ground truth and predictions
  num_collisions = 0
  prev_intersection = 0
  sdf_values = tf.zeros_like(samples_world)[:, 0:1]
  for classes, sdfs, poses in [(predicted_classes, predicted_sdfs,
                                predicted_poses)]:
    for i in range(classes.shape[0]):
      sdf = tf.expand_dims(sdfs[i], -1)
      sdf = sdf * -1.0  # inside positive, outside zero
      samples_object = centernet_utils.transform_pointcloud(
          tf.reshape(samples_world, [1, 1, -1, 3]),
          tf.reshape(poses[2][i], [1, 1, 3]),
          tf.reshape(poses[0][i], [1, 1, 3, 3]),
          tf.reshape(poses[1][i], [1, 1, 3]),
          inverse=True) * 2.0
      samples_object = (
          samples_object * (29.0 / 32.0) / 2.0 + 0.5) * 32.0 - 0.5
      samples = tf.squeeze(samples_object)
      interpolated = trilinear.interpolate(sdf, samples)

      occupancy_value = tf.math.sign(tf.nn.relu(interpolated + self.tol))
      sdf_values += occupancy_value
      intersection = tf.reduce_sum(tf.math.sign(tf.nn.relu(sdf_values - 1)))
      if intersection > prev_intersection:
        prev_intersection = intersection
        num_collisions += 1

      status2 = False
      if status2:
        a = 1
        values = interpolated
        inter = tf.reshape(values, [self.resolution, self.resolution,
                                    self.resolution])
        inter = tf.transpose(tf.reduce_max(inter, axis=a))
        im = axs[fig_obj_count, 0].matshow(inter.numpy())
        plt.colorbar(im, ax=axs[fig_obj_count, 0])

        values = tf.math.sign(tf.nn.relu(interpolated + self.tol))
        inter = tf.reshape(values, [self.resolution, self.resolution,
                                    self.resolution])
        inter = tf.transpose(tf.reduce_max(inter, axis=a))
        im = axs[fig_obj_count, 1].matshow(inter.numpy())
        plt.colorbar(im, ax=axs[fig_obj_count, 1])

        values = sdf_values
        inter = tf.reshape(values, [self.resolution, self.resolution,
                                    self.resolution])
        inter = tf.transpose(tf.reduce_max(inter, axis=a))
        im = axs[fig_obj_count, 2].matshow(inter.numpy())
        plt.colorbar(im, ax=axs[fig_obj_count, 2])

        fig_obj_count += 1

  intersection = tf.reduce_sum(tf.math.sign(tf.nn.relu(sdf_values - 1)))
  union = tf.reduce_sum(tf.math.sign(sdf_values))
  iou = intersection / union
  self.collisions.append(num_collisions)
  self.intersections.append(intersection)
  self.ious.append(iou)
  return num_collisions, intersection, iou
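# The metric above registers a new collision whenever the voxelwise
# intersection count grows after adding an object's occupancy mask; a tiny
# numeric sketch of that bookkeeping with made-up occupancy vectors:
import tensorflow as tf

occupancies = [
    tf.constant([[1.], [1.], [0.], [0.], [0.]]),
    tf.constant([[0.], [1.], [1.], [0.], [0.]]),  # overlaps sample 1
    tf.constant([[0.], [0.], [0.], [1.], [0.]]),  # no overlap
]

sdf_values = tf.zeros([5, 1])
num_collisions, prev_intersection = 0, 0
for occupancy in occupancies:
  sdf_values += occupancy
  # Samples covered by more than one object so far.
  intersection = tf.reduce_sum(tf.math.sign(tf.nn.relu(sdf_values - 1)))
  if intersection > prev_intersection:
    prev_intersection = intersection
    num_collisions += 1

print(num_collisions)  # 1: only the second object overlaps a previous one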
def update(self, labeled_sdfs, labeled_classes, labeled_poses,
           predicted_sdfs, predicted_classes, predicted_poses):
  """Update."""
  labeled_rotations = labeled_poses[0]
  labeled_translations = labeled_poses[1]
  labeled_sizes = labeled_poses[2]

  status = True
  if status:
    box_limits_x = [100, -100]
    # box_limits_y = [100, -100]
    box_limits_z = [100, -100]
    for i in range(labeled_translations.shape[0]):
      rot = tf.reshape(tf.gather(labeled_rotations[i], [0, 2, 6, 8]), [2, 2])

      min_x = tf.cast(0.0 - labeled_sizes[i][0] / 2.0, dtype=tf.float32)
      max_x = tf.cast(0.0 + labeled_sizes[i][0] / 2.0, dtype=tf.float32)
      # min_y = tf.cast(0.0 - labeled_sizes[i][1] / 2.0, dtype=tf.float32)
      # max_y = tf.cast(0.0 + labeled_sizes[i][1] / 2.0, dtype=tf.float32)
      min_z = tf.cast(0.0 - labeled_sizes[i][2] / 2.0, dtype=tf.float32)
      max_z = tf.cast(0.0 + labeled_sizes[i][2] / 2.0, dtype=tf.float32)

      translation = tf.reshape(
          [labeled_translations[i][0], labeled_translations[i][2]], [2, 1])

      pt_0 = rot @ tf.reshape([min_x, min_z], [2, 1]) + translation
      pt_1 = rot @ tf.reshape([min_x, max_z], [2, 1]) + translation
      pt_2 = rot @ tf.reshape([max_x, min_z], [2, 1]) + translation
      pt_3 = rot @ tf.reshape([max_x, max_z], [2, 1]) + translation

      for pt in [pt_0, pt_1, pt_2, pt_3]:
        if pt[0] < box_limits_x[0]:
          box_limits_x[0] = pt[0]
        if pt[0] > box_limits_x[1]:
          box_limits_x[1] = pt[0]
        if pt[1] < box_limits_z[0]:
          box_limits_z[0] = pt[1]
        if pt[1] > box_limits_z[1]:
          box_limits_z[1] = pt[1]
    mean_x = tf.reduce_mean(box_limits_x)
    mean_z = tf.reduce_mean(box_limits_z)
  else:
    mean_x = tf.reduce_mean(labeled_translations[:, 0])
    mean_z = tf.reduce_mean(labeled_translations[:, 2])

  samples_world = grid.generate(
      (mean_x - 0.5, 0.0, mean_z - 0.5),
      (mean_x + 0.5, 1.0, mean_z + 0.5),
      [self.resolution, self.resolution, self.resolution])
  # samples_world = grid.generate(
  #     (box_limits_x[0][0], box_limits_y[0], box_limits_z[0][0]),
  #     (box_limits_x[1][0], box_limits_y[1], box_limits_z[1][0]),
  #     [self.resolution, self.resolution, self.resolution])
  # samples_world = grid.generate(
  #     (-5.0, -5.0, -5.0),
  #     (5.0, 5.0, 5.0),
  #     [self.resolution, self.resolution, self.resolution])
  samples_world = tf.reshape(samples_world, [-1, 3])
  ious = []

  status = False
  if status:
    _, axs = plt.subplots(labeled_translations.shape[0], 5)
    fig_obj_count = 0

  for class_id in range(self.max_num_classes):
    # Do the same for the ground truth and predictions
    sdf_values = tf.zeros_like(samples_world)[:, 0:1]
    for mtype, (classes, sdfs, poses) in enumerate([
        (labeled_classes, labeled_sdfs, labeled_poses),
        (predicted_classes, predicted_sdfs, predicted_poses)]):
      for i in range(classes.shape[0]):
        if class_id == classes[i]:
          sdf = tf.expand_dims(sdfs[i], -1)
          sdf = sdf * -1.0  # inside positive, outside zero
          samples_object = centernet_utils.transform_pointcloud(
              tf.reshape(samples_world, [1, 1, -1, 3]),
              tf.reshape(poses[2][i], [1, 1, 3]),
              tf.reshape(poses[0][i], [1, 1, 3, 3]),
              tf.reshape(poses[1][i], [1, 1, 3]),
              inverse=True) * 2.0
          samples_object = (
              samples_object * (29.0 / 32.0) / 2.0 + 0.5) * 32.0 - 0.5
          samples = tf.squeeze(samples_object)
          interpolated = trilinear.interpolate(sdf, samples)
          sdf_values += tf.math.sign(tf.nn.relu(interpolated + self.tol))

          status2 = False
          if status2:
            a = 2
            values = interpolated
            inter = tf.reshape(values, [self.resolution, self.resolution,
                                        self.resolution])
            inter = tf.transpose(tf.reduce_max(inter, axis=a))
            im = axs[fig_obj_count, mtype * 2 + 0].matshow(inter.numpy())
            plt.colorbar(im, ax=axs[fig_obj_count, mtype * 2 + 0])
            print(mtype, fig_obj_count, 0)

            values = tf.math.sign(tf.nn.relu(interpolated + self.tol))
            inter = tf.reshape(values, [self.resolution, self.resolution,
                                        self.resolution])
            inter = tf.transpose(tf.reduce_max(inter, axis=a))
            im = axs[fig_obj_count, mtype * 2 + 1].matshow(inter.numpy())
            plt.colorbar(im, ax=axs[fig_obj_count, mtype * 2 + 1])
            print(mtype, fig_obj_count, 1)

            if mtype == 1:
              values = sdf_values
              inter = tf.reshape(values, [self.resolution, self.resolution,
                                          self.resolution])
              inter = tf.transpose(tf.reduce_max(inter, axis=a))
              im = axs[fig_obj_count, 4].matshow(inter.numpy())
              plt.colorbar(im, ax=axs[fig_obj_count, 4])
              print(mtype, fig_obj_count, 2)
              fig_obj_count += 1

    intersection = tf.reduce_sum(tf.math.sign(tf.nn.relu(sdf_values - 1)))
    union = tf.reduce_sum(tf.math.sign(sdf_values))
    iou = intersection / union
    if not tf.math.is_nan(iou):
      ious.append(iou)

    status3 = False
    if status3:
      _ = plt.figure(figsize=(5, 5))
      plt.clf()
      # mask = (sdf_values.numpy() > 0)[:, 0]
      # plt.scatter(samples_world.numpy()[mask, 0],
      #             samples_world.numpy()[mask, 1],
      #             marker='.', c=sdf_values.numpy()[mask, 0])
      plt.scatter(samples_world.numpy()[:, 0],
                  samples_world.numpy()[:, 1],
                  marker='.', c=sdf_values.numpy()[:, 0])
      plt.colorbar()

    if not tf.math.is_nan(iou):
      self.iou_per_class[class_id].append(iou)

  if not ious:
    ious = [0]
  return np.mean(ious), np.min(ious)
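# The expression (p * (29.0 / 32.0) / 2.0 + 0.5) * 32.0 - 0.5 recurs in the
# collision loss and in both metrics above. A hypothetical helper (not part
# of the original code) makes the convention explicit, assuming a 32^3 SDF
# grid whose content occupies 29 of the 32 cells along each axis:
import tensorflow as tf

def object_to_sdf_voxel(points, grid_dim=32.0, content_dim=29.0):
  """Maps points from object space ([-1, 1]^3) to SDF voxel coordinates.

  Shrinks by content_dim / grid_dim to leave out the empty border cells,
  rescales [-1, 1] to [0, 1], then converts to voxel-center indices.
  """
  return (points * (content_dim / grid_dim) / 2.0 + 0.5) * grid_dim - 0.5

# The object-space origin lands at the center of the 32^3 grid.
print(object_to_sdf_voxel(tf.constant([[0.0, 0.0, 0.0]])).numpy())
# [[15.5 15.5 15.5]]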