def pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                       view_name):
  """Keeps only the image of a view that sees the most labeled points.

  The image stack and the point-to-pixel indices stored under `view_name` are
  replaced in place by a single-image slice (leading dimension 1) holding the
  best-scoring image. Images are scored by the summed loss weight of the
  points that project inside the image bounds; when that sum is zero over all
  images, the plain count of in-bounds points is used instead.

  Args:
    mesh_inputs: A dictionary of point tensors. Must contain
      `standard_fields.InputDataFields.point_loss_weights`, which is squeezed
      along axis 1.
    view_image_inputs: A dictionary mapping view names to rank-4 image
      tensors whose axes 1 and 2 are read as height and width. Modified in
      place.
    view_indices_2d_inputs: A dictionary mapping view names to rank-3 tensors
      whose last axis holds the (row, column) pixel index of each point in
      each image. Modified in place.
    view_name: Name of the view to process. Nothing happens if it is missing
      from either view dictionary.

  Raises:
    ValueError: If the point loss weights are missing from `mesh_inputs`.
  """
  if (view_name not in view_image_inputs or
      view_name not in view_indices_2d_inputs):
    return
  weights_key = standard_fields.InputDataFields.point_loss_weights
  if weights_key not in mesh_inputs:
    raise ValueError('The key `weights` is missing from mesh_inputs.')

  images = view_image_inputs[view_name]
  pixel_indices = view_indices_2d_inputs[view_name]
  image_shape = tf.shape(images)
  image_height = image_shape[1]
  image_width = image_shape[2]

  # A point is visible in an image when both of its pixel coordinates fall
  # inside [0, height) x [0, width).
  rows = pixel_indices[:, :, 0]
  rows_in_bounds = tf.logical_and(
      tf.greater_equal(rows, 0), tf.less(rows, image_height))
  cols = pixel_indices[:, :, 1]
  cols_in_bounds = tf.logical_and(
      tf.greater_equal(cols, 0), tf.less(cols, image_width))
  visible = tf.cast(
      tf.logical_and(rows_in_bounds, cols_in_bounds), dtype=tf.float32)

  point_weights = tf.squeeze(mesh_inputs[weights_key], axis=1)
  weighted_score = tf.reduce_sum(visible * point_weights, axis=1)
  # Fall back to counting visible points when no visible point carries weight.
  image_score = tf.cond(
      tf.equal(tf.reduce_sum(weighted_score), 0),
      lambda: tf.reduce_sum(visible, axis=1),
      lambda: weighted_score)

  best_image = tf.math.argmax(image_score)
  # Slice (rather than index) so the leading image dimension is preserved.
  view_image_inputs[view_name] = images[best_image:best_image + 1, :, :, :]
  view_indices_2d_inputs[view_name] = pixel_indices[
      best_image:best_image + 1, :, :]
def _filter_valid_objects(inputs):
  """Drops invalid objects from every object tensor in `inputs`, in place.

  An object is kept when its class is greater than 0 and, when the
  corresponding fields are present, its `objects_has_3d_info` flag is true and
  its `objects_difficulty` is greater than 0. The resulting mask is applied to
  each key of `_OBJECT_KEYS` that exists in `inputs`. Nothing happens when
  `objects_class` is absent.

  Args:
    inputs: A dictionary containing input tensors. Modified in place.
  """
  fields = standard_fields.InputDataFields
  if fields.objects_class not in inputs:
    return

  keep_mask = tf.reshape(tf.greater(inputs[fields.objects_class], 0), [-1])

  if fields.objects_has_3d_info in inputs:
    has_3d_info = tf.reshape(
        tf.cast(inputs[fields.objects_has_3d_info], dtype=tf.bool), [-1])
    keep_mask = tf.logical_and(has_3d_info, keep_mask)

  if fields.objects_difficulty in inputs:
    difficulty = tf.reshape(inputs[fields.objects_difficulty], [-1])
    keep_mask = tf.logical_and(keep_mask, tf.greater(difficulty, 0))

  for object_key in _OBJECT_KEYS:
    if object_key in inputs:
      inputs[object_key] = tf.boolean_mask(inputs[object_key], keep_mask)
def randomly_crop_points(mesh_inputs,
                         view_indices_2d_inputs,
                         x_random_crop_size,
                         y_random_crop_size,
                         epsilon=1e-5):
  """Crops the points to a window centered at a randomly chosen point.

  A point index is drawn uniformly at random and its x / y coordinates become
  the window center. Along an axis whose crop size is None the window spans
  the full extent of the points. Points strictly inside the window (padded by
  `epsilon`) are kept. Both dictionaries are modified in place: every tensor
  in `mesh_inputs` is masked along axis 0 and every tensor in
  `view_indices_2d_inputs` is masked along axis 1.

  Args:
    mesh_inputs: A dictionary containing input mesh (point) tensors. Must
      contain `standard_fields.InputDataFields.point_positions`.
    view_indices_2d_inputs: A dictionary containing input point to view
      correspondence tensors.
    x_random_crop_size: Size of the random crop in x dimension. If None,
      random crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None,
      random crop will not take place on y dimension.
    epsilon: Small margin added to the window thresholds.
  """
  if x_random_crop_size is None and y_random_crop_size is None:
    return

  positions = mesh_inputs[standard_fields.InputDataFields.point_positions]
  point_count = tf.shape(positions)[0]

  # At least one crop size is set here, so a window center is always needed.
  center_index = tf.random.uniform([],
                                   minval=0,
                                   maxval=point_count,
                                   dtype=tf.int32)
  center_x = positions[center_index, 0]
  center_y = positions[center_index, 1]
  xs = positions[:, 0]
  ys = positions[:, 1]

  # Default window: the full extent of the points along each axis.
  x_low = tf.reduce_min(xs) - epsilon
  x_high = tf.reduce_max(xs) + epsilon
  y_low = tf.reduce_min(ys) - epsilon
  y_high = tf.reduce_max(ys) + epsilon
  if x_random_crop_size is not None:
    half_x = x_random_crop_size / 2.0
    x_low = center_x - half_x - epsilon
    x_high = center_x + half_x + epsilon
  if y_random_crop_size is not None:
    half_y = y_random_crop_size / 2.0
    y_low = center_y - half_y - epsilon
    y_high = center_y + half_y + epsilon

  inside_x = tf.logical_and(tf.greater(xs, x_low), tf.less(xs, x_high))
  inside_y = tf.logical_and(tf.greater(ys, y_low), tf.less(ys, y_high))
  keep_mask = tf.logical_and(inside_x, inside_y)

  def _mask_second_axis(tensor):
    """Applies `keep_mask` along axis 1 of a rank-3 tensor."""
    points_first = tf.transpose(tensor, [1, 0, 2])
    return tf.transpose(tf.boolean_mask(points_first, keep_mask), [1, 0, 2])

  for mesh_key in sorted(mesh_inputs):
    mesh_inputs[mesh_key] = tf.boolean_mask(mesh_inputs[mesh_key], keep_mask)
  for view_key in sorted(view_indices_2d_inputs):
    view_indices_2d_inputs[view_key] = _mask_second_axis(
        view_indices_2d_inputs[view_key])
def _box_rotation_regression_loss(loss_type, is_balanced,
                                  input_boxes_rotation_matrix,
                                  input_boxes_instance_id,
                                  output_boxes_rotation_matrix, delta):
  """Computes a regression loss between flattened box rotation matrices.

  Args:
    loss_type: Either 'huber' or 'absolute_difference'.
    is_balanced: If True, per-box weights come from
      `loss_utils.get_balanced_loss_weights_multiclass` applied to the
      instance ids; otherwise every box has weight 1.
    input_boxes_rotation_matrix: Ground-truth rotation matrices; reshaped to
      [-1, 9].
    input_boxes_instance_id: Instance ids of the ground-truth boxes.
    output_boxes_rotation_matrix: Predicted rotation matrices; reshaped to
      [-1, 9].
    delta: Huber loss delta. Only used when `loss_type` is 'huber'.

  Returns:
    A tf.float32 scalar: the mean of the weighted per-box losses, or 0.0 when
    either set of boxes is empty.

  Raises:
    ValueError: If `loss_type` is unknown (raised when the loss branch is
      built).
  """

  def _weighted_rotation_loss():
    """Loss for when both input and output boxes are non-empty."""
    if is_balanced:
      box_weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      num_boxes = tf.shape(input_boxes_instance_id)[0]
      box_weights = tf.ones([num_boxes, 1], dtype=tf.float32)

    target = tf.reshape(input_boxes_rotation_matrix, [-1, 9])
    prediction = tf.reshape(output_boxes_rotation_matrix, [-1, 9])

    if loss_type == 'huber':
      criterion = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type == 'absolute_difference':
      criterion = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))

    # With Reduction.NONE the criterion yields one value per box.
    per_box_loss = criterion(y_true=target, y_pred=prediction)
    return tf.reduce_mean(per_box_loss * tf.reshape(box_weights, [-1]))

  def _zero_loss():
    return tf.constant(0.0, dtype=tf.float32)

  has_input_boxes = tf.greater(tf.shape(input_boxes_rotation_matrix)[0], 0)
  has_output_boxes = tf.greater(tf.shape(output_boxes_rotation_matrix)[0], 0)
  return tf.cond(
      tf.logical_and(has_input_boxes, has_output_boxes),
      _weighted_rotation_loss, _zero_loss)
def box_corner_distance_loss_on_object_tensors(inputs,
                                               outputs,
                                               loss_type,
                                               delta=1.0,
                                               is_balanced=False):
  """Computes regression loss on object corner locations using object tensors.

  The batch is processed one example at a time; an example contributes 0.0
  when it has no input objects or no output objects. The result is the mean of
  the per-example losses.

  Args:
    inputs: A dictionary of tf.Tensors with our input data. The length of
      `inputs[objects_length]` is taken as the batch size.
    outputs: A dictionary of tf.Tensors with the network output.
    loss_type: Loss type, forwarded to the per-example loss.
    delta: float, the point where the huber loss function changes from
      quadratic to linear.
    is_balanced: If True, the per-box losses are re-weighted based on the
      object instance ids.

  Returns:
    localization_loss: A tf.float32 scalar corresponding to localization loss.
  """

  def _zero_loss():
    return tf.constant(0.0, dtype=tf.float32)

  def _example_loss(b):
    """Returns the corner distance loss of batch element `b`."""
    inputs_1 = batch_utils.get_batch_size_1_input_objects(inputs=inputs, b=b)
    outputs_1 = batch_utils.get_batch_size_1_output_objects(
        outputs=outputs, b=b)
    has_input_objects = tf.greater(
        tf.shape(inputs_1[standard_fields.InputDataFields.objects_length])[0],
        0)
    has_output_objects = tf.greater(
        tf.shape(
            outputs_1[standard_fields.DetectionResultFields.objects_length])
        [0], 0)

    def _corner_loss():
      return _box_corner_distance_loss_on_object_tensors(
          inputs=inputs_1,
          outputs=outputs_1,
          loss_type=loss_type,
          delta=delta,
          is_balanced=is_balanced)

    return tf.cond(
        tf.logical_and(has_input_objects, has_output_objects), _corner_loss,
        _zero_loss)

  batch_size = len(inputs[standard_fields.InputDataFields.objects_length])
  per_example_losses = [_example_loss(b) for b in range(batch_size)]
  return tf.reduce_mean(tf.stack(per_example_losses))
def _box_corner_distance_loss_on_object_tensors(inputs, outputs, loss_type,
                                                delta, is_balanced):
  """Computes the corner distance loss over the valid objects of one example.

  An object is valid when both its class and its instance id are greater than
  0. The same validity mask is applied to the input object fields and to the
  output object fields, so outputs are assumed to be aligned one-to-one with
  inputs.

  Args:
    inputs: A dictionary of input object tensors for a single example. It is
      not modified.
    outputs: A dictionary of output object tensors for a single example. It is
      not modified.
    loss_type: Loss type, forwarded to `_box_corner_distance_loss`.
    delta: Huber loss delta, forwarded to `_box_corner_distance_loss`.
    is_balanced: Whether to balance the loss across instances, forwarded to
      `_box_corner_distance_loss`.

  Returns:
    A tf.float32 scalar loss; 0.0 when no object is valid.
  """
  input_fields = standard_fields.InputDataFields
  output_fields = standard_fields.DetectionResultFields

  valid_mask_class = tf.greater(
      tf.reshape(inputs[input_fields.objects_class], [-1]), 0)
  valid_mask_instance = tf.greater(
      tf.reshape(inputs[input_fields.objects_instance_id], [-1]), 0)
  valid_mask = tf.logical_and(valid_mask_class, valid_mask_instance)

  def fn():
    """Loss for when at least one object is valid."""
    # Mask into shallow copies. Writing the masked tensors back into the
    # caller's dictionaries would leak tensors created inside this tf.cond
    # branch to code outside of it and silently alter the caller's data.
    masked_inputs = dict(inputs)
    for field in standard_fields.get_input_object_fields():
      if field in masked_inputs:
        masked_inputs[field] = tf.boolean_mask(masked_inputs[field],
                                               valid_mask)
    masked_outputs = dict(outputs)
    for field in standard_fields.get_output_object_fields():
      if field in masked_outputs:
        masked_outputs[field] = tf.boolean_mask(masked_outputs[field],
                                                valid_mask)
    return _box_corner_distance_loss(
        loss_type=loss_type,
        is_balanced=is_balanced,
        input_boxes_length=masked_inputs[input_fields.objects_length],
        input_boxes_height=masked_inputs[input_fields.objects_height],
        input_boxes_width=masked_inputs[input_fields.objects_width],
        input_boxes_center=masked_inputs[input_fields.objects_center],
        input_boxes_rotation_matrix=masked_inputs[
            input_fields.objects_rotation_matrix],
        input_boxes_instance_id=masked_inputs[
            input_fields.objects_instance_id],
        output_boxes_length=masked_outputs[output_fields.objects_length],
        output_boxes_height=masked_outputs[output_fields.objects_height],
        output_boxes_width=masked_outputs[output_fields.objects_width],
        output_boxes_center=masked_outputs[output_fields.objects_center],
        output_boxes_rotation_matrix=masked_outputs[
            output_fields.objects_rotation_matrix],
        delta=delta)

  return tf.cond(
      tf.reduce_any(valid_mask), fn,
      lambda: tf.constant(0.0, dtype=tf.float32))
def _box_classification_loss_unbatched(inputs_1, outputs_1, is_intermediate,
                                       is_balanced, mine_hard_negatives,
                                       hard_negative_score_threshold):
  """Computes the voxel classification loss for a batch-size-1 example.

  Only voxels selected by `_get_voxels_valid_mask` contribute. With
  `mine_hard_negatives`, valid voxels labeled 0 whose softmax score for class
  0 is below `hard_negative_score_threshold` are appended a second time, all
  sharing one new instance id (max existing id + 1).

  Args:
    inputs_1: A dictionary of input tensors for one example.
    outputs_1: A dictionary of output tensors for one example.
    is_intermediate: If True, the intermediate semantic logits are used
      instead of the final ones.
    is_balanced: If True, loss weights are computed from the voxel instance
      ids with `loss_utils.get_balanced_loss_weights_multiclass`.
    mine_hard_negatives: If True, hard negative voxels are duplicated as
      described above.
    hard_negative_score_threshold: Class-0 score below which a voxel labeled 0
      counts as a hard negative.

  Returns:
    The value returned by `classification_loss_fn`.

  Raises:
    ValueError: If the static size of the last logits dimension is unknown.
  """
  input_fields = standard_fields.InputDataFields
  result_fields = standard_fields.DetectionResultFields

  valid_mask = _get_voxels_valid_mask(inputs_1=inputs_1)
  logits_key = (
      result_fields.intermediate_object_semantic_voxels
      if is_intermediate else result_fields.object_semantic_voxels)
  logits = outputs_1[logits_key]
  num_classes = logits.get_shape().as_list()[-1]
  if num_classes is None:
    raise ValueError('Number of classes is unknown.')

  logits = tf.boolean_mask(tf.reshape(logits, [-1, num_classes]), valid_mask)
  flat_labels = tf.reshape(inputs_1[input_fields.object_class_voxels],
                           [-1, 1])
  labels = tf.boolean_mask(flat_labels, valid_mask)

  # Instance ids are only needed for mining and for balancing.
  if mine_hard_negatives or is_balanced:
    flat_instances = tf.reshape(
        inputs_1[input_fields.object_instance_id_voxels], [-1])
    instances = tf.boolean_mask(flat_instances, valid_mask)

  loss_kwargs = {}
  if mine_hard_negatives:
    background_scores = tf.reshape(tf.nn.softmax(logits)[:, 0], [-1])
    is_hard_negative = tf.logical_and(
        tf.less(background_scores, hard_negative_score_threshold),
        tf.equal(tf.reshape(labels, [-1]), 0))
    extra_labels = tf.boolean_mask(labels, is_hard_negative)
    extra_logits = tf.boolean_mask(logits, is_hard_negative)
    # All duplicated voxels share one instance id not used by any voxel.
    new_instance_ids = tf.ones_like(instances) * (
        tf.reduce_max(instances) + 1)
    extra_instances = tf.boolean_mask(new_instance_ids, is_hard_negative)
    logits = tf.concat([logits, extra_logits], axis=0)
    instances = tf.concat([instances, extra_instances], axis=0)
    labels = tf.concat([labels, extra_labels], axis=0)

  if is_balanced:
    loss_kwargs['weights'] = loss_utils.get_balanced_loss_weights_multiclass(
        labels=tf.expand_dims(instances, axis=1))

  return classification_loss_fn(logits=logits, labels=labels, **loss_kwargs)
def _box_size_regression_loss(loss_type, is_balanced, input_boxes_length,
                              input_boxes_height, input_boxes_width,
                              input_boxes_instance_id, output_boxes_length,
                              output_boxes_height, output_boxes_width, delta):
  """Computes a regression loss on box sizes relative to the ground truth.

  Each predicted dimension is divided by the matching ground-truth dimension
  and the resulting (length, height, width) ratios are regressed towards 1.

  Args:
    loss_type: Either 'huber' or 'absolute_difference'.
    is_balanced: If True, per-box weights come from
      `loss_utils.get_balanced_loss_weights_multiclass` applied to the
      instance ids; otherwise every box has weight 1.
    input_boxes_length: Ground-truth box lengths.
    input_boxes_height: Ground-truth box heights.
    input_boxes_width: Ground-truth box widths.
    input_boxes_instance_id: Instance ids of the ground-truth boxes.
    output_boxes_length: Predicted box lengths.
    output_boxes_height: Predicted box heights.
    output_boxes_width: Predicted box widths.
    delta: Huber loss delta. Only used when `loss_type` is 'huber'.

  Returns:
    A tf.float32 scalar: the mean of the weighted per-box losses, or 0.0 when
    either set of boxes is empty.

  Raises:
    ValueError: If `loss_type` is unknown (raised when the loss branch is
      built).
  """

  def _weighted_size_loss():
    """Loss for when both input and output boxes are non-empty."""
    if is_balanced:
      box_weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      num_boxes = tf.shape(input_boxes_instance_id)[0]
      box_weights = tf.ones([num_boxes, 1], dtype=tf.float32)

    gt_dims = [
        tf.reshape(dim, [-1, 1]) for dim in (
            input_boxes_length, input_boxes_height, input_boxes_width)
    ]
    predicted_dims = [
        tf.reshape(dim, [-1, 1]) for dim in (
            output_boxes_length, output_boxes_height, output_boxes_width)
    ]
    # Ratios of predicted to ground-truth size; a perfect prediction is 1.
    size_ratios = tf.concat(
        [predicted / gt for predicted, gt in zip(predicted_dims, gt_dims)],
        axis=1)
    unit_target = tf.ones_like(size_ratios)

    if loss_type == 'huber':
      criterion = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type == 'absolute_difference':
      criterion = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))

    per_box_loss = criterion(y_true=unit_target, y_pred=size_ratios)
    return tf.reduce_mean(per_box_loss * tf.reshape(box_weights, [-1]))

  def _zero_loss():
    return tf.constant(0.0, dtype=tf.float32)

  has_input_boxes = tf.greater(tf.shape(input_boxes_length)[0], 0)
  has_output_boxes = tf.greater(tf.shape(output_boxes_length)[0], 0)
  return tf.cond(
      tf.logical_and(has_input_boxes, has_output_boxes), _weighted_size_loss,
      _zero_loss)
def _voxel_hard_negative_classification_loss_unbatched(inputs_1, outputs_1,
                                                       is_intermediate,
                                                       gamma):
  """Computes a re-weighted classification loss on background voxels.

  Only voxels labeled 0 contribute. Each is weighted by
  (1 - softmax score of class 0) ** gamma, and the weights are rescaled so
  that they sum to the number of background voxels.

  Args:
    inputs_1: A dictionary of input tensors for one example.
    outputs_1: A dictionary of output tensors for one example.
    is_intermediate: If True, the intermediate semantic logits are used
      instead of the final ones.
    gamma: Exponent applied to (1 - class-0 score).

  Returns:
    The value returned by `classification_loss_fn`, or a tf.float32 scalar 0.0
    when there are no background voxels.

  Raises:
    ValueError: If the static size of the last logits dimension is unknown
      (raised when the loss branch is built).
  """
  inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(
      inputs_1=inputs_1, outputs_1=outputs_1)
  result_fields = standard_fields.DetectionResultFields
  logits_key = (
      result_fields.intermediate_object_semantic_voxels
      if is_intermediate else result_fields.object_semantic_voxels)
  logits = outputs_1[logits_key]
  labels = tf.reshape(
      inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
  background_mask = tf.equal(labels, 0)
  num_background_points = tf.reduce_sum(
      tf.cast(background_mask, dtype=tf.int32))

  def loss_fn():
    """Loss for when at least one background voxel exists."""
    num_classes = logits.get_shape().as_list()[-1]
    if num_classes is None:
      raise ValueError('Number of classes is unknown.')
    background_logits = tf.boolean_mask(logits, background_mask)
    background_scores = tf.reshape(
        tf.nn.softmax(background_logits)[:, 0], [-1, 1])
    hardness_weights = tf.pow(1.0 - background_scores, gamma)
    num_points = tf.shape(background_logits)[0]
    # NOTE(review): if every class-0 score is exactly 1 and gamma > 0 the
    # weight sum is zero and this division produces NaN - confirm whether
    # that case needs guarding.
    hardness_weights = hardness_weights * tf.cast(
        num_points, dtype=tf.float32) / tf.reduce_sum(hardness_weights)
    background_labels = tf.boolean_mask(labels, background_mask)
    one_hot_labels = tf.one_hot(indices=background_labels, depth=num_classes)
    return classification_loss_fn(
        logits=background_logits,
        labels=one_hot_labels,
        weights=hardness_weights)

  def _zero_loss():
    return tf.constant(0.0, dtype=tf.float32)

  has_background = tf.greater(num_background_points, 0)
  has_voxels = tf.greater(tf.shape(labels)[0], 0)
  return tf.cond(
      tf.logical_and(has_background, has_voxels), loss_fn, _zero_loss)
def _get_voxels_valid_mask(inputs_1):
  """Returns the mask of voxels that count towards the voxel losses.

  The mask is the logical AND of `mask_utils.num_voxels_mask` and
  `mask_utils.voxels_within_objects_mask`; presumably these select the
  non-padding voxels and the voxels lying inside objects - verify against
  `mask_utils`.

  Args:
    inputs_1: A dictionary of input tensors for one example.

  Returns:
    A boolean mask tensor.
  """
  populated_voxels = mask_utils.num_voxels_mask(inputs=inputs_1)
  voxels_in_objects = mask_utils.voxels_within_objects_mask(inputs=inputs_1)
  return tf.logical_and(voxels_in_objects, populated_voxels)
def _box_corner_distance_loss(
    loss_type, is_balanced, input_boxes_length, input_boxes_height,
    input_boxes_width, input_boxes_center, input_boxes_rotation_matrix,
    input_boxes_instance_id, output_boxes_length, output_boxes_height,
    output_boxes_width, output_boxes_center, output_boxes_rotation_matrix,
    delta):
  """Computes regression loss on object corner locations.

  Box corners are computed with `box_utils.get_box_corners_3d` for the
  ground-truth and the predicted boxes, and a per-corner loss is taken between
  them. For the normalized loss types each predicted dimension is first
  divided by the ground-truth dimension and rescaled so that every
  ground-truth box has size 5.0 along each dimension, which makes the loss
  independent of the absolute box size.

  Args:
    loss_type: One of 'huber', 'normalized_huber', 'absolute_difference' or
      'normalized_absolute_difference'.
    is_balanced: If True, per-box weights come from
      `loss_utils.get_balanced_loss_weights_multiclass` applied to the
      instance ids; otherwise every box has weight 1.
    input_boxes_length: Ground-truth box lengths.
    input_boxes_height: Ground-truth box heights.
    input_boxes_width: Ground-truth box widths.
    input_boxes_center: Ground-truth box centers.
    input_boxes_rotation_matrix: Ground-truth box rotation matrices.
    input_boxes_instance_id: Instance ids of the ground-truth boxes.
    output_boxes_length: Predicted box lengths.
    output_boxes_height: Predicted box heights.
    output_boxes_width: Predicted box widths.
    output_boxes_center: Predicted box centers.
    output_boxes_rotation_matrix: Predicted box rotation matrices.
    delta: Huber loss delta. Only used by the huber loss types.

  Returns:
    A tf.float32 scalar: the mean of the weighted per-corner losses, or 0.0
    when either set of boxes is empty.

  Raises:
    ValueError: If `loss_type` is unknown (raised when the loss branch is
      built).
  """
  # Must list exactly the normalized variants accepted by the loss selection
  # below. The previous check tested 'normalized_euclidean' (never a supported
  # type) instead of 'normalized_absolute_difference', so the latter was
  # silently computed on un-normalized boxes.
  normalized_loss_types = ('normalized_huber',
                           'normalized_absolute_difference')

  def fn():
    """Loss function for when number of input and output boxes is positive."""
    if is_balanced:
      weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                        dtype=tf.float32)
    normalized_box_size = 5.0
    # Local aliases: the sizes may be rebound below, which is not possible
    # for the enclosing function's parameters from inside this closure.
    predicted_boxes_length = output_boxes_length
    predicted_boxes_height = output_boxes_height
    predicted_boxes_width = output_boxes_width
    predicted_boxes_center = output_boxes_center
    predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
    gt_boxes_length = input_boxes_length
    gt_boxes_height = input_boxes_height
    gt_boxes_width = input_boxes_width
    gt_boxes_center = input_boxes_center
    gt_boxes_rotation_matrix = input_boxes_rotation_matrix
    if loss_type in normalized_loss_types:
      predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
      predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
      predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
      gt_boxes_length = tf.ones_like(
          gt_boxes_length, dtype=tf.float32) * normalized_box_size
      gt_boxes_height = tf.ones_like(
          gt_boxes_height, dtype=tf.float32) * normalized_box_size
      gt_boxes_width = tf.ones_like(
          gt_boxes_width, dtype=tf.float32) * normalized_box_size
    gt_box_corners = box_utils.get_box_corners_3d(
        boxes_length=gt_boxes_length,
        boxes_height=gt_boxes_height,
        boxes_width=gt_boxes_width,
        boxes_rotation_matrix=gt_boxes_rotation_matrix,
        boxes_center=gt_boxes_center)
    predicted_box_corners = box_utils.get_box_corners_3d(
        boxes_length=predicted_boxes_length,
        boxes_height=predicted_boxes_height,
        boxes_width=predicted_boxes_width,
        boxes_rotation_matrix=predicted_boxes_rotation_matrix,
        boxes_center=predicted_boxes_center)
    # Repeat each box weight once per corner (8 corners per box).
    corner_weights = tf.tile(weights, [1, 8])
    if loss_type in ('huber', 'normalized_huber'):
      loss_fn = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type in ('normalized_absolute_difference',
                       'absolute_difference'):
      loss_fn = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))
    # One loss value per corner: corners are flattened to rows of (x, y, z).
    box_corner_losses = loss_fn(
        y_true=tf.reshape(gt_box_corners, [-1, 3]),
        y_pred=tf.reshape(predicted_box_corners, [-1, 3]))
    return tf.reduce_mean(box_corner_losses *
                          tf.reshape(corner_weights, [-1]))

  cond_input = tf.greater(tf.shape(input_boxes_length)[0], 0)
  cond_output = tf.greater(tf.shape(output_boxes_length)[0], 0)
  cond = tf.logical_and(cond_input, cond_output)
  return tf.cond(cond, fn, lambda: tf.constant(0.0, dtype=tf.float32))
def true_p(y, y_hat, name='tp'):
  """Returns the element-wise true-positive mask.

  Args:
    y: Ground-truth values, compared against True.
    y_hat: Predicted values, compared against True.
    name: Name of the resulting op.

  Returns:
    A boolean tensor that is True where both `y` and `y_hat` equal True.
  """
  label_is_positive = tf.equal(y, True)
  prediction_is_positive = tf.equal(y_hat, True)
  return tf.logical_and(label_is_positive, prediction_is_positive, name=name)
def true_n(y, y_hat, name='tn'):
  """Returns the element-wise true-negative mask.

  Args:
    y: Ground-truth values, compared against False.
    y_hat: Predicted values, compared against False.
    name: Name of the resulting op.

  Returns:
    A boolean tensor that is True where both `y` and `y_hat` equal False.
  """
  label_is_negative = tf.equal(y, False)
  prediction_is_negative = tf.equal(y_hat, False)
  return tf.logical_and(label_is_negative, prediction_is_negative, name=name)
def false_n(y, y_hat, name='fn'):
  """Returns the element-wise false-negative mask.

  Args:
    y: Ground-truth values, compared against True.
    y_hat: Predicted values, compared against False.
    name: Name of the resulting op.

  Returns:
    A boolean tensor that is True where `y` equals True and `y_hat` equals
    False.
  """
  label_is_positive = tf.equal(y, True)
  prediction_is_negative = tf.equal(y_hat, False)
  return tf.logical_and(label_is_positive, prediction_is_negative, name=name)
def prepare_scannet_frame_dataset(inputs,
                                  min_pixel_depth=0.3,
                                  max_pixel_depth=6.0,
                                  valid_object_classes=None):
  """Maps the fields from loaded input to standard fields.

  Every pixel of the depth image becomes one point: pixel centers are
  back-projected with the camera intrinsics, moved to the world frame with the
  camera extrinsics and scaled by the pixel depth. Pixels whose depth lies
  outside [min_pixel_depth, max_pixel_depth] are removed from all point
  tensors. Colors are rescaled with `value * 2 / 255 - 1`.

  Args:
    inputs: A dictionary of input tensors. The camera intrinsics, extrinsic
      rotation and translation, depth image and color image are required;
      'frame_name' and the semantic and instance images are optional.
    min_pixel_depth: Pixels with depth values less than this are pruned.
    max_pixel_depth: Pixels with depth values more than this are pruned.
    valid_object_classes: List of valid object classes. Point classes not in
      this list are set to 0. If None, or if `inputs` has no semantic image,
      no class filtering takes place.

  Returns:
    A dictionary of input tensors with standard field names.

  Raises:
    ValueError: If one of the required inputs is missing.
  """
  required_inputs = (
      ('cameras/rgbd_camera/intrinsics/K', 'Intrinsic matrix is missing.'),
      ('cameras/rgbd_camera/extrinsics/R',
       'Extrinsic rotation matrix is missing.'),
      ('cameras/rgbd_camera/extrinsics/t',
       'Extrinsics translation is missing.'),
      ('cameras/rgbd_camera/depth_image', 'Depth image is missing.'),
      ('cameras/rgbd_camera/color_image', 'Color image is missing.'),
  )
  for required_key, error_message in required_inputs:
    if required_key not in inputs:
      raise ValueError(error_message)

  fields = standard_fields.InputDataFields
  prepared_inputs = {}
  if 'frame_name' in inputs:
    prepared_inputs[fields.camera_image_name] = inputs['frame_name']

  camera_intrinsics = inputs['cameras/rgbd_camera/intrinsics/K']
  depth_image = inputs['cameras/rgbd_camera/depth_image']
  image_height = tf.shape(depth_image)[0]
  image_width = tf.shape(depth_image)[1]

  # Pixel-center coordinates (hence the +0.5), flattened in row-major order
  # so that they line up with the flattened depth / color / label images.
  x, y = tf.meshgrid(
      tf.range(image_width), tf.range(image_height), indexing='xy')
  x = tf.reshape(tf.cast(x, dtype=tf.float32) + 0.5, [-1, 1])
  y = tf.reshape(tf.cast(y, dtype=tf.float32) + 0.5, [-1, 1])
  point_positions = projections.image_frame_to_camera_frame(
      image_frame=tf.concat([x, y], axis=1),
      camera_intrinsics=camera_intrinsics)
  point_positions = projections.to_world_frame(
      camera_frame_points=point_positions,
      rotate_world_to_camera=inputs['cameras/rgbd_camera/extrinsics/R'],
      translate_world_to_camera=inputs['cameras/rgbd_camera/extrinsics/t'])
  # NOTE(review): the depth scaling is applied after the world-frame
  # transform. If `to_world_frame` applies a non-zero translation this is not
  # equivalent to scaling the camera-frame rays by depth - confirm intended.
  point_positions = point_positions * tf.reshape(depth_image, [-1, 1])

  depth_values = tf.reshape(depth_image, [-1])
  valid_depth_mask = tf.logical_and(
      tf.greater_equal(depth_values, min_pixel_depth),
      tf.less_equal(depth_values, max_pixel_depth))

  point_colors = tf.reshape(
      tf.cast(inputs['cameras/rgbd_camera/color_image'], dtype=tf.float32),
      [-1, 3])
  # Maps a color value of 0 to -1 and a value of 255 to 1.
  point_colors *= (2.0 / 255.0)
  point_colors -= 1.0

  prepared_inputs[fields.point_positions] = tf.boolean_mask(
      point_positions, valid_depth_mask)
  prepared_inputs[fields.point_colors] = tf.boolean_mask(
      point_colors, valid_depth_mask)

  if 'cameras/rgbd_camera/semantic_image' in inputs:
    class_points = tf.cast(
        tf.reshape(inputs['cameras/rgbd_camera/semantic_image'], [-1, 1]),
        dtype=tf.int32)
    prepared_inputs[fields.object_class_points] = tf.boolean_mask(
        class_points, valid_depth_mask)

  if 'cameras/rgbd_camera/instance_image' in inputs:
    instance_id_points = tf.cast(
        tf.reshape(inputs['cameras/rgbd_camera/instance_image'], [-1]),
        dtype=tf.int32)
    prepared_inputs[fields.object_instance_id_points] = tf.boolean_mask(
        instance_id_points, valid_depth_mask)

  # Class filtering needs the per-point classes, which only exist when a
  # semantic image was provided; without them there is nothing to filter
  # (previously this case failed with a KeyError).
  if (valid_object_classes is not None and
      fields.object_class_points in prepared_inputs):
    class_points = prepared_inputs[fields.object_class_points]
    valid_objects_mask = tf.zeros_like(class_points, dtype=tf.bool)
    for object_class in valid_object_classes:
      valid_objects_mask = tf.logical_or(
          valid_objects_mask, tf.equal(class_points, object_class))
    # Zero out the class of every point whose class is not in the list.
    prepared_inputs[fields.object_class_points] = class_points * tf.cast(
        valid_objects_mask, dtype=class_points.dtype)

  return prepared_inputs