def test_embedding_centers_to_soft_masks_given_similarity_strategy(self):
  """Compares soft masks with precomputed values for both strategies.

  Four 2-d embeddings are scored against two centers using the 'dotproduct'
  and the 'distance' similarity strategies. Each result is compared with a
  precomputed [2, 4] tensor, i.e. one row per center and one column per
  embedding.
  """
  point_embeddings = tf.constant(
      [[1.0, 0.0], [0.9, 0.01], [0.01, 1.0], [0.0, -1.0]], dtype=tf.float32)
  center_embeddings = tf.constant([[1.0, 0.0], [0.0, 1.0]], dtype=tf.float32)

  # Compute both results up front, then run the comparisons.
  dotproduct_masks = metric_learning_utils.embedding_centers_to_soft_masks(
      point_embeddings, center_embeddings, 'dotproduct')
  distance_masks = metric_learning_utils.embedding_centers_to_soft_masks(
      point_embeddings, center_embeddings, 'distance')

  expected_dotproduct_masks = tf.constant(
      [[0.7310586, 0.71094948, 0.5025, 0.5],
       [0.5, 0.5025, 0.7310586, 0.26894143]],
      dtype=tf.float32)
  expected_distance_masks = tf.constant(
      [[1.0, 0.99495012, 0.24261642, 0.23840588],
       [0.23840588, 0.28612095, 0.99995017, 0.03597248]],
      dtype=tf.float32)

  comparisons = (
      (dotproduct_masks, expected_dotproduct_masks),
      (distance_masks, expected_distance_masks),
  )
  for actual_masks, expected_masks in comparisons:
    self.assertAllClose(actual_masks.numpy(), expected_masks.numpy())
def classification_loss_using_mask_iou_func_unbatched(
    embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
    sampled_class_labels, sampled_logits, similarity_strategy, is_balanced):
  """Classification loss whose foreground targets are scaled by mask IOU.

  For every sampled point a binary mask is predicted by thresholding (at 0.5)
  the soft mask grown from its embedding, and compared with the ground-truth
  mask of the instance the sample belongs to. The resulting IOU scales the
  foreground entries of the sample's one-hot class label; the background
  entry (class 0) receives the remaining mass, clipped at zero.

  Args:
    embeddings: A tf.float32 tensor of size [n, f].
    instance_ids: A tf.int32 tensor of size [n].
    sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
    sampled_instance_ids: A tf.int32 tensor of size [num_samples].
    sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
    sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
      The number of classes is read from its static shape.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    is_balanced: If True, weights computed by
      `loss_utils.get_balanced_loss_weights_multiclass` from the sampled
      instance ids are passed to the classification loss.

  Returns:
    The value returned by `classification_loss_fn`, documented as a
    tf.float32 loss scalar tensor.
  """
  # One predicted binary mask per sampled embedding.
  soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
      embedding=embeddings,
      centers=sampled_embeddings,
      similarity_strategy=similarity_strategy)
  hard_masks = tf.cast(tf.greater(soft_masks, 0.5), dtype=tf.float32)

  # One ground-truth mask per sample: points sharing the sample's instance id.
  sample_ids_column = tf.expand_dims(sampled_instance_ids, axis=1)
  point_ids_row = tf.expand_dims(instance_ids, axis=0)
  same_instance = tf.equal(sample_ids_column, point_ids_row)
  target_masks = tf.cast(same_instance, dtype=tf.float32)

  mask_iou = instance_segmentation_utils.points_mask_pairwise_iou(
      masks1=hard_masks, masks2=target_masks)

  num_classes = sampled_logits.get_shape().as_list()[1]
  flat_class_labels = tf.reshape(sampled_class_labels, [-1])
  one_hot_labels = tf.one_hot(indices=flat_class_labels, depth=num_classes)

  # Scale the foreground columns (every class except class 0) by the IOU.
  iou_per_fg_class = tf.tile(
      tf.reshape(mask_iou, [-1, 1]), [1, num_classes - 1])
  foreground_targets = one_hot_labels[:, 1:] * iou_per_fg_class

  # Background takes whatever mass the foreground columns leave over.
  foreground_mass = tf.math.reduce_sum(
      foreground_targets, axis=1, keepdims=True)
  background_targets = tf.maximum(1.0 - foreground_mass, 0.0)

  soft_labels = tf.concat([background_targets, foreground_targets], axis=1)

  if not is_balanced:
    return classification_loss_fn(logits=sampled_logits, labels=soft_labels)

  balanced_weights = loss_utils.get_balanced_loss_weights_multiclass(
      labels=tf.expand_dims(sampled_instance_ids, axis=1))
  return classification_loss_fn(
      logits=sampled_logits, labels=soft_labels, weights=balanced_weights)
def postprocess(outputs,
                num_furthest_voxel_samples=200,
                sampler_score_vs_distance_coef=0.5,
                embedding_similarity_strategy='distance',
                embedding_similarity_threshold=0.5,
                apply_nms=False,
                nms_score_threshold=0.1,
                nms_iou_threshold=0.5):
  """Postprocesses network outputs into instance masks, scores and classes.

  Reads `object_semantic_voxels` and `instance_embedding_voxels` from
  `outputs` and writes `instance_segments_voxel_mask`, `objects_score` and
  `objects_class` back into the same dict (all keys are attributes of
  `standard_fields.DetectionResultFields`).

  Args:
    outputs: A dict of `Tensor` objects with network outputs. It is modified
      in place.
    num_furthest_voxel_samples: Number of voxels to be sampled using furthest
      voxel sampling.
    sampler_score_vs_distance_coef: The coefficient that balances the weight
      between furthest voxel sampling and highest score sampling.
    embedding_similarity_strategy: Defines the method for computing similarity
      between embedding vectors. Possible values are 'dotproduct' and
      'distance'.
    embedding_similarity_threshold: Similarity threshold used to decide if two
      point embedding vectors belong to the same instance.
    apply_nms: If True, performs non-maximum suppression after proposing
      instances.
    nms_score_threshold: Score threshold used for non-maximum suppression. It
      is also the threshold used to drop low score voxels before sampling,
      regardless of `apply_nms`.
    nms_iou_threshold: IOU threshold used for non-maximum suppression.

  Returns:
    outputs: The same dict that was passed in, with the post processed
      entries added.
  """
  fields = standard_fields.DetectionResultFields

  # Softmax semantic scores, removing background scores (column 0).
  voxel_scores = tf.nn.softmax(outputs[fields.object_semantic_voxels])
  voxel_scores = voxel_scores[:, 1:]
  voxel_instance_embeddings = outputs[fields.instance_embedding_voxels]

  # Remove low score instances.
  top_scores, top_instance_embeddings = _remove_low_score_instances(
      scores=voxel_scores,
      instance_embeddings=voxel_instance_embeddings,
      score_threshold=nms_score_threshold)

  # Sample furthest high score voxels.
  top_scores, top_instance_embeddings = _sample_furthest_voxels(
      scores=top_scores,
      instance_embeddings=top_instance_embeddings,
      num_furthest_voxel_samples=num_furthest_voxel_samples,
      sampler_score_vs_distance_coef=sampler_score_vs_distance_coef)

  # Setting instance segment masks, scores and class.
  predicted_soft_masks = embedding_utils.embedding_centers_to_soft_masks(
      embedding=voxel_instance_embeddings,
      centers=top_instance_embeddings,
      similarity_strategy=embedding_similarity_strategy)
  instance_masks = tf.cast(
      tf.greater(predicted_soft_masks, embedding_similarity_threshold),
      dtype=tf.float32)
  outputs[fields.instance_segments_voxel_mask] = instance_masks

  if apply_nms:
    num_classes = top_scores.get_shape().as_list()[1]
    # The NMS helper is fed masks with an extra trailing axis, which is
    # squeezed away again from the masks it returns.
    nms_masks, objects_score, objects_class = (
        instance_segmentation_utils.instance_non_maximum_suppression_2d_scores(
            masks=tf.expand_dims(instance_masks, axis=2),
            scores=top_scores,
            num_classes=num_classes,
            min_score_thresh=nms_score_threshold,
            min_iou_thresh=nms_iou_threshold))
    outputs[fields.instance_segments_voxel_mask] = tf.squeeze(
        nms_masks, axis=2)
  else:
    objects_class = tf.math.argmax(top_scores, axis=1)
    objects_score = tf.math.reduce_max(top_scores, axis=1)

  # Class indices refer to the scores without the background column, so they
  # are shifted by one to line up with the original class columns.
  # NOTE(review): presumably the NMS helper also returns 0-based foreground
  # class indices -- confirm against instance_segmentation_utils.
  objects_class = objects_class + 1
  outputs[fields.objects_score] = tf.expand_dims(objects_score, axis=1)
  outputs[fields.objects_class] = tf.cast(
      tf.expand_dims(objects_class, axis=1), tf.int32)

  # Return the dict as documented; callers relying on the in-place update
  # keep working because the very same object is returned.
  return outputs
def instance_embedding_iou_loss(embedding,
                                instance_labels,
                                num_samples,
                                similarity_strategy='dotproduct'):
  """Weighted squared-error loss on soft masks grown from sampled seeds.

  Here is how this loss is implemented. Seed points are drawn for every
  instance with `isu.randomly_select_n_points_per_segment` (`num_samples`
  per instance). The embeddings of the seeds are used as centers, and
  `eutils.embedding_centers_to_soft_masks` turns the similarity between every
  pixel embedding and every center into one soft mask per seed. Each soft
  mask is compared with the ground-truth mask of its instance through the
  per-pixel squared error, which is averaged under four different weightings:

    1. the ground-truth mask (pixels inside the segment),
    2. one minus the ground-truth mask (pixels outside the segment),
    3. the absolute difference between ground-truth and soft mask,
    4. the element-wise maximum of ground-truth and soft mask (soft union).

  The returned loss is the mean of these four terms.

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing instance
      ids. Assumed values in target start from 0 and cover 0 to N-1.
    num_samples: Number of samples.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.

  Returns:
    Scalar loss tensor of type tf.float32.
  """
  embedding_shape = tf.shape(embedding)
  height = embedding_shape[0]
  width = embedding_shape[1]
  dims = embedding_shape[2]

  # Work on flattened pixels: [height * width, dims] and [height * width].
  embedding = tf.reshape(embedding, tf.stack([-1, dims]))
  instance_labels = tf.reshape(instance_labels, [-1])
  num_inst = tf.reduce_max(instance_labels) + 1
  num_masks = num_samples * num_inst

  # Seed embeddings act as the centers the soft masks are grown from.
  indices = isu.randomly_select_n_points_per_segment(instance_labels,
                                                     num_samples)
  indices = tf.reshape(indices, [-1])
  init_centers = tf.gather(embedding, indices)
  soft_masks = eutils.embedding_centers_to_soft_masks(
      embedding, init_centers, similarity_strategy)
  soft_masks = tf.reshape(soft_masks, tf.stack([num_masks, height, width]))

  # Ground-truth masks, repeated once per sample so there is one per seed.
  # NOTE(review): the tiling below lays the masks out sample-major (all
  # instances for sample 0, then sample 1, ...). This assumes the flattened
  # seed indices follow the same order -- confirm against
  # isu.randomly_select_n_points_per_segment.
  instance_masks = tf.one_hot(instance_labels, num_inst, dtype=tf.float32)
  instance_masks = tf.transpose(instance_masks)
  instance_masks = tf.tile(
      tf.expand_dims(instance_masks, axis=0), tf.stack([num_samples, 1, 1]))
  instance_masks = tf.reshape(instance_masks,
                              tf.stack([num_masks, height, width]))

  flat_soft_masks = tf.reshape(soft_masks, [-1])
  flat_instance_masks = tf.reshape(instance_masks, [-1])

  # Per-pixel squared error. A Keras MeanSquaredError object would collapse
  # these 1-d inputs into a single scalar, turning every weighting below into
  # a mere rescaling of one global mean instead of a per-pixel weight.
  losses = tf.math.squared_difference(flat_soft_masks, flat_instance_masks)

  # Loss on pixels inside ground-truth segment.
  loss1 = tf.reduce_mean(losses * flat_instance_masks)

  # Loss on pixels outside ground-truth segment.
  loss2 = tf.reduce_mean(losses * (1.0 - flat_instance_masks))

  # Loss on pixels in the difference between ground-truth and predicted
  # segments.
  diff_weights = tf.maximum(soft_masks, instance_masks) - tf.minimum(
      soft_masks, instance_masks)
  loss3 = tf.reduce_mean(losses * tf.reshape(diff_weights, [-1]))

  # Loss on pixels in the (soft) union of ground-truth and predicted segments.
  union_weights = tf.maximum(instance_masks, soft_masks)
  loss4 = tf.reduce_mean(losses * tf.reshape(union_weights, [-1]))

  return (loss1 + loss2 + loss3 + loss4) / 4.0