def test_embedding_centers_to_soft_masks_given_similarity_strategy(self):
    """Checks the soft masks for the 'dotproduct' and 'distance' strategies.

    Four 2-d embeddings are compared against two centers; each expected
    tensor holds one row per center and one column per embedding.
    """
    to_soft_masks = metric_learning_utils.embedding_centers_to_soft_masks
    points = tf.constant(
        [[1.0, 0.0], [0.9, 0.01], [0.01, 1.0], [0.0, -1.0]],
        dtype=tf.float32)
    seeds = tf.constant([[1.0, 0.0], [0.0, 1.0]], dtype=tf.float32)

    # Strategy 1: dot-product similarity.
    dotproduct_masks = to_soft_masks(points, seeds, 'dotproduct')
    want_dotproduct = tf.constant(
        [[0.7310586, 0.71094948, 0.5025, 0.5],
         [0.5, 0.5025, 0.7310586, 0.26894143]],
        dtype=tf.float32)

    # Strategy 2: distance-based similarity.
    distance_masks = to_soft_masks(points, seeds, 'distance')
    want_distance = tf.constant(
        [[1.0, 0.99495012, 0.24261642, 0.23840588],
         [0.23840588, 0.28612095, 0.99995017, 0.03597248]],
        dtype=tf.float32)

    self.assertAllClose(dotproduct_masks.numpy(), want_dotproduct.numpy())
    self.assertAllClose(distance_masks.numpy(), want_distance.numpy())
def classification_loss_using_mask_iou_func_unbatched(
        embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
        sampled_class_labels, sampled_logits, similarity_strategy,
        is_balanced):
    """Computes a classification loss whose targets are scaled by mask IOU.

    For every sampled embedding a soft mask over all points is predicted and
    thresholded at 0.5. The ground-truth mask of a sample marks the points
    whose instance id equals the sample's instance id. The IOU between the
    two masks scales the foreground entries (columns 1 and up) of the
    sample's one-hot class label; column 0 is set to
    max(1 - sum(foreground entries), 0). The resulting soft labels and
    `sampled_logits` are passed to `classification_loss_fn`.

    Args:
      embeddings: A tf.float32 tensor of size [n, f].
      instance_ids: A tf.int32 tensor of size [n].
      sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
      sampled_instance_ids: A tf.int32 tensor of size [num_samples].
      sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
      sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
        num_classes is read from its static shape.
      similarity_strategy: Method for computing similarity between embedding
        vectors. Possible values are 'dotproduct' and 'distance'.
      is_balanced: If True, weights obtained from
        `loss_utils.get_balanced_loss_weights_multiclass` (computed on
        `sampled_instance_ids`) are forwarded to the loss as `weights`.

    Returns:
      A tf.float32 loss scalar tensor.
    """
    # Binary predicted masks, one row per sampled embedding.
    soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
        embedding=embeddings,
        centers=sampled_embeddings,
        similarity_strategy=similarity_strategy)
    binary_masks = tf.cast(tf.greater(soft_masks, 0.5), dtype=tf.float32)

    # Ground-truth masks: entry (s, p) is 1 when point p carries the same
    # instance id as sample s.
    sample_ids_column = tf.expand_dims(sampled_instance_ids, axis=1)
    point_ids_row = tf.expand_dims(instance_ids, axis=0)
    same_instance = tf.equal(sample_ids_column, point_ids_row)
    target_masks = tf.cast(same_instance, dtype=tf.float32)

    mask_iou = instance_segmentation_utils.points_mask_pairwise_iou(
        masks1=binary_masks, masks2=target_masks)

    num_classes = sampled_logits.get_shape().as_list()[1]
    flat_labels = tf.reshape(sampled_class_labels, [-1])
    one_hot_labels = tf.one_hot(indices=flat_labels, depth=num_classes)

    # Foreground targets are the one-hot entries scaled by the IOU.
    iou_per_fg_class = tf.tile(
        tf.reshape(mask_iou, [-1, 1]), [1, num_classes - 1])
    fg_targets = one_hot_labels[:, 1:] * iou_per_fg_class

    # Whatever mass the foreground does not claim goes to column 0.
    fg_mass = tf.math.reduce_sum(fg_targets, axis=1, keepdims=True)
    bg_targets = tf.maximum(1.0 - fg_mass, 0.0)
    soft_labels = tf.concat([bg_targets, fg_targets], axis=1)

    loss_kwargs = {}
    if is_balanced:
        loss_kwargs['weights'] = (
            loss_utils.get_balanced_loss_weights_multiclass(
                labels=tf.expand_dims(sampled_instance_ids, axis=1)))
    return classification_loss_fn(
        logits=sampled_logits, labels=soft_labels, **loss_kwargs)
# Example no. 3
# 0
def postprocess(outputs,
                num_furthest_voxel_samples=200,
                sampler_score_vs_distance_coef=0.5,
                embedding_similarity_strategy='distance',
                embedding_similarity_threshold=0.5,
                apply_nms=False,
                nms_score_threshold=0.1,
                nms_iou_threshold=0.5):
    """Turns per-voxel network outputs into instance masks, scores and classes.

    Reads `object_semantic_voxels` and `instance_embedding_voxels` from
    `outputs`, writes `instance_segments_voxel_mask`, `objects_score` and
    `objects_class` back into the same dict, and returns that dict.

    Args:
      outputs: A dict of `Tensor` objects with network outputs. It is modified
        in place.
      num_furthest_voxel_samples: Number of voxels to be sampled using furthest
        voxel sampling.
      sampler_score_vs_distance_coef: The coefficient that balances the weight
        between furthest voxel sampling and highest score sampling.
      embedding_similarity_strategy: Defines the method for computing
        similarity between embedding vectors. Possible values are 'dotproduct'
        and 'distance'.
      embedding_similarity_threshold: Soft-mask values strictly greater than
        this threshold are set to 1.0 in the instance mask, others to 0.0.
      apply_nms: If True, performs non-maximum suppression after proposing
        instances.
      nms_score_threshold: Score threshold used for non-maximum suppression.
        It is also the threshold used to remove low score voxels before
        sampling, whether or not `apply_nms` is set.
      nms_iou_threshold: IOU threshold used for non-maximum suppression.

    Returns:
      outputs: The same dict that was passed in, with the post-processed
        entries added.
    """
    fields = standard_fields.DetectionResultFields

    # Softmax semantic scores, removing background scores (column 0).
    voxel_scores = tf.nn.softmax(outputs[fields.object_semantic_voxels])
    voxel_scores = voxel_scores[:, 1:]
    voxel_instance_embeddings = outputs[fields.instance_embedding_voxels]

    # Remove low score instances.
    top_scores, top_instance_embeddings = _remove_low_score_instances(
        scores=voxel_scores,
        instance_embeddings=voxel_instance_embeddings,
        score_threshold=nms_score_threshold)

    # Sample furthest high score voxels.
    top_scores, top_instance_embeddings = _sample_furthest_voxels(
        scores=top_scores,
        instance_embeddings=top_instance_embeddings,
        num_furthest_voxel_samples=num_furthest_voxel_samples,
        sampler_score_vs_distance_coef=sampler_score_vs_distance_coef)

    # Instance masks: threshold the similarity of every voxel embedding to
    # each sampled embedding.
    predicted_soft_masks = embedding_utils.embedding_centers_to_soft_masks(
        embedding=voxel_instance_embeddings,
        centers=top_instance_embeddings,
        similarity_strategy=embedding_similarity_strategy)
    voxel_masks = tf.cast(
        tf.greater(predicted_soft_masks, embedding_similarity_threshold),
        dtype=tf.float32)

    if apply_nms:
        num_classes = top_scores.get_shape().as_list()[1]
        nms_fn = (instance_segmentation_utils.
                  instance_non_maximum_suppression_2d_scores)
        # The NMS helper is given the masks with an extra axis at position 2,
        # which is squeezed away again afterwards.
        voxel_masks, objects_score, objects_class = nms_fn(
            masks=tf.expand_dims(voxel_masks, axis=2),
            scores=top_scores,
            num_classes=num_classes,
            min_score_thresh=nms_score_threshold,
            min_iou_thresh=nms_iou_threshold)
        voxel_masks = tf.squeeze(voxel_masks, axis=2)
    else:
        objects_class = tf.math.argmax(top_scores, axis=1)
        objects_score = tf.math.reduce_max(top_scores, axis=1)

    # Column 0 of the softmax was dropped above, so an argmax index k refers
    # to original score column k + 1. The same shift is applied to the classes
    # coming out of the NMS branch.
    objects_class = objects_class + 1

    outputs[fields.instance_segments_voxel_mask] = voxel_masks
    outputs[fields.objects_score] = tf.expand_dims(objects_score, axis=1)
    outputs[fields.objects_class] = tf.cast(
        tf.expand_dims(objects_class, axis=1), tf.int32)
    # The original fell off the end and returned None despite documenting
    # `outputs` as the return value.
    return outputs
# Example no. 4
# 0
def instance_embedding_iou_loss(embedding,
                                instance_labels,
                                num_samples,
                                similarity_strategy='dotproduct'):
  """Weighted squared-error loss on soft masks grown from random seed points.

  Seed points are drawn with `isu.randomly_select_n_points_per_segment`
  (`num_samples` per instance), and their embeddings are used as centers. A
  soft mask over all pixels is computed for every center with
  `eutils.embedding_centers_to_soft_masks` and compared against the one-hot
  ground-truth instance masks.

  The per-pixel squared error between soft and ground-truth masks is averaged
  under four different per-pixel weightings, and the four means are averaged:
    1. the ground-truth mask (pixels inside the instance),
    2. one minus the ground-truth mask (pixels outside the instance),
    3. |soft mask - ground-truth mask| (pixels where the two disagree),
    4. max(soft mask, ground-truth mask) (the soft union of the two).

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing
      instance ids. Assumed values in target start from 0 and cover 0 to N-1;
      the number of instances is taken to be max(instance_labels) + 1.
    num_samples: Number of seed points drawn per instance.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.

  Returns:
    Scalar tensor of type tf.float32.
  """
  embedding_shape = tf.shape(embedding)
  height = embedding_shape[0]
  width = embedding_shape[1]
  dims = embedding_shape[2]
  embedding = tf.reshape(embedding, tf.stack([-1, dims]))
  instance_labels = tf.reshape(instance_labels, [-1])
  num_inst = tf.reduce_max(instance_labels) + 1

  indices = isu.randomly_select_n_points_per_segment(instance_labels,
                                                     num_samples)
  # NOTE(review): the ground-truth masks below are laid out sample-major
  # (all instances for sample 0, then sample 1, ...). This assumes the
  # flattened seed indices follow the same order - TODO confirm against
  # randomly_select_n_points_per_segment.
  indices = tf.reshape(indices, [-1])
  init_centers = tf.gather(embedding, indices)
  soft_masks = eutils.embedding_centers_to_soft_masks(
      embedding,
      init_centers,
      similarity_strategy)

  mask_shape = tf.stack([num_samples * num_inst, height, width])
  soft_masks = tf.reshape(soft_masks, mask_shape)
  instance_masks = tf.one_hot(instance_labels, num_inst, dtype=tf.float32)
  instance_masks = tf.transpose(instance_masks)
  instance_masks = tf.tile(tf.expand_dims(instance_masks, axis=0),
                           tf.stack([num_samples, 1, 1]))
  instance_masks = tf.reshape(instance_masks, mask_shape)

  flat_soft_masks = tf.reshape(soft_masks, [-1])
  flat_instance_masks = tf.reshape(instance_masks, [-1])

  # Per-pixel squared error. tf.keras.losses.MeanSquaredError with its default
  # reduction returns a single scalar for 1-D inputs, which would turn every
  # weighted term below into `scalar * mean(weight)` instead of a per-pixel
  # weighting.
  losses = tf.math.squared_difference(flat_soft_masks, flat_instance_masks)

  # Loss on pixels inside ground-truth segment
  loss1 = tf.reduce_mean(losses * flat_instance_masks)
  # Loss on pixels outside ground-truth segment
  loss2 = tf.reduce_mean(losses * (1.0 - flat_instance_masks))
  # Loss on pixels in the difference between ground-truth and predicted segments
  diff_weights = (tf.maximum(flat_soft_masks, flat_instance_masks) -
                  tf.minimum(flat_soft_masks, flat_instance_masks))
  loss3 = tf.reduce_mean(losses * diff_weights)
  # Loss on pixels in the soft union of ground-truth and predicted segments
  loss4 = tf.reduce_mean(
      losses * tf.maximum(flat_instance_masks, flat_soft_masks))

  return (loss1 + loss2 + loss3 + loss4) / 4.0