Example #1
def instance_embedding_npair_random_center_loss(
    embedding,
    instance_labels,
    similarity_strategy='dotproduct',
    loss_strategy='softmax'):
  """Computes n-pair loss by drawing random points from each instance segment.

  Analogous to npair_loss (above), it computes the embedding loss given the
  ground-truth instance_labels. The loss is computed as follows: We compute
  the similarity between the embedding vector of each randomly drawn seed
  (center) point and every pixel. With num_inst randomly drawn centers (one
  per instance) and N pixels, this gives a [num_inst, N] matrix in which each
  column holds the similarities between the corresponding pixel's embedding
  and each of the centers. We want the similarity to be highest for the
  center that belongs to the same instance as the pixel, so we apply a
  softmax loss to each column given this ground truth and average over the
  columns.

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width]. Instance ids are
      assumed to cover 0 to N-1; pixels with negative labels are ignored.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If strategy is not known.
  """
  embedding_shape = tf.shape(embedding)
  embedding_dims = embedding_shape[2]
  embedding = tf.reshape(embedding, tf.stack([-1, embedding_dims]))
  instance_labels = tf.reshape(instance_labels, [-1])
  valid_mask = tf.greater_equal(instance_labels, 0)
  embedding = tf.boolean_mask(embedding, valid_mask)
  instance_labels = tf.boolean_mask(instance_labels, valid_mask)
  (indices,
   instance_label_one_hot) = isu.randomly_select_one_point_per_segment(
       instance_labels)
  centers = tf.gather(embedding, indices)
  embedding_similarity = eutils.embedding_centers_to_logits(centers, embedding,
                                                            similarity_strategy)
  if loss_strategy == 'softmax':
    loss_fn = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(y_true=instance_label_one_hot, y_pred=embedding_similarity)
    return tf.reduce_mean(losses)
  elif loss_strategy == 'sigmoid':
    if similarity_strategy == 'distance':
      instance_label_one_hot *= 0.5
    loss_fn = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(y_true=instance_label_one_hot, y_pred=embedding_similarity)
    return tf.reduce_mean(losses)
  else:
    raise ValueError('Unknown loss strategy: %s' % loss_strategy)
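
A minimal usage sketch for the loss above, assuming the module's `isu` and `eutils` dependencies resolve; the image size, embedding width, and labels below are illustrative only:

import tensorflow as tf

# Hypothetical 4x4 image with 8-dim embeddings and two instances.
# Pixels labeled -1 would be filtered out by the valid_mask above.
embedding = tf.random.normal([4, 4, 8])
instance_labels = tf.constant([[0, 0, 1, 1],
                               [0, 0, 1, 1],
                               [0, 0, 1, 1],
                               [0, 0, 1, 1]], dtype=tf.int32)
loss = instance_embedding_npair_random_center_loss(
    embedding, instance_labels,
    similarity_strategy='dotproduct',
    loss_strategy='softmax')  # Scalar, averaged over all valid pixels.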
Example #2
def npair_loss(embedding,
               target,
               similarity_strategy='dotproduct',
               loss_strategy='softmax'):
  """n-pair loss.

  This loss is based on the following paper:
  Kihyuk Sohn, Improved Deep Metric Learning with Multi-class N-pair Loss
  Objective, NIPS 2016.

  The loss is computed as follows: The dot product between every pair of
  embedding vectors is computed. Given N embedding vectors, this will result
  in a [N, N] matrix. In this matrix, the softmax (or sigmoid) loss for each row
  is computed. The total loss is the average over the losses for all the rows.
  In order to perform the softmax (sigmoid) loss, the one-hot ground-truth
  labels for each row are required. In row i, the columns that share row i's
  target label are set to 1 and all other columns to 0. For the softmax loss,
  each row is additionally normalized so that it sums to 1.

  Args:
    embedding: A float32 matrix of [N, D] where N is the number of pixels and
               D is the number of embedding dimensions.
    target: A float32 one-hot matrix of [N, K] where K is the number of labels.
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If similarity or loss strategy is not known.
  """
  similarity_matrix = eutils.embedding_centers_to_logits(embedding, embedding,
                                                         similarity_strategy)
  target_matrix = tf.matmul(target, target, transpose_b=True)
  if loss_strategy == 'sigmoid':
    if similarity_strategy == 'distance':
      target_matrix *= 0.5
    loss_fn = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(y_true=target_matrix, y_pred=similarity_matrix)
    return tf.reduce_mean(losses)
  elif loss_strategy == 'softmax':
    # Row-normalize so that each row of the target matrix sums to 1.
    target_matrix /= tf.reduce_sum(target_matrix, axis=1, keepdims=True)
    loss_fn = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(y_true=target_matrix, y_pred=similarity_matrix)
    return tf.reduce_mean(losses)
  else:
    raise ValueError('Unknown loss strategy: %s' % loss_strategy)
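
A small usage sketch: with N=4 embedding vectors and K=2 labels, `target @ target^T` is the [N, N] same-label indicator matrix that the softmax branch row-normalizes before the cross-entropy. The values below are illustrative only:

import tensorflow as tf

embedding = tf.constant(
    [[1.0, 0.0], [0.9, 0.1], [0.0, 1.0], [0.1, 0.9]], dtype=tf.float32)
# One-hot targets: the first two rows share label 0, the last two label 1.
target = tf.constant(
    [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]], dtype=tf.float32)
loss = npair_loss(embedding, target,
                  similarity_strategy='dotproduct',
                  loss_strategy='softmax')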
Example #3
def test_embedding_centers_to_logits(self):
  embedding = tf.constant(
      [[1.0, 0.0], [0.9, 0.01], [0.01, 1.0], [0.0, -1.0]],
      dtype=tf.float32)
  centers = tf.constant([[1.0, 0.0], [0.0, 1.0]], dtype=tf.float32)
  logits1 = metric_learning_utils.embedding_centers_to_logits(
      embedding, centers, 'dotproduct')
  expected_logits1 = tf.constant(
      [[1.0, 0.89999998, 0.01, 0.0], [0.0, 0.01, 1.0, -1.0]],
      dtype=tf.float32)
  logits2 = metric_learning_utils.embedding_centers_to_logits(
      embedding, centers, 'distance')
  expected_logits2 = tf.constant(
      [[1.19209290e-07, -1.00998878e-02, -1.98009968, -1.99999988],
       [-1.99999988, -1.79009986, -9.96589661e-05, -4.0]],
      dtype=tf.float32)
  self.assertAllClose(logits1.numpy(), expected_logits1.numpy())
  self.assertAllClose(logits2.numpy(), expected_logits2.numpy())
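
The expected values in the test pin down the semantics of embedding_centers_to_logits: for 'dotproduct' the logits are the dot products between the second argument's rows and the first argument's rows, and for 'distance' they are the negated squared Euclidean distances. A sketch consistent with the test, though not necessarily the library's actual implementation:

import tensorflow as tf

def embedding_centers_to_logits_sketch(embedding, centers, similarity_strategy):
  """Returns a [num_centers, num_points] logits matrix."""
  if similarity_strategy == 'dotproduct':
    # logits[i, j] = <centers[i], embedding[j]>
    return tf.matmul(centers, embedding, transpose_b=True)
  elif similarity_strategy == 'distance':
    # logits[i, j] = -||centers[i] - embedding[j]||^2, expanded as
    # 2<c, e> - ||c||^2 - ||e||^2 to avoid materializing all differences.
    return (2.0 * tf.matmul(centers, embedding, transpose_b=True) -
            tf.reduce_sum(tf.square(centers), axis=1, keepdims=True) -
            tf.reduce_sum(tf.square(embedding), axis=1)[tf.newaxis, :])
  else:
    raise ValueError('Unknown similarity strategy: %s' % similarity_strategy)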
Example #4
def instance_embedding_npair_random_sample_loss(
    embedding,
    instance_labels,
    num_samples,
    similarity_strategy='dotproduct',
    loss_strategy='softmax'):
  """Compute n-pair loss by drawing random samples from segments.

  Given n samples from each of the k instance, we create a [k x n x n, k]
  matrix of dot products. We compute the softmax loss for the rows of this
  matrix and average.

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing
      instance ids, assumed to cover 0 to N-1; pixels with negative labels
      are ignored.
    num_samples: Number of sampled points from each label.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If loss or similarity strategy are unknown.
  """
  embedding_shape = tf.shape(embedding)
  embedding_dims = embedding_shape[2]
  embedding = tf.reshape(embedding, tf.stack([-1, embedding_dims]))
  instance_labels = tf.reshape(instance_labels, [-1])
  valid_mask = tf.greater_equal(instance_labels, 0)
  embedding = tf.boolean_mask(embedding, valid_mask)
  instance_labels = tf.boolean_mask(instance_labels, valid_mask)
  num_instances = tf.reduce_max(instance_labels) + 1
  # num_samples (ns) x num_instances (ni)
  sampled_indices = isu.randomly_select_n_points_per_segment(instance_labels,
                                                             num_samples)
  sampled_indices_ns_ni = tf.reshape(sampled_indices, [-1])
  sampled_indices = tf.transpose(sampled_indices)
  sampled_indices_ni_ns = tf.reshape(sampled_indices, [-1])
  sampled_embedding_ns_ni = tf.gather(embedding, sampled_indices_ns_ni)
  sampled_embedding_ni_ns = tf.gather(embedding, sampled_indices_ni_ns)
  sampled_instance_label_ns_ni = tf.gather(instance_labels,
                                           sampled_indices_ns_ni)
  sampled_instance_label_ns_ni = tf.one_hot(sampled_instance_label_ns_ni,
                                            num_instances)
  sampled_instance_label_ni_ns = tf.gather(instance_labels,
                                           sampled_indices_ni_ns)
  sampled_instance_label_ni_ns = tf.one_hot(sampled_instance_label_ni_ns,
                                            num_instances)
  # [ni * ns, ns * ni] same-instance indicator matrix between the two
  # sample orderings.
  target_one_hot = tf.matmul(sampled_instance_label_ni_ns,
                             sampled_instance_label_ns_ni,
                             transpose_b=True)
  embedding_similarity = eutils.embedding_centers_to_logits(
      sampled_embedding_ns_ni, sampled_embedding_ni_ns, similarity_strategy)

  target_one_hot = tf.reshape(target_one_hot, [-1, num_instances])
  embedding_similarity = tf.reshape(embedding_similarity, [-1, num_instances])
  if loss_strategy == 'softmax':
    loss_fn = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(
        y_true=target_one_hot, y_pred=embedding_similarity)
    return tf.reduce_mean(losses)
  elif loss_strategy == 'sigmoid':
    if similarity_strategy == 'distance':
      target_one_hot *= 0.5
    loss_fn = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(
        y_true=target_one_hot, y_pred=embedding_similarity)
    return tf.reduce_mean(losses)
  else:
    raise ValueError('Unknown loss strategy: %s' % loss_strategy)
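
To make the [k x n x n, k] reshape concrete: with k=2 instances and n=2 samples per instance, the pairwise similarity and indicator matrices are [4, 4], and reshaping to [-1, k] yields 8 rows of length 2, each one a softmax problem over the instances. A hypothetical run with illustrative shapes:

import tensorflow as tf

embedding = tf.random.normal([8, 8, 4])
# Two vertical instance stripes, k=2.
instance_labels = tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]] * 8, dtype=tf.int32)
loss = instance_embedding_npair_random_sample_loss(
    embedding, instance_labels, num_samples=2,
    similarity_strategy='dotproduct', loss_strategy='softmax')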
Example #5
def instance_embedding_npair_random_center_random_sample_loss(
    embedding,
    instance_labels,
    num_samples,
    similarity_strategy='dotproduct',
    loss_strategy='softmax'):
  """Compute n-pair loss by drawing random points and samples from instances.

  This loss is very similar to instance_embedding_npair_random_center_loss,
  with the difference that instead of computing the dot product of every pixel
  with the seeds(centers), we compute the dot product of sampled points with the
  centers. This strategy has the advantage of an even distribution over
  predicted instances in the loss. In particular, large object instances will
  no longer contribute an overwhelming number of the terms in the loss,
  a limitation inherent to `npair_loss`.

  Args:
    embedding: A tf.float32 tensor of [height, width, dims].
    instance_labels: A tf.int32 tensor of [height, width] containing instance
      ids, assumed to cover 0 to N-1; pixels with negative labels are ignored.
    num_samples: Number of sampled points from each label.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    Total loss value.

  Raises:
    ValueError: If strategy is unknown.
  """
  instance_labels = tf.reshape(instance_labels, [-1])
  embedding_shape = tf.shape(embedding)
  embedding_dims = embedding_shape[2]
  embedding = tf.reshape(embedding, tf.stack([-1, embedding_dims]))
  valid_mask = tf.greater_equal(instance_labels, 0)
  embedding = tf.boolean_mask(embedding, valid_mask)
  instance_labels = tf.boolean_mask(instance_labels, valid_mask)
  (center_indices,
   instance_label_one_hot) = isu.randomly_select_one_point_per_segment(
       instance_labels)
  centers = tf.gather(embedding, center_indices)
  sampled_indices = isu.randomly_select_n_points_per_segment(instance_labels,
                                                             num_samples)
  sampled_indices = tf.reshape(sampled_indices, [-1])
  sampled_embedding = tf.gather(embedding, sampled_indices)
  sampled_instance_label_one_hot = tf.gather(instance_label_one_hot,
                                             sampled_indices)
  embedding_similarity = eutils.embedding_centers_to_logits(centers,
                                                            sampled_embedding,
                                                            similarity_strategy)
  if loss_strategy == 'softmax':
    loss_fn = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(
        y_true=sampled_instance_label_one_hot, y_pred=embedding_similarity)
    return tf.reduce_mean(losses)
  elif loss_strategy == 'sigmoid':
    if similarity_strategy == 'distance':
      sampled_instance_label_one_hot *= 0.5
    loss_fn = tf.keras.losses.BinaryCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    losses = loss_fn(
        y_true=sampled_instance_label_one_hot, y_pred=embedding_similarity)
    return tf.reduce_mean(losses)
  else:
    raise ValueError('Unknown loss strategy: %s' % loss_strategy)
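
A usage sketch of the even-weighting property: even when instance 0 covers almost the whole image, both instances contribute the same number of sampled terms to the loss. The scene below is hypothetical:

import tensorflow as tf

embedding = tf.random.normal([32, 32, 8])
# Instance 1 occupies only a small 4x4 patch; instance 0 covers the rest,
# yet each instance is represented by exactly num_samples sampled points.
instance_labels = tf.constant(
    [[1] * 4 + [0] * 28] * 4 + [[0] * 32] * 28, dtype=tf.int32)
loss = instance_embedding_npair_random_center_random_sample_loss(
    embedding, instance_labels, num_samples=10,
    similarity_strategy='dotproduct', loss_strategy='softmax')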