    def test_correct_distance(self):
        """Compare against numpy calculation."""
        tf_embeddings = tf.constant([[0.5, 0.5], [1.0, 1.0]])

        expected_distance = np.array([[0, np.sqrt(2) / 2], [np.sqrt(2) / 2, 0]])

        distances = pairwise_distance(tf_embeddings, squared=False)
        self.assertAllClose(expected_distance, distances)

    def test_correct_distance_squared(self):
        """Compare against numpy calculation for squared distances."""
        tf_embeddings = tf.constant([[0.5, 0.5], [1.0, 1.0]])

        expected_distance = np.array([[0, 0.5], [0.5, 0]])

        distances = pairwise_distance(tf_embeddings, squared=True)
        self.assertAllClose(expected_distance, distances)

    def test_positive_distances(self):
        """Test that the pairwise distances are always positive."""

        # Create embeddings very close to each other, in [1.0, 1.0 + 2e-7].
        # This encourages rounding errors in the distance computation.
        embeddings = 1.0 + 2e-7 * tf.random.uniform([64, 6], dtype=tf.float32)
        distances = pairwise_distance(embeddings, squared=False)
        self.assertAllGreaterEqual(distances, 0)
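
The tests above exercise `pairwise_distance` from `tensorflow_addons.losses.metric_learning`. For reference, below is a minimal illustrative sketch of such a function (not the Addons implementation itself), assuming the usual expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2; the clamp to zero is exactly what `test_positive_distances` guards against.

import tensorflow as tf


def pairwise_distance_sketch(embeddings, squared=False):
    """Illustrative sketch: pairwise (squared) L2 distances for a [batch, dim] tensor."""
    dots = tf.matmul(embeddings, embeddings, transpose_b=True)
    square_norms = tf.linalg.diag_part(dots)
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, computed for all pairs at once.
    dist_sq = (
        tf.expand_dims(square_norms, 1) - 2.0 * dots + tf.expand_dims(square_norms, 0)
    )
    # Floating point error can make tiny distances slightly negative; clamp them.
    dist_sq = tf.maximum(dist_sq, 0.0)
    return dist_sq if squared else tf.sqrt(dist_sq)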
Example #4
def triplet_hard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    soft: bool = False,
) -> tf.Tensor:
    """Computes the triplet loss with hard negative and hard positive mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      soft: Boolean, if set, use the soft margin version.
    """
    labels, embeddings = y_true, y_pred
    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = metric_learning.pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)

    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if soft:
        triplet_loss = tf.math.log1p(tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + margin, 0.0)

    # Get final mean triplet loss
    triplet_loss = tf.reduce_mean(triplet_loss)

    return triplet_loss
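
A hedged usage sketch for the function above, with made-up labels and embeddings; it assumes `_masked_minimum`, `_masked_maximum`, and `metric_learning` are available as in this module. TensorFlow Addons also exposes the same logic as the Keras loss class `tfa.losses.TripletHardLoss`.

import tensorflow as tf

labels = tf.constant([0, 0, 1, 1])  # y_true: [batch_size] integer class labels
embeddings = tf.math.l2_normalize(  # y_pred: [batch_size, dim], l2-normalized
    tf.constant([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0], [0.1, 0.9]]), axis=1
)
loss = triplet_hard_loss(labels, embeddings, margin=1.0)
print(float(loss))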
Example #5
def Quadruplet_loss(y_true, y_pred):
    """Computes a quadruplet-style loss with hard positive and two hard negative mining.

    For each anchor, the hardest positive distance, the hardest negative
    distance, and the second-hardest negative distance are combined as
    max(2 * D_ap - D_an1 - D_an2 + 0.2, 0).
    """
    labels = tf.convert_to_tensor(y_true, name="labels")
    embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    pdist_matrix = metric_learning.pairwise_distance(embeddings, squared=False)

    adjacency = tf.math.equal(labels, tf.transpose(labels))

    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    # cast to float32
    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)

    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    # Mask out the hardest negative so the second-hardest negative can be found.
    adjacency_not2 = tf.math.equal(pdist_matrix, hard_negatives)
    adjacency_not2 = tf.math.logical_not(adjacency_not2)
    adjacency_not2 = tf.cast(adjacency_not2, dtype=tf.dtypes.float32)
    adjacency_not2_2 = tf.math.multiply(adjacency_not, adjacency_not2)
    hard_negatives2 = _masked_minimum(pdist_matrix, adjacency_not2_2)

    # batch size of Training
    batch_size = tf.size(labels)

    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    quadruplet_loss = tf.maximum(
        (hard_positives * 2) - hard_negatives - hard_negatives2 + 0.2, 0.0)
    return quadruplet_loss
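
The quadruplet loss above, and the triplet losses throughout this listing, rely on `_masked_minimum` and `_masked_maximum` helpers that are not shown here. The sketch below is consistent with the TensorFlow Addons helpers: each computes a row-wise minimum or maximum restricted to entries where the mask is 1, by shifting the data so that masked-out entries can never win.

import tensorflow as tf


def _masked_maximum(data, mask, dim=1):
    """Row-wise maximum of `data` over entries where `mask` is 1."""
    axis_minimums = tf.math.reduce_min(data, dim, keepdims=True)
    masked_maximums = (
        tf.math.reduce_max(tf.math.multiply(data - axis_minimums, mask), dim, keepdims=True)
        + axis_minimums
    )
    return masked_maximums


def _masked_minimum(data, mask, dim=1):
    """Row-wise minimum of `data` over entries where `mask` is 1."""
    axis_maximums = tf.math.reduce_max(data, dim, keepdims=True)
    masked_minimums = (
        tf.math.reduce_min(tf.math.multiply(data - axis_maximums, mask), dim, keepdims=True)
        + axis_maximums
    )
    return masked_minimums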
Example #6
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.

    Args:
      labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
        multiclass integer labels.
      embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
        not be l2 normalized.
      margin: Float, margin term in the loss definition.

    Returns:
      lifted_loss: tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = tf.shape(labels)
    assert lshape.shape == 1
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise distance matrix.
    pairwise_distances = metric_learning.pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    diff = margin - pairwise_distances
    mask = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    # Safe maximum: Temporarily shift negative distances
    #   above zero before taking max.
    #     this is to take the max only among negatives.
    row_minimums = tf.math.reduce_min(diff, 1, keepdims=True)
    row_negative_maximums = tf.math.reduce_max(
        tf.math.multiply(diff - row_minimums, mask), 1,
        keepdims=True) + row_minimums

    # Compute the loss.
    # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
    #   where m_i is the max of alpha - negative D_i's.
    # This matches the Caffe loss layer implementation at:
    #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

    max_elements = tf.math.maximum(row_negative_maximums,
                                   tf.transpose(row_negative_maximums))
    diff_tiled = tf.tile(diff, [batch_size, 1])
    mask_tiled = tf.tile(mask, [batch_size, 1])
    max_elements_vect = tf.reshape(tf.transpose(max_elements), [-1, 1])

    loss_exp_left = tf.reshape(
        tf.math.reduce_sum(
            tf.math.multiply(
                tf.math.exp(diff_tiled - max_elements_vect), mask_tiled),
            1,
            keepdims=True), [batch_size, batch_size])

    loss_mat = max_elements + tf.math.log(loss_exp_left +
                                          tf.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = tf.cast(
        adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
            tf.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = tf.math.reduce_sum(mask_positives) / 2.0

    lifted_loss = tf.math.truediv(
        0.25 * tf.math.reduce_sum(
            tf.math.square(
                tf.math.maximum(
                    tf.math.multiply(loss_mat, mask_positives), 0.0))),
        num_positives)
    return lifted_loss
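
A hedged sketch of typical Keras usage for the lifted structured loss via its TensorFlow Addons wrapper `tfa.losses.LiftedStructLoss`; the model architecture and data here are placeholders.

import tensorflow as tf
import tensorflow_addons as tfa

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation="relu", input_shape=(32,)),
    tf.keras.layers.Dense(16),  # embedding output; the docstring says not to l2-normalize
])
model.compile(optimizer="adam", loss=tfa.losses.LiftedStructLoss(margin=1.0))
# model.fit(features, integer_labels, ...) then trains on (embedding, label) pairs.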
Example #7
    def call(self, y_true, y_pred):
        from tensorflow_addons.losses import metric_learning

        self.sd.update_state(y_true, y_pred)

        labels = tf.cast(
            tf.convert_to_tensor(y_true, name="labels"),
            dtype=tf.dtypes.float32
        )
        if len(labels.shape) == 1:
            labels = tf.reshape(labels, (1, -1))

        embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

        convert_to_float32 = (
            (embeddings.dtype == tf.dtypes.float16) or
            (embeddings.dtype == tf.dtypes.bfloat16)
        )
        precise_embeddings = (
            tf.cast(embeddings, tf.dtypes.float32)
            if convert_to_float32
            else embeddings
        )

        # Reshape label tensor to [batch_size, 1].
        # lshape = tf.shape(labels)
        # labels = tf.reshape(labels, [lshape[0], 1])

        # Build pairwise squared distance matrix
        distance_metric = self.distance_metric

        if distance_metric == "L2":
            pdist_matrix = metric_learning.pairwise_distance(
                precise_embeddings, squared=False
            )

        elif distance_metric == "squared-L2":
            pdist_matrix = metric_learning.pairwise_distance(
                precise_embeddings, squared=True
            )

        elif distance_metric == "angular":
            pdist_matrix = metric_learning.angular_distance(precise_embeddings)

        else:
            pdist_matrix = distance_metric(precise_embeddings)

        # Fetch pairwise labels as adjacency matrix.
        adjacency = self.response_diffs(labels)
        # Invert so we can select negatives only.
        adjacency_not = tf.math.logical_not(adjacency)

        batch_size = tf.size(labels)

        # Compute the mask.
        pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
        mask = tf.math.logical_and(
            tf.tile(adjacency_not, [batch_size, 1]),
            tf.math.greater(
                pdist_matrix_tile,
                tf.reshape(tf.transpose(pdist_matrix), [-1, 1])
            ),
        )
        mask_final = tf.reshape(
            tf.math.greater(
                tf.math.reduce_sum(
                    tf.cast(mask, dtype=tf.dtypes.float32),
                    1,
                    keepdims=True
                ),
                0.0,
            ),
            [batch_size, batch_size],
        )
        mask_final = tf.transpose(mask_final)

        adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
        mask = tf.cast(mask, dtype=tf.dtypes.float32)

        # negatives_outside: smallest D_an where D_an > D_ap.
        negatives_outside = tf.reshape(
            _masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]
        )
        negatives_outside = tf.transpose(negatives_outside)

        # negatives_inside: largest D_an.
        negatives_inside = tf.tile(
            _masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]
        )
        semi_hard_negatives = tf.where(
            mask_final,
            negatives_outside,
            negatives_inside
        )

        loss_mat = tf.math.add(self.margin, pdist_matrix - semi_hard_negatives)

        mask_positives = (
            tf.cast(adjacency, dtype=tf.dtypes.float32) -
            tf.linalg.diag(tf.ones([batch_size]))
        )

        # In lifted-struct, the authors multiply 0.5 for upper triangular
        #   in semihard, they take all positive pairs except the diagonal.
        # max(n, 1) is needed to avoid a NaN loss from division by zero,
        # which would halt training. When there are no positive pairs the
        # clamp to 1 simply yields zero loss, since everything else is 0.
        num_positives = tf.math.maximum(
            tf.math.reduce_sum(mask_positives),
            1.0
        )

        triplet_loss = tf.math.truediv(
            tf.math.reduce_sum(
                tf.math.maximum(
                    tf.math.multiply(loss_mat, mask_positives),
                    0.0
                )
            ),
            num_positives,
        )

        if convert_to_float32:
            return tf.cast(triplet_loss, embeddings.dtype)
        else:
            return triplet_loss
Example #8
    def call(self, y_true, y_pred):
        from tensorflow_addons.losses import metric_learning

        self.sd.update_state(y_true, y_pred)

        labels = tf.cast(
            tf.convert_to_tensor(y_true, name="labels"),
            dtype=tf.dtypes.float32
        )
        if len(labels.shape) == 1:
            labels = tf.reshape(labels, (1, -1))

        embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

        convert_to_float32 = (
            (embeddings.dtype == tf.dtypes.float16) or
            (embeddings.dtype == tf.dtypes.bfloat16)
        )
        precise_embeddings = (
            tf.cast(embeddings, tf.dtypes.float32)
            if convert_to_float32
            else embeddings
        )

        # Reshape label tensor to [batch_size, 1].
        # lshape = tf.shape(labels)
        # labels = tf.reshape(labels, [lshape[0], 1])

        # Build pairwise squared distance matrix
        distance_metric = self.distance_metric

        if distance_metric == "L2":
            pdist_matrix = metric_learning.pairwise_distance(
                precise_embeddings, squared=False
            )

        elif distance_metric == "squared-L2":
            pdist_matrix = metric_learning.pairwise_distance(
                precise_embeddings, squared=True
            )

        elif distance_metric == "angular":
            pdist_matrix = metric_learning.angular_distance(precise_embeddings)

        else:
            pdist_matrix = distance_metric(precise_embeddings)

        # Fetch pairwise labels as adjacency matrix.
        adjacency = self.response_diffs(labels)

        # Invert so we can select negatives only.
        adjacency_not = tf.math.logical_not(adjacency)

        adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)
        adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
        hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

        batch_size = tf.size(labels)

        mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

        # hard positives: largest D_ap.
        hard_positives = _masked_maximum(pdist_matrix, mask_positives)

        if self.soft:
            triplet_loss = tf.math.log1p(
                tf.math.exp(hard_positives - hard_negatives))
        else:
            triplet_loss = tf.maximum(
                hard_positives - hard_negatives + self.margin,
                0.0
            )

        # Get final mean triplet loss
        triplet_loss = tf.reduce_mean(triplet_loss)

        if convert_to_float32:
            return tf.cast(triplet_loss, embeddings.dtype)
        else:
            return triplet_loss
Example #9
def triplet_semihard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    """Computes the triplet loss with semi-hard negative mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      distance_metric: `str` or a `Callable` that determines the distance
        metric. Valid strings are "L2" for l2-norm distance, "squared-L2"
        for squared l2-norm distance, and "angular" for cosine similarity.

        A `Callable` should take a batch of embeddings and return the
        pairwise distance matrix, e.g.

          def custom_distance(batch):
              return 1.0 - tf.matmul(batch, batch, transpose_b=True)

          triplet_semihard_loss(labels, batch,
                                distance_metric=custom_distance)


    Returns:
      triplet_loss: float scalar with dtype of y_pred.
    """

    labels, embeddings = y_true, y_pred

    convert_to_float32 = (embeddings.dtype == tf.dtypes.float16
                          or embeddings.dtype == tf.dtypes.bfloat16)
    precise_embeddings = (tf.cast(embeddings, tf.dtypes.float32)
                          if convert_to_float32 else embeddings)

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix

    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=False)

    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=True)

    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)

    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(pdist_matrix_tile,
                        tf.reshape(tf.transpose(pdist_matrix), [-1, 1])),
    )
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(tf.cast(mask, dtype=tf.dtypes.float32),
                               1,
                               keepdims=True),
            0.0,
        ),
        [batch_size, batch_size],
    )
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(_masked_minimum(pdist_matrix_tile, mask),
                                   [batch_size, batch_size])
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(_masked_maximum(pdist_matrix, adjacency_not),
                               [1, batch_size])
    semi_hard_negatives = tf.where(mask_final, negatives_outside,
                                   negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency,
                             dtype=tf.dtypes.float32) - tf.linalg.diag(
                                 tf.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)),
        num_positives,
    )

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
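
As the docstring notes, `distance_metric` also accepts a callable that maps a batch of embeddings to a pairwise distance matrix. A hedged sketch with an illustrative cosine-style metric:

import tensorflow as tf


def cosine_distance(batch):
    # 1 - cosine similarity, assuming rows of `batch` are l2-normalized.
    return 1.0 - tf.matmul(batch, batch, transpose_b=True)


labels = tf.constant([0, 1, 0, 1])
embeddings = tf.math.l2_normalize(tf.random.normal([4, 8]), axis=1)
loss = triplet_semihard_loss(labels, embeddings, margin=1.0,
                             distance_metric=cosine_distance)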
Example #10
    def call(self, y_true, y_pred):
        from tensorflow_addons.losses import metric_learning

        self.sd.update_state(y_true, y_pred)

        labels = tf.cast(
            tf.convert_to_tensor(y_true, name="labels"),
            dtype=tf.dtypes.float32
        )
        if len(labels.shape) == 1:
            labels = tf.reshape(labels, (1, -1))

        batch_size = tf.shape(labels)[0]

        embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

        convert_to_float32 = (
            (embeddings.dtype == tf.dtypes.float16) or
            (embeddings.dtype == tf.dtypes.bfloat16)
        )
        precise_embeddings = (
            tf.cast(embeddings, tf.dtypes.float32)
            if convert_to_float32
            else embeddings
        )

        # Reshape label tensor to [batch_size, 1].
        # lshape = tf.shape(labels)
        # labels = tf.reshape(labels, [lshape[0], 1])

        # Build pairwise squared distance matrix
        distance_metric = self.distance_metric

        if distance_metric == "L2":
            pdist_matrix = metric_learning.pairwise_distance(
                precise_embeddings, squared=False
            )

        elif distance_metric == "squared-L2":
            pdist_matrix = metric_learning.pairwise_distance(
                precise_embeddings, squared=True
            )

        elif distance_metric == "angular":
            pdist_matrix = metric_learning.angular_distance(precise_embeddings)

        else:
            pdist_matrix = distance_metric(precise_embeddings)

        # Fetch pairwise labels as adjacency matrix.
        adjacency = self.response_diffs(labels)
        # Invert so we can select negatives only.
        adjacency_not = tf.math.logical_not(adjacency)

        radii = (
            tf.reduce_mean(pdist_matrix, axis=1) -
            (tf.math.reduce_std(pdist_matrix, axis=1) / 2.)
        )
        neighbors = tf.math.less(pdist_matrix, tf.reshape(radii, (-1, 1)))

        hits = (
            tf.cast(
                tf.math.logical_and(neighbors, adjacency),
                tf.dtypes.float32
            ) - tf.linalg.diag(tf.ones([batch_size]))
        )

        misses = tf.cast(
            tf.math.logical_and(neighbors, adjacency_not),
            tf.dtypes.float32
        )

        nhits = tf.reduce_sum(hits)
        nmisses = tf.reduce_sum(misses)

        n = tf.cast(batch_size, tf.dtypes.float32)
        hits_dists = tf.multiply(pdist_matrix, hits)
        hits_dists = tf.math.divide_no_nan(
            hits_dists,
            tf.math.multiply(n, nhits)
        )
        misses_dists = tf.multiply(pdist_matrix, misses)
        misses_dists = tf.math.divide_no_nan(
            misses_dists,
            tf.math.multiply(n, nmisses)
        )

        loss = tf.subtract(misses_dists, hits_dists)
        loss = tf.reduce_sum(loss, axis=1)

        if convert_to_float32:
            return tf.cast(loss, embeddings.dtype)
        else:
            return loss
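
The `call` above follows a Relief-style neighbourhood idea: each row's radius is its mean pairwise distance minus half the standard deviation, "hits" are same-label neighbours inside that radius, and "misses" are different-label neighbours. A small hedged sketch of just the neighbourhood step, with made-up distances:

import tensorflow as tf

# Toy pairwise distance matrix for 3 samples (illustrative values only).
pdist_matrix = tf.constant([[0.0, 0.2, 0.9],
                            [0.2, 0.0, 0.8],
                            [0.9, 0.8, 0.0]])

radii = (
    tf.reduce_mean(pdist_matrix, axis=1)
    - tf.math.reduce_std(pdist_matrix, axis=1) / 2.0
)
# neighbors[i, j] is True when sample j lies inside sample i's radius.
neighbors = tf.math.less(pdist_matrix, tf.reshape(radii, (-1, 1)))
print(neighbors.numpy())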
Example #11
def triplet_semihard_loss(y_true, y_pred, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
    """
    labels, embeddings = y_true, y_pred
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = tf.shape(labels)
    assert lshape.shape == 1
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = metric_learning.pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(pdist_matrix_tile,
                        tf.reshape(tf.transpose(pdist_matrix), [-1, 1])))
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(tf.cast(mask, dtype=tf.dtypes.float32),
                               1,
                               keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(_masked_minimum(pdist_matrix_tile, mask),
                                   [batch_size, batch_size])
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(_masked_maximum(pdist_matrix, adjacency_not),
                               [1, batch_size])
    semi_hard_negatives = tf.where(mask_final, negatives_outside,
                                   negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency,
                             dtype=tf.dtypes.float32) - tf.linalg.diag(
                                 tf.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)),
        num_positives)

    return triplet_loss
Example #12
def triplet_hard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    soft: bool = False,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    """Computes the triplet loss with hard negative and hard positive mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      soft: Boolean, if set, use the soft margin version.
      distance_metric: `str` or a `Callable` that determines the distance
        metric. Valid strings are "L2" for l2-norm distance, "squared-L2"
        for squared l2-norm distance, and "angular" for cosine similarity.

        A `Callable` should take a batch of embeddings and return the
        pairwise distance matrix, e.g.

          def custom_distance(batch):
              return 1.0 - tf.matmul(batch, batch, transpose_b=True)

          triplet_hard_loss(labels, batch,
                            distance_metric=custom_distance)

    Returns:
      triplet_loss: float scalar with dtype of y_pred.
    """
    labels, embeddings = y_true, y_pred

    convert_to_float32 = (embeddings.dtype == tf.dtypes.float16
                          or embeddings.dtype == tf.dtypes.bfloat16)
    precise_embeddings = (tf.cast(embeddings, tf.dtypes.float32)
                          if convert_to_float32 else embeddings)

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=False)

    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=True)

    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)

    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)

    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if soft:
        triplet_loss = tf.math.log1p(
            tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + margin,
                                  0.0)

    # Get final mean triplet loss
    triplet_loss = tf.reduce_mean(triplet_loss)

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
Example #13
def test_zero_distance():
    """Test that equal embeddings have a pairwise distance of 0."""
    equal_embeddings = tf.constant([[1.0, 0.5], [1.0, 0.5]])

    distances = pairwise_distance(equal_embeddings, squared=False)
    np.testing.assert_allclose(tf.math.reduce_sum(distances), 0, 1e-6, 1e-6)
Example #14
    def call(self, inputs, **kwargs):
        x_source, x_driving = inputs
        kp_source_value, kp_source_jacobian = self.kp_extractor(x_source)
        kp_driving_value, kp_driving_jacobian = self.kp_extractor(x_driving)

        generated = {}
        kp_driving_jacobian_inv = tf.linalg.inv(kp_driving_jacobian)
        generated_prediction = self.generator(
            (x_source, kp_driving_value, kp_driving_jacobian_inv,
             kp_source_value, kp_source_jacobian))
        generated.update({
            'kp_source_value': kp_source_value,
            'kp_driving_value': kp_driving_value,
            'prediction': generated_prediction,
        })

        loss_values = {}

        pyramide_real = self.pyramid(x_driving)
        pyramide_generated = self.pyramid(generated_prediction)

        # kp detector normalize loss
        if self.use_kp_loss:
            kp_source_loss = 0.
            kp_driving_loss = 0.
            kp_loss_koef = 0.7
            for kp in kp_source_value:
                distances = metric_learning.pairwise_distance(kp)
                v, idx = tf.nn.top_k(-distances, 2)
                mins = -v[:, 1]  # 10
                # tf.print(mins)
                kp_source_loss += tf.reduce_sum(kp_loss_koef - mins)

            for kp in kp_driving_value:
                distances = metric_learning.pairwise_distance(kp)
                v, idx = tf.nn.top_k(-distances, 2)
                mins = -v[:, 1]  # 10
                # tf.print(mins)
                kp_driving_loss += tf.reduce_sum(kp_loss_koef - mins)

            kp_loss = (kp_source_loss + kp_driving_loss) / self.bs
            loss_values['kp_loss'] = kp_loss * self.kp_loss_weight

        if sum(self.loss_weights['perceptual']) != 0:
            value_total = 0
            for scale in self.scales:
                x_vgg = self.vgg(pyramide_generated['prediction_' +
                                                    str(scale)])
                y_vgg = self.vgg(pyramide_real['prediction_' + str(scale)])

                for i, weight in enumerate(self.loss_weights['perceptual']):
                    value = tf.reduce_mean(
                        tf.abs(x_vgg[i] - tf.stop_gradient(y_vgg[i])))
                    value_total += self.loss_weights['perceptual'][i] * value
                loss_values['perceptual'] = value_total

        if self.loss_weights['generator_gan'] != 0:
            discriminator_maps_generated = self.discriminator(
                (pyramide_generated, tf.stop_gradient(kp_driving_value)))
            discriminator_maps_real = self.discriminator(
                (pyramide_real, tf.stop_gradient(kp_driving_value)))
            value_total = 0
            for scale in self.disc_scales:
                key = f'prediction_map_{scale}'
                value = tf.reduce_mean(
                    (1 - discriminator_maps_generated[key])**2)
                value_total += self.loss_weights['generator_gan'] * value
            loss_values['gen_gan'] = value_total

            if sum(self.loss_weights['feature_matching']) != 0:
                value_total = 0
                for scale in self.disc_scales:
                    key = f'feature_maps_{scale}'
                    for i, (a, b) in enumerate(
                            zip(discriminator_maps_real[key],
                                discriminator_maps_generated[key])):
                        if self.loss_weights['feature_matching'][i] == 0:
                            continue
                        value = tf.reduce_mean(tf.abs(a - b))
                        value_total += self.loss_weights['feature_matching'][
                            i] * value
                    loss_values['feature_matching'] = value_total

        if (self.loss_weights['equivariance_value'] +
                self.loss_weights['equivariance_jacobian']) != 0:
            # if self.transform is None:
            #     self.transform = Transform(x_driving.shape[0], **self.train_params['transform_params'])

            transform = Transform(self.train_params['batch_size'],
                                  **self.train_params['transform_params'])
            transformed_frame = transform.transform_frame(x_driving)
            transformed_kp_value, transformed_kp_jacobian = self.kp_extractor(
                transformed_frame)

            # generated['transformed_frame'] = transformed_frame
            # generated['transformed_kp_value'] = transformed_kp_value
            # generated['transformed_kp_jacobian'] = transformed_kp_jacobian

            # Value loss part
            if self.loss_weights['equivariance_value'] != 0:
                value = tf.reduce_mean(
                    tf.abs(kp_driving_value -
                           transform.warp_coordinates(transformed_kp_value)))
                loss_values['equivariance_value'] = self.loss_weights[
                    'equivariance_value'] * value

            # jacobian loss part
            if self.loss_weights['equivariance_jacobian'] != 0:
                jacobian_transformed = tf.matmul(
                    transform.jacobian(transformed_kp_value, self.grad_tape),
                    transformed_kp_jacobian)

                normed_driving = tf.linalg.inv(kp_driving_jacobian)
                normed_transformed = jacobian_transformed
                value = tf.matmul(normed_driving, normed_transformed)

                eye = tf.reshape(tf.eye(2), [1, 1, 2, 2])

                value = tf.reduce_mean(tf.abs(eye - value))
                loss_values['equivariance_jacobian'] = self.loss_weights[
                    'equivariance_jacobian'] * value

        return loss_values, generated
Example #15
    def test_zero_distance(self):
        """Test that equal embeddings have a pairwise distance of 0."""
        equal_embeddings = tf.constant([[1.0, 0.5], [1.0, 0.5]])

        distances = pairwise_distance(equal_embeddings, squared=False)
        self.assertAllClose(tf.math.reduce_sum(distances), 0)
Example #16
def triplet_semihard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    r"""Computes the triplet loss with semi-hard negative mining.

    Usage:

    >>> y_true = tf.convert_to_tensor([0, 0])
    >>> y_pred = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]])
    >>> tfa.losses.triplet_semihard_loss(y_true, y_pred, distance_metric="L2")
    <tf.Tensor: shape=(), dtype=float32, numpy=2.4142137>

    >>> # Calling with callable `distance_metric`
    >>> distance_metric = lambda x: tf.linalg.matmul(x, x, transpose_b=True)
    >>> tfa.losses.triplet_semihard_loss(y_true, y_pred, distance_metric=distance_metric)
    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>

    Args:
      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      distance_metric: `str` or a `Callable` that determines distance metric.
        Valid strings are "L2" for l2-norm distance,
        "squared-L2" for squared l2-norm distance,
        and "angular" for cosine similarity.

        A `Callable` should take a batch of embeddings as input and
        return the pairwise distance matrix.

    Returns:
      triplet_loss: float scalar with dtype of `y_pred`.
    """

    labels, embeddings = y_true, y_pred

    convert_to_float32 = (embeddings.dtype == tf.dtypes.float16
                          or embeddings.dtype == tf.dtypes.bfloat16)
    precise_embeddings = (tf.cast(embeddings, tf.dtypes.float32)
                          if convert_to_float32 else embeddings)

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix

    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=False)

    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=True)

    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)

    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(pdist_matrix_tile,
                        tf.reshape(tf.transpose(pdist_matrix), [-1, 1])),
    )
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(tf.cast(mask, dtype=tf.dtypes.float32),
                               1,
                               keepdims=True),
            0.0,
        ),
        [batch_size, batch_size],
    )
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(_masked_minimum(pdist_matrix_tile, mask),
                                   [batch_size, batch_size])
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(_masked_maximum(pdist_matrix, adjacency_not),
                               [1, batch_size])
    semi_hard_negatives = tf.where(mask_final, negatives_outside,
                                   negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency,
                             dtype=tf.dtypes.float32) - tf.linalg.diag(
                                 tf.ones([batch_size]))

    # In lifted-struct, the authors multiply 0.5 for upper triangular
    #   in semihard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)),
        num_positives,
    )

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
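
For Keras training, TensorFlow Addons wraps this function as `tfa.losses.TripletSemiHardLoss`. A hedged sketch of compiling a model with it (the architecture is a placeholder), with the final embeddings l2-normalized as the docstring recommends:

import tensorflow as tf
import tensorflow_addons as tfa

model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation="relu", input_shape=(16,)),
    tf.keras.layers.Dense(8),
    tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)),
])
model.compile(optimizer="adam", loss=tfa.losses.TripletSemiHardLoss(margin=1.0))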
Example #17
def triplet_hard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    soft: bool = False,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    r"""Computes the triplet loss with hard negative and hard positive mining.

    Usage:

    >>> y_true = tf.convert_to_tensor([0, 0])
    >>> y_pred = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]])
    >>> tfa.losses.triplet_hard_loss(y_true, y_pred, distance_metric="L2")
    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>

    >>> # Calling with callable `distance_metric`
    >>> distance_metric = lambda x: tf.linalg.matmul(x, x, transpose_b=True)
    >>> tfa.losses.triplet_hard_loss(y_true, y_pred, distance_metric=distance_metric)
    <tf.Tensor: shape=(), dtype=float32, numpy=0.0>

    Args:
      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      soft: Boolean, if set, use the soft margin version.
      distance_metric: `str` or a `Callable` that determines distance metric.
        Valid strings are "L2" for l2-norm distance,
        "squared-L2" for squared l2-norm distance,
        and "angular" for cosine similarity.

        A `Callable` should take a batch of embeddings as input and
        return the pairwise distance matrix.

    Returns:
      triplet_loss: float scalar with dtype of `y_pred`.
    """
    labels, embeddings = y_true, y_pred

    convert_to_float32 = (embeddings.dtype == tf.dtypes.float16
                          or embeddings.dtype == tf.dtypes.bfloat16)
    precise_embeddings = (tf.cast(embeddings, tf.dtypes.float32)
                          if convert_to_float32 else embeddings)

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=False)

    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(precise_embeddings,
                                                         squared=True)

    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)

    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)

    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if soft:
        triplet_loss = tf.math.log1p(
            tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + margin,
                                  0.0)

    # Get final mean triplet loss
    triplet_loss = tf.reduce_mean(triplet_loss)

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
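
Note that with `soft=True` the hinge `max(d_ap - d_an + margin, 0)` is replaced by the softplus `log(1 + exp(d_ap - d_an))`, so `margin` is not used in that branch. A tiny hedged sketch with made-up hardest-positive and hardest-negative distances:

import tensorflow as tf

d_ap = tf.constant([0.8, 0.2])  # hardest positive distances (illustrative)
d_an = tf.constant([0.5, 0.9])  # hardest negative distances (illustrative)

hard = tf.maximum(d_ap - d_an + 1.0, 0.0)       # soft=False, margin=1.0
soft = tf.math.log1p(tf.math.exp(d_ap - d_an))  # soft=True (softplus, margin unused)
print(hard.numpy(), soft.numpy())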