Example #1
    def test_masked_avg(self):
        tf.reset_default_graph()

        data = tf.placeholder(tf.float32, shape=[None, None])
        mask = tf.placeholder(tf.float32, shape=[None, None])
        masked_avgs = utils.masked_avg(data, mask)

        with self.test_session() as sess:
            result = sess.run(masked_avgs,
                              feed_dict={
                                  data: [[1, 2, 3], [4, 5, 6]],
                                  mask: [[1, 0, 1], [0, 1, 0]]
                              })
            self.assertAllClose(result, [[2], [5]])

            result = sess.run(masked_avgs,
                              feed_dict={
                                  data: [[1, 2, 3], [4, 5, 6]],
                                  mask: [[0, 1, 0], [1, 0, 1]]
                              })
            self.assertAllClose(result, [[2], [5]])

            result = sess.run(masked_avgs,
                              feed_dict={
                                  data: [[1, 2, 3], [4, 5, 6]],
                                  mask: [[0, 0, 0], [0, 0, 0]]
                              })
            self.assertAllClose(result, [[0], [0]])
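
A note on the helper being tested: the behavior exercised above (per-row average over the masked entries, the reduced axis kept with size 1, and a result of 0 when the mask selects nothing) can be reproduced with a minimal sketch. This is an assumption based on this test, not the project's actual `utils.masked_avg` implementation.

import tensorflow as tf

_EPSILON = 1e-8  # assumed small constant guarding against division by zero


def masked_avg(data, mask, dim=1):
    """Averages `data` over `dim`, counting only entries where `mask` is 1 (sketch)."""
    masked_sum = tf.reduce_sum(data * mask, axis=dim, keepdims=True)
    mask_count = tf.reduce_sum(mask, axis=dim, keepdims=True)
    # Rows with an all-zero mask produce 0, matching the last assertion above.
    return tf.div(masked_sum, tf.maximum(mask_count, _EPSILON))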
Example #2
    def build_loss(self, predictions, **kwargs):
        """Build tf graph to compute loss.

    Args:
      predictions: dict of prediction results keyed by name.

    Returns:
      loss_dict: dict of loss tensors keyed by name.
    """
        options = self._model_proto

        (image_id, image_ids_gathered,
         similarity) = (predictions[_FIELD_IMAGE_ID],
                        predictions[_FIELD_IMAGE_IDS_GATHERED],
                        predictions[_FIELD_SIMILARITY])

        distance = 1.0 - similarity

        pos_mask = tf.cast(
            tf.equal(tf.expand_dims(image_id, axis=1),
                     tf.expand_dims(image_ids_gathered, axis=0)), tf.float32)
        neg_mask = 1.0 - pos_mask

        if options.triplet_ap_use_avg:
            distance_ap = utils.masked_avg(distance, pos_mask)
        else:
            distance_ap = utils.masked_maximum(distance, pos_mask)

        # negatives_outside: smallest D_an where D_an > D_ap.

        mask = tf.cast(tf.greater(distance, distance_ap), tf.float32)
        mask = mask * neg_mask
        negatives_outside = utils.masked_minimum(distance, mask)

        # negatives_inside: largest D_an.

        negatives_inside = utils.masked_maximum(distance, neg_mask)

        # distance_an: the semihard negatives.

        mask_condition = tf.greater(tf.reduce_sum(mask, axis=1, keepdims=True),
                                    0.0)

        distance_an = tf.where(mask_condition, negatives_outside,
                               negatives_inside)

        # Triplet loss.

        losses = tf.maximum(distance_ap - distance_an + options.triplet_margin,
                            0)

        return {
            'triplet_loss': tf.reduce_mean(losses),
        }
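
The semihard mining above also depends on `utils.masked_maximum` and `utils.masked_minimum`. A plausible sketch of those two helpers, modeled on the masked max/min trick used in TensorFlow's metric-learning triplet loss (an assumption about what `utils` provides, not a verified copy), looks like this:

def masked_maximum(data, mask, dim=1):
    """Largest value of `data` along `dim`, considering only positions where `mask` is 1 (sketch)."""
    axis_minimums = tf.reduce_min(data, axis=dim, keepdims=True)
    # Shift so that unmasked entries cannot win the max, then shift back.
    return tf.reduce_max((data - axis_minimums) * mask, axis=dim,
                         keepdims=True) + axis_minimums


def masked_minimum(data, mask, dim=1):
    """Smallest value of `data` along `dim`, considering only positions where `mask` is 1 (sketch)."""
    axis_maximums = tf.reduce_max(data, axis=dim, keepdims=True)
    return tf.reduce_min((data - axis_maximums) * mask, axis=dim,
                         keepdims=True) + axis_maximums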
Example #3
    def _create_lv0_edge_scores(self, proposal_repr, slogan_repr, label_repr,
                                dbpedia_repr, proposal_mask, slogan_mask,
                                label_mask, dbpedia_mask):
        """Creates adjacency matrix. Each elem denotes an edge weight.

    Args:
      proposal_repr: A [batch, max_proposal_num, dims] float tensor.
      slogan_repr: A [batch, max_slogan_num, dims] float tensor.

    Returns:
      proposal_scores: A [batch, max_proposal_num] float tensor denoting
        weights of different proposals.
      slogan_scores: A [batch, max_slogan_num] float tensor denoting weights
        of different slogans.
      label_to_proposal_scores: A [batch, max_proposal_num, max_label_num] tensor.
      dbpedia_to_slogan_scores: A [batch, max_slogan_num, max_dbpedia_num] tensor.
    """
        options = self._options
        is_training = self._is_training

        (batch_i, max_proposal_num, max_slogan_num, max_label_num,
         max_dbpedia_num) = (proposal_repr.get_shape()[0].value,
                             utils.get_tensor_shape(proposal_repr)[1],
                             utils.get_tensor_shape(slogan_repr)[1],
                             utils.get_tensor_shape(label_repr)[1],
                             utils.get_tensor_shape(dbpedia_repr)[1])

        with tf.name_scope('create_lv0_attention_weights'):

            # Predictions for updating slogan.
            #   slogan_dbpedia_to_proposal_scores shape =
            #     [batch, max_proposal_num, max_slogan_num + max_dbpedia_num].
            #   slogan_dbpedia_to_slogan_scores shape =
            #     [batch, max_slogan_num, max_slogan_num + max_dbpedia_num].

            slogan_dbpedia_repr = tf.concat([slogan_repr, dbpedia_repr],
                                            axis=1)
            slogan_dbpedia_to_proposal_scores = self._create_edge_weights_helper(
                proposal_repr,
                slogan_dbpedia_repr,
                scope='slogan_dbpedia_to_proposal_scores')
            slogan_dbpedia_to_slogan_scores = self._create_edge_weights_helper(
                slogan_repr,
                slogan_dbpedia_repr,
                scope='slogan_dbpedia_to_slogan_scores')

            # Predictions for updating proposal.
            #   proposal_label_to_proposal_scores shape =
            #     [batch, max_proposal_num, max_proposal_num + max_label_num].
            #   proposal_label_to_slogan_scores shape =
            #     [batch, max_slogan_num, max_proposal_num + max_label_num]

            proposal_label_repr = tf.concat([proposal_repr, label_repr],
                                            axis=1)
            proposal_label_to_proposal_scores = self._create_edge_weights_helper(
                proposal_repr,
                proposal_label_repr,
                scope='proposal_label_to_proposal_scores')
            proposal_label_to_slogan_scores = self._create_edge_weights_helper(
                slogan_repr,
                proposal_label_repr,
                scope='proposal_label_to_slogan_scores')

            # Compute slogan_dbpedia_to_slogan_scores.
            #   slogan_dbpedia_context_scores shape = [batch, 1, max_slogan_num + max_dbpedia_num]
            #   slogan_dbpedia_to_slogan_scores shape = [batch, max_slogan_num, max_slogan_num + max_dbpedia_num]

            slogan_dbpedia_context_scores = utils.masked_avg(
                slogan_dbpedia_to_proposal_scores,
                mask=tf.expand_dims(proposal_mask, 2),
                dim=1)
            slogan_dbpedia_to_slogan_scores = tf.add(
                slogan_dbpedia_context_scores, slogan_dbpedia_to_slogan_scores)

            slogan_scores = tf.linalg.diag_part(
                tf.slice(slogan_dbpedia_to_slogan_scores,
                         begin=[0, 0, 0],
                         size=[batch_i, max_slogan_num, max_slogan_num]))
            dbpedia_to_slogan_scores = tf.slice(
                slogan_dbpedia_to_slogan_scores,
                begin=[0, 0, max_slogan_num],
                size=[batch_i, max_slogan_num, max_dbpedia_num])

            # Compute proposal_label_to_proposal_scores.
            #   proposal_label_context_scores shape = [batch, 1, max_proposal_num + max_label_num]
            #   proposal_label_to_proposal_scores shape = [batch, max_proposal_num, max_proposal_num + max_label_num]

            proposal_label_context_scores = utils.masked_avg(
                proposal_label_to_slogan_scores,
                mask=tf.expand_dims(slogan_mask, 2),
                dim=1)
            proposal_label_to_proposal_scores = tf.add(
                proposal_label_context_scores,
                proposal_label_to_proposal_scores)

            proposal_scores = tf.linalg.diag_part(
                tf.slice(proposal_label_to_proposal_scores,
                         begin=[0, 0, 0],
                         size=[batch_i, max_proposal_num, max_proposal_num]))
            label_to_proposal_scores = tf.slice(
                proposal_label_to_proposal_scores,
                begin=[0, 0, max_proposal_num],
                size=[batch_i, max_proposal_num, max_label_num])

            return proposal_scores, slogan_scores, label_to_proposal_scores, dbpedia_to_slogan_scores
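
The masking pattern used in this example (averaging a [batch, M, N] score tensor over the M axis while zeroing out padded rows) can be checked in isolation with a tiny, self-contained snippet; the shapes and numbers below are illustrative only.

import tensorflow as tf

scores = tf.constant([[[1., 2.], [3., 4.], [5., 6.]]])   # [batch=1, M=3, N=2]
row_mask = tf.constant([[[1.], [1.], [0.]]])             # [batch=1, M=3, 1]
context = tf.reduce_sum(scores * row_mask, axis=1, keepdims=True) / \
    tf.maximum(tf.reduce_sum(row_mask, axis=1, keepdims=True), 1e-8)
# context has shape [1, 1, 2] and evaluates to [[[2., 3.]]]: the third
# (masked-out) row does not contribute to the average.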
Example #4
    def _create_lv1_edge_scores(self, proposal_repr, slogan_repr,
                                proposal_mask, slogan_mask):
        """Creates adjacency matrix. Each elem denotes an edge weight.

    Args:
      proposal_repr: A [batch, max_proposal_num, dims] float tensor.
      slogan_repr: A [batch, max_slogan_num, dims] float tensor.

    Returns:
      proposal_scores: A [batch, max_proposal_num] float tensor denoting
        weights of different proposals.
      slogan_scores: A [batch, max_slogan_num] float tensor denoting weights
        of different slogans.
    """
        options = self._options
        is_training = self._is_training

        with tf.name_scope('create_attention_weights'):

            if options.attention_type == graph_creator_pb2.ConvGraphCreator.CO_ATTENTION:

                # Use co-attention to determine edge importance.
                #   slogan_to_proposal_scores shape = [batch, max_proposal_num, max_slogan_num].
                #   proposal_scores shape = [batch, max_proposal_num]
                #   slogan_scores shape = [batch,  max_slogan_num]
                slogan_to_proposal_scores = self._create_edge_weights_helper(
                    proposal_repr,
                    slogan_repr,
                    scope='slogan_to_proposal_scores')
                proposal_scores = utils.masked_avg(slogan_to_proposal_scores,
                                                   mask=tf.expand_dims(
                                                       slogan_mask, 1),
                                                   dim=2)
                proposal_scores = tf.squeeze(proposal_scores, axis=-1)
                slogan_scores = utils.masked_avg(slogan_to_proposal_scores,
                                                 mask=tf.expand_dims(
                                                     proposal_mask, 2),
                                                 dim=1)
                slogan_scores = tf.squeeze(slogan_scores, axis=1)

            elif options.attention_type == graph_creator_pb2.ConvGraphCreator.SELF_ATTENTION:

                # Use self-attention to determine edge importance.
                #   similarity_proposal_proposal shape = [batch, max_proposal_num, max_proposal_num]
                #   similarity_slogan_slogan shape = [batch, max_slogan_num, max_slogan_num]
                #   proposal_scores shape = [batch, 1, max_proposal_num]
                #   slogan_scores shape = [batch, 1, max_slogan_num]

                similarity_proposal_proposal = self._create_edge_weights_helper(
                    proposal_repr,
                    proposal_repr,
                    scope='similarity_proposal_proposal')
                similarity_slogan_slogan = self._create_edge_weights_helper(
                    slogan_repr, slogan_repr, scope='similarity_slogan_slogan')
                proposal_scores = utils.masked_avg(
                    similarity_proposal_proposal,
                    tf.expand_dims(proposal_mask, 2),
                    dim=1)
                proposal_scores = tf.squeeze(proposal_scores, axis=1)
                slogan_scores = utils.masked_avg(similarity_slogan_slogan,
                                                 tf.expand_dims(
                                                     slogan_mask, 2),
                                                 dim=1)
                slogan_scores = tf.squeeze(slogan_scores, axis=1)

            else:
                raise ValueError('Invalid attention type %s' %
                                 options.attention_type)

        return proposal_scores, slogan_scores
Example #5
    def _calc_oicr_loss(self,
                        labels,
                        num_proposals,
                        proposals,
                        scores_0,
                        scores_1,
                        scope,
                        iou_threshold=0.5):
        """Calculates the OICR loss at refinement stage `i`.

    Args:
      labels: A [batch, num_classes] float tensor.
      num_proposals: A [batch] int tensor.
      proposals: A [batch, max_num_proposals, 4] float tensor.
      scores_0: A [batch, max_num_proposal, 1 + num_classes] float tensor, 
        representing the proposal score at `k-th` refinement.
      scores_1: A [batch, max_num_proposal, 1 + num_classes] float tensor,
        representing the proposal score at `(k+1)-th` refinement.

    Returns:
      oicr_cross_entropy_loss: a scalar float tensor.
    """
        with tf.name_scope(scope):
            (batch, max_num_proposals,
             num_classes_plus_one) = utils.get_tensor_shape(scores_0)
            num_classes = num_classes_plus_one - 1

            # For each class, look for the most confident proposal.
            #   proposal_ind shape = [batch, num_classes].

            proposal_mask = tf.sequence_mask(num_proposals,
                                             maxlen=max_num_proposals,
                                             dtype=tf.float32)
            proposal_ind = utils.masked_argmax(tf.nn.softmax(scores_0,
                                                             axis=-1)[:, :,
                                                                      1:],
                                               tf.expand_dims(proposal_mask,
                                                              axis=-1),
                                               dim=1)

            # Deal with the most confident proposal per each class.
            #   Unstack the `proposal_ind`, `labels`.
            #   proposal_labels shape = [batch, max_num_proposals, num_classes].

            proposal_labels = []
            indices_0 = tf.range(batch, dtype=tf.int64)
            for indices_1, label_per_class in zip(
                    tf.unstack(proposal_ind, axis=-1),
                    tf.unstack(labels, axis=-1)):

                # Gather the most confident proposal for the class.
                #   confident_proposal shape = [batch, 4].

                indices = tf.stack([indices_0, indices_1], axis=-1)
                confident_proposal = tf.gather_nd(proposals, indices)

                # Compute the IoU between each proposal and the most confident proposal.
                #   iou shape = [batch, max_num_proposals].

                confident_proposal_tiled = tf.tile(
                    tf.expand_dims(confident_proposal, axis=1),
                    [1, max_num_proposals, 1])
                iou = box_utils.iou(
                    tf.reshape(proposals, [-1, 4]),
                    tf.reshape(confident_proposal_tiled, [-1, 4]))
                iou = tf.reshape(iou, [batch, max_num_proposals])

                # Filter out irrelevant predictions using image-level label.

                target = tf.to_float(tf.greater_equal(iou, iou_threshold))
                target = tf.where(label_per_class > 0,
                                  x=target,
                                  y=tf.zeros_like(target))
                proposal_labels.append(target)

            proposal_labels = tf.stack(proposal_labels, axis=-1)

            # Add background targets, and normalize the sum value to 1.0.
            #   proposal_labels shape = [batch, max_num_proposals, 1 + num_classes].

            bkg = tf.logical_not(tf.reduce_sum(proposal_labels, axis=-1) > 0)
            proposal_labels = tf.concat(
                [tf.expand_dims(tf.to_float(bkg), axis=-1), proposal_labels],
                axis=-1)

            proposal_labels = tf.div(
                proposal_labels,
                tf.reduce_sum(proposal_labels, axis=-1, keepdims=True))

            assert_op = tf.Assert(
                tf.reduce_all(
                    tf.abs(tf.reduce_sum(proposal_labels, axis=-1) -
                           1) < 1e-6),
                ["Probabilities not sum to ONE", proposal_labels])

            # Compute the loss.

            with tf.control_dependencies([assert_op]):
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    labels=tf.stop_gradient(proposal_labels), logits=scores_1)
                oicr_cross_entropy_loss = tf.reduce_mean(
                    utils.masked_avg(data=losses, mask=proposal_mask, dim=1))

        return oicr_cross_entropy_loss
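
This example additionally uses `utils.masked_argmax` to pick, per class, the highest-scoring valid proposal. A minimal sketch consistent with the way `proposal_ind` is consumed above (int64 indices of shape [batch, num_classes]) could look like the following; again, an assumption rather than the project's verified implementation.

def masked_argmax(data, mask, dim=1):
    """Index of the largest masked entry of `data` along `dim` (sketch)."""
    axis_minimums = tf.reduce_min(data, axis=dim, keepdims=True)
    # Shift so that masked-out entries cannot be selected, then take the argmax.
    return tf.argmax((data - axis_minimums) * mask, axis=dim)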
Example #6
    def build_loss(self, predictions, examples, **kwargs):
        """Build tf graph to compute loss.

    Args:
      predictions: dict of prediction results keyed by name.
      examples: dict of inputs keyed by name.

    Returns:
      loss_dict: dict of loss tensors keyed by name.
    """
        options = self._model_proto

        loss_dict = {}

        with tf.name_scope('losses'):

            # Extract image-level labels.

            if not options.caption_as_label:
                labels = self._extract_class_label(
                    class_texts=examples[InputDataFields.object_texts],
                    vocabulary_list=self._vocabulary_list)
            else:
                labels = self._extract_class_label(
                    class_texts=slim.flatten(
                        examples[InputDataFields.caption_strings]),
                    vocabulary_list=self._vocabulary_list)

            # A prediction model from caption to class

            # Loss of the multi-instance detection network.

            midn_class_logits = predictions[NOD2Predictions.midn_class_logits]
            losses = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=midn_class_logits)

            # Hard-negative mining.

            if options.midn_loss_negative_mining == nod2_model_pb2.NOD2Model.NONE:
                if options.classification_loss_use_sum:
                    assert False
                    loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                        tf.reduce_mean(tf.reduce_sum(losses, axis=-1)),
                        options.midn_loss_weight)
                else:
                    if options.caption_as_label:
                        loss_masks = tf.to_float(
                            tf.reduce_any(labels > 0, axis=-1))
                        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                            tf.squeeze(
                                utils.masked_avg(tf.reduce_mean(losses,
                                                                axis=-1),
                                                 mask=loss_masks,
                                                 dim=0)),
                            options.midn_loss_weight)
                    else:
                        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                            tf.reduce_mean(losses), options.midn_loss_weight)
            elif options.midn_loss_negative_mining == nod2_model_pb2.NOD2Model.HARDEST:
                assert False
                loss_masks = self._midn_loss_mine_hardest_negative(
                    labels, losses)
                loss_dict['midn_cross_entropy_loss'] = tf.reduce_mean(
                    utils.masked_avg(data=losses, mask=loss_masks, dim=1))
            else:
                raise ValueError('Invalid negative mining method.')

            # Losses of the online instance classifier refinement network.

            (num_proposals,
             proposals) = (predictions[DetectionResultFields.num_proposals],
                           predictions[DetectionResultFields.proposal_boxes])
            batch, max_num_proposals, _ = utils.get_tensor_shape(proposals)

            proposal_scores_0 = predictions[
                NOD2Predictions.oicr_proposal_scores + '_at_0']
            if options.oicr_use_proba_r_given_c:
                proposal_scores_0 = predictions[
                    NOD2Predictions.midn_proba_r_given_c]

            proposal_scores_0 = tf.concat([
                tf.fill([batch, max_num_proposals, 1], 0.0), proposal_scores_0
            ],
                                          axis=-1)

            global_step = tf.train.get_or_create_global_step()
            oicr_loss_mask = tf.cast(global_step > options.oicr_start_step,
                                     tf.float32)

            for i in range(options.oicr_iterations):
                proposal_scores_1 = predictions[
                    NOD2Predictions.oicr_proposal_scores +
                    '_at_{}'.format(i + 1)]
                oicr_cross_entropy_loss_at_i = model_utils.calc_oicr_loss(
                    labels,
                    num_proposals,
                    proposals,
                    tf.stop_gradient(proposal_scores_0),
                    proposal_scores_1,
                    scope='oicr_{}'.format(i + 1),
                    iou_threshold=options.oicr_iou_threshold)
                loss_dict['oicr_cross_entropy_loss_at_{}'.format(
                    i + 1)] = tf.multiply(
                        oicr_loss_mask * oicr_cross_entropy_loss_at_i,
                        options.oicr_loss_weight)

                proposal_scores_0 = tf.nn.softmax(proposal_scores_1, axis=-1)

            # Min-entropy loss.

            mask = tf.sequence_mask(num_proposals,
                                    maxlen=max_num_proposals,
                                    dtype=tf.float32)
            proba_r_given_c = predictions[NOD2Predictions.midn_proba_r_given_c]
            losses = tf.log(proba_r_given_c + _EPSILON)
            losses = tf.squeeze(utils.masked_sum_nd(data=losses,
                                                    mask=mask,
                                                    dim=1),
                                axis=1)
            min_entropy_loss = tf.reduce_mean(
                tf.reduce_sum(losses * labels, axis=1))
            min_entropy_loss = tf.multiply(min_entropy_loss,
                                           options.min_entropy_loss_weight)

            max_proba = tf.reduce_mean(
                utils.masked_maximum(data=proba_r_given_c,
                                     mask=tf.expand_dims(mask, -1),
                                     dim=1))
            tf.losses.add_loss(min_entropy_loss)

        tf.summary.scalar('loss/min_entropy_loss', min_entropy_loss)
        tf.summary.scalar('loss/max_proba', max_proba)

        return loss_dict
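
The min-entropy term in this example relies on `utils.masked_sum_nd`, which, judging from the call site (`data` of shape [batch, max_num_proposals, num_classes], `mask` of shape [batch, max_num_proposals], `dim=1`, followed by a squeeze on axis 1), sums over the proposal axis while ignoring padded proposals. A hedged sketch:

def masked_sum_nd(data, mask, dim=1):
    """Sums `data` along `dim`, zeroing positions where `mask` is 0 (sketch).

    `mask` has one fewer dimension than `data` and is broadcast over the
    trailing axis; the reduced dimension is kept with size 1.
    """
    return tf.reduce_sum(data * tf.expand_dims(mask, axis=-1),
                         axis=dim, keepdims=True)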
Example #7
    def build_loss(self, predictions, examples, **kwargs):
        """Build tf graph to compute loss.

    Args:
      predictions: dict of prediction results keyed by name.
      examples: dict of inputs keyed by name.

    Returns:
      loss_dict: dict of loss tensors keyed by name.
    """
        options = self._model_proto

        loss_dict = {}

        with tf.name_scope('losses'):

            # Extract image-level labels.

            labels = self._extract_class_label(
                class_texts=slim.flatten(predictions[
                    NOD3Predictions.training_only_caption_strings]),
                vocabulary_list=self._vocabulary_list)

            # A prediction model from caption to class

            # Loss of the multi-instance detection network.

            midn_class_logits = predictions[NOD3Predictions.midn_class_logits]
            losses = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=midn_class_logits)

            # Hard-negative mining.

            if options.midn_loss_negative_mining == nod3_model_pb2.NOD3Model.NONE:
                if options.classification_loss_use_sum:
                    assert False
                    loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                        tf.reduce_mean(tf.reduce_sum(losses, axis=-1)),
                        options.midn_loss_weight)
                else:
                    if options.caption_as_label:
                        loss_masks = tf.to_float(
                            tf.reduce_any(labels > 0, axis=-1))
                        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                            tf.squeeze(
                                utils.masked_avg(tf.reduce_mean(losses,
                                                                axis=-1),
                                                 mask=loss_masks,
                                                 dim=0)),
                            options.midn_loss_weight)
                    else:
                        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                            tf.reduce_mean(losses), options.midn_loss_weight)
            elif options.midn_loss_negative_mining == nod3_model_pb2.NOD3Model.HARDEST:
                assert False
                loss_masks = self._midn_loss_mine_hardest_negative(
                    labels, losses)
                loss_dict['midn_cross_entropy_loss'] = tf.reduce_mean(
                    utils.masked_avg(data=losses, mask=loss_masks, dim=1))
            else:
                raise ValueError('Invalid negative mining method.')

            # Triplet loss
            if options.triplet_loss_weight > 0:
                (image_id, image_ids_gathered,
                 similarity) = (predictions[NOD3Predictions.image_id],
                                predictions[NOD3Predictions.image_id],
                                predictions[NOD3Predictions.similarity])

                distance = 1.0 - similarity
                pos_mask = tf.cast(
                    tf.equal(tf.expand_dims(image_id, axis=1),
                             tf.expand_dims(image_ids_gathered, axis=0)),
                    tf.float32)
                neg_mask = 1.0 - pos_mask
                distance_ap = utils.masked_maximum(distance, pos_mask)

                if options.triplet_loss_use_semihard:

                    # Use the semihard.

                    # negatives_outside: smallest D_an where D_an > D_ap.

                    mask = tf.cast(tf.greater(distance, distance_ap),
                                   tf.float32)
                    mask = mask * neg_mask
                    negatives_outside = utils.masked_minimum(distance, mask)

                    # negatives_inside: largest D_an.

                    negatives_inside = utils.masked_maximum(distance, neg_mask)

                    # distance_an: the semihard negatives.

                    mask_condition = tf.greater(
                        tf.reduce_sum(mask, axis=1, keepdims=True), 0.0)

                    distance_an = tf.where(mask_condition, negatives_outside,
                                           negatives_inside)

                else:

                    # Use the hardest.

                    distance_an = utils.masked_minimum(distance, neg_mask)

                losses = tf.maximum(
                    distance_ap - distance_an + options.triplet_loss_margin, 0)

                num_loss_examples = tf.count_nonzero(losses, dtype=tf.float32)
                triplet_loss = tf.reduce_mean(losses)

                loss_dict['triplet_loss'] = tf.multiply(
                    triplet_loss, options.triplet_loss_weight)

            # Losses of the online instance classifier refinement network.

            (num_proposals,
             proposals) = (predictions[DetectionResultFields.num_proposals],
                           predictions[DetectionResultFields.proposal_boxes])
            batch, max_num_proposals, _ = utils.get_tensor_shape(proposals)

            proposal_scores_0 = predictions[
                NOD3Predictions.oicr_proposal_scores + '_at_0']
            if options.oicr_use_proba_r_given_c:
                proposal_scores_0 = predictions[
                    NOD3Predictions.midn_proba_r_given_c]

            proposal_scores_0 = tf.concat([
                tf.fill([batch, max_num_proposals, 1], 0.0), proposal_scores_0
            ],
                                          axis=-1)

            global_step = tf.train.get_or_create_global_step()
            oicr_loss_mask = tf.cast(global_step > options.oicr_start_step,
                                     tf.float32)

            for i in range(options.oicr_iterations):
                proposal_scores_1 = predictions[
                    NOD3Predictions.oicr_proposal_scores +
                    '_at_{}'.format(i + 1)]
                oicr_cross_entropy_loss_at_i = model_utils.calc_oicr_loss(
                    labels,
                    num_proposals,
                    proposals,
                    tf.stop_gradient(proposal_scores_0),
                    proposal_scores_1,
                    scope='oicr_{}'.format(i + 1),
                    iou_threshold=options.oicr_iou_threshold)
                loss_dict['oicr_cross_entropy_loss_at_{}'.format(
                    i + 1)] = tf.multiply(
                        oicr_loss_mask * oicr_cross_entropy_loss_at_i,
                        options.oicr_loss_weight)

                proposal_scores_0 = tf.nn.softmax(proposal_scores_1, axis=-1)

            # Min-entropy loss.

            mask = tf.sequence_mask(num_proposals,
                                    maxlen=max_num_proposals,
                                    dtype=tf.float32)
            proba_r_given_c = predictions[NOD3Predictions.midn_proba_r_given_c]
            losses = tf.log(proba_r_given_c + _EPSILON)
            losses = tf.squeeze(utils.masked_sum_nd(data=losses,
                                                    mask=mask,
                                                    dim=1),
                                axis=1)
            min_entropy_loss = tf.reduce_mean(
                tf.reduce_sum(losses * labels, axis=1))
            min_entropy_loss = tf.multiply(min_entropy_loss,
                                           options.min_entropy_loss_weight)

            max_proba = tf.reduce_mean(
                utils.masked_maximum(data=proba_r_given_c,
                                     mask=tf.expand_dims(mask, -1),
                                     dim=1))
            tf.losses.add_loss(min_entropy_loss)

        if options.triplet_loss_weight > 0:
            tf.summary.scalar('loss/num_loss_examples', num_loss_examples)
        tf.summary.scalar('loss/min_entropy_loss', min_entropy_loss)
        tf.summary.scalar('loss/max_proba', max_proba)

        return loss_dict
Example #8
    def build_loss(self, predictions, examples, **kwargs):
        """Build tf graph to compute loss.

    Args:
      predictions: dict of prediction results keyed by name.
      examples: dict of inputs keyed by name.

    Returns:
      loss_dict: dict of loss tensors keyed by name.
    """
        options = self._model_proto

        loss_dict = {}

        with tf.name_scope('losses'):

            # Extract image-level labels.

            assert options.caption_as_label

            vocabulary_list = self._vocabulary_list
            mapping = {
                'traffic light': 'stoplight',
                'fire hydrant': 'hydrant',
                'stop sign': 'sign',
                'parking meter': 'meter',
                'sports ball': 'ball',
                'baseball bat': 'bat',
                'baseball glove': 'glove',
                'tennis racket': 'racket',
                'wine glass': 'wineglass',
                'hot dog': 'hotdog',
                'potted plant': 'plant',
                'dining table': 'table',
                'cell phone': 'cellphone',
                'teddy bear': 'teddy',
                'hair drier': 'hairdryer',
            }
            vocabulary_list = [
                mapping.get(cls, cls) for cls in vocabulary_list
            ]

            labels_gt = self._extract_class_label(
                class_texts=slim.flatten(
                    examples[InputDataFields.caption_strings]),
                vocabulary_list=vocabulary_list)

            examples[NOD4Predictions.debug_groundtruth_labels] = labels_gt
            if options.label_strategem == nod4_model_pb2.NOD4Model.EXACTLY_MATCH:
                labels = labels_gt
            elif options.label_strategem == nod4_model_pb2.NOD4Model.W2V_SYNONYM_MATCH:
                labels_ps = self._extract_pseudo_label(
                    texts=slim.flatten(
                        examples[InputDataFields.caption_strings]),
                    vocabulary_list=vocabulary_list,
                    open_vocabulary_list=self._open_vocabulary_list,
                    embedding_dims=options.embedding_dims)
                select_op = tf.reduce_any(labels_gt > 0, axis=-1)
                labels = tf.where(select_op, labels_gt, labels_ps)
                labels_ps = tf.where(select_op, tf.zeros_like(labels_ps),
                                     labels_ps)
                examples[NOD4Predictions.debug_pseudo_labels] = labels_ps
            else:
                raise ValueError('Invalid label strategy')

            # Loss of the multi-instance detection network.

            midn_class_logits = predictions[NOD4Predictions.midn_class_logits]
            losses = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=labels, logits=midn_class_logits)

            # Hard-negative mining.

            if options.midn_loss_negative_mining == nod4_model_pb2.NOD4Model.NONE:
                if options.classification_loss_use_sum:
                    assert False
                    loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                        tf.reduce_mean(tf.reduce_sum(losses, axis=-1)),
                        options.midn_loss_weight)
                else:
                    if options.caption_as_label:
                        loss_masks = tf.to_float(
                            tf.reduce_any(labels > 0, axis=-1))
                        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                            tf.squeeze(
                                utils.masked_avg(tf.reduce_mean(losses,
                                                                axis=-1),
                                                 mask=loss_masks,
                                                 dim=0)),
                            options.midn_loss_weight)
                    else:
                        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
                            tf.reduce_mean(losses), options.midn_loss_weight)
            elif options.midn_loss_negative_mining == nod4_model_pb2.NOD4Model.HARDEST:
                assert False
                loss_masks = self._midn_loss_mine_hardest_negative(
                    labels, losses)
                loss_dict['midn_cross_entropy_loss'] = tf.reduce_mean(
                    utils.masked_avg(data=losses, mask=loss_masks, dim=1))
            else:
                raise ValueError('Invalid negative mining method.')

            # Losses of the online instance classifier refinement network.

            (num_proposals,
             proposals) = (predictions[DetectionResultFields.num_proposals],
                           predictions[DetectionResultFields.proposal_boxes])
            batch, max_num_proposals, _ = utils.get_tensor_shape(proposals)

            proposal_scores_0 = predictions[
                NOD4Predictions.oicr_proposal_scores + '_at_0']
            if options.oicr_use_proba_r_given_c:
                proposal_scores_0 = predictions[
                    NOD4Predictions.midn_proba_r_given_c]

            proposal_scores_0 = tf.concat([
                tf.fill([batch, max_num_proposals, 1], 0.0), proposal_scores_0
            ],
                                          axis=-1)

            global_step = tf.train.get_or_create_global_step()
            oicr_loss_mask = tf.cast(global_step > options.oicr_start_step,
                                     tf.float32)

            for i in range(options.oicr_iterations):
                proposal_scores_1 = predictions[
                    NOD4Predictions.oicr_proposal_scores +
                    '_at_{}'.format(i + 1)]
                oicr_cross_entropy_loss_at_i = model_utils.calc_oicr_loss(
                    labels,
                    num_proposals,
                    proposals,
                    tf.stop_gradient(proposal_scores_0),
                    proposal_scores_1,
                    scope='oicr_{}'.format(i + 1),
                    iou_threshold=options.oicr_iou_threshold)
                loss_dict['oicr_cross_entropy_loss_at_{}'.format(
                    i + 1)] = tf.multiply(
                        oicr_loss_mask * oicr_cross_entropy_loss_at_i,
                        options.oicr_loss_weight)

                proposal_scores_0 = tf.nn.softmax(proposal_scores_1, axis=-1)

            # Min-entropy loss.

            mask = tf.sequence_mask(num_proposals,
                                    maxlen=max_num_proposals,
                                    dtype=tf.float32)
            proba_r_given_c = predictions[NOD4Predictions.midn_proba_r_given_c]
            losses = tf.log(proba_r_given_c + _EPSILON)
            losses = tf.squeeze(utils.masked_sum_nd(data=losses,
                                                    mask=mask,
                                                    dim=1),
                                axis=1)
            min_entropy_loss = tf.reduce_mean(
                tf.reduce_sum(losses * labels, axis=1))
            min_entropy_loss = tf.multiply(min_entropy_loss,
                                           options.min_entropy_loss_weight)

            max_proba = tf.reduce_mean(
                utils.masked_maximum(data=proba_r_given_c,
                                     mask=tf.expand_dims(mask, -1),
                                     dim=1))
            tf.losses.add_loss(min_entropy_loss)

        tf.summary.scalar('loss/min_entropy_loss', min_entropy_loss)
        tf.summary.scalar('loss/max_proba', max_proba)

        return loss_dict