Ejemplo n.º 1
0
    def testFocalLossPerfectScore(self):
        # Focal loss when, for every sample, the class marked as true in the
        # labels is also the one with the higher logit.
        #
        # The tables below hold one row per class and one column per sample;
        # they are transposed so each row of the fed arrays is one sample.
        class_probabilities = (
            (0.55, 0.52, 0.50, 0.48, 0.45),
            (0.95, 0.82, 0.80, 0.28, 0.35),
        )
        class_labels = ((0, 0, 0, 1, 1), (1, 1, 1, 0, 0))
        expected_loss = [0.00022774, 0.00787424, 0.00892574, 0.03088996, 0.07966313]

        logits_array = np.array(
            [[_logit(p) for p in row] for row in class_probabilities],
            dtype=np.float32,
        ).T
        labels_array = np.array(class_labels, dtype=np.float32).T

        # Graph inputs and the op under test.
        prediction_tensor = tf.placeholder(tf.float32)
        target_tensor = tf.placeholder(tf.float32)
        loss_tf = focal_loss(prediction_tensor, target_tensor)

        feeds = {prediction_tensor: logits_array, target_tensor: labels_array}
        with tf.Session() as sess:
            loss = sess.run(loss_tf, feed_dict=feeds)

        self.assertAllClose(loss, expected_loss)
Ejemplo n.º 2
0
    def testFocalLossImperfectScore(self):
        # Focal loss for the imperfect case: for every sample, the class
        # marked as true in the labels is the one with the lower logit.
        #
        # The tables below hold one row per class and one column per sample;
        # they are transposed so each row of the fed arrays is one sample.
        class_probabilities = (
            (0.55, 0.52, 0.50, 0.48, 0.45),
            (0.95, 0.82, 0.80, 0.28, 0.35),
        )
        class_labels = ((1, 1, 1, 0, 0), (0, 0, 0, 1, 1))
        expected_loss = [2.4771614, 1.0766783, 1.0300404, 0.60194975, 0.33609733]

        logits_array = np.array(
            [[_logit(p) for p in row] for row in class_probabilities],
            dtype=np.float32,
        ).T
        labels_array = np.array(class_labels, dtype=np.float32).T

        # Graph inputs and the op under test.
        prediction_tensor = tf.placeholder(tf.float32)
        target_tensor = tf.placeholder(tf.float32)
        loss_tf = focal_loss(prediction_tensor, target_tensor)

        feeds = {prediction_tensor: logits_array, target_tensor: labels_array}
        with tf.Session() as sess:
            loss = sess.run(loss_tf, feed_dict=feeds)

        self.assertAllClose(loss, expected_loss)
Ejemplo n.º 3
0
    def loss(self, prediction_dict):
        """
        Builds the classification and regression losses for the Region
        Proposal Network.

        Args:
            prediction_dict: Dictionary from which these keys are read:
                rpn_cls_score: Score for being an object or not for each
                    anchor in the image. Shape: (num_anchors, 2)
                rpn_cls_target: Ground truth labeling for each anchor.
                    Should be
                    * 1: for positive labels
                    * 0: for negative labels
                    * -1: for labels we should ignore.
                    Shape: (num_anchors, )
                rpn_bbox_target: Bounding box output delta target for rpn.
                    Shape: (num_anchors, 4)
                rpn_bbox_pred: Bounding box output delta prediction for rpn.
                    Shape: (num_anchors, 4)

                The per-anchor losses are written back into the dictionary
                under the keys "cross_entropy_per_anchor" and
                "reg_loss_per_anchor".

        Returns:
            Dictionary with the mean classification loss under
            "rpn_cls_loss" and the mean regression loss under
            "rpn_reg_loss".

        Raises:
            ValueError: If `self.loss_type` is neither CROSS_ENTROPY nor
                FOCAL.
        """

        rpn_cls_score = prediction_dict["rpn_cls_score"]
        rpn_cls_target = prediction_dict["rpn_cls_target"]

        rpn_bbox_target = prediction_dict["rpn_bbox_target"]
        rpn_bbox_pred = prediction_dict["rpn_bbox_pred"]

        with tf.variable_scope("RPNLoss"):
            # Flatten already flat Tensor for usage as boolean mask filter.
            rpn_cls_target = tf.cast(tf.reshape(rpn_cls_target, [-1]),
                                     tf.int32,
                                     name="rpn_cls_target")
            # Transform to boolean tensor mask for not ignored.
            labels_not_ignored = tf.not_equal(rpn_cls_target,
                                              -1,
                                              name="labels_not_ignored")

            # Now we only have the labels we are going to compare with the
            # cls probability.
            labels = tf.boolean_mask(rpn_cls_target, labels_not_ignored)
            cls_score = tf.boolean_mask(rpn_cls_score, labels_not_ignored)

            # We need to transform `labels` to `cls_score` shape.
            # convert [1, 0] to [[0, 1], [1, 0]] for ce with logits.
            cls_target = tf.one_hot(labels, depth=2)

            # Equivalent to log loss
            if self.loss_type == CROSS_ENTROPY:
                # TODO PV make this a loss function in losses.py
                ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=cls_target, logits=cls_score)
                if self.loss_weight != 1:
                    ce_per_anchor = ce_per_anchor * self.loss_weight
            elif self.loss_type == FOCAL:
                # NOTE(review): `self.loss_weight` is not applied in this
                # branch -- confirm that is intended.
                ce_per_anchor = focal_loss(cls_score, cls_target,
                                           self.focal_gamma)
            else:
                # Fail with a clear message instead of leaving
                # `ce_per_anchor` unbound for the code below.
                raise ValueError(
                    "Unsupported loss type: {!r}".format(self.loss_type))
            # TODO PV Rename cross entropy per anchor to reflect focal loss is
            # calculated
            prediction_dict["cross_entropy_per_anchor"] = ce_per_anchor

            # Finally, we need to calculate the regression loss over
            # `rpn_bbox_target` and `rpn_bbox_pred`.
            # We use SmoothL1Loss.
            rpn_bbox_target = tf.reshape(rpn_bbox_target, [-1, 4])
            rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

            # We only care for positive labels (we ignore backgrounds since
            # we don't have any bounding box information for it).
            positive_labels = tf.equal(rpn_cls_target, 1)
            rpn_bbox_target = tf.boolean_mask(rpn_bbox_target, positive_labels)
            rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, positive_labels)

            # We apply smooth l1 loss as described by the Fast R-CNN paper.
            reg_loss_per_anchor = smooth_l1_loss(rpn_bbox_pred,
                                                 rpn_bbox_target,
                                                 sigma=self._l1_sigma)

            prediction_dict["reg_loss_per_anchor"] = reg_loss_per_anchor

            # Loss summaries, all registered in the "rpn" collection.
            tf.summary.scalar("batch_size", tf.shape(labels)[0], ["rpn"])
            # Split the per-anchor classification loss by ground-truth label
            # so foreground and background are reported separately.
            foreground_cls_loss = tf.boolean_mask(ce_per_anchor,
                                                  tf.equal(labels, 1))
            background_cls_loss = tf.boolean_mask(ce_per_anchor,
                                                  tf.equal(labels, 0))
            tf.summary.scalar("foreground_cls_loss",
                              tf.reduce_mean(foreground_cls_loss), ["rpn"])
            tf.summary.histogram("foreground_cls_loss", foreground_cls_loss,
                                 ["rpn"])
            tf.summary.scalar("background_cls_loss",
                              tf.reduce_mean(background_cls_loss), ["rpn"])
            tf.summary.histogram("background_cls_loss", background_cls_loss,
                                 ["rpn"])
            # `rpn_bbox_target` was masked to positive anchors above, so its
            # first dimension is the number of foreground samples.
            tf.summary.scalar("foreground_samples",
                              tf.shape(rpn_bbox_target)[0], ["rpn"])

            return {
                "rpn_cls_loss": tf.reduce_mean(ce_per_anchor),
                "rpn_reg_loss": tf.reduce_mean(reg_loss_per_anchor),
            }
Ejemplo n.º 4
0
    def loss(self, prediction_dict, return_all=False):
        """Compute the loss for SSD.

        Args:
            prediction_dict: The output dictionary of the _build method from
                which we use different main keys:

                cls_pred: The class predictions (used as logits).
                loc_pred: The localization predictions (bbox offsets).
                target: A dictionary with the targets, read through its
                    "cls" and "bbox_offsets" keys.

                The per-proposal losses are written back into the dictionary
                under "reg_loss_per_proposal" and "cls_loss_per_proposal".
            return_all: When true, return a dictionary with the total,
                classification and bbox losses instead of only the total.

        Returns:
            A tensor for the total loss, or, if `return_all` is true, a
            dictionary with the keys "total_loss", "cls_loss" and
            "bbox_loss".

        Raises:
            ValueError: If `self.loss_type` is neither CROSS_ENTROPY nor
                FOCAL.
        """

        with tf.name_scope("losses"):

            cls_pred = prediction_dict["cls_pred"]
            cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)
            # Transform to one-hot vector
            cls_target_one_hot = tf.one_hot(cls_target,
                                            depth=self._num_classes + 1,
                                            name="cls_target_one_hot")

            # We get cross entropy loss of each proposal.
            # TODO: Optimization opportunity: We calculate the probabilities
            #       earlier in the program, so if we used those instead of the
            #       logits we would not have the need to do softmax here too.
            if self.loss_type == CROSS_ENTROPY:
                classification_loss_per_proposal = (
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        labels=cls_target_one_hot, logits=cls_pred))
            elif self.loss_type == FOCAL:
                classification_loss_per_proposal = focal_loss(
                    cls_pred, cls_target_one_hot, self.focal_gamma)
            else:
                # Fail with a clear message instead of leaving
                # `classification_loss_per_proposal` unbound for the code
                # below.
                raise ValueError(
                    "Unsupported loss type: {!r}".format(self.loss_type))
            # Second we need to calculate the smooth l1 loss between
            # `bbox_offsets` and `bbox_offsets_targets`.
            bbox_offsets = prediction_dict["loc_pred"]
            bbox_offsets_targets = prediction_dict["target"]["bbox_offsets"]

            # We only want the non-background labels bounding boxes.
            not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
            bbox_offsets_positives = tf.boolean_mask(
                bbox_offsets, not_ignored, name="bbox_offsets_positives")
            bbox_offsets_target_positives = tf.boolean_mask(
                bbox_offsets_targets,
                not_ignored,
                name="bbox_offsets_target_positives")

            # Calculate the smooth l1 regression loss between the flatten
            # bboxes offsets  and the labeled targets.
            reg_loss_per_proposal = smooth_l1_loss(
                bbox_offsets_positives, bbox_offsets_target_positives)

            cls_loss = tf.reduce_sum(classification_loss_per_proposal)
            bbox_loss = tf.reduce_sum(reg_loss_per_proposal)

            # Following the paper, set loss to 0 if there are 0 bboxes
            # assigned as foreground targets. Otherwise the weighted sum of
            # both losses is divided by the number of foreground boxes.
            safety_condition = tf.not_equal(
                tf.shape(bbox_offsets_positives)[0], 0)
            final_loss = tf.cond(
                safety_condition,
                true_fn=lambda:
                ((cls_loss + bbox_loss * self._loc_loss_weight) / tf.cast(
                    tf.shape(bbox_offsets_positives)[0], tf.float32)),
                false_fn=lambda: 0.0,
            )
            # Register the loss so `get_total_loss` picks it up together
            # with whatever other losses are already registered.
            tf.losses.add_loss(final_loss)
            total_loss = tf.losses.get_total_loss()

            prediction_dict["reg_loss_per_proposal"] = reg_loss_per_proposal
            prediction_dict[
                "cls_loss_per_proposal"] = classification_loss_per_proposal

            tf.summary.scalar("cls_loss",
                              cls_loss,
                              collections=self._losses_collections)

            tf.summary.scalar("bbox_loss",
                              bbox_loss,
                              collections=self._losses_collections)

            tf.summary.scalar("total_loss",
                              total_loss,
                              collections=self._losses_collections)
            if return_all:
                return {
                    "total_loss": total_loss,
                    "cls_loss": cls_loss,
                    "bbox_loss": bbox_loss,
                }
            else:
                return total_loss
Ejemplo n.º 5
0
    def loss(self, prediction_dict):
        """
        Builds the classification and regression losses for RCNN.

        Args:
            prediction_dict with keys:
                rcnn:
                    cls_score: shape (num_proposals, num_classes + 1)
                        Has the class scoring for each the proposals. Classes
                        are 1-indexed with 0 being the background.

                    bbox_offsets: shape (num_proposals, num_classes * 4)
                        Has the offset for each proposal for each class.
                        We have to compare only the proposals labeled with the
                        offsets for that label.

                target:
                    cls: shape (num_proposals,)
                        Has the correct label for each of the proposals.
                        Negative values are ignored.
                        0 => background
                        1..n => 1-indexed classes

                    bbox_offsets: shape (num_proposals, 4)
                        Has the true offset of each proposal for the true
                        label.
                        In case of not having a true label (non-background)
                        then it's just zeroes.

                _debug:
                    Only used when `self._debug` is set; must already exist.
                    The per-proposal losses are stored under its "losses"
                    key.

        Returns:
            loss_dict with keys:
                rcnn_cls_loss: The mean cross-entropy (or focal) loss of the
                    classification task between the num_classes +
                    background.
                rcnn_reg_loss: The mean smooth L1 loss for the bounding box
                    regression task to adjust correctly labeled boxes.

        Raises:
            ValueError: If `self.loss_type` is neither CROSS_ENTROPY nor
                FOCAL.
        """
        with tf.name_scope("RCNNLoss"):
            cls_score = prediction_dict["rcnn"]["cls_score"]
            # Cast target explicitly as int32.
            cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)

            # First we need to calculate the classification loss between
            # cls_score and cls_target.

            # We only care for the targets that are >= 0
            not_ignored = tf.reshape(
                tf.greater_equal(cls_target, 0), [-1], name="not_ignored"
            )
            # We apply boolean mask to score and target.
            cls_score_labeled = tf.boolean_mask(
                cls_score, not_ignored, name="cls_score_labeled"
            )
            cls_target_labeled = tf.boolean_mask(
                cls_target, not_ignored, name="cls_target_labeled"
            )

            tf.summary.scalar("batch_size", tf.shape(cls_score_labeled)[0], ["rcnn"])

            # Transform to one-hot vector
            cls_target_one_hot = tf.one_hot(
                cls_target_labeled,
                depth=self._num_classes + 1,
                name="cls_target_one_hot",
            )

            if self.loss_type == CROSS_ENTROPY:

                class_weights = self.loss_weight
                # No gradient should flow into the labels.
                onehot_labels = tf.stop_gradient(cls_target_one_hot)
                # Compute the (unweighted) softmax cross entropy loss.
                cross_entropy_per_proposal = tf.nn.softmax_cross_entropy_with_logits(
                    labels=onehot_labels, logits=cls_score_labeled
                )
                if class_weights != 1:
                    class_weights = tf.constant([class_weights], dtype=tf.float32)
                    # Deduce the weight of each sample from its true label;
                    # the one-hot mask selects the weight of that class.
                    weights = tf.reduce_sum(class_weights * onehot_labels, axis=1)
                    # apply the weights, relying on broadcasting
                    # of the multiplication
                    cross_entropy_per_proposal = cross_entropy_per_proposal * weights
            elif self.loss_type == FOCAL:

                cross_entropy_per_proposal = focal_loss(
                    cls_score_labeled,
                    tf.stop_gradient(cls_target_one_hot),
                    self.focal_gamma,
                )
            else:
                # Fail with a clear message instead of leaving
                # `cross_entropy_per_proposal` unbound for the code below.
                raise ValueError(
                    "Unsupported loss type: {!r}".format(self.loss_type)
                )

            if self._debug:
                prediction_dict["_debug"]["losses"] = {}
                # Save the classification loss per proposal to be able to
                # visualize proposals with high and low error.
                prediction_dict["_debug"]["losses"][
                    "cross_entropy_per_proposal"
                ] = cross_entropy_per_proposal

            # Second we need to calculate the smooth l1 loss between
            # `bbox_offsets` and `bbox_offsets_target`.
            bbox_offsets = prediction_dict["rcnn"]["bbox_offsets"]
            bbox_offsets_target = prediction_dict["target"]["bbox_offsets"]

            # We only want the non-background labels bounding boxes.
            # From here on `not_ignored` means "label > 0" rather than
            # "label >= 0" as above.
            not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
            bbox_offsets_labeled = tf.boolean_mask(
                bbox_offsets, not_ignored, name="bbox_offsets_labeled"
            )
            bbox_offsets_target_labeled = tf.boolean_mask(
                bbox_offsets_target, not_ignored, name="bbox_offsets_target_labeled"
            )

            cls_target_labeled = tf.boolean_mask(
                cls_target, not_ignored, name="cls_target_labeled"
            )
            # `cls_target_labeled` is based on `cls_target` which has
            # `num_classes` + 1 classes.
            # for making `one_hot` with depth `num_classes` to work we need
            # to lower them to make them 0-index.
            cls_target_labeled = cls_target_labeled - 1

            cls_target_one_hot = tf.one_hot(
                cls_target_labeled, depth=self._num_classes, name="cls_target_one_hot"
            )

            # cls_target now is (num_labeled, num_classes)
            # Reshape the offsets to one row of 4 values per
            # (proposal, class) pair so they line up with the flattened
            # one-hot mask below.
            bbox_flatten = tf.reshape(
                bbox_offsets_labeled, [-1, 4], name="bbox_flatten"
            )

            # We use the flatten cls_target_one_hot as boolean mask for the
            # bboxes.
            cls_flatten = tf.cast(
                tf.reshape(cls_target_one_hot, [-1]), tf.bool, "cls_flatten_as_bool"
            )

            bbox_offset_cleaned = tf.boolean_mask(
                bbox_flatten, cls_flatten, "bbox_offset_cleaned"
            )

            # Calculate the smooth l1 loss between the "cleaned" bboxes
            # offsets (that means, the useful results) and the labeled
            # targets.
            reg_loss_per_proposal = smooth_l1_loss(
                bbox_offset_cleaned, bbox_offsets_target_labeled, sigma=self._l1_sigma
            )

            tf.summary.scalar(
                "rcnn_foreground_samples", tf.shape(bbox_offset_cleaned)[0], ["rcnn"]
            )

            if self._debug:
                # Also save reg loss per proposals to be able to visualize
                # good and bad proposals in debug mode.
                prediction_dict["_debug"]["losses"][
                    "reg_loss_per_proposal"
                ] = reg_loss_per_proposal

            return {
                "rcnn_cls_loss": tf.reduce_mean(cross_entropy_per_proposal),
                "rcnn_reg_loss": tf.reduce_mean(reg_loss_per_proposal),
            }