Ejemplo n.º 1
0
    def __call__(self, true_counts, inds, y_true, boxes, classes, y_pred):
        """Compute the loss and the reporting metrics for a single FPN level.

        The loss itself is produced by `self._compute_loss`; this method only
        forwards its arguments and derives gradient-free metrics from the
        values that come back.

        Args:
          true_counts: `Tensor` of shape [batchsize, height, width,
            num_anchors] representing how many boxes are in a given pixel
            [j, i] in the output map.
          inds: `Tensor` of shape [batchsize, None, 3] indicating the location
            [j, i] that a given box is associated with in the FPN prediction
            map.
          y_true: `Tensor` of shape [batchsize, None, 8] indicating the actual
            box associated with each index in the inds tensor list.
          boxes: `Tensor` of shape [batchsize, None, 4] indicating the original
            ground truth boxes for each image as they came from the decoder
            used for bounding box search.
          classes: `Tensor` of shape [batchsize, None, 1] indicating the
            original ground truth classes for each image as they came from the
            decoder used for bounding box search.
          y_pred: `Tensor` of shape [batchsize, height, width, output_depth]
            holding the model's output at a specific FPN level.

          NOTE(review): the shapes above are carried over from the previous
          documentation. This method does not inspect them; confirm them
          against the `_compute_loss` implementation in use.

        Returns:
          A 7-tuple `(loss, box_loss, conf_loss, class_loss, mean_loss,
          avg_iou, avg_obj)`:
            loss: the loss returned by `_compute_loss`, unchanged.
            box_loss: the box loss rescaled to
              `0.05 * box_loss / self._iou_normalizer` with gradients stopped;
              intended for metrics only.
            conf_loss: the confidence loss returned by `_compute_loss`.
            class_loss: the class loss returned by `_compute_loss`.
            mean_loss: the mean loss returned by `_compute_loss`.
            avg_iou: `loss_utils.average_iou` of the IOU masked by the index
              mask, with gradients stopped.
            avg_obj: `loss_utils.average_iou` of the sigmoid of the predicted
              confidence multiplied by the grid mask, with gradients stopped.
        """
        loss_outputs = self._compute_loss(true_counts, inds, y_true, boxes,
                                          classes, y_pred)
        (loss, raw_box_loss, conf_loss, class_loss, mean_loss, raw_iou,
         conf_logits, ind_mask, grid_mask) = loss_outputs

        # Temporary metric: report the box loss on a rescaled, gradient-free
        # footing rather than the weighted value used inside the loss.
        box_metric = tf.stop_gradient(
            0.05 * raw_box_loss / self._iou_normalizer)

        # The metrics are derived here from tensors the loss already produced,
        # to save time and resources. None of them should carry gradients.
        objectness = tf.stop_gradient(tf.sigmoid(conf_logits))
        detached_iou = tf.stop_gradient(raw_iou)

        box_mask = tf.squeeze(ind_mask, axis=-1)
        masked_iou = loss_utils.apply_mask(box_mask, detached_iou)
        avg_iou = loss_utils.average_iou(masked_iou)

        masked_objectness = tf.squeeze(objectness, axis=-1) * grid_mask
        avg_obj = loss_utils.average_iou(masked_objectness)

        avg_iou = tf.stop_gradient(avg_iou)
        avg_obj = tf.stop_gradient(avg_obj)
        return (loss, box_metric, conf_loss, class_loss, mean_loss, avg_iou,
                avg_obj)
Ejemplo n.º 2
0
    def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
        """Per FPN path loss logic for Yolov4-csp, Yolov4-Large, and Yolov5.

        Args:
          true_counts: tensor whose first four axes give the batch size, the
            two spatial sizes and the anchor count. It is clipped to [0, 1]
            and squeezed on its last axis to form the grid mask.
            NOTE(review): the squeeze on axis -1 implies a trailing singleton
            axis -- confirm against the caller.
          inds: index tensor used with `tf.gather_nd(..., batch_dims=1)` to
            pick the grid cells associated with each ground truth entry.
          y_true: ground truth entries; the last axis is split into sizes
            [4, 1, 1] as (box, index mask, class).
            NOTE(review): this split requires 6 channels -- confirm against
            any caller documentation that states a different size.
          boxes: original ground truth boxes; only forwarded to
            `self._tiled_global_box_search` when the ignore threshold is
            enabled.
          classes: original ground truth classes; only forwarded to
            `self._tiled_global_box_search` when the ignore threshold is
            enabled.
          y_pred: model output for this FPN level; reshaped to
            [batch_size, width, height, num, -1] and split on the last axis
            into sizes [4, 1, rest] as (box, confidence, class).

        Returns:
          A 9-tuple `(loss, box_loss, conf_loss, class_loss, mean_loss, iou,
          pred_conf, ind_mask, grid_mask)`:
            loss: `mean_loss` multiplied by the batch size.
            box_loss: masked box loss summed and divided by the number of
              objects, then multiplied by `self._iou_normalizer`.
            conf_loss: binary cross entropy on the confidence map, multiplied
              by `self._obj_normalizer`.
            class_loss: masked class binary cross entropy summed and divided
              by the number of objects, then multiplied by
              `self._cls_normalizer`.
            mean_loss: `box_loss + class_loss + conf_loss` after weighting.
            iou: gradient-stopped IOU from `self.box_loss`, floored at 0.
            pred_conf: the confidence slice of the prediction (consumed here
              as logits).
            ind_mask: the index mask slice of `y_true`.
            grid_mask: `true_counts` clipped to [0, 1] with the last axis
              squeezed.
        """
        # Generate shape constants.
        # NOTE(review): axis 1 is bound to `width` and axis 2 to `height`;
        # confirm this matches the layout of `true_counts`. The names are
        # used consistently below (anchor generator, reshape, box decode).
        shape = tf.shape(true_counts)
        batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
        fwidth = tf.cast(width, tf.float32)
        fheight = tf.cast(height, tf.float32)

        # Cast the ground truth inputs to float32 and build the clipped
        # confidence map. No stop_gradient is applied at this point.
        y_true = tf.cast(y_true, tf.float32)
        true_counts = tf.cast(true_counts, tf.float32)
        true_conf = tf.clip_by_value(true_counts, 0.0, 1.0)
        grid_points, anchor_grid = self._anchor_generator(width,
                                                          height,
                                                          batch_size,
                                                          dtype=tf.float32)

        # Split the y_true list.
        (true_box, ind_mask, true_class) = tf.split(y_true, [4, 1, 1], axis=-1)
        # grid_mask keeps the clipped counts for the caller; true_conf is
        # rebuilt further down from the smoothed IOU.
        grid_mask = true_conf = tf.squeeze(true_conf, axis=-1)
        true_class = tf.squeeze(true_class, axis=-1)
        # Number of ground truth entries flagged by the index mask, in the
        # dtype y_pred had before it is cast to float32 below.
        num_objs = tf.cast(tf.reduce_sum(ind_mask), dtype=y_pred.dtype)

        # Split up the predictions.
        y_pred = tf.cast(
            tf.reshape(y_pred, [batch_size, width, height, num, -1]),
            tf.float32)
        pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

        # Decode the boxes for loss compute.
        scale, pred_box, pbg = self._decode_boxes(fwidth,
                                                  fheight,
                                                  pred_box,
                                                  anchor_grid,
                                                  grid_points,
                                                  darknet=False)

        # If the ignore threshold is enabled, search all boxes and ignore all
        # IOU values larger than the ignore threshold that are not in the
        # noted ground truth list. `obj_mask` is only defined on this path
        # and is only read under the same condition further down.
        if self._ignore_thresh != 0.0:
            (_, obj_mask) = self._tiled_global_box_search(
                pbg,
                tf.stop_gradient(tf.sigmoid(pred_class)),
                boxes,
                classes,
                true_conf,
                smoothed=False,
                scale=None)

        # Scale and shift and select the ground truth boxes
        # and predictions to the prediction domain.
        if self._box_type == 'anchor_free':
            true_box = loss_utils.apply_mask(
                ind_mask, (scale * self._path_stride * true_box))
        else:
            # Gather the grid point of each ground truth entry and pad it with
            # zeros so the shift only affects the leading box channels
            # (presumably the box center -- TODO confirm).
            offset = tf.cast(tf.gather_nd(grid_points, inds, batch_dims=1),
                             true_box.dtype)
            offset = tf.concat([offset, tf.zeros_like(offset)], axis=-1)
            true_box = loss_utils.apply_mask(ind_mask,
                                             (scale * true_box) - offset)
        pred_box = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))

        # Select the correct/used prediction classes.
        true_class = tf.one_hot(tf.cast(true_class, tf.int32),
                                depth=tf.shape(pred_class)[-1],
                                dtype=pred_class.dtype)
        true_class = loss_utils.apply_mask(ind_mask, true_class)
        pred_class = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_class, inds, batch_dims=1))

        # Compute the box loss, averaged over the number of objects.
        _, iou, box_loss = self.box_loss(true_box, pred_box, darknet=False)
        box_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                         box_loss)
        box_loss = math_ops.divide_no_nan(tf.reduce_sum(box_loss), num_objs)

        # Use the box IOU to build the map for confidence loss computation.
        # The target blends the hard index mask with the (non-negative,
        # gradient-free) IOU according to self._objectness_smooth.
        iou = tf.maximum(tf.stop_gradient(iou), 0.0)
        smoothed_iou = ((
            (1 - self._objectness_smooth) * tf.cast(ind_mask, iou.dtype)) +
                        self._objectness_smooth * tf.expand_dims(iou, axis=-1))
        smoothed_iou = loss_utils.apply_mask(ind_mask, smoothed_iou)
        true_conf = loss_utils.build_grid(inds,
                                          smoothed_iou,
                                          pred_conf,
                                          ind_mask,
                                          update=self._update_on_repeat)
        true_conf = tf.squeeze(true_conf, axis=-1)

        # Compute the cross entropy loss for the confidence map.
        bce = tf.keras.losses.binary_crossentropy(tf.expand_dims(true_conf,
                                                                 axis=-1),
                                                  pred_conf,
                                                  from_logits=True)
        if self._ignore_thresh != 0.0:
            bce = loss_utils.apply_mask(obj_mask, bce)
            # NOTE(review): plain division, unlike the box and class losses
            # which use math_ops.divide_no_nan. An all-zero obj_mask would
            # give a non-finite value here -- confirm whether that can occur.
            conf_loss = tf.reduce_sum(bce) / tf.reduce_sum(obj_mask)
        else:
            conf_loss = tf.reduce_mean(bce)

        # Compute the cross entropy loss for the class maps, averaged over
        # the number of objects.
        class_loss = tf.keras.losses.binary_crossentropy(
            true_class,
            pred_class,
            label_smoothing=self._label_smoothing,
            from_logits=True)
        class_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                           class_loss)
        class_loss = math_ops.divide_no_nan(tf.reduce_sum(class_loss),
                                            num_objs)

        # Apply the weights to each loss.
        box_loss *= self._iou_normalizer
        class_loss *= self._cls_normalizer
        conf_loss *= self._obj_normalizer

        # Add all the weighted losses together, then scale the result by the
        # batch size.
        mean_loss = box_loss + class_loss + conf_loss
        loss = mean_loss * tf.cast(batch_size, mean_loss.dtype)

        return (loss, box_loss, conf_loss, class_loss, mean_loss, iou,
                pred_conf, ind_mask, grid_mask)
Ejemplo n.º 3
0
    def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
        """Per FPN path loss logic used for Yolov3, Yolov4, and Yolo-Tiny.

        Args:
          true_counts: tensor whose first four axes give the batch size, the
            two spatial sizes and the anchor count. It is clipped to [0, 1]
            and squeezed on its last axis to form the grid mask.
            NOTE(review): the squeeze on axis -1 implies a trailing singleton
            axis -- confirm against the caller.
          inds: index tensor used with `tf.gather_nd(..., batch_dims=1)` to
            pick the grid cells associated with each ground truth entry.
          y_true: ground truth entries; the last axis is split into sizes
            [4, 1, 1] as (box, index mask, class).
          boxes: original ground truth boxes, forwarded to
            `self._tiled_global_box_search` when the ignore threshold is
            enabled.
          classes: original ground truth classes, forwarded to
            `self._tiled_global_box_search` when the ignore threshold is
            enabled.
          y_pred: model output for this FPN level; reshaped to
            [batch_size, width, height, num, -1] and split on the last axis
            into sizes [4, 1, rest] as (box, confidence, class).

        Returns:
          A 9-tuple `(loss, box_loss, conf_loss, class_loss, loss, iou,
          pred_conf, ind_mask, grid_mask)`:
            loss: batch mean of the per-image sum of the box, class and
              confidence losses. The same tensor is returned again in the
              fifth position.
            box_loss: batch mean of the weighted per-image box loss.
            conf_loss: batch mean of the weighted per-image confidence loss.
            class_loss: batch mean of the per-image class loss.
            iou: first value returned by `self.box_loss`.
            pred_conf: the confidence slice of the prediction.
            ind_mask: the index mask slice of `y_true`.
            grid_mask: `true_counts` clipped to [0, 1] with the last axis
              squeezed.
        """
        if self._box_type == 'scaled':
            # The Darknet model propagates a sigmoid once in back prop, so we
            # replicate that behaviour.
            y_pred = grad_sigmoid(y_pred)

        # Generate and store constants and format output.
        # NOTE(review): axis 1 is bound to `width` and axis 2 to `height`;
        # confirm this matches the layout of `true_counts`.
        shape = tf.shape(true_counts)
        batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
        fwidth = tf.cast(width, tf.float32)
        fheight = tf.cast(height, tf.float32)
        grid_points, anchor_grid = self._anchor_generator(width,
                                                          height,
                                                          batch_size,
                                                          dtype=tf.float32)

        # Cast all input components to float32 and stop gradient to save
        # memory.
        boxes = tf.stop_gradient(tf.cast(boxes, tf.float32))
        classes = tf.stop_gradient(tf.cast(classes, tf.float32))
        y_true = tf.stop_gradient(tf.cast(y_true, tf.float32))
        true_counts = tf.stop_gradient(tf.cast(true_counts, tf.float32))
        true_conf = tf.stop_gradient(tf.clip_by_value(true_counts, 0.0, 1.0))
        grid_points = tf.stop_gradient(grid_points)
        anchor_grid = tf.stop_gradient(anchor_grid)

        # Split all the ground truths to use as separate items in loss
        # computation.
        (true_box, ind_mask, true_class) = tf.split(y_true, [4, 1, 1], axis=-1)
        true_conf = tf.squeeze(true_conf, axis=-1)
        true_class = tf.squeeze(true_class, axis=-1)
        # Keep the clipped counts as the grid mask; true_conf may be replaced
        # by the box search below.
        grid_mask = true_conf

        # Splits all predictions.
        y_pred = tf.cast(
            tf.reshape(y_pred, [batch_size, width, height, num, -1]),
            tf.float32)
        pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

        # Decode the boxes to be used for loss compute. Only the third value
        # returned by the decoder is kept.
        _, _, pred_box = self._decode_boxes(fwidth,
                                            fheight,
                                            pred_box,
                                            anchor_grid,
                                            grid_points,
                                            darknet=True)

        # If the ignore threshold is enabled, search all boxes and ignore all
        # IOU values larger than the ignore threshold that are not in the
        # noted ground truth list. The search also replaces true_conf.
        # `obj_mask` is only defined on this path and is only read under the
        # same condition further down.
        if self._ignore_thresh != 0.0:
            (true_conf, obj_mask) = self._tiled_global_box_search(
                pred_box,
                tf.stop_gradient(tf.sigmoid(pred_class)),
                boxes,
                classes,
                true_conf,
                smoothed=self._objectness_smooth > 0)

        # Build the one hot class list that is used for class loss.
        true_class = tf.one_hot(tf.cast(true_class, tf.int32),
                                depth=tf.shape(pred_class)[-1],
                                dtype=pred_class.dtype)
        true_classes = tf.stop_gradient(
            loss_utils.apply_mask(ind_mask, true_class))

        # Reorganize the one hot class list as a grid.
        true_class = loss_utils.build_grid(inds,
                                           true_classes,
                                           pred_class,
                                           ind_mask,
                                           update=False)
        true_class = tf.stop_gradient(true_class)

        # Use the class mask to find the number of objects located in
        # each predicted grid cell/pixel. Zero counts are replaced by one so
        # the division of the box loss below never divides by zero.
        counts = true_class
        counts = tf.reduce_sum(counts, axis=-1, keepdims=True)
        reps = tf.gather_nd(counts, inds, batch_dims=1)
        reps = tf.squeeze(reps, axis=-1)
        reps = tf.stop_gradient(tf.where(reps == 0.0, tf.ones_like(reps),
                                         reps))

        # Compute the loss for only the cells in which the boxes are located.
        # Each box loss is divided by the object count of its cell and the
        # result is summed per image (axis 1).
        pred_box = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))
        iou, _, box_loss = self.box_loss(true_box, pred_box, darknet=True)
        box_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                         box_loss)
        box_loss = math_ops.divide_no_nan(box_loss, reps)
        box_loss = tf.cast(tf.reduce_sum(box_loss, axis=1), dtype=y_pred.dtype)

        # Compute the sigmoid binary cross entropy for the class maps. A
        # trailing axis is added to both inputs and averaged away again.
        class_loss = tf.reduce_mean(loss_utils.sigmoid_bce(
            tf.expand_dims(true_class, axis=-1),
            tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
                                    axis=-1)

        # Apply normalization to the class losses.
        if self._cls_normalizer < 1.0:
            # Build a mask based on the true class locations.
            cls_norm_mask = true_class
            # Apply the class weight to the class indexes where one_hot is
            # one; all other entries keep a weight of one.
            class_loss *= ((1 - cls_norm_mask) +
                           cls_norm_mask * self._cls_normalizer)

        # Mask the class loss and compute the sum over all the objects.
        class_loss = tf.reduce_sum(class_loss, axis=-1)
        class_loss = loss_utils.apply_mask(grid_mask, class_loss)
        # Presumably replaces NaN/inf entries with 0.0, going by the helper's
        # name and `val` argument -- TODO confirm.
        class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
        class_loss = tf.cast(tf.reduce_sum(class_loss, axis=(1, 2, 3)),
                             dtype=y_pred.dtype)

        # Compute the sigmoid binary cross entropy for the confidence maps.
        bce = tf.reduce_mean(loss_utils.sigmoid_bce(
            tf.expand_dims(true_conf, axis=-1), pred_conf, 0.0),
                             axis=-1)

        # Mask the confidence loss and take the sum across all the grid cells.
        if self._ignore_thresh != 0.0:
            bce = loss_utils.apply_mask(obj_mask, bce)
        conf_loss = tf.cast(tf.reduce_sum(bce, axis=(1, 2, 3)),
                            dtype=y_pred.dtype)

        # Apply the weights to each loss. The class weight, when used, was
        # already applied element-wise above.
        box_loss *= self._iou_normalizer
        conf_loss *= self._obj_normalizer

        # Add all the losses together then take the mean over the batches.
        loss = box_loss + class_loss + conf_loss
        loss = tf.reduce_mean(loss)

        # Reduce the mean of the losses to use as a metric.
        box_loss = tf.reduce_mean(box_loss)
        conf_loss = tf.reduce_mean(conf_loss)
        class_loss = tf.reduce_mean(class_loss)

        # `loss` fills both the loss and the mean-loss positions of the tuple.
        return (loss, box_loss, conf_loss, class_loss, loss, iou, pred_conf,
                ind_mask, grid_mask)