Exemple #1
0
    def _build_per_path_attributes(self):
        """Create the grid generator and, when enabled, the pairwise search.

        The anchor grid generator is always built from `self._anchors` and
        `self._path_stride` and stored on `self._anchor_generator`.

        The pairwise search object is only built, and `self._search_pairs`
        only assigned, when `self._ignore_thresh` is strictly greater than
        zero; otherwise that attribute is left untouched by this method.
        """
        generator = loss_utils.GridGenerator(
            anchors=self._anchors,
            scale_anchors=self._path_stride,
        )
        self._anchor_generator = generator

        search_enabled = self._ignore_thresh > 0.0
        if search_enabled:
            self._search_pairs = loss_utils.PairWiseSearch(
                iou_type='iou', any_match=True, min_conf=0.25)
 def get_generators(self, anchors, path_scale, path_key):
   """Build the grid generator for one set of anchors.

   Args:
     anchors: anchors forwarded as the first positional argument of
       `loss_utils.GridGenerator`.
     path_scale: value forwarded as the generator's `scale_anchors`.
     path_key: accepted for interface compatibility; not used here.

   Returns:
     The newly constructed `loss_utils.GridGenerator`.
   """
   return loss_utils.GridGenerator(anchors, scale_anchors=path_scale)
Exemple #3
0
    def __init__(self,
                 anchors,
                 classes,
                 iou_thresh=0.0,
                 ignore_thresh=0.7,
                 truth_thresh=1.0,
                 nms_thresh=0.6,
                 max_delta=10.0,
                 loss_type='ciou',
                 iou_normalizer=1.0,
                 cls_normalizer=1.0,
                 object_normalizer=1.0,
                 use_scaled_loss=False,
                 update_on_repeat=False,
                 pre_nms_points=5000,
                 label_smoothing=0.0,
                 max_boxes=200,
                 box_type='original',
                 path_scale=None,
                 scale_xy=None,
                 nms_type='greedy',
                 objectness_smooth=False,
                 **kwargs):
        """Store the per-head loss/decoding settings and build grid generators.

        Args:
          anchors: mapping from a path (level) key to the anchor boxes used
            at that level. It is read through `.keys()` and indexed by key,
            and `len()` is taken of each value.
          classes: `int` for the number of classes.
          iou_thresh: `float` to use many anchors per object if
            IoU(Obj, Anchor) > iou_thresh.
          ignore_thresh: `float` for the IOU value over which the loss is not
            propagated, and a detection is assumed to have been made.
          truth_thresh: `float` for the IOU value over which the loss is
            propagated despite a detection being made.
          nms_thresh: `float` for the minimum IOU value for an overlap.
          max_delta: gradient clipping to apply to the box loss.
          loss_type: `str` for the type of iou loss to use, one of
            {ciou, diou, giou, iou}.
          iou_normalizer: `float` for how much to scale the loss on the IOU
            or the boxes.
          cls_normalizer: `float` for how much to scale the loss on the
            classes.
          object_normalizer: `float` for how much to scale the loss on the
            detection map.
          use_scaled_loss: `bool` for whether to use the scaled loss or the
            traditional loss.
          update_on_repeat: `bool` indicating how repeated indexes in a given
            [j, i] index are handled. True gives more consistent MAP; False
            improves recall by 1-2% but sacrifices some MAP.
          pre_nms_points: `int` number of top candidate detections per class
            before NMS.
          label_smoothing: `float` for how much to smooth the loss on the
            classes.
          max_boxes: `int` for the maximum number of boxes retained over all
            classes.
          box_type: `str`, one of {original, scaled, anchor_free}. The
            original method decodes the boxes by applying an exponential to
            the model width and height maps, then scaling the maps by the
            anchor boxes (Yolo-v3, Yolo-v4 and counterparts). The scaled
            method squares the width and height and scales both by a fixed
            factor of 4 (Scaled Yolo models, Yolov4-CSP). anchor_free is like
            the original method but applies no activation function to the
            boxes.
          path_scale: `dict` keyed like `anchors`. When falsy, each key is
            mapped to `2**int(key)`.
          scale_xy: `dict` of `float` values, one per key, indicating how far
            each pixel can see outside of its containment of 1.0. A value of
            1.2 indicates a 20% extended radius around each pixel in which
            that pixel can predict a center. When falsy, every key is mapped
            to 1.0.
          nms_type: `str` for which non max suppression to use.
          objectness_smooth: smoothing applied to the loss on the detection
            map (defaults to `False`).
          **kwargs: Additional keyword arguments, forwarded to the parent
            constructor.
        """
        super().__init__(**kwargs)

        # Model description.
        self._anchors = anchors
        self._classes = classes
        self._box_type = box_type

        # Thresholds.
        self._thresh = iou_thresh
        self._ignore_thresh = ignore_thresh
        self._truth_thresh = truth_thresh
        self._nms_thresh = nms_thresh

        # Loss configuration and weighting.
        self._loss_type = loss_type
        self._iou_normalizer = iou_normalizer
        self._cls_normalizer = cls_normalizer
        self._object_normalizer = object_normalizer
        self._objectness_smooth = objectness_smooth
        self._label_smoothing = label_smoothing
        self._max_delta = max_delta
        self._use_scaled_loss = use_scaled_loss
        self._update_on_repeat = update_on_repeat

        # Post-processing limits.
        self._nms_type = nms_type
        self._pre_nms_points = pre_nms_points
        self._max_boxes = max_boxes

        # Per-path bookkeeping; the keys of `anchors` identify the paths.
        self._keys = list(anchors.keys())
        self._len_keys = len(self._keys)

        if path_scale:
            self._path_scale = path_scale
        else:
            self._path_scale = {key: 2**int(key) for key in self._keys}

        if scale_xy:
            self._scale_xy = scale_xy
        else:
            self._scale_xy = {key: 1.0 for key in self._keys}

        # One grid generator and one anchor count per path.
        self._generator = {}
        self._len_mask = {}
        for key in self._keys:
            path_anchors = self._anchors[key]
            self._generator[key] = loss_utils.GridGenerator(
                path_anchors, scale_anchors=self._path_scale[key])
            self._len_mask[key] = len(path_anchors)
Exemple #4
0
    def _get_anchor_free(self, key, boxes, classes, height, width, stride,
                         center_radius):
        """Find the box assignments in an anchor free paradigm.

        For every grid location of one output level, decide which ground
        truth boxes it is responsible for, and return the matching grid
        indexes together with the matched box/confidence/class samples.

        Args:
          key: key used to look up this level's entry in
            `self.anchor_free_level_limits`.
          boxes: ground truth boxes with 4 values each. Presumably
            normalized (x_center, y_center, width, height), judging by the
            scaling below and the `xcycwh_to_yxyx` call -- TODO confirm.
          classes: class ids, gathered with the same box index as `boxes`.
          height: grid height of this level.
          width: grid width of this level.
          stride: stride of this level; grid coordinates are multiplied by
            it.
          center_radius: half-size, in grid cells, of the window around
            each box center that counts as "in center".

        Returns:
          A tuple `(indexes, samples)`. `indexes` concatenates the matched
          grid `y`, `x` and a zero column; `samples` concatenates the
          matched (unscaled) box, a confidence of one, and the class.
        """
        # Limits may be None, in which case a plain "inside the box" test
        # is used further down.
        level_limits = self.anchor_free_level_limits[key]
        gen = loss_utils.GridGenerator(anchors=[[1, 1]], scale_anchors=stride)
        # Only the first output of the generator is used; its leading axis
        # is squeezed away.
        grid_points = gen(width, height, 1, boxes.dtype)[0]
        grid_points = tf.squeeze(grid_points, axis=0)
        # Keep references to the inputs as passed in; they are what gets
        # gathered for the output. This assumes `boxes *= ...` below
        # rebinds the name rather than mutating in place (as with
        # tf.Tensor) -- confirm if other array types are passed.
        box_list = boxes
        class_list = classes

        # Shift grid points by half a cell and scale by the stride.
        grid_points = (grid_points + 0.5) * stride
        x_centers, y_centers = grid_points[..., 0], grid_points[..., 1]
        # Scale the boxes by the grid size times the stride so they are in
        # the same units as the grid points.
        boxes *= (tf.convert_to_tensor([width, height, width, height]) *
                  stride)

        # Presumably (y_min, x_min, y_max, x_max) per the helper's name --
        # the edge deltas below index it in that order.
        tlbr_boxes = box_ops.xcycwh_to_yxyx(boxes)

        # Add two leading singleton axes so the boxes broadcast against
        # the grid.
        boxes = tf.reshape(boxes, [1, 1, -1, 4])
        tlbr_boxes = tf.reshape(tlbr_boxes, [1, 1, -1, 4])
        if self.use_tie_breaker:
            # Product of the last two box components; used below to prefer
            # the smallest matching box.
            area = tf.reduce_prod(boxes[..., 2:], axis=-1)

        # check if the box is in the receptive field of this fpn level
        b_t = y_centers - tlbr_boxes[..., 0]
        b_l = x_centers - tlbr_boxes[..., 1]
        b_b = tlbr_boxes[..., 2] - y_centers
        b_r = tlbr_boxes[..., 3] - x_centers
        box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1)
        if level_limits is not None:
            # Match when the largest edge distance lies within this
            # level's [min, max] limits.
            max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1)
            gt_min = max_reg_targets_per_im >= level_limits[0]
            gt_max = max_reg_targets_per_im <= level_limits[1]
            is_in_boxes = tf.logical_and(gt_min, gt_max)
        else:
            # Match when all four edge distances are positive, i.e. the
            # grid point lies strictly inside the box.
            is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0
        # Per-box flag: did any grid location match this box.
        is_in_boxes_all = tf.reduce_any(is_in_boxes,
                                        axis=(0, 1),
                                        keepdims=True)

        # check if the center is in the receptive field of this fpn level
        c_t = y_centers - (boxes[..., 1] - center_radius * stride)
        c_l = x_centers - (boxes[..., 0] - center_radius * stride)
        c_b = (boxes[..., 1] + center_radius * stride) - y_centers
        c_r = (boxes[..., 0] + center_radius * stride) - x_centers
        centers_delta = tf.stack([c_t, c_l, c_b, c_r], axis=-1)
        is_in_centers = tf.reduce_min(centers_delta, axis=-1) > 0.0
        is_in_centers_all = tf.reduce_any(is_in_centers,
                                          axis=(0, 1),
                                          keepdims=True)

        # collate all masks to get the final locations
        # NOTE(review): wherever `is_in_boxes & is_in_centers` is True the
        # per-box flags are True as well, so the extra AND with
        # `is_in_index` looks redundant -- confirm before simplifying.
        is_in_index = tf.logical_or(is_in_boxes_all, is_in_centers_all)
        is_in_boxes_and_center = tf.logical_and(is_in_boxes, is_in_centers)
        is_in_boxes_and_center = tf.logical_and(is_in_index,
                                                is_in_boxes_and_center)

        if self.use_tie_breaker:
            # Keep, per grid location, only the matched box(es) with the
            # smallest area: unmatched entries are set to INF, and a
            # minimum of INF (no match at all) is replaced by -1.0 so that
            # the equality test below selects nothing.
            boxes_all = tf.cast(is_in_boxes_and_center, area.dtype)
            boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF))
            boxes_min = tf.reduce_min(boxes_all, axis=-1, keepdims=True)
            boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min)
            is_in_boxes_and_center = boxes_all == boxes_min

        # construct the index update grid
        # Number of boxes matched at each grid location.
        reps = tf.reduce_sum(tf.cast(is_in_boxes_and_center, tf.int16),
                             axis=-1)
        # Coordinates of every True entry, split into grid row, grid
        # column and box index.
        indexes = tf.cast(tf.where(is_in_boxes_and_center), tf.int32)
        y, x, t = tf.split(indexes, 3, axis=-1)

        # Gather from the boxes/classes as passed in, not the scaled copy.
        boxes = tf.gather_nd(box_list, t)
        classes = tf.cast(tf.gather_nd(class_list, t), boxes.dtype)
        # NOTE(review): `reps` is gathered and cast here but is not part
        # of the returned values -- looks like dead code; confirm.
        reps = tf.gather_nd(reps, tf.concat([y, x], axis=-1))
        reps = tf.cast(tf.expand_dims(reps, axis=-1), boxes.dtype)
        # NOTE(review): `classes` was already cast to boxes.dtype above.
        classes = tf.cast(tf.expand_dims(classes, axis=-1), boxes.dtype)
        conf = tf.ones_like(classes)

        # return the samples and the indexes
        samples = tf.concat([boxes, conf, classes], axis=-1)
        # The third index column is always zero.
        indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1)
        return indexes, samples