Example #1
    def test_box_conversions(self, num_boxes):
        boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
        expected_shape = np.array([num_boxes, 4])
        xywh_box = box_ops.yxyx_to_xcycwh(boxes)
        yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
        self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
        self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
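For reference, here is a minimal sketch of the conversion exercised by this test, assuming the layouts implied by the names (yxyx boxes are [ymin, xmin, ymax, xmax]; xcycwh boxes are [x_center, y_center, width, height]). This is an illustration, not the actual box_ops implementation.

import tensorflow as tf

def yxyx_to_xcycwh_sketch(boxes):
    # Illustrative only: split [ymin, xmin, ymax, xmax] coordinates and
    # recombine them as [x_center, y_center, width, height].
    ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    width = xmax - xmin
    height = ymax - ymin
    return tf.concat([x_center, y_center, width, height], axis=-1)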
Example #2
    def __call__(self,
                 pred_boxes,
                 pred_classes,
                 boxes,
                 classes,
                 scale=None,
                 yxyx=True,
                 clip_thresh=0.0):
        num_boxes = tf.shape(boxes)[-2]
        num_tiles = (num_boxes // TILE_SIZE) - 1

        if yxyx:
            boxes = box_ops.yxyx_to_xcycwh(boxes)

        if scale is not None:
            boxes = boxes * tf.stop_gradient(scale)

        if self._min_conf > 0.0:
            pred_classes = tf.cast(pred_classes > self._min_conf,
                                   pred_classes.dtype)

        def _loop_cond(unused_pred_box, unused_pred_class, boxes,
                       unused_classes, unused_running_boxes,
                       unused_running_classes, unused_max_iou, idx):

            # Check whether this slice still contains any non-zero (non-padding) boxes.
            batch_size = tf.shape(boxes)[0]
            box_slice = tf.slice(boxes, [0, idx * TILE_SIZE, 0],
                                 [batch_size, TILE_SIZE, 4])

            return tf.logical_and(idx < num_tiles,
                                  tf.math.greater(tf.reduce_sum(box_slice), 0))

        running_boxes = tf.zeros_like(pred_boxes)
        running_classes = tf.zeros_like(tf.reduce_sum(running_boxes, axis=-1))
        max_iou = tf.zeros_like(tf.reduce_sum(running_boxes, axis=-1))
        max_iou = tf.expand_dims(max_iou, axis=-1)

        (pred_boxes, pred_classes, boxes, classes, running_boxes,
         running_classes, max_iou,
         _) = tf.while_loop(_loop_cond, self._search_body, [
             pred_boxes, pred_classes, boxes, classes, running_boxes,
             running_classes, max_iou,
             tf.constant(0)
         ])

        mask = tf.cast(max_iou > clip_thresh, running_boxes.dtype)
        running_boxes *= mask
        running_classes *= tf.squeeze(mask, axis=-1)
        max_iou *= mask
        max_iou = tf.squeeze(max_iou, axis=-1)
        mask = tf.squeeze(mask, axis=-1)

        return (tf.stop_gradient(running_boxes),
                tf.stop_gradient(running_classes), tf.stop_gradient(max_iou),
                tf.stop_gradient(mask))
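The while loop above walks the padded ground-truth boxes TILE_SIZE at a time and exits early once a tile holds only zero padding. Below is a stripped-down, self-contained sketch of that iteration pattern; the TILE_SIZE value and the loop body are placeholders and do not reproduce the class's actual _search_body.

import tensorflow as tf

TILE_SIZE = 50  # placeholder value for this sketch

def visit_box_tiles(boxes):
    # boxes: [batch, num_boxes, 4], zero-padded along the box axis.
    num_boxes = tf.shape(boxes)[-2]
    num_tiles = (num_boxes // TILE_SIZE) - 1  # mirrors the condition above

    def _cond(idx):
        # Keep looping while tiles remain and the current tile has non-zero boxes.
        box_slice = boxes[:, idx * TILE_SIZE:(idx + 1) * TILE_SIZE, :]
        return tf.logical_and(idx < num_tiles,
                              tf.reduce_sum(box_slice) > 0)

    def _body(idx):
        # A real search body would match predictions against this tile here.
        return idx + 1

    return tf.while_loop(_cond, _body, [tf.constant(0)])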
Example #3
    def _parse_eval_data(self, data):
        """Generates images and labels that are usable for model training.

    Args:
      data: a dict of Tensors produced by the decoder.
    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """

        shape = tf.shape(data['image'])
        image = data['image'] / 255
        boxes = data['groundtruth_boxes']
        width = shape[0]
        height = shape[1]

        image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
            image, boxes, width=width, height=height, target_dim=self._image_w)
        boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

        # Find the best anchor for the ground truth labels to maximize the iou
        best_anchors = yolo_preprocess_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)
        boxes = yolo_preprocess_ops.pad_max_instances(boxes,
                                                      self._max_num_instances,
                                                      0)
        classes = yolo_preprocess_ops.pad_max_instances(
            data['groundtruth_classes'], self._max_num_instances, 0)
        best_anchors = yolo_preprocess_ops.pad_max_instances(
            best_anchors, self._max_num_instances, 0)
        area = yolo_preprocess_ops.pad_max_instances(data['groundtruth_area'],
                                                     self._max_num_instances,
                                                     0)
        is_crowd = yolo_preprocess_ops.pad_max_instances(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        grid = self._build_grid(labels,
                                self._image_w,
                                batch=False,
                                use_tie_breaker=self._use_tie_breaker)
        labels.update({'grid_form': grid})
        return image, labels
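pad_max_instances is used above to give every per-image tensor a fixed first-dimension length so that examples can be batched together. Below is a minimal sketch of the assumed behavior (clip or pad the first axis to a fixed number of instances with a given fill value), inferred from how it is called here rather than taken from yolo_preprocess_ops itself.

import tensorflow as tf

def pad_max_instances_sketch(value, instances, pad_value=0):
    # Clip any extra instances, then pad the first axis up to `instances`.
    value = value[:instances]
    num_pad = instances - tf.shape(value)[0]
    paddings = [[0, num_pad]] + [[0, 0]] * (len(value.shape) - 1)
    return tf.pad(value, paddings, constant_values=pad_value)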
Example #4
def fit_preserve_aspect_ratio(image,
                              boxes,
                              width=None,
                              height=None,
                              target_dim=None):
    """Resizes the image while peserving the image aspect ratio.

  Args:
      image: a `Tensor` representing the image.
      boxes: a `Tensor` representing the boxes.
      width: int for the image width.
      height: int for the image height.
      target_dim: int or scalar Tensor giving the output height and width.
  Returns:
      image: a `Tensor` representing the image.
      boxes: a `Tensor` representing the boxes.
  """
    if width is None or height is None:
        shape = tf.shape(image)
        if tf.shape(shape)[0] == 4:
            width = shape[1]
            height = shape[2]
        else:
            width = shape[0]
            height = shape[1]

    clipper = tf.math.maximum(width, height)
    if target_dim is None:
        target_dim = clipper

    pad_width = clipper - width
    pad_height = clipper - height
    image = tf.image.pad_to_bounding_box(image, pad_width // 2,
                                         pad_height // 2, clipper, clipper)

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(boxes, 4, axis=-1)

    y *= tf.cast(width / clipper, tf.float32)
    x *= tf.cast(height / clipper, tf.float32)

    y += tf.cast((pad_width / clipper) / 2, tf.float32)
    x += tf.cast((pad_height / clipper) / 2, tf.float32)

    h *= tf.cast(width / clipper, tf.float32)
    w *= tf.cast(height / clipper, tf.float32)

    boxes = tf.concat([x, y, w, h], axis=-1)

    boxes = box_ops.xcycwh_to_yxyx(boxes)
    image = tf.image.resize(image, (target_dim, target_dim))
    return image, boxes
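A brief usage sketch for fit_preserve_aspect_ratio; the image shape, box values, and the 416 target size below are illustrative only.

import tensorflow as tf

image = tf.random.uniform([480, 640, 3])         # [height, width, channels]
boxes = tf.constant([[0.1, 0.2, 0.6, 0.8]])      # one normalized yxyx box
image, boxes = fit_preserve_aspect_ratio(image, boxes, target_dim=416)
# The image is padded to a square and resized to (416, 416); the boxes are
# shifted and rescaled to stay aligned with the padded, resized image.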
Example #5
File: anchor.py  Project: xiangww00/models
    def __call__(self, boxes, classes, width, height):
        """Builds the labels for a single image, not functional in batch mode.

    Args:
      boxes: `Tensor` of shape [None, 4] indicating the object locations in an
        image.
      classes: `Tensor` of shape [None] indicating each object's class.
      width: `int` for the image width.
      height: `int` for the image height.

    Returns:
      centers: `Tensor` of shape [None, 3] of indexes in the final grid where
        boxes are located.
      updates: `Tensor` of shape [None, 8] holding the values to place in the
        final grid.
      full: `Tensor` of [width/stride, height/stride, num_anchors, 1] holding
        a mask of where boxes are located, used for the confidence losses.
    """
        indexes = {}
        updates = {}
        true_grids = {}
        iou_index = None

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        if not self.best_matches_only and self.anchor_free_level_limits is None:
            # stitch and search boxes across fpn levels
            anchorsvec = []
            for stitch in self.anchors:
                anchorsvec.extend(self.anchors[stitch])

            stride = tf.cast([width, height], boxes.dtype)
            # get the best anchor for each box
            iou_index, _ = get_best_anchor(
                boxes,
                anchorsvec,
                stride,
                width=1.0,
                height=1.0,
                best_match_only=False,
                use_tie_breaker=self.use_tie_breaker,
                iou_thresh=self.match_threshold)

        for key in self.keys:
            indexes[key], updates[key], true_grids[
                key] = self.build_label_per_path(key,
                                                 boxes,
                                                 classes,
                                                 width,
                                                 height,
                                                 iou_index=iou_index)
        return indexes, updates, true_grids
Example #6
    def _tiled_global_box_search(self,
                                 pred_boxes,
                                 pred_classes,
                                 boxes,
                                 classes,
                                 true_conf,
                                 smoothed,
                                 scale=None):
        """Search of all groundtruths to associate groundtruths to predictions."""

        boxes = box_ops.yxyx_to_xcycwh(boxes)

        if scale is not None:
            boxes = boxes * tf.cast(tf.stop_gradient(scale), boxes.dtype)

        # Search all predictions against ground truths to find matching boxes
        # for each pixel.
        _, _, iou_max, _ = self._search_pairs(pred_boxes, pred_classes, boxes,
                                              classes)

        if iou_max is None:
            return true_conf, tf.ones_like(true_conf)

        # Find the exact indexes to ignore and keep.
        ignore_mask = tf.cast(iou_max < self._ignore_thresh, pred_boxes.dtype)
        iou_mask = iou_max > self._ignore_thresh

        if not smoothed:
            # Ignore all pixels where a box was not supposed to be predicted but a
            # high confidence box was predicted.
            obj_mask = true_conf + (1 - true_conf) * ignore_mask
        else:
            # Replace pixels in the true confidence map with the max IOU
            # predicted within that cell.
            obj_mask = tf.ones_like(true_conf)
            iou_ = (1 - self._objectness_smooth
                    ) + self._objectness_smooth * iou_max
            iou_ = tf.where(iou_max > 0, iou_, tf.zeros_like(iou_))
            true_conf = tf.where(iou_mask, iou_, true_conf)

        # Stop gradient so the while loop is not tracked.
        obj_mask = tf.stop_gradient(obj_mask)
        true_conf = tf.stop_gradient(true_conf)
        return true_conf, obj_mask
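For a concrete sense of the smoothed branch above: if objectness_smooth were 0.6 and the best predicted IOU at a cell were 0.8, the replacement confidence target would be (1 - 0.6) + 0.6 * 0.8 = 0.88, so cells whose best IOU exceeds the ignore threshold are supervised toward a soft value derived from the IOU rather than a hard 1.0.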
Example #7
    def _parse_eval_data(self, data):
        """Parses data for evaluation.
    !!! All augmentations and transformations are on bboxes with format
      (ymin, xmin, ymax, xmax). Required to do the appropriate transformations.
    !!! Images are supposed to be in RGB format
    """
        image, boxes = data['image'], data['boxes']

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        targets = {'labels': labels, 'bboxes': bbox_labels}

        return image, targets
Example #8
    def _parse_train_data(self, data):
        """Parses data for training and evaluation.
    !!! All augmentations and transformations are on bboxes with format
      (ymin, xmin, ymax, xmax). Required to do the appropriate transformations.
    !!! Images are supposed to be in RGB format
    """
        image, boxes = data['image'], data['boxes']

        # Execute RandAugment first as some ops require uint8 colors
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        if self._aug_rand_hflip:
            image, boxes = yolo_ops.random_horizontal_flip(image, boxes)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        if self._aug_jitter_im != 0.0:
            image, boxes = yolo_ops.random_translate(image, boxes,
                                                     self._aug_jitter_im)

        if self._aug_jitter_boxes != 0.0:
            boxes = box_ops.jitter_boxes(boxes, self._aug_jitter_boxes)

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        # TODO: Figure out why the number of bboxes must be fixed; otherwise
        # an error occurs.
        # https://github.com/whizzmobility/models/pull/61
        # Pad or truncate to MAX_DISPLAY_BBOX boxes for a constant size.
        raw_bboxes = boxes
        num_bboxes = tf.shape(raw_bboxes)[0]
        if num_bboxes > MAX_DISPLAY_BBOX:
            raw_bboxes = raw_bboxes[:MAX_DISPLAY_BBOX, :]
        else:
            paddings = tf.stack([0, MAX_DISPLAY_BBOX - num_bboxes], axis=-1)
            paddings = tf.stack([paddings, [0, 0]], axis=0)
            raw_bboxes = tf.pad(raw_bboxes, paddings)

        targets = {
            'labels': labels,
            'bboxes': bbox_labels,
            'raw_bboxes': raw_bboxes
        }

        return image, targets
Example #9
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

    Args:
      data: a dict of Tensors produced by the decoder.
    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """

        shape = tf.shape(data['image'])
        image = data['image'] / 255
        boxes = data['groundtruth_boxes']
        width = shape[0]
        height = shape[1]

        image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=self._max_process_size)

        image_shape = tf.shape(image)[:2]

        if self._random_flip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        randscale = self._image_w // self._net_down_scale

        if not self._fixed_size:
            do_scale = tf.greater(
                tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
                0.5)
            if do_scale:
                # This scales the image to a random multiple of net_down_scale
                # between 320 and 608.
                randscale = tf.random.uniform(
                    [],
                    minval=self._min_process_size // self._net_down_scale,
                    maxval=self._max_process_size // self._net_down_scale,
                    seed=self._seed,
                    dtype=tf.int32) * self._net_down_scale

        if self._jitter_boxes != 0.0:
            boxes = box_ops.denormalize_boxes(boxes, image_shape)
            boxes = box_ops.jitter_boxes(boxes, 0.025)
            boxes = box_ops.normalize_boxes(boxes, image_shape)

        # YOLO loss function uses x-center, y-center format
        boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

        if self._jitter_im != 0.0:
            image, boxes = yolo_preprocess_ops.random_translate(
                image, boxes, self._jitter_im, seed=self._seed)

        if self._aug_rand_zoom:
            image, boxes = yolo_preprocess_ops.resize_crop_filter(
                image,
                boxes,
                default_width=self._image_w,
                default_height=self._image_h,
                target_width=randscale,
                target_height=randscale)
        image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

        if self._aug_rand_brightness:
            image = tf.image.random_brightness(image=image,
                                               max_delta=.1)  # Brightness
        if self._aug_rand_saturation:
            image = tf.image.random_saturation(image=image,
                                               lower=0.75,
                                               upper=1.25)  # Saturation
        if self._aug_rand_hue:
            image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
        image = tf.clip_by_value(image, 0.0, 1.0)
        # Find the best anchor for the ground truth labels to maximize the iou
        best_anchors = yolo_preprocess_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)

        # Padding
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        classes = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_classes'], self._max_num_instances, -1)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        area = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_area'], self._max_num_instances, 0)
        is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        if self._fixed_size:
            grid = self._build_grid(labels,
                                    self._image_w,
                                    use_tie_breaker=self._use_tie_breaker)
            labels.update({'grid_form': grid})

        return image, labels
Example #10
    def testYoloPreprocessTrueBoxes(self):
        bboxes = tf.constant([
            [40, 79, 109, 144, 74], [174, 242, 187, 269, 24],
            [341, 265, 357, 291, 26], [261, 220, 300, 362, 0],
            [217, 228, 252, 338, 0], [202, 228, 219, 274, 0],
            [135, 232, 153, 278, 0], [94, 229, 124, 306, 0],
            [44, 232, 74, 321, 0], [191, 238, 196, 255, 24],
            [117, 86, 122, 157, 74], [180, 224, 193, 285, 0],
            [375, 226, 415, 326, 0], [245, 222, 274, 317, 0],
            [317, 228, 352, 334, 0], [369, 226, 389, 263, 0],
            [135, 225, 180, 355, 0], [171, 229, 185, 311, 0],
            [0, 216, 415, 363, 0]])  # x1, y1, x2, y2, class

        classes = bboxes[:, 4]
        bboxes = tf.stack(
            [bboxes[:, 1], bboxes[:, 0], bboxes[:, 3], bboxes[:, 2]],
            axis=-1)  # yxyx
        bboxes = box_ops.yxyx_to_xcycwh(tf.cast(bboxes, tf.float32))
        inputs = tf.concat(
            [bboxes, tf.cast(classes[:, tf.newaxis], tf.float32)], axis=-1)

        train_output_sizes = tf.constant([52, 26, 13])
        anchor_per_scale = 3
        num_classes = 80
        max_bbox_per_scale = 150
        strides = tf.constant([8, 16, 32])
        anchors = tf.constant([[[12, 16], [19, 36], [40, 28]],
                               [[36, 75], [76, 55], [72, 146]],
                               [[142, 110], [192, 243], [459, 401]]])

        result = yolo_ops.preprocess_true_boxes(
            bboxes=inputs,
            train_output_sizes=train_output_sizes,
            anchor_per_scale=anchor_per_scale,
            num_classes=num_classes,
            max_bbox_per_scale=max_bbox_per_scale,
            strides=strides,
            anchors=anchors)  # only takes xywh

        target_labels, target_bboxes = result

        groundtruth_label_small_bbox = np.array([
            74.5, 111.5, 69., 65., 1., 1., 119.5, 121.5, 5., 71., 1., 1.,
            193.5, 246.5, 5., 17., 1., 1., 379., 244.5, 20., 37., 1., 1., 144.,
            255., 18., 46., 1., 1., 180.5, 255.5, 13., 27., 1., 1., 186.5,
            254.5, 13., 61., 1., 1., 210.5, 251., 17., 46., 1., 1., 109.,
            267.5, 30., 77., 1., 1., 178., 270., 14., 82., 1., 1., 259.5,
            269.5, 29., 95., 1., 1., 59., 276.5, 30., 89., 1., 1., 349., 278.,
            16., 26., 1., 1., 395., 276., 40., 100., 1., 1., 234.5, 283., 35.,
            110., 1., 1., 334.5, 281., 35., 106., 1., 1., 157.5, 290., 45.,
            130., 1., 1., 207.5, 289.5, 415., 147., 1., 1., 280.5, 291., 39.,
            142., 1., 1.
        ])
        groundtruth_small_bbox = np.array([
            74.5, 111.5, 69., 65., 180.5, 255.5, 13., 27., 349., 278., 16.,
            26., 280.5, 291., 39., 142., 234.5, 283., 35., 110., 210.5, 251.,
            17., 46., 144., 255., 18., 46., 109., 267.5, 30., 77., 59., 276.5,
            30., 89., 193.5, 246.5, 5., 17., 119.5, 121.5, 5., 71., 186.5,
            254.5, 13., 61., 395., 276., 40., 100., 259.5, 269.5, 29., 95.,
            334.5, 281., 35., 106., 379., 244.5, 20., 37., 157.5, 290., 45.,
            130., 178., 270., 14., 82., 207.5, 289.5, 415., 147.
        ])

        self.assertAllClose(
            tf.boolean_mask(target_labels[0],
                            tf.greater(target_labels[0], 0.5)),
            groundtruth_label_small_bbox)
        self.assertAllClose(
            tf.boolean_mask(target_bboxes[0],
                            tf.greater(target_bboxes[0], 0.5)),
            groundtruth_small_bbox)
        self.assertAllEqual(target_labels[0].shape, np.array([52, 52, 3, 85]))
        self.assertAllEqual(target_bboxes[0].shape, np.array([150, 4]))
        self.assertAllEqual(target_labels[1], tf.zeros([26, 26, 3, 85]))
        self.assertAllEqual(target_bboxes[1], tf.zeros([150, 4]))
        self.assertAllEqual(target_labels[2], tf.zeros([13, 13, 3, 85]))
        self.assertAllEqual(target_bboxes[2], tf.zeros([150, 4]))