コード例 #1
0
    def _parse_train_data(self, data):
        """Parses data for training and evaluation.
    !!! All augmentations and transformations are on bboxes with format
      (ymin, xmin, ymax, xmax). Required to do the appropriate transformations.
    !!! Images are supposed to be in RGB format
    """
        image, boxes = data['image'], data['boxes']

        # Execute RandAugment first as some ops require uint8 colors
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        if self._aug_rand_hflip:
            image, boxes = yolo_ops.random_horizontal_flip(image, boxes)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        if self._aug_jitter_im != 0.0:
            image, boxes = yolo_ops.random_translate(image, boxes,
                                                     self._aug_jitter_im)

        if self._aug_jitter_boxes != 0.0:
            boxes = box_ops.jitter_boxes(boxes, self._aug_jitter_boxes)

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        # TODO: Figure out why we need to fix the num BBOX if not there will be an error
        # https://github.com/whizzmobility/models/pull/61
        # pad / limit to MAX_DISPLAY_BBOX boxes for constant size
        raw_bboxes = boxes
        num_bboxes = tf.shape(raw_bboxes)[0]
        if num_bboxes > MAX_DISPLAY_BBOX:
            raw_bboxes = raw_bboxes[:, :MAX_DISPLAY_BBOX]
        else:
            paddings = tf.stack([0, MAX_DISPLAY_BBOX - num_bboxes], axis=-1)
            paddings = tf.stack([paddings, [0, 0]], axis=0)
            raw_bboxes = tf.pad(raw_bboxes, paddings)

        targets = {
            'labels': labels,
            'bboxes': bbox_labels,
            'raw_bboxes': raw_bboxes
        }

        return image, targets
コード例 #2
0
 def jitter_fn(input_boxes, arg_noise_scale):
   return box_ops.jitter_boxes(input_boxes, arg_noise_scale)
コード例 #3
0
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

    Args:
      data: a dict of Tensors produced by the decoder.
    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """

        shape = tf.shape(data['image'])
        image = data['image'] / 255
        boxes = data['groundtruth_boxes']
        width = shape[0]
        height = shape[1]

        image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=self._max_process_size)

        image_shape = tf.shape(image)[:2]

        if self._random_flip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        randscale = self._image_w // self._net_down_scale

        if not self._fixed_size:
            do_scale = tf.greater(
                tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
                0.5)
            if do_scale:
                # This scales the image to a random multiple of net_down_scale
                # between 320 to 608
                randscale = tf.random.uniform(
                    [],
                    minval=self._min_process_size // self._net_down_scale,
                    maxval=self._max_process_size // self._net_down_scale,
                    seed=self._seed,
                    dtype=tf.int32) * self._net_down_scale

        if self._jitter_boxes != 0.0:
            boxes = box_ops.denormalize_boxes(boxes, image_shape)
            boxes = box_ops.jitter_boxes(boxes, 0.025)
            boxes = box_ops.normalize_boxes(boxes, image_shape)

        # YOLO loss function uses x-center, y-center format
        boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

        if self._jitter_im != 0.0:
            image, boxes = yolo_preprocess_ops.random_translate(
                image, boxes, self._jitter_im, seed=self._seed)

        if self._aug_rand_zoom:
            image, boxes = yolo_preprocess_ops.resize_crop_filter(
                image,
                boxes,
                default_width=self._image_w,
                default_height=self._image_h,
                target_width=randscale,
                target_height=randscale)
        image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

        if self._aug_rand_brightness:
            image = tf.image.random_brightness(image=image,
                                               max_delta=.1)  # Brightness
        if self._aug_rand_saturation:
            image = tf.image.random_saturation(image=image,
                                               lower=0.75,
                                               upper=1.25)  # Saturation
        if self._aug_rand_hue:
            image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
        image = tf.clip_by_value(image, 0.0, 1.0)
        # Find the best anchor for the ground truth labels to maximize the iou
        best_anchors = yolo_preprocess_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)

        # Padding
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        classes = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_classes'], self._max_num_instances, -1)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        area = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_area'], self._max_num_instances, 0)
        is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        if self._fixed_size:
            grid = self._build_grid(labels,
                                    self._image_w,
                                    use_tie_breaker=self._use_tie_breaker)
            labels.update({'grid_form': grid})

        return image, labels
コード例 #4
0
  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.
        Args:
          data: a dict of Tensors produced by the decoder.
        Returns:
          images: the image tensor.
          labels: a dict of Tensors that contains labels.
        """

    image = data['image'] / 255

    # / 255
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
      image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
      delta = preprocessing_ops.rand_uniform_strong(
          -0.1, 0.1
      )  # tf.random.uniform([], minval= -0.1,maxval=0.1, seed=self._seed, dtype=tf.float32)
      i_h = i_h + delta  # Hue
      i_h = tf.clip_by_value(i_h, 0.0, 1.0)
    if self._aug_rand_saturation:
      delta = preprocessing_ops.rand_scale(
          0.75
      )  # tf.random.uniform([], minval= 0.5,maxval=1.1, seed=self._seed, dtype=tf.float32)
      i_s = i_s * delta
    if self._aug_rand_brightness:
      delta = preprocessing_ops.rand_scale(
          0.75
      )  # tf.random.uniform([], minval= -0.15,maxval=0.15, seed=self._seed, dtype=tf.float32)
      i_v = i_v * delta
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.math.minimum(noise, 0.5)
    noise = tf.math.maximum(noise, 0)
    image += noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
      boxes = box_ops.denormalize_boxes(boxes, image_shape)
      boxes = box_ops.jitter_boxes(boxes, 0.025)
      boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
      image, boxes, classes = preprocessing_ops.random_jitter(
          image, boxes, classes, self._jitter_im, seed=self._seed)
      # image, boxes, classes = preprocessing_ops.random_translate(image, boxes, classes, 0.2, seed=self._seed)

    if self._aug_rand_zoom:
      image, boxes, classes = preprocessing_ops.random_zoom_crop(
          image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    randscale = self._image_w // self._net_down_scale

    if self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=15,
                                      seed=self._seed,
                                      dtype=tf.int32)

    if self._letter_box:
      image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
          image,
          boxes,
          width=width,
          height=height,
          target_dim=randscale * self._net_down_scale)
      width = randscale * self._net_down_scale
      height = randscale * self._net_down_scale

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,  # randscale * self._net_down_scale,
        default_height=height,  # randscale * self._net_down_scale,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)
    num_dets = tf.shape(classes)[0]

    # padding
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
      best_anchors = preprocessing_ops.get_best_anchor(
          boxes, self._anchors, width=self._image_w, height=self._image_h)
      best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
          best_anchors, self._max_num_instances, 0)
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'best_anchors': tf.cast(best_anchors, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
      grid = self._build_grid(
          labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
      labels.update({'grid_form': grid})
      labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
    return image, labels