Example 1
    def get_box_from_dataset(self, dataset):
        """Collects the (width, height) of every ground-truth box in the dataset."""
        box_ls = None
        if not isinstance(dataset, list):
            dataset = [dataset]
        for ds in dataset:
            for el in ds:
                # Keep only the width and height channels of each box.
                wh = yxyx_to_xcycwh(el['groundtruth_boxes'])[..., 2:]
                if box_ls is None:
                    box_ls = wh
                else:
                    box_ls = tf.concat([box_ls, wh], axis=0)
        self._boxes = box_ls
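These examples repeatedly convert boxes between corner form ([ymin, xmin, ymax, xmax]) and center form ([x_center, y_center, width, height]) via yxyx_to_xcycwh / xcycwh_to_yxyx (qualified as box_ops in the later examples). The helpers themselves are not shown here; a minimal sketch of what they presumably compute:

import tensorflow as tf

def yxyx_to_xcycwh(box):
    # Corner form [ymin, xmin, ymax, xmax] -> center form [xc, yc, w, h].
    ymin, xmin, ymax, xmax = tf.split(box, 4, axis=-1)
    return tf.concat([(xmin + xmax) / 2, (ymin + ymax) / 2,
                      xmax - xmin, ymax - ymin], axis=-1)

def xcycwh_to_yxyx(box):
    # Center form [xc, yc, w, h] -> corner form [ymin, xmin, ymax, xmax].
    x, y, w, h = tf.split(box, 4, axis=-1)
    return tf.concat([y - h / 2, x - w / 2, y + h / 2, x + w / 2], axis=-1)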
Example 2
def crop_filter_to_bbox(image,
                        boxes,
                        classes,
                        target_width,
                        target_height,
                        offset_width,
                        offset_height,
                        fix=False):
    with tf.name_scope('crop_filter_to_bbox'):
        shape = tf.shape(image)

        # Images are either [batch, height, width, channels] or [height, width, channels].
        if tf.shape(shape)[0] == 4:
            height = shape[1]
            width = shape[2]
        else:
            height = shape[0]
            width = shape[1]

        image = tf.image.crop_to_bounding_box(image, offset_height,
                                              offset_width, target_height,
                                              target_width)
        if fix:
            image = tf.image.pad_to_bounding_box(image, offset_height,
                                                 offset_width, height, width)

        # Crop window bounds in normalized [0, 1] coordinates.
        x_lower_bound = offset_width / width
        y_lower_bound = offset_height / height

        x_upper_bound = (offset_width + target_width) / width
        y_upper_bound = (offset_height + target_height) / height

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

        # Keep only the boxes whose centers fall inside the crop window.
        x_mask_lower = x > x_lower_bound
        y_mask_lower = y > y_lower_bound
        x_mask_upper = x < x_upper_bound
        y_mask_upper = y < y_upper_bound

        x_mask = tf.math.logical_and(x_mask_lower, x_mask_upper)
        y_mask = tf.math.logical_and(y_mask_lower, y_mask_upper)

        mask = tf.math.logical_and(x_mask, y_mask)

        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        if not fix:
            # Renormalize coordinates from the original image to the cropped region.
            x = (x - x_lower_bound) * tf.cast(width / target_width, x.dtype)
            y = (y - y_lower_bound) * tf.cast(height / target_height, y.dtype)
            w = w * tf.cast(width / target_width, w.dtype)
            h = h * tf.cast(height / target_height, h.dtype)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)
    return image, boxes, classes
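shift_zeros is used throughout these examples but never defined here. A plausible minimal sketch, assuming an unbatched [num_boxes, 1] layout: it zeroes out the filtered entries and packs the survivors to the front so the padding stays contiguous at the end.

def shift_zeros(data, mask):
    # Hypothetical helper: keep entries where mask is True, zero the rest,
    # and move the kept entries to the front of the tensor.
    data = tf.where(mask, data, tf.zeros_like(data))
    order = tf.argsort(tf.cast(mask[..., 0], tf.int32),
                       direction='DESCENDING', stable=True)
    return tf.gather(data, order, axis=-2)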
Example 3
def translate_boxes(box, classes, translate_x, translate_y):
    with tf.name_scope('translate_boxes'):
        box = box_ops.yxyx_to_xcycwh(box)
        x, y, w, h = tf.split(box, 4, axis=-1)
        x = x + translate_x
        y = y + translate_y

        # Discard boxes whose centers were translated outside the [0, 1) image.
        x_mask_lower = x >= 0
        y_mask_lower = y >= 0
        x_mask_upper = x < 1
        y_mask_upper = y < 1

        x_mask = tf.math.logical_and(x_mask_lower, x_mask_upper)
        y_mask = tf.math.logical_and(y_mask_lower, y_mask_upper)
        mask = tf.math.logical_and(x_mask, y_mask)

        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        box = tf.concat([x, y, w, h], axis=-1)
        box = box_ops.xcycwh_to_yxyx(box)
    return box, classes
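A usage sketch (the box values and translation amounts below are made up for illustration):

boxes = tf.constant([[0.1, 0.2, 0.5, 0.6]])   # normalized [ymin, xmin, ymax, xmax]
classes = tf.constant([3.0])
boxes, classes = translate_boxes(boxes, classes, translate_x=0.1, translate_y=-0.05)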
Example 4
def pad_filter_to_bbox(image, boxes, classes, target_width, target_height,
                       offset_width, offset_height):
    with tf.name_scope('pad_filter_to_bbox'):
        shape = tf.shape(image)

        if tf.shape(shape)[0] == 4:
            height = shape[1]
            width = shape[2]
        else:  # tf.shape(shape)[0] == 3:
            height = shape[0]
            width = shape[1]

        image = tf.image.pad_to_bounding_box(image, offset_height,
                                             offset_width, target_height,
                                             target_width)

        x_lower_bound = tf.cast(offset_width / width, tf.float32)
        y_lower_bound = tf.cast(offset_height / height, tf.float32)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

        # Renormalize centers and sizes from the original image to the padded canvas.
        x = (x + x_lower_bound) * tf.cast(width / target_width, x.dtype)
        y = (y + y_lower_bound) * tf.cast(height / target_height, y.dtype)
        w = w * tf.cast(width / target_width, w.dtype)
        h = h * tf.cast(height / target_height, h.dtype)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)
    return image, boxes, classes
Example 5
def cutmix_1(image_to_crop, boxes1, classes1, image_mask, boxes2, classes2,
             target_width, target_height, offset_width, offset_height):
    with tf.name_scope('cutmix'):
        image, boxes, classes = cut_out(image_mask, boxes2, classes2,
                                        target_width, target_height,
                                        offset_width, offset_height)
        image_, boxes_, classes_ = crop_filter_to_bbox(image_to_crop,
                                                       boxes1,
                                                       classes1,
                                                       target_width,
                                                       target_height,
                                                       offset_width,
                                                       offset_height,
                                                       fix=True)
        # Composite the patch from one image into the hole cut in the other,
        # then merge the two label sets.
        image += image_
        boxes = tf.concat([boxes, boxes_], axis=-2)
        classes = tf.concat([classes, classes_], axis=-1)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(boxes, 4, axis=-1)

        mask = x > 0
        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)

    return image, boxes, classes
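A usage sketch for cutmix_1, assuming two equally sized images with their labels (all names and sizes below are placeholders): the window is erased from img_b and refilled with the same region cropped from img_a, and the surviving boxes from both images are merged.

patch_w, patch_h = 128, 128
ox, oy = 64, 96                     # top-left corner of the patch
image, boxes, classes = cutmix_1(img_a, boxes_a, classes_a,
                                 img_b, boxes_b, classes_b,
                                 patch_w, patch_h, ox, oy)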
Example 6
    def _parse_eval_data(self, data):
        """Generates images and labels that are usable for model training.
        Args:
          data: a dict of Tensors produced by the decoder.
        Returns:
          images: the image tensor.
          labels: a dict of Tensors that contains labels.
        """

        shape = tf.shape(data['image'])
        image = data['image'] / 255  # normalize pixels to [0, 1]
        boxes = data['groundtruth_boxes']
        # NOTE: `width` is taken from the first spatial dimension here, matching
        # the convention expected by fit_preserve_aspect_ratio.
        width = shape[0]
        height = shape[1]

        image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
            image, boxes, width=width, height=height, target_dim=self._image_w)
        boxes = box_utils.yxyx_to_xcycwh(boxes)

        best_anchors = preprocessing_ops.get_best_anchor(boxes,
                                                         self._anchors,
                                                         width=self._image_w,
                                                         height=self._image_h)
        boxes = preprocessing_ops.pad_max_instances(boxes,
                                                    self._max_num_instances, 0)
        classes = preprocessing_ops.pad_max_instances(
            data['groundtruth_classes'], self._max_num_instances, 0)
        best_anchors = preprocessing_ops.pad_max_instances(
            best_anchors, self._max_num_instances, 0)
        area = preprocessing_ops.pad_max_instances(data['groundtruth_area'],
                                                   self._max_num_instances, 0)
        is_crowd = preprocessing_ops.pad_max_instances(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        grid = self._build_grid(labels,
                                self._image_w,
                                batch=False,
                                use_tie_breaker=self._use_tie_breaker)
        labels.update({'grid_form': grid})
        return image, labels
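pad_max_instances is also not shown in these examples. A hedged sketch for the default pad_axis=0 case, clipping or padding the leading dimension to a fixed number of slots:

def pad_max_instances(value, instances, pad_value=0, pad_axis=0):
    # Hypothetical sketch (pad_axis=0 only): truncate to `instances` rows,
    # then pad the remainder with `pad_value`.
    value = value[:instances]
    pad_len = instances - tf.shape(value)[0]
    pad_shape = tf.concat([[pad_len], tf.shape(value)[1:]], axis=0)
    padding = tf.fill(pad_shape, tf.cast(pad_value, value.dtype))
    return tf.concat([value, padding], axis=0)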
Example 7
def cutmix_batch(image, boxes, classes, target_width, target_height,
                 offset_width, offset_height):
    with tf.name_scope('cutmix_batch'):

        image_, boxes_, classes_ = cut_out(image, boxes, classes, target_width,
                                           target_height, offset_width,
                                           offset_height)
        image__, boxes__, classes__ = crop_filter_to_bbox(image,
                                                          boxes,
                                                          classes,
                                                          target_width,
                                                          target_height,
                                                          offset_width,
                                                          offset_height,
                                                          fix=True)

        # With probability 0.5, swap the two halves of the batch so each image
        # receives a patch cropped from a different image (assumes an even batch).
        mix = tf.random.uniform([], minval=0, maxval=1)
        if mix > 0.5:
            i_split1, i_split2 = tf.split(image__, 2, axis=0)
            b_split1, b_split2 = tf.split(boxes__, 2, axis=0)
            c_split1, c_split2 = tf.split(classes__, 2, axis=0)

            image__ = tf.concat([i_split2, i_split1], axis=0)
            boxes__ = tf.concat([b_split2, b_split1], axis=0)
            classes__ = tf.concat([c_split2, c_split1], axis=0)

        image = image_ + image__
        boxes = tf.concat([boxes_, boxes__], axis=-2)
        classes = tf.concat([classes_, classes__], axis=-1)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(boxes, 4, axis=-1)

        mask = x > 0
        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)

        # Mark the padded (empty) slots with class -1 and count real detections.
        x = tf.squeeze(x, axis=-1)
        classes = tf.where(x == 0, -1, classes)

        num_detections = tf.reduce_sum(tf.cast(x > 0, tf.int32), axis=-1)

    return image, boxes, classes, num_detections
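A usage sketch on a toy batch (all shapes and offsets below are placeholders; the half-batch swap above assumes an even batch size):

images = tf.random.uniform([4, 416, 416, 3])
boxes = tf.random.uniform([4, 10, 4])            # normalized yxyx, padded to 10 boxes
classes = tf.random.uniform([4, 10], maxval=80.0)
image, boxes, classes, num_detections = cutmix_batch(
    images, boxes, classes,
    target_width=128, target_height=128,
    offset_width=64, offset_height=64)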
Example 8
def cut_out(image_full, boxes, classes, target_width, target_height,
            offset_width, offset_height):
    shape = tf.shape(image_full)

    # Images are either [batch, height, width, channels] or [height, width, channels].
    if tf.shape(shape)[0] == 4:
        height = shape[1]
        width = shape[2]
    else:
        height = shape[0]
        width = shape[1]

    # Build a mask that is 0 inside the cut window and 1 elsewhere.
    image_crop = tf.image.crop_to_bounding_box(image_full, offset_height,
                                               offset_width, target_height,
                                               target_width)
    image_crop = tf.ones_like(image_crop)
    image_crop = tf.image.pad_to_bounding_box(image_crop, offset_height,
                                              offset_width, height, width)
    image_crop = 1 - image_crop

    x_lower_bound = offset_width / width
    y_lower_bound = offset_height / height

    x_upper_bound = (offset_width + target_width) / width
    y_upper_bound = (offset_height + target_height) / height

    boxes = box_ops.yxyx_to_xcycwh(boxes)

    x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

    # Keep only the boxes whose centers fall outside the cut window.
    x_mask_lower = x > x_lower_bound
    y_mask_lower = y > y_lower_bound
    x_mask_upper = x < x_upper_bound
    y_mask_upper = y < y_upper_bound

    x_mask = tf.math.logical_and(x_mask_lower, x_mask_upper)
    y_mask = tf.math.logical_and(y_mask_lower, y_mask_upper)
    mask = tf.math.logical_not(tf.math.logical_and(x_mask, y_mask))

    x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
    y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
    w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
    h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
    classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
    classes = tf.squeeze(classes, axis=-1)

    boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
    boxes = box_ops.xcycwh_to_yxyx(boxes)

    # Zero out the pixels inside the cut window.
    image_full *= image_crop
    return image_full, boxes, classes
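The masking trick above builds a tensor that is 0 inside the cut window and 1 elsewhere, then multiplies it into the image. A toy demonstration (shapes chosen only for illustration):

img = tf.ones([8, 8, 3])
hole = tf.ones([2, 2, 3])
mask = 1 - tf.image.pad_to_bounding_box(hole, 3, 3, 8, 8)
erased = img * mask   # the 2x2 window at offset (3, 3) is now zero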
Example 9
def fit_preserve_aspect_ratio(image,
                              boxes,
                              width=None,
                              height=None,
                              target_dim=None):
    if width is None or height is None:
        shape = tf.shape(image)
        # NOTE: `width` is taken from the first spatial dimension, matching
        # the convention used by this function's callers.
        if tf.shape(shape)[0] == 4:
            width = shape[1]
            height = shape[2]
        else:
            width = shape[0]
            height = shape[1]

    clipper = tf.math.maximum(width, height)
    if target_dim is None:
        target_dim = clipper

    pad_width = clipper - width
    pad_height = clipper - height
    image = tf.image.pad_to_bounding_box(image, pad_width // 2,
                                         pad_height // 2, clipper, clipper)

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(boxes, 4, axis=-1)

    # Rescale the normalized coordinates onto the square canvas and shift
    # the centers by the normalized padding offsets.
    y *= tf.cast(width / clipper, tf.float32)
    x *= tf.cast(height / clipper, tf.float32)

    y += tf.cast((pad_width / clipper) / 2, tf.float32)
    x += tf.cast((pad_height / clipper) / 2, tf.float32)

    h *= tf.cast(width / clipper, tf.float32)
    w *= tf.cast(height / clipper, tf.float32)

    boxes = tf.concat([x, y, w, h], axis=-1)

    boxes = box_ops.xcycwh_to_yxyx(boxes)
    image = tf.image.resize(image, (target_dim, target_dim))
    return image, boxes
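A usage sketch, matching the caller convention in the parsers below where width is the first spatial dimension (values chosen only for illustration):

img = tf.zeros([300, 400, 3])
boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]])
img, boxes = fit_preserve_aspect_ratio(img, boxes, width=300, height=400,
                                       target_dim=416)
# img is now 416 x 416 with the original content letterboxed inside.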
Example 10
  def _postprocess_fn(self, image, label):

    if self._cutmix:
      batch_size = tf.shape(image)[0]
      # cutmix swaps patches between images in the batch, so an even batch
      # size (>= 2) is expected here.
      if batch_size >= 1:
        boxes = box_utils.xcycwh_to_yxyx(label['bbox'])
        classes = label['classes']
        image, boxes, classes, num_detections = preprocessing_ops.randomized_cutmix_batch(
            image, boxes, classes)
        boxes = box_utils.yxyx_to_xcycwh(boxes)
        label['bbox'] = pad_max_instances(
            boxes, self._max_num_instances, pad_axis=-2, pad_value=0)
        label['classes'] = pad_max_instances(
            classes, self._max_num_instances, pad_axis=-1, pad_value=-1)

    # Multi-scale training: with probability self._pct_rand pick a random
    # input resolution (a multiple of the network stride).
    randscale = self._image_w // self._net_down_scale
    if not self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=21,
                                      seed=self._seed,
                                      dtype=tf.int32)
    width = randscale * self._net_down_scale
    image = tf.image.resize(image, (width, width))

    best_anchors = preprocessing_ops.get_best_anchor_batch(
        label['bbox'], self._anchors, width=self._image_w, height=self._image_h)
    label['best_anchors'] = pad_max_instances(
        best_anchors, self._max_num_instances, pad_axis=-2, pad_value=0)

    grid = self._build_grid(
        label, width, batch=True, use_tie_breaker=self._use_tie_breaker)
    label.update({'grid_form': grid})
    label['bbox'] = box_utils.xcycwh_to_yxyx(label['bbox'])
    return image, label
Example 11
  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    Args:
      data: a dict of Tensors produced by the decoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """

    image = data['image'] / 255  # normalize pixels to [0, 1]

    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    # Randomly blur the image with one of three strengths.
    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
      image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    # Jitter hue, saturation, and brightness in HSV space.
    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
      delta = preprocessing_ops.rand_uniform_strong(-0.1, 0.1)
      i_h = tf.clip_by_value(i_h + delta, 0.0, 1.0)
    if self._aug_rand_saturation:
      i_s = i_s * preprocessing_ops.rand_scale(0.75)
    if self._aug_rand_brightness:
      i_v = i_v * preprocessing_ops.rand_scale(0.75)
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    # Add random Gaussian noise, clipped so the image stays in [0, 1].
    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.clip_by_value(noise, 0.0, 0.5)
    image = tf.clip_by_value(image + noise, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
      boxes = box_ops.denormalize_boxes(boxes, image_shape)
      # NOTE: the jitter magnitude is hardcoded rather than taken from
      # self._jitter_boxes.
      boxes = box_ops.jitter_boxes(boxes, 0.025)
      boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
      image, boxes, classes = preprocessing_ops.random_jitter(
          image, boxes, classes, self._jitter_im, seed=self._seed)
      # image, boxes, classes = preprocessing_ops.random_translate(image, boxes, classes, 0.2, seed=self._seed)

    if self._aug_rand_zoom:
      image, boxes, classes = preprocessing_ops.random_zoom_crop(
          image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    randscale = self._image_w // self._net_down_scale

    if self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=15,
                                      seed=self._seed,
                                      dtype=tf.int32)

    if self._letter_box:
      image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
          image,
          boxes,
          width=width,
          height=height,
          target_dim=randscale * self._net_down_scale)
      width = randscale * self._net_down_scale
      height = randscale * self._net_down_scale

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,  # randscale * self._net_down_scale,
        default_height=height,  # randscale * self._net_down_scale,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)
    num_dets = tf.shape(classes)[0]

    # padding
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
      best_anchors = preprocessing_ops.get_best_anchor(
          boxes, self._anchors, width=self._image_w, height=self._image_h)
      best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
          best_anchors, self._max_num_instances, 0)
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'best_anchors': tf.cast(best_anchors, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
      grid = self._build_grid(
          labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
      labels.update({'grid_form': grid})
      labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
    return image, labels
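rand_uniform_strong and rand_scale are used above but not defined in these examples. A hedged sketch following the darknet-style convention (draw a scale factor and invert it half the time so the jitter is symmetric around 1):

def rand_uniform_strong(minval, maxval, dtype=tf.float32):
    # Hypothetical: a plain uniform sample over [minval, maxval).
    return tf.random.uniform([], minval=minval, maxval=maxval, dtype=dtype)

def rand_scale(val, dtype=tf.float32):
    # Hypothetical: a scale factor drawn between 1 and 1 + val,
    # inverted with probability 0.5.
    scale = rand_uniform_strong(1.0, 1.0 + val, dtype=dtype)
    do_invert = rand_uniform_strong(0.0, 1.0)
    return tf.where(do_invert > 0.5, 1.0 / scale, scale)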
Example 12
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.
        Args:
          data: a dict of Tensors produced by the decoder.
        Returns:
          images: the image tensor.
          labels: a dict of Tensors that contains labels.
        """

        shape = tf.shape(data['image'])
        image = data['image'] / 255
        boxes = data['groundtruth_boxes']
        width = shape[0]
        height = shape[1]

        image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=self._max_process_size)

        image_shape = tf.shape(image)[:2]

        if self._random_flip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        randscale = self._image_w // self._net_down_scale

        if self._fixed_size:
            do_scale = tf.greater(
                tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
                1 - self._pct_rand)
            if do_scale:
                randscale = tf.random.uniform([],
                                              minval=10,
                                              maxval=21,
                                              seed=self._seed,
                                              dtype=tf.int32)

        if self._jitter_boxes != 0.0:
            boxes = box_ops.denormalize_boxes(boxes, image_shape)
            # NOTE: the jitter magnitude is hardcoded rather than taken from
            # self._jitter_boxes.
            boxes = box_ops.jitter_boxes(boxes, 0.025)
            boxes = box_ops.normalize_boxes(boxes, image_shape)

        boxes = box_utils.yxyx_to_xcycwh(boxes)

        if self._jitter_im != 0.0:
            image, boxes = preprocessing_ops.random_translate(image,
                                                              boxes,
                                                              self._jitter_im,
                                                              seed=self._seed)

        if self._aug_rand_zoom:
            image, boxes = preprocessing_ops.resize_crop_filter(
                image,
                boxes,
                default_width=self._image_w,
                default_height=self._image_h,
                target_width=randscale * self._net_down_scale,
                target_height=randscale * self._net_down_scale)
        # NOTE: the resize dimension is hardcoded; it presumably should track
        # self._image_w / self._image_h.
        image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

        if self._aug_rand_brightness:
            image = tf.image.random_brightness(image=image,
                                               max_delta=.1)  # Brightness
        if self._aug_rand_saturation:
            image = tf.image.random_saturation(image=image,
                                               lower=0.75,
                                               upper=1.25)  # Saturation
        if self._aug_rand_hue:
            image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
        image = tf.clip_by_value(image, 0.0, 1.0)
        best_anchors = preprocessing_ops.get_best_anchor(boxes,
                                                         self._anchors,
                                                         width=self._image_w,
                                                         height=self._image_h)

        # padding
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        classes = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_classes'], self._max_num_instances, -1)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        area = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_area'], self._max_num_instances, 0)
        is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        if self._fixed_size:
            grid = self._build_grid(labels,
                                    self._image_w,
                                    use_tie_breaker=self._use_tie_breaker)
            labels.update({'grid_form': grid})

        return image, labels