def compute_iou(box1, box2, yxyx=False):
    """Calculates the intersection of union between box1 and box2.
    Args:
        box1: a `Tensor` whose last dimension is 4 representing the coordinates of boxes in
            x_center, y_center, width, height.
        box2: a `Tensor` whose last dimension is 4 representing the coordinates of boxes in
            x_center, y_center, width, height.
    Returns:
        iou: a `Tensor` who represents the intersection over union.
    """
    # get box corners
    with tf.name_scope('iou'):
        if not yxyx:
            box1 = box_utils.xcycwh_to_yxyx(box1)
            box2 = box_utils.xcycwh_to_yxyx(box2)

        b1mi, b1ma = tf.split(box1, 2, axis=-1)
        b2mi, b2ma = tf.split(box2, 2, axis=-1)
        intersect_mins = tf.math.maximum(b1mi, b2mi)
        intersect_maxes = tf.math.minimum(b1ma, b2ma)
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                       tf.zeros_like(intersect_mins))
        intersection = tf.reduce_prod(
            intersect_wh,
            axis=-1)  # intersect_wh[..., 0] * intersect_wh[..., 1]

        box1_area = tf.math.abs(tf.reduce_prod(b1ma - b1mi, axis=-1))
        box2_area = tf.math.abs(tf.reduce_prod(b2ma - b2mi, axis=-1))
        union = box1_area + box2_area - intersection

        iou = intersection / (union + 1e-7
                              )  # tf.math.divide_no_nan(intersection, union)
        iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
    return iou
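
A quick numeric check of compute_iou above. This is a minimal sketch: the
local _xcycwh_to_yxyx helper is a self-contained stand-in for
box_utils.xcycwh_to_yxyx (its assumed behavior), and the expected value is
worked by hand.

import tensorflow as tf

def _xcycwh_to_yxyx(box):
    # stand-in for box_utils.xcycwh_to_yxyx: centers/sizes to corner form
    x, y, w, h = tf.split(box, 4, axis=-1)
    return tf.concat([y - h / 2, x - w / 2, y + h / 2, x + w / 2], axis=-1)

box1 = tf.constant([0.5, 0.5, 0.4, 0.4])  # corners (0.3, 0.3)..(0.7, 0.7)
box2 = tf.constant([0.7, 0.7, 0.4, 0.4])  # corners (0.5, 0.5)..(0.9, 0.9)

# intersection = 0.2 * 0.2 = 0.04, union = 0.16 + 0.16 - 0.04 = 0.28,
# so IoU = 0.04 / 0.28 = 1/7
iou = compute_iou(_xcycwh_to_yxyx(box1), _xcycwh_to_yxyx(box2), yxyx=True)
print(iou.numpy())  # ~0.1429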
Example #2
    def validation_step(self, inputs, model, metrics=None):
        # get the data point
        image, label = inputs

        # run the forward pass; no gradients are computed or applied in validation
        y_pred = model(image, training=False)
        loss, metrics = self.build_losses(y_pred['raw_output'], label)

        # custom metrics
        loss_metrics = {'loss': loss}
        loss_metrics.update(metrics)
        label['boxes'] = xcycwh_to_yxyx(label['bbox'])
        del label['bbox']

        coco_model_outputs = {
            'detection_boxes': y_pred['bbox'],
            'detection_scores': y_pred['confidence'],
            'detection_classes': y_pred['classes'],
            'num_detections': tf.shape(y_pred['bbox'])[:-1],
            'source_id': label['source_id'],
        }

        loss_metrics.update(
            {self.coco_metric.name: (label, coco_model_outputs)})
        return loss_metrics
Example #3
    def parse_prediction_path(self, generator, len_mask, scale_xy, inputs):
        shape = tf.shape(inputs)
        # reshape the yolo output to (batch_size, height, width, num_anchors, remaining_points)
        data = tf.reshape(inputs, [shape[0], shape[1], shape[2], len_mask, -1])

        # compute the true box output values
        boxes, obns, classifics = tf.split(data, [4, 1, -1], axis=-1)
        box = box_utils.xcycwh_to_yxyx(boxes)

        # compute the number of located objects from the objectness channel
        num_dets = tf.reduce_sum(tf.squeeze(obns, axis=-1), axis=(1, 2, 3))

        mask = tf.cast(tf.ones_like(obns), dtype=tf.bool)
        mask = tf.reduce_any(mask, axis=(0, -1))

        # reduce the dimensions of the predictions to (batch size, max predictions, -1)
        box = tf.boolean_mask(box, mask, axis=1)
        classifications = tf.boolean_mask(classifics, mask, axis=1)
        objectness = tf.squeeze(tf.boolean_mask(obns, mask, axis=1),
                                axis=-1)

        objectness, box, classifications = nms_ops.sort_drop(
            objectness, box, classifications, self._max_boxes)
        return objectness, box, classifications, num_dets
Example #4
def translate_boxes(box, classes, translate_x, translate_y):
    with tf.name_scope('translate_boxes'):
        box = box_ops.yxyx_to_xcycwh(box)
        x, y, w, h = tf.split(box, 4, axis=-1)
        x = x + translate_x
        y = y + translate_y

        x_mask_lower = x >= 0
        y_mask_lower = y >= 0
        x_mask_upper = x < 1
        y_mask_upper = y < 1

        x_mask = tf.math.logical_and(x_mask_lower, x_mask_upper)
        y_mask = tf.math.logical_and(y_mask_lower, y_mask_upper)
        mask = tf.math.logical_and(x_mask, y_mask)

        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        box = tf.concat([x, y, w, h], axis=-1)
        box = box_ops.xcycwh_to_yxyx(box)
    return box, classes
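
translate_boxes and several snippets below rely on the repo-internal
shift_zeros helper. Here is a minimal unbatched stand-in under an assumed
reading of its semantics: zero out the masked-off entries and pack the kept
values to the front along the instance axis.

import tensorflow as tf

def shift_zeros_sketch(values, mask):
    # values, mask: [num_instances, 1]; zero the dropped entries, then
    # stably sort on the mask so the kept entries come first
    vals = tf.where(mask, values, tf.zeros_like(values))
    order = tf.argsort(
        tf.cast(mask[:, 0], tf.int32), direction='DESCENDING', stable=True)
    return tf.gather(vals, order, axis=0)

x = tf.constant([[0.2], [1.4], [0.6]])
mask = x < 1  # the second box was translated out of frame
print(shift_zeros_sketch(x, mask).numpy())  # [[0.2], [0.6], [0.]]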
Example #5
def pad_filter_to_bbox(image, boxes, classes, target_width, target_height,
                       offset_width, offset_height):
    with tf.name_scope('pad_filter_to_bbox'):
        shape = tf.shape(image)

        if tf.shape(shape)[0] == 4:
            height = shape[1]
            width = shape[2]
        else:  # tf.shape(shape)[0] == 3:
            height = shape[0]
            width = shape[1]

        image = tf.image.pad_to_bounding_box(image, offset_height,
                                             offset_width, target_height,
                                             target_width)

        x_lower_bound = tf.cast(offset_width / width, tf.float32)
        y_lower_bound = tf.cast(offset_height / height, tf.float32)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

        x = (x + x_lower_bound) * tf.cast(width / target_width, x.dtype)
        y = (y + y_lower_bound) * tf.cast(height / target_height, y.dtype)
        w = w * tf.cast(width / target_width, w.dtype)
        h = h * tf.cast(height / target_height, h.dtype)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)
    return image, boxes, classes
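
The box transform in pad_filter_to_bbox simply re-expresses the same pixel
position on the padded canvas. A one-line check with hypothetical sizes:

# source is 400 px wide, padded onto a 500 px canvas at a 50 px offset
width, target_width, offset_width = 400, 500, 50
x = 0.5  # center at pixel 200 in the source
x_new = (x + offset_width / width) * (width / target_width)
print(x_new * target_width)  # ~250.0: the old pixel 200 plus the 50 px offset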
Example #6
def cutmix_1(image_to_crop, boxes1, classes1, image_mask, boxes2, classes2,
             target_width, target_height, offset_width, offset_height):
    with tf.name_scope('cutmix'):
        image, boxes, classes = cut_out(image_mask, boxes2, classes2,
                                        target_width, target_height,
                                        offset_width, offset_height)
        image_, boxes_, classes_ = crop_filter_to_bbox(image_to_crop,
                                                       boxes1,
                                                       classes1,
                                                       target_width,
                                                       target_height,
                                                       offset_width,
                                                       offset_height,
                                                       fix=True)
        image += image_
        boxes = tf.concat([boxes, boxes_], axis=-2)
        classes = tf.concat([classes, classes_], axis=-1)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(boxes, 4, axis=-1)

        mask = x > 0
        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)

    return image, boxes, classes
Example #7
def crop_filter_to_bbox(image,
                        boxes,
                        classes,
                        target_width,
                        target_height,
                        offset_width,
                        offset_height,
                        fix=False):
    with tf.name_scope('resize_crop_filter'):
        shape = tf.shape(image)

        if tf.shape(shape)[0] == 4:
            height = shape[1]
            width = shape[2]
        else:  # tf.shape(shape)[0] == 3:
            height = shape[0]
            width = shape[1]

        image = tf.image.crop_to_bounding_box(image, offset_height,
                                              offset_width, target_height,
                                              target_width)
        if fix:
            image = tf.image.pad_to_bounding_box(image, offset_height,
                                                 offset_width, height, width)

        x_lower_bound = offset_width / width
        y_lower_bound = offset_height / height

        x_upper_bound = (offset_width + target_width) / width
        y_upper_bound = (offset_height + target_height) / height

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

        x_mask_lower = x > x_lower_bound
        y_mask_lower = y > y_lower_bound
        x_mask_upper = x < x_upper_bound
        y_mask_upper = y < y_upper_bound

        x_mask = tf.math.logical_and(x_mask_lower, x_mask_upper)
        y_mask = tf.math.logical_and(y_mask_lower, y_mask_upper)

        mask = tf.math.logical_and(x_mask, y_mask)

        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        if not fix:
            x = (x - x_lower_bound) * tf.cast(width / target_width, x.dtype)
            y = (y - y_lower_bound) * tf.cast(height / target_height, y.dtype)
            w = w * tf.cast(width / target_width, w.dtype)
            h = h * tf.cast(height / target_height, h.dtype)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)
    return image, boxes, classes
Example #8
def compute_diou(box1, box2):
    """Calculates the distance intersection of union between box1 and box2.
    Args:
        box1: a `Tensor` whose last dimension is 4 representing the coordinates of boxes in
            x_center, y_center, width, height.
        box2: a `Tensor` whose last dimension is 4 representing the coordinates of boxes in
            x_center, y_center, width, height.
    Returns:
        iou: a `Tensor` who represents the distance intersection over union.
    """
    with tf.name_scope('diou'):
        # compute center distance
        dist = box_utils.center_distance(box1[..., 0:2], box2[..., 0:2])

        # get box corners
        box1 = box_utils.xcycwh_to_yxyx(box1)
        box2 = box_utils.xcycwh_to_yxyx(box2)

        # compute IOU
        intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
        intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                       tf.zeros_like(intersect_mins))
        intersection = intersect_wh[..., 0] * intersect_wh[..., 1]

        box1_area = tf.math.abs(
            tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
        box2_area = tf.math.abs(
            tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
        union = box1_area + box2_area - intersection

        iou = tf.math.divide_no_nan(intersection, union)
        iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)

        # compute the squared diagonal of the smallest enclosing box
        c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
        c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])

        diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1)

        regularization = tf.math.divide_no_nan(dist, diag_dist)
        # the standard DIoU subtracts the normalized center-distance penalty
        diou = iou - regularization
    return iou, diou
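
With the same two boxes used in the compute_iou check above, the DIoU terms
can be worked by hand. A sketch, assuming box_utils.center_distance returns
the squared distance between box centers:

dist = 0.2**2 + 0.2**2    # centers (0.5, 0.5) and (0.7, 0.7): 0.08
diag = 0.6**2 + 0.6**2    # enclosing box (0.3, 0.3)..(0.9, 0.9): 0.72
iou = 1 / 7               # from the compute_iou example
diou = iou - dist / diag  # 1/7 - 1/9 = 2/63, roughly 0.0317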
Example #9
  def parse_prediction_path(self, generator, len_mask, scale_xy, inputs):
    shape = tf.shape(inputs)
    # reshape the yolo output to (batch_size, height, width, num_anchors, remaining_points)
    data = tf.reshape(inputs, [shape[0], shape[1], shape[2], len_mask, -1])
    centers, anchors = generator(shape[1], shape[2], shape[0], dtype=data.dtype)

    # compute the true box output values
    ubox, obns, classifics = tf.split(data, [4, 1, -1], axis=-1)
    classes = tf.shape(classifics)[-1]
    obns = tf.squeeze(obns, axis=-1)
    _, _, boxes = self.parse_yolo_box_predictions(
        ubox,
        tf.cast(shape[1], data.dtype),
        tf.cast(shape[2], data.dtype),
        anchors,
        centers,
        scale_x_y=scale_xy)
    box = box_utils.xcycwh_to_yxyx(boxes)

    # compute objectness and generate a grid cell mask for where objects are located in the image
    objectness = tf.expand_dims(tf.math.sigmoid(obns), axis=-1)
    scaled = tf.math.sigmoid(classifics) * objectness

    # compute the mask of where objects have been located
    mask_check = tf.fill(
        tf.shape(objectness), tf.cast(self._thresh, dtype=objectness.dtype))
    sub = tf.math.ceil(tf.nn.relu(objectness - mask_check))
    num_dets = tf.reduce_sum(sub, axis=(1, 2, 3))

    box = box * sub
    scaled = scaled * sub
    objectness = objectness * sub

    mask = tf.cast(tf.ones_like(sub), dtype=tf.bool)
    mask = tf.reduce_any(mask, axis=(0, -1))

    # reduce the dimensions of the predictions to (batch size, max predictions, -1)
    box = tf.boolean_mask(box, mask, axis=1)
    classifications = tf.boolean_mask(scaled, mask, axis=1)
    objectness = tf.squeeze(tf.boolean_mask(objectness, mask, axis=1), axis=-1)

    #objectness, box, classifications = nms_ops.sort_drop(objectness, box, classifications, self._max_boxes)
    box, classifications, objectness = nms_ops.nms(
        box,
        classifications,
        objectness,
        self._max_boxes,
        2.5,
        self._nms_thresh,
        sorted=False,
        one_hot=True)
    return objectness, box, classifications, num_dets
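
The sub tensor above uses ceil(relu(x - t)) as a hard indicator: it is 1.0
wherever the sigmoid objectness strictly exceeds the confidence threshold and
0.0 elsewhere. In miniature:

import tensorflow as tf

obj = tf.constant([0.10, 0.45, 0.90])
thresh = 0.45
indicator = tf.math.ceil(tf.nn.relu(obj - thresh))
print(indicator.numpy())  # [0. 0. 1.]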
Example #10
  def _parse_eval_data(self, data):
    """Generates images and labels that are usable for model training.
        Args:
          data: a dict of Tensors produced by the decoder.
        Returns:
          images: the image tensor.
          labels: a dict of Tensors that contains labels.
        """

    shape = tf.shape(data['image'])
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    width = shape[1]
    height = shape[0]

    image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
        image, boxes, width=width, height=height, target_dim=self._image_w)
    boxes = box_utils.yxyx_to_xcycwh(boxes)

    best_anchors = preprocessing_ops.get_best_anchor(
        boxes, self._anchors, width=self._image_w, height=self._image_h)
    boxes = pad_max_instances(boxes, self._max_num_instances, 0)
    classes = pad_max_instances(data['groundtruth_classes'],
                                self._max_num_instances, -1)
    best_anchors = pad_max_instances(best_anchors, self._max_num_instances, 0)
    area = pad_max_instances(data['groundtruth_area'], self._max_num_instances,
                             0)
    is_crowd = pad_max_instances(
        tf.cast(data['groundtruth_is_crowd'], tf.int32),
        self._max_num_instances, 0)

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(boxes, self._dtype),
        'classes': tf.cast(classes, self._dtype),
        'area': tf.cast(area, self._dtype),
        'is_crowd': is_crowd,
        'best_anchors': tf.cast(best_anchors, self._dtype),
        'width': width,
        'height': height,
        'num_detections': tf.shape(data['groundtruth_classes'])[0]
    }

    # if self._fixed_size:
    grid = self._build_grid(
        labels,
        self._image_w,
        batch=False,
        use_tie_breaker=self._use_tie_breaker)
    labels.update({'grid_form': grid})
    labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    return image, labels
Example #11
def compute_giou(box1, box2):
    """Calculates the generalized intersection of union between box1 and box2.
    Args:
        box1: a `Tensor` whose last dimension is 4 representing the coordinates of boxes in
            x_center, y_center, width, height.
        box2: a `Tensor` whose last dimension is 4 representing the coordinates of boxes in
            x_center, y_center, width, height.
    Returns:
        iou: a `Tensor` who represents the generalized intersection over union.
    """
    with tf.name_scope('giou'):
        # get box corners
        box1 = box_utils.xcycwh_to_yxyx(box1)
        box2 = box_utils.xcycwh_to_yxyx(box2)

        # compute IOU
        intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
        intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                       tf.zeros_like(intersect_mins))
        intersection = intersect_wh[..., 0] * intersect_wh[..., 1]

        box1_area = tf.math.abs(
            tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
        box2_area = tf.math.abs(
            tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
        union = box1_area + box2_area - intersection

        iou = tf.math.divide_no_nan(intersection, union)
        iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)

        # find the smallest box to encompass both box1 and box2
        c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
        c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
        c = box_utils.get_area((c_mins, c_maxes), use_tuple=True)

        # compute giou
        giou = iou - tf.math.divide_no_nan((c - union), c)
    return iou, giou
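
Reusing the two boxes from the compute_iou check, the GIoU penalty can also
be worked by hand:

c = 0.6 * 0.6                 # smallest enclosing box (0.3, 0.3)..(0.9, 0.9)
union = 0.16 + 0.16 - 0.04    # 0.28
iou = 0.04 / union            # 1/7
giou = iou - (c - union) / c  # ~0.1429 - 0.2222 = ~-0.0794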
Example #12
  def _postprocess_fn(self, image, label):

    if self._cutmix:
      batch_size = tf.shape(image)[0]
      if batch_size >= 1:
        boxes = box_utils.xcycwh_to_yxyx(label['bbox'])
        classes = label['classes']
        image, boxes, classes, num_detections = preprocessing_ops.randomized_cutmix_batch(
            image, boxes, classes)
        boxes = box_utils.yxyx_to_xcycwh(boxes)
        label['bbox'] = pad_max_instances(
            boxes, self._max_num_instances, pad_axis=-2, pad_value=0)
        label['classes'] = pad_max_instances(
            classes, self._max_num_instances, pad_axis=-1, pad_value=-1)

    randscale = self._image_w // self._net_down_scale
    if not self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=21,
                                      seed=self._seed,
                                      dtype=tf.int32)
    width = randscale * self._net_down_scale
    image = tf.image.resize(image, (width, width))

    best_anchors = preprocessing_ops.get_best_anchor_batch(
        label['bbox'], self._anchors, width=self._image_w, height=self._image_h)
    label['best_anchors'] = pad_max_instances(
        best_anchors, self._max_num_instances, pad_axis=-2, pad_value=0)

    grid = self._build_grid(
        label, width, batch=True, use_tie_breaker=self._use_tie_breaker)
    label.update({'grid_form': grid})
    label['bbox'] = box_utils.xcycwh_to_yxyx(label['bbox'])
    return image, label
Example #13
def cut_out(image_full, boxes, classes, target_width, target_height,
            offset_width, offset_height):
    shape = tf.shape(image_full)

    if tf.shape(shape)[0] == 4:
        height = shape[1]
        width = shape[2]
    else:  # tf.shape(shape)[0] == 3:
        height = shape[0]
        width = shape[1]

    # build a mask that is 0 inside the cut region and 1 outside it
    image_crop = tf.image.crop_to_bounding_box(image_full, offset_height,
                                               offset_width, target_height,
                                               target_width)
    image_crop = tf.ones_like(image_crop)
    image_crop = tf.image.pad_to_bounding_box(image_crop, offset_height,
                                              offset_width, height, width)
    image_crop = 1 - image_crop

    x_lower_bound = offset_width / width
    y_lower_bound = offset_height / height

    x_upper_bound = (offset_width + target_width) / width
    y_upper_bound = (offset_height + target_height) / height

    boxes = box_ops.yxyx_to_xcycwh(boxes)

    x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

    x_mask_lower = x > x_lower_bound
    y_mask_lower = y > y_lower_bound
    x_mask_upper = x < x_upper_bound
    y_mask_upper = y < y_upper_bound

    x_mask = tf.math.logical_and(x_mask_lower, x_mask_upper)
    y_mask = tf.math.logical_and(y_mask_lower, y_mask_upper)
    mask = tf.math.logical_not(tf.math.logical_and(x_mask, y_mask))

    x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
    y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
    w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
    h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
    classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
    classes = tf.squeeze(classes, axis=-1)

    boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
    boxes = box_ops.xcycwh_to_yxyx(boxes)

    image_full *= image_crop
    return image_full, boxes, classes
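
The mask construction in cut_out is easiest to see in isolation: a block of
ones is padded into a zero canvas and inverted, so multiplying by the result
blanks exactly the cut region. A minimal sketch:

import tensorflow as tf

img = tf.ones([1, 4, 4, 1])
hole = tf.image.pad_to_bounding_box(tf.ones([1, 2, 2, 1]), 1, 1, 4, 4)
keep = 1 - hole  # 0 inside the 2x2 hole, 1 elsewhere
print(tf.squeeze(img * keep).numpy())
# [[1. 1. 1. 1.]
#  [1. 0. 0. 1.]
#  [1. 0. 0. 1.]
#  [1. 1. 1. 1.]]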
Example #14
def cutmix_batch(image, boxes, classes, target_width, target_height,
                 offset_width, offset_height):
    with tf.name_scope('cutmix_batch'):

        image_, boxes_, classes_ = cut_out(image, boxes, classes, target_width,
                                           target_height, offset_width,
                                           offset_height)
        image__, boxes__, classes__ = crop_filter_to_bbox(image,
                                                          boxes,
                                                          classes,
                                                          target_width,
                                                          target_height,
                                                          offset_width,
                                                          offset_height,
                                                          fix=True)

        mix = tf.random.uniform([], minval=0, maxval=1)
        if mix > 0.5:
            i_split1, i_split2 = tf.split(image__, 2, axis=0)
            b_split1, b_split2 = tf.split(boxes__, 2, axis=0)
            c_split1, c_split2 = tf.split(classes__, 2, axis=0)

            image__ = tf.concat([i_split2, i_split1], axis=0)
            boxes__ = tf.concat([b_split2, b_split1], axis=0)
            classes__ = tf.concat([c_split2, c_split1], axis=0)

        image = image_ + image__
        boxes = tf.concat([boxes_, boxes__], axis=-2)
        classes = tf.concat([classes_, classes__], axis=-1)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(boxes, 4, axis=-1)

        mask = x > 0
        x = shift_zeros(x, mask)  # tf.boolean_mask(x, mask)
        y = shift_zeros(y, mask)  # tf.boolean_mask(y, mask)
        w = shift_zeros(w, mask)  # tf.boolean_mask(w, mask)
        h = shift_zeros(h, mask)  # tf.boolean_mask(h, mask)
        classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
        classes = tf.squeeze(classes, axis=-1)

        boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(boxes)

        x = tf.squeeze(x, axis=-1)
        classes = tf.where(x == 0, -1, classes)

        num_detections = tf.reduce_sum(tf.cast(x > 0, tf.int32), axis=-1)

    return image, boxes, classes, num_detections
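
The mix branch in cutmix_batch pairs each image with the one half a batch
away: the batch is split in two and the halves are concatenated in swapped
order. In miniature:

import tensorflow as tf

batch = tf.range(6)  # stand-in for a batch of 6 images
first, second = tf.split(batch, 2, axis=0)
print(tf.concat([second, first], axis=0).numpy())  # [3 4 5 0 1 2]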
Example #15
def fit_preserve_aspect_ratio(image,
                              boxes,
                              width=None,
                              height=None,
                              target_dim=None):
    if width is None or height is None:
        shape = tf.shape(image)
        if tf.shape(shape)[0] == 4:
            height = shape[1]
            width = shape[2]
        else:
            height = shape[0]
            width = shape[1]

    clipper = tf.math.maximum(width, height)
    if target_dim is None:
        target_dim = clipper

    # pad the short side out to a square of side `clipper`
    pad_width = clipper - width
    pad_height = clipper - height
    image = tf.image.pad_to_bounding_box(image, pad_height // 2,
                                         pad_width // 2, clipper, clipper)

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(boxes, 4, axis=-1)

    # rescale the normalized coordinates to the padded square and shift the
    # centers by the padding offsets
    y *= tf.cast(height / clipper, tf.float32)
    x *= tf.cast(width / clipper, tf.float32)

    y += tf.cast((pad_height / clipper) / 2, tf.float32)
    x += tf.cast((pad_width / clipper) / 2, tf.float32)

    h *= tf.cast(height / clipper, tf.float32)
    w *= tf.cast(width / clipper, tf.float32)

    boxes = tf.concat([x, y, w, h], axis=-1)

    boxes = box_ops.xcycwh_to_yxyx(boxes)
    image = tf.image.resize(image, (target_dim, target_dim))
    return image, boxes
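
A quick check of the letterbox math with a hypothetical 300x400 (height x
width) image padded to a 400x400 square: a box center on the top edge of the
source should land pad_height / 2 = 50 px down the canvas.

height, width = 300, 400
clipper = max(width, height)   # 400
pad_height = clipper - height  # 100
y = 0.0                        # top edge in the source image
y_new = y * (height / clipper) + (pad_height / clipper) / 2
print(y_new * clipper)         # 50.0 px from the top of the canvas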
Example #16
  def parse_prediction_path(self, inputs, len_mask):
    shape = tf.shape(inputs)
    # reshape the yolo output to (batch_size, height, width, num_anchors, remaining_points)
    data = tf.reshape(inputs, [shape[0], shape[1], shape[2], len_mask, -1])

    # compute the true box output values
    boxes, objectness, classifics = tf.split(data, [4, 1, -1], axis=-1)
    #objectness = tf.squeeze(obns, axis=-1)
    box = box_utils.xcycwh_to_yxyx(boxes)

    mask = tf.cast(tf.ones_like(objectness), dtype=tf.bool)
    mask = tf.reduce_any(mask, axis=(0, -1))

    # reduce the dimensions of the predictions to (batch size, max predictions, -1)
    box = tf.boolean_mask(box, mask, axis=1)
    classifications = tf.boolean_mask(classifics, mask, axis=1)
    #objectness = tf.boolean_mask(objectness, mask, axis=1)
    objectness = tf.squeeze(tf.boolean_mask(objectness, mask, axis=1), axis=-1)

    objectness, box, classifications = nms_ops.sort_drop(
        objectness, box, classifications, self._max_boxes)
    return objectness, box, classifications
Example #17
            logging.info('Finished loading pretrained checkpoint from %s',
                         ckpt_dir_or_file)


if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from yolo.utils.run_utils import prep_gpu
    prep_gpu()

    config = exp_cfg.YoloTask(model=exp_cfg.Yolo(base='v3'))
    task = YoloTask(config)
    model = task.build_model()
    model.summary()
    task.initialize(model)

    train_data = task.build_inputs(config.train_data)
    # test_data = task.build_inputs(config.task.validation_data)

    for l, (i, j) in enumerate(train_data):
        preds = model(i, training=False)
        boxes = xcycwh_to_yxyx(j['bbox'])

        i = tf.image.draw_bounding_boxes(i, boxes, [[1.0, 0.0, 0.0]])

        i = tf.image.draw_bounding_boxes(i, preds['bbox'], [[0.0, 1.0, 0.0]])
        plt.imshow(i[0].numpy())
        plt.show()

        if l > 2:
            break
Example #18
  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.
        Args:
          data: a dict of Tensors produced by the decoder.
        Returns:
          images: the image tensor.
          labels: a dict of Tensors that contains labels.
        """

    image = data['image'] / 255

    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
      image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
      delta = preprocessing_ops.rand_uniform_strong(
          -0.1, 0.1
      )  # tf.random.uniform([], minval= -0.1,maxval=0.1, seed=self._seed, dtype=tf.float32)
      i_h = i_h + delta  # Hue
      i_h = tf.clip_by_value(i_h, 0.0, 1.0)
    if self._aug_rand_saturation:
      delta = preprocessing_ops.rand_scale(
          0.75
      )  # tf.random.uniform([], minval= 0.5,maxval=1.1, seed=self._seed, dtype=tf.float32)
      i_s = i_s * delta
    if self._aug_rand_brightness:
      delta = preprocessing_ops.rand_scale(
          0.75
      )  # tf.random.uniform([], minval= -0.15,maxval=0.15, seed=self._seed, dtype=tf.float32)
      i_v = i_v * delta
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.math.minimum(noise, 0.5)
    noise = tf.math.maximum(noise, 0)
    image += noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
      boxes = box_ops.denormalize_boxes(boxes, image_shape)
      boxes = box_ops.jitter_boxes(boxes, 0.025)
      boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
      image, boxes, classes = preprocessing_ops.random_jitter(
          image, boxes, classes, self._jitter_im, seed=self._seed)
      # image, boxes, classes = preprocessing_ops.random_translate(image, boxes, classes, 0.2, seed=self._seed)

    if self._aug_rand_zoom:
      image, boxes, classes = preprocessing_ops.random_zoom_crop(
          image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    randscale = self._image_w // self._net_down_scale

    if self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=15,
                                      seed=self._seed,
                                      dtype=tf.int32)

    if self._letter_box:
      image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
          image,
          boxes,
          width=width,
          height=height,
          target_dim=randscale * self._net_down_scale)
      width = randscale * self._net_down_scale
      height = randscale * self._net_down_scale

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,  # randscale * self._net_down_scale,
        default_height=height,  # randscale * self._net_down_scale,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)
    num_dets = tf.shape(classes)[0]

    # padding
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
      best_anchors = preprocessing_ops.get_best_anchor(
          boxes, self._anchors, width=self._image_w, height=self._image_h)
      best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
          best_anchors, self._max_num_instances, 0)
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'best_anchors': tf.cast(best_anchors, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
      grid = self._build_grid(
          labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
      labels.update({'grid_form': grid})
      labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
    return image, labels
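
_parse_train_data leans on two repo-internal samplers. As hedged stand-ins
only: rand_uniform_strong presumably draws a scalar from U(lo, hi), and
rand_scale presumably returns a multiplicative jitter s or 1/s so scaling up
and down are equally likely. A minimal sketch of that reading:

import tensorflow as tf

def rand_uniform_strong(lo, hi):
    # assumed behavior: a scalar sample from U(lo, hi)
    return tf.random.uniform([], minval=lo, maxval=hi)

def rand_scale(delta):
    # assumed behavior: a factor s ~ U(1, 1 + delta), inverted half the time
    s = tf.random.uniform([], minval=1.0, maxval=1.0 + delta)
    flip = tf.random.uniform([]) > 0.5
    return tf.where(flip, s, 1.0 / s)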
Example #19
            max_level=params.parser.max_level,
            min_process_size=params.parser.min_process_size,
            max_process_size=params.parser.max_process_size,
            max_num_instances=params.parser.max_num_instances,
            random_flip=params.parser.random_flip,
            pct_rand=params.parser.pct_rand,
            seed=params.parser.seed,
            anchors=anchors)

        reader = input_reader.InputReader(params,
                                          dataset_fn=tf.data.TFRecordDataset,
                                          decoder_fn=decoder.decode,
                                          parser_fn=parser.parse_fn(
                                              params.is_training))
        dataset = reader.read(input_context=None)
    return dataset


if __name__ == '__main__':
    dataset, dsp = test_yolo_input_task()

    for l, (i, j) in enumerate(dataset):

        boxes = box_ops.xcycwh_to_yxyx(j['bbox'])
        i = tf.image.draw_bounding_boxes(i, boxes, [[1.0, 0.0, 1.0]])
        plt.imshow(i[0].numpy())
        plt.show()

        if l > 30:
            break