Example #1
def call(self, inputs):
    roi_bboxes = inputs[0]
    gt_boxes = inputs[1]
    gt_labels = inputs[2]
    gt_box_indices = inputs[3]
    total_labels = self.hyper_params["total_labels"]
    total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
    total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
    total_bboxes = total_pos_bboxes + total_neg_bboxes
    batch_size = tf.shape(roi_bboxes)[0]
    # Map the selected ground truth boxes to the rois
    gt_boxes_map = helpers.get_gt_boxes_map(gt_boxes, gt_box_indices, batch_size, total_neg_bboxes)
    # Positive rois take the label of their matched gt box; negative rois are filled with the last class index
    pos_gt_labels_map = tf.gather(gt_labels, gt_box_indices, batch_dims=1)
    neg_gt_labels_map = tf.fill((batch_size, total_neg_bboxes), total_labels - 1)
    gt_labels_map = tf.concat([pos_gt_labels_map, neg_gt_labels_map], axis=1)
    # Regression targets between the rois and their matched gt boxes
    roi_bbox_deltas = helpers.get_deltas_from_bboxes(roi_bboxes, gt_boxes_map)
    # Scatter the deltas into class specific slots -> (batch_size, total_bboxes, total_labels, 4)
    flatted_batch_indices = helpers.get_tiled_indices(batch_size, total_bboxes)
    flatted_bbox_indices = tf.reshape(tf.tile(tf.range(total_bboxes), (batch_size, )), (-1, 1))
    flatted_gt_labels_indices = tf.reshape(gt_labels_map, (-1, 1))
    scatter_indices = helpers.get_scatter_indices_for_bboxes([flatted_batch_indices, flatted_bbox_indices, flatted_gt_labels_indices], batch_size, total_bboxes)
    roi_bbox_deltas = tf.scatter_nd(scatter_indices, roi_bbox_deltas, (batch_size, total_bboxes, total_labels, 4))
    roi_bbox_deltas = tf.reshape(roi_bbox_deltas, (batch_size, total_bboxes, total_labels * 4))
    roi_bbox_labels = tf.scatter_nd(scatter_indices, tf.ones((batch_size, total_bboxes), tf.int32), (batch_size, total_bboxes, total_labels))
    # The targets are constants for the loss, so block gradients through them
    return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
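
Example #1 calls two indexing helpers whose bodies are not shown. The sketch below is a hypothetical implementation, not the project's verified code; it assumes get_tiled_indices returns flattened per-image batch indices and get_scatter_indices_for_bboxes stacks the flattened index columns into the (batch_size, total_bboxes, index_depth) layout that tf.scatter_nd expects.

import tensorflow as tf

def get_tiled_indices(batch_size, row_count):
    # Hypothetical sketch: each batch index repeated row_count times, flattened to (batch_size * row_count, 1)
    indices = tf.range(batch_size, dtype=tf.int32)
    indices = tf.tile(tf.expand_dims(indices, axis=1), (1, row_count))
    return tf.reshape(indices, (-1, 1))

def get_scatter_indices_for_bboxes(flatted_indices, batch_size, total_bboxes):
    # Hypothetical sketch: concatenate the flattened index columns and restore the (batch, bbox) layout
    indices = tf.concat(flatted_indices, axis=1)
    return tf.reshape(indices, (batch_size, total_bboxes, len(flatted_indices)))

With three index columns (batch, bbox, label), the scatter in Example #1 places each 4-value delta into the slot of its target class before flattening to total_labels * 4 values per roi; with two columns, as in Example #3, it scatters over the anchor positions.
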
Example #2
def call(self, inputs):
    roi_bboxes = inputs[0]
    gt_boxes = inputs[1]
    gt_labels = inputs[2]
    total_labels = self.hyper_params["total_labels"]
    total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
    total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
    variances = self.hyper_params["variances"]
    batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
    # Calculate IoU values between each roi and the ground truth boxes
    iou_map = helpers.generate_iou_map(roi_bboxes, gt_boxes)
    # For each roi, the index of the gt box with the highest IoU
    max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    # The IoU map has a value for every gt box; reduce over that axis to get the best IoU per roi
    merged_iou_map = tf.reduce_max(iou_map, axis=2)
    # Positive rois: IoU above 0.5, randomly subsampled to total_pos_bboxes per image
    pos_mask = tf.greater(merged_iou_map, 0.5)
    pos_mask = helpers.randomly_select_xyz_mask(
        pos_mask, tf.tile(tf.constant([total_pos_bboxes], dtype=tf.int32), (batch_size, )))
    # Negative rois: IoU between 0.1 and 0.5, randomly subsampled to total_neg_bboxes per image
    neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.5), tf.greater(merged_iou_map, 0.1))
    neg_mask = helpers.randomly_select_xyz_mask(
        neg_mask, tf.tile(tf.constant([total_neg_bboxes], dtype=tf.int32), (batch_size, )))
    # Gather the matched gt box for every roi and zero out the non-positive ones
    gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
    expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, axis=-1), gt_boxes_map, tf.zeros_like(gt_boxes_map))
    # Positive rois keep their gt label, negative rois become 0, the rest become -1 (all zeros after one_hot)
    gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
    pos_gt_labels = tf.where(pos_mask, gt_labels_map, tf.constant(-1, dtype=tf.int32))
    neg_gt_labels = tf.cast(neg_mask, dtype=tf.int32)
    expanded_gt_labels = pos_gt_labels + neg_gt_labels
    # Regression targets between the rois and their matched gt boxes, scaled by the variances
    roi_bbox_deltas = helpers.get_deltas_from_bboxes(roi_bboxes, expanded_gt_boxes) / variances
    # Expand the deltas into class specific slots using the one hot labels
    roi_bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
    scatter_indices = tf.tile(tf.expand_dims(roi_bbox_labels, -1), (1, 1, 1, 4))
    roi_bbox_deltas = scatter_indices * tf.expand_dims(roi_bbox_deltas, -2)
    roi_bbox_deltas = tf.reshape(roi_bbox_deltas, (batch_size, total_bboxes * total_labels, 4))
    # The targets are constants for the loss, so block gradients through them
    return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
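
Both sampling steps above rely on helpers.randomly_select_xyz_mask to cap the number of positive and negative rois per image. A minimal sketch of what such a subsampling helper might do (an assumption, not the original implementation): keep at most the requested number of True entries in each row of the mask, chosen at random.

import tensorflow as tf

def randomly_select_xyz_mask(mask, select_counts):
    # Hypothetical sketch: keep at most select_counts[i] True entries per row of mask, chosen at random
    maxval = tf.reduce_max(select_counts) * 10
    random_mask = tf.random.uniform(tf.shape(mask), minval=1, maxval=maxval, dtype=tf.int32)
    # True positions get a random positive score, False positions get 0
    scored_mask = tf.cast(mask, tf.int32) * random_mask
    # Rank every position by score; rank 0 is the highest score in its row
    sorted_indices = tf.argsort(scored_mask, direction="DESCENDING")
    ranks = tf.argsort(sorted_indices)
    selected_mask = tf.less(ranks, tf.expand_dims(select_counts, axis=1))
    # Intersect with the original mask so padded zeros can never be selected
    return tf.logical_and(mask, selected_mask)
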
Example #3
def get_step_data(image_data, hyper_params, input_processor, mode="training"):
    """Generating one step data for training or inference.
    Batch operations supported.
    inputs:
        image_data =
            img (batch_size, height, width, channels)
            gt_boxes (batch_size, gt_box_size, [y1, x1, y2, x2])
                these values in normalized format between [0, 1]
            gt_labels (batch_size, gt_box_size)
        hyper_params = dictionary
        input_processor = function for preparing image for input. It's getting from backbone.
        mode = "training" or "inference"

    outputs:
        input_img = (batch_size, height, width, channels)
            preprocessed image using input_processor
        bbox_deltas = (batch_size, output_height, output_width, anchor_count * [y1, x1, y2, x2])
            actual outputs for rpn, generating only training mode
        bbox_labels = (batch_size, output_height, output_width, anchor_count)
            actual outputs for rpn, generating only training mode
        anchors = (batch_size, output_height * output_width * anchor_count, [y1, x1, y2, x2])
    """
    img, gt_boxes, gt_labels = image_data
    batch_size = tf.shape(img)[0]
    input_img = input_processor(img)
    stride = hyper_params["stride"]
    anchor_count = hyper_params["anchor_count"]
    total_pos_bboxes = hyper_params["total_pos_bboxes"]
    total_neg_bboxes = hyper_params["total_neg_bboxes"]
    total_bboxes = total_pos_bboxes + total_neg_bboxes
    img_params = helpers.get_image_params(img, stride)
    height, width, output_height, output_width = img_params
    total_anchors = output_height * output_width * anchor_count
    anchors = generate_anchors(img_params, hyper_params)
    # We use the same anchors for every image, so tile them up to the batch size
    anchors = tf.reshape(tf.tile(anchors, (batch_size, 1)),
                         (batch_size, total_anchors, 4))
    if mode != "training":
        return input_img, anchors
    ################################################################################################################
    pos_bbox_indices, neg_bbox_indices, gt_box_indices = helpers.get_selected_indices(
        anchors, gt_boxes, total_pos_bboxes, total_neg_bboxes)
    #
    gt_boxes_map = helpers.get_gt_boxes_map(gt_boxes, gt_box_indices,
                                            batch_size, total_neg_bboxes)
    #
    pos_labels_map = tf.ones((batch_size, total_pos_bboxes), tf.int32)
    neg_labels_map = tf.zeros((batch_size, total_neg_bboxes), tf.int32)
    gt_labels_map = tf.concat([pos_labels_map, neg_labels_map], axis=1)
    #
    bbox_indices = tf.concat([pos_bbox_indices, neg_bbox_indices], axis=1)
    #
    flatted_batch_indices = helpers.get_tiled_indices(batch_size, total_bboxes)
    flatted_bbox_indices = tf.reshape(bbox_indices, (-1, 1))
    scatter_indices = helpers.get_scatter_indices_for_bboxes(
        [flatted_batch_indices, flatted_bbox_indices], batch_size,
        total_bboxes)
    expanded_gt_boxes = tf.scatter_nd(scatter_indices, gt_boxes_map,
                                      tf.shape(anchors))
    #
    bbox_deltas = helpers.get_deltas_from_bboxes(anchors, expanded_gt_boxes)
    #
    bbox_labels = tf.negative(tf.ones((batch_size, total_anchors), tf.int32))
    bbox_labels = tf.tensor_scatter_nd_update(bbox_labels, scatter_indices,
                                              gt_labels_map)
    #
    bbox_deltas = tf.reshape(
        bbox_deltas,
        (batch_size, output_height, output_width, anchor_count * 4))
    bbox_labels = tf.reshape(
        bbox_labels, (batch_size, output_height, output_width, anchor_count))
    #
    return input_img, bbox_deltas, bbox_labels, anchors
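
All four examples encode their regression targets with helpers.get_deltas_from_bboxes. Below is a hedged sketch of the standard Faster R-CNN style encoding it presumably performs (centre offsets normalized by the reference box size, plus log size ratios); the exact zero guards for padded boxes are assumptions.

import tensorflow as tf

def get_deltas_from_bboxes(bboxes, gt_boxes):
    # Hypothetical sketch of the encoding: (delta_y, delta_x) are normalized centre offsets,
    # (delta_h, delta_w) are log ratios of the box sizes
    bbox_width = bboxes[..., 3] - bboxes[..., 1]
    bbox_height = bboxes[..., 2] - bboxes[..., 0]
    bbox_ctr_x = bboxes[..., 1] + 0.5 * bbox_width
    bbox_ctr_y = bboxes[..., 0] + 0.5 * bbox_height
    gt_width = gt_boxes[..., 3] - gt_boxes[..., 1]
    gt_height = gt_boxes[..., 2] - gt_boxes[..., 0]
    gt_ctr_x = gt_boxes[..., 1] + 0.5 * gt_width
    gt_ctr_y = gt_boxes[..., 0] + 0.5 * gt_height
    # Guard against division by zero for degenerate reference boxes (assumption)
    bbox_width = tf.where(tf.equal(bbox_width, 0.0), tf.ones_like(bbox_width) * 1e-3, bbox_width)
    bbox_height = tf.where(tf.equal(bbox_height, 0.0), tf.ones_like(bbox_height) * 1e-3, bbox_height)
    # Zeroed gt boxes (negatives / padding) produce all-zero deltas
    delta_x = tf.where(tf.equal(gt_width, 0.0), tf.zeros_like(gt_width), (gt_ctr_x - bbox_ctr_x) / bbox_width)
    delta_y = tf.where(tf.equal(gt_height, 0.0), tf.zeros_like(gt_height), (gt_ctr_y - bbox_ctr_y) / bbox_height)
    delta_w = tf.where(tf.equal(gt_width, 0.0), tf.zeros_like(gt_width), tf.math.log(gt_width / bbox_width))
    delta_h = tf.where(tf.equal(gt_height, 0.0), tf.zeros_like(gt_height), tf.math.log(gt_height / bbox_height))
    return tf.stack([delta_y, delta_x, delta_h, delta_w], axis=-1)
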
Example #4
def get_step_data(image_data, anchors, hyper_params, input_processor):
    """Generating one step data for training or inference.
    Batch operations supported.
    inputs:
        image_data =
            img (batch_size, height, width, channels)
            gt_boxes (batch_size, gt_box_size, [y1, x1, y2, x2])
                these values in normalized format between [0, 1]
            gt_labels (batch_size, gt_box_size)
        anchors = (total_anchors, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        hyper_params = dictionary
        input_processor = function for preparing image for input. It's getting from backbone.

    outputs:
        input_img = (batch_size, height, width, channels)
            preprocessed image using input_processor
        bbox_deltas = (batch_size, output_height, output_width, anchor_count * [delta_y, delta_x, delta_h, delta_w])
        bbox_labels = (batch_size, output_height, output_width, anchor_count)
    """
    img, gt_boxes, gt_labels = image_data
    batch_size, image_height, image_width = tf.shape(img)[0], tf.shape(
        img)[1], tf.shape(img)[2]
    input_img = input_processor(img)
    input_img = tf.image.convert_image_dtype(input_img, tf.float32)
    stride = hyper_params["stride"]
    anchor_count = hyper_params["anchor_count"]
    total_pos_bboxes = hyper_params["total_pos_bboxes"]
    total_neg_bboxes = hyper_params["total_neg_bboxes"]
    variances = hyper_params["variances"]
    output_height, output_width = image_height // stride, image_width // stride
    total_anchors = anchors.shape[0]
    # Calculate IoU values between each anchor and the ground truth boxes
    iou_map = helpers.generate_iou_map(anchors, gt_boxes)
    # For each anchor (row), the index of the gt box with the highest IoU
    max_indices_each_row = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    # For each gt box (column), the index of the anchor with the highest IoU
    max_indices_each_column = tf.argmax(iou_map, axis=1, output_type=tf.int32)
    # The IoU map has a value for every gt box; reduce over that axis to get the best IoU per anchor
    merged_iou_map = tf.reduce_max(iou_map, axis=2)
    #
    pos_mask = tf.greater(merged_iou_map, 0.7)
    #
    valid_indices_cond = tf.not_equal(gt_labels, -1)
    valid_indices = tf.cast(tf.where(valid_indices_cond), tf.int32)
    valid_max_indices = max_indices_each_column[valid_indices_cond]
    #
    scatter_bbox_indices = tf.stack([valid_indices[..., 0], valid_max_indices],
                                    1)
    max_pos_mask = tf.scatter_nd(scatter_bbox_indices,
                                 tf.fill((tf.shape(valid_indices)[0], ), True),
                                 tf.shape(pos_mask))
    pos_mask = tf.logical_or(pos_mask, max_pos_mask)
    pos_mask = helpers.randomly_select_xyz_mask(
        pos_mask,
        tf.tile(tf.constant([total_pos_bboxes], dtype=tf.int32),
                (batch_size, )))
    #
    pos_count = tf.reduce_sum(tf.cast(pos_mask, tf.int32), axis=-1)
    neg_count = (total_pos_bboxes + total_neg_bboxes) - pos_count
    #
    neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.3),
                              tf.logical_not(pos_mask))
    neg_mask = helpers.randomly_select_xyz_mask(neg_mask, neg_count)
    #
    pos_labels = tf.where(pos_mask, tf.ones_like(pos_mask, dtype=tf.float32),
                          tf.constant(-1.0, dtype=tf.float32))
    neg_labels = tf.cast(neg_mask, dtype=tf.float32)
    bbox_labels = tf.add(pos_labels, neg_labels)
    #
    gt_boxes_map = tf.gather(gt_boxes, max_indices_each_row, batch_dims=1)
    # Zero out the gt boxes of anchors that are not positive
    expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, -1), gt_boxes_map,
                                 tf.zeros_like(gt_boxes_map))
    # Calculate delta values between anchors and ground truth bboxes
    bbox_deltas = helpers.get_deltas_from_bboxes(anchors,
                                                 expanded_gt_boxes) / variances
    #
    # bbox_deltas = tf.reshape(bbox_deltas, (batch_size, output_height, output_width, anchor_count * 4))
    bbox_labels = tf.reshape(
        bbox_labels, (batch_size, output_height, output_width, anchor_count))
    #
    return input_img, bbox_deltas, bbox_labels
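
Examples #2 and #4 both start from helpers.generate_iou_map. The sketch below is an assumption about that helper rather than its verified source: a broadcasting-based pairwise IoU between M boxes and N ground truth boxes, returning a (..., M, N) map that works for both batched rois and a shared, unbatched anchor set.

import tensorflow as tf

def generate_iou_map(bboxes, gt_boxes):
    # Hypothetical sketch: pairwise IoU between bboxes (..., M, 4) and gt_boxes (batch, N, 4) -> (batch, M, N)
    bbox_y1, bbox_x1, bbox_y2, bbox_x2 = tf.split(bboxes, 4, axis=-1)
    gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(gt_boxes, 4, axis=-1)
    # Areas of both sets of boxes
    bbox_area = tf.squeeze((bbox_y2 - bbox_y1) * (bbox_x2 - bbox_x1), axis=-1)
    gt_area = tf.squeeze((gt_y2 - gt_y1) * (gt_x2 - gt_x1), axis=-1)
    # Pairwise intersection rectangle via broadcasting against the transposed gt coordinates
    x_top = tf.maximum(bbox_x1, tf.linalg.matrix_transpose(gt_x1))
    y_top = tf.maximum(bbox_y1, tf.linalg.matrix_transpose(gt_y1))
    x_bottom = tf.minimum(bbox_x2, tf.linalg.matrix_transpose(gt_x2))
    y_bottom = tf.minimum(bbox_y2, tf.linalg.matrix_transpose(gt_y2))
    intersection = tf.maximum(x_bottom - x_top, 0.0) * tf.maximum(y_bottom - y_top, 0.0)
    union = tf.expand_dims(bbox_area, -1) + tf.expand_dims(gt_area, -2) - intersection
    return intersection / union

Padded gt boxes filled with zeros simply end up with zero IoU against every real anchor, so they never become positive matches under the 0.5 / 0.7 thresholds used above.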