Beispiel #1
0
def patch(img, gt_boxes):
    """Generating random patch and adjusting image and ground truth objects to this patch.
    After this operation some of the ground truth boxes / objects could be removed from the image.
    However, these objects are not excluded from the output, only the coordinates are changed as zero.
    inputs:
        img = (height, width, depth)
        gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
            in normalized form [0, 1]
    outputs:
        modified_img = (final_height, final_width, depth)
        modified_gt_boxes = (ground_truth_object_count, [y1, x1, y2, x2])
            in normalized form [0, 1]
    """
    img_shape = tf.shape(img)
    height, width = tf.cast(img_shape[0],
                            tf.float32), tf.cast(img_shape[1], tf.float32)
    # Denormalizing bounding boxes for further operations
    denormalized_gt_boxes = bbox_utils.denormalize_bboxes(
        gt_boxes, height, width)
    # Randomly expand image and adjust bounding boxes
    img, denormalized_gt_boxes, height, width = randomly_apply_operation(
        expand_image, img, denormalized_gt_boxes, height, width)
    # Generate random patches
    patches = generate_random_patches(height, width)
    # Calculate jaccard/iou value for each bounding box
    iou_map = bbox_utils.generate_iou_map(patches,
                                          denormalized_gt_boxes,
                                          transpose_perm=[1, 0])
    # Check each ground truth box center in the generated patch and return a boolean condition list
    center_in_patch_condition = get_center_in_patch_condition(
        patches, denormalized_gt_boxes)
    # Get random minimum overlap value
    min_overlap = get_random_min_overlap()
    # Check and merge center condition and minimum intersection condition
    valid_patch_condition = tf.logical_and(center_in_patch_condition,
                                           tf.greater(iou_map, min_overlap))
    # Check at least one valid patch then apply patch
    img, denormalized_gt_boxes, height, width = tf.cond(
        tf.reduce_any(valid_patch_condition), lambda: select_and_apply_patch(
            img, denormalized_gt_boxes, patches, valid_patch_condition,
            center_in_patch_condition), lambda:
        (img, denormalized_gt_boxes, height, width))
    # Finally normalized ground truth boxes
    gt_boxes = bbox_utils.normalize_bboxes(denormalized_gt_boxes, height,
                                           width)
    gt_boxes = tf.clip_by_value(gt_boxes, 0, 1)
    #
    return img, gt_boxes
Beispiel #2
0
def calculate_actual_outputs(prior_boxes, gt_boxes, gt_labels, hyper_params):
    """Calculate ssd actual output values.
    Batch operations supported.
    inputs:
        prior_boxes = (total_prior_boxes, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_boxes (batch_size, gt_box_size, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_labels (batch_size, gt_box_size)
        hyper_params = dictionary

    outputs:
        bbox_deltas = (batch_size, total_bboxes, [delta_y, delta_x, delta_h, delta_w])
        bbox_labels = (batch_size, total_bboxes, [0,0,...,0])
    """
    batch_size = tf.shape(gt_boxes)[0]
    total_labels = hyper_params["total_labels"]
    iou_threshold = hyper_params["iou_threshold"]
    variances = hyper_params["variances"]
    total_prior_boxes = prior_boxes.shape[0]
    # Calculate iou values between each bboxes and ground truth boxes
    iou_map = bbox_utils.generate_iou_map(prior_boxes, gt_boxes)
    # Get max index value for each row
    max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    # IoU map has iou values for every gt boxes and we merge these values column wise
    merged_iou_map = tf.reduce_max(iou_map, axis=2)
    #
    pos_cond = tf.greater(merged_iou_map, iou_threshold)
    #
    gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
    expanded_gt_boxes = tf.where(tf.expand_dims(pos_cond, -1), gt_boxes_map,
                                 tf.zeros_like(gt_boxes_map))
    bbox_deltas = bbox_utils.get_deltas_from_bboxes(
        prior_boxes, expanded_gt_boxes) / variances
    #
    gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
    expanded_gt_labels = tf.where(pos_cond, gt_labels_map,
                                  tf.zeros_like(gt_labels_map))
    bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
    #
    return bbox_deltas, bbox_labels
 def call(self, inputs):
     roi_bboxes = inputs[0]
     gt_boxes = inputs[1]
     gt_labels = inputs[2]
     total_labels = self.hyper_params["total_labels"]
     total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
     total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
     variances = self.hyper_params["variances"]
     batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
     # Calculate iou values between each bboxes and ground truth boxes
     iou_map = bbox_utils.generate_iou_map(roi_bboxes, gt_boxes)
     # Get max index value for each row
     max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
     # IoU map has iou values for every gt boxes and we merge these values column wise
     merged_iou_map = tf.reduce_max(iou_map, axis=2)
     #
     pos_mask = tf.greater(merged_iou_map, 0.5)
     pos_mask = train_utils.randomly_select_xyz_mask(pos_mask, tf.constant([total_pos_bboxes], dtype=tf.int32))
     #
     neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.5), tf.greater(merged_iou_map, 0.1))
     neg_mask = train_utils.randomly_select_xyz_mask(neg_mask, tf.constant([total_neg_bboxes], dtype=tf.int32))
     #
     gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
     expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, axis=-1), gt_boxes_map, tf.zeros_like(gt_boxes_map))
     #
     gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
     pos_gt_labels = tf.where(pos_mask, gt_labels_map, tf.constant(-1, dtype=tf.int32))
     neg_gt_labels = tf.cast(neg_mask, dtype=tf.int32)
     expanded_gt_labels = pos_gt_labels + neg_gt_labels
     #
     roi_bbox_deltas = bbox_utils.get_deltas_from_bboxes(roi_bboxes, expanded_gt_boxes) / variances
     #
     roi_bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
     scatter_indices = tf.tile(tf.expand_dims(roi_bbox_labels, -1), (1, 1, 1, 4))
     roi_bbox_deltas = scatter_indices * tf.expand_dims(roi_bbox_deltas, -2)
     roi_bbox_deltas = tf.reshape(roi_bbox_deltas, (batch_size, total_bboxes * total_labels, 4))
     #
     return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
def update_stats(pred_bboxes, pred_labels, pred_scores, gt_boxes, gt_labels,
                 stats):
    iou_map = bbox_utils.generate_iou_map(pred_bboxes, gt_boxes)
    merged_iou_map = tf.reduce_max(iou_map, axis=-1)
    max_indices_each_gt = tf.argmax(iou_map, axis=-1, output_type=tf.int32)
    sorted_ids = tf.argsort(merged_iou_map, direction="DESCENDING")
    #
    count_holder = tf.unique_with_counts(tf.reshape(gt_labels, (-1, )))
    for i, gt_label in enumerate(count_holder[0]):
        if gt_label == -1:
            continue
        gt_label = int(gt_label)
        stats[gt_label]["total"] += int(count_holder[2][i])
    for batch_id, m in enumerate(merged_iou_map):
        true_labels = []
        for i, sorted_id in enumerate(sorted_ids[batch_id]):
            pred_label = pred_labels[batch_id, sorted_id]
            if pred_label == 0:
                continue
            #
            iou = merged_iou_map[batch_id, sorted_id]
            gt_id = max_indices_each_gt[batch_id, sorted_id]
            gt_label = int(gt_labels[batch_id, gt_id])
            pred_label = int(pred_label)
            score = pred_scores[batch_id, sorted_id]
            stats[pred_label]["scores"].append(score)
            stats[pred_label]["tp"].append(0)
            stats[pred_label]["fp"].append(0)
            if iou >= 0.5 and pred_label == gt_label and gt_id not in true_labels:
                stats[pred_label]["tp"][-1] = 1
                true_labels.append(gt_id)
            else:
                stats[pred_label]["fp"][-1] = 1
            #
        #
    #
    return stats
Beispiel #5
0
def calculate_rpn_actual_outputs(anchors, gt_boxes, gt_labels, hyper_params):
    """Generating one step data for training or inference.
    Batch operations supported.
    inputs:
        anchors = (total_anchors, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_boxes (batch_size, gt_box_size, [y1, x1, y2, x2])
            these values in normalized format between [0, 1]
        gt_labels (batch_size, gt_box_size)
        hyper_params = dictionary

    outputs:
        bbox_deltas = (batch_size, total_anchors, [delta_y, delta_x, delta_h, delta_w])
        bbox_labels = (batch_size, feature_map_shape, feature_map_shape, anchor_count)
    """
    batch_size = tf.shape(gt_boxes)[0]
    feature_map_shape = hyper_params["feature_map_shape"]
    anchor_count = hyper_params["anchor_count"]
    total_pos_bboxes = hyper_params["total_pos_bboxes"]
    total_neg_bboxes = hyper_params["total_neg_bboxes"]
    variances = hyper_params["variances"]
    # Calculate iou values between each bboxes and ground truth boxes
    iou_map = bbox_utils.generate_iou_map(anchors, gt_boxes)
    # Get max index value for each row
    max_indices_each_row = tf.argmax(iou_map, axis=2, output_type=tf.int32)
    # Get max index value for each column
    max_indices_each_column = tf.argmax(iou_map, axis=1, output_type=tf.int32)
    # IoU map has iou values for every gt boxes and we merge these values column wise
    merged_iou_map = tf.reduce_max(iou_map, axis=2)
    #
    pos_mask = tf.greater(merged_iou_map, 0.7)
    #
    valid_indices_cond = tf.not_equal(gt_labels, -1)
    valid_indices = tf.cast(tf.where(valid_indices_cond), tf.int32)
    valid_max_indices = max_indices_each_column[valid_indices_cond]
    #
    scatter_bbox_indices = tf.stack([valid_indices[..., 0], valid_max_indices],
                                    1)
    max_pos_mask = tf.scatter_nd(scatter_bbox_indices,
                                 tf.fill((tf.shape(valid_indices)[0], ), True),
                                 tf.shape(pos_mask))
    pos_mask = tf.logical_or(pos_mask, max_pos_mask)
    pos_mask = randomly_select_xyz_mask(
        pos_mask, tf.constant([total_pos_bboxes], dtype=tf.int32))
    #
    pos_count = tf.reduce_sum(tf.cast(pos_mask, tf.int32), axis=-1)
    neg_count = (total_pos_bboxes + total_neg_bboxes) - pos_count
    #
    neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.3),
                              tf.logical_not(pos_mask))
    neg_mask = randomly_select_xyz_mask(neg_mask, neg_count)
    #
    pos_labels = tf.where(pos_mask, tf.ones_like(pos_mask, dtype=tf.float32),
                          tf.constant(-1.0, dtype=tf.float32))
    neg_labels = tf.cast(neg_mask, dtype=tf.float32)
    bbox_labels = tf.add(pos_labels, neg_labels)
    #
    gt_boxes_map = tf.gather(gt_boxes, max_indices_each_row, batch_dims=1)
    # Replace negative bboxes with zeros
    expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, -1), gt_boxes_map,
                                 tf.zeros_like(gt_boxes_map))
    # Calculate delta values between anchors and ground truth bboxes
    bbox_deltas = bbox_utils.get_deltas_from_bboxes(
        anchors, expanded_gt_boxes) / variances
    #
    # bbox_deltas = tf.reshape(bbox_deltas, (batch_size, feature_map_shape, feature_map_shape, anchor_count * 4))
    bbox_labels = tf.reshape(
        bbox_labels,
        (batch_size, feature_map_shape, feature_map_shape, anchor_count))
    #
    return bbox_deltas, bbox_labels