Example #1
def get_feature_map_spatial_dims(feature_maps):
    """Return list of spatial dimensions for each feature map in a list.

    Args:
      feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].

    Returns:
      a list of pairs (height, width) for each feature map in feature_maps
    """
    feature_map_shapes = [
        shape_utils.combined_static_and_dynamic_shape(feature_map)
        for feature_map in feature_maps
    ]
    return [(shape[1], shape[2]) for shape in feature_map_shapes]
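A minimal usage sketch for the helper above. The stand-in for shape_utils.combined_static_and_dynamic_shape is an assumption for illustration; with plain NumPy arrays every dimension is static.

import numpy as np

# Hypothetical stand-in: plain NumPy arrays have fully static shapes.
def combined_static_and_dynamic_shape(array):
    return list(array.shape)

feature_maps = [
    np.zeros((1, 38, 50, 512), dtype=np.float32),
    np.zeros((1, 19, 25, 1024), dtype=np.float32),
]
shapes = [combined_static_and_dynamic_shape(f) for f in feature_maps]
print([(s[1], s[2]) for s in shapes])  # [(38, 50), (19, 25)]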
Example #2
def _compute_clip_window(preprocessed_images, true_image_shapes):
    """Computes a clip window in normalized coordinates for each image.

    Args:
      preprocessed_images: a [batch, height, width, channels] image tensor,
        possibly padded.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels], giving the shape of the
        unpadded content within each resized image.

    Returns:
      a float32 array holding [ymin, xmin, ymax, xmax] per image, normalized
      by the padded height and width.
    """
    resized_inputs_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_images)
    true_heights, true_widths, _ = np.split(true_image_shapes, 3, axis=1)
    padded_height = float(resized_inputs_shape[1])
    padded_width = float(resized_inputs_shape[2])

    clip_window = np.stack([
        np.zeros_like(true_heights),
        np.zeros_like(true_widths),
        true_heights / padded_height,
        true_widths / padded_width,
    ], axis=1)

    return clip_window.reshape(1, -1)
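The arithmetic in isolation, with made-up sizes (a 600x1024 padded batch whose true content is 600x1002):

import numpy as np

true_image_shapes = np.array([[600, 1002, 3]])  # [height, width, channels]
padded_height, padded_width = 600.0, 1024.0     # from the padded input batch

true_heights, true_widths, _ = np.split(true_image_shapes, 3, axis=1)
clip = np.stack([np.zeros_like(true_heights),
                 np.zeros_like(true_widths),
                 true_heights / padded_height,
                 true_widths / padded_width], axis=1)
print(clip.reshape(1, -1))  # [[0. 0. 1. 0.97851562]]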
Example #3
def _batch_decode(anchors, box_encodings):
    """Decodes a batch of box encodings with respect to the anchors.

    Args:
      anchors: a BoxList of anchors used to decode the box encodings.
      box_encodings: A float32 tensor of shape
        [batch_size, num_anchors, box_code_size] containing box encodings.

    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
      decoded_keypoints: A float32 tensor of shape
        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
        keypoints if present in the input `box_encodings`, None otherwise.
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)
    batch_size = combined_shape[0]
    tiled_anchor_boxes = np.tile(np.expand_dims(anchors.get(), 0),
                                 [batch_size, 1, 1])

    tiled_anchors_boxlist = box_list.BoxList(
        np.reshape(tiled_anchor_boxes, [-1, 4]))

    box_coder = box_coder_builder.build("faster_rcnn_box_coder")
    decoded_boxes = box_coder.decode(
        np.reshape(box_encodings, [-1, box_coder.code_size]),
        tiled_anchors_boxlist)

    decoded_keypoints = None
    if decoded_boxes.has_field(fields.BoxListFields.keypoints):
        decoded_keypoints = decoded_boxes.get_field(
            fields.BoxListFields.keypoints)
        num_keypoints = decoded_keypoints.shape[1]  # ndarray, not a tf.Tensor
        decoded_keypoints = np.reshape(
            decoded_keypoints,
            np.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
    decoded_boxes = np.reshape(
        decoded_boxes.get(),
        np.stack([combined_shape[0], combined_shape[1], 4]))
    return decoded_boxes, decoded_keypoints
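For reference, a self-contained sketch of the standard R-CNN box regression that the faster_rcnn_box_coder's decode applies per anchor. The scale factors here are an assumption (the commonly used defaults); the real values come from the coder's configuration.

import numpy as np

def decode_single(encoding, anchor, scales=(10., 10., 5., 5.)):
    # anchor is [ymin, xmin, ymax, xmax]; encoding is [ty, tx, th, tw].
    ymin_a, xmin_a, ymax_a, xmax_a = anchor
    ha, wa = ymax_a - ymin_a, xmax_a - xmin_a
    ycenter_a, xcenter_a = ymin_a + ha / 2., xmin_a + wa / 2.
    ty, tx, th, tw = [t / s for t, s in zip(encoding, scales)]
    h, w = np.exp(th) * ha, np.exp(tw) * wa
    ycenter, xcenter = ty * ha + ycenter_a, tx * wa + xcenter_a
    return np.array([ycenter - h / 2., xcenter - w / 2.,
                     ycenter + h / 2., xcenter + w / 2.])

print(decode_single([0., 0., 0., 0.], [10., 10., 20., 30.]))
# An all-zero encoding decodes back to its anchor: [10. 10. 20. 30.]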
Example #4
def _batch_decode_boxes(box_encodings, anchor_boxes):
    print("================ _batch_decode_boxes ==================")
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)

    num_classes = combined_shape[2]  # e.g. 1 for class-agnostic RPN boxes
    # anchor_boxes: (1, 28728, 4)
    print("====== anchor_boxes:", anchor_boxes[0][0])
    anchor_boxes_exp_dim = np.expand_dims(anchor_boxes, 2)
    # anchor_boxes_exp_dim: (1, 28728, 1, 4)
    tiled_anchor_boxes = np.tile(anchor_boxes_exp_dim, [1, 1, num_classes, 1])
    # tiled_anchor_boxes: (1, 28728, num_classes, 4)
    reshaped_anchors = np.reshape(tiled_anchor_boxes, [-1, 4])
    # reshaped_anchors: (28728 * num_classes, 4)
    print("====== reshaped_anchors:", reshaped_anchors[0])
    tiled_anchors_boxlist = box_list.BoxList(reshaped_anchors)

    _proposal_target_assigner = target_assigner.create_target_assigner(
        'FasterRCNN', 'proposal')

    _box_coder = _proposal_target_assigner.box_coder

    print("================ _box_coder.decode ==================")

    reshaped = np.reshape(box_encodings, [-1, _box_coder.code_size])
    print("reshaped:", reshaped.shape)

    decoded_boxes = _box_coder.decode(reshaped, tiled_anchors_boxlist)

    decoded_boxes_reshaped = np.reshape(
        decoded_boxes.get(),
        np.stack([combined_shape[0], combined_shape[1], num_classes, 4]))

    print("decoded_boxes_reshaped:", decoded_boxes_reshaped.shape)
    return decoded_boxes_reshaped
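The anchor tiling above simply repeats each anchor once per class before flattening; a toy shape walk-through with a made-up num_classes:

import numpy as np

anchor_boxes = np.zeros((1, 28728, 4))             # [batch, num_anchors, 4]
num_classes = 2                                    # made-up value
expanded = np.expand_dims(anchor_boxes, 2)         # (1, 28728, 1, 4)
tiled = np.tile(expanded, [1, 1, num_classes, 1])  # (1, 28728, 2, 4)
print(np.reshape(tiled, [-1, 4]).shape)            # (57456, 4)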
Example #5
def last_predict_part(boxes_encodings,
                      classes_predictions_with_background,
                      feature_maps,
                      preprocessed_inputs=None):
    """Predicts unpostprocessed tensors from the input tensor.

    This function takes the raw predictor outputs for a batch of images and
    assembles them into unpostprocessed predictions.

    A side effect of calling this function is that a box_list.BoxList of
    anchors is constructed; these anchors must be available before the
    postprocess or loss functions can be called, so they are returned
    alongside the predictions.

    Args:
      boxes_encodings: raw box encoding outputs, one per feature map.
      classes_predictions_with_background: raw class prediction outputs
        (including the background class), one per feature map.
      feature_maps: a list of [batch, height_i, width_i, depth_i] arrays.
      preprocessed_inputs: a [batch, height, width, channels] image tensor.

    Returns:
      a (predictions_dict, anchors) tuple.
    """
    print("------------------ last_predict_part ------------------")
    anchor_generator = anchor_generator_builder.build()

    num_predictions_per_location_list = (
        anchor_generator.num_anchors_per_location())

    # print("num_predictions_per_location_list:", num_predictions_per_location_list)
    prediction_dict = post_processor(boxes_encodings,
                                     classes_predictions_with_background,
                                     feature_maps,
                                     num_predictions_per_location_list)

    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)
    feature_map_spatial_dims = get_feature_map_spatial_dims(feature_maps)

    anchors_list = anchor_generator.generate(feature_map_spatial_dims,
                                             im_height=image_shape[1],
                                             im_width=image_shape[2])

    anchors = box_list_ops.concatenate(anchors_list)

    box_encodings = np.concatenate(prediction_dict['box_encodings'], axis=1)
    if box_encodings.ndim == 4 and box_encodings.shape[2] == 1:
        box_encodings = np.squeeze(box_encodings, axis=2)

    class_predictions_with_background = np.concatenate(
        prediction_dict['class_predictions_with_background'], axis=1)
    predictions_dict = {
        'preprocessed_inputs': preprocessed_inputs,
        'box_encodings': box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'feature_maps': feature_maps,
        'anchors': anchors.get()
    }
    return predictions_dict, anchors
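The concatenate-then-squeeze step above, isolated with made-up per-layer shapes:

import numpy as np

# Two feature maps' box encodings, each [batch, num_anchors_i, 1, 4].
per_layer = [np.zeros((1, 23104, 1, 4)), np.zeros((1, 5776, 1, 4))]
box_encodings = np.concatenate(per_layer, axis=1)   # (1, 28880, 1, 4)
if box_encodings.ndim == 4 and box_encodings.shape[2] == 1:
    box_encodings = np.squeeze(box_encodings, axis=2)
print(box_encodings.shape)                          # (1, 28880, 4)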
Example #6
def _postprocess_rpn(rpn_box_encodings_batch,
                     rpn_objectness_predictions_with_background_batch, anchors,
                     image_shapes, first_stage_max_proposals):
    first_stage_nms_score_threshold = config.cfg.POSTPROCESSOR.FIRST_STAGE_NMS_SCORE_THRESHOLD
    first_stage_nms_iou_threshold = config.cfg.POSTPROCESSOR.FIRST_STAGE_NMS_IOU_THRESHOLD

    rpn_box_encodings_batch = np.expand_dims(rpn_box_encodings_batch, axis=2)

    rpn_encodings_shape = shape_utils.combined_static_and_dynamic_shape(
        rpn_box_encodings_batch)

    # print("=== anchors:", anchors[0])

    tiled_anchor_boxes = np.tile(np.expand_dims(anchors, 0),
                                 [rpn_encodings_shape[0], 1, 1])
    # print("=== tiled_anchor_boxes:", tiled_anchor_boxes[0][0])

    proposal_boxes = _batch_decode_boxes(rpn_box_encodings_batch,
                                         tiled_anchor_boxes)

    proposal_boxes = np.squeeze(proposal_boxes, axis=2)
    # proposal_boxes: (1, 28728, 4)
    # e.g. proposal_boxes[0][0] = [11.60262919  3.12900102 41.31160688 18.96688846]

    # rpn_objectness_predictions_with_background_batch: (1, 28728, 2)
    # e.g. rpn_objectness_predictions_with_background_batch[:, :, 1][0][0] = -2.7135613
    rpn_objectness_softmax_without_background = ops.softmax(
        rpn_objectness_predictions_with_background_batch)[:, :, 1]
    # rpn_objectness_softmax_without_background: (1, 28728)
    # observed output: ====== softmax score : 0.0032150035
    print("====== softmax score :",
          rpn_objectness_softmax_without_background[0][0])

    # This clip window is in absolute coordinates: [0, 0, height, width] per image.
    clip_window = _compute_clip_window(image_shapes)
    # e.g. clip_window = [[   0    0  600 1002]]
    print("clip_window:", clip_window)
    (proposal_boxes, proposal_scores, _, _, _,
     num_proposals) = post_processing.batch_multiclass_non_max_suppression(
         np.expand_dims(proposal_boxes, axis=2),
         np.expand_dims(rpn_objectness_softmax_without_background, axis=2),
         first_stage_nms_score_threshold,
         first_stage_nms_iou_threshold,
         first_stage_max_proposals,
         first_stage_max_proposals,
         clip_window=clip_window)

    print("proposal_boxes:", proposal_boxes.shape)
    print("proposal_boxes [0][0]:", proposal_boxes[0][0])

    # import h5py
    # with h5py.File('tf_proposal.h5', 'w') as f:
    #     f["tf_proposal"] = proposal_boxes[0]

    print("proposal_scores:", proposal_scores.shape)
    print("proposal_scores [0][0]:", proposal_scores[0][0])

    # proposal_boxes [0][0]: [  6.95569825 402.90691757 398.87478089 947.73357773]
    # proposal_scores: (1, 100)
    # proposal_scores [0][0]: 0.9992391
    # caffe 'proposals final': array([237.24371, 18.908209, 561.04926, 175.2929], dtype=float32)

    # normalize proposal boxes

    def normalize_boxes(args):
        proposal_boxes_per_image = args[0][0]
        image_shape = args[1][0]

        normalized_boxes_per_image = box_list_ops.to_normalized_coordinates(
            box_list.BoxList(proposal_boxes_per_image), image_shape[0],
            image_shape[1]).get()

        return normalized_boxes_per_image

    normalized_proposal_boxes = shape_utils.static_or_dynamic_map_fn(
        normalize_boxes, elems=[proposal_boxes, image_shapes])

    return normalized_proposal_boxes, proposal_scores, num_proposals
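The objectness scoring step in isolation: a softmax over the two RPN logits, keeping only the foreground column (dummy logits below):

import numpy as np

# [batch, num_anchors, 2] logits; column 0 = background, column 1 = object.
logits = np.array([[[1.0, -2.7135613], [0.5, 2.0]]])
shifted = np.exp(logits - logits.max(axis=-1, keepdims=True))
softmax = shifted / shifted.sum(axis=-1, keepdims=True)
print(softmax[:, :, 1])  # ≈ [[0.0238 0.8176]]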
Example #7
def _flatten_first_two_dimensions(inputs):
    """Merges the first two dimensions, e.g. [A, B, ...] -> [A * B, ...]."""
    combined_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
    flattened_shape = np.stack([combined_shape[0] * combined_shape[1]] +
                               combined_shape[2:])
    return np.reshape(inputs, flattened_shape)
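The equivalent shape arithmetic with a dummy array (shapes are made up):

import numpy as np

inputs = np.zeros((2, 100, 7, 7, 256))   # [batch, num_proposals, h, w, depth]
combined_shape = list(inputs.shape)      # all static in plain NumPy
flattened_shape = [combined_shape[0] * combined_shape[1]] + combined_shape[2:]
print(np.reshape(inputs, flattened_shape).shape)  # (200, 7, 7, 256)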
Example #8
def second_stage_box_predictor(preprocessed_inputs, box_encoding_reshape,
                               class_prediction_reshape, rpn_features_to_crop,
                               rpn_box_encodings,
                               rpn_objectness_predictions_with_background,
                               true_image_shapes, rpn_box_predictor_features):
    image_shape = shape_utils.combined_static_and_dynamic_shape(
        preprocessed_inputs)

    first_stage_anchor_generator = anchor_generator_builder.build()

    clip_window = np.stack([0, 0, image_shape[1], image_shape[2]])
    feature_map_shape = rpn_features_to_crop.shape

    anchors_boxlist = box_list_ops.concatenate(
        first_stage_anchor_generator.generate([(feature_map_shape[1],
                                                feature_map_shape[2])]))
    anchors_boxlist = box_list_ops.clip_to_window(anchors_boxlist, clip_window)
    _anchors = anchors_boxlist

    image_shape_2d = _image_batch_shape_2d(image_shape)

    num_anchors_per_location = (
        first_stage_anchor_generator.num_anchors_per_location())

    if len(num_anchors_per_location) != 1:
        raise RuntimeError('anchor_generator is expected to generate anchors '
                           'corresponding to a single feature map.')
    box_predictions = _first_stage_box_predictor_predict(
        [rpn_box_predictor_features], [rpn_box_encodings],
        [rpn_objectness_predictions_with_background], num_anchors_per_location)

    predictions_box_encodings = np.concatenate(box_predictions[BOX_ENCODINGS],
                                               axis=1)

    rpn_box_encodings = np.squeeze(predictions_box_encodings, axis=2)

    rpn_objectness_predictions_with_background = np.concatenate(
        box_predictions[CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)

    first_stage_max_proposals = config.cfg.POSTPROCESSOR.FIRST_STAGE_MAX_PROPOSALS

    proposal_boxes_normalized, _, num_proposals = _postprocess_rpn(
        rpn_box_encodings,
        rpn_objectness_predictions_with_background,
        _anchors.get(),
        image_shape_2d,
        first_stage_max_proposals=first_stage_max_proposals)

    print("proposal_boxes_normalized:", proposal_boxes_normalized.shape)

    prediction_dict = {
        'rpn_box_predictor_features': rpn_box_predictor_features,
        'rpn_features_to_crop': rpn_features_to_crop,
        'image_shape': image_shape,
        'rpn_box_encodings': rpn_box_encodings,
        'rpn_objectness_predictions_with_background':
            rpn_objectness_predictions_with_background,
    }
    print("=========== box_encoding_reshape", box_encoding_reshape.shape)
    refined_box_encodings = np.squeeze(box_encoding_reshape, axis=1)
    print("=========== class_prediction_reshape",
          class_prediction_reshape.shape)
    class_predictions_with_background = np.squeeze(class_prediction_reshape,
                                                   axis=1)

    _parallel_iterations = 16

    proposal_boxes_normalized = proposal_boxes_normalized[0]

    absolute_proposal_boxes = ops.normalized_to_image_coordinates(
        proposal_boxes_normalized, image_shape, _parallel_iterations)

    prediction_dict1 = {
        'refined_box_encodings': refined_box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'num_proposals': num_proposals,
        'proposal_boxes': absolute_proposal_boxes,
    }

    prediction_dict.update(prediction_dict1)
    result_output = second_postprocess(prediction_dict, true_image_shapes)

    return result_output
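For context, the normalized-to-absolute conversion performed by ops.normalized_to_image_coordinates amounts to a per-coordinate multiply; a sketch with a made-up box and image size:

import numpy as np

boxes = np.array([[0.1, 0.2, 0.5, 0.9]])  # normalized [ymin, xmin, ymax, xmax]
height, width = 600, 1024                 # image_shape[1], image_shape[2]
absolute = boxes * np.array([height, width, height, width])
print(absolute)  # [[ 60.  204.8 300.  921.6]]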