Beispiel #1
 def __init__(self, batch_size, **kwargs):
     """Set up the detection layer.

     :param batch_size: value stored on the instance as ``batch_size``.
     :param kwargs: forwarded unchanged to the parent constructor.
     """
     super(DetectionLayer, self).__init__(**kwargs)

     # Upper bound on detections, taken from the test-time configuration.
     self.detection_max_instances = cfg.TEST.DETECTION_MAX_INSTANCES
     self.batch_size = batch_size

     # Helper objects kept on the instance.
     self.image_utils = ImageUtils()
     self.bbox_utils = BboxUtil()
     self.misc_utils = MiscUtils()
Beispiel #2
    def __init__(self, batch_size, **kwargs):
        """Set up the detection-target layer.

        :param batch_size: value stored on the instance as ``batch_size``.
        :param kwargs: forwarded unchanged to the parent constructor.
        """
        super(DetectionTargetLayer, self).__init__(**kwargs)

        # Helper object kept on the instance.
        self.misc_utils = MiscUtils()
        self.batch_size = batch_size

        # Training settings read from the global configuration.
        self.rois_per_image = cfg.TRAIN.ROIS_PER_IMAGE
        self.mask_shape = cfg.TRAIN.MASK_SHAPE
    def __init__(self):
        """Create the helper objects and an empty anchor cache."""
        # Anchors are stored here so they can be reused when the same image
        # shape is requested again.
        self._anchor_cache = dict()

        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()
Beispiel #4
    def __init__(self, proposal_count, nms_threshold, batch_size, **kwargs):
        """Set up the proposal layer.

        :param proposal_count: value stored on the instance as ``proposal_count``.
        :param nms_threshold: value stored on the instance as ``nms_threshold``.
        :param batch_size: value stored on the instance as ``batch_size``.
        :param kwargs: forwarded unchanged to the parent constructor.
        """
        super(ProposalLayer, self).__init__(**kwargs)

        # Helper objects kept on the instance.
        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()

        # Caller-supplied settings.
        self.batch_size = batch_size
        self.nms_threshold = nms_threshold
        self.proposal_count = proposal_count

Beispiel #5
class DetectionLayer(KE.Layer):
    """
    Takes classified proposal boxes and their bounding box deltas and
    returns the final detection boxes.

    Returns:
        [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where
        coordinates are normalized.
    """

    def __init__(self, batch_size, **kwargs):
        """
        :param batch_size: number of batch items; used to slice the inputs
            and to reshape the output in ``call``.
        :param kwargs: forwarded unchanged to the parent layer constructor.
        """
        super(DetectionLayer, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.detection_max_instances = cfg.TEST.DETECTION_MAX_INSTANCES
        self.image_utils = ImageUtils()
        self.bbox_utils = BboxUtil()
        self.misc_utils = MiscUtils()

    def call(self, inputs):
        """
        Run detection refinement on every item of the batch.

        :param inputs: sequence read as
            [rois, mrcnn_class, mrcnn_bbox, image_meta]
        :return: tensor reshaped to
            [batch_size, detection_max_instances, 6]
        """
        rois = inputs[0]
        mrcnn_class = inputs[1]
        mrcnn_bbox = inputs[2]
        image_meta = inputs[3]

        # Get windows of images in normalized coordinates. Windows are the area
        # in the image that excludes the padding.
        # Use the shape of the first image in the batch to normalize the window
        # because we know that all images get resized to the same size.
        m = self.image_utils.parse_image_meta_graph(image_meta)
        image_shape = m['image_shape'][0]
        window = self.bbox_utils.norm_boxes_graph(m['window'], image_shape[:2])

        # Run detection refinement graph on each item in the batch.
        # NOTE(review): the tail of this call was missing from the file; the
        # batch-size argument was restored as self.batch_size -- confirm
        # against the signature of MiscUtils.batch_slice.
        detections_batch = self.misc_utils.batch_slice(
            [rois, mrcnn_class, mrcnn_bbox, window],
            lambda x, y, w, z: refine_detections_graph(x, y, w, z),
            self.batch_size)

        # Reshape output
        # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in
        # normalized coordinates
        return tf.reshape(detections_batch,
                          [self.batch_size, self.detection_max_instances, 6])

    def compute_output_shape(self, input_shape):
        """Return the static output shape; the batch dimension is left as None."""
        return (None, self.detection_max_instances, 6)
Beispiel #6
class DetectionTargetLayer(KE.Layer):
    """
    Subsamples proposals and generates target box refinement, class_ids, and masks for each.

    Inputs:
        proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might
           be zero padded if there are not enough proposals.
        gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs.
        gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
            coordinates.
        gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type

    Returns: Target ROIs and corresponding class IDs, bounding box shifts, and masks.
        rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
            coordinates
        target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
        target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]
        target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]
                     Masks cropped to bbox boundaries and resized to neural
                     network output size.

    Note: Returned arrays might be zero padded if not enough target ROIs.
    """

    def __init__(self, batch_size, **kwargs):
        """
        :param batch_size: number of batch items; used to slice the inputs
            in ``call``.
        :param kwargs: forwarded unchanged to the parent layer constructor.
        """
        super(DetectionTargetLayer, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.misc_utils = MiscUtils()

        self.rois_per_image = cfg.TRAIN.ROIS_PER_IMAGE
        self.mask_shape = cfg.TRAIN.MASK_SHAPE

    def call(self, inputs):
        """
        Build the detection targets for every item of the batch.

        This method is not called from ``__init__``; it runs when the layer
        is applied to its inputs (presumably through the Keras ``Layer``
        call mechanism -- confirm against the KE module in use).

        :param inputs: sequence read as
            [proposals, gt_class_ids, gt_boxes, gt_masks]
        :return: whatever ``batch_slice`` yields for the four targets named
            "rois", "target_class_ids", "target_bbox" and "target_mask".
        """
        proposals = inputs[0]
        gt_class_ids = inputs[1]
        gt_boxes = inputs[2]
        gt_masks = inputs[3]

        # Slice the batch and run a graph for each slice
        # TODO: Rename target_bbox to target_deltas for clarity
        names = ["rois", "target_class_ids", "target_bbox", "target_mask"]
        # NOTE(review): the tail of this call was missing from the file; the
        # batch size and the otherwise-unused `names` list were restored as
        # its trailing arguments -- confirm against MiscUtils.batch_slice.
        outputs = self.misc_utils.batch_slice(
            [proposals, gt_class_ids, gt_boxes, gt_masks],
            lambda w, x, y, z: self.misc_utils.detection_targets_graph(
                w, x, y, z),
            self.batch_size, names=names)
        return outputs

    def compute_output_shape(self, input_shape):
        """Return the static shapes of the four outputs (batch dimension None)."""
        return [
            (None, self.rois_per_image, 4),  # rois
            (None, self.rois_per_image),  # class_ids
            (None, self.rois_per_image, 4),  # deltas
            (None, self.rois_per_image, self.mask_shape[0], self.mask_shape[1]
             )  # masks
        ]

    def compute_mask(self, inputs, mask=None):
        """Return no mask for any of the four outputs."""
        return [None, None, None, None]
Beispiel #7
class ProposalLayer(KE.Layer):
    """
    Receives anchor scores and selects a subset to pass as proposals
    to the second stage. Filtering is done based on anchor scores and
    non-max suppression to remove overlaps. It also applies bounding
    box refinement deltas to anchors.

    Inputs:
        rpn_probs: [batch, num_anchors, (bg prob, fg prob)]
        rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))]
        anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates

    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """

    def __init__(self, proposal_count, nms_threshold, batch_size, **kwargs):
        """
        :param proposal_count: number of proposals kept per batch item;
            results are zero padded up to this count.
        :param nms_threshold: threshold handed to non-max suppression.
        :param batch_size: number of batch items; used to slice the inputs.
        :param kwargs: forwarded unchanged to the parent layer constructor.
        """
        super(ProposalLayer, self).__init__(**kwargs)

        self.proposal_count = proposal_count
        self.nms_threshold = nms_threshold
        self.batch_size = batch_size

        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()

    def call(self, inputs):
        """
        Turn anchor scores and deltas into a fixed number of proposals.

        This method is not called from ``__init__``; it runs when the layer
        is applied to its inputs (presumably through the Keras ``Layer``
        call mechanism -- confirm against the KE module in use).

        NOTE(review): the continuation lines of several calls in this method
        were missing from the file. Their arguments were restored from the
        surrounding comments and from the constructor parameters; optional
        ``name``/``names`` arguments, if any existed, were not restored.
        Confirm against MiscUtils.batch_slice and the intended graph names.

        :param inputs: sequence read as [rpn_probs, rpn_bbox, anchors]
        :return: proposals produced by per-item non-max suppression, padded
            with zeros up to ``proposal_count`` rows.
        """
        # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
        scores = inputs[0][:, :, 1]
        # Box deltas [batch, num_rois, 4]
        deltas = inputs[1]
        rpn_bbox_std_dev = np.array(cfg.COMMON.RPN_BBOX_STD_DEV)
        deltas = deltas * np.reshape(rpn_bbox_std_dev, [1, 1, 4])
        # Anchors
        anchors = inputs[2]

        # Improve performance by trimming to top anchors by score
        # and doing the rest on the smaller subset.
        # The limit cannot exceed the number of anchors actually available.
        pre_nms_limit = tf.minimum(cfg.COMMON.PRE_NMS_LIMIT,
                                   tf.shape(anchors)[1])
        ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True).indices

        scores = self.misc_utils.batch_slice([scores, ix],
                                             tf.gather,
                                             self.batch_size)
        deltas = self.misc_utils.batch_slice([deltas, ix],
                                             tf.gather,
                                             self.batch_size)
        pre_nms_anchors = self.misc_utils.batch_slice(
            [anchors, ix],
            tf.gather,
            self.batch_size)

        # Apply deltas to anchors to get refined anchors.
        # [batch, N, (y1, x1, y2, x2)]
        boxes = self.misc_utils.batch_slice(
            [pre_nms_anchors, deltas],
            lambda x, y: self.bbox_utils.apply_box_deltas_graph(x, y),
            self.batch_size)

        # Clip to image boundaries. Since we're in normalized coordinates,
        # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)]
        window = np.array([0, 0, 1, 1], dtype=np.float32)
        boxes = self.misc_utils.batch_slice(
            [boxes],
            lambda x: self.bbox_utils.clip_boxes_graph(x, window),
            self.batch_size)

        # Filter out small boxes
        # According to Xinlei Chen's paper, this reduces detection accuracy
        # for small objects, so we're skipping it.

        # Non-max suppression
        def nms(boxes, scores):
            indices = tf.image.non_max_suppression(
                boxes, scores, self.proposal_count, self.nms_threshold)
            proposals = tf.gather(boxes, indices)
            # Pad if needed so every batch item yields proposal_count rows.
            padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0],
                                 0)
            proposals = tf.pad(proposals, [(0, padding), (0, 0)])
            return proposals

        proposals = self.misc_utils.batch_slice([boxes, scores], nms,
                                                self.batch_size)

        return proposals

    def compute_output_shape(self, input_shape):
        """Return the static output shape; the batch dimension is left as None."""
        return (None, self.proposal_count, 4)
class AnchorUtils(object):
    """Generates pyramid anchor boxes and caches the normalized result per image shape."""

    def __init__(self):
        """Create the helper objects and an empty anchor cache."""
        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()

        # Cache anchors and reuse if image shape is the same
        self._anchor_cache = {}

    def get_anchors(self, image_shape):
        """
        :param image_shape: shape of the image; its first two entries are
            passed to ``norm_boxes`` and the whole shape is the cache key.
        :return: Returns anchor pyramid for the given image size
        """
        cache_key = tuple(image_shape)
        if cache_key not in self._anchor_cache:
            # Generate anchors, then store them after norm_boxes has been
            # applied with the image's (height, width).
            anchor = self.generate_pyramid_anchors(image_shape)
            self._anchor_cache[cache_key] = self.bbox_utils.norm_boxes(
                anchor, image_shape[:2])

        return self._anchor_cache[cache_key]

    def generate_pyramid_anchors(self, image_shape):
        """
            Generate anchors at different levels of a feature pyramid.
            Each scale is associated with a level of the pyramid,
            but each ratio is used in all levels of the pyramid.
        :param image_shape: [h, w, c]
        :return: anchors: [N, (y1, x1, y2, x2)]
            All generated anchors in one array.
            Sorted with the same order of the given scales.
            So, anchors of scale[0] come first, then anchors of scale[1], and so on.
        """
        backbone_strides = cfg.COMMON.BACKBONE_STRIDES
        # [N, (height, width)]. Where N is the number of stages
        backbone_shape = self.misc_utils.compute_backbone_shapes(
            image_shape, backbone_strides)

        # Anchors
        # [anchor_count, (y1, x1, y2, x2)]
        anchors = []
        scales = cfg.COMMON.RPN_ANCHOR_SCALES

        # One scale per pyramid level; indexing (rather than zip) keeps a
        # length mismatch between the config lists a loud IndexError.
        for level, scale in enumerate(scales):
            anchor_box = self.generate_anchors(scale, backbone_shape[level],
                                               backbone_strides[level])
            anchors.append(anchor_box)

        return np.concatenate(anchors, axis=0)

    # generate anchor box
    def generate_anchors(self, scales, backbone_shape, backbone_strides):
        """
        :param scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
        :param backbone_shape: [height, width] spatial shape of the feature map over which to generate anchors.
        :param backbone_strides: Stride of the feature map relative to the image in pixels.
        :return: anchor box: Convert to corner coordinates (y1, x1, y2, x2)
        """
        # 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
        ratios = cfg.COMMON.RPN_ANCHOR_RATIOS

        # Stride of anchors on the feature map. For example,
        # if the value is 2 then generate anchors for every other feature map pixel.
        anchor_stride = cfg.COMMON.RPN_ANCHOR_STRIDE

        # Get all combinations of scales and ratios
        scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
        scales = scales.flatten()
        ratios = ratios.flatten()

        # Enumerate heights and widths from scales and ratios
        heights = scales / np.sqrt(ratios)
        widths = scales * np.sqrt(ratios)

        # Enumerate shifts in feature space
        shifts_y = np.arange(0, backbone_shape[0],
                             anchor_stride) * backbone_strides
        shifts_x = np.arange(0, backbone_shape[1],
                             anchor_stride) * backbone_strides
        shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)

        # Enumerate combinations of shifts, widths, and heights
        box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
        box_heights, box_centers_y = np.meshgrid(heights, shifts_y)

        # Reshape to get a list of (y, x) and a list of (h, w)
        box_centers = np.stack([box_centers_y, box_centers_x],
                               axis=2).reshape([-1, 2])
        box_sizes = np.stack([box_heights, box_widths],
                             axis=2).reshape([-1, 2])

        # Convert to corner coordinates (y1, x1, y2, x2): joining the two
        # [N, 2] corner arrays along axis 1 gives one [N, 4] row per box.
        boxes = np.concatenate(
            [box_centers - 0.5 * box_sizes, box_centers + 0.5 * box_sizes],
            axis=1)
        return boxes