Code example #1
def get_anchors(self, image_shape):
    """Returns an anchor pyramid for the given image size."""
    backbone_shapes = utils.compute_backbone_shapes(
        self.backbone, self.backbone_strides, image_shape)
    # Cache anchors and reuse them when the image shape is the same.
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}
    if tuple(image_shape) not in self._anchor_cache:
        # Generate anchors in pixel coordinates and cache them
        # normalized to the image size.
        a = utils.generate_pyramid_anchors(self.rpn_anchor_scales,
                                           self.rpn_anchor_ratios,
                                           backbone_shapes,
                                           self.backbone_strides,
                                           self.rpn_anchor_stride)
        self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return self._anchor_cache[tuple(image_shape)]
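
Since the method stores the normalized anchors in _anchor_cache and returns the stored object, repeated calls with the same image shape are effectively free. A minimal usage sketch (the model instance and the image shape here are assumptions for illustration):

    anchors_a = model.get_anchors((1024, 1024, 3))
    anchors_b = model.get_anchors((1024, 1024, 3))
    # The second call is served from _anchor_cache; no anchors are regenerated.
    assert anchors_a is anchors_b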
Code example #2
def get_anchors(self, image_shape):
    """Returns anchor pyramid for the given image size."""
    backbone_shapes = utils.compute_backbone_shapes(
        self.config, image_shape)
    # Cache anchors and reuse if image shape is the same
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}
    if tuple(image_shape) not in self._anchor_cache:
        # Generate Anchors
        a = utils.generate_pyramid_anchors(self.config.RPN_ANCHOR_SCALES,
                                           self.config.RPN_ANCHOR_RATIOS,
                                           backbone_shapes,
                                           self.config.BACKBONE_STRIDES,
                                           self.config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in the inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        self.anchors = a
        # Normalize coordinates
        self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return self._anchor_cache[tuple(image_shape)]
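
Example #2 appears to be the matterport Mask_RCNN variant, where the method is typically called once per molded image shape during inference and the result is broadcast across the batch. A hedged sketch of that call site, assuming a model exposing the method above and a batch of molded images:

    import numpy as np

    # All images in a batch are molded to the same shape.
    image_shape = molded_images[0].shape
    anchors = model.get_anchors(image_shape)  # [num_anchors, 4], normalized
    # Duplicate anchors across the batch dimension for the network input.
    anchors = np.broadcast_to(anchors, (batch_size,) + anchors.shape)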
Code example #3
def data_generator(dataset,
                   shuffle=True,
                   augment=False,
                   augmentation=None,
                   random_rois=0,
                   batch_size=1,
                   detection_targets=False,
                   no_augmentation_sources=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    dataset: The Dataset object to pick data from
    shuffle: If True, shuffles the samples before every epoch
    augment: (deprecated, use augmentation instead). If True, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
        For example, passing imgaug.augmenters.Fliplr(0.5) flips images
        right/left 50% of the time.
    random_rois: If > 0 then generate proposals to be used to train the
        network classifier and mask heads. Useful if training the
        Mask R-CNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs, bbox
        deltas, and masks). Typically for debugging or visualizations because
        in training detection targets are generated by DetectionTargetLayer.
    no_augmentation_sources: Optional. List of sources to exclude from
        augmentation. A source is a string that identifies a dataset and is
        defined in the Dataset class.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """
    b = 0  # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)

    error_count = 0
    no_augmentation_sources = no_augmentation_sources or []

    backbone_shapes = utils.compute_backbone_shapes(
        hyper_parameters.FLAGS.BACKBONE,
        hyper_parameters.FLAGS.BACKBONE_STRIDES,
        hyper_parameters.FLAGS.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(
        hyper_parameters.FLAGS.RPN_ANCHOR_SCALES,
        hyper_parameters.FLAGS.RPN_ANCHOR_RATIOS, backbone_shapes,
        hyper_parameters.FLAGS.BACKBONE_STRIDES,
        hyper_parameters.FLAGS.RPN_ANCHOR_STRIDE)

    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[image_index]

            if dataset.image_info[image_id][
                    'source'] in no_augmentation_sources:
                image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                    load_image_gt(dataset, image_id, augment=augment,
                                  augmentation=None,
                                  use_mini_mask=hyper_parameters.FLAGS.USE_MINI_MASK)
            else:
                image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                    load_image_gt(dataset, image_id, augment=augment,
                                  augmentation=augmentation,
                                  use_mini_mask=hyper_parameters.FLAGS.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # RPN Targets
            rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                    gt_class_ids, gt_boxes)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = generate_random_rois(image.shape, random_rois,
                                                gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask = \
                        build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks)

            # Init batch arrays on the first item of each batch.
            if b == 0:
                batch_image_meta = np.zeros((batch_size, ) + image_meta.shape,
                                            dtype=image_meta.dtype)
                batch_rpn_match = np.zeros([batch_size, anchors.shape[0], 1],
                                           dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros([
                    batch_size,
                    hyper_parameters.FLAGS.RPN_TRAIN_ANCHORS_PER_IMAGE, 4
                ],
                                          dtype=rpn_bbox.dtype)
                batch_images = np.zeros((batch_size, ) + image.shape,
                                        dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, hyper_parameters.FLAGS.MAX_GT_INSTANCES),
                    dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, hyper_parameters.FLAGS.MAX_GT_INSTANCES, 4),
                    dtype=np.int32)
                batch_gt_masks = np.zeros(
                    (batch_size, gt_masks.shape[0], gt_masks.shape[1],
                     hyper_parameters.FLAGS.MAX_GT_INSTANCES),
                    dtype=gt_masks.dtype)

                if random_rois:
                    batch_rpn_rois = np.zeros((batch_size, random_rois, 4),
                                              dtype=np.int32)
                    if detection_targets:
                        batch_rois = np.zeros((batch_size, ) + rois.shape,
                                              dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size, ) + mrcnn_class_ids.shape,
                            dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size, ) + mrcnn_bbox.shape,
                            dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size, ) + mrcnn_mask.shape,
                            dtype=mrcnn_mask.dtype)

            # If there are more instances than fit in the arrays, sub-sample.
            if gt_boxes.shape[0] > hyper_parameters.FLAGS.MAX_GT_INSTANCES:
                ids = np.random.choice(np.arange(gt_boxes.shape[0]),
                                       hyper_parameters.FLAGS.MAX_GT_INSTANCES,
                                       replace=False)
                gt_boxes = gt_boxes[ids]
                gt_class_ids = gt_class_ids[ids]
                gt_masks = gt_masks[:, :, ids]

            # Add to batch
            batch_image_meta[b] = image_meta
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = utils.mold_image(
                image.astype(np.float32), hyper_parameters.FLAGS.MEAN_PIXEL)
            batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask

            b += 1

            if b >= batch_size:
                inputs = [
                    batch_images, batch_image_meta, batch_rpn_match,
                    batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes,
                    batch_gt_masks
                ]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the same number of dimensions
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend([
                            batch_mrcnn_class_ids, batch_mrcnn_bbox,
                            batch_mrcnn_mask
                        ])

                yield inputs, outputs

                b = 0

        except (GeneratorExit, KeyboardInterrupt):
            raise
        except Exception:
            # Log it and skip the image
            logging.exception("Error processing image {}".format(
                dataset.image_info[image_id]))
            error_count += 1
            if error_count > 5:
                raise
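
To make the batch layout concrete, here is a minimal consumption sketch; train_dataset is an assumed, already-prepared Dataset instance:

    gen = data_generator(train_dataset, shuffle=True, batch_size=2)
    # outputs is an empty list unless detection_targets=True.
    inputs, outputs = next(gen)
    (images, image_meta, rpn_match, rpn_bbox,
     gt_class_ids, gt_boxes, gt_masks) = inputs
    print(images.shape)     # (2, H, W, 3), molded with utils.mold_image()
    print(rpn_match.shape)  # (2, num_anchors, 1)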
Code example #4
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.
    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.
    """

    b = 0 # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)
    error_count = 0
    no_augmentation_sources = no_augmentation_sources or []

    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = utils.compute_backbone_shapes(config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    # Keras requires a generator to run indefinitely.
    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)
            
            # Get GT bounding boxes and masks for image.
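
Example #4 breaks off here; the remainder follows the same flow as example #3. For reference, the sketch below reconstructs what compute_backbone_shapes computes in these codebases; the helper name and stride values come from the examples, but the body itself is an assumption, not source code:

    import math
    import numpy as np

    def compute_backbone_shapes_sketch(image_shape, strides=(4, 8, 16, 32, 64)):
        # One [height, width] feature-map size per pyramid level.
        return np.array([[int(math.ceil(image_shape[0] / s)),
                          int(math.ceil(image_shape[1] / s))] for s in strides])

    compute_backbone_shapes_sketch((1024, 1024, 3))
    # -> [[256 256] [128 128] [64 64] [32 32] [16 16]]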
Code example #5
    def __init__(self,
                 mode,
                 rpn_anchor_ratios,
                 rpn_anchor_scales,
                 mask_shape,
                 pool_size,
                 image_shape,
                 mini_mask_shape,
                 backbone_strides,
                 mean_pixel,
                 roi_size=7,
                 backbone='resnet50',
                 stage5=True,
                 norm='batch',
                 use_bias=True,
                 rpn_anchor_stride=1,
                 image_per_gpu=1,
                 gpu_count=1,
                 detection_max_instances=100,
                 train_rois_per_image=200,
                 num_classes=1,
                 use_mini_mask=True,
                 use_pretrained_model=True,
                 top_down_pyramid_size=256,
                 post_nms_rois_training=2000,
                 post_nms_rois_inference=1000,
                 pre_nms_limit=6000,
                 rpn_nms_threshold=0.7,
                 use_rpn_rois=True,
                 model_dir=None,
                 optimizer_method='Adam',
                 learning_rate=0.001,
                 momentum=0.9,
                 weight_decay=0.0001,
                 image_min_dim=800,
                 image_max_dim=1024,
                 image_min_scale=0.0,
                 image_resize_mode='square',
                 max_gt_instances=100,
                 rpn_train_anchors_per_image=256):

        assert mode in ['training', 'inference']
        assert optimizer_method in ['Adam', 'SGD']

        tf.reset_default_graph()
        self.graph = tf.Graph()

        self.mode = mode
        self.rpn_anchor_ratios = rpn_anchor_ratios
        self.rpn_anchor_scales = rpn_anchor_scales
        self.mask_shape = mask_shape
        self.pool_size = pool_size
        self.image_shape = np.array(image_shape)
        self.mini_mask_shape = mini_mask_shape
        self.backbone_strides = backbone_strides
        self.mean_pixel = mean_pixel

        self.roi_size = roi_size
        self.backbone = backbone
        self.stage5 = stage5
        self.norm = norm
        self.use_bias = use_bias
        self.rpn_anchor_stride = rpn_anchor_stride
        self.image_per_gpu = image_per_gpu
        self.gpu_count = gpu_count
        self.detection_max_instances = detection_max_instances
        self.train_rois_per_image = train_rois_per_image
        self.num_classes = num_classes
        self.use_mini_mask = use_mini_mask
        self.use_pretrained_model = use_pretrained_model
        self.top_down_pyramid_size = top_down_pyramid_size
        self.post_nms_rois_training = post_nms_rois_training
        self.post_nms_rois_inference = post_nms_rois_inference
        self.pre_nms_limit = pre_nms_limit
        self.rpn_nms_threshold = rpn_nms_threshold
        self.use_rpn_rois = use_rpn_rois
        self.model_dir = model_dir
        self.optimizer_method = optimizer_method
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.image_min_dim = image_min_dim
        self.image_max_dim = image_max_dim
        self.image_min_scale = image_min_scale
        self.image_resize_mode = image_resize_mode
        self.max_gt_instances = max_gt_instances
        self.rpn_train_anchors_per_image = rpn_train_anchors_per_image

        # image_id (1) + original image shape (3) + molded image shape (3)
        # + window (4) + scale (1) + active class IDs (num_classes)
        self.image_meta_size = 1 + 3 + 3 + 4 + 1 + self.num_classes
        self.reuse = False
        self._anchor_cache = {}
        self.batch_size = self.gpu_count * self.image_per_gpu
        self.backbone_shape = utils.compute_backbone_shapes(
            self.backbone, self.backbone_strides, self.image_shape)
        # Anchors per image: anchors per location times the total number of
        # feature-map cells (height * width) across the pyramid levels.
        self.num_anchors_per_image = len(self.rpn_anchor_ratios) * sum(
            int(shape[0]) * int(shape[1]) for shape in self.backbone_shape)

        with self.graph.as_default():

            self.is_training = tf.placeholder_with_default(False, [])
            self.input_image = tf.placeholder(dtype=tf.float32,
                                              shape=[
                                                  None, self.image_shape[0],
                                                  self.image_shape[1],
                                                  self.image_shape[2]
                                              ],
                                              name='input_image')
            self.input_image_meta = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.image_meta_size],
                name='input_image_meta')

            if mode == 'training':
                self.input_rpn_match = tf.placeholder(
                    dtype=tf.int32,
                    shape=[None, self.num_anchors_per_image, 1],
                    name='input_rpn_match')
                self.input_rpn_boxes = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, self.rpn_train_anchors_per_image, 4],
                    name='input_rpn_boxes')
                self.input_gt_class_ids = tf.placeholder(
                    dtype=tf.int32,
                    shape=[None, self.max_gt_instances],
                    name='input_gt_class_ids')
                self.input_gt_boxes = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, self.max_gt_instances, 4],
                    name='input_gt_boxes')
                self.input_gt_boxes_normalized = utils.norm_boxes_graph(
                    self.input_gt_boxes,
                    tf.shape(self.input_image)[1:3])
                self.proposal_count = self.post_nms_rois_training
                if self.use_mini_mask:
                    self.input_gt_masks = tf.placeholder(
                        dtype=tf.bool,
                        shape=[
                            None, self.mini_mask_shape[0],
                            self.mini_mask_shape[1], self.max_gt_instances
                        ],
                        name='input_gt_mask')
                else:
                    self.input_gt_masks = tf.placeholder(
                        dtype=tf.bool,
                        shape=[
                            None, self.image_shape[0], self.image_shape[1],
                            self.max_gt_instances
                        ],
                        name='input_gt_mask')

            elif mode == 'inference':
                self.input_anchors = tf.placeholder(dtype=tf.float32,
                                                    shape=[None, None, 4],
                                                    name='input_anchors')
                self.proposal_count = self.post_nms_rois_inference

            self.resnet = Resnet(name='resnet',
                                 architecture=self.backbone,
                                 is_training=self.is_training,
                                 stage5=self.stage5,
                                 use_bias=self.use_bias)

            arg_scope = nets.resnet_v2.resnet_arg_scope()
            with slim.arg_scope(arg_scope):
                _, self.end_points = nets.resnet_v2.resnet_v2_50(
                    self.input_image,
                    num_classes=None,
                    is_training=self.is_training)

            self.fpn = FPN(name='fpn',
                           top_down_pyramid_size=self.top_down_pyramid_size,
                           use_bias=self.use_bias)

            self.rpn = RPN(name='rpn',
                           anchors_per_location=len(self.rpn_anchor_ratios),
                           anchor_stride=self.rpn_anchor_stride,
                           is_training=self.is_training,
                           use_bias=self.use_bias)
            self.proposal = ProposalLayer(self.pre_nms_limit,
                                          self.proposal_count,
                                          self.rpn_nms_threshold,
                                          self.image_per_gpu)
            self.pyramidRoiPooling = PyramidRoiPooling(
                name='PyramidRoiPooling', roi_size=self.roi_size)
            self.objDetection = ObjDetection(
                image_per_gpu=self.image_per_gpu,
                gpu_count=self.gpu_count,
                detection_max_instances=self.detection_max_instances)
            self.targetDetection = TargetDetection(
                mask_shape=self.mask_shape,
                image_per_gpu=self.image_per_gpu,
                train_rois_per_image=self.train_rois_per_image)
            self.fpnClassifier = FpnClassifier('FpnClassifier',
                                               pool_size=self.pool_size,
                                               num_classes=self.num_classes,
                                               is_training=self.is_training)
            self.fpnMask = FpnMask('FpnMask',
                                   num_classes=self.num_classes,
                                   is_training=self.is_training)
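
Every parameter after mean_pixel has a default, so instantiation only needs the first nine arguments. A hypothetical construction sketch; the class name MaskRCNN and all values below are assumptions for illustration, not defaults from the source:

    import numpy as np

    model = MaskRCNN(mode='training',
                     rpn_anchor_ratios=[0.5, 1, 2],
                     rpn_anchor_scales=(32, 64, 128, 256, 512),
                     mask_shape=[28, 28],
                     pool_size=7,
                     image_shape=[1024, 1024, 3],
                     mini_mask_shape=(56, 56),
                     backbone_strides=[4, 8, 16, 32, 64],
                     mean_pixel=np.array([123.7, 116.8, 103.9]),
                     num_classes=81)  # e.g. COCO: 80 classes + background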