def get_anchors(self, image_shape):
    """Returns the normalized anchor pyramid for the given image shape.

    Anchors are generated once per distinct image shape and memoized in
    self._anchor_cache under the key tuple(image_shape). A later call with
    the same shape returns the cached object without recomputing either the
    backbone shapes or the anchors.

    image_shape: Sequence describing the image. It is passed whole to
        utils.compute_backbone_shapes, and its first two entries are passed
        to utils.norm_boxes as the normalization shape.

    Returns the result of utils.norm_boxes() applied to the generated
    anchors.
    """
    # The cache may not exist yet if the instance was created without it.
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}

    # Hash the shape once instead of rebuilding the tuple for every lookup.
    cache_key = tuple(image_shape)
    if cache_key not in self._anchor_cache:
        # Backbone shapes are only needed to build new anchors, so they are
        # computed on a cache miss rather than on every call.
        backbone_shapes = utils.compute_backbone_shapes(
            self.backbone, self.backbone_strides, image_shape)
        anchors = utils.generate_pyramid_anchors(
            self.rpn_anchor_scales,
            self.rpn_anchor_ratios,
            backbone_shapes,
            self.backbone_strides,
            self.rpn_anchor_stride)
        self._anchor_cache[cache_key] = utils.norm_boxes(
            anchors, image_shape[:2])
    return self._anchor_cache[cache_key]
def get_anchors(self, image_shape):
    """Returns anchor pyramid for the given image size.

    Anchors are generated once per distinct image shape and memoized in
    self._anchor_cache under the key tuple(image_shape). A later call with
    the same shape returns the cached object without recomputing either the
    backbone shapes or the anchors.

    image_shape: Sequence describing the image. It is passed whole to
        utils.compute_backbone_shapes, and its first two entries are passed
        to utils.norm_boxes as the normalization shape.

    Returns the result of utils.norm_boxes() applied to the generated
    anchors. As a side effect, on a cache miss the un-normalized anchors
    are stored in self.anchors.
    """
    # Cache anchors and reuse if image shape is the same
    if not hasattr(self, "_anchor_cache"):
        self._anchor_cache = {}

    # Hash the shape once instead of rebuilding the tuple for every lookup.
    cache_key = tuple(image_shape)
    if cache_key not in self._anchor_cache:
        # Backbone shapes are only needed to build new anchors, so they are
        # computed on a cache miss rather than on every call.
        backbone_shapes = utils.compute_backbone_shapes(
            self.config, image_shape)
        # Generate Anchors
        anchors = utils.generate_pyramid_anchors(
            self.config.RPN_ANCHOR_SCALES,
            self.config.RPN_ANCHOR_RATIOS,
            backbone_shapes,
            self.config.BACKBONE_STRIDES,
            self.config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in inspect_model notebooks.
        # TODO: Remove this after the notebooks are refactored to not use it
        self.anchors = anchors
        # Normalize coordinates
        self._anchor_cache[cache_key] = utils.norm_boxes(
            anchors, image_shape[:2])
    return self._anchor_cache[cache_key]
def data_generator(dataset,
                   shuffle=True,
                   augment=False,
                   augmentation=None,
                   random_rois=0,
                   batch_size=1,
                   detection_targets=False,
                   no_augmentation_sources=None):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    Model settings (backbone, strides, image shape, anchor parameters,
    MAX_GT_INSTANCES, MEAN_PIXEL, USE_MINI_MASK, ...) are not passed in;
    they are read from hyper_parameters.FLAGS.

    dataset: The Dataset object to pick data from
    shuffle: If True, shuffles the samples before every epoch
    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
        augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
        flips images right/left 50% of the time.
    random_rois: If > 0 then generate proposals to be used to train the
        network classifier and mask heads. Useful if training the
        Mask RCNN part without the RPN.
    batch_size: How many images to return in each call
    detection_targets: If True, generate detection targets (class IDs, bbox
        deltas, and masks). Typically for debugging or visualizations because
        in training detection targets are generated by DetectionTargetLayer.
        Detection targets are built from the random ROIs, so this only has
        an effect when random_rois is also set.
    no_augmentation_sources: Optional. List of sources to exclude for
        augmentation. A source is string that identifies a dataset and is
        defined in the Dataset class.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differs depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N, 1] Integer (1=positive anchor, -1=negative,
                 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
        is True then the outputs list contains target class_ids, bbox deltas,
        and masks.

    Raises: re-raises the current exception once more than 5 images have
        failed to process over the lifetime of the generator.
    """
    b = 0  # batch item index
    image_index = -1
    # Bound up front so the error handler can always reference it, even when
    # the failure happens before the first image id is picked.
    image_id = None
    image_ids = np.copy(dataset.image_ids)
    error_count = 0
    no_augmentation_sources = no_augmentation_sources or []

    # Anchors are computed once for the configured image shape and reused
    # for every image produced by this generator.
    backbone_shapes = utils.compute_backbone_shapes(
        hyper_parameters.FLAGS.BACKBONE,
        hyper_parameters.FLAGS.BACKBONE_STRIDES,
        hyper_parameters.FLAGS.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(
        hyper_parameters.FLAGS.RPN_ANCHOR_SCALES,
        hyper_parameters.FLAGS.RPN_ANCHOR_RATIOS,
        backbone_shapes,
        hyper_parameters.FLAGS.BACKBONE_STRIDES,
        hyper_parameters.FLAGS.RPN_ANCHOR_STRIDE)

    # The generator runs indefinitely; the consumer decides when to stop.
    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of
            # an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image. Images whose source
            # is excluded from augmentation are loaded with augmentation=None.
            image_id = image_ids[image_index]
            source = dataset.image_info[image_id]['source']
            image_augmentation = (None if source in no_augmentation_sources
                                  else augmentation)
            image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
                load_image_gt(
                    dataset,
                    image_id,
                    augment=augment,
                    augmentation=image_augmentation,
                    use_mini_mask=hyper_parameters.FLAGS.USE_MINI_MASK)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # RPN Targets
            rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                    gt_class_ids, gt_boxes)

            # Mask R-CNN Targets
            if random_rois:
                rpn_rois = generate_random_rois(image.shape, random_rois,
                                                gt_class_ids, gt_boxes)
                if detection_targets:
                    rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask = \
                        build_detection_targets(
                            rpn_rois, gt_class_ids, gt_boxes, gt_masks)

            # Allocate the batch arrays at the start of every batch; their
            # shapes and dtypes are taken from the first image of the batch.
            if b == 0:
                batch_image_meta = np.zeros(
                    (batch_size, ) + image_meta.shape,
                    dtype=image_meta.dtype)
                batch_rpn_match = np.zeros(
                    [batch_size, anchors.shape[0], 1],
                    dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros(
                    [
                        batch_size,
                        hyper_parameters.FLAGS.RPN_TRAIN_ANCHORS_PER_IMAGE, 4
                    ],
                    dtype=rpn_bbox.dtype)
                batch_images = np.zeros((batch_size, ) + image.shape,
                                        dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, hyper_parameters.FLAGS.MAX_GT_INSTANCES),
                    dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, hyper_parameters.FLAGS.MAX_GT_INSTANCES, 4),
                    dtype=np.int32)
                batch_gt_masks = np.zeros(
                    (batch_size, gt_masks.shape[0], gt_masks.shape[1],
                     hyper_parameters.FLAGS.MAX_GT_INSTANCES),
                    dtype=gt_masks.dtype)
                if random_rois:
                    batch_rpn_rois = np.zeros((batch_size, random_rois, 4),
                                              dtype=np.int32)
                    if detection_targets:
                        batch_rois = np.zeros((batch_size, ) + rois.shape,
                                              dtype=rois.dtype)
                        batch_mrcnn_class_ids = np.zeros(
                            (batch_size, ) + mrcnn_class_ids.shape,
                            dtype=mrcnn_class_ids.dtype)
                        batch_mrcnn_bbox = np.zeros(
                            (batch_size, ) + mrcnn_bbox.shape,
                            dtype=mrcnn_bbox.dtype)
                        batch_mrcnn_mask = np.zeros(
                            (batch_size, ) + mrcnn_mask.shape,
                            dtype=mrcnn_mask.dtype)

            # If more instances than fits in the array, sub-sample from them.
            if gt_boxes.shape[0] > hyper_parameters.FLAGS.MAX_GT_INSTANCES:
                ids = np.random.choice(
                    np.arange(gt_boxes.shape[0]),
                    hyper_parameters.FLAGS.MAX_GT_INSTANCES,
                    replace=False)
                gt_boxes = gt_boxes[ids]
                gt_class_ids = gt_class_ids[ids]
                gt_masks = gt_masks[:, :, ids]

            # Add to batch. Unused trailing instance slots stay zero.
            batch_image_meta[b] = image_meta
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox
            batch_images[b] = utils.mold_image(
                image.astype(np.float32), hyper_parameters.FLAGS.MEAN_PIXEL)
            batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids
            batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks
            if random_rois:
                batch_rpn_rois[b] = rpn_rois
                if detection_targets:
                    batch_rois[b] = rois
                    batch_mrcnn_class_ids[b] = mrcnn_class_ids
                    batch_mrcnn_bbox[b] = mrcnn_bbox
                    batch_mrcnn_mask[b] = mrcnn_mask
            b += 1

            # Batch full?
            if b >= batch_size:
                inputs = [
                    batch_images, batch_image_meta, batch_rpn_match,
                    batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes,
                    batch_gt_masks
                ]
                outputs = []

                if random_rois:
                    inputs.extend([batch_rpn_rois])
                    if detection_targets:
                        inputs.extend([batch_rois])
                        # Keras requires that output and targets have the
                        # same number of dimensions
                        batch_mrcnn_class_ids = np.expand_dims(
                            batch_mrcnn_class_ids, -1)
                        outputs.extend([
                            batch_mrcnn_class_ids, batch_mrcnn_bbox,
                            batch_mrcnn_mask
                        ])

                yield inputs, outputs

                # start a new batch
                b = 0
        except Exception:
            # GeneratorExit, KeyboardInterrupt and SystemExit derive from
            # BaseException, not Exception, so they propagate untouched.
            # Log it and skip the image
            failed_image = (dataset.image_info[image_id]
                            if image_id is not None else None)
            logging.exception(
                "Error processing image {}".format(failed_image))
            error_count += 1
            if error_count > 5:
                raise
are those of the image unless use_mini_mask is True, in which case they are defined in MINI_MASK_SHAPE. outputs list: Usually empty in regular training. But if detection_targets is True then the outputs list contains target class_ids, bbox deltas, and masks. """ b = 0 # batch item index image_index = -1 image_ids = np.copy(dataset.image_ids) error_count = 0 no_augmentation_source = no_augmentation_sources or [] # Anchors # [anchor_count, (y1, x1, y2, x2)] backbone_shapes = utils.compute_backbone_shapes(config.IMAGE_SHAPE) anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS, backbone_shapes, config.BACKBONE_STRIDES, config.RPN_ANCHOR_STRIDE) # Keras equires a generator to run indefinitely. while True: try: # Increment index to pick next image. Shuffle if at the start of an epoch. image_index = (image_index + 1) % len(image_ids) if shuffle and image_index == 0: np.random.shuffle(image_ids) # Get GT bounding boxes and masks for image.
def __init__(self,
             mode,
             rpn_anchor_ratios,
             rpn_anchor_scales,
             mask_shape,
             pool_size,
             image_shape,
             mini_mask_shape,
             backbone_strides,
             mean_pixel,
             roi_size=7,
             backbone='resnet50',
             stage5=True,
             norm='batch',
             use_bias=True,
             rpn_anchor_stride=1,
             image_per_gpu=1,
             gpu_count=1,
             detection_max_instances=100,
             train_rois_per_image=200,
             num_classes=1,
             use_mini_mask=True,
             use_pretrained_model=True,
             top_down_pyramid_size=256,
             post_nms_rois_training=2000,
             post_nms_rois_inference=1000,
             pre_nms_limit=6000,
             rpn_nms_threshold=0.7,
             use_rpn_rois=True,
             model_dir=None,
             optimizer_method='Adam',
             learning_rate=0.001,
             momentum=0.9,
             weight_decay=0.0001,
             image_min_dim=800,
             image_max_dim=1024,
             image_min_scale=0.0,
             image_resize_mode='square',
             max_gt_instances=100,
             rpn_train_anchors_per_image=256):
    """Stores the hyper-parameters, creates a fresh graph with the input
    placeholders, and instantiates the network sub-modules.

    mode: Either 'training' or 'inference'. Selects which placeholders are
        created and which post-NMS ROI count is used as proposal_count.
    optimizer_method: Either 'Adam' or 'SGD'.
    image_shape: Stored as a numpy array; its first three entries size the
        input_image placeholder.
    All other arguments are stored unchanged as attributes of the same name.

    Side effects: resets the default TensorFlow graph and creates a new
    tf.Graph stored in self.graph.

    Raises AssertionError if mode or optimizer_method is not one of the
    accepted values.
    """
    assert mode in ['training', 'inference']
    assert optimizer_method in ['Adam', 'SGD']

    tf.reset_default_graph()
    self.graph = tf.Graph()

    # Hyper-parameters, stored as given.
    self.mode = mode
    self.rpn_anchor_ratios = rpn_anchor_ratios
    self.rpn_anchor_scales = rpn_anchor_scales
    self.mask_shape = mask_shape
    self.pool_size = pool_size
    self.image_shape = np.array(image_shape)
    self.mini_mask_shape = mini_mask_shape
    self.backbone_strides = backbone_strides
    self.mean_pixel = mean_pixel
    self.roi_size = roi_size
    self.backbone = backbone
    self.stage5 = stage5
    self.norm = norm
    self.use_bias = use_bias
    self.rpn_anchor_stride = rpn_anchor_stride
    self.image_per_gpu = image_per_gpu
    self.gpu_count = gpu_count
    self.detection_max_instances = detection_max_instances
    self.train_rois_per_image = train_rois_per_image
    self.num_classes = num_classes
    self.use_mini_mask = use_mini_mask
    self.use_pretrained_model = use_pretrained_model
    self.top_down_pyramid_size = top_down_pyramid_size
    self.post_nms_rois_training = post_nms_rois_training
    self.post_nms_rois_inference = post_nms_rois_inference
    self.pre_nms_limit = pre_nms_limit
    self.rpn_nms_threshold = rpn_nms_threshold
    self.use_rpn_rois = use_rpn_rois
    self.model_dir = model_dir
    self.optimizer_method = optimizer_method
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.weight_decay = weight_decay
    self.image_min_dim = image_min_dim
    self.image_max_dim = image_max_dim
    self.image_min_scale = image_min_scale
    self.image_resize_mode = image_resize_mode
    self.max_gt_instances = max_gt_instances
    self.rpn_train_anchors_per_image = rpn_train_anchors_per_image

    # Derived values.
    # NOTE(review): the terms presumably mirror the fields packed by
    # compose_image_meta() — confirm against that function.
    self.image_meta_size = 1 + 3 + 3 + 4 + 1 + self.num_classes
    self.reuse = False
    self._anchor_cache = {}
    self.batch_size = self.gpu_count * self.image_per_gpu
    self.backbone_shape = utils.compute_backbone_shapes(
        self.backbone, self.backbone_strides, self.image_shape)

    # Each pyramid level contributes height * width anchor positions, with
    # len(rpn_anchor_ratios) anchors per position. Using height * width
    # (rather than height squared) keeps the count right for non-square
    # inputs, and summing over every level avoids hard-coding five levels.
    # NOTE(review): assumes rpn_anchor_stride == 1 and one scale per level —
    # confirm against utils.generate_pyramid_anchors.
    anchor_positions = sum(
        int(level_shape[0]) * int(level_shape[1])
        for level_shape in self.backbone_shape)
    self.num_anchors_per_image = (
        len(self.rpn_anchor_ratios) * anchor_positions)

    with self.graph.as_default():
        # Inputs shared by both modes.
        self.is_training = tf.placeholder_with_default(False, [])
        self.input_image = tf.placeholder(
            dtype=tf.float32,
            shape=[
                None, self.image_shape[0], self.image_shape[1],
                self.image_shape[2]
            ],
            name='input_image')
        self.input_image_meta = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.image_meta_size],
            name='input_image_meta')

        if mode == 'training':
            # RPN targets.
            self.input_rpn_match = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.num_anchors_per_image, 1],
                name='input_rpn_match')
            self.input_rpn_boxes = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.rpn_train_anchors_per_image, 4],
                name='input_rpn_boxes')

            # Ground-truth class ids and boxes; boxes are also kept in
            # normalized form relative to the input image height/width.
            self.input_gt_class_ids = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self.max_gt_instances],
                name='input_gt_class_ids')
            self.input_gt_boxes = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.max_gt_instances, 4],
                name='input_gt_boxes')
            self.input_gt_boxes_normalized = utils.norm_boxes_graph(
                self.input_gt_boxes, tf.shape(self.input_image)[1:3])
            self.proposal_count = self.post_nms_rois_training

            # Ground-truth masks are sized either as mini masks or as the
            # full image, depending on use_mini_mask.
            if self.use_mini_mask:
                self.input_gt_masks = tf.placeholder(
                    dtype=tf.bool,
                    shape=[
                        None, self.mini_mask_shape[0],
                        self.mini_mask_shape[1], self.max_gt_instances
                    ],
                    name='input_gt_mask')
            else:
                self.input_gt_masks = tf.placeholder(
                    dtype=tf.bool,
                    shape=[
                        None, self.image_shape[0], self.image_shape[1],
                        self.max_gt_instances
                    ],
                    name='input_gt_mask')
        elif mode == 'inference':
            # In inference the anchors are fed in as an input.
            self.input_anchors = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, 4],
                name='input_anchors')
            self.proposal_count = self.post_nms_rois_inference

        # Network sub-modules.
        self.resnet = Resnet(name='resnet',
                             architecture=self.backbone,
                             is_training=self.is_training,
                             stage5=self.stage5,
                             use_bias=self.use_bias)
        arg_scope = nets.resnet_v2.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            _, self.end_points = nets.resnet_v2.resnet_v2_50(
                self.input_image,
                num_classes=None,
                is_training=self.is_training)
        self.fpn = FPN(name='fpn',
                       top_down_pyramid_size=self.top_down_pyramid_size,
                       use_bias=self.use_bias)
        self.rpn = RPN(name='rpn',
                       anchors_per_location=len(self.rpn_anchor_ratios),
                       anchor_stride=self.rpn_anchor_stride,
                       is_training=self.is_training,
                       use_bias=self.use_bias)
        self.proposal = ProposalLayer(self.pre_nms_limit,
                                      self.proposal_count,
                                      self.rpn_nms_threshold,
                                      self.image_per_gpu)
        self.pyramidRoiPooling = PyramidRoiPooling(
            name='PyramidRoiPooling', roi_size=self.roi_size)
        self.objDetection = ObjDetection(
            image_per_gpu=self.image_per_gpu,
            gpu_count=self.gpu_count,
            detection_max_instances=self.detection_max_instances)
        self.targetDetection = TargetDetection(
            mask_shape=self.mask_shape,
            image_per_gpu=self.image_per_gpu,
            train_rois_per_image=self.train_rois_per_image)
        self.fpnClassifier = FpnClassifier('FpnClassifier',
                                           pool_size=self.pool_size,
                                           num_classes=self.num_classes,
                                           is_training=self.is_training)
        self.fpnMask = FpnMask('FpnMask',
                               num_classes=self.num_classes,
                               is_training=self.is_training)