Example #1
def load_image_gt(dataset,
                  config,
                  image_id,
                  augment=False,
                  use_mini_mask=False):
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)

    # Bounding boxes: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)
    # Track which classes the image's source dataset supports.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = compose_image_meta(image_id, shape, window, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
Example #2
    def process_labels(images):
        for i, img_desc in enumerate(images):
            step.value += 1
            print('\rSaving masked labels: {} of {}, time left: {}, predicted size: {}'
                  .format(step.value,
                          data_len,
                          calc_time_left(mean_time, data_len - step.value),
                          calc_pred_size(mean_size, data_len)),
                  end='', flush=True)

            s = time.time()
            img_id = img_desc['id']
            img_w = img_desc['width']
            img_h = img_desc['height']
            img_annotations = collect_annotations(img_id, annotations_list)
            mask = create_full_mask(img_w, img_h, img_annotations)

            resized = resize_mask(mask, dst_w, dst_h)
            non_zero_ids = np.count_nonzero(resized, axis=(0, 1))
            non_zero_ids = np.nonzero(non_zero_ids)[0]
            resized = resized[:, :, non_zero_ids]

            mask_name = img_desc['file_name'].replace('jpg', 'npz')
            np.savez(os.path.join(dst_dir, mask_name), resized, non_zero_ids)

            e = time.time()
            mean_time.value = i / (i + 1) * mean_time.value + (e - s) / (i + 1) / num_processes
            mean_size.value = i / (i + 1) * mean_size.value + resized.nbytes / (i + 1)
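A minimal sketch of how the masks saved above could be read back. Since np.savez was called with positional arrays, the keys are arr_0 and arr_1; the file name and num_classes here are assumptions:

import numpy as np

num_classes = 91  # e.g. COCO's category range; an assumption
with np.load("mask_0001.npz") as data:  # hypothetical file name
    resized, non_zero_ids = data["arr_0"], data["arr_1"]
# Re-expand the pruned channels into the full per-class layout.
full = np.zeros(resized.shape[:2] + (num_classes,), dtype=resized.dtype)
full[:, :, non_zero_ids] = resized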
Example #3
    def test_return(self):
        mask: torch.Tensor = torch.randn((5, 10, 20))
        height: int = 5
        width: int = 15

        resized_mask: torch.Tensor = utils.resize_mask(mask, height, width)
        self.assertEqual(resized_mask.size(), torch.Size((5, height, width)))
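For reference, one plausible implementation that would satisfy this test, using nearest-neighbor interpolation via torch.nn.functional.interpolate; this is a sketch, not the tested library's source:

import torch
import torch.nn.functional as F

def resize_mask(mask: torch.Tensor, height: int, width: int) -> torch.Tensor:
    # [num_instances, H, W] -> [num_instances, height, width]
    return F.interpolate(mask.unsqueeze(0), size=(height, width),
                         mode="nearest").squeeze(0)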
Example #4
def load_image_gt(dataset, config, image_id, augment=False,
                  use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    image_meta: image details packed by compose_image_meta().
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)

    # Random horizontal flips.
    if augment:
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = compose_image_meta(image_id, shape, window, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
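A hypothetical call illustrating the docstring's full-size vs. mini-mask distinction; dataset and config are assumed to be set up as in the surrounding examples:

image, meta, class_ids, bbox, full_mask = load_image_gt(
    dataset, config, image_id=0, use_mini_mask=False)
print(full_mask.shape)  # e.g. (1024, 1024, num_instances)

image, meta, class_ids, bbox, mini_mask = load_image_gt(
    dataset, config, image_id=0, use_mini_mask=True)
print(mini_mask.shape)  # e.g. (224, 224, num_instances), per config.MINI_MASK_SHAPE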
Example #5
def train_generator(dataset, inputs, config, num_epoch, sess):
    input_images, input_metas, input_anchors, input_rpn_gt_deltas, input_rpn_gt_matchs,\
        input_gt_cls_ids, input_gt_box, input_gt_mask = inputs
    for i in range(num_epoch):
        scales = config.SCALES
        ratios = config.RATIOS
        if True:  # hardcoded debug switch: train on a fixed pair of images
            images, gt_boxes, gt_masks, gt_class_ids, img_ids = dataset.make_batch(config.BATCH_SIZE_TRAIN, img_ids=[284282, 109441])  # 139, 285, 32811
        else:
            images, gt_boxes, gt_masks, gt_class_ids, img_ids = dataset.make_batch(config.BATCH_SIZE_TRAIN, img_ids=None)  # 226111, 58636,
        # Stack each batch into an ndarray.
        molded_images, image_metas, windows, pads, mold_scales = utils.mold_images(images, max_dim=1024, min_dim=800, config=config)
        # Unless mini masks are used, the masks are molded as well (with padding applied).
        assert molded_images.dtype != np.dtype("O"), "molded images do not all share the same shape"
        molded_shape = molded_images.shape
        # Resize (scale) -> shift (window) -> normalize (molded_shape) the gt boxes.
        #gt_boxes = utils.pack_gt_boxes(gt_boxes, mold_scales, windows, molded_shape)
        #gt_boxes_molded = utils.mold_gt_boxes(gt_boxes, mold_scales, windows, molded_shape)
        #gt_masks = utils.mold_gt_masks(gt_masks, mold_scales, pads, gt_boxes_molded, config)
        #gt_masks = utils.mold_gt_masks(gt_masks, mold_scales, pads)
        gt_masks = [[utils.resize_mask(m, s, p) for m in gt_mask] for gt_mask, s, p in zip(gt_masks, mold_scales, pads)]
        gt_boxes = utils.make_gt_boxes_from_mask(gt_masks)
        gt_boxes_molded = utils.mold_gt_boxes(gt_boxes, mold_scales, windows, molded_shape)
        gt_minimasks = [utils.minimize_mask(b, np.array(m), config.MINI_MASK_SIZE) for b, m in zip(gt_boxes, gt_masks)]
        #gt_minimasks = utils.pack_gt_minimasks_already_molded(gt_masks, gt_boxes_molded, config)
        # Resize -> pad -> crop-and-resize the gt masks.
        #gt_masks = utils.pack_gt_minimasks(gt_masks, mold_scales, pads, gt_boxes_molded, config)
        anchors = utils.make_anchors_nomold(scales, ratios, molded_images[0].shape, (4, 8, 16, 32, 64))
        anchors_molded = utils.make_anchors(scales, ratios, molded_images[0].shape, (4, 8, 16, 32, 64))
        rpn_gt_matchs, rpn_gt_deltas = zip(*[utils.make_matchs(anchors, b, config) for b in gt_boxes])
        #rpn_gt_matchs_new, rpn_gt_deltas_new = zip(*[utils.build_rpn_targets(molded_shape, anchors, c, b, config) for b, c in zip(gt_boxes, gt_class_ids)])
        """
        import pickle
        with open("rpn.pkl", "rb") as f:
            d = pickle.load(f)
        with open("rpn_deltas.pkl", "rb") as f:
            dd = pickle.load(f)
        positive_indices_master = dd["positive_indices"][0]
        negative_indices_master = dd["negative_indices"][0]
        """
        #master_masks = np.transpose(np.squeeze(dd["batch_gt_masks"], axis=0), (2, 0, 1))
        
        #utils.apply_deltas_np(anchors, rpn_gt_deltas[0]*config.RPN_BBOX_STD_DEV)
        # Concatenate the lists along the batch axis into ndarrays of shape (batch, anchors) and (batch, anchors, 4).

        ix = np.where(rpn_gt_matchs[0] == 1)[0]
        rpn_rois = utils.apply_deltas_np(anchors[ix], rpn_gt_deltas[0] * config.RPN_BBOX_STD_DEV)
        #visualize.show_boxes_demold(images[0], gt_boxes[0], windows[0], molded_shape)
        #visualize.show_boxes_demold(images[0], rpn_rois, windows[0], molded_shape)

        rpn_gt_matchs, = [np.stack(gts, axis=0) for gts in [rpn_gt_matchs]]
        gt_boxes_molded, gt_class_ids, gt_minimasks, rpn_gt_deltas =\
                    [utils.pack_on(m) for m in (gt_boxes_molded, gt_class_ids, gt_minimasks, rpn_gt_deltas)]
        
        feed_dict = {
            input_images: molded_images,
            input_metas: image_metas,
            input_anchors: anchors_molded,
            input_rpn_gt_matchs: rpn_gt_matchs,
            input_rpn_gt_deltas: rpn_gt_deltas,
            input_gt_cls_ids: gt_class_ids,
            input_gt_box: gt_boxes_molded,
            input_gt_mask: gt_minimasks,
            #input_positive_indices_master: positive_indices_master,
            #input_negative_indices_master: negative_indices_master,
        }
        yield i, feed_dict, images, windows, molded_shape, anchors_molded[ix], img_ids
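A hypothetical TF1-style loop consuming this generator; total_loss, train_op, and the session setup are assumptions, not names from the snippet:

for i, feed_dict, images, windows, molded_shape, pos_anchors, img_ids in \
        train_generator(dataset, inputs, config, num_epoch=10, sess=sess):
    # total_loss and train_op are hypothetical graph nodes.
    loss_value, _ = sess.run([total_loss, train_op], feed_dict=feed_dict)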
Example #6
    dataset_train.prepare()

    image_ids = np.random.choice(dataset_train.image_ids, 4)
    for image_id in image_ids:
        image = dataset_train.load_image(image_id)
        mask, class_ids = dataset_train.load_mask(image_id)
        visualize.display_top_masks(image, mask, class_ids,
                                    dataset_train.class_names)
        original_shape = image.shape
        # Resize
        image, window, scale, padding, _ = utils.resize_image(
            image,
            min_dim=config.IMAGE_MIN_DIM,
            max_dim=config.IMAGE_MAX_DIM,
            mode=config.IMAGE_RESIZE_MODE)
        mask = utils.resize_mask(mask, scale, padding)
        # Compute Bounding box
        bbox = utils.extract_bboxes(mask)

        # Display image and additional stats
        print("image_id: ", image_id, dataset_train.image_reference(image_id))
        print("Original shape: ", original_shape)
        log("image", image)
        log("mask", mask)
        log("class_ids", class_ids)
        log("bbox", bbox)
        # Display image and instances
        visualize.display_instances(image, bbox, mask, class_ids,
                                    dataset_train.class_names)

    # Validation dataset
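For intuition, a minimal sketch of what utils.extract_bboxes computes, following the (y1, x1, y2, x2) convention with exclusive lower-right coordinates; an illustration, not the library source:

import numpy as np

def extract_bboxes_sketch(mask):
    # mask: [H, W, num_instances] boolean array.
    boxes = np.zeros((mask.shape[-1], 4), dtype=np.int32)
    for i in range(mask.shape[-1]):
        ys, xs = np.where(mask[:, :, i])
        if ys.size:  # instances cropped out entirely keep an all-zero box
            boxes[i] = [ys.min(), xs.min(), ys.max() + 1, xs.max() + 1]
    return boxes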
Example #7
def load_image_gt(dataset,
                  image_id,
                  augment=False,
                  augmentation=None,
                  use_mini_mask=False):
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    origin_shape = image.shape

    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=hyper_parameters.FLAGS.IMAGE_MIN_DIM,
        min_scale=hyper_parameters.FLAGS.IMAGE_MIN_SCALE,
        max_dim=hyper_parameters.FLAGS.IMAGE_MAX_DIM,
        mode=hyper_parameters.FLAGS.IMAGE_RESIZE_MODE)

    mask = utils.resize_mask(mask, scale, padding, crop)

    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    if augmentation:
        import imgaug

        mask_augmenters = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            return augmenter.__class__.__name__ in mask_augmenters

        image_shape = image.shape
        mask_shape = mask.shape

        det = augmentation.to_deterministic()
        image = det.augment_image(image)

        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))

        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool
        mask = mask.astype(bool)  # np.bool was removed in NumPy 1.24

    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]

    bbox = utils.extract_bboxes(mask)

    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    if use_mini_mask:
        mask = utils.minimize_mask(
            bbox, mask, tuple(hyper_parameters.FLAGS.MINI_MASK_SHAPE))

    image_meta = utils.compose_image_meta(image_id, origin_shape, image.shape,
                                          window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
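As an illustration, a pipeline one might pass as augmentation, built only from augmenters on the mask-safe list above; the particular choice and parameters are assumptions:

import imgaug.augmenters as iaa

augmentation = iaa.Sequential([
    iaa.Fliplr(0.5),                               # horizontal flip half the time
    iaa.Sometimes(0.3, iaa.Affine(rotate=(-10, 10))),
])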
Example #8
def load_image_gt(config,
                  image_id,
                  image,
                  depth,
                  mask,
                  class_ids,
                  parameters,
                  augment=False,
                  use_mini_mask=True):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    image_meta: image details packed by compose_image_meta().
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    parameters: returned unchanged.
    """
    ## Load image and mask
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MAX_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)

    mask = utils.resize_mask(mask, scale, padding)

    ## Random horizontal flips (deliberately disabled by the `and False`).
    if augment and False:
        if np.random.randint(0, 2):  # upper bound is exclusive; (0, 1) would always yield 0
            image = np.fliplr(image)
            mask = np.fliplr(mask)
            depth = np.fliplr(depth)
            pass
        pass

    ## Bounding boxes. Note that some boxes might be all zeros
    ## if the corresponding mask got cropped out.
    ## bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)
    ## Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)
        pass

    active_class_ids = np.ones(config.NUM_CLASSES, dtype=np.int32)
    ## Image meta data
    image_meta = utils.compose_image_meta(image_id, shape, window,
                                          active_class_ids)

    if config.NUM_PARAMETER_CHANNELS > 0:
        if config.OCCLUSION:
            depth = utils.resize_mask(depth, scale, padding)
            mask_visible = utils.minimize_mask(bbox, depth,
                                               config.MINI_MASK_SHAPE)
            mask = np.stack([mask, mask_visible], axis=-1)
        else:
            depth = np.expand_dims(depth, -1)
            depth = utils.resize_mask(depth, scale, padding).squeeze(-1)
            depth = utils.minimize_depth(bbox, depth, config.MINI_MASK_SHAPE)
            mask = np.stack([mask, depth], axis=-1)
            pass
        pass
    return image, image_meta, class_ids, bbox, mask, parameters
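For intuition, a minimal sketch of the mini-mask idea the docstring describes: crop each instance to its bounding box and resize the crop to a fixed shape. An illustration under those assumptions, not the library's minimize_mask:

import numpy as np
from skimage.transform import resize as sk_resize

def minimize_mask_sketch(bbox, mask, mini_shape):
    # mask: [H, W, num_instances] -> mini: [mini_h, mini_w, num_instances]
    mini = np.zeros(tuple(mini_shape) + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        y1, x1, y2, x2 = bbox[i]
        crop = mask[y1:y2, x1:x2, i]
        if crop.size:
            mini[:, :, i] = sk_resize(crop, mini_shape, order=0,
                                      preserve_range=True,
                                      anti_aliasing=False).astype(bool)
    return mini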
Example #9
def load_image_gt(dataset, config, image_id, augment=False, augmentation=None, use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).
    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
        For example, passing imgaug.augmenters.Fliplr(0.5) flips images
        right/left 50% of the time.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.
    Returns:
    image: [height, width, 3]
    image_meta: image details packed by compose_image_meta().
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """

    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    original_shape = image.shape
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE
    )
    mask = utils.resize_mask(mask, scale, padding, crop)

    # Random horizontal flips.
    # TODO: will be removed in a future update in favor of augmentation

    if augment:
        logging.warning("'augment' id deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Augmentation
    # This requires the imgaug lib (https://github.com/aleju/imgaug)

    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks
        # Some, such as Affine, have settings that make them unsafe, so always
        # test your augmentation on masks
        MASK_AUGMENTS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                         "Fliplr", 'Flipud', 'CropAndPad', "Affine", "PiecewiseAffine"]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTS

        # Store shapes before augmentation to compare
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks
        det = augmentation.to_deterministic()

        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug doesn't support bool masks
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"

        # Change mask back to bool
        mask = mask.astype(bool)  # np.bool was removed in NumPy 1.24

    # Some masks may have been cropped out entirely during resizing;
    # filter those instances out so they don't yield all-zero boxes.
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.

    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = utils.compose_image_meta(image_id, original_shape, image.shape,
                                          window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
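A minimal check of the empty-instance filter used above, with illustrative shapes:

import numpy as np

mask = np.zeros((4, 4, 3), dtype=bool)
mask[1:3, 1:3, 0] = True   # instance 0 has pixels
mask[0, 0, 2] = True       # instance 2 has pixels; instance 1 stays empty
keep = np.sum(mask, axis=(0, 1)) > 0
print(keep)                    # [ True False  True]
print(mask[:, :, keep].shape)  # (4, 4, 2)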
def data_generator(config, shuffle=True, augmentation=None, batch_size=1):
    """
    A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
                are those of the image unless use_mini_mask is True, in which
                case they are defined in MINI_MASK_SHAPE.

    outputs list: Always empty in this generator.

    """
    b = 0
    ix = 0
    image_files = glob.glob("./data/train/*.jpg")

    # Anchors
    # [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    while True:
        if shuffle and ix == 0:
            np.random.shuffle(image_files)
        image_path = image_files[ix]

        json_path = image_files[ix].replace("jpg", "json")

        image = load_image(image_path)
        original_shape = image.shape

        mask, class_ids = load_mask(json_path)

        image, window, scale, padding, crop = utils.resize_image(
            image,
            min_dim=config.IMAGE_MIN_DIM,
            min_scale=config.IMAGE_MIN_SCALE,
            max_dim=config.IMAGE_MAX_DIM,
            mode=config.IMAGE_RESIZE_MODE)
        mask = utils.resize_mask(mask, scale, padding, crop)

        # Augmentation
        # This requires the imgaug lib (https://github.com/aleju/imgaug)
        if augmentation:
            import imgaug
            # Augmenters that are safe to apply to masks
            # Some, such as Affine, have settings that make them unsafe, so always
            # test your augmentation on masks
            MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                               "Fliplr", "Flipud", "CropAndPad",
                               "Affine", "PiecewiseAffine"]

            def hook(images, augmenter, parents, default):
                """Determines which augmenters to apply to masks."""
                return augmenter.__class__.__name__ in MASK_AUGMENTERS

            # Store shapes before augmentation to compare
            image_shape = image.shape
            mask_shape = mask.shape
            # Make augmenters deterministic to apply similarly to images and masks
            det = augmentation.to_deterministic()
            image = det.augment_image(image)
            # Change mask to np.uint8 because imgaug doesn't support np.bool
            mask = det.augment_image(mask.astype(np.uint8),
                                     hooks=imgaug.HooksImages(activator=hook))
            # Verify that shapes didn't change
            assert image.shape == image_shape, "Augmentation shouldn't change image size"
            assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
            # Change mask back to bool
            mask = mask.astype(bool)  # np.bool was removed in NumPy 1.24

        bbox = utils.extract_bboxes(mask)

        use_mini_mask = True

        if use_mini_mask:
            mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

        # image_meta is for debug
        image_meta = compose_image_meta(0, original_shape, image.shape,
                                        window, scale, np.ones(len(class_name2idx)))

        # RPN Targets
        rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                class_ids, bbox, config)

        if b == 0:
            batch_image_meta = np.zeros(
                (batch_size,) + image_meta.shape, dtype=image_meta.dtype)
            batch_rpn_match = np.zeros(
                [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
            batch_rpn_bbox = np.zeros(
                [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype)
            batch_images = np.zeros(
                (batch_size,) + image.shape, dtype=np.float32)
            batch_gt_class_ids = np.zeros(
                (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
            batch_gt_boxes = np.zeros(
                (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
            batch_gt_masks = np.zeros(
                (batch_size, mask.shape[0], mask.shape[1],
                 config.MAX_GT_INSTANCES), dtype=mask.dtype)
        # Add to batch
        batch_image_meta[b] = image_meta
        batch_rpn_match[b] = rpn_match[:, np.newaxis]
        batch_rpn_bbox[b] = rpn_bbox
        batch_images[b] = mold_image(image.astype(np.float32), config)
        batch_gt_class_ids[b, :class_ids.shape[0]] = class_ids
        batch_gt_boxes[b, :bbox.shape[0]] = bbox
        batch_gt_masks[b, :, :, :mask.shape[-1]] = mask
        b += 1
        ix = (ix + 1) % len(image_files)
        if b >= batch_size:
            inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox,
                      batch_gt_class_ids, batch_gt_boxes, batch_gt_masks]
            outputs = []

            yield inputs, outputs
            b = 0
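A hypothetical smoke test of the generator; config is assumed to be defined as in the surrounding examples, and ./data/train must contain matching jpg/json pairs:

gen = data_generator(config, shuffle=True, augmentation=None, batch_size=2)
inputs, outputs = next(gen)
images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks = inputs
print(images.shape, rpn_match.shape, gt_masks.shape)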