Code example #1
    def mold_inputs(self, images):
        """Takes a list of images and modifies them to the format expected
        as an input to the neural network.
        images: List of image matrices [height,width,depth]. Images can have
            different sizes.
        Returns 3 Numpy matrices:
        molded_images: [N, h, w, 3]. Images resized and normalized.
        image_metas: [N, length of meta data]. Details about each image.
        windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the
            original image (padding excluded).
        """
        molded_images = []
        image_metas = []
        windows = []
        for image in images:
            # Resize image
            # TODO: move resizing to mold_image()
            original_shape = image.shape
            normalize(image, self.config.MEANS, self.config.STD)
            image, scale, window = resize_image(image, self.config.VIEW_SIZE)
            # Build image_meta
            image_meta = compose_image_meta(
                0, original_shape, image.shape, window, scale,
                np.zeros([self.config.CLASSES], dtype=np.int32))
            # Append
            molded_images.append(image)
            windows.append(window)
            image_metas.append(image_meta)
        # Pack into arrays
        molded_images = np.stack(molded_images)
        image_metas = np.stack(image_metas)
        windows = np.stack(windows)
        return molded_images, image_metas, windows
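
A minimal usage sketch (here model stands for an instance of whatever class defines mold_inputs; the zero-filled images are placeholders):

import numpy as np

# Hypothetical: `model` is an instance of the class defining mold_inputs.
images = [np.zeros((480, 640, 3), dtype=np.float32),
          np.zeros((300, 400, 3), dtype=np.float32)]  # different sizes are fine
molded, metas, windows = model.mold_inputs(images)
# molded:  (2, h, w, 3) batch ready for the network
# metas:   (2, meta_length) per-image details (see compose_image_meta)
# windows: (2, 4) un-padded (y1, x1, y2, x2) region of each molded image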
Code example #2
    def mold_inputs(self, images):
        molded_images = []
        image_metas = []
        windows = []
        for image in images:
            # Resize image to fit the model expected size
            # TODO: move resizing to mold_image()

            molded_image = resize(
                image, (self.config.IMAGE_MAX_DIM, self.config.IMAGE_MAX_DIM))

            # resize() rescales pixel values to [0, 1]; restore the 0-255 range
            molded_image = molded_image * 255.0

            molded_image = utils.mold_image(molded_image, self.config)

            window = (0, 0, self.config.IMAGE_MAX_DIM,
                      self.config.IMAGE_MAX_DIM)
            # Build image_meta
            image_meta = utils.compose_image_meta(
                0, molded_image.shape, window,
                np.zeros([self.config.NUM_CLASSES], dtype=np.int32))
            # Append
            molded_images.append(molded_image)
            windows.append(window)
            image_metas.append(image_meta)
        # Pack into arrays
        molded_images = np.stack(molded_images)
        image_metas = np.stack(image_metas)
        windows = np.stack(windows)
        return molded_images, image_metas, windows
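
The multiplication by 255.0 above compensates for skimage.transform.resize, which by default converts to float and rescales pixel values to [0, 1]. A standalone sketch of that behavior:

import numpy as np
from skimage.transform import resize

img = np.random.randint(0, 256, (416, 416, 3), dtype=np.uint8)
resized = resize(img, (512, 512))   # float64, values in [0.0, 1.0]
restored = resized * 255.0          # back to the 0-255 range
# More explicit alternative: resize(img, (512, 512), preserve_range=True)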
Code example #3
File: data.py Project: linkchainiii/centernet
def load_image_gt(dataset, config, image_id, augment=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).
    Returns:
    image: [height, width, 3]
    shape: the original shape of the image before resizing and cropping.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    """
    # Load image and mask
    image = dataset.load_image(image_id)
    bboxs, class_ids = dataset.load_bbox(image_id)
    original_shape = image.shape
    if augment:
        rand_scales = np.random.choice(config.RANDOM_SCALES)
        image, bboxs = random_crop(image,
                                   bboxs,
                                   rand_scales,
                                   config.VIEW_SIZE,
                                   border=config.BORDER)
        data_rng = np.random.RandomState(123)
        color_jittering(data_rng, image)
        lighting(data_rng, image, 0.1, config.EIG_VAL, config.EIG_VEC)
    normalize(image, config.MEANS, config.STD)
    image, bboxs, scale, windows = pad_same_size(image, bboxs,
                                                 config.VIEW_SIZE)

    valid_bbox_index = np.logical_and(bboxs[:, 0] < bboxs[:, 2],
                                      bboxs[:, 1] < bboxs[:, 3])
    bboxs = bboxs[valid_bbox_index]
    class_ids = class_ids[valid_bbox_index]
    assert bboxs.shape[0] == class_ids.shape[0]

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1
    image_meta = utils.compose_image_meta(image_id, original_shape,
                                          image.shape, windows, scale,
                                          active_class_ids)

    return image, bboxs, class_ids, image_meta
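
The logical_and filter above exists because random_crop can collapse a box to zero height or width; such degenerate boxes (y1 >= y2 or x1 >= x2) and their class IDs are dropped together. A tiny illustration with made-up boxes:

import numpy as np

bboxs = np.array([[10., 10., 50., 60.],    # valid box
                  [30., 40., 30., 20.]])   # collapsed: y1 == y2 and x1 > x2
class_ids = np.array([1, 2], dtype=np.int32)

valid = np.logical_and(bboxs[:, 0] < bboxs[:, 2], bboxs[:, 1] < bboxs[:, 3])
bboxs, class_ids = bboxs[valid], class_ids[valid]
print(bboxs)  # only the first box survives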
Code example #4
    def mold_inputs(self, images):
        """Takes a list of images and modifies them to the format expected
        as an input to the neural network.
        images: List of image matrices [height,width,depth]. Images can have
            different sizes.

        Returns 3 Numpy matrices:
        molded_images: [N, h, w, 3]. Images resized and normalized.
        image_metas: [N, length of meta data]. Details about each image.
        windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the
            original image (padding excluded).
        """
        molded_images = []
        image_metas = []
        windows = []
        for image in images:
            # Resize image
            # TODO: move resizing to mold_image()
            molded_image, window, scale, padding, crop = utils.resize_image(
                image,
                min_dim=self.image_min_dim,
                min_scale=self.image_min_scale,
                max_dim=self.image_max_dim,
                mode=self.image_resize_mode)
            molded_image = utils.mold_image(molded_image,
                                            np.array(self.mean_pixel))
            # Build image_meta
            image_meta = utils.compose_image_meta(
                0, image.shape, molded_image.shape, window, scale,
                np.zeros([self.num_classes], dtype=np.int32))
            # Append
            molded_images.append(molded_image)
            windows.append(window)
            image_metas.append(image_meta)
        # Pack into arrays
        molded_images = np.stack(molded_images)
        image_metas = np.stack(image_metas)
        windows = np.stack(windows)
        return molded_images, image_metas, windows
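
compose_image_meta itself is not shown in these snippets. In the Mask R-CNN codebase this example derives from, it simply flattens the per-image details into one 1-D array so a batch of metadata can travel through the network as a single tensor; a sketch matching the six-argument calls above:

import numpy as np

def compose_image_meta(image_id, original_image_shape, image_shape,
                       window, scale, active_class_ids):
    # Flatten everything into a single 1-D numeric array.
    return np.array(
        [image_id]                    # 1 value
        + list(original_image_shape)  # 3 values: (H, W, C) before molding
        + list(image_shape)           # 3 values: (H, W, C) after molding
        + list(window)                # 4 values: (y1, x1, y2, x2) of real pixels
        + [scale]                     # 1 value
        + list(active_class_ids)      # NUM_CLASSES values
    )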
Code example #5
File: data.py Project: linkchainiii/centernet
def data_generator_flyai(x_data, y_data, config, shuffle=True, augment=False):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.
    x_data, y_data: arrays of image paths and encoded labels from the FlyAI dataset
    config: The model config object
    shuffle: If True, shuffles the samples before every epoch
    augment: If true, apply random image augmentation.

    Returns a Python generator. Upon calling next() on it, the
    generator returns two lists, inputs and outputs. The contents
    of the lists differ depending on the received arguments:
    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    outputs list: Always empty for this generator; the training targets are
        passed in through the inputs list instead.
    """
    global batch_tl_heatmaps, batch_br_heatmaps, batch_ct_heatmaps, \
        batch_tl_reg, batch_br_reg, batch_ct_reg, batch_mask, \
        batch_tl_tag, batch_br_tag, batch_tag_mask, \
        batch_images, batch_image_metas, batch_gt_boxes, batch_gt_class_ids

    b = 0  # batch item index
    image_index = -1
    image_ids = np.arange(x_data.shape[0])
    error_count = 0
    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % x_data.shape[0]
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes
            image_id = image_ids[image_index]
            image_path = x_data[image_id]
            image = cv2.imread(image_path)

            boxes = str(y_data[image_id]).split()  # use the shuffled id, matching x_data
            boxes = [list(map(int, x.split(','))) for x in boxes]
            gt_boxes = [[box[1], box[0], box[3], box[2]] for box in boxes]
            gt_class_ids = [box[4] for box in boxes]
            gt_boxes = np.array(gt_boxes, dtype=np.float32)
            gt_class_ids = np.array(gt_class_ids, dtype=np.int32)

            original_shape = image.shape
            image = np.array(image, dtype=np.float32)
            if augment:
                rand_scales = np.random.choice(config.RANDOM_SCALES)
                image, gt_boxes = random_crop(image,
                                              gt_boxes,
                                              rand_scales,
                                              config.VIEW_SIZE,
                                              border=config.BORDER)
                data_rng = np.random.RandomState(123)
                color_jittering(data_rng, image)
                lighting(data_rng, image, 0.1, config.EIG_VAL, config.EIG_VEC)
            normalize(image, config.MEANS, config.STD)
            image, gt_boxes, scale, windows = pad_same_size(
                image, gt_boxes, config.VIEW_SIZE)

            valid_bbox_index = np.logical_and(gt_boxes[:, 0] < gt_boxes[:, 2],
                                              gt_boxes[:, 1] < gt_boxes[:, 3])
            gt_boxes = gt_boxes[valid_bbox_index]
            gt_class_ids = gt_class_ids[valid_bbox_index]
            assert gt_boxes.shape[0] == gt_class_ids.shape[0]

            # Active classes
            # Different datasets have different classes, so track the
            # classes supported in the dataset of this image.
            active_class_ids = np.ones([config.CLASSES], dtype=np.int32)
            # source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
            # active_class_ids[source_class_ids] = 1
            image_meta = utils.compose_image_meta(image_id, original_shape,
                                                  image.shape, windows, scale,
                                                  active_class_ids)

            # image, gt_boxes, gt_class_ids, image_meta = load_image_gt(dataset, config, image_id, augment=augment)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            if not np.any(gt_class_ids > 0):
                continue

            # generating the keypoint heatmap
            # tl_heatmaps, br_heatmaps, ct_heatmaps, tl_regrs, br_regrs, ct_regrs, mask, tag_mask, tl_tags, br_tags
            out = np_draw_gaussian(gt_boxes, gt_class_ids, config)

            # Init batch arrays
            if b == 0:
                batch_tl_heatmaps = np.zeros([
                    config.BATCH_SIZE,
                ] + config.OUTPUT_SIZE + [config.CLASSES],
                                             dtype=out[0].dtype)
                batch_br_heatmaps = np.zeros([
                    config.BATCH_SIZE,
                ] + config.OUTPUT_SIZE + [config.CLASSES],
                                             dtype=out[1].dtype)
                batch_ct_heatmaps = np.zeros([
                    config.BATCH_SIZE,
                ] + config.OUTPUT_SIZE + [config.CLASSES],
                                             dtype=out[2].dtype)
                batch_tl_reg = np.zeros([
                    config.BATCH_SIZE,
                ] + config.OUTPUT_SIZE + [2],
                                        dtype=out[3].dtype)
                batch_br_reg = np.zeros([
                    config.BATCH_SIZE,
                ] + config.OUTPUT_SIZE + [2],
                                        dtype=out[4].dtype)
                batch_ct_reg = np.zeros([
                    config.BATCH_SIZE,
                ] + config.OUTPUT_SIZE + [2],
                                        dtype=out[5].dtype)
                batch_mask = np.zeros([
                    config.BATCH_SIZE,
                    3,
                ] + config.OUTPUT_SIZE,
                                      dtype=out[6].dtype)
                batch_tag_mask = np.zeros([config.BATCH_SIZE, config.MAX_NUMS],
                                          dtype=out[7].dtype)
                batch_tl_tag = np.zeros([config.BATCH_SIZE, config.MAX_NUMS],
                                        dtype=out[8].dtype)
                batch_br_tag = np.zeros([config.BATCH_SIZE, config.MAX_NUMS],
                                        dtype=out[9].dtype)
                batch_images = np.zeros([
                    config.BATCH_SIZE,
                ] + config.VIEW_SIZE + [3],
                                        dtype=np.float32)
                batch_image_metas = np.zeros(
                    (config.BATCH_SIZE, config.META_SHAPE),
                    dtype=image_meta.dtype)
                batch_gt_class_ids = np.zeros(
                    [config.BATCH_SIZE, config.MAX_NUMS], dtype=np.int64)
                batch_gt_boxes = np.zeros(
                    [config.BATCH_SIZE, config.MAX_NUMS, 4], dtype=np.float32)

            # If more instances than fits in the array, sub-sample from them.
            if gt_boxes.shape[0] > config.MAX_NUMS:
                ids = np.random.choice(np.arange(gt_boxes.shape[0]),
                                       config.MAX_NUMS,
                                       replace=False)
                gt_class_ids = gt_class_ids[ids]
                gt_boxes = gt_boxes[ids]

            # Add to batch
            batch_tl_heatmaps[b] = out[0]
            batch_br_heatmaps[b] = out[1]
            batch_ct_heatmaps[b] = out[2]
            batch_tl_reg[b] = out[3]
            batch_br_reg[b] = out[4]
            batch_ct_reg[b] = out[5]
            batch_mask[b] = out[6]
            batch_tag_mask[b] = out[7]
            batch_tl_tag[b] = out[8]
            batch_br_tag[b] = out[9]
            batch_images[b] = image
            batch_image_metas[b] = image_meta
            batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
            batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids

            b += 1
            # Batch full?
            if b >= config.BATCH_SIZE:
                inputs = [batch_images, batch_image_metas, batch_tl_heatmaps,
                          batch_br_heatmaps, batch_ct_heatmaps, batch_tl_reg,
                          batch_br_reg, batch_ct_reg, batch_mask, batch_tl_tag,
                          batch_br_tag, batch_tag_mask, batch_gt_boxes,
                          batch_gt_class_ids]
                outputs = []
                yield inputs, outputs
                # start a new batch
                b = 0
        except (GeneratorExit, KeyboardInterrupt):
            raise
        except Exception:
            # The usual Mask R-CNN generator fallback: log, skip the bad image,
            # and give up after repeated failures (makes error_count meaningful).
            logging.exception("Error processing image %s" % str(image_id))
            error_count += 1
            if error_count > 5:
                raise
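
A usage sketch for the generator (x_data holding image paths and y_data the encoded boxes, both hypothetical here, in the format the function parses):

gen = data_generator_flyai(x_data, y_data, config, shuffle=True, augment=True)
inputs, outputs = next(gen)   # outputs is [] in regular training
batch_images, batch_image_metas = inputs[0], inputs[1]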
Code example #6
def load_image_gt(dataset, config, image_id):
    image_id = int(image_id)
    image = dataset.load_image(image_id)

    old_shape = 416  # assumes 416x416 source images
    image = resize(image, (config.IMAGE_MAX_DIM, config.IMAGE_MAX_DIM))

    shape = image.shape

    # resize() rescales pixel values to [0, 1]; restore the 0-255 range
    image = image * 255.0

    window = (0, 0, config.IMAGE_MAX_DIM, config.IMAGE_MAX_DIM)

    bboxes = dataset.load_bboxes(image_id)
    class_ids = np.ones([bboxes.shape[0]], dtype=np.int32)

    for i, bbox in enumerate(bboxes):
        y1, x1, y2, x2 = bbox

        x1 = x1 * 1.0 * config.IMAGE_MAX_DIM / old_shape
        x2 = x2 * 1.0 * config.IMAGE_MAX_DIM / old_shape
        y1 = y1 * 1.0 * config.IMAGE_MAX_DIM / old_shape
        y2 = y2 * 1.0 * config.IMAGE_MAX_DIM / old_shape

        bboxes[i] = np.array([y1, x1, y2, x2])
    """
    if random.randint(0,1):
        import imgaug as ia
        import imgaug.augmenters as iaa

        bbs = []
        for bbox in bboxes:
            y1,x1,y2,x2 = bbox
            bbs.append(ia.BoundingBox(x1=x1,y1=y1,x2=x2,y2=y2))

            image_aug, bbs_aug = iaa.Fliplr(1.0)(image=image, bounding_boxes=bbs)

        image = image_aug

        gt_boxes_aug = np.zeros([len(bbs_aug),4], dtype=np.float32)

        for i,bbox in enumerate(bbs_aug):
            #print(bbox.y1,bbox.x1,bbox.y2,bbox.x2)
            y1,x1,y2,x2 = bbox.y1,bbox.x1,bbox.y2,bbox.x2

            gt_boxes_aug[i] = np.array([y1,x1,y2,x2])

        bboxes = gt_boxes_aug

    if random.randint(0,1):
        import imgaug as ia
        import imgaug.augmenters as iaa

        bbs = []
        for bbox in bboxes:
            y1,x1,y2,x2 = bbox
            bbs.append(ia.BoundingBox(x1=x1,y1=y1,x2=x2,y2=y2))

            image_aug, bbs_aug = iaa.Flipud(1.0)(image=image, bounding_boxes=bbs)

        image = image_aug

        gt_boxes_aug = np.zeros([len(bbs_aug),4], dtype=np.float32)

        for i,bbox in enumerate(bbs_aug):
            #print(bbox.y1,bbox.x1,bbox.y2,bbox.x2)
            y1,x1,y2,x2 = bbox.y1,bbox.x1,bbox.y2,bbox.x2

            gt_boxes_aug[i] = np.array([y1,x1,y2,x2])

        bboxes = gt_boxes_aug

    """

    # Build image_meta
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    image_meta = utils.compose_image_meta(image_id, shape, window,
                                          active_class_ids)

    return image, image_meta, class_ids, bboxes
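
Since all four coordinates in the loop above are scaled by the same factor, the per-box loop can be replaced with a single vectorized statement (a behavior-equivalent sketch, under the same assumptions that bboxes is a float array and sources are 416x416):

bboxes = bboxes * (config.IMAGE_MAX_DIM / float(old_shape))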
Code example #7
File: dataset.py Project: Xlsean/Mask-RCNN
def load_image_gt(dataset,
                  config,
                  image_id,
                  augment=False,
                  use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).
    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.
    Returns:
    image: [height, width, 3]
    image_meta: 1-D array of image details. See compose_image_meta().
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)

    # Random horizontal flips.
    if augment:
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = utils.compose_image_meta(image_id, shape, window,
                                          active_class_ids)

    return image, image_meta, class_ids, bbox, mask
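
utils.extract_bboxes is not shown here; in the matterport Mask-RCNN codebase it derives each box from the nonzero extent of its mask channel, roughly as in this sketch:

import numpy as np

def extract_bboxes(mask):
    """mask: [H, W, num_instances] -> boxes: [num_instances, (y1, x1, y2, x2)]."""
    boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]
        xs = np.where(np.any(m, axis=0))[0]   # columns containing the instance
        ys = np.where(np.any(m, axis=1))[0]   # rows containing the instance
        if xs.shape[0]:
            # +1 makes (y2, x2) exclusive; an all-zero mask yields a zero box
            boxes[i] = [ys[0], xs[0], ys[-1] + 1, xs[-1] + 1]
    return boxes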
Code example #8
def load_image_gt(dataset,
                  image_id,
                  augment=False,
                  augmentation=None,
                  use_mini_mask=False):
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    origin_shape = image.shape

    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=hyper_parameters.FLAGS.IMAGE_MIN_DIM,
        min_scale=hyper_parameters.FLAGS.IMAGE_MIN_SCALE,
        max_dim=hyper_parameters.FLAGS.IMAGE_MAX_DIM,
        mode=hyper_parameters.FLAGS.IMAGE_RESIZE_MODE)

    mask = utils.resize_mask(mask, scale, padding, crop)

    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    if augmentation:
        import imgaug

        mask_augmenters = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            return augmenter.__class__.__name__ in mask_augmenters

        image_shape = image.shape
        mask_shape = mask.shape

        det = augmentation.to_deterministic()
        image = det.augment_image(image)

        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))

        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool
        mask = mask.astype(bool)  # np.bool was removed in NumPy 1.24

    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]

    bbox = utils.extract_bboxes(mask)

    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    if use_mini_mask:
        mask = utils.minimize_mask(
            bbox, mask, tuple(hyper_parameters.FLAGS.MINI_MASK_SHAPE))

    image_meta = utils.compose_image_meta(image_id, origin_shape, image.shape,
                                          window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
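
The activator-hook pattern above is worth isolating: freezing the augmenter with to_deterministic() guarantees image and mask receive the same geometric transform, while the hook keeps pixel-level augmenters (blur, color) away from the mask. A minimal self-contained sketch using classic imgaug:

import numpy as np
import imgaug
import imgaug.augmenters as iaa

aug = iaa.Sequential([iaa.Fliplr(1.0), iaa.GaussianBlur(1.0)])
det = aug.to_deterministic()  # freeze the random choices

def hook(images, augmenter, parents, default):
    # Let only geometric augmenters touch the mask; skip the blur.
    return augmenter.__class__.__name__ in ["Sequential", "Fliplr"]

image = np.zeros((64, 64, 3), dtype=np.uint8)
mask = np.zeros((64, 64, 1), dtype=np.uint8)
image = det.augment_image(image)
mask = det.augment_image(mask, hooks=imgaug.HooksImages(activator=hook))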
Code example #9
def load_image_gt(config,
                  image_id,
                  image,
                  depth,
                  mask,
                  class_ids,
                  parameters,
                  augment=False,
                  use_mini_mask=True):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    image_meta: 1-D array of image details. See compose_image_meta().
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    ## Load image and mask
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MAX_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)

    mask = utils.resize_mask(mask, scale, padding)

    ## Random horizontal flips.
    if augment and False:  # flipping is deliberately disabled by `and False`
        if np.random.randint(0, 2):  # upper bound is exclusive; (0, 1) would always be 0
            image = np.fliplr(image)
            mask = np.fliplr(mask)
            depth = np.fliplr(depth)
            pass
        pass

    ## Bounding boxes. Note that some boxes might be all zeros
    ## if the corresponding mask got cropped out.
    ## bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)
    ## Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)
        pass

    active_class_ids = np.ones(config.NUM_CLASSES, dtype=np.int32)
    ## Image meta data
    image_meta = utils.compose_image_meta(image_id, shape, window,
                                          active_class_ids)

    if config.NUM_PARAMETER_CHANNELS > 0:
        if config.OCCLUSION:
            depth = utils.resize_mask(depth, scale, padding)
            mask_visible = utils.minimize_mask(bbox, depth,
                                               config.MINI_MASK_SHAPE)
            mask = np.stack([mask, mask_visible], axis=-1)
        else:
            depth = np.expand_dims(depth, -1)
            depth = utils.resize_mask(depth, scale, padding).squeeze(-1)
            depth = utils.minimize_depth(bbox, depth, config.MINI_MASK_SHAPE)
            mask = np.stack([mask, depth], axis=-1)
            pass
        pass
    return image, image_meta, class_ids, bbox, mask, parameters
Code example #10
def load_image_gt(dataset, config, image_id, augment=False, augmentation=None, use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).
    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
        For example, passing imgaug.augmenters.Fliplr(0.5) flips images
        right/left 50% of the time.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.
    Returns:
    image: [height, width, 3]
    image_meta: 1-D array of image details. See compose_image_meta().
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """

    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    original_shape = image.shape
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE
    )
    mask = utils.resize_mask(mask, scale, padding, crop)

    # Random horizontal flips.
    # TODO: will be removed in a future update in favor of augmentation

    if augment:
        logging.warning("'augment' id deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Augmentation
    # This requires the imgaug lib (https://github.com/aleju/imgaug)

    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks
        # Some, such as Affine, have settings that make them unsafe, so always
        # test your augmentation on masks
        MASK_AUGMENTS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                         "Fliplr", 'Flipud', 'CropAndPad', "Affine", "PiecewiseAffine"]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTS

        # Store shapes before augmentation to compare
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks
        det = augmentation.to_deterministic()

        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug does not support bool arrays
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"

        # Change mask back to bool
        mask = mask.astype(bool)  # np.bool was removed in NumPy 1.24

    # Some masks may be cropped out entirely by augmentation; filter out
    # instances whose mask no longer contains any pixels.
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.

    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = utils.compose_image_meta(image_id, original_shape, image.shape,
                                          window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
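
A usage sketch showing the augmentation argument in action (dataset, config, and image_id are hypothetical here):

import imgaug.augmenters as iaa

image, image_meta, class_ids, bbox, mask = load_image_gt(
    dataset, config, image_id,
    augmentation=iaa.Fliplr(0.5),  # flip left/right 50% of the time
    use_mini_mask=True)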