Esempio n. 1
0
    def prepare_image(self, image_id):
        """Load one image with its instance masks and derive per-instance targets.

        The image is resized according to ``self.config`` (IMAGE_MIN_DIM,
        IMAGE_MIN_SCALE, IMAGE_MAX_DIM, IMAGE_RESIZE_MODE) and the masks are
        resized with the same scale, padding and crop. Instances whose resized
        mask sums to 16 or less (at most 16 foreground pixels for a binary
        mask) are discarded.

        Args:
            image_id: identifier forwarded to ``self.load_image`` and
                ``self.load_mask``.

        Returns:
            ``None`` (after printing "return nothing") when no instance
            survives the size filter, otherwise the tuple
            ``(image, class_ids, bbox, mask, gt_cy, gt_cx)``:

            image: the resized image.
            class_ids: class IDs of the kept instances.
            bbox: [num_instances, (y1, x1, y2, x2)] boxes computed by
                ``cocoutils.extract_bboxes``.
            mask: [y, x, num_instances] masks of the kept instances.
            gt_cy, gt_cx: per-instance centre coordinates computed by
                ``cocoutils.gravity_center``.
        """
        # Load image and mask
        image = self.load_image(image_id=image_id)
        mask, class_ids = self.load_mask(image_id=image_id)

        image, _window, scale, padding, crop = cocoutils.resize_image(
            image,
            min_dim=self.config.IMAGE_MIN_DIM,
            min_scale=self.config.IMAGE_MIN_SCALE,
            max_dim=self.config.IMAGE_MAX_DIM,
            mode=self.config.IMAGE_RESIZE_MODE)
        mask = cocoutils.resize_mask(mask, scale, padding, 0, crop)

        # Keep only instances that are still large enough after resizing.
        keep = np.sum(mask, axis=(0, 1)) > 16
        class_ids = class_ids[keep]
        if len(class_ids) == 0:
            print("return nothing")
            return None

        # [y, x, num_instance]
        mask = mask[:, :, keep]
        # bbox: [num_instances, (y1, x1, y2, x2)]
        bbox = cocoutils.extract_bboxes(mask)
        gt_cy, gt_cx = cocoutils.gravity_center(mask)
        return image, class_ids, bbox, mask, gt_cy, gt_cx
Esempio n. 2
0
    def generator(self, image_id):
        """Build the normalised image and dense ground-truth maps for one image.

        Calls ``self.prepare_image`` and converts the per-instance targets
        (centres, boxes, masks) into maps on a grid whose side is
        ``IMAGE_MAX_DIM / STRIDE``.

        Args:
            image_id: identifier forwarded to ``self.prepare_image``.

        Returns:
            ``None`` when ``self.prepare_image`` returns ``None``; otherwise
            ``(image, gt)`` where

            image: the image scaled to [0, 1] and normalised per channel with
                the mean/std constants below (presumably the ImageNet
                statistics - confirm against the backbone in use).
            gt: float array made by concatenating along the last axis
                - 4 channels of distances to the top, left, bottom and right
                  box edges (zero outside instance masks; where several
                  instances cover a cell only the one with the smallest
                  ``(l + r) * (t + b)`` area is kept),
                - 1 channel holding the maximum Gaussian-like response over
                  all instances, masked to object cells,
                - ``self.num_classes`` channels of centre key-points,
                - ``self.num_classes`` channels of per-class Gaussian-like
                  responses,
                - ``self.num_classes`` channels of per-class object masks.
        """
        gt = self.prepare_image(image_id,
                                augment=self.augment,
                                augmentation=self.augmentation)
        if gt is None:
            return None

        image, class_ids, bbox, gt_y, gt_x, gt_mask = gt
        stride = self.config.STRIDE
        # Side length of the stride-reduced grid used for the per-instance pads.
        cells = self.config.IMAGE_MAX_DIM // int(stride)

        floor_y = np.floor(gt_y / int(stride)).astype(int)
        floor_x = np.floor(gt_x / int(stride)).astype(int)

        mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
        std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
        image = (image / 255. - mean) / std

        # bbox: [num_instances, (y1, x1, y2, x2)]
        gt_top = np.expand_dims((gt_y - bbox[..., 0]), axis=-1)
        gt_bot = np.expand_dims((bbox[..., 2] - gt_y), axis=-1)
        gt_left = np.expand_dims((gt_x - bbox[..., 1]), axis=-1)
        gt_right = np.expand_dims((bbox[..., 3] - gt_x), axis=-1)
        gt_y = np.expand_dims(gt_y, axis=-1)
        gt_x = np.expand_dims(gt_x, axis=-1)
        class_ids = np.expand_dims(class_ids, axis=-1)
        gt = np.concatenate(
            [gt_y, gt_x, gt_top, gt_left, gt_bot, gt_right, class_ids],
            axis=-1)
        instance_num = np.shape(gt)[0]

        # Per-instance scale maps for the Gaussian-like response: a single
        # row/column of ones at the centre cell, padded on each side with the
        # (stride-reduced) distance from the centre to that side of the box.
        # Pure NumPy is used here; the arrays are consumed by np.pad.
        vertical_ones = np.tile([[1]], [cells, 1])
        horizontal_ones = np.tile([[1]], [1, cells])
        scalar_y = []
        scalar_x = []
        for i in range(instance_num):
            pad_l = floor_x[i]
            pad_r = cells - floor_x[i] - 1
            vertical_pads = np.pad(
                vertical_ones, [[0, 0], [pad_l, pad_r]],
                "constant",
                constant_values=(int(gt_left[i][0] / stride),
                                 int(gt_right[i][0] / stride)))
            vertical_pads = np.expand_dims(vertical_pads, -1)

            pad_t = floor_y[i]
            pad_b = cells - floor_y[i] - 1
            horizontal_pads = np.pad(
                horizontal_ones, [[pad_t, pad_b], [0, 0]],
                "constant",
                constant_values=(int(gt_top[i][0] / stride),
                                 int(gt_bot[i][0] / stride)))
            horizontal_pads = np.expand_dims(horizontal_pads, -1)

            scalar_y.append(horizontal_pads)
            scalar_x.append(vertical_pads)
        scalar_y = np.concatenate(scalar_y, axis=-1)  # [y, x, num_g]
        scalar_x = np.concatenate(scalar_x, axis=-1)  # [y, x, num_g]

        # Down-scale the instance masks to the grid resolution.
        padding = [(0, 0), (0, 0), (0, 0)]
        stride_mask = resize_mask(gt_mask, 1 / stride, padding, 0)
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        masks = stride_mask.astype(np.uint8).astype(float)

        class_id = class_ids[..., 0]

        # Centre positions on the grid, as floats and as integer cells.
        gravi_yx = gt[..., 0:2] / stride
        gravi_yx_round_int = np.floor(gravi_yx).astype(int)
        gravity_y = np.reshape(gt_y[..., 0] / stride, [1, 1, -1])
        gravity_x = np.reshape(gt_x[..., 0] / stride, [1, 1, -1])

        # Box edges on the grid, broadcastable against (y, x, num_g).
        gbbox_y1 = np.reshape(bbox[..., 0] / stride, [1, 1, -1])
        gbbox_x1 = np.reshape(bbox[..., 1] / stride, [1, 1, -1])
        gbbox_y2 = np.reshape(bbox[..., 2] / stride, [1, 1, -1])
        gbbox_x2 = np.reshape(bbox[..., 3] / stride, [1, 1, -1])
        num_g = np.shape(gbbox_y1)[-1]

        # Coordinate grids of shape [h_y_num, w_x_num] ('xy' indexing).
        axis_coords = np.arange(self.config.IMAGE_MAX_DIM /
                                stride).astype(np.float32)
        grid_x, grid_y = np.meshgrid(axis_coords, axis_coords)

        grid_y = np.tile(np.expand_dims(grid_y, -1),
                         [1, 1, num_g])  # (y, x, num_g)
        grid_x = np.tile(np.expand_dims(grid_x, -1), [1, 1, num_g])
        dist_l = grid_x - gbbox_x1  # (y, x, num_g)
        dist_r = gbbox_x2 - grid_x
        dist_t = grid_y - gbbox_y1
        dist_b = gbbox_y2 - grid_y
        grid_y_mask = (dist_t > 0.).astype(
            np.float32) * (dist_b > 0.).astype(np.float32)
        grid_x_mask = (dist_l > 0.).astype(
            np.float32) * (dist_r > 0.).astype(np.float32)

        # Cells strictly inside the box AND on the instance mask, so the
        # result follows the mask shape rather than the rectangle.
        heatmask = grid_y_mask * grid_x_mask * masks  # (y, x, num_g)
        dist_l *= heatmask
        dist_r *= heatmask
        dist_t *= heatmask
        dist_b *= heatmask

        loc = np.max(heatmask, axis=-1)  # (y, x) objects mask
        dist_area = (dist_l + dist_r) * (dist_t + dist_b)  # (y, x, num_g)
        # Push background cells to a huge area so the smallest covering
        # instance wins the per-cell minimum.
        dist_area_ = dist_area + (1. - heatmask) * 1e8
        dist_area_min = np.min(dist_area_, axis=-1, keepdims=True)
        # Non-overlapping instance assignment mask (y, x, num_g).
        dist_mask = np.equal(dist_area, dist_area_min).astype(
            np.float32) * np.expand_dims(loc, axis=-1)

        # Keep only the winning instance's distances, then collapse instances.
        dist_l *= dist_mask
        dist_r *= dist_mask
        dist_t *= dist_mask
        dist_b *= dist_mask
        dist_l = np.expand_dims(np.max(dist_l, axis=-1), -1)  # (y, x, 1)
        dist_r = np.expand_dims(np.max(dist_r, axis=-1), -1)
        dist_t = np.expand_dims(np.max(dist_t, axis=-1), -1)
        dist_b = np.expand_dims(np.max(dist_b, axis=-1), -1)
        gt_reg = np.concatenate([dist_t, dist_l, dist_b, dist_r],
                                axis=-1).astype(np.float32)

        # Gaussian-like response around each (floored) centre; the small
        # epsilon avoids dividing by zero where the scale map is 0.
        reduction = np.exp(
            -(((grid_y - gravity_y // 1) / np.sqrt(scalar_y + 1e-8))**2 +
              ((grid_x - gravity_x // 1) / np.sqrt(scalar_x + 1e-8))**2) /
            (2 * 1**2))
        iou_reduction = np.max(reduction, axis=2)  # [y, x, num_g] --> [y, x]
        iou_reduction = np.expand_dims(loc * iou_reduction,
                                       axis=-1).astype(np.float32)

        grid_size = int(self.config.IMAGE_MAX_DIM / stride)
        zero_like = np.zeros((grid_size, grid_size, 1), np.float32)
        gt_keypoints = []
        heatmap_gt = []
        reduction_gt = []
        for i in range(self.num_classes):
            # Class IDs start at 1 (0 is background), channel i is class i+1.
            exist_i = np.equal(class_id - 1, i)
            gy = gravi_yx_round_int[..., 0][exist_i]
            gx = gravi_yx_round_int[..., 1][exist_i]
            num_i = np.sum(exist_i.astype(np.int32))
            reduce_i = reduction[..., exist_i]
            heatmask_i = dist_mask[..., exist_i]
            # [y, x, 1] maps for class i; all zeros when the class is absent.
            if np.shape(reduce_i)[-1] == 0:
                reduce_i = zero_like
                heatmap_i = zero_like
                gt_keypoints_i = zero_like
            else:
                reduce_i = np.expand_dims(np.max(reduce_i, axis=2), axis=-1)
                heatmap_i = np.expand_dims(np.max(heatmask_i, axis=2),
                                           axis=-1)
                # Sparse scatter of ones at the centre cells of this class.
                gt_keypoints_i = csr_matrix(
                    (np.ones(num_i), (gy, gx)),
                    shape=(grid_size, grid_size)).toarray()
                gt_keypoints_i = np.expand_dims(gt_keypoints_i, -1)
            reduction_gt.append(reduce_i)
            heatmap_gt.append(heatmap_i)
            gt_keypoints.append(gt_keypoints_i)
        reduction_gt = np.concatenate(reduction_gt,
                                      axis=-1).astype(np.float32)
        heatmap_gt = np.concatenate(heatmap_gt, axis=-1).astype(np.float32)
        gt_keypoints = np.concatenate(gt_keypoints,
                                      axis=-1).astype(np.float32)

        gt = np.concatenate(
            [gt_reg, iou_reduction, gt_keypoints, reduction_gt, heatmap_gt],
            axis=-1)

        return image, gt
Esempio n. 3
0
    def prepare_image(self, image_id, augment=False, augmentation=None):
        """Load, resize and optionally augment one image with its masks.

        The image is resized according to ``self.config`` (IMAGE_MIN_DIM,
        IMAGE_MIN_SCALE, IMAGE_MAX_DIM, IMAGE_RESIZE_MODE) and the masks are
        resized with the same scale, padding and crop. After augmentation,
        instances whose mask sums to 48 or less are discarded.

        Args:
            image_id: identifier forwarded to ``self.load_image`` and
                ``self.load_mask``.
            augment: (deprecated, use ``augmentation``). If true, randomly
                flip image and masks left/right. ``self.augment`` is used as
                well, so the flip happens when either is true.
            augmentation: Optional imgaug (https://github.com/aleju/imgaug)
                augmenter, e.g. ``imgaug.augmenters.Fliplr(0.5)``. When left
                as ``None`` the instance's ``self.augmentation`` is used.

        Returns:
            ``None`` when no instance survives the size filter, otherwise the
            tuple ``(image, class_ids, bbox, gt_cy, gt_cx, mask)``:

            image: the resized (and possibly augmented) image.
            class_ids: class IDs of the kept instances.
            bbox: [num_instances, (y1, x1, y2, x2)] boxes computed by
                ``cocoutils.extract_bboxes``.
            gt_cy, gt_cx: per-instance centre coordinates computed by
                ``cocoutils.gravity_center``.
            mask: [y, x, num_instances] masks of the kept instances.
        """
        # Load image and mask
        image = self.load_image(image_id)
        mask, class_ids = self.load_mask(image_id)
        image, _window, scale, padding, crop = cocoutils.resize_image(
            image,
            min_dim=self.config.IMAGE_MIN_DIM,
            min_scale=self.config.IMAGE_MIN_SCALE,
            max_dim=self.config.IMAGE_MAX_DIM,
            mode=self.config.IMAGE_RESIZE_MODE)
        mask = cocoutils.resize_mask(mask, scale, padding, 0, crop)

        # Honour the explicit arguments and fall back to the instance settings.
        # Testing self.augmentation while calling methods on the argument
        # crashes when the argument is None and ignores it when only the
        # argument is set.
        augment = augment or self.augment
        if augmentation is None:
            augmentation = self.augmentation

        # Random horizontal flips.
        # TODO: will be removed in a future update in favor of augmentation
        if augment:
            logging.warning(
                "'augment' is deprecated. Use 'augmentation' instead.")
            if random.randint(0, 1):
                image = np.fliplr(image)
                mask = np.fliplr(mask)

        # Augmentation
        # This requires the imgaug lib (https://github.com/aleju/imgaug)
        if augmentation:
            import imgaug

            # Augmenters that are safe to apply to masks
            # Some, such as Affine, have settings that make them unsafe, so always
            # test your augmentation on masks
            MASK_AUGMENTERS = [
                "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr",
                "Flipud", "CropAndPad", "Affine", "PiecewiseAffine"
            ]

            def hook(images, augmenter, parents, default):
                """Determines which augmenters to apply to masks."""
                return augmenter.__class__.__name__ in MASK_AUGMENTERS

            # Store shapes before augmentation to compare
            image_shape = image.shape
            mask_shape = mask.shape
            # Make augmenters deterministic to apply similarly to images and masks
            det = augmentation.to_deterministic()
            image = det.augment_image(image)
            # Change mask to np.uint8 because imgaug doesn't support bool
            mask = det.augment_image(mask.astype(np.uint8),
                                     hooks=imgaug.HooksImages(activator=hook))
            # Verify that shapes didn't change
            assert image.shape == image_shape, "Augmentation shouldn't change image size"
            assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
            # Change mask back to bool (the np.bool alias was removed in
            # NumPy 1.24, so the builtin is used).
            mask = mask.astype(bool)

        # Keep only instances that are still large enough.
        keep = np.sum(mask, axis=(0, 1)) > 48
        class_ids = class_ids[keep]
        if len(class_ids) == 0:
            return None

        # [y, x, num_instance]
        mask = mask[:, :, keep]
        # bbox: [num_instances, (y1, x1, y2, x2)]
        bbox = cocoutils.extract_bboxes(mask)
        gt_cy, gt_cx = cocoutils.gravity_center(mask)
        return image, class_ids, bbox, gt_cy, gt_cx, mask
def load_image_gt(dataset,
                  config,
                  image_id,
                  augment=False,
                  augmentation=None,
                  use_mini_mask=False):
    """Load and return ground truth data for an image.

    Args:
        dataset: object providing ``load_image``, ``load_mask``,
            ``num_classes``, ``source_class_ids`` and ``image_info``.
        config: object providing IMAGE_MIN_DIM, IMAGE_MIN_SCALE,
            IMAGE_MAX_DIM, IMAGE_RESIZE_MODE and MINI_MASK_SHAPE.
        image_id: identifier of the image to load.
        augment: (deprecated, use ``augmentation``). If true, randomly flip
            image and masks left/right.
        augmentation: Optional imgaug (https://github.com/aleju/imgaug)
            augmenter applied to the image and, for mask-safe augmenters,
            to the masks.
        use_mini_mask: If true, masks are reduced with
            ``utils.minimize_mask`` to ``config.MINI_MASK_SHAPE``.

    Returns:
        ``(image, image_meta, class_ids, bbox, mask)`` where empty mask
        layers (and their class IDs) have been dropped, ``bbox`` comes from
        ``utils.extract_bboxes`` and ``image_meta`` from
        ``utils.compose_image_meta``.
    """
    # Load the image and its segmentation masks
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    # Shape before resizing
    original_shape = image.shape
    # Get the resized image, the window of the original image inside it,
    # the applied scale, the padding, etc.
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE)
    mask = utils.resize_mask(mask, scale, padding, crop)

    # Optional random left/right flip
    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    if augmentation:
        import imgaug
        # Augmenters that may also be applied to masks
        MASK_AUGMENTERS = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTERS

        image_shape = image.shape
        mask_shape = mask.shape
        # Deterministic copy so image and masks receive the same transform
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Builtin bool replaces the np.bool alias removed in NumPy 1.24.
        mask = mask.astype(bool)

    # Drop mask layers that ended up with no foreground pixels
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # Bounding box of each remaining mask
    bbox = utils.extract_bboxes(mask)

    # Flag the classes that belong to this image's source dataset
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]
                                                ["source"]]
    active_class_ids[source_class_ids] = 1

    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Build the image meta record
    image_meta = utils.compose_image_meta(image_id, original_shape,
                                          image.shape, window, scale,
                                          active_class_ids)

    return image, image_meta, class_ids, bbox, mask
Esempio n. 5
0
File: test1.py Progetto: zlf1993/PCT
# Accumulator for the selected instances' masks on the stride-reduced grid.
final_masks = np.zeros([
    int(Config.IMAGE_MAX_DIM // Config.STRIDE),
    int(Config.IMAGE_MAX_DIM // Config.STRIDE), num_select
], np.float32)

# Add the masks produced by ccc() for each class in turn.
for i in range(Config.NUM_CLASSES):
    exist_i = np.equal(select_class_id, i)  # boolean: selection has class i
    exist_int = exist_i.astype(int)
    index = np.where(exist_int > 0)[0]  # positions of class-i selections
    num_i = np.sum(exist_int)
    masks = ccc(Config, num_select, index, select_bbox, exist_i,
                class_seg[..., i], num_i, pic_preg)
    final_masks = final_masks + masks

# TODO: resize masks
padding = [(0, 0), (0, 0), (0, 0)]
stride_mask = resize_mask(final_masks, 4, padding, 0)
stride_mask = cv2.medianBlur(stride_mask, 5)
# Builtin float replaces the np.float alias removed in NumPy 1.24.
masks = stride_mask.astype(np.uint8).astype(float)
# Restore the instance axis when the result is 2-D (presumably the blur
# drops it for a single instance - confirm). Compare by value, not identity.
if len(np.shape(masks)) == 2:
    masks = np.expand_dims(masks, -1)

visualize.display_instances(image,
                            select_center * 4 + 2,
                            select_bbox * 4,
                            masks,
                            select_class_id + 1,
                            class_names,
                            select_scores,
                            show_mask=True)
Esempio n. 6
0
File: coco.py Progetto: zlf1993/tf
    def generator(self, image_id):
        """Build the normalised image and padded per-instance targets.

        Calls ``self.prepare_image`` and pads or truncates every per-instance
        output to exactly ``self.config.MAX_GT_INSTANCES`` instances.

        Args:
            image_id: identifier forwarded to ``self.prepare_image``.

        Returns:
            ``None`` when ``self.prepare_image`` returns ``None``; otherwise
            ``(image, gt, masks, scalar_y, scalar_x)`` where

            image: the image scaled to [0, 1] and normalised per channel with
                the mean/std constants below (presumably the ImageNet
                statistics - confirm against the backbone in use).
            gt: [MAX_GT_INSTANCES, 7] rows of
                (centre_y, centre_x, y1, x1, y2, x2, class_id), zero padded.
            masks: instance masks resized by 1/8 via ``resize_mask``, with
                MAX_GT_INSTANCES channels, as float.
            scalar_y, scalar_x: per-instance scale maps on the 1/8 grid with
                MAX_GT_INSTANCES channels (1 at the centre row/column, padded
                with the stride-reduced centre-to-edge distances).
        """
        gt = self.prepare_image(image_id,
                                augment=self.augment,
                                augmentation=self.augmentation)
        if gt is None:
            return None

        image, class_ids, bbox, gt_y, gt_x, gt_mask = gt

        # Down-sampling factor between the image and the target grid.
        stride = 8
        cells = self.config.IMAGE_MAX_DIM // stride

        floor_y = np.floor(gt_y / stride).astype(int)
        floor_x = np.floor(gt_x / stride).astype(int)

        mean = np.reshape(np.array([0.485, 0.456, 0.406]), [1, 1, 3])
        std = np.reshape(np.array([0.229, 0.224, 0.225]), [1, 1, 3])
        image = (image / 255. - mean) / std

        # bbox: [num_instances, (y1, x1, y2, x2)]
        gt_top = np.expand_dims((gt_y - bbox[..., 0]), axis=-1)
        gt_bot = np.expand_dims((bbox[..., 2] - gt_y), axis=-1)
        gt_left = np.expand_dims((gt_x - bbox[..., 1]), axis=-1)
        gt_right = np.expand_dims((bbox[..., 3] - gt_x), axis=-1)
        gt_y = np.expand_dims(gt_y, axis=-1)
        gt_x = np.expand_dims(gt_x, axis=-1)
        class_ids = np.expand_dims(class_ids, axis=-1)
        gt = np.concatenate([gt_y, gt_x, bbox, class_ids], axis=-1)
        instance_num = np.shape(gt)[0]

        # Per-instance scale maps: a single row/column of ones at the centre
        # cell, padded on each side with the stride-reduced distance from the
        # centre to that side of the box. Pure NumPy is used here; the arrays
        # are consumed by np.pad.
        vertical_ones = np.tile([[1]], [cells, 1])
        horizontal_ones = np.tile([[1]], [1, cells])
        scalar_y = []
        scalar_x = []
        for i in range(instance_num):
            pad_l = floor_x[i]
            pad_r = cells - floor_x[i] - 1
            vertical_pads = np.pad(vertical_ones, [[0, 0], [pad_l, pad_r]],
                                   "constant",
                                   constant_values=(gt_left[i][0] // stride,
                                                    gt_right[i][0] // stride))
            vertical_pads = np.expand_dims(vertical_pads, -1)

            pad_t = floor_y[i]
            pad_b = cells - floor_y[i] - 1
            horizontal_pads = np.pad(horizontal_ones,
                                     [[pad_t, pad_b], [0, 0]],
                                     "constant",
                                     constant_values=(gt_top[i][0] // stride,
                                                      gt_bot[i][0] // stride))
            horizontal_pads = np.expand_dims(horizontal_pads, -1)

            scalar_y.append(horizontal_pads)
            scalar_x.append(vertical_pads)
        scalar_y = np.concatenate(scalar_y, axis=-1)  # [y, x, num_g]
        scalar_x = np.concatenate(scalar_x, axis=-1)  # [y, x, num_g]

        # Bring every per-instance output to exactly MAX_GT_INSTANCES entries.
        max_instances = self.config.MAX_GT_INSTANCES
        if instance_num <= max_instances:
            missing = max_instances - instance_num
            gt = np.pad(gt, ((0, missing), (0, 0)), mode='constant')
            channel_pad = ((0, 0), (0, 0), (0, missing))
            masks = np.pad(gt_mask, channel_pad, mode='constant')
            scalar_y = np.pad(scalar_y, channel_pad, mode='constant')
            scalar_x = np.pad(scalar_x, channel_pad, mode='constant')
        else:
            gt = gt[:max_instances, ...]
            masks = gt_mask[:, :, 0:max_instances]
            scalar_y = scalar_y[:, :, 0:max_instances]
            scalar_x = scalar_x[:, :, 0:max_instances]

        # Down-scale the masks to the grid resolution.
        padding = [(0, 0), (0, 0), (0, 0)]
        stride_mask = resize_mask(masks, 1 / stride, padding, 0)
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        masks = stride_mask.astype(np.uint8).astype(float)

        return image, gt, masks, scalar_y, scalar_x
Esempio n. 7
0
def parse_fn(image_id,
             dataset,
             anchors_path,
             augmentation=None,
             dtype=np.float32,
             max_num_boxes_per_image=20,
             image_size=416):
    """Load one image and encode its ground truth as a flat YOLO target vector.

    The image is resized with ``utils.resize_image`` (``mode='square'``,
    ``max_dim=image_size``), its instance masks are resized the same way,
    optionally augmented, and turned into bounding boxes. The boxes are then
    matched to anchors and encoded at three grid resolutions
    (``image_size // 32``, ``// 16`` and ``// 8``).

    Args:
        image_id: Identifier forwarded to ``dataset.load_image`` and
            ``dataset.load_mask``.
        dataset: Object providing ``load_image``, ``load_mask`` and
            ``num_classes``.
        anchors_path: Forwarded to ``utils.get_anchors`` to load the anchors.
        augmentation: Optional imgaug augmenter. It is made deterministic so
            the same transform is applied to the image and to the masks; only
            augmenters listed in ``MASK_AUGMENTERS`` are applied to masks.
        dtype: NumPy dtype used for the class-id column and for both returned
            arrays.
        max_num_boxes_per_image: Fixed number of box slots. Extra instances
            are dropped by random sampling; unused slots stay all-zero.
        image_size: Side length handed to ``utils.resize_image`` and used to
            normalise box coordinates and derive the grid sizes.

    Returns:
        A tuple ``(image, yolo_gt)``:
        image: the resized (and possibly augmented) image cast to ``dtype``
            and divided by 255.
        yolo_gt: 1-D array of ``dtype``. It is the flattened per-scale
            detector targets (large, medium, small) followed by the flattened
            per-scale box arrays (large, medium, small).
    """

    image = dataset.load_image(image_id)
    # original_shape = image.shape
    image, window, scale, padding, crop = utils.resize_image(
        image, min_dim=0, min_scale=0, max_dim=image_size, mode='square')

    mask, class_ids = dataset.load_mask(image_id)

    mask = utils.resize_mask(mask, scale, padding, crop)

    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks
        # Some, such as Affine, have settings that make them unsafe, so always
        # test your augmentation on masks
        MASK_AUGMENTERS = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTERS

        # Store shapes before augmentation to compare
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug doesn't support boolean arrays
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool. The builtin ``bool`` is used because the
        # ``np.bool`` alias was removed in NumPy 1.24.
        mask = mask.astype(bool)

    # Some masks may have been cropped out entirely by resizing/augmentation;
    # drop those instances so they do not produce all-zero boxes.
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Too many instances: keep a random subset of the boxes. ``mask`` itself
    # is not subsampled because it is not used past this point.
    if mask.shape[-1] > max_num_boxes_per_image:
        ids = np.random.choice(np.arange(mask.shape[-1]),
                               max_num_boxes_per_image,
                               replace=False)
        class_ids = class_ids[ids]
        bbox = bbox[ids, :]

    # Fixed-size, zero-padded containers for class ids and boxes.
    gt_class_ids = np.zeros(max_num_boxes_per_image, class_ids.dtype)
    gt_bbox = np.zeros((max_num_boxes_per_image, bbox.shape[1]), bbox.dtype)

    if class_ids.shape[0] > 0:
        gt_class_ids[:class_ids.shape[0]] = class_ids
        gt_bbox[:bbox.shape[0], :] = bbox

    gt_class_ids = np.expand_dims(gt_class_ids, axis=-1).astype(dtype)

    # Append the class id as the last column: (y1, x1, y2, x2, class_id).
    gt_bbox = np.concatenate([gt_bbox, gt_class_ids], axis=-1)

    anchors = utils.get_anchors(anchors_path)
    anchors = np.array(anchors, dtype=np.float32)

    # Box centres (floor-divided) and sizes, still in (y, x) / (h, w) order.
    boxes_yx = (gt_bbox[:, 0:2] + gt_bbox[:, 2:4]) // 2
    boxes_hw = gt_bbox[:, 2:4] - gt_bbox[:, 0:2]

    # Rewrite the first four columns as (x_center, y_center, w, h), each
    # divided by image_size.
    gt_bbox[:, 0] = boxes_yx[..., 1] / image_size
    gt_bbox[:, 1] = boxes_yx[..., 0] / image_size
    gt_bbox[:, 2] = boxes_hw[..., 1] / image_size
    gt_bbox[:, 3] = boxes_hw[..., 0] / image_size

    # IoU between every box and every anchor, with both centred on the
    # origin, so only their sizes matter.
    # NOTE(review): boxes_hw is ordered (h, w); confirm utils.get_anchors
    # returns anchors in the same axis order.
    hw = np.expand_dims(boxes_hw, -2)
    anchors_broad = np.expand_dims(anchors, 0)

    anchor_maxes = anchors_broad / 2.
    anchor_mins = -anchor_maxes
    box_maxes = hw / 2.
    box_mins = -box_maxes
    intersect_mins = np.maximum(box_mins, anchor_mins)
    intersect_maxes = np.minimum(box_maxes, anchor_maxes)
    intersect_hw = np.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_hw[..., 0] * intersect_hw[..., 1]
    box_area = hw[..., 0] * hw[..., 1]
    anchor_area = anchors[..., 0] * anchors[..., 1]
    iou = intersect_area / (box_area + anchor_area - intersect_area)
    best_anchors = np.argmax(iou, axis=-1)

    # TODO: write a function to calculate the stride automatically.
    large_obj_image_size = image_size // 32
    medium_obj_image_size = image_size // 16
    small_obj_image_size = image_size // 8

    # Anchors 6-8 go with the coarsest grid, 3-5 with the middle grid and
    # 0-2 with the finest grid.
    large_obj_detectors, large_obj_boxes = get_detector_heatmap_each_scale(
        gt_bbox,
        best_anchors_=best_anchors,
        anchors_mask=[6, 7, 8],
        grid_size=(large_obj_image_size, large_obj_image_size),
        num_classes=dataset.num_classes)

    medium_obj_detectors, medium_obj_boxes = get_detector_heatmap_each_scale(
        gt_bbox,
        best_anchors_=best_anchors,
        anchors_mask=[3, 4, 5],
        grid_size=(medium_obj_image_size, medium_obj_image_size),
        num_classes=dataset.num_classes)

    small_obj_detectors, small_obj_boxes = get_detector_heatmap_each_scale(
        gt_bbox,
        best_anchors_=best_anchors,
        anchors_mask=[0, 1, 2],
        grid_size=(small_obj_image_size, small_obj_image_size),
        num_classes=dataset.num_classes)

    # Flatten everything into one vector: detector targets first, then boxes.
    yolo_true_data = np.concatenate(
        [large_obj_detectors, medium_obj_detectors, small_obj_detectors],
        axis=0).reshape([-1])
    yolo_true_boxes = np.concatenate(
        [large_obj_boxes, medium_obj_boxes, small_obj_boxes],
        axis=0).reshape([-1])

    yolo_gt = np.concatenate([yolo_true_data, yolo_true_boxes], axis=-1)

    return image.astype(dtype) / 255., yolo_gt.astype(dtype)
Esempio n. 8
0
# Accumulator for the per-class masks, one channel per selected instance, at
# the IMAGE_MAX_DIM // STRIDE resolution.
final_masks = np.zeros([
    int(Config.IMAGE_MAX_DIM // Config.STRIDE),
    int(Config.IMAGE_MAX_DIM // Config.STRIDE), num_select
], np.float32)

for i in range(Config.NUM_CLASSES):
    # Boolean selector of the instances predicted as class i.
    exist_i = np.equal(select_class_id, i)  # [0,1,...]
    exist_int = exist_i.astype(int)
    index = np.where(exist_int > 0)[0]  # [a, b, 5, 8..]
    num_i = np.sum(exist_int)
    # NOTE(review): ``ccc`` is defined elsewhere; it is assumed to return an
    # array broadcastable to ``final_masks`` -- confirm.
    masks = ccc(Config, num_select, index, select_bbox, exist_i,
                class_seg[..., i], num_i, pic_preg)
    final_masks = final_masks + masks

# TODO: resize masks
padding = [(0, 0), (0, 0), (0, 0)]
stride_mask = resize_mask(final_masks, Config.STRIDE, padding, 0)
stride_mask = cv2.medianBlur(stride_mask, 5)
# Builtin ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
masks = stride_mask.astype(np.uint8).astype(float)
# Restore the trailing instance axis if the result is 2-D. Compare with
# ``==``: ``is`` tests object identity, not numeric equality.
if masks.ndim == 2:
    masks = np.expand_dims(masks, -1)
class_names = {0: "bg", 1: 'person', 2: "car"}
# Class ids are shifted by one because index 0 of ``class_names`` is "bg".
# NOTE(review): the ``* 4`` / ``+ 2`` factors presumably map stride-4
# coordinates back to image pixels -- confirm against Config.STRIDE.
visualize.display_instances(image,
                            select_center * 4 + 2,
                            select_bbox * 4,
                            masks,
                            select_class_id + 1,
                            class_names,
                            select_scores,
                            show_mask=True)
Esempio n. 9
0
def load_image_gt(dataset,
                  config,
                  image_id,
                  augment=False,
                  use_mini_mask=False):
    """Fetch an image together with its ground-truth annotations.

    augment: when truthy, the image and its masks are mirrored left/right
        with probability one half. No other augmentation is performed.
    use_mini_mask: when truthy, every instance mask is shrunk with
        utils.minimize_mask to config.MRCNN.MINI_MASK_SHAPE to save memory;
        otherwise masks keep the height and width of the resized image.

    Returns:
    image: [height, width, 3] resized image.
    image_meta: result of compose_image_meta for the image id, the shape of
        the image before resizing, the resize window and the active class
        ids.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count], or mini masks when use_mini_mask
        is set.
    """
    # Raw data straight from the dataset.
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)

    # Remember the shape before resizing; it goes into the image meta.
    original_shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.TRAIN.IMAGE_MIN_DIM,
        max_dim=config.TRAIN.IMAGE_MAX_DIM,
        padding=config.TRAIN.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)

    # Coin-flip horizontal mirroring, applied to image and masks together.
    if augment and random.randint(0, 1):
        image = np.fliplr(image)
        mask = np.fliplr(mask)

    # Boxes come from the (possibly flipped) masks. An instance whose mask
    # was cropped away yields an all-zero box.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Flag the classes that belong to the source dataset of this image,
    # since different sources may support different class subsets.
    active_class_ids = np.zeros((dataset.num_classes,), dtype=np.int32)
    source_name = dataset.image_info[image_id]["source"]
    active_class_ids[dataset.source_class_ids[source_name]] = 1

    # Optionally trade mask resolution for memory.
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MRCNN.MINI_MASK_SHAPE)

    image_meta = compose_image_meta(image_id, original_shape, window,
                                    active_class_ids)

    return image, image_meta, class_ids, bbox, mask
Esempio n. 10
0
def load_image_gt(dataset, config, image_id, augment=False, use_mini_mask=False):
    """Load an image with its thing/stuff/influence masks and semantic label.

    augment: when truthy, the image and all spatial ground truth (thing,
        stuff and influence masks, semantic label, raw segmentation) are
        mirrored left/right with probability one half. The flip is applied
        before boxes are extracted so boxes and mini masks match the
        returned image.
    use_mini_mask: when truthy, thing and stuff masks are shrunk with
        minimize_mask to config.MINI_MASK_SHAPE.

    Returns a 14-tuple:
    image, image_meta,
    thing_class_ids, thing_bbox, thing_mask,
    stuff_class_ids, stuff_bbox, stuff_mask,
    semantic_label: 2-D class-id map, resized so its longer side is 500 and
        zero-padded to 500x500.
    segmentation: the annotation PNG as read from disk (possibly flipped).
    image_info: dataset.image_info entry for this image.
    influence_class_ids, influence_bbox, influence_mask: the influence mask
        is additionally scaled by 1/8 with resize_map.
    """
    # Side length of the square semantic label map.
    semantic_size = 500

    # Load image and mask
    image_info = dataset.image_info[str(image_id)]
    image_name = image_info['image_name']
    image = dataset.load_image(image_id)
    shape = image.shape
    image, window, scale, padding = resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    image_meta = compose_image_meta(image_id, shape, window)

    (thing_mask, thing_class_ids, stuff_mask, stuff_class_ids,
     influence_mask, influence_class_ids) = dataset.load_mask(image_id)
    thing_mask = resize_mask(thing_mask, scale, padding)  # 1024
    stuff_mask = resize_mask(stuff_mask, scale, padding)  # 1024
    influence_mask = resize_mask(influence_mask, scale, padding)  # 1024
    influence_mask = resize_map(influence_mask, 1 / 8)  # 1024 -> 128

    # Decide on the random horizontal flip once, and apply it to the image
    # and every mask BEFORE extracting boxes. Flipping afterwards would
    # leave the boxes (and mini masks cropped from them) describing the
    # unflipped image.
    flip = bool(augment) and bool(random.randint(0, 1))
    if flip:
        image = np.fliplr(image)
        thing_mask = np.fliplr(thing_mask)
        stuff_mask = np.fliplr(stuff_mask)
        influence_mask = np.fliplr(influence_mask)

    thing_bbox = extract_bboxes(thing_mask)
    stuff_bbox = extract_bboxes(stuff_mask)
    influence_bbox = extract_bboxes(influence_mask)

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        thing_mask = minimize_mask(
            thing_bbox, thing_mask, config.MINI_MASK_SHAPE)
        stuff_mask = minimize_mask(
            stuff_bbox, stuff_mask, config.MINI_MASK_SHAPE)

    # The annotation shares the image's base name with a .png extension.
    # Swap only the extension; str.replace("jpg", "png") would also rewrite
    # any "jpg" occurring elsewhere in the name.
    annotation_name = os.path.splitext(image_name)[0] + ".png"
    segmentation = skimage.io.imread(
        os.path.join(dataset.annotation_dir, annotation_name))

    # Paint each instance's class id over the pixels carrying its id.
    semantic_label = np.zeros_like(segmentation)
    segmentation_instance_id_map = rgb2id(segmentation)
    for instance_id, instance in image_info['instances'].items():
        instance_mask = segmentation_instance_id_map == int(instance_id)
        category = dataset.category_info[str(instance['category_id'])]
        semantic_label[instance_mask] = category['class_id']
    # All channels hold the same value; keep one.
    semantic_label = semantic_label[:, :, 0]

    # Scale so the label map fits inside semantic_size x semantic_size,
    # using nearest-neighbour so class ids are not blended.
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this call
    # requires an older SciPy (with Pillow installed).
    semantic_label_h = semantic_label.shape[0]
    semantic_label_w = semantic_label.shape[1]
    semantic_label_scale = min(semantic_size / semantic_label_h,
                               semantic_size / semantic_label_w)
    semantic_label = scipy.misc.imresize(
        semantic_label,
        (round(semantic_label_h * semantic_label_scale),
         round(semantic_label_w * semantic_label_scale)),
        interp="nearest")

    # Centre the resized map in a zero-padded square.
    h, w = semantic_label.shape[:2]
    top_pad = (semantic_size - h) // 2
    bottom_pad = semantic_size - h - top_pad
    left_pad = (semantic_size - w) // 2
    right_pad = semantic_size - w - left_pad
    padding = [(top_pad, bottom_pad), (left_pad, right_pad)]
    semantic_label = np.pad(semantic_label, padding, mode='constant', constant_values=0)

    if flip:
        semantic_label = np.fliplr(semantic_label)
        segmentation = np.fliplr(segmentation)

    return image, image_meta, thing_class_ids, thing_bbox, thing_mask, stuff_class_ids, stuff_bbox, stuff_mask, \
           semantic_label, segmentation, image_info, influence_class_ids, influence_bbox, influence_mask