Code Example #1
File: app_scene.py  Project: cheverebe/Arkanoid-CV
    def _adapt_to_screen(self):
        screen_resolution = ScreenUtils.get_screen_resolution()
        screen_aspect_ratio = float(screen_resolution[0]) / screen_resolution[1]
        scene_aspect_ratio = float(self.dimensions[0]) / self.dimensions[1]

        if screen_aspect_ratio < scene_aspect_ratio:
            self.resize_factor = screen_resolution[0] / float(self.dimensions[0])
        else:
            self.resize_factor = screen_resolution[1] / float(self.dimensions[1])

        self.background = ImageUtils.resize_image(self.background, self.resize_factor)
        # NOTE: ndarray.shape[:2] is (height, width), while the aspect-ratio
        # math above indexes self.dimensions as (width, height); the two
        # orderings only coincide for square images.
        self.dimensions = self.background.shape[:2]
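
The branch picks the scale factor along whichever axis would overflow first, so the resized scene fits entirely on screen (letterbox-style). A minimal standalone sketch of the same decision, with hypothetical numbers standing in for ScreenUtils and ImageUtils:

# Hypothetical values; only the scale-factor choice from above is reproduced.
screen_w, screen_h = 1920, 1080          # 16:9 screen
scene_w, scene_h = 800, 600              # 4:3 scene
if screen_w / screen_h < scene_w / scene_h:
    resize_factor = screen_w / scene_w   # width is the binding constraint
else:
    resize_factor = screen_h / scene_h   # height is the binding constraint
print(resize_factor)                     # 1.8 -> the scene scales to 1440x1080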
Code Example #2
File: bbox_utils.py  Project: wanxinjun/mask_rcnn_pro
# The snippet needs the imports below to run. The project-local module paths
# are assumptions inferred from usage, not confirmed against the repo layout.
import numpy as np
import tensorflow as tf  # the code targets the TF 1.x graph API (tf.log, tf.split, ...)

from image_utils import ImageUtils  # assumed path
from mask_utils import MaskUtil     # assumed path
import config as cfg                # assumed path; provides cfg.COMMON / cfg.TRAIN
class BboxUtil(object):
    def __init__(self):
        self.image_utils = ImageUtils()
        self.mask_util = MaskUtil()

    # Extract bounding boxes from instance masks
    def extract_bboxes(self, mask):
        """
        :param mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
        :return: bbox array [num_instances, (y1, x1, y2, x2)]
        """
        # Number of class-agnostic instances: the mask only separates foreground
        # from background; class labels are assigned at detection time
        num_instance = mask.shape[-1]
        # Initialize the boxes array
        boxes = np.zeros([num_instance, 4], dtype=np.int32)

        for i in range(num_instance):
            m = mask[:, :, i]

            # bounding box
            # Along the x axis: columns containing any mask pixel
            horizontal_indicies = np.where(np.any(m, axis=0))[0]
            # Along the y axis: rows containing any mask pixel
            vertical_indicies = np.where(np.any(m, axis=1))[0]
            if horizontal_indicies.shape[0]:
                x1, x2 = horizontal_indicies[[0, -1]]
                y1, y2 = vertical_indicies[[0, -1]]
                # x2 and y2 should not be part of the box. Increment by 1 so that,
                # e.g. with x1 = 1, x2 = 5, the bottom-right corner (5, 5) lies
                # just outside the mask area: right/bottom edges are exclusive.
                x2 += 1
                y2 += 1
            else:
                # No mask for this instance. Might happen due to
                # resizing or cropping. Set bbox to zeros
                x1, x2, y1, y2 = 0, 0, 0, 0
            boxes[i] = np.array([y1, x1, y2, x2])
        return boxes.astype(np.int32)
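
    # A quick worked check for extract_bboxes (hypothetical toy mask):
    #   mask = np.zeros((8, 8, 1), dtype=bool)
    #   mask[2:5, 3:6, 0] = True                 # rows 2-4, cols 3-5
    #   BboxUtil().extract_bboxes(mask)          # -> array([[2, 3, 5, 6]])
    # i.e. (y1, x1) = (2, 3) inclusive and (y2, x2) = (5, 6) exclusive.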

    # Compute the IoU of one box against an array of boxes
    def compute_iou(self, box, boxes):
        """
        :param box: (y1, x1, y2, x2)
        :param boxes: [N, (y1, x1, y2, x2)]
        :return: iou: [N] IoU of box with each row of boxes
        """
        # Box areas: area = (x2 - x1) * (y2 - y1)
        box_area = (box[3] - box[1]) * (box[2] - box[0])
        boxes_area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Intersection areas
        x1 = np.maximum(box[1], boxes[:, 1])
        x2 = np.minimum(box[3], boxes[:, 3])
        y1 = np.maximum(box[0], boxes[:, 0])
        y2 = np.minimum(box[2], boxes[:, 2])
        intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)

        # Compute IoU
        union = box_area + boxes_area - intersection
        # A lower bound keeps the IoU strictly positive if needed:
        # iou = np.maximum(1.0 * intersection / union, np.finfo(np.float32).eps)
        iou = intersection / union
        return iou
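
    # A worked check for compute_iou (hypothetical values):
    #   box   = np.array([0, 0, 10, 10])
    #   boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
    #   compute_iou(box, boxes)  # -> [1.0, 25/175] ~ [1.0, 0.1429]
    # The second pair shares a 5x5 intersection over a 175-pixel union.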

    # Compute pairwise IoU overlaps between two sets of boxes
    def compute_overlaps(self, boxes1, boxes2):
        """
        :param boxes1: [N1, (y1, x1, y2, x2)]
        :param boxes2: [N2, (y1, x1, y2, x2)]
        :return: overlaps matrix [N1, N2]
        """
        # Allocate the overlaps matrix: rows index boxes1, columns index boxes2
        overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))

        for i in range(overlaps.shape[1]):
            box2 = boxes2[i]
            overlaps[:, i] = self.compute_iou(box2, boxes1)
        return overlaps
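
    # Shape check (hypothetical): for boxes1 of shape [3, 4] and boxes2 of
    # shape [2, 4], compute_overlaps returns a [3, 2] matrix whose entry
    # (i, j) is the IoU of boxes1[i] with boxes2[j].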

    def overlaps_graph(self, boxes1, boxes2):
        """
            Computes IoU overlaps between two sets of boxes.
        :param boxes1: [N1, (y1, x1, y2, x2)].
        :param boxes2: [N2, (y1, x1, y2, x2)].
        :return: overlaps matrix [N1, N2]
        """
        # 1. Tile boxes2 and repeat boxes1. This allows us to compare
        # every boxes1 against every boxes2 without loops.
        # TF doesn't have an equivalent to np.repeat() so simulate it
        # using tf.tile() and tf.reshape.
        b1 = tf.reshape(
            tf.tile(tf.expand_dims(boxes1, 1),
                    [1, 1, tf.shape(boxes2)[0]]), [-1, 4])
        b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1])
        # 2. Compute intersections
        b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1)
        b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1)
        y1 = tf.maximum(b1_y1, b2_y1)
        x1 = tf.maximum(b1_x1, b2_x1)
        y2 = tf.minimum(b1_y2, b2_y2)
        x2 = tf.minimum(b1_x2, b2_x2)
        intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0)
        # 3. Compute unions
        b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1)
        b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1)
        union = b1_area + b2_area - intersection
        # 4. Compute IoU and reshape to [boxes1, boxes2]
        iou = intersection / union
        overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]])
        return overlaps
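
    # How the tiling pairs the boxes (hypothetical N1 = 2, N2 = 3): b1 becomes
    # [a, a, a, b, b, b] and b2 becomes [x, y, z, x, y, z], so the element-wise
    # ops compute all 2 x 3 = 6 IoUs at once; the final reshape restores [N1, N2].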

    # Non-maximum suppression
    def non_max_suppression(self, boxes, scores, threshold):
        """
        :param boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) lies outside the box
        :param scores: 1-D array of box scores
        :param threshold: IoU threshold used to suppress overlapping boxes
        :return: indices of the boxes that are kept
        """

        assert boxes.shape[0] > 0
        if boxes.dtype.kind != "f":
            boxes = boxes.astype(np.float32)

        # Get indices of boxes sorted by scores (highest first)
        ixs = scores.argsort()[::-1]

        pick = []
        while len(ixs) > 0:
            # Pick top box and add its index to the list
            i = ixs[0]
            pick.append(i)
            # Compute IoU of the picked box with the rest
            iou = self.compute_iou(boxes[i], boxes[ixs[1:]])
            # Identify boxes with IoU over the threshold. This
            # returns indices into ixs[1:], so add 1 to get
            # indices into ixs.
            remove_ixs = np.where(iou > threshold)[0] + 1
            # Remove indices of the picked and overlapped boxes.
            ixs = np.delete(ixs, remove_ixs)
            ixs = np.delete(ixs, 0)

        return np.array(pick, dtype=np.int32)
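
    # A worked check for non_max_suppression (hypothetical values):
    #   boxes  = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]])
    #   scores = np.array([0.9, 0.8, 0.7])
    #   non_max_suppression(boxes, scores, threshold=0.3)  # -> [0, 2]
    # Box 1 overlaps box 0 with IoU = 81/119 ~ 0.68 > 0.3, so it is suppressed;
    # box 2 does not overlap box 0 at all, so it survives.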

    # Apply bounding-box regression deltas; the deltas use the standard encoding:
    # tx = (x - xa) / wa,  ty = (y - ya) / ha,
    # tw = log(w / wa),    th = log(h / ha)
    def apply_box_deltas(self, boxes, deltas):
        """
        :param boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) lies outside the box
        :param deltas: [N, (dy, dx, log(dh), log(dw))]
        :return:
        """
        boxes = boxes.astype(np.float32)
        # Convert to y, x, h, w
        height = boxes[:, 2] - boxes[:, 0]
        width = boxes[:, 3] - boxes[:, 1]
        center_y = boxes[:, 0] + 0.5 * height
        center_x = boxes[:, 1] + 0.5 * width

        # Apply deltas
        center_y += deltas[:, 0] * height
        center_x += deltas[:, 1] * width
        height *= np.exp(deltas[:, 2])
        width *= np.exp(deltas[:, 3])

        # Convert back to y1, x1, y2, x2
        y1 = center_y - 0.5 * height
        x1 = center_x - 0.5 * width
        y2 = y1 + height
        x2 = x1 + width

        return np.stack([y1, x1, y2, x2], axis=1)
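
    # A worked check for apply_box_deltas (hypothetical values):
    #   boxes  = np.array([[0, 0, 10, 10]])        # center (5, 5), h = w = 10
    #   deltas = np.array([[0.1, 0.1, 0.0, 0.0]])  # shift center by 10%, keep size
    #   apply_box_deltas(boxes, deltas)            # -> [[1., 1., 11., 11.]]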

    # TF-graph version of box-to-ground-truth refinement (bounding-box regression);
    # see the encoding formulas above
    def box_refinement_graph(self, box, gt_box):
        """
        :param box: [N, (y1, x1, y2, x2)]
        :param gt_box: [N, (y1, x1, y2, x2)]
        :return:
        """
        box = tf.cast(box, tf.float32)
        gt_box = tf.cast(gt_box, tf.float32)

        height = box[:, 2] - box[:, 0]
        width = box[:, 3] - box[:, 1]
        center_y = box[:, 0] + 0.5 * height
        center_x = box[:, 1] + 0.5 * width

        gt_height = gt_box[:, 2] - gt_box[:, 0]
        gt_width = gt_box[:, 3] - gt_box[:, 1]
        gt_center_y = gt_box[:, 0] + 0.5 * gt_height
        gt_center_x = gt_box[:, 1] + 0.5 * gt_width

        dy = (gt_center_y - center_y) / height
        dx = (gt_center_x - center_x) / width
        dh = tf.log(gt_height / height)
        dw = tf.log(gt_width / width)

        result = tf.stack([dy, dx, dh, dw], axis=1)
        return result

    # NumPy version of box-to-ground-truth refinement (bounding-box regression);
    # see the encoding formulas above
    def box_refinement(self, box, gt_box):
        """
        :param box: [N, (y1, x1, y2, x2)]; (y2, x2) is assumed to lie outside the box
        :param gt_box: [N, (y1, x1, y2, x2)]
        :return:
        """
        box = box.astype(np.float32)
        gt_box = gt_box.astype(np.float32)

        height = box[:, 2] - box[:, 0]
        width = box[:, 3] - box[:, 1]
        center_y = box[:, 0] + 0.5 * height
        center_x = box[:, 1] + 0.5 * width

        gt_height = gt_box[:, 2] - gt_box[:, 0]
        gt_width = gt_box[:, 3] - gt_box[:, 1]
        gt_center_y = gt_box[:, 0] + 0.5 * gt_height
        gt_center_x = gt_box[:, 1] + 0.5 * gt_width

        dy = (gt_center_y - center_y) / height
        dx = (gt_center_x - center_x) / width
        dh = np.log(gt_height / height)
        dw = np.log(gt_width / width)

        return np.stack([dy, dx, dh, dw], axis=1)
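
    # A worked check for box_refinement (hypothetical values), the inverse of
    # the apply_box_deltas example above:
    #   box    = np.array([[0, 0, 10, 10]])
    #   gt_box = np.array([[1, 1, 11, 11]])
    #   box_refinement(box, gt_box)  # -> [[0.1, 0.1, 0., 0.]]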

    # Convert boxes from pixel coordinates to normalized coordinates (TF-graph version)
    def norm_boxes_graph(self, boxes, shape):
        """
        :param boxes: [..., (y1, x1, y2, x2)] in pixel coordinates
        :param shape: [..., (height, width)] in pixels
        :return: [..., (y1, x1, y2, x2)] in normalized coordinates
        Note: in pixel coordinates (y2, x2) is outside the box, but in
              normalized coordinates it is inside the box.
        """
        h, w = tf.split(tf.cast(shape, tf.float32), 2)
        scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
        shift = tf.constant([0., 0., 1., 1.])
        return tf.divide(boxes - shift, scale)

    def norm_boxes(self, boxes, shape):
        """
            Converts boxes from pixel coordinates to normalized coordinates.
        :param boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
        :param shape: [..., (height, width)] in pixels
        :return: [N, (y1, x1, y2, x2)] in normalized coordinates
            Note: In pixel coordinates (y2, x2) is outside the box.
                  But in normalized coordinates it's inside the box.
        """
        h, w = shape
        scale = np.array([h - 1, w - 1, h - 1, w - 1])
        shift = np.array([0, 0, 1, 1])
        return np.divide((boxes - shift), scale).astype(np.float32)
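
    # A worked check for norm_boxes (hypothetical values):
    #   norm_boxes(np.array([[0, 0, 10, 10]]), shape=(100, 200))
    #   # -> [[0., 0., 9/99, 9/199]] ~ [[0., 0., 0.0909, 0.0452]]
    # The -1/+1 in scale and shift is what moves (y2, x2) from "outside" the
    # box in pixel space to "inside" it in normalized space.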

    def denorm_boxes(self, boxes, shape):
        """
            Converts boxes from normalized coordinates to pixel coordinates.
        :param boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
        :param shape: [..., (height, width)] in pixels
        :return: [N, (y1, x1, y2, x2)] in pixel coordinates

        Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
             coordinates it's inside the box.
        """
        h, w = shape
        scale = np.array([h - 1, w - 1, h - 1, w - 1])
        shift = np.array([0, 0, 1, 1])
        return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
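
    # Round trip (hypothetical): feeding the norm_boxes result above back in,
    #   denorm_boxes(np.array([[0., 0., 9/99, 9/199]]), shape=(100, 200))
    # recovers [[0, 0, 10, 10]] after rounding.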

    def apply_box_deltas_graph(self, boxes, deltas):
        """
            Applies the given deltas to the given boxes.
        :param boxes: [N, (y1, x1, y2, x2)] boxes to update
        :param deltas: [N, (dy, dx, log(dh), log(dw))] refinements to apply
        :return:
        """
        # Convert to y, x, h, w
        height = boxes[:, 2] - boxes[:, 0]
        width = boxes[:, 3] - boxes[:, 1]
        center_y = boxes[:, 0] + 0.5 * height
        center_x = boxes[:, 1] + 0.5 * width
        # Apply deltas
        center_y += deltas[:, 0] * height
        center_x += deltas[:, 1] * width
        height *= tf.exp(deltas[:, 2])
        width *= tf.exp(deltas[:, 3])
        # Convert back to y1, x1, y2, x2
        y1 = center_y - 0.5 * height
        x1 = center_x - 0.5 * width
        y2 = y1 + height
        x2 = x1 + width
        result = tf.stack([y1, x1, y2, x2],
                          axis=1,
                          name="apply_box_deltas_out")
        return result

    def clip_boxes_graph(self, boxes, window):
        """
        :param boxes: [N, (y1, x1, y2, x2)]
        :param window: [4] in the form y1, x1, y2, x2
        :return:
        """
        # Split
        wy1, wx1, wy2, wx2 = tf.split(window, 4)
        y1, x1, y2, x2 = tf.split(boxes, 4, axis=1)
        # Clip
        y1 = tf.maximum(tf.minimum(y1, wy2), wy1)
        x1 = tf.maximum(tf.minimum(x1, wx2), wx1)
        y2 = tf.maximum(tf.minimum(y2, wy2), wy1)
        x2 = tf.maximum(tf.minimum(x2, wx2), wx1)
        clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes")
        clipped.set_shape((clipped.shape[0], 4))
        return clipped
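
    # Clipping sketch (hypothetical): with window = [0, 0, 1, 1] in normalized
    # coordinates, a proposal [-0.1, 0.2, 1.3, 0.9] is clipped to [0.0, 0.2, 1.0, 0.9].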

    def load_image_gt(self,
                      data,
                      image_id,
                      augmentation=None,
                      use_mini_mask=False):
        """
            Load and return ground truth data for an image (image, mask, bounding boxes).
        :param data: The Dataset object to pick data from
        :param image_id: ID of the image to load GT bounding boxes and masks for.
        :param augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
                            For example, passing imgaug.augmenters.Fliplr(0.5) flips images
                            right/left 50% of the time.
        :param use_mini_mask: If False, returns full-size masks that are the same height
                            and width as the original image. These can be big, for example
                            1024x1024x100 (for 100 instances). Mini masks are smaller, typically,
                            224x224 and are generated by extracting the bounding box of the
                            object and resizing it to MINI_MASK_SHAPE.
        :return:
            image: [height, width, 3]
            image_meta: packed metadata about the image (original shape, window,
                scale, active class IDs); see image_utils.compose_image_meta
            class_ids: [instance_count] Integer class IDs
            bbox: [instance_count, (y1, x1, y2, x2)]
            mask: [height, width, instance_count]. The height and width are those
                of the image unless use_mini_mask is True, in which case they are
                defined in MINI_MASK_SHAPE.
        """

        # Load image and mask
        image_path = data.image_info_list[image_id]["path"]
        image = self.image_utils.load_image(image_path)
        mask, class_ids = self.mask_util.load_mask(data, image_id)

        original_shape = image.shape

        image, window, scale, padding, crop = self.image_utils.resize_image(
            image,
            min_dim=cfg.COMMON.IMAGE_MIN_DIM,
            min_scale=cfg.COMMON.IMAGE_MIN_SCALE,
            max_dim=cfg.COMMON.IMAGE_MAX_DIM,
            resize_mode=cfg.COMMON.IMAGE_RESIZE_MODE)
        mask = self.mask_util.resize_mask(mask, scale, padding, crop)

        # Augmentation
        # This requires the imgaug lib (https://github.com/aleju/imgaug)
        if augmentation:
            import imgaug

            def hook(images, augmenter, parents, default):
                """Determines which augmenters to apply to masks."""
                return augmenter.__class__.__name__ in cfg.TRAIN.MASK_AUGMENTERS

            # Store shapes before augmentation to compare
            image_shape = image.shape
            mask_shape = mask.shape

            # Make augmenters deterministic to apply similarly to images and masks
            det = augmentation.to_deterministic()
            image = det.augment_image(image)

            # Change mask to np.uint8 because imgaug doesn't support np.bool
            mask = det.augment_image(mask.astype(np.uint8),
                                     hooks=imgaug.HooksImages(activator=hook))

            # Verify that shapes didn't change
            assert image.shape == image_shape, "Augmentation shouldn't change image size"
            assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"

            # Change mask back to bool (use the builtin bool dtype: np.bool
            # was removed in NumPy 1.24+)
            mask = mask.astype(bool)

        # Some instance masks may have been cropped out entirely during
        # resizing or augmentation; filter those empty instances out here
        _idx = np.sum(mask, axis=(0, 1)) > 0
        mask = mask[:, :, _idx]
        class_ids = class_ids[_idx]
        # Bounding boxes: bbox [num_instances, (y1, x1, y2, x2)]
        bbox = self.extract_bboxes(mask)

        # Active classes
        # Different datasets have different classes, so track the
        # classes supported in the dataset of this image.
        active_class_ids = np.zeros([data.class_num], dtype=np.int32)
        source = data.image_info_list[image_id]["source"]
        source_class_ids = data.source_class_ids[source]
        active_class_ids[source_class_ids] = 1

        # Resize masks to smaller size to reduce memory usage
        if use_mini_mask:
            mask = self.mask_util.minimize_mask(bbox, mask,
                                                cfg.TRAIN.MINI_MASK_SHAPE)

        # Image meta data
        image_meta = self.image_utils.compose_image_meta(
            image_id, original_shape, image.shape, window, scale,
            active_class_ids)

        return image, image_meta, class_ids, bbox, mask
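
A minimal end-to-end sketch of driving BboxUtil on a toy mask (the data is hypothetical, and it assumes the project imports above resolve so that BboxUtil() can construct its helpers; load_image_gt additionally needs a Dataset-like object and the project's cfg, so it is not exercised here):

util = BboxUtil()

# Two overlapping toy instance masks
toy_mask = np.zeros((16, 16, 2), dtype=bool)
toy_mask[2:6, 2:6, 0] = True
toy_mask[4:10, 4:10, 1] = True

boxes = util.extract_bboxes(toy_mask)    # [[2, 2, 6, 6], [4, 4, 10, 10]]
overlaps = util.compute_overlaps(boxes, boxes)
keep = util.non_max_suppression(boxes.astype(np.float32),
                                np.array([0.9, 0.8]), threshold=0.5)
# keep == [0, 1]: the pair's IoU is 4/48 ~ 0.083, below the threshold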
Code Example #3
File: game_object.py  Project: cheverebe/Arkanoid-CV
    def adapt_size(self, resize_factor):
        self.sprite = ImageUtils.resize_image(self.sprite, resize_factor)
        self.dimensions = self.sprite.shape[:2]