Exemple #1
0
def preprocess(image, points, size=cfg.image_size):
    """
    Preprocess for test.
    Args:
        image: test image
        points: text polygon
        size: test image size
    """
    height, width = image.shape[:2]
    mask = polygons_to_mask([np.asarray(points, np.float32)], height, width)
    x, y, w, h = cv2.boundingRect(mask)
    mask = np.expand_dims(np.float32(mask), axis=-1)
    image = image * mask
    image = image[y:y+h, x:x+w,:]

    new_height, new_width = (size, int(w*size/h)) if h>w else (int(h*size/w), size)
    image = cv2.resize(image, (new_width, new_height))

    if new_height > new_width:
        padding_top, padding_down = 0, 0
        padding_left = (size - new_width)//2
        padding_right = size - padding_left - new_width
    else:
        padding_left, padding_right = 0, 0
        padding_top = (size - new_height)//2
        padding_down = size - padding_top - new_height

    image = cv2.copyMakeBorder(image, padding_top, padding_down, padding_left, padding_right, borderType=cv2.BORDER_CONSTANT, value=[0,0,0])

    image = image/255.
    return image
Exemple #2
0
def affine_transform(image, polygon):
    """
    Conduct same affine transform for both image and polygon for data augmentation.
    """
    height, width, _ = image.shape
    center_x, center_y = width / 2, height / 2

    angle = 0 if np.random.uniform() > 0.5 else np.random.uniform(-20., 20.)
    shear_x, shear_y = (0, 0) if np.random.uniform() > 0.5 else (
        np.random.uniform(-0.2, 0.2), np.random.uniform(-0.2, 0.2))

    rad = math.radians(angle)
    sin, cos = math.sin(rad), math.cos(rad)  # x, y
    abs_sin, abs_cos = abs(sin), abs(cos)

    new_width = ((height * abs_sin) + (width * abs_cos))
    new_height = ((height * abs_cos) + (width * abs_sin))

    new_width += np.abs(shear_y * new_height)
    new_height += np.abs(shear_x * new_width)

    new_width = int(new_width)
    new_height = int(new_height)

    M = np.array([[
        cos, sin + shear_y, new_width / 2 - center_x + (1 - cos) * center_x -
        (sin + shear_y) * center_y
    ],
                  [
                      -sin + shear_x, cos, new_height / 2 - center_y +
                      (sin - shear_x) * center_x + (1 - cos) * center_y
                  ]])

    rotatedImage = cv2.warpAffine(image,
                                  M, (new_width, new_height),
                                  flags=cv2.INTER_CUBIC,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=(0, 0, 0))

    height, width = rotatedImage.shape[:2]
    rotatedPoints = [rotatedPoint(M, point) for point in polygon]
    mask = polygons_to_mask([np.array(rotatedPoints, np.float32)], new_height,
                            new_width)
    x, y, w, h = cv2.boundingRect(mask)
    mask = np.expand_dims(np.float32(mask), axis=-1)
    rotatedImage = rotatedImage * mask

    cropImage = rotatedImage[y:y + h, x:x + w, :]

    return cropImage
Exemple #3
0
def draw_annotation(img, boxes, klass, polygons=None, is_crowd=None):
    """Will not modify img"""
    labels = []
    assert len(boxes) == len(klass)
    if is_crowd is not None:
        assert len(boxes) == len(is_crowd)
        for cls, crd in zip(klass, is_crowd):
            clsname = cfg.DATA.CLASS_NAMES[cls]
            if crd == 1:
                clsname += ';Crowd'
            labels.append(clsname)
    else:
        for cls in klass:
            labels.append(cfg.DATA.CLASS_NAMES[cls])
    img = viz.draw_boxes(img, boxes, labels)

    if polygons is not None:
        for p in polygons:
            mask = polygons_to_mask(p, img.shape[0], img.shape[1])
            img = draw_mask(img, mask)
    return img
Exemple #4
0
    def __call__(self, roidb):
        fname, boxes, klass, is_crowd = roidb["file_name"], roidb[
            "boxes"], roidb["class"], roidb["is_crowd"]
        assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        tfms = self.aug.get_transform(im)
        im = tfms.apply_image(im)
        points = box_to_point4(boxes)
        points = tfms.apply_coords(points)
        boxes = point4_to_box(points)
        if len(boxes):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
        try:
            if self.cfg.MODE_FPN:
                multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
            else:
                ret["anchor_labels"], ret[
                    "anchor_boxes"] = self.get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret["gt_boxes"] = boxes
            ret["gt_labels"] = klass
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                "warn")
            return None

        if self.cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb["segmentation"])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) *
                                8)  # pad to 8 in order to pack mask into bits

            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [tfms.apply_coords(p) for p in polys]
                masks.append(
                    polygons_to_mask(polys, im.shape[0], gt_mask_width))

            if len(masks):
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                masks = np.packbits(masks, axis=-1)
            else:  # no gt on the image
                masks = np.zeros((0, im.shape[0], gt_mask_width // 8),
                                 dtype='uint8')

            ret['gt_masks_packed'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret