Ejemplo n.º 1
0
def test_rotate_boxes():
    """Check geometry.rotate_boxes for tiny, arbitrary and right-angle rotations."""
    unit_shape = (1, 1)
    straight = np.array([[0.1, 0.1, 0.8, 0.3, 0.5]])
    expected_corners = np.array([[0.1, 0.1], [0.8, 0.1], [0.8, 0.3], [0.1, 0.3]])

    # Angle = 0 and angle < 1: the plain corner points are expected back
    # (presumably because 0.5 is below rotate_boxes' default min_angle -- confirm)
    for small_angle in (0.0, 0.5):
        out = geometry.rotate_boxes(straight,
                                    angle=small_angle,
                                    orig_shape=unit_shape)
        assert np.all(out == expected_corners)

    # Angle = 30: only the shape of the output is verified
    out = geometry.rotate_boxes(straight, angle=30, orig_shape=unit_shape)
    assert out.shape == (1, 4, 2)

    # Angle = -90 and angle = +90, with min_angle=0 so the rotation is always applied
    flat_box = np.array([[0.0, 0.0, 0.6, 0.2, 0.5]])
    right_angle_cases = (
        (-90, [[1, 0.0], [1, 0.6], [0.8, 0.6], [0.8, 0.0]]),
        (+90, [[0, 1.0], [0, 0.4], [0.2, 0.4], [0.2, 1.0]]),
    )
    for angle, corners in right_angle_cases:
        out = geometry.rotate_boxes(flat_box,
                                    angle=angle,
                                    orig_shape=unit_shape,
                                    min_angle=0)
        assert np.allclose(out, np.array([corners]))
Ejemplo n.º 2
0
def test_estimate_page_angle():
    """Rotate straight polygons by a known angle and check that it is recovered."""
    expected_angle = 20
    # Three axis-aligned squares laid out along the diagonal
    squares = [
        [[0.3, 0.3], [0.4, 0.3], [0.4, 0.4], [0.3, 0.4]],
        [[0.4, 0.4], [0.5, 0.4], [0.5, 0.5], [0.4, 0.5]],
        [[0.5, 0.5], [0.6, 0.5], [0.6, 0.6], [0.5, 0.6]],
    ]
    straight_polys = np.array(squares)
    tilted_polys = geometry.rotate_boxes(
        straight_polys,
        angle=expected_angle,
        orig_shape=(512, 512),
    )
    estimated = geometry.estimate_page_angle(tilted_polys)
    assert np.isclose(estimated, expected_angle)
Ejemplo n.º 3
0
    def __call__(
        self,
        pages: List[Union[np.ndarray, tf.Tensor]],
        **kwargs: Any,
    ) -> Document:
        """Run text detection then recognition on a batch of pages.

        Args:
            pages: page images, each expected to be 3-dimensional; the first two
                dimensions are read as the page size
            **kwargs: forwarded unchanged to the detection and recognition predictors

        Returns:
            the object built by `self.doc_builder` from the boxes, the text
            predictions and the original page sizes

        Raises:
            ValueError: if any page is not 3-dimensional
        """
        # Dimension check
        for page in pages:
            if page.ndim != 3:
                raise ValueError(
                    "incorrect input shape: all pages are expected to be multi-channel 2D images."
                )

        # Sizes of the pages as received, before any straightening
        origin_page_shapes = [img.shape[:2] for img in pages]

        # Detect document rotation and rotate pages
        if self.straighten_pages:
            page_angles = list(map(estimate_orientation, pages))
            pages = [
                rotate_image(img, -theta, expand=True)
                for img, theta in zip(pages, page_angles)
            ]

        # Localize text elements
        loc_preds = self.det_predictor(pages, **kwargs)

        # Crop images
        crops, loc_preds = self._prepare_crops(
            pages,
            loc_preds,
            channels_last=True,
            assume_straight_pages=self.assume_straight_pages,
        )
        # Rectify crop orientation
        if not self.assume_straight_pages:
            crops, loc_preds = self._rectify_crops(crops, loc_preds)

        # Identify character sequences on the crops of all pages at once
        flat_crops = [
            single_crop for per_page in crops for single_crop in per_page
        ]
        word_preds = self.reco_predictor(flat_crops, **kwargs)

        boxes, text_preds = self._process_predictions(loc_preds, word_preds)

        # Rotate the boxes back by the angle that was removed from each page
        if self.straighten_pages:
            boxes = [
                rotate_boxes(page_boxes, theta, orig_shape=img.shape[:2])
                for page_boxes, img, theta in zip(boxes, pages, page_angles)
            ]

        return self.doc_builder(boxes, text_preds,
                                origin_page_shapes)  # type: ignore[misc]
Ejemplo n.º 4
0
    def _sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Order bounding boxes from top to bottom, then left to right.

        Args:
            boxes: bounding boxes of shape (N, 4), or (N, 4, 2) for rotated boxes

        Returns:
            tuple: the indices that sort the boxes, of shape (N,), and the boxes.
                Straight boxes are returned unchanged. Rotated boxes are first
                rotated by the opposite of the estimated page angle and then
                replaced by the straight boxes enclosing them, so that lines can
                later be fitted on the straightened page.
        """
        if boxes.ndim == 3:
            page_angle = estimate_page_angle(boxes)
            straightened = rotate_boxes(
                loc_preds=boxes,
                angle=-page_angle,
                orig_shape=(1024, 1024),
                min_angle=5.0,
            )
            # Enclosing straight box: per-box minimum and maximum over the 4 points
            boxes = np.concatenate(
                (straightened.min(1), straightened.max(1)), -1)

        # Column 3 scaled by the median (column 3 - column 1) extent dominates
        # the key; column 0 orders boxes whose scaled column 3 is close.
        median_height = np.median(boxes[:, 3] - boxes[:, 1])
        sort_keys = boxes[:, 0] + 2 * boxes[:, 3] / median_height
        return sort_keys.argsort(), boxes
Ejemplo n.º 5
0
 def rotate_doc(self, X, doc, angles=None):
     """Create slightly rotated copies of every page of one document.

     Args:
         X: table filtered through its 'document_name' and 'page_id' columns
             (presumably a pandas DataFrame -- confirm against the caller)
         doc: 'document_name' value selecting the document to process
         angles: rotation angles to apply to each page; defaults to
             [-1., -0.5, 0.5, 1.] when None

     Returns:
         list holding, for each (page, angle) pair, the value returned by
         `self.replace_boxes` for a copy of the page rows whose
         'document_name' has the suffix '_angle_<angle>' appended
     """
     tilt_angles = [-1., -0.5, 0.5, 1.] if angles is None else angles
     in_doc = X['document_name'] == doc
     tilted_pages = []
     for page in X[in_doc]['page_id'].unique():
         on_page = in_doc & (X['page_id'] == page)
         for angle in tilt_angles:
             # Fresh copy per angle: the rows are modified below
             df = X[on_page].copy()
             rotated = rotate_boxes(self.get_rel_boxes(df),
                                    angle=angle,
                                    min_angle=0)
             # Scale by 1000 before cv2.boundingRect to keep more detail,
             # then scale the resulting rectangles back down
             bounding_rects = [
                 cv2.boundingRect(rbbox_to_polygon(rbox) * 1000)
                 for rbox in rotated
             ]
             r_poly = np.array(bounding_rects) / 1000
             boxes_tilted = np.array(
                 [bbox_width_to_bbox(rect) for rect in r_poly])
             df['document_name'] = df['document_name'] + f'_angle_{angle}'
             tilted_pages.append(self.replace_boxes(df, boxes_tilted))
     return tilted_pages
Ejemplo n.º 6
0
    def forward(
        self,
        pages: List[Union[np.ndarray, torch.Tensor]],
        **kwargs: Any,
    ) -> Document:
        """Run detection and recognition on a batch of pages and build the result.

        Args:
            pages: page images, each a 3-dimensional numpy array or torch tensor.
                The page size is read from the first two dimensions of numpy
                arrays and from the last two dimensions of any other input.
            **kwargs: forwarded unchanged to the detection and recognition predictors

        Returns:
            the object built by `self.doc_builder` from the boxes, the text
            predictions, the original page sizes and, when enabled, the
            per-page orientation and language information (None otherwise)

        Raises:
            ValueError: if any page is not 3-dimensional
        """
        # Dimension check
        if any(page.ndim != 3 for page in pages):
            raise ValueError(
                "incorrect input shape: all pages are expected to be multi-channel 2D images."
            )

        # Page sizes as received, before any straightening
        origin_page_shapes = [
            page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:]
            for page in pages
        ]

        # Estimate the page orientation once when either feature needs it
        if self.detect_orientation or self.straighten_pages:
            origin_page_orientations = [
                estimate_orientation(page) for page in pages
            ]  # type: ignore[arg-type]

        if self.detect_orientation:
            orientations = [{
                "value": orientation_page,
                "confidence": 1.0
            } for orientation_page in origin_page_orientations]
        else:
            orientations = None

        # Rotate pages by the opposite of their estimated angle
        if self.straighten_pages:
            pages = [
                rotate_image(page, -angle,
                             expand=True)  # type: ignore[arg-type]
                for page, angle in zip(pages, origin_page_orientations)
            ]

        # Localize text elements
        loc_preds = self.det_predictor(pages, **kwargs)
        # Check whether crop mode should be switched to channels first
        channels_last = len(pages) == 0 or isinstance(pages[0], np.ndarray)

        # Rectify crops if aspect ratio
        loc_preds = self._remove_padding(pages,
                                         loc_preds)  # type: ignore[arg-type]

        # Crop images
        crops, loc_preds = self._prepare_crops(
            pages,  # type: ignore[arg-type]
            loc_preds,
            channels_last=channels_last,
            assume_straight_pages=self.assume_straight_pages,
        )
        # Rectify crop orientation
        if not self.assume_straight_pages:
            crops, loc_preds = self._rectify_crops(crops, loc_preds)
        # Identify character sequences on the crops of all pages at once
        word_preds = self.reco_predictor(
            [crop for page_crops in crops for crop in page_crops], **kwargs)

        boxes, text_preds = self._process_predictions(loc_preds, word_preds)

        if self.detect_language:
            # One language guess per page, from the space-joined page text
            languages = [
                get_language(" ".join([item[0] for item in text_pred]))
                for text_pred in text_preds
            ]
            languages_dict = [{
                "value": lang[0],
                "confidence": lang[1]
            } for lang in languages]
        else:
            languages_dict = None

        # Rotate back pages and boxes while keeping original image size
        if self.straighten_pages:
            boxes = [
                rotate_boxes(
                    page_boxes,
                    angle,
                    orig_shape=page.shape[:2] if isinstance(page, np.ndarray)
                    else page.shape[1:],  # type: ignore[arg-type]
                    target_shape=mask,  # type: ignore[arg-type]
                ) for page_boxes, page, angle, mask in zip(
                    boxes, pages, origin_page_orientations, origin_page_shapes)
            ]

        # The boxes above are mapped back onto the original page size
        # (target_shape), so the builder receives the original sizes rather than
        # those of the straightened, expanded pages. Both are identical when
        # straighten_pages is disabled.
        out = self.doc_builder(
            boxes,
            text_preds,
            origin_page_shapes,  # type: ignore[arg-type]
            orientations,
            languages_dict,
        )
        return out