Esempio n. 1
0
 def __init__(self, page: Page):
     self.page = copy.deepcopy(page)
     self.grid_resolution = 10
     self.mock_canvas = Canvas().set_parent(
         self.page)  # type: ignore [attr-defined]
     self.grid: typing.List[typing.List[bool]] = [[]]
     self._draw_text_and_objects()
Esempio n. 2
0
 def __init__(self, page: Page):
     self.page_width = page.get_page_info().get_width()
     self.page_height = page.get_page_info().get_height()
     assert self.page_width
     assert self.page_height
     self.page = copy.deepcopy(page)
     self.grid_resolution = 10
     self.mock_canvas = Canvas().set_parent(
         self.page)  # type: ignore [attr-defined]
     self.grid: typing.List[typing.List[bool]] = [[]]
     self._render_canvas()
    def transform(
        self,
        object_to_transform: Union[io.BufferedIOBase, io.RawIOBase,
                                   AnyPDFType],
        parent_object: Any,
        context: Optional[ReadTransformerContext] = None,
        event_listeners: typing.List[EventListener] = [],
    ) -> Any:
        """
        This function reads a \Page Dictionary from a byte stream
        """

        if isinstance(object_to_transform, Page):
            return object_to_transform

        # convert dictionary like structure
        tmp = Page().set_parent(parent_object)  # type: ignore [attr-defined]

        # add listener(s)
        for l in event_listeners:
            tmp.add_event_listener(l)  # type: ignore [attr-defined]

        # convert key/value pairs
        assert isinstance(object_to_transform, Dictionary)
        for k, v in object_to_transform.items():
            # avoid circular reference
            if k == "Parent":
                continue
            v = self.get_root_transformer().transform(v, tmp, context, [])
            if v is not None:
                tmp[k] = v

        # send out BeginPageEvent
        tmp._event_occurred(BeginPageEvent(tmp))

        # set up canvas
        assert "Contents" in tmp
        contents = tmp["Contents"]
        if contents is not None:
            canvas = Canvas().set_parent(tmp)  # type: ignore [attr-defined]

            # process bytes in stream
            if isinstance(contents, dict):
                canvas.read(io.BytesIO(contents["DecodedBytes"]))

            # process bytes in array
            if isinstance(contents, list):
                bts = b"".join([x["DecodedBytes"] + b" " for x in contents])
                canvas.read(io.BytesIO(bts))

        # send out EndPageEvent
        tmp._event_occurred(EndPageEvent(tmp))

        # return
        return tmp
Esempio n. 4
0
    def transform(
        self,
        object_to_transform: Union[io.BufferedIOBase, io.RawIOBase, AnyPDFType],
        parent_object: Any,
        context: Optional[TransformerContext] = None,
        event_listeners: typing.List[EventListener] = [],
    ) -> Any:

        # convert dictionary like structure
        tmp = Page().set_parent(parent_object)

        # add listener(s)
        for l in event_listeners:
            tmp.add_event_listener(l)

        # convert key/value pairs
        assert isinstance(object_to_transform, Dictionary)
        for k, v in object_to_transform.items():
            # avoid circular reference
            if k == "Parent":
                continue
            v = self.get_root_transformer().transform(v, tmp, context, [])
            if v is not None:
                tmp[k] = v

        # send out BeginPageEvent
        tmp.event_occurred(BeginPageEvent(tmp))

        # set up canvas
        if "Contents" not in tmp:
            raise PDFTypeError(
                expected_type=Union[List, Dictionary],
                received_type=None,
            )
        contents = tmp["Contents"]
        if contents is not None:
            canvas = Canvas().set_parent(tmp)

            # process bytes in stream
            if isinstance(contents, dict):
                canvas.read(io.BytesIO(contents["DecodedBytes"]))

            # process bytes in array
            if isinstance(contents, list):
                bts = b"".join([x["DecodedBytes"] + b" " for x in contents])
                canvas.read(io.BytesIO(bts))

        # send out EndPageEvent
        tmp.event_occurred(EndPageEvent(tmp))

        # return
        return tmp
Esempio n. 5
0
class FreeSpaceFinder(EventListener):
    """
    This implementation of EventListener keeps track of which space on a Page is available
    """
    def __init__(self, page: Page):
        self.page_width = page.get_page_info().get_width()
        self.page_height = page.get_page_info().get_height()
        assert self.page_width
        assert self.page_height
        self.page = copy.deepcopy(page)
        self.grid_resolution = 10
        self.mock_canvas = Canvas().set_parent(
            self.page)  # type: ignore [attr-defined]
        self.grid: typing.List[typing.List[bool]] = [[]]
        self._render_canvas()

    def _mark_as_unavailable(self, rectangle: Rectangle):
        x_grid = int(int(rectangle.x) / self.grid_resolution)
        y_grid = int(int(rectangle.y) / self.grid_resolution)
        w = int(int(rectangle.width) / self.grid_resolution)
        h = int(int(rectangle.height) / self.grid_resolution)
        for i in range(x_grid - 1, x_grid + w + 1):
            for j in range(y_grid - 1, y_grid + h + 1):
                if i < 0 or i >= len(self.grid):
                    continue
                if j < 0 or j >= len(self.grid[i]):
                    continue
                self.grid[i][j] = False

    def _render_canvas(self):
        w = int(int(self.page_width) / self.grid_resolution)
        h = int(int(self.page_height) / self.grid_resolution)

        # mark everything as available
        for i in range(0, w):
            self.grid.append([True for x in range(0, h)])

        # add listeners
        self.mock_canvas.add_event_listener(self)

        # process canvas
        contents = self.page["Contents"]
        if isinstance(contents, dict):
            self.mock_canvas.read(io.BytesIO(contents["DecodedBytes"]))
        if isinstance(contents, list):
            bts = b"".join([x["DecodedBytes"] + b" " for x in contents])
            self.mock_canvas.read(io.BytesIO(bts))

    def find_free_space(self,
                        needed_space: Rectangle) -> typing.Optional[Rectangle]:
        w = int(int(needed_space.width) / self.grid_resolution)
        h = int(int(needed_space.height) / self.grid_resolution)
        possible_points: typing.List[typing.Tuple[Decimal, Decimal]] = []
        for i in range(0, len(self.grid) - w):
            for j in range(0, len(self.grid[i]) - h):
                is_free = True
                for k in range(0, w):
                    for l in range(0, h):
                        if not self.grid[i + k][j + l]:
                            is_free = False
                            break
                    if not is_free:
                        break
                if is_free:
                    possible_points.append((
                        Decimal(i * self.grid_resolution),
                        Decimal(j * self.grid_resolution),
                    ))
        # find point closest to desired location
        if len(possible_points) == 0:
            return None
        min_dist = (needed_space.x - possible_points[0][0])**2 + (
            needed_space.y - possible_points[0][1])**2
        min_dist_point = possible_points[0]
        for p in possible_points:
            d = (needed_space.x - p[0])**2 + (needed_space.y - p[1])**2
            if d < min_dist:
                min_dist = d
                min_dist_point = p
        # return
        return Rectangle(
            min_dist_point[0],
            min_dist_point[1],
            needed_space.width,
            needed_space.height,
        )

    def event_occurred(self, event: Event) -> None:
        if isinstance(event, ChunkOfTextRenderEvent):
            assert isinstance(event, ChunkOfTextRenderEvent)
            bb: typing.Optional[Rectangle] = event.get_bounding_box()
            if bb is not None:
                self._mark_as_unavailable(bb)