def _read_xref(self,
                   context: TransformerContext,
                   initial_offset: Optional[int] = None) -> None:
        """
        Read the XREF table at an offset, then follow its chain of previous XREF tables.

        Every table is first read as a PlainTextXREF and, if that fails, as a StreamXREF.
        A table that could be read is stored in the root object under "XRef", or merged
        into the XREF that is already stored there. When the trailer of the table that
        was just read has a "Prev" (or "Previous") entry, the table at that byte offset
        is read next. If both entries are present, "Previous" wins.

        :param context:         the TransformerContext (containing the io source)
        :type context:          TransformerContext
        :param initial_offset:  the initial byte offset at which to read; None is forwarded to the
                                XREF reader as-is (presumably the reader then locates the XREF itself)
        :type initial_offset:   int
        :raises Exception:      when a table can be read in neither format, the error raised by the
                                last attempt (the StreamXREF one) is propagated
        """
        doc = context.root_object
        src = context.source
        tok = context.tokenizer

        # Offsets that were read explicitly. A damaged file can have a "Prev" entry that
        # points back at a table that was already read; without this guard the chain
        # would be followed forever.
        visited_offsets = set()
        offset = initial_offset

        # The chain is walked iteratively so that a long history of incremental updates
        # cannot exhaust the interpreter's recursion limit.
        while True:
            if offset is not None:
                visited_offsets.add(offset)

            # try the plaintext flavour first, then the stream flavour
            failures = []
            for xref_type in (PlainTextXREF, StreamXREF):
                try:
                    most_recent_xref = xref_type()
                    most_recent_xref.parent = doc
                    most_recent_xref.read(src, tok, offset)
                    if "XRef" in doc:
                        doc["XRef"] = doc["XRef"].merge(most_recent_xref)
                    else:
                        doc["XRef"] = most_recent_xref
                    break
                except Exception as ex:
                    # deliberate fallback: remember the failure and try the next flavour
                    failures.append(ex)
            else:
                # unable to read the XREF in any format: propagate the most recent failure
                raise failures[-1]

            # handle Prev, Previous
            trailer = most_recent_xref["Trailer"]
            prev = None
            if "Prev" in trailer:
                prev = int(trailer["Prev"])
            if "Previous" in trailer:
                prev = int(trailer["Previous"])

            # end of the chain, or a chain that loops back on itself
            if prev is None or prev in visited_offsets:
                return
            offset = prev
Beispiel #2
0
 def insert_page(
         self,
         page: Page,
         index: int = -1) -> "Document":  # type: ignore [name-defined]
     """
     Insert a page into the /Kids list of this Document and increment /Count.

     Any missing link of the XRef -> Trailer -> Root -> Pages chain is created
     first, with /Count set to 0 and /Kids set to an empty List.

     :param page:    the Page to insert
     :type page:     Page
     :param index:   the position in /Kids at which to insert the page;
                     the default (-1) appends the page after the last existing page
     :type index:    int
     :return:        this Document
     """
     # build XRef
     if "XRef" not in self:
         self["XRef"] = PlainTextXREF()
     xref = self["XRef"]
     # build Trailer (note that Size is stored on the XRef itself)
     if "Trailer" not in xref:
         xref["Trailer"] = Dictionary()
         xref[Name("Size")] = Decimal(0)
     trailer = xref["Trailer"]
     # build Root
     if "Root" not in trailer:
         trailer[Name("Root")] = Dictionary()
     root = trailer["Root"]
     # build Pages
     if "Pages" not in root:
         root[Name("Pages")] = Dictionary()
         root[Name("Pages")][Name("Count")] = Decimal(0)
         root[Name("Pages")][Name("Kids")] = List()
     pages = root["Pages"]
     # update /Kids
     kids = pages["Kids"]
     assert kids is not None
     assert isinstance(kids, List)
     # insert(-1, page) would place the page *before* the current last entry,
     # so the default is translated to the end-of-list position explicitly.
     # Every other index (including other negative ones) is passed through as-is.
     if index == -1:
         index = len(kids)
     kids.insert(index, page)
     # update /Count
     prev_count = pages["Count"]
     pages["Count"] = Decimal(prev_count + 1)
     # return
     return self
Beispiel #3
0
 def insert_page(
     self,
     page: Page,
     index: typing.Optional[int] = None
 ) -> "Document":  # type: ignore [name-defined]
     """
     Insert a page into the /Kids list of this Document at a given index.

     Missing parts of the XRef -> Trailer -> Root -> Pages chain are created
     (and given their parent) before the page is inserted. When index is None
     the page is placed after the last existing entry of /Kids. Afterwards
     /Count is incremented, and the page gets /Pages as its /Parent entry and
     the /Kids list as its parent.
     """
     # make sure there is an XRef
     if "XRef" not in self:
         self[Name("XRef")] = PlainTextXREF()
         self[Name("XRef")].set_parent(self)
     xref = self["XRef"]

     # make sure there is a Trailer (Size is stored on the XRef itself)
     if "Trailer" not in xref:
         xref[Name("Trailer")] = Dictionary()
         xref[Name("Size")] = Decimal(0)
         xref["Trailer"].set_parent(xref)
     trailer = xref["Trailer"]

     # make sure there is a Root
     if "Root" not in trailer:
         trailer[Name("Root")] = Dictionary()
         trailer["Root"].set_parent(trailer)
     catalog = trailer["Root"]

     # make sure there is a Pages dictionary, with an empty /Kids and a zero /Count
     if "Pages" not in catalog:
         catalog[Name("Pages")] = Dictionary()
         fresh_pages = catalog[Name("Pages")]
         fresh_pages[Name("Count")] = Decimal(0)
         fresh_pages[Name("Kids")] = List()
         fresh_pages[Name("Type")] = Name("Pages")
         catalog["Pages"].set_parent(catalog)
         catalog["Pages"]["Kids"].set_parent(catalog["Pages"])
     page_tree = catalog["Pages"]

     # add the page to /Kids
     kids = page_tree["Kids"]
     assert kids is not None
     assert isinstance(kids, List)
     position = len(kids) if index is None else index
     kids.insert(position, page)

     # bump /Count
     page_tree[Name("Count")] = Decimal(page_tree["Count"] + 1)

     # link the page back to the page tree
     page[Name("Parent")] = page_tree
     page.set_parent(kids)  # type: ignore [attr-defined]

     return self