def _read_xref(self, context: TransformerContext, initial_offset: Optional[int] = None) -> None:
    """
    This function attempts to read the XREF table, first as plaintext, then as a stream.
    Every XREF that is read is stored in (or merged into) the "XRef" entry of
    context.root_object. If the trailer of the XREF that was just read contains
    "Prev" or "Previous", the XREF at that offset is read next ("Previous" wins
    when both are present). The chain is followed iteratively and stops as soon as
    it points to an offset that was already read.
    :param context:         the TransformerContext (containing the io source)
    :type context:          TransformerContext
    :param initial_offset:  the initial byte offset at which to read (set to None to allow this method to find the XREF)
    :type initial_offset:   int
    :raises Exception:      whatever the stream XREF reader raised when both attempts fail;
                            the plaintext failure is attached to it as __cause__
    """
    doc = context.root_object
    src = context.source
    tok = context.tokenizer

    # offsets that were explicitly requested so far; used to break cyclic Prev chains
    seen_offsets = set()
    if initial_offset is not None:
        seen_offsets.add(initial_offset)

    offset = initial_offset
    while True:
        most_recent_xref = None
        plaintext_error = None

        # attempt to read plaintext XREF
        # (broad except is deliberate: any failure means "fall back to stream XREF")
        try:
            most_recent_xref = PlainTextXREF()
            most_recent_xref.parent = doc
            most_recent_xref.read(src, tok, offset)
            if "XRef" in doc:
                doc["XRef"] = doc["XRef"].merge(most_recent_xref)
            else:
                doc["XRef"] = most_recent_xref
        except Exception as ex0:
            most_recent_xref = None
            # keep a reference: the name bound by "as" is deleted when the except block ends
            plaintext_error = ex0

        # attempt to read stream XREF
        if most_recent_xref is None:
            try:
                most_recent_xref = StreamXREF()
                most_recent_xref.parent = doc
                most_recent_xref.read(src, tok, offset)
                if "XRef" in doc:
                    doc["XRef"] = doc["XRef"].merge(most_recent_xref)
                else:
                    doc["XRef"] = most_recent_xref
            except Exception as ex1:
                # unable to read XREF in either format:
                # raise the stream failure, keeping the plaintext failure for diagnostics
                raise ex1 from plaintext_error

        # handle Prev, Previous
        trailer = most_recent_xref["Trailer"]
        prev = None
        if "Prev" in trailer:
            prev = int(trailer["Prev"])
        if "Previous" in trailer:
            prev = int(trailer["Previous"])

        # end of the chain, or a chain that loops back on itself
        if prev is None or prev in seen_offsets:
            return
        seen_offsets.add(prev)
        offset = prev
def insert_page(self, page: Page, index: int = -1) -> "Document":  # type: ignore [name-defined]
    """
    This method inserts a page into this Document at a given index.
    Missing parts of the XRef / Trailer / Root / Pages structure are created first.
    :param page:    the page to insert
    :param index:   the position in /Kids at which to insert the page;
                    the default (-1) appends the page after all existing pages
    :return:        this Document (to allow call chaining)
    """
    # build XRef
    if "XRef" not in self:
        self["XRef"] = PlainTextXREF()

    # build Trailer
    if "Trailer" not in self["XRef"]:
        self["XRef"]["Trailer"] = Dictionary()
        self["XRef"][Name("Size")] = Decimal(0)

    # build Root
    if "Root" not in self["XRef"]["Trailer"]:
        self["XRef"]["Trailer"][Name("Root")] = Dictionary()

    # build Pages
    if "Pages" not in self["XRef"]["Trailer"]["Root"]:
        self["XRef"]["Trailer"][Name("Root")][Name("Pages")] = Dictionary()
        self["XRef"]["Trailer"][Name("Root")][Name("Pages")][Name("Count")] = Decimal(0)
        self["XRef"]["Trailer"][Name("Root")][Name("Pages")][Name("Kids")] = List()

    # update /Kids
    kids = self["XRef"]["Trailer"]["Root"]["Pages"]["Kids"]
    assert kids is not None
    assert isinstance(kids, List)
    # list.insert(-1, x) would place x *before* the last element,
    # so the default index is translated to "one past the end" (append)
    if index == -1:
        index = len(kids)
    kids.insert(index, page)

    # update /Count
    prev_count = self["XRef"]["Trailer"]["Root"]["Pages"]["Count"]
    self["XRef"]["Trailer"]["Root"]["Pages"]["Count"] = Decimal(prev_count + 1)

    # return
    return self
def insert_page(
    self, page: Page, index: typing.Optional[int] = None
) -> "Document":  # type: ignore [name-defined]
    """
    Insert a page (from another Document) into this Document at the given index.
    Any missing level of the XRef -> Trailer -> Root -> Pages -> Kids structure is
    created (and linked to its parent) before the page is added.
    When index is None the page is placed after all existing pages.
    Returns this Document.
    """
    # XRef level
    if "XRef" not in self:
        self[Name("XRef")] = PlainTextXREF()
        self[Name("XRef")].set_parent(self)
    xref = self["XRef"]

    # Trailer level
    if "Trailer" not in xref:
        xref[Name("Trailer")] = Dictionary()
        xref[Name("Size")] = Decimal(0)
        xref["Trailer"].set_parent(xref)
    trailer = xref["Trailer"]

    # Root level
    if "Root" not in trailer:
        trailer[Name("Root")] = Dictionary()
        trailer["Root"].set_parent(trailer)
    root = trailer["Root"]

    # Pages level (with its /Count, /Kids and /Type entries)
    if "Pages" not in root:
        root[Name("Pages")] = Dictionary()
        fresh_pages = root[Name("Pages")]
        fresh_pages[Name("Count")] = Decimal(0)
        fresh_pages[Name("Kids")] = List()
        fresh_pages[Name("Type")] = Name("Pages")
        root["Pages"].set_parent(root)
        root["Pages"]["Kids"].set_parent(root["Pages"])
    pages = root["Pages"]

    # add the page to /Kids
    kids = pages["Kids"]
    assert kids is not None
    assert isinstance(kids, List)
    position = len(kids) if index is None else index
    kids.insert(position, page)

    # bump /Count
    count_before = pages["Count"]
    pages[Name("Count")] = Decimal(count_before + 1)

    # link the page back to the page tree
    page[Name("Parent")] = root["Pages"]
    page.set_parent(kids)  # type: ignore [attr-defined]

    return self