def read_array(self) -> List:
    """
    Consume the upcoming tokens as an array and return its elements as a List.

    The first non-comment token must open an array. Objects are then read
    one at a time until the end-of-array token is met.

    An AssertionError is raised when the tokens run out early, when the
    first token does not start an array, or when an end-of-dictionary
    token shows up inside the array.
    """
    opening = self.next_non_comment_token()
    assert opening is not None
    assert opening.token_type == TokenType.START_ARRAY

    elements = List()
    while True:
        upcoming = self.next_non_comment_token()
        assert upcoming is not None

        # the end-of-array token finishes the array
        if upcoming.token_type == TokenType.END_ARRAY:
            return elements
        assert upcoming.token_type != TokenType.END_DICT

        # rewind to the start of the token just peeked at, so that
        # read_object() sees it again
        self.seek(upcoming.byte_offset)
        elements.append(self.read_object())
def transform(
    self,
    object_to_transform: Any,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write a Document object to the byte stream held by the context.

    The PDF header is emitted first. The trailer is then brought up to
    date (/Info, /ID, /CreationDate, /ModDate, /Producer) and the XRef of
    the document is handed to the root transformer.
    """
    assert context is not None
    assert context.destination is not None

    # header: the version line, then a comment line made of four bytes
    # that all have their high bit set
    for chunk in (b"%PDF-1.7\n", b"%", bytes([226, 227, 207, 211]), b"\n"):
        context.destination.write(chunk)

    # invalidate all references
    WritePDFTransformer._invalidate_all_references(object_to_transform)

    trailer = object_to_transform["XRef"]["Trailer"]

    # make sure an /Info dictionary exists
    if "Info" not in trailer:
        trailer[Name("Info")] = Dictionary()

    # /ID: a new document gets the same random id twice, an existing
    # document only has its second id replaced
    fresh_id = HexadecimalString("%032x" % random.randrange(16**32))
    if "ID" in trailer:
        trailer["ID"][1] = fresh_id
    else:
        trailer[Name("ID")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for _ in range(2):
            trailer["ID"].append(fresh_id)
    trailer["ID"].set_can_be_referenced(False)

    # /CreationDate is only set once, /ModDate and /Producer always
    info = trailer[Name("Info")]
    timestamp = WritePDFTransformer._timestamp_to_str()
    if "CreationDate" not in info:
        info[Name("CreationDate")] = String(timestamp)
    info[Name("ModDate")] = String(timestamp)
    info[Name("Producer")] = String("pText")

    # the XRef takes care of everything else
    self.get_root_transformer().transform(object_to_transform["XRef"], context)
def test_hash_types(self):
    """
    Build a Dictionary and a List of PDF primitives, then print the hash of the List.
    """
    sample_dictionary = Dictionary()
    sample_dictionary[Name("Root")] = Reference(object_number=10)
    sample_dictionary[Name("Marked")] = Boolean(True)

    sample_list = List()
    for element in (Name("Red"), Decimal(0.5)):
        sample_list.append(element)

    print(hash(sample_list))
def insert_page(
    self, page: Page, index: typing.Optional[int] = None
) -> "Document":  # type: ignore [name-defined]
    """
    Insert a page (from another Document) into this Document.

    Any missing part of the XRef / Trailer / Root / Pages chain is created
    first. The page is then inserted in /Kids at `index` (at the end when
    `index` is None), /Count is raised by one and the page is linked to
    its new parent. This Document is returned.
    """
    # XRef
    if "XRef" not in self:
        self[Name("XRef")] = PlainTextXREF()
        self[Name("XRef")].set_parent(self)
    xref = self["XRef"]

    # Trailer
    if "Trailer" not in xref:
        xref[Name("Trailer")] = Dictionary()
        xref[Name("Size")] = Decimal(0)
        xref["Trailer"].set_parent(xref)
    trailer = xref["Trailer"]

    # Root
    if "Root" not in trailer:
        trailer[Name("Root")] = Dictionary()
        trailer["Root"].set_parent(trailer)
    root = trailer["Root"]

    # Pages
    if "Pages" not in root:
        root[Name("Pages")] = Dictionary()
        root["Pages"][Name("Count")] = Decimal(0)
        root["Pages"][Name("Kids")] = List()
        root["Pages"][Name("Type")] = Name("Pages")
        root["Pages"].set_parent(root)
        root["Pages"]["Kids"].set_parent(root["Pages"])
    pages = root["Pages"]

    # /Kids
    kids = pages["Kids"]
    assert kids is not None
    assert isinstance(kids, List)
    position = len(kids) if index is None else index
    kids.insert(position, page)

    # /Count
    pages[Name("Count")] = Decimal(pages["Count"] + 1)

    # /Parent
    page[Name("Parent")] = pages
    page.set_parent(kids)  # type: ignore [attr-defined]

    return self
def _copy_font_descriptor(self, font_descriptor_to_copy: Dictionary) -> Dictionary:
    """
    Copy the entries of a font descriptor into self["FontDescriptor"].

    Well-known entries are assigned first, in a fixed order. Every other
    entry of the source is then added as a deep copy.

    :param font_descriptor_to_copy: the font descriptor to read from
    :return:                        self["FontDescriptor"], after the copy
    :raises KeyError:               when the source lacks /Type, /FontName,
                                    /Flags or /ItalicAngle
    """
    source: Dictionary = font_descriptor_to_copy
    # NOTE(review): the target is the descriptor already stored on this font;
    # when the argument is that same dictionary, every assignment below is a
    # self-assignment - confirm that this is intended
    target: Dictionary = self["FontDescriptor"]

    # (key, required) in the order in which the entries are written;
    # required entries are read unconditionally
    ordered_keys = (
        ("Type", True),
        ("FontName", True),
        ("FontFamily", False),
        ("FontStretch", False),
        ("FontWeight", False),
        ("Flags", True),
        ("ItalicAngle", True),
        ("Ascent", False),
        ("Descent", False),
        ("Leading", False),
        ("CapHeight", False),
        ("XHeight", False),
        ("StemV", False),
        ("StemH", False),
        ("AvgWidth", False),
        ("MaxWidth", False),
        ("MissingWidth", False),
    )
    for key, required in ordered_keys:
        if required or key in source:
            target[Name(key)] = source[key]

    # TODO: dedicated copy logic for FontBBox, FontFile, FontFile2,
    # FontFile3 and CharSet; for now they fall through to the deep copy
    # below, together with any other entry not handled above
    for key, value in source.items():
        if key not in target:
            target[key] = copy.deepcopy(value)

    return target
def add_outline(
    self,
    text: str,
    level: int,
    destination_type: DestinationType,
    page_nr: int,
    top: typing.Optional[Decimal] = None,
    right: typing.Optional[Decimal] = None,
    bottom: typing.Optional[Decimal] = None,
    left: typing.Optional[Decimal] = None,
    zoom: typing.Optional[Decimal] = None,
) -> "Document":
    """
    Add an outline item (bookmark) to this Document.

    :param text:             the title of the outline item
    :param level:            the depth of the new item; items directly under
                             /Outlines have level 0. The item becomes the last
                             child of the last item found at depth `level - 1`.
                             It is ignored when the outline is still empty.
    :param destination_type: the kind of destination; it determines which of
                             top/right/bottom/left/zoom must be given
    :param page_nr:          the page number stored in the destination
    :param top:              top coordinate (X_Y_Z, FIT_H, FIT_R, FIT_B_H)
    :param right:            right coordinate (FIT_R)
    :param bottom:           bottom coordinate (FIT_R)
    :param left:             left coordinate (X_Y_Z, FIT_V, FIT_R, FIT_B_V)
    :param zoom:             zoom factor (X_Y_Z)
    :return:                 this Document
    :raises AssertionError:  when the coordinates that were given do not match
                             the destination type
    :raises IndexError:      when there is no item at depth `level - 1`
    """
    # destination array: page, destination type, then the coordinates
    destination = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    destination.append(Decimal(page_nr))
    destination.append(destination_type.value)

    # per destination type: the coordinates that must be given, in the
    # order in which they are appended; all others must be None
    coordinates = {
        "left": left,
        "bottom": bottom,
        "right": right,
        "top": top,
        "zoom": zoom,
    }
    required_by_type = {
        DestinationType.X_Y_Z: ("left", "top", "zoom"),
        DestinationType.FIT: (),
        DestinationType.FIT_H: ("top",),
        DestinationType.FIT_V: ("left",),
        DestinationType.FIT_R: ("left", "bottom", "right", "top"),
        DestinationType.FIT_B_H: ("top",),
        DestinationType.FIT_B_V: ("left",),
    }
    required = required_by_type.get(destination_type)
    if required is not None:
        assert all(
            (value is not None) == (name in required)
            for name, value in coordinates.items()
        )
        for name in required:
            destination.append(Decimal(coordinates[name]))

    # add \Outlines entry in \Root
    root = self["XRef"]["Trailer"]["Root"]
    if "Outlines" not in root:
        outline_dictionary: Dictionary = Dictionary()
        root[Name("Outlines")] = outline_dictionary
        # the parent is the catalog dictionary that holds the /Outlines
        # entry, not the outline dictionary itself
        outline_dictionary.set_parent(root)  # type: ignore [attr-defined]
        outline_dictionary[Name("Type")] = Name("Outlines")
        outline_dictionary[Name("Count")] = Decimal(0)

    # create entry (the /Parent placeholder is overwritten below)
    outline = Dictionary()
    outline[Name("Dest")] = destination
    outline[Name("Parent")] = None
    outline[Name("Title")] = String(text)

    # get \Outlines
    outline_dictionary = root["Outlines"]

    # if everything is empty, add the new entry as the only entry
    if "First" not in outline_dictionary or "Last" not in outline_dictionary:
        outline_dictionary[Name("First")] = outline
        outline_dictionary[Name("Last")] = outline
        outline_dictionary[Name("Count")] = Decimal(1)
        outline[Name("Parent")] = outline_dictionary
        return self

    # helper: the children of a node, found by following /Next from
    # /First until /Last is reached
    def _children(x: Dictionary):
        if "First" not in x:
            return []
        children = [x["First"]]
        while children[-1] != x["Last"]:
            children.append(children[-1]["Next"])
        return children

    # breadth-first walk that records the depth of every node;
    # /Outlines itself sits at depth -1
    visited: typing.List[typing.Tuple[int, Dictionary]] = [(-1, outline_dictionary)]
    cursor = 0
    while cursor < len(visited):
        depth, node = visited[cursor]
        cursor += 1
        visited.extend((depth + 1, child) for child in _children(node))

    # find parent: the last node one level above the requested level
    parent = [node for depth, node in visited if depth == level - 1][-1]

    # update sibling-linking (both directions)
    if "Last" in parent:
        sibling = parent["Last"]
        sibling[Name("Next")] = outline
        outline[Name("Prev")] = sibling

    # update parent-linking
    outline[Name("Parent")] = parent
    if "First" not in parent:
        parent[Name("First")] = outline
    if "Count" not in parent:
        parent[Name("Count")] = Decimal(0)
    parent[Name("Last")] = outline

    # update count of the parent and of all of its ancestors
    outline_to_update_count = parent
    while outline_to_update_count:
        outline_to_update_count[Name("Count")] = Decimal(
            outline_to_update_count["Count"] + Decimal(1)
        )
        if "Parent" in outline_to_update_count:
            outline_to_update_count = outline_to_update_count["Parent"]
        else:
            break

    return self
def true_type_font_from_file(path_to_font_file: Path) -> "TrueTypeFont":
    """
    Build the PDF TrueTypeFont object for a given TTF file.

    The font dictionary gets its name, widths, font descriptor and
    encoding from the TTF tables. The font file itself is embedded,
    compressed, as /FontFile2 of the font descriptor.

    :param path_to_font_file: path to an existing, non-empty .ttf file
    :return:                  the TrueTypeFont that was built
    :raises AssertionError:   when the file does not exist, does not end in
                              ".ttf" or is empty
    """
    assert path_to_font_file.exists()
    assert path_to_font_file.name.endswith(".ttf")

    with open(path_to_font_file, "rb") as ffh:
        font_file_bytes: bytes = ffh.read()
    assert font_file_bytes

    # read file
    ttf_font_file = TTFont(path_to_font_file)

    # build font
    font: TrueTypeFont = TrueTypeFont()
    font_name: str = str(
        [
            x
            for x in ttf_font_file["name"].names
            if x.platformID == 3 and x.nameID == 1
        ][0].string,
        "latin1",
    )
    # keep letters only
    font_name = "".join(
        [x for x in font_name if x.lower() in "abcdefghijklmnopqrstuvwxyz"]
    )
    font[Name("Name")] = Name(font_name)
    font[Name("BaseFont")] = Name(font_name)

    # map every glyph name to the lowest character code that uses it;
    # a font without a usable cmap yields an empty mapping
    cmap: typing.Optional[typing.Dict[int, str]] = ttf_font_file.getBestCmap()
    cmap_reverse: typing.Dict[str, int] = {}
    for code, mapped_glyph in (cmap or {}).items():
        cmap_reverse[mapped_glyph] = min(cmap_reverse.get(mapped_glyph, code), code)
    glyph_order: typing.List[str] = [
        x for x in ttf_font_file.glyphOrder if x in cmap_reverse
    ]

    # build widths
    units_per_em: pDecimal = pDecimal(ttf_font_file["head"].unitsPerEm)
    if cmap is not None:
        # the glyph set is only built when there are glyphs to measure
        glyph_set = ttf_font_file.getGlyphSet() if glyph_order else {}
        font[Name("FirstChar")] = pDecimal(0)
        # /Widths holds LastChar - FirstChar + 1 entries
        font[Name("LastChar")] = pDecimal(max(len(glyph_order) - 1, 0))
        font[Name("Widths")] = List()
        for glyph_name in glyph_order:
            w: pDecimal = (
                pDecimal(glyph_set[glyph_name].width) / units_per_em
            ) * pDecimal(1000)
            font["Widths"].append(pDecimal(round(w, 2)))

    descriptor = Dictionary()
    font[Name("FontDescriptor")] = descriptor
    font["FontDescriptor"][Name("Type")] = Name("FontDescriptor")
    font["FontDescriptor"][Name("FontName")] = String(font_name)
    font["FontDescriptor"][Name("FontStretch")] = Name("Normal")  # TODO
    font["FontDescriptor"][Name("FontWeight")] = pDecimal(400)  # TODO
    font["FontDescriptor"][Name("Flags")] = pDecimal(4)  # TODO
    font["FontDescriptor"][Name("FontBBox")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
        False
    )  # TODO
    for _ in range(0, 4):
        font["FontDescriptor"]["FontBBox"].append(pDecimal(0))

    # fmt: off
    font["FontDescriptor"][Name("ItalicAngle")] = pDecimal(ttf_font_file["post"].italicAngle)
    font["FontDescriptor"][Name("Ascent")] = pDecimal(pDecimal(ttf_font_file["hhea"].ascent) / units_per_em * Decimal(1000))
    font["FontDescriptor"][Name("Descent")] = pDecimal(pDecimal(ttf_font_file["hhea"].descent) / units_per_em * Decimal(1000))
    font["FontDescriptor"][Name("CapHeight")] = pDecimal(0)  # TODO
    font["FontDescriptor"][Name("StemV")] = pDecimal(0)  # TODO
    # fmt: on

    # encoding: every glyph is listed, preceded by its index
    font[Name("Encoding")] = Dictionary()
    font["Encoding"][Name("BaseEncoding")] = Name("WinAnsiEncoding")
    font["Encoding"][Name("Differences")] = List()
    for i, glyph_name in enumerate(glyph_order):
        font["Encoding"]["Differences"].append(pDecimal(i))
        font["Encoding"]["Differences"].append(Name(glyph_name))

    # embed font file
    compressed_bytes = zlib.compress(font_file_bytes, 9)
    font_stream: Stream = Stream()
    font_stream[Name("Type")] = Name("Font")
    font_stream[Name("Subtype")] = Name("TrueType")
    # /Length counts the bytes stored under /Bytes (compressed),
    # /Length1 the size of the decoded font file
    font_stream[Name("Length")] = pDecimal(len(compressed_bytes))
    font_stream[Name("Length1")] = pDecimal(len(font_file_bytes))
    font_stream[Name("Filter")] = Name("FlateDecode")
    font_stream[Name("DecodedBytes")] = font_file_bytes
    font_stream[Name("Bytes")] = compressed_bytes

    font["FontDescriptor"][Name("FontFile2")] = font_stream

    # return
    return font
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write a List to the byte stream held by the context.

    Elements that are a Dictionary, List, Stream or Image and that can be
    referenced are written as a reference inside the array; the elements
    themselves are written after the array. Nothing is written (and None
    is returned) when the List has a reference that was resolved before.
    Otherwise the List of written values is returned.
    """
    assert isinstance(object_to_transform, List)
    assert context is not None
    assert context.destination is not None
    assert context.destination

    # a List that was written before is not written again
    own_reference: typing.Optional[Reference] = object_to_transform.get_reference()  # type: ignore [attr-defined]
    if own_reference is not None and own_reference in context.resolved_references:
        assert own_reference.object_number is not None
        logger.debug(
            "skip writing object %d %d R (already resolved)"
            % (own_reference.object_number, own_reference.generation_number or 0)
        )
        return

    # split the elements into what is written in place and what is
    # replaced by a reference (and written afterwards)
    written_values = List()
    postponed: typing.List[AnyPDFType] = []
    for element in object_to_transform:
        is_container = isinstance(element, (Dictionary, List, Stream, Image))
        if is_container and element.can_be_referenced():  # type: ignore [union-attr]
            written_values.append(self.get_reference(element, context))
            postponed.append(element)
        else:
            written_values.append(element)

    # start object if needed
    started_object = False
    if own_reference is not None:
        assert own_reference.object_number is not None
        if (
            own_reference.object_number is not None
            and own_reference.byte_offset is None
        ):
            started_object = True
            self.start_object(object_to_transform, context)
        context.resolved_references.append(own_reference)

    # write the array at the current location, elements separated by a space
    context.destination.write(b"[")
    last_position = len(written_values) - 1
    for position, value in enumerate(written_values):
        self.get_root_transformer().transform(value, context)
        if position != last_position:
            context.destination.write(b" ")
    context.destination.write(b"]\n")

    # end object if needed
    if started_object:
        self.end_object(object_to_transform, context)

    # write the elements that were replaced by a reference
    for element in postponed:
        self.get_root_transformer().transform(element, context)

    return written_values
def __deepcopy__(self, memodict=None) -> "Font":
    """
    Return a copy of this Font.

    Well-known entries are copied explicitly: lists are rebuilt element by
    element, the first descendant font is copied recursively. Every entry
    that is not handled explicitly is deep-copied.

    :param memodict: the memo dictionary of copy.deepcopy; a fresh one is
                     used when none is given
    :return:         the copy
    """
    # a `{}` default would be shared by every call that omits the argument
    if memodict is None:
        memodict = {}

    out: Font = self._empty_copy()
    # register the copy up front, so that a nested deepcopy that runs into
    # this font again gets the copy rather than starting a second one
    memodict[id(self)] = out

    def _copy_items(items) -> List:
        # new List holding the same elements
        copied = List()
        for item in items:
            copied.append(item)
        return copied

    # Type
    out[Name("Type")] = Name("Font")

    # BaseFont
    out[Name("BaseFont")] = Name(str(self["BaseFont"]))

    # FirstChar, LastChar
    for key in ("FirstChar", "LastChar"):
        if key in self:
            out[Name(key)] = self[key]

    # Widths
    if "Widths" in self:
        out[Name("Widths")] = _copy_items(self["Widths"])

    # FontDescriptor
    if "FontDescriptor" in self:
        out[Name("FontDescriptor")] = self._copy_font_descriptor(
            self["FontDescriptor"]
        )

    # Encoding: either a Name or a Dictionary
    if "Encoding" in self:
        encoding = self["Encoding"]
        if isinstance(encoding, Name):
            out[Name("Encoding")] = Name(str(encoding))
        if isinstance(encoding, Dictionary):
            out[Name("Encoding")] = Dictionary()
            out["Encoding"][Name("Type")] = Name("Encoding")
            if "BaseEncoding" in encoding:
                out["Encoding"][Name("BaseEncoding")] = Name(
                    str(encoding["BaseEncoding"])
                )
            if "Differences" in encoding:
                out["Encoding"][Name("Differences")] = _copy_items(
                    encoding["Differences"]
                )

    # ToUnicode
    if "ToUnicode" in self:
        out[Name("ToUnicode")] = copy.deepcopy(self["ToUnicode"], memodict)

    # FontBBox, FontMatrix
    for key in ("FontBBox", "FontMatrix"):
        if key in self:
            out[Name(key)] = _copy_items(self[key])

    # CharProcs
    # Resources

    # CIDSystemInfo
    if "CIDSystemInfo" in self:
        out[Name("CIDSystemInfo")] = Dictionary()
        for key in ("Registry", "Ordering", "Supplement"):
            out["CIDSystemInfo"][Name(key)] = self["CIDSystemInfo"][key]

    # DW
    if "DW" in self:
        out[Name("DW")] = self["DW"]

    # W: numbers are kept as they are, nested lists are rebuilt;
    # elements of any other type are not copied
    if "W" in self:
        out[Name("W")] = List()
        for x in self["W"]:
            if isinstance(x, pDecimal):
                out["W"].append(x)
            if isinstance(x, List):
                out["W"].append(_copy_items(x))

    # DescendantFonts: only the first descendant font is copied
    if "DescendantFonts" in self:
        out[Name("DescendantFonts")] = List()
        out["DescendantFonts"].append(
            self["DescendantFonts"][0].__deepcopy__(memodict)
        )

    # DW2
    if "DW2" in self:
        out[Name("DW2")] = _copy_items(self["DW2"])

    # W2
    # CIDToGIDMap

    # default: whatever was not handled above
    for k, v in self.items():
        if k not in out:
            out[k] = copy.deepcopy(v, memodict)

    return out
def append_embedded_file(self, file_name: str, file_bytes: bytes, apply_compression: bool = True) -> "Document":
    """
    Embed a file (given by its name and its bytes) into this Document.

    Embedded file streams (PDF 1.3) allow the contents of referenced files
    to be embedded directly within the body of the PDF file, which makes
    the PDF file a self-contained unit that can be stored or transmitted
    as a single entity.

    The file is stored under /Names /EmbeddedFiles of the catalog; both
    are created when missing. When an entry whose limits both equal
    `file_name` already exists, the Document is left as it is.
    The bytes are deflated unless `apply_compression` is False.
    This Document is returned.
    """
    assert "XRef" in self
    assert "Trailer" in self["XRef"]
    assert "Root" in self["XRef"]["Trailer"]
    catalog = self["XRef"]["Trailer"]["Root"]

    # set up /Names dictionary
    if "Names" not in catalog:
        catalog[Name("Names")] = Dictionary()
    name_dictionary = catalog["Names"]

    # set up /EmbeddedFiles
    if "EmbeddedFiles" not in name_dictionary:
        name_dictionary[Name("EmbeddedFiles")] = Dictionary()
        name_dictionary["EmbeddedFiles"][Name("Kids")] = List()

    # find parent: step down (one level at most) into the first kid whose
    # limits differ from each other and strictly enclose the file name
    parent = name_dictionary["EmbeddedFiles"]
    if "Kids" in parent:
        for candidate in parent["Kids"]:
            lower_limit = str(candidate["Limits"][0])
            upper_limit = str(candidate["Limits"][1])
            if lower_limit != upper_limit and lower_limit < file_name < upper_limit:
                parent = candidate
                break

    # an entry for this file name is already there
    existing = [
        x
        for x in parent["Kids"]
        if x["Limits"][0] == x["Limits"][1] == file_name
    ]
    if existing:
        # TODO: change the existing child
        return self

    # new child
    kid = Dictionary()
    kid[Name("F")] = String(file_name)
    kid[Name("Type")] = Name("Filespec")
    kid[Name("Limits")] = List()
    for _ in range(0, 2):
        kid["Limits"].append(String(file_name))

    # leaf \Names array: the name, followed (further down) by the file spec
    leaf_names = List()
    leaf_names.append(String(file_name))
    kid[Name("Names")] = leaf_names

    # actual file stream
    stream = Stream()
    stream[Name("Type")] = Name("EmbeddedFile")
    stream[Name("DecodedBytes")] = file_bytes
    if apply_compression:
        stream[Name("Bytes")] = zlib.compress(stream[Name("DecodedBytes")], 9)
        stream[Name("Filter")] = Name("FlateDecode")
    else:
        stream[Name("Bytes")] = file_bytes
    stream[Name("Length")] = Decimal(len(stream[Name("Bytes")]))

    # leaf \Filespec dictionary
    file_spec = Dictionary()
    file_spec[Name("EF")] = Dictionary()
    file_spec["EF"][Name("F")] = stream
    file_spec[Name("F")] = String(file_name)
    file_spec[Name("Type")] = Name("Filespec")
    leaf_names.append(file_spec)

    # append
    parent["Kids"].append(kid)

    return self
def add_outline(
    self,
    text: str,
    level: int,
    destination_type: DestinationType,
    page_nr: int,
    top: typing.Optional[Decimal] = None,
    right: typing.Optional[Decimal] = None,
    bottom: typing.Optional[Decimal] = None,
    left: typing.Optional[Decimal] = None,
    zoom: typing.Optional[Decimal] = None,
) -> "Document":
    """
    A PDF document may contain a document outline that the conforming reader may display on the screen,
    allowing the user to navigate interactively from one part of the document to another.
    The outline consists of a tree-structured hierarchy of outline items (sometimes called bookmarks),
    which serve as a visual table of contents to display the document's structure to the user.

    This function adds an outline item to this Document.

    :param text:             the title of the outline item
    :param level:            the depth of the new item; items directly under
                             /Outlines have level 0. The item becomes the last
                             child of the last item found at depth `level - 1`.
                             It is ignored when the outline is still empty.
    :param destination_type: the kind of destination; it determines which of
                             top/right/bottom/left/zoom must be given
    :param page_nr:          the page number stored in the destination
    :param top:              top coordinate (X_Y_Z, FIT_H, FIT_R, FIT_B_H)
    :param right:            right coordinate (FIT_R)
    :param bottom:           bottom coordinate (FIT_R)
    :param left:             left coordinate (X_Y_Z, FIT_V, FIT_R, FIT_B_V)
    :param zoom:             zoom factor (X_Y_Z)
    :return:                 this Document
    :raises AssertionError:  when the coordinates that were given do not match
                             the destination type
    :raises IndexError:      when there is no item at depth `level - 1`
    """
    # destination array: page, destination type, then the coordinates
    destination = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    destination.append(Decimal(page_nr))
    destination.append(destination_type.value)

    # per destination type: the coordinates that must be given, in the
    # order in which they are appended; all others must be None
    coordinates = {
        "left": left,
        "bottom": bottom,
        "right": right,
        "top": top,
        "zoom": zoom,
    }
    required_by_type = {
        DestinationType.X_Y_Z: ("left", "top", "zoom"),
        DestinationType.FIT: (),
        DestinationType.FIT_H: ("top",),
        DestinationType.FIT_V: ("left",),
        DestinationType.FIT_R: ("left", "bottom", "right", "top"),
        DestinationType.FIT_B_H: ("top",),
        DestinationType.FIT_B_V: ("left",),
    }
    required = required_by_type.get(destination_type)
    if required is not None:
        assert all(
            (value is not None) == (name in required)
            for name, value in coordinates.items()
        )
        for name in required:
            destination.append(Decimal(coordinates[name]))

    # add \Outlines entry in \Root
    root = self["XRef"]["Trailer"]["Root"]
    if "Outlines" not in root:
        outline_dictionary: Dictionary = Dictionary()
        root[Name("Outlines")] = outline_dictionary
        # the parent is the catalog dictionary that holds the /Outlines
        # entry, not the outline dictionary itself
        outline_dictionary.set_parent(root)  # type: ignore [attr-defined]
        outline_dictionary[Name("Type")] = Name("Outlines")
        outline_dictionary[Name("Count")] = Decimal(0)

    # create entry (the /Parent placeholder is overwritten below)
    outline = Dictionary()
    outline[Name("Dest")] = destination
    outline[Name("Parent")] = None
    outline[Name("Title")] = String(text)

    # get \Outlines
    outline_dictionary = root["Outlines"]

    # if everything is empty, add the new entry as the only entry
    if "First" not in outline_dictionary or "Last" not in outline_dictionary:
        outline_dictionary[Name("First")] = outline
        outline_dictionary[Name("Last")] = outline
        outline_dictionary[Name("Count")] = Decimal(1)
        outline[Name("Parent")] = outline_dictionary
        return self

    # helper: the children of a node, found by following /Next from
    # /First until /Last is reached
    def _children(x: Dictionary):
        if "First" not in x:
            return []
        children = [x["First"]]
        while children[-1] != x["Last"]:
            children.append(children[-1]["Next"])
        return children

    # breadth-first walk that records the depth of every node;
    # /Outlines itself sits at depth -1
    visited: typing.List[typing.Tuple[int, Dictionary]] = [(-1, outline_dictionary)]
    cursor = 0
    while cursor < len(visited):
        depth, node = visited[cursor]
        cursor += 1
        visited.extend((depth + 1, child) for child in _children(node))

    # find parent: the last node one level above the requested level
    parent = [node for depth, node in visited if depth == level - 1][-1]

    # update sibling-linking (both directions)
    if "Last" in parent:
        sibling = parent["Last"]
        sibling[Name("Next")] = outline
        outline[Name("Prev")] = sibling

    # update parent-linking
    outline[Name("Parent")] = parent
    if "First" not in parent:
        parent[Name("First")] = outline
    if "Count" not in parent:
        parent[Name("Count")] = Decimal(0)
    parent[Name("Last")] = outline

    # update count of the parent and of all of its ancestors
    outline_to_update_count = parent
    while outline_to_update_count:
        outline_to_update_count[Name("Count")] = Decimal(
            outline_to_update_count["Count"] + Decimal(1)
        )
        if "Parent" in outline_to_update_count:
            outline_to_update_count = outline_to_update_count["Parent"]
        else:
            break

    return self
def _read_file(input: typing.TextIO) -> Optional[Font]:
    """
    Parse an Adobe Font Metrics (AFM) file into a Font.

    Lines starting with "Comment" are skipped. The font descriptor is
    filled from the global metrics; the widths, /FirstChar and /LastChar
    are read from the lines between StartCharMetrics and EndCharMetrics.

    :param input: the text stream to read the AFM data from
    :return:      the Font, or None when the input is empty or does not
                  start with StartFontMetrics and end with EndFontMetrics
    """
    lines: typing.List[str] = [
        x[:-1] if x.endswith("\n") else x
        for x in input.readlines()
        if not x.startswith("Comment")
    ]

    # check first/last line; input without any (non-comment) line has neither
    if (
        not lines
        or not lines[0].startswith("StartFontMetrics")
        or not lines[-1].startswith("EndFontMetrics")
    ):
        return None

    out_font = Font()

    # FontDescriptor
    out_font_descriptor = FontDescriptor().set_parent(out_font)  # type: ignore [attr-defined]

    def _store_metric(afm_keyword: str, descriptor_key: str, default=None):
        # store a numeric metric when it is found and non-zero, fall back to
        # `default` (when one is given) otherwise; returns the parsed value
        value = AdobeFontMetrics._find_and_parse_as_float(lines, afm_keyword)
        if value:
            out_font_descriptor[Name(descriptor_key)] = Decimal(value)
        elif default is not None:
            out_font_descriptor[Name(descriptor_key)] = default
        return value

    font_name = AdobeFontMetrics._find_and_parse_as_string(lines, "FontName")
    if font_name:
        out_font_descriptor[Name("FontName")] = Name(font_name)
    font_family = AdobeFontMetrics._find_and_parse_as_string(lines, "FamilyName")
    if font_family:
        out_font_descriptor[Name("FontFamily")] = String(font_family)

    # FontStretch
    # FontWeight

    # FontBBox
    fontbbox_str = AdobeFontMetrics._find_and_parse_as_string(lines, "FontBBox")
    if fontbbox_str:
        fontbbox = [Decimal(x) for x in fontbbox_str.split(" ")]
        out_font_descriptor[Name("FontBBox")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for x in fontbbox:
            out_font_descriptor[Name("FontBBox")].append(x)

    # ItalicAngle, Ascent, Descent
    _store_metric("ItalicAngle", "ItalicAngle", Decimal(0))
    _store_metric("Ascender", "Ascent", Decimal(0))
    _store_metric("Descender", "Descent", Decimal(0))

    # Flags
    out_font_descriptor[Name("Flags")] = Decimal(131104)

    # Leading

    # CapHeight, XHeight, StemV, StemH
    _store_metric("CapHeight", "CapHeight", Decimal(0))
    _store_metric("XHeight", "XHeight")
    _store_metric("StemV", "StemV", Decimal(0))
    _store_metric("StemH", "StemH", Decimal(0))

    # AvgWidth, MaxWidth: derived from the char metrics below when absent
    avgwidth = _store_metric("AvgWidth", "AvgWidth")
    maxwidth = _store_metric("MaxWidth", "MaxWidth")

    # MissingWidth, CharSet
    _store_metric("MissingWidth", "MissingWidth")
    _store_metric("CharSet", "CharSet")

    # Font
    out_font[Name("Type")] = Name("Font")
    out_font[Name("Subtype")] = Name("Type1")
    out_font[Name("Name")] = out_font_descriptor["FontName"]
    out_font[Name("BaseFont")] = out_font_descriptor["FontName"]

    widths = List().set_parent(out_font)  # type: ignore [attr-defined]
    avg_char_width: float = 0
    avg_char_width_norm: float = 0
    first_char = None
    last_char = None

    # the lines strictly between StartCharMetrics and EndCharMetrics
    start_index = lines.index(
        [x for x in lines if x.startswith("StartCharMetrics")][0]
    )
    end_index = lines.index("EndCharMetrics")
    for cml in lines[start_index + 1 : end_index]:
        # "C 32 ; WX 600 ; N space ;" -> {"C": "32", "WX": "600", "N": "space"}
        tmp = {
            y.split(" ")[0]: y.split(" ")[1]
            for y in [x.strip() for x in cml.split(";")]
            if " " in y
        }

        # determine char: decimal code (C) or hexadecimal code in brackets (CH)
        ch = -1
        if "C" in tmp:
            ch = int(tmp["C"])
        if "CH" in tmp:
            ch = int(tmp["CH"][1:-1], 16)

        if ch != -1:
            if first_char is None or ch < first_char:
                first_char = ch
            if last_char is None or ch > last_char:
                last_char = ch

        w = float(tmp["WX"])
        if ch != -1 and w != 0:
            avg_char_width += w
            avg_char_width_norm += 1

        widths.append(Decimal(w))

    assert first_char is not None
    assert last_char is not None

    out_font[Name("FirstChar")] = Decimal(first_char)
    out_font[Name("LastChar")] = Decimal(last_char)
    out_font[Name("Widths")] = widths

    if avgwidth is None:
        # no glyph with a non-zero width: the average is taken to be 0
        average = (
            avg_char_width / avg_char_width_norm if avg_char_width_norm else 0
        )
        out_font_descriptor[Name("AvgWidth")] = Decimal(round(Decimal(average), 2))
    if maxwidth is None:
        out_font_descriptor[Name("MaxWidth")] = Decimal(max(widths))
    out_font[Name("FontDescriptor")] = out_font_descriptor

    # return
    return out_font