Beispiel #1
0
    def transform(
        self,
        object_to_transform: Union[io.BufferedIOBase, io.RawIOBase,
                                   AnyPDFType],
        parent_object: Any,
        context: Optional[ReadTransformerContext] = None,
        event_listeners: typing.List[EventListener] = [],
    ) -> Any:
        """
        This function reads a /FontDescriptor Dictionary and returns the
        object that represents it.

        If parent_object is already a Font, the dictionary is handed to the
        first ReadDictionaryTransformer found among the root transformer's
        children and that transformer's result is returned.

        Otherwise (or if no such child exists) an intermediate Font is created
        with parent_object as its parent, every listener in event_listeners is
        attached to it, and the result of running the root transformer on the
        dictionary is stored in that Font under the "FontDescriptor" key.

        :param object_to_transform: the object to convert, must be a Dictionary
        :param parent_object:       the parent of the object being converted
        :param context:             passed through unchanged to the delegated
                                    transform calls
        :param event_listeners:     listeners to attach to the intermediate
                                    Font (this method only iterates the list)
        :return:                    the delegated transformer's result, or the
                                    intermediate Font
        """

        assert isinstance(object_to_transform, Dictionary)

        root_transformer = self.get_root_transformer()

        # convert like regular dictionary
        if isinstance(parent_object, Font):
            for child in root_transformer.children:
                if isinstance(child, ReadDictionaryTransformer):
                    return child.transform(object_to_transform, parent_object,
                                           context, event_listeners)

        # build intermittent Font object
        tmp = Font().set_parent(parent_object)  # type: ignore [attr-defined]

        # add listener(s)
        for listener in event_listeners:
            tmp.add_event_listener(listener)

        # the nested transform gets an empty listener list: the listeners are
        # attached to the intermediate Font above, not passed further down
        tmp[Name("FontDescriptor")] = root_transformer.transform(
            object_to_transform, tmp, context, [])

        # return
        return tmp
    def transform(
        self,
        object_to_transform: Union[io.BufferedIOBase, io.RawIOBase,
                                   AnyPDFType],
        parent_object: Any,
        context: Optional[TransformerContext] = None,
        event_listeners: typing.List[EventListener] = [],
    ) -> Any:
        """
        Convert a font descriptor object, wrapping it in a Font when needed.

        When parent_object is a Font, the work is delegated to the first
        DefaultDictionaryTransformer among the root transformer's handlers (if
        there is one). In every other case a fresh Font is created as a child
        of parent_object, the given event listeners are attached to it, and
        the root transformer's result for object_to_transform is stored in it
        under the "FontDescriptor" key.

        :param object_to_transform: the object to convert
        :param parent_object:       the parent of the object being converted
        :param context:             passed through to the delegated transforms
        :param event_listeners:     listeners to attach to the wrapping Font
        :return:                    the delegated result, or the wrapping Font
        """

        # a Font parent means this is handled like any other dictionary
        if isinstance(parent_object, Font):
            dictionary_handler = next(
                (handler
                 for handler in self.get_root_transformer().handlers
                 if isinstance(handler, DefaultDictionaryTransformer)),
                None,
            )
            if dictionary_handler is not None:
                return dictionary_handler.transform(
                    object_to_transform, parent_object, context,
                    event_listeners)

        # no Font parent (or no dictionary handler): wrap in a temporary Font
        font_wrapper = Font().set_parent(parent_object)  # type: ignore [attr-defined]
        for listener in event_listeners:
            font_wrapper.add_event_listener(listener)

        descriptor = self.get_root_transformer().transform(
            object_to_transform, font_wrapper, context, [])
        font_wrapper["FontDescriptor"] = descriptor

        return font_wrapper
Beispiel #3
0
    def __init__(
            self,
            text_bytes: bytes,
            font: Font,
            font_size: Decimal,
            character_spacing: Decimal = Decimal(0),
            word_spacing: Decimal = Decimal(0),
            horizontal_scaling: Decimal = Decimal(100),
    ):
        """
        Decode text_bytes into a list of Glyph objects using the given font.

        At every position the two-byte code (big-endian: first byte * 256 +
        second byte) is tried first; if the font maps it to unicode, both
        bytes are consumed. Otherwise the single byte is tried. A byte the
        font cannot map at all becomes a glyph with the replacement text and a
        width of 250.

        :param text_bytes:          the raw bytes to decode
        :param font:                the Font used for the code-to-unicode
                                    lookup and the glyph widths
        :param font_size:           stored as-is
        :param character_spacing:   stored as-is
        :param word_spacing:        stored as-is
        :param horizontal_scaling:  stored as-is
        """
        assert isinstance(font, Font)
        self._glyphs: typing.List[Glyph] = []

        byte_count: int = len(text_bytes)
        pos: int = 0
        while pos < byte_count:
            single_code: int = text_bytes[pos]

            # sometimes, 2 bytes make up 1 unicode char
            if pos + 1 < byte_count:
                double_code: int = single_code * 256 + text_bytes[pos + 1]
                text: typing.Optional[str] = (
                    font.character_identifier_to_unicode(double_code))
                if text is not None:
                    self._glyphs.append(
                        Glyph(
                            double_code,
                            text,
                            font.get_width(double_code) or pDecimal(0),
                        ))
                    pos += 2
                    continue

            # usually it's 1 byte though
            text = font.character_identifier_to_unicode(single_code)
            if text is None:
                # no mapping found
                self._glyphs.append(Glyph(single_code, "�", Decimal(250)))
            else:
                self._glyphs.append(
                    Glyph(
                        single_code,
                        text,
                        font.get_width(single_code) or Decimal(0),
                    ))
            pos += 1

        self._font = font
        self._font_size = font_size
        self._character_spacing = character_spacing
        self._word_spacing = word_spacing
        self._horizontal_scaling = horizontal_scaling
Beispiel #4
0
    def _find_best_matching_predefined_cmap(
            cmap_name: str) -> typing.Dict[int, str]:
        """
        Load the predefined cmap that best matches cmap_name.

        Predefined cmaps are the files in the "cmaps" directory next to this
        module. If cmap_name is not the name of one of those files, a known
        substitute is tried first (Adobe-Identity-UCS2 -> Adobe-Identity-H,
        Adobe-Japan1-UCS2 -> Adobe-Japan1-0); if that still does not name an
        existing file, Adobe-Identity-H is used. Every substitution is logged
        at info level.

        :param cmap_name:   the name of the requested cmap
        :return:            the result of Font._read_cmap on the bytes of the
                            selected cmap file
        """
        cmap_dir: Path = Path(__file__).parent / "cmaps"
        assert cmap_dir.exists()
        predefined_cmaps: typing.Set[str] = {
            x.name for x in cmap_dir.iterdir()
        }

        # pseudo match
        if cmap_name not in predefined_cmaps:

            substitutes: typing.Dict[str, str] = {
                "Adobe-Identity-UCS2": "Adobe-Identity-H",
                "Adobe-Japan1-UCS2": "Adobe-Japan1-0",
            }
            substitute: typing.Optional[str] = substitutes.get(cmap_name)
            if substitute is not None:
                # log the name that was actually requested
                logger.info(
                    "Encoding %s was specified, using %s in stead",
                    cmap_name,
                    substitute,
                )
                cmap_name = substitute

            # the substitute itself may be missing from the cmaps directory
            if cmap_name not in predefined_cmaps:
                logger.info(
                    "Encoding %s was specified, defaulting to Adobe-Identity-H in stead",
                    cmap_name,
                )
                cmap_name = "Adobe-Identity-H"

        return Font._read_cmap((cmap_dir / cmap_name).read_bytes())
    def _read_file(input: io.IOBase) -> Optional[Font]:
        """
        Build a Type1 Font (including its FontDescriptor) from the lines of an
        Adobe Font Metrics file.

        Lines starting with "Comment" are ignored. The remaining lines must
        start with a "StartFontMetrics" line and end with an "EndFontMetrics"
        line. The glyph widths, FirstChar and LastChar are taken from the
        lines between "StartCharMetrics" and "EndCharMetrics". AvgWidth and
        MaxWidth are derived from those widths when the file does not provide
        them.

        :param input:   the stream to read the lines from (the lines are
                        handled as str)
        :return:        the Font, or None if the input is empty or lacks the
                        StartFontMetrics / EndFontMetrics delimiters
        """
        lines = [x for x in input.readlines() if not x.startswith("Comment")]
        lines = [x[:-1] if x.endswith("\n") else x for x in lines]

        # check first/last line (an empty input has neither, so reject it too
        # instead of failing on lines[0])
        if (
            not lines
            or not lines[0].startswith("StartFontMetrics")
            or not lines[-1].startswith("EndFontMetrics")
        ):
            return None

        out_font = Font()

        # FontDescriptor
        out_font_descriptor = FontDescriptor().set_parent(out_font)
        out_font_descriptor["FontName"] = AdobeFontMetrics._find_and_parse_as_string(
            lines, "FontName"
        )
        out_font_descriptor["FontFamily"] = AdobeFontMetrics._find_and_parse_as_string(
            lines, "FamilyName"
        )
        # not populated here: FontStretch, FontWeight, Flags, FontBBox,
        # ItalicAngle, Leading, FontFile, FontFile2, FontFile3

        # numeric entries: (FontDescriptor key, key in the metrics file)
        for descriptor_key, afm_key in (
            ("Ascent", "Ascender"),
            ("Descent", "Descender"),
            ("CapHeight", "CapHeight"),
            ("XHeight", "XHeight"),
            ("StemV", "StemV"),
            ("StemH", "StemH"),
            ("AvgWidth", "AvgWidth"),
            ("MaxWidth", "MaxWidth"),
            ("MissingWidth", "MissingWidth"),
        ):
            out_font_descriptor[descriptor_key] = AdobeFontMetrics._find_and_parse_as_float(
                lines, afm_key
            )
        out_font_descriptor["CharSet"] = AdobeFontMetrics._find_and_parse_as_integer(
            lines, "Characters"
        )

        # Font
        out_font["Type"] = "Font"
        out_font["Subtype"] = "Type1"
        out_font["Name"] = out_font_descriptor["FontName"]
        out_font["BaseFont"] = out_font_descriptor["FontName"]

        widths = List().set_parent(out_font)
        avg_char_width = 0
        avg_char_width_norm = 0
        first_char = None
        last_char = None

        # everything between the StartCharMetrics line and the EndCharMetrics line
        char_metrics_lines = lines[
            lines.index(
                [x for x in lines if x.startswith("StartCharMetrics")][0]
            ) : lines.index("EndCharMetrics")
            + 1
        ]
        char_metrics_lines = char_metrics_lines[1:-1]
        for cml in char_metrics_lines:
            # each line is a ';' separated list of "KEY VALUE ..." fields,
            # only the first value of every field is kept
            fields = [x.strip().split(" ") for x in cml.split(";")]
            tmp = {f[0]: f[1] for f in fields if len(f) > 1}

            # determine char: "C" holds a decimal code, "CH" a hexadecimal
            # code wrapped in one delimiter character on each side;
            # -1 means no character code is known
            ch = -1
            if "C" in tmp:
                ch = int(tmp["C"])
            if "CH" in tmp:
                ch = int(tmp["CH"][1:-1], 16)

            if (first_char is None or ch < first_char) and ch != -1:
                first_char = ch
            if (last_char is None or ch > last_char) and ch != -1:
                last_char = ch

            # only glyphs with a code and a non-zero width count for the average
            w = float(tmp["WX"])
            if ch != -1 and w != 0:
                avg_char_width += w
                avg_char_width_norm += 1

            # every glyph contributes to widths, with or without a code
            widths.append(Decimal(w))

        out_font["FirstChar"] = Decimal(first_char)
        out_font["LastChar"] = Decimal(last_char)
        out_font["Widths"] = widths

        if out_font_descriptor["AvgWidth"] is None:
            # no glyph qualified for the average: fall back to 0 rather than
            # dividing by zero
            mean_width = (
                avg_char_width / avg_char_width_norm if avg_char_width_norm else 0
            )
            out_font_descriptor["AvgWidth"] = round(Decimal(mean_width), 2)
        if out_font_descriptor["MaxWidth"] is None:
            out_font_descriptor["MaxWidth"] = max(widths)
        out_font["FontDescriptor"] = out_font_descriptor

        # return
        return out_font
    def _read_file(input: typing.TextIO) -> Optional[Font]:
        """
        Build a Type1 Font (including its FontDescriptor) from the text of an
        Adobe Font Metrics file.

        Lines starting with "Comment" are ignored. The remaining lines must
        start with a "StartFontMetrics" line and end with an "EndFontMetrics"
        line. ItalicAngle, Ascent, Descent, CapHeight, StemV and StemH default
        to 0 when the file has no (non-zero) value for them; XHeight,
        AvgWidth, MaxWidth, MissingWidth and CharSet are only copied when
        present. The glyph widths, FirstChar and LastChar are taken from the
        lines between "StartCharMetrics" and "EndCharMetrics"; AvgWidth and
        MaxWidth are derived from those widths when the file does not provide
        them.

        NOTE(review): Name and BaseFont are read back from the descriptor's
        "FontName" entry, which is only set when the file provides a FontName;
        presumably that lookup fails for a file without one - confirm.

        :param input:   the text stream to read the lines from
        :return:        the Font, or None if the input is empty or lacks the
                        StartFontMetrics / EndFontMetrics delimiters
        """
        lines: typing.List[str] = [
            x[:-1] if x.endswith("\n") else x
            for x in input.readlines()
            if not x.startswith("Comment")
        ]

        # check first/last line (an empty input has neither, so reject it too
        # instead of failing on lines[0])
        if (
            not lines
            or not lines[0].startswith("StartFontMetrics")
            or not lines[-1].startswith("EndFontMetrics")
        ):
            return None

        def _float_or_zero(afm_key: str) -> Decimal:
            # missing and zero values both end up as Decimal(0)
            value = AdobeFontMetrics._find_and_parse_as_float(lines, afm_key)
            return Decimal(value) if value else Decimal(0)

        out_font = Font()

        # FontDescriptor
        out_font_descriptor = FontDescriptor().set_parent(out_font)  # type: ignore [attr-defined]
        font_name = AdobeFontMetrics._find_and_parse_as_string(lines, "FontName")
        if font_name:
            out_font_descriptor[Name("FontName")] = Name(font_name)
        font_family = AdobeFontMetrics._find_and_parse_as_string(lines, "FamilyName")
        if font_family:
            out_font_descriptor[Name("FontFamily")] = String(font_family)

        # not populated here: FontStretch, FontWeight, Leading

        # FontBBox
        fontbbox_str = AdobeFontMetrics._find_and_parse_as_string(lines, "FontBBox")
        if fontbbox_str:
            fontbbox = [Decimal(x) for x in fontbbox_str.split(" ")]
            out_font_descriptor[Name("FontBBox")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            for x in fontbbox:
                out_font_descriptor[Name("FontBBox")].append(x)

        # ItalicAngle, Ascent, Descent
        out_font_descriptor[Name("ItalicAngle")] = _float_or_zero("ItalicAngle")
        out_font_descriptor[Name("Ascent")] = _float_or_zero("Ascender")
        out_font_descriptor[Name("Descent")] = _float_or_zero("Descender")

        # Flags (hard-coded, not derived from the file)
        out_font_descriptor[Name("Flags")] = Decimal(131104)

        # CapHeight
        out_font_descriptor[Name("CapHeight")] = _float_or_zero("CapHeight")

        # XHeight
        xheight = AdobeFontMetrics._find_and_parse_as_float(lines, "XHeight")
        if xheight:
            out_font_descriptor[Name("XHeight")] = Decimal(xheight)

        # StemV, StemH
        out_font_descriptor[Name("StemV")] = _float_or_zero("StemV")
        out_font_descriptor[Name("StemH")] = _float_or_zero("StemH")

        # AvgWidth
        avgwidth = AdobeFontMetrics._find_and_parse_as_float(lines, "AvgWidth")
        if avgwidth:
            out_font_descriptor[Name("AvgWidth")] = Decimal(avgwidth)

        # MaxWidth
        maxwidth = AdobeFontMetrics._find_and_parse_as_float(lines, "MaxWidth")
        if maxwidth:
            out_font_descriptor[Name("MaxWidth")] = Decimal(maxwidth)

        # MissingWidth
        missingwidth = AdobeFontMetrics._find_and_parse_as_float(lines, "MissingWidth")
        if missingwidth:
            out_font_descriptor[Name("MissingWidth")] = Decimal(missingwidth)

        # CharSet
        charset = AdobeFontMetrics._find_and_parse_as_float(lines, "CharSet")
        if charset:
            out_font_descriptor[Name("CharSet")] = Decimal(charset)

        # Font
        out_font[Name("Type")] = Name("Font")
        out_font[Name("Subtype")] = Name("Type1")
        out_font[Name("Name")] = out_font_descriptor["FontName"]
        out_font[Name("BaseFont")] = out_font_descriptor["FontName"]

        widths = List().set_parent(out_font)  # type: ignore [attr-defined]
        avg_char_width: float = 0
        avg_char_width_norm: float = 0
        first_char = None
        last_char = None

        # everything between the StartCharMetrics line and the EndCharMetrics line
        char_metrics_lines = lines[
            lines.index(
                [x for x in lines if x.startswith("StartCharMetrics")][0]
            ) : lines.index("EndCharMetrics")
            + 1
        ]
        char_metrics_lines = char_metrics_lines[1:-1]
        for cml in char_metrics_lines:
            # each line is a ';' separated list of "KEY VALUE ..." fields,
            # only the first value of every field is kept
            fields = [x.strip().split(" ") for x in cml.split(";")]
            tmp = {f[0]: f[1] for f in fields if len(f) > 1}

            # determine char: "C" holds a decimal code, "CH" a hexadecimal
            # code wrapped in one delimiter character on each side;
            # -1 means no character code is known
            ch = -1
            if "C" in tmp:
                ch = int(tmp["C"])
            if "CH" in tmp:
                ch = int(tmp["CH"][1:-1], 16)

            if (first_char is None or ch < first_char) and ch != -1:
                first_char = ch
            if (last_char is None or ch > last_char) and ch != -1:
                last_char = ch

            # only glyphs with a code and a non-zero width count for the average
            w = float(tmp["WX"])
            if ch != -1 and w != 0:
                avg_char_width += w
                avg_char_width_norm += 1

            # every glyph contributes to widths, with or without a code
            widths.append(Decimal(w))

        assert first_char is not None
        assert last_char is not None

        out_font[Name("FirstChar")] = Decimal(first_char)
        out_font[Name("LastChar")] = Decimal(last_char)
        out_font[Name("Widths")] = widths

        if avgwidth is None:
            # no glyph qualified for the average: fall back to 0 rather than
            # dividing by zero
            mean_width = (
                avg_char_width / avg_char_width_norm if avg_char_width_norm else 0
            )
            out_font_descriptor[Name("AvgWidth")] = Decimal(
                round(Decimal(mean_width), 2)
            )
        if maxwidth is None:
            out_font_descriptor[Name("MaxWidth")] = Decimal(max(widths))
        out_font[Name("FontDescriptor")] = out_font_descriptor

        # return
        return out_font