Python PdfString Examples, pdfrw.objects.PdfString Python Examples

Example #1

0

Show file

File: pdf_redactor.py Project: dkloving/pdf-redactor

		def __str__(self):
			# __str__ is used for serialization
			if self.value == self.original_value:
				# If unchanged, return the raw original value without decoding/encoding.
				return PdfString.from_bytes(self.raw_original_value)
			else:
				# If the value changed, encode it from Unicode according to the encoding
				# of the font that is active at the location of this token.
				return PdfString.from_bytes(fromUnicode(self.value, self.font, fontcache, options))

Example #2

0

Show file

File: pdf_redactor.py Project: vitalbeats/pdf-redactor

def update_annotation(annotation, options):
    import re
    from pdfrw.objects import PdfString

    # Contents holds a plain-text representation of the annotation
    # content, such as for accessibility. All annotation types may
    # have a Contents. NM holds the "annotation name" which also
    # could have redactable text, I suppose. Markup annotations have
    # "T" fields that hold a title / text label. Subj holds a
    # comment subject. CA, RC, and AC are used in widget annotations.
    for string_field in ("Contents", "NM", "T", "Subj", "CA", "RC", "AC"):
        if getattr(annotation, string_field):
            value = getattr(annotation, string_field).to_unicode()
            for pattern, function in options.content_filters:
                value = pattern.sub(function, value)
            setattr(annotation, string_field, PdfString.from_unicode(value))

    # A rich-text stream. Not implemented. Bail so that we don't
    # accidentally leak something that should be redacted.
    if annotation.RC:
        raise ValueError(
            "Annotation rich-text streams (Annot/RC) are not supported.")

    # An action, usually used for links.
    if annotation.A:
        update_annotation_action(annotation, annotation.A, options)
    if annotation.PA:
        update_annotation_action(annotation, annotation.PA, options)

    # If set, another annotation.
    if annotation.Popup:
        update_annotation(annotation.Popup, options)

Example #3

0

Show file

File: pdf_redactor.py Project: vitalbeats/pdf-redactor

def update_annotation_action(annotation, action, options):
    from pdfrw.objects import PdfString

    if action.URI and options.link_filters:
        value = action.URI.to_unicode()
        for func in options.link_filters:
            value = func(value, annotation)
        if value is None:
            # Remove annotation by supressing the action.
            action.URI = None
        else:
            action.URI = PdfString.from_unicode(value)

    if action.Next:
        # May be an Action or array of Actions to execute next.
        next_action = action.Next
        if isinstance(action.Next, dict):
            next_action = [action.Next]
        for a in next_action:
            update_annotation_action(annotation, a, options)

Example #4

0

Show file

File: pdf_redactor.py Project: vitalbeats/pdf-redactor

def update_metadata(trailer, options):
    # Update the PDF's Document Information Dictionary, which contains keys like
    # Title, Author, Subject, Keywords, Creator, Producer, CreationDate, and ModDate
    # (the latter two containing Date values, the rest strings).

    import codecs
    from pdfrw.objects import PdfString, PdfName

    # Create the metadata dict if it doesn't exist, since the caller may be adding fields.
    if not trailer.Info:
        trailer.Info = PdfDict()

    # Get a list of all metadata fields that exist in the PDF plus any fields
    # that there are metadata filters for (since they may insert field values).
    keys = set(str(k)[1:] for k in trailer.Info.keys()) \
      | set(k for k in options.metadata_filters.keys() if k not in ("DEFAULT", "ALL"))

    # Update each metadata field.
    for key in keys:
        # Get the functions to apply to this field.
        functions = options.metadata_filters.get(key)
        if functions is None:
            # If nothing is defined for this field, use the DEFAULT functions.
            functions = options.metadata_filters.get("DEFAULT", [])

        # Append the ALL functions.
        functions += options.metadata_filters.get("ALL", [])

        # Run the functions on any existing values.
        value = trailer.Info[PdfName(key)]
        for f in functions:
            # Before passing to the function, convert from a PdfString to a Python string.
            if isinstance(value, PdfString):
                # decode from PDF's "(...)" syntax.
                value = value.decode()

            # Filter the value.
            value = f(value)

            # Convert Python data type to PdfString.
            if isinstance(value, str) or (sys.version_info < (3, )
                                          and isinstance(value, unicode)):
                # Convert string to a PdfString instance.
                value = PdfString.from_unicode(value)

            elif isinstance(value, datetime):
                # Convert datetime into a PDF "D" string format.
                value = value.strftime("%Y%m%d%H%M%S%z")
                if len(value) == 19:
                    # If TZ info was included, add an apostrophe between the hour/minutes offsets.
                    value = value[:17] + "'" + value[17:]
                value = PdfString("(D:%s)" % value)

            elif value is None:
                # delete the metadata value
                pass

            else:
                raise ValueError(
                    "Invalid type of value returned by metadata_filter function. %s was returned by %s."
                    % (repr(value), f.__name__ or "anonymous function"))

            # Replace value.
            trailer.Info[PdfName(key)] = value

Example #5

0

Show file

File: djpdf.py Project: Unrud/djpdf

    def _build_font():
        with open(FONT_FILENAME, "rb") as f:
            embedded_font_stream = f.read()
        embedded_font = PdfDict()
        embedded_font.indirect = True
        embedded_font.Filter = [PdfName.FlateDecode]
        embedded_font.stream = zlib.compress(embedded_font_stream, 9).decode(
            "latin-1")
        embedded_font.Length1 = len(embedded_font_stream)

        font_descriptor = PdfDict()
        font_descriptor.indirect = True
        font_descriptor.Ascent = 1000
        font_descriptor.CapHeight = 1000
        font_descriptor.Descent = -1
        font_descriptor.Flags = 5  # FixedPitch + Symbolic
        font_descriptor.FontBBox = PdfArray([0, 0, 1000, 500])
        font_descriptor.FontFile2 = embedded_font
        font_descriptor.FontName = PdfName.GlyphLessFont
        font_descriptor.ItalicAngle = 0
        font_descriptor.StemV = 80
        font_descriptor.Type = PdfName.FontDescriptor

        # Map everything to glyph 1
        cid_to_gid_map_stream = b"\0\1" * (1 << 16)
        cid_to_gid_map = PdfDict()
        cid_to_gid_map.indirect = True
        cid_to_gid_map.Filter = [PdfName.FlateDecode]
        cid_to_gid_map.stream = zlib.compress(
            cid_to_gid_map_stream, 9).decode("latin-1")
        cid_to_gid_map.Length1 = len(cid_to_gid_map_stream)

        cid_system_info = PdfDict()
        cid_system_info.Ordering = PdfString.from_unicode("Identity")
        cid_system_info.Registry = PdfString.from_unicode("Adobe")
        cid_system_info.Supplement = 0

        cid_font = PdfDict()
        cid_font.indirect = True
        cid_font.CIDToGIDMap = cid_to_gid_map
        cid_font.BaseFont = PdfName.GlyphLessFont
        cid_font.CIDSystemInfo = cid_system_info
        cid_font.FontDescriptor = font_descriptor
        cid_font.Subtype = PdfName.CIDFontType2
        cid_font.Type = PdfName.Font
        cid_font.DW = 500

        with open(UNICODE_CMAP_FILENAME, "rb") as f:
            unicode_cmap_stream = f.read()
        unicode_cmap = PdfDict()
        unicode_cmap.indirect = True
        unicode_cmap.Filter = [PdfName.FlateDecode]
        unicode_cmap.stream = zlib.compress(unicode_cmap_stream, 9).decode(
            "latin-1")

        font = PdfDict()
        font.indirect = True
        font.BaseFont = PdfName.GlyphLessFont
        font.DescendantFonts = PdfArray([cid_font])
        font.Encoding = PdfName("Identity-H")
        font.Subtype = PdfName.Type0
        font.ToUnicode = unicode_cmap
        font.Type = PdfName.Font

        return font

Example #6

0

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

Example #7

0

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

    def write_async(self, outfile, process_semaphore, progress_cb=None):
        pdf_writer = PdfWriter(version="1.5")

        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes a internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        srgb_colorspace = PdfDict()
        srgb_colorspace.indirect = True
        srgb_colorspace.N = 3  # Number of components (red, green, blue)
        with open(SRGB_ICC_FILENAME, "rb") as f:
            srgb_colorspace_stream = f.read()
        srgb_colorspace.Filter = [PdfName.FlateDecode]
        srgb_colorspace.stream = zlib.compress(srgb_colorspace_stream,
                                               9).decode("latin-1")
        srgb_colorspace.Length1 = len(srgb_colorspace_stream)
        default_rgb_colorspace = PdfArray([PdfName.ICCBased, srgb_colorspace])
        default_rgb_colorspace.indirect = True

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

        finished_pages = 0
        yield from asyncio.gather(*[
            make_page(page, pdf_page, process_semaphore)
            for page, pdf_page in zip(self._pages, pdf_pages)
        ])

        trailer = pdf_writer.trailer

        document_id = PdfString().from_bytes(os.urandom(16))
        trailer.ID = [document_id, document_id]

        mark_info = PdfDict()
        mark_info.Marked = PdfBool(True)
        trailer.Root.MarkInfo = mark_info

        struct_tree_root = PdfDict()
        struct_tree_root.Type = PdfName.StructTreeRoot
        trailer.Root.StructTreeRoot = struct_tree_root

        metadata = PdfDict()
        metadata.indirect = True
        metadata.Type = PdfName.Metadata
        metadata.Subtype = PdfName.XML
        xmp = XMPMeta()
        xmp.set_property(XMP_NS_PDFA_ID, "part", "2")
        xmp.set_property(XMP_NS_PDFA_ID, "conformance", "A")
        metadata_stream = xmp.serialize_to_str().encode("utf-8")
        metadata.Filter = [PdfName.FlateDecode]
        metadata.stream = zlib.compress(metadata_stream, 9).decode("latin-1")
        metadata.Length1 = len(metadata_stream)
        trailer.Root.Metadata = metadata

        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [
                QPDF_CMD, "--stream-data=preserve",
                "--object-streams=preserve", "--normalize-content=n",
                "--newline-before-endstream"
            ]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([
                path.abspath(path.join(temp_dir, "temp.pdf")),
                path.abspath(outfile)
            ])
            yield from run_command_async(cmd, process_semaphore)

Example #8

0

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

    def _build_font():
        with open(FONT_FILENAME, "rb") as f:
            embedded_font_stream = f.read()
        embedded_font = PdfDict()
        embedded_font.indirect = True
        embedded_font.Filter = [PdfName.FlateDecode]
        embedded_font.stream = zlib.compress(embedded_font_stream,
                                             9).decode("latin-1")
        embedded_font.Length1 = len(embedded_font_stream)

        font_descriptor = PdfDict()
        font_descriptor.indirect = True
        font_descriptor.Ascent = 1000
        font_descriptor.CapHeight = 1000
        font_descriptor.Descent = -1
        font_descriptor.Flags = 5  # FixedPitch + Symbolic
        font_descriptor.FontBBox = PdfArray([0, 0, 1000, 500])
        font_descriptor.FontFile2 = embedded_font
        font_descriptor.FontName = PdfName.GlyphLessFont
        font_descriptor.ItalicAngle = 0
        font_descriptor.StemV = 80
        font_descriptor.Type = PdfName.FontDescriptor

        # Map everything to glyph 1
        cid_to_gid_map_stream = b"\0\1" * (1 << 16)
        cid_to_gid_map = PdfDict()
        cid_to_gid_map.indirect = True
        cid_to_gid_map.Filter = [PdfName.FlateDecode]
        cid_to_gid_map.stream = zlib.compress(cid_to_gid_map_stream,
                                              9).decode("latin-1")
        cid_to_gid_map.Length1 = len(cid_to_gid_map_stream)

        cid_system_info = PdfDict()
        cid_system_info.Ordering = PdfString.from_unicode("Identity")
        cid_system_info.Registry = PdfString.from_unicode("Adobe")
        cid_system_info.Supplement = 0

        cid_font = PdfDict()
        cid_font.indirect = True
        cid_font.CIDToGIDMap = cid_to_gid_map
        cid_font.BaseFont = PdfName.GlyphLessFont
        cid_font.CIDSystemInfo = cid_system_info
        cid_font.FontDescriptor = font_descriptor
        cid_font.Subtype = PdfName.CIDFontType2
        cid_font.Type = PdfName.Font
        cid_font.DW = 500

        with open(UNICODE_CMAP_FILENAME, "rb") as f:
            unicode_cmap_stream = f.read()
        unicode_cmap = PdfDict()
        unicode_cmap.indirect = True
        unicode_cmap.Filter = [PdfName.FlateDecode]
        unicode_cmap.stream = zlib.compress(unicode_cmap_stream,
                                            9).decode("latin-1")

        font = PdfDict()
        font.indirect = True
        font.BaseFont = PdfName.GlyphLessFont
        font.DescendantFonts = PdfArray([cid_font])
        font.Encoding = PdfName("Identity-H")
        font.Subtype = PdfName.Type0
        font.ToUnicode = unicode_cmap
        font.Type = PdfName.Font

        return font