Python PdfDict.Filter Examples

Programming Language: Python

Namespace/Package Name: pdfrw.objects

Class/Type: PdfDict

Method/Function: Filter

Examples at hotexamples.com: 8

Python PdfDict.Filter - 8 examples found. These are the top rated real world Python examples of pdfrw.objects.PdfDict.Filter extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

PdfDict(24)

Filter(5)

Length1(2)

DecodeParms(2)

ColorSpace(2)

FontFile2(1)

FontName(1)

Height(1)

I(1)

ImageMask(1)

ItalicAngle(1)

A(1)

Length(1)

FontBBox(1)

Marked(1)

Mask(1)

Matrix(1)

N(1)

Ordering(1)

P(1)

FontDescriptor(1)

Flags(1)

Font(1)

CapHeight(1)

BaseFont(1)

BitsPerComponent(1)

Border(1)

CIDSystemInfo(1)

CIDToGIDMap(1)

CS(1)

D(1)

Ascent(1)

DW(1)

DefaultRGB(1)

DescendantFonts(1)

Descent(1)

Dest(1)

Encoding(1)

F1(1)

Rect(1)

Example #1

Show file

def wrap_object(obj, width, margin):
    ''' Wrap an xobj in its own page object.
    '''
    fmt = 'q %s 0 0 %s %s %s cm /MyImage Do Q'
    contents = PdfDict(indirect=True)
    subtype = obj.Subtype
    if subtype == PdfName.Form:
        contents._stream = obj.stream
        contents.Length = obj.Length
        contents.Filter = obj.Filter
        contents.DecodeParms = obj.DecodeParms
        resources = obj.Resources
        mbox = obj.BBox
    elif subtype == PdfName.Image:  # Image
        xoffset = margin[0]
        yoffset = margin[1]
        cw = width - margin[0] - margin[2]
        iw, ih = float(obj.Width), float(obj.Height)
        ch = 1.0 * cw / iw * ih
        height = ch + margin[1] + margin[3]
        p = tuple(('%.9f' % x).rstrip('0').rstrip('.') for x in (cw, ch, xoffset, yoffset))
        contents.stream = fmt % p
        resources = PdfDict(XObject=PdfDict(MyImage=obj))
        mbox = PdfArray((0, 0, width, height))
    else:
        raise TypeError("Expected Form or Image XObject")

    return PdfDict(
        indirect=True,
        Type=PdfName.Page,
        MediaBox=mbox,
        Resources=resources,
        Contents=contents,
        )

Example #2

Show file

File: djpdf.py Project: Unrud/djpdf

        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))
            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem),
                    get_pdf_background(psem),
                    asyncio.gather(*[fg.pdf_image(psem)
                                     for fg in page.foreground]),
                    asyncio.gather(*[get_pdf_mask(fg, psem)
                                     for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray([0, 0,
                                          PdfNumber(page.width),
                                          PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" +
                               "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " +
                             "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None and
                        current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" +
                           "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(),
                        PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [PdfNumber(t.x),
                                      PdfNumber(t.y),
                                      PdfNumber(t.x + t.width),
                                      PdfNumber(t.y + t.height)]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page,
                            PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y),
                            0]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"),
                        9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

Example #3

Show file

File: djpdf.py Project: Unrud/djpdf

    def _build_font():
        with open(FONT_FILENAME, "rb") as f:
            embedded_font_stream = f.read()
        embedded_font = PdfDict()
        embedded_font.indirect = True
        embedded_font.Filter = [PdfName.FlateDecode]
        embedded_font.stream = zlib.compress(embedded_font_stream, 9).decode(
            "latin-1")
        embedded_font.Length1 = len(embedded_font_stream)

        font_descriptor = PdfDict()
        font_descriptor.indirect = True
        font_descriptor.Ascent = 1000
        font_descriptor.CapHeight = 1000
        font_descriptor.Descent = -1
        font_descriptor.Flags = 5  # FixedPitch + Symbolic
        font_descriptor.FontBBox = PdfArray([0, 0, 1000, 500])
        font_descriptor.FontFile2 = embedded_font
        font_descriptor.FontName = PdfName.GlyphLessFont
        font_descriptor.ItalicAngle = 0
        font_descriptor.StemV = 80
        font_descriptor.Type = PdfName.FontDescriptor

        # Map everything to glyph 1
        cid_to_gid_map_stream = b"\0\1" * (1 << 16)
        cid_to_gid_map = PdfDict()
        cid_to_gid_map.indirect = True
        cid_to_gid_map.Filter = [PdfName.FlateDecode]
        cid_to_gid_map.stream = zlib.compress(
            cid_to_gid_map_stream, 9).decode("latin-1")
        cid_to_gid_map.Length1 = len(cid_to_gid_map_stream)

        cid_system_info = PdfDict()
        cid_system_info.Ordering = PdfString.from_unicode("Identity")
        cid_system_info.Registry = PdfString.from_unicode("Adobe")
        cid_system_info.Supplement = 0

        cid_font = PdfDict()
        cid_font.indirect = True
        cid_font.CIDToGIDMap = cid_to_gid_map
        cid_font.BaseFont = PdfName.GlyphLessFont
        cid_font.CIDSystemInfo = cid_system_info
        cid_font.FontDescriptor = font_descriptor
        cid_font.Subtype = PdfName.CIDFontType2
        cid_font.Type = PdfName.Font
        cid_font.DW = 500

        with open(UNICODE_CMAP_FILENAME, "rb") as f:
            unicode_cmap_stream = f.read()
        unicode_cmap = PdfDict()
        unicode_cmap.indirect = True
        unicode_cmap.Filter = [PdfName.FlateDecode]
        unicode_cmap.stream = zlib.compress(unicode_cmap_stream, 9).decode(
            "latin-1")

        font = PdfDict()
        font.indirect = True
        font.BaseFont = PdfName.GlyphLessFont
        font.DescendantFonts = PdfArray([cid_font])
        font.Encoding = PdfName("Identity-H")
        font.Subtype = PdfName.Type0
        font.ToUnicode = unicode_cmap
        font.Type = PdfName.Font

        return font

Example #4

Show file

File: djpdf.py Project: Unrud/djpdf

    def _pdf_image(self, psem):
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            # JBIG2Globals are only used in symbol mode
            # In symbol mode jbig2 writes output to files otherwise
            # it's written to stdout
            symbol_mode = self.jbig2_threshold != 1
            images_with_shared_globals = []
            if symbol_mode and SHARE_JBIG2_GLOBALS:
                # Find all Jbig2Images that share the same symbol directory
                for obj in self._factory._cache:
                    if (isinstance(obj, Jbig2Image) and
                            self.compression == obj.compression and
                            self.jbig2_threshold == obj.jbig2_threshold):
                        images_with_shared_globals.append(obj)
            else:
                # The symbol directory is not shared with other Jbig2Images
                images_with_shared_globals.append(self)
            # Promise all handled Jbig2Images the finished image
            image_futures = []
            my_image_future = None
            for image in images_with_shared_globals:
                future = asyncio.Future()
                asyncio.ensure_future(image._cache.get(future))
                image_futures.append(future)
                if image is self:
                    my_image_future = future
            # All futures are in place, the lock can be released
            self._factory._cache_lock.release()
            self._cache_lock_acquired = False

            # Prepare everything in parallel
            @asyncio.coroutine
            def get_jbig2_images(psem):
                # Convert images with ImageMagick to bitonal png in parallel
                yield from asyncio.gather(*[
                    run_command_async([
                        CONVERT_CMD,
                        "-alpha", "remove",
                        "-alpha", "off",
                        "-colorspace", "gray",
                        "-threshold", "50%",
                        path.abspath(image.filename),
                        path.abspath(path.join(temp_dir,
                                               "input.%d.png" % i))], psem)
                    for i, image in enumerate(images_with_shared_globals)])
                cmd = [JBIG2_CMD, "-p"]
                if symbol_mode:
                    cmd.extend(["-s", "-t",
                                format_number(self.jbig2_threshold, 4)])
                for i, _ in enumerate(images_with_shared_globals):
                    cmd.append(path.abspath(path.join(temp_dir,
                                                      "input.%d.png" % i)))
                jbig2_images = []
                jbig2_globals = None
                if symbol_mode:
                    yield from run_command_async(cmd, psem, cwd=temp_dir)
                    jbig2_globals = PdfDict()
                    jbig2_globals.indirect = True
                    with open(path.join(temp_dir, "output.sym"), "rb") as f:
                        jbig2_globals.stream = f.read().decode("latin-1")
                    for i, _ in enumerate(images_with_shared_globals):
                        with open(path.join(temp_dir,
                                  "output.%04d" % i), "rb") as f:
                            jbig2_images.append(f.read())
                else:
                    jbig2_images.append(
                        (yield from run_command_async(cmd, psem,
                                                      cwd=temp_dir)))
                return jbig2_images, jbig2_globals

            @asyncio.coroutine
            def get_image_mask(image, psem):
                if image._mask is None:
                    return None
                return (yield from image._mask.pdf_image(psem))
            ((jbig2_images, jbig2_globals),
             image_masks) = yield from asyncio.gather(
                get_jbig2_images(psem),
                asyncio.gather(*[get_image_mask(image, psem)
                                 for image in images_with_shared_globals]))

            for image, jbig2_image, image_mask, image_future in zip(
                    images_with_shared_globals, jbig2_images, image_masks,
                    image_futures):
                (width, height, xres, yres) = struct.unpack(
                    '>IIII', jbig2_image[11:27])
                pdf_image = PdfDict()
                pdf_image.indirect = True
                pdf_image.Type = PdfName.XObject
                pdf_image.Subtype = PdfName.Image
                pdf_image.Width = width
                pdf_image.Height = height
                if image._image_mask:
                    pdf_image.ImageMask = PdfBool(True)
                else:
                    pdf_image.ColorSpace = PdfName.DeviceGray
                if image_mask is not None:
                    pdf_image.Mask = image_mask
                pdf_image.BitsPerComponent = 1
                pdf_image.Filter = [PdfName.JBIG2Decode]
                if symbol_mode:
                    pdf_image.DecodeParms = [{
                        PdfName.JBIG2Globals: jbig2_globals}]
                pdf_image.stream = jbig2_image.decode("latin-1")
                image_future.set_result(pdf_image)
        return my_image_future.result()

Example #5

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

Example #6

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

    def write_async(self, outfile, process_semaphore, progress_cb=None):
        pdf_writer = PdfWriter(version="1.5")

        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes a internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        srgb_colorspace = PdfDict()
        srgb_colorspace.indirect = True
        srgb_colorspace.N = 3  # Number of components (red, green, blue)
        with open(SRGB_ICC_FILENAME, "rb") as f:
            srgb_colorspace_stream = f.read()
        srgb_colorspace.Filter = [PdfName.FlateDecode]
        srgb_colorspace.stream = zlib.compress(srgb_colorspace_stream,
                                               9).decode("latin-1")
        srgb_colorspace.Length1 = len(srgb_colorspace_stream)
        default_rgb_colorspace = PdfArray([PdfName.ICCBased, srgb_colorspace])
        default_rgb_colorspace.indirect = True

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

        finished_pages = 0
        yield from asyncio.gather(*[
            make_page(page, pdf_page, process_semaphore)
            for page, pdf_page in zip(self._pages, pdf_pages)
        ])

        trailer = pdf_writer.trailer

        document_id = PdfString().from_bytes(os.urandom(16))
        trailer.ID = [document_id, document_id]

        mark_info = PdfDict()
        mark_info.Marked = PdfBool(True)
        trailer.Root.MarkInfo = mark_info

        struct_tree_root = PdfDict()
        struct_tree_root.Type = PdfName.StructTreeRoot
        trailer.Root.StructTreeRoot = struct_tree_root

        metadata = PdfDict()
        metadata.indirect = True
        metadata.Type = PdfName.Metadata
        metadata.Subtype = PdfName.XML
        xmp = XMPMeta()
        xmp.set_property(XMP_NS_PDFA_ID, "part", "2")
        xmp.set_property(XMP_NS_PDFA_ID, "conformance", "A")
        metadata_stream = xmp.serialize_to_str().encode("utf-8")
        metadata.Filter = [PdfName.FlateDecode]
        metadata.stream = zlib.compress(metadata_stream, 9).decode("latin-1")
        metadata.Length1 = len(metadata_stream)
        trailer.Root.Metadata = metadata

        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [
                QPDF_CMD, "--stream-data=preserve",
                "--object-streams=preserve", "--normalize-content=n",
                "--newline-before-endstream"
            ]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([
                path.abspath(path.join(temp_dir, "temp.pdf")),
                path.abspath(outfile)
            ])
            yield from run_command_async(cmd, process_semaphore)

Example #7

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

    def _build_font():
        with open(FONT_FILENAME, "rb") as f:
            embedded_font_stream = f.read()
        embedded_font = PdfDict()
        embedded_font.indirect = True
        embedded_font.Filter = [PdfName.FlateDecode]
        embedded_font.stream = zlib.compress(embedded_font_stream,
                                             9).decode("latin-1")
        embedded_font.Length1 = len(embedded_font_stream)

        font_descriptor = PdfDict()
        font_descriptor.indirect = True
        font_descriptor.Ascent = 1000
        font_descriptor.CapHeight = 1000
        font_descriptor.Descent = -1
        font_descriptor.Flags = 5  # FixedPitch + Symbolic
        font_descriptor.FontBBox = PdfArray([0, 0, 1000, 500])
        font_descriptor.FontFile2 = embedded_font
        font_descriptor.FontName = PdfName.GlyphLessFont
        font_descriptor.ItalicAngle = 0
        font_descriptor.StemV = 80
        font_descriptor.Type = PdfName.FontDescriptor

        # Map everything to glyph 1
        cid_to_gid_map_stream = b"\0\1" * (1 << 16)
        cid_to_gid_map = PdfDict()
        cid_to_gid_map.indirect = True
        cid_to_gid_map.Filter = [PdfName.FlateDecode]
        cid_to_gid_map.stream = zlib.compress(cid_to_gid_map_stream,
                                              9).decode("latin-1")
        cid_to_gid_map.Length1 = len(cid_to_gid_map_stream)

        cid_system_info = PdfDict()
        cid_system_info.Ordering = PdfString.from_unicode("Identity")
        cid_system_info.Registry = PdfString.from_unicode("Adobe")
        cid_system_info.Supplement = 0

        cid_font = PdfDict()
        cid_font.indirect = True
        cid_font.CIDToGIDMap = cid_to_gid_map
        cid_font.BaseFont = PdfName.GlyphLessFont
        cid_font.CIDSystemInfo = cid_system_info
        cid_font.FontDescriptor = font_descriptor
        cid_font.Subtype = PdfName.CIDFontType2
        cid_font.Type = PdfName.Font
        cid_font.DW = 500

        with open(UNICODE_CMAP_FILENAME, "rb") as f:
            unicode_cmap_stream = f.read()
        unicode_cmap = PdfDict()
        unicode_cmap.indirect = True
        unicode_cmap.Filter = [PdfName.FlateDecode]
        unicode_cmap.stream = zlib.compress(unicode_cmap_stream,
                                            9).decode("latin-1")

        font = PdfDict()
        font.indirect = True
        font.BaseFont = PdfName.GlyphLessFont
        font.DescendantFonts = PdfArray([cid_font])
        font.Encoding = PdfName("Identity-H")
        font.Subtype = PdfName.Type0
        font.ToUnicode = unicode_cmap
        font.Type = PdfName.Font

        return font

Example #8

Show file

File: djpdf.py Project: 5l1v3r1/djpdf

    def _pdf_image(self, psem):
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            # JBIG2Globals are only used in symbol mode
            # In symbol mode jbig2 writes output to files otherwise
            # it's written to stdout
            symbol_mode = self.jbig2_threshold != 1
            images_with_shared_globals = []
            if symbol_mode and SHARE_JBIG2_GLOBALS:
                # Find all Jbig2Images that share the same symbol directory
                for obj in self._factory._cache:
                    if (isinstance(obj, Jbig2Image)
                            and self.compression == obj.compression
                            and self.jbig2_threshold == obj.jbig2_threshold):
                        images_with_shared_globals.append(obj)
            else:
                # The symbol directory is not shared with other Jbig2Images
                images_with_shared_globals.append(self)
            # Promise all handled Jbig2Images the finished image
            image_futures = []
            my_image_future = None
            for image in images_with_shared_globals:
                future = asyncio.Future()
                asyncio.ensure_future(image._cache.get(future))
                image_futures.append(future)
                if image is self:
                    my_image_future = future
            # All futures are in place, the lock can be released
            self._factory._cache_lock.release()
            self._cache_lock_acquired = False

            # Prepare everything in parallel
            @asyncio.coroutine
            def get_jbig2_images(psem):
                # Convert images with ImageMagick to bitonal png in parallel
                yield from asyncio.gather(*[
                    run_command_async([
                        CONVERT_CMD, "-alpha", "remove", "-alpha", "off",
                        "-colorspace", "gray", "-threshold", "50%",
                        path.abspath(image.filename),
                        path.abspath(path.join(temp_dir, "input.%d.png" % i))
                    ], psem)
                    for i, image in enumerate(images_with_shared_globals)
                ])
                cmd = [JBIG2_CMD, "-p"]
                if symbol_mode:
                    cmd.extend(
                        ["-s", "-t",
                         format_number(self.jbig2_threshold, 4)])
                for i, _ in enumerate(images_with_shared_globals):
                    cmd.append(
                        path.abspath(path.join(temp_dir, "input.%d.png" % i)))
                jbig2_images = []
                jbig2_globals = None
                if symbol_mode:
                    yield from run_command_async(cmd, psem, cwd=temp_dir)
                    jbig2_globals = PdfDict()
                    jbig2_globals.indirect = True
                    with open(path.join(temp_dir, "output.sym"), "rb") as f:
                        jbig2_globals.stream = f.read().decode("latin-1")
                    for i, _ in enumerate(images_with_shared_globals):
                        with open(path.join(temp_dir, "output.%04d" % i),
                                  "rb") as f:
                            jbig2_images.append(f.read())
                else:
                    jbig2_images.append((yield from
                                         run_command_async(cmd,
                                                           psem,
                                                           cwd=temp_dir)))
                return jbig2_images, jbig2_globals

            @asyncio.coroutine
            def get_image_mask(image, psem):
                if image._mask is None:
                    return None
                return (yield from image._mask.pdf_image(psem))

            ((jbig2_images, jbig2_globals),
             image_masks) = yield from asyncio.gather(
                 get_jbig2_images(psem),
                 asyncio.gather(*[
                     get_image_mask(image, psem)
                     for image in images_with_shared_globals
                 ]))

            for image, jbig2_image, image_mask, image_future in zip(
                    images_with_shared_globals, jbig2_images, image_masks,
                    image_futures):
                (width, height, xres,
                 yres) = struct.unpack('>IIII', jbig2_image[11:27])
                pdf_image = PdfDict()
                pdf_image.indirect = True
                pdf_image.Type = PdfName.XObject
                pdf_image.Subtype = PdfName.Image
                pdf_image.Width = width
                pdf_image.Height = height
                if image._image_mask:
                    pdf_image.ImageMask = PdfBool(True)
                else:
                    # NOTE: DefaultGray color space is required for PDF/A
                    pdf_image.ColorSpace = PdfName.DeviceGray
                if image_mask is not None:
                    pdf_image.Mask = image_mask
                pdf_image.BitsPerComponent = 1
                pdf_image.Filter = [PdfName.JBIG2Decode]
                if symbol_mode:
                    pdf_image.DecodeParms = [{
                        PdfName.JBIG2Globals:
                        jbig2_globals
                    }]
                pdf_image.stream = jbig2_image.decode("latin-1")
                image_future.set_result(pdf_image)
        return my_image_future.result()