Example #1
0
        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))
Example #2
0
File: djpdf.py Project: Unrud/djpdf
    def _pdf_image(self, psem):
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            # JBIG2Globals are only used in symbol mode
            # In symbol mode jbig2 writes output to files otherwise
            # it's written to stdout
            symbol_mode = self.jbig2_threshold != 1
            images_with_shared_globals = []
            if symbol_mode and SHARE_JBIG2_GLOBALS:
                # Find all Jbig2Images that share the same symbol directory
                for obj in self._factory._cache:
                    if (isinstance(obj, Jbig2Image) and
                            self.compression == obj.compression and
                            self.jbig2_threshold == obj.jbig2_threshold):
                        images_with_shared_globals.append(obj)
            else:
                # The symbol directory is not shared with other Jbig2Images
                images_with_shared_globals.append(self)
            # Promise all handled Jbig2Images the finished image
            image_futures = []
            my_image_future = None
            for image in images_with_shared_globals:
                future = asyncio.Future()
                asyncio.ensure_future(image._cache.get(future))
                image_futures.append(future)
                if image is self:
                    my_image_future = future
            # All futures are in place, the lock can be released
            self._factory._cache_lock.release()
            self._cache_lock_acquired = False

            # Prepare everything in parallel
            @asyncio.coroutine
            def get_jbig2_images(psem):
                # Convert images with ImageMagick to bitonal png in parallel
                yield from asyncio.gather(*[
                    run_command_async([
                        CONVERT_CMD,
                        "-alpha", "remove",
                        "-alpha", "off",
                        "-colorspace", "gray",
                        "-threshold", "50%",
                        path.abspath(image.filename),
                        path.abspath(path.join(temp_dir,
                                               "input.%d.png" % i))], psem)
                    for i, image in enumerate(images_with_shared_globals)])
                cmd = [JBIG2_CMD, "-p"]
                if symbol_mode:
                    cmd.extend(["-s", "-t",
                                format_number(self.jbig2_threshold, 4)])
                for i, _ in enumerate(images_with_shared_globals):
                    cmd.append(path.abspath(path.join(temp_dir,
                                                      "input.%d.png" % i)))
                jbig2_images = []
                jbig2_globals = None
                if symbol_mode:
                    yield from run_command_async(cmd, psem, cwd=temp_dir)
                    jbig2_globals = PdfDict()
                    jbig2_globals.indirect = True
                    with open(path.join(temp_dir, "output.sym"), "rb") as f:
                        jbig2_globals.stream = f.read().decode("latin-1")
                    for i, _ in enumerate(images_with_shared_globals):
                        with open(path.join(temp_dir,
                                  "output.%04d" % i), "rb") as f:
                            jbig2_images.append(f.read())
                else:
                    jbig2_images.append(
                        (yield from run_command_async(cmd, psem,
                                                      cwd=temp_dir)))
                return jbig2_images, jbig2_globals

            @asyncio.coroutine
            def get_image_mask(image, psem):
                if image._mask is None:
                    return None
                return (yield from image._mask.pdf_image(psem))
            ((jbig2_images, jbig2_globals),
             image_masks) = yield from asyncio.gather(
                get_jbig2_images(psem),
                asyncio.gather(*[get_image_mask(image, psem)
                                 for image in images_with_shared_globals]))

            for image, jbig2_image, image_mask, image_future in zip(
                    images_with_shared_globals, jbig2_images, image_masks,
                    image_futures):
                (width, height, xres, yres) = struct.unpack(
                    '>IIII', jbig2_image[11:27])
                pdf_image = PdfDict()
                pdf_image.indirect = True
                pdf_image.Type = PdfName.XObject
                pdf_image.Subtype = PdfName.Image
                pdf_image.Width = width
                pdf_image.Height = height
                if image._image_mask:
                    pdf_image.ImageMask = PdfBool(True)
                else:
                    pdf_image.ColorSpace = PdfName.DeviceGray
                if image_mask is not None:
                    pdf_image.Mask = image_mask
                pdf_image.BitsPerComponent = 1
                pdf_image.Filter = [PdfName.JBIG2Decode]
                if symbol_mode:
                    pdf_image.DecodeParms = [{
                        PdfName.JBIG2Globals: jbig2_globals}]
                pdf_image.stream = jbig2_image.decode("latin-1")
                image_future.set_result(pdf_image)
        return my_image_future.result()
Example #3
0
    def _pdf_image(self, psem):
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            # JBIG2Globals are only used in symbol mode
            # In symbol mode jbig2 writes output to files otherwise
            # it's written to stdout
            symbol_mode = self.jbig2_threshold != 1
            images_with_shared_globals = []
            if symbol_mode and SHARE_JBIG2_GLOBALS:
                # Find all Jbig2Images that share the same symbol directory
                for obj in self._factory._cache:
                    if (isinstance(obj, Jbig2Image)
                            and self.compression == obj.compression
                            and self.jbig2_threshold == obj.jbig2_threshold):
                        images_with_shared_globals.append(obj)
            else:
                # The symbol directory is not shared with other Jbig2Images
                images_with_shared_globals.append(self)
            # Promise all handled Jbig2Images the finished image
            image_futures = []
            my_image_future = None
            for image in images_with_shared_globals:
                future = asyncio.Future()
                asyncio.ensure_future(image._cache.get(future))
                image_futures.append(future)
                if image is self:
                    my_image_future = future
            # All futures are in place, the lock can be released
            self._factory._cache_lock.release()
            self._cache_lock_acquired = False

            # Prepare everything in parallel
            @asyncio.coroutine
            def get_jbig2_images(psem):
                # Convert images with ImageMagick to bitonal png in parallel
                yield from asyncio.gather(*[
                    run_command_async([
                        CONVERT_CMD, "-alpha", "remove", "-alpha", "off",
                        "-colorspace", "gray", "-threshold", "50%",
                        path.abspath(image.filename),
                        path.abspath(path.join(temp_dir, "input.%d.png" % i))
                    ], psem)
                    for i, image in enumerate(images_with_shared_globals)
                ])
                cmd = [JBIG2_CMD, "-p"]
                if symbol_mode:
                    cmd.extend(
                        ["-s", "-t",
                         format_number(self.jbig2_threshold, 4)])
                for i, _ in enumerate(images_with_shared_globals):
                    cmd.append(
                        path.abspath(path.join(temp_dir, "input.%d.png" % i)))
                jbig2_images = []
                jbig2_globals = None
                if symbol_mode:
                    yield from run_command_async(cmd, psem, cwd=temp_dir)
                    jbig2_globals = PdfDict()
                    jbig2_globals.indirect = True
                    with open(path.join(temp_dir, "output.sym"), "rb") as f:
                        jbig2_globals.stream = f.read().decode("latin-1")
                    for i, _ in enumerate(images_with_shared_globals):
                        with open(path.join(temp_dir, "output.%04d" % i),
                                  "rb") as f:
                            jbig2_images.append(f.read())
                else:
                    jbig2_images.append((yield from
                                         run_command_async(cmd,
                                                           psem,
                                                           cwd=temp_dir)))
                return jbig2_images, jbig2_globals

            @asyncio.coroutine
            def get_image_mask(image, psem):
                if image._mask is None:
                    return None
                return (yield from image._mask.pdf_image(psem))

            ((jbig2_images, jbig2_globals),
             image_masks) = yield from asyncio.gather(
                 get_jbig2_images(psem),
                 asyncio.gather(*[
                     get_image_mask(image, psem)
                     for image in images_with_shared_globals
                 ]))

            for image, jbig2_image, image_mask, image_future in zip(
                    images_with_shared_globals, jbig2_images, image_masks,
                    image_futures):
                (width, height, xres,
                 yres) = struct.unpack('>IIII', jbig2_image[11:27])
                pdf_image = PdfDict()
                pdf_image.indirect = True
                pdf_image.Type = PdfName.XObject
                pdf_image.Subtype = PdfName.Image
                pdf_image.Width = width
                pdf_image.Height = height
                if image._image_mask:
                    pdf_image.ImageMask = PdfBool(True)
                else:
                    # NOTE: DefaultGray color space is required for PDF/A
                    pdf_image.ColorSpace = PdfName.DeviceGray
                if image_mask is not None:
                    pdf_image.Mask = image_mask
                pdf_image.BitsPerComponent = 1
                pdf_image.Filter = [PdfName.JBIG2Decode]
                if symbol_mode:
                    pdf_image.DecodeParms = [{
                        PdfName.JBIG2Globals:
                        jbig2_globals
                    }]
                pdf_image.stream = jbig2_image.decode("latin-1")
                image_future.set_result(pdf_image)
        return my_image_future.result()