Esempio n. 1
0
File: djpdf.py Progetto: Unrud/djpdf
    def _pdf_image(self, psem):
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            # JBIG2Globals are only used in symbol mode
            # In symbol mode jbig2 writes output to files otherwise
            # it's written to stdout
            symbol_mode = self.jbig2_threshold != 1
            images_with_shared_globals = []
            if symbol_mode and SHARE_JBIG2_GLOBALS:
                # Find all Jbig2Images that share the same symbol directory
                for obj in self._factory._cache:
                    if (isinstance(obj, Jbig2Image) and
                            self.compression == obj.compression and
                            self.jbig2_threshold == obj.jbig2_threshold):
                        images_with_shared_globals.append(obj)
            else:
                # The symbol directory is not shared with other Jbig2Images
                images_with_shared_globals.append(self)
            # Promise all handled Jbig2Images the finished image
            image_futures = []
            my_image_future = None
            for image in images_with_shared_globals:
                future = asyncio.Future()
                asyncio.ensure_future(image._cache.get(future))
                image_futures.append(future)
                if image is self:
                    my_image_future = future
            # All futures are in place, the lock can be released
            self._factory._cache_lock.release()
            self._cache_lock_acquired = False

            # Prepare everything in parallel
            @asyncio.coroutine
            def get_jbig2_images(psem):
                # Convert images with ImageMagick to bitonal png in parallel
                yield from asyncio.gather(*[
                    run_command_async([
                        CONVERT_CMD,
                        "-alpha", "remove",
                        "-alpha", "off",
                        "-colorspace", "gray",
                        "-threshold", "50%",
                        path.abspath(image.filename),
                        path.abspath(path.join(temp_dir,
                                               "input.%d.png" % i))], psem)
                    for i, image in enumerate(images_with_shared_globals)])
                cmd = [JBIG2_CMD, "-p"]
                if symbol_mode:
                    cmd.extend(["-s", "-t",
                                format_number(self.jbig2_threshold, 4)])
                for i, _ in enumerate(images_with_shared_globals):
                    cmd.append(path.abspath(path.join(temp_dir,
                                                      "input.%d.png" % i)))
                jbig2_images = []
                jbig2_globals = None
                if symbol_mode:
                    yield from run_command_async(cmd, psem, cwd=temp_dir)
                    jbig2_globals = PdfDict()
                    jbig2_globals.indirect = True
                    with open(path.join(temp_dir, "output.sym"), "rb") as f:
                        jbig2_globals.stream = f.read().decode("latin-1")
                    for i, _ in enumerate(images_with_shared_globals):
                        with open(path.join(temp_dir,
                                  "output.%04d" % i), "rb") as f:
                            jbig2_images.append(f.read())
                else:
                    jbig2_images.append(
                        (yield from run_command_async(cmd, psem,
                                                      cwd=temp_dir)))
                return jbig2_images, jbig2_globals

            @asyncio.coroutine
            def get_image_mask(image, psem):
                if image._mask is None:
                    return None
                return (yield from image._mask.pdf_image(psem))
            ((jbig2_images, jbig2_globals),
             image_masks) = yield from asyncio.gather(
                get_jbig2_images(psem),
                asyncio.gather(*[get_image_mask(image, psem)
                                 for image in images_with_shared_globals]))

            for image, jbig2_image, image_mask, image_future in zip(
                    images_with_shared_globals, jbig2_images, image_masks,
                    image_futures):
                (width, height, xres, yres) = struct.unpack(
                    '>IIII', jbig2_image[11:27])
                pdf_image = PdfDict()
                pdf_image.indirect = True
                pdf_image.Type = PdfName.XObject
                pdf_image.Subtype = PdfName.Image
                pdf_image.Width = width
                pdf_image.Height = height
                if image._image_mask:
                    pdf_image.ImageMask = PdfBool(True)
                else:
                    pdf_image.ColorSpace = PdfName.DeviceGray
                if image_mask is not None:
                    pdf_image.Mask = image_mask
                pdf_image.BitsPerComponent = 1
                pdf_image.Filter = [PdfName.JBIG2Decode]
                if symbol_mode:
                    pdf_image.DecodeParms = [{
                        PdfName.JBIG2Globals: jbig2_globals}]
                pdf_image.stream = jbig2_image.decode("latin-1")
                image_future.set_result(pdf_image)
        return my_image_future.result()
Esempio n. 2
0
    def _pdf_image(self, psem):
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            # JBIG2Globals are only used in symbol mode
            # In symbol mode jbig2 writes output to files otherwise
            # it's written to stdout
            symbol_mode = self.jbig2_threshold != 1
            images_with_shared_globals = []
            if symbol_mode and SHARE_JBIG2_GLOBALS:
                # Find all Jbig2Images that share the same symbol directory
                for obj in self._factory._cache:
                    if (isinstance(obj, Jbig2Image)
                            and self.compression == obj.compression
                            and self.jbig2_threshold == obj.jbig2_threshold):
                        images_with_shared_globals.append(obj)
            else:
                # The symbol directory is not shared with other Jbig2Images
                images_with_shared_globals.append(self)
            # Promise all handled Jbig2Images the finished image
            image_futures = []
            my_image_future = None
            for image in images_with_shared_globals:
                future = asyncio.Future()
                asyncio.ensure_future(image._cache.get(future))
                image_futures.append(future)
                if image is self:
                    my_image_future = future
            # All futures are in place, the lock can be released
            self._factory._cache_lock.release()
            self._cache_lock_acquired = False

            # Prepare everything in parallel
            @asyncio.coroutine
            def get_jbig2_images(psem):
                # Convert images with ImageMagick to bitonal png in parallel
                yield from asyncio.gather(*[
                    run_command_async([
                        CONVERT_CMD, "-alpha", "remove", "-alpha", "off",
                        "-colorspace", "gray", "-threshold", "50%",
                        path.abspath(image.filename),
                        path.abspath(path.join(temp_dir, "input.%d.png" % i))
                    ], psem)
                    for i, image in enumerate(images_with_shared_globals)
                ])
                cmd = [JBIG2_CMD, "-p"]
                if symbol_mode:
                    cmd.extend(
                        ["-s", "-t",
                         format_number(self.jbig2_threshold, 4)])
                for i, _ in enumerate(images_with_shared_globals):
                    cmd.append(
                        path.abspath(path.join(temp_dir, "input.%d.png" % i)))
                jbig2_images = []
                jbig2_globals = None
                if symbol_mode:
                    yield from run_command_async(cmd, psem, cwd=temp_dir)
                    jbig2_globals = PdfDict()
                    jbig2_globals.indirect = True
                    with open(path.join(temp_dir, "output.sym"), "rb") as f:
                        jbig2_globals.stream = f.read().decode("latin-1")
                    for i, _ in enumerate(images_with_shared_globals):
                        with open(path.join(temp_dir, "output.%04d" % i),
                                  "rb") as f:
                            jbig2_images.append(f.read())
                else:
                    jbig2_images.append((yield from
                                         run_command_async(cmd,
                                                           psem,
                                                           cwd=temp_dir)))
                return jbig2_images, jbig2_globals

            @asyncio.coroutine
            def get_image_mask(image, psem):
                if image._mask is None:
                    return None
                return (yield from image._mask.pdf_image(psem))

            ((jbig2_images, jbig2_globals),
             image_masks) = yield from asyncio.gather(
                 get_jbig2_images(psem),
                 asyncio.gather(*[
                     get_image_mask(image, psem)
                     for image in images_with_shared_globals
                 ]))

            for image, jbig2_image, image_mask, image_future in zip(
                    images_with_shared_globals, jbig2_images, image_masks,
                    image_futures):
                (width, height, xres,
                 yres) = struct.unpack('>IIII', jbig2_image[11:27])
                pdf_image = PdfDict()
                pdf_image.indirect = True
                pdf_image.Type = PdfName.XObject
                pdf_image.Subtype = PdfName.Image
                pdf_image.Width = width
                pdf_image.Height = height
                if image._image_mask:
                    pdf_image.ImageMask = PdfBool(True)
                else:
                    # NOTE: DefaultGray color space is required for PDF/A
                    pdf_image.ColorSpace = PdfName.DeviceGray
                if image_mask is not None:
                    pdf_image.Mask = image_mask
                pdf_image.BitsPerComponent = 1
                pdf_image.Filter = [PdfName.JBIG2Decode]
                if symbol_mode:
                    pdf_image.DecodeParms = [{
                        PdfName.JBIG2Globals:
                        jbig2_globals
                    }]
                pdf_image.stream = jbig2_image.decode("latin-1")
                image_future.set_result(pdf_image)
        return my_image_future.result()