async def _filename(self, psem):
    """Return the path of the image file for this layer, or ``None``.

    When foreground colors have to be replaced (``fg_enabled`` with a
    non-empty ``fg_colors``) or the page's ``bg_resize`` is not 1, a new
    ``image.png`` is produced in ``self._temp_dir`` by running
    ``CONVERT_CMD``; otherwise the input image's own file is used as is.

    Args:
        psem: passed through to ``run_command`` and to the awaited
            helpers (presumably a semaphore limiting parallel
            processes -- confirm against ``run_command``).

    Returns:
        The file name, or ``None`` when ``_is_plain_color_file`` reports
        that the file only consists of the page's ``bg_color``.
    """
    page = self._page
    replace_fg = page["fg_enabled"] and page["fg_colors"]

    if replace_fg or page["bg_resize"] != 1:
        fname = path.join(self._temp_dir, "image.png")
        cmd = [CONVERT_CMD, "-fill", _color_to_hex(page["bg_color"])]
        if page["fg_enabled"]:
            # Paint every foreground color over with the fill color.
            for fg_color in page["fg_colors"]:
                cmd += ["-opaque", _color_to_hex(fg_color)]
        scale = format_number(page["bg_resize"], 2, percentage=True)
        source = path.abspath(await self._input_image.filename(psem))
        cmd += ["-resize", scale, source, path.abspath(fname)]
        await run_command(cmd, psem)
    else:
        fname = await self._input_image.filename(psem)

    is_plain = await self._is_plain_color_file(fname, page["bg_color"], psem)
    return None if is_plain else fname
async def get_jbig2_images(psem):
    """Encode ``images_with_shared_globals`` with ``JBIG2_CMD``.

    Every image is first converted to ``input.<i>.png`` inside
    ``temp_dir``; afterwards a single ``JBIG2_CMD`` invocation is run
    over all of those files with ``temp_dir`` as working directory.

    Args:
        psem: passed through to every ``run_command`` call.

    Returns:
        A tuple ``(jbig2_images, jbig2_globals)``. In symbol mode the
        list holds the bytes of each ``output.NNNN`` file and the
        globals are a ``PdfDict`` whose stream is the content of
        ``output.sym``. Otherwise the list holds the single result of
        ``run_command`` and the globals are ``None``.
    """

    def bitonal_png(index):
        # Absolute path of the intermediate bitonal png of image `index`.
        return path.abspath(path.join(temp_dir, "input.%d.png" % index))

    # Convert images with ImageMagick to bitonal png in parallel
    conversions = []
    for index, image in enumerate(images_with_shared_globals):
        conversions.append(
            run_command([
                CONVERT_CMD,
                "-alpha", "remove",
                "-alpha", "off",
                "-colorspace", "gray",
                "-threshold", "50%",
                path.abspath(image.filename),
                bitonal_png(index),
            ], psem))
    await asyncio.gather(*conversions)

    encode_cmd = [JBIG2_CMD, "-p"]
    if symbol_mode:
        encode_cmd += ["-s", "-t", format_number(self.jbig2_threshold, 4)]
    encode_cmd += [
        bitonal_png(index)
        for index, _ in enumerate(images_with_shared_globals)
    ]

    if not symbol_mode:
        # The result of the command itself is the only encoded image.
        encoded = await run_command(encode_cmd, psem, cwd=temp_dir)
        return [encoded], None

    # Symbol mode: the encoder leaves its results as files in temp_dir.
    await run_command(encode_cmd, psem, cwd=temp_dir)

    shared_globals = PdfDict()
    shared_globals.indirect = True
    with open(path.join(temp_dir, "output.sym"), "rb") as sym_file:
        shared_globals.stream = sym_file.read().decode("latin-1")

    encoded_pages = []
    for index, _ in enumerate(images_with_shared_globals):
        with open(path.join(temp_dir, "output.%04d" % index), "rb") as page_file:
            encoded_pages.append(page_file.read())
    return encoded_pages, shared_globals
def format_number_percentage(d):
    """Return *d* formatted by ``format_number(d, 2, percentage=True)``."""
    as_percentage = format_number(d, 2, percentage=True)
    return as_percentage
def main():
    """Command-line entry point.

    Parses ``sys.argv`` in the form
    ``[options] INFILE [[options] INFILE ...] OUTFILE``. Options apply to
    all following input files, so the settings are accumulated and a
    snapshot of them is stored for every INFILE. Finally ``build_pdf`` is
    run with the collected pages and the output file.

    Exits with status 0 after ``--ocr-list-langs`` and with status 1 when
    building the PDF raises an exception.
    """
    cli_setup()

    def rgb_to_name_or_hex(rgb):
        # Prefer the color name; fall back to hex when there is none.
        try:
            return webcolors.rgb_to_name(rgb)
        except ValueError:
            pass
        return webcolors.rgb_to_hex(rgb)

    def bool_to_name(b):
        return "yes" if b else "no"

    def format_number_percentage(d):
        return format_number(d, 2, percentage=True)

    df = copy.deepcopy(DEFAULT_SETTINGS)

    # Autodetect features
    ocr_languages = find_ocr_languages()
    if not ocr_languages:
        df["ocr_enabled"] = False
        if test_command_exists([TESSERACT_CMD]):
            logging.warning("'%s' is missing language files" % TESSERACT_CMD)
    elif df["ocr_language"] not in ocr_languages:
        df["ocr_language"] = ocr_languages[0]
    if not test_command_exists([JBIG2_CMD]):
        df["fg_compression"] = "fax"
    test_command_exists([QPDF_CMD], fatal=True)
    test_command_exists([CONVERT_CMD], fatal=True)
    test_command_exists([IDENTIFY_CMD], fatal=True)

    parser = ArgumentParser(
        description="Options are valid for all following images.",
        usage="%(prog)s [options] INFILE [[options] INFILE ...] OUTFILE")
    parser.add_argument("--version", action="version",
                        version="%%(prog)s %s" % VERSION,
                        help="show version info and exit")
    parser.add_argument("-v", "--verbose",
                        help="increase output verbosity",
                        action="store_true")
    parser.add_argument(
        "--dpi", type=type_dpi,
        help="specify the dpi of the input image. If 'auto' is given the "
        "value gets read from the input file "
        "(default: %s)" % (df["dpi"] if isinstance(df["dpi"], str)
                           else format_number(df["dpi"], 2)))
    parser.add_argument(
        "--bg-color", type=type_color, action="store", metavar="COLOR",
        help="sets the background color of the page. Colors can be either "
        "specified by name (e.g. white) or as a hash mark '#' followed "
        "by three pairs of hexadecimal digits, specifying values for "
        "red, green and blue components (e.g. #ffffff) "
        "(default: %s)" % rgb_to_name_or_hex(df["bg_color"]))
    parser.add_argument(
        "--bg", type=type_bool, action="store", metavar="BOOLEAN",
        help="sets if a low quality background image gets included, "
        "containing all the colors that are not in the foreground "
        "layer "
        "(default: %s)" % bool_to_name(df["bg_enabled"]))
    # The help text is %-formatted twice (here and by argparse), hence
    # the quadrupled percent signs and the escaping of the default.
    parser.add_argument(
        "--bg-resize", type=type_fraction, action="store",
        metavar="FRACTION",
        help=("sets the percentage by which the background image gets "
              "resized. A value of 100%%%% means that the resolution is not "
              "changed. "
              "(default: %s)" %
              format_number_percentage(df["bg_resize"]).replace("%", "%%")))
    parser.add_argument(
        "--bg-compression", choices=["deflate", "jp2", "jpeg"],
        help=("specify the compression algorithm to use for the background "
              "layer. 'deflate' is lossless. 'jp2' and 'jpeg' are lossy "
              "depending on the quality setting. "
              "(default: %s)" % df["bg_compression"]))
    parser.add_argument(
        "--bg-quality", metavar="INTEGER", type=type_quality,
        help="for 'jp2' and 'jpeg' compression, quality is 1 (lowest image "
        "quality and highest compression) to 100 (best quality but "
        "least effective compression) "
        "(default: %d)" % df["bg_quality"])
    parser.add_argument(
        "--fg", type=type_bool, action="store", metavar="BOOLEAN",
        help="sets if a high quality foreground layer gets included, "
        "containing only a limited set of colors "
        "(default: %s)" % bool_to_name(df["fg_enabled"]))
    parser.add_argument(
        "--fg-colors", type=type_colors, action="store", metavar="COLORS",
        help="specify the colors to separate into the foreground layer. "
        "Colors can be specified as described at '--bg-color'. "
        "Multiple colors must be comma-separated. "
        "(default: %s)" % ",".join(map(rgb_to_name_or_hex,
                                       df["fg_colors"])))
    parser.add_argument(
        "--fg-compression", choices=["fax", "jbig2"],
        help="specify the compression algorithm to use for the bitonal "
        "foreground layer. 'fax' is lossless. 'jbig2' is "
        "lossy depending on the threshold setting. "
        "(default: %s)" % df["fg_compression"])
    parser.add_argument(
        "--fg-jbig2-threshold", type=type_jbig2_threshold, action="store",
        metavar="FRACTION",
        help=("sets the fraction of pixels which have to match in order for "
              "two symbols to be classed the same. This isn't strictly true, "
              "as there are other tests as well, but increasing this will "
              "generally increase the number of symbol classes. A value of "
              "100%%%% means lossless compression. "
              "(default: %s)" % format_number_percentage(
                  df["fg_jbig2_threshold"]).replace("%", "%%")))
    parser.add_argument("--ocr", type=type_bool, action="store",
                        metavar="BOOLEAN",
                        help="optical character recognition with tesseract "
                        "(default: %s)" % bool_to_name(df["ocr_enabled"]))
    parser.add_argument("--ocr-lang", action="store", metavar="LANG",
                        help="specify language used for OCR. "
                        "Multiple languages may be specified, separated "
                        "by plus characters. "
                        "(default: %s)" % df["ocr_language"])
    parser.add_argument("--ocr-list-langs", action="store_true",
                        help="list available languages for OCR ")
    parser.add_argument(
        "--ocr-colors", type=type_ocr_colors, action="store",
        metavar="COLORS",
        help="specify the colors for ocr. 'all' specifies all colors "
        "(default: %s)" % (df["ocr_colors"]
                           if isinstance(df["ocr_colors"], str)
                           else ",".join(map(rgb_to_name_or_hex,
                                             df["ocr_colors"]))))

    # Global arguments are recognized by prefix so that abbreviated
    # long options are caught as well.
    global_args = ("--vers", "-h", "--h", "-v", "--verb", "--ocr-li")
    global_argv = [arg for arg in sys.argv[1:]
                   if arg.startswith(global_args)]
    remaining_argv = [arg for arg in sys.argv[1:]
                      if not arg.startswith(global_args)]

    # handle global arguments
    ns = parser.parse_args(global_argv)
    cli_set_verbosity(ns.verbose)
    if ns.ocr_list_langs:
        print("\n".join(ocr_languages))
        sys.exit(0)

    infile_parser = ArgumentParser(usage=parser.usage,
                                   prog=parser.prog,
                                   parents=(parser, ),
                                   add_help=False)
    infile_parser.add_argument("INFILE", type=type_infile)
    outfile_parser = ArgumentParser(usage=parser.usage,
                                    prog=parser.prog,
                                    parents=(parser, ),
                                    add_help=False)
    outfile_parser.add_argument("OUTFILE", type=type_outfile)

    def is_arg(s):
        # A dash followed only by digits (e.g. "-5") is not an option.
        if re.fullmatch(r"-\d+", s):
            return False
        return s.startswith("-")

    def expects_arg(s):
        # all non-global arguments expect one argument, unless the value
        # is already attached in the "--option=value" form; in that case
        # the following token must not be consumed as the value.
        return is_arg(s) and s.startswith("--") and "=" not in s

    pages = []
    while True:
        # Collect the options of the next page up to and including its
        # INFILE: keep reading while the last token is an option or is
        # the value of the option right before it.
        current_argv = []
        while (not current_argv or is_arg(current_argv[-1])
               or (len(current_argv) >= 2
                   and expects_arg(current_argv[-2]))):
            if not remaining_argv:
                parser.error("the following arguments are required: "
                             "INFILE, OUTFILE")
            current_argv.append(remaining_argv[0])
            del remaining_argv[0]
        ns = infile_parser.parse_args(current_argv)
        update_page_from_namespace(df, ns)
        # NOTE(review): shallow copy; assumes update_page_from_namespace
        # replaces values instead of mutating them in place -- confirm.
        pages.append(df.copy())
        # Stop as soon as nothing or only the OUTFILE is left.
        if (not remaining_argv
                or len(remaining_argv) == 1
                and not is_arg(remaining_argv[0])):
            break

    ns = outfile_parser.parse_args(remaining_argv)
    out_file = ns.OUTFILE

    try:
        compat_asyncio_run(build_pdf(pages, out_file))
    except Exception:
        # Top-level boundary: the traceback is only logged at debug level.
        logging.debug("Exception occurred:\n%s" % traceback.format_exc())
        logging.fatal("Operation failed")
        sys.exit(1)
def _pdf_format_number(f, decimal_places=PDF_DECIMAL_PLACES):
    """Return *f* formatted by ``format_number`` with ``trim_leading_zero=True``.

    Args:
        f: the number to format.
        decimal_places: forwarded to ``format_number`` as its second
            argument; defaults to ``PDF_DECIMAL_PLACES``.
    """
    formatted = format_number(f, decimal_places, trim_leading_zero=True)
    return formatted