Example #1
0
 async def _filename(self, psem):
     """Return the path of the image file for this page, or None.

     The input image is run through CONVERT_CMD (written to "image.png" in
     the temporary directory) when the foreground is enabled with at least
     one foreground color, or when "bg_resize" differs from 1. Otherwise
     the input image's own file is used unchanged.

     Returns None when self._is_plain_color_file(...) is true for the
     resulting file and the page's "bg_color".
     """
     page = self._page
     must_convert = ((page["fg_enabled"] and page["fg_colors"])
                     or page["bg_resize"] != 1)
     if not must_convert:
         fname = await self._input_image.filename(psem)
     else:
         fname = path.join(self._temp_dir, "image.png")
         # "-fill <bg>" followed by "-opaque <color>" for every foreground
         # color, then the resize step and the input/output files.
         cmd = [CONVERT_CMD, "-fill", _color_to_hex(page["bg_color"])]
         if page["fg_enabled"]:
             for fg_color in page["fg_colors"]:
                 cmd += ["-opaque", _color_to_hex(fg_color)]
         resize = format_number(page["bg_resize"], 2, percentage=True)
         source = path.abspath(await self._input_image.filename(psem))
         cmd += ["-resize", resize, source, path.abspath(fname)]
         await run_command(cmd, psem)
     is_plain = await self._is_plain_color_file(fname, page["bg_color"],
                                                psem)
     if is_plain:
         return None
     return fname
Example #2
0
File: djpdf.py Project: Unrud/djpdf
 async def get_jbig2_images(psem):
     """Encode images_with_shared_globals with JBIG2_CMD.

     Every image is first converted by CONVERT_CMD into a thresholded
     grayscale PNG named "input.<i>.png" inside temp_dir; the conversions
     are awaited together. JBIG2_CMD is then run once over all of those
     PNGs with temp_dir as working directory.

     Returns a tuple (jbig2_images, jbig2_globals):
       * symbol_mode set: jbig2_images holds the bytes of each
         "output.%04d" file and jbig2_globals is an indirect PdfDict whose
         stream is the latin-1 decoded content of "output.sym".
       * otherwise: jbig2_images holds the single value returned by
         run_command and jbig2_globals is None.
     """
     def bitonal_png(index):
         # Absolute path of the intermediate PNG for image number `index`.
         return path.abspath(path.join(temp_dir, "input.%d.png" % index))

     # Convert images with ImageMagick to bitonal png in parallel
     conversions = []
     for index, image in enumerate(images_with_shared_globals):
         conversions.append(run_command([
             CONVERT_CMD, "-alpha", "remove", "-alpha", "off",
             "-colorspace", "gray", "-threshold", "50%",
             path.abspath(image.filename),
             bitonal_png(index)
         ], psem))
     await asyncio.gather(*conversions)

     jbig2_cmd = [JBIG2_CMD, "-p"]
     if symbol_mode:
         jbig2_cmd += ["-s", "-t", format_number(self.jbig2_threshold, 4)]
     jbig2_cmd += [bitonal_png(index)
                   for index, _ in enumerate(images_with_shared_globals)]

     if not symbol_mode:
         # The command's return value is used directly as the only entry.
         encoded = await run_command(jbig2_cmd, psem, cwd=temp_dir)
         return [encoded], None

     await run_command(jbig2_cmd, psem, cwd=temp_dir)
     shared_globals = PdfDict()
     shared_globals.indirect = True
     with open(path.join(temp_dir, "output.sym"), "rb") as sym_file:
         shared_globals.stream = sym_file.read().decode("latin-1")
     encoded_images = []
     for index, _ in enumerate(images_with_shared_globals):
         output_name = path.join(temp_dir, "output.%04d" % index)
         with open(output_name, "rb") as image_file:
             encoded_images.append(image_file.read())
     return encoded_images, shared_globals
Example #3
0
 def format_number_percentage(d):
     """Format d through format_number with 2 as the second argument and
     percentage=True (presumably two decimal places, shown as a percentage).
     """
     formatted = format_number(d, 2, percentage=True)
     return formatted
Example #4
0
def main():
    """Command-line entry point.

    Parses sys.argv in the form
    "[options] INFILE [[options] INFILE ...] OUTFILE", collects one settings
    dict per INFILE, and passes the list of pages plus the OUTFILE to
    build_pdf. Exits with status 1 if building the PDF raises an exception.
    """
    cli_setup()

    def rgb_to_name_or_hex(rgb):
        """Return the webcolors name for rgb, or its hex form if
        rgb_to_name raises ValueError."""
        try:
            return webcolors.rgb_to_name(rgb)
        except ValueError:
            pass
        return webcolors.rgb_to_hex(rgb)

    def bool_to_name(b):
        """Return "yes" for a truthy value and "no" otherwise."""
        if b:
            return "yes"
        else:
            return "no"

    def format_number_percentage(d):
        """Format d with format_number(d, 2, percentage=True)."""
        return format_number(d, 2, percentage=True)

    # Working copy of the defaults. It is adjusted by the feature detection
    # below and then updated again for every INFILE group on the command line.
    df = copy.deepcopy(DEFAULT_SETTINGS)
    # Autodetect features
    ocr_languages = find_ocr_languages()
    if not ocr_languages:
        # No OCR languages found: disable OCR by default, and warn only when
        # the tesseract command itself is available.
        df["ocr_enabled"] = False
        if test_command_exists([TESSERACT_CMD]):
            logging.warning("'%s' is missing language files" % TESSERACT_CMD)
    elif df["ocr_language"] not in ocr_languages:
        # Default language is unavailable: fall back to the first one found.
        df["ocr_language"] = ocr_languages[0]
    if not test_command_exists([JBIG2_CMD]):
        # Without the jbig2 command, default the foreground to "fax".
        df["fg_compression"] = "fax"
    # These three commands are checked with fatal=True.
    test_command_exists([QPDF_CMD], fatal=True)
    test_command_exists([CONVERT_CMD], fatal=True)
    test_command_exists([IDENTIFY_CMD], fatal=True)

    # Base parser holding every option. It is used directly for the global
    # options and as the parent of the INFILE/OUTFILE parsers created below.
    parser = ArgumentParser(
        description="Options are valid for all following images.",
        usage="%(prog)s [options] INFILE [[options] INFILE ...] OUTFILE")

    parser.add_argument("--version",
                        action="version",
                        version="%%(prog)s %s" % VERSION,
                        help="show version info and exit")

    parser.add_argument("-v",
                        "--verbose",
                        help="increase output verbosity",
                        action="store_true")

    parser.add_argument(
        "--dpi",
        type=type_dpi,
        help="specify the dpi of the input image. If 'auto' is given the "
        "value gets read from the input file "
        "(default: %s)" % (df["dpi"] if isinstance(df["dpi"], str) else
                           format_number(df["dpi"], 2)))

    parser.add_argument(
        "--bg-color",
        type=type_color,
        action="store",
        metavar="COLOR",
        help="sets the background color of the page. Colors can be either "
        "specified by name (e.g. white) or as a hash mark '#' followed "
        "by three pairs of hexadecimal digits, specifying values for "
        "red, green and blue components (e.g. #ffffff) "
        "(default: %s)" % rgb_to_name_or_hex(df["bg_color"]))
    parser.add_argument(
        "--bg",
        type=type_bool,
        action="store",
        metavar="BOOLEAN",
        help="sets if a low quality background image gets included, "
        "containing all the colors that are not in the foreground "
        "layer "
        "(default: %s)" % bool_to_name(df["bg_enabled"]))
    # The help text is %-formatted twice: once here and once by argparse
    # when it renders help. "%%%%" therefore ends up as one literal "%", and
    # any "%" in the formatted default is doubled so it survives argparse.
    parser.add_argument(
        "--bg-resize",
        type=type_fraction,
        action="store",
        metavar="FRACTION",
        help=("sets the percentage by which the background image gets "
              "resized. A value of 100%%%% means that the resolution is not "
              "changed. "
              "(default: %s)" %
              format_number_percentage(df["bg_resize"]).replace("%", "%%")))
    parser.add_argument(
        "--bg-compression",
        choices=["deflate", "jp2", "jpeg"],
        help=("specify the compression algorithm to use for the background "
              "layer. 'deflate' is lossless. 'jp2' and 'jpeg' are lossy "
              "depending on the quality setting. "
              "(default: %s)" % df["bg_compression"]))
    parser.add_argument(
        "--bg-quality",
        metavar="INTEGER",
        type=type_quality,
        help="for 'jp2' and 'jpeg' compression, quality is 1 (lowest image "
        "quality and highest compression) to 100 (best quality but "
        "least effective compression) "
        "(default: %d)" % df["bg_quality"])

    parser.add_argument(
        "--fg",
        type=type_bool,
        action="store",
        metavar="BOOLEAN",
        help="sets if a high quality foreground layer gets included, "
        "containing only a limited set of colors "
        "(default: %s)" % bool_to_name(df["fg_enabled"]))
    parser.add_argument(
        "--fg-colors",
        type=type_colors,
        action="store",
        metavar="COLORS",
        help="specify the colors to separate into the foreground layer. "
        "Colors can be specified as described at '--bg-color'. "
        "Multiple colors must be comma-separated. "
        "(default: %s)" %
        ",".join(map(lambda c: rgb_to_name_or_hex(c), df["fg_colors"])))
    parser.add_argument(
        "--fg-compression",
        choices=["fax", "jbig2"],
        help="specify the compression algorithm to use for the bitonal "
        "foreground layer. 'fax' is lossless. 'jbig2' is "
        "lossy depending on the threshold setting. "
        "(default: %s)" % df["fg_compression"])
    # Same double %-escaping as for --bg-resize.
    parser.add_argument(
        "--fg-jbig2-threshold",
        type=type_jbig2_threshold,
        action="store",
        metavar="FRACTION",
        help=("sets the fraction of pixels which have to match in order for "
              "two symbols to be classed the same. This isn't strictly true, "
              "as there are other tests as well, but increasing this will "
              "generally increase the number of symbol classes. A value of "
              "100%%%% means lossless compression. "
              "(default: %s)" % format_number_percentage(
                  df["fg_jbig2_threshold"]).replace("%", "%%")))

    parser.add_argument("--ocr",
                        type=type_bool,
                        action="store",
                        metavar="BOOLEAN",
                        help="optical character recognition with tesseract "
                        "(default: %s)" % bool_to_name(df["ocr_enabled"]))
    parser.add_argument("--ocr-lang",
                        action="store",
                        metavar="LANG",
                        help="specify language used for OCR. "
                        "Multiple languages may be specified, separated "
                        "by plus characters. "
                        "(default: %s)" % df["ocr_language"])
    parser.add_argument("--ocr-list-langs",
                        action="store_true",
                        help="list available languages for OCR ")
    parser.add_argument(
        "--ocr-colors",
        type=type_ocr_colors,
        action="store",
        metavar="COLORS",
        help="specify the colors for ocr. 'all' specifies all colors "
        "(default: %s)" %
        (df["ocr_colors"] if isinstance(df["ocr_colors"], str) else ",".join(
            map(lambda c: rgb_to_name_or_hex(c), df["ocr_colors"]))))

    # Prefixes of the options that are handled once, up front (version, help,
    # verbose, OCR language listing). Tokens are matched by prefix --
    # presumably so abbreviated long options are caught as well -- and every
    # other token is left in remaining_argv for per-image parsing.
    global_args = ("--vers", "-h", "--h", "-v", "--verb", "--ocr-li")
    global_argv = list(
        filter(lambda arg: any([arg.startswith(s) for s in global_args]),
               sys.argv[1:]))
    remaining_argv = list(
        filter(lambda arg: not any([arg.startswith(s) for s in global_args]),
               sys.argv[1:]))

    # handle global arguments
    ns = parser.parse_args(global_argv)
    cli_set_verbosity(ns.verbose)

    if ns.ocr_list_langs:
        print("\n".join(ocr_languages))
        sys.exit(0)

    # Two derived parsers: each inherits every option from `parser` and adds
    # exactly one positional, so a group of tokens can be parsed as
    # "[options] INFILE" or "[options] OUTFILE".
    infile_parser = ArgumentParser(usage=parser.usage,
                                   prog=parser.prog,
                                   parents=(parser, ),
                                   add_help=False)
    infile_parser.add_argument("INFILE", type=type_infile)
    outfile_parser = ArgumentParser(usage=parser.usage,
                                    prog=parser.prog,
                                    parents=(parser, ),
                                    add_help=False)
    outfile_parser.add_argument("OUTFILE", type=type_outfile)

    def is_arg(s):
        """Return True if s looks like an option; a dash followed only by
        digits (e.g. "-1") does not count as an option."""
        if re.fullmatch(r"-\d+", s):
            return False
        return s.startswith("-")

    def expects_arg(s):
        """Return True if s is a "--" option, which is taken to consume the
        next token as its value."""
        # all non-global arguments expect one argument
        return is_arg(s) and s.startswith("--")

    pages = []
    while True:
        # Move tokens from remaining_argv into current_argv until the group
        # ends with a positional: the loop keeps going while the group is
        # empty, its last token is an option, or its last token is the value
        # of a preceding "--" option.
        current_argv = []
        while (not current_argv or (current_argv and is_arg(current_argv[-1]))
               or (len(current_argv) >= 2 and expects_arg(current_argv[-2]))):
            if not remaining_argv:
                # Ran out of tokens before a positional was found.
                parser.error("the following arguments are required: "
                             "INFILE, OUTFILE")
            current_argv.append(remaining_argv[0])
            del remaining_argv[0]
        ns = infile_parser.parse_args(current_argv)
        # df is updated in place and a copy is stored per page, so options
        # carry over to the following images.
        # NOTE(review): df.copy() is shallow -- confirm that
        # update_page_from_namespace replaces values rather than mutating
        # nested ones.
        update_page_from_namespace(df, ns)
        pages.append(df.copy())
        # Stop once nothing is left, or only one non-option token remains
        # (that token is then parsed as OUTFILE below).
        if (not remaining_argv
                or len(remaining_argv) == 1 and not is_arg(remaining_argv[0])):
            break
    ns = outfile_parser.parse_args(remaining_argv)
    out_file = ns.OUTFILE

    try:
        compat_asyncio_run(build_pdf(pages, out_file))
    except Exception:
        # Top-level boundary: the traceback goes to the debug log only, the
        # user sees a short failure message and a non-zero exit status.
        logging.debug("Exception occurred:\n%s" % traceback.format_exc())
        logging.fatal("Operation failed")
        sys.exit(1)
Example #5
0
File: djpdf.py Project: Unrud/djpdf
def _pdf_format_number(f, decimal_places=PDF_DECIMAL_PLACES):
    """Format f through format_number with trim_leading_zero=True.

    decimal_places is forwarded as format_number's second argument and
    defaults to PDF_DECIMAL_PLACES.
    """
    formatted = format_number(f, decimal_places, trim_leading_zero=True)
    return formatted