コード例 #1
0
def _pages_from_ranges(ranges):
    if is_iterable_notstr(ranges):
        return set(ranges)
    pages = []
    page_groups = ranges.replace(' ', '').split(',')
    for g in page_groups:
        if not g:
            continue
        try:
            start, end = g.split('-')
        except ValueError:
            pages.append(int(g) - 1)
        else:
            try:
                pages.extend(range(int(start) - 1, int(end)))
            except ValueError:
                raise BadArgsError("invalid page range")

    if not monotonic(pages):
        log.warning(
            "List of pages to process contains duplicate pages, or pages that are "
            "out of order")
    if any(page < 0 for page in pages):
        raise BadArgsError("pages refers to a page number less than 1")

    log.debug("OCRing only these pages: %s", pages)
    return set(pages)
コード例 #2
0
ファイル: api.py プロジェクト: dennischancs/ocrmypdf
def create_options(*, input_file: PathOrIO, output_file: PathOrIO,
                   parser: ArgumentParser, **kwargs):
    cmdline = []
    deferred = []

    for arg, val in kwargs.items():
        if val is None:
            continue

        # These arguments with special handling for which we bypass
        # argparse
        if arg in {'progress_bar', 'plugins'}:
            deferred.append((arg, val))
            continue

        cmd_style_arg = arg.replace('_', '-')

        # Booleans are special: add only if True, omit for False
        if isinstance(val, bool):
            if val:
                cmdline.append(f"--{cmd_style_arg}")
            continue

        if is_iterable_notstr(val):
            for elem in val:
                cmdline.append(f"--{cmd_style_arg}")
                cmdline.append(elem)
            continue

        # We have a parameter
        cmdline.append(f"--{cmd_style_arg}")
        if isinstance(val, (int, float)):
            cmdline.append(str(val))
        elif isinstance(val, str):
            cmdline.append(val)
        elif isinstance(val, Path):
            cmdline.append(str(val))
        else:
            raise TypeError(f"{arg}: {val} ({type(val)})")

    try:
        cmdline.append(os.fspath(input_file))
    except TypeError:
        cmdline.append('stream://input_file')
    try:
        cmdline.append(os.fspath(output_file))
    except TypeError:
        cmdline.append('stream://output_file')

    parser._api_mode = True
    options = parser.parse_args(cmdline)
    for keyword, val in deferred:
        setattr(options, keyword, val)

    if options.input_file == 'stream://input_file':
        options.input_file = input_file
    if options.output_file == 'stream://output_file':
        options.output_file = output_file

    return options
コード例 #3
0
def _pages_from_ranges(ranges: str) -> Set[int]:
    if is_iterable_notstr(ranges):
        return set(ranges)
    pages: List[int] = []
    page_groups = ranges.replace(' ', '').split(',')
    for g in page_groups:
        if not g:
            continue
        try:
            start, end = g.split('-')
        except ValueError:
            pages.append(int(g) - 1)
        else:
            try:
                new_pages = list(range(int(start) - 1, int(end)))
                if not new_pages:
                    raise BadArgsError(f"invalid page subrange '{start}-{end}'")
                pages.extend(new_pages)
            except ValueError:
                raise BadArgsError("invalid page range") from None

    if not pages:
        raise BadArgsError(
            f"The string of page ranges '{ranges}' did not contain any recognizable "
            f"page ranges."
        )

    if not monotonic(pages):
        log.warning(
            "List of pages to process contains duplicate pages, or pages that are "
            "out of order"
        )
    if any(page < 0 for page in pages):
        raise BadArgsError("pages refers to a page number less than 1")

    log.debug("OCRing only these pages: %s", pages)
    return set(pages)