def _pages_from_ranges(ranges):
    """Turn a page selection into a set of zero-based page indices.

    ``ranges`` is normally a string of comma-separated groups, where each
    group is either a single one-based page number (``"3"``) or an inclusive
    one-based subrange (``"2-5"``). Spaces are ignored and empty groups
    (stray commas) are skipped, so ``"1, 3-4,"`` yields ``{0, 2, 3}``.

    If ``is_iterable_notstr(ranges)`` is true, the elements are returned as a
    set exactly as given: no one-based to zero-based shift and no validation.

    A warning is logged when the expanded page list fails the ``monotonic``
    check (duplicates or out-of-order pages); the result is still returned.

    Raises:
        BadArgsError: if a group is not a valid number or subrange, if a
            subrange is empty (e.g. ``"5-3"``), if no pages were found at all,
            or if any page number is less than 1.
    """
    if is_iterable_notstr(ranges):
        return set(ranges)

    pages = []
    for group in ranges.replace(' ', '').split(','):
        if not group:
            continue
        try:
            start, end = group.split('-')
        except ValueError:
            # Not exactly one '-', so treat the group as a single page number.
            # A malformed token such as "a" or "1-2-3" is reported as a bad
            # argument rather than escaping as a raw ValueError from int().
            try:
                pages.append(int(group) - 1)
            except ValueError:
                raise BadArgsError(f"invalid page number '{group}'") from None
            continue

        try:
            # One-based inclusive [start, end] -> zero-based half-open range.
            new_pages = range(int(start) - 1, int(end))
        except ValueError:
            raise BadArgsError("invalid page range") from None
        if not new_pages:
            # A reversed subrange such as "5-3" would otherwise vanish silently.
            raise BadArgsError(f"invalid page subrange '{start}-{end}'")
        pages.extend(new_pages)

    if not pages:
        raise BadArgsError(
            f"The string of page ranges '{ranges}' did not contain any recognizable "
            f"page ranges."
        )

    if not monotonic(pages):
        log.warning(
            "List of pages to process contains duplicate pages, or pages that are "
            "out of order"
        )
    if any(page < 0 for page in pages):
        raise BadArgsError("pages refers to a page number less than 1")

    log.debug("OCRing only these pages: %s", pages)
    return set(pages)
def create_options(
    *, input_file: PathOrIO, output_file: PathOrIO, parser: ArgumentParser, **kwargs
):
    """Build an options namespace by replaying keyword arguments through ``parser``.

    Each keyword argument is translated into its command line form
    (``some_arg=value`` becomes ``--some-arg value``), the input and output
    files are appended as the final two arguments, and the resulting argument
    list is parsed with ``parser.parse_args``.

    Translation rules:

    * ``None`` values are skipped entirely.
    * ``progress_bar`` and ``plugins`` bypass argparse and are set directly
      on the parsed namespace afterwards.
    * ``bool`` values add the bare flag when true and nothing when false.
    * Values for which ``is_iterable_notstr`` is true repeat the option once
      per element.
    * ``str``, ``int``, ``float`` and ``Path`` values (and iterable elements)
      are passed as strings.

    If ``os.fspath`` rejects ``input_file`` or ``output_file`` with
    ``TypeError``, a placeholder string is parsed in its place and the
    original object is restored on the returned namespace.

    Args:
        input_file: Path-like input, or an object ``os.fspath`` cannot convert.
        output_file: Path-like output, or an object ``os.fspath`` cannot
            convert.
        parser: Parser used to interpret the generated argument list. Its
            private ``_api_mode`` attribute is set to ``True`` before parsing.
        **kwargs: Option names (with underscores) mapped to their values.

    Returns:
        The namespace returned by ``parser.parse_args``, with deferred
        arguments and any placeholder file arguments filled in.

    Raises:
        TypeError: if a value (or an element of an iterable value) is not a
            ``str``, ``int``, ``float`` or ``Path``.
    """

    def as_cmdline_value(name, value):
        # argparse only understands strings, so normalize every supported type.
        if isinstance(value, str):
            return value
        if isinstance(value, (int, float, Path)):
            return str(value)
        raise TypeError(f"{name}: {value} ({type(value)})")

    cmdline = []
    deferred = []

    for arg, val in kwargs.items():
        if val is None:
            continue

        # These arguments with special handling for which we bypass
        # argparse
        if arg in {'progress_bar', 'plugins'}:
            deferred.append((arg, val))
            continue

        cmd_style_arg = arg.replace('_', '-')

        # Booleans are special: add only if True, omit for False
        if isinstance(val, bool):
            if val:
                cmdline.append(f"--{cmd_style_arg}")
            continue

        if is_iterable_notstr(val):
            for elem in val:
                cmdline.append(f"--{cmd_style_arg}")
                # Convert elements the same way as scalar values; a raw int
                # or Path here would otherwise break inside argparse.
                cmdline.append(as_cmdline_value(arg, elem))
            continue

        # We have a parameter
        cmdline.append(f"--{cmd_style_arg}")
        cmdline.append(as_cmdline_value(arg, val))

    # Objects that os.fspath cannot convert are represented by a placeholder
    # during parsing and swapped back in afterwards.
    try:
        cmdline.append(os.fspath(input_file))
    except TypeError:
        cmdline.append('stream://input_file')
    try:
        cmdline.append(os.fspath(output_file))
    except TypeError:
        cmdline.append('stream://output_file')

    parser._api_mode = True
    options = parser.parse_args(cmdline)

    for keyword, val in deferred:
        setattr(options, keyword, val)

    if options.input_file == 'stream://input_file':
        options.input_file = input_file
    if options.output_file == 'stream://output_file':
        options.output_file = output_file

    return options
def _pages_from_ranges(ranges: str) -> Set[int]:
    """Turn a page selection into a set of zero-based page indices.

    ``ranges`` is normally a string of comma-separated groups, where each
    group is either a single one-based page number (``"3"``) or an inclusive
    one-based subrange (``"2-5"``). Spaces are ignored and empty groups
    (stray commas) are skipped, so ``"1, 3-4,"`` yields ``{0, 2, 3}``.

    Despite the annotation, a value for which ``is_iterable_notstr(ranges)``
    is true is also accepted; its elements are returned as a set exactly as
    given, with no one-based to zero-based shift and no validation.

    A warning is logged when the expanded page list fails the ``monotonic``
    check (duplicates or out-of-order pages); the result is still returned.

    Raises:
        BadArgsError: if a group is not a valid number or subrange, if a
            subrange is empty (e.g. ``"5-3"``), if no pages were found at all,
            or if any page number is less than 1.
    """
    if is_iterable_notstr(ranges):
        return set(ranges)

    pages: List[int] = []
    page_groups = ranges.replace(' ', '').split(',')
    for g in page_groups:
        if not g:
            continue
        try:
            start, end = g.split('-')
        except ValueError:
            # Not exactly one '-', so treat the group as a single page number.
            # A malformed token such as "a" or "1-2-3" is reported as a bad
            # argument rather than escaping as a raw ValueError from int().
            try:
                pages.append(int(g) - 1)
            except ValueError:
                raise BadArgsError(f"invalid page number '{g}'") from None
        else:
            try:
                # One-based inclusive [start, end] -> zero-based half-open range.
                new_pages = list(range(int(start) - 1, int(end)))
                if not new_pages:
                    # Reversed subranges such as "5-3" expand to nothing.
                    raise BadArgsError(f"invalid page subrange '{start}-{end}'")
                pages.extend(new_pages)
            except ValueError:
                raise BadArgsError("invalid page range") from None

    if not pages:
        raise BadArgsError(
            f"The string of page ranges '{ranges}' did not contain any recognizable "
            f"page ranges."
        )

    if not monotonic(pages):
        log.warning(
            "List of pages to process contains duplicate pages, or pages that are "
            "out of order"
        )
    if any(page < 0 for page in pages):
        raise BadArgsError("pages refers to a page number less than 1")

    log.debug("OCRing only these pages: %s", pages)
    return set(pages)