def save_toc(final_toc):
    """Persist *final_toc* both inside a PDF and as a standalone text file.

    The PDF at ``pdf_path`` is opened, the table of contents is written into
    it with ``write_toc`` and the result is saved to ``toc_pdf_path``.  The
    same table of contents, serialised by ``dump_toc``, is then written to
    ``final_toc_path``.
    """
    with open_pdf(pdf_path) as pdf:
        write_toc(pdf, final_toc)
        pdf.save(toc_pdf_path)

    with open(final_toc_path, "w") as toc_out:
        serialized = dump_toc(final_toc)
        toc_out.write(serialized)
def find_title_pages():
    """Extract table-of-contents entries from the PDF using the recipe.

    ``recipe_str`` is parsed as TOML, wrapped in a ``Recipe`` and handed to
    ``extract_toc`` together with the document opened from ``pdf_path``.
    The number of extracted entries is printed before they are returned.
    """
    recipe_dict = toml.loads(recipe_str)

    with open_pdf(pdf_path) as pdf:
        entries = extract_toc(pdf, Recipe(recipe_dict))
        print("load from pdf, length", len(entries))

    return entries
import os
import io

from mamba import description, it, before
from fitzutils import (open_pdf, ToCEntry, dump_toc)
from pdftocio.tocparser import parse_toc

# Fixture PDFs are looked up in a "files" directory next to this spec file.
dirpath = os.path.dirname(os.path.abspath(__file__))

valid_file = os.path.join(dirpath, "files/level2.pdf")
invalid_file = os.path.join(dirpath, "files/nothing.pdf")

with description("open_pdf:") as self:
    with it("opens pdf file for reading"):
        with open_pdf(valid_file, False) as doc:
            assert doc is not None
            assert doc.pageCount == 6

    with it("returns None if pdf file is invalid"):
        with open_pdf(invalid_file, False) as doc:
            assert doc is None

    with it("exits if pdf file is invalid and exit_on_error is true"):
        try:
            with open_pdf(invalid_file, True) as doc:
                assert False, "should have exited"
        except SystemExit:
            # Expected outcome. Catching only SystemExit (rather than a bare
            # ``except``) lets the AssertionError above, and any unrelated
            # failure inside open_pdf, fail this example instead of being
            # silently swallowed.
            # NOTE(review): assumes open_pdf exits via sys.exit -- confirm.
            pass
def _parse_int_option(flag: str, value: str) -> int:
    """Convert the argument of a numeric command-line option to ``int``.

    Prints an error to stderr and exits with status 1 when *value* is not a
    valid integer, so a typo such as ``-p abc`` produces a readable message
    instead of an uncaught ``ValueError`` traceback.
    """
    try:
        return int(value)
    except ValueError:
        print(f"error: {flag} expects an integer, got {value!r}",
              file=sys.stderr)
        sys.exit(1)


def main():
    """Command-line entry point: search a PDF and print the matched metadata.

    Options are read from ``sys.argv``.  The first positional argument is the
    input PDF; the optional second one is the pattern passed to
    ``extract_meta``.  Results go to stdout, or to the file given by
    ``-o/--out``.  With ``-a/--auto LEVEL`` each result is rendered by
    ``dump_toml``; otherwise by ``print_result``.

    Exit status: 2 for an unrecognised option; 1 when an option argument is
    not an integer, the output file cannot be opened, no input PDF is given,
    or nothing is found; 0 after ``--help`` / ``--version``.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hiVp:a:o:",
            ["help", "ignore-case", "version", "page=", "auto=", "out="]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    ignore_case: bool = False
    page: Optional[int] = None
    auto_level: Optional[int] = None
    out: TextIO = sys.stdout

    for o, a in opts:
        if o in ("-i", "--ignore-case"):
            ignore_case = True
        elif o in ("-p", "--page"):
            page = _parse_int_option(o, a)
        elif o in ("-a", "--auto"):
            auto_level = _parse_int_option(o, a)
        elif o in ("-o", "--out"):
            try:
                out = open(a, "w")
            except IOError as e:
                print("error: can't open file for writing", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-V", "--version"):
            print("pdfxmeta", pdfxmeta.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    argc = len(args)

    if argc < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    pattern: str = ""
    if argc >= 2:
        pattern = args[1]

    # done parsing arguments

    try:
        with open_pdf(path_in) as doc:
            meta = extract_meta(doc, pattern, page, ignore_case)

            # nothing found
            if len(meta) == 0:
                sys.exit(1)

            # should we add \n between each output?
            addnl = not out.isatty()

            if auto_level:
                print('\n'.join(
                    [dump_toml(m, auto_level, addnl) for m in meta]
                ), file=out)
            else:
                print('\n'.join(map(print_result, meta)), file=out)
    finally:
        # Close (and thereby flush) the file opened for -o/--out, including
        # on the early sys.exit above; never close the process-wide stdout.
        if out is not sys.stdout:
            out.close()
def main():
    """Command-line entry point: print or replace a PDF's table of contents.

    Options are read from ``sys.argv``; the first positional argument is the
    input PDF.

    * Output mode -- when the ToC input stream is a terminal (nothing piped
      in) or ``-p/--print`` is given: the ToC is read with ``read_toc`` and
      printed to stdout, via ``pprint_toc`` with ``-H`` or ``dump_toc``
      otherwise.  Exits 0, or 1 if the ToC is empty.
    * Input mode -- otherwise: the ToC is parsed from stdin (or the file given
      by ``-t/--toc``) with ``parse_toc``, written into the document and saved
      to ``-o/--out``; the default output name is the input name with
      ``_out`` inserted before the extension.

    ``ValueError``, ``IOError``, ``IndexError`` and ``KeyboardInterrupt`` are
    reported on stderr with exit status 1, or re-raised when ``-g/--debug``
    is set.  An unrecognised option exits with status 2.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "ht:pHo:gV", [
            "help", "toc=", "print", "human-readable", "out=", "debug",
            "version"
        ])
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    toc_file: TextIO = sys.stdin
    print_toc: bool = False
    readable: bool = False
    out: Optional[str] = None
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-p", "--print"):
            print_toc = True
        elif o in ("-t", "--toc"):
            try:
                toc_file = open(a, "r")
            except IOError as e:
                print("error: can't open file for reading", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-o", "--out"):
            out = a
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocio", pdftocio.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if len(args) < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    # done parsing arguments

    try:
        with open_pdf(path_in) as doc:
            if toc_file.isatty() or print_toc:
                # no input from user, switch to output mode and extract the
                # toc of pdf
                toc = read_toc(doc)
                if len(toc) == 0:
                    print("error: no table of contents found",
                          file=sys.stderr)
                    sys.exit(1)

                if readable:
                    print(pprint_toc(toc))
                else:
                    print(dump_toc(toc), end="")
                sys.exit(0)

            # an input is given, so switch to input mode
            toc = parse_toc(toc_file)
            write_toc(doc, toc)

            if out is None:
                # add suffix to input name as output
                pfx, ext = os.path.splitext(path_in)
                out = f"{pfx}_out{ext}"
            doc.save(out)
    except ValueError as e:
        if debug:
            raise
        print("error:", e, file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        if debug:
            raise
        print("error: unable to open file", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)
    except IndexError as e:
        if debug:
            raise
        print("index error:", e, file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        if debug:
            raise
        print("error: interrupted", file=sys.stderr)
        sys.exit(1)
    finally:
        # Release the file opened for -t/--toc on every exit path (including
        # the sys.exit calls above); stdin itself is left untouched.
        if toc_file is not sys.stdin:
            toc_file.close()
def main():
    """Command-line entry point: generate a table of contents from a recipe.

    Options are read from ``sys.argv``; the first positional argument is the
    input PDF.  The recipe is loaded as TOML from stdin, or from the file
    given by ``-r/--recipe``, and passed with the opened document to
    ``gen_toc``.  The result is printed to stdout, or to the file given by
    ``-o/--out``: via ``pprint_toc`` with ``-H``, otherwise via
    ``dump_toc(toc, vpos)`` where ``vpos`` is set by ``-v/--vpos``.

    ``ValueError``, ``IOError`` and ``KeyboardInterrupt`` are reported on
    stderr with exit status 1, or re-raised when ``-g/--debug`` is set.  An
    unrecognised option exits with status 2.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hr:Hvo:gV",
            ["help", "recipe=", "human-readable", "vpos", "out=", "debug",
             "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    recipe_file: TextIO = sys.stdin
    readable: bool = False
    vpos: bool = False
    out: TextIO = sys.stdout
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-v", "--vpos"):
            vpos = True
        elif o in ("-r", "--recipe"):
            try:
                recipe_file = open(a, "r")
            except IOError as e:
                print("error: can't open file for reading", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-o", "--out"):
            try:
                out = open(a, "w")
            except IOError as e:
                print("error: can't open file for writing", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocgen", pdftocgen.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if len(args) < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    # done parsing arguments

    try:
        with open_pdf(path_in) as doc:
            recipe = toml.load(recipe_file)
            toc = gen_toc(doc, recipe)
            if readable:
                print(pprint_toc(toc), file=out)
            else:
                print(dump_toc(toc, vpos), end="", file=out)
    except ValueError as e:
        if debug:
            raise
        print("error:", e, file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        if debug:
            raise
        print("error: unable to open file", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        if debug:
            raise
        print("error: interrupted", file=sys.stderr)
        sys.exit(1)
    finally:
        # Close only the handles opened here for -r/--recipe and -o/--out so
        # the output is flushed on every exit path; the standard streams are
        # left open.
        if recipe_file is not sys.stdin:
            recipe_file.close()
        if out is not sys.stdout:
            out.close()