def save_toc(final_toc):
    """Embed *final_toc* in the PDF and keep a dumped copy of it on disk.

    The document at ``pdf_path`` is opened, the table of contents is written
    into it with ``write_toc`` and the result is saved to ``toc_pdf_path``.
    Afterwards the output of ``dump_toc(final_toc)`` is written to
    ``final_toc_path``.  All three paths come from the enclosing scope.
    """
    with open_pdf(pdf_path) as pdf:
        write_toc(pdf, final_toc)
        pdf.save(toc_pdf_path)

    with open(final_toc_path, "w") as toc_out:
        serialized = dump_toc(final_toc)
        toc_out.write(serialized)
def find_title_pages():
    """Extract table-of-contents entries from the PDF at ``pdf_path``.

    The TOML text in ``recipe_str`` is parsed, wrapped in a ``Recipe`` and
    handed to ``extract_toc`` together with the opened document.  The number
    of extracted entries is printed before they are returned.
    """
    recipe_dict = toml.loads(recipe_str)

    with open_pdf(pdf_path) as pdf:
        entries = extract_toc(pdf, Recipe(recipe_dict))

    print("load from pdf, length", len(entries))

    return entries
Example #3
0
import os
import io

from mamba import description, it, before
from fitzutils import (open_pdf, ToCEntry, dump_toc)
from pdftocio.tocparser import parse_toc

# Fixture paths are resolved relative to this file rather than the current
# working directory.
dirpath = os.path.dirname(os.path.abspath(__file__))

# valid_file is used below as a readable PDF, invalid_file as a path that
# open_pdf is expected to fail on.
valid_file, invalid_file = (
    os.path.join(dirpath, fixture)
    for fixture in ("files/level2.pdf", "files/nothing.pdf")
)

# Specs for open_pdf: the second positional argument is the exit_on_error
# flag named in the last example.
with description("open_pdf:") as self:
    with it("opens pdf file for reading"):
        with open_pdf(valid_file, False) as doc:
            assert doc is not None
            assert doc.pageCount == 6

    with it("returns None if pdf file is invalid"):
        with open_pdf(invalid_file, False) as doc:
            assert doc is None

    with it("exits if pdf file is invalid and exit_on_error is true"):
        # Only SystemExit counts as "exited".  A bare ``except`` would also
        # swallow unrelated failures (TypeError, NameError, ...) and let the
        # example pass for the wrong reason.  The AssertionError raised below
        # is not caught, so reaching the body still fails the example.
        # NOTE(review): assumes open_pdf exits via sys.exit() -- confirm.
        try:
            with open_pdf(invalid_file, True):
                assert False, "should have exited"
        except SystemExit:
            pass
Example #4
0
def _parse_int_option(flag: str, value: str) -> int:
    """Convert the argument of an integer-valued option, or exit with status 2.

    A non-numeric argument is reported on stderr as a usage error instead of
    surfacing as an uncaught ValueError traceback.
    """
    try:
        return int(value)
    except ValueError:
        print(
            f"error: option {flag} expects an integer, got {value!r}",
            file=sys.stderr,
        )
        sys.exit(2)


def main():
    """Command-line entry point of pdfxmeta.

    Parses ``sys.argv``, opens the input PDF and prints the metadata returned
    by ``extract_meta`` for the optional search pattern, either to stdout or
    to the file given with ``-o/--out``.

    Exit status: 2 for malformed options (including a non-integer argument
    to ``-p`` or ``-a``); 1 when no input PDF is given, the output file
    cannot be opened, or nothing is found.  ``-h`` and ``-V`` print to stderr
    and exit with the default status.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hiVp:a:o:",
            ["help", "ignore-case", "version", "page=", "auto=", "out="]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    ignore_case: bool = False
    page: Optional[int] = None
    auto_level: Optional[int] = None
    out: TextIO = sys.stdout

    for o, a in opts:
        if o in ("-i", "--ignore-case"):
            ignore_case = True
        elif o in ("-p", "--page"):
            page = _parse_int_option(o, a)
        elif o in ("-a", "--auto"):
            auto_level = _parse_int_option(o, a)
        elif o in ("-o", "--out"):
            try:
                out = open(a, "w")
            except IOError as e:
                print("error: can't open file for writing", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-V", "--version"):
            print("pdfxmeta", pdfxmeta.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    argc = len(args)

    if argc < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    pattern: str = ""

    if argc >= 2:
        pattern = args[1]

    # done parsing arguments

    try:
        with open_pdf(path_in) as doc:
            meta = extract_meta(doc, pattern, page, ignore_case)

            # nothing found
            if len(meta) == 0:
                sys.exit(1)

            # should we add \n between each output?
            addnl = not out.isatty()

            # NOTE: an auto level of 0 is falsy and therefore selects the
            # plain print_result output, same as not passing -a at all.
            if auto_level:
                print('\n'.join(
                    [dump_toml(m, auto_level, addnl) for m in meta]
                ), file=out)
            else:
                print('\n'.join(map(print_result, meta)), file=out)
    finally:
        # Flush and release a file opened for -o/--out on every path out of
        # the block above, including the sys.exit(1) taken when nothing is
        # found.  stdout itself is left open.
        if out is not sys.stdout:
            out.close()
Example #5
0
def main():
    """Command-line entry point of pdftocio.

    Works in one of two modes:

    * output mode -- chosen when ``-p/--print`` is given or the ToC input is
      a terminal: the table of contents is read from the PDF with
      ``read_toc`` and printed (pretty-printed with ``-H``);
    * input mode -- otherwise: a table of contents is parsed from the ToC
      input (stdin, or the file given with ``-t/--toc``), written into the
      PDF and saved to ``-o/--out``, or to ``<input>_out<ext>`` by default.

    Exit status: 2 for malformed options; 1 for a missing input PDF, an
    unreadable ToC file, an empty table of contents, or a ValueError, IOError,
    IndexError or KeyboardInterrupt during processing.  With ``-g/--debug``
    those exceptions are re-raised instead of being reported.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "ht:pHo:gV", [
            "help", "toc=", "print", "human-readable", "out=", "debug",
            "version"
        ])
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    toc_file: TextIO = sys.stdin
    print_toc: bool = False
    readable: bool = False
    out: Optional[str] = None
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-p", "--print"):
            print_toc = True
        elif o in ("-t", "--toc"):
            try:
                toc_file = open(a, "r")
            except IOError as e:
                print("error: can't open file for reading", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-o", "--out"):
            out = a
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocio", pdftocio.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if len(args) < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    # done parsing arguments

    try:
        with open_pdf(path_in) as doc:
            if toc_file.isatty() or print_toc:
                # no input from user, switch to output mode and extract the toc
                # of pdf
                toc = read_toc(doc)
                if len(toc) == 0:
                    print("error: no table of contents found", file=sys.stderr)
                    sys.exit(1)

                if readable:
                    print(pprint_toc(toc))
                else:
                    print(dump_toc(toc), end="")
                sys.exit(0)

            # an input is given, so switch to input mode
            toc = parse_toc(toc_file)
            write_toc(doc, toc)

            if out is None:
                # add suffix to input name as output
                pfx, ext = os.path.splitext(path_in)
                out = f"{pfx}_out{ext}"
            doc.save(out)
    except ValueError as e:
        if debug:
            raise
        print("error:", e, file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        if debug:
            raise
        print("error: unable to open file", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)
    except IndexError as e:
        if debug:
            raise
        print("index error:", e, file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        if debug:
            raise
        print("error: interrupted", file=sys.stderr)
        sys.exit(1)
    finally:
        # Release the file opened for -t/--toc on every path out of the
        # block above (normal return, sys.exit, or a re-raised exception).
        # stdin itself is left open.
        if toc_file is not sys.stdin:
            toc_file.close()
Example #6
0
def main():
    """Command-line entry point of pdftocgen.

    Loads a TOML recipe from stdin or from the file given with
    ``-r/--recipe``, generates a table of contents for the input PDF with
    ``gen_toc`` and prints it to stdout or to the file given with
    ``-o/--out`` (pretty-printed with ``-H``; ``-v/--vpos`` is forwarded to
    ``dump_toc``).

    Exit status: 2 for malformed options; 1 for a missing input PDF, a recipe
    or output file that cannot be opened, or a ValueError, IOError or
    KeyboardInterrupt during processing.  With ``-g/--debug`` those
    exceptions are re-raised instead of being reported.
    """
    # parse arguments
    try:
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "hr:Hvo:gV",
            ["help", "recipe=", "human-readable", "vpos", "out=", "debug", "version"]
        )
    except GetoptError as e:
        print(e, file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(2)

    recipe_file: TextIO = sys.stdin
    readable: bool = False
    vpos: bool = False
    out: TextIO = sys.stdout
    debug: bool = False

    for o, a in opts:
        if o in ("-H", "--human-readable"):
            readable = True
        elif o in ("-v", "--vpos"):
            vpos = True
        elif o in ("-r", "--recipe"):
            try:
                recipe_file = open(a, "r")
            except IOError as e:
                print("error: can't open file for reading", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-o", "--out"):
            try:
                out = open(a, "w")
            except IOError as e:
                print("error: can't open file for writing", file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)
        elif o in ("-g", "--debug"):
            debug = True
        elif o in ("-V", "--version"):
            print("pdftocgen", pdftocgen.__version__, file=sys.stderr)
            sys.exit()
        elif o in ("-h", "--help"):
            print(help_s, file=sys.stderr)
            sys.exit()

    if len(args) < 1:
        print("error: no input pdf is given", file=sys.stderr)
        print(usage_s, file=sys.stderr)
        sys.exit(1)

    path_in: str = args[0]
    # done parsing arguments

    try:
        with open_pdf(path_in) as doc:
            recipe = toml.load(recipe_file)
            toc = gen_toc(doc, recipe)
            if readable:
                print(pprint_toc(toc), file=out)
            else:
                print(dump_toc(toc, vpos), end="", file=out)
    except ValueError as e:
        if debug:
            raise
        print("error:", e, file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        if debug:
            raise
        print("error: unable to open file", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        if debug:
            raise
        print("error: interrupted", file=sys.stderr)
        sys.exit(1)
    finally:
        # Release the files opened for -r/--recipe and -o/--out on every path
        # out of the block above, so the generated output is flushed to disk.
        # The standard streams themselves are left open.
        if recipe_file is not sys.stdin:
            recipe_file.close()
        if out is not sys.stdout:
            out.close()