Python PyPDF2.PageRange Examples

Programming Language: Python

Class/Type: PyPDF2

Method/Function: PageRange

Examples at hotexamples.com: 2

Python PyPDF2.PageRange - 2 examples found. These are the top rated real world Python examples of PyPDF2.PageRange extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

PdfFileReader(30)

PdfFileMerger(30)

PdfFileWriter(30)

PdfMerger(6)

PdfReader(6)

PageRange(2)

DocumentInformation(1)

PDFFileWriter(1)

PDfFileMerger(1)

PdFileReader(1)

PdfFileRader(1)

PdfFileReaader(1)

PdfFileWrite(1)

PdffileReader(1)

PyPDF2(1)

reader(1)

Example #1

Show file

File: split_zohar_2.py Project: stellarnode/python_steps

    ("Шофтим", 1980),
    ("Ки Тецэ", 1983),
    ("Ваелех", 2003),
    ("Хаазину", 2010)
]

# Split the source PDF into one file per table-of-contents entry.
# Each entry is (title, page); `delta` is subtracted from the listed page to
# get the index passed to PageRange (presumably the offset between printed
# page numbers and PDF page indices -- confirm against where delta is set).
print(len(table_of_content))
print(table_of_content)

reader = pdf.PdfFileReader(file_name)
writer = pdf.PdfFileWriter()

print(reader.documentInfo)

num_pages = reader.numPages
# Sentinel entry so the last real section has an end page to slice up to.
table_of_content.append(("End", num_pages + delta))

# Output directory is the same for every section, so set it once.
folder = '/Users/stellarnode/Desktop/sections/'

# enumerate() supplies each entry's position directly; looking it up with
# list.index() is a linear search per call and returns the wrong position
# when two entries compare equal.
for position, i in enumerate(table_of_content):
    if i[1] - delta < num_pages:
        start_page = i[1] - delta
        end_page = table_of_content[position + 1][1] - delta
        page_range = str(start_page) + ":" + str(end_page)
        section = str(start_page + 1) + "_" + i[0] + "_" + str(position + 1) + '.pdf'
        print(page_range)
        print(folder + section)
        merger = pdf.PdfFileMerger()
        try:
            merger.append(file_name, pages = pdf.PageRange(page_range))
            merger.write(folder + section)
        finally:
            # The merger opens file_name itself; close it so the handle is
            # released before the next section is processed.
            merger.close()
    else:
        print('DONE')

Example #2

Show file

def _inkscape_version():
    """Return "0.9" if the configured Inkscape reports a 0.9x version, else "1".

    The probe's stdout is captured as bytes and the child is waited on, so the
    substring test runs against the real output and the process is reaped.
    """
    probe = subprocess.run([inky, "-V", "--no-gui"], stdout=subprocess.PIPE)
    return "0.9" if b"Inkscape 0.9" in probe.stdout else "1"


def _render_svg(svg_path, substitutions, inkscape_args):
    """Substitute tags in an SVG file and pipe the result through Inkscape.

    Args:
        svg_path: path of the SVG source file.
        substitutions: iterable of (tag, replacement) string pairs; every
            literal occurrence of tag is replaced.
        inkscape_args: full argv for the Inkscape process, which must read the
            document from stdin and write the export to stdout.

    Returns:
        (stdout_bytes, stderr_bytes) produced by Inkscape.

    Raises:
        OSError: if the SVG file cannot be read or Inkscape cannot be started.
    """
    with open(svg_path, "rb") as svg_file:
        svg = svg_file.read()
    # Literal byte replacement: no regex or sed script is built from the
    # (partly user-supplied) replacement text, so nothing in it is special.
    for tag, replacement in substitutions:
        if tag:
            svg = svg.replace(tag.encode("utf-8"), replacement.encode("utf-8"))
    inkscape = subprocess.Popen(
        inkscape_args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() feeds stdin, drains both pipes and waits for exit.
    return inkscape.communicate(svg)


def application(env, start_response):
    """WSGI entry point: render a manifest item's SVG pages into a single PDF.

    Reads DOCUMENT_ROOT, PATH_INFO and QUERY_STRING from ``env``. The query
    must carry ``name`` (a key of the manifest), ``format`` (only "pdf" is
    accepted) and a non-empty ``printer``; ``bleed`` is optional and is
    truncated to a whole number for Inkscape's --export-margin.

    Args:
        env: WSGI environment dictionary.
        start_response: WSGI callable taking (status, headers).

    Returns:
        A one-element list holding the response body as bytes: the merged PDF
        on success, otherwise a short HTML-escaped error description.
    """
    import html  # function-scope import; only used to escape the error body

    head = ["200 OK", [("Content-Type", "text/html")]]
    body = b""

    doc_root = env["DOCUMENT_ROOT"]
    page_root = os.path.dirname(env["PATH_INFO"])[1:]  # drop leading /
    auth_path = os.path.join(doc_root, page_root, auth_file)
    manifest_path = os.path.join(doc_root, page_root, manifest_file)

    auth_repl = ""
    try:
        with open(auth_path, "r") as af:
            auth_repl = esc(af.read())
    except Exception:
        print("Error reading authorisation tag file at", auth_path)
        head = ["500 Internal Server Error", [("Content-Type", "text/html")]]

    # Load up the manifest.
    # Trust the OS to keep both files in RAM cache.
    manifest = {}
    try:
        with open(manifest_path, "r") as mf:
            manifest = json.load(mf)
    except Exception:
        print("Error loading manifest file at", manifest_path)
        head = ["500 Internal Server Error", [("Content-Type", "text/html")]]

    # We expect keys for name (a path fragment), printer tag and format.
    query_dict = urllib.parse.parse_qs(env["QUERY_STRING"])

    qprint_tag = ""
    qfile = ""
    qformat = ""

    if query_dict.get("printer"):
        qprint_tag = "Printed by " + esc(query_dict["printer"][0])

    if "name" not in query_dict:
        head = ["400 Bad Query", [("Content-Type", "text/html")]]
    elif query_dict["name"][0] not in manifest:
        head = ["404 Not Found", [("Content-Type", "text/html")]]

    if "format" not in query_dict:
        head = ["400 Bad Query", [("Content-Type", "text/html")]]
    elif query_dict["format"][0].upper() != "PDF":  # only support PDF now
        head = ["400 Bad Query", [("Content-Type", "text/html")]]
    elif not qprint_tag:  # a PDF request must say who printed it
        head = ["400 Bad Query", [("Content-Type", "text/html")]]

    bleed = ""
    if head[0] == "200 OK":
        # Support bleeds in Inkscape 1+ with --export-margin=MARGIN.
        # Right now Inkscape only supports integer millimetre margins for PDFs.
        try:
            bleed = "--export-margin=" + str(
                int(float(query_dict.get("bleed", ["0"])[0])))
        except (ValueError, OverflowError):
            # Non-numeric, NaN or infinite bleed is a client error.
            head = ["400 Bad Query", [("Content-Type", "text/html")]]

    if head[0] == "200 OK":
        # First see which generation of Inkscape we have.
        inkscape_version = _inkscape_version()

        if query_dict["format"][0].upper() == "PDF":
            # handle PDF
            head = ["200 OK", [("Content-Type", "application/pdf")]]
            if inkscape_version == "1":
                qformat = "--export-type=pdf"
            else:
                qformat = "-A"
        # only support PDF export now

        if inkscape_version == "1":
            inkscape_args = [
                inky,
                "--export-dpi=300",
                qformat,
                bleed,
                "--pipe",
                "-o",
                "-",
            ]
        else:
            inkscape_args = [
                inky,
                "-z",
                "--export-dpi=300",
                qformat,
                "/dev/stdout",
                "-f",
                "/dev/stdin",
            ]

        # This is the un-fun part where we need multiple things rendered.
        item = manifest[query_dict["name"][0]]
        pagenums = sorted([int(k) for k in item.keys()])
        substitutions = [(AUTH_TAG, auth_repl), (PRINT_TAG, qprint_tag)]

        merger = PyPDF2.PdfFileMerger()
        page_range = PyPDF2.PageRange(":")
        try:
            for pn in pagenums:
                qfile = os.path.join(doc_root, page_root, SOURCE_DIR,
                                     item[str(pn)][0][:-9] + ".svg")
                print(qfile, pn)
                try:
                    outs, errs = _render_svg(qfile, substitutions,
                                             inkscape_args)
                except OSError as exc:
                    print("Error rendering", qfile, exc)
                    outs, errs = b"", b""
                if not outs:
                    # Nothing to merge: report a server error rather than
                    # letting the PDF parser fail on an empty stream.
                    print("Inkscape produced no output for", qfile, errs)
                    head = ["500 Internal Server Error",
                            [("Content-Type", "text/html")]]
                    break
                merger.append(io.BytesIO(outs), pages=page_range)

            if head[0] == "200 OK":
                # Now get the merged file out.
                bodyIO = io.BytesIO()
                merger.write(bodyIO)
                body = bodyIO.getvalue()
        finally:
            merger.close()

        if head[0] == "200 OK":
            # Name the file we return.
            head[1].append((
                "Content-Disposition",
                ('inline; filename="' + re.sub(
                    "[^0-9a-zA-Z-_.]+",
                    "-",
                    query_dict["name"][0] + "." +
                    query_dict["format"][0].lower(),
                ) + '"'),
            ))
            # Cache it for a day, but only in the browser. Cache-Control
            # directives are comma-separated.
            head[1].append(("Cache-Control", "private, max-age=86400"))

    # Check head status again rather than an else, in case rendering failed.
    if not head[0] == "200 OK":
        # The query is user-controlled and the body is served as HTML, so it
        # must be escaped before being echoed back.
        body = html.escape(head[0] + " " + repr(query_dict)).encode("utf8")
        # Reverts whatever we did with Content-Type/Content-Disposition.
        head[1] = [("Content-Type", "text/html")]

    start_response(head[0], head[1])
    return [body]