Python PdfFileMerger.addBookmark Exemples, PyPDF2.PdfFileMerger.addBookmark Python Exemples

Exemple #1

0

Afficher le fichier

def merge_pdf(path: str, output_filename: str, bookmark_separator: str = "", bookmark_start_index: int = 1,
              password: str = "") -> None:
    """
    Merge every PDF returned by ``get_pdf_names(path)`` into one PDF, adding one bookmark per source file.

    :param str path: folder path handed to ``get_pdf_names``
    :param str output_filename: output file name (including its path); an existing file is removed first
    :param str bookmark_separator: bookmark title prefix; each bookmark is named prefix + running index.
        When empty, each bookmark is named after its source file instead
    :param int bookmark_start_index: first value of the running index used as bookmark suffix
    :param str password: password used to decrypt the PDFs that are encrypted
    """
    if os.path.exists(output_filename):
        os.remove(output_filename)
    os.chmod(path, stat.S_IRWXU)  # ensure we have permission
    output_pdf = PdfFileMerger()
    output_page_num = 0
    for index, pdf_path_with_name in enumerate(get_pdf_names(path), bookmark_start_index):
        print(pdf_path_with_name)
        with open(pdf_path_with_name, "rb") as pdf:
            content = PdfFileReader(pdf)
            if content.isEncrypted:
                content.decrypt(password)
            if bookmark_separator:
                title = bookmark_separator + str(index)
            else:
                # Accept both "\" and "/" as directory separators: splitting on "\" alone
                # yields the whole path (or an empty title for "./x.pdf") on non-Windows paths.
                file_name = pdf_path_with_name.replace("\\", "/").rsplit("/", 1)[-1]
                title = file_name.split(".")[0]
            # The bookmark points at the first page this file will occupy in the output.
            output_pdf.addBookmark(title, output_page_num)
            output_pdf.append(content)
            output_page_num += content.numPages

    with open(output_filename, "wb") as f:
        output_pdf.write(f)
    print("mission complete")

Exemple #2

0

Afficher le fichier

def merge_pdf():
    """Combine two hard-coded PDFs into ``merge_pdf.pdf`` and add a single bookmark.

    The merger is closed in a ``finally`` clause so the file handles it opened
    for the path arguments are released even when merging or writing fails.
    """
    # Create the merger instance that collects the pages.
    pdf_merger = PdfFileMerger()
    try:
        # Start with Week1_1.pdf.
        pdf_merger.append('Week1_1.pdf')
        # Merge ex1.pdf in at position 0 (the original author's note reads
        # "after page 0" -- TODO confirm against the PyPDF2 version in use).
        pdf_merger.merge(0, 'ex1.pdf')
        # Add a bookmark that targets page index 1.
        pdf_merger.addBookmark('这是一个书签', 1)
        # Write the merged document to disk.
        pdf_merger.write('merge_pdf.pdf')
    finally:
        pdf_merger.close()

Exemple #3

0

Afficher le fichier

Fichier : mergePDF.py Projet : gudeqing/biodev

def merger(files: list, out):
    """Merge the PDFs in ``files`` into ``out`` with one top-level bookmark per file.

    :param files: paths of the PDFs to merge, in output order
    :param out: path of the merged PDF to write

    Each bookmark is titled with the file's base name up to the first dot and
    targets the first page that file occupies in the merged document.
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in files:
            # Record the offset before appending: using the file's position in
            # ``files`` is only right when every earlier file has exactly one page.
            first_page = len(pdf_merger.pages)
            pdf_merger.append(path)
            title = os.path.basename(path).split('.', 1)[0]
            pdf_merger.addBookmark(title, first_page, parent=None)

        pdf_merger.setPageLayout(layout='/TwoColumnLeft')

        with open(out, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # Release the handles the merger opened for the path arguments.
        pdf_merger.close()

Exemple #4

0

Afficher le fichier

Fichier : pdfoperator.py Projet : mgjean/pdf-surgeon

 def bind(self, *files, outputDir="./"):
     """Merge ``files`` into ``<outputDir>/output-binder.pdf`` and delete blank-page inputs.

     :param files: PDFs to merge, in output order
     :param outputDir: directory that receives ``output-binder.pdf``

     Every input gets a bookmark ``page-<n>`` at page index ``n``, where ``n`` is
     the input's position in ``files``.
     NOTE(review): that index equals the real page number only when each input
     is a single page -- confirm with the callers.
     Inputs whose name contains ``"blank-page"`` are removed from disk after a
     successful write.
     """
     clean = [file for file in files if "blank-page" in str(file)]
     merger = PdfFileMerger()
     try:
         # ``with`` closes the output file even when merging or writing raises.
         with open("%s/output-binder.pdf" % outputDir, "wb") as output:
             for num, file in enumerate(files):
                 merger.append(file, import_bookmarks=False)
                 merger.addBookmark("page-%s" % num, num, parent=None)
             merger.write(output)
     finally:
         merger.close()
     # Only reached on success, matching the original: blank pages are kept if merging failed.
     for stale in clean:
         os.remove(stale)

Exemple #5

0

Afficher le fichier

Fichier : spider_zhihu.py Projet : xut226/crawlerxt

def merge_pdf(dir, titlelist):
    """Merge the numbered PDFs found under ``dir`` and bookmark each with its title.

    :param dir: directory that is walked for PDF files
    :param titlelist: bookmark titles, paired with the files in sorted order

    Files are sorted by the integer their name starts with; a file name without
    a leading number raises ``AttributeError`` from the sort key.
    The result is written to ``dir_save_pdf + 'merge.pdf'`` (``dir_save_pdf`` is
    defined outside this function) once per walked directory.
    NOTE(review): the page counter restarts for every directory while the merger
    keeps accumulating pages, so bookmarks are only reliable for a flat directory.
    """
    pdf_manage = PdfFileMerger()
    for root, _subdirs, filenames in os.walk(dir):
        filenames_sorted = sorted(
            filenames, key=lambda i: int(re.match(r'(\d+)', i).group()))
        merge_page_count = 0
        # Use the ``titlelist`` parameter; the original read an undefined ``title_list``.
        for filename, title in zip(filenames_sorted, titlelist):
            # os.path.join supplies the separator that ``root + filename`` omitted.
            file = os.path.join(root, filename)
            # Appending a PdfFileReader copies its stream, so the handle can be closed right away.
            with open(file, 'rb') as handle:
                f_input = PdfFileReader(handle)
                pdf_manage.append(f_input)
                pdf_manage.addBookmark(title, merge_page_count)  # add the bookmark
                merge_page_count += f_input.getNumPages()
        with open(dir_save_pdf + 'merge.pdf', 'wb') as f_output:
            pdf_manage.write(f_output)

Exemple #6

0

Afficher le fichier

Fichier : packetMaster.py Projet : longooglite/mps

	def mergeContents(self,tocFullPath,packetMeta,bookmarks):
		"""Merge the TOC and every packet item into the PDF at ``self.packetPath``.

		tocFullPath -- path of the table-of-contents PDF; skipped when
			``self.configDict['omitTOC']`` is truthy
		packetMeta  -- dict with a 'groups' list; each group has an 'items' list
			whose entries may carry a 'file_name' path
		bookmarks   -- list of dicts with 'description', a 'page' that has 1
			subtracted before use, and an optional 'children' list of the same shape

		When the TOC is included, the bookmarks collected while appending are
		discarded and replaced by the two-level ``bookmarks`` tree.
		"""
		merger = PdfFileMerger()
		includeTOC = not self.configDict.get('omitTOC',False)
		handles = []
		try:
			# PDFs are binary: text mode corrupts them on Windows and fails on Python 3.
			if includeTOC:
				handles.append(open(tocFullPath,'rb'))
				merger.append(handles[-1],tocFullPath)
			if packetMeta:
				for packetgroup in packetMeta.get('groups',[]):
					for item in packetgroup.get('items',[]):
						fileName = item.get('file_name','')
						if fileName:
							handles.append(open(fileName,'rb'))
							merger.append(handles[-1],fileName)
			if includeTOC:
				merger.bookmarks = []
				for bookmark in bookmarks:
					parent_ref = merger.addBookmark(bookmark.get("description",''),bookmark.get('page',1)-1)
					for child in bookmark.get("children",[]):
						merger.addBookmark(child.get("description",''),child.get('page',1)-1,parent_ref)
			with open(self.packetPath, "wb") as output:
				merger.write(output)
		finally:
			merger.close()
			# Inputs stay open until after the write because the merger may read them lazily.
			for handle in handles:
				handle.close()

Exemple #7

0

Afficher le fichier

Fichier : convert.py Projet : caizefeng/html2pdf_bookdown

def merge_and_bookmark(contents_list, book_name):
    """Merge per-section PDFs into ``<book_name>.pdf`` with a nested bookmark tree.

    :param contents_list: sections in reading order; each is a mapping with
        ``"name"``, ``"level"``, ``"page_num"`` and ``"single_pdf_path"``
    :param book_name: title of the root bookmark and stem of the output file

    A stack of bookmark references tracks the current nesting: before each
    section it is popped back to the section's parent, so the new bookmark is
    attached under the nearest preceding bookmark of a shallower level.
    """
    from contextlib import ExitStack  # local import keeps the block self-contained

    pdfmerger = PdfFileMerger()
    page_idx = 0
    last_level = -1
    level_list = [pdfmerger.addBookmark(book_name,
                                        page_idx)]  # Like a depth meter
    print("Merging and bookmarking PDFs:")
    # The inputs must stay open until the merged file is written, so they are
    # registered on an ExitStack and all closed together afterwards.
    with ExitStack() as open_inputs:
        for section in tqdm(contents_list):
            # Drop the entries that are at the same or a deeper level than this section.
            for _ in range(last_level - section["level"] + 1):
                level_list.pop()
            parent = level_list[-1]
            level_list.append(
                pdfmerger.addBookmark(section["name"], page_idx, parent))
            section_pdf = open_inputs.enter_context(
                open(section["single_pdf_path"], 'rb'))
            pdfmerger.append(section_pdf)

            page_idx += section["page_num"]
            last_level = section["level"]

        with open("{}.pdf".format(book_name), 'wb') as f_output:
            pdfmerger.write(f_output)
        pdfmerger.close()

Exemple #8

0

Afficher le fichier

Fichier : pdf_merger.py Projet : ihmeuw/covid-historical-model

def pdf_merger(pdfs: List, location_names: List, parent_names: List, levels: List, outfile: str):
    """Merge ``pdfs`` into ``outfile`` with one bookmark per input.

    :param pdfs: paths of the input PDFs; each must end with ``.pdf``
    :param location_names: bookmark title for each input
    :param parent_names: title of the bookmark each entry is nested under
    :param levels: nesting level for each input; level 0 entries are top-level
    :param outfile: output path; must end with ``.pdf`` and is replaced if it exists
    :raises AssertionError: when an input or the output path does not end with ``.pdf``

    The four lists are zipped, so extra entries in the longer ones are ignored.
    Each bookmark targets page index ``i``, the input's position in ``pdfs``.
    NOTE(review): that equals the real page number only for single-page inputs
    -- confirm with the callers.
    """
    # how are inputs specified
    assert all(i.endswith('.pdf') for i in pdfs), 'Not all files passed into `pdfs` are actual PDFs.'

    # compile PDFs
    merger = PdfFileMerger()
    for i, (pdf, location_name, parent_name, level) in enumerate(zip(pdfs, location_names, parent_names, levels)):
        merger.append(pdf)
        if parent_name in location_names and level > 0:
            # A child named like its parent gets a trailing space, presumably so the
            # two titles stay distinguishable when the parent is looked up by title.
            title = f'{location_name} ' if parent_name == location_name else location_name
            merger.addBookmark(title, i, parent_name)
        else:
            merger.addBookmark(location_name, i)

    # get output file (if already exists, delete before writing new file)
    assert outfile.endswith('.pdf'), 'Provided output file is not a PDF.'
    if os.path.exists(outfile):
        os.remove(outfile)

    # write compiled PDF
    merger.write(outfile)
    merger.close()

Exemple #9

0

Afficher le fichier

Fichier : pdf_pager.py Projet : MarkStefanovic/pdf_pager

    def add_bookmarks(self, input_path: str, output_path: str) -> None:
        """Copy the PDF at ``input_path`` to ``output_path`` and add [nested] bookmarks.

        The bookmarks come from ``self.bookmarks``. An entry that names a parent
        bookmark is nested under it; the parent is created the first time it is
        seen, at the page ``self.get_page_numbers()`` gives for the entry's
        ``input_path``. An entry with no parent and no name is skipped.

        PyPDF2 PdfFileWriter documentation: https://pythonhosted.org/PyPDF2/PdfFileWriter.html
        """

        merger = PdfFileMerger()
        # ``with`` closes both files even when merging or writing raises.
        with open(input_path, "rb") as input_pdf:
            reader = PdfFileReader(input_pdf)
            total_pages = reader.getNumPages()
            with open(output_path, "wb") as output_pdf:
                merger.append(fileobj=input_pdf, pages=(0, total_pages))
                logger.info('Bookmarks: {}'.format(self.bookmarks))
                page_numbers = self.get_page_numbers()
                parent_bookmarks = {}
                for val in self.bookmarks:
                    if val.parent_bookmark_name:
                        if val.parent_bookmark_name not in parent_bookmarks:
                            parent_bookmarks[val.parent_bookmark_name] = merger.addBookmark(
                                title=val.parent_bookmark_name
                                , pagenum=page_numbers.get(val.input_path)
                                , parent=None
                            )
                        merger.addBookmark(
                            title=val.bookmark_name
                            , pagenum=val.page_number
                            , parent=parent_bookmarks.get(val.parent_bookmark_name)
                        )
                    elif val.bookmark_name:
                        merger.addBookmark(
                            title=val.bookmark_name
                            , pagenum=val.page_number
                            , parent=None
                        )
                logger.info('Parent bookmarks: {}'.format(parent_bookmarks))
                merger.write(output_pdf)

Exemple #10

0

Afficher le fichier

Fichier : pdf.py Projet : baris8/TeX-To-PDF

    def bookmarks_hinzufuegen(self, bookmarks):
        """Add bookmarks to the PDF held in ``self.pdf_bytes`` and store the result back.

        :param bookmarks: iterable of ``(text, page)`` pairs, one per bookmark

        When no PDF bytes exist yet, ``self.kompiliere_pdf()`` is called first.
        """
        log.info("Füge dem PDF folgende Bookmarks hinzu: %r", bookmarks)
        if not self.pdf_bytes:
            log.warning(
                "Es existieren noch keine PDF-Bytes, "
                "daher wird das PDF kompiliert"
            )
            self.kompiliere_pdf()

        merger = PdfFileMerger()
        merger.append(BytesIO(self.pdf_bytes))

        for entry in bookmarks:
            titel, seitenzahl = entry
            log.debug(
                "Füge folgende Bookmark auf Seite %s hinzu: %s",
                seitenzahl, titel
            )
            merger.addBookmark(titel, seitenzahl, None)

        # Render into an in-memory buffer and keep only the resulting bytes.
        puffer = BytesIO()
        try:
            merger.write(puffer)
            self.pdf_bytes = puffer.getvalue()
        finally:
            puffer.close()

Exemple #11

0

Afficher le fichier

def assignToSpot(index):
    """Assemble the monthly report by merging one page per (category, name) entry.

    ``index`` is walked as ``index[x][1][i][z]``: ``x`` selects a group, ``i`` a
    category slot and ``z`` an entry name (``root``) inside that slot.  For each
    entry, every row of the module-level ``indexList`` whose first two fields
    equal ``(i, root)`` is resolved with ``getInterestFile`` and its single page
    is appended.  When no page was appended for an entry, a page obtained from
    ``noneLocation`` is appended instead and a message is added to the error list.

    Bookmarks are nested under a top-level "District Packages" bookmark.

    Returns the ``PdfFileMerger``; nothing is written to disk here.  The
    collected error tuples are printed before returning.

    NOTE(review): Python 2 code (print statement).  ``indexList``,
    ``storageLocation``, ``noneLocation``, ``typeName``, ``getInterestFile`` and
    ``drawProgressBar`` are defined outside this function.
    """
    errorList = []
    monthlyReport = PdfFileMerger()
    monthlyReportLength = 0
    DistrictResult = monthlyReport.addBookmark(
        "District Packages", 0, parent=None
    )  # change this later if added more parent bookmarks (Pending Importance: Medium)
    DistrictName = None
    #  above this
    dex = index
    for x in range(0, len(dex)):
        # var = x[0]
        drawProgressBar(int(50 + x * 100 / len(dex) / 2), 50)
        doneWithOne = True
        for i in range(0, len(dex[x][1])):

            # Rebinding ``i`` does not affect the loop counter.  ``doneWithOne``
            # is still True at loop index 1 and False from loop index 2 on, so
            # index 1 is handled as slot 2 and index 2 as slot 1: the slots are
            # visited in the order 0, 2, 1, 3, ...
            if i == 1 and doneWithOne:
                i = 2
            if i == 2 and not doneWithOne:
                i = 1
            for z in range(0, len(dex[x][1][i])):
                root = dex[x][1][i][z]
                foundFile = False
                # Both flags are reset for every entry, so they only prevent
                # duplicate bookmarks while scanning ``indexList`` for one entry.
                firstPageOfWriteUpMerged = False
                firstPageOfTypeMerged = False
                # Map names used in ``index`` to the spelling compared against ``indexList``.
                if root == "7Harlan" and i == 2:  # Introduce Canada and WPE exceptions (Pending importance : High)
                    root = "Harlan"
                if root == "GSW - Large Projects":
                    root = "Large Projects"
                if root == "MARHE":
                    root = "MAR  "
                    # print root
                for s in range(0, len(indexList)):
                    if indexList[s][0] == i:
                        if indexList[s][1] == root:
                            # print indexList[s]
                            # print ("found it")
                            interestFile = getInterestFile(s, storageLocation)
                            if interestFile is not None:
                                # interestFile is indexed as (page index, file object).
                                fp = interestFile[1]
                                interestLoc = interestFile[0]
                                # Append exactly one page: [interestLoc, interestLoc + 1).
                                monthlyReport.append(fileobj=fp,
                                                     pages=(interestLoc,
                                                            interestLoc + 1),
                                                     import_bookmarks=False)
                                monthlyReportLength = monthlyReportLength + 1
                                foundFile = True

                                # First entry of slot 0 starts a new bookmark named
                                # after ``root`` under "District Packages".
                                if i == 0 and z == 0 and not firstPageOfWriteUpMerged:
                                    DistrictName = monthlyReport.addBookmark(
                                        str(root), monthlyReportLength - 1,
                                        DistrictResult)
                                    firstPageOfWriteUpMerged = True
                                # First entry of each slot gets a category bookmark
                                # under the current ``DistrictName`` bookmark.
                                if DistrictName is not None:
                                    if i == 1 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "SG&A", monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 2 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "District Package",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 3 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Job Profitability",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 4 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "A/R", monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 5 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Bid Logs",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 0 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Write Up",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                if not foundFile:
                    # print ("Can't find ", root)
                    # Pick the page from ``noneLocation`` for this slot and record the miss.
                    # Slots 1 and 2 use each other's index (2 -> 1, 1 -> 2) below.
                    # NOTE(review): for a slot outside 0..5 ``noneFile`` keeps its previous
                    # value, or is unbound on the first miss -- confirm slots never exceed 5.
                    if i == 5:  # This whole thing needs to be done in a for loop again, come back later :D (Pending Importance = Low)
                        noneFile = getInterestFile(5, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[5],
                             "at page ", monthlyReportLength))
                    if i == 3:
                        noneFile = getInterestFile(3, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[3],
                             "at page ", monthlyReportLength))
                    if i == 4:
                        noneFile = getInterestFile(4, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[4],
                             "at page ", monthlyReportLength))
                    if i == 2:
                        noneFile = getInterestFile(1, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[1],
                             "at page ", monthlyReportLength))
                    if i == 1:
                        noneFile = getInterestFile(2, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[2],
                             "at page ", monthlyReportLength))
                    if i == 0:
                        noneFile = getInterestFile(0, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[0],
                             "at page ", monthlyReportLength))

                    # The counter is advanced before the page is appended.
                    monthlyReportLength = monthlyReportLength + 1
                    fp2 = noneFile[1]
                    intLoc = noneFile[0]
                    monthlyReport.append(fp2,
                                         pages=(intLoc, intLoc + 1),
                                         import_bookmarks=False)
            # Cleared once any slot other than 0 has been processed (see the swap above).
            if i != 0:
                doneWithOne = False
    for x in errorList:
        print "\n", x
    return monthlyReport

Exemple #12

0

Afficher le fichier

Fichier : slidesmerge.py Projet : tintincita/RprogCoursera

from PyPDF2 import PdfFileMerger, PdfFileReader
import os

# Merge the numbered PDFs in ./slide into one document placed in the parent
# directory, with one bookmark per source file.
os.chdir('slide')
merger = PdfFileMerger()
label = 0  # page index at which the next file starts in the merged output
# File names must start with an integer before the first dot; it is the sort key.
for filename in sorted(os.listdir('.'),key=lambda k:int(k.split('.')[0])):
    with open(filename,'rb') as f:
        foo = PdfFileReader(f)
        p = foo.getNumPages()
        merger.append(foo)
        # splitext drops the extension whatever its length (the original cut 4 characters).
        merger.addBookmark(os.path.splitext(filename)[0], label, parent=None)
        label += p
        print('{}，共{}页'.format(filename, p))
# Build the path portably: a literal "..\" only means "parent directory" on Windows.
merger.write(os.path.join(os.pardir, "document-output.pdf"))
merger.close()

Exemple #13

0

Afficher le fichier

def collect(project_folder, out_filename):
    """Collect PDF files from a folder into one main document.

    The ``*.pdf`` files directly inside ``project_folder`` are classified by
    searching their full path for keywords:

    * cover page: "титул" or "обложка" (case-insensitive);
    * change registration table: "таблица регистрации изменений"
      (case-insensitive);
    * main document: "ПЗ", "Пояснительная записка" or "пояснительная записка";
    * information and certification sheet: "УЛ", "ИУЛ" or
      "Информационно-удостоверяющий лист".

    The first match of each kind is used. The output holds the cover page (if
    any) followed by the main document; when a cover page exists, the main
    document's first pages are skipped, as many as the cover page has. The
    change registration table (if any) is appended last and bookmarked.

    Arguments:
        project_folder {str} -- Path to directory with PDF files
        out_filename {str} -- Path of the merged PDF to write

    Raises:
        IndexError -- when no file matches the main-document keywords
    """
    from contextlib import ExitStack  # local import keeps the block self-contained

    project_folder = Path(str(project_folder)).resolve()

    # List of PDF files paths
    files_list = sorted(Path(project_folder).glob("*.pdf"))
    files_list_str = [str(s) for s in files_list]

    # Regular expressions for determine files type
    info_cert_page_re = re.compile(
        r"УЛ|ИУЛ|Информационно-удостоверяющий лист|информационно-удостоверяющий лист"
    )
    title_page_re = re.compile(r"титул|обложка", re.IGNORECASE)
    changes_page_re = re.compile(r"таблица регистрации изменений",
                                 re.IGNORECASE)
    main_doc_re = re.compile(r"ПЗ|Пояснительная записка|пояснительная записка")

    # Get files paths
    # NOTE(review): info_cert_path is computed but the sheet is never merged below.
    info_cert_path = list(filter(info_cert_page_re.search, files_list_str))
    title_path = list(filter(title_page_re.search, files_list_str))
    changes_path = list(filter(changes_page_re.search, files_list_str))
    main_doc_path = list(filter(main_doc_re.search, files_list_str))

    merger = PdfFileMerger()
    # Every input is registered on the stack so it is closed once the output is
    # written, or earlier if anything raises.
    with ExitStack() as inputs:
        main_doc_pdf = PdfFileReader(
            inputs.enter_context(open(main_doc_path[0], "rb")))

        # Append title pages if exist to main document, or just append main document
        if title_path:
            title_page_pdf = PdfFileReader(
                inputs.enter_context(open(title_path[0], "rb")))
            title_page_num = title_page_pdf.getNumPages()

            main_doc_page_num = main_doc_pdf.getNumPages()
            merger.append(fileobj=title_page_pdf)
            print("Title page appended to document.")
            # Insert right after the title pages. The original hard-coded
            # position 2, which lands inside a title file of more than 2 pages.
            merger.merge(position=title_page_num,
                         fileobj=main_doc_pdf,
                         pages=(title_page_num, main_doc_page_num))
            print("Main doc appended to document.")
        else:
            merger.append(fileobj=main_doc_pdf)
            print("Main doc appended to document.")

        # Append Table of changes page to main document
        if changes_path:
            changes_page_pdf = PdfFileReader(
                inputs.enter_context(open(changes_path[0], "rb")))
            merger.append(fileobj=changes_page_pdf)
            print("Change Registration Table page appended to document.")

            # Adding bookmark to inserted Table of changes page
            merger.addBookmark("Таблица регистрации изменений",
                               main_doc_pdf.getNumPages())

        with open(out_filename, "wb") as output:
            merger.write(output)
        merger.close()

Exemple #14

0

Afficher le fichier

Fichier : pdf-bookmarks.py Projet : michaelamie/pdf-bookmarks

#!/usr/bin/env python

import os.path, json
from argparse import ArgumentParser
from PyPDF2 import PdfFileMerger

if __name__ == '__main__':
    # Usage: pdf-bookmarks.py PDF BOOKMARKS_JSON OFFSET
    # Writes "<PDF stem>-bookmarked.pdf" next to the input, with one bookmark
    # per [name, page] pair read from the JSON file.

    ap = ArgumentParser()
    ap.add_argument('pdf', help='the input PDF')
    ap.add_argument('bookmarks', help='JSON list of bookmarks')
    # type=int makes argparse reject a non-numeric offset with a usage message.
    ap.add_argument('offset', type=int, help='page offset to use')

    # Parse the command line once instead of once per argument.
    args = ap.parse_args()
    pdf_path = args.pdf
    bookmarks_path = args.bookmarks
    page_offset = args.offset

    output_pdf = PdfFileMerger()

    # Keep the input open until the output is written: depending on the PyPDF2
    # version the merger may read page data from this handle only at write time.
    with open(pdf_path, 'rb') as pdf_file:
        output_pdf.merge(position=0, fileobj=pdf_file)

        with open(bookmarks_path, 'r') as bookmarks_file:
            bookmarks = json.load(bookmarks_file)
        for name, page in bookmarks:
            # The "- 2" adjustment is kept from the original; its rationale is
            # not visible in this script.
            output_pdf.addBookmark(name, page + page_offset - 2)

        output_pdf_path = os.path.splitext(pdf_path)[0] + '-bookmarked.pdf'
        with open(output_pdf_path, 'wb') as bookmarked_file:
            output_pdf.write(bookmarked_file)

Exemple #15

0

Afficher le fichier

Fichier : tasks.py Projet : aolkin/hrdc

def render_and_send_app(pk):
    """Render an application as a PDF for each of its venues and email it.

    For every venue of the application with primary key ``pk``:

    1. the ``venueapp/pdf_app.html`` template is rendered to a PDF;
    2. staff attachments smaller than ``max_bytes`` are merged in, either right
       before the bookmark that follows the staff member's "... Supplement"
       bookmark or, when that bookmark is not found, at the end;
    3. the document's bookmarks are re-added with their shifted page numbers;
    4. the PDF is attached to a message sent to the venue managers, with the
       show staff in cc.

    Failures while merging one attachment or while sending are printed/logged
    and do not abort the remaining work.

    NOTE(review): ``locals()`` is passed to ``render_msg`` below, so every local
    name in this function is visible to the email template -- renaming or
    removing locals can change the rendered message.
    """
    # Models and views are resolved at call time through import_module
    # (presumably to avoid an import cycle -- TODO confirm).
    app = import_module('venueapp.models').Application.objects.get(pk=pk)
    cover = import_module('venueapp.views').make_cover_page(app)
    # Attachments at or above this size are not merged into the PDF.
    max_bytes = config.get_int("max_inline_attachment_bytes", 0)
    for venue in app.venues.all():
        html = render_to_string(
            "venueapp/pdf_app.html", {
                "object": app,
                "cover": cover,
                "venue": venue,
                "logo": finders.find("logo.png"),
                "pdf": True,
                "max_attachment_size": max_bytes,
            })
        doc = HTML(string=html, base_url=settings.SITE_URL).render()
        # Wrap the rendered bookmark tree in _Bookmark objects; their
        # ``location`` is adjusted below as attachments are inserted.
        bookmark_tree = doc.make_bookmark_tree()
        bookmarks = list([_Bookmark(i) for i in bookmark_tree])
        app_pdf = BytesIO()
        doc.write_pdf(app_pdf)
        merger = PdfFileMerger()
        # The rendered bookmarks are dropped here and re-added after merging.
        merger.append(app_pdf, import_bookmarks=False)
        for staff in app.staffmember_set.signed_on().filter(
                role__accepts_attachment=True).exclude(
                    Q(attachment=None) | Q(attachment="")):
            name = "{} {}'s ".format(staff.role_name, staff.person)
            try:
                if staff.attachment.size < max_bytes:
                    reader = PdfFileReader(staff.attachment.open(), False)
                    attachment_pages = reader.getNumPages()
                    page = None
                    # The insertion point is the location of the bookmark that
                    # follows this staff member's "Supplement" bookmark.
                    # NOTE(review): bookmarks[i + 1] raises IndexError when the
                    # supplement bookmark is the last one; the except below
                    # catches it and the attachment is skipped.
                    for i, bookmark in enumerate(bookmarks):
                        if bookmark.label == name + "Supplement":
                            page = bookmarks[i + 1].location
                    # NOTE(review): a location of 0 is treated like "not found".
                    if page:
                        merger.merge(page,
                                     staff.attachment.open(),
                                     import_bookmarks=False)
                        # Shift every bookmark at or after the insertion point.
                        for i in bookmarks:
                            if i.location >= page:
                                i.location += attachment_pages
                    else:
                        merger.append(staff.attachment.open(),
                                      bookmark=name + "Attachment",
                                      import_bookmarks=False)
            except Exception as e:
                # Best effort: print the traceback and continue with the next staff member.
                tb.print_exc()
        for i in bookmarks:
            merger.addBookmark(i.label, i.location)
        pdf = BytesIO()  # open("/tmp/{}.pdf".format(venue.venue), "wb")
        merger.write(pdf)
        msg = render_msg("venueapp/email/submission.html",
                         locals(),
                         to=[
                             "{} <{}>".format(i.get_full_name(False), i.email)
                             for i in venue.managers.all()
                         ],
                         cc=[
                             "{} <{}>".format(i.get_full_name(False), i.email)
                             for i in app.show.staff.all()
                         ],
                         subject="Application for {} in {} Submitted".format(
                             app, venue.venue),
                         tags=["venueapp", "venueapp-submission"])
        msg.attach("{} - {}.pdf".format(app, venue), BytesIO(pdf.getbuffer()),
                   "application/pdf")
        try:
            msg.send()
        except Exception as err:
            LOGGER.error("Application submission sending failed: {}".format(
                repr(err)))
            tb.print_exc()
        finally:
            # The merger is closed only here, after the send attempt.
            merger.close()

Exemple #16

0

Afficher le fichier

Fichier : slidesmerge.py Projet : tintincita/RprogCoursera

from PyPDF2 import PdfFileMerger, PdfFileReader
import os

# Merge the numbered PDFs in ./slide into one document placed in the parent
# directory, with one bookmark per source file.
os.chdir('slide')
merger = PdfFileMerger()
label = 0  # page index at which the next file starts in the merged output
# File names must start with an integer before the first dot; it is the sort key.
for filename in sorted(os.listdir('.'), key=lambda k: int(k.split('.')[0])):
    with open(filename, 'rb') as f:
        foo = PdfFileReader(f)
        p = foo.getNumPages()
        merger.append(foo)
        # splitext drops the extension whatever its length (the original cut 4 characters).
        merger.addBookmark(os.path.splitext(filename)[0], label, parent=None)
        label += p
        print('{}，共{}页'.format(filename, p))
# Build the path portably: a literal "..\" only means "parent directory" on Windows.
merger.write(os.path.join(os.pardir, "document-output.pdf"))
merger.close()