Ejemplos de PdfFileMerger.addBookmark en Python, ejemplos de PyPDF2.PdfFileMerger.addBookmark en Python

Ejemplo n.º 1

0

Mostrar archivo

def merge_pdf(path: str, output_filename: str, bookmark_separator: str = "", bookmark_start_index: int = 1,
              password: str = "") -> None:
    """
    Merge every PDF returned by ``get_pdf_names(path)`` into one bookmarked PDF.

    One top-level bookmark is added at the first page of each merged file.

    :param str path: folder path handed to ``get_pdf_names``
    :param str output_filename: output file name (including its path)
    :param str bookmark_separator: bookmark title prefix; when non-empty each bookmark is
        named ``<prefix><index>``, otherwise the bookmark is named after the source file
    :param int bookmark_start_index: first number used for the bookmark suffix
    :param str password: password used to decrypt encrypted input PDFs
    """
    # Delete any previous output before merging.
    # NOTE(review): presumably so an old result located in `path` is not merged into itself - confirm.
    if os.path.exists(output_filename):
        os.remove(output_filename)
    os.chmod(path, stat.S_IRWXU)  # ensure we have permission
    output_pdf = PdfFileMerger()
    output_page_num = 0  # page index at which the next input starts in the merged document
    try:
        for index, pdf_path_with_name in enumerate(get_pdf_names(path), bookmark_start_index):
            print(pdf_path_with_name)
            with open(pdf_path_with_name, "rb") as pdf:
                content = PdfFileReader(pdf)
                if content.isEncrypted:
                    content.decrypt(password)
                if bookmark_separator:
                    title = bookmark_separator + str(index)
                else:
                    # Strip the directory part for both "\\" and "/" separators so the
                    # title is the bare file name on every platform, then keep the text
                    # before the first dot.
                    file_name = pdf_path_with_name.replace("\\", "/").rsplit("/", 1)[-1]
                    title = file_name.split(".")[0]
                # The bookmark points at the first page of the file appended next.
                output_pdf.addBookmark(title, output_page_num)
                output_pdf.append(content)
                output_page_num += content.numPages

        # A plain binary open() is all that is needed to write the PDF bytes.
        with open(output_filename, "wb") as f:
            output_pdf.write(f)
    finally:
        # Release the merger's resources even when merging fails half-way.
        output_pdf.close()
    print("mission complete")

Ejemplo n.º 2

0

Mostrar archivo

def merge_pdf():
    """Merge two fixed PDF files, add one bookmark and write ``merge_pdf.pdf``."""
    # Create the merger instance used to combine the files.
    pdf_merger = PdfFileMerger()
    try:
        # Start with Week1_1.pdf.
        pdf_merger.append('Week1_1.pdf')
        # Then insert ex1.pdf at position 0.
        pdf_merger.merge(0, 'ex1.pdf')
        # Add a bookmark that points at page index 1.
        pdf_merger.addBookmark('这是一个书签', 1)
        # Write the merged document to disk.
        pdf_merger.write('merge_pdf.pdf')
    finally:
        # The inputs were given as file names, so the merger opened them itself;
        # close() releases those file handles.
        pdf_merger.close()

Ejemplo n.º 3

0

Mostrar archivo

Archivo: mergePDF.py Proyecto: gudeqing/biodev

def merger(files: list, out):
    """Merge the PDFs in ``files`` into ``out`` with one bookmark per input file.

    Each bookmark is titled with the input's base name up to its first dot and
    points at the first page of that input in the merged document.

    :param files: paths of the PDF files to merge, in output order
    :param out: path of the merged PDF to write
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in files:
            # Pages merged so far == index of this file's first page. Using the
            # position in `files` instead would be wrong for multi-page inputs.
            first_page = len(pdf_merger.pages)
            pdf_merger.append(path)
            title = os.path.basename(path).split('.', 1)[0]
            pdf_merger.addBookmark(title, first_page, parent=None)

        pdf_merger.setPageLayout(layout='/TwoColumnLeft')

        with open(out, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # Inputs were passed as paths, so the merger owns their file handles.
        pdf_merger.close()

Ejemplo n.º 4

0

Mostrar archivo

Archivo: pdfoperator.py Proyecto: mgjean/pdf-surgeon

 def bind(self, *files, outputDir="./"):
     """Bind ``files`` into ``<outputDir>/output-binder.pdf`` with one bookmark per input.

     Inputs whose string form contains "blank-page" are deleted from disk after
     the binder has been written.

     :param files: PDF inputs accepted by ``PdfFileMerger.append``, in output order
     :param outputDir: directory that receives ``output-binder.pdf``
     """
     clean = []
     merger = PdfFileMerger()
     # The context manager and try/finally make sure the output file and the
     # merger are released even if appending or writing raises.
     with open("%s/output-binder.pdf" % outputDir, "wb") as output:
         try:
             for num, source in enumerate(files):
                 if "blank-page" in str(source):
                     clean.append(source)
                 merger.append(source, import_bookmarks=False)
                 # NOTE(review): the bookmark page is the input's position, which
                 # matches the merged page only if every input has one page - confirm.
                 merger.addBookmark("page-%s" % num, num, parent=None)
             merger.write(output)
         finally:
             merger.close()
     # Remove the blank-page inputs only after the binder was written successfully.
     for leftover in clean:
         os.remove(leftover)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: spider_zhihu.py Proyecto: xut226/crawlerxt

def merge_pdf(dir, titlelist):
    """Merge the numbered PDFs found under ``dir`` and bookmark each with its title.

    Files are ordered by the integer at the start of their name and paired, in
    that order, with the entries of ``titlelist``. The result is written to
    ``dir_save_pdf + 'merge.pdf'`` (``dir_save_pdf`` is a module-level name).

    :param dir: directory walked with ``os.walk``
    :param titlelist: bookmark titles, one per PDF in numeric file order
    :raises AttributeError: if a file name does not start with digits
    """
    pdf_manage = PdfFileMerger()
    # Running page offset in the merged document. It lives outside the walk
    # because the merger keeps accumulating pages across directories.
    merge_page_count = 0
    try:
        for root, _subdirs, filenames in os.walk(dir):
            filenames_sorted = sorted(
                filenames, key=lambda name: int(re.match(r'(\d+)', name).group()))
            # Use the `titlelist` argument; the previous body read an undefined
            # `title_list` name.
            for filename, title in zip(filenames_sorted, titlelist):
                # os.path.join supplies the separator that `root + filename` dropped.
                with open(os.path.join(root, filename), 'rb') as pdf_file:
                    f_input = PdfFileReader(pdf_file)
                    pdf_manage.append(f_input)
                    pdf_manage.addBookmark(title, merge_page_count)  # add the bookmark
                    merge_page_count += f_input.getNumPages()
            # NOTE(review): the output is rewritten once per walked directory, as
            # before; confirm whether nested directories are expected at all.
            with open(dir_save_pdf + 'merge.pdf', 'wb') as f_output:
                pdf_manage.write(f_output)
    finally:
        pdf_manage.close()

Ejemplo n.º 6

0

Mostrar archivo

Archivo: packetMaster.py Proyecto: longooglite/mps

	def mergeContents(self,tocFullPath,packetMeta,bookmarks):
		"""Merge the TOC and all packet items into ``self.packetPath``.

		Unless ``self.configDict['omitTOC']`` is truthy, the TOC PDF is placed first
		and the merger's bookmarks are replaced by the ``bookmarks`` tree.

		:param tocFullPath: path of the table-of-contents PDF
		:param packetMeta: dict with a ``groups`` list; each group has an ``items``
			list whose entries may carry a ``file_name`` to merge
		:param bookmarks: list of dicts with ``description``, 1-based ``page`` and an
			optional ``children`` list of dicts of the same shape
		"""
		merger = PdfFileMerger()
		omit_toc = self.configDict.get('omitTOC',False)
		# Input handles stay open until the packet is written and are then closed
		# in `finally`, so they no longer leak.
		sources = []
		try:
			if not omit_toc:
				# PDFs are binary data, so open them in 'rb' rather than text mode.
				sources.append(open(tocFullPath,'rb'))
				merger.append(sources[-1],tocFullPath)
			if packetMeta:
				for packetgroup in packetMeta.get('groups',[]):
					for item in packetgroup.get('items',[]):
						file_name = item.get('file_name','')
						if file_name:
							sources.append(open(file_name,'rb'))
							merger.append(sources[-1],file_name)
			if not omit_toc:
				# Drop the bookmarks collected while appending and rebuild the
				# outline from the supplied tree.
				merger.bookmarks = []
				for bookmark in bookmarks:
					# Pages arrive 1-based; addBookmark is given page-1.
					parent_ref = merger.addBookmark(bookmark.get("description",''),bookmark.get('page',1)-1)
					for child in bookmark.get("children",[]):
						merger.addBookmark(child.get("description",''),child.get('page',1)-1,parent_ref)
			with open(self.packetPath, "wb") as output:
				merger.write(output)
		finally:
			merger.close()
			for source in sources:
				source.close()

Ejemplo n.º 7

0

Mostrar archivo

Archivo: convert.py Proyecto: caizefeng/html2pdf_bookdown

def merge_and_bookmark(contents_list, book_name):
    """Merge per-section PDFs into ``<book_name>.pdf`` with a nested bookmark tree.

    :param contents_list: sections in reading order; each is a dict with the keys
        ``level`` (nesting depth), ``name`` (bookmark title), ``single_pdf_path``
        (PDF to merge) and ``page_num`` (number of pages in that PDF)
    :param book_name: title of the root bookmark and stem of the output file name
    """
    pdfmerger = PdfFileMerger()
    page_idx = 0
    last_level = -1
    # Stack of bookmark references, one per open nesting level ("depth meter").
    # The root bookmark for the whole book sits at the bottom.
    level_list = [pdfmerger.addBookmark(book_name, page_idx)]
    opened = []  # section files, kept open until the book has been written
    print("Merging and bookmarking PDFs:")
    try:
        for section in tqdm(contents_list):
            # Moving to the same or a shallower level closes the deeper levels:
            # pop one stack entry per level that just ended.
            for _ in range(last_level - section["level"] + 1):
                level_list.pop()
            parent = level_list[-1]
            level_list.append(
                pdfmerger.addBookmark(section["name"], page_idx, parent))
            source = open(section["single_pdf_path"], 'rb')
            opened.append(source)
            pdfmerger.append(source)

            page_idx += section["page_num"]
            last_level = section["level"]

        with open("{}.pdf".format(book_name), 'wb') as f_output:
            pdfmerger.write(f_output)
    finally:
        # Release the merger and every section file, also on failure.
        pdfmerger.close()
        for source in opened:
            source.close()

Ejemplo n.º 8

0

Mostrar archivo

Archivo: pdf_merger.py Proyecto: ihmeuw/covid-historical-model

def pdf_merger(pdfs: List, location_names: List, parent_names: List, levels: List, outfile: str):
    """Merge ``pdfs`` into ``outfile`` with one (optionally nested) bookmark per file.

    The four lists are consumed in lockstep. A file's bookmark is nested under
    ``parent_name`` when that name is one of ``location_names`` and ``level`` is
    greater than 0; otherwise it becomes a top-level bookmark.

    :param pdfs: paths of the PDF files to merge, in output order
    :param location_names: bookmark title for each file
    :param parent_names: title of the parent bookmark for each file
    :param levels: nesting level for each file; 0 means top level
    :param outfile: path of the merged PDF; an existing file is replaced
    :raises AssertionError: if an input or the output path does not end in ``.pdf``
    """
    # Validate up front with explicit raises: `assert` statements are stripped
    # under `python -O`, and failing before any merging work is cheaper.
    if not all(i.endswith('.pdf') for i in pdfs):
        raise AssertionError('Not all files passed into `pdfs` are actual PDFs.')
    if not outfile.endswith('.pdf'):
        raise AssertionError('Provided output file is not a PDF.')

    # compile PDFs
    merger = PdfFileMerger()
    try:
        for pdf, location_name, parent_name, level in zip(pdfs, location_names, parent_names, levels):
            # Index of this file's first page in the merged document; equals the
            # file's position only while every earlier input has a single page.
            first_page = len(merger.pages)
            merger.append(pdf)
            if parent_name in location_names and level > 0:
                if parent_name == location_name:
                    # NOTE(review): the trailing space presumably keeps the child's
                    # title distinct from its identically named parent - confirm.
                    merger.addBookmark(f'{location_name} ', first_page, parent_name)
                else:
                    merger.addBookmark(location_name, first_page, parent_name)
            else:
                merger.addBookmark(location_name, first_page)

        # get output file (if already exists, delete before writing new file)
        if os.path.exists(outfile):
            os.remove(outfile)

        # write compiled PDF
        merger.write(outfile)
    finally:
        merger.close()

Ejemplo n.º 9

0

Mostrar archivo

Archivo: pdf_pager.py Proyecto: MarkStefanovic/pdf_pager

    def add_bookmarks(self, input_path: str, output_path: str) -> None:
        """Copy a PDF to ``output_path`` and add the [nested] bookmarks from ``self.bookmarks``.

        Entries with a ``parent_bookmark_name`` are nested under a parent bookmark
        that is created the first time its name is seen; other entries become
        top-level bookmarks when they have a ``bookmark_name``.

        PyPDF2 PdfFileMerger documentation: https://pythonhosted.org/PyPDF2/PdfFileMerger.html

        :param input_path: path of the PDF to read
        :param output_path: path of the bookmarked PDF to write
        """
        merger = PdfFileMerger()
        # Context managers and try/finally release both files and the merger even
        # when reading, bookmarking or writing raises.
        with open(input_path, "rb") as input_pdf:
            reader = PdfFileReader(input_pdf)
            total_pages = reader.getNumPages()
            with open(output_path, "wb") as output_pdf:
                try:
                    merger.append(fileobj=input_pdf, pages=(0, total_pages))
                    logger.info('Bookmarks: {}'.format(self.bookmarks))
                    page_numbers = self.get_page_numbers()
                    parent_bookmarks = {}  # parent title -> bookmark reference
                    for val in self.bookmarks:
                        if val.parent_bookmark_name:
                            if val.parent_bookmark_name not in parent_bookmarks:
                                # The parent points at the page looked up for this
                                # entry's input_path in `page_numbers`.
                                parent_bookmarks[val.parent_bookmark_name] = merger.addBookmark(
                                    title=val.parent_bookmark_name,
                                    pagenum=page_numbers.get(val.input_path),
                                    parent=None,
                                )
                            merger.addBookmark(
                                title=val.bookmark_name,
                                pagenum=val.page_number,
                                parent=parent_bookmarks.get(val.parent_bookmark_name),
                            )
                        elif val.bookmark_name:
                            merger.addBookmark(
                                title=val.bookmark_name,
                                pagenum=val.page_number,
                                parent=None,
                            )
                    logger.info('Parent bookmarks: {}'.format(parent_bookmarks))
                    merger.write(output_pdf)
                finally:
                    merger.close()

Ejemplo n.º 10

0

Mostrar archivo

Archivo: pdf.py Proyecto: baris8/TeX-To-PDF

    def bookmarks_hinzufuegen(self, bookmarks):
        """Add the given bookmarks to the PDF held in ``self.pdf_bytes``.

        If no PDF bytes exist yet, the PDF is compiled first through
        ``self.kompiliere_pdf()``. The bookmarked document then replaces
        ``self.pdf_bytes``.

        :param bookmarks: iterable of ``(text, page)`` pairs; each pair is passed
            to ``PdfFileMerger.addBookmark`` as title and page number
        """
        log.info("Füge dem PDF folgende Bookmarks hinzu: %r", bookmarks)

        pdf_missing = not self.pdf_bytes
        if pdf_missing:
            log.warning(
                "Es existieren noch keine PDF-Bytes, daher wird das PDF kompiliert"
            )
            self.kompiliere_pdf()

        # Load the current PDF bytes into a merger so bookmarks can be attached.
        merger = PdfFileMerger()
        merger.append(BytesIO(self.pdf_bytes))

        for title, page in bookmarks:
            log.debug("Füge folgende Bookmark auf Seite %s hinzu: %s", page, title)
            merger.addBookmark(title, page, None)

        # Serialise into memory and keep the resulting bytes as the new PDF.
        result = BytesIO()
        try:
            merger.write(result)
            self.pdf_bytes = result.getvalue()
        finally:
            result.close()

Ejemplo n.º 11

0

Mostrar archivo

def assignToSpot(index):
    """Assemble the monthly report from the pages named in ``index``.

    For every entry of ``index`` the nested name lists in ``entry[1]`` are walked
    category by category. Each name is looked up in the module-level
    ``indexList``; every match contributes one page to a ``PdfFileMerger``.
    A name without any match gets one page taken from ``noneLocation``
    (presumably a placeholder page - confirm) and is recorded in an error list
    that is printed before returning.

    Relies on names defined outside this block: ``indexList``,
    ``storageLocation``, ``noneLocation``, ``typeName``, ``getInterestFile``
    and ``drawProgressBar``.

    NOTE: Python 2 code (uses the ``print`` statement).

    :param index: sequence of entries where ``entry[1][category][position]`` is a
        name to look up
    :return: the ``PdfFileMerger`` holding the merged pages and bookmarks
    """
    errorList = []
    monthlyReport = PdfFileMerger()
    # Number of pages appended so far; the newest page has index length - 1.
    monthlyReportLength = 0
    DistrictResult = monthlyReport.addBookmark(
        "District Packages", 0, parent=None
    )  # change this later if added more parent bookmarks (Pending Importance: Medium)
    # Bookmark of the district most recently added under DistrictResult.
    DistrictName = None
    #  above this
    dex = index
    for x in range(0, len(dex)):
        # var = x[0]
        # Progress value runs from 50 upwards: 50 plus half of the percentage done.
        drawProgressBar(int(50 + x * 100 / len(dex) / 2), 50)
        doneWithOne = True
        for i in range(0, len(dex[x][1])):

            # Categories 1 and 2 are handled in swapped order: the pass that would
            # be category 1 handles 2 and the following pass handles 1. The
            # doneWithOne flag (cleared after the first non-zero pass) tells the
            # two passes apart.
            if i == 1 and doneWithOne:
                i = 2
            if i == 2 and not doneWithOne:
                i = 1
            for z in range(0, len(dex[x][1][i])):
                root = dex[x][1][i][z]
                foundFile = False
                firstPageOfWriteUpMerged = False
                firstPageOfTypeMerged = False
                # Map special-case names onto the spelling compared against indexList.
                if root == "7Harlan" and i == 2:  # Introduce Canada and WPE exceptions (Pending importance : High)
                    root = "Harlan"
                if root == "GSW - Large Projects":
                    root = "Large Projects"
                if root == "MARHE":
                    root = "MAR  "
                    # print root
                # Scan indexList for entries of this category (item [0]) and name (item [1]).
                for s in range(0, len(indexList)):
                    if indexList[s][0] == i:
                        if indexList[s][1] == root:
                            # print indexList[s]
                            # print ("found it")
                            interestFile = getInterestFile(s, storageLocation)
                            if interestFile is not None:
                                # interestFile[0] is used as a page index and
                                # interestFile[1] as the source passed to append().
                                fp = interestFile[1]
                                interestLoc = interestFile[0]
                                # Append exactly one page: [interestLoc, interestLoc + 1).
                                monthlyReport.append(fileobj=fp,
                                                     pages=(interestLoc,
                                                            interestLoc + 1),
                                                     import_bookmarks=False)
                                monthlyReportLength = monthlyReportLength + 1
                                foundFile = True

                                # The first name of category 0 opens a new district
                                # bookmark under "District Packages".
                                if i == 0 and z == 0 and not firstPageOfWriteUpMerged:
                                    DistrictName = monthlyReport.addBookmark(
                                        str(root), monthlyReportLength - 1,
                                        DistrictResult)
                                    firstPageOfWriteUpMerged = True
                                # Below: one bookmark per category, nested under the
                                # current district and placed on the page just
                                # appended; only for the first name (z == 0).
                                if DistrictName is not None:
                                    if i == 1 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "SG&A", monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 2 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "District Package",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 3 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Job Profitability",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 4 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "A/R", monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 5 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Bid Logs",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 0 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Write Up",
                                            monthlyReportLength - 1,
                                            DistrictName)
                                        firstPageOfTypeMerged = True
                if not foundFile:
                    # No match for this name: record the miss and pick the page to
                    # use instead from noneLocation. Categories 1 and 2 use the
                    # swapped index (i == 2 -> 1, i == 1 -> 2) for both the lookup
                    # and typeName.
                    # NOTE(review): for i outside 0..5 noneFile is not assigned in
                    # this pass (unbound or left over from an earlier miss) - confirm
                    # that cannot happen.
                    # print ("Can't find ", root)
                    if i == 5:  # This whole thing needs to be done in a for loop again, come back later :D (Pending Importance = Low)
                        noneFile = getInterestFile(5, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[5],
                             "at page ", monthlyReportLength))
                    if i == 3:
                        noneFile = getInterestFile(3, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[3],
                             "at page ", monthlyReportLength))
                    if i == 4:
                        noneFile = getInterestFile(4, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[4],
                             "at page ", monthlyReportLength))
                    if i == 2:
                        noneFile = getInterestFile(1, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[1],
                             "at page ", monthlyReportLength))
                    if i == 1:
                        noneFile = getInterestFile(2, noneLocation)

                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[2],
                             "at page ", monthlyReportLength))
                    if i == 0:
                        noneFile = getInterestFile(0, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[0],
                             "at page ", monthlyReportLength))

                    # Append the single replacement page and count it.
                    monthlyReportLength = monthlyReportLength + 1
                    fp2 = noneFile[1]
                    intLoc = noneFile[0]
                    monthlyReport.append(fp2,
                                         pages=(intLoc, intLoc + 1),
                                         import_bookmarks=False)
            # After the first non-zero category pass, switch the 1 <-> 2 swap above
            # to its second state.
            if i != 0:
                doneWithOne = False
    # Report every name that could not be found.
    for x in errorList:
        print "\n", x
    return monthlyReport

Ejemplo n.º 12

0

Mostrar archivo

Archivo: slidesmerge.py Proyecto: tintincita/RprogCoursera

from PyPDF2 import PdfFileMerger, PdfFileReader
import os

# Merge the numbered PDFs in ./slide into one document, with one bookmark per
# source file placed on that file's first page.
os.chdir('slide')
merger = PdfFileMerger()
label = 0  # page index at which the next file starts in the merged document
# Only PDF files take part; other directory entries would break the numeric
# sort key or the PDF reader.
pdf_names = [name for name in os.listdir('.') if name.lower().endswith('.pdf')]
try:
    # Sort by the integer before the first dot so that 10.pdf follows 9.pdf.
    for filename in sorted(pdf_names, key=lambda k: int(k.split('.')[0])):
        with open(filename, 'rb') as f:
            foo = PdfFileReader(f)
            p = foo.getNumPages()
            merger.append(foo)
            # Bookmark title is the file name without its ".pdf" suffix.
            merger.addBookmark(filename[:-4], label, parent=None)
            label += p
            print('{}，共{}页'.format(filename, p))
    # Build the parent-directory path portably; a literal "..\\" only works on Windows.
    merger.write(os.path.join(os.pardir, 'document-output.pdf'))
finally:
    merger.close()

Ejemplo n.º 13

0

Mostrar archivo

def collect(project_folder, out_filename):
    """Collect PDF files to main document.
    Function searches for PDF files whose names satisfy the conditions,
    and collect it to the main PDF document.
    The title of the Cover Page must contain a “титул” or “обложка”.
    The name of the Information and Certification Sheet should contain "УЛ", "ИУЛ", "Информационно-удостоверяющий лист".
    The name of the Change Registration Table should include a “таблица регистрации изменений".
    The title of the Main document of the sheet should contain “ПЗ” or “Пояснительная записка”.

    When a cover PDF exists it replaces the leading pages of the main document:
    the cover is added first and the main document follows from the page index
    equal to the cover's page count. The Change Registration Table, if found, is
    appended last and gets a bookmark.

    Arguments:
        project_folder {str} -- Path to directory with PDF files
        out_filename {str} -- Path of the merged PDF to write

    Raises:
        IndexError -- if no main document PDF is found in the folder
    """
    project_folder = Path(str(project_folder)).resolve()

    # List of PDF files paths
    files_list_str = [str(s) for s in sorted(project_folder.glob("*.pdf"))]

    # Regular expressions for determine files type
    info_cert_page_re = re.compile(
        r"УЛ|ИУЛ|Информационно-удостоверяющий лист|информационно-удостоверяющий лист"
    )
    title_page_re = re.compile(r"титул|обложка", re.IGNORECASE)
    changes_page_re = re.compile(r"таблица регистрации изменений",
                                 re.IGNORECASE)
    main_doc_re = re.compile(r"ПЗ|Пояснительная записка|пояснительная записка")

    # Get files paths
    # NOTE(review): the Information and Certification Sheet is detected here but
    # is not merged anywhere in this function - confirm whether that is intended.
    info_cert_path = list(filter(info_cert_page_re.search, files_list_str))
    title_path = list(filter(title_page_re.search, files_list_str))
    changes_path = list(filter(changes_page_re.search, files_list_str))
    main_doc_path = list(filter(main_doc_re.search, files_list_str))

    # Same exception type as the bare index lookup raised, with a usable message.
    if not main_doc_path:
        raise IndexError(
            "No main document PDF found in {}".format(project_folder))

    merger = PdfFileMerger()
    opened = []  # input files, closed in `finally` once the output is written
    try:
        main_doc_file = open(main_doc_path[0], "rb")
        opened.append(main_doc_file)
        main_doc_pdf = PdfFileReader(main_doc_file)
        main_doc_page_num = main_doc_pdf.getNumPages()

        # Append title pages if exist to main document, or just append main document
        if title_path:
            title_file = open(title_path[0], "rb")
            opened.append(title_file)
            title_page_pdf = PdfFileReader(title_file)
            title_page_num = title_page_pdf.getNumPages()

            merger.append(fileobj=title_page_pdf)
            print("Title page appended to document.")
            # Insert right after the title pages. A fixed position of 2 would land
            # inside the title pages when the title PDF has more than two pages.
            merger.merge(position=title_page_num,
                         fileobj=main_doc_pdf,
                         pages=(title_page_num, main_doc_page_num))
            print("Main doc appended to document.")
        else:
            merger.append(fileobj=main_doc_pdf)
            print("Main doc appended to document.")

        # Append Table of changes page to main document
        if changes_path:
            changes_file = open(changes_path[0], "rb")
            opened.append(changes_file)
            changes_page_pdf = PdfFileReader(changes_file)
            merger.append(fileobj=changes_page_pdf)
            print("Change Registration Table page appended to document.")

            # Adding bookmark to inserted Table of changes page: in both branches
            # above the pages before it add up to the main document's page count.
            merger.addBookmark("Таблица регистрации изменений",
                               main_doc_page_num)

        with open(out_filename, "wb") as output:
            merger.write(output)
    finally:
        merger.close()
        for handle in opened:
            handle.close()

Ejemplo n.º 14

0

Mostrar archivo

Archivo: pdf-bookmarks.py Proyecto: michaelamie/pdf-bookmarks

#!/usr/bin/env python

import os.path, json
from argparse import ArgumentParser
from PyPDF2 import PdfFileMerger

if __name__ == '__main__':
    # Command-line tool: copy a PDF to "<name>-bookmarked.pdf" and add the
    # bookmarks listed in a JSON file of [title, page] pairs.

    ap = ArgumentParser()
    ap.add_argument('pdf', help='the input PDF')
    ap.add_argument('bookmarks', help='JSON list of bookmarks')
    # Let argparse convert and validate the offset once instead of calling int()
    # for every bookmark.
    ap.add_argument('offset', type=int, help='page offset to use')

    # Parse the command line a single time.
    args = ap.parse_args()
    pdf_path = args.pdf
    bookmarks_path = args.bookmarks
    page_offset = args.offset

    output_pdf = PdfFileMerger()
    output_pdf_path = os.path.splitext(pdf_path)[0] + '-bookmarked.pdf'

    try:
        # Keep the input open until the output has been written: depending on the
        # PyPDF2 version the merger may still read from this stream in write().
        with open(pdf_path, 'rb') as pdf_file:
            output_pdf.merge(position=0, fileobj=pdf_file)

            with open(bookmarks_path, 'r') as bookmarks_file:
                bookmarks = json.load(bookmarks_file)
            for name, page in bookmarks:
                # NOTE(review): the "- 2" presumably converts a 1-based page and a
                # 1-based offset into a 0-based page index - confirm.
                output_pdf.addBookmark(name, page + page_offset - 2)

            with open(output_pdf_path, 'wb') as bookmarked_file:
                output_pdf.write(bookmarked_file)
    finally:
        output_pdf.close()

Ejemplo n.º 15

0

Mostrar archivo

Archivo: tasks.py Proyecto: aolkin/hrdc

def render_and_send_app(pk):
    """Render an application as a PDF once per venue and email it.

    For every venue of the application the HTML template is rendered to a PDF,
    eligible staff attachments are merged into it, the document's bookmarks are
    re-added, and the result is attached to a message addressed to the venue's
    managers with the show's staff in CC.

    NOTE: ``locals()`` is handed to ``render_msg`` below, so the names of the
    local variables in this function are part of what the email template can
    see. Renaming a local can change the rendered message.

    :param pk: primary key of the ``venueapp`` ``Application`` to render
    """
    # Models and views are imported at call time.
    # NOTE(review): presumably to avoid import cycles at module load - confirm.
    app = import_module('venueapp.models').Application.objects.get(pk=pk)
    cover = import_module('venueapp.views').make_cover_page(app)
    # Attachments are only merged when smaller than this; the fallback of 0
    # therefore merges none.
    max_bytes = config.get_int("max_inline_attachment_bytes", 0)
    for venue in app.venues.all():
        html = render_to_string(
            "venueapp/pdf_app.html", {
                "object": app,
                "cover": cover,
                "venue": venue,
                "logo": finders.find("logo.png"),
                "pdf": True,
                "max_attachment_size": max_bytes,
            })
        doc = HTML(string=html, base_url=settings.SITE_URL).render()
        # Capture the rendered document's bookmarks; they are re-added by hand
        # further down because the PDF is appended with import_bookmarks=False.
        bookmark_tree = doc.make_bookmark_tree()
        bookmarks = list([_Bookmark(i) for i in bookmark_tree])
        app_pdf = BytesIO()
        doc.write_pdf(app_pdf)
        merger = PdfFileMerger()
        merger.append(app_pdf, import_bookmarks=False)
        # Signed-on staff whose role accepts attachments and who uploaded one.
        for staff in app.staffmember_set.signed_on().filter(
                role__accepts_attachment=True).exclude(
                    Q(attachment=None) | Q(attachment="")):
            name = "{} {}'s ".format(staff.role_name, staff.person)
            try:
                if staff.attachment.size < max_bytes:
                    reader = PdfFileReader(staff.attachment.open(), False)
                    attachment_pages = reader.getNumPages()
                    page = None
                    # Find this person's "Supplement" bookmark; the attachment goes
                    # at the location of the bookmark that follows it.
                    # NOTE(review): bookmarks[i + 1] raises IndexError when the
                    # Supplement bookmark is the last one; the broad except below
                    # catches it and the attachment is skipped.
                    for i, bookmark in enumerate(bookmarks):
                        if bookmark.label == name + "Supplement":
                            page = bookmarks[i + 1].location
                    # NOTE(review): a location of 0 is falsy and is treated like
                    # "not found", so the attachment is appended at the end instead.
                    if page:
                        merger.merge(page,
                                     staff.attachment.open(),
                                     import_bookmarks=False)
                        # Shift every bookmark at or after the insertion point by
                        # the number of inserted pages.
                        for i in bookmarks:
                            if i.location >= page:
                                i.location += attachment_pages
                    else:
                        merger.append(staff.attachment.open(),
                                      bookmark=name + "Attachment",
                                      import_bookmarks=False)
            except Exception as e:
                # Best effort: a failing attachment is reported and skipped so the
                # application is still sent.
                tb.print_exc()
        # Re-create the document bookmarks at their (possibly shifted) locations.
        for i in bookmarks:
            merger.addBookmark(i.label, i.location)
        pdf = BytesIO()  # open("/tmp/{}.pdf".format(venue.venue), "wb")
        merger.write(pdf)
        # locals() exposes every local variable of this function as the message context.
        msg = render_msg("venueapp/email/submission.html",
                         locals(),
                         to=[
                             "{} <{}>".format(i.get_full_name(False), i.email)
                             for i in venue.managers.all()
                         ],
                         cc=[
                             "{} <{}>".format(i.get_full_name(False), i.email)
                             for i in app.show.staff.all()
                         ],
                         subject="Application for {} in {} Submitted".format(
                             app, venue.venue),
                         tags=["venueapp", "venueapp-submission"])
        # Attach the merged PDF through a fresh BytesIO built from the written bytes.
        msg.attach("{} - {}.pdf".format(app, venue), BytesIO(pdf.getbuffer()),
                   "application/pdf")
        try:
            msg.send()
        except Exception as err:
            LOGGER.error("Application submission sending failed: {}".format(
                repr(err)))
            tb.print_exc()
        finally:
            # Release this venue's merger whether or not sending worked.
            merger.close()

Ejemplo n.º 16

0

Mostrar archivo

Archivo: slidesmerge.py Proyecto: tintincita/RprogCoursera

from PyPDF2 import PdfFileMerger, PdfFileReader
import os

# Merge the numbered PDFs in ./slide into one document, with one bookmark per
# source file placed on that file's first page.
os.chdir('slide')
merger = PdfFileMerger()
label = 0  # page index at which the next file starts in the merged document
# Only PDF files take part; other directory entries would break the numeric
# sort key or the PDF reader.
pdf_names = [name for name in os.listdir('.') if name.lower().endswith('.pdf')]
try:
    # Sort by the integer before the first dot so that 10.pdf follows 9.pdf.
    for filename in sorted(pdf_names, key=lambda k: int(k.split('.')[0])):
        with open(filename, 'rb') as f:
            foo = PdfFileReader(f)
            p = foo.getNumPages()
            merger.append(foo)
            # Bookmark title is the file name without its ".pdf" suffix.
            merger.addBookmark(filename[:-4], label, parent=None)
            label += p
            print('{}，共{}页'.format(filename, p))
    # Build the parent-directory path portably; a literal "..\\" only works on Windows.
    merger.write(os.path.join(os.pardir, 'document-output.pdf'))
finally:
    merger.close()