def merge_pdf(path: str, output_filename: str, bookmark_separator: str = "", bookmark_start_index: int = 1, password: str = "") -> None:
    """Merge every PDF returned by ``get_pdf_names(path)`` into one file.

    A bookmark is added at the first page of each merged PDF.

    :param str path: folder that contains the PDFs to merge
    :param str output_filename: output file name (including its path); an
        existing file with that name is removed first
    :param str bookmark_separator: prefix of the generated bookmark titles;
        when empty, each bookmark is named after its source file instead
    :param int bookmark_start_index: first number appended to
        ``bookmark_separator``
    :param str password: password used to decrypt encrypted PDFs
    """
    if os.path.exists(output_filename):
        os.remove(output_filename)
    os.chmod(path, stat.S_IRWXU)  # ensure we have permission

    output_pdf = PdfFileMerger()
    output_page_num = 0
    for index, pdf_path_with_name in enumerate(get_pdf_names(path), bookmark_start_index):
        print(pdf_path_with_name)
        with open(pdf_path_with_name, "rb") as pdf:
            content = PdfFileReader(pdf)
            if content.isEncrypted:
                content.decrypt(password)

            if bookmark_separator:
                title = bookmark_separator + str(index)
            else:
                # Accept both "\\" and "/" as separators so the title is the
                # bare file name on every platform, and strip only the final
                # extension so dotted names such as "1.2 intro.pdf" survive.
                file_name = pdf_path_with_name.replace("\\", "/").rsplit("/", 1)[-1]
                title = os.path.splitext(file_name)[0]

            # The bookmark points at the first page of the file merged next.
            output_pdf.addBookmark(title, output_page_num)
            output_pdf.append(content)
            output_page_num += content.numPages

    with open(output_filename, "wb") as f:
        output_pdf.write(f)
    print("mission complete")
def merge_pdf():
    """Merge two fixed sample PDFs, add one bookmark and save the result."""
    # The merger instance collects the pages of the output document.
    combined = PdfFileMerger()

    # Week1_1.pdf goes in first ...
    combined.append(fileobj='Week1_1.pdf')
    # ... then ex1.pdf is merged at position 0.
    combined.merge(position=0, fileobj='ex1.pdf')

    # Attach a bookmark to page index 1.
    combined.addBookmark(title='这是一个书签', pagenum=1)

    # Write the merged document to disk.
    combined.write('merge_pdf.pdf')
def merger(files: list, out):
    """Concatenate PDFs into ``out`` with one bookmark per source file.

    :param files: paths of the PDFs to merge, in output order
    :param out: path of the merged PDF to write

    Each bookmark is titled with the file's base name up to its first dot
    and points at the first page that file contributed. The document is
    written with the '/TwoColumnLeft' page layout.
    """
    pdf_merger = PdfFileMerger()
    try:
        for path in files:
            # Pages merged so far == index of the first page of this file.
            # Using the file's position in `files` instead is only correct
            # when every earlier file has exactly one page.
            first_page = len(pdf_merger.pages)
            pdf_merger.append(path)
            title = os.path.basename(path).split('.', 1)[0]
            pdf_merger.addBookmark(title, first_page, parent=None)
        pdf_merger.setPageLayout(layout='/TwoColumnLeft')
        with open(out, 'wb') as fileobj:
            pdf_merger.write(fileobj)
    finally:
        # Releases the input files the merger opened from the given paths.
        pdf_merger.close()
def bind(self, *files, outputDir="./"):
    """Merge ``files`` into ``<outputDir>/output-binder.pdf``.

    :param files: PDFs to merge, in order; file number ``num`` gets a
        bookmark titled ``page-<num>`` at page index ``num``
    :param outputDir: directory that receives ``output-binder.pdf``

    Inputs whose string form contains "blank-page" are deleted from disk
    once the merged file has been written successfully.
    """
    clean = [file for file in files if "blank-page" in str(file)]
    merger = PdfFileMerger()
    try:
        # `with` guarantees the output handle is closed even if merging fails.
        with open(os.path.join(outputDir, "output-binder.pdf"), "wb") as output:
            for num, file in enumerate(files):
                merger.append(file, import_bookmarks=False)
                merger.addBookmark("page-%s" % num, num, parent=None)
            merger.write(output)
    finally:
        merger.close()

    # Only reached when the merge succeeded, as in the original flow.
    for path in clean:
        os.remove(path)
def merge_pdf(dir, titlelist):
    """Merge the numbered PDFs found under ``dir`` and bookmark each one.

    :param dir: directory that is walked for input files
    :param titlelist: bookmark titles, paired in order with the files of
        each walked directory after sorting them by their leading number

    The result is written to ``dir_save_pdf + 'merge.pdf'``
    (``dir_save_pdf`` is defined outside this function). Files whose names
    do not start with digits are skipped because they cannot be ordered.
    """
    leading_number = re.compile(r'(\d+)')
    pdf_manage = PdfFileMerger()
    # Running page total across all directories, so bookmarks stay correct
    # when os.walk visits more than one directory.
    merge_page_count = 0
    for root, _subdirs, filenames in os.walk(dir):
        numbered = [name for name in filenames if leading_number.match(name)]
        filenames_sorted = sorted(
            numbered, key=lambda name: int(leading_number.match(name).group()))
        for filename, title in zip(filenames_sorted, titlelist):
            # os.path.join inserts the separator that `root + filename` lacked.
            with open(os.path.join(root, filename), 'rb') as handle:
                f_input = PdfFileReader(handle)
                pdf_manage.append(f_input)
                pdf_manage.addBookmark(title, merge_page_count)  # add the bookmark
                merge_page_count += f_input.getNumPages()
    with open(dir_save_pdf + 'merge.pdf', 'wb') as f_output:
        pdf_manage.write(f_output)
def mergeContents(self,tocFullPath,packetMeta,bookmarks):
    """Merge the TOC and all packet items into ``self.packetPath``.

    :param tocFullPath: path of the table-of-contents PDF; merged first
        unless ``self.configDict['omitTOC']`` is truthy
    :param packetMeta: mapping with a ``'groups'`` list; every group has an
        ``'items'`` list whose entries may carry a ``'file_name'`` to merge
    :param bookmarks: dicts with ``'description'``, ``'page'`` and optional
        ``'children'`` (same keys); ``page`` is reduced by one before use,
        presumably converting 1-based to 0-based numbers

    When the TOC is included, the bookmarks created while appending are
    discarded and replaced by the two-level tree described by ``bookmarks``.
    """
    include_toc = not self.configDict.get('omitTOC',False)

    sources = []
    if include_toc:
        sources.append(tocFullPath)
    if packetMeta:
        for packetgroup in packetMeta.get('groups',[]):
            for item in packetgroup.get('items',[]):
                file_name = item.get('file_name','')
                if file_name:
                    sources.append(file_name)

    merger = PdfFileMerger()
    opened = []
    try:
        for source in sources:
            # PDFs are binary; text mode corrupts or rejects them.
            handle = open(source, 'rb')
            opened.append(handle)
            # Second argument is the bookmark title: the file path.
            merger.append(handle, source)

        if include_toc:
            merger.bookmarks = []
            for bookmark in bookmarks:
                parent_ref = merger.addBookmark(bookmark.get("description",''),bookmark.get('page',1)-1)
                for child in bookmark.get("children",[]):
                    merger.addBookmark(child.get("description",''),child.get('page',1)-1,parent_ref)

        with open(self.packetPath, "wb") as output:
            merger.write(output)
    finally:
        merger.close()
        # Inputs stay open until after write(); the merger may read lazily.
        for handle in opened:
            handle.close()
def merge_and_bookmark(contents_list, book_name):
    """Merge per-section PDFs into ``<book_name>.pdf`` with nested bookmarks.

    :param contents_list: sections in reading order; each is a mapping with
        ``"level"`` (nesting depth), ``"name"`` (bookmark title),
        ``"single_pdf_path"`` (PDF to merge) and ``"page_num"`` (the number
        of pages that PDF adds)
    :param book_name: title of the root bookmark and stem of the output file
    """
    pdfmerger = PdfFileMerger()
    page_idx = 0
    last_level = -1
    # Stack of open ancestors ("depth meter"); index 0 is the book root.
    level_list = [pdfmerger.addBookmark(book_name, page_idx)]
    opened = []
    print("Merging and bookmarking PDFs:")
    try:
        for section in tqdm(contents_list):
            # Drop ancestors at or below this section's depth so the top of
            # the stack is this section's parent.
            for _ in range(last_level - section["level"] + 1):
                level_list.pop()
            parent = level_list[-1]
            level_list.append(
                pdfmerger.addBookmark(section["name"], page_idx, parent))

            source = open(section["single_pdf_path"], 'rb')
            opened.append(source)
            pdfmerger.append(source)

            page_idx += section["page_num"]
            last_level = section["level"]

        with open("{}.pdf".format(book_name), 'wb') as f_output:
            pdfmerger.write(f_output)
    finally:
        # Inputs must outlive write(), so they are closed only here.
        for source in opened:
            source.close()
def pdf_merger(pdfs: List, location_names: List, parent_names: List, levels: List, outfile: str):
    """Merge ``pdfs`` into ``outfile`` with one bookmark per input file.

    The four lists are consumed in lockstep.

    :param pdfs: paths of the PDFs to merge; each must end with ``.pdf``
    :param location_names: bookmark title for each PDF
    :param parent_names: title of the parent bookmark for each PDF
    :param levels: nesting level for each PDF; a bookmark is nested only
        when its level is > 0 and its parent name is one of
        ``location_names``
    :param outfile: output path, must end with ``.pdf``; an existing file
        is removed before writing
    :raises AssertionError: if an input or the output path is not ``.pdf``
    """
    # Raised explicitly (same exception type as before) so the checks still
    # run under ``python -O``, and done up front so bad arguments fail
    # before any merging work.
    if not all(pdf.endswith('.pdf') for pdf in pdfs):
        raise AssertionError('Not all files passed into `pdfs` are actual PDFs.')
    if not outfile.endswith('.pdf'):
        raise AssertionError('Provided output file is not a PDF.')

    # compile PDFs
    merger = PdfFileMerger()
    try:
        for pdf, location_name, parent_name, level in zip(pdfs, location_names, parent_names, levels):
            # Index of the first page this file contributes; correct even
            # when earlier files have more than one page.
            first_page = len(merger.pages)
            merger.append(pdf)
            if parent_name in location_names and level > 0:
                # A trailing space keeps a child's title distinct from a
                # parent with the same name (parents are looked up by title).
                title = f'{location_name} ' if parent_name == location_name else location_name
                merger.addBookmark(title, first_page, parent_name)
            else:
                merger.addBookmark(location_name, first_page)

        # if the output already exists, delete it before writing the new file
        if os.path.exists(outfile):
            os.remove(outfile)

        # write compiled PDF
        merger.write(outfile)
    finally:
        merger.close()
def add_bookmarks(self, input_path: str, output_path: str) -> None:
    """Copy a PDF and add the (optionally nested) bookmarks in ``self.bookmarks``.

    :param input_path: PDF to read
    :param output_path: PDF to write, with the bookmarks added

    Each entry of ``self.bookmarks`` is read for ``bookmark_name``,
    ``page_number``, ``parent_bookmark_name`` and ``input_path``. A parent
    bookmark is created once per distinct parent name, at the page that
    ``self.get_page_numbers()`` maps to the entry's ``input_path``.

    PyPDF2 PdfFileWriter documentation:
    https://pythonhosted.org/PyPDF2/PdfFileWriter.html
    """
    merger = PdfFileMerger()
    # The input stays open until after write(); `with` closes it on any exit.
    with open(input_path, "rb") as input_pdf:
        total_pages = PdfFileReader(input_pdf).getNumPages()
        merger.append(fileobj=input_pdf, pages=(0, total_pages))

        logger.info('Bookmarks: {}'.format(self.bookmarks))
        page_numbers = self.get_page_numbers()
        parent_bookmarks = {}
        for val in self.bookmarks:
            parent_name = val.parent_bookmark_name
            if parent_name:
                if parent_name not in parent_bookmarks:
                    parent_bookmarks[parent_name] = merger.addBookmark(
                        title=parent_name,
                        pagenum=page_numbers.get(val.input_path),
                        parent=None,
                    )
                merger.addBookmark(
                    title=val.bookmark_name,
                    pagenum=val.page_number,
                    parent=parent_bookmarks.get(parent_name),
                )
            elif val.bookmark_name:
                merger.addBookmark(
                    title=val.bookmark_name,
                    pagenum=val.page_number,
                    parent=None,
                )
        logger.info('Parent bookmarks: {}'.format(parent_bookmarks))

        # Opened only now, so a failure above no longer leaves a truncated
        # output file behind.
        with open(output_path, "wb") as output_pdf:
            merger.write(output_pdf)
def bookmarks_hinzufuegen(self, bookmarks):
    """Add bookmarks to the PDF held in ``self.pdf_bytes``.

    ``bookmarks`` is an iterable of ``(text, page)`` pairs. If no PDF bytes
    exist yet, ``self.kompiliere_pdf()`` is called first. The bookmarked
    document replaces ``self.pdf_bytes``.
    """
    log.info("Füge dem PDF folgende Bookmarks hinzu: %r", bookmarks)

    if not self.pdf_bytes:
        log.warning(
            "Es existieren noch keine PDF-Bytes, "
            "daher wird das PDF kompiliert"
        )
        self.kompiliere_pdf()

    # Load the current document into a merger so bookmarks can be attached.
    quelle = BytesIO(self.pdf_bytes)
    merger = PdfFileMerger()
    merger.append(quelle)

    for text, seite in bookmarks:
        log.debug(
            "Füge folgende Bookmark auf Seite %s hinzu: %s", seite, text
        )
        merger.addBookmark(text, seite, None)

    # Serialise into a temporary buffer and keep only the resulting bytes.
    ziel = BytesIO()
    try:
        merger.write(ziel)
        self.pdf_bytes = ziel.getvalue()
    finally:
        ziel.close()
def assignToSpot(index):
    """Build the monthly report merger from the entries described by `index`.

    For every entry `dex[x]`, the names in `dex[x][1][i][z]` are looked up in
    the module-level `indexList`; each hit appends one page to the report via
    `getInterestFile(s, storageLocation)`. When nothing is found, a page
    obtained from `getInterestFile(..., noneLocation)` is appended instead and
    a message tuple is added to an error list that is printed at the end.

    Returns the populated `PdfFileMerger`.

    NOTE(review): the source of this function lost its indentation; the
    nesting below is a best-effort reconstruction -- confirm against the
    original file. This is Python 2 code (`print` statement).
    """
    errorList = []
    monthlyReport = PdfFileMerger()
    monthlyReportLength = 0  # number of pages appended so far
    # Root bookmark; the per-district bookmarks are created as its children.
    DistrictResult = monthlyReport.addBookmark(
        "District Packages", 0, parent=None
    )  # change this later if added more parent bookmarks (Pending Importance: Medium)
    DistrictName = None
    # above this
    dex = index
    for x in range(0, len(dex)):
        # var = x[0]
        drawProgressBar(int(50 + x * 100 / len(dex) / 2), 50)
        doneWithOne = True
        for i in range(0, len(dex[x][1])):
            # Categories 1 and 2 are visited in swapped order: the pass with
            # i == 1 handles category 2 and the pass with i == 2 handles
            # category 1 (doneWithOne flips after the first non-zero pass).
            if i == 1 and doneWithOne:
                i = 2
            if i == 2 and not doneWithOne:
                i = 1
            for z in range(0, len(dex[x][1][i])):
                root = dex[x][1][i][z]
                foundFile = False
                firstPageOfWriteUpMerged = False
                firstPageOfTypeMerged = False
                # Hard-coded renames applied before the lookup in indexList.
                if root == "7Harlan" and i == 2:  # Introduce Canada and WPE exceptions (Pending importance : High)
                    root = "Harlan"
                if root == "GSW - Large Projects":
                    root = "Large Projects"
                if root == "MARHE":
                    root = "MAR "
                # print root
                for s in range(0, len(indexList)):
                    if indexList[s][0] == i:
                        if indexList[s][1] == root:
                            # print indexList[s]
                            # print ("found it")
                            interestFile = getInterestFile(s, storageLocation)
                            if interestFile is not None:
                                # interestFile is indexed as (page index, file object).
                                fp = interestFile[1]
                                interestLoc = interestFile[0]
                                # Append exactly one page: [interestLoc, interestLoc + 1).
                                monthlyReport.append(fileobj=fp, pages=(interestLoc, interestLoc + 1), import_bookmarks=False)
                                monthlyReportLength = monthlyReportLength + 1
                                foundFile = True
                                # The first category-0 page of an entry opens a
                                # new bookmark under the root bookmark.
                                if i == 0 and z == 0 and not firstPageOfWriteUpMerged:
                                    DistrictName = monthlyReport.addBookmark(
                                        str(root), monthlyReportLength - 1, DistrictResult)
                                    firstPageOfWriteUpMerged = True
                                if DistrictName is not None:
                                    # One child bookmark per category, placed on
                                    # the page that was just appended.
                                    if i == 1 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "SG&A", monthlyReportLength - 1, DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 2 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "District Package", monthlyReportLength - 1, DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 3 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Job Profitability", monthlyReportLength - 1, DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 4 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "A/R", monthlyReportLength - 1, DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 5 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Bid Logs", monthlyReportLength - 1, DistrictName)
                                        firstPageOfTypeMerged = True
                                    if i == 0 and z == 0 and not firstPageOfTypeMerged:
                                        monthlyReport.addBookmark(
                                            "Write Up", monthlyReportLength - 1, DistrictName)
                                        firstPageOfTypeMerged = True
                if not foundFile:
                    # print ("Can't find ", root)
                    # Pick the placeholder for this category. Note that i == 2
                    # uses index 1 and i == 1 uses index 2 below.
                    if i == 5:
                        # This whole thing needs to be done in a for loop again, come back later :D (Pending Importance = Low)
                        noneFile = getInterestFile(5, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[5], "at page ", monthlyReportLength))
                    if i == 3:
                        noneFile = getInterestFile(3, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[3], "at page ", monthlyReportLength))
                    if i == 4:
                        noneFile = getInterestFile(4, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[4], "at page ", monthlyReportLength))
                    if i == 2:
                        noneFile = getInterestFile(1, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[1], "at page ", monthlyReportLength))
                    if i == 1:
                        noneFile = getInterestFile(2, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[2], "at page ", monthlyReportLength))
                    if i == 0:
                        noneFile = getInterestFile(0, noneLocation)
                        errorList.append(
                            ("Can't find ", root, " in category ", typeName[0], "at page ", monthlyReportLength))
                    # The placeholder page still counts towards the report length.
                    monthlyReportLength = monthlyReportLength + 1
                    fp2 = noneFile[1]
                    intLoc = noneFile[0]
                    monthlyReport.append(fp2, pages=(intLoc, intLoc + 1), import_bookmarks=False)
            if i != 0:
                doneWithOne = False
    # Report everything that could not be found.
    for x in errorList:
        print "\n", x
    return monthlyReport
import os

from PyPDF2 import PdfFileMerger, PdfFileReader

# Merge the numbered PDFs in ./slide ("1.pdf", "2.pdf", ...) into
# ../document-output.pdf, with one bookmark per source file.
os.chdir('slide')
merger = PdfFileMerger()
label = 0  # zero-based index of the first page of the next file

# Only "<number>.<...>.pdf" files can be ordered numerically; anything else
# in the folder (hidden files, notes, ...) is skipped instead of crashing
# the sort key.
pdf_names = [
    name for name in os.listdir('.')
    if name.lower().endswith('.pdf') and name.split('.')[0].isdecimal()
]
for filename in sorted(pdf_names, key=lambda k: int(k.split('.')[0])):
    with open(filename, 'rb') as f:
        foo = PdfFileReader(f)
        p = foo.getNumPages()
        merger.append(foo)
    merger.addBookmark(os.path.splitext(filename)[0], label, parent=None)
    label += p
    print('{},共{}页'.format(filename, p))

# Build the parent-directory path portably; the literal "..\\name" only
# means "parent directory" on Windows.
merger.write(os.path.join(os.pardir, 'document-output.pdf'))
merger.close()
def collect(project_folder, out_filename):
    """Collect PDF files into the main document.

    The function looks for PDF files in ``project_folder`` whose *file
    names* satisfy the conditions below and merges them into one PDF:

    * cover page: name contains "титул" or "обложка" (case-insensitive);
    * main document: name contains "ПЗ" or "Пояснительная записка";
    * change registration table: name contains
      "таблица регистрации изменений" (case-insensitive).

    When a cover page exists, its pages replace the same number of leading
    pages of the main document. The change registration table, if present,
    is appended at the end and bookmarked. Information and Certification
    Sheets ("УЛ", "ИУЛ", ...) are not merged by this function.

    Arguments:
        project_folder {str} -- Path to directory with PDF files
        out_filename {str} -- Path of the merged PDF to write

    Raises:
        IndexError -- if no main document is found in ``project_folder``
    """
    from contextlib import ExitStack

    project_folder = Path(str(project_folder)).resolve()

    # List of PDF files paths
    files_list = sorted(project_folder.glob("*.pdf"))

    # Regular expressions for determine files type
    title_page_re = re.compile(r"титул|обложка", re.IGNORECASE)
    changes_page_re = re.compile(r"таблица регистрации изменений", re.IGNORECASE)
    main_doc_re = re.compile(r"ПЗ|Пояснительная записка|пояснительная записка")

    def _matching(pattern):
        # Match the file name only, so a parent directory whose name happens
        # to contain a keyword does not make every file match.
        return [str(path) for path in files_list if pattern.search(path.name)]

    title_path = _matching(title_page_re)
    changes_path = _matching(changes_page_re)
    main_doc_path = _matching(main_doc_re)
    if not main_doc_path:
        # Same exception type as the former bare `main_doc_path[0]`.
        raise IndexError(
            "No main document PDF found in {}".format(project_folder))

    merger = PdfFileMerger()
    try:
        # ExitStack closes every opened input once the output is written.
        with ExitStack() as stack:
            def _reader(path):
                return PdfFileReader(stack.enter_context(open(path, "rb")))

            main_doc_pdf = _reader(main_doc_path[0])

            # Append title pages if exist to main document, or just append
            # main document
            if title_path:
                title_page_pdf = _reader(title_path[0])
                title_page_num = title_page_pdf.getNumPages()
                main_doc_page_num = main_doc_pdf.getNumPages()
                merger.append(fileobj=title_page_pdf)
                print("Title page appended to document.")
                # Append after *all* title pages; a fixed insert position of
                # 2 would land inside a cover longer than two pages.
                merger.append(fileobj=main_doc_pdf,
                              pages=(title_page_num, main_doc_page_num))
                print("Main doc appended to document.")
            else:
                merger.append(fileobj=main_doc_pdf)
                print("Main doc appended to document.")

            # Append Table of changes page to main document
            if changes_path:
                changes_page_pdf = _reader(changes_path[0])
                merger.append(fileobj=changes_page_pdf)
                print("Change Registration Table page appended to document.")
                # The table starts right after the main document's pages, so
                # the main document's page count is its zero-based index.
                merger.addBookmark("Таблица регистрации изменений",
                                   main_doc_pdf.getNumPages())

            with open(out_filename, "wb") as output:
                merger.write(output)
    finally:
        merger.close()
#!/usr/bin/env python
"""Add bookmarks from a JSON list to a PDF; writes '<input>-bookmarked.pdf'."""
import json
import os.path
from argparse import ArgumentParser

from PyPDF2 import PdfFileMerger

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument('pdf', help='the input PDF')
    ap.add_argument('bookmarks', help='JSON list of bookmarks')
    # Parsed as int here so a bad value is reported as a usage error.
    ap.add_argument('offset', type=int, help='page offset to use')
    args = ap.parse_args()  # parse once instead of once per argument

    pdf_path = args.pdf
    bookmarks_path = args.bookmarks
    page_offset = args.offset

    # The JSON file is expected to hold a list of [name, page] pairs.
    with open(bookmarks_path, 'r') as bookmarks_file:
        bookmarks = json.load(bookmarks_file)

    output_pdf_path = os.path.splitext(pdf_path)[0] + '-bookmarked.pdf'

    output_pdf = PdfFileMerger()
    # Keep the input open until the output is written: depending on the
    # PyPDF2 version the merger reads pages from this handle lazily, and
    # closing it early fails in write() with "seek of closed file".
    with open(pdf_path, 'rb') as pdf_file:
        output_pdf.merge(position=0, fileobj=pdf_file)

        for name, page in bookmarks:
            # NOTE(review): the "- 2" correction is kept from the original;
            # confirm the intended page-number convention.
            output_pdf.addBookmark(name, page + page_offset - 2)

        with open(output_pdf_path, 'wb') as out_file:
            output_pdf.write(out_file)
def render_and_send_app(pk):
    """Render an application as a PDF for each venue and email it.

    For every venue of the application with primary key `pk`, the
    "venueapp/pdf_app.html" template is rendered to a PDF, staff attachments
    smaller than the configured "max_inline_attachment_bytes" are merged in,
    the bookmarks of the rendered document are re-added, and the result is
    attached to a message built by `render_msg` and sent.

    NOTE(review): the source of this function lost its indentation; the
    nesting below (in particular which `if` the `else` belongs to) is a
    best-effort reconstruction -- confirm against the original file.
    """
    app = import_module('venueapp.models').Application.objects.get(pk=pk)
    cover = import_module('venueapp.views').make_cover_page(app)
    max_bytes = config.get_int("max_inline_attachment_bytes", 0)
    for venue in app.venues.all():
        html = render_to_string(
            "venueapp/pdf_app.html", {
                "object": app,
                "cover": cover,
                "venue": venue,
                "logo": finders.find("logo.png"),
                "pdf": True,
                "max_attachment_size": max_bytes,
            })
        doc = HTML(string=html, base_url=settings.SITE_URL).render()
        # Bookmarks are captured before merging so their locations can be
        # shifted as attachments are inserted.
        bookmark_tree = doc.make_bookmark_tree()
        bookmarks = list([_Bookmark(i) for i in bookmark_tree])
        app_pdf = BytesIO()
        doc.write_pdf(app_pdf)
        merger = PdfFileMerger()
        merger.append(app_pdf, import_bookmarks=False)
        for staff in app.staffmember_set.signed_on().filter(
                role__accepts_attachment=True).exclude(
                    Q(attachment=None) | Q(attachment="")):
            name = "{} {}'s ".format(staff.role_name, staff.person)
            try:
                if staff.attachment.size < max_bytes:
                    reader = PdfFileReader(staff.attachment.open(), False)
                    attachment_pages = reader.getNumPages()
                    page = None
                    # The insert position is the location of the bookmark
                    # that follows this staff member's "Supplement" bookmark.
                    for i, bookmark in enumerate(bookmarks):
                        if bookmark.label == name + "Supplement":
                            page = bookmarks[i + 1].location
                    if page:
                        merger.merge(page, staff.attachment.open(),
                                     import_bookmarks=False)
                        # Shift every bookmark at or after the insert point
                        # by the number of pages inserted.
                        for i in bookmarks:
                            if i.location >= page:
                                i.location += attachment_pages
                    else:
                        # No insert position found: add the attachment at the
                        # end with its own bookmark.
                        merger.append(staff.attachment.open(),
                                      bookmark=name + "Attachment",
                                      import_bookmarks=False)
            except Exception as e:
                # Best effort: a failing attachment is logged and skipped.
                tb.print_exc()
        for i in bookmarks:
            merger.addBookmark(i.label, i.location)
        pdf = BytesIO()  # open("/tmp/{}.pdf".format(venue.venue), "wb")
        merger.write(pdf)
        # locals() is passed to render_msg, so every local name defined above
        # is handed to that call; keep the names in this function stable.
        msg = render_msg("venueapp/email/submission.html", locals(), to=[
            "{} <{}>".format(i.get_full_name(False), i.email)
            for i in venue.managers.all()
        ], cc=[
            "{} <{}>".format(i.get_full_name(False), i.email)
            for i in app.show.staff.all()
        ], subject="Application for {} in {} Submitted".format(
            app, venue.venue), tags=["venueapp", "venueapp-submission"])
        msg.attach("{} - {}.pdf".format(app, venue),
                   BytesIO(pdf.getbuffer()), "application/pdf")
        try:
            msg.send()
        except Exception as err:
            LOGGER.error("Application submission sending failed: {}".format(
                repr(err)))
            tb.print_exc()
        finally:
            merger.close()
import os

from PyPDF2 import PdfFileMerger, PdfFileReader

# Concatenate the numbered slide PDFs found in ./slide into a single
# document one directory up, adding one bookmark per source file.
os.chdir('slide')
merger = PdfFileMerger()
label = 0  # page index at which the next file starts

# Restrict the listing to PDFs whose name starts with a number: other files
# would make the numeric sort key raise ValueError.
candidates = [
    entry for entry in os.listdir('.')
    if entry.lower().endswith('.pdf') and entry.split('.')[0].isdecimal()
]
for filename in sorted(candidates, key=lambda k: int(k.split('.')[0])):
    with open(filename, 'rb') as f:
        foo = PdfFileReader(f)
        p = foo.getNumPages()
        merger.append(foo)
    merger.addBookmark(os.path.splitext(filename)[0], label, parent=None)
    label += p
    print('{},共{}页'.format(filename, p))

# os.pardir + os.path.join give "../document-output.pdf" on POSIX and
# "..\document-output.pdf" on Windows; the hard-coded backslash form named
# a file inside ./slide on POSIX systems.
merger.write(os.path.join(os.pardir, 'document-output.pdf'))
merger.close()