def run(directory, output):
    """
    Compiles a directory of HTML into a single pdf.

    Every ``*.html`` file in ``directory`` is rendered to a PDF next to it,
    the PDFs are merged in order of the integer formed by the first four
    characters of each file name, and the result is written to ``output``.
    Each merged document gets a bookmark taken from the file name with its
    first five characters and the ``.html`` suffix removed.

    On success both the source ``.html`` files and the intermediate ``.pdf``
    files are deleted from ``directory``.

    :param directory: Directory to search.
    :param output: filename to write to.
    :raises ValueError: if an HTML file name does not start with four digits.
    """
    files = sorted(
        (name for name in os.listdir(directory) if name.endswith('.html')),
        key=lambda name: int(name[:4]),
    )

    def pdf_path(html_name):
        # Intermediate PDF lives beside its HTML source, same stem.
        return os.path.join(directory, f"{html_name[:-5]}.pdf")

    with ChargingBar('Converting', max=len(files)) as progress:
        for file in files:
            html = weasyprint.HTML(filename=os.path.join(directory, file))
            html.write_pdf(target=pdf_path(file))
            progress.next()

    merger = PdfFileMerger()
    try:
        # One extra tick on the bar for the final write.
        with ChargingBar('Merging', max=len(files) + 1) as progress:
            for file in files:
                # Hand the merger a path rather than a file object: it then
                # opens the PDF in binary mode itself and releases it in
                # close(), instead of us leaking a text-mode handle.
                merger.append(
                    pdf_path(file),
                    bookmark=f"{file[5:-5]}",
                    import_bookmarks=False,
                )
                progress.next()
            merger.write(output)
            progress.next()
    finally:
        # Must happen before the intermediate files are removed below.
        merger.close()

    for file in files:
        os.remove(os.path.join(directory, file))
        os.remove(pdf_path(file))
def render(self, form_data):
    """
    Render the tickets of all paid order positions of ``self.event`` into
    one merged PDF.

    Add-on positions are left out unless the event setting
    ``ticket_download_addons`` is set, and positions whose item is not an
    admission item are left out unless ``ticket_download_nonadm`` is set.

    ``form_data`` is accepted but not read by this implementation.

    Returns a ``(filename, mime type, pdf bytes)`` tuple.
    """
    merger = PdfFileMerger()
    ticket_output = PdfTicketOutput(self.event)

    positions = OrderPosition.objects.filter(
        order__event=self.event,
        order__status=Order.STATUS_PAID,
    )
    positions = positions.select_related('order', 'item', 'variation')

    for position in positions:
        hidden_addon = (
            position.addon_to_id
            and not self.event.settings.ticket_download_addons
        )
        if hidden_addon:
            continue
        hidden_non_admission = (
            not position.item.admission
            and not self.event.settings.ticket_download_nonadm
        )
        if hidden_non_admission:
            continue

        # Each ticket is drawn inside the language context of its order.
        with language(position.order.locale):
            page_buffer = BytesIO()
            canvas = ticket_output._create_canvas(page_buffer)
            layout = ticket_output.layout_map.get(
                position.item_id, ticket_output.default_layout
            )
            ticket_output._draw_page(layout, canvas, position, position.order)
            canvas.save()
            rendered = ticket_output._render_with_background(layout, page_buffer)
            merger.append(ContentFile(rendered.read()))

    merged = BytesIO()
    merger.write(merged)
    merger.close()
    merged.seek(0)

    filename = '{}_tickets.pdf'.format(self.event.slug)
    return filename, 'application/pdf', merged.read()
def _create_pdf_from_rtf_files(self):
    """
    Convert every file in ``self.files`` to PDF and merge the results.

    Progress is reported through ``self.progress.emit``: ``0`` at the start,
    then the 1-based count after each conversion.

    For each converted PDF the text of its first page is passed to
    ``helper_functions.get_chapter_from_pdf_txt`` to obtain a chapter title;
    if that fails, the title falls back to the file's base name (text before
    the first ``.``, underscores replaced by spaces).

    Side effects:
      * ``self.pages``    - page count of each PDF, in merge order.
      * ``self.chapters`` - chapter title of each PDF, in merge order.
      * ``self.trash``    - extended with the converted PDF paths
        (presumably for later cleanup - confirm against caller).
      * Writes ``self.master_file_name`` when ``self.create_toc`` is falsy,
        otherwise writes ``"tmp.pdf"`` in the current working directory.
    """
    self.progress.emit(0)
    pdfs = []
    for done, file in enumerate(self.files, start=1):
        pdfs.append(change_filetype(file, "pdf", self.engine))
        self.progress.emit(done)

    merger = PdfFileMerger()
    pages = []
    chapters = []
    try:
        for file in pdfs:
            read_pdf = PdfFileReader(file)
            page_content = read_pdf.getPage(0).extractText()
            try:
                chapter = helper_functions.get_chapter_from_pdf_txt(
                    page_content)
            except Exception:
                # Best-effort fallback to a title derived from the file name.
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                chapter = os.path.basename(file)
                chapter = chapter.split(".")[0].replace("_", " ")
            chapters.append(chapter)
            pages.append(read_pdf.getNumPages())
            merger.append(fileobj=file)

        self.pages = pages
        self.chapters = chapters

        if not self.create_toc:
            merger.write(self.master_file_name)
        else:
            merger.write("tmp.pdf")
    finally:
        # Release the merger's input handles even if merging/writing fails.
        merger.close()

    self.trash += pdfs
def PDFsMerger(pathx: str, pathy: str, dest_folder: str) -> None:
    """
    Merge two PDF files into ``<dest_folder>/<x>_<y>_merged.pdf``.

    ``<x>`` and ``<y>`` are the file names of ``pathx`` and ``pathy`` without
    directory and without the final extension. ``pathx`` comes first in the
    merged document.

    :param pathx: Path of the first PDF.
    :param pathy: Path of the second PDF.
    :param dest_folder: Existing folder that receives the merged file.
    """
    # Function-scope imports keep this block self-contained.
    import ntpath
    import os

    def _stem(path: str) -> str:
        # ntpath treats both "\\" and "/" as separators on every platform,
        # so Windows-style paths keep working while "./a.pdf" or
        # "dir.v2/a.pdf" no longer produce an empty or truncated name.
        # splitext drops only the final extension ("a.b.pdf" -> "a.b").
        return ntpath.splitext(ntpath.basename(path))[0]

    target = os.path.join(
        dest_folder,
        "{}_{}_merged.pdf".format(_stem(pathx), _stem(pathy)),
    )

    merger = PdfFileMerger()
    try:
        merger.append(pathx)
        merger.append(pathy)
        merger.write(target)
    finally:
        # Always release the input files, even when merging fails.
        merger.close()
def render(self, form_data):
    """
    Render the tickets of all matching order positions across
    ``self.events`` into one merged PDF.

    Recognised ``form_data`` keys:
      * ``include_pending`` - when truthy, pending orders are included in
        addition to paid ones.
      * ``order_by`` - ``'name'`` (cached attendee name, then order code),
        ``'code'`` (order code) or ``'name:<part>'`` (the given part of the
        attendee name, falling back to the add-on parent's name and then
        the invoice address name).

    Positions whose ``generate_ticket`` is falsy are skipped.

    Returns a ``(filename, mime type, pdf bytes)`` tuple; the filename uses
    the organizer slug for multi-event exports and the event slug otherwise.
    """
    merger = PdfFileMerger()

    positions = OrderPosition.objects.filter(order__event__in=self.events)
    positions = positions.prefetch_related('answers', 'answers__question')
    positions = positions.select_related('order', 'item', 'variation', 'addon_to')

    statuses = [Order.STATUS_PAID]
    if form_data.get('include_pending'):
        statuses.append(Order.STATUS_PENDING)
    positions = positions.filter(order__status__in=statuses)

    order_by = form_data.get('order_by', '')
    if order_by == 'name':
        positions = positions.order_by('attendee_name_cached', 'order__code')
    elif order_by == 'code':
        positions = positions.order_by('order__code')
    elif order_by.startswith('name:'):
        name_part = order_by[5:]  # text after the 'name:' prefix
        positions = positions.annotate(
            resolved_name=Coalesce(
                'attendee_name_parts',
                'addon_to__attendee_name_parts',
                'order__invoice_address__name_parts',
            )
        )
        positions = positions.annotate(
            resolved_name_part=JSONExtract('resolved_name', name_part)
        )
        positions = positions.order_by('resolved_name_part')

    # Start with an output bound to an empty queryset; it is replaced by a
    # real one as soon as the first position's event differs from it.
    ticket_output = PdfTicketOutput(Event.objects.none())
    for position in positions:
        if not position.generate_ticket:
            continue
        if position.order.event != ticket_output.event:
            ticket_output = PdfTicketOutput(position.event)

        with language(position.order.locale, ticket_output.event.settings.region):
            # Layout lookup order: (item, sales channel), (item, 'web'),
            # then the output's default layout.
            web_or_default = ticket_output.layout_map.get(
                (position.item_id, 'web'), ticket_output.default_layout
            )
            layout = ticket_output.layout_map.get(
                (position.item_id, position.order.sales_channel), web_or_default
            )
            page = ticket_output._draw_page(layout, position, position.order)
            merger.append(ContentFile(page.read()))

    merged = BytesIO()
    merger.write(merged)
    merger.close()
    merged.seek(0)

    if self.is_multievent:
        slug = self.events.first().organizer.slug
    else:
        slug = self.event.slug
    return '{}_tickets.pdf'.format(slug), 'application/pdf', merged.read()
def render(self, form_data):
    """
    Render the tickets of all matching order positions of ``self.event``
    into one merged PDF.

    Recognised ``form_data`` keys:
      * ``include_pending`` - when truthy, pending orders are included in
        addition to paid ones.
      * ``order_by`` - ``'name'``, ``'code'`` or ``'name:<part>'``; the last
        form sorts by the given part of the resolved attendee name.

    Add-on positions are skipped unless the event setting
    ``ticket_download_addons`` is set; non-admission items are skipped
    unless ``ticket_download_nonadm`` is set.

    Returns a ``(filename, mime type, pdf bytes)`` tuple.
    """
    merger = PdfFileMerger()
    ticket_output = PdfTicketOutput(self.event)

    positions = (
        OrderPosition.objects
        .filter(order__event=self.event)
        .prefetch_related('answers', 'answers__question')
        .select_related('order', 'item', 'variation', 'addon_to')
    )

    wanted_statuses = [Order.STATUS_PAID]
    if form_data.get('include_pending'):
        wanted_statuses.append(Order.STATUS_PENDING)
    positions = positions.filter(order__status__in=wanted_statuses)

    sort_mode = form_data.get('order_by', '')
    if sort_mode == 'name':
        positions = positions.order_by('attendee_name_cached', 'order__code')
    elif sort_mode == 'code':
        positions = positions.order_by('order__code')
    elif sort_mode.startswith('name:'):
        name_part = sort_mode[5:]  # text after the 'name:' prefix
        resolved_name = Coalesce(
            'attendee_name_parts',
            'addon_to__attendee_name_parts',
            'order__invoice_address__name_parts',
        )
        positions = positions.annotate(resolved_name=resolved_name)
        positions = positions.annotate(
            resolved_name_part=JSONExtract('resolved_name', name_part)
        )
        positions = positions.order_by('resolved_name_part')

    for position in positions:
        if position.addon_to_id and not self.event.settings.ticket_download_addons:
            continue
        if not position.item.admission and not self.event.settings.ticket_download_nonadm:
            continue

        with language(position.order.locale):
            page_buffer = BytesIO()
            canvas = ticket_output._create_canvas(page_buffer)
            # Layout lookup order: (item, sales channel), (item, 'web'),
            # then the output's default layout.
            web_or_default = ticket_output.layout_map.get(
                (position.item_id, 'web'), ticket_output.default_layout
            )
            layout = ticket_output.layout_map.get(
                (position.item_id, position.order.sales_channel), web_or_default
            )
            ticket_output._draw_page(layout, canvas, position, position.order)
            canvas.save()
            rendered = ticket_output._render_with_background(layout, page_buffer)
            merger.append(ContentFile(rendered.read()))

    merged = BytesIO()
    merger.write(merged)
    merger.close()
    merged.seek(0)

    filename = '{}_tickets.pdf'.format(self.event.slug)
    return filename, 'application/pdf', merged.read()
def create_toc_pdf_and_append_it(self):
    """
    Put ``toc.pdf`` in front of ``tmp.pdf`` and write the result to
    ``tmp2.pdf``, then create hyperlinks via ``self._create_hyperlinks``.

    The link locations returned by ``self._create_toc_pdf_for_rtf`` are
    passed through ``change_coordinates`` with ``self.toc_orientation``
    before being handed on. All three working files are appended to
    ``self.trash``.
    """
    raw_link_locations, page_locations = self._create_toc_pdf_for_rtf()

    # Change the coordinate of every link location.
    link_locations = []
    for location in raw_link_locations:
        link_locations.append(change_coordinates(location, self.toc_orientation))

    merger = PdfFileMerger()
    for part in ("toc.pdf", "tmp.pdf"):
        merger.append(part)
    merger.write("tmp2.pdf")
    merger.close()

    self._create_hyperlinks(link_locations, page_locations)
    self.trash += ["tmp.pdf", "tmp2.pdf", "toc.pdf"]
def prepare(self, address_override=None):
    """Prepare the PDF to be sent by appending attachments

    Generates the document, then appends each PDF attachment of
    ``self.comm`` for as long as the running page total stays within
    ``self.page_limit``. Every attachment is recorded in ``files`` as a
    ``(file, status, pages)`` tuple with status ``"attached"``,
    ``"skipped"`` (not a PDF, or too long) or ``"error"`` (unreadable).

    Returns ``(single_pdf, total_pages, files, mail)``, or
    ``(None, None, files, None)`` if the merged PDF cannot be written.
    ``address_override``, when truthy, replaces the FOIA address as the
    mail's destination.
    """
    self.generate()

    merger = PdfFileMerger(strict=False)
    generated = self.output(dest="S").encode("latin-1")
    merger.append(BytesIO(generated))

    total_pages = self.page
    files = []
    for file_ in self.comm.files.all():
        if file_.get_extension() != "pdf":
            files.append((file_, "skipped", 0))
            continue
        try:
            pages = PdfFileReader(file_.ffile).getNumPages()
            if pages + total_pages <= self.page_limit:
                merger.append(file_.ffile)
                files.append((file_, "attached", pages))
                total_pages += pages
            else:
                # too long, skip
                files.append((file_, "skipped", pages))
        except (PdfReadError, ValueError):
            # keep track of problematic attachments instead of failing
            files.append((file_, "error", 0))

    single_pdf = BytesIO()
    try:
        self._resize_pages(merger.pages)
        merger.write(single_pdf)
    except PdfReadError:
        return (None, None, files, None)

    # create the mail communication object
    address = address_override or self.comm.foia.address
    mail_defaults = {"to_address": address, "sent_datetime": timezone.now()}
    mail, _created = MailCommunication.objects.update_or_create(
        communication=self.comm,
        defaults=mail_defaults,
    )

    single_pdf.seek(0)
    pdf_name = "{}.pdf".format(self.comm.pk)
    mail.pdf.save(pdf_name, ContentFile(single_pdf.read()))

    # return to beginning of merged pdf before returning
    single_pdf.seek(0)
    return (single_pdf, total_pages, files, mail)
def render(self, form_data):
    """
    Render the tickets of all matching order positions of ``self.event``
    into one merged PDF, choosing each layout by item id.

    Recognised ``form_data`` keys:
      * ``include_pending`` - when truthy, pending orders are included in
        addition to paid ones.
      * ``order_by`` - ``'name'``, ``'code'`` or ``'name:<part>'``; the last
        form sorts by the given part of the resolved attendee name.

    Add-on positions are skipped unless the event setting
    ``ticket_download_addons`` is set; non-admission items are skipped
    unless ``ticket_download_nonadm`` is set.

    Returns a ``(filename, mime type, pdf bytes)`` tuple.
    """
    merger = PdfFileMerger()
    ticket_output = PdfTicketOutput(self.event)

    positions = OrderPosition.objects.filter(order__event=self.event)
    positions = positions.prefetch_related('answers', 'answers__question')
    positions = positions.select_related('order', 'item', 'variation', 'addon_to')

    if form_data.get('include_pending'):
        statuses = [Order.STATUS_PAID, Order.STATUS_PENDING]
    else:
        statuses = [Order.STATUS_PAID]
    positions = positions.filter(order__status__in=statuses)

    ordering = form_data.get('order_by', '')
    if ordering == 'name':
        positions = positions.order_by('attendee_name_cached', 'order__code')
    elif ordering == 'code':
        positions = positions.order_by('order__code')
    elif ordering.startswith('name:'):
        name_part = ordering[5:]  # text after the 'name:' prefix
        positions = positions.annotate(
            resolved_name=Coalesce(
                'attendee_name_parts',
                'addon_to__attendee_name_parts',
                'order__invoice_address__name_parts',
            )
        ).annotate(
            resolved_name_part=JSONExtract('resolved_name', name_part)
        ).order_by('resolved_name_part')

    for position in positions:
        skip_addon = (
            position.addon_to_id
            and not self.event.settings.ticket_download_addons
        )
        if skip_addon:
            continue
        skip_non_admission = (
            not position.item.admission
            and not self.event.settings.ticket_download_nonadm
        )
        if skip_non_admission:
            continue

        with language(position.order.locale):
            page_buffer = BytesIO()
            canvas = ticket_output._create_canvas(page_buffer)
            layout = ticket_output.layout_map.get(
                position.item_id, ticket_output.default_layout
            )
            ticket_output._draw_page(layout, canvas, position, position.order)
            canvas.save()
            rendered = ticket_output._render_with_background(layout, page_buffer)
            merger.append(ContentFile(rendered.read()))

    merged = BytesIO()
    merger.write(merged)
    merger.close()
    merged.seek(0)

    filename = '{}_tickets.pdf'.format(self.event.slug)
    return filename, 'application/pdf', merged.read()
def prepare(self):
    """Prepare the PDF to be sent by appending attachments

    Generates the document, appends every PDF attachment of ``self.comm``
    and records each attachment in ``files`` as a ``(file, status, pages)``
    tuple with status ``'attached'``, ``'skipped'`` (not a PDF) or
    ``'error'`` (unreadable).

    Returns ``(single_pdf, self.page, files, mail)``. If the merged PDF
    cannot be written, returns ``(None, None, files, None)`` - the same
    four-element shape as the success case, so callers can always unpack
    four values.
    """
    # generate the pdf and merge all pdf attachments
    # keep track of any problematic attachments
    self.generate()
    merger = PdfFileMerger(strict=False)
    merger.append(StringIO(self.output(dest='S')))

    files = []
    for file_ in self.comm.files.all():
        if file_.get_extension() != 'pdf':
            files.append((file_, 'skipped', 0))
            continue
        try:
            pages = PdfFileReader(file_.ffile).getNumPages()
            merger.append(file_.ffile)
            files.append((file_, 'attached', pages))
        except (PdfReadError, ValueError):
            files.append((file_, 'error', 0))

    single_pdf = StringIO()
    try:
        merger.write(single_pdf)
    except PdfReadError:
        # Previously a 3-tuple, which broke 4-value unpacking on this path.
        return (None, None, files, None)

    # create the mail communication object
    mail, _ = MailCommunication.objects.update_or_create(
        communication=self.comm,
        defaults={
            'to_address': self.comm.foia.address,
            'sent_datetime': timezone.now(),
        }
    )
    single_pdf.seek(0)
    mail.pdf.save(
        '{}.pdf'.format(self.comm.pk),
        ContentFile(single_pdf.read()),
    )

    # return to beginning of merged pdf before returning
    single_pdf.seek(0)
    return (single_pdf, self.page, files, mail)
def mergePdfs(self, paths, fileName =None):
    """ Takes a list of paths to existing PDF files and merges them into a
        single pdf with the given file name.

        [fileName] :: String :: None
            The name of the file to be written. If not specified, a file
            name will be created using the name of this class. A '.pdf'
            suffix is added when the name does not already end with one.
    """
    merger = PdfFileMerger()
    for pdfPath in paths:
        with open(pdfPath, 'rb') as source:
            merger.append(PdfFileReader(source))

    targetName = fileName or '%s-Report.pdf' % self.__class__.__name__
    hasPdfSuffix = StringUtils.toStr2(targetName).endswith('.pdf')
    if not hasPdfSuffix:
        targetName = targetName + '.pdf'

    targetPath = self.getPath(targetName, isFile=True)
    with open(targetPath, 'wb') as target:
        merger.write(target)
print "\t-f[int]\tFiles you want to merge. [int] is the page it shall be (for the right order)" print "\t-o\tThe output file." exit(0) i = 0 documents = {} outfile = "" while i < len(argv): if argv[i].startswith('-f'): documents[argv[i].strip('-f')] = argv[i + 1] if argv[i].startswith('-o'): outfile = argv[i + 1] i += 1 for pdf in documents: if not isfile(documents[pdf]): print "Cannot find the file '%s'" % documents[pdf] exit(0) if isfile(outfile): print "The file '%s' already exists." % outfile pdfmerger = PdfFileMerger(False) for pdf in documents: pdfmerger.merge(int(pdf) - 1, documents[pdf]) pdfmerger.write(outfile) pdfmerger.close() print "Done."
import os

from PyPDF2.merger import PdfFileMerger

# Merge every PDF in the current working directory into the first free
# "result_<n>.pdf" (n = 1..9). Earlier outputs ("result_" in the name) are
# never used as inputs.

# os.scandir yields entries in arbitrary order, so sort the names to make the
# page order of the merged document deterministic. Directories whose name
# happens to end in ".pdf" are ignored.
pdfs = sorted(
    entry.name
    for entry in os.scandir(os.getcwd())
    if entry.is_file()
    and entry.name.endswith(".pdf")  # check if path leads to a pdf
    and 'result_' not in entry.name  # Save all pdfs except 'result_' ones
)
print('list of pdfs to merge: ', pdfs)

if not pdfs:
    # Nothing to do: do not write an empty result file.
    print("No pdf files found to merge.")
else:
    merger = PdfFileMerger()
    try:
        for pdf in pdfs:
            merger.append(pdf)

        for n in range(1, 10):
            output_pdf_name = "result_" + str(n) + ".pdf"  # result_1, result_2 ...
            if os.path.exists("./" + output_pdf_name):  # Check if pdf exists
                print(output_pdf_name, "already exists")
                continue
            merger.write(output_pdf_name)
            print("Merged files into ", output_pdf_name)
            break
        else:
            # Every candidate name was taken; say so instead of silently
            # finishing without any output.
            print("result_1.pdf to result_9.pdf all exist; nothing was written.")
    finally:
        # Release the input files whether or not an output was written.
        merger.close()

print("\nDone.")