def pdf_metadata_save(pdf_file, metadata, substitute_all_metadata = False, make_backup = True):
    """Rewrite ``pdf_file`` in place with updated document-information entries.

    The file is first renamed to a backup, then a new file is written under
    the original name from the backup's pages plus the requested metadata.

    :param pdf_file: path of the PDF to update.
    :param metadata: mapping of entry names (without the leading ``/``) to
        values; every value is stored as ``str(value)``.
    :param substitute_all_metadata: when true, existing entries are not
        carried over and only ``metadata`` is written.
    :param make_backup: a string is used as the backup path; a value equal
        to ``False`` deletes the backup after a successful write; anything
        else keeps ``<pdf_file without extension>.bak``.
    :raises: whatever the file system or the PDF library raises; in that
        case the original file is moved back to ``pdf_file`` first.
    """
    if isinstance(make_backup, str):
        bak_file = make_backup
    else:
        bak_file = os.path.splitext(pdf_file)[0] + ".bak"

    os.rename(pdf_file, bak_file)
    try:
        # The reader resolves objects lazily, so the backup must stay open
        # until the writer has produced the new file.
        with open(bak_file, 'rb') as fin:
            pdf_in = PdfFileReader(fin)
            writer = PdfFileWriter()
            for page in range(pdf_in.getNumPages()):
                writer.addPage(pdf_in.getPage(page))

            infoDict = writer._info.getObject()
            # documentInfo is None for PDFs without an /Info dictionary.
            info = pdf_in.documentInfo or {}
            if not substitute_all_metadata:
                for key in info:
                    infoDict.update({key: info[key]})
            for key in metadata:
                infoDict.update({NameObject('/' + key): createStringObject(str(metadata[key]))})

            with open(pdf_file, 'wb') as fout:
                writer.write(fout)
    except Exception:
        # Do not leave the caller with only a backup and a truncated output:
        # drop the partial file and move the original back into place.
        if os.path.exists(pdf_file):
            os.unlink(pdf_file)
        os.rename(bak_file, pdf_file)
        raise

    # Equality (not identity) is kept on purpose so 0 still means "no backup".
    if make_backup == False:  # noqa: E712
        os.unlink(bak_file)
def PDFMerge(savePath, pdfPath, watermarkPdfPath):
    """Merge the first page of a watermark PDF onto every page of a PDF.

    :param savePath: path the merged PDF is written to.
    :param pdfPath: path of the PDF whose pages receive the watermark.
    :param watermarkPdfPath: path of the PDF whose first page is the watermark.
    """
    # Both inputs stay open until the result is written because page content
    # is read lazily; the context managers close all three files afterwards.
    with open(pdfPath, 'rb') as pdfFile, \
            open(watermarkPdfPath, 'rb') as watermarkPdfFile:
        # Load the source PDF
        pdfReader = PdfFileReader(pdfFile, strict=False)
        # Load the watermark PDF and take its first page
        watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

        pdfWriter = PdfFileWriter()

        # Repeat for every page of the source PDF
        for pageNum in range(pdfReader.numPages):
            pageObj = pdfReader.getPage(pageNum)

            # Offsets intended to centre the watermark: half the difference
            # of the two media boxes' upper-right coordinates.
            x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
            y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

            # Merge the watermark page into the current page
            pageObj.mergeTranslatedPage(page2=watermarkPdf, tx=x, ty=y, expand=False)

            # Queue the merged page for output
            pdfWriter.addPage(pageObj)

        # Save
        with open(savePath, 'wb') as resultFile:
            pdfWriter.write(resultFile)
def encrypt(self, decrypt=None):
    """Write a password-encrypted copy of ``self.pdf`` to ``self.output``.

    :param decrypt: forwarded unchanged to ``pypdf3_reader`` (presumably the
        password of an already-encrypted source -- confirm against that helper).
    :return: ``self.output``.
    """
    writer = PdfFileWriter()

    with open(self.pdf, 'rb') as source:
        reader = pypdf3_reader(source, decrypt)

        # Copy the source pages over in order.
        for index in range(reader.numPages):
            writer.addPage(reader.getPage(index))

        # Encryption settings all come from the instance.
        writer.encrypt(
            self.user_pw,
            self.owner_pw,
            use_128bit=self.encrypt_128,
            allow_printing=self.allow_printing,
            allow_commenting=self.allow_commenting,
            overwrite_permission=self.overwrite_permission,
        )

        # todo: add metadata adding functionality
        document_info = {
            '/Producer': 'pdfconduit',
            '/Creator': 'HPA Design',
            '/Author': 'HPA Design',
        }
        writer.addMetadata(document_info)

        # The source stays open while writing: page data is read on demand.
        with open(self.output, 'wb') as target:
            writer.write(
                target,
                progress_bar=self.progress_bar,
                progress_bar_enabled=self.progress_bar_enabled,
            )

    return self.output
def split(file):
    """Split the chosen PDF into one single-page PDF per page.

    Each page of the file is saved into ``Splitter.splitter_dir`` as its own
    PDF, named ``página_<n>.pdf`` with ``n`` starting at 1.

    :param file: the file chosen by the user for page separation
    :return: None
    """
    # Clean the output directory first to avoid duplicate files/directories
    Splitter.cleanDir()

    # Normalise the file name
    file = Merger.toPath(file)

    with open(file, mode='rb') as source:
        total_pages = PdfFileReader(source).numPages

        for index in range(total_pages):
            # A fresh reader and writer are built for every page, exactly as
            # the original code did.
            reader = PdfFileReader(source)
            single_page = PdfFileWriter()
            single_page.addPage(reader.getPage(index))

            target = join(Splitter.splitter_dir, f"página_{index + 1}.pdf")
            with open(target, mode='wb') as sink:
                single_page.write(sink)
def combine_and_bookmark(file_dict, pdfs):
    """Concatenate PDFs into one writer, bookmarking each PDF's first page.

    The i-th entry of ``pdfs`` is paired with the i-th key of ``file_dict``;
    that key, converted with ``int``, is the chapter number. The first PDF
    seen for a chapter gets a top-level bookmark, and later PDFs of the same
    chapter are bookmarked as children of it.

    :param file_dict: mapping whose keys (in order) give the chapter numbers.
    :param pdfs: mapping of bookmark title to an opened PDF reader.
    :return: the populated ``PdfFileWriter`` (not yet written to disk).
    """
    merged = PdfFileWriter()
    # chapter number -> bookmark object returned for that chapter's root
    chapter_roots = {}
    chapter_keys = list(file_dict.keys())

    for position, (title, document) in enumerate(pdfs.items()):
        chapter = int(chapter_keys[position])

        # The first page goes in before the bookmark so it can point at it.
        merged.addPage(document.getPage(0))
        bookmark_target = merged.getNumPages() - 1

        if chapter in chapter_roots:
            merged.addBookmark(title, bookmark_target, chapter_roots[chapter])
        else:
            chapter_roots[chapter] = merged.addBookmark(title, bookmark_target)

        # Remaining pages of this PDF follow without bookmarks.
        for page_index in range(1, document.getNumPages()):
            merged.addPage(document.getPage(page_index))

    return merged
def start_Encryption(self):
    """Encrypt the selected PDF with the passwords typed into the form.

    The result is written next to the source with ``.pdf`` replaced by
    ``_encrypted.pdf``. On success the form is reset and disabled. An empty
    password field, or a failure while encrypting, is reported in a message
    box instead of being silently ignored.
    """
    def show_message(title, icon, text):
        # Small helper: every dialog in this handler has the same shape.
        box = QMessageBox()
        box.setWindowTitle(title)
        box.setIcon(icon)
        box.setText(text)
        box.exec_()

    # self.filename is reset to '' after a successful run, so guard the
    # index access rather than relying on a swallowed IndexError.
    selection = getattr(self, 'filename', None)
    source_path = selection[0] if selection else ''
    user_password = self.userPassword.text()
    owner_password = self.ownerPassword.text()

    if not (source_path and user_password and owner_password):
        if owner_password == '':
            show_message('Error', QMessageBox.Critical,
                         'Owner Password Field is Empty.')
        if user_password == '':
            show_message('Error', QMessageBox.Critical,
                         'User Password Field is Empty.')
        return

    target_path = source_path.replace('.pdf', '_encrypted.pdf')
    if target_path == source_path:
        # No '.pdf' in the name (e.g. '.PDF'): never overwrite the source.
        target_path = source_path + '_encrypted.pdf'

    try:
        pfw = PdfFileWriter()
        pdffile = PdfFileReader(source_path)
        for page in range(pdffile.numPages):
            pfw.addPage(pdffile.getPage(page))
        pfw.encrypt(user_password, owner_password)
        with open(target_path, 'wb') as file:
            pfw.write(file)
    except Exception as err:
        # Previously swallowed by a bare ``except: pass``; tell the user.
        show_message('Error', QMessageBox.Critical,
                     'File encryption failed: {}'.format(err))
        return

    show_message('Done', QMessageBox.Information,
                 'File encryption done successfully.')

    # Reset the form for the next file.
    self.filename = ''
    self.userPassword.setText('')
    self.ownerPassword.setText('')
    self.userPassword.setDisabled(True)
    self.ownerPassword.setDisabled(True)
    self.startEncryption.setDisabled(True)
def pdf_metadata_load(pdf_file):
    """Return the document-information object of the PDF at ``pdf_file``.

    :param pdf_file: path of the PDF to read.
    :return: ``PdfFileReader.documentInfo`` for that file.
    """
    with open(pdf_file, 'rb') as stream:
        reader = PdfFileReader(stream)

        # The pages are copied into a scratch writer that is never written or
        # returned. NOTE(review): looks redundant, but getNumPages()/getPage()
        # touch the reader first, so it is kept to preserve behaviour.
        scratch = PdfFileWriter()
        for source_page in map(reader.getPage, range(reader.getNumPages())):
            scratch.addPage(source_page)
        scratch._info.getObject()

        return reader.documentInfo
def pypdf3(self):
    """Rotate every page of ``self.file_name`` and save to ``self.outfn``.

    Each page is rotated clockwise by ``self.rotation``.

    :return: ``self.outfn``.
    """
    with open(self.file_name, 'rb') as source:
        writer = PdfFileWriter()
        reader = PdfFileReader(source)

        total = reader.numPages
        for index in range(total):
            current = reader.getPage(index)
            current.rotateClockwise(self.rotation)
            writer.addPage(current)

        # Written while the source is still open: pages are read lazily.
        with open(self.outfn, 'wb') as target:
            writer.write(target)

    return self.outfn
def merge_pdfs(paths, output):
    """Concatenate several PDFs, in the given order, into one file.

    :param paths: iterable of inputs accepted by ``PdfFileReader``.
    :param output: path of the merged PDF to create.
    """
    merged = PdfFileWriter()

    for source in paths:
        reader = PdfFileReader(source)
        # Pages are fetched one at a time and appended as they come.
        for source_page in map(reader.getPage, range(reader.getNumPages())):
            merged.addPage(source_page)

    # Everything is collected; write the merged document.
    with open(output, 'wb') as sink:
        merged.write(sink)
def split_pdf(myfile):
    """Extract the first two pages of every 50-page group and upload them.

    Reads ``/tmp/<myfile>``. For each complete group of 50 pages, pages
    ``i * 50`` and ``i * 50 + 1`` are appended to the output, which is then
    written to ``/tmp/document-page<i>.pdf`` and uploaded through ``client``
    to ``destbucketName`` under ``extracted-pdf/``.

    :param myfile: file name inside ``/tmp``.
    """
    # The input is now closed deterministically; it used to be left open.
    with open('/tmp/' + myfile, 'rb') as pdf_in_file:
        inputpdf = PdfFileReader(pdf_in_file)
        pages_no = inputpdf.numPages
        print(pages_no)

        # NOTE(review): one writer is shared by all iterations, so file <i>
        # also contains the pages added for groups 0..i-1. Kept as-is because
        # the intent is not visible here -- confirm whether each output
        # should start from an empty writer.
        output = PdfFileWriter()
        for i in range(pages_no // 50):
            output.addPage(inputpdf.getPage(i * 50))
            if i * 50 + 1 < pages_no:
                output.addPage(inputpdf.getPage(i * 50 + 1))

            newname = 'document-page%s.pdf' % i
            local_path = '/tmp/' + newname
            print(local_path)
            print(newname)

            with open(local_path, "wb") as outputStream:
                output.write(outputStream)
            client.upload_file(local_path, destbucketName,
                               'extracted-pdf/' + newname)
def pypdf3():
    """Watermark every page of ``document`` and write ``output_filename``.

    The first page of ``watermark`` is merged with each document page. When
    ``self.underneath`` is true the watermark goes onto a blank page first
    and the document page is merged over it; otherwise the watermark is
    merged onto the document page itself.

    :return: ``output_filename``.
    """
    # 5b. Readers/writer for the input document and the result
    document_reader = PdfFileReader(document)
    output_file = PdfFileWriter()
    page_count = document_reader.getNumPages()

    # Watermark page and its placement
    watermark_reader = PdfFileReader(watermark)
    stamp = watermark_reader.getPage(0)
    offset_x = (stamp.mediaBox.getWidth() / 2) + 0
    offset_y = (stamp.mediaBox.getHeight() / 2) + 80
    if Info(watermark_reader).rotate is not None:
        angle = -int(Info(watermark_reader).rotate)
    else:
        angle = 0

    # 5c. Stamp every page of the input document
    for page_number in range(page_count):
        underneath = self.underneath

        if underneath:
            # Start from a blank page so the watermark ends up below.
            size = Info(document_reader).dimensions
            target = PageObject().createBlankPage(document_reader, size['w'], size['h'])
        else:
            target = document_reader.getPage(page_number)

        if angle != 0:
            target.mergeRotatedTranslatedPage(stamp, angle, offset_x, offset_y)
        else:
            # An unrotated watermark is merged at the origin.
            offset_x = 0
            offset_y = 0
            target.mergeTranslatedPage(stamp, offset_x, offset_y)

        if underneath:
            target.mergePage(document_reader.getPage(page_number))

        output_file.addPage(target)

    # 5d. Write the result to disk
    with open(output_filename, "wb") as sink:
        output_file.write(sink)
    return output_filename
def overlay_pdfs(top_pdf, bottom_pdf, destination):
    """Merge the first page of one PDF over the first page of another.

    Only page 0 of each input is used; the result is a one-page PDF.

    :param top_pdf: PDF whose first page is placed on top
    :param bottom_pdf: PDF whose first page is placed underneath
    :param destination: Destination path
    """
    overlay_reader = PdfFileReader(top_pdf)
    base_reader = PdfFileReader(bottom_pdf)

    # The bottom page is modified in place by the merge.
    merged_page = base_reader.getPage(0)
    merged_page.mergePage(overlay_reader.getPage(0))

    writer = PdfFileWriter()
    writer.addPage(merged_page)

    with open(destination, "wb") as sink:
        writer.write(sink)
def pypdf3(self):
    """Scale every page of ``self.file_name`` onto a new blank page.

    Each output page is a blank page of ``self.target_w`` x ``self.target_h``
    onto which the source page is merged, scaled by ``self.scale`` and
    shifted by ``self.margin_x`` / ``self.margin_y``.

    :return: ``self.output``.
    """
    source = PdfFileReader(self.file_name)
    result = PdfFileWriter()

    for index in range(source.getNumPages()):
        original_page = source.getPage(index)

        canvas = PageObject.createBlankPage(width=self.target_w, height=self.target_h)
        canvas.mergeScaledTranslatedPage(original_page, self.scale, self.margin_x, self.margin_y)
        result.addPage(canvas)

    with open(self.output, "wb") as sink:
        result.write(sink)
    return self.output
def write_pdf(pdf_obj, destination):
    """Copy every page of a PDF into a new file.

    :param pdf_obj: PDF input accepted by ``PdfFileReader``
    :param destination: Destination path
    """
    source = PdfFileReader(pdf_obj)
    result = PdfFileWriter()

    # Pages are transferred unchanged, in order.
    total = source.getNumPages()
    for source_page in map(source.getPage, range(total)):
        result.addPage(source_page)

    with open(destination, "wb") as sink:
        result.write(sink)
def reorder(input_filename: str, output_filename: str) -> None:
    """Write the pages of a PDF to a new file in a different order.

    The order comes from ``_make_sequence(<number of pages>)``, which is
    iterated for the page indices to copy.

    :param input_filename: existing PDF to read.
    :param output_filename: path to create; must not exist yet.
    :raises AssertionError: if the input is missing or the output exists.
    """
    assert os.path.exists(input_filename)
    assert os.path.exists(output_filename) is False

    # Context managers close both streams even when reading or writing
    # fails; the explicit close() calls used to be skipped on any exception.
    with open(input_filename, 'rb') as input_stream:
        output = PdfFileWriter()
        input_pdf = PdfFileReader(input_stream)

        pages = input_pdf.getNumPages()
        for page_number in _make_sequence(pages):
            output.addPage(input_pdf.getPage(page_number))

        # The input stays open here because page data is read lazily.
        with open(output_filename, "wb") as output_stream:
            output.write(output_stream)
def main():
    """Command-line entry point: place consecutive pages side by side.

    Usage: ``python 2-up.py input_file output_file``. Pages are taken in
    pairs and the second page is merged to the right of the first. If the
    document has an odd number of pages, the last page is appended on its
    own instead of being dropped.
    """
    if len(sys.argv) != 3:
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)

    print("2-up input " + sys.argv[1])

    # open() replaces the Python-2-only file() builtin, and the context
    # managers close both streams.
    with open(sys.argv[1], "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        output = PdfFileWriter()

        page_total = input1.getNumPages()
        for index in range(0, page_total - 1, 2):
            lhs = input1.getPage(index)
            rhs = input1.getPage(index + 1)
            # Shift the right-hand page by the left page's width and let the
            # merged page expand to hold both.
            lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(), 0, True)
            output.addPage(lhs)
            # Progress indicator on one line (works on Python 2 and 3).
            sys.stdout.write(str(index) + " ")
            sys.stdout.flush()

        if page_total % 2:
            # Unpaired final page: keep it rather than silently losing it.
            output.addPage(input1.getPage(page_total - 1))

        print("writing " + sys.argv[2])
        with open(sys.argv[2], "wb") as outputStream:
            output.write(outputStream)

    print("done.")
def add_encryption(path, encryptPath, fileDicts):
    """Write an encrypted copy of each listed PDF into another directory.

    :param path: directory containing the source PDFs.
    :param encryptPath: directory that receives the encrypted copies.
    :param fileDicts: mapping of file name to the user password for that file.
    """
    for fileName, user_password in fileDicts.items():
        input_pdf = os.path.join(path, fileName)
        output_pdf = os.path.join(encryptPath, fileName)

        # A fresh writer per file. Previously one writer was shared, so every
        # output also contained the pages of all earlier files and was
        # encrypted again on each pass.
        pdf_writer = PdfFileWriter()
        pdf_reader = PdfFileReader(input_pdf)
        for page in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page))

        pdf_writer.encrypt(user_pwd=user_password, owner_pwd=None, use_128bit=True)

        # Delete the output file if it already exists
        if os.path.exists(output_pdf):
            os.remove(output_pdf)

        with open(output_pdf, 'wb') as fh:
            pdf_writer.write(fh)
def rename(pdf,doi):
    """Store ``doi`` as a ``/doi`` entry in the PDF's document information.

    The PDF is rewritten: its pages and existing information entries are
    copied into a temporary file ``<pdf>.temppdf``, which then replaces the
    original.

    :param pdf: path of the PDF to update.
    :param doi: DOI string to record.
    """
    import os

    # It does not appear possible to alter in place, so write a sibling
    # temporary file and swap it in afterwards.
    temppdf = pdf + '.temppdf'
    try:
        with open(pdf, 'rb') as fin:
            pdf_in = PdfFileReader(fin)
            writer = PdfFileWriter()
            for page in range(pdf_in.getNumPages()):
                writer.addPage(pdf_in.getPage(page))

            infoDict = writer._info.getObject()
            # documentInfo is None when the PDF has no /Info dictionary.
            info = pdf_in.documentInfo or {}
            for key in info:
                infoDict.update({NameObject(key): createStringObject(info[key])})
                print(key[0]+':'+ info[key])

            # add the DOI entry
            infoDict.update({NameObject('/doi'): createStringObject(u''+doi)})

            with open(temppdf, 'wb') as fout:
                writer.write(fout)
    except Exception:
        # Leave the original untouched and do not keep a partial temp file.
        if os.path.exists(temppdf):
            os.unlink(temppdf)
        raise

    os.unlink(pdf)
    os.rename(temppdf, pdf)
    print('The DOI have been updated to:{0}'.format(doi))
def slicer(document, first_page=None, last_page=None, suffix='sliced', tempdir=None):
    """Slice a PDF document to remove pages.

    Keeps pages ``first_page`` through ``last_page`` (1-based, inclusive)
    and writes them to a new file.

    :param document: path of the source PDF.
    :param first_page: first page to keep; ``None`` means the first page.
    :param last_page: last page to keep; ``None`` means the last page.
    :param suffix: when ``tempdir`` is not given and this is truthy, the
        output is placed next to ``document`` with a name built by
        ``add_suffix(document, suffix)``.
    :param tempdir: when given, the output is a new temporary file inside
        this directory.
    :return: path of the sliced PDF.
    :raises AssertionError: if the page range does not fit the document.
    """
    # Set output file name
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False) as temp:
            output = temp.name
    elif suffix:
        output = os.path.join(os.path.dirname(document), add_suffix(document, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            output = temp.name

    pages = Info(document).pages

    # Reindex page selections for simple user input. The previous test,
    # ``if not None``, was always true, so the documented ``None`` defaults
    # raised TypeError; they now mean "from the start" / "to the end".
    first_index = 0 if first_page is None else first_page - 1
    last_index = pages if last_page is None else last_page

    # Validate page range by comparing selection to number of pages in PDF
    invalid = 'Number of pages: ' + str(
        pages) + ' ----> Page Range Input: ' + str(first_index) + '-' + str(
            last_index)
    assert first_index <= last_index <= pages, invalid

    pdf = PdfFileReader(document)
    writer = PdfFileWriter()

    for page in list(range(pdf.getNumPages()))[first_index:last_index]:
        writer.addPage(pdf.getPage(page))

    with open(output, 'wb') as out:
        writer.write(out)
    return output
def split(filename: str, chapters: List[Chapter], directory: str) -> None:
    """Write each chapter's page range of a PDF to its own file.

    For every chapter, pages ``chapter.start`` through ``chapter.stop``
    (1-based, inclusive) are copied into ``<directory>/<chapter.name>``.

    :param filename: existing PDF to split.
    :param chapters: chapters providing ``name``, ``start`` and ``stop``.
    :param directory: directory that receives the chapter files.
    :raises AssertionError: if ``filename`` does not exist.
    """
    assert os.path.exists(filename)

    for chapter in chapters:
        print(
            f'Writing to {chapter.name} pages {chapter.start} to {chapter.stop}'
        )

        # Ideally the file would be opened once, before this loop. A bug in
        # the library makes that unusable, so it is reopened per chapter.
        with open(filename, 'rb') as source:
            reader = PdfFileReader(source)
            writer = PdfFileWriter()

            first_index = chapter.start - 1
            for index in range(first_index, chapter.stop):
                writer.addPage(reader.getPage(index))

            target_path = os.path.join(directory, chapter.name)
            with open(target_path, "wb") as sink:
                writer.write(sink)
                sink.flush()
def rotate(file_name, rotate, suffix='rotated', tempdir=None):
    """Rotate PDF by increments of 90 degrees.

    :param file_name: path of the source PDF.
    :param rotate: clockwise rotation passed to ``rotateClockwise`` for
        every page.
    :param suffix: when ``tempdir`` is not given and this is truthy, the
        output is placed next to ``file_name`` with a name built by
        ``add_suffix(file_name, suffix)``.
    :param tempdir: when given, the output is a new temporary file inside
        this directory.
    :return: path of the rotated PDF.
    """
    # Set output file name. The temporary files are only used to reserve a
    # name; ``with`` closes their handles right away instead of leaving that
    # to garbage collection.
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False) as temp:
            outfn = temp.name
    elif suffix:
        outfn = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            outfn = temp.name

    with open(file_name, 'rb') as pdf_in:
        pdf_writer = PdfFileWriter()
        pdf_reader = PdfFileReader(pdf_in)
        for pagenum in range(pdf_reader.numPages):
            page = pdf_reader.getPage(pagenum)
            page.rotateClockwise(rotate)
            pdf_writer.addPage(page)

        # Written while the input is still open: pages are read lazily.
        with open(outfn, 'wb') as pdf_out:
            pdf_writer.write(pdf_out)
    return outfn
def upscale(file_name, scale=1.5, margin_x=0, margin_y=0, suffix='scaled', tempdir=None):
    """Upscale a PDF to a large size.

    Every page is merged, scaled by ``scale`` and shifted by the margins,
    onto a blank page whose size is the result of ``dimensions(file_name)``
    multiplied by ``scale``.

    :param file_name: path of the source PDF.
    :param scale: scale factor for page content and page size.
    :param margin_x: horizontal translation applied to the scaled content.
    :param margin_y: vertical translation applied to the scaled content.
    :param suffix: when ``tempdir`` is not given and this is truthy, the
        output is placed next to ``file_name`` with a name built by
        ``add_suffix(file_name, suffix)``.
    :param tempdir: when given, the output is a new temporary file inside
        this directory.
    :return: path of the scaled PDF.
    """
    # Set output file name. The temporary files are only used to reserve a
    # name; ``with`` closes their handles right away instead of leaving that
    # to garbage collection.
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False) as temp:
            output = temp.name
    elif suffix:
        output = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            output = temp.name

    reader = PdfFileReader(file_name)
    writer = PdfFileWriter()

    dims = dimensions(file_name)
    target_w = dims['w'] * scale
    target_h = dims['h'] * scale

    # Number of pages in input document
    page_count = reader.getNumPages()
    for page_number in range(page_count):
        wtrmrk = reader.getPage(page_number)

        page = PageObject.createBlankPage(width=target_w, height=target_h)
        page.mergeScaledTranslatedPage(wtrmrk, scale, margin_x, margin_y)
        writer.addPage(page)

    with open(output, "wb") as outputStream:
        writer.write(outputStream)
    return output
def pdfMerge(self, savePath, pdfPath, watermarkPdfPath):
    """Merge the first page of a watermark PDF onto every page of a PDF.

    :param savePath: path the merged PDF is written to.
    :param pdfPath: path of the PDF whose pages receive the watermark.
    :param watermarkPdfPath: path of the PDF whose first page is the watermark.
    """
    # Both inputs stay open until the result is written because page content
    # is read lazily; the context managers close all three files afterwards.
    with open(pdfPath, 'rb') as pdfFile, \
            open(watermarkPdfPath, 'rb') as watermarkPdfFile:
        pdfReader = PdfFileReader(pdfFile, strict=False)
        watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

        pdfWriter = PdfFileWriter()

        for pageNum in range(pdfReader.numPages):
            pageObj = pdfReader.getPage(pageNum)

            # Half the difference of the media boxes' upper-right
            # coordinates, used as the watermark's translation.
            x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
            y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

            pageObj.mergeTranslatedPage(page2=watermarkPdf, tx=x, ty=y, expand=False)
            pdfWriter.addPage(pageObj)

        with open(savePath, 'wb') as resultFile:
            pdfWriter.write(resultFile)
def compile_journal(directory, pad_path=None, folio_size=8, starting_page_num=1):
    """Assemble the numbered PDFs of a directory into ``out.pdf``.

    Files whose name contains ``.pdf`` and starts with two digits are taken
    in sorted order, the first page of each is grouped into folios of
    ``folio_size`` pages, each folio is passed through ``build_folio`` and
    the resulting pages are written to ``out.pdf`` in the current directory.

    :param directory: directory that holds the source PDFs.
    :param pad_path: currently unused. NOTE(review): ``build_folio`` is
        called with ``None`` as its second argument -- confirm whether this
        value was meant to be passed there.
    :param folio_size: number of pages per folio (was hard-coded to 8).
    :param starting_page_num: page number given to the first folio.
    :raises ValueError: if ``folio_size`` is smaller than 1.
    """
    if folio_size < 1:
        raise ValueError('folio_size must be at least 1')

    pdfs = sorted(f for f in os.listdir(directory)
                  if '.pdf' in f and f[0:2].isdigit())

    folios = []
    for start in range(0, len(pdfs), folio_size):
        folio = []
        for name in pdfs[start:start + folio_size]:
            # listdir() yields bare names; resolve them against ``directory``
            # so the function also works when it is not the current directory.
            reader = PdfFileReader(os.path.join(directory, name))
            folio.append(reader.getPage(0))
        folios.append(folio)

    joined_folios = []
    for i, folio in enumerate(folios):
        # Offset by the pages of all previous (full) folios. The former
        # ``i * len(folio)`` was wrong whenever the last folio was shorter.
        first_page_num = i * folio_size + starting_page_num
        joined_folios.append(build_folio(folio, None, first_page_num))

    index = PdfFileWriter()
    for folio in joined_folios:
        for page in folio:
            index.addPage(page)

    with open('out.pdf', 'wb') as out_stream:
        index.write(out_stream)
def Extract_Code_From_PDF(ip_pdf_file, op_pdf_file, code_type):
    """Crop the first page of a PDF to a fixed code region and save it.

    :param ip_pdf_file: path of the source PDF; only page 0 is used.
    :param op_pdf_file: path of the one-page PDF to write.
    :param code_type: selects the crop box, case-insensitively: a value
        containing ``bar`` uses (0, 0)-(286, 100); a value containing
        ``matrix`` uses (200, 309)-(270, 378) and wins if both match. With
        neither, the page is written without changing its crop box.
    """
    kind = code_type.lower()

    # The input stays open until the output is written (pages are read
    # lazily); both handles are closed by the context managers.
    with open(ip_pdf_file, "rb") as input_stream:
        output = PdfFileWriter()
        input1 = PdfFileReader(input_stream)
        output_page = input1.getPage(0)

        # bar code
        if 'bar' in kind:
            output_page.cropBox.lowerLeft = (0, 0)
            # ``upperLeft`` was misspelled ``upperleft``, which only created
            # an unused attribute on the rectangle.
            output_page.cropBox.upperLeft = (0, 100)
            output_page.cropBox.lowerRight = (286, 0)
            output_page.cropBox.upperRight = (286, 100)

        # Data Matrix code
        if 'matrix' in kind:
            output_page.cropBox.lowerLeft = (200, 309)
            output_page.cropBox.upperLeft = (200, 378)
            output_page.cropBox.lowerRight = (270, 309)
            output_page.cropBox.upperRight = (270, 378)

        output.addPage(output_page)

        with open(op_pdf_file, "wb") as outputStream:
            output.write(outputStream)
def invoice_pdf(request, number, correction=False):
    """Render an invoice, or its correction, as a PDF HTTP response.

    :param request: incoming request; a ``download`` key in ``request.GET``
        adds an attachment ``Content-Disposition`` header.
    :param number: invoice number; an unknown number goes through
        ``get_object_or_404``.
    :param correction: when true, ``invoice.correction`` is rendered instead.
    :return: ``HttpResponse`` with content type ``application/pdf``.
    """
    invoice = get_object_or_404(Invoice, number=number)
    if correction:
        invoice = invoice.correction

    from reportlab.lib.units import mm
    from reportlab.platypus import Paragraph
    from reportlab.platypus.flowables import Spacer
    from reportlab.platypus.flowables import KeepTogether

    from dinbrief.document import Document
    from dinbrief.invoice import ItemTable, TotalTable
    from dinbrief.styles import styles
    from dinbrief.template import BriefTemplate

    with trans_override(invoice.language):
        response = HttpResponse(content_type='application/pdf')
        if 'download' in request.GET:
            filename = '%s.pdf' % invoice.number
            response['Content-Disposition'] = 'attachment; filename=%s' % filename

        # Terms are only added to regular invoices; INVOICE_TERMS is either a
        # callable producing them or an iterable of paragraph texts.
        if invoice.type == Invoice.TYPE_INVOICE:
            if callable(INVOICE_TERMS):
                terms = INVOICE_TERMS(invoice)
            else:
                terms = [
                    Paragraph(term, styles['Terms'])
                    for term in INVOICE_TERMS
                ]
        else:
            terms = []

        template = BriefTemplate()
        document = Document(
            sender=invoice.sender_lines,
            recipient=invoice.recipient_lines,
            date=date_format(invoice.created, 'SHORT_DATE_FORMAT'),
            content=[
                Paragraph(
                    '%s %s' % (invoice.get_type_display() if not correction
                               else gettext(u'Correction of invoice'),
                               invoice.number),
                    styles['Subject']),
                Spacer(template.CONTENT_WIDTH, 2 * mm),
                ItemTable(template, invoice),
                KeepTogether(TotalTable(template, invoice)),
                Spacer(template.CONTENT_WIDTH, 10 * mm),
            ] + terms)

        if settings.SHARK['INVOICE']['BACKGROUND']:
            background = settings.SHARK['INVOICE']['BACKGROUND']
            # The background PDFs are opened through context managers so
            # their handles are closed once the response has been written;
            # they used to be left open on every request.
            with tempfile.TemporaryFile() as tmp, \
                    open(background['FIRST_PAGE'], 'rb') as first_stream, \
                    open(background['LATER_PAGE'], 'rb') as later_stream:
                # Create content in a temporary file
                template.render(document, tmp)

                # Combine background with the content
                writer = PdfFileWriter()
                content = PdfFileReader(tmp)
                info_dict = writer._info.getObject()
                info_dict.update(content.getDocumentInfo())

                first_bg = PdfFileReader(first_stream)
                later_bg = PdfFileReader(later_stream)
                bg = [first_bg.getPage(0), later_bg.getPage(0)]

                for i, page in enumerate(content.pages):
                    # Page 0 gets the first background, all others the second.
                    page.mergePage(bg[min(i, 1)])
                    page.compressContentStreams()
                    writer.addPage(page)
                writer.write(response)
        else:
            # Render content directly to the HTTP response object if no
            # background images are configured.
            template.render(document, response)

    return response
# zipper: build one output PDF by interleaving pages of a "coordination" PDF
# with pages of a "base" PDF and, when rec_pdf_exists is true, a "recommended"
# PDF.
#
# Parameters (as used by the visible code):
#   opts            -- mapping; a truthy opts['output'] overrides the output name.
#   cord_path       -- path of the coordination PDF; the result is written into
#                      the directory of this file.
#   base_path       -- path of the base PDF.
#   rec_path        -- path of the recommended PDF (opened only if rec_pdf_exists).
#   rec_pdf_exists  -- whether rec_path should be opened and used.
#   output_name     -- inserted into the default output file name.
#   matching        -- true: locate base/rec pages by the TCC name found in the
#                      page text; false: interleave page by page.
#   empty_path      -- path of a PDF whose first page is used as a filler page.
#
# Flow:
#   * Each input is opened with PdfFileReader(open(..., 'rb'), False).
#     NOTE(review): none of these file objects is closed in the visible code.
#   * If the coordination PDF has fewer pages than the base PDF, the error is
#     printed and logged and the process ends through exit(-7).
#   * The first (coordination pages - base pages) coordination pages are copied
#     to the output unchanged.
#   * With matching, all PDFs are converted to per-page strings by
#     pdf_pages_to_list_of_strings and TCC names are searched with the regexes
#     defined in the body; presumably find_matching_page returns -1 when
#     nothing is found -- confirm against that helper.
#     NOTE(review): pages are appended only when base_num > 0 / rec_num > 0, so
#     a match at page index 0 would not be added -- confirm this is intended.
#   * The file is written as opts['output'] when set, otherwise as
#     "8.0 - Coordination Results & Recommendations_" + output_name + "2018_NEW.pdf".
#
# The definition is stored on collapsed lines from which the nesting of its
# else/break statements cannot be recovered with certainty, so the code below
# is left exactly as found.
def zipper(opts, cord_path, base_path, rec_path, rec_pdf_exists, output_name, matching, empty_path): # ######### PDF Write Setup ######### # # Open the input PDFs cord_pdf = PdfFileReader(open(cord_path, 'rb'), False) base_pdf = PdfFileReader(open(base_path, 'rb'), False) rec_pdf = '' if rec_pdf_exists: rec_pdf = PdfFileReader(open(rec_path, 'rb'), False) empty_pdf = PdfFileReader(open(empty_path, 'rb'), False) # Check that the coordination PDF is longer than the base (and therefore rec) pdf too. # The Coordination PDF includes pages at the front that do not get sliced in, and instead actually sit # in the front. If the Coordination pdf is less than the Base or Rec, these are missing, or there was another error if cord_pdf.getNumPages() < base_pdf.getNumPages(): prompt = 'Coordination PDF is shorter than the Base PDF' eprint(prompt) logger.critical(prompt) exit(-7) # Find the difference in length of the PDFs, these are the leader pages of the coordination diff_length = cord_pdf.getNumPages() - base_pdf.getNumPages() logger.info('Diff Length: %s', str(diff_length)) output = PdfFileWriter() for ii in range(diff_length): output.addPage(cord_pdf.getPage(ii)) if matching: logger.info("Converting Coordination PDF to string") logging.disable(logging.INFO) cord_str_pages = pdf_pages_to_list_of_strings(cord_path) logging.disable(logging.NOTSET) logger.info("Converting Base PDF to string") logging.disable(logging.INFO) base_str_pages = pdf_pages_to_list_of_strings(base_path) logging.disable(logging.NOTSET) rec_str_pages = [] if rec_pdf_exists: logging.disable(logging.INFO) logger.info("Converting Recommended PDF to string") rec_str_pages = pdf_pages_to_list_of_strings(rec_path) logging.disable(logging.NOTSET) regex_cord = r'(TCC Curve: )(TCC_[\d]+[a-zA-Z]?)([-_#$\w\d\[\] ]*)' regex_base_rec = r'(TCC Name: )(TCC_[\d]+[a-zA-Z]?)([-_#$\w\d\[\] ]*)' for ii in range(diff_length, len(cord_str_pages)): output.addPage(cord_pdf.getPage(ii)) tcc_matches = re.finditer(regex_cord, 
cord_str_pages[ii], re.MULTILINE) for match_num, tcc_match in enumerate(tcc_matches, start=1): tcc_name = tcc_match.group(2) logger.info("Attempting to find: " + tcc_name) base_num = find_matching_page(tcc_name, base_str_pages, regex_base_rec, 'Base PDF') if base_num != -1: logger.info('Found on base page: %s', str(base_num)) rec_page_flag = check_for_rec(cord_str_pages[ii]) rec_num = 0 if rec_pdf_exists and rec_page_flag: rec_num = find_matching_page(tcc_name, rec_str_pages, regex_base_rec, 'Rec PDF') if rec_num != -1: logger.info('Found on rec page: %s', str(rec_num)) else: output.addPage(empty_pdf.getPage(0)) if base_num > 0: output.addPage(base_pdf.getPage(base_num)) if rec_num > 0: output.addPage(rec_pdf.getPage(rec_num)) break else: for jj in range(base_pdf.getNumPages()): output.addPage(cord_pdf.getPage(jj + diff_length)) output.addPage(base_pdf.getPage(jj)) if rec_pdf_exists: output.addPage(rec_pdf.getPage(jj)) # Finally, output everything to the PDF # The output name is chosen based on what the name of the coordination file is if opts['output']: output_name = opts['output'] else: output_name = "8.0 - Coordination Results & Recommendations_" + output_name + "2018_NEW.pdf" output_name = os.path.join(os.path.dirname(os.path.abspath(cord_path)), output_name) with open(output_name, "wb") as w: output.write(w)
from PyPDF3 import PdfFileWriter, PdfFileReader

# Example script: collect pages of document1.pdf into ``output``, showing
# rotation, watermarking and cropping along the way. The input files stay
# open on purpose: page data is read lazily, and ``output`` is not written
# within this snippet.
output = PdfFileWriter()
input1 = PdfFileReader(open("document1.pdf", "rb"))

# print how many pages input1 has (the parenthesised form runs on both
# Python 2 and Python 3; the bare print statement is a syntax error on 3):
print("document1.pdf has %d pages." % input1.getNumPages())

# add page 1 from input1 to output document, unchanged
output.addPage(input1.getPage(0))

# add page 2 from input1, but rotated clockwise 90 degrees
output.addPage(input1.getPage(1).rotateClockwise(90))

# add page 3 from input1, rotated the other way:
output.addPage(input1.getPage(2).rotateCounterClockwise(90))
# alt: output.addPage(input1.getPage(2).rotateClockwise(270))

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(3)
watermark = PdfFileReader(open("watermark.pdf", "rb"))
page4.mergePage(watermark.getPage(0))
output.addPage(page4)

# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(4)
page5.mediaBox.upperRight = (page5.mediaBox.getUpperRight_x() / 2,
                             page5.mediaBox.getUpperRight_y() / 2)
output.addPage(page5)

# add some Javascript to launch the print window on opening this PDF.
# (that step is not part of this snippet)
from PyPDF3 import PdfFileWriter, PdfFileReader
import sys

# Script: copy a PDF while leaving out selected pages.
#   args: infile p1 p2 .. pn outfile
# Page numbers are compared with the 0-based index used by getPage().
if len(sys.argv) == 1 or sys.argv[1] == '-h':
    print('''args: infile p1 p2 .. pn outfile Program outputs outfile with p1, p2, ..., pn removed from infile.''' )
    sys.exit()

# The second positional argument of PdfFileReader is ``strict``; the former
# 'rb' string only acted as a truthy ``strict`` value, spelled out here.
infile = PdfFileReader(sys.argv[1], strict=True)
outfile = PdfFileWriter()

page_del = list(map(int, sys.argv[2:-1]))
# Set membership makes the result independent of argument order and of
# duplicates; the previous pointer walk silently dropped further pages when
# the list was unsorted or contained a number twice.
pages_to_remove = set(page_del)

for i in range(infile.getNumPages()):
    if i not in pages_to_remove:
        outfile.addPage(infile.getPage(i))

with open(sys.argv[-1], 'wb') as f:
    outfile.write(f)
# print('page_id: {0}'.format(page_id)) # blank_page.mergeTranslatedPage( # pages[page_id-1], # page_idx * (page_actual_width + page_xbuffer_letter), # idy*page_actual_height) # idy = idy - 1 canvas_height = len(pages) * page_actual_height canvas_width = 1 * page_actual_width blank_page = pdf.PageObject.createBlankPage(width=canvas_width, height=canvas_height) # pdb.set_trace() pages.reverse() for idy, page in enumerate(pages): blank_page.mergeTranslatedPage(page, 0, idy * page_actual_height) pdfWriter.addPage(blank_page) newFile = open(outfile_path, 'wb') # writing rotated pages to new file pdfWriter.write(newFile) # closing the original pdf file object pdfFileObj.close() # closing the new pdf file object newFile.close()