def encrypt(self, decrypt=None):
    """Write a password-protected copy of ``self.pdf`` to ``self.output``.

    :param decrypt: Passed through unchanged as the second argument of
        ``pypdf3_reader`` (presumably the password of an already encrypted
        source -- confirm against ``pypdf3_reader``).
    :return: ``self.output``, the path the encrypted PDF was written to.
    """
    writer = PdfFileWriter()

    # The source stays open until the output has been written, because the
    # writer holds page objects that came from the reader.
    with open(self.pdf, 'rb') as source:
        reader = pypdf3_reader(source, decrypt)

        # Copy every page of the source document into the writer
        for index in range(reader.numPages):
            writer.addPage(reader.getPage(index))

        # Protect the copy with the configured passwords and permissions
        writer.encrypt(
            self.user_pw,
            self.owner_pw,
            use_128bit=self.encrypt_128,
            allow_printing=self.allow_printing,
            allow_commenting=self.allow_commenting,
            overwrite_permission=self.overwrite_permission,
        )

        # todo: add metadata adding functionality
        writer.addMetadata({
            '/Producer': 'pdfconduit',
            '/Creator': 'HPA Design',
            '/Author': 'HPA Design',
        })

        # Persist the encrypted document
        with open(self.output, 'wb') as target:
            writer.write(
                target,
                progress_bar=self.progress_bar,
                progress_bar_enabled=self.progress_bar_enabled,
            )

    return self.output
def pdf_metadata_load(pdf_file):
    """Return the document information of the PDF stored at ``pdf_file``.

    :param pdf_file: Path of the PDF file to read.
    :return: The value of ``PdfFileReader.documentInfo`` for that file.
    """
    with open(pdf_file, 'rb') as source:
        reader = PdfFileReader(source)
        scratch = PdfFileWriter()

        # The pages are copied into a scratch writer exactly as before; the
        # writer's own info object is fetched but its value is not used.
        total = reader.getNumPages()
        for index in range(total):
            scratch.addPage(reader.getPage(index))
        scratch._info.getObject()

        return reader.documentInfo
def start_Encryption(self):
    """Encrypt the selected PDF with the passwords typed into the form.

    Reads the source path from ``self.filename[0]`` and the passwords from
    the ``userPassword`` / ``ownerPassword`` widgets. The encrypted copy is
    written next to the source with ``.pdf`` replaced by ``_encrypted.pdf``.
    On success the form is reset and disabled; if a password is missing an
    error box is shown per empty field; if the encryption itself fails the
    error is reported in a message box instead of being silently discarded.
    """

    def notify(title, icon, text):
        # Show one modal message box with the given title, icon and text.
        box = QMessageBox()
        box.setWindowTitle(title)
        box.setIcon(icon)
        box.setText(text)
        box.exec_()

    # Nothing selected (``self.filename`` is reset to '' after a successful
    # run): stay silent, as the previous implementation effectively did.
    if not self.filename:
        return

    source = self.filename[0]
    user_pw = self.userPassword.text()
    owner_pw = self.ownerPassword.text()

    if not (source and user_pw and owner_pw):
        if owner_pw == '':
            notify('Error', QMessageBox.Critical,
                   'Owner Password Field is Empty.')
        if user_pw == '':
            notify('Error', QMessageBox.Critical,
                   'User Password Field is Empty.')
        return

    try:
        pfw = PdfFileWriter()
        pdffile = PdfFileReader(source)
        for page in range(pdffile.numPages):
            pfw.addPage(pdffile.getPage(page))
        pfw.encrypt(user_pw, owner_pw)

        # ``with`` guarantees the output is flushed and closed even when
        # writing fails part-way through.
        with open(source.replace('.pdf', '_encrypted.pdf'), 'wb') as file:
            pfw.write(file)
    except Exception as exc:
        # Previously a bare ``except: pass`` hid every failure from the user.
        notify('Error', QMessageBox.Critical,
               'File encryption failed: {}'.format(exc))
        return

    notify('Done', QMessageBox.Information,
           'File encryption done successfully.')

    # Reset the form so the next file has to be selected explicitly
    self.filename = ''
    self.userPassword.setText('')
    self.ownerPassword.setText('')
    self.userPassword.setDisabled(True)
    self.ownerPassword.setDisabled(True)
    self.startEncryption.setDisabled(True)
def PDFMerge(savePath, pdfPath, watermarkPdfPath):
    """Merge the first page of a watermark PDF onto every page of a PDF.

    :param savePath: Path the merged PDF is written to.
    :param pdfPath: Path of the PDF whose pages receive the watermark.
    :param watermarkPdfPath: Path of the watermark PDF; only its first page
        is used.
    """
    # All three files are managed by ``with`` so they are closed (and the
    # result flushed) even if reading or merging raises.
    with open(pdfPath, 'rb') as pdfFile, \
            open(watermarkPdfPath, 'rb') as watermarkPdfFile:
        # Load the source PDF
        pdfReader = PdfFileReader(pdfFile, strict=False)

        # Load the watermark PDF
        watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

        pdfWriter = PdfFileWriter()

        # Repeat for every page of the source PDF
        for pageNum in range(pdfReader.numPages):
            pageObj = pdfReader.getPage(pageNum)

            # Offsets used to place the watermark in the centre of the page:
            # half the difference of the two media boxes' upper-right values
            x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
            y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

            # Merge the translated watermark page into the current page
            pageObj.mergeTranslatedPage(page2=watermarkPdf, tx=x, ty=y,
                                        expand=False)

            # Add the merged page to the output document
            pdfWriter.addPage(pageObj)

        # Save while the inputs are still open
        with open(savePath, 'wb') as resultFile:
            pdfWriter.write(resultFile)
def split(file):
    """Split the chosen PDF into one PDF file per page.

    Each page of the document is saved in ``Splitter.splitter_dir`` as a new
    PDF named ``página_<n>.pdf``, where ``<n>`` is the 1-based page number.

    :param file: The file chosen by the user whose pages are to be separated
    :return: None
    """
    # Clear the directory to avoid duplicated files/directories
    Splitter.cleanDir()

    # Normalise the file name
    source_path = Merger.toPath(file)

    # Write every page of the source to its own, newly named PDF
    with open(source_path, mode='rb') as source:
        total = PdfFileReader(source).numPages
        for index in range(total):
            # A new reader is built for every page, as before
            reader = PdfFileReader(source)
            single = PdfFileWriter()
            single.addPage(reader.getPage(index))

            target = join(Splitter.splitter_dir, f"página_{index + 1}.pdf")
            with open(target, mode='wb') as out:
                single.write(out)
def pdf_metadata_save(pdf_file, metadata, substitute_all_metadata=False, make_backup=True):
    """Rewrite ``pdf_file`` with updated document-information entries.

    The original file is first renamed to a backup, then a copy with the new
    metadata is written to the original path.

    :param pdf_file: Path of the PDF to update.
    :param metadata: Mapping of entry names (without the leading ``/``) to
        values; each value is stored as ``str(value)``.
    :param substitute_all_metadata: When false, the existing document
        information is copied first and ``metadata`` is applied on top; when
        true only ``metadata`` is written.
    :param make_backup: A string is used as the backup path; ``False``
        deletes the backup after a successful write; any other value keeps
        the backup at ``<pdf_file without extension>.bak``.
    :raises Exception: Any error raised while reading or writing is
        re-raised after the original file has been moved back into place.
    """
    if isinstance(make_backup, str):
        bak_file = make_backup
    else:
        bak_file = os.path.splitext(pdf_file)[0] + ".bak"

    os.rename(pdf_file, bak_file)
    try:
        with open(bak_file, 'rb') as fin:
            pdf_in = PdfFileReader(fin)
            writer = PdfFileWriter()

            for page in range(pdf_in.getNumPages()):
                writer.addPage(pdf_in.getPage(page))

            infoDict = writer._info.getObject()

            # ``documentInfo`` can be None when the PDF carries no info
            # dictionary; treat that as "nothing to copy" instead of crashing.
            info = pdf_in.documentInfo or {}
            if not substitute_all_metadata:
                for key in info:
                    infoDict.update({key: info[key]})

            for key in metadata:
                infoDict.update({
                    NameObject('/' + key): createStringObject(str(metadata[key]))
                })

            # Written while the backup is still open for reading
            with open(pdf_file, 'wb') as fout:
                writer.write(fout)
    except Exception:
        # Do not leave the caller without the original: move the backup back
        # over whatever partial output exists, then report the failure.
        os.replace(bak_file, pdf_file)
        raise

    # Deliberately ``== False``: None or a custom backup path keep the backup.
    if make_backup == False:  # noqa: E712
        os.unlink(bak_file)
def split_pdf(myfile):
    """Extract pages from ``/tmp/<myfile>`` and upload the results.

    For every full block of 50 pages, the first two pages of that block are
    added to the output document, which is then written to
    ``/tmp/document-page<i>.pdf`` and uploaded under ``extracted-pdf/`` via
    the module-level ``client`` and ``destbucketName``.

    :param myfile: File name of the source PDF inside ``/tmp/``.
    """
    # ``with`` closes the source even if reading or uploading fails; the
    # previous code never closed it.
    with open('/tmp/' + myfile, 'rb') as pdf_in_file:
        inputpdf = PdfFileReader(pdf_in_file)
        pages_no = inputpdf.numPages
        print(pages_no)

        # NOTE(review): one writer is reused for every block, so each written
        # file also contains the pages of all earlier blocks -- confirm that
        # this accumulation is intended.
        output = PdfFileWriter()
        for i in range(pages_no // 50):
            output.addPage(inputpdf.getPage(i * 50))
            if i * 50 + 1 < inputpdf.numPages:
                output.addPage(inputpdf.getPage(i * 50 + 1))

            newname = 'document-page%s.pdf' % i
            print('/tmp/' + newname)
            print(newname)

            with open('/tmp/' + newname, "wb") as outputStream:
                output.write(outputStream)

            # Upload only after the file has been closed and flushed
            client.upload_file('/tmp/' + newname, destbucketName,
                               'extracted-pdf/' + newname)
def merge_pdfs(paths, output):
    """Concatenate several PDFs into one file.

    :param paths: Iterable of source PDFs, appended in iteration order.
    :param output: Path of the merged PDF to write.
    """
    merged = PdfFileWriter()

    for source in paths:
        reader = PdfFileReader(source)
        total = reader.getNumPages()
        # Append each page of this source to the merged document
        for index in range(total):
            merged.addPage(reader.getPage(index))

    # Write out the merged PDF
    with open(output, 'wb') as target:
        merged.write(target)
def pypdf3(self):
    """Rotate every page of ``self.file_name`` and save it to ``self.outfn``.

    Each page is rotated clockwise by ``self.rotation``.

    :return: ``self.outfn``, the path of the rotated PDF.
    """
    with open(self.file_name, 'rb') as source:
        writer = PdfFileWriter()
        reader = PdfFileReader(source)

        for index in range(reader.numPages):
            current = reader.getPage(index)
            current.rotateClockwise(self.rotation)
            writer.addPage(current)

        # Written while the source is still open
        with open(self.outfn, 'wb') as target:
            writer.write(target)

    return self.outfn
def inject_pdf_links(filepath: str, pdf_data: bytes, links: Iterable[Link], size_relative: bool = True) -> None:
    """
    Injects links into a pdf file data

    All links are added to the first page (``pagenum=0``) and positioned
    relative to that page's trim box.

    :param filepath: the output file path for the pdf; missing parent
        directories are created
    :param pdf_data: the source pdf data as bytes
    :param links: an iterable of `Link` objects
    :param size_relative: if True (default) the coordinates and sizes of the
        links' bounding boxes must be relative [0~1] to the width of the pdf
        page, otherwise their absolute values are used
    """
    pdf_stream: BytesIO = BytesIO(pdf_data)
    source_pdf: PdfFileReader = PdfFileReader(pdf_stream)
    pdf_writer: PdfFileWriter = PdfFileWriter()
    pdf_writer.appendPagesFromReader(source_pdf)

    pdf_page_trim_box: RectangleObject = source_pdf.getPage(0).trimBox
    pdf_page_box: SizedBox = SizedBox(
        x=pdf_page_trim_box[0],
        y=pdf_page_trim_box[1],
        width=pdf_page_trim_box[2] - pdf_page_trim_box[0],
        height=pdf_page_trim_box[3] - pdf_page_trim_box[1],
    )
    # Both axes are scaled by the page *width* when sizes are relative
    pdf_scale: float = pdf_page_box.width if size_relative else 1.0

    link: Link
    for link in links:
        link_box: SizedBox = SizedBox(
            x=pdf_page_box.x + link.box.x * pdf_scale,
            y=pdf_page_box.y + link.box.y * pdf_scale,
            width=link.box.width * pdf_scale,
            height=link.box.height * pdf_scale,
        )
        # pdf coord system is bottom-left, so invert y
        link_box.y = pdf_page_box.y1 - link_box.y - link_box.height

        # noinspection PyTypeChecker
        pdf_writer.addURI(
            pagenum=0,
            uri=link.uri,
            # Broken type annotation in PyPDF3
            rect=[link_box.x0, link_box.y0, link_box.x1, link_box.y1],
            border=[0, 0, 0],
        )

    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError -- only create directories when there is one.
    out_dir: str = os.path.dirname(filepath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(filepath, "wb") as out_fp:
        pdf_writer.write(out_fp)
class AddJsTestCase(unittest.TestCase):
    """Tests for ``PdfFileWriter.addJS`` using the ``crazyones.pdf`` resource."""

    def setUp(self):
        """Create a writer pre-filled with the pages of ``crazyones.pdf``."""
        resource = os.path.join(RESOURCE_ROOT, 'crazyones.pdf')
        ipdf = PdfFileReader(resource)
        self.pdf_file_writer = writer = PdfFileWriter()
        writer.appendPagesFromReader(ipdf)

    def test_add(self):
        """Adding a script creates the name catalog, JS tree and OpenAction."""
        script = "this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
        self.pdf_file_writer.addJS(script)

        root = self.pdf_file_writer._root_object
        self.assertIn(
            '/Names', root,
            "addJS should add a name catalog in the root object.")
        self.assertIn(
            '/JavaScript', root['/Names'],
            "addJS should add a JavaScript name tree under the name catalog.")
        self.assertIn(
            '/OpenAction', root,
            "addJS should add an OpenAction to the catalog.")

    def test_overwrite(self):
        """Adding the same script twice yields a different script name."""
        script = "this.print({bUI:true,bSilent:false,bShrinkToFit:true});"

        self.pdf_file_writer.addJS(script)
        first_js = self.get_javascript_name()

        self.pdf_file_writer.addJS(script)
        second_js = self.get_javascript_name()

        self.assertNotEqual(
            first_js, second_js,
            "addJS should overwrite the previous script in the catalog.")

    def get_javascript_name(self):
        """Return the first entry of the root's JavaScript ``/Names`` array."""
        root = self.pdf_file_writer._root_object
        self.assertIn('/Names', root)

        name_catalog = root['/Names']
        self.assertIn('/JavaScript', name_catalog)

        js_tree = name_catalog['/JavaScript']
        self.assertIn('/Names', js_tree)

        return js_tree['/Names'][0]
def add_encryption(path, encryptPath, fileDicts):
    """Write a password-protected copy of every listed PDF.

    :param path: Directory containing the source PDFs.
    :param encryptPath: Directory the encrypted copies are written to, using
        the same file names.
    :param fileDicts: Mapping of file name to the user password for that
        file.
    """
    for fileName in fileDicts:
        input_pdf = os.path.join(path, fileName)
        output_pdf = os.path.join(encryptPath, fileName)

        pdf_reader = PdfFileReader(input_pdf)

        # A fresh writer per file. The previous code shared one writer across
        # the loop, so each output also contained the pages of every file
        # processed before it.
        pdf_writer = PdfFileWriter()
        for page in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page))

        pdf_writer.encrypt(user_pwd=fileDicts[fileName], owner_pwd=None,
                           use_128bit=True)

        # Delete the output file if it already exists
        if os.path.exists(output_pdf):
            os.remove(output_pdf)

        with open(output_pdf, 'wb') as fh:
            pdf_writer.write(fh)
def pypdf3():
    """Watermark every page of ``document`` and write ``output_filename``.

    Uses ``document``, ``watermark``, ``output_filename`` and ``self`` from
    the enclosing scope. The first page of the watermark PDF is merged over
    each page, or placed under it when ``self.underneath`` is set. The
    original note on this function read: "Much slower than PyPDF3 method."

    :return: ``output_filename``.
    """
    # Readers/writer for the input document and the result
    document_reader = PdfFileReader(document)
    output_file = PdfFileWriter()
    page_count = document_reader.getNumPages()

    # Watermark page plus the placement used when it has to be rotated
    watermark_reader = PdfFileReader(watermark)
    wtrmrk_page = watermark_reader.getPage(0)
    wtrmrk_width = (wtrmrk_page.mediaBox.getWidth() / 2) + 0
    wtrmrk_height = (wtrmrk_page.mediaBox.getHeight() / 2) + 80
    if Info(watermark_reader).rotate is not None:
        wtrmrk_rotate = -int(Info(watermark_reader).rotate)
    else:
        wtrmrk_rotate = 0

    def stamp(target):
        # Merge the watermark into ``target``: rotated and translated when a
        # rotation is set, otherwise merged at offset (0, 0).
        if wtrmrk_rotate != 0:
            target.mergeRotatedTranslatedPage(wtrmrk_page, wtrmrk_rotate,
                                              wtrmrk_width, wtrmrk_height)
        else:
            target.mergeTranslatedPage(wtrmrk_page, 0, 0)

    for page_number in range(page_count):
        if self.underneath:
            # Start from a blank page, stamp it, then lay the document page
            # on top so the watermark ends up underneath.
            size = Info(document_reader).dimensions
            input_page = PageObject().createBlankPage(document_reader,
                                                      size['w'], size['h'])
            stamp(input_page)
            input_page.mergePage(document_reader.getPage(page_number))
        else:
            input_page = document_reader.getPage(page_number)
            stamp(input_page)

        output_file.addPage(input_page)

    # Finally, write the result to disk
    with open(output_filename, "wb") as outputStream:
        output_file.write(outputStream)
    return output_filename
def pypdf3(self):
    """Scale every page of ``self.file_name`` onto a new blank page.

    Each source page is merged, scaled by ``self.scale`` and translated by
    (``self.margin_x``, ``self.margin_y``), into a blank page of size
    ``self.target_w`` x ``self.target_h``.

    :return: ``self.output``, the path of the written PDF.
    """
    reader = PdfFileReader(self.file_name)
    writer = PdfFileWriter()

    total = reader.getNumPages()
    for index in range(total):
        source_page = reader.getPage(index)
        canvas = PageObject.createBlankPage(width=self.target_w,
                                            height=self.target_h)
        canvas.mergeScaledTranslatedPage(source_page, self.scale,
                                         self.margin_x, self.margin_y)
        writer.addPage(canvas)

    with open(self.output, "wb") as target:
        writer.write(target)

    return self.output
def write_pdf(pdf_obj, destination):
    """
    Write PDF object to file

    :param pdf_obj: PDF object to be written to file
    :param destination: Destination path
    """
    source = PdfFileReader(pdf_obj)
    result = PdfFileWriter()

    # Copy every page of the source, unchanged, into the new document
    total = source.getNumPages()
    for index in range(total):
        result.addPage(source.getPage(index))

    # Write the new document to a real file
    with open(destination, "wb") as target:
        result.write(target)
def reorder(input_filename: str, output_filename: str) -> None:
    """Write a copy of a PDF with its pages in the ``_make_sequence`` order.

    :param input_filename: Path of the existing source PDF.
    :param output_filename: Path of the PDF to create; it must not exist yet.
    :raises AssertionError: If the input is missing or the output already
        exists.
    """
    assert os.path.exists(input_filename)
    assert os.path.exists(output_filename) is False

    # ``with`` closes both streams even when reading, sequencing or writing
    # raises; the previous code only closed them on the success path.
    with open(input_filename, 'rb') as input_stream:
        output = PdfFileWriter()
        input_pdf = PdfFileReader(input_stream)

        pages = input_pdf.getNumPages()
        for page_number in _make_sequence(pages):
            output.addPage(input_pdf.getPage(page_number))

        # Written while the input is still open
        with open(output_filename, "wb") as output_stream:
            output.write(output_stream)
def main():
    """Combine consecutive page pairs of a PDF side by side ("2-up").

    Usage: ``python 2-up.py input_file output_file``. Pages 1+2, 3+4, ... are
    merged onto one sheet each, with the right-hand page translated by the
    left page's width. A trailing unpaired page is copied as-is.

    Exits with status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)

    print("2-up input " + sys.argv[1])

    # ``with`` closes the input even on failure; it stays open until the
    # output has been written.
    with open(sys.argv[1], "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        output = PdfFileWriter()
        page_total = input1.getNumPages()

        for index in range(0, page_total - 1, 2):
            lhs = input1.getPage(index)
            rhs = input1.getPage(index + 1)
            lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(), 0, True)
            output.addPage(lhs)
            # Progress on a single line (the old ``print(...),`` only did
            # that under Python 2)
            sys.stdout.write(str(index) + " ")
            sys.stdout.flush()

        # An odd page count leaves one page without a partner; keep it
        # instead of silently dropping it from the result.
        if page_total % 2 == 1:
            output.addPage(input1.getPage(page_total - 1))

        print("writing " + sys.argv[2])
        # ``file()`` no longer exists in Python 3; ``open`` works everywhere
        with open(sys.argv[2], "wb") as outputStream:
            output.write(outputStream)

    print("done.")
def overlay_pdfs(top_pdf, bottom_pdf, destination):
    """
    Overlay the first page of one PDF on the first page of another

    :param top_pdf: PDF object to be placed on top
    :param bottom_pdf: PDF file to be placed underneath
    :param destination: Destination path
    """
    overlay_reader = PdfFileReader(top_pdf)
    base_reader = PdfFileReader(bottom_pdf)

    # Merge the overlay's first page onto the base document's first page
    merged_page = base_reader.getPage(0)
    merged_page.mergePage(overlay_reader.getPage(0))

    # The result is a new single-page document
    result = PdfFileWriter()
    result.addPage(merged_page)

    with open(destination, "wb") as target:
        result.write(target)
def decrypt_pdf(filename, password, decrypted_filename=None):
    """
    Decrypt an encrypted PDF and replace it with a password-free copy.

    The decrypted copy is written to ``decrypted_filename`` first; the
    original file is then removed and the copy renamed to ``filename``.

    :param filename: the encrypted pdf file
    :param password: the matching password
    :param decrypted_filename: file name for the decrypted copy; defaults to
        ``filename`` without its last four characters plus ``_decrypted.pdf``
    :return: a status message string
    """
    global old_file

    # Build the reader and the writer
    pdf_reader = get_reader(filename, password)
    if pdf_reader is None:
        write_result(filename, '文件打开失败')
        return '出错'
    if not pdf_reader.isEncrypted:
        print('文件没有被加密,无需操作!')
        return '未加密,无需操作!'

    pdf_writer = PdfFileWriter()
    pdf_writer.appendPagesFromReader(pdf_reader)

    if decrypted_filename is None:
        decrypted_filename = filename[:-4] + '_' + 'decrypted' + '.pdf'

    # Write the new file; ``with`` makes sure it is flushed and closed before
    # it is renamed below (the handle used to be left open).
    with open(decrypted_filename, 'wb') as decrypted_file:
        pdf_writer.write(decrypted_file)

    # Last path component, splitting on either backslash or slash. The raw
    # string is the same pattern as before without the invalid ``\|`` escape.
    file_name = re.split(r'\\|/', filename)[-1]

    # NOTE(review): ``old_file`` is a module-level handle, presumably opened
    # by ``get_reader`` -- confirm.
    old_file.close()
    try:
        os.remove(filename)
        os.rename(decrypted_filename, filename)
        write_result(file_name, '已覆盖旧文件')
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        traceback.print_exc()
        write_result(file_name, '删除旧文件失败')

    print('解密完成,新文件存储至' + decrypted_filename)
    return '解密完成'
def rename(pdf, doi):
    """Store ``doi`` in the ``/doi`` document-information entry of a PDF.

    The PDF is copied page by page together with its existing document
    information into ``<pdf>.temppdf``, which then replaces the original.

    :param pdf: Path of the PDF to update in place.
    :param doi: DOI string to record.
    """
    import os

    # It does not appear possible to alter in place, so write a temporary
    # copy and swap it in afterwards.
    temppdf = pdf + '.temppdf'

    # ``with`` closes both files even if copying or writing raises
    with open(pdf, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        writer = PdfFileWriter()

        for page in range(pdf_in.getNumPages()):
            writer.addPage(pdf_in.getPage(page))

        infoDict = writer._info.getObject()

        # ``documentInfo`` can be None when the PDF has no info dictionary;
        # in that case there is simply nothing to carry over.
        info = pdf_in.documentInfo or {}
        for key in info:
            infoDict.update({NameObject(key): createStringObject(info[key])})
            # NOTE(review): prints only the first character of the key --
            # possibly ``key[1:]`` was intended; left unchanged.
            print(key[0]+':'+ info[key])

        # add the DOI
        infoDict.update({NameObject('/doi'): createStringObject(u''+doi)})

        with open(temppdf, 'wb') as fout:
            writer.write(fout)

    # Single-step swap instead of unlink + rename, so the original path is
    # never left without a file.
    os.replace(temppdf, pdf)
    print('The DOI have been updated to:{0}'.format(doi))
def slicer(document, first_page=None, last_page=None, suffix='sliced', tempdir=None):
    """Slice a PDF document to remove pages.

    :param document: Path of the source PDF.
    :param first_page: 1-based number of the first page to keep; ``None``
        starts at the first page.
    :param last_page: 1-based number of the last page to keep; ``None`` keeps
        everything up to the last page.
    :param suffix: Suffix added to the source file name for the output (used
        when ``tempdir`` is not given).
    :param tempdir: Directory in which to create a temporary output file.
    :return: Path of the sliced PDF.
    :raises AssertionError: If the page range is not valid for the document.
    """
    # Set output file name
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False) as temp:
            output = temp.name
    elif suffix:
        output = os.path.join(os.path.dirname(document), add_suffix(document, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            output = temp.name

    pages = Info(document).pages

    # Reindex page selections for simple user input. The previous test was
    # ``if not None`` (always true), so the default ``first_page=None`` raised
    # a TypeError; the ``None`` defaults now mean "from start" / "to end".
    first_page = first_page - 1 if first_page is not None else 0
    if last_page is None:
        last_page = pages

    # Validate page range by comparing selection to number of pages in PDF document
    invalid = 'Number of pages: ' + str(
        pages) + ' ----> Page Range Input: ' + str(first_page) + '-' + str(
            last_page)
    assert first_page <= last_page <= pages, invalid

    pdf = PdfFileReader(document)
    writer = PdfFileWriter()

    for page in range(pdf.getNumPages())[first_page:last_page]:
        writer.addPage(pdf.getPage(page))

    with open(output, 'wb') as out:
        writer.write(out)
    return output
def split(filename: str, chapters: List[Chapter], directory: str) -> None:
    """Write each chapter's page range of a PDF to its own file.

    :param filename: Path of the existing source PDF.
    :param chapters: Chapters to extract; each provides ``name`` (output file
        name), ``start`` and ``stop`` (1-based, inclusive page numbers).
    :param directory: Directory the chapter files are written to.
    """
    assert os.path.exists(filename)

    for chapter in chapters:
        print(
            f'Writing to {chapter.name} pages {chapter.start} to {chapter.stop}'
        )

        # The file should have been opened before the loop; however, there
        # is a bug in the library, and reopening it for every chapter is the
        # only way to get around it.
        with open(filename, 'rb') as source:
            reader = PdfFileReader(source)
            chapter_pdf = PdfFileWriter()

            first_index = chapter.start - 1
            for index in range(first_index, chapter.stop):
                chapter_pdf.addPage(reader.getPage(index))

            target_path = os.path.join(directory, chapter.name)
            with open(target_path, "wb") as target:
                chapter_pdf.write(target)
                target.flush()
def rotate(file_name, rotate, suffix='rotated', tempdir=None):
    """Rotate PDF by increments of 90 degrees.

    :param file_name: Path of the source PDF.
    :param rotate: Clockwise rotation passed to every page's
        ``rotateClockwise``.
    :param suffix: Suffix added to the source file name for the output (used
        when ``tempdir`` is not given).
    :param tempdir: Directory in which to create a temporary output file.
    :return: Path of the rotated PDF.
    """
    # Pick the output path: temp file in ``tempdir``, suffixed sibling of the
    # source, or a temp file name in the default temp location.
    if tempdir:
        outfn = NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False).name
    elif suffix:
        outfn = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))
    else:
        outfn = NamedTemporaryFile(suffix='.pdf').name

    with open(file_name, 'rb') as source:
        writer = PdfFileWriter()
        reader = PdfFileReader(source)

        for index in range(reader.numPages):
            current = reader.getPage(index)
            current.rotateClockwise(rotate)
            writer.addPage(current)

        # Written while the source is still open
        with open(outfn, 'wb') as target:
            writer.write(target)

    return outfn
def upscale(file_name, scale=1.5, margin_x=0, margin_y=0, suffix='scaled', tempdir=None):
    """Upscale a PDF to a large size.

    :param file_name: Path of the source PDF.
    :param scale: Factor applied to the page content and to the page size.
    :param margin_x: Horizontal translation applied when merging each page.
    :param margin_y: Vertical translation applied when merging each page.
    :param suffix: Suffix added to the source file name for the output (used
        when ``tempdir`` is not given).
    :param tempdir: Directory in which to create a temporary output file.
    :return: Path of the scaled PDF.
    """
    # Pick the output path
    if tempdir:
        output = NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False).name
    elif suffix:
        output = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))
    else:
        output = NamedTemporaryFile(suffix='.pdf').name

    reader = PdfFileReader(file_name)
    writer = PdfFileWriter()

    # Size of the blank pages the scaled content is merged into
    dims = dimensions(file_name)
    target_w = dims['w'] * scale
    target_h = dims['h'] * scale

    total = reader.getNumPages()
    for index in range(total):
        source_page = reader.getPage(index)
        canvas = PageObject.createBlankPage(width=target_w, height=target_h)
        canvas.mergeScaledTranslatedPage(source_page, scale, margin_x, margin_y)
        writer.addPage(canvas)

    with open(output, "wb") as target:
        writer.write(target)

    return output
def pdfMerge(self, savePath, pdfPath, watermarkPdfPath):
    """Merge the first page of a watermark PDF onto every page of a PDF.

    :param savePath: Path the merged PDF is written to.
    :param pdfPath: Path of the PDF whose pages receive the watermark.
    :param watermarkPdfPath: Path of the watermark PDF; only its first page
        is used.
    """
    # All three files are managed by ``with`` so they are closed (and the
    # result flushed) even if reading or merging raises; previously none of
    # them was ever closed.
    with open(pdfPath, 'rb') as pdfFile, \
            open(watermarkPdfPath, 'rb') as watermarkPdfFile:
        pdfReader = PdfFileReader(pdfFile, strict=False)
        watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

        pdfWriter = PdfFileWriter()

        for pageNum in range(pdfReader.numPages):
            pageObj = pdfReader.getPage(pageNum)

            # Translation: half the difference between the page's and the
            # watermark's upper-right media box values
            x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
            y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

            pageObj.mergeTranslatedPage(page2=watermarkPdf, tx=x, ty=y,
                                        expand=False)
            pdfWriter.addPage(pageObj)

        # Save while the inputs are still open
        with open(savePath, 'wb') as resultFile:
            pdfWriter.write(resultFile)
def compile_journal(directory, pad_path=None, folio_size=8, starting_page_num=1):
    """Group numbered PDFs into folios and write them all to ``out.pdf``.

    Files in ``directory`` whose name contains ``.pdf`` and starts with two
    digits are sorted by name; the first page of each is collected into
    folios of ``folio_size`` pages, every folio is passed through
    ``build_folio``, and the resulting pages are written to ``out.pdf`` in
    the current working directory.

    :param directory: Directory containing the numbered page PDFs.
    :param pad_path: Currently unused (``build_folio`` receives ``None``).
    :param folio_size: Number of pages per folio.
    :param starting_page_num: Page number assigned to the very first page.
    :raises ValueError: If ``folio_size`` is smaller than 1.
    """
    if folio_size < 1:
        raise ValueError('folio_size must be a positive integer')

    pdfs = sorted(
        f for f in os.listdir(directory) if '.pdf' in f and f[0:2].isdigit())

    # Chunk the sorted files into folios. ``folio_size`` was previously
    # ignored in favour of a hard-coded 8, and the bare names returned by
    # ``os.listdir`` were opened relative to the working directory instead of
    # ``directory``.
    folios = []
    for start in range(0, len(pdfs), folio_size):
        folio = [
            PdfFileReader(os.path.join(directory, name)).getPage(0)
            for name in pdfs[start:start + folio_size]
        ]
        folios.append(folio)

    # The page offset of folio ``i`` is based on the nominal folio size; using
    # ``len(folio)`` gave the shorter final folio a too-small start number.
    joined_folios = [
        build_folio(folio, None, i * folio_size + starting_page_num)
        for i, folio in enumerate(folios)
    ]

    index = PdfFileWriter()
    for folio in joined_folios:
        for page in folio:
            index.addPage(page)

    # ``with`` flushes and closes the output, which used to be left open
    with open('out.pdf', 'wb') as out:
        index.write(out)
def decrypt_pdf(filename, password, decrypted_filename=None):
    """
    Decrypt an encrypted PDF and write a copy that needs no password.

    :param filename: the encrypted pdf file
    :param password: the matching password
    :param decrypted_filename: file name for the decrypted copy; defaults to
        ``filename`` without its last four characters plus ``_decrypted.pdf``
    :return: a status message string
    """
    # Build the reader and the writer
    pdf_reader = get_reader(filename, password)
    if pdf_reader is None:
        return '出错'
    if not pdf_reader.isEncrypted:
        print('文件没有被加密,无需操作!')
        return '文件没有被加密,无需操作!'

    pdf_writer = PdfFileWriter()
    pdf_writer.appendPagesFromReader(pdf_reader)

    if decrypted_filename is None:
        decrypted_filename = filename[:-4] + '_' + 'decrypted' + '.pdf'

    # Write the new file; ``with`` guarantees it is flushed and closed, where
    # the previous code left the handle open.
    with open(decrypted_filename, 'wb') as decrypted_file:
        pdf_writer.write(decrypted_file)

    print('解密完成,新文件存储至' + decrypted_filename)
    return '解密完成'
def Extract_Code_From_PDF(ip_pdf_file, op_pdf_file, code_type):
    """Crop the first page of a PDF to a fixed code region and save it.

    The crop rectangles are hard-coded. If ``code_type`` contains ``bar``
    (case-insensitive) the bar-code region is applied; if it contains
    ``matrix`` the Data Matrix region is applied (and wins when both match).

    :param ip_pdf_file: Path of the source PDF.
    :param op_pdf_file: Path of the single-page PDF to write.
    :param code_type: Text selecting which crop region to apply.
    """
    output = PdfFileWriter()

    # ``with`` closes both files even on failure; they used to be left open.
    with open(ip_pdf_file, "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        output_page = input1.getPage(0)
        kind = code_type.lower()

        # bar code
        if 'bar' in kind:
            output_page.cropBox.lowerLeft = (0, 0)
            # ``upperLeft`` was misspelled ``upperleft``, which only created
            # an unused attribute instead of setting the corner.
            output_page.cropBox.upperLeft = (0, 100)
            output_page.cropBox.lowerRight = (286, 0)
            output_page.cropBox.upperRight = (286, 100)

        # Data Matrix code
        if 'matrix' in kind:
            output_page.cropBox.lowerLeft = (200, 309)
            output_page.cropBox.upperLeft = (200, 378)
            output_page.cropBox.lowerRight = (270, 309)
            output_page.cropBox.upperRight = (270, 378)

        output.addPage(output_page)

        with open(op_pdf_file, "wb") as outputStream:
            output.write(outputStream)
def combine_and_bookmark(file_dict, pdfs):
    """Combine several opened PDFs into one writer with chapter bookmarks.

    The n-th entry of ``pdfs`` is matched with the n-th key of ``file_dict``,
    which is converted to ``int`` and used as its chapter number. The first
    PDF seen for a chapter gets a top-level bookmark; later PDFs of the same
    chapter get bookmarks nested under it.

    :param file_dict: Mapping whose keys are the chapter numbers.
    :param pdfs: Mapping of bookmark title to opened PDF reader.
    :return: The ``PdfFileWriter`` holding all pages and bookmarks.
    """
    out = PdfFileWriter()

    # Chapter number -> bookmark of the first PDF added for that chapter
    added_bookmarks = {}
    file_nums = list(file_dict.keys())

    for position, (name, pdf) in enumerate(pdfs.items()):
        pdf_num = int(file_nums[position])

        # The first page is added on its own so the bookmark can point at it
        out.addPage(pdf.getPage(0))

        if pdf_num not in added_bookmarks:
            # First PDF of this chapter: remember its bookmark as the parent
            added_bookmarks[pdf_num] = out.addBookmark(
                name, out.getNumPages() - 1)
        else:
            # Chapter already has a root bookmark: nest this one under it
            out.addBookmark(name, out.getNumPages() - 1,
                            added_bookmarks[pdf_num])

        # Remaining pages of this PDF
        for page_num in range(1, pdf.getNumPages()):
            out.addPage(pdf.getPage(page_num))

    return out
from PyPDF3 import PdfFileWriter, PdfFileReader

# Example script: builds ``output`` from pages of document1.pdf.
# NOTE(review): the input files are deliberately left open here -- the rest of
# the script (beyond this excerpt) presumably still writes ``output``.
output = PdfFileWriter()
input1 = PdfFileReader(open("document1.pdf", "rb"))

# print how many pages input1 has (function-call form works on Python 3,
# unlike the former ``print "..."`` statement):
print("document1.pdf has %d pages." % input1.getNumPages())

# add page 1 from input1 to output document, unchanged
output.addPage(input1.getPage(0))

# add page 2 from input1, but rotated clockwise 90 degrees
output.addPage(input1.getPage(1).rotateClockwise(90))

# add page 3 from input1, rotated the other way:
output.addPage(input1.getPage(2).rotateCounterClockwise(90))
# alt: output.addPage(input1.getPage(2).rotateClockwise(270))

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(3)
watermark = PdfFileReader(open("watermark.pdf", "rb"))
page4.mergePage(watermark.getPage(0))
output.addPage(page4)

# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(4)
page5.mediaBox.upperRight = (page5.mediaBox.getUpperRight_x() / 2,
                             page5.mediaBox.getUpperRight_y() / 2)
output.addPage(page5)

# add some Javascript to launch the print window on opening this PDF.