def encrypt_file(self):
    """Encrypt the selected PDF with the entered password and save a copy.

    The source path comes from ``self.file_selector`` and the password from
    ``self.password``.  Every problem (no file, no password, already
    encrypted file, no save path) is reported through a message box and ends
    the operation early.  Always returns ``None``.
    """
    path = self.file_selector.getpath()
    if not path:
        messagebox.showerror(MESSAGE_TITLE, "You must select a PDF file.")
        return

    password = self.password.get()
    if not password:
        messagebox.showerror(MESSAGE_TITLE, "You must enter a password.")
        return

    pdf_reader = PdfFileReader(path)
    if pdf_reader.isEncrypted:
        messagebox.showwarning(MESSAGE_TITLE, "File is already encrypted.")
        return

    # Copy every page into a fresh writer, then encrypt the writer.
    pdf_writer = PdfFileWriter()
    for page in range(pdf_reader.getNumPages()):
        pdf_writer.addPage(pdf_reader.getPage(page))
    pdf_writer.encrypt(password)

    save_path = save_as_pdf(parent=self)
    if not save_path:
        messagebox.showerror(MESSAGE_TITLE, "You must specify a file save path")
        # Stop here: falling through would turn an empty path into ".pdf"
        # (or fail on None) and write a file the user never asked for.
        return

    if save_path[-4:].lower() != ".pdf":
        save_path += ".pdf"

    with Path(save_path).open(mode="wb") as save_file:
        pdf_writer.write(save_file)
    messagebox.showinfo(MESSAGE_TITLE, "PDF encrypted.")
def rotate(input, output, pages, verbosity, rotate, **kwargs):
    """rotate selected pages

    Rotate selected pages and outputs in new pdf
    """
    # NOTE(review): the click.echo calls suggest this is a click command, in
    # which case the docstring above doubles as the CLI help text -- keep it
    # user-facing.
    #
    # input:     source accepted by PdfFileReader.
    # output:    writable binary stream exposing ``.name``.
    # pages:     container of 1-based page numbers to rotate; None = all.
    # verbosity: >= 1 prints progress dots, >= 2 also names each page.
    # rotate:    'left', 'right' or 'inverted'; anything else -> KeyError.
    source = PdfFileReader(input)
    angle = {'left': -90, 'right': 90, 'inverted': 180}[rotate]
    total_pages = source.getNumPages()

    if pages is None:
        # Page numbers are 1-based, so the upper bound must be total + 1;
        # the previous bound silently left out the last page.
        pages = range(1, total_pages + 1)

    output_pdf = PdfFileWriter()
    # Every page is copied to the output; only the selected ones are rotated.
    for page_num in range(1, total_pages + 1):
        if verbosity >= 1:
            click.echo(".", nl=False)
        if verbosity >= 2:
            click.echo("Extracting page %s" % page_num)
        page = source.getPage(page_num - 1)
        if page_num in pages:
            # Public API; a negative angle rotates counter-clockwise.
            page.rotateClockwise(angle)
        output_pdf.addPage(page)

    if verbosity >= 1:
        click.echo("Writing %s" % output.name)
    output_pdf.write(output)
def split(input, destination, pages, format, verbosity, **kwargs):
    """split pdf into single page file.

    pdfcli split document.pdf --format page-%02d.pdf -p 1,10-20
    """
    # NOTE(review): presumably a click command, so the docstring above is the
    # CLI help text -- keep it user-facing.
    #
    # input:       source accepted by PdfFileReader.
    # destination: directory receiving the files (created when missing).
    # pages:       iterable of 1-based page numbers; None = every page.
    # format:      %-style pattern applied to the page number to build each
    #              file name.
    # verbosity:   >= 1 prints one line per extracted page.
    source = PdfFileReader(input)
    if pages is None:
        pages = range(1, source.numPages + 1)

    to_dir = Path(destination)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    to_dir.mkdir(parents=True, exist_ok=True)

    for page_num in pages:
        real_page = page_num - 1  # PdfFileReader pages are 0-based
        if verbosity >= 1:
            click.echo("Extracting page %s" % page_num)
        # due to a bug PyPDF4 file need to be reopened
        source = PdfFileReader(input)
        dest_file = (to_dir / Path(format % page_num)).absolute()
        output_pdf = PdfFileWriter()
        output_pdf.addPage(source.getPage(real_page))
        with open(str(dest_file), "wb") as f:
            output_pdf.write(f)
def join(ctx, inputs, output, verbosity, **kwargs):
    """join multiple pdf together in a single file.

    pdfcli join files*.pdf -o joined.pdf
    """
    # NOTE(review): presumably a click command, so the docstring above is the
    # CLI help text -- keep it user-facing.
    #
    # ctx:       context object whose ``exit(code)`` aborts the command.
    # inputs:    paths of the PDFs to concatenate, in order.
    # output:    writable binary stream exposing ``.name``.
    # verbosity: >= 1 enables progress and error messages.
    verbose = verbosity >= 1

    if not inputs:
        click.echo("No input files")
        ctx.exit(1)

    # Validate every input up front so nothing is written on a bad path.
    for candidate in inputs:
        if Path(candidate).exists():
            continue
        if verbose:
            click.echo("File not found '%s'" % candidate, err=True)
        ctx.exit(1)

    merged = PdfFileWriter()
    for pdf_path in inputs:
        reader = PdfFileReader(pdf_path)
        if verbose:
            click.echo("Adding %s" % pdf_path)
        for index in range(reader.numPages):
            merged.addPage(reader.getPage(index))

    merged.write(output)
    if verbose:
        click.echo("Writing %s" % output.name)
def splitPages(testNameNum, testPath, pageRanges, outputDirs):
    """Split a PDF into sections, each starting with a watermarked cover.

    Args:
        testNameNum: Indexable; items 1 and 0 (in that order) are passed to
            ``genSectionFilePath``.
        testPath: Path of the source PDF.
        pageRanges: Iterable of page indices; each is the exclusive end of
            one section and the start of the next.  The first section starts
            at index 1, i.e. right after the cover.
        outputDirs: Passed through to ``genSectionFilePath``.
    """
    section_start = 1
    for key, bookmark in enumerate(pageRanges):
        # The source is reopened for every section -- presumably because
        # addWaterMark modifies the cover page in place (TODO confirm).
        # The ``with`` guarantees the handle is closed even if a step raises.
        with open(testPath, 'rb') as source_file:
            pdf = PdfFileReader(source_file)
            pdfWriter = PdfFileWriter()

            # Add the watermark to the original cover and put it up front.
            newCover = addWaterMark(pdf.getPage(0), key)
            pdfWriter.addPage(newCover)

            for page in range(section_start, bookmark):
                pdfWriter.addPage(pdf.getPage(page))

            outputFileName = genSectionFilePath(
                outputDirs, testNameNum[1], testNameNum[0], key)
            # Written while the source is still open: pages are read lazily.
            with open(outputFileName, 'wb') as out:
                pdfWriter.write(out)

        section_start = bookmark
def add_watermark(file_path, file_stage, fileno):
    """Add a watermark to every page of a PDF, rewriting the file in place.

    Encrypted inputs are left untouched.  The watermark PDF is produced by
    ``create_watermark`` from the first page's size plus ``file_stage`` and
    ``fileno``.  The result is encrypted (empty user password) and the
    original document information is copied over when present.

    Args:
        file_path: Path of the PDF to watermark; it is overwritten.
        file_stage: Passed through to ``create_watermark``.
        fileno: Passed through to ``create_watermark``.

    Returns:
        None.
    """
    pdf_input = PdfFileReader(file_path)
    if pdf_input.isEncrypted:
        return

    pdf_info = pdf_input.getDocumentInfo()

    # Convert the page size to millimetres.
    w, h = pdf_input.getPage(0).mediaBox[2:]
    page_size_mm = (int(w) * 0.3528, int(h) * 0.3528)

    # Create the watermark file.
    mark = create_watermark(page_size_mm, file_stage, fileno)

    pdf_output = PdfFileWriter()
    # Keep the watermark file open until the output is written: merged pages
    # may still reference objects that are read lazily from it.
    with open(mark, 'rb') as mark_file:
        pdf_watermark = PdfFileReader(mark_file, strict=False)
        for i in range(pdf_input.getNumPages()):
            page = pdf_input.getPage(i)
            page.mergePage(pdf_watermark.getPage(0))
            pdf_output.addPage(page)

        # Add the password.
        # NOTE(review): the owner password is hard-coded; consider sourcing
        # it from configuration instead.
        pdf_output.encrypt(user_pwd='', owner_pwd='12345', use_128bit=True)

        # getDocumentInfo() returns None when the PDF has no /Info
        # dictionary; addMetadata(None) would raise.
        if pdf_info is not None:
            pdf_output.addMetadata(pdf_info)

        # NOTE(review): the input is overwritten in place; this relies on
        # PdfFileReader having fully loaded ``file_path`` before it is
        # truncated -- confirm.
        with open(file_path, 'wb') as out_file:
            pdf_output.write(out_file)
def unwatermark_pdf(input_file: str, wm_text: str, pages: Tuple = None):
    """Remove a text watermark from a PDF file.

    Every ``Tj`` text operand that starts with ``wm_text`` is replaced by an
    empty string.

    Args:
        input_file: Path of the PDF to process.
        wm_text: Prefix identifying the watermark text.
        pages: Optional collection of 0-based page indices given as strings.
            When truthy, only those pages are processed and written.

    Returns:
        ``(True, pdf_reader, pdf_writer)``.  Nothing is written to disk here.
        The input file is deliberately left open because the returned reader
        and writer still read from it.
    """
    pdf_reader = PdfFileReader(open(input_file, 'rb'), strict=False)
    pdf_writer = PdfFileWriter()

    for page_index in range(pdf_reader.getNumPages()):
        # Restrict to specific pages when requested.
        if pages and str(page_index) not in pages:
            continue

        page = pdf_reader.getPage(page_index)

        # A page without /Contents is blank: nothing to clean, and indexing
        # it would raise KeyError.
        if "/Contents" in page:
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, pdf_reader)

            for operands, operator in content.operations:
                # Blank out the string operand of Tj when it is the watermark.
                if operator == b_("Tj"):
                    text = operands[0]
                    if isinstance(text, str) and text.startswith(wm_text):
                        operands[0] = TextStringObject('')

            page[NameObject('/Contents')] = content

        pdf_writer.addPage(page)

    return True, pdf_reader, pdf_writer
def split_pages(testnamenum, test_path, page_ranges):
    """Split a PDF into sections, each starting with a watermarked cover.

    Args:
        testnamenum: Passed through to ``generate_section_filepath``.
        test_path: Path of the source PDF.
        page_ranges: Iterable of page indices; each is the exclusive end of
            one section and the start of the next.  The first section starts
            at index 1, i.e. right after the cover.

    Sections are written to the path built by ``generate_section_filepath``
    from ``CONST_LOCAL``.
    """
    section_start = 1
    for key, bookmark in enumerate(page_ranges):
        # The source is reopened for every section -- presumably because
        # add_watermark modifies the cover page in place (TODO confirm).
        # The ``with`` guarantees the handle is closed even if a step raises.
        with open(test_path, 'rb') as source_file:
            pdf = PdfFileReader(source_file)
            pdf_writer = PdfFileWriter()

            # Add the watermark to the original cover and put it up front.
            new_cover = add_watermark(pdf.getPage(0), key)
            pdf_writer.addPage(new_cover)

            for page in range(section_start, bookmark):
                pdf_writer.addPage(pdf.getPage(page))

            local_filename = generate_section_filepath(
                CONST_LOCAL, testnamenum, key)
            # NOTE: uploading the section to Dropbox is currently disabled.
            # Written while the source is still open: pages are read lazily.
            with open(local_filename, 'wb') as out:
                pdf_writer.write(out)

        section_start = bookmark
def ohin(obj_pages: List[int], save_path: str, input_path: str, img_path: str,
         position: List[float], img_size: List[float],
         pdf_size: str = "A4") -> None:
    """Merge a generated overlay page onto selected pages of a PDF.

    ``overlay_pdf_maker`` is asked to create ``overlay.pdf`` in the current
    directory (presumably rendering ``img_path`` at ``position`` with
    ``img_size`` -- TODO confirm).  Its first page is merged onto every page
    listed in ``obj_pages``, and all pages are written to ``save_path``.

    Args:
        obj_pages: 1-based numbers of the pages to stamp; may be empty.
        save_path: Path of the PDF to write.
        input_path: Path of the PDF to read.
        img_path, position, img_size, pdf_size: Passed through to
            ``overlay_pdf_maker``.

    Raises:
        AssertionError: If a requested page number exceeds the page count.
    """
    overlay_pdf_maker("overlay.pdf", img_path, position, img_size, pdf_size)
    try:
        with open("overlay.pdf", 'rb') as f_overlay:
            overlay = PdfFileReader(f_overlay).getPage(0)
            with open(input_path, 'rb') as f_target:
                reader = PdfFileReader(f_target)
                num_pages = reader.getNumPages()
                # max() of an empty list raises, so guard the empty case.
                assert not obj_pages or num_pages >= max(obj_pages), \
                    "obj_pages refers to a page beyond the end of the PDF"

                for p in obj_pages:
                    reader.getPage(p - 1).mergePage(overlay)

                writer = PdfFileWriter()
                for p in range(num_pages):
                    writer.addPage(reader.getPage(p))

                # Written while both inputs are open: pages are read lazily.
                with open(save_path, 'wb') as f:
                    writer.write(f)
    finally:
        # Always clean up the temporary overlay, even after a failure.
        if os.path.exists("overlay.pdf"):
            os.remove("overlay.pdf")
def main():
    """Command-line entry point: copy a PDF and add outline and metadata.

    Parses ``-i/--input`` and ``-o/--output`` (both required), copies every
    page of the input, inserts one blank page at index 4, calls
    ``add_outline`` for each entry of the module-level ``outline``, sets a
    two-page-right layout and the title/author metadata, then writes the
    output file.
    """
    parser = argparse.ArgumentParser(
        prog='ca6fix',
        description=
        "Fix some disappointmented points in Computer Architecture Quantitative Approach 6th Edition Japanese translation PDF file.",
        usage='ca6fix -i ca6.pdf -o ca6_fixed.pdf',
        add_help=True)
    # Both options are mandatory; declare them from one small table.
    for short_flag, long_flag, help_text in (
            ('-i', '--input', 'input PDF file'),
            ('-o', '--output', 'output PDF file')):
        parser.add_argument(short_flag, long_flag, help=help_text,
                            required=True)
    args = parser.parse_args()

    source = PdfFileReader(args.input)
    writer = PdfFileWriter()
    for number in range(source.getNumPages()):
        writer.addPage(source.getPage(number))

    writer.insertBlankPage(None, None, 4)

    for entry in outline:
        add_outline(writer, entry, 21)

    writer.setPageLayout('/TwoPageRight')
    metadata = {
        '/Title': 'コンピュータアーキテクチャ 定量的アプローチ[第6版]',
        '/Author': 'ジョン・L・ヘネシー, デイビッド・A・パターソン(著), 中條拓伯, 天野英晴, 鈴木 貢(訳)'
    }
    writer.addMetadata(metadata)

    with open(args.output, 'wb') as fh:
        writer.write(fh)
def extract(input, output, pages, verbosity, **kwargs):
    """extract one or multiple pages and build a new document.

    pdfcli extract source.pdf -o clear.pdf -p 1,3-5
    """
    # NOTE(review): presumably a click command, so the docstring above is the
    # CLI help text -- keep it user-facing.
    #
    # input:     source accepted by PdfFileReader.
    # output:    writable binary stream exposing ``.name``.
    # pages:     iterable of 1-based page numbers; None = every page.
    # verbosity: >= 1 prints progress dots, >= 2 also names each page.
    source = PdfFileReader(input)
    if pages is None:
        # Page numbers are 1-based, so the upper bound must be numPages + 1;
        # the previous bound silently left out the last page.
        pages = range(1, source.numPages + 1)

    output_pdf = PdfFileWriter()
    for page_num in pages:
        if verbosity >= 1:
            click.echo(".", nl=False)
        if verbosity >= 2:
            click.echo("Extracting page %s" % page_num)
        # PdfFileReader pages are 0-based.
        output_pdf.addPage(source.getPage(page_num - 1))

    if verbosity >= 1:
        click.echo("Writing %s" % output.name)
    output_pdf.write(output)
def remove_watermark(wm_text, inputFile, outputFile):
    """Write a copy of a PDF with a text watermark blanked out.

    Every ``Tj`` text operand that starts with ``wm_text`` is replaced by an
    empty string.

    Args:
        wm_text: Prefix identifying the watermark text.
        inputFile: Path of the PDF to read.
        outputFile: Path of the PDF to write.
    """
    from PyPDF4 import PdfFileReader, PdfFileWriter
    from PyPDF4.pdf import ContentStream
    from PyPDF4.generic import TextStringObject, NameObject
    from PyPDF4.utils import b_

    with open(inputFile, "rb") as f:
        # The second positional argument of PdfFileReader is ``strict``, not
        # a file mode; "rb" was merely truthy, which equals the default.
        source = PdfFileReader(f)
        output = PdfFileWriter()

        for page_index in range(source.getNumPages()):
            page = source.getPage(page_index)

            # A page without /Contents is blank: nothing to clean, and
            # indexing it would raise KeyError.
            if "/Contents" in page:
                content_object = page["/Contents"].getObject()
                content = ContentStream(content_object, source)

                for operands, operator in content.operations:
                    if operator == b_("Tj"):
                        text = operands[0]
                        if isinstance(text, str) and text.startswith(wm_text):
                            operands[0] = TextStringObject('')

                page[NameObject('/Contents')] = content

            output.addPage(page)

        # Written while the input is still open: pages are read lazily.
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
def splitPdf(path='./input.pdf', N=5):
    """Split a PDF into two files: pages 1..N and pages N+1..last.

    The outputs are written next to the input as ``<stem>p1_<N>.pdf`` and
    ``<stem>p<N+1>_<total>.pdf``.  Nothing is written when ``path`` is not a
    file or when the document has at most ``N`` pages.  The page count and
    both output names are printed.

    Args:
        path: Path of the PDF to split.
        N: Number of pages that go into the first file.

    Returns:
        None.
    """
    if not os.path.isfile(path):
        return

    pdfFileReader = PdfFileReader(path)
    numPages = pdfFileReader.getNumPages()  # total number of pages
    print(numPages)

    fname = os.path.splitext(path)[0]
    outFile1 = '{}p1_{}.pdf'.format(fname, N)
    outFile2 = '{}p{}_{}.pdf'.format(fname, N + 1, numPages)
    print(outFile1, outFile2)

    if numPages > N:
        # Page indices are 0-based: range(N) is pages 1..N, matching the file
        # names (the previous range(N - 1) dropped page N from both files).
        parts = (
            (outFile1, range(N)),
            (outFile2, range(N, numPages)),
        )
        for out_name, page_indexes in parts:
            # A fresh writer per part; reusing one writer made the second
            # file contain the first part as well.
            pdfFileWriter = PdfFileWriter()
            for index in page_indexes:
                pdfFileWriter.addPage(pdfFileReader.getPage(index))
            # Add every page first, then save them to the file in one go.
            with open(out_name, 'wb') as out_file:
                pdfFileWriter.write(out_file)
def _merge_documents_PyPDF4(self, file_name, paths):
    """Concatenate several PDFs into ``settings.SAVE_PATH / file_name``.

    Falsy entries in ``paths`` are skipped.  A ``utils.PdfReadError`` is
    logged through ``LogHandler.execution_log`` rather than raised.

    Returns:
        The output path, whether or not the merge succeeded.
    """
    output = settings.SAVE_PATH / file_name
    try:
        merged = PdfFileWriter()
        for file_path in paths:
            if not file_path:
                continue
            reader = PdfFileReader(str(file_path), strict=False)
            # Append each page of this document to the writer.
            for index in range(reader.getNumPages()):
                merged.addPage(reader.getPage(index))

        # Persist the merged document.
        with open(output, 'wb') as out:
            merged.write(out)
    except utils.PdfReadError as error:
        LogHandler.execution_log(error=error)
        LogHandler.execution_log(
            error=f'ERROR ON: {output.name.replace(".PDF", "")}')
    return output
def merge(self):
    """Merge every PDF listed in ``self.tree`` into one user-chosen file.

    Asks for the destination via ``save_as_pdf`` and appends ``.pdf`` when
    missing.  Encrypted files that ``decrypt`` cannot open are skipped with a
    warning.  Completion is reported through a message box.
    """
    save_path = save_as_pdf()
    if not save_path:
        messagebox.showerror(MESSAGE_TITLE, "You must specify a file save path.")
        return
    if not save_path.lower().endswith(".pdf"):
        save_path += ".pdf"

    merged = PdfFileWriter()
    for row in self.tree.get_children():
        values = self.tree.item(row, option="values")
        # Column 1 holds the file path, column 0 the display name.
        reader = PdfFileReader(values[1])
        if reader.isEncrypted and not decrypt(reader, MESSAGE_TITLE):
            messagebox.showwarning(MESSAGE_TITLE,
                                   f"{values[0]} could not be decrypted. It will not be "
                                   f"included in the merge.")
            continue
        for index in range(reader.getNumPages()):
            merged.addPage(reader.getPage(index))

    with Path(save_path).open(mode="wb") as save_file:
        merged.write(save_file)
    messagebox.showinfo(MESSAGE_TITLE, "PDF Merged")
def rotate_pdf(self, path: str, page_num: str, rotate_type: str, outpdf: str):
    """Rotate one page of a PDF by 90 degrees and save it as a new file.

    Only the rotated page is written to the output.

    Args:
        path: Path of the PDF to process.
        page_num: Page index as a string; passed to ``getPage`` after
            ``int()`` conversion.
        rotate_type: "0" for clockwise, "1" for counter-clockwise.
        outpdf: Output file name without directory; it is appended to
            ``self.processed``.

    Returns:
        A status message: the error text for an unknown ``rotate_type``,
        otherwise the completion text.
    """
    # Validate before touching the file so bad input is reported as such
    # instead of being masked by (or paying for) parsing the PDF.
    if rotate_type not in ("0", "1"):
        return "输入错误,请重新输入!"

    pdf_writer = PdfFileWriter()
    pdf_reader = PdfFileReader(path)
    page = pdf_reader.getPage(int(page_num))
    if rotate_type == "0":
        # Rotate 90 degrees clockwise.
        rotated = page.rotateClockwise(90)
    else:
        # Rotate 90 degrees counter-clockwise.
        rotated = page.rotateCounterClockwise(90)
    pdf_writer.addPage(rotated)

    with open(self.processed + outpdf, "wb") as f:
        pdf_writer.write(f)
    return "旋转页面完成!"
def put_watermark(input_pdf, output_pdf, watermark):
    """Merge the first page of ``watermark`` onto every page of a PDF.

    Args:
        input_pdf: Source accepted by ``PdfFileReader``.
        output_pdf: Path of the PDF to write.
        watermark: Source accepted by ``PdfFileReader``; only its first page
            is used.
    """
    # First page of the watermark document.
    stamp = PdfFileReader(watermark).getPage(0)

    source = PdfFileReader(input_pdf)
    result = PdfFileWriter()

    for index in range(source.getNumPages()):
        current = source.getPage(index)
        # Merge the watermark page into the current page, then queue it.
        current.mergePage(stamp)
        result.addPage(current)

    with open(output_pdf, 'wb') as out:
        result.write(out)
def readWritePdf():
    """Copy the first page of a fixed input PDF into ``./new.pdf``."""
    with open("./Python 面试题.pdf", "rb") as source_file:
        reader = PdfFileReader(source_file)
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(0))
        # Write while the source is still open: pages are read lazily.
        with open("./new.pdf", "wb") as target_file:
            writer.write(target_file)
def rotate_pdf(path, degrees, output):
    """Rotate every page of a PDF clockwise and write the result.

    Args:
        path: Source accepted by ``PdfFileReader``.
        degrees: Rotation angle; converted with ``int()``.
        output: Object handed directly to ``PdfFileWriter.write``.
    """
    writer = PdfFileWriter()
    reader = PdfFileReader(path)
    for index in range(reader.getNumPages()):
        source_page = reader.getPage(index)
        rotated = source_page.rotateClockwise(int(degrees))
        writer.addPage(rotated)
    writer.write(output)
def merge(pdf_input, mark):
    """Merge the first page of the watermark PDF onto every input page.

    Args:
        pdf_input: Reader-like object exposing ``getNumPages``/``getPage``.
        mark: Path of the watermark PDF.

    Returns:
        A ``PdfFileWriter`` holding the merged pages; nothing is written to
        disk here.  The watermark file is opened here and not closed.
    """
    result = PdfFileWriter()
    stamp_reader = PdfFileReader(open(mark, 'rb'), strict=False)
    for index in range(pdf_input.getNumPages()):
        current = pdf_input.getPage(index)
        current.mergePage(stamp_reader.getPage(0))
        result.addPage(current)
    return result
def join_pdfs(paths, output):
    """Concatenate the pages of several PDFs and write them to ``output``.

    Args:
        paths: Iterable of sources accepted by ``PdfFileReader``, in order.
        output: Object handed directly to ``PdfFileWriter.write``.
    """
    combined = PdfFileWriter()
    for source in paths:
        reader = PdfFileReader(source)
        page_count = reader.getNumPages()
        for index in range(page_count):
            combined.addPage(reader.getPage(index))
    combined.write(output)
def mergePdfFiles(paths, outputPath):
    """Concatenate several PDFs into a single file at ``outputPath``.

    Args:
        paths: Iterable of sources accepted by ``PdfFileReader``, in order.
        outputPath: Path of the merged PDF to write.
    """
    merged = PdfFileWriter()
    for source in paths:
        reader = PdfFileReader(source)
        for index in range(reader.getNumPages()):
            merged.addPage(reader.getPage(index))

    with open(outputPath, 'wb') as target:
        merged.write(target)
def extractPdfPage(path, pageIndex, extractedPdfName):
    """Write the page at ``pageIndex`` (0-based) to its own PDF file.

    Nothing is written when ``pageIndex`` matches no page of the document.

    Args:
        path: Source accepted by ``PdfFileReader``.
        pageIndex: 0-based index of the page to extract.
        extractedPdfName: Path of the single-page PDF to write.
    """
    reader = PdfFileReader(path)
    for index in range(reader.getNumPages()):
        if index == pageIndex:
            single = PdfFileWriter()
            single.addPage(reader.getPage(index))
            with open(extractedPdfName, 'wb') as target:
                single.write(target)
def split_pdf_by_page(input_pdf_filepath, output_pdf_dir):
    """Split a PDF into one file per page.

    Files are written as ``<output_pdf_dir>/<page index>.pdf`` with 0-based
    indices.

    Args:
        input_pdf_filepath: Passed to ``getPdfReader`` to obtain the reader.
        output_pdf_dir: Directory receiving the files.
    """
    pdf_reader = getPdfReader(input_pdf_filepath)
    # Number of pages in the PDF.
    page_count = pdf_reader.getNumPages()
    for page_num in range(page_count):
        writer = PdfFileWriter()
        writer.addPage(pdf_reader.getPage(page_num))
        tempname = '{}/{}.pdf'.format(output_pdf_dir, page_num)
        # Context manager so each output is flushed and closed right away
        # instead of leaking a handle per page.
        with open(tempname, 'wb') as out_file:
            writer.write(out_file)
def add_encryption(input_pdf, output_pdf, password):
    """Write a password-protected copy of a PDF.

    Args:
        input_pdf: Source accepted by ``PdfFileReader``.
        output_pdf: Path of the encrypted PDF to write.
        password: User password; no separate owner password is supplied.
    """
    protected = PdfFileWriter()
    reader = PdfFileReader(input_pdf)

    for index in range(reader.getNumPages()):
        protected.addPage(reader.getPage(index))

    protected.encrypt(user_pwd=password, owner_pwd=None, use_128bit=True)

    with open(output_pdf, 'wb') as target:
        protected.write(target)
def _insert_page_to_pdf(original: BinaryIO, page: PageObject, page_index: int) -> PdfFileWriter:
    """Build a writer from ``original`` with ``page`` placed at ``page_index``.

    NOTE(review): despite the name, the page currently at ``page_index`` is
    replaced, not shifted; when ``page_index`` is outside the document the
    new page is not added at all -- confirm this is intended.

    Args:
        original: Binary stream of the source PDF.
        page: Page put at ``page_index``.
        page_index: 0-based position that receives ``page``.

    Returns:
        A ``PdfFileWriter`` with the resulting pages; nothing is written.
    """
    reader = PdfFileReader(original)
    writer = PdfFileWriter()
    for position in range(reader.getNumPages()):
        writer.addPage(
            page if position == page_index else reader.getPage(position))
    return writer
def merge_pdfs(paths, output):
    """Concatenate several PDFs into a single file at ``output``.

    Args:
        paths: Iterable of sources accepted by ``PdfFileReader``, in order.
        output: Path of the merged PDF to write.
    """
    merged = PdfFileWriter()

    for source in paths:
        reader = PdfFileReader(source)
        # Queue every page of this document on the writer.
        for index in range(reader.getNumPages()):
            merged.addPage(reader.getPage(index))

    # Persist the merged document.
    with open(output, 'wb') as target:
        merged.write(target)
def split_to_single_pages(path, name_of_split):
    """Split a document into one PDF per page.

    Each page is written to ``<name_of_split><index>.pdf`` with 0-based
    indices.

    Args:
        path: Source accepted by ``PdfFileReader``.
        name_of_split: Prefix (may include a directory) of the output names.
    """
    reader = PdfFileReader(path)
    for index in range(reader.getNumPages()):
        single = PdfFileWriter()
        single.addPage(reader.getPage(index))

        target_name = f'{name_of_split}{index}.pdf'
        with open(target_name, 'wb') as target:
            single.write(target)
def split_pdf(input_path, page_ranges, output_name):
    """Write one PDF per requested page range.

    Each range is converted by ``format_range`` into ``(start, stop)`` page
    indices (stop exclusive, as used with ``range``) and written to
    ``<output_name>_p<page_range>.pdf``.

    Args:
        input_path: Source accepted by ``PdfFileReader``; it is re-read for
            every range.
        page_ranges: Iterable of range specifications for ``format_range``.
        output_name: Prefix of the output file names.
    """
    for page_range in page_ranges:
        reader = PdfFileReader(input_path)
        part = PdfFileWriter()

        first, end = format_range(page_range)
        for index in range(first, end):
            part.addPage(reader.getPage(index))

        target_name = f"{output_name}_p{page_range}.pdf"
        with open(target_name, 'wb') as target:
            part.write(target)
def remove_page(pdf_path, page_numbers):
    """Write the pages whose 0-based index is in ``page_numbers`` to a new PDF.

    NOTE(review): despite the name, the listed pages are the ones that are
    KEPT (the output is called ``Resultant_PDF_After_Extraction.pdf``) --
    confirm which behaviour callers expect.

    Args:
        pdf_path: Source accepted by ``PdfFileReader``.
        page_numbers: Container of 0-based page indices to copy.
    """
    pdf_writer = PdfFileWriter()
    # The second positional argument of PdfFileReader is ``strict``, not a
    # file mode; 'rb' was merely truthy, which equals the default.
    pdf_reader = PdfFileReader(pdf_path)

    # Iterate indices directly instead of a hand-maintained counter that
    # shared its name with the comprehension variable.
    for index in range(pdf_reader.getNumPages()):
        if index in page_numbers:
            pdf_writer.addPage(pdf_reader.getPage(index))

    with open('Resultant_PDF_After_Extraction.pdf', 'wb') as fh:
        pdf_writer.write(fh)