def encrypt_file(self):
    """Encrypt the selected PDF with the entered password and save a copy.

    The input path comes from ``self.file_selector`` and the password from
    ``self.password``. A message box is shown and the method returns early
    when no file is selected, no password is entered, the file is already
    encrypted, or the save dialog yields no path.
    """
    path = self.file_selector.getpath()
    if not path:
        messagebox.showerror(MESSAGE_TITLE, "You must select a PDF file.")
        return

    password = self.password.get()
    if not password:
        messagebox.showerror(MESSAGE_TITLE, "You must enter a password.")
        return

    pdf_reader = PdfFileReader(path)
    if pdf_reader.isEncrypted:
        messagebox.showwarning(MESSAGE_TITLE, "File is already encrypted.")
        return

    pdf_writer = PdfFileWriter()
    for page in range(pdf_reader.getNumPages()):
        pdf_writer.addPage(pdf_reader.getPage(page))
    pdf_writer.encrypt(password)

    save_path = save_as_pdf(parent=self)
    if not save_path:
        messagebox.showerror(MESSAGE_TITLE,
                             "You must specify a file save path")
        # Without a destination there is nothing to write; continuing would
        # fail on the suffix check below.
        return

    if save_path[-4:].lower() != ".pdf":
        save_path += ".pdf"

    with Path(save_path).open(mode="wb") as save_file:
        pdf_writer.write(save_file)

    messagebox.showinfo(MESSAGE_TITLE, "PDF encrypted.")
Beispiel #2
0
def rotate(input, output, pages, verbosity, rotate, **kwargs):
    """rotate selected pages

Rotate the selected pages and write every page of the document,
rotated or not, to a new pdf.
"""
    # `pages` holds 1-based page numbers; None means "rotate every page".
    # `rotate` must be 'left', 'right' or 'inverted'; any other value raises
    # KeyError from the lookup below.
    source = PdfFileReader(input)
    angle = {'left': -90, 'right': 90, 'inverted': 180}[rotate]

    total_pages = source.getNumPages()
    if pages is None:
        pages = range(1, total_pages + 1)

    output_pdf = PdfFileWriter()
    # Page numbers are 1-based, so the upper bound must be total_pages + 1;
    # stopping at total_pages would drop the last page from the output.
    for page_num in range(1, total_pages + 1):
        if verbosity >= 1:
            click.echo(".", nl=False)
        if verbosity >= 2:
            click.echo("Extracting page %s" % page_num)
        page = source.getPage(page_num - 1)
        if page_num in pages:
            # A negative angle rotates counter-clockwise.
            page.rotateClockwise(angle)
        output_pdf.addPage(page)

    if verbosity >= 1:
        click.echo("Writing %s" % output.name)
    output_pdf.write(output)
Beispiel #3
0
def split(input, destination, pages, format, verbosity, **kwargs):
    """split pdf into single page file.

pdfcli split document.pdf --format page-%02d.pdf -p 1,10-20

"""
    # `pages` holds 1-based page numbers; None means every page.
    # `format` is a %-style template that receives the page number.
    source = PdfFileReader(input)
    if pages is None:
        pages = range(1, source.numPages + 1)

    to_dir = Path(destination)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    to_dir.mkdir(parents=True, exist_ok=True)

    for page_num in pages:
        if verbosity >= 1:
            click.echo("Extracting page %s" % page_num)
        # due to a bug PyPDF4 file need to be reopened
        source = PdfFileReader(input)
        dest_file = (to_dir / (format % page_num)).absolute()
        output_pdf = PdfFileWriter()
        output_pdf.addPage(source.getPage(page_num - 1))
        with dest_file.open("wb") as f:
            output_pdf.write(f)
Beispiel #4
0
def join(ctx, inputs, output, verbosity, **kwargs):
    """join multiple pdf together in a single file.

pdfcli join files*.pdf -o joined.pdf

"""
    if not inputs:
        click.echo("No input files")
        ctx.exit(1)

    for input_path in inputs:
        if not Path(input_path).exists():
            if verbosity >= 1:
                click.echo("File not found '%s'" % input_path, err=True)
            # Abort regardless of verbosity; only the message is optional.
            ctx.exit(1)

    out = PdfFileWriter()

    for input_path in inputs:
        source = PdfFileReader(input_path)
        if verbosity >= 1:
            click.echo("Adding %s" % input_path)
        for page_num in range(source.numPages):
            out.addPage(source.getPage(page_num))

    # Announce the write before performing it, as the sibling commands do.
    if verbosity >= 1:
        click.echo("Writing %s" % output.name)
    out.write(output)
Beispiel #5
0
def splitPages(testNameNum, testPath, pageRanges, outputDirs):
    """Split the PDF at *testPath* into one file per entry of *pageRanges*.

    Each output starts with a watermarked copy of page 0 (the cover),
    followed by the pages from the previous bookmark (initially index 1) up
    to, but not including, the current bookmark. The section counter is
    passed to ``addWaterMark`` and ``genSectionFilePath``.

    Does nothing when *pageRanges* is empty.
    """
    start = 1
    for key, bookmark in enumerate(pageRanges):
        # The source is reopened for every section and closed as soon as the
        # section has been written, so no handle outlives its iteration.
        with open(testPath, 'rb') as f:
            pdf = PdfFileReader(f)
            pdfWriter = PdfFileWriter()

            # add watermark to original cover and put it on the front
            pdfWriter.addPage(addWaterMark(pdf.getPage(0), key))

            for page in range(start, bookmark):
                pdfWriter.addPage(pdf.getPage(page))

            outputFileName = genSectionFilePath(outputDirs, testNameNum[1],
                                                testNameNum[0], key)
            with open(outputFileName, 'wb') as out:
                pdfWriter.write(out)
        start = bookmark
Beispiel #6
0
def add_watermark(file_path, file_stage, fileno):
    """Add a watermark to every page of a PDF and rewrite the file in place.

    The watermark PDF is produced by ``create_watermark`` from the size of
    the first page (in millimetres), *file_stage* and *fileno*. The result
    is encrypted with an empty user password and written back to
    *file_path*. Returns without doing anything when the input is already
    encrypted.
    """
    from io import BytesIO

    pdf_input = PdfFileReader(file_path)
    if pdf_input.isEncrypted:
        return
    pdf_info = pdf_input.getDocumentInfo()
    w, h = pdf_input.getPage(0).mediaBox[2:]
    # Convert the page size to millimetres
    page_size_mm = (int(w) * 0.3528, int(h) * 0.3528)

    # Create the watermark file
    mark = create_watermark(page_size_mm, file_stage, fileno)

    pdf_output = PdfFileWriter()
    buffer = BytesIO()
    # Keep the watermark file open until the output has been serialised;
    # the merged pages may still read from it while being written.
    with open(mark, 'rb') as mark_file:
        pdf_watermark = PdfFileReader(mark_file, strict=False)
        for i in range(pdf_input.getNumPages()):
            page = pdf_input.getPage(i)
            page.mergePage(pdf_watermark.getPage(0))
            pdf_output.addPage(page)

        # Add password protection.
        # NOTE(review): the owner password is hard-coded; consider making it
        # configurable.
        pdf_output.encrypt(user_pwd='', owner_pwd='12345', use_128bit=True)
        if pdf_info is not None:
            pdf_output.addMetadata(pdf_info)

        # Serialise to memory first so the input file is only truncated once
        # the complete output exists.
        pdf_output.write(buffer)

    with open(file_path, 'wb') as out_file:
        out_file.write(buffer.getvalue())
Beispiel #7
0
def unwatermark_pdf(input_file: str, wm_text: str, pages: Tuple = None):
    """Blank out watermark text in the content streams of a PDF.

    Every ``Tj`` operation whose first operand is a string starting with
    *wm_text* gets that operand replaced by an empty text string. When
    *pages* is truthy, only pages whose zero-based index, rendered as a
    string, is contained in *pages* are processed and added to the writer.

    Returns the tuple ``(True, reader, writer)``. The input file is opened
    here and is not closed by this function.
    """
    reader = PdfFileReader(open(input_file, 'rb'), strict=False)
    writer = PdfFileWriter()

    for index in range(reader.getNumPages()):
        # Skip pages that were not requested
        if pages and str(index) not in pages:
            continue

        current = reader.getPage(index)
        # Parse the page's content stream into its operations
        stream = ContentStream(current["/Contents"].getObject(), reader)

        for operands, operator in stream.operations:
            # Only text-showing (Tj) operations can carry the watermark
            if operator != b_("Tj"):
                continue
            candidate = operands[0]
            if isinstance(candidate, str) and candidate.startswith(wm_text):
                operands[0] = TextStringObject('')

        current[NameObject('/Contents')] = stream
        writer.addPage(current)

    return True, reader, writer
Beispiel #8
0
def split_pages(testnamenum, test_path, page_ranges):
	"""Split the PDF at *test_path* into one local file per bookmark.

	Each output starts with a watermarked copy of page 0 (the cover),
	followed by the pages from the previous bookmark (initially index 1)
	up to, but not including, the current bookmark. Output paths come from
	``generate_section_filepath(CONST_LOCAL, testnamenum, key)``.

	Does nothing when *page_ranges* is empty.
	"""
	start = 1
	for key, bookmark in enumerate(page_ranges):
		# The source is reopened for every section and closed as soon as
		# the section has been written, so no handle is leaked.
		with open(test_path, 'rb') as f:
			pdf = PdfFileReader(f)
			pdf_writer = PdfFileWriter()

			# add watermark to original cover and put it on the front
			pdf_writer.addPage(add_watermark(pdf.getPage(0), key))

			for page in range(start, bookmark):
				pdf_writer.addPage(pdf.getPage(page))

			local_filename = generate_section_filepath(CONST_LOCAL, testnamenum, key)

			with open(local_filename, 'wb') as out:
				pdf_writer.write(out)

		start = bookmark
Beispiel #9
0
def ohin(obj_pages: List[int], save_path: str, input_path: str, img_path: str, position: List[float], img_size: List[float], pdf_size: str = "A4") -> None:
    """Merge an overlay page onto selected pages of a PDF and save the result.

    ``overlay_pdf_maker`` is asked to produce a temporary ``overlay.pdf``
    from *img_path*, *position*, *img_size* and *pdf_size*; its first page
    is merged onto each page listed in *obj_pages* (1-based page numbers).
    All pages of *input_path* are then written to *save_path*.

    The temporary overlay file and both input handles are released even if
    an error occurs along the way.
    """
    overlay_pdf_maker("overlay.pdf", img_path, position, img_size, pdf_size)

    try:
        with open("overlay.pdf", 'rb') as f_overlay, \
                open(input_path, 'rb') as f_target:
            overlay = PdfFileReader(f_overlay).getPage(0)
            reader = PdfFileReader(f_target)
            num_pages = reader.getNumPages()

            # default=0 keeps an empty selection from raising in max().
            assert num_pages >= max(obj_pages, default=0)

            for p in obj_pages:
                reader.getPage(p - 1).mergePage(overlay)

            writer = PdfFileWriter()
            for p in range(num_pages):
                writer.addPage(reader.getPage(p))

            # Write while the inputs are still open.
            with open(save_path, 'wb') as f:
                writer.write(f)
    finally:
        # Always clean up the temporary overlay, including on failure.
        if os.path.exists("overlay.pdf"):
            os.remove("overlay.pdf")
Beispiel #10
0
def main():
    """Command-line entry point: copy a PDF, patch it, and write the result.

    Copies every page of the ``--input`` file, inserts a blank page at
    index 4, calls ``add_outline`` for each entry of ``outline``, sets the
    page layout and document metadata, and writes to ``--output``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='ca6fix',
        description=
        "Fix some disappointmented points in Computer Architecture Quantitative Approach 6th Edition Japanese translation PDF file.",
        usage='ca6fix -i ca6.pdf -o ca6_fixed.pdf',
        add_help=True)
    arg_parser.add_argument('-i', '--input', help='input PDF file',
                            required=True)
    arg_parser.add_argument('-o', '--output', help='output PDF file',
                            required=True)
    options = arg_parser.parse_args()

    source_pdf = PdfFileReader(options.input)
    fixed_pdf = PdfFileWriter()
    page_count = source_pdf.getNumPages()
    for page_index in range(page_count):
        fixed_pdf.addPage(source_pdf.getPage(page_index))

    # Width and height are left as None; 4 is the insertion index.
    fixed_pdf.insertBlankPage(None, None, 4)

    # NOTE(review): 21 is presumably a page offset for the outline entries;
    # confirm against add_outline.
    for entry in outline:
        add_outline(fixed_pdf, entry, 21)

    fixed_pdf.setPageLayout('/TwoPageRight')
    metadata = {
        '/Title':
        'コンピュータアーキテクチャ 定量的アプローチ[第6版]',
        '/Author':
        'ジョン・L・ヘネシー, デイビッド・A・パターソン(著), 中條拓伯, 天野英晴, 鈴木 貢(訳)'
    }
    fixed_pdf.addMetadata(metadata)

    with open(options.output, 'wb') as out_file:
        fixed_pdf.write(out_file)
Beispiel #11
0
def extract(input, output, pages, verbosity, **kwargs):
    """extract one or multiple pages and build a new document.

pdfcli extract source.pdf -o clear.pdf -p 1,3-5

"""
    # `pages` holds 1-based page numbers; None means every page.
    source = PdfFileReader(input)

    if pages is None:
        # Upper bound is numPages + 1 so that the last page is included.
        pages = range(1, source.numPages + 1)

    output_pdf = PdfFileWriter()
    for page_num in pages:
        if verbosity >= 1:
            click.echo(".", nl=False)
        if verbosity >= 2:
            click.echo("Extracting page %s" % page_num)
        output_pdf.addPage(source.getPage(page_num - 1))

    if verbosity >= 1:
        click.echo("Writing %s" % output.name)
    output_pdf.write(output)
def remove_watermark(wm_text, inputFile, outputFile):
    """Write a copy of *inputFile* with matching watermark text blanked out.

    Every ``Tj`` operation whose first operand is a string starting with
    *wm_text* has that operand replaced by an empty text string. The
    modified pages are written to *outputFile*.
    """
    from PyPDF4 import PdfFileReader, PdfFileWriter
    from PyPDF4.pdf import ContentStream
    from PyPDF4.generic import TextStringObject, NameObject
    from PyPDF4.utils import b_

    with open(inputFile, "rb") as f:
        # The file mode belongs to open(); the reader's second positional
        # parameter is `strict`, so no mode string is passed here.
        source = PdfFileReader(f)
        output = PdfFileWriter()

        for page_index in range(source.getNumPages()):
            page = source.getPage(page_index)
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, source)

            for operands, operator in content.operations:
                if operator == b_("Tj"):
                    text = operands[0]

                    if isinstance(text, str) and text.startswith(wm_text):
                        operands[0] = TextStringObject('')

            page[NameObject('/Contents')] = content
            output.addPage(page)

        # Write while the input is still open.
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
Beispiel #13
0
    def splitPdf(path='./input.pdf', N=5):
        """Split a PDF into pages 1..N and pages N+1..end.

        The two parts are written next to the input as ``<stem>p1_<N>.pdf``
        and ``<stem>p<N+1>_<total>.pdf``. Nothing is written when *path* is
        not a file or the document has N pages or fewer.
        """
        if not os.path.isfile(path):
            return

        pdfFileReader = PdfFileReader(path)

        numPages = pdfFileReader.getNumPages()  # total number of pages
        print(numPages)

        fname = os.path.splitext(path)[0]
        outFile1 = '{}p1_{}.pdf'.format(fname, N)
        outFile2 = '{}p{}_{}.pdf'.format(fname, N + 1, numPages)
        print(outFile1, outFile2)

        if numPages > N:
            # First part: pages 1..N, i.e. indices 0..N-1.
            firstWriter = PdfFileWriter()
            for index in range(N):
                firstWriter.addPage(pdfFileReader.getPage(index))
            with open(outFile1, 'wb') as out:
                firstWriter.write(out)

            # Second part: a fresh writer, so the first N pages are not
            # repeated in the second file.
            secondWriter = PdfFileWriter()
            for index in range(N, numPages):
                secondWriter.addPage(pdfFileReader.getPage(index))
            with open(outFile2, 'wb') as out:
                secondWriter.write(out)
    def _merge_documents_PyPDF4(self, file_name, paths):
        """Concatenate the PDFs in *paths* into ``SAVE_PATH / file_name``.

        Falsy entries in *paths* are skipped. The output path is returned
        both on success and when a ``PdfReadError`` is caught; in the latter
        case the error is logged instead.
        """
        output = settings.SAVE_PATH / file_name
        try:
            merged = PdfFileWriter()

            for source_path in paths:
                if not source_path:
                    continue
                source = PdfFileReader(str(source_path), strict=False)
                # Copy every page of this document into the merged writer
                for index in range(source.getNumPages()):
                    merged.addPage(source.getPage(index))

            # Save the merged document (same path as computed above)
            output = settings.SAVE_PATH / file_name
            with open(output, 'wb') as target:
                merged.write(target)
        except utils.PdfReadError as error:
            LogHandler.execution_log(error=error)
            LogHandler.execution_log(
                error=f'ERROR ON: {output.name.replace(".PDF", "")}')
            return output
        else:
            return output
Beispiel #15
0
    def merge(self):
        """Merge the PDFs listed in ``self.tree`` into one user-chosen file.

        Asks for a save path first and aborts with an error box if none is
        given. Encrypted documents that cannot be decrypted are skipped with
        a warning.
        """
        target = save_as_pdf()
        if not target:
            messagebox.showerror(MESSAGE_TITLE, "You must specify a file save path.")
            return

        # Make sure the output carries a .pdf suffix
        has_pdf_suffix = target[-4:].lower() == ".pdf"
        if not has_pdf_suffix:
            target += ".pdf"

        merged = PdfFileWriter()

        for row in self.tree.get_children():
            values = self.tree.item(row, option="values")
            source = PdfFileReader(values[1])

            if source.isEncrypted and not decrypt(source, MESSAGE_TITLE):
                messagebox.showwarning(
                    MESSAGE_TITLE,
                    f"{values[0]} could not be decrypted. It will not be "
                    f"included in the merge.")
                continue

            for index in range(source.getNumPages()):
                merged.addPage(source.getPage(index))

        with Path(target).open(mode="wb") as out_file:
            merged.write(out_file)

        messagebox.showinfo(MESSAGE_TITLE, "PDF Merged")
Beispiel #16
0
    def rotate_pdf(self, path: str, page_num: str, rotate_type: str,
                   outpdf: str):
        """
        Rotate a single PDF page by 90 degrees and save it as a new file.

        path: path of the PDF file to process
        page_num: page index, converted with int() before lookup
        rotate_type: "0" for clockwise, "1" for counter-clockwise
        outpdf: output PDF name without directory; it is appended to
            ``self.processed``

        Returns a status message string.
        """
        writer = PdfFileWriter()
        reader = PdfFileReader(path)

        # Reject anything other than the two supported rotation codes
        if rotate_type not in ("0", "1"):
            return "输入错误,请重新输入!"

        selected = reader.getPage(int(page_num))
        if rotate_type == "0":
            # Rotate 90 degrees clockwise
            rotated = selected.rotateClockwise(90)
        else:
            # Rotate 90 degrees counter-clockwise
            rotated = selected.rotateCounterClockwise(90)
        writer.addPage(rotated)

        with open(self.processed + outpdf, "wb") as out_file:
            writer.write(out_file)

        return "旋转页面完成!"
Beispiel #17
0
def put_watermark(input_pdf, output_pdf, watermark):
    """Overlay the first page of *watermark* on every page of *input_pdf*.

    The merged pages are written to *output_pdf*.
    """
    # First page of the watermark document is used as the stamp
    stamp = PdfFileReader(watermark).getPage(0)

    source = PdfFileReader(input_pdf)
    result = PdfFileWriter()

    for index in range(source.getNumPages()):
        current = source.getPage(index)
        # Merge the stamp into the page, then queue the page for output
        current.mergePage(stamp)
        result.addPage(current)

    with open(output_pdf, 'wb') as target:
        result.write(target)
Beispiel #18
0
def readWritePdf():
    """Copy the first page of "./Python 面试题.pdf" into "./new.pdf"."""
    with open("./Python 面试题.pdf", "rb") as source_file:
        reader = PdfFileReader(source_file)
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(0))
        # The output is written while the source file is still open
        with open("./new.pdf", "wb") as target_file:
            writer.write(target_file)
Beispiel #19
0
def rotate_pdf(path, degrees, output):
    """Rotate every page of the PDF at *path* clockwise by *degrees*.

    *degrees* is converted with ``int()``; the result is written to the
    *output* stream.
    """
    writer = PdfFileWriter()
    reader = PdfFileReader(path)

    for index in range(reader.getNumPages()):
        rotated = reader.getPage(index).rotateClockwise(int(degrees))
        writer.addPage(rotated)

    writer.write(output)
Beispiel #20
0
def merge(pdf_input, mark):
    """Return a writer holding every page of *pdf_input* with a watermark.

    The first page of the PDF at path *mark* is merged into each page. The
    watermark file is opened here and is not closed by this function.
    """
    result = PdfFileWriter()
    stamp_reader = PdfFileReader(open(mark, 'rb'), strict=False)

    for index in range(pdf_input.getNumPages()):
        current = pdf_input.getPage(index)
        current.mergePage(stamp_reader.getPage(0))
        result.addPage(current)

    return result
Beispiel #21
0
def join_pdfs(paths, output):
    """Concatenate the PDFs at *paths* and write the result to *output*.

    *output* is passed straight to ``PdfFileWriter.write``. The merged
    document is written once, after all inputs have been added.
    """
    pdf_writer = PdfFileWriter()

    for path in paths:
        pdf_reader = PdfFileReader(path)
        for page in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page))

    # Writing inside the loop would emit a growing document once per input.
    pdf_writer.write(output)
Beispiel #22
0
def mergePdfFiles(paths, outputPath):
    """Append all pages of each PDF in *paths* and save to *outputPath*."""
    merged = PdfFileWriter()

    for source_path in paths:
        source = PdfFileReader(source_path)
        page_total = source.getNumPages()
        for index in range(page_total):
            merged.addPage(source.getPage(index))

    with open(outputPath, 'wb') as target:
        merged.write(target)
Beispiel #23
0
def extractPdfPage(path, pageIndex, extractedPdfName):
    """Write the page at zero-based *pageIndex* to *extractedPdfName*.

    Nothing is written when *pageIndex* matches no page of the document.
    """
    reader = PdfFileReader(path)
    for index in range(reader.getNumPages()):
        if index == pageIndex:
            single = PdfFileWriter()
            single.addPage(reader.getPage(index))

            with open(extractedPdfName, 'wb') as target:
                single.write(target)
Beispiel #24
0
def split_pdf_by_page(input_pdf_filepath, output_pdf_dir):
    """Write each page of the input PDF to ``<output_pdf_dir>/<index>.pdf``.

    The reader is obtained from ``getPdfReader``; output files are named
    after the zero-based page index.
    """
    pdf_reader = getPdfReader(input_pdf_filepath)
    # Number of pages in the PDF
    page_count = pdf_reader.getNumPages()
    for page_num in range(page_count):
        writer = PdfFileWriter()
        writer.addPage(pdf_reader.getPage(page_num))
        tempname = '{}/{}.pdf'.format(output_pdf_dir, page_num)
        # Close each output promptly instead of leaking one handle per page.
        with open(tempname, 'wb') as out_file:
            writer.write(out_file)
Beispiel #25
0
def add_encryption(input_pdf, output_pdf, password):
    """Copy *input_pdf* to *output_pdf*, encrypted with *password*.

    *password* is used as the user password; no owner password is supplied.
    """
    writer = PdfFileWriter()
    reader = PdfFileReader(input_pdf)

    page_total = reader.getNumPages()
    for index in range(page_total):
        writer.addPage(reader.getPage(index))

    writer.encrypt(user_pwd=password, owner_pwd=None, use_128bit=True)

    with open(output_pdf, 'wb') as target:
        writer.write(target)
Beispiel #26
0
def _insert_page_to_pdf(original: BinaryIO, page: PageObject,
                        page_index: int) -> PdfFileWriter:
    """Return a writer with *original*'s pages, one of them swapped out.

    The page at *page_index* is replaced by *page*; all other pages are
    copied unchanged. If *page_index* matches no page, *page* is not added.
    """
    reader = PdfFileReader(original)
    writer = PdfFileWriter()
    for index in range(reader.getNumPages()):
        chosen = page if index == page_index else reader.getPage(index)
        writer.addPage(chosen)
    return writer
Beispiel #27
0
def merge_pdfs(paths, output):
    """Merge the PDFs at *paths*, in order, into the file at *output*."""
    merged = PdfFileWriter()

    for source_path in paths:
        source = PdfFileReader(source_path)
        # Queue every page of this document for the merged output
        for index in range(source.getNumPages()):
            merged.addPage(source.getPage(index))

    # Save the combined document
    with open(output, 'wb') as target:
        merged.write(target)
def split_to_single_pages(path, name_of_split):
    """Write each page of the PDF at *path* to its own file.

    Files are named ``<name_of_split><index>.pdf`` with a zero-based index.
    """
    reader = PdfFileReader(path)
    for index in range(reader.getNumPages()):
        single = PdfFileWriter()
        single.addPage(reader.getPage(index))

        target_name = f'{name_of_split}{index}.pdf'
        with open(target_name, 'wb') as target:
            single.write(target)
Beispiel #29
0
def split_pdf(input_path, page_ranges, output_name):
    """Write one PDF per entry of *page_ranges*.

    Each entry is turned into ``(start, stop)`` by ``format_range``; pages
    ``start`` up to but excluding ``stop`` go to
    ``<output_name>_p<page_range>.pdf``. The input is re-read for every
    range.
    """
    for spec in page_ranges:
        reader = PdfFileReader(input_path)
        writer = PdfFileWriter()
        first, last = format_range(spec)
        for index in range(first, last):
            writer.addPage(reader.getPage(index))

        target_name = f"{output_name}_p{spec}.pdf"
        with open(target_name, 'wb') as target:
            writer.write(target)
def remove_page(pdf_path, page_numbers):
    """Copy the pages whose zero-based index is in *page_numbers* to a new PDF.

    The result is always written to ``Resultant_PDF_After_Extraction.pdf``
    in the current working directory.

    NOTE(review): despite the function name, the listed pages are the ones
    that are kept, not removed. Confirm the intent with callers before
    changing it.
    """
    pdf_writer = PdfFileWriter()
    # The reader's second positional parameter is `strict`, not a file mode,
    # so no mode string is passed.
    pdf_reader = PdfFileReader(pdf_path)
    for index in range(pdf_reader.getNumPages()):
        if index in page_numbers:
            pdf_writer.addPage(pdf_reader.getPage(index))
    with open('Resultant_PDF_After_Extraction.pdf', 'wb') as fh:
        pdf_writer.write(fh)