Beispiel #1
0
    def encrypt(self, decrypt=None):
        # Create PDF writer object
        pdf_writer = PdfFileWriter()
        with open(self.pdf, 'rb') as pdf_file:
            # Read opened PDF file
            pdf_reader = pypdf3_reader(pdf_file, decrypt)

            # Add each page from source PDF
            for page_num in range(pdf_reader.numPages):
                page = pdf_reader.getPage(page_num)
                pdf_writer.addPage(page)

            # Apply encryption to writer object
            pdf_writer.encrypt(self.user_pw,
                               self.owner_pw,
                               use_128bit=self.encrypt_128,
                               allow_printing=self.allow_printing,
                               allow_commenting=self.allow_commenting,
                               overwrite_permission=self.overwrite_permission)

            # todo: add metadata adding functionality
            pdf_writer.addMetadata({
                '/Producer': 'pdfconduit',
                '/Creator': 'HPA Design',
                '/Author': 'HPA Design',
            })

            # Write encrypted PDF to file
            with open(self.output, 'wb') as output_pdf:
                pdf_writer.write(
                    output_pdf,
                    progress_bar=self.progress_bar,
                    progress_bar_enabled=self.progress_bar_enabled)
        return self.output
Beispiel #2
0
def pdf_metadata_load(pdf_file):
    with open(pdf_file, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        writer = PdfFileWriter()

        for page in range(pdf_in.getNumPages()):
            writer.addPage(pdf_in.getPage(page))

        infoDict = writer._info.getObject()

    return pdf_in.documentInfo
    def start_Encryption(self):
        global filename

        try:

            if self.filename[0] and self.userPassword.text(
            ) and self.ownerPassword.text():

                pfw = PdfFileWriter()
                pdffile = PdfFileReader(self.filename[0])

                total_pages = pdffile.numPages

                for page in range(total_pages):
                    current_page = pdffile.getPage(page)
                    pfw.addPage(current_page)

                pfw.encrypt(self.userPassword.text(),
                            self.ownerPassword.text())

                file = open(self.filename[0].replace('.pdf', '_encrypted.pdf'),
                            'wb')
                pfw.write(file)
                file.close()

                msg = QMessageBox()
                msg.setWindowTitle('Done')
                msg.setIcon(QMessageBox.Information)
                msg.setText('File encryption done successfully.')
                msg.exec_()

                self.filename = ''

                self.userPassword.setText('')
                self.ownerPassword.setText('')
                self.userPassword.setDisabled(True)
                self.ownerPassword.setDisabled(True)
                self.startEncryption.setDisabled(True)

            else:
                if self.ownerPassword.text() == '':
                    msg = QMessageBox()
                    msg.setWindowTitle('Error')
                    msg.setIcon(QMessageBox.Critical)
                    msg.setText('Owner Password Field is Empty.')
                    msg.exec_()

                if self.userPassword.text() == '':
                    msg = QMessageBox()
                    msg.setWindowTitle('Error')
                    msg.setIcon(QMessageBox.Critical)
                    msg.setText('User Password Field is Empty.')
                    msg.exec_()
        except:
            pass
Beispiel #4
0
def PDFMerge(savePath, pdfPath, watermarkPdfPath):
    # pdf파일 불러오기
    pdfFile = open(pdfPath, 'rb')
    pdfReader = PdfFileReader(pdfFile, strict=False)

    # 워터마크 PDF파일 불러오기
    watermarkPdfFile = open(watermarkPdfPath, 'rb')
    watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

    pdfWriter = PdfFileWriter()

    #PDF 페이지 수만큼 반복
    for pageNum in range(pdfReader.numPages):

        #페이지를 불러온다
        pageObj = pdfReader.getPage(pageNum)

        #중앙으로 놓기 위해 좌표를 구한다
        x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
        y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

        # 워터마크페이지와 합친다
        pageObj.mergeTranslatedPage(page2=watermarkPdf,
                                    tx=x,
                                    ty=y,
                                    expand=False)

        #합친걸 저장할 PDF파일에 추가한다
        pdfWriter.addPage(pageObj)

    #저장
    resultFile = open(savePath, 'wb')
    pdfWriter.write(resultFile)
    def split(file):
        """
        Esse método irá separar página por página do arquivo que o usuário escolher e as salvar no 'output directory'
        como novos arquivos pdf. Cada arquivo corresponderá à uma página do documento original.
        :param file: O arquivo escolhido pelo usuário para fazer a separação das páginas
        :return: None
        """
        # Limpando o diretório para evitar duplicidade em arquivos/diretórios
        Splitter.cleanDir()

        # Tratando o nome do arquivo
        file = Merger.toPath(file)

        # Lógica para separação das páginas dos arquivos PDF's e nova nomeclatura para os mesmos
        with open(file, mode='rb') as pdf_file_to_read:
            file_length = PdfFileReader(pdf_file_to_read).numPages

            for page in range(file_length):
                pdf_file = PdfFileReader(pdf_file_to_read)
                current_page = PdfFileWriter()
                current_page.addPage(pdf_file.getPage(page))
                with open(join(Splitter.splitter_dir,
                               f"página_{page + 1}.pdf"),
                          mode='wb') as pdf:
                    current_page.write(pdf)
Beispiel #6
0
def pdf_metadata_save(pdf_file, metadata, substitute_all_metadata = False, make_backup = True):
    if type(make_backup) is str:
        bak_file = make_backup
    else:
        bak_file = os.path.splitext(pdf_file)[0] + ".bak"
    os.rename(pdf_file, bak_file)

    with open(bak_file, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        writer = PdfFileWriter()

        for page in range(pdf_in.getNumPages()):
            writer.addPage(pdf_in.getPage(page))

        infoDict = writer._info.getObject()

        info = pdf_in.documentInfo
        if not substitute_all_metadata:
            for key in info:
                #infoDict.update({NameObject(key): createStringObject(info[key])})
                infoDict.update({key: info[key]})

        for key in metadata:
            infoDict.update({NameObject('/' + key): createStringObject(str(metadata[key]))})

        with open(pdf_file, 'wb') as fout:
            writer.write(fout)

        if make_backup == False:
            os.unlink(bak_file)
def split_pdf(myfile):
    pdf_in_file = open('/tmp/' + myfile, 'rb')
    inputpdf = PdfFileReader(pdf_in_file)
    pages_no = inputpdf.numPages
    print(pages_no)
    output = PdfFileWriter()
    for i in range(pages_no // 50):
        output.addPage(inputpdf.getPage(i * 50))
        if i * 50 + 1 < inputpdf.numPages:
            output.addPage(inputpdf.getPage(i * 50 + 1))
            print('/tmp/document-page%s.pdf' % i)
        newname = 'document-page%s.pdf' % i
        print(newname)
        with open("/tmp/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
            client.upload_file('/tmp/' + newname, destbucketName,
                               'extracted-pdf/' + newname)
Beispiel #8
0
def merge_pdfs(paths, output):
    pdf_writer = PdfFileWriter()

    for path in paths:
        pdf_reader = PdfFileReader(path)
        for page in range(pdf_reader.getNumPages()):
            # Add each page to the writer object
            pdf_writer.addPage(pdf_reader.getPage(page))

    # Write out the merged PDF
    with open(output, 'wb') as out:
        pdf_writer.write(out)
Beispiel #9
0
    def pypdf3(self):
        with open(self.file_name, 'rb') as pdf_in:
            pdf_writer = PdfFileWriter()
            pdf_reader = PdfFileReader(pdf_in)
            for pagenum in range(pdf_reader.numPages):
                page = pdf_reader.getPage(pagenum)
                page.rotateClockwise(self.rotation)
                pdf_writer.addPage(page)

            with open(self.outfn, 'wb') as pdf_out:
                pdf_writer.write(pdf_out)
        return self.outfn
Beispiel #10
0
def inject_pdf_links(filepath: str,
                     pdf_data: bytes,
                     links: Iterable[Link],
                     size_relative: bool = True) -> None:
    """
    Injects links into a pdf file data
    :param filepath: the output file path for the pdf
    :param pdf_data: the source pdf data as bytes
    :param links: an iterable of `Link` objects
    :param size_relative: if True (default) the coordinates and sizes of the links'
        bounding boxes must be relative [0~1] to the width of the pdf page,
        otherwise their absolute values are used
    """
    pdf_stream: BytesIO = BytesIO(pdf_data)
    source_pdf: PdfFileReader = PdfFileReader(pdf_stream)
    pdf_writer: PdfFileWriter = PdfFileWriter()
    pdf_writer.appendPagesFromReader(source_pdf)
    pdf_page_trim_box: RectangleObject = source_pdf.getPage(0).trimBox
    pdf_page_box: SizedBox = SizedBox(
        x=pdf_page_trim_box[0],
        y=pdf_page_trim_box[1],
        width=pdf_page_trim_box[2] - pdf_page_trim_box[0],
        height=pdf_page_trim_box[3] - pdf_page_trim_box[1],
    )
    pdf_scale: float = pdf_page_box.width if size_relative else 1.0
    link: Link
    for link in links:
        link_box: SizedBox = SizedBox(
            x=pdf_page_box.x + link.box.x * pdf_scale,
            y=pdf_page_box.y + link.box.y * pdf_scale,
            width=link.box.width * pdf_scale,
            height=link.box.height * pdf_scale,
        )
        # pdf coord system is bottom-left, so invert y
        link_box.y = pdf_page_box.y1 - link_box.y - link_box.height
        # noinspection PyTypeChecker
        pdf_writer.addURI(
            pagenum=0,
            uri=link.uri,  # Broken type annotation in PyPDF3
            rect=[link_box.x0, link_box.y0, link_box.x1, link_box.y1],
            border=[0, 0, 0],
        )
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, "wb") as out_fp:
        pdf_writer.write(out_fp)
Beispiel #11
0
class AddJsTestCase(unittest.TestCase):
    def setUp(self):
        ipdf = PdfFileReader(os.path.join(RESOURCE_ROOT, 'crazyones.pdf'))
        self.pdf_file_writer = PdfFileWriter()
        self.pdf_file_writer.appendPagesFromReader(ipdf)

    def test_add(self):

        self.pdf_file_writer.addJS(
            "this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

        self.assertIn('/Names', self.pdf_file_writer._root_object,
                      "addJS should add a name catalog in the root object.")
        self.assertIn(
            '/JavaScript', self.pdf_file_writer._root_object['/Names'],
            "addJS should add a JavaScript name tree under the name catalog.")
        self.assertIn('/OpenAction', self.pdf_file_writer._root_object,
                      "addJS should add an OpenAction to the catalog.")

    def test_overwrite(self):

        self.pdf_file_writer.addJS(
            "this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
        first_js = self.get_javascript_name()

        self.pdf_file_writer.addJS(
            "this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
        second_js = self.get_javascript_name()

        self.assertNotEqual(
            first_js, second_js,
            "addJS should overwrite the previous script in the catalog.")

    def get_javascript_name(self):
        self.assertIn('/Names', self.pdf_file_writer._root_object)
        self.assertIn('/JavaScript',
                      self.pdf_file_writer._root_object['/Names'])
        self.assertIn(
            '/Names',
            self.pdf_file_writer._root_object['/Names']['/JavaScript'])
        return self.pdf_file_writer._root_object['/Names']['/JavaScript'][
            '/Names'][0]
def add_encryption(path, encryptPath, fileDicts):
    pdf_writer = PdfFileWriter()
    for fileName in fileDicts:
        input_pdf = os.path.join(path, fileName)
        output_pdf = os.path.join(encryptPath, fileName)
        pdf_reader = PdfFileReader(input_pdf)

        for page in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page))

        pdf_writer.encrypt(user_pwd=fileDicts[fileName],
                           owner_pwd=None,
                           use_128bit=True)

        #输出文件已存在便删除
        if os.path.exists(output_pdf):
            os.remove(output_pdf)

        with open(output_pdf, 'wb') as fh:
            pdf_writer.write(fh)
Beispiel #13
0
        def pypdf3():
            """Much slower than PyPDF3 method."""
            # 5b. Get our files ready
            document_reader = PdfFileReader(document)
            output_file = PdfFileWriter()

            # Number of pages in input document
            page_count = document_reader.getNumPages()

            # Watermark objects
            watermark_reader = PdfFileReader(watermark)
            wtrmrk_page = watermark_reader.getPage(0)
            wtrmrk_width = (wtrmrk_page.mediaBox.getWidth() / 2) + 0
            wtrmrk_height = (wtrmrk_page.mediaBox.getHeight() / 2) + 80
            wtrmrk_rotate = -int(Info(watermark_reader).rotate) if Info(watermark_reader).rotate is not None else 0

            # 5c. Go through all the input file pages to add a watermark to them
            for page_number in range(page_count):
                # Merge the watermark with the page
                if not self.underneath:
                    input_page = document_reader.getPage(page_number)
                    if wtrmrk_rotate != 0:
                        input_page.mergeRotatedTranslatedPage(wtrmrk_page, wtrmrk_rotate, wtrmrk_width, wtrmrk_height)
                    else:
                        wtrmrk_width = 0
                        wtrmrk_height = 0
                        input_page.mergeTranslatedPage(wtrmrk_page, wtrmrk_width, wtrmrk_height)
                else:
                    size = Info(document_reader).dimensions
                    input_page = PageObject().createBlankPage(document_reader, size['w'], size['h'])
                    if wtrmrk_rotate != 0:
                        input_page.mergeRotatedTranslatedPage(wtrmrk_page, wtrmrk_rotate, wtrmrk_width, wtrmrk_height)
                    else:
                        wtrmrk_width = 0
                        wtrmrk_height = 0
                        input_page.mergeTranslatedPage(wtrmrk_page, wtrmrk_width, wtrmrk_height)
                    input_page.mergePage(document_reader.getPage(page_number))

                # Add page from input file to output document
                output_file.addPage(input_page)

            # 5d. finally, write "output" to PDF
            with open(output_filename, "wb") as outputStream:
                output_file.write(outputStream)
            return output_filename
Beispiel #14
0
    def pypdf3(self):
        reader = PdfFileReader(self.file_name)
        writer = PdfFileWriter()

        # Number of pages in input document
        page_count = reader.getNumPages()

        for page_number in range(page_count):
            wtrmrk = reader.getPage(page_number)

            page = PageObject.createBlankPage(width=self.target_w,
                                              height=self.target_h)
            page.mergeScaledTranslatedPage(wtrmrk, self.scale, self.margin_x,
                                           self.margin_y)
            writer.addPage(page)

        with open(self.output, "wb") as outputStream:
            writer.write(outputStream)
        return self.output
Beispiel #15
0
def write_pdf(pdf_obj, destination):
    """
    Write PDF object to file
    :param pdf_obj: PDF object to be written to file
    :param destination: Desintation path
    """
    reader = PdfFileReader(pdf_obj)    # Create new PDF object
    writer = PdfFileWriter()

    page_count = reader.getNumPages()

    # add the "watermark" (which is the new pdf) on the existing page
    for page_number in range(page_count):
        page = reader.getPage(page_number)
        writer.addPage(page)

    # finally, write "output" to a real file
    with open(destination, "wb") as outputStream:
        writer.write(outputStream)
Beispiel #16
0
def reorder(input_filename: str, output_filename: str) -> None:
    assert os.path.exists(input_filename)
    assert os.path.exists(output_filename) is False

    input_stream = open(input_filename, 'rb')
    output = PdfFileWriter()
    input_pdf = PdfFileReader(input_stream)

    pages = input_pdf.getNumPages()
    order = _make_sequence(pages)

    for page_number in order:
        page = input_pdf.getPage(page_number)
        output.addPage(page)

    output_stream = open(output_filename, "wb")
    output.write(output_stream)
    input_stream.close()
    output_stream.close()
Beispiel #17
0
def main():
    if (len(sys.argv) != 3):
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)
    print("2-up input " + sys.argv[1])
    input1 = PdfFileReader(open(sys.argv[1], "rb"))
    output = PdfFileWriter()
    for iter in range(0, input1.getNumPages() - 1, 2):
        lhs = input1.getPage(iter)
        rhs = input1.getPage(iter + 1)
        lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(), 0, True)
        output.addPage(lhs)
        print(str(iter) + " "),
        sys.stdout.flush()

    print("writing " + sys.argv[2])
    outputStream = file(sys.argv[2], "wb")
    output.write(outputStream)
    print("done.")
Beispiel #18
0
def overlay_pdfs(top_pdf, bottom_pdf, destination):
    """
    Overlay PDF objects to files
    :param top_pdf: PDF object to be placed on top
    :param bottom_pdf: PDF file to be placed underneath
    :param destination: Desintation path
    """
    drawing = PdfFileReader(top_pdf)    # Create new PDF object
    template = PdfFileReader(bottom_pdf)    # read your existing PDF

    # add the "watermark" (which is the new pdf) on the existing page
    page = template.getPage(0)
    page.mergePage(drawing.getPage(0))
    output = PdfFileWriter()    # Create new PDF file
    output.addPage(page)

    # finally, write "output" to a real file
    with open(destination, "wb") as outputStream:
        output.write(outputStream)
Beispiel #19
0
def decrypt_pdf(filename, password, decrypted_filename=None):
    global old_file
    """
    将加密的文件及逆行解密,并生成一个无需密码pdf文件
    :param filename: 原先加密的pdf文件
    :param password: 对应的密码
    :param decrypted_filename: 解密之后的文件名
    :return:
    """
    # 生成一个Reader和Writer
    pdf_reader = get_reader(filename, password)
    if pdf_reader is None:
        write_result(filename, '文件打开失败')
        return '出错'
    if not pdf_reader.isEncrypted:
        print('文件没有被加密,无需操作!')
        return '未加密,无需操作!'
    pdf_writer = PdfFileWriter()
    pdf_writer.appendPagesFromReader(pdf_reader)
    # decrypted_filename=filename
    if decrypted_filename is None:
        decrypted_filename = "".join(
            filename[:-4]) + '_' + 'decrypted' + '.pdf'
    # 写入新文件
    pdf_writer.write(open(decrypted_filename, 'wb'))
    file_name = re.split('\\\|/', filename)[-1]

    old_file.close()

    try:
        os.remove(filename)
        os.rename(decrypted_filename, filename)
        write_result(file_name, '已覆盖旧文件')
    except:
        traceback.print_exc()
        write_result(file_name, '删除旧文件失败')

    print('解密完成,新文件存储至' + decrypted_filename)
    return '解密完成'
Beispiel #20
0
def rename(pdf,doi):
    #inpfn = 'Chem. Rev. 2019, 119, 10241-10287-VIP-acs.chemrev.9b00008.pdf'
 

    fin = open(pdf, 'rb')
    pdf_in = PdfFileReader(fin)

    writer = PdfFileWriter()

    for page in range(pdf_in.getNumPages()):
        writer.addPage(pdf_in.getPage(page))

    infoDict = writer._info.getObject()

    info = pdf_in.documentInfo
    for key in info:
        infoDict.update({NameObject(key): createStringObject(info[key])})
        print(key[0]+':'+ info[key])

    # add the grade
    infoDict.update({NameObject('/doi'): createStringObject(u''+doi)})

    # It does not appear possible to alter in place.
    temppdf=pdf+'.temppdf'
    fout = open(temppdf, 'wb')


    writer.write(fout)


    fin.close()
    fout.close()

 
    import os
    os.unlink(pdf)
    os.rename(temppdf, pdf)
    print('The DOI have been updated to:{0}'.format(doi))
Beispiel #21
0
def slicer(document,
           first_page=None,
           last_page=None,
           suffix='sliced',
           tempdir=None):
    """Slice a PDF document to remove pages."""
    # Set output file name
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir,
                                delete=False) as temp:
            output = temp.name
    elif suffix:
        output = os.path.join(os.path.dirname(document),
                              add_suffix(document, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            output = temp.name

    # Reindex page selections for simple user input
    first_page = first_page - 1 if not None else None

    # Validate page range by comparing selection to number of pages in PDF document
    pages = Info(document).pages
    invalid = 'Number of pages: ' + str(
        pages) + ' ----> Page Range Input: ' + str(first_page) + '-' + str(
            last_page)
    assert first_page <= last_page <= pages, invalid

    pdf = PdfFileReader(document)
    writer = PdfFileWriter()

    pages = list(range(pdf.getNumPages()))[first_page:last_page]
    for page in pages:
        writer.addPage(pdf.getPage(page))

    with open(output, 'wb') as out:
        writer.write(out)
    return output
Beispiel #22
0
def split(filename: str, chapters: List[Chapter], directory: str) -> None:
    assert os.path.exists(filename)

    for chapter in chapters:
        print(
            f'Writing to {chapter.name} pages {chapter.start} to {chapter.stop}'
        )

        # The file should have been opened before the previous loop,
        # however, there is a bug in the library, and this is the only way to
        # get around it.
        with open(filename, 'rb') as input_stream:
            input_pdf = PdfFileReader(input_stream)

            output = PdfFileWriter()
            for page_number in range(chapter.start - 1, chapter.stop):
                page = input_pdf.getPage(page_number)
                output.addPage(page)

            output_filename = os.path.join(directory, chapter.name)
            with open(output_filename, "wb") as output_stream:
                output.write(output_stream)
                output_stream.flush()
Beispiel #23
0
def rotate(file_name, rotate, suffix='rotated', tempdir=None):
    """Rotate PDF by increments of 90 degrees."""
    # Set output file name
    if tempdir:
        outfn = NamedTemporaryFile(suffix='.pdf', dir=tempdir,
                                   delete=False).name
    elif suffix:
        outfn = os.path.join(os.path.dirname(file_name),
                             add_suffix(file_name, suffix))
    else:
        outfn = NamedTemporaryFile(suffix='.pdf').name

    with open(file_name, 'rb') as pdf_in:
        pdf_writer = PdfFileWriter()
        pdf_reader = PdfFileReader(pdf_in)
        for pagenum in range(pdf_reader.numPages):
            page = pdf_reader.getPage(pagenum)
            page.rotateClockwise(rotate)
            pdf_writer.addPage(page)

        with open(outfn, 'wb') as pdf_out:
            pdf_writer.write(pdf_out)
    return outfn
Beispiel #24
0
def upscale(file_name,
            scale=1.5,
            margin_x=0,
            margin_y=0,
            suffix='scaled',
            tempdir=None):
    """Upscale a PDF to a large size."""
    # Set output file name
    if tempdir:
        output = NamedTemporaryFile(suffix='.pdf', dir=tempdir,
                                    delete=False).name
    elif suffix:
        output = os.path.join(os.path.dirname(file_name),
                              add_suffix(file_name, suffix))
    else:
        output = NamedTemporaryFile(suffix='.pdf').name

    reader = PdfFileReader(file_name)
    writer = PdfFileWriter()
    dims = dimensions(file_name)
    target_w = dims['w'] * scale
    target_h = dims['h'] * scale

    # Number of pages in input document
    page_count = reader.getNumPages()

    for page_number in range(page_count):
        wtrmrk = reader.getPage(page_number)

        page = PageObject.createBlankPage(width=target_w, height=target_h)
        page.mergeScaledTranslatedPage(wtrmrk, scale, margin_x, margin_y)
        writer.addPage(page)

    with open(output, "wb") as outputStream:
        writer.write(outputStream)

    return output
Beispiel #25
0
    def pdfMerge(self, savePath, pdfPath, watermarkPdfPath):
        pdfFile = open(pdfPath, 'rb')
        pdfReader = PdfFileReader(pdfFile, strict=False)

        watermarkPdfFile = open(watermarkPdfPath, 'rb')
        watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

        pdfWriter = PdfFileWriter()

        for pageNum in range(pdfReader.numPages):
            pageObj = pdfReader.getPage(pageNum)

            x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
            y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

            pageObj.mergeTranslatedPage(page2=watermarkPdf,
                                        tx=x,
                                        ty=y,
                                        expand=False)

            pdfWriter.addPage(pageObj)

        resultFile = open(savePath, 'wb')
        pdfWriter.write(resultFile)
def compile_journal(directory, pad_path=None, folio_size=8, starting_page_num=1):
    pdfs = [f for f in os.listdir(directory) if '.pdf' in f and f[0:2].isdigit()]
    pdfs.sort()
    
    folios = []
    while len(pdfs) > 0:
        folio = []
        for i in range(8):
            path = pdfs.pop(0)
            reader = PdfFileReader(path)
            pdf = reader.getPage(0)
            folio.append(pdf)
            if len(pdfs) == 0:
                break
        folios.append(folio)

    joined_folios = []
    for i,folio in enumerate(folios):
        joined_folios.append(build_folio(folio,None,i*len(folio)+starting_page_num))
    index = PdfFileWriter()
    for folio in joined_folios:
        for page in folio:
            index.addPage(page)
    index.write(open('out.pdf','wb'))
Beispiel #27
0
def decrypt_pdf(filename, password, decrypted_filename=None):
    """
    将加密的文件及逆行解密,并生成一个无需密码pdf文件
    :param filename: 原先加密的pdf文件
    :param password: 对应的密码
    :param decrypted_filename: 解密之后的文件名
    :return:
    """
    # 生成一个Reader和Writer
    pdf_reader = get_reader(filename, password)
    if pdf_reader is None:
        return '出错'
    if not pdf_reader.isEncrypted:
        print('文件没有被加密,无需操作!')
        return '文件没有被加密,无需操作!'
    pdf_writer = PdfFileWriter()
    pdf_writer.appendPagesFromReader(pdf_reader)
    if decrypted_filename is None:
        decrypted_filename = "".join(
            filename[:-4]) + '_' + 'decrypted' + '.pdf'
    # 写入新文件
    pdf_writer.write(open(decrypted_filename, 'wb'))
    print('解密完成,新文件存储至' + decrypted_filename)
    return '解密完成'
def Extract_Code_From_PDF(ip_pdf_file, op_pdf_file, code_type):

    output = PdfFileWriter()
    input1 = PdfFileReader(open(ip_pdf_file, "rb"))

    output_page = input1.getPage(0)

    #bar code
    if 'bar' in code_type.lower():
        output_page.cropBox.lowerLeft = (0, 0)
        output_page.cropBox.upperleft = (0, 100)
        output_page.cropBox.lowerRight = (286, 0)
        output_page.cropBox.upperRight = (286, 100)

    #Data Matrix code
    if 'matrix' in code_type.lower():
        output_page.cropBox.lowerLeft = (200, 309)
        output_page.cropBox.upperleft = (200, 378)
        output_page.cropBox.lowerRight = (270, 309)
        output_page.cropBox.upperRight = (270, 378)

    output.addPage(output_page)
    outputStream = open(op_pdf_file, "wb")
    output.write(outputStream)
def combine_and_bookmark(file_dict, pdfs):
    # Create the writer object
    out = PdfFileWriter()

    # This is used to track what bookmarks have been added, in order to add parent bookmarks as needed
    added_bookmarks = {}

    # Gives the numbers to store as keys in added_bookmarks
    file_nums = list(file_dict.keys())
    counter = 0

    # Do this for every PDF we've opened
    for name, pdf in pdfs.items():
        # Determine the number of the pdf chapter
        pdf_num = int(file_nums[counter])

        # Add the first page
        out.addPage(pdf.getPage(0))

        # If we already added a pdf bookmark from this chapter:
        if pdf_num in added_bookmarks:
            # We add the bookmark with the parent of the root of the chapter
            out.addBookmark(name, out.getNumPages() - 1, added_bookmarks[pdf_num])

        # Otherwise if we haven't added a bookmark from this chapter yet
        else:
            # Add the bookmark, and make sure to add that bookmark to the dict above
            added_bookmarks[pdf_num] = out.addBookmark(name, out.getNumPages() - 1)
        # Then, we iterate through the rest of the pages and add the rest
        for page_num in range(1, pdf.getNumPages()):
            out.addPage(pdf.getPage(page_num))
        counter += 1
    return out
Beispiel #30
0
from PyPDF3 import PdfFileWriter, PdfFileReader

output = PdfFileWriter()
input1 = PdfFileReader(open("document1.pdf", "rb"))

# print how many pages input1 has:
print "document1.pdf has %d pages." % input1.getNumPages()

# add page 1 from input1 to output document, unchanged
output.addPage(input1.getPage(0))

# add page 2 from input1, but rotated clockwise 90 degrees
output.addPage(input1.getPage(1).rotateClockwise(90))

# add page 3 from input1, rotated the other way:
output.addPage(input1.getPage(2).rotateCounterClockwise(90))
# alt: output.addPage(input1.getPage(2).rotateClockwise(270))

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(3)
watermark = PdfFileReader(open("watermark.pdf", "rb"))
page4.mergePage(watermark.getPage(0))
output.addPage(page4)

# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(4)
page5.mediaBox.upperRight = (page5.mediaBox.getUpperRight_x() / 2,
                             page5.mediaBox.getUpperRight_y() / 2)
output.addPage(page5)

# add some Javascript to launch the print window on opening this PDF.