Example #1
0
def pdf_metadata_save(pdf_file, metadata, substitute_all_metadata=False, make_backup=True):
    """Rewrite ``pdf_file`` with updated document-information metadata.

    The original file is renamed to a backup path, every page is copied into
    a fresh writer, the metadata is merged, and the result is written back
    under the original name.

    :param pdf_file: path of the PDF to update in place.
    :param metadata: mapping of metadata names (without the leading ``/``)
        to values; each value is converted with ``str()`` before storing.
    :param substitute_all_metadata: when true, existing metadata is not
        copied and only ``metadata`` is written; otherwise ``metadata`` is
        layered on top of the existing entries.
    :param make_backup: a string is used as the backup path; ``False``
        deletes the backup after a successful write; anything else keeps
        ``<pdf_file without extension>.bak``.
    :raises: whatever the rename, the PDF library or the file system raise;
        on failure the original file is moved back into place first.
    """
    if isinstance(make_backup, str):
        bak_file = make_backup
    else:
        bak_file = os.path.splitext(pdf_file)[0] + ".bak"
    os.rename(pdf_file, bak_file)

    try:
        with open(bak_file, 'rb') as fin:
            pdf_in = PdfFileReader(fin)
            writer = PdfFileWriter()

            for page in range(pdf_in.getNumPages()):
                writer.addPage(pdf_in.getPage(page))

            info_dict = writer._info.getObject()

            # documentInfo is None for a PDF without an /Info dictionary,
            # so there is nothing to carry over in that case.
            info = pdf_in.documentInfo
            if not substitute_all_metadata and info is not None:
                for key in info:
                    info_dict.update({key: info[key]})

            for key in metadata:
                info_dict.update({NameObject('/' + key): createStringObject(str(metadata[key]))})

            with open(pdf_file, 'wb') as fout:
                writer.write(fout)
    except Exception:
        # Do not leave the caller with a missing or half-written PDF:
        # drop the partial output and move the original back.
        if os.path.exists(pdf_file):
            os.unlink(pdf_file)
        os.rename(bak_file, pdf_file)
        raise

    # Delete the backup only after its handle is closed (an open file cannot
    # be unlinked on Windows). ``== False`` keeps the original truth table:
    # ``None`` still keeps the backup.
    if make_backup == False:  # noqa: E712
        os.unlink(bak_file)
Example #2
0
def PDFMerge(savePath, pdfPath, watermarkPdfPath):
    """Stamp the first page of a watermark PDF, centred, onto every page.

    :param savePath: path the watermarked PDF is written to.
    :param pdfPath: path of the PDF to watermark.
    :param watermarkPdfPath: path of the PDF whose first page is the watermark.
    """
    # Both inputs must stay open until the writer has produced the output,
    # and every handle is closed (and the output flushed) on exit.
    with open(pdfPath, 'rb') as pdfFile, \
            open(watermarkPdfPath, 'rb') as watermarkPdfFile:
        pdfReader = PdfFileReader(pdfFile, strict=False)
        watermarkPdf = PdfFileReader(watermarkPdfFile, strict=False).getPage(0)

        pdfWriter = PdfFileWriter()

        # Repeat for every page of the source PDF.
        for pageNum in range(pdfReader.numPages):
            pageObj = pdfReader.getPage(pageNum)

            # Offsets that centre the watermark on this page.
            x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
            y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

            # Merge the watermark page into the current page.
            pageObj.mergeTranslatedPage(page2=watermarkPdf,
                                        tx=x,
                                        ty=y,
                                        expand=False)

            # Queue the merged page for the output document.
            pdfWriter.addPage(pageObj)

        # Save the result.
        with open(savePath, 'wb') as resultFile:
            pdfWriter.write(resultFile)
Example #3
0
    def encrypt(self, decrypt=None):
        """Copy ``self.pdf`` into an encrypted PDF at ``self.output``.

        :param decrypt: forwarded unchanged to ``pypdf3_reader`` as its
            second argument.
        :return: ``self.output``.
        """
        writer = PdfFileWriter()
        with open(self.pdf, 'rb') as source:
            reader = pypdf3_reader(source, decrypt)

            # Carry every source page over to the writer.
            for index in range(reader.numPages):
                writer.addPage(reader.getPage(index))

            # Encryption settings all come from the instance.
            writer.encrypt(
                self.user_pw,
                self.owner_pw,
                use_128bit=self.encrypt_128,
                allow_printing=self.allow_printing,
                allow_commenting=self.allow_commenting,
                overwrite_permission=self.overwrite_permission,
            )

            # todo: add metadata adding functionality
            document_metadata = {
                '/Producer': 'pdfconduit',
                '/Creator': 'HPA Design',
                '/Author': 'HPA Design',
            }
            writer.addMetadata(document_metadata)

            # The source must still be open while the writer serialises.
            with open(self.output, 'wb') as target:
                writer.write(target,
                             progress_bar=self.progress_bar,
                             progress_bar_enabled=self.progress_bar_enabled)
        return self.output
    def split(file):
        """
        Split the chosen PDF page by page and save each page into the
        splitter output directory as its own PDF file.
        :param file: the file chosen by the user to be split into pages
        :return: None
        """
        # Clear the output directory to avoid duplicated files/directories.
        Splitter.cleanDir()

        # Normalise the file name.
        file = Merger.toPath(file)

        with open(file, mode='rb') as source_stream:
            total_pages = PdfFileReader(source_stream).numPages

            for page_number in range(1, total_pages + 1):
                # NOTE(review): a new reader is built for every page, exactly
                # as before; presumably deliberate - confirm before hoisting.
                reader = PdfFileReader(source_stream)
                single_page = PdfFileWriter()
                single_page.addPage(reader.getPage(page_number - 1))

                # Output files are numbered from 1.
                target_path = join(Splitter.splitter_dir,
                                   f"página_{page_number}.pdf")
                with open(target_path, mode='wb') as target:
                    single_page.write(target)
def combine_and_bookmark(file_dict, pdfs):
    """Concatenate the given PDFs into one writer, bookmarking each one.

    The i-th key of ``file_dict`` (converted with ``int``) is the chapter
    number of the i-th entry of ``pdfs``. The first PDF of a chapter gets a
    top-level bookmark; later PDFs of the same chapter are bookmarked
    underneath it.

    :param file_dict: mapping whose keys supply the chapter numbers.
    :param pdfs: mapping of bookmark title -> opened PDF reader.
    :return: the populated ``PdfFileWriter`` (not yet written to disk).
    """
    writer = PdfFileWriter()

    # chapter number -> bookmark object of that chapter's first PDF
    chapter_roots = {}
    chapter_keys = list(file_dict.keys())

    for position, (title, document) in enumerate(pdfs.items()):
        chapter = int(chapter_keys[position])

        # The bookmark targets the first page, so add it before bookmarking.
        writer.addPage(document.getPage(0))

        if chapter not in chapter_roots:
            # First PDF of this chapter: remember its bookmark as the parent.
            chapter_roots[chapter] = writer.addBookmark(
                title, writer.getNumPages() - 1)
        else:
            # Chapter already started: nest under its root bookmark.
            writer.addBookmark(title, writer.getNumPages() - 1,
                               chapter_roots[chapter])

        # Remaining pages follow without further bookmarks.
        for page_index in range(1, document.getNumPages()):
            writer.addPage(document.getPage(page_index))

    return writer
    def start_Encryption(self):
        """Encrypt the selected PDF with the passwords typed into the form.

        Reads the source path from ``self.filename[0]`` and the passwords
        from the ``userPassword`` / ``ownerPassword`` widgets. On success the
        encrypted copy is written next to the source with ``_encrypted``
        inserted before the extension, a confirmation dialog is shown and
        the form is reset. Missing passwords and unexpected failures are
        reported through message boxes instead of being silently ignored.
        """
        import os  # local import: the file's import block is not visible here

        try:
            # self.filename is reset to '' after a run, so guard the index.
            source_path = self.filename[0] if self.filename else ''
            user_password = self.userPassword.text()
            owner_password = self.ownerPassword.text()

            if source_path and user_password and owner_password:

                pfw = PdfFileWriter()
                pdffile = PdfFileReader(source_path)

                for page in range(pdffile.numPages):
                    pfw.addPage(pdffile.getPage(page))

                pfw.encrypt(user_password, owner_password)

                # splitext (rather than str.replace on '.pdf') guarantees the
                # target differs from the source even for names like 'a.PDF'.
                root, extension = os.path.splitext(source_path)
                with open(root + '_encrypted' + extension, 'wb') as file:
                    pfw.write(file)

                msg = QMessageBox()
                msg.setWindowTitle('Done')
                msg.setIcon(QMessageBox.Information)
                msg.setText('File encryption done successfully.')
                msg.exec_()

                self.filename = ''

                self.userPassword.setText('')
                self.ownerPassword.setText('')
                self.userPassword.setDisabled(True)
                self.ownerPassword.setDisabled(True)
                self.startEncryption.setDisabled(True)

            else:
                if owner_password == '':
                    msg = QMessageBox()
                    msg.setWindowTitle('Error')
                    msg.setIcon(QMessageBox.Critical)
                    msg.setText('Owner Password Field is Empty.')
                    msg.exec_()

                if user_password == '':
                    msg = QMessageBox()
                    msg.setWindowTitle('Error')
                    msg.setIcon(QMessageBox.Critical)
                    msg.setText('User Password Field is Empty.')
                    msg.exec_()
        except Exception as error:
            # Still never propagates into the GUI event loop, but the user
            # now learns that nothing was written.
            msg = QMessageBox()
            msg.setWindowTitle('Error')
            msg.setIcon(QMessageBox.Critical)
            msg.setText('File encryption failed: %s' % error)
            msg.exec_()
Example #7
0
def pdf_metadata_load(pdf_file):
    """Return the document-information metadata of ``pdf_file``.

    :param pdf_file: path of the PDF to inspect.
    :return: the reader's ``documentInfo`` value (``None`` when the PDF
        carries no /Info dictionary).
    """
    with open(pdf_file, 'rb') as fin:
        # Read the metadata while the stream is still open: the reader pulls
        # objects from the file on demand, so asking after the ``with`` block
        # has closed it can fail. The former page copy into a throw-away
        # writer had no effect on the result and was dropped.
        return PdfFileReader(fin).documentInfo
Example #8
0
    def pypdf3(self):
        """Rotate every page of ``self.file_name`` clockwise by
        ``self.rotation`` and write the result to ``self.outfn``.

        :return: ``self.outfn``.
        """
        with open(self.file_name, 'rb') as source:
            writer = PdfFileWriter()
            reader = PdfFileReader(source)

            for index in range(reader.numPages):
                turned = reader.getPage(index)
                turned.rotateClockwise(self.rotation)
                writer.addPage(turned)

            # Written while the source is still open.
            with open(self.outfn, 'wb') as target:
                writer.write(target)
        return self.outfn
Example #9
0
def merge_pdfs(paths, output):
    """Concatenate the PDFs at ``paths``, in order, into the file ``output``.

    :param paths: iterable of input PDF paths.
    :param output: path of the merged PDF to create.
    """
    merged = PdfFileWriter()

    for source_path in paths:
        reader = PdfFileReader(source_path)
        page_total = reader.getNumPages()
        # Append every page of this input to the combined document.
        for index in range(page_total):
            merged.addPage(reader.getPage(index))

    # Serialise the combined document.
    with open(output, 'wb') as target:
        merged.write(target)
def split_pdf(myfile):
    """Extract the first two pages of every 50-page chunk and upload them.

    Reads ``/tmp/<myfile>``; for each complete chunk ``i`` of 50 pages it
    adds pages ``i*50`` and ``i*50 + 1`` to the writer, saves
    ``/tmp/document-page<i>.pdf`` and uploads it to ``destbucketName``
    under ``extracted-pdf/``.

    :param myfile: file name (inside ``/tmp/``) of the PDF to process.
    """
    # The input stays open for the whole loop and is closed afterwards.
    with open('/tmp/' + myfile, 'rb') as pdf_in_file:
        inputpdf = PdfFileReader(pdf_in_file)
        pages_no = inputpdf.numPages
        print(pages_no)
        # NOTE(review): one writer is shared by all chunks, so each saved
        # file also contains the pages of every earlier chunk. Kept as-is;
        # confirm whether a fresh writer per chunk was intended.
        output = PdfFileWriter()
        for i in range(pages_no // 50):
            output.addPage(inputpdf.getPage(i * 50))
            if i * 50 + 1 < pages_no:
                output.addPage(inputpdf.getPage(i * 50 + 1))
                print('/tmp/document-page%s.pdf' % i)
            newname = 'document-page%s.pdf' % i
            print(newname)
            local_path = '/tmp/' + newname
            with open(local_path, "wb") as outputStream:
                output.write(outputStream)
            # Upload only after the file is closed, so the complete,
            # flushed PDF is what gets sent.
            client.upload_file(local_path, destbucketName,
                               'extracted-pdf/' + newname)
Example #11
0
        def pypdf3():
            """Watermark every page of ``document`` via PdfFileReader/Writer.

            Uses ``document``, ``watermark``, ``output_filename`` and
            ``self.underneath`` from the enclosing scope. When
            ``self.underneath`` is false the watermark is merged on top of
            each page; otherwise it is merged onto a blank page first and
            the original page is merged over it.

            :return: ``output_filename``.
            """
            # 5b. Get our files ready
            document_reader = PdfFileReader(document)
            output_file = PdfFileWriter()

            # Number of pages in input document
            page_count = document_reader.getNumPages()

            # Watermark objects
            watermark_reader = PdfFileReader(watermark)
            wtrmrk_page = watermark_reader.getPage(0)
            wtrmrk_width = (wtrmrk_page.mediaBox.getWidth() / 2) + 0
            wtrmrk_height = (wtrmrk_page.mediaBox.getHeight() / 2) + 80
            # Look the rotation up once instead of building Info twice.
            rotation = Info(watermark_reader).rotate
            wtrmrk_rotate = -int(rotation) if rotation is not None else 0

            def stamp(target_page):
                """Merge the watermark into ``target_page`` in place."""
                if wtrmrk_rotate != 0:
                    target_page.mergeRotatedTranslatedPage(
                        wtrmrk_page, wtrmrk_rotate, wtrmrk_width, wtrmrk_height)
                else:
                    # An unrotated watermark is placed at the page origin.
                    target_page.mergeTranslatedPage(wtrmrk_page, 0, 0)

            # 5c. Go through all the input file pages to add a watermark to them
            for page_number in range(page_count):
                if not self.underneath:
                    # Watermark on top of the existing page content.
                    input_page = document_reader.getPage(page_number)
                    stamp(input_page)
                else:
                    # Watermark first, original content merged over it.
                    size = Info(document_reader).dimensions
                    input_page = PageObject().createBlankPage(document_reader, size['w'], size['h'])
                    stamp(input_page)
                    input_page.mergePage(document_reader.getPage(page_number))

                # Add page from input file to output document
                output_file.addPage(input_page)

            # 5d. finally, write "output" to PDF
            with open(output_filename, "wb") as outputStream:
                output_file.write(outputStream)
            return output_filename
Example #12
0
def overlay_pdfs(top_pdf, bottom_pdf, destination):
    """
    Merge the first page of one PDF over the first page of another and
    write the single resulting page to a file.
    :param top_pdf: PDF source whose first page is placed on top
    :param bottom_pdf: PDF source whose first page is placed underneath
    :param destination: Destination path
    """
    overlay_reader = PdfFileReader(top_pdf)
    base_reader = PdfFileReader(bottom_pdf)

    # The base page is modified in place: the overlay is merged into it.
    merged_page = base_reader.getPage(0)
    merged_page.mergePage(overlay_reader.getPage(0))

    writer = PdfFileWriter()
    writer.addPage(merged_page)

    # Persist the one-page document.
    with open(destination, "wb") as target:
        writer.write(target)
Example #13
0
    def pypdf3(self):
        """Place every page of ``self.file_name`` scaled onto a blank page.

        Each blank page is ``self.target_w`` x ``self.target_h``; the source
        page is merged with factor ``self.scale`` at offset
        (``self.margin_x``, ``self.margin_y``). The result is written to
        ``self.output``.

        :return: ``self.output``.
        """
        source = PdfFileReader(self.file_name)
        result = PdfFileWriter()

        total = source.getNumPages()

        for index in range(total):
            original_page = source.getPage(index)

            # Fresh canvas of the target size for every source page.
            canvas = PageObject.createBlankPage(width=self.target_w,
                                                height=self.target_h)
            canvas.mergeScaledTranslatedPage(original_page, self.scale,
                                             self.margin_x, self.margin_y)
            result.addPage(canvas)

        with open(self.output, "wb") as target:
            result.write(target)
        return self.output
Example #14
0
def write_pdf(pdf_obj, destination):
    """
    Copy all pages of a PDF source into a new file
    :param pdf_obj: PDF source handed to PdfFileReader
    :param destination: Destination path
    """
    source = PdfFileReader(pdf_obj)
    result = PdfFileWriter()

    total = source.getNumPages()

    # Pages are copied unchanged, in their original order.
    for index in range(total):
        result.addPage(source.getPage(index))

    # Write the copy to a real file.
    with open(destination, "wb") as target:
        result.write(target)
Example #15
0
def reorder(input_filename: str, output_filename: str) -> None:
    """Write the pages of ``input_filename`` to ``output_filename`` in the
    order produced by ``_make_sequence(page_count)``.

    :param input_filename: path of an existing PDF.
    :param output_filename: path of the PDF to create; must not exist yet.
    :raises AssertionError: if the input is missing or the output exists
        (kept as assertions so callers see the same exception type).
    """
    assert os.path.exists(input_filename), input_filename
    assert os.path.exists(output_filename) is False, output_filename

    # Context managers close both streams even when reading, sequencing or
    # writing raises part-way through.
    with open(input_filename, 'rb') as input_stream:
        output = PdfFileWriter()
        input_pdf = PdfFileReader(input_stream)

        order = _make_sequence(input_pdf.getNumPages())

        for page_number in order:
            output.addPage(input_pdf.getPage(page_number))

        # The input must remain open while the writer serialises its pages.
        with open(output_filename, "wb") as output_stream:
            output.write(output_stream)
Example #16
0
def main():
    """Command-line entry point: build a 2-up PDF.

    Usage: ``python 2-up.py input_file output_file``. Consecutive pages are
    paired; the right-hand page is merged onto the left-hand page, shifted
    by the left page's width, and the pair is written as one page.
    Exits with status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)
    print("2-up input " + sys.argv[1])

    # The input stream must stay open until the output has been written.
    with open(sys.argv[1], "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        output = PdfFileWriter()
        page_count = input1.getNumPages()

        for index in range(0, page_count - 1, 2):
            lhs = input1.getPage(index)
            rhs = input1.getPage(index + 1)
            lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(), 0, True)
            output.addPage(lhs)
            # Progress on one line; behaves the same on Python 2 and 3.
            sys.stdout.write(str(index) + " ")
            sys.stdout.flush()

        # With an odd page count the final page has no partner; keep it on
        # its own instead of silently dropping it.
        if page_count % 2:
            output.addPage(input1.getPage(page_count - 1))

        print("writing " + sys.argv[2])
        # ``open`` replaces the Python-2-only ``file`` builtin.
        with open(sys.argv[2], "wb") as outputStream:
            output.write(outputStream)
    print("done.")
Example #17
0
def add_encryption(path, encryptPath, fileDicts):
    """Write a password-protected copy of each listed PDF.

    :param path: directory containing the source PDFs.
    :param encryptPath: directory receiving the encrypted copies (same
        file names).
    :param fileDicts: mapping of file name -> user password.
    """
    for fileName, password in fileDicts.items():
        input_pdf = os.path.join(path, fileName)
        output_pdf = os.path.join(encryptPath, fileName)
        pdf_reader = PdfFileReader(input_pdf)

        # A fresh writer per file: a shared writer would pile the pages of
        # all earlier files into every later output and encrypt repeatedly.
        pdf_writer = PdfFileWriter()
        for page in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page))

        pdf_writer.encrypt(user_pwd=password,
                           owner_pwd=None,
                           use_128bit=True)

        # Delete the output file first if it already exists.
        if os.path.exists(output_pdf):
            os.remove(output_pdf)

        with open(output_pdf, 'wb') as fh:
            pdf_writer.write(fh)
Example #18
0
def rename(pdf,doi):
    """Store ``doi`` as the ``/doi`` metadata entry of the PDF at ``pdf``.

    The document is rewritten to ``<pdf>.temppdf`` with all pages and the
    existing metadata copied over, then the temporary file replaces the
    original.

    :param pdf: path of the PDF to update.
    :param doi: DOI string to store.
    """
    import os

    # It does not appear possible to alter in place.
    temppdf = pdf + '.temppdf'

    try:
        with open(pdf, 'rb') as fin:
            pdf_in = PdfFileReader(fin)
            writer = PdfFileWriter()

            for page in range(pdf_in.getNumPages()):
                writer.addPage(pdf_in.getPage(page))

            infoDict = writer._info.getObject()

            # documentInfo is None for a PDF without an /Info dictionary.
            info = pdf_in.documentInfo
            if info is not None:
                for key in info:
                    infoDict.update({NameObject(key): createStringObject(info[key])})
                    # NOTE(review): key[0] is only the first character of
                    # the key; output kept unchanged - confirm intent.
                    print(key[0]+':'+ info[key])

            # add the DOI
            infoDict.update({NameObject('/doi'): createStringObject(u''+doi)})

            with open(temppdf, 'wb') as fout:
                writer.write(fout)
    except Exception:
        # Do not leave a stale temporary file behind on failure.
        if os.path.exists(temppdf):
            os.unlink(temppdf)
        raise

    # Both handles are closed here. os.replace swaps the file in one step,
    # so there is no window in which ``pdf`` does not exist.
    os.replace(temppdf, pdf)
    print('The DOI have been updated to:{0}'.format(doi))
Example #19
0
def slicer(document,
           first_page=None,
           last_page=None,
           suffix='sliced',
           tempdir=None):
    """Slice a PDF document to remove pages.

    :param document: path of the source PDF.
    :param first_page: 1-based first page to keep; ``None`` means the
        first page of the document.
    :param last_page: 1-based last page to keep (inclusive); ``None`` means
        the last page of the document.
    :param suffix: suffix for the output name (via ``add_suffix``) when no
        ``tempdir`` is given; a falsy value selects a temporary file name.
    :param tempdir: directory for a temporary output file; takes precedence
        over ``suffix``.
    :return: path of the sliced PDF.
    :raises AssertionError: if the page range does not fit the document.
    """
    # Set output file name
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir,
                                delete=False) as temp:
            output = temp.name
    elif suffix:
        output = os.path.join(os.path.dirname(document),
                              add_suffix(document, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            output = temp.name

    pages = Info(document).pages

    # Convert the 1-based selection into a 0-based slice. The former test
    # ``if not None`` was always true, so the default ``None`` crashed on
    # ``None - 1``; ``None`` now selects the start / end of the document.
    first_page = first_page - 1 if first_page is not None else 0
    if last_page is None:
        last_page = pages

    # Validate page range by comparing selection to number of pages in PDF document
    invalid = 'Number of pages: ' + str(
        pages) + ' ----> Page Range Input: ' + str(first_page) + '-' + str(
            last_page)
    assert 0 <= first_page <= last_page <= pages, invalid

    pdf = PdfFileReader(document)
    writer = PdfFileWriter()

    for page in range(pdf.getNumPages())[first_page:last_page]:
        writer.addPage(pdf.getPage(page))

    with open(output, 'wb') as out:
        writer.write(out)
    return output
Example #20
0
def split(filename: str, chapters: List[Chapter], directory: str) -> None:
    """Write one PDF per chapter into ``directory``.

    For every chapter, pages ``chapter.start`` .. ``chapter.stop``
    (1-based, inclusive) of ``filename`` are copied into a file named
    ``chapter.name``.

    :param filename: path of the source PDF; must exist.
    :param chapters: chapters providing ``name``, ``start`` and ``stop``.
    :param directory: directory receiving the chapter files.
    """
    assert os.path.exists(filename)

    for chapter in chapters:
        print(
            f'Writing to {chapter.name} pages {chapter.start} to {chapter.stop}'
        )

        # The source is deliberately reopened for every chapter: opening it
        # once outside the loop runs into a bug in the library.
        with open(filename, 'rb') as source:
            reader = PdfFileReader(source)
            writer = PdfFileWriter()

            first_index = chapter.start - 1
            for index in range(first_index, chapter.stop):
                writer.addPage(reader.getPage(index))

            target_path = os.path.join(directory, chapter.name)
            with open(target_path, "wb") as target:
                writer.write(target)
                target.flush()
Example #21
0
def rotate(file_name, rotate, suffix='rotated', tempdir=None):
    """Rotate PDF by increments of 90 degrees.

    :param file_name: path of the source PDF.
    :param rotate: clockwise rotation passed to ``rotateClockwise``.
    :param suffix: suffix for the output name (via ``add_suffix``) when no
        ``tempdir`` is given; a falsy value selects a temporary file name.
    :param tempdir: directory for a temporary output file; takes precedence
        over ``suffix``.
    :return: path of the rotated PDF.
    """
    # Set output file name. The temporary files are only used to reserve a
    # name, so their handles are closed immediately instead of being left
    # to the garbage collector.
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir,
                                delete=False) as temp:
            outfn = temp.name
    elif suffix:
        outfn = os.path.join(os.path.dirname(file_name),
                             add_suffix(file_name, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            outfn = temp.name

    with open(file_name, 'rb') as pdf_in:
        pdf_writer = PdfFileWriter()
        pdf_reader = PdfFileReader(pdf_in)
        for pagenum in range(pdf_reader.numPages):
            page = pdf_reader.getPage(pagenum)
            page.rotateClockwise(rotate)
            pdf_writer.addPage(page)

        # Written while the source stream is still open.
        with open(outfn, 'wb') as pdf_out:
            pdf_writer.write(pdf_out)
    return outfn
Example #22
0
def upscale(file_name,
            scale=1.5,
            margin_x=0,
            margin_y=0,
            suffix='scaled',
            tempdir=None):
    """Upscale a PDF to a large size.

    Every page is merged, scaled by ``scale`` and offset by
    (``margin_x``, ``margin_y``), onto a blank page whose size is the
    source dimensions (from ``dimensions(file_name)``) times ``scale``.

    :param file_name: path of the source PDF.
    :param scale: scaling factor for page size and content.
    :param margin_x: horizontal offset of the scaled content.
    :param margin_y: vertical offset of the scaled content.
    :param suffix: suffix for the output name (via ``add_suffix``) when no
        ``tempdir`` is given; a falsy value selects a temporary file name.
    :param tempdir: directory for a temporary output file; takes precedence
        over ``suffix``.
    :return: path of the scaled PDF.
    """
    # Set output file name. The temporary files are only used to reserve a
    # name, so their handles are closed immediately instead of being left
    # to the garbage collector.
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir,
                                delete=False) as temp:
            output = temp.name
    elif suffix:
        output = os.path.join(os.path.dirname(file_name),
                              add_suffix(file_name, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf') as temp:
            output = temp.name

    reader = PdfFileReader(file_name)
    writer = PdfFileWriter()
    dims = dimensions(file_name)
    target_w = dims['w'] * scale
    target_h = dims['h'] * scale

    # Number of pages in input document
    page_count = reader.getNumPages()

    for page_number in range(page_count):
        source_page = reader.getPage(page_number)

        # Fresh canvas of the enlarged size for every source page.
        page = PageObject.createBlankPage(width=target_w, height=target_h)
        page.mergeScaledTranslatedPage(source_page, scale, margin_x, margin_y)
        writer.addPage(page)

    with open(output, "wb") as outputStream:
        writer.write(outputStream)

    return output
Example #23
0
    def pdfMerge(self, savePath, pdfPath, watermarkPdfPath):
        """Stamp the first page of a watermark PDF, centred, onto every page.

        :param savePath: path the watermarked PDF is written to.
        :param pdfPath: path of the PDF to watermark.
        :param watermarkPdfPath: path of the PDF whose first page is the
            watermark.
        """
        # Both inputs must stay open until the writer has produced the
        # output; every handle is closed (and the output flushed) on exit.
        with open(pdfPath, 'rb') as pdfFile, \
                open(watermarkPdfPath, 'rb') as watermarkPdfFile:
            pdfReader = PdfFileReader(pdfFile, strict=False)
            watermarkPdf = PdfFileReader(watermarkPdfFile,
                                         strict=False).getPage(0)

            pdfWriter = PdfFileWriter()

            for pageNum in range(pdfReader.numPages):
                pageObj = pdfReader.getPage(pageNum)

                # Offsets that centre the watermark on this page.
                x = (pageObj.mediaBox[2] - watermarkPdf.mediaBox[2]) / 2
                y = (pageObj.mediaBox[3] - watermarkPdf.mediaBox[3]) / 2

                pageObj.mergeTranslatedPage(page2=watermarkPdf,
                                            tx=x,
                                            ty=y,
                                            expand=False)

                pdfWriter.addPage(pageObj)

            with open(savePath, 'wb') as resultFile:
                pdfWriter.write(resultFile)
Example #24
0
def compile_journal(directory, pad_path=None, folio_size=8, starting_page_num=1):
    """Assemble the numbered single-page PDFs of ``directory`` into folios
    and write the result to ``out.pdf``.

    Files are selected when their name contains ``.pdf`` and starts with two
    digits, sorted by name, grouped ``folio_size`` at a time, passed through
    ``build_folio`` and concatenated.

    :param directory: directory holding the page PDFs.
    :param pad_path: currently unused. NOTE(review): ``build_folio`` is
        still given ``None`` as its second argument, as before - confirm
        whether ``pad_path`` belongs there.
    :param folio_size: number of pages per folio (previously ignored in
        favour of a hard-coded 8, which is still the default).
    :param starting_page_num: page number passed for the first folio.
    :raises ValueError: if ``folio_size`` is smaller than 1.
    """
    if folio_size < 1:
        raise ValueError('folio_size must be at least 1')

    pdfs = sorted(f for f in os.listdir(directory)
                  if '.pdf' in f and f[0:2].isdigit())

    # Group the first page of every file into folios of ``folio_size``.
    # Names from os.listdir are relative to ``directory``, so join them;
    # otherwise they only resolve when it is the working directory.
    folios = []
    for start in range(0, len(pdfs), folio_size):
        folios.append([
            PdfFileReader(os.path.join(directory, name)).getPage(0)
            for name in pdfs[start:start + folio_size]
        ])

    # Number by the nominal folio size: a short last folio must not shift
    # its own starting page number.
    joined_folios = [
        build_folio(folio, None, i * folio_size + starting_page_num)
        for i, folio in enumerate(folios)
    ]

    index = PdfFileWriter()
    for folio in joined_folios:
        for page in folio:
            index.addPage(page)
    with open('out.pdf', 'wb') as out_file:
        index.write(out_file)
def Extract_Code_From_PDF(ip_pdf_file, op_pdf_file, code_type):
    """Crop the first page of a PDF to a fixed code region and save it.

    :param ip_pdf_file: path of the source PDF.
    :param op_pdf_file: path of the one-page PDF to create.
    :param code_type: a text containing ``bar`` and/or ``matrix``
        (case-insensitive) selects the crop rectangle; when both occur the
        ``matrix`` rectangle wins because it is applied last; when neither
        occurs the page is written uncropped.
    """
    requested = code_type.lower()

    # The input must stay open until the writer has produced the output.
    with open(ip_pdf_file, "rb") as source:
        output = PdfFileWriter()
        input1 = PdfFileReader(source)

        output_page = input1.getPage(0)

        # ``upperLeft`` was misspelled ``upperleft``, which only created an
        # unused attribute; the resulting rectangle is unchanged because
        # the other three corners already define it.

        #bar code
        if 'bar' in requested:
            output_page.cropBox.lowerLeft = (0, 0)
            output_page.cropBox.upperLeft = (0, 100)
            output_page.cropBox.lowerRight = (286, 0)
            output_page.cropBox.upperRight = (286, 100)

        #Data Matrix code
        if 'matrix' in requested:
            output_page.cropBox.lowerLeft = (200, 309)
            output_page.cropBox.upperLeft = (200, 378)
            output_page.cropBox.lowerRight = (270, 309)
            output_page.cropBox.upperRight = (270, 378)

        output.addPage(output_page)
        with open(op_pdf_file, "wb") as outputStream:
            output.write(outputStream)
Example #26
0
def invoice_pdf(request, number, correction=False):
    """Render an invoice (or its correction) as a PDF HTTP response.

    :param request: the HTTP request; a ``download`` query parameter turns
        the response into an attachment named ``<number>.pdf``.
    :param number: invoice number used to look up the ``Invoice`` (404 if
        it does not exist).
    :param correction: when true, render ``invoice.correction`` instead.
    :return: an ``HttpResponse`` with content type ``application/pdf``.
    """
    invoice = get_object_or_404(Invoice, number=number)
    if correction:
        invoice = invoice.correction
    from reportlab.lib.units import mm
    from reportlab.platypus import Paragraph
    from reportlab.platypus.flowables import Spacer
    from reportlab.platypus.flowables import KeepTogether

    from dinbrief.document import Document
    from dinbrief.invoice import ItemTable, TotalTable
    from dinbrief.styles import styles
    from dinbrief.template import BriefTemplate

    # Everything is rendered in the invoice's own language.
    with trans_override(invoice.language):

        response = HttpResponse(content_type='application/pdf')
        if 'download' in request.GET:
            filename = '%s.pdf' % invoice.number
            response[
                'Content-Disposition'] = 'attachment; filename=%s' % filename

        # Terms are only printed on real invoices; INVOICE_TERMS is either
        # a callable producing them or an iterable of paragraph texts.
        if invoice.type == Invoice.TYPE_INVOICE:
            if callable(INVOICE_TERMS):
                terms = INVOICE_TERMS(invoice)
            else:
                terms = [
                    Paragraph(term, styles['Terms']) for term in INVOICE_TERMS
                ]
        else:
            terms = []

        template = BriefTemplate()
        document = Document(
            sender=invoice.sender_lines,
            recipient=invoice.recipient_lines,
            date=date_format(invoice.created, 'SHORT_DATE_FORMAT'),
            content=[
                Paragraph(
                    '%s %s' %
                    (invoice.get_type_display() if not correction else
                     gettext(u'Correction of invoice'), invoice.number),
                    styles['Subject']),
                Spacer(template.CONTENT_WIDTH, 2 * mm),
                ItemTable(template, invoice),
                KeepTogether(TotalTable(template, invoice)),
                Spacer(template.CONTENT_WIDTH, 10 * mm),
            ] + terms)

        backgrounds = settings.SHARK['INVOICE']['BACKGROUND']
        if backgrounds:
            with tempfile.TemporaryFile() as tmp:
                # Create content in a temporary file
                template.render(document, tmp)
                # Combine background with the content
                writer = PdfFileWriter()
                content = PdfFileReader(tmp)
                info_dict = writer._info.getObject()
                info_dict.update(content.getDocumentInfo())
                # The background files were previously opened and never
                # closed, leaking two handles per request. They stay open
                # until the writer has finished serialising.
                with open(backgrounds['FIRST_PAGE'], 'rb') as first_file, \
                        open(backgrounds['LATER_PAGE'], 'rb') as later_file:
                    first_bg = PdfFileReader(first_file)
                    later_bg = PdfFileReader(later_file)
                    bg = [first_bg.getPage(0), later_bg.getPage(0)]
                    for i, page in enumerate(content.pages):
                        # Page 0 gets the first background, all others the
                        # later-page background.
                        page.mergePage(bg[min(i, 1)])
                        page.compressContentStreams()
                        writer.addPage(page)
                    writer.write(response)
        else:
            # Render content directly to the HTTP response object if no
            # background images are configured.
            template.render(document, response)

    return response
Example #27
0
def zipper(opts, cord_path, base_path, rec_path, rec_pdf_exists, output_name,
           matching, empty_path):
    """Interleave coordination, base and (optionally) recommended PDF pages.

    The leading pages of the coordination PDF (the length difference to the
    base PDF) are copied first. After that either pages are matched by the
    TCC name found in their text (``matching`` true) or combined by position.

    :param opts: mapping; ``opts['output']``, when truthy, is the output path.
    :param cord_path: path of the coordination PDF.
    :param base_path: path of the base PDF.
    :param rec_path: path of the recommended PDF (read only when
        ``rec_pdf_exists`` is true).
    :param rec_pdf_exists: whether a recommended PDF is available.
    :param output_name: name fragment used to build the default output file
        name when ``opts['output']`` is falsy.
    :param matching: select text matching instead of positional interleaving.
    :param empty_path: path of a PDF whose first page is inserted when a
        recommended page was expected but not found.
    """
    # ######### PDF Write Setup ######### #
    # Open the input PDFs
    # NOTE(review): none of the handles opened here is closed in this function.
    cord_pdf = PdfFileReader(open(cord_path, 'rb'), False)
    base_pdf = PdfFileReader(open(base_path, 'rb'), False)
    rec_pdf = ''
    if rec_pdf_exists:
        rec_pdf = PdfFileReader(open(rec_path, 'rb'), False)
    empty_pdf = PdfFileReader(open(empty_path, 'rb'), False)

    # Check that the coordination PDF is longer than the base (and therefore rec) pdf too.
    # The Coordination PDF includes pages at the front that do not get sliced in, and instead actually sit
    # in the front. If the Coordination pdf is less than the Base or Rec, these are missing, or there was another error
    if cord_pdf.getNumPages() < base_pdf.getNumPages():
        prompt = 'Coordination PDF is shorter than the Base PDF'
        eprint(prompt)
        logger.critical(prompt)
        # Terminates the whole process rather than raising to the caller.
        exit(-7)

    # Find the difference in length of the PDFs, these are the leader pages of the coordination
    diff_length = cord_pdf.getNumPages() - base_pdf.getNumPages()
    logger.info('Diff Length: %s', str(diff_length))

    output = PdfFileWriter()

    # Leader pages go to the output unchanged.
    for ii in range(diff_length):
        output.addPage(cord_pdf.getPage(ii))

    if matching:

        # INFO logging is disabled around each text extraction and
        # re-enabled right afterwards.
        logger.info("Converting Coordination PDF to string")
        logging.disable(logging.INFO)
        cord_str_pages = pdf_pages_to_list_of_strings(cord_path)
        logging.disable(logging.NOTSET)

        logger.info("Converting Base PDF to string")
        logging.disable(logging.INFO)
        base_str_pages = pdf_pages_to_list_of_strings(base_path)
        logging.disable(logging.NOTSET)

        rec_str_pages = []
        if rec_pdf_exists:
            # NOTE(review): INFO is disabled before the message below is
            # logged, unlike the two blocks above, so it is suppressed.
            logging.disable(logging.INFO)
            logger.info("Converting Recommended PDF to string")
            rec_str_pages = pdf_pages_to_list_of_strings(rec_path)
            logging.disable(logging.NOTSET)

        # Group 2 of each pattern captures the TCC identifier (e.g. TCC_12a).
        regex_cord = r'(TCC Curve: )(TCC_[\d]+[a-zA-Z]?)([-_#$\w\d\[\] ]*)'
        regex_base_rec = r'(TCC Name: )(TCC_[\d]+[a-zA-Z]?)([-_#$\w\d\[\] ]*)'

        # Walk the coordination pages that follow the leader pages.
        for ii in range(diff_length, len(cord_str_pages)):
            output.addPage(cord_pdf.getPage(ii))
            tcc_matches = re.finditer(regex_cord, cord_str_pages[ii],
                                      re.MULTILINE)

            for match_num, tcc_match in enumerate(tcc_matches, start=1):
                tcc_name = tcc_match.group(2)
                logger.info("Attempting to find: " + tcc_name)
                base_num = find_matching_page(tcc_name, base_str_pages,
                                              regex_base_rec, 'Base PDF')
                # -1 is treated as "not found" here.
                if base_num != -1:
                    logger.info('Found on base page: %s', str(base_num))
                rec_page_flag = check_for_rec(cord_str_pages[ii])
                rec_num = 0
                if rec_pdf_exists and rec_page_flag:
                    rec_num = find_matching_page(tcc_name, rec_str_pages,
                                                 regex_base_rec, 'Rec PDF')
                    if rec_num != -1:
                        logger.info('Found on rec page: %s', str(rec_num))
                    else:
                        # Expected recommended page is missing: insert the
                        # placeholder page instead.
                        output.addPage(empty_pdf.getPage(0))
                # NOTE(review): "> 0" also excludes index 0; if
                # find_matching_page returns 0-based page indexes, a match on
                # the first page is skipped - confirm against that helper.
                if base_num > 0:
                    output.addPage(base_pdf.getPage(base_num))
                    if rec_num > 0:
                        output.addPage(rec_pdf.getPage(rec_num))
                    # Only the first TCC name with a base match is used for
                    # this coordination page.
                    break
    else:
        # Positional mode: coordination page, base page, then rec page.
        for jj in range(base_pdf.getNumPages()):
            output.addPage(cord_pdf.getPage(jj + diff_length))
            output.addPage(base_pdf.getPage(jj))
            if rec_pdf_exists:
                output.addPage(rec_pdf.getPage(jj))

    # Finally, output everything to the PDF
    # The output name is chosen based on what the name of the coordination file is
    if opts['output']:
        output_name = opts['output']
    else:
        output_name = "8.0 - Coordination Results & Recommendations_" + output_name + "2018_NEW.pdf"
        output_name = os.path.join(os.path.dirname(os.path.abspath(cord_path)),
                                   output_name)
    with open(output_name, "wb") as w:
        output.write(w)
Example #28
0
from PyPDF3 import PdfFileWriter, PdfFileReader

# Demonstration script: collect five pages of document1.pdf in a writer,
# each with a different transformation applied.
output = PdfFileWriter()
input1 = PdfFileReader(open("document1.pdf", "rb"))

# print how many pages input1 has
# (function-call form: valid on both Python 2 and Python 3, whereas the
# former print statement is a SyntaxError on Python 3):
print("document1.pdf has %d pages." % input1.getNumPages())

# add page 1 from input1 to output document, unchanged
output.addPage(input1.getPage(0))

# add page 2 from input1, but rotated clockwise 90 degrees
output.addPage(input1.getPage(1).rotateClockwise(90))

# add page 3 from input1, rotated the other way:
output.addPage(input1.getPage(2).rotateCounterClockwise(90))
# alt: output.addPage(input1.getPage(2).rotateClockwise(270))

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(3)
watermark = PdfFileReader(open("watermark.pdf", "rb"))
page4.mergePage(watermark.getPage(0))
output.addPage(page4)

# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(4)
page5.mediaBox.upperRight = (page5.mediaBox.getUpperRight_x() / 2,
                             page5.mediaBox.getUpperRight_y() / 2)
output.addPage(page5)

# add some Javascript to launch the print window on opening this PDF.
Example #29
0
from PyPDF3 import PdfFileWriter, PdfFileReader
import sys

# Command-line script: copy infile to outfile without the listed pages.
# Page numbers are 0-based indexes into infile.

# At least infile and outfile are required. With a single argument the
# input path would double as the output path and be overwritten.
if len(sys.argv) < 3 or sys.argv[1] == '-h':
    print('''args: infile p1 p2 .. pn outfile
             Program outputs outfile with p1, p2, ..., pn removed from infile.'''
          )
    sys.exit()

# The former second positional argument 'rb' was being passed as the
# reader's ``strict`` flag; the default is used instead.
infile = PdfFileReader(sys.argv[1])
outfile = PdfFileWriter()

# A set makes removal independent of argument order and duplicates; the
# former pointer walk silently dropped pages when the list was unsorted.
page_del = set(map(int, sys.argv[2:-1]))
for i in range(infile.getNumPages()):
    if i not in page_del:
        outfile.addPage(infile.getPage(i))

with open(sys.argv[-1], 'wb') as f:
    outfile.write(f)
Example #30
0
	# 		print('page_id: {0}'.format(page_id))
	# 		blank_page.mergeTranslatedPage(
	# 			pages[page_id-1],
	# 			page_idx * (page_actual_width + page_xbuffer_letter),
	# 			idy*page_actual_height)
	# 	idy = idy - 1

	canvas_height = len(pages) * page_actual_height
	canvas_width = 1 * page_actual_width
	blank_page = pdf.PageObject.createBlankPage(width=canvas_width, height=canvas_height)

	# pdb.set_trace()	
	pages.reverse()
	for idy, page in enumerate(pages):
		blank_page.mergeTranslatedPage(page, 0, idy * page_actual_height)




	pdfWriter.addPage(blank_page)

	newFile = open(outfile_path, 'wb') 

	# writing rotated pages to new file 
	pdfWriter.write(newFile) 

	# closing the original pdf file object 
	pdfFileObj.close() 

	# closing the new pdf file object 
	newFile.close()