예제 #1
1
def mailmerge( ifile, ofile, fn_updatetmpl, content_list ):
    """Produce a PDF with one page per content element, all based on one template.

    The first page of ``ifile`` is used as the template.  Its content
    stream and its ``/Resources`` dictionary are registered once in the
    output writer and then referenced by every generated page, so each
    page only adds its own content stream.

    Parameters:
        ifile: path of the PDF whose first page is the template.
        ofile: path the merged PDF is written to.
        fn_updatetmpl: optional callable invoked with the template page
            before it is used; skipped when falsy.
        content_list: iterable of page contents.  ``StreamObject`` items
            are used as-is; anything else is wrapped in a new stream
            built from its length and data.
    """
    with open( ifile, "rb" ) as src:
        reader = PdfFileReader( src )
        writer = PdfFileWriter( )

        # Fetch the template page and let the caller adjust it
        template_page = reader.getPage( 0 )
        if fn_updatetmpl:
            fn_updatetmpl( template_page )

        # Shared by every output page: template contents + resources
        shared_contents = writer._addObject( template_page.getContents() )
        shared_resources = writer._addObject( template_page['/Resources'] )

        for item in content_list:

            # Raw data has to be wrapped in a stream object first
            if not isinstance( item, StreamObject ):
                item = StreamObject.initializeFromDictionary(
                    { '/Length' : len(item), '__streamdata__' : item } )

            # Copy the template's page dictionary, then point the copy at
            # the shared objects followed by this element's own stream
            page = DictionaryObject( template_page.items() )
            page[NameObject('/Contents')] = ArrayObject( [shared_contents, item] )
            page[NameObject('/Resources')] = shared_resources
            writer.addPage( page )

        # The output is written while the input file is still open
        with open( ofile, "wb" ) as dst:
            writer.write( dst )
예제 #2
0
파일: utils.py 프로젝트: mqt635/caj2pdf
def add_outlines(toc, filename, output):
    """Copy a PDF and attach an outline (bookmark) tree built from ``toc``.

    Parameters:
        toc: sequence of dicts, each read for the keys ``"title"``,
            ``"page"`` and ``"node"``.  ``node`` must provide the methods
            ``real_parent()``, ``prev()``, ``next()``, ``first()`` and
            ``last()``, each returning ``None`` or an object with an
            ``index`` attribute.  ``build_outlines_btree(toc)`` is called
            first; presumably it attaches those nodes -- confirm against
            its definition.
        filename: path of the source PDF.
        output: path the resulting PDF is written to.

    When ``toc`` is empty the pages are copied without any outline.
    Both files are handled by context managers, so they are closed even
    if building or writing the document raises.
    """
    build_outlines_btree(toc)
    pdf_out = PdfFileWriter()
    # The input stays open until the output has been written, as in the
    # original sequence where it was closed only after pdf_out.write().
    with open(filename, 'rb') as input_file:
        pdf_in = PdfFileReader(input_file)
        for p in pdf_in.pages:
            pdf_out.addPage(p)
        toc_num = len(toc)
        if toc_num > 0:  # Just copy if toc empty
            # References to the objects added further below: index 0 is
            # the outline root, entries 1..toc_num are the outline items.
            # They are created up front so items can point at siblings
            # and parents that have not been added to the writer yet.
            idoix = len(pdf_out._objects) + 1
            idorefs = [PDF.IndirectObject(x + idoix, 0, pdf_out)
                       for x in range(toc_num + 1)]
            ol = PDF.DictionaryObject()
            ol.update({
                PDF.NameObject("/Type"): PDF.NameObject("/Outlines"),
                PDF.NameObject("/First"): idorefs[1],
                PDF.NameObject("/Last"): idorefs[-1],
                PDF.NameObject("/Count"): PDF.NumberObject(toc_num)
            })
            # Node accessor name -> outline item key it fills in.
            opt_keys = {"real_parent": "/Parent", "prev": "/Prev",
                        "next": "/Next", "first": "/First", "last": "/Last"}
            olitems = []
            for t in toc:
                title = t["title"]
                # Decode whenever the value offers .decode (bytes); a
                # Python 3 str has no such method and is used unchanged.
                if hasattr(title, "decode"):
                    title = title.decode("utf-8")
                oli = PDF.DictionaryObject()
                oli.update({
                    PDF.NameObject("/Title"): PDF.TextStringObject(title),
                    PDF.NameObject("/Dest"): make_dest(pdf_out, t["page"])
                })
                for k, v in opt_keys.items():
                    n = getattr(t["node"], k)()
                    if n is not None:
                        oli.update({
                            PDF.NameObject(v): idorefs[n.index]
                        })
                olitems.append(oli)
            # Insertion order must match the idorefs numbering: root
            # first, then the items in toc order.
            pdf_out._addObject(ol)
            for i in olitems:
                pdf_out._addObject(i)
            pdf_out._root_object.update({
                PDF.NameObject("/Outlines"): idorefs[0]
            })
        with open(output, "wb") as output_file:
            pdf_out.write(output_file)
예제 #3
0
파일: utils.py 프로젝트: lixint/caj2pdf
def add_outlines(toc, filename, output):
    """Copy a PDF and attach an outline (bookmark) tree built from ``toc``.

    Parameters:
        toc: sequence of dicts, each read for the keys ``"title"``,
            ``"page"`` and ``"node"``.  ``node`` must provide the methods
            ``real_parent()``, ``prev()``, ``next()``, ``first()`` and
            ``last()``, each returning ``None`` or an object with an
            ``index`` attribute.  ``build_outlines_btree(toc)`` is called
            first; presumably it attaches those nodes -- confirm against
            its definition.
        filename: path of the source PDF.
        output: path the resulting PDF is written to.

    An empty ``toc`` yields a plain copy of the pages; without that guard
    the ``/First`` lookup ``idorefs[1]`` would raise ``IndexError``
    because only the root reference exists.  Both files are handled by
    context managers, so they are closed even if an error occurs.
    """
    build_outlines_btree(toc)
    pdf_out = PdfFileWriter()
    # The input stays open until the output has been written, as in the
    # original sequence where it was closed only after pdf_out.write().
    with open(filename, 'rb') as input_file:
        pdf_in = PdfFileReader(input_file)
        for p in pdf_in.pages:
            pdf_out.addPage(p)
        toc_num = len(toc)
        if toc_num > 0:  # nothing to attach when the toc is empty
            # References to the objects added further below: index 0 is
            # the outline root, entries 1..toc_num are the outline items.
            # They are created up front so items can point at siblings
            # and parents that have not been added to the writer yet.
            idoix = len(pdf_out._objects) + 1
            idorefs = [PDF.IndirectObject(x + idoix, 0, pdf_out)
                       for x in range(toc_num + 1)]
            ol = PDF.DictionaryObject()
            ol.update({
                PDF.NameObject("/Type"): PDF.NameObject("/Outlines"),
                PDF.NameObject("/First"): idorefs[1],
                PDF.NameObject("/Last"): idorefs[-1],
                PDF.NameObject("/Count"): PDF.NumberObject(toc_num)
            })
            # Node accessor name -> outline item key it fills in.
            opt_keys = {"real_parent": "/Parent", "prev": "/Prev",
                        "next": "/Next", "first": "/First", "last": "/Last"}
            olitems = []
            for t in toc:
                title = t["title"]
                # Decode whenever the value offers .decode (bytes); a
                # Python 3 str has no such method and is used unchanged.
                if hasattr(title, "decode"):
                    title = title.decode("utf-8")
                oli = PDF.DictionaryObject()
                oli.update({
                    PDF.NameObject("/Title"): PDF.TextStringObject(title),
                    PDF.NameObject("/Dest"): make_dest(pdf_out, t["page"])
                })
                for k, v in opt_keys.items():
                    n = getattr(t["node"], k)()
                    if n is not None:
                        oli.update({
                            PDF.NameObject(v): idorefs[n.index]
                        })
                olitems.append(oli)
            # Insertion order must match the idorefs numbering: root
            # first, then the items in toc order.
            pdf_out._addObject(ol)
            for i in olitems:
                pdf_out._addObject(i)
            pdf_out._root_object.update({
                PDF.NameObject("/Outlines"): idorefs[0]
            })
        with open(output, "wb") as output_file:
            pdf_out.write(output_file)
예제 #4
0
파일: document.py 프로젝트: glins97/PPA
class Document():
    """Single-page PDF built from an image, with drawable overlays.

    The constructor turns an image into page 0 of ``self.pdf``.  The
    ``add_*`` methods each render a one-page overlay and merge it onto
    that page; ``export`` applies a list of such operations and writes
    the result to disk.
    """

    def __init__(self, source):
        """Load ``source``, resize it to 1240 px width and make it page 0.

        ``source`` is prefixed with ``'redactor/'`` unless that substring
        is already present.  The resized image is saved to the same path
        with ``'/in/'`` replaced by ``'/temp/'``; if the path contains no
        ``'/in/'`` the resized image is saved over the source file.

        Raises:
            Exception: if the image cannot be opened.
        """
        if 'redactor/' not in source:
            source = 'redactor/' + source

        try:
            im = Image.open(source)
        except Exception:
            raise Exception('Failed to open image source ' + source)

        source = source.replace('/in/', '/temp/')
        # Fixed width, height scaled to keep the aspect ratio.
        self.page_size = 1240, int(1240.0 * im.size[1] / im.size[0])
        # LANCZOS is the filter ANTIALIAS used to alias; ANTIALIAS itself
        # no longer exists in current Pillow releases.
        im = im.resize(self.page_size, Image.LANCZOS)
        im.save(source)

        document = BytesIO()
        canvas = Canvas(document, pagesize=(self.page_size))
        canvas.setFillColorRGB(1, 1, 1)
        canvas.drawImage(source, 0, 0, mask=(1, 1, 1, 1, 1, 1))
        canvas.save()
        self.pdf = PdfFileWriter()
        self.pdf.addPage(
            PdfFileReader(BytesIO(document.getvalue())).getPage(0))

    def _new_overlay(self):
        """Return ``(buffer, canvas)`` for a fresh page-sized overlay."""
        document = BytesIO()
        return document, Canvas(document, pagesize=self.page_size)

    def _merge_overlay(self, document, canvas):
        """Finish ``canvas`` and merge its single page onto page 0."""
        canvas.save()
        self.pdf.getPage(0).mergePage(
            PdfFileReader(BytesIO(document.getvalue())).getPage(0))

    def add_line(self, x0, y0, x1, y1, color):
        """Draw a 25-unit-wide line from (x0, y0) to (x1, y1).

        ``color`` is unpacked into ``setStrokeColorRGB``.
        """
        document, canvas = self._new_overlay()
        canvas.setLineWidth(25)
        canvas.setStrokeColorRGB(*color)
        canvas.line(x0, y0, x1, y1)
        self._merge_overlay(document, canvas)

    def add_rect(self, x0, y0, x1, y1, color=(0, 0, 0, 0)):
        """Draw a filled, unstroked rectangle with corners (x0, y0), (x1, y1).

        ``color`` is unpacked into ``setFillColorRGB``.  The default is a
        tuple rather than a list so it cannot be mutated between calls.
        """
        document, canvas = self._new_overlay()
        canvas.setFillColorRGB(*color)
        canvas.rect(x0, y0, x1 - x0, y1 - y0, stroke=0, fill=1)
        self._merge_overlay(document, canvas)

    def add_note(self, src, x0, y0, comment='', author=''):
        """Place the image ``src`` at (x0, y0) and attach a highlight note.

        The highlight is always 71 x 39 -- presumably the size of the
        note icon; confirm against the image assets.
        """
        self._add_image(src, x0, y0)
        self._add_highlight(x0, y0, 71, 39, comment, author)

    def export(self, fn, objects):
        """Apply every entry of ``objects['objects']`` and write to ``fn``.

        Each entry is a dict with a ``'mode'`` (``'LINE'``, ``'RECT'`` or
        ``'COMM'``) and an ``'attributes'`` dict that is passed as keyword
        arguments to the matching method.  Entries with any other mode
        are skipped.
        """
        funcs = {
            'LINE': self.add_line,
            'RECT': self.add_rect,
            'COMM': self.add_note,
        }
        for obj in objects['objects']:
            handler = funcs.get(obj['mode'])
            if handler:
                handler(**obj['attributes'])

        # Context manager so the output file is flushed and closed.
        with open(fn, 'wb') as out:
            self.pdf.write(out)

    def _add_image(self, source, x, y):
        """Draw the image at ``source`` onto page 0 at (x, y).

        ``source`` gets the same ``'redactor/'`` prefix rule as the
        constructor argument.
        """
        if 'redactor/' not in source:
            source = 'redactor/' + source
        document, canvas = self._new_overlay()
        canvas.setFillColorRGB(1, 1, 1)
        canvas.drawImage(source, x, y, mask='auto')
        self._merge_overlay(document, canvas)

    def _create_highlight(self,
                          x0,
                          y0,
                          width,
                          height,
                          comment,
                          author='',
                          color=(0, 0, 0, 0)):
        """Build a ``/Highlight`` annotation covering a width x height box.

        The box has its lower-left corner at (x0, y0).  A rectangle with
        the default ``add_rect`` colour is also drawn over the same area.

        Returns:
            The annotation dictionary; it is not yet attached to a page.
        """
        # add_rect takes corner coordinates, not a size.
        x1 = x0 + width
        y1 = y0 + height
        self.add_rect(x0, y0, x1, y1)
        highlight = DictionaryObject()

        highlight.update({
            NameObject("/F"):
            NumberObject(4),
            NameObject("/Type"):
            NameObject("/Annot"),
            NameObject("/Subtype"):
            NameObject("/Highlight"),
            NameObject("/T"):
            TextStringObject(author),
            NameObject("/Contents"):
            TextStringObject(comment),
            NameObject("/C"):
            ArrayObject([FloatObject(c) for c in color]),
            # Vertical extent uses the height (not the width) of the box.
            NameObject("/Rect"):
            ArrayObject([
                FloatObject(x0),
                FloatObject(y0),
                FloatObject(x1),
                FloatObject(y1)
            ]),
            # Corner order: top-left, top-right, bottom-left, bottom-right.
            NameObject("/QuadPoints"):
            ArrayObject([
                FloatObject(x0),
                FloatObject(y1),
                FloatObject(x1),
                FloatObject(y1),
                FloatObject(x0),
                FloatObject(y0),
                FloatObject(x1),
                FloatObject(y0)
            ]),
        })

        return highlight

    def _add_highlight(self,
                       x0,
                       y0,
                       width,
                       height,
                       comment,
                       author='',
                       color=(0, 0, 0, 0)):
        """Create a highlight annotation and attach it to page 0.

        The annotation is registered with the writer and its reference is
        appended to the page's ``/Annots`` array, which is created when
        the page has none yet.
        """
        highlight = self._create_highlight(x0, y0, width, height, comment,
                                           author, color)
        highlight_ref = self.pdf._addObject(highlight)

        page = self.pdf.getPage(0)
        if "/Annots" in page:
            page[NameObject("/Annots")].append(highlight_ref)
        else:
            page[NameObject("/Annots")] = ArrayObject([highlight_ref])
예제 #5
0
def anotate_pdf(file_path, sht, query_dict):
    """Write a copy of a PDF with highlight annotations for each query.

    The copy is stored next to the original as
    ``<name>_highlighted<extension>``.

    Parameters:
        file_path: path of the PDF to annotate.
        sht: object whose ``range('B2').value`` receives a completion
            message (presumably a spreadsheet sheet -- confirm with the
            caller).
        query_dict: mapping of query -> colour.  Each query is passed to
            ``get_page_coordinates`` together with a page layout, and the
            colour is passed to ``create_highlight``.
    """
    # preparing the output file name; stem/suffix also handle files that
    # have no extension, and with_name uses the platform's separator
    source = pathlib.Path(file_path)
    extension = source.suffix
    name = source.stem
    result_file = str(source.with_name(name + '_highlighted' + extension))

    # add tooltip info not sure how to use this option in the most useful way
    m_meta = {"author": "AK", "contents": "HL text1"}

    # The file is read twice (layout analysis and page copying); both
    # handles stay open until the output is written, then are closed.
    with open(file_path, 'rb') as layout_stream, \
            open(file_path, 'rb') as reader_stream:
        # create a parser object associated with the file object
        parser = PDFParser(layout_stream)
        # create a PDFDocument object that stores the document structure
        doc = PDFDocument(parser)

        # Layout analysis with default parameters
        laparams = LAParams()
        rsrcmgr = PDFResourceManager()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # one layout result per page, in page order
        layout = []
        for page in PDFPage.create_pages(doc):
            interpreter.process_page(page)
            layout.append(device.get_result())

        pdfInput = PdfFileReader(reader_stream, strict=True)
        pdfOutput = PdfFileWriter()

        for pgn in range(pdfInput.numPages):
            # Fetched before the query loop so the page is still copied
            # when query_dict is empty.
            page_hl = pdfInput.getPage(pgn)

            for query, color in query_dict.items():
                # Only this page's layout is searched; other pages'
                # coordinates are never used for this page.
                for item in get_page_coordinates(layout[pgn], query):
                    highlight = create_highlight(item[0],
                                                 item[1],
                                                 item[2],
                                                 item[3],
                                                 m_meta,
                                                 color=color)
                    highlight_ref = pdfOutput._addObject(highlight)

                    if "/Annots" in page_hl:
                        page_hl[NameObject("/Annots")].append(highlight_ref)
                    else:
                        page_hl[NameObject("/Annots")] = ArrayObject(
                            [highlight_ref])

            pdfOutput.addPage(page_hl)

        # save HL to new file
        with open(result_file, "wb") as outputStream:
            pdfOutput.write(outputStream)

    sht.range('B2').value = f'File {name+extension} completed'