Ejemplo n.º 1
0
def process_pdf(c,infn,prefix='PageForms'):
    """Replay every page of the PDF file infn onto the canvas c.

    The pages are captured as forms with pageCatcher and restored into c.
    Each restored form is then drawn on its own output page, translated by
    bleedX/bleedY (names that must be defined outside this function), and
    the page is finished with showPage().

    c       canvas the forms are restored into and drawn on
    infn    path of the input PDF file
    prefix  prefix handed to pageCatcher.storeFormsInMemory
    """
    from rlextra.pageCatcher import pageCatcher
    # read via a context manager so the file handle is always closed
    with open(infn,'rb') as pdf_file:
        pdf_bytes = pdf_file.read()
    names, data = pageCatcher.storeFormsInMemory(pdf_bytes,prefix=prefix,all=1)
    del pdf_bytes
    # the names actually used are the ones reported when restoring
    names = pageCatcher.restoreFormsInMemory(data,c)
    del data
    for thisname in names:
        # isolate the translation so it does not leak into the next page
        c.saveState()
        c.translate(bleedX,bleedY)
        c.doForm(thisname)
        c.restoreState()
        c.showPage()
Ejemplo n.º 2
0
def process_pdf(c,infn,prefix='PageForms'):
    """Replay every page of the PDF file infn onto the canvas c.

    The pages are captured as forms with pageCatcher and restored into c.
    Each restored form is then drawn on its own output page, translated by
    bleedX/bleedY (names that must be defined outside this function), and
    the page is finished with showPage().

    c       canvas the forms are restored into and drawn on
    infn    path of the input PDF file
    prefix  prefix handed to pageCatcher.storeFormsInMemory
    """
    from rlextra.pageCatcher import pageCatcher
    # read via a context manager so the file handle is always closed
    with open(infn,'rb') as pdf_file:
        pdf_bytes = pdf_file.read()
    names, data = pageCatcher.storeFormsInMemory(pdf_bytes,prefix=prefix,all=1)
    del pdf_bytes
    # the names actually used are the ones reported when restoring
    names = pageCatcher.restoreFormsInMemory(data,c)
    del data
    for thisname in names:
        # isolate the translation so it does not leak into the next page
        c.saveState()
        c.translate(bleedX,bleedY)
        c.doForm(thisname)
        c.restoreState()
        c.showPage()
Ejemplo n.º 3
0
def encryptPdfInMemory(inputPDF,
                       userPassword,
                       ownerPassword=None,
                       canPrint=1,
                       canModify=1,
                       canCopy=1,
                       canAnnotate=1,
                       strength=40):
    """accepts a PDF file 'as a byte array in memory'; return encrypted one.

    This is a high level convenience and does not touch the hard disk in any way.
    If you are encrypting the same file over and over again, it's better to use
    pageCatcher and cache the results.

    inputPDF        content of the PDF to encrypt
    userPassword, ownerPassword, canPrint, canModify, canCopy,
    canAnnotate, strength
                    passed through unchanged to encryptCanvas

    Returns the contents of the in-memory buffer the new document is saved to.
    Raises ImportError if rlextra's pageCatcher cannot be imported.
    """

    try:
        from rlextra.pageCatcher.pageCatcher import storeFormsInMemory, restoreFormsInMemory
    except ImportError:
        raise ImportError(
            '''reportlab.lib.pdfencrypt.encryptPdfInMemory failed because rlextra cannot be imported.
See https://www.reportlab.com/downloads''')

    (bboxInfo, pickledForms) = storeFormsInMemory(inputPDF, all=1, BBoxes=1)

    # the canvas starts out with the size of the first page; each page
    # gets its own size again inside the loop below
    firstPageSize = bboxInfo['PageForms0'][2:]

    #now make a new PDF document
    buf = getBytesIO()
    canv = Canvas(buf, pagesize=firstPageSize)

    # set a standard ID while debugging
    if CLOBBERID:
        canv._doc._ID = "[(xxxxxxxxxxxxxxxx)(xxxxxxxxxxxxxxxx)]"

    formNames = restoreFormsInMemory(pickledForms, canv)
    for formName in formNames:
        # the last two entries of the bbox info are used as the page size
        canv.setPageSize(bboxInfo[formName][2:])
        canv.doForm(formName)
        canv.showPage()
    encryptCanvas(canv,
                  userPassword,
                  ownerPassword,
                  canPrint,
                  canModify,
                  canCopy,
                  canAnnotate,
                  strength=strength)
    canv.save()
    return buf.getvalue()
Ejemplo n.º 4
0
 def drawFigure(self):
     """Draw the form self.formName on self.canv, scaled by self._scaleFactor.

     If the canvas does not have the form yet, page self.pageNo of
     self.filename is stored with storeFormsInMemory (or the stored result
     is taken from self._cache) and restored into the canvas first.  The
     stored result is kept in self._cache when self.caching is 'memory'.
     """
     if not self.canv.hasForm(self.formName):
         # NOTE(review): the cache is keyed by filename only although the
         # stored data also depends on pageNo and prefix - confirm that one
         # file is never cached for more than one page/prefix.
         if self.filename in self._cache:
             names, data = self._cache[self.filename]
         else:
             # context manager closes the file even if the read fails
             with open(self.filename,'rb') as pdf_file:
                 pdf = pdf_file.read()
             names, data = storeFormsInMemory(pdf, pagenumbers=[self.pageNo], prefix=self.prefix)
             if self.caching=='memory':
                 self._cache[self.filename] = names, data
         restoreFormsInMemory(data, self.canv)
     # keep the scaling local to this figure
     self.canv.saveState()
     self.canv.scale(self._scaleFactor, self._scaleFactor)
     self.canv.doForm(self.formName)
     self.canv.restoreState()
Ejemplo n.º 5
0
    def savePagesAsPdf(self, pageNumbers, fileName):
        """Write the pages listed in pageNumbers to a new PDF file fileName.

        The requested pages are stored as forms with the prefix "page",
        restored into a fresh canvas and drawn one form per output page.
        The canvas page size is computed once from self.getPageSize(0).
        """
        _formNames, formData = storeFormsInMemory(
            self.rawContent,
            pagenumbers=pageNumbers,
            prefix="page",
            BBoxes=0,
            extractText=0,
            fformname=None,
        )
        bx, by, bw, bh = self.getPageSize(0)
        outputSize = (bw - bx, bh - by)
        canv = reportlab.pdfgen.canvas.Canvas(fileName, pagesize=outputSize)
        restoreFormsInMemory(formData, canv)
        # Page rotation is not carried over to the output; the code that
        # would copy it from getPageRotation was left disabled.
        for number in pageNumbers:
            canv.doForm('page%d' % number)
            canv.showPage()
        canv.save()
Ejemplo n.º 6
0
    def savePagesAsPdf(self, pageNumbers, fileName):
        """Write the pages listed in pageNumbers to a new PDF file fileName.

        The requested pages are stored as forms with the prefix "page",
        restored into a fresh canvas and drawn one form per output page.
        The canvas page size is computed once from self.getPageSize(0).
        """
        _formNames, formData = storeFormsInMemory(
            self.rawContent,
            pagenumbers=pageNumbers,
            prefix="page",
            BBoxes=0,
            extractText=0,
            fformname=None,
        )
        bx, by, bw, bh = self.getPageSize(0)
        outputSize = (bw - bx, bh - by)
        canv = reportlab.pdfgen.canvas.Canvas(fileName, pagesize=outputSize)
        restoreFormsInMemory(formData, canv)
        # Page rotation is not carried over to the output; the code that
        # would copy it from getPageRotation was left disabled.
        for number in pageNumbers:
            canv.doForm('page%d' % number)
            canv.showPage()
        canv.save()
Ejemplo n.º 7
0
def loadPdf(filename, canvas, pageNumbers=None, prefix=None):
    """Load pages of the PDF file filename into canvas as forms.

    filename     path of the PDF file to read
    canvas       canvas the forms are restored into
    pageNumbers  optional sequence passed to storeFormsInMemory and also
                 used as indexes into the returned form names to select
                 the forms to restore; when empty or None,
                 restoreFormsInMemory is called with formnames=None
    prefix       form name prefix; defaults to filename without its
                 extension plus '_page'.  Every '/' is replaced by '_'.

    Returns the form names reported by storeFormsInMemory.
    """
    if prefix is None:
        prefix = os.path.splitext(filename)[0] + '_page'
    prefix = prefix.replace('/','_')
    # read via a context manager so the file handle is always closed
    with open(filename,"rb") as pdfFile:
        pdfContent = pdfFile.read()
    (formNames, stuff) = storeFormsInMemory(pdfContent,
                                            pagenumbers=pageNumbers,
                                            prefix=prefix,
                                            all=1)

    if pageNumbers:
        namesToInclude = [formNames[num] for num in pageNumbers]
    else:
        namesToInclude = None
    restoreFormsInMemory(stuff, canvas,
                         allowDuplicates=1,
                         formnames=namesToInclude)
    return formNames
Ejemplo n.º 8
0
def loadPdf(filename, canvas, pageNumbers=None, prefix=None):
    """Load pages of the PDF file filename into canvas as forms.

    filename     path of the PDF file to read
    canvas       canvas the forms are restored into
    pageNumbers  optional sequence passed to storeFormsInMemory and also
                 used as indexes into the returned form names to select
                 the forms to restore; when empty or None,
                 restoreFormsInMemory is called with formnames=None
    prefix       form name prefix; defaults to filename without its
                 extension plus '_page'.  Every '/' is replaced by '_'.

    Returns the form names reported by storeFormsInMemory.
    """
    if prefix is None:
        prefix = os.path.splitext(filename)[0] + '_page'
    prefix = prefix.replace('/', '_')
    # read via a context manager so the file handle is always closed
    with open(filename, "rb") as pdfFile:
        pdfContent = pdfFile.read()
    (formNames, stuff) = storeFormsInMemory(pdfContent,
                                            pagenumbers=pageNumbers,
                                            prefix=prefix,
                                            all=1)

    if pageNumbers:
        namesToInclude = [formNames[num] for num in pageNumbers]
    else:
        namesToInclude = None
    restoreFormsInMemory(stuff,
                         canvas,
                         allowDuplicates=1,
                         formnames=namesToInclude)
    return formNames
Ejemplo n.º 9
0
def encryptPdfInMemory(inputPDF,
                  userPassword, ownerPassword=None,
                  canPrint=1, canModify=1, canCopy=1, canAnnotate=1,
                       strength=40):
    """accepts a PDF file 'as a byte array in memory'; return encrypted one.

    This is a high level convenience and does not touch the hard disk in any way.
    If you are encrypting the same file over and over again, it's better to use
    pageCatcher and cache the results.

    inputPDF        content of the PDF to encrypt
    userPassword, ownerPassword, canPrint, canModify, canCopy,
    canAnnotate, strength
                    passed through unchanged to encryptCanvas

    Returns the contents of the in-memory buffer the new document is saved to.
    Raises ImportError if rlextra's pageCatcher cannot be imported.
    """

    try:
        from rlextra.pageCatcher.pageCatcher import storeFormsInMemory, restoreFormsInMemory
    except ImportError:
        raise ImportError('''reportlab.lib.pdfencrypt.encryptPdfInMemory failed because rlextra cannot be imported.
See http://developer.reportlab.com''')

    (bboxInfo, pickledForms) = storeFormsInMemory(inputPDF, all=1, BBoxes=1)

    firstPageSize = bboxInfo['PageForms0'][2:]

    #now make a new PDF document
    buf = getStringIO()
    canv = Canvas(buf, pagesize=firstPageSize)

    # set a standard ID while debugging
    if CLOBBERID:
        canv._doc._ID = "[(xxxxxxxxxxxxxxxx)(xxxxxxxxxxxxxxxx)]"
    encryptCanvas(canv,
                  userPassword, ownerPassword,
                  canPrint, canModify, canCopy, canAnnotate,
                  strength=strength)

    formNames = restoreFormsInMemory(pickledForms, canv)
    for formName in formNames:
        # give every page its own size instead of reusing the first
        # page's size for the whole document; the size is taken from the
        # bbox info the same way firstPageSize is
        canv.setPageSize(bboxInfo[formName][2:])
        canv.doForm(formName)
        canv.showPage()
    canv.save()
    return buf.getvalue()
Ejemplo n.º 10
0
    def rewrite(self, outFileName):
        """Rewrite PDF, optionally with user decoration

        Creates a new PDF file, outFileName, from the existing one.
        It attempts to take care of rotated and cropped input files,
        and always outputs a file with no page-rotation and with the
        width and height you would normally expect.

        To decorate a page (e.g. overprint a timestamp), subclass
        PdfExplorer, and implement the rewritePage method:

            def rewritePage(self, pageNo, canvas, width, height):
                #your code here

        rewriteUnderPage is called with the same arguments before the
        page content is drawn, rewritePage after it.

        Take care to use the passed-in width and height, which will
        have been corrected by rotation and crop box.
        """
        pageNumbers = list(range(self.pageCount))
        names, pickledData = storeFormsInMemory(
            self.rawContent,
            pagenumbers=pageNumbers,
            prefix="page",
            BBoxes=0,
            extractText=0,
            fformname=None)
        canv = reportlab.pdfgen.canvas.Canvas(outFileName)
        restoreFormsInMemory(pickledData, canv)

        sideways = (90, 270)
        for pageNo in pageNumbers:
            # NOTE(review): the size is read with argument 0 for every
            # page, not with pageNo - confirm this is intended.
            _x, _y, width, height = self.getPageSize(0)
            rotated = self.getPageRotation(pageNo) in sideways
            if rotated:
                width, height = height, width

                # Reach into the restored form object and swap its upper
                # bounds as well; otherwise they can clip off the content.
                # Ideally PageCatcher itself would do this when reading in
                # a rotated/cropped document.
                form = canv._doc.idToObject[xObjectName(names[pageNo])]
                form.uppery, form.upperx = form.upperx, form.uppery

            # If a crop box is set, the user originally 'saw' a window
            # onto the page given by (x1, y1, x2, y2); the page takes the
            # size of that window and the form is shifted across below.
            try:
                cropBox = self.getCropBox(pageNo)
            except KeyError:
                cropBox = None

            if cropBox:
                if rotated:
                    cropY1, cropX1, cropY2, cropX2 = cropBox
                else:
                    cropX1, cropY1, cropX2, cropY2 = cropBox
                height = cropY2 - cropY1
                width = cropX2 - cropX1
            canv.setPageSize((width, height))

            # user hook, drawn underneath the page content
            canv.saveState()
            self.rewriteUnderPage(pageNo, canv, width, height)
            canv.restoreState()

            # the page content itself
            canv.saveState()
            if cropBox:
                canv.translate(-cropX1, -cropY1)
            canv.doForm('page%d' % pageNo)
            canv.restoreState()

            # user hook, drawn on top of the page content
            self.rewritePage(pageNo, canv, width, height)

            canv.showPage()

        canv.save()
Ejemplo n.º 11
0
def includePdfFlowables(fileName,
                        pages=None,
                        dx=0, dy=0, sx=1, sy=1, degrees=0,
                        orientation=None,
                        isdata=False,       #True if this is a preprocessed data file
                        leadingBreak=True,  #True/False or 'notattop'
                        template=None,
                        outlineText=None,
                        outlineLevel=0,
                        outlineClosed=0,
                        pdfBoxType = None,
                        autoCrop = False,
                        pageSize=None,
                        callback=None,
                        user_data=None,
                        ):
    '''
    includePdfFlowables creates a list of story flowables that
                        represents an included PDF.
    Arguments       meaning
    fileName        string name of a .pdf or .data file
    pages           If None all pages will be used, else this argument can
                    be a string like '1,2,4-6,12-10,15' or an explicit
                    list of integers eg [1,2,7].

    dx,dy,          translation together all these make up a transformation
    sx,sy,          scaling     matrix
    degrees,        rotation

    orientation     None, 'auto', integer degrees eg 0 90 270 (taken
                    modulo 360) or 'portrait'/'landscape'
    isdata          True if fileName argument refers to a .data file (as
                    produced by pageCatcher)
    leadingBreak    True/False or 'notattop' specifies whether a leading
                    page break should be used; 'notattop' means a page break
                    will not be used if the story is at the top of a frame.
    template        If specified the index or name of a template to be used.
    outlineText     Any outline text to be used (default None)
    outlineLevel    The level of any outline text.
    outlineClosed   True/False if the outline should be closed or open.

    pdfBoxType      which box to use or None or [x0,y0,  x1,y1]

    autoCrop        True/False crop/don't crop with CropBox (default is False)
                    boxname use for cropping
                    [x0,y0,  x1,y1] crop area

    pageSize        default None ie leave page size alone
                    'set' adjust page size to incoming box
                    'fit' scale incoming box to fit page size
                    'orthfit' orthogonally scale incoming box to fit
                    'center' or 'centre' center the incoming box in
                    the existing page size
                    [x0,y0, x1,y1] use this as the page size

    callback        draw time callback with signature

                    callback(canvas,key,obj,pdf_data,user_data)

                    canvas the canvas being drawn on
                    key may be 'raw-pre'|'transformed-pre'|'transformed-post'|'raw-post'
                    obj the flowable calling the callback
                    pdf_data ('fileName',pageNumber)
                    user_data user data passed down to the flowable from
                              IncludePdfFlowable.

    user_data       information to be passed to the callback

    Returns the list of flowables.
    Raises ValueError if orientation is not one of the accepted values.
    '''
    try:
        orientation = int(orientation) % 360
    except (TypeError, ValueError, OverflowError):
        # not convertible to an integer: accept the symbolic values only
        if orientation=='portrait':
            orientation = 0
        elif orientation=='landscape':
            orientation = 90
        elif orientation!='auto' and orientation is not None:
            raise ValueError('Bad value %r for orientation attribute' % orientation)

    iptrans = IPTrans(sx,sy,dx,dy,degrees)
    if iptrans.trivial(): iptrans = None

    pages = expandPageNumbers(pages)

    # this one is unusual in that it returns a list of objects to
    # go into the story.
    output = []
    output_append = output.append

    if template:
        output_append(NextPageTemplate(template))

    try:
        if isdata:
            # NOTE(security): pickle.loads can execute arbitrary code held
            # in the file; only use isdata=True with trusted .data files.
            pickledStuff = pickle.loads(open_and_read(fileName))
            formNames = pickledStuff[None]
        else:
            #read in the PDF file right now and get the pickled object
            # and names
            pdfContent = open_and_read(fileName)
            prefix = fileName2Prefix(fileName)
            (formNames, pickledStuff) = storeFormsInMemory(
                    pdfContent,
                    prefix=prefix,
                    all=1,
                    BBoxes=0,
                    extractText=0,
                    fformname=None)
    except:
        # NOTE(review): annotateException is presumably expected to re-raise
        # with the extra message attached - confirm; the code below needs
        # pickledStuff and formNames to be set.
        annotateException('\nerror storing %r in memory\n' % fileName)

    #if explicit pages requested, slim it down.
    if pages:
        # page numbers are 1 based, the list of form names is 0 based
        formNames = [formNames[pgNo-1] for pgNo in pages]

    #make object 1 for story
    loader = LoadPdfFlowable(pickledStuff,isdata)
    output_append(loader)

    #now do first page.  This is special as it might
    #have an outline
    formName = formNames[0]
    if leadingBreak:
        output_append((NotAtTopPageBreak if leadingBreak=='notattop' else PageBreak)())
    if outlineText:
        output_append(OutlineEntry(outlineLevel, outlineText, outlineClosed))

    if pageSize=='fit':
        class PageSizeHandler(object):
            '''simple class to allow communications between first and last ShowPdfFlowables'''
            # one list shared by all instances, so a value set through one
            # handler is visible through the other
            _oldPageSize = [None]
            def __init__(self,first):
                self.first = first

            def oldPageSize(self,v):
                self._oldPageSize[0] = v
            oldPageSize = property(lambda self: self._oldPageSize[0],oldPageSize)
        pageSizeHandler = PageSizeHandler(True)
    else:
        pageSizeHandler = None
    output_append(ShowPdfFlowable(formName,orientation=orientation,iptrans=iptrans,
                        callback=callback,
                        pdf_data=(fileName,pages[0] if pages else 1),
                        user_data=user_data,
                        pdfBoxType=pdfBoxType,
                        autoCrop=autoCrop,
                        pageSize=pageSize,
                        pageSizeHandler=pageSizeHandler,
                        ))

    #now make a shower for each laterpage, and a page break
    for i,formName in enumerate(formNames[1:], 1):
        output_append(PageBreak())
        # i indexes formNames/pages; when no explicit pages were requested
        # the 1 based page number is i+1 (the first page reports 1 above)
        output_append(ShowPdfFlowable(formName,orientation=orientation,iptrans=iptrans,
                callback=callback,
                pdf_data=(fileName,pages[i] if pages else i+1),
                user_data=user_data,
                pdfBoxType=pdfBoxType,
                autoCrop=autoCrop,
                pageSize=pageSize,
                pageSizeHandler=None,
                ))
    if pageSize=='fit':
        # the last flowable gets a handler marked as not-first
        output[-1]._pageSizeHandler = PageSizeHandler(False)
    return output
Ejemplo n.º 12
0
    def rewrite(self, outFileName):
        """Rewrite PDF, optionally with user decoration

        Creates a new PDF file, outFileName, from the existing one.
        It attempts to take care of rotated and cropped input files,
        and always outputs a file with no page-rotation and with the
        width and height you would normally expect.

        To decorate a page (e.g. overprint a timestamp), subclass
        PdfExplorer, and implement the rewritePage method:

            def rewritePage(self, pageNo, canvas, width, height):
                #your code here

        rewriteUnderPage is called with the same arguments before the
        page content is drawn, rewritePage after it.

        Take care to use the passed-in width and height, which will
        have been corrected by rotation and crop box.
        """
        pageNumbers = list(range(self.pageCount))
        names, pickledData = storeFormsInMemory(
            self.rawContent,
            pagenumbers=pageNumbers,
            prefix="page",
            BBoxes=0,
            extractText=0,
            fformname=None)
        canv = reportlab.pdfgen.canvas.Canvas(outFileName)
        restoreFormsInMemory(pickledData, canv)

        sideways = (90, 270)
        for pageNo in pageNumbers:
            # NOTE(review): the size is read with argument 0 for every
            # page, not with pageNo - confirm this is intended.
            _x, _y, width, height = self.getPageSize(0)
            rotated = self.getPageRotation(pageNo) in sideways
            if rotated:
                width, height = height, width

                # Reach into the restored form object and swap its upper
                # bounds as well; otherwise they can clip off the content.
                # Ideally PageCatcher itself would do this when reading in
                # a rotated/cropped document.
                form = canv._doc.idToObject[xObjectName(names[pageNo])]
                form.uppery, form.upperx = form.upperx, form.uppery

            # If a crop box is set, the user originally 'saw' a window
            # onto the page given by (x1, y1, x2, y2); the page takes the
            # size of that window and the form is shifted across below.
            try:
                cropBox = self.getCropBox(pageNo)
            except KeyError:
                cropBox = None

            if cropBox:
                if rotated:
                    cropY1, cropX1, cropY2, cropX2 = cropBox
                else:
                    cropX1, cropY1, cropX2, cropY2 = cropBox
                height = cropY2 - cropY1
                width = cropX2 - cropX1
            canv.setPageSize((width, height))

            # user hook, drawn underneath the page content
            canv.saveState()
            self.rewriteUnderPage(pageNo, canv, width, height)
            canv.restoreState()

            # the page content itself
            canv.saveState()
            if cropBox:
                canv.translate(-cropX1, -cropY1)
            canv.doForm('page%d' % pageNo)
            canv.restoreState()

            # user hook, drawn on top of the page content
            self.rewritePage(pageNo, canv, width, height)

            canv.showPage()

        canv.save()
Ejemplo n.º 13
0
def includePdfFlowables(
    fileName,
    pages=None,
    dx=0,
    dy=0,
    sx=1,
    sy=1,
    degrees=0,
    orientation=None,
    isdata=False,  #True if this is a preprocessed data file
    leadingBreak=True,  #True/False or 'notattop'
    template=None,
    outlineText=None,
    outlineLevel=0,
    outlineClosed=0,
    pdfBoxType=None,
    autoCrop=False,
    pageSize=None,
    callback=None,
    user_data=None,
):
    '''
    includePdfFlowables creates a list of story flowables that
                        represents an included PDF.
    Arguments       meaning
    fileName        string name of a .pdf or .data file
    pages           If None all pages will be used, else this argument can
                    be a string like '1,2,4-6,12-10,15' or an explicit
                    list of integers eg [1,2,7].

    dx,dy,          translation together all these make up a transformation
    sx,sy,          scaling     matrix
    degrees,        rotation

    orientation     None, 'auto', integer degrees eg 0 90 270 (taken
                    modulo 360) or 'portrait'/'landscape'
    isdata          True if fileName argument refers to a .data file (as
                    produced by pageCatcher)
    leadingBreak    True/False or 'notattop' specifies whether a leading
                    page break should be used; 'notattop' means a page break
                    will not be used if the story is at the top of a frame.
    template        If specified the index or name of a template to be used.
    outlineText     Any outline text to be used (default None)
    outlineLevel    The level of any outline text.
    outlineClosed   True/False if the outline should be closed or open.

    pdfBoxType      which box to use or None or [x0,y0,  x1,y1]

    autoCrop        True/False crop/don't crop with CropBox (default is False)
                    boxname use for cropping
                    [x0,y0,  x1,y1] crop area

    pageSize        default None ie leave page size alone
                    'set' adjust page size to incoming box
                    'fit' scale incoming box to fit page size
                    'orthfit' orthogonally scale incoming box to fit
                    'center' or 'centre' center the incoming box in
                    the existing page size
                    [x0,y0, x1,y1] use this as the page size

    callback        draw time callback with signature

                    callback(canvas,key,obj,pdf_data,user_data)

                    canvas the canvas being drawn on
                    key may be 'raw-pre'|'transformed-pre'|'transformed-post'|'raw-post'
                    obj the flowable calling the callback
                    pdf_data ('fileName',pageNumber)
                    user_data user data passed down to the flowable from
                              IncludePdfFlowable.

    user_data       information to be passed to the callback

    Returns the list of flowables.
    Raises ValueError if orientation is not one of the accepted values.
    '''
    try:
        orientation = int(orientation) % 360
    except (TypeError, ValueError, OverflowError):
        # not convertible to an integer: accept the symbolic values only
        if orientation == 'portrait':
            orientation = 0
        elif orientation == 'landscape':
            orientation = 90
        elif orientation != 'auto' and orientation is not None:
            raise ValueError('Bad value %r for orientation attribute' %
                             orientation)

    iptrans = IPTrans(sx, sy, dx, dy, degrees)
    if iptrans.trivial(): iptrans = None

    pages = expandPageNumbers(pages)

    # this one is unusual in that it returns a list of objects to
    # go into the story.
    output = []
    output_append = output.append

    if template:
        output_append(NextPageTemplate(template))

    try:
        if isdata:
            # NOTE(security): pickle.loads can execute arbitrary code held
            # in the file; only use isdata=True with trusted .data files.
            pickledStuff = pickle.loads(open_and_read(fileName))
            formNames = pickledStuff[None]
        else:
            #read in the PDF file right now and get the pickled object
            # and names
            pdfContent = open_and_read(fileName)
            prefix = fileName2Prefix(fileName)
            (formNames, pickledStuff) = storeFormsInMemory(pdfContent,
                                                           prefix=prefix,
                                                           all=1,
                                                           BBoxes=0,
                                                           extractText=0,
                                                           fformname=None)
    except:
        # NOTE(review): annotateException is presumably expected to re-raise
        # with the extra message attached - confirm; the code below needs
        # pickledStuff and formNames to be set.
        annotateException('\nerror storing %r in memory\n' % fileName)

    #if explicit pages requested, slim it down.
    if pages:
        # page numbers are 1 based, the list of form names is 0 based
        formNames = [formNames[pgNo - 1] for pgNo in pages]

    #make object 1 for story
    loader = LoadPdfFlowable(pickledStuff, isdata)
    output_append(loader)

    #now do first page.  This is special as it might
    #have an outline
    formName = formNames[0]
    if leadingBreak:
        output_append((NotAtTopPageBreak
                       if leadingBreak == 'notattop' else PageBreak)())
    if outlineText:
        output_append(OutlineEntry(outlineLevel, outlineText, outlineClosed))

    if pageSize == 'fit':

        class PageSizeHandler(object):
            '''simple class to allow communications between first and last ShowPdfFlowables'''
            # one list shared by all instances, so a value set through one
            # handler is visible through the other
            _oldPageSize = [None]

            def __init__(self, first):
                self.first = first

            def oldPageSize(self, v):
                self._oldPageSize[0] = v

            oldPageSize = property(lambda self: self._oldPageSize[0],
                                   oldPageSize)

        pageSizeHandler = PageSizeHandler(True)
    else:
        pageSizeHandler = None
    output_append(
        ShowPdfFlowable(
            formName,
            orientation=orientation,
            iptrans=iptrans,
            callback=callback,
            pdf_data=(fileName, pages[0] if pages else 1),
            user_data=user_data,
            pdfBoxType=pdfBoxType,
            autoCrop=autoCrop,
            pageSize=pageSize,
            pageSizeHandler=pageSizeHandler,
        ))

    #now make a shower for each laterpage, and a page break
    for i, formName in enumerate(formNames[1:], 1):
        output_append(PageBreak())
        # i indexes formNames/pages; when no explicit pages were requested
        # the 1 based page number is i + 1 (the first page reports 1 above)
        output_append(
            ShowPdfFlowable(
                formName,
                orientation=orientation,
                iptrans=iptrans,
                callback=callback,
                pdf_data=(fileName, pages[i] if pages else i + 1),
                user_data=user_data,
                pdfBoxType=pdfBoxType,
                autoCrop=autoCrop,
                pageSize=pageSize,
                pageSizeHandler=None,
            ))
    if pageSize == 'fit':
        # the last flowable gets a handler marked as not-first
        output[-1]._pageSizeHandler = PageSizeHandler(False)
    return output