def __pdf_meta_writer(self):
        """Write a copy of the source PDF that carries this object's metadata.

        Every page of ``self.source_file`` is copied into a new writer whose
        info dictionary is given ``self.title``, ``self.author``,
        ``self.subject`` and ``self.keywords``.  The result is written to
        ``self.temp_file_path``; afterwards ``self.__file_replacement()`` and
        ``self.__fast_web_view()`` are called.

        If ``self.source_file`` is not an existing file, a "not found" message
        is emitted through ``self.trigger`` and nothing is written.
        """
        if not os.path.isfile(self.source_file):
            broken_file_path = str("File: %s not found!!!" % self.source_file)
            self.trigger.emit(broken_file_path)
            return

        # The reader pulls page data from the input stream while the writer
        # serialises, so the input stays open until the output is written.
        with open(self.source_file, "rb") as input_file:
            fileinput = PdfFileReader(input_file)
            output = PdfFileWriter()

            # The writer's info dictionary is only reachable through the
            # private _info attribute.
            meta_data = output._info.getObject()
            meta_data.update({
                NameObject("/Title"): createStringObject(self.title),
                NameObject("/Author"): createStringObject(self.author),
                NameObject("/Subject"): createStringObject(self.subject),
                NameObject("/Keywords"): createStringObject(self.keywords),
            })

            for page in range(fileinput.getNumPages()):
                output.addPage(fileinput.getPage(page))

            # open() rather than the Python-2-only file() builtin; both
            # context managers close their streams on exit.
            with open(self.temp_file_path, "wb") as output_stream:
                output.write(output_stream)

        self.__file_replacement()
        self.__fast_web_view()
def editPDF(filename):
    """Write a copy of a PDF with tag metadata added.

    The tag string is looked up as ``tag_dict[filename]`` and stored under
    both the ``/Tags`` and ``/Keywords`` info keys.  The copy is written into
    the module-level ``directory`` under the input name with its extension
    replaced by ``_updated.pdf``.

    Arguments:
    filename -- path of the PDF to read; also the key into ``tag_dict``.

    Raises KeyError if ``filename`` has no entry in ``tag_dict``.
    """
    # splitext instead of a fixed four-character slice, so names whose
    # extension is not exactly three characters long are handled too.
    output_name = os.path.splitext(filename)[0] + '_updated.pdf'

    # Look the tags up before opening any file, so a missing entry cannot
    # leave an open handle behind.
    tags = tag_dict[filename]

    output = PdfFileWriter()
    info_dict = output._info.getObject()
    info_dict.update({
        NameObject('/Tags'): createStringObject(tags),
        NameObject('/Keywords'): createStringObject(tags),
    })

    # The input stream must stay open until the writer has finished, because
    # the copied pages still refer to the reader.
    with open(filename, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        for page in range(pdf_in.getNumPages()):
            output.addPage(pdf_in.getPage(page))

        with open(os.path.join(directory, output_name), 'wb') as output_stream:
            output.write(output_stream)
Beispiel #3
0
  def setMetadata(self, metadata):
    """Returns the document's content as bytes with new metadata applied.

    Keyword arguments:
    metadata -- dictionary of metadata.  A "ModificationDate" entry is stored
                under "ModDate"; a "Keywords" list is joined with spaces.
                The dictionary passed in is not modified.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    # Work on a copy so the caller's dictionary is left untouched.
    metadata = dict(metadata)

    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
      metadata['ModDate'] = modification_date

    keywords = metadata.get('Keywords')
    if isinstance(keywords, list):
      # str.join is called on the separator, not on the list.
      metadata['Keywords'] = ' '.join(keywords)

    # NOTE(review): str.capitalize() lower-cases everything after the first
    # character, so 'ModDate' ends up as '/Moddate' -- confirm this is intended.
    args = {}
    for key, value in metadata.items():
      args[NameObject('/' + key.capitalize())] = createStringObject(value)

    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)

    output_stream = io.BytesIO()
    # Keep the source open until the writer has serialised the copied pages,
    # then let the context manager close it.
    with open(self.document.getUrl(), "rb") as input_file:
      input_pdf = PdfFileReader(input_file)
      for page_num in range(input_pdf.getNumPages()):
        output_pdf.addPage(input_pdf.getPage(page_num))
      output_pdf.write(output_stream)

    return output_stream.getvalue()
    def save(self, filename):
        """Render every page section to an image and bundle them into a PDF.

        For each (index, page) pair from ``self.mypages`` / ``self.pages`` and
        each ``(x, y, w, h)`` section returned by ``page.get_sections()``, the
        external ``convert`` command crops that region out of page ``index``
        of ``self.original`` into a temporary PNG.  The PNGs are combined into
        an intermediate ``x.pdf``, whose pages are copied into a new PDF with
        ``/Title`` and ``/Author`` taken from ``self.info``.

        Arguments:
        filename -- path the final PDF is written to.
        """
        density = 300
        midfile = 'x.pdf'
        page_count = len(self.pages)
        temps = []

        try:
            for (i, page) in zip(self.mypages, self.pages):
                page_sections = page.get_sections()
                for j, section in enumerate(page_sections):
                    sys.stdout.write("\rWriting Image for page %d/%d, section %d/%d     "%(i+1, page_count, j+1, len(page_sections)))
                    sys.stdout.flush()
                    sectionfile = tempfile.NamedTemporaryFile(suffix='.png')
                    temps.append(sectionfile)

                    (x, y, w, h) = section
                    pagefn = "%s[%d]"%(self.original, i)
                    # Scale the section rectangle by density/72 (presumably
                    # PDF points to pixels at the render density -- confirm).
                    x = int(x*density / 72)
                    y = int(y*density / 72)
                    w = int(w*density / 72)
                    h = int(h*density / 72)
                    execute(["convert", "-density", str(density), pagefn, "-crop", '%dx%d%+d%+d'%(w,h,x,y), '-trim',  '+repage', '-trim', sectionfile.name])
            sys.stdout.write("\n")
            sys.stdout.flush()

            execute(["convert"] + [f.name for f in temps] + [midfile])
        finally:
            # Closing a NamedTemporaryFile also deletes it; do so even when a
            # convert call failed part-way through.
            for f in temps:
                f.close()

        try:
            output = PdfFileWriter()
            infoDict = output._info.getObject()
            infoDict.update({
                NameObject('/Title'): createStringObject(self.info.get('title', "david")),
                NameObject('/Author'): createStringObject(self.info.get('author', ""))
            })

            # The intermediate file stays open until the writer has finished,
            # because the copied pages still refer to the reader.
            with open(midfile, "rb") as mid_stream:
                input1 = PdfFileReader(mid_stream)
                for pn in range(input1.getNumPages()):
                    output.addPage(input1.getPage(pn))
                # Write to the path given by the caller.
                with open(filename, "wb") as outputStream:
                    output.write(outputStream)
        finally:
            # Remove the intermediate PDF whether or not writing succeeded.
            execute(['rm', midfile])
def addCopyrightToPDF(pdf_file_location, pdf_file_destination,copyrightText, drawText=True, title="",authors=""):
    """Stamp a copyright notice on the last page of a PDF and set its metadata.

    A one-page overlay is drawn with reportlab and merged onto the last page
    of the PDF at ``pdf_file_location``.  All pages are then written to
    ``pdf_file_destination`` with ``/Title`` and ``/Author`` set.

    Arguments:
    pdf_file_location    -- path of the PDF to read.
    pdf_file_destination -- path the stamped PDF is written to.
    copyrightText        -- notice text; the first line is drawn at y=40 and
                            the second line, if there is one, at y=50.
    drawText             -- when False an empty string is drawn instead of
                            the notice.
    title, authors       -- values for the /Title and /Author info keys.
    """
    # Local import: a PDF is binary data, so the overlay is built in a bytes
    # buffer rather than a text buffer.
    import io

    packet = io.BytesIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Times-Roman",7)
    if drawText:
        ctext = copyrightText.split("\n")
        can.drawString(30, 40, ctext[0])
        # A single-line notice has no second line to draw.
        if len(ctext) > 1:
            can.drawString(30, 50, ctext[1])
    else:
        can.drawString(30, 50, "")
    can.save()

    # move to the beginning of the buffer so the reader sees the whole overlay
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()

    # The source stream stays open until the output is written, because the
    # copied pages still refer to the reader.
    with open(pdf_file_location, "rb") as existing_stream:
        existing_pdf = PdfFileReader(existing_stream)
        npagesorig = existing_pdf.getNumPages()
        for i in range(npagesorig):
            page = existing_pdf.getPage(i)
            # only the last page receives the overlay
            if i == (npagesorig-1):
                page.mergePage(new_pdf.getPage(0))
            output.addPage(page)

        infoDict = output._info.getObject()
        infoDict.update({
            NameObject('/Title'): createStringObject(title),
            NameObject('/Author'): createStringObject(authors)
        })

        # finally, write "output" to a real file
        with open(pdf_file_destination, "wb") as outputStream:
            output.write(outputStream)
Beispiel #6
0
def modifyMeta(pathFile, fileNameInput, fileNameOutput, metaInfo):
    """Copy a PDF, replacing its document-info metadata.

    The output path is printed, ``/Producer`` is blanked, every entry of
    ``metaInfo`` is written into the new document's info dictionary, and all
    pages of the input are copied across.

    Arguments:
    pathFile       -- directory containing both the input and the output file.
    fileNameInput  -- name of the PDF to read.
    fileNameOutput -- name of the PDF to write.
    metaInfo       -- mapping of info key names (used as given, e.g.
                      '/Title') to string values.
    """
    inputFile = os.path.join(pathFile, fileNameInput)
    outputFile = os.path.join(pathFile, fileNameOutput)
    # Parenthesised so the line is valid as both a print statement and a
    # print() call.
    print(outputFile)

    # There is no interface through pyPDF with which to set this other than
    # reaching into the writer's private _info dictionary.
    output = PdfFileWriter()
    infoDict = output._info.getObject()

    # Clear Producer, which has a default value.
    infoDict.update({
        NameObject('/Producer'): createStringObject(u'')
    })

    # Assign the values passed in as a parameter.
    infoDict.update(dict(
        (NameObject(metaName), createStringObject(metaValue))
        for metaName, metaValue in metaInfo.items()
    ))

    # The input stream stays open until the output is written, because the
    # copied pages still refer to the reader.
    with open(inputFile, 'rb') as inputStream:
        pdfFile = PdfFileReader(inputStream)
        for page in range(pdfFile.getNumPages()):
            output.addPage(pdfFile.getPage(page))

        with open(outputFile, 'wb') as outputStream:
            output.write(outputStream)
	def __modifyData(self):
		"""Replace every value in self.dataToUpdate with a fixed HTML marker string."""
		# ASCII-encoded once; a new string object is still built for each key.
		payload = ('<h1 onmouseover=alert(1)>').encode('ascii')
		for field in self.dataToUpdate:
			self.dataToUpdate[field] = createStringObject(payload)
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject

OUTPUT = 'ml1.pdf'
INPUT = 'NOFO.pdf'

# There is no interface through pyPDF with which to set this other than
# getting your hands dirty like so: the writer's private _info dictionary is
# updated directly.
output = PdfFileWriter()
infoDict = output._info.getObject()
# Show the info dictionary before and after the update.  Parenthesised so the
# lines are valid as both print statements and print() calls.
print(infoDict)
infoDict.update({
    NameObject('/Title'): createStringObject(u'title'),
    NameObject('/Author'): createStringObject(u'author'),
    NameObject('/Subject'): createStringObject(u'subject'),
    NameObject('/Creator'): createStringObject(u'a script')
})
print(infoDict)

# The input stream stays open until the output is written, because the copied
# pages still refer to the reader; both streams are closed on exit.
with open(INPUT, 'rb') as fin:
    pdf_in = PdfFileReader(fin)
    for page in range(pdf_in.getNumPages()):
        output.addPage(pdf_in.getPage(page))

    with open(OUTPUT, 'wb') as outputStream:
        output.write(outputStream)

from pyPdf import PdfFileReader, PdfFileWriter

# Re-open the freshly written file; the stream is left open because the
# reader object needs it for as long as it is in use.
pdf = PdfFileReader(open(OUTPUT, 'rb'))
Beispiel #9
0
import sys

# Command-line script: copy a PDF, drop its /Producer entry and set the title
# and author typed in by the user.
parser = argparse.ArgumentParser(description=u'Limpia los metadatos de un PDF y opcionalmente añade título y autor')
parser.add_argument("input", help="fichero pdf origen")
parser.add_argument("output", help="fichero pdf destino")
args = parser.parse_args()

pdfOut = PdfFileWriter()

# The input stream stays open until the output is written, because the copied
# pages still refer to the reader; both streams are closed on exit.
with open(args.input, 'rb') as fin:
    pdfIn = PdfFileReader(fin)
    for page in range(pdfIn.getNumPages()):
        pdfOut.addPage(pdfIn.getPage(page))

    info = pdfOut._info.getObject()
    del info[NameObject('/Producer')]

    # sys.stdin.encoding is None when stdin is not a terminal (e.g. piped
    # input); fall back to UTF-8 so decode() still gets a codec name.
    stdin_encoding = sys.stdin.encoding or 'utf-8'
    title = raw_input("Titulo:").decode(stdin_encoding)
    author = raw_input("Autor:").decode(stdin_encoding)
    info.update({
        NameObject('/Title'): createStringObject(title),
        NameObject('/Author'): createStringObject(author)
    })

    with open(args.output, 'wb') as fout:
        pdfOut.write(fout)
Beispiel #10
0
# Script: copy a PDF to outputFile.pdf, carrying its document info across but
# blanking a fixed set of identifying fields.
inpfn = raw_input('Enter PDF path : ')

writer = PdfFileWriter()
infoDict = writer._info.getObject()

# Info keys whose values are replaced with an empty string in the output.
list_of_data_to_delete = [
    '/CreationDate', '/Author', '/Creator', '/ModDate', '/Producer', '/Title'
]

# The input stream stays open until the output is written, because the copied
# pages still refer to the reader; both streams are closed on exit.
with open(inpfn, 'rb') as fin:
    pdf_in = PdfFileReader(fin)

    for page in range(pdf_in.getNumPages()):
        writer.addPage(pdf_in.getPage(page))

    # Guard against documentInfo being None (a PDF with no info dictionary).
    info = pdf_in.documentInfo or {}
    for key in info:
        infoDict.update({NameObject(key): createStringObject(info[key])})

    for item in list_of_data_to_delete:
        try:
            infoDict.update({NameObject(item): createStringObject(u'')})
        except Exception as exc:
            # Best effort: report the field that could not be blanked and
            # carry on with the rest.
            print("can't delete : %s (%s)" % (item, exc))

    with open('outputFile.pdf', 'wb') as fout:
        writer.write(fout)