Example #1
0
 def saveAs(self, fname):
     """Write a copy of the loaded document to ``fname``.

     Every page of ``self.pdf`` is added to a fresh writer; the source's
     Info dictionary and outline tree are then attached to the new
     trailer before the file is written.
     """
     writer = PdfWriter()
     writer.addpages(self.pdf.pages)
     # Attach the source metadata and bookmarks to the output trailer.
     writer.trailer.Info = self.pdf.Info
     writer.trailer.Root.Outlines = self.pdf.Root.Outlines
     writer.write(fname)
Example #2
0
def go(inpfn, outfn):
    """Adjust the single page of ``inpfn`` and save it as ``outfn``.

    The input must contain exactly one page (the unpacking below raises
    ValueError otherwise). The source Info dictionary is copied into the
    output trailer.
    """
    source = PdfReader(inpfn, decompress=False)
    (only_page,) = source.pages
    out = PdfWriter()
    out.addpage(adjust(only_page))
    out.trailer.Info = IndirectPdfDict(source.Info)
    out.write(outfn)
Example #3
0
def splitting(filenameOut ="out",*varargs):
    """Interleave the pages of several PDFs into ``filenameOut + ".pdf"``.

    Page 1 of every input is written first, then page 2 of every input,
    and so on, stopping at the page count of the shortest input.

    Args:
        filenameOut: Output name without extension; ".pdf" is appended.
        *varargs: Paths of the input PDF files.

    Raises:
        ValueError: If an input path or ``filenameOut`` is not a string,
            or if no input file is given.
    """
    for path in varargs:
        if not isinstance(path, str):
            raise ValueError("Errore: i file devono essere pdf")

    if not isinstance(filenameOut, str):
        raise ValueError("Errore: il nome del file deve essere di tipo str")

    # Without inputs there is no shortest document to bound the loop.
    if not varargs:
        raise ValueError("Errore: inserire almeno un file.")

    # Parse every input exactly once instead of once per output page.
    page_lists = [PdfReader(path).pages for path in varargs]
    shared_pages = min(len(pages) for pages in page_lists)

    writer = PdfWriter()
    for index in range(shared_pages):
        for pages in page_lists:
            writer.addPage(pages[index])

    writer.write(filenameOut + ".pdf")
Example #4
0
def two_up(data):
    """Lay a two-page PDF out side by side and emit it on stdout.

    ``data`` is passed to ``PdfReader(fdata=...)``. Both pages are rotated
    by 270 and scaled by sqrt(0.5); the second page is shifted right by
    the scaled width of the merged box. The result is written to
    ``sys.stdout`` after a Content-Type header line.
    """
    pdf = PdfReader(fdata=data)
    merged = PageMerge() + pdf.pages

    assert len(merged) == 2

    first, second = merged

    turn = 270
    shrink = 0.7071067811865476  # sqrt(0.5)

    # Taken before the pages are rotated or scaled.
    shift = shrink * merged.xobj_box[2]

    for half in (first, second):
        half.Rotate = turn
        half.scale(shrink)
    second.x = shift

    out = PdfWriter()
    out.addpage(merged.render())

    # Keep the source metadata, replacing only the Creator entry.
    pdf.Info.Creator = 'modulo-nic.py %s' % __version__
    out.trailer.Info = pdf.Info

    sys.stdout.write('Content-Type: application/x-pdf\n\n')
    out.write(sys.stdout)
    def splitting(*varargs,filenameOut ="out"):
        """Interleave the pages of at least two PDFs into one output file.

        Page 1 of every input is written first, then page 2 of every
        input, and so on, stopping at the page count of the shortest input.

        Args:
            *varargs: Paths of the input PDF files (two or more).
            filenameOut: Output file name; ".pdf" is appended when missing.

        Raises:
            IndexError: If fewer than two input files are given.
            ValueError: If an input path or ``filenameOut`` is not a string.
        """
        if len(varargs) <= 1:
            raise IndexError("Errore: inserire almeno due file.")

        for path in varargs:
            if not isinstance(path, str):
                raise ValueError("Errore: i file devono essere pdf")

        if not isinstance(filenameOut, str):
            raise ValueError("Errore: il nome del file deve essere di tipo str")

        # Parse every input exactly once instead of once per output page.
        page_lists = [PdfReader(path).pages for path in varargs]
        shared_pages = min(len(pages) for pages in page_lists)

        writer = PdfWriter()
        for index in range(shared_pages):
            for pages in page_lists:
                writer.addPage(pages[index])

        if not filenameOut.endswith('.pdf'):
            filenameOut = filenameOut + '.pdf'

        writer.write(filenameOut)
def combine_match_sheets(match_sheets):
    """Concatenate the given match-sheet PDFs into a single file.

    The combined document is written as ``combined_match_sheets.pdf``
    inside ``match_sheet_dir``; the full output path is returned.
    """
    destination = os.path.join(match_sheet_dir, "combined_match_sheets.pdf")
    merged = PdfWriter()
    for sheet_path in match_sheets:
        sheet_pages = PdfReader(sheet_path).pages
        merged.addpages(sheet_pages)

    merged.write(destination)
    return destination
Example #7
0
def save_pdf(infile, outpages):
    """Write ``outpages`` to the file name derived from ``infile``.

    The Info dictionary of ``infile`` is reused for the output, with the
    Producer entry replaced by the impositioner project URL.
    """
    source = PdfReader(infile)
    target_name = create_filename(infile)
    out = PdfWriter()
    out.addpages(outpages)
    # NOTE(review): if the source has no Info dictionary the Producer
    # assignment below presumably fails -- confirm against pdfrw.
    out.trailer.Info = source.Info
    out.trailer.Info.Producer = "https://github.com/sgelb/impositioner"
    out.write(target_name)
Example #8
0
def main():
    """Strip ResearchGate additions from the PDF named on the command line.

    Takes two positional arguments (input and output file names). For
    every page, all annotations are removed and the blue underline strokes
    matched by the pattern below are turned into invisible moves. The
    first page is dropped from the output.
    """
    cli = argparse.ArgumentParser(description="Strip ResearchGate additions from a PDF")
    cli.add_argument("infile", metavar="input-filename", type=str, nargs=1,
                     help="PDF file to process")
    cli.add_argument("outfile", metavar="output-filename", type=str, nargs=1,
                     help="name for processed output file")
    options = cli.parse_args()

    # Matches the ResearchGate underline drawing commands. The RGB triplet
    # ("0.3333333333 0.6941176471 0.9607843137") is matched in truncated
    # form because it is not always written with the same precision.
    underline_re = re.compile(r"""(0\.33333[0-9]+ 0\.694117[0-9]+ 0\.960784[0-9]+ RG
\d+\.?\d* w
\d+\.?\d* \d+\.?\d* m
\d+\.?\d* \d+\.?\d* )l
S""")

    all_pages = PdfReader(options.infile[0]).pages

    def neutralise_underlines(obj):
        # Objects without a stream attribute (or with an empty one) are
        # left alone.
        if hasattr(obj, "stream"):
            data = obj.stream
            if data is not None and underline_re.search(data):
                # Replace the line-draw operator with a move so nothing
                # is painted, keeping the rest of the stream intact.
                obj.stream = underline_re.sub("\\1m\nS", data)

    for page in all_pages:
        # Drop every annotation on the page, not only ResearchGate links.
        if "/Annots" in page:
            page.pop("/Annots")
        # The stream may live on the Contents object itself and/or on its
        # children, so both are checked.
        neutralise_underlines(page.Contents)
        for child in page.Contents:
            neutralise_underlines(child)

    out = PdfWriter()

    # Skip the first page: it is the ResearchGate cover sheet.
    for page in all_pages[1:]:
        out.addpage(page)
    out.write(options.outfile[0])
Example #9
0
def combine(inpfn, outfn, x, y, gap):
    """Build ``outfn`` from pages that ``getPages`` assembles out of ``inpfn``.

    ``x``, ``y`` and ``gap`` are forwarded unchanged to ``getPages``.
    """
    remaining = PdfReader(inpfn).pages
    out = PdfWriter()

    # Presumably getPages() removes the pages it consumes from `remaining`;
    # the loop only ends once that list is empty -- verify against getPages.
    while remaining:
        sheet = getPages(remaining, x, y, gap)
        out.addpage(sheet)

    out.write(outfn)
Example #10
0
def test_pdf(pdfname):
    """Round-trip ``pdfname`` through pdfrw into the output directory.

    The output file is named after the MD5 hex digest of ``pdfname``. The
    original file name is recorded on the Info dictionary, or on the
    trailer itself when setting it on Info raises AttributeError.
    """
    digest = hashlib.md5(pdfname).hexdigest()
    target = os.path.join(outdir, digest + '.pdf')
    print >> stderr, '             ->', target
    document = PdfReader(pdfname, decompress=False)
    try:
        document.Info.OriginalFileName = pdfname
    except AttributeError:
        document.OriginalFileName = pdfname
    out = PdfWriter()
    # Reuse the whole parsed trailer as the output trailer.
    out.trailer = document
    out.write(target)
Example #11
0
File: pwat.py Project: utero/P-WAT
def writepdf():
    """Write a copy of the global ``pdf`` file with its metadata overwritten.

    Creator, Author, Title and Producer are set to "NOT"; both date fields
    are set to "6/6/6". The copy is saved as "pwat.<basename>" in the
    current directory.
    """
    target = "pwat." + os.path.basename(pdf)
    document = PdfReader(pdf)
    for field in ("Creator", "Author", "Title", "Producer"):
        setattr(document.Info, field, "NOT")
    for field in ("CreationDate", "ModDate"):
        setattr(document.Info, field, "6/6/6")
    out = PdfWriter()
    out.trailer = document
    out.write(target)
Example #12
0
def makeOnePagers(filename='GPO-CONAN-REV-2014.pdf' ,path='pdf/'):
    """Split ``filename`` into one single-page PDF per page.

    Each non-empty page is written to ``<path>/pageindex-<i>.pdf`` where
    ``i`` is the zero-based page index. The page count and each processed
    index are printed. A page that cannot be written is reported and
    skipped so the remaining pages are still produced.

    Args:
        filename: Path of the PDF to split.
        path: Directory that receives the single-page files.
    """
    import os

    infile = PdfReader(filename)
    pages = len(infile.pages)
    print(pages)
    for i, p in enumerate(infile.pages):
        if p and len(p) > 0:
            outfile = PdfWriter()
            outfile.addPage(p)
            # Honour the `path` argument instead of a hard-coded 'pdf/'.
            target = os.path.join(path, 'pageindex-%s.pdf' % i)
            try:
                outfile.write(target)
            except Exception as err:
                # Best effort: report the failure and keep going.
                print('could not write %s: %s' % (target, err))
            print(i)
Example #13
0
def merge(*varargs,merge_file):
    """Concatenate PDFs from the ``pdf_file`` directory into ``merge_file``.

    Inputs are merged in sorted name order. ".pdf" is appended to
    ``merge_file`` when it is missing.

    Args:
        *varargs: File names (relative to ``pdf_file``) ending in ".pdf".
        merge_file: Name of the output file (keyword-only).

    Raises:
        Exception: If ``merge_file`` or any input name is not a string, or
            if an input name does not end in ".pdf".
    """
    if not isinstance(merge_file, str):
        raise Exception("Errore: merge_file deve essere una stringa.")

    # Append the extension only when it is missing.
    if not merge_file.endswith('.pdf'):
        merge_file = merge_file + ".pdf"

    # Validate every argument before any file is touched.
    for x in varargs:
        if not isinstance(x, str):
            raise Exception("Errore: Tutti i parametri devono essere stringhe.")
    for x in varargs:
        if not x.endswith('.pdf'):
            raise Exception("Errore tutti i parametri devono terminare con .pdf")

    writer = PdfWriter()
    for fname in sorted(varargs):
        writer.addpages(PdfReader(os.path.join('pdf_file', fname)).pages)

    writer.write(merge_file)
Example #14
0
 def get(self,id):
     """Respond with an inline viewer for a page range of ``teste.pdf``.

     ``id`` is either a single 1-based page number ("3") or an inclusive
     range ("3-5"). The selected pages are written to a fragment file,
     which is then embedded base64-encoded in an ``<iframe>`` data URI.

     Raises:
         ValueError: If ``id`` does not consist of integers.
         IndexError: If a page number is outside the document.
     """
     inpfn = 'teste.pdf'
     outfn = '%sfrag' % os.path.basename(inpfn)
     pages = PdfReader(inpfn).pages
     outdata = PdfWriter()

     # "N" becomes [N, N]; "A-B" becomes [A, B].
     bounds = [int(part) for part in id.split('-')]
     first, last = (bounds + bounds[-1:])[:2]
     for pagenum in range(first, last + 1):
         outdata.addpage(pages[pagenum - 1])
     outdata.write(outfn)

     # Close the fragment file promptly; b64encode yields a newline-free
     # payload and exists on every Python version.
     with open(outfn, "rb") as fragment:
         pdfout = base64.b64encode(fragment.read()).decode('ascii')

     self.write('<iframe src="data:application/pdf;base64,'+pdfout+'" style="position:fixed; top:0px; left:0px; bottom:0px; right:0px; width:100%; height:100%; border:none; margin:0; padding:0; overflow:hidden; z-index:999999;"/>')
Example #15
0
    def consolidateAllSheets(self, subDir=None):
        """
        Merge every PDF in the print directory into ``output.pdf``.

        The PDFs (in sorted name order) are concatenated into
        ``<directory>output.pdf``; afterwards every other entry in the
        directory is deleted. ``subDir``, when given, is appended to
        ``self.printdirectory`` by plain string concatenation.
        """
        directory = self.printdirectory
        if subDir != None:
            directory = directory + subDir

        merged = PdfWriter()
        pdf_names = sorted(
            name for name in os.listdir(directory) if name.endswith('.pdf'))
        for pdf_name in pdf_names:
            merged.addpages(PdfReader(os.path.join(directory, pdf_name)).pages)

        merged.write(directory+"output.pdf")

        # Remove everything except the freshly written result.
        for entry in os.listdir(directory):
            if entry != 'output.pdf':
                os.remove(directory+entry)
Example #16
0
    def merge(*varargs, filenameOut='merge_file'):
        """Concatenate two or more PDF files into ``filenameOut``.

        ".pdf" is appended to ``filenameOut`` when missing. Each input is
        validated immediately before it is read.

        Raises:
            Exception: Fewer than two inputs, a non-string ``filenameOut``,
                or an input name that does not end in ".pdf".
            ValueError: An input that is not a string.
        """
        if len(varargs) < 2:
            raise Exception('Errore: utilizzare almeno due file.')

        if not isinstance(filenameOut, str):
            raise Exception('Errore: filenameOut deve essere una stringa.')

        target = filenameOut if filenameOut.endswith('.pdf') else filenameOut + ".pdf"

        out = PdfWriter()

        for path in varargs:
            if not isinstance(path, str):
                raise ValueError("Errore: Tutti i parametri devono essere stringhe.")
            if not path.endswith('.pdf'):
                raise Exception("Errore: tutti i parametri devono terminare con .pdf")

            out.addpages(PdfReader(path).pages)

        out.write(target)
Example #17
0
    def cleanPdf(srcPath, destPath=''):
        if os.path.exists(srcPath):
            if len(destPath) < 1:
                f, e = os.path.splitext(srcPath)
                destPath = f+'.cln.pdf'

            x = PdfReader(srcPath)
            for i, page in enumerate(x.pages):
                print 'page %05d: ' % i,
                xobjs = page.Resources.XObject
                for okey in xobjs.keys():
                    xobj = xobjs[okey]
                    if DuoPdf.isDel(xobj):
                        # xobj.pop('/SMask')
                        xobjs.pop(okey)
                        # print xobj
                        # print xobj.SMask
                        print '.',
                    else:
                        print '[%sx%s#%s]' % (xobj.Width, xobj.Height, xobj.Length),
                print 'done'
            print '[%s] -> [%s]' % (srcPath, destPath)
            PdfWriter().write(destPath, x)
Example #18
0
    def extract_sub_pdf(self, pI, pF, filename):
        """
        Copy pages ``pI`` through ``pF`` (inclusive) into ``filename``.

        @param pI,pF : integers, the first and last page numbers
        @param filename : string, path of the file to create

        ``pI`` and ``pF`` are the numbers shown in the document (starting
        at 1), not indices into Python's zero-based list of pages.
        """
        source_pages = self.pdf_reader.pages
        extract = PdfWriter(filename)
        # Shift by one to turn document page numbers into list indices.
        for page_index in range(pI - 1, pF):
            extract.addpage(source_pages[page_index])
        extract.write()
Example #19
0
    def write_link(self, file_path, uri):
        '''Write the "uri" into the Subject attribute of PDF file "file_path".

        Unless self.overwrite is set, an existing Subject is respected:
        nothing is written when it already contains "uri" or holds some
        unrelated text, and an existing Zotero select link is replaced in
        place. The file is rewritten only when self.dry_run is false.
        '''

        fp = antiformat(file_path)
        if __debug__: log(f'reading PDF file {fp}')
        trailer = PdfReader(file_path)
        file = antiformat(f'[steel_blue3]{file_path}[/]')
        if not self.overwrite:
            # A missing Subject is normalized to '' so the string tests
            # below are safe; "empty" therefore means "no prior value".
            subject = trailer.Info.Subject or ''
            if __debug__: log(f'found PDF Subject value {subject} on {fp}')
            if uri in subject:
                inform(
                    f'Zotero link already present in PDF "Subject" field of {file}'
                )
                return
            elif subject.startswith('zotero://select'):
                inform(
                    f'Replacing existing Zotero link in PDF "Subject" field of {file}'
                )
                subject = re.sub(r'(zotero://\S+)', uri, subject)
                trailer.Info.Subject = subject
            elif subject:
                # Only a non-empty, unrelated Subject blocks the write.
                warn(f'Not overwriting existing PDF "Subject" value in {file}')
                return
            else:
                if __debug__: log(f'no prior PDF Subject field found on {fp}')
                inform(
                    f'Writing Zotero link into PDF "Subject" field of {file}')
                trailer.Info.Subject = uri
        else:
            inform(f'Overwriting PDF "Subject" field of {file}')
            trailer.Info.Subject = uri

        if not self.dry_run:
            if __debug__:
                log(f'writing PDF file with new "Subject" field: {fp}')
            PdfWriter(file_path, trailer=trailer).write()
Example #20
0
    def get(self, request, format=None):
        """Merge the PDFs of all books and return the result as a download.

        The merged document is written to ``media/mergedfile.pdf`` and
        streamed back as an attachment named ``mergedfile.pdf``.
        """
        merged_path = os.path.join('media', 'mergedfile.pdf')

        writer = PdfWriter()
        for book in Book.objects.all():
            if book:
                writer.addpages(PdfReader(book.pdf).pages)

        with open(merged_path, 'wb') as pdfOutputFile:
            writer.write(pdfOutputFile)

        # The handle is deliberately left open: the response streams from it.
        response = FileResponse(open(merged_path, 'rb'))
        # Set the real Content-Type header, not a header named "content_type".
        response['Content-Type'] = "application/octet-stream"
        response['Content-Disposition'] = 'attachment; filename="mergedfile.pdf"'

        return response
Example #21
0
def upload():
    """Stamp an uploaded PDF with the uploaded ``stamp.pdf`` and return it.

    Exactly two files must be uploaded under ``upload_files``, exactly one
    of them named ``stamp.pdf``. The first page of the stamp is drawn on
    top of every page of the other file. The result is saved under
    ``tmp/`` with a timestamp name and sent back as an attachment.

    Returns:
        A plain error string when the upload is malformed, otherwise the
        file response.
    """
    uploadedFiles = request.files.getlist('upload_files')

    if len(uploadedFiles) != 2:
        return 'Must be 2 files.'

    first, second = uploadedFiles
    first_is_stamp = first.filename == 'stamp.pdf'
    second_is_stamp = second.filename == 'stamp.pdf'

    if first_is_stamp and second_is_stamp:
        return 'stamp.pdf must be 1 file.'

    if first_is_stamp:
        stampFile, inFile = first.stream, second.stream
    elif second_is_stamp:
        inFile, stampFile = first.stream, second.stream
    else:
        return 'stamp.pdf must be provided.'

    outputPdf = PageMerge().add(PdfReader(stampFile).pages[0])[0]
    inputPdf = PdfReader(inFile)
    for page in inputPdf.pages:
        PageMerge(page).add(outputPdf, prepend=False).render()

    sendFileName = datetime.now().strftime('%Y%m%d%H%M%S') + '.pdf'
    sendFilePath = 'tmp/' + sendFileName

    PdfWriter(sendFilePath, trailer=inputPdf).write()

    # Load the result into memory and close the file before responding.
    with open(sendFilePath, 'rb') as fileObj:
        payload = io.BytesIO(fileObj.read())
    return send_file(payload,
                     as_attachment=True,
                     attachment_filename=sendFileName,
                     mimetype='application/pdf')
Example #22
0
    def build(self):
        """Add the configured signature and date to each page, then save.

        For every page, the entry of ``self.signature.locations`` with the
        same index supplies a signature location and a date location; each
        is applied only when its first element is truthy. The modified
        document is written to ``self.pdffile.outfn``.
        """
        self.pdffile.init_input()

        print('in build')
        for pagenum, page in enumerate(self.pdffile.trailer.pages):
            # MediaBox corners, converted to floats.
            left, bottom, right, top = (float(v) for v in page.MediaBox)
            page_w = right - left
            page_h = top - bottom

            page_locations = self.signature.locations[pagenum]
            xy_sign = page_locations[0]
            xy_date = page_locations[1]

            if xy_sign[0]:
                self.add_signature(xy_sign, page, page_w, page_h)

            if xy_date[0]:
                self.add_date(xy_date, page, page_w, page_h)

        # Persist the modified trailer to the destination file.
        PdfWriter(self.pdffile.outfn, trailer=self.pdffile.trailer).write()
Example #23
0
    def addWatermarkFile(self,
                         fileInPath,
                         watermarkPath,
                         fileOutPath,
                         underneath=True):
        """Merge the first page of watermarkPath into every page of fileInPath.

           The result is written to fileOutPath. With ``underneath`` true
           the watermark is prepended to the page content, otherwise it is
           appended.

           Returns True on success; errors from the underlying calls
           propagate as exceptions.

           Based on the pdfrw example at
           https://github.com/pmaupin/pdfrw/blob/master/examples/watermark.py
           and, like it, assumes all pages share the watermark's size.
        """

        stamp_page = PdfReader(watermarkPath).pages[0]
        stamp = PageMerge().add(stamp_page)[0]
        document = PdfReader(fileInPath)

        for target_page in document.pages:
            PageMerge(target_page).add(stamp, prepend=underneath).render()

        PdfWriter(fileOutPath, trailer=document).write()

        return True
Example #24
0
def addWaterMark(pdfpath, watermarkcontent=None):
    """Overlay a generated watermark on every page of ``pdfpath`` in place.

    A temporary watermark PDF and a temporary output PDF are created next
    to the source; the output then replaces the source and the watermark
    file is removed. Any failure is printed and appended, with traceback,
    to the daily exception log instead of being raised.

    Args:
        pdfpath: Path of the PDF to watermark.
        watermarkcontent: Passed through to ``create_watermark``.

    Returns:
        ``pdfpath``, whether or not watermarking succeeded.
    """
    # Bound before the try block so the except handler can always use them.
    now = datetime.datetime.now()
    out_path = None
    try:
        # splitext keeps dots in directory names intact.
        base = os.path.splitext(pdfpath)[0]
        watermarkpath = base + '-water' + '.pdf'
        out_path = base + '-out' + '.pdf'
        watermark = create_watermark(watermarkpath, watermarkcontent)

        # Get our files ready
        input_file = PdfReader(pdfpath)
        for page in input_file.pages:
            PageMerge(page).add(watermark, prepend=False).render()
        PdfWriter(out_path, trailer=input_file).write()
        os.remove(pdfpath)
        os.rename(out_path, pdfpath)
        os.remove(watermarkpath)
        print('覆盖水印pdf--%s--源文件%s' % (now, out_path))
    except Exception:
        print('覆盖水印pdf出错--%s--源文件%s' % (now, out_path))
        filepath = APILOG_PATH['excptionlogpath'] + '/' + now.strftime('%Y-%m-%d')
        with open(filepath, 'a') as f:
            f.writelines(now.strftime('%H:%M:%S') + '\n' + traceback.format_exc() + '\n\n')
    return pdfpath
Example #25
0
    def merge_file(self):
        """Merge the PDFs listed in the widget into the chosen output file.

        With no output name set, the name is populated and nothing is
        merged. With an empty list, a message is shown. On success the
        list is cleared and a confirmation is shown; any exception is
        passed to ``dialog_message``.
        """
        if not self.output_file.text():
            self.populate_file_name()
            return

        if self.pdf_list_widget.count() <= 0:
            self.dialog_message('No hay archivos para unificar.')
            return

        merger = PdfWriter()
        try:
            for row in range(self.pdf_list_widget.count()):
                source_path = self.pdf_list_widget.item(row).text()
                merger.addpages(PdfReader(source_path).pages)
            merger.write(self.output_file.text())

            self.pdf_list_widget.clear()
            self.dialog_message('Unificación completada!')
        except Exception as e:
            self.dialog_message(e)
Example #26
0
def outimagesfunction(f1, f2, f3, f4, f5):
    """Combine up to five uploaded images into one PDF and return its URL.

    Each argument is an image file name relative to ``./pdf4/media/`` or
    None when that slot was not uploaded. Every image becomes one page,
    in argument order. Images with an alpha band are first converted to
    RGB and saved back over the original file.

    Returns:
        The public URL of the generated PDF.
    """
    # Random 12-character token used in the temporary and result names.
    # NOTE(review): `random` is not a cryptographic source; if the URL must
    # be unguessable, consider `secrets` -- confirm the requirement.
    alphabet = '123456789qwertyuiopasdfghjklzxcvbnm'
    psw = ''.join(random.choice(alphabet) for _ in range(12))

    # Storage handle used to delete the per-page temporary PDFs.
    fs = FileSystemStorage(location='./pdf4/media/')
    # Collects the pages; written out once after the loop.
    writer = PdfWriter()

    page_no = 0  # page number within the result file (1, 2, 3, ...)
    for name in (f1, f2, f3, f4, f5):
        if name is None:  # slot not uploaded
            continue
        page_no += 1
        # The view passes bare names without the media prefix.
        name = './pdf4/media/' + name
        temp_name = str(page_no) + '_' + psw + '.pdf'
        temppdf = './pdf4/media/' + temp_name

        # Strip the alpha channel, if any; the source image is closed
        # before it is overwritten.
        rgb = None
        with Image.open(name) as im:
            if ('A' in im.getbands()) or ('a' in im.getbands()):
                rgb = im.convert('RGB')
        if rgb is not None:
            rgb.save(name)

        # img2pdf turns the image into a single-page PDF ...
        with open(temppdf, 'wb') as pdf1:
            pdf1.write(img2pdf.convert(name))

        # ... which pdfrw then appends to the result.
        writer.addpages(PdfReader(temppdf, decompress=False).pages)
        fs.delete(temp_name)  # remove the temporary single-page PDF

    writer.write('./pdf4/media/pdfresult/' + psw + '.pdf')
    url = 'https://pdf4you.ru/media/pdfresult/' + psw + '.pdf'
    return url
Example #27
0
    def update_and_move(self, targetdir: str, doctitle: str, tags: List[str],
                        date: str):
        """Update metadata of pdf and move to target directory.

        The pages of ``self.filepath`` are written to
        ``<targetdir>/<date> <doctitle>.pdf`` with a fresh Info dictionary
        holding only Title and Keywords. When that name is taken, "-1",
        "-2", ... is appended to the stem until a free name is found. The
        source file is then deleted; a failure to delete is only printed.

        Arguments:
            targetdir {str} -- Target directory where pdf shall be placed.
            doctitle {str} -- New document title of pdf; a trailing
                ".pdf" is stripped.
            tags {List[str]} -- Keywords/tags which shall be added to pdf.
            date {str} -- Date which will be entered into pdf filename.

        """
        pdf = PdfReader(self.filepath)

        # Accept titles given with or without the file extension.
        if doctitle.endswith(".pdf"):
            doctitle = doctitle[:-4]
        stem = date + " " + doctitle

        # Derive each candidate from the stem; rewriting the previous
        # candidate with a regex can match inside the name itself.
        filename = stem + ".pdf"
        n = 0
        while os.path.isfile(os.path.join(targetdir, filename)):
            n += 1
            filename = "%s-%d.pdf" % (stem, n)

        # Write data
        writer = PdfWriter()
        writer.addpages(pdf.pages)
        writer.trailer.Info = IndirectPdfDict(Title=doctitle, Keywords=tags)
        writer.write(os.path.join(targetdir, filename))

        # Remove the source; report (but do not raise) on failure.
        try:
            os.remove(self.filepath)
        except OSError as e:
            print("Error: %s - %s." % (e.filename, e.strerror))
Example #28
0
def PDF2Text(pdf):
    """OCR ``pdf`` with tesseract; return its text and write a searchable copy.

    Each page image is converted to mode 'LA' and saved as
    ``cur_img.png``; tesseract is run twice on it, once for text and once
    for a PDF page. The PDF pages are concatenated into
    ``searchable.pdf``. All intermediate files live in the current
    working directory.

    Returns:
        The list of recognised text lines; the last line of each page's
        output is replaced by a newline.
    """
    page_images = convert_from_path(pdf)
    txtlist = []
    searchable = PdfWriter()

    text_cmd = ['tesseract', 'cur_img.png', 'cur_txt', '--dpi', '125']
    pdf_cmd = ['tesseract', 'cur_img.png', 'cur_pdf', '--dpi', '125', 'pdf']

    for image in page_images:
        image.convert('LA').save('cur_img.png')

        subprocess.run(text_cmd)
        with open('cur_txt.txt', 'r', encoding='utf-8') as ocr_output:
            txtlist.extend(ocr_output.readlines())
            # Swap the final line of this page's output for a newline.
            txtlist[-1] = '\n'

        subprocess.run(pdf_cmd)
        searchable.addpages(PdfReader('cur_pdf.pdf').pages)

    searchable.write('searchable.pdf')

    return txtlist
Example #29
0
    def __init__(self, pages, item_nu, raw_pages):
        """Create a paper entry and write its pages to a separate PDF.

        Args:
            pages: Indices into ``Paper.pages_readpdf`` of the pages that
                belong to this item.
            item_nu: Item number; also used as the output file name.
            raw_pages: Raw page data; the first entry is passed to
                ``find_paper_name`` to obtain the paper name.
        """
        self.item_nu = item_nu
        self.date = 1
        self.name = find_paper_name(raw_pages[0])
        self.authors = "author1"

        # Writes PDF file
        outdata = PdfWriter()
        for page in pages:
            outdata.addpage(Paper.pages_readpdf[page])
        outdata.write(FILE_PATH + "output\\%s.pdf" % item_nu)
        logger.info("Created file operations_%s, with pages: %s", item_nu,
                    pages)

        # Allows the ability to skip groups
        if DEFAULT_GROUP:
            self.group = "default_group"
        else:
            # Use the name stored on the instance; no local `name` exists.
            self.group = raw_input("What group is item %s with name %s" %
                                   (item_nu, self.name))
Example #30
0
class Worker(QRunnable):
    """Runnable that inserts one PDF repeatedly into another.

    All pages of ``fileCopy`` are inserted into ``fileInsInto`` at a
    starting position and then at a fixed interval; the result is written
    next to ``fileInsInto`` with a ``_MERGED.pdf`` suffix. Start and
    finish are announced through ``self.signals`` with the job id.
    """

    def __init__(self, fileCopy, fileInsInto, pageStart, pageIteration):
        """Store the job parameters and initialise the counters.

        Args:
            fileCopy: Path of the PDF whose pages are inserted.
            fileInsInto: Path of the PDF that receives the insertions.
            pageStart: 1-based output position of the first insertion.
            pageIteration: Step added to the insertion position after
                each insertion.
        """
        super().__init__()
        self.pageStart = pageStart
        self.fileCopy = fileCopy
        # Output page count at which the next insertion happens.
        self.page = self.pageStart - 1
        # Number of pages added to the output so far.
        self.pages = 0
        self.pageIterate = pageIteration
        # Index of the next page to take from fileInsInto.
        self.currentPage = 0
        self.writer = PdfWriter()
        self.fileInsInto = fileInsInto
        # create unique identifier for each worker
        self.jobID = str(uuid.uuid4().hex)
        self.signals = WorkerSignals()

    def run(self):
        """Build the merged document and write it to ``*_MERGED.pdf``.

        Emits ``started`` before and ``finished`` after the work, each
        with this worker's job id.
        """
        self.signals.started.emit(self.jobID)
        self.fPath = PdfReader(self.fileInsInto)
        outfn = os.path.splitext(self.fileInsInto)[0] + "_MERGED.pdf"
        copyFrom_totalPages = len(PdfReader(self.fileCopy).pages)
        # Calculate the total pages for the current fileInsertInto file
        insInto_totalPages = len(self.fPath.pages)
        # Calculate total number of pages upon merging...round up to nearest
        # whole number
        self.totalPages = math.ceil(
            (insInto_totalPages +
             ((insInto_totalPages - self.pageStart) / self.pageIterate) *
             copyFrom_totalPages))
        # Fill the output until the estimated total is reached: at an
        # insertion point add the whole fileCopy, otherwise add the next
        # page of fileInsInto.
        while self.pages < self.totalPages:
            if self.pages == self.page:
                # fileCopy is parsed again for every insertion.
                self.writer.addpages(PdfReader(self.fileCopy).pages)
                self.page += self.pageIterate
                self.pages += copyFrom_totalPages
            else:
                self.writer.addpage(self.fPath.pages[self.currentPage])
                self.currentPage += 1
                self.pages += 1
        # Write all the files into a file which is named as shown below
        # File directory is that of the last insert into file chosen
        self.writer.write(outfn)
        self.signals.finished.emit(self.jobID)
Example #31
0
def concatenate(input_paths, output_path, details=None):
    """Join several PDF files, in the order given, into one document.

    Args:
        input_paths: A sequence of paths to pdf files.
        output_path: The path at which the joined pdf is written.
        details: Optional dictionary of metadata; each key becomes a name
            in the output's Info dictionary, mapped to its value.
    """
    joined = PdfWriter()

    for source_path in input_paths:
        joined.addpages(PdfReader(source_path).pages)

    # Start from an empty Info dictionary, then add the requested entries.
    joined.trailer.Info = IndirectPdfDict()
    if details is not None:
        for key, content in details.items():
            joined.trailer.Info[PdfName(key)] = content

    joined.write(output_path)
Example #32
0
    def consolidateAllSheets(self, subDir=None):
        """
        Collapse all PDFs of the print directory into one ``output.pdf``.

        PDFs are appended in sorted file-name order and the result is
        written to ``<directory>output.pdf``. Every other directory entry
        is removed afterwards. ``subDir`` is concatenated onto
        ``self.printdirectory`` when it is given.
        """
        combined = PdfWriter()
        directory = (self.printdirectory + subDir
                     if subDir != None else self.printdirectory)

        sheet_names = [entry for entry in os.listdir(directory)
                       if entry.endswith('.pdf')]
        sheet_names.sort()
        for sheet_name in sheet_names:
            sheet = PdfReader(os.path.join(directory, sheet_name))
            combined.addpages(sheet.pages)

        combined.write(directory + "output.pdf")

        # Keep only the consolidated file.
        leftovers = [entry for entry in os.listdir(directory)
                     if entry != 'output.pdf']
        for leftover in leftovers:
            os.remove(directory + leftover)
def generate_pdf():
    """Assemble the final PDF from the per-page PDFs under ``targetDir``.

    For every directory visited by ``os.walk(targetDir)``, each name in
    the global ``pages`` that ends in "svg" is mapped to a PDF name by
    replacing "svg" with "pdf"; that file's pages are appended. The
    result is written to ``outputFile``.
    """
    print("generating final pdf...")

    book = PdfWriter()

    for subdir, _dirs, _files in os.walk(targetDir):
        # The directory listing is ignored; the global page order is used.
        for name in pages:
            print(name)
            if not name.endswith("svg"):
                continue
            candidate = subdir + os.sep + name.replace("svg", "pdf")
            if candidate.endswith(".pdf"):
                book.addpages(PdfReader(candidate).pages)

    book.write(outputFile)

    print("done!")
Example #34
0
def merge_pdfs(file_path, sup_url):
    """Join ``<file_path>_bak.pdf`` and ``<file_path>_sup.pdf`` into ``<file_path>.pdf``.

    The merge happens only when ``sup_url`` is non-empty and the target
    does not exist yet; both inputs are deleted afterwards. Otherwise an
    existing backup is simply renamed to the target. A read failure
    prints the offending path and traceback and exits the process with -1.
    """
    stem = file_path
    target = stem + '.pdf'
    backup = stem + '_bak.pdf'

    if len(sup_url) == 0 or os.path.exists(target):
        # Nothing to merge: promote the backup if there is one.
        if os.path.exists(backup):
            os.rename(backup, target)
        return

    supplement = stem + '_sup.pdf'

    merged = PdfWriter()
    for part in [backup, supplement]:
        try:
            merged.addpages(PdfReader(part).pages)
        except Exception:
            print(part)
            traceback.print_exc()
            exit(-1)

    merged.write(target)
    os.remove(backup)
    os.remove(supplement)
Example #35
0
    def LoopDevices(self,
                    looplist=None,
                    Plot=False,
                    plotdir=None,
                    plotting=False):
        """Process every device in ``looplist`` and optionally merge the plots.

        ``self.PT1`` is run for each device; with ``Plot`` set,
        ``self.PlotDeviceStatus`` is called as well. When ``plotdir`` is
        given, the PDFs named in ``self.pdf_file_list`` are merged into
        ``<plotdir>alldevices.pdf``.

        Returns:
            1 after the loop has run, or None (after ``ErrorCode(101)``)
            when no ``looplist`` was supplied.
        """

        # Reset the list of PDF file names that are merged at the end.
        self.pdf_file_list = []

        if looplist is None:
            self.ErrorCode(101)
            return

        for k in looplist:
            print(self.prompt, " currently working on ", k)
            self.PT1(k)
            print(type(k))
            if Plot:
                self.PlotDeviceStatus(k,
                                      symbol='d',
                                      color1='b',
                                      plotdir=plotdir,
                                      plotting=plotting)

        # Done with looping: merge all files into one large one.
        if plotdir is not None:
            masterfile = plotdir + 'alldevices.pdf'

            if self.pdf_file_list:
                writer = PdfWriter()
                for inpfn in self.pdf_file_list:
                    writer.addpages(PdfReader(inpfn).pages)
                # Write once, after all inputs were added, instead of
                # rewriting the growing file after every input.
                writer.write(masterfile)
            print(self.prompt, 'figure saved in ', masterfile)

        return 1
Example #36
0
def append_js_to_pdf(file_name):
    """Attach a JavaScript action to every annotation of a PDF.

    The script text is read from the file named by ``sys.argv[1]``; when that
    argument is missing or the file cannot be read, a small alert script is
    used instead.  Each annotation on each page gets an ``AA``/``V`` entry
    built by ``make_js_action``, and the result is written to ``test.pdf``.

    Args:
        file_name: Path of the input PDF.
    """
    pdf_writer = PdfWriter()
    pdf_reader = PdfReader(file_name)
    try:
        # Context manager so the script file is closed after reading.
        with open(sys.argv[1]) as js_file:
            js = js_file.read()
    except (IndexError, OSError, UnicodeDecodeError):
        # No usable script file given: fall back to a trivial action.
        js = "app.alert('hi')"

    for page in pdf_reader.pages:
        page.Type = PdfName.Page
        annots = page.Annots
        print(annots)
        # Pages without annotations have no Annots entry; treat as empty
        # instead of relying on a blanket exception handler.
        for field in annots or ():
            field.update(PdfDict(AA=PdfDict(V=make_js_action(js))))
        pdf_writer.addpage(page)

    pdf_writer.write('test.pdf')
Example #37
0
    def reverse(self, out_path: str = None) -> None:
        '''
        Write this PDF with its pages in reverse order (last page first).

        Note:
            With the default arguments this overwrites this object's own
            PDF file.

        Args:
            out_path: Optional string, default=None. Destination path for
                the reversed PDF. When omitted (or otherwise falsy), the
                output goes to ``self.path``.

        Returns:
            None

        Examples:
            Invoke like this to overwrite this PDF's file:

            ```>>> my_pdf.reverse()```

            Pass in a path to save as a new file.

            ```>>> my_pdf.reverse('/path/to/new/file.pdf')```
        '''

        target = out_path if out_path else self.path
        outdata = PdfWriter(target)
        source_pages = PdfReader(self.path).pages
        # Walk the page list back to front, appending each page in turn.
        for page in reversed(source_pages):
            outdata.addpage(page)

        outdata.write()
Example #38
0
    def main(self, folder: cli.ExistingDirectory):
        """Convert the allowed images in ``folder`` to PDF and concatenate them.

        Every entry of ``folder`` whose suffix is in ``self.ALLOWED_SUFFIXES``
        is converted to an RGB single-image PDF inside a temporary directory.
        The PDFs are then merged, in sorted path order, into
        ``folder / "output.pdf"``.

        Args:
            folder: Directory containing the source images.
        """
        paths = sorted(
            p for p in folder.list() if p.suffix in self.ALLOWED_SUFFIXES)

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_dir_path = local.path(temp_dir)
            print("Converting images to pdfs")
            for path in paths:
                print(f"Converting {path}")
                # Close the source image as soon as it has been converted so
                # file handles do not pile up for large folders.
                with Image.open(path) as im:
                    im.convert("RGB").save(
                        temp_dir_path / path.basename + ".pdf")

            pdf_paths = sorted(
                p for p in temp_dir_path.list() if p.suffix == ".pdf")

            print("Concatenating PDFs")
            writer = PdfWriter()
            for pdf_path in pdf_paths:
                writer.addpages(PdfReader(str(pdf_path)).pages)

            writer.write(str(folder / "output.pdf"))
Example #39
0
   def post(self, request):
       """Extract a page range from the submitted PDF and save it as a book.

       The validated ``page`` field is either a single 1-based page number
       (``"5"``) or an inclusive range (``"3-7"``).  The selected pages are
       written to ``media/extracted_page_<start>-<end>.pdf`` and the
       serializer's ``pdf`` field is pointed at that file name before saving.

       Returns:
           A 201 response with the serialized data on success, or a 400
           response with the serializer errors when validation fails.
       """
       serializer = BookSerializer(data=request.data)
       if not serializer.is_valid():
           return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

       inpfn = serializer.validated_data['pdf']

       page_range = [int(y) for y in serializer.validated_data['page'].split('-')]
       # A single page "N" means "N-N".  Normalise BEFORE unpacking so a
       # one-element spec no longer raises IndexError.
       page_start, page_end = (page_range + page_range[-1:])[:2]

       out_name = 'extracted_page_{}-{}.pdf'.format(page_start, page_end)
       outfn = os.path.join('media', out_name)
       pages = PdfReader(inpfn).pages
       outdata = PdfWriter(outfn)

       for pagenum in range(page_start, page_end + 1):
           outdata.addpage(pages[pagenum - 1])
       outdata.write()

       serializer.validated_data['pdf'] = out_name
       serializer.save()

       return Response(serializer.data, status=status.HTTP_201_CREATED)
Example #40
0
def book_list(request):
    """Render the book list, or on POST return every book PDF merged into one.

    For a POST request the ``pdf`` of every ``Book`` is concatenated into
    ``media/mergedfile.pdf``, which is then returned as an attachment
    download.  Any other method renders ``book_list.html`` with all books.
    """
    books = Book.objects.all()
    if request.method != 'POST':
        return render(request, 'book_list.html', {
            'books': books
        })

    writer = PdfWriter()
    for book in books:
        if book:
            writer.addpages(PdfReader(book.pdf).pages)

    merged_path = os.path.join('media', 'mergedfile.pdf')
    with open(merged_path, 'wb') as pdfOutputFile:
        writer.write(pdfOutputFile)

    # Pass the MIME type through ``content_type`` so the real Content-Type
    # header is set; assigning response['content_type'] only created a
    # stray header with that literal name.
    response = FileResponse(open(merged_path, 'rb'),
                            content_type='application/octet-stream')
    response['Content-Disposition'] = 'attachment; filename="mergedfile.pdf"'

    return response
Example #41
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
usage:   alter.py my.pdf

Creates alter.my.pdf

Demonstrates making a slight alteration to a preexisting PDF file.
Also demonstrates Unicode support.

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter

# Exactly one command-line argument is expected: the input PDF path.
(inpfn,) = sys.argv[1:]
outfn = 'alter.%s' % os.path.basename(inpfn)

# The reader object is used directly as the trailer of the output document.
trailer = PdfReader(inpfn)
trailer.Info.Title = 'My New Title Goes Here - 我的新名称在这儿'

# Hand the modified trailer to the writer at write time.
writer = PdfWriter()
writer.write(outfn, trailer)
Example #42
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            # Converts the image file ``f`` with img2pdf, checks the structure
            # of the resulting PDF object by object, and finally compares a
            # pdfrw round-trip of the result with a round-trip of the
            # reference PDF ``out``.
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            # Parse the generated bytes back with pdfrw for inspection.
            x = PdfReader(StringIO(convert_load(output)))
            # Trailer: exactly Info, Root and Size, with an empty Info dict.
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})
            # Catalog and page tree: a single page is expected.
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Count, '1')
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            self.assertEqual(len(x.Root.Pages.Kids), 1)
            # The page object itself: keys, media box, parent link, resources.
            self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                             ['0', '0', '115', '48'])
            self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
            self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
            self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                             [PdfName.XObject])
            self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                             [PdfName.Im0])
            # Content stream: its declared length must match, and it must
            # consist of exactly the expected drawing operators.
            self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                             [PdfName.Length])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                             str(len(x.Root.Pages.Kids[0].Contents.stream)))
            self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                  [PdfName.FlateDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])
            # test if the image has correct size
            orig_img = Image.open(f)
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
                self.assertEqual(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
                    convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(
                    convert_store(
                        x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                # Map the PDF colorspace name to the matching PIL mode string.
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                # Convert the original to a mode comparable with the decoded
                # PDF image before comparing raw pixel bytes.
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
Example #43
0
1) Concatenating multiple input PDFs.

2) adding metadata to the PDF.

If you do not need to add metadata, look at subset.py, which
has a simpler interface to PdfWriter.

'''

import sys
import os

import find_pdfrw
from pdfrw import PdfReader, PdfWriter, IndirectPdfDict

# Every command-line argument is an input PDF; at least one is required.
inputs = sys.argv[1:]
assert inputs
outfn = 'output.pdf'

# Append the pages of each input, in argument order.
writer = PdfWriter()
for inpfn in inputs:
    # Fixed: the closing parenthesis belongs after ``inpfn`` -- the pages
    # come from the reader, not from the file-name string.
    writer.addpages(PdfReader(inpfn).pages)

# Attach document metadata to the output trailer.
writer.trailer.Info = IndirectPdfDict(
    Title = 'your title goes here',
    Author = 'your name goes here',
    Subject = 'what is it all about?',
    Creator = 'some script goes here',
)
writer.write(outfn)
Example #44
0
def notebook(path, uuid, path_annotated_pdf, is_landscape, path_templates=None):
    """Render a notebook's page files onto its templates as one PDF.

    Page files ``<path>/<uuid>/0.rm``, ``1.rm``, ... are rendered until the
    first missing index.  A base PDF made of one template (or blank) page per
    entry from ``_get_templates_per_page`` is written to
    ``path_annotated_pdf``, re-read, overlaid with the rendered annotations
    and written to the same path again.
    """
    rm_files_path = "%s/%s" % (path, uuid)

    # Render consecutive page files until one does not exist.
    annotations_pdf = []
    page_index = 0
    while os.path.exists("%s/%d.rm" % (rm_files_path, page_index)):
        rm_file_name = "%s/%d" % (rm_files_path, page_index)
        layout = PDFPageLayout(is_landscape=is_landscape)
        annotations_pdf.append(_render_rm_file(rm_file_name, layout))
        page_index += 1

    # Write empty notebook notes containing blank pages or templates
    writer = PdfWriter()
    for template in _get_templates_per_page(path, uuid, path_templates):
        base_page = _blank_page() if template is None else template.pages[0]
        writer.addpage(base_page)
    writer.write(path_annotated_pdf)

    # Overlay empty notebook with annotations
    templates_pdf = PdfReader(path_annotated_pdf)
    template_rotation = 90 if is_landscape else 0
    overlay_rotation = -90 if is_landscape else 0
    for i, overlay in enumerate(annotations_pdf):
        target_page = templates_pdf.pages[i]
        # The template page is rotated even when there is nothing to overlay.
        target_page.Rotate = template_rotation
        if len(overlay.pages) < 1:
            continue

        annotated_page = overlay.pages[0]
        annotated_page.Rotate = overlay_rotation
        PageMerge(target_page).add(annotated_page).render()

    PdfWriter().write(path_annotated_pdf, templates_pdf)
Example #45
0
File: djpdf.py Project: Unrud/djpdf
    def write_async(self, outfile, process_semaphore, progress_cb=None):
        """Build the PDF for ``self._pages`` and write it to ``outfile``.

        Generator-based coroutine.  One page dictionary is created per entry
        of ``self._pages``; the pages are then filled in concurrently with
        ``asyncio.gather``.  The document is written to a temporary file and
        post-processed by the external ``QPDF_CMD`` into ``outfile``.

        Args:
            outfile: Path of the final PDF produced by ``QPDF_CMD``.
            process_semaphore: Passed to the per-page image helpers and to
                ``run_command_async``.
            progress_cb: Optional callable; called after each finished page
                with ``finished_pages / len(self._pages)``.
        """
        pdf_writer = PdfWriter(version="1.5")

        # Transparency group dictionary shared by every page.
        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        # Font resource dictionary shared by every page that contains text.
        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        # Register empty page dictionaries first so that internal links can
        # refer to any page before its content has been generated.
        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes a internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                # A mask is only requested for foregrounds without a color.
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))
            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem),
                    get_pdf_background(psem),
                    asyncio.gather(*[fg.pdf_image(psem)
                                     for fg in page.foreground]),
                    asyncio.gather(*[get_pdf_mask(fg, psem)
                                     for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray([0, 0,
                                          PdfNumber(page.width),
                                          PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            # Running index used to name image XObjects Im0, Im1, ...
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" +
                               "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            # Tracks the fill color last emitted so "rg" is only written
            # when the color changes.
            current_color = None
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                # Fill the unit rectangle with the page color.
                graphics += ("0 0 1 1 re " +
                             "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                # A mask gets its own XObject name but is not drawn itself;
                # only the foreground image is painted with "Do".
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None and
                        current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            # Text is emitted with font F1 at size 1 and text render mode 3.
            before_text = ("BT\n" +
                           "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    # The string is written hex-encoded as UTF-16-BE.
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(),
                        PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    # Borderless link annotation covering the text rectangle.
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [PdfNumber(t.x),
                                      PdfNumber(t.y),
                                      PdfNumber(t.x + t.width),
                                      PdfNumber(t.y + t.height)]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        # internal_link is (page index, (x, y)).
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page,
                            PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y),
                            0]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    # The compressed bytes are stored as a latin-1 string.
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"),
                        9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            # Optional entries are only attached when they are non-empty.
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))
        # Counter shared by all make_page coroutines via ``nonlocal``.
        finished_pages = 0
        yield from asyncio.gather(
            *[make_page(page, pdf_page, process_semaphore)
              for page, pdf_page in zip(self._pages, pdf_pages)])

        # Write with pdfrw to a temporary file, then let QPDF_CMD produce
        # the final output file from it.
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [QPDF_CMD,
                   "--stream-data=preserve",
                   "--object-streams=preserve",
                   "--normalize-content=n"]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([path.abspath(path.join(temp_dir, "temp.pdf")),
                        path.abspath(outfile)])
            yield from run_command_async(cmd, process_semaphore)
Example #46
0
import sys
import argparse
import itertools
from pdfrw import PdfWriter, PdfReader

# Command line: two required input PDFs and an optional output location.
parser = argparse.ArgumentParser(
    description='Interlaces two pdf to make one complete pdf.')
parser.add_argument('front_pdf_loc', type=str,
                    help="PDF of fronts of pages")
parser.add_argument('back_pdf_loc', type=str,
                    help="PDF of backs of pages")
parser.add_argument('output_loc', type=str, nargs='?', default="output.pdf",
                    help="Output location for interlaced PDF")

args = parser.parse_args()

output = PdfWriter()
front_pdf = PdfReader(args.front_pdf_loc)
back_pdf = PdfReader(args.back_pdf_loc)

# Interleaving only works when every front has a matching back.
if len(front_pdf.pages) != len(back_pdf.pages):
    print("PDFs must have the same number of pages")
    sys.exit(1)

# The backs are paired in reverse order: the first front page goes with the
# last back page, and so on.
backs_reversed = back_pdf.pages[::-1]
page_pairs = zip(front_pdf.pages, backs_reversed)
output.addpages(itertools.chain.from_iterable(page_pairs))
output.write(args.output_loc)
Example #47
0
# Collects (reference digest, result digest) pairs for every reference PDF
# whose canonical form matches the canonical form of its counterpart under
# tmp_results.  NOTE: Python 2 syntax (print statements); ``count``,
# ``goodcount``, ``expected``, ``tmp``, ``get_digest`` and ``make_canonical``
# are defined outside this section.
changes = []
for (srcpath, _, filenames) in os.walk('ramdisk/reference'):
    for name in filenames:
        if not name.endswith('.pdf'):
            continue
        src = os.path.join(srcpath, name)
        # The counterpart lives at the same relative path under tmp_results.
        dst = src.replace('/reference/', '/tmp_results/')
        if not os.path.exists(dst):
            continue
        # Only consider reference files whose digest is in ``expected``.
        src_digest = get_digest(src)
        if not src_digest or src_digest not in expected:
            continue
        print src
        count += 1
        # Canonicalize the reference, write it to ``tmp`` and digest that.
        trailer = make_canonical(PdfReader(src))
        out = PdfWriter(tmp)
        out.write(trailer=trailer)
        match_digest = get_digest(tmp)
        if not match_digest:
            continue
        # Do the same for the result file; the two digests must agree.
        trailer = make_canonical(PdfReader(dst))
        out = PdfWriter(tmp)
        out.write(trailer=trailer)
        if get_digest(tmp) != match_digest:
            continue
        goodcount += 1
        print "OK"
        changes.append((src_digest, get_digest(dst)))

print count, goodcount
Example #48
0
def pdf(rm_files_path, path_highlighter, pages, path_original_pdf,
        path_annotated_pdf, path_oap_pdf):
    """ Render pdf with annotations. The path_oap_pdf defines the pdf
        which includes only annotated pages.

        For each page of the original PDF, the page file
        ``<rm_files_path>/<page_nr>.rm`` is rendered (when it exists and the
        page has a usable layout) and the original page is merged underneath
        the rendered annotations.

        Args:
            rm_files_path: Directory holding the numbered ``.rm`` page files.
            path_highlighter: Directory holding ``<pages[i]>.json`` files
                that are passed to ``_render_rm_file``.
            pages: Indexable by page number; supplies the JSON file stem
                for each page.
            path_original_pdf: Path of the PDF to annotate.
            path_annotated_pdf: Output path for the full annotated PDF.
            path_oap_pdf: Output path for the PDF with annotated pages only.
    """

    # PdfReader consumes the stream while constructing, so the handle can be
    # closed right away instead of being left open.
    with open(path_original_pdf, "rb") as original_file:
        base_pdf = PdfReader(original_file)

    # The document-level media box does not depend on the page, so look it
    # up once instead of once per page.
    if hasattr(base_pdf,
               "Root") and hasattr(base_pdf.Root, "Pages") and hasattr(
                   base_pdf.Root.Pages, "MediaBox"):
        default_layout = base_pdf.Root.Pages.MediaBox
    else:
        default_layout = None

    # Parse remarkable files and write into pdf.  Both lists get exactly one
    # entry per page; ``None`` marks a page without annotations.
    annotations_pdf = []
    offsets = []

    for page_nr in range(base_pdf.numPages):
        rm_file_name = "%s/%d" % (rm_files_path, page_nr)
        rm_file = "%s.rm" % rm_file_name
        if not os.path.exists(rm_file):
            annotations_pdf.append(None)
            offsets.append(None)
            continue

        page_layout = PDFPageLayout(base_pdf.pages[page_nr],
                                    default_layout=default_layout)
        if page_layout.layout is None:
            annotations_pdf.append(None)
            offsets.append(None)
            continue

        page_file = os.path.join(path_highlighter, f"{pages[page_nr]}.json")
        annotated_page, offset = _render_rm_file(
            rm_file_name,
            page_layout=page_layout,
            page_file=page_file,
        )

        if len(annotated_page.pages) <= 0:
            annotations_pdf.append(None)
        else:
            annotations_pdf.append(annotated_page.pages[0])
        offsets.append(offset)

    # Merge annotations pdf and original pdf
    writer_full = PdfWriter()
    writer_oap = PdfWriter()
    for base_page, annotations_page, offset in zip(
            base_pdf.pages, annotations_pdf, offsets):
        if annotations_page is None:
            writer_full.addpage(base_page)
            continue

        # The annotations page is at least as large as the base PDF page,
        # so we merge the base PDF page under the annotations page.
        merger = PageMerge(annotations_page)
        base_layer = merger.add(base_page, prepend=True)[0]
        base_layer.x -= offset[0]
        base_layer.y -= offset[1]
        merger.render()
        writer_oap.addpage(annotations_page)
        writer_full.addpage(annotations_page)

    writer_full.write(path_annotated_pdf)
    writer_oap.write(path_oap_pdf)
    page.AA = PdfDict()
    # You probably should just wrap each JS action with a try/catch,
    # because Chrome does no error reporting or even logging otherwise;
    # you just get a silent failure.
    page.AA.O = make_js_action("""
try {
  %s
} catch (e) {
  app.alert(e.message);
}
    """ % (script))

    page.Annots = PdfArray(annots)
    return page

if len(sys.argv) > 1:
    # The script file starts with header lines of the form
    # "/// <name> <number> ..." that describe the form fields; the whole
    # file (header included) is then used as the page's script.
    # The context manager makes sure the file is closed again.
    with open(sys.argv[1], 'r') as js_file:
        fields = []
        for line in js_file:
            if not line.startswith('/// '):
                break
            pieces = line.split()
            params = [pieces[1]] + [float(token) for token in pieces[2:]]
            fields.append(make_field(*params))

        # Rewind to read the complete script text from the start.
        js_file.seek(0)
        script_text = js_file.read()

    out = PdfWriter()
    out.addpage(make_page(fields, script_text))
    out.write('result.pdf')
Example #50
0
File: 4up.py Project: b4stien/pdfrw
usage:   4up.py my.pdf

Creates 4up.my.pdf with a single output page for every
4 input pages.
"""

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge


def get4(srcpages):
    """Scale the given pages to half size and tile them on one page.

    Pages with an odd index go in the right column; pages whose index has
    bit 1 set go in the bottom row.  Returns the rendered merged page.
    """
    scale = 0.5
    merged = PageMerge() + srcpages
    _, _, box_width, box_height = merged.xobj_box
    x_increment = scale * box_width
    y_increment = scale * box_height
    for index, page in enumerate(merged):
        page.scale(scale)
        if index & 1:
            page.x = x_increment
        else:
            page.x = 0
        if index & 2:
            page.y = 0
        else:
            page.y = y_increment
    return merged.render()


# Exactly one command-line argument is expected: the input PDF path.
(inpfn,) = sys.argv[1:]
outfn = "4up.%s" % os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
writer = PdfWriter()
# One output page per group of (up to) four consecutive input pages.
writer.addpages(
    get4(pages[start:start + 4]) for start in range(0, len(pages), 4))
writer.write(outfn)
Example #51
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

'''
usage:   alter.py my.pdf

Creates alter.my.pdf

Demonstrates making a slight alteration to a preexisting PDF file.
Also demonstrates Unicode support.

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter

# Exactly one command-line argument is expected: the input PDF path.
(inpfn,) = sys.argv[1:]
outfn = 'alter.%s' % os.path.basename(inpfn)

# The reader object is used directly as the trailer of the output document.
trailer = PdfReader(inpfn)
trailer.Info.Title = 'My New Title Goes Here - 我的新名称在这儿'

# Hand the modified trailer to the writer at write time.
writer = PdfWriter()
writer.write(outfn, trailer)
Example #52
0
parser.add_argument('--evenrev',
                    dest='evenrev',
                    action='store_const',
                    const=True,
                    default=False,
                    help='reverses the even pages before shuffling')

args = parser.parse_args()

# The shuffling magic
even = PdfReader(args.evenFile[0])
odd = PdfReader(args.oddFile[0])
isEvenReversed = args.evenrev;
isOddReversed = args.oddrev;
all = PdfWriter()
blank = PageMerge()
blank.mbox = [0, 0, 612, 792] # 8.5 x 11
blank = blank.render()

if isEvenReversed and not isOddReversed:
    for i in range(0, len(odd.pages)):
        all.addpage(odd.pages[i])
        all.addpage(even.pages[len(even.pages)-1-i])
elif isOddReversed and not isEvenReversed:
    for i in range(0, len(odd.pages)):
        all.addpage(odd.pages[len(odd.pages)-1-i])
        all.addpage(even.pages[i])
elif isEvenReversed and isOddReversed:
    for i in range(0, len(odd.pages)):
        all.addpage(odd.pages[len(odd.pages)-1-i])
Example #53
0
 def save_to_file(pdf_obj, file_path):
     """Write ``pdf_obj`` to ``file_path``.

     Only the last three ``/``-separated components of the path appear in
     the debug log message.
     """
     path_tail = file_path.split('/')[-3:]
     logger.debug("Saving to file: " + '/'.join(path_tail))
     PdfWriter().write(file_path, pdf_obj)
     logger.debug("Done")
Example #54
0
'''
usage:   unspread.py my.pdf

Creates unspread.my.pdf

Chops each page in half, e.g. if a source were
created in booklet form, you could extract individual
pages.
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge


def splitpage(src):
    ''' Yield the left half and then the right half of a page,
        each rendered as a page of its own.
    '''
    half_width = 0.5
    for left_edge in (0, half_width):
        # viewrect is (x, y, width, height) as fractions of the source page.
        half = PageMerge().add(src, viewrect=(left_edge, 0, half_width, 1))
        yield half.render()


# Exactly one command-line argument is expected: the input PDF path.
(inpfn,) = sys.argv[1:]
outfn = 'unspread.%s' % os.path.basename(inpfn)
writer = PdfWriter()
# Every input page contributes its two halves, in order.
for page in PdfReader(inpfn).pages:
    for half in splitpage(page):
        writer.addpage(half)
writer.write(outfn)
Example #55
0
def go(inpfn, outfn):
    """Read ``inpfn`` without decompressing it and write the ``get4`` pages to ``outfn``.

    ``get4`` is called repeatedly for as long as the page list is non-empty.
    """
    reader = PdfReader(inpfn, decompress=False)
    pages = reader.pages
    writer = PdfWriter()
    # NOTE(review): presumably get4 removes the pages it consumes from the
    # list; otherwise this loop would never end -- verify against get4.
    while pages:
        merged_page = get4(pages)
        writer.addpage(merged_page)
    writer.write(outfn)
Example #56
0
So she did an 8.5x11" output with 0.5" margin all around
(actual size of useful area 7.5x10") and we scaled it
up by 4.8.

We also copy the Info dict to the new PDF.

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict


def adjust(page, margin=36, scale=4.8):
    """Crop ``margin`` units off every side of ``page`` and scale the rest.

    Returns the rendered page.
    """
    x1, y1, x2, y2 = PageMerge().add(page).xobj_box
    inner_width = x2 - x1 - 2 * margin
    inner_height = y2 - y1 - 2 * margin
    cropped = PageMerge().add(
        page, viewrect=(margin, margin, inner_width, inner_height))
    cropped[0].scale(scale)
    return cropped.render()


# Exactly one command-line argument is expected: the input PDF path.
(inpfn,) = sys.argv[1:]
outfn = 'poster.%s' % os.path.basename(inpfn)
reader = PdfReader(inpfn)
writer = PdfWriter(outfn)

# Only the first page of the input is adjusted and written.
first_page = reader.pages[0]
writer.addpage(adjust(first_page))

# Copy the Info dict; an input without one yields an empty dict.
source_info = reader.Info or {}
writer.trailer.Info = IndirectPdfDict(source_info)
writer.write()
Example #57
0
#!/usr/bin/env python

'''
usage:   subset.py my.pdf page[range] [page[range]] ...
         eg. subset.py 1-3 5 7-9

Creates subset.my.pdf

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter

inpfn = sys.argv[1]
ranges = sys.argv[2:]
assert ranges, "Expected at least one range"

# Lazily parse each "N" or "N-M" argument into a list of ints.
ranges = ([int(y) for y in x.split('-')] for x in ranges)
outfn = 'subset.%s' % os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
outdata = PdfWriter(outfn)

for onerange in ranges:
    # A single page "N" is treated as the range N-N.
    first, last = (onerange + onerange[-1:])[:2]
    # Page numbers on the command line are 1-based and inclusive.
    outdata.addpages(pages[pagenum - 1] for pagenum in range(first, last + 1))
outdata.write()
Example #58
0
import sys
import os

import find_pdfrw
from pdfrw import PdfReader, PdfWriter

inpfn = sys.argv[1]
rotate = int(sys.argv[2])
ranges = sys.argv[3:]

assert rotate % 90 == 0

# Each range argument is "N" or "N-M"; parse into lists of ints.
ranges = [[int(y) for y in x.split('-')] for x in ranges]
outfn = 'rotate.%s' % os.path.basename(inpfn)
trailer = PdfReader(inpfn)
pages = trailer.pages

# Without explicit ranges, every page is rotated.
if not ranges:
    ranges = [[1, len(pages)]]

for onerange in ranges:
    # A single page "N" is treated as the range N-N (1-based, inclusive).
    first, last = (onerange + onerange[-1:])[:2]
    for pagenum in range(first - 1, last):
        page = pages[pagenum]
        # Add to the page's inherited rotation (0 when none is set).
        current = int(page.inheritable.Rotate or 0)
        page.Rotate = (current + rotate) % 360

outdata = PdfWriter()
outdata.trailer = trailer
outdata.write(outfn)
Example #59
0
def pdf(rm_files_path, path_highlighter, path_original_pdf, path_annotated_pdf, path_oap_pdf):
    """Render the original PDF with its annotations merged in.

    Two files are written: ``path_annotated_pdf`` receives every page of the
    original document, and ``path_oap_pdf`` receives only the pages that
    carry annotations.

    Args:
        rm_files_path: Directory holding the per-page ``<page_nr>.rm`` files,
            numbered from 0.
        path_highlighter: Passed through unchanged to ``_render_rm_file``.
        path_original_pdf: Path of the PDF to annotate.
        path_annotated_pdf: Output path for the full annotated document.
        path_oap_pdf: Output path for the annotated-pages-only document.
    """

    # Use a context manager so the input file handle is always closed.
    # NOTE(review): relies on PdfReader consuming the whole stream inside its
    # constructor, as pdfrw does - confirm if the reader class ever changes.
    with open(path_original_pdf, "rb") as original_file:
        base_pdf = PdfReader(original_file)

    # Parse remarkable files and render one annotation page per PDF page;
    # pages without usable annotations get a None placeholder.
    annotations_pdf = []

    for page_nr in range(base_pdf.numPages):
        rm_file_name = "%s/%d" % (rm_files_path, page_nr)
        rm_file = "%s.rm" % rm_file_name
        if not os.path.exists(rm_file):
            annotations_pdf.append(None)
            continue

        page_layout = PDFPageLayout(base_pdf.pages[page_nr])
        if page_layout.layout is None:
            annotations_pdf.append(None)
            continue

        annotated_page = _render_rm_file(rm_file_name, page_layout=page_layout, path_highlighter=path_highlighter)
        if len(annotated_page.pages) <= 0:
            annotations_pdf.append(None)
        else:
            annotations_pdf.append(annotated_page.pages[0])

    # Merge annotations pdf and original pdf
    writer_full = PdfWriter()
    writer_oap = PdfWriter()
    for i, annotations_page in enumerate(annotations_pdf):
        base_page = base_pdf.pages[i]

        if annotations_page is not None:
            # Merge the annotation page into the original page, then also
            # record that page in the annotated-pages-only document.
            merger = PageMerge(base_page)
            merger.add(annotations_page).render()
            writer_oap.addpage(base_page)

        writer_full.addpage(base_page)

    writer_full.write(path_annotated_pdf)
    writer_oap.write(path_oap_pdf)
Example #60
0
try:
    from pdfrw import PdfReader, PdfWriter
except ImportError:
    print("Instale em seu sistema a biblioteca pdfrw!\n\n")
    print("sudo apt install python3-pdfrw\n")
    quit()

# Removes the newline characters from a line read from the list


def remove_quebra_de_linha(linha):
    """Return ``linha`` with every newline character removed."""
    pedacos = linha.split('\n')
    return ''.join(pedacos)


# Writer that accumulates the pages of the combined PDF
writer = PdfWriter()

# Text file listing the PDFs to combine, one per line, each given as a full
# filesystem path. Its contents should look something like:
# /home/meu_usuario/arquivos_pdf/arquivo1.pdf
# /home/meu_usuario/arquivos_pdf/arquivo2.pdf
# NOTE(review): this handle is not closed anywhere in the visible code.
pdf_list = open("my_pdfs.txt")

# Path of the output file. A bare file name like this one is relative to
# the current working directory.
pdefao = 'super.pdf'

# Read the list line by line and append each PDF's pages to the writer
for arquivo in pdf_list:
    arquivo = remove_quebra_de_linha(arquivo)
    writer.addpages(PdfReader(arquivo).pages)