Esempio n. 1
0
# USAGE: ./add_on_page.py $in_filepath $out_filepath
# Inspired by https://github.com/pmaupin/pdfrw/blob/master/examples/watermark.py

import sys

from fpdf import FPDF
from pdfrw import PageMerge, PdfReader, PdfWriter

# Input and output PDF paths are taken from the command line.
IN_FILEPATH = sys.argv[1]
OUT_FILEPATH = sys.argv[2]
# Index into the writer's page array of the page that receives the new
# content; Python indexing is zero-based, so 1 selects the second page.
ON_PAGE_INDEX = 1
UNDERNEATH = (
    False  # if True, new content will be placed underneath page (painted first)
)


def new_content():
    """Render the overlay text with fpdf and return it as a pdfrw page.

    A fresh single-page document containing the text "Hello!" is generated
    in memory, parsed with ``PdfReader`` and its first page is returned.
    """
    overlay = FPDF()
    overlay.add_page()
    overlay.set_font("helvetica", size=36)
    overlay.text(50, 50, "Hello!")
    rendered = bytes(overlay.output())
    return PdfReader(fdata=rendered).pages[0]


# Load the input document, merge the generated content onto the selected
# page (below it when UNDERNEATH is true) and write the result out.
writer = PdfWriter(trailer=PdfReader(IN_FILEPATH))
target_page = writer.pagearray[ON_PAGE_INDEX]
merger = PageMerge(target_page)
merger.add(new_content(), prepend=UNDERNEATH)
merger.render()
writer.write(OUT_FILEPATH)
Esempio n. 2
0
from pdfrw import PdfReader, PdfWriter
import glob

# Every PDF in the current directory is split into three one-page files.
arr = glob.glob("*.pdf")

for i in arr:
    pages = PdfReader(i).pages
    # (first page, stop page) pairs, 1-based with an exclusive stop
    parts = [(1, 2), (2, 3), (3, 4)]
    for part in parts:
        first_page, stop_page = part
        outdata = PdfWriter(f'out/{i}_{first_page}.pdf')
        for pagenum in range(first_page, stop_page):
            outdata.addpage(pages[pagenum - 1])
        outdata.write()

#https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory

#pageNumbers = pdf_reader.getNumPages()
#for i in range (pageNumbers):
Esempio n. 3
0
from pdfrw import PdfReader, PdfWriter
import os

# Title written into the Info dictionary of every processed document.
title = 'Product Sheet'

# Rewrite each file found in ``files`` into ``out`` with the new title.
# Paths are built with os.path.join so the script is not tied to the
# Windows path separator.
files = os.listdir('files')
for file in files:
    trailer = PdfReader(os.path.join('files', file))
    trailer.Info.Title = title
    PdfWriter(os.path.join('out', file), trailer=trailer).write()
Esempio n. 4
0
    def write_async(self, outfile, process_semaphore, progress_cb=None):
        """Assemble the PDF with pdfrw and post-process it with qpdf.

        This is a generator-based coroutine (it uses ``yield from``). One
        pdfrw page dictionary is created per entry in ``self._pages``; the
        pages are then filled concurrently with ``asyncio.gather``. After
        that the trailer receives a random document ID, ``MarkInfo``, a
        ``StructTreeRoot`` and a compressed XMP metadata stream. The result
        is written to a temporary file and passed through ``QPDF_CMD`` to
        produce ``outfile``.

        Args:
            outfile: Destination path; made absolute and given to qpdf as
                its output file.
            process_semaphore: Forwarded to the per-page image helpers and
                to ``run_command_async``. Presumably limits the number of
                concurrent subprocesses -- confirm against those helpers.
            progress_cb: Optional callable invoked after each finished page
                with ``finished_pages / len(self._pages)``.

        NOTE(review): ``asyncio.coroutine`` was removed in Python 3.11, so
        this code needs porting to ``async def``/``await`` to run there.
        """
        pdf_writer = PdfWriter(version="1.5")

        # Transparency group dictionary shared by all pages
        pdf_group = PdfDict()
        pdf_group.indirect = True
        pdf_group.CS = PdfName.DeviceRGB
        pdf_group.I = PdfBool(True)
        pdf_group.S = PdfName.Transparency

        # Font resources shared by all pages that contain text
        pdf_font_mapping = PdfDict()
        pdf_font_mapping.indirect = True
        pdf_font_mapping.F1 = self._build_font()

        # Create one empty page dictionary per source page; the contents are
        # filled in later by make_page
        for _ in self._pages:
            pdf_page = PdfDict()
            pdf_page.Type = PdfName.Page
            pdf_writer.addpage(pdf_page)
        # pdfrw makes an internal copy of the pages
        # use the copy so that references to pages in links are correct
        pdf_pages = list(pdf_writer.pagearray)

        # Embed the ICC profile read from SRGB_ICC_FILENAME; it is used as
        # the DefaultRGB colour space of every page
        srgb_colorspace = PdfDict()
        srgb_colorspace.indirect = True
        srgb_colorspace.N = 3  # Number of components (red, green, blue)
        with open(SRGB_ICC_FILENAME, "rb") as f:
            srgb_colorspace_stream = f.read()
        srgb_colorspace.Filter = [PdfName.FlateDecode]
        # The compressed bytes are stored as a latin-1 decoded str
        srgb_colorspace.stream = zlib.compress(srgb_colorspace_stream,
                                               9).decode("latin-1")
        srgb_colorspace.Length1 = len(srgb_colorspace_stream)
        default_rgb_colorspace = PdfArray([PdfName.ICCBased, srgb_colorspace])
        default_rgb_colorspace.indirect = True

        # Handle all pages in parallel
        @asyncio.coroutine
        def make_page(page, pdf_page, psem):
            # Prepare everything in parallel
            @asyncio.coroutine
            def get_pdf_thumbnail(psem):
                if page.thumbnail is None:
                    return None
                return (yield from page.thumbnail.pdf_thumbnail(psem))

            @asyncio.coroutine
            def get_pdf_background(psem):
                if page.background is None:
                    return None
                return (yield from page.background.pdf_image(psem))

            @asyncio.coroutine
            def get_pdf_mask(foreground, psem):
                # A mask is only requested for foregrounds without a color
                if foreground.color is not None:
                    return None
                return (yield from foreground.pdf_mask(psem))

            pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
                yield from asyncio.gather(
                    get_pdf_thumbnail(psem), get_pdf_background(psem),
                    asyncio.gather(
                        *[fg.pdf_image(psem) for fg in page.foreground]),
                    asyncio.gather(
                        *[get_pdf_mask(fg, psem) for fg in page.foreground])))
            pdf_page.MediaBox = PdfArray(
                [0, 0, PdfNumber(page.width),
                 PdfNumber(page.height)])
            pdf_page.Group = pdf_group
            pdf_resources = PdfDict()
            pdf_colorspace = PdfDict()
            pdf_colorspace.DefaultRGB = default_rgb_colorspace
            pdf_resources.ColorSpace = pdf_colorspace
            pdf_xobject = PdfDict()
            if pdf_thumbnail is not None:
                pdf_page.Thumb = pdf_thumbnail
            # Running index used to name the image XObjects Im0, Im1, ...
            im_index = 0
            # Save graphics state and scale unity rectangle to page size
            matrix = TransformationMatrix()
            matrix.scale(page.width, page.height)
            before_graphics = ("q\n" + "%s cm\n" % matrix.to_pdf())
            after_graphics = "\nQ\n"
            contents = ""
            graphics = ""
            # Tracks the last fill color emitted so it is not repeated
            current_color = None
            # A non-white page gets a filled unit rectangle as background
            if page.color != self._factory.WHITE:
                if current_color != page.color:
                    current_color = page.color
                    graphics += page.color.to_pdf() + " rg "
                graphics += ("0 0 1 1 re " + "f\n")

            if pdf_background is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            for foreground, pdf_foreground, pdf_mask in zip(
                    page.foreground, pdf_foregrounds, pdf_masks):
                # The mask is registered as a resource but not painted itself
                if pdf_mask is not None:
                    pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                    im_index += 1
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
                if (foreground.color is not None
                        and current_color != foreground.color):
                    current_color = foreground.color
                    graphics += foreground.color.to_pdf() + " rg "
                graphics += "/Im%d Do\n" % im_index
                im_index += 1
            if graphics:
                contents += (before_graphics + graphics.rstrip(" \n") +
                             after_graphics)
            current_color = None
            before_text = ("BT\n" + "/F1 1 Tf 3 Tr\n")
            after_text = "\nET\n"
            text = ""
            pdf_annots = []
            for t in page.text:
                if t.text:
                    # Build the text matrix that maps the string onto the
                    # rectangle (t.x, t.y, t.width, t.height)
                    matrix = TransformationMatrix()
                    # Glyph size is 0.5 x 1
                    matrix.scale(2 / len(t.text), 1)
                    matrix.translate(-0.5, -0.5)
                    if t.direction == "ltr":
                        pass
                    elif t.direction == "rtl":
                        matrix.translate(0, -1)
                    elif t.direction == "ttb":
                        matrix.rotate(90)
                    matrix.rotate(-t.rotation)
                    matrix.translate(0.5, 0.5)
                    matrix.scale(t.width, t.height)
                    matrix.translate(t.x, t.y)
                    # The string is written hex-encoded as UTF-16BE
                    text += "%s Tm %s Tj\n" % (
                        matrix.to_pdf(), PdfString().from_bytes(
                            t.text.encode("utf-16-be"), bytes_encoding="hex"))
                if t.external_link is not None or t.internal_link is not None:
                    # Borderless link annotation covering the text rectangle
                    pdf_annot = PdfDict()
                    pdf_annots.append(pdf_annot)
                    pdf_annot.Type = PdfName.Annot
                    pdf_annot.Subtype = PdfName.Link
                    pdf_annot.Border = [0, 0, 0]
                    pdf_annot.Rect = [
                        PdfNumber(t.x),
                        PdfNumber(t.y),
                        PdfNumber(t.x + t.width),
                        PdfNumber(t.y + t.height)
                    ]
                    if t.external_link is not None:
                        pdf_a = PdfDict()
                        pdf_annot.A = pdf_a
                        pdf_a.Type = PdfName.Action
                        pdf_a.S = PdfName.URI
                        pdf_a.URI = t.external_link.decode("latin-1")
                    if t.internal_link is not None:
                        # internal_link is (page index, (x, y))
                        pdf_target_page = pdf_pages[t.internal_link[0]]
                        target_x, target_y = t.internal_link[1]
                        pdf_annot.Dest = [
                            pdf_target_page, PdfName.XYZ,
                            PdfNumber(target_x),
                            PdfNumber(target_y), 0
                        ]
            text = text.rstrip(" \n")
            if text:
                pdf_resources.Font = pdf_font_mapping
                contents += (before_text + text + after_text)
            contents = contents.rstrip(" \n")
            if contents:
                pdf_contents = PdfDict()
                pdf_contents.indirect = True
                pdf_page.Contents = pdf_contents
                if COMPRESS_PAGE_CONTENTS:
                    pdf_contents.Filter = [PdfName.FlateDecode]
                    pdf_contents.stream = zlib.compress(
                        contents.encode("latin-1"), 9).decode("latin-1")
                else:
                    pdf_contents.stream = contents
            if pdf_annots:
                pdf_page.Annots = pdf_annots
            if pdf_xobject:
                pdf_resources.XObject = pdf_xobject
            if pdf_resources:
                pdf_page.Resources = pdf_resources
            # Report progress
            # finished_pages is bound in write_async below, before any
            # make_page coroutine is started
            nonlocal finished_pages
            finished_pages += 1
            if progress_cb:
                progress_cb(finished_pages / len(self._pages))

        finished_pages = 0
        yield from asyncio.gather(*[
            make_page(page, pdf_page, process_semaphore)
            for page, pdf_page in zip(self._pages, pdf_pages)
        ])

        trailer = pdf_writer.trailer

        # The same random 16-byte value is used for both ID entries
        document_id = PdfString().from_bytes(os.urandom(16))
        trailer.ID = [document_id, document_id]

        mark_info = PdfDict()
        mark_info.Marked = PdfBool(True)
        trailer.Root.MarkInfo = mark_info

        struct_tree_root = PdfDict()
        struct_tree_root.Type = PdfName.StructTreeRoot
        trailer.Root.StructTreeRoot = struct_tree_root

        # XMP metadata packet with the pdfaid part/conformance properties
        metadata = PdfDict()
        metadata.indirect = True
        metadata.Type = PdfName.Metadata
        metadata.Subtype = PdfName.XML
        xmp = XMPMeta()
        xmp.set_property(XMP_NS_PDFA_ID, "part", "2")
        xmp.set_property(XMP_NS_PDFA_ID, "conformance", "A")
        metadata_stream = xmp.serialize_to_str().encode("utf-8")
        metadata.Filter = [PdfName.FlateDecode]
        metadata.stream = zlib.compress(metadata_stream, 9).decode("latin-1")
        metadata.Length1 = len(metadata_stream)
        trailer.Root.Metadata = metadata

        # Write pdfrw's output to a temporary file, then let qpdf produce
        # the final file from it
        with TemporaryDirectory(prefix="djpdf-") as temp_dir:
            pdf_writer.write(path.join(temp_dir, "temp.pdf"))
            cmd = [
                QPDF_CMD, "--stream-data=preserve",
                "--object-streams=preserve", "--normalize-content=n",
                "--newline-before-endstream"
            ]
            if LINEARIZE_PDF:
                cmd.extend(["--linearize"])
            cmd.extend([
                path.abspath(path.join(temp_dir, "temp.pdf")),
                path.abspath(outfile)
            ])
            yield from run_command_async(cmd, process_semaphore)
Esempio n. 5
0
'''
usage:   print_two.py my.pdf

Creates print_two.my.pdf

This is only useful when you can cut down sheets of paper to make two
small documents.  Works for double-sided only right now.
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge


def fixpage(page, count=[0]):
    """Return a new page holding two rotated copies of ``page``.

    ``count`` is a mutable default that is deliberately shared between
    calls: it counts how often the function has run, so odd and even calls
    can use different rotations (360/180 degrees on odd calls, 180/0 on
    even ones). The second copy's ``x`` is set to the width of the first.
    """
    count[0] += 1
    is_odd = count[0] % 2

    merged = PageMerge()
    merged.add(page, rotate=180 + 180 * is_odd)
    merged.add(page, rotate=180 * is_odd)
    merged[1].x = merged[0].w
    return merged.render()


# Exactly one command-line argument (the input PDF) is expected; the
# unpacking fails otherwise.
(inpfn,) = sys.argv[1:]
outfn = 'print_two.' + os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
writer = PdfWriter(outfn)
writer.addpages(map(fixpage, pages))
writer.write()
Esempio n. 6
0
import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict


def adjust(page, margin=0, scale=1):
    """Return ``page`` restricted to a view rectangle and scaled.

    The page's ``xobj_box`` is measured first; the view rectangle starts at
    ``(margin, margin)`` and its width and height are the box dimensions
    reduced by ``2 * margin``. The resulting page is scaled by ``scale``.
    """
    left, bottom, right, top = PageMerge().add(page).xobj_box
    view_width = right - left - 2 * margin
    view_height = top - bottom - 2 * margin
    merged = PageMerge().add(
        page, viewrect=(margin, margin, view_width, view_height))
    merged[0].scale(scale)
    return merged.render()


inpfn = 'F:page-number.pdf'
outfn = 'F:poster.' + os.path.basename(inpfn)

reader = PdfReader(inpfn)
writer = PdfWriter(outfn)

# Only the first page of the input is adjusted and written out.
first_page = reader.pages[0]
writer.addpage(adjust(first_page))

# Carry over the document info, falling back to an empty dictionary.
source_info = reader.Info
if not source_info:
    source_info = {}
writer.trailer.Info = IndirectPdfDict(source_info)
writer.write()
    def get(self, format: str, path: str):
        """Convert the requested notebook(s) to PDF and send one merged file.

        ``path`` may name several notebooks: it is split on ``'.ipynb'`` and
        the suffix is re-appended to every non-empty piece. Each notebook is
        converted with ``PDFExporter`` and the resulting documents are
        concatenated with pdfrw before being sent as the response body.

        Args:
            format: Requested export format; only ``'pdf'`` is accepted.
            path: Notebook path(s) as described above.

        Raises:
            web.HTTPError: 500 if ``format`` is not ``'pdf'`` or if the
                conversion of a notebook fails.
        """
        if format != 'pdf':
            # No exception is being handled here, so log with error();
            # exception() would attach an empty traceback to the record.
            self.log.error('format must be pdf')
            raise web.HTTPError(500, 'format must be pdf')

        self.config.PDFExporter.preprocessors = [thermohw.ExtractAttachmentsPreprocessor]
        self.config.PDFExporter.template_file = os.path.join(thermohw_dir, 'homework.tpl')
        self.config.PDFExporter.filters = {'convert_div': thermohw.convert_div,
                                           'convert_raw_html': thermohw.convert_raw_html}
        self.config.PDFExporter.latex_count = 1

        exporter = PDFExporter(config=self.config, log=self.log)
        exporter.writer.build_directory = '.'

        pdfs = []

        path = path.strip('/').strip()
        paths = path.split('.ipynb')

        for path in paths:
            if not path:
                continue
            path += '.ipynb'
            # If the notebook relates to a real file (default contents manager),
            # give its path to nbconvert.
            ext_resources_dir: Union[str, None]
            if hasattr(self.contents_manager, '_get_os_path'):
                os_path: str = self.contents_manager._get_os_path(path)
                ext_resources_dir = os.path.dirname(os_path)
            else:
                ext_resources_dir = None

            model: Dict[str, str] = self.contents_manager.get(path=path)
            name: str = model['name']
            if model['type'] != 'notebook':
                # not a notebook, redirect to files
                return FilesRedirectHandler.redirect_to_files(self, path)

            nb = model['content']

            self.set_header('Last-Modified', model['last_modified'])

            # create resources dictionary
            mod_date: str = model['last_modified'].strftime(text.date_format)
            nb_title: str = os.path.splitext(name)[0]

            config_dir: str = self.application.settings['config_dir']

            resource_dict: Dict[str, str] = {
                "metadata": {
                    "name": nb_title,
                    "modified_date": mod_date
                },
                "config_dir": config_dir,
            }

            if ext_resources_dir:
                resource_dict['metadata']['path'] = ext_resources_dir

            output: bytes
            try:
                output, _ = exporter.from_notebook_node(
                    nb,
                    resources=resource_dict
                )
            except Exception as e:
                self.log.exception("nbconvert failed: %s", e)
                raise web.HTTPError(500, "nbconvert failed: %s" % e)

            pdfs.append(io.BytesIO(output))

        # Concatenate the pages of all converted notebooks into one PDF
        writer = PdfWriter()
        for pdf in pdfs:
            writer.addpages(PdfReader(pdf).pages)
        with io.BytesIO() as bio:
            writer.write(bio)
            output = bio.getvalue()

        # Force download if requested
        if self.get_argument('download', 'false').lower() == 'true':
            filename = 'final_output.pdf'
            self.set_header('Content-Disposition',
                            'attachment; filename="{}"'.format(filename))

        # MIME type
        if exporter.output_mimetype:
            self.set_header('Content-Type',
                            '{}; charset=utf-8'.format(exporter.output_mimetype))

        self.set_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
        self.finish(output)
Esempio n. 8
0
def go(inpfn, outfn):
    """Build ``outfn`` from the pages of ``inpfn`` using ``get4``.

    ``get4`` is called repeatedly with the list of remaining pages until
    that list is empty; each result is added to the output as one page.
    """
    remaining = PdfReader(inpfn).pages
    out = PdfWriter()
    while remaining:
        sheet = get4(remaining)
        out.addpage(sheet)
    out.write(outfn)
Esempio n. 9
0
# NOTE(review): ``path`` is only assigned here when ``args.path`` is truthy;
# presumably a default is set earlier in the file -- confirm.
if args.path:
    path = args.path
    if args.verbose:
        print("Searching {} for PDF files.\n".format(path))

# Generate a list of file names (includes the full path)
fileList = []
for filePath in glob(path + "/*.pdf"):
    if args.verbose:
        print("Found {}".format(filePath))
    fileList.append(filePath)

# sort the list in 'natural' order
sortedFiles = natsorted(fileList)

# loop through the list of PDFs, and add them to a new PDF
outFile = PdfWriter()
for pdf in sortedFiles:
    x = PdfReader(pdf)
    if args.verbose:
        print("Adding {} pages from {} to the combined file.".format(
            x.numPages, pdf))
    outFile.addpages(x.pages)

# ``combinedFile`` is defined outside this excerpt
outFile.write(combinedFile)

# In verbose mode the written file is read back to report its page count
if args.verbose:
    m = PdfReader(combinedFile)
    print("\nCombined file created at {} with a total of {} pages.".format(
        combinedFile, m.numPages))
Esempio n. 10
0
    def __init__(self, *args):
        """Write the pages selected by ``args[0]`` in reverse order.

        With a single argument the destination is the part of that argument
        before the first ``'['``; otherwise it is the second argument.

        Raises:
            Exception: If no argument is given.
        """
        if not args:
            raise Exception('Need at least a file to slice.')

        source = args[0]
        if len(args) == 1:
            dest = source.split('[')[0]
        else:
            dest = args[1]
        selected = get_document_pages(source)
        PdfWriter().addpages(reversed(selected)).write(dest)
Esempio n. 11
0
# Remove the first two pages (the title sheet) from a PDF

from pdfrw import PdfReader, PdfWriter

input_file = "example.pdf"
output_file = "example-updated.pdf"

# Set up the reader and writer objects
reader_input = PdfReader(input_file)
writer_output = PdfWriter()

# Walk through the pages one by one, skipping the first two
for current_page, page in enumerate(reader_input.pages):
    if current_page <= 1:
        continue
    writer_output.addpage(page)
    print("adding page %i" % (current_page + 1))

# Write the modified content to disk
writer_output.write(output_file)
Esempio n. 12
0
---

gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=stamp-test.pdf -dBATCH stamp.pdf prior-post-pro-vel-cropped.pdf
pdfnup stamp-test.pdf --nup 2x1 --landscape --outfile stamp-test-side.pdf

pdfjam --keepinfo --landscape --trim "100mm 0mm 0mm 0mm" --clip true stamp-test-side.pdf -o test-side-cropped.pdf
pdfjam --keepinfo --landscape --trim "0mm 0mm 100mm 0mm" --clip true stamp-test-side.pdf -o stamp-side-cropped.pdf

from pdfrw import PdfReader, PdfWriter, PageMerge
ipdf = PdfReader('test-side-cropped.pdf')
wpdf = PdfReader('stamp-side-cropped.pdf')
# Merge the first page of the stamp file onto the first page of the input,
# then write the modified input document out.
base_page = ipdf.pages[0]
stamp_page = wpdf.pages[0]
PageMerge(base_page).add(stamp_page).render()
PdfWriter().write('newfile.pdf', ipdf)

pdfcrop --margins '-120 5 -140 5' newfile.pdf newfile-cropped.pdf


---





# coding: utf-8
from PyPDF2 import PdfFileWriter, PdfFileReader
# Merge the first page of the stamp PDF onto the first page of the input.
# NOTE(review): ``output`` is created but nothing is added to it or written
# in the lines shown, and the two files opened below are never closed --
# the snippet looks cut off; confirm against the full source.
output = PdfFileWriter()
ipdf = PdfFileReader(open('prior-post-pro-vel-cropped.pdf', 'rb'))
wpdf = PdfFileReader(open('prior-posterior-stamp.pdf', 'rb'))
watermark = wpdf.getPage(0)
page = ipdf.getPage(0)
page.mergePage(watermark)
Esempio n. 13
0
    def merge_attachment(self):
        """Merge the label attachments of the active pickings into one PDF.

        Attachments of every picking in ``active_ids`` are collected, each
        of their pages is scaled with pdfrw (per carrier type), the result
        is re-read with PyPDF2 to adjust the media boxes, and the final
        document is stored base64-encoded in a new ``view.report`` record.

        Returns:
            dict: An ``ir.actions.act_window`` action opening that record.
        """
        filename = 'Print Shipping Labels.pdf'
        picking_obj = self.env['stock.picking']
        picking = picking_obj.browse(self._context.get('active_ids'))
        lst =[]
        writer = PdfWriter()
        for pick in picking:
            pick.shipping_label_print_bool = True
            # Adjacent string literals are joined before the % formatting,
            # i.e. this is 'Shipping labels%s' % pick.name
            ship_name = 'Shipping labels' "%s" %pick.name
            # Without ship_label_bool all attachments of the picking are
            # taken, otherwise only the one named ship_name
            if not pick.ship_label_bool :
                attachments = self.env['ir.attachment'].search([('res_id','=',pick.id)])
            else:
                attachments = self.env['ir.attachment'].search([('res_id','=',pick.id),('name','=',ship_name)])
            for att in attachments:
                lst.append(att)
#             writer = PdfFileWriter()
#             inpfn, = sys.argv[1:]
#             outfn = '4up.' + os.path.basename(inpfn)
#             pages = PdfReader(inpfn).pages
         
        # NOTE(review): get4 reads ``pick`` from the enclosing scope. By the
        # time it is called the loop above has finished, so ``pick`` is the
        # last picking iterated, not necessarily the one owning the page --
        # confirm whether ``pick1`` was intended.
        def get4(srcpages):
            if not pick.ship_label_bool:
                scale = 0.35
                srcpages = PageMerge() + srcpages
                x_increment, y_increment = (scale * i for i in srcpages.xobj_box[2:])
                for i, page in enumerate(srcpages):
                    page.scale(scale)
                    page.x = x_increment if i & 1 else 0
                    page.y = 0 if i & 2 else y_increment
                return srcpages.render()  
            if pick.ship_label_bool:
                scale = 0.88
                srcpages = PageMerge() + srcpages
                x_increment, y_increment = (scale * i for i in srcpages.xobj_box[2:])
                for i, page in enumerate(srcpages):
                    page.scale(scale)
                     
    #                 page.x = x_increment if i & 1 else 0
    #                 page.y = 0 if i & 2 else y_increment
    #                 print "parrrrrrrrrrrrrrrrrrrrrr",page.x,page.y
                return srcpages.render() 
                
        # Same as the second branch of get4: scale only, no repositioning
        def get4_fedex(srcpages):
            scale = 0.88
            srcpages = PageMerge() + srcpages
            x_increment, y_increment = (scale * i for i in srcpages.xobj_box[2:])
            for i, page in enumerate(srcpages):
                page.scale(scale)
                 
#                 page.x = x_increment if i & 1 else 0
#                 page.y = 0 if i & 2 else y_increment
#                 print "parrrrrrrrrrrrrrrrrrrrrr",page.x,page.y
            return srcpages.render()  
        for pdf in lst:
            
            # NOTE(review): base64.decodestring was removed in Python 3.9;
            # base64.decodebytes is the replacement if this runs on Python 3.
            pages = PdfReader(BytesIO(base64.decodestring(pdf.datas))).pages
            pick1 = picking_obj.browse(pdf.res_id)
         
            
            # Each source page is processed on its own (one-element slices);
            # pages of carriers other than 'ups'/'fedex' are not added
            for index in range(0, len(pages), 1):
                if pick1.carrier_id.delivery_type =='ups':    
                    writer.addpage(get4(pages[index:index + 1]))
                if pick1.carrier_id.delivery_type =='fedex':    
                    writer.addpage(get4_fedex(pages[index:index + 1]))
                    
            # Return merged PDF
        # Serialise with pdfrw, then reopen with PyPDF2 to edit media boxes
        s = BytesIO()
        writer.write(s)
        reader = PdfFileReader(s)
        writer = PdfFileWriter()
        # NOTE(review): ``pick1`` below is whatever the last iteration of
        # the loop above left behind, so every page is treated according to
        # the last attachment's picking; it is unbound when ``lst`` is empty.
        for page in range(0, reader.getNumPages()):
            p=reader.getPage(page)
            if pick1.carrier_id.delivery_type =='fedex' and not pick1.ship_label_bool :
                p.mediaBox.lowerRight = (900, 145)
                p.mediaBox.lowerLeft = (-600, 390)
                p.mediaBox.upperLeft = (99, 500)
                p.mediaBox.upperRight = (530, 680)
            if pick1.carrier_id.delivery_type =='ups' and not pick1.ship_label_bool:
                p.mediaBox.lowerRight = (450, 145)
                p.mediaBox.upperRight = (425, 600) 
                p.mediaBox.lowerLeft = (-150, 275)
                p.mediaBox.upperLeft = (-5, 565)
            writer.addPage(p)
        s = BytesIO()
        writer.write(s)
        out = base64.b64encode(s.getvalue())
        view_report_status_id=self.env['view.report'].create({'file_name':out,'datas_fname':filename})
        return {
        'res_id'   :view_report_status_id.id,
        'name'     :'Print Shipping Labels',
        'view_type':'form',
        'view_mode':'form',
        'res_model':'view.report',
        'view_id'  : False ,
        'type'     :'ir.actions.act_window',
            }
Esempio n. 14
0
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            """Convert image ``f`` with img2pdf and verify the resulting PDF.

            The converted bytes are parsed with pdfrw, and the trailer, page
            tree, per-page dictionaries and embedded image data are compared
            against values derived from the original image opened with PIL.
            Finally the parsed result and the PDF at ``out`` are both
            re-serialised with pdfrw and required to be byte-identical.

            The defaults of ``f``, ``out`` and ``with_pdfrw`` are evaluated
            once, when this function is defined.
            """
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata,
                                     nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()),
                             [PdfName.Info, PdfName.Root, PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            # NOTE(review): len() returns an int, so comparing it with the
            # strings '1' / '2' is never true and neither branch below runs.
            # Presumably x.Root.Pages.Count was meant -- confirm before
            # changing, since the Size assertions would then start executing.
            if len(x.Root.Pages.Kids) == '1':
                self.assertEqual(x.Size, '7')
                self.assertEqual(len(x.Root.Pages.Kids), 1)
            elif len(x.Root.Pages.Kids) == '2':
                self.assertEqual(x.Size, '10')
                self.assertEqual(len(x.Root.Pages.Kids), 2)
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()),
                             [PdfName.Pages, PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                # 96 dpi is assumed when the image carries no dpi information
                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                # Expected page size: pixels converted at 72 units per inch
                pagewidth = 72.0 * imgwidthpx / ndpi[0]
                pageheight = 72.0 * imgheightpx / ndpi[1]

                # Format a number as an integer when it is whole, otherwise
                # with up to four decimals and trailing zeros removed
                def format_float(f):
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()), [
                    PdfName.Contents, PdfName.MediaBox, PdfName.Parent,
                    PdfName.Resources, PdfName.Type
                ])
                self.assertEqual(cur_page.MediaBox, [
                    '0', '0',
                    format_float(pagewidth),
                    format_float(pageheight)
                ])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(), [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(), [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                self.assertEqual(
                    cur_page.Contents.stream,
                    "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                    "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter,
                    [[PdfName.DCTDecode], [PdfName.JPXDecode],
                     [PdfName.FlateDecode], [PdfName.CCITTFaxDecode]])
                # test if the colorspace is valid
                self.assertIn(imgprops.ColorSpace, [
                    PdfName.DeviceGray, PdfName.DeviceRGB, PdfName.DeviceCMYK
                ])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [[PdfName.DCTDecode],
                                       [PdfName.JPXDecode]]:
                    self.assertEqual(cur_page.Resources.XObject.Im0.stream,
                                     convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    # Prepend a TIFF header so PIL can open the raw stream
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == [PdfName.FlateDecode]:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    # Translate the PDF colour space name into a PIL mode
                    colorspace = imgprops.ColorSpace
                    if colorspace == PdfName.DeviceGray:
                        colorspace = 'L'
                    elif colorspace == PdfName.DeviceRGB:
                        colorspace = 'RGB'
                    elif colorspace == PdfName.DeviceCMYK:
                        colorspace = 'CMYK'
                    else:
                        raise Exception("invalid colorspace")
                    im = Image.frombytes(
                        colorspace,
                        (int(imgprops.Width), int(imgprops.Height)), imgdata)
                    # Pixel data is only compared for mode '1' and for modes
                    # outside RGB/L/CMYK/CMYK;I; other modes are not checked
                    if orig_img.mode == '1':
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("L").tobytes())
                    elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                        self.assertEqual(im.tobytes(),
                                         orig_img.convert("RGB").tobytes())
                    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not
                    # have the close() method
                    try:
                        im.close()
                    except AttributeError:
                        pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
Esempio n. 15
0
 def save_to_file(pdf_obj, file_path):
     """Write ``pdf_obj`` to ``file_path`` with pdfrw.

     Only the last three '/'-separated components of the path are included
     in the debug log message.
     """
     path_tail = file_path.split('/')[-3:]
     logger.debug("Saving to file: " + '/'.join(path_tail))
     PdfWriter().write(file_path, pdf_obj)
Esempio n. 16
0
'''

import sys
import os

# import find_pdfrw
from pdfrw import PdfReader, PdfWriter

# Command line: <input pdf> <rotation in degrees> <output pdf>
inpfn = sys.argv[1]
rotate = sys.argv[2]
outfn = sys.argv[3]

# The rotation is added to each page's Rotate entry, so it has to be a whole
# multiple of a quarter turn.  Validate explicitly instead of with ``assert``,
# which is stripped when Python runs with -O.
rotate = int(rotate)
if rotate % 90 != 0:
    raise ValueError("rotation must be a multiple of 90, got %d" % rotate)

# ranges = [[int(y) for y in x.split('-')] for x in ranges]
trailer = PdfReader(inpfn)
pages = trailer.pages

# A single 1-based, inclusive range covering the whole document.
ranges = [[1, len(pages)]]

for onerange in ranges:
    # Normalise a one-element range [n] to [n, n].
    onerange = (onerange + onerange[-1:])[:2]
    for pagenum in range(onerange[0] - 1, onerange[1]):
        # Add to whatever rotation the page already has (looked up through
        # ``inheritable``), wrapping at a full turn.
        pages[pagenum].Rotate = (int(pages[pagenum].inheritable.Rotate or 0) +
                                 rotate) % 360

outdata = PdfWriter()
outdata.trailer = trailer
outdata.write(outfn)
Esempio n. 17
0
    if not isinstance(initial, list):
        initial = [initial]
    files = []
    queue = initial[:]
    while bool(queue):
        current = queue.pop(0)
        if isfile(current) and splitext(current)[1] in ext:
            files.append(current)
        elif isdir(current):
            sub = [join(current,x) for x in listdir(current)]
            queue += sub

    logging.info("Found {} {} files".format(len(files), ext))
    return files

pdfs = get_data_files(args.directory, '.pdf')

logging.info("Chopping pdfs")
for pdf in pdfs:
    logging.info("Reading: {}".format(pdf))
    data = PdfReader(pdf)
    edited = PdfWriter()

    # Copy every page except the first into the new document.
    remaining_pages = data.pages[1:]
    for page in remaining_pages:
        edited.addpage(page)

    # Keep the original file name, but place it in the output directory.
    source_name = split(pdf)[1]
    out_name = join(args.out, source_name)
    logging.info("Writing to: {}".format(out_name))
    edited.write(out_name)
    logging.info("-----------")
Esempio n. 18
0
def popups_write_pdf(file):
    """Write the module-level ``popup_pdf`` document to ``file``.

    The writer is created for PDF version 1.5, and its compression setting
    is read from ``pdf_popup_config['compress']``.
    """
    from pdfrw import PdfWriter

    writer_options = {
        'version': '1.5',
        'compress': pdf_popup_config['compress'],
    }
    writer = PdfWriter(**writer_options)
    writer.trailer = popup_pdf
    writer.write(file)
Esempio n. 19
0
    result[-1].x += result[0].w
    return result.render()


parser = argparse.ArgumentParser()
parser.add_argument("input", help="Input pdf file name")
parser.add_argument(
    "-p", "--padding",
    action="store_true",
    help="Padding the document so that all pages use the same type of sheet")
args = parser.parse_args()

inpfn = args.input
outfn = 'booklet.' + os.path.basename(inpfn)
ipages = PdfReader(inpfn).pages

# With --padding the page count is rounded up to a multiple of four,
# otherwise to a multiple of two.
pad_to = 4 if args.padding else 2

# Make sure we have a correct number of sides: append None placeholders
# until the page count is a multiple of pad_to.
missing = -len(ipages) % pad_to
ipages.extend([None] * missing)

# Pair pages from the outside in: (last, first), then (second, second-last),
# and so on until at most two pages remain.
opages = []
while len(ipages) > 2:
    opages.append(fixpage(ipages.pop(), ipages.pop(0)))
    opages.append(fixpage(ipages.pop(0), ipages.pop()))

# Any remaining pages are appended unchanged.
opages.extend(ipages)

PdfWriter(outfn).addpages(opages).write()
Esempio n. 20
0
def copyrightParse(sourceKey, bucketName, context):
    """Stamp a copyright watermark on every page of a PDF stored in S3.

    The object ``sourceKey`` in ``bucketName`` is downloaded, a watermark
    strip as wide as the first page is generated with ReportLab, merged onto
    each page with pdfrw, and the result is uploaded back to the same key
    with a ``copyright`` metadata entry holding the current timestamp.

    Args:
        sourceKey: Key of the PDF object to watermark.
        bucketName: Name of the bucket that holds the object.
        context: Not used by this function.

    Returns:
        A status string: either a notice that the object already carries
        ``copyright`` metadata (nothing is changed in that case) or
        ``'Copyright Set: <timestamp>'``.
    """
    # BOTO3 objects
    s3 = boto3.resource('s3')
    s3client = boto3.client('s3')
    # Named s3_object so the builtin ``object`` is not shadowed.
    s3_object = s3.Object(bucketName, sourceKey)

    # Copyright Data
    metadata = s3_object.metadata
    if "copyright" in metadata:
        return 'Copyright already exists - aborting'
    timestampStr = datetime.now().strftime("%d-%b-%Y (%H:%M:%S.%f)")
    metadata['copyright'] = timestampStr

    # Get prelim data from object
    with io.BytesIO(s3_object.get()['Body'].read()) as pdf_content_sample:
        existing_pdf = PdfReader(pdf_content_sample)
        # Get Dimensions of document to make corresponding sized watermark
        mbox = existing_pdf.pages[0].MediaBox
        mediabox = tuple(float(x) for x in mbox)

        ### ReportLab implementation
        # Create memory position for Watermark PDF
        with io.BytesIO() as packet:
            print('Loading PDF file - Watermark generation')
            height = 40
            # Third MediaBox entry of the first page is used as the width.
            width = mediabox[2]

            # create a new PDF with Reportlab
            can = canvas.Canvas(packet)
            can.setPageSize((width, height))

            # Get Copyright content
            copyrightContent = getCopyrightContent()

            # Stylesheet additions
            stylesheet = getSampleStyleSheet()
            style_watermark = stylesheet["Normal"]
            style_watermark.alignment = TA_CENTER
            style_watermark.textColor = colors.Color(0, 0, 0, alpha=0.5)
            style_watermark.fontSize = 8
            # ParagraphStyle's font attribute is ``fontName``; assigning to
            # ``font`` only created an attribute nothing reads.
            style_watermark.fontName = 'Helvetica'
            # Creating Paragraph
            copyright_paragraph = Paragraph(copyrightContent, style_watermark)
            # Creating Table to wrap Paragraph
            table = Table([[copyright_paragraph]])
            # NOTE(review): the background colour is built from 255-based
            # components; confirm that is what colors.Color expects.
            table.setStyle(
                TableStyle([
                    ('BACKGROUND', (0, 0), (-1, -1),
                     colors.Color(255, 255, 255, alpha=0.5)),
                ]))
            # Adding Table to Canvas
            # Make sure the width is an integer!
            print(f'Table width set to {math.floor(width)}')
            table.wrapOn(can, math.floor(width), 15)
            table.drawOn(can, 0, 0)
            # Saving
            can.save()
            # Move to start of memory pointer
            packet.seek(0)

            watermark_input = PdfReader(packet)
            watermark = watermark_input.pages[0]
            # Iterate through pages, updating source file.
            for page in existing_pdf.pages:
                PageMerge(page).add(watermark).render()

            # Serialise the modified document into memory and upload it
            # over the original key together with the updated metadata.
            writer_output = PdfWriter()
            with io.BytesIO() as pdfOutput:
                writer_output.write(pdfOutput, existing_pdf)
                print('File written to PDFWriter')
                pdfOutput.seek(0)
                s3client.upload_fileobj(pdfOutput,
                                        bucketName,
                                        sourceKey,
                                        ExtraArgs={"Metadata": metadata})
            status = f'Copyright Set: {timestampStr}'
    return status
Esempio n. 21
0
def render(source, *, progress_cb=lambda x: None):
    """Export a document as a PDF and return it as an open file object.

    progress_cb will be called with a progress percentage between 0 and
    100.  This percentage calculation is split 50% for the rendering
    of the lines and 50% merging with the base PDF file.  This callback
    also provides an opportunity to abort the process: an error raised
    by the callback is passed upwards to the caller.

    Args:
        source: Passed to ``sources.get_source`` to obtain the document
            source.
        progress_cb: Callable that receives the progress percentage.

    Returns:
        A file object holding the PDF data.  When a base PDF exists and no
        page has stroke data this is the base PDF opened in binary mode;
        otherwise it is a spooled temporary file, rewound to the start,
        containing the written document.
    """
    vector = True  # TODO: Different rendering styles
    source = sources.get_source(source)

    # Whether a base PDF exists for this document.
    uses_base_pdf = source.exists('{ID}.pdf')

    # Document metadata should already be loaded (from device)
    # ...

    # Generate page information
    # If a PDF file was uploaded, but never opened, there may not be
    # a .content file. So, just load a barebones one with a 'pages'
    # key of zero length, so it doesn't break the rest of the
    # process.
    pages = []
    if source.exists('{ID}.content'):
        with source.open('{ID}.content', 'r') as f:
            pages = json.load(f).get('pages', [])

    # Render each page as a pdf
    tmpfh = tempfile.TemporaryFile()
    pdf_canvas = canvas.Canvas(tmpfh, (PDFWIDTH, PDFHEIGHT))
    # TODO: check pageCompression

    # Don't load all the pages into memory, because large notebooks
    # about 500 pages could use up to 3 GB of RAM. Create them by
    # iteration so they get released by garbage collector.
    changed_pages = []
    annotations = []
    for i, page_info in enumerate(pages):
        page = document.DocumentPage(source, page_info, i)
        if source.exists(page.rmpath):
            changed_pages.append(i)
        page.render_to_painter(pdf_canvas, vector)
        annotations.append(page.get_grouped_annotations())
        progress_cb((i + 1) / len(pages) * 50)
    pdf_canvas.save()
    tmpfh.seek(0)

    # This new PDF represents just the notebook. If there was a
    # parent PDF, merge it now.
    if uses_base_pdf and not changed_pages:
        # Since there is no stroke data, just return the PDF data.  The
        # rendered notebook is not needed on this path, so release its
        # temporary file instead of leaving it open.
        tmpfh.close()
        progress_cb(100)

        log.info('exported pdf')
        return source.open('{ID}.pdf', 'rb')

    # PDF exists, stroke data exists, so mix them together.
    if uses_base_pdf:
        rmpdfr = PdfReader(tmpfh)
        basepdfr = PdfReader(source.open('{ID}.pdf', 'rb'))
    else:
        basepdfr = PdfReader(tmpfh)
        # Alias, which is used for annotations and layers.
        rmpdfr = basepdfr

    # If making a 'layered' PDF (with optional content groups,
    # OCGs), associate the annotations with the layer.

    # This property list is put into the rmpdfr document, which
    # will not have any existing properties.
    ocgprop = IndirectPdfDict(OCGs=PdfArray(), D=PdfDict(Order=PdfArray()))

    # Decided once, before the loop: the flag is read again after the loop,
    # so binding it only inside the loop body would raise NameError for a
    # document without pages.
    apply_ocg = False  #TODO configurable? bool(int(QSettings().value(
    #'pane/notebooks/export_pdf_ocg')))

    # NOTE(review): rmpdfr.pages and annotations are indexed with the base
    # document's page index; confirm they always have at least as many
    # entries as basepdfr.pages.
    for i in range(0, len(basepdfr.pages)):
        basepage = basepdfr.pages[i]
        rmpage = rmpdfr.pages[i]

        # Apply OCGs
        if apply_ocg:
            ocgorderinner = do_apply_ocg(basepage, rmpage, i, uses_base_pdf,
                                         ocgprop, annotations)
        else:
            ocgorderinner = None

        # Apply annotations to the rmpage. This must come after
        # applying OCGs, because the annotation may belong to
        # one of those groups.
        apply_annotations(rmpage, annotations[i], ocgorderinner)

        # If this is a normal notebook with highlighting,
        # just add the annotations and forget about the rest,
        # which are page geometry transformations.
        if uses_base_pdf:
            merge_pages(basepage, rmpage, i in changed_pages)

        progress_cb(((i + 1) / rmpdfr.numPages * 50) + 50)

    # Apply the OCG order. The basepdf may have already had OCGs
    # and so we must not overwrite them. NOTE: there are other
    # properties that ought to be carried over, but this is the
    # minimum required.
    if apply_ocg:
        if '/OCProperties' in basepdfr.Root:
            basepdfr.Root.OCProperties.OCGs += ocgprop.OCGs
            basepdfr.Root.OCProperties.D.Order += ocgprop.D.Order
        else:
            basepdfr.Root.OCProperties = ocgprop

    # Write the document into a spooled temporary file and hand it back
    # rewound to the start.
    pdfw = PdfWriter()
    stream = tempfile.SpooledTemporaryFile(SPOOL_MAX)
    pdfw.write(stream, basepdfr)
    stream.seek(0)

    log.info('exported pdf')
    return stream
Esempio n. 22
0
def debug(event, context):
    """Watermark the local ``sample.pdf`` and write ``processed_sample.pdf``.

    A watermark strip as wide as the first page is generated with ReportLab
    and merged onto every page of the source document with pdfrw.

    Args:
        event: Not used by this function.
        context: Not used by this function.
    """
    # Get Source PDF to watermark.  The path is passed straight to
    # PdfReader, so no file handle is opened here and left unclosed.
    filename = "sample.pdf"
    existing_pdf = PdfReader(filename)

    # Get Dimensions of document to make corresponding sized watermark
    mbox = existing_pdf.pages[0].MediaBox
    mediabox = tuple(float(x) for x in mbox)

    with io.BytesIO() as packet:
        height = 40
        # Third MediaBox entry of the first page is used as the width.
        width = mediabox[2]
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet)
        can.setPageSize((width, height))

        # Get Copyright content
        copyrightContent = getCopyrightContent()

        # Stylesheet additions
        stylesheet = getSampleStyleSheet()
        style_watermark = stylesheet["Normal"]
        style_watermark.alignment = TA_CENTER
        style_watermark.textColor = colors.Color(0, 0, 0, alpha=0.5)
        style_watermark.fontSize = 8
        # ParagraphStyle's font attribute is ``fontName``; assigning to
        # ``font`` only created an attribute nothing reads.
        style_watermark.fontName = 'Helvetica'
        # Creating Paragraph
        copyright_paragraph = Paragraph(copyrightContent, style_watermark)
        # Creating Table to wrap Paragraph
        table = Table([[copyright_paragraph]])
        table.setStyle(
            TableStyle([
                ('BACKGROUND', (0, 0), (-1, -1),
                 colors.Color(255, 255, 255, alpha=0.5)),
            ]))
        # Adding Table to Canvas
        table.wrapOn(can, math.floor(width), 15)
        table.drawOn(can, 0, 0)
        # Saving
        can.save()
        # Move to start of memory pointer
        packet.seek(0)

        # Read the generated watermark back as a pdfrw page
        watermark_input = PdfReader(packet)
        watermark = watermark_input.pages[0]
        # Iterate through pages, updating source file.
        for current_page, page in enumerate(existing_pdf.pages):
            print(f'page {current_page}')
            PageMerge(page).add(watermark).render()

        # write the modified content to disk
        writer_output = PdfWriter()
        with open(f"processed_{filename}", "wb") as pdfOutput:
            writer_output.write(pdfOutput, existing_pdf)

        print('Processed PDF - copyright added')
Esempio n. 23
0
def fingerprinter_upload(request):
    """Create uniquely fingerprinted copies of an uploaded PDF.

    Reads the upload from ``request.FILES['pdf-file']`` plus the optional
    POST fields ``copy-count`` (1 when absent or not an integer) and
    ``file-suffix``.  Each copy gets a random ``randomMetaData`` Info entry
    and a fresh document ID, is written into a new randomly named directory
    under ``STATIC_ROOT/fingerprints`` and is also copied into
    ``STATIC_ROOT/drop-pdf`` under a unique name.

    Returns:
        The rendered ``refingerprint_results.html`` response.

    Raises:
        Http404: If no file was uploaded.
    """
    processed_files = []

    pdf_file = request.FILES.get('pdf-file')
    copy_count = request.POST.get('copy-count', 1)
    suffix = request.POST.get('file-suffix', '')

    # Fall back to a single copy only for values int() cannot convert,
    # rather than swallowing every possible exception.
    try:
        copy_count = int(copy_count)
    except (TypeError, ValueError):
        copy_count = 1

    if pdf_file is None:
        raise Http404('file not provided')

    #make save directory
    rand_path = randomword(9)
    fingerprint_dir = os.path.join(settings.BASE_DIR, settings.STATIC_ROOT,
                                   'fingerprints', rand_path)

    os.makedirs(fingerprint_dir)

    s = os.path.splitext(pdf_file.name)
    filename = s[0].replace("'", '').replace('"', '')

    #handle non ascii chars in file name
    #(strangely only wsgi seems to choke on those)
    if isinstance(filename, unicode):
        try:
            filename = unidecode(filename)
        except Exception:
            # Transliteration failed: replace each run of non-ASCII
            # characters with a dot instead.
            filename = re.sub(r'[^\x00-\x7F]+', '.', filename)

    extension = s[1]

    file_content = pdf_file.read()

    content = PdfReader(io.BytesIO(file_content))

    if content.ID is None:
        file_id = 'No ID'
    else:
        file_id = str(content.ID[0]).replace('<', '').replace('>', '')\
                .replace('(', '').replace(')', '')

    #bad file_ids can contain strange characters
    #TODO When we upgrade
    try:
        # The result is discarded; this only checks that the ID can be encoded.
        file_id.encode('utf-8').strip()
    except UnicodeDecodeError:
        file_id = 'Unreadable'

    file_info = {
        'filename': pdf_file.name,
        'size': pdf_file.size,
        'id': file_id,
        'directory_name': rand_path
    }

    for copy_index in range(copy_count):
        if suffix:
            save_filename = filename + '-' + suffix + '-' + str(
                copy_index + 1) + extension
        else:
            save_filename = filename + '-' + str(copy_index + 1) + extension

        file_path = os.path.join(fingerprint_dir, save_filename)

        # Re-parse the upload for every copy so each one starts from the
        # unmodified original.
        content = PdfReader(io.BytesIO(file_content))

        #add some random meta data
        content.Info.randomMetaData = binascii.b2a_hex(
            os.urandom(20)).upper()

        #change id to random id
        md = hashlib.md5(filename)
        md.update(str(time.time()))
        md.update(os.urandom(10))

        # An MD5 hex digest is always exactly 32 characters, so no
        # truncation or padding is needed to keep the ID at length 32.
        new_id = md.hexdigest().upper()

        content.ID = [new_id, new_id]

        PdfWriter(file_path, trailer=content).write()

        #copy file into online annotator with unique name
        annotation_name = filename + '-' + suffix + '-' \
                + str(copy_index + 1) + '-' + rand_path + extension

        annotation_path = os.path.join(settings.BASE_DIR,
                                       settings.STATIC_ROOT, 'drop-pdf',
                                       annotation_name)

        shutil.copy(file_path, annotation_path)

        #For some reason nested directories do not provide files from static.
        #We need to clean up double "settings" file and sanify the basic setup but
        #For now serve the file from a dedicated URL.

        copy_info = {
            'filename': save_filename,
            'download_path': os.path.join(rand_path, save_filename),
            'docdrop_link': annotation_name,
            'id': content.ID[0]
        }

        processed_files.append(copy_info)

    data = {
        'processed_files': processed_files,
        'file_info': file_info,
        'archive_name': filename
    }

    return render_to_response('refingerprint_results.html', data)
Esempio n. 24
0
from pdfrw import PdfReader, PdfWriter
pages = PdfReader('Official Ielts Practice Materials 2.pdf').pages
# Each part is (first, stop) in 1-based page numbers.  As with range(), the
# stop page itself is not copied, although it appears in the file name.
parts = [(15, 28)]
for part in parts:
    first_page, stop_page = part
    outdata = PdfWriter(f'pages_{first_page}_{stop_page}.pdf')
    for pagenum in range(first_page, stop_page):
        # Page numbers are 1-based, list indexes 0-based.
        outdata.addpage(pages[pagenum - 1])
    outdata.write()
Esempio n. 25
0
# -*- coding: utf-8 -*-
import os, sys, datetime
from pdfrw import PdfReader, PdfWriter

writer = PdfWriter()
now = datetime.datetime.now()
data_path = os.getcwd() + "/data/"
# Input directory is "<year>_<argv[1]>week" below ./data/.
dir_path = data_path + str(now.year) + '_' + sys.argv[1] + "week"

# The merged file goes into a "result" subdirectory, created on first use.
result_dir = dir_path + "/result"
if not os.path.exists(result_dir):
    os.mkdir(result_dir)


def _leading_number(name):
    """Return the integer that precedes the first '.' in a file name."""
    return int(name.split(".")[0])


files = [entry for entry in os.listdir(dir_path) if entry.endswith('.pdf')]
# Merge in numeric order of the file-name prefix, so 10.pdf follows 9.pdf.
for fname in sorted(files, key=_leading_number):
    print ("[" + fname + "] Merged")
    merged_source = PdfReader(os.path.join(dir_path, fname))
    writer.addpages(merged_source.pages)

merged_name = str(now.year) + "_" + sys.argv[1] + "_merge.pdf"
writer.write(result_dir + "/" + merged_name)
print("\nENDED MERGE REPORT!")
Esempio n. 26
0
    # Multiple copies of first page used as a placeholder to
    # get blank page on back.
    for p1, p2 in zip(pages, pages[1:]):
        if p1[1] is p2[1]:
            pages.remove(p1)

    return IndirectPdfDict(
        Type=PdfName.Page,
        Contents=PdfDict(stream=''.join(page.stream for page in pages)),
        MediaBox=PdfArray([0, 0, x, y]),
        Resources=PdfDict(XObject=PdfDict(pages), ),
    )


# Exactly one command-line argument is expected: the input PDF.
(inpfn,) = sys.argv[1:]
outfn = 'booklet.' + os.path.basename(inpfn)
pages = PdfReader(inpfn, decompress=False).pages

# Use page1 as a marker to print a blank at the end
if len(pages) % 2 == 1:
    pages.append(pages[0])

# Pair pages from the outside in: (last, first), then (second, second-last),
# and so on until at most two pages remain.
bigpages = []
while len(pages) > 2:
    last_page, first_page = pages.pop(), pages.pop(0)
    bigpages.append(fixpage(last_page, first_page))
    next_first, next_last = pages.pop(0), pages.pop()
    bigpages.append(fixpage(next_first, next_last))

# Any remaining pages are appended unchanged.
bigpages.extend(pages)

booklet = PdfWriter()
booklet.addpages(bigpages)
booklet.write(outfn)
Esempio n. 27
0
    def do_test(self, params, prev_results=[''], scrub=False):
        """Run one example program and compare its output hash to expectations.

        ``params`` is a whitespace-separated command line: the program name,
        then the source file, then any further arguments.  Tokens found in
        the module-level ``lookup`` mapping are replaced by the mapped value
        (this method stores ``hash -> output file`` entries there).  The
        program is run with the current interpreter from inside the
        destination subdirectory, the MD5 of the output file is compared
        with ``expected.results['examples/<params joined by _>']``, and a
        result line is appended to ``hashfile`` whatever the outcome.

        Args:
            params: Command line of the example, as a single string.
            prev_results: One-element list; element 0 is set to the hash of
                the output produced by this call.  The default list is
                shared between calls -- presumably on purpose; confirm
                before changing it.
            scrub: When true, the program's output is re-read with
                ``PdfReader`` and rewritten with ``PdfWriter`` as
                ``final.<name>``, and that rewritten file is the one hashed.
        """
        params = params.split()
        hashkey = 'examples/%s' % '_'.join(params)
        params = [lookup.get(x, x) for x in params]
        progname = params[0]
        params[0] = prog_dir % progname
        srcf = params[1]
        params.insert(0, sys.executable)
        subdir, progname = os.path.split(progname)
        subdir = os.path.join(dstdir, subdir)
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        # Work inside the destination subdirectory from here on.
        os.chdir(subdir)
        dstf = '%s.%s' % (progname, os.path.basename(srcf))
        # From here on ``scrub`` is either falsy or the name of the file the
        # program writes, while ``dstf`` becomes the scrubbed 'final.' file.
        scrub = scrub and dstf
        dstf = dstf if not scrub else 'final.%s' % dstf
        hash = '------no-file-generated---------'
        expects = expected.results[hashkey]

        # If the test has been deliberately skipped,
        # we are done.  Otherwise, execute it even
        # if we don't know about it yet, so we have
        # results to compare.

        result = 'fail'
        size = 0
        try:
            if 'skip' in expects:
                result = 'skip requested'
                return self.skipTest(result)
            elif 'xfail' in expects:
                result = 'xfail requested'
                return self.fail(result)

            exists = os.path.exists(dstf)
            # Re-run when there is something to compare against, or when no
            # output exists yet; stale outputs are removed first.
            if expects or not exists:
                if exists:
                    os.remove(dstf)
                if scrub and os.path.exists(scrub):
                    os.remove(scrub)
                subprocess.call(params)
                if scrub:
                    PdfWriter().addpages(PdfReader(scrub).pages).write(dstf)
            with open(dstf, 'rb') as f:
                data = f.read()
            size = len(data)
            if data:
                hash = hashlib.md5(data).hexdigest()
                # Remember the output so later calls can refer to it by hash.
                lookup[hash] = dstf
                prev_results[0] = hash
            else:
                # An empty output file is removed and keeps the placeholder
                # hash.
                os.remove(dstf)
            if expects:
                if len(expects) == 1:
                    expects, = expects
                    self.assertEqual(hash, expects)
                else:
                    self.assertIn(hash, expects)
                result = 'pass'
            else:
                result = 'skip'
                self.skipTest('No hash available')
        finally:
            # Always record size, outcome, key and hash, including when an
            # assertion or skip exception is propagating.
            result = '%8d %-20s %s %s\n' % (size, result, hashkey, hash)
            with open(hashfile, 'ab') as f:
                f.write(convert_store(result))
Esempio n. 28
0
from pdfrw import PdfReader, PdfWriter

# Section directories, processed in this order.
sections = [
    'Introduction', '1_Experimental_datasets',
    '2_Structured_data_from_literature', '3_Analysis_tools',
    '4_Simulation_environments', '5_Model_sharing',
    '6_Computing_infrastructure', '7_Open_source_initiatives', '8_Web_portals'
]
#sections = ['Introduction','1_Experimental_datasets', '2_Structured_data_from_literature']

for section in sections:

    print("++++++++++++++++++++++++++++++++++\n+  Adding section: %s\n+" %
          section)
    big_file = PdfWriter()

    files = sorted(os.listdir(section))

    for f in files:
        fpath = section + '/' + f
        # Convert every presentation in the section except the template.
        if os.path.isfile(fpath) and fpath.endswith(
                'pptx') and f != 'Template.pptx':
            print("+   Incorporating: %s" % fpath)
            call([
                "libreoffice", "--headless", "--invisible", "--convert-to",
                "pdf", fpath
            ])
            # Swap only the extension: str.replace('pptx', 'pdf') would also
            # rewrite any other "pptx" that occurs earlier in the file name.
            pdf_file_name = os.path.splitext(f)[0] + '.pdf'
Esempio n. 29
0
    # You probably should just wrap each JS action with a try/catch,
    # because Chrome does no error reporting or even logging otherwise;
    # you just get a silent failure.
    page.AA.O = make_js_action("""
try {
  %s
} catch (e) {
  app.alert(e.message);
}
    """ % (script))

    page.Annots = PdfArray(annots)
    return page


if len(sys.argv) > 1:
    # The with-block closes the script file even if parsing a field line
    # fails; previously the handle was never closed.
    with open(sys.argv[1], 'r') as js_file:
        fields = []
        for line in js_file:
            # Field declarations are the leading '/// ' lines: a name
            # followed by numeric parameters.  Stop at the first other line.
            if not line.startswith('/// '):
                break
            pieces = line.split()
            params = [pieces[1]] + [float(token) for token in pieces[2:]]
            fields.append(make_field(*params))

        # Rewind so the whole file, declarations included, is used as the
        # page script.
        js_file.seek(0)
        script_text = js_file.read()

    out = PdfWriter()
    out.addpage(make_page(fields, script_text))
    out.write('result.pdf')
Esempio n. 30
0
parser.add_argument('--evenrev',
                    dest='evenrev',
                    action='store_const',
                    const=True,
                    default=False,
                    help='reverses the even pages before shuffling')

args = parser.parse_args()

# The shuffling magic
even = PdfReader(args.evenFile[0])
odd = PdfReader(args.oddFile[0])
isEvenReversed = args.evenrev
isOddReversed = args.oddrev
all = PdfWriter()
blank = PageMerge()
blank.mbox = [0, 0, 612, 792]  # 8.5 x 11
blank = blank.render()

if isEvenReversed and not isOddReversed:
    for i in range(0, len(odd.pages)):
        all.addpage(odd.pages[i])
        all.addpage(even.pages[len(even.pages) - 1 - i])
elif isOddReversed and not isEvenReversed:
    for i in range(0, len(odd.pages)):
        all.addpage(odd.pages[len(odd.pages) - 1 - i])
        all.addpage(even.pages[i])
elif isEvenReversed and isOddReversed:
    for i in range(0, len(odd.pages)):
        all.addpage(odd.pages[len(odd.pages) - 1 - i])