Example #1
0
    def download(self, path, to):
        """Copy the data object at ``path`` into the local file ``to``.

        The object is opened read-only through ``self.session.data_objects``,
        wrapped in a buffered reader and streamed block by block with
        ``_readLargeBlock`` until an empty block is returned.

        :param path: location of the data object to fetch.
        :param to: local filesystem path to write to; an existing file is
            truncated.
        :return: ``to``, unchanged, so callers can chain on the local path.
        """
        conn, desc = self.session.data_objects.open(path, O_RDONLY)
        br = None

        try:
            # Wrap the descriptor inside the try block so the connection is
            # still released if building the reader fails.
            raw = iRODSDataObjectFileRaw(conn, desc)
            br = BufferedRandom(raw)

            # Binary mode: the reader yields raw bytes, and text mode would
            # either reject them (Python 3) or translate newlines (Windows).
            with open(to, 'wb') as wf:
                while True:
                    buf = _readLargeBlock(br)

                    if not buf:
                        # An empty block marks the end of the data.
                        break

                    wf.write(buf)
        finally:
            # Close the reader before giving up the connection it sits on,
            # and release the connection even if closing the reader fails.
            try:
                if br is not None:
                    br.close()
            finally:
                conn.release(True)

        return to
Example #2
0
    def read(self, path, offset, size):
        """Read up to ``size`` bytes of the data object at ``path``.

        The object is opened read-only, the buffered reader is positioned at
        ``offset`` and a single ``read(size)`` is issued.

        :param path: location of the data object to read from.
        :param offset: absolute position to start reading at.
        :param size: maximum number of bytes to request from the reader.
        :return: whatever the buffered reader returns for ``read(size)``, or
            ``None`` when the seek did not end up exactly at ``offset``.
        """
        buf = None
        br = None
        conn = None
        try:
            conn, desc = self.session.data_objects.open(path, O_RDONLY)
            raw = iRODSDataObjectFileRaw(conn, desc)
            br = BufferedRandom(raw)

            # Only read when the reader really landed on the requested
            # position; otherwise the caller gets None.
            if br.seek(offset) == offset:
                buf = br.read(size)
        finally:
            # Nested finally: the connection must be released even if
            # closing the reader raises.
            try:
                if br is not None:
                    br.close()
            finally:
                if conn is not None:
                    conn.release(True)

        return buf
Example #3
0
def merge_pdf(pages, target_pdf, img_format='png'):
    """Convert a sequence of image streams to PDF pages and merge them.

    Each entry of ``pages`` is rewound, converted to a one-page PDF in memory
    and appended to a single PDF document, which is then written to
    ``target_pdf``.

    :param pages: sized iterable of seekable binary streams, each holding one
        image in ``img_format``.
    :param target_pdf: path of the PDF file to create (overwritten if present).
    :param img_format: format name handed to the image loader for every page.
    """
    merged_pdf = PyPDF2.PdfFileWriter()
    log.debug('Merging {} into one PDF document'.format(len(pages)))

    for page in pages:
        # reset binary stream position
        page.seek(0)
        # NOTE(review): this stream is deliberately left open - the page added
        # to the writer presumably still reads from it when the merged
        # document is written below; confirm before closing it earlier.
        pdf_page_stream = BufferedRandom(BytesIO())

        log.debug('Converting image to PDF')
        with wandImage(file=page, format=img_format) as image:
            # Context manager so the converted image is closed even if
            # saving it fails.
            with image.convert('pdf') as img_converted:
                img_converted.save(file=pdf_page_stream)

        log.debug('Adding PDF page to merged document')
        pdf_page_stream.seek(0)
        merged_pdf.addPage(PyPDF2.PdfFileReader(pdf_page_stream).getPage(0))

    # Close the output file deterministically so the document is flushed to
    # disk and the handle is not leaked.
    with open(target_pdf, 'wb') as target_file:
        merged_pdf.write(target_file)
Example #4
0
def split_pdf(src_filename, pdf_res, page_list=None, img_format='png'):
    """Render pages of a PDF file into separate in-memory image streams.

    Every selected page is first written out as a one-page PDF in memory and
    then converted to ``img_format`` at the requested resolution.

    :param src_filename: path of the PDF file to read.
    :param pdf_res: resolution passed to the image loader when opening each
        intermediate one-page PDF.
    :param page_list: page indices to extract, as accepted by the reader's
        ``getPage``; ``None`` or an empty list selects every page.
    :param img_format: target image format for the converted pages.
    :return: list of binary streams, one per extracted page, in the order of
        ``page_list``. The streams are returned as left by the save call
        (not rewound).
    """
    pages = []

    # Keep the source file open only while pages are being copied out of it;
    # each page is fully written to its own in-memory stream inside the loop.
    with open(src_filename, 'rb') as src_file:
        pdf_source = PyPDF2.PdfFileReader(src_file)
        page_count = pdf_source.getNumPages()

        log.debug('Source PDF {} contains {} pages'.format(src_filename, page_count))

        if not page_list:
            log.debug('Extracting all pages from PDF')
            # no pages defined, so create a list of all page numbers
            page_list = list(range(page_count))
        else:
            log.debug('Extracting {} page(s) from PDF'.format(len(page_list)))

        for page_nr in page_list:

            log.debug('Processing page {}'.format(page_nr))
            page = pdf_source.getPage(page_nr)

            log.debug('Extracting page from source PDF')
            # extract single page and save it to a temporary stream
            pdf_writer = PyPDF2.PdfFileWriter()
            pdf_writer.addPage(page)
            pdf_page_stream = BufferedRandom(BytesIO())
            pdf_writer.write(pdf_page_stream)
            # reset the binary stream's position to the beginning
            pdf_page_stream.seek(0)

            log.debug('Converting PDF page to image ({})'.format(img_format))
            # Define the resolution when opening the intermediate PDF for better quality converted PNGs
            # http://stackoverflow.com/questions/17314382/improve-quality-of-wand-conversion
            with wandImage(file=pdf_page_stream, resolution=pdf_res) as pdf_page:
                # Close the converted image once it has been saved; only the
                # output stream needs to outlive this iteration.
                with pdf_page.convert(img_format) as image_page:
                    image_page_stream = BufferedRandom(BytesIO())
                    image_page.save(file=image_page_stream)

            pages.append(image_page_stream)

    return pages