def to_stream(stream):
    """Return a seekable, rewound stream for ``stream``.

    Args:
        stream: Either an object that already has a ``seek`` method (used
            as-is), a value for which ``file_exists`` is true (opened with
            ``MMFile`` in ``'rb'`` mode, ``delete_on_close=False``), or
            anything else, which is written verbatim to a temporary file
            and opened with ``MMFile`` using ``delete_on_close=True``.
            NOTE(review): the last case presumably expects raw PDF bytes --
            confirm against callers.

    Returns:
        The seekable stream, positioned at offset 0.  A warning is logged
        when the stream turns out to be empty, but it is still returned.
    """
    if not hasattr(stream, 'seek'):
        if file_exists(stream):
            stream = MMFile(stream, 'rb', delete_on_close=False)
        else:
            with mktempfn('-TS.pdf', 'tos-') as fn:
                # Close the handle explicitly so the payload is flushed to
                # disk before MMFile re-opens the same path; relying on
                # garbage collection to close it is interpreter-specific.
                with open(fn, 'wb') as fh:
                    fh.write(stream)
                stream = MMFile(fn, 'rb', delete_on_close=True)

    # Seek to the end to learn the size, then rewind for the caller.
    stream.seek(0, 2)
    if stream.tell() == 0:
        LOG.warn('zero pdf !? (%r)', repr(stream)[:100])
    stream.seek(0, 0)
    return stream
def get_pages(stream):
    """Yield the pages of a PDF, one at a time.

    Args:
        stream: Either an object with a ``seek`` method, or a value that is
            written verbatim to a temporary file and re-opened with
            ``MMFile`` (``delete_on_close=True``).
            NOTE(review): the latter presumably expects raw PDF bytes --
            confirm against callers.

    Yields:
        Each item of ``PdfFileReader(stream).pages``, in order.  Nothing is
        yielded for an empty (zero-length) stream; a warning is logged
        instead.
    """
    from pyPdf import PdfFileReader

    # Capture a short description up front: ``stream`` may be rebound below.
    stream_s = repr(stream)[:100]

    if not hasattr(stream, 'seek'):
        with mktempfn('-GP.pdf', 'gp-') as fn:
            with open(fn, 'wb') as fh:
                fh.write(stream)
            stream = MMFile(fn, 'rb', delete_on_close=True)

    # Seek to the end to learn the size; bail out early on empty input.
    stream.seek(0, 2)
    if stream.tell() == 0:
        LOG.warn('zero pdf !? (%r)', stream_s)
        return
    stream.seek(0, 0)

    # Track the count explicitly: the loop index is zero-based and is not
    # bound at all when the document has no pages.
    page_count = 0
    for index, page in enumerate(PdfFileReader(stream).pages):
        LOG.debug('get_pages(%s) yields %d (%s)',
                  stream_s, index, repr(page)[:100])
        page_count = index + 1
        yield page
    LOG.info('%r is %d pages', stream_s, page_count)