def pdftk_merge(out_fn, inputs):
    """Concatenate *inputs* into one PDF written to *out_fn*.

    Every element of *inputs* is turned into a file name with
    ``to_filename(..., opened_ok=False)`` and handed to the ``PDFTK``
    executable as ``<inputs...> cat output <out_fn>``.

    :param out_fn: path of the merged PDF to create
    :param inputs: iterable of objects accepted by ``to_filename``
    :returns: *out_fn*
    :raises subprocess.CalledProcessError: if the command exits non-zero
    """
    cmd = [PDFTK]
    cmd.extend(to_filename(inp, opened_ok=False) for inp in inputs)
    cmd.extend(('cat', 'output', out_fn))
    LOG.info('pdftk_merge: %r', cmd)
    subprocess.check_call(cmd)
    # NOTE(review): only the first input file (cmd[1]) is removed after a
    # successful merge; the remaining input files are left in place --
    # confirm whether that is intended.
    unlink(cmd[1])
    return out_fn
def imagick_exec(todo, args, inp=None, executable=None):
    """Run an ImageMagick/GraphicsMagick sub-command and return its output.

    :param todo: name of the sub-command; passed as first argument when the
        executable is ``gm``/``gm.exe``, otherwise substituted into the
        executable name with ``replace_bbase``
    :param args: argument list; ``args[0]`` is the input spec and ``args[1]``
        the output spec (``[fmt:]name``, where a trailing ``-`` means
        stdin/stdout).  The list is copied, the caller's object is not
        modified.
    :param inp: optional input data (anything ``MMStringIO`` accepts)
    :param executable: executable to run, defaults to ``EXECUTABLE``
    :returns: an ``MMFile`` opened on the temporary output file when the
        output went through a temp file, otherwise a ``StringIO`` holding
        the captured stdout
    :raises AssertionError: if the process wrote to a captured stderr or
        exited with a non-zero status
    :raises OSError: if the process could not be started and there is no
        alternative executable to fall back to
    """
    if not executable:
        executable = EXECUTABLE
    LOG.debug('EXE=%r cmd_end=%r', executable, args)
    if os.path.basename(executable) in ('gm', 'gm.exe'):
        cmd = [executable, todo]
    else:
        executable = replace_bbase(executable, todo)
        cmd = [executable]

    # Work on a copy: temp file names are written into args below, and the
    # OSError fallback still needs the output spec the caller asked for.
    args = list(args)
    orig_args = tuple(args)
    inpfn = outfn = None
    try:
        data = None
        kwds = {}
        if USE_TEMPFILES and (inp or args[1].endswith('-')):
            if inp:  # stdin replaced by a temp file
                inp = MMStringIO(inp).getvalue()
                ext = '.' + args[0].split(':', 1)[0] if ':' in args[0] else ''
                inpfd, inpfn = tempfile.mkstemp('-IM' + ext)
                try:
                    os.write(inpfd, inp)
                finally:
                    os.close(inpfd)
                args[0] = inpfn
                assert os.path.exists(inpfn), inpfn
                assert os.path.getsize(inpfn) == len(inp), \
                    'inp=%d <> %s=%d' % (len(inp), inpfn,
                                         os.path.getsize(inpfn))
            if args[1].endswith('-'):  # stdout replaced by a temp file
                ext = '.' + args[1].split(':', 1)[0] if ':' in args[1] else ''
                outfd, outfn = tempfile.mkstemp('-IM' + ext)
                os.close(outfd)
                args[1] = outfn
                assert os.path.exists(outfn), outfn
        else:
            data = MMStringIO(inp).getvalue() if inp else ''
            if args[0].endswith('-'):
                assert data, args
                LOG.debug('data=%r...%r', data[:100], data[-100:])
            kwds.update({'stdin': subprocess.PIPE,
                         'stdout': subprocess.PIPE})
        LOG.debug('cmd=%r args=%r kwds=%r => %s', cmd, args, kwds,
                  ' '.join(cmd + args))
        # The pipes must really be handed to Popen, otherwise communicate()
        # neither feeds ``data`` to the child nor captures its stdout.
        p = subprocess.Popen(cmd + args, **kwds)
        out, err = p.communicate(data)
        if inpfn:
            unlink(inpfn)
            inpfn = None
        if err or p.returncode not in (None, 0):
            # Explicit raise (same exception type as the former assert) so
            # the check survives ``python -O``; drop the unused output file.
            if outfn:
                unlink(outfn)
                outfn = None
            raise AssertionError((p.returncode, err))
        res = (MMFile(outfn, 'rb', delete_on_close=True) if outfn
               else StringIO(out))
        LOG.debug("imagick_exec(%s, %r): %r", todo, args, res)
        if outfn:
            # The file is already opened by MMFile above.
            unlink(outfn)
        return res
    except OSError:
        # Do not leave temp files behind when the command could not run.
        for tmpfn in (inpfn, outfn):
            if tmpfn:
                unlink(tmpfn)
        if not executable or EXECUTABLE == executable:
            executable2 = replace_bbase(executable, todo)
            if executable2 == executable:
                raise
            # Use the caller's output spec, not the temp file name that may
            # have replaced args[1] in the meantime.
            return convert(inp, orig_args[1].split(':', 1)[0],
                           executable=executable2)
        else:
            raise
def pdftk_split(stream):
    """Split a PDF into single pages with ``pdftk burst``.

    Generator: the input is materialised with ``to_filename``, burst into a
    temporary directory as ``page_NNN.pdf`` files, and each page is yielded
    as an ``MMFile`` opened for binary reading (``delete_on_close=True``),
    in page order.

    :param stream: the PDF to split (anything ``to_filename`` accepts)
    :raises subprocess.CalledProcessError: if the command exits non-zero

    The input file is removed only after all pages have been yielded; it is
    left in place when the command fails.
    """
    pref = 'page_'
    with mktempdir('pdf', delete_asap=True) as out_dir:
        cmd = [PDFTK, to_filename(stream), 'burst', 'output',
               out_dir + '/' + pref + '%03d.pdf']
        LOG.debug('pdftk_split: %r', cmd)
        subprocess.check_call(cmd)
        # '%03d' pads to three digits only, so from page 1000 on a plain
        # lexicographic sort would put page_1000.pdf before page_101.pdf.
        # Ordering by (length, name) keeps zero-padded names in numeric
        # order for any page count.
        files = sorted(os.listdir(out_dir), key=lambda fn: (len(fn), fn))
        LOG.debug('out_dir=%s: %s', out_dir, files)
        for fn in files:
            if fn.startswith(pref) and fn.endswith('.pdf'):
                yield MMFile(os.path.join(out_dir, fn), 'rb',
                             delete_on_close=True)
    unlink(cmd[1])
def _request_files(files=None): if files: for elt in files: yield elt return fn = None if SAVE_REQ_DIR: with _SRD_lock: fns = os.listdir(SAVE_REQ_DIR) if len(fns) > SAVE_REQ_DIR_THRESHOLD: for x in sorted(fns)[:SAVE_REQ_DIR_THRESHOLD // 2]: unlink(os.path.join(SAVE_REQ_DIR, x)) fn = os.path.join(SAVE_REQ_DIR, str(int(time.time() * 1000)) + '--' + ('%06d' % REQ_COUNTER) + urllib.quote_plus(request.url, safe='')) import gzip files = ((stor.stream, stor.mimetype, stor.name) for _, stor in request.files.iteritems(multi=True)) try: first = files.next() files = itertools.chain([first], files) except StopIteration: # nothing there LOG.warn('empty "files"') if request.data: import re rgx = re.compile('''filename=['"](.*?)["']''') files = [(StringIO(request.data), request.headers['Content-Type'], rgx.search(request.headers.get('Content-Disposition', 'filename=""')).group(1))] LOG.info('files=%r', files) i = 0 for stream, mimetype, name in files: if len(stream.read(1)) == 1: # skip empty files stream.seek(0) if fn: with gzip.open(fn + ('--file%02d' % i) + '.dmp.gz', 'wb') as fh: while 1: chunk = stream.read(65536) if not chunk: break fh.write(chunk) stream.seek(0) i += 1 yield (stream, mimetype, basename(name))
except Exception, e: LOG.error('pdftk_split cannot split %s: %s [decrypt? %s]', stream_s, e, decrypt) if not decrypt: raise #pdffn = to_filename(stream) try: pdffn = clean_pdf(pdffn) except: LOG.exception('error with PDF->PS->PDF of %s', pdffn) raise LOG.info('trying again with %r', pdffn) try: splitted = split_pdf(to_stream(pdffn), decrypt=False) finally: unlink(pdffn) LOG.debug('splitted files=%r', splitted) return splitted try: from pyPdf import PdfFileWriter as PFW PdfFileWriter = PFW del PFW except ImportError: PdfFileWriter = None def simple_split(stream): stream_s = repr(stream)[:100] pages = list(get_pages(stream))