Example #1
0
def pdftk_merge(out_fn, inputs):
    """Concatenate several PDFs into ``out_fn`` using pdftk.

    Every element of ``inputs`` is turned into a file name with
    ``to_filename(..., opened_ok=False)`` and handed to ``pdftk ... cat
    output out_fn``.

    :param out_fn: path of the merged PDF to write
    :param inputs: iterable of inputs accepted by ``to_filename``
    :returns: ``out_fn``
    :raises subprocess.CalledProcessError: if pdftk exits with non-zero
    """
    cmd = [PDFTK]
    cmd.extend(to_filename(item, opened_ok=False) for item in inputs)
    cmd.extend(['cat', 'output', out_fn])
    LOG.info('pdftk_merge: %r', cmd)
    subprocess.check_call(cmd)
    # NOTE(review): only the first input file is removed, and only when
    # pdftk succeeded -- confirm whether the other inputs should be kept.
    first_input = cmd[1]
    unlink(first_input)
    return out_fn
Example #2
0
def imagick_exec(todo, args, inp=None, executable=None):
    """Run an ImageMagick / GraphicsMagick command and return its output.

    :param todo: name of the sub-command; for ``gm`` it is passed as the
        first argument, otherwise the basename of the executable is
        replaced with it (``replace_bbase``)
    :param args: tail of the command line; ``args[0]`` is the input spec
        and ``args[1]`` the output spec, each optionally prefixed with
        ``format:``; a spec ending with ``-`` means stdin / stdout.
        The caller's list is not modified.
    :param inp: input data (anything ``MMStringIO`` accepts), or None
    :param executable: program to run; defaults to ``EXECUTABLE``
    :returns: an ``MMFile`` over the output temp file when one was used,
        otherwise a ``StringIO`` built from the captured stdout
    :raises AssertionError: if the process reports a non-zero return code
    :raises OSError: if the program cannot be started and no alternative
        executable name is available
    """
    if not executable:
        executable = EXECUTABLE
    LOG.debug('EXE=%r cmd_end=%r', executable, args)
    if os.path.basename(executable) in ('gm', 'gm.exe'):
        cmd = [executable, todo]
    else:
        executable = replace_bbase(executable, todo)
        cmd = [executable]
    # Work on a copy: the specs below get replaced by temp file names, but
    # the fallback needs the original output spec and the caller's list
    # must stay untouched.
    orig_args = args
    args = list(args)
    inpfn, outfn = None, None
    try:
        data = None
        kwds = {}
        if USE_TEMPFILES and (inp or args[1].endswith('-')):
            if inp:  # stdin
                inp = MMStringIO(inp).getvalue()
                ext = '.' + args[0].split(':', 1)[0] if ':' in args[0] else ''
                inpfd, inpfn = tempfile.mkstemp('-IM' + ext)
                try:
                    os.write(inpfd, inp)
                finally:
                    # do not leak the descriptor if the write fails
                    os.close(inpfd)
                args[0] = inpfn
                assert os.path.exists(inpfn), inpfn
                assert os.path.getsize(inpfn) == len(inp), \
                       'inp=%d <> %s=%d' % (len(inp), inpfn,
                                            os.path.getsize(inpfn))
            if args[1].endswith('-'):  # stdout
                ext = '.' + args[1].split(':', 1)[0] if ':' in args[1] else ''
                outfd, outfn = tempfile.mkstemp('-IM' + ext)
                os.close(outfd)
                args[1] = outfn
                assert os.path.exists(outfn), outfn
        else:
            data = MMStringIO(inp).getvalue() if inp else ''
            if args[0].endswith('-'):
                assert data, args
                LOG.debug('data=%r...%r', data[:100], data[-100:])
            kwds.update({'stdin': subprocess.PIPE, 'stdout': subprocess.PIPE})
        LOG.debug('cmd=%r args=%r kwds=%r => %s', cmd, args, kwds,
                  ' '.join(cmd + args))
        # The pipes must really be handed to Popen, otherwise ``data`` is
        # never sent and ``out`` is always None.
        p = subprocess.Popen(cmd + args, **kwds)
        # stderr is not piped, so ``err`` stays None here
        out, err = p.communicate(data)
        assert not err and p.returncode in (None, 0), (p.returncode, err)
        # The output file is opened before the ``finally`` below unlinks it.
        res = (MMFile(outfn, 'rb', delete_on_close=True) if outfn
            else StringIO(out))
        LOG.debug("imagick_exec(%s, %r): %r", todo, args, res)
        return res

    except OSError:
        if not executable or EXECUTABLE == executable:
            executable2 = replace_bbase(executable, todo)
            if executable2 == executable:
                raise
            # Take the target format from the caller's output spec, not
            # from a temp file name that may have replaced it.
            return convert(inp, orig_args[1].split(':', 1)[0],
                executable=executable2)
        else:
            raise
    finally:
        # Remove the temp files on every exit path, errors included.
        if inpfn:
            unlink(inpfn)
        if outfn:
            unlink(outfn)
Example #3
0
def pdftk_split(stream):
    """Burst a PDF into one file per page with pdftk (generator).

    The input is converted to a file name with ``to_filename`` and
    ``pdftk <file> burst`` writes ``page_NNN.pdf`` files into a temporary
    directory. Each page file is yielded as an ``MMFile`` opened in
    ``'rb'`` mode with ``delete_on_close=True``, in sorted name order.
    Once all pages have been yielded, the source file is unlinked.

    :param stream: input accepted by ``to_filename``
    :raises subprocess.CalledProcessError: if pdftk exits with non-zero
    """
    prefix = 'page_'
    with mktempdir('pdf', delete_asap=True) as out_dir:
        src_fn = to_filename(stream)
        cmd = [PDFTK, src_fn, 'burst', 'output',
               out_dir + '/' + prefix + '%03d.pdf']
        LOG.debug('pdftk_split: %r', cmd)
        subprocess.check_call(cmd)
        files = os.listdir(out_dir)
        files.sort()
        LOG.debug('out_dir=%s: %s', out_dir, files)
        for name in files:
            # pdftk may leave other files in the directory; skip them
            if not (name.startswith(prefix) and name.endswith('.pdf')):
                continue
            page_path = os.path.join(out_dir, name)
            yield MMFile(page_path, 'rb', delete_on_close=True)
        # reached only when the consumer exhausted the generator
        unlink(src_fn)
Example #4
0
def _request_files(files=None):
    if files:
        for elt in files:
            yield elt
        return
    fn = None
    if SAVE_REQ_DIR:
        with _SRD_lock:
            fns = os.listdir(SAVE_REQ_DIR)
            if len(fns) > SAVE_REQ_DIR_THRESHOLD:
                for x in sorted(fns)[:SAVE_REQ_DIR_THRESHOLD // 2]:
                    unlink(os.path.join(SAVE_REQ_DIR, x))
        fn = os.path.join(SAVE_REQ_DIR,
                str(int(time.time() * 1000)) + '--' +
                ('%06d' % REQ_COUNTER) +
                urllib.quote_plus(request.url, safe=''))
        import gzip
    files = ((stor.stream, stor.mimetype, stor.name)
            for _, stor in request.files.iteritems(multi=True))
    try:
        first = files.next()
        files = itertools.chain([first], files)
    except StopIteration:  # nothing there
        LOG.warn('empty "files"')
        if request.data:
            import re
            rgx = re.compile('''filename=['"](.*?)["']''')
            files = [(StringIO(request.data), request.headers['Content-Type'],
                    rgx.search(request.headers.get('Content-Disposition',
                        'filename=""')).group(1))]
            LOG.info('files=%r', files)

    i = 0
    for stream, mimetype, name in files:
        if len(stream.read(1)) == 1:  # skip empty files
            stream.seek(0)
            if fn:
                with gzip.open(fn + ('--file%02d' % i) + '.dmp.gz',
                        'wb') as fh:
                    while 1:
                        chunk = stream.read(65536)
                        if not chunk:
                            break
                        fh.write(chunk)
                stream.seek(0)
                i += 1
            yield (stream, mimetype, basename(name))
Example #5
0
    except Exception, e:
        LOG.error('pdftk_split cannot split %s: %s [decrypt? %s]',
                  stream_s, e, decrypt)
        if not decrypt:
            raise
        #pdffn = to_filename(stream)
        try:
            pdffn = clean_pdf(pdffn)
        except:
            LOG.exception('error with PDF->PS->PDF of %s', pdffn)
            raise
        LOG.info('trying again with %r', pdffn)
        try:
            splitted = split_pdf(to_stream(pdffn), decrypt=False)
        finally:
            unlink(pdffn)
    LOG.debug('splitted files=%r', splitted)
    return splitted

# pyPdf is optional: bind its PdfFileWriter class to the module-level name
# ``PdfFileWriter``, or bind None when the package cannot be imported.
try:
    from pyPdf import PdfFileWriter as PFW
    PdfFileWriter = PFW
    del PFW  # keep the temporary alias out of the module namespace
except ImportError:
    PdfFileWriter = None


def simple_split(stream):
    stream_s = repr(stream)[:100]
    pages = list(get_pages(stream))