Example 1
import sys
from os.path import basename


def safe_filename(fn):
    '''Return fn without its directory part, encoded as a CP437-safe
    byte string (Python 2); undecodable names fall back to UTF-8.'''
    fn = basename(fn)
    if not isinstance(fn, unicode):
        try:
            fn = unicode(fn, sys.getfilesystemencoding())
        except UnicodeDecodeError:
            fn = unicode(fn, 'utf8', 'replace')
    return fn.encode('cp437', 'replace')
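A minimal usage sketch with a hypothetical path (Python 2):

# Hypothetical call: the directory part is dropped and the en dash,
# which CP437 cannot represent, is replaced.
print repr(safe_filename(u'/tmp/uploads/r\u00e9sum\u00e9 \u2013 2014.pdf'))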
Example 2
import gzip
import itertools
import os
import re
import time
import urllib

from os import unlink
from os.path import basename
from StringIO import StringIO  # Python 2

from flask import request  # assumption: a Flask request proxy

# Module-level names assumed to be defined elsewhere: SAVE_REQ_DIR,
# SAVE_REQ_DIR_THRESHOLD, _SRD_lock, REQ_COUNTER and LOG (a logging.Logger).


def _request_files(files=None):
    '''Yield (stream, mimetype, filename) triples, either from the given
    iterable or from the files uploaded with the current request; when
    SAVE_REQ_DIR is set, non-empty uploads are also dumped as gzip files.'''
    if files:
        for elt in files:
            yield elt
        return
    fn = None
    if SAVE_REQ_DIR:
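        # prune the oldest request dumps once the directory exceeds the threshold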
        with _SRD_lock:
            fns = os.listdir(SAVE_REQ_DIR)
            if len(fns) > SAVE_REQ_DIR_THRESHOLD:
                for x in sorted(fns)[:SAVE_REQ_DIR_THRESHOLD // 2]:
                    unlink(os.path.join(SAVE_REQ_DIR, x))
        fn = os.path.join(SAVE_REQ_DIR,
                str(int(time.time() * 1000)) + '--' +
                ('%06d' % REQ_COUNTER) +
                urllib.quote_plus(request.url, safe=''))
    files = ((stor.stream, stor.mimetype, stor.name)
            for _, stor in request.files.iteritems(multi=True))
    try:
        first = files.next()
        files = itertools.chain([first], files)
    except StopIteration:  # nothing there
        LOG.warn('empty "files"')
        if request.data:
            rgx = re.compile('''filename=['"](.*?)["']''')
            files = [(StringIO(request.data), request.headers['Content-Type'],
                    rgx.search(request.headers.get('Content-Disposition',
                        'filename=""')).group(1))]
            LOG.info('files=%r', files)

    i = 0
    for stream, mimetype, name in files:
        if len(stream.read(1)) == 1:  # skip empty files
            stream.seek(0)
            if fn:
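                # also save a gzipped copy of the upload under SAVE_REQ_DIR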
                with gzip.open(fn + ('--file%02d' % i) + '.dmp.gz',
                        'wb') as fh:
                    while 1:
                        chunk = stream.read(65536)
                        if not chunk:
                            break
                        fh.write(chunk)
                stream.seek(0)
                i += 1
            yield (stream, mimetype, basename(name))
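When an explicit iterable is passed, the generator simply re-yields it and never touches the Flask request, so it can be exercised on its own; a hypothetical sketch:

from StringIO import StringIO

triples = [(StringIO('hello'), 'text/plain', 'hello.txt')]
for stream, mimetype, name in _request_files(files=triples):
    print name, mimetype, stream.read()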
Example 3
def convert_to_pdf(files=None, splitted=None, return_errors=None,
        dest_typ=None):
    '''Yield the PDF streams produced from the incoming files; if
    return_errors is true, the list of failed (stream, mimetype, name)
    inputs is yielded last.'''
    if splitted is None:
        splitted = request.args.get('splitted', '0') == '1'
    if return_errors is None:
        return_errors = request.args.get('errors', '0') == '1'
    errors = []
    for stream, mimetype, name in request_files(files):
        name = basename(name)
        LOG.debug('to_pdf from=%s, stream=%r (%s)', mimetype, stream, name)
        res = None
        try:
            res = pdf.to_pdf(stream, mimetype, splitted=splitted)
            LOG.debug('to_pdf from=%s => %s', mimetype, repr(res)[:100])
        except pdf.NonConvertible:
            errors.append((stream, mimetype, name))
            LOG.warn('%r not convertible to pdf', errors[-1])
        except Exception:  # keep unexpected conversion errors from killing the generator
            LOG.exception('error converting %s to pdf', repr(stream)[:100])
            errors.append((stream, mimetype, name))
            continue
        if not res:
            LOG.error('conversion of %r(%s) failed: %r', mimetype, stream, res)
            continue
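        # a conversion may produce several parts (e.g. when splitted is true)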
        for i, chunk in enumerate(res):
            if not chunk:
                LOG.warn('chunk %r !?', chunk)
                continue
            if not pdf.assert_pdf(chunk,
                    extra='%s(%s): %s/%d' % (stream, mimetype, name, i),
                    raise_exc=False):
                LOG.warn('part %d is NOT PDF!', i)
                errors.append((stream, mimetype, name))
            length = (len(chunk) if (pdf.MIN_PDF_SIZE > 0
                                    and hasattr(chunk, '__len__'))
                    else -1)
            LOG.debug('to_pdf yields %s (%d)', repr(chunk)[:100], length)
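            # outputs at or below MIN_PDF_SIZE are treated as failed conversions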
            if length < 0 or length > pdf.MIN_PDF_SIZE:
                yield chunk
            else:
                errors.append((stream, mimetype, name))
    LOG.info('to_pdf done, errcount=%d', len(errors))
    if return_errors and errors:
        yield errors
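A hedged sketch of how the generator might be wired into a Flask view; the app, route and response handling below are hypothetical, and each yielded chunk is assumed to be a complete PDF byte string (splitted left false, errors not requested).

from flask import Flask, Response

app = Flask(__name__)

@app.route('/to_pdf', methods=['POST'])
def to_pdf_view():
    parts = list(convert_to_pdf(splitted=False, return_errors=False))
    if not parts:
        return 'no convertible input', 422
    return Response(parts[0], mimetype='application/pdf')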