Example #1
0
def assert_pdf(pdf_part, raise_exc=True, extra=None):
    """Check whether ``pdf_part`` looks like PDF data.

    Up to the first 1024 bytes are inspected. They count as PDF when they
    start with the ``%PDF-`` marker, or when ``guess_data_mime`` reports
    them as ``application/pdf``.

    :param pdf_part: either an object with a ``seek`` attribute (it is read
        through ``tell``/``read`` and its position is put back afterwards),
        or a ``basestring`` / ``buffer``.
    :param raise_exc: when true, raise instead of returning ``False`` for
        data that was inspected and is not a PDF.
    :param extra: free-form context, only used in log and error messages.
    :returns: ``True`` for PDF data; ``False`` when ``pdf_part`` is falsy,
        or when the data is not a PDF and ``raise_exc`` is false; ``None``
        when ``pdf_part`` is of a type that cannot be inspected.
    :raises AssertionError: the data is not a PDF (or nothing could be read
        from it) and ``raise_exc`` is true.
    """
    if not pdf_part:
        LOG.error('cannot check %r', pdf_part)
        return False
    pdf_part_s = repr(pdf_part)[:100]

    if hasattr(pdf_part, 'seek'):
        p = pdf_part.tell()
        try:
            start = pdf_part.read(1024)
        finally:
            # Put the stream back where the caller left it, even when the
            # read itself fails.
            pdf_part.seek(p, 0)
    elif isinstance(pdf_part, (basestring, buffer)):
        start = pdf_part[:1024]
    else:
        LOG.warn('cannot check %s (%s %s)', pdf_part_s, type(pdf_part), extra)
        return None

    if not start:
        # Nothing to sniff (e.g. stream already at EOF): skip the content
        # checks and fall through to the regular "not a PDF" handling.
        LOG.error('cannot check %r', start)
    elif start.startswith('%PDF-'):
        LOG.debug('pdf_check ok on %r (%s)', pdf_part_s, extra)
        return True
    elif 'application/pdf' == guess_data_mime(start):
        LOG.debug('pdf_check ok on %r (%s) by Magic!', pdf_part_s, extra)
        return True

    if raise_exc:
        raise AssertionError('NOT PDF: %s (%r)\n%r'
            % (extra, pdf_part_s, start))
    return False
Example #2
0
def fix_mimetype(stream, typ):
    """Return a corrected mimetype for ``stream``, starting from ``typ``.

    Steps, in order:

    * ``application/x-zip-compressed`` is rewritten to ``application/zip``.
    * If ``assert_pdf`` accepts the stream, ``application/pdf`` is returned
      regardless of ``typ``.
    * Otherwise, a ``typ`` of ``application/pdf`` is logged as an error
      (but not changed at this point).
    * Finally ``guess_data_mime(stream)`` is consulted; a truthy result
      that differs from ``typ`` replaces it.

    :param stream: the data to inspect; passed unchanged to ``assert_pdf``
        and ``guess_data_mime``.
    :param typ: the mimetype claimed for ``stream``.
    :returns: the mimetype to use for ``stream``.
    """
    if typ == 'application/x-zip-compressed':
        typ = 'application/zip'

    # The content check wins over whatever mimetype was claimed.
    if assert_pdf(stream, raise_exc=False):
        LOG.info('mimetype is %r, but seems to be a pdf already!', typ)
        return 'application/pdf'

    if typ == 'application/pdf':
        LOG.error('got %r as pdf, but it is not!', repr(stream)[:100])

    guessed = guess_data_mime(stream)
    if not guessed or guessed == typ:
        # No guess available, or it agrees with the claimed type.
        return typ
    LOG.info('typ=%s -> %s by magic!', typ, guessed)
    return guessed