def assert_pdf(pdf_part, raise_exc=True, extra=None):
    """Check whether ``pdf_part`` looks like PDF data.

    Only the first 1024 bytes are inspected.  For objects with a ``seek``
    attribute the bytes are read from the current position and the position
    is restored afterwards; strings and buffers are sliced.

    :param pdf_part: a seekable file-like object, a string or a buffer.
    :param raise_exc: if true, raise instead of returning ``False`` when the
        inspected data is not recognised as PDF.
    :param extra: free-form context, used only in log and exception messages.
    :returns: ``True`` if the data starts with ``%PDF-`` or
        ``guess_data_mime`` reports ``application/pdf``; ``False`` if
        ``pdf_part`` is falsy (regardless of ``raise_exc``) or the data is
        not a PDF and ``raise_exc`` is false; ``None`` if ``pdf_part`` is of
        a type that cannot be checked.
    :raises AssertionError: if the data is not a PDF and ``raise_exc`` is
        true.
    """
    if not pdf_part:
        LOG.error('cannot check %r', pdf_part)
        return False

    pdf_part_s = repr(pdf_part)[:100]
    if hasattr(pdf_part, 'seek'):
        # Peek at the head of the stream, then put the caller's read
        # position back where it was.
        p = pdf_part.tell()
        start = pdf_part.read(1024)
        pdf_part.seek(p, 0)
    elif isinstance(pdf_part, (basestring, buffer)):
        start = pdf_part[:1024]
    else:
        LOG.warn('cannot check %s (%s %s)', pdf_part_s, type(pdf_part), extra)
        return None

    if not start:
        # Nothing to sniff (empty data, or read() returned None): log it and
        # skip the checks below, which would be pointless on empty data and
        # would raise AttributeError on None.
        LOG.error('cannot check %r', start)
    elif start.startswith('%PDF-'):
        LOG.debug('pdf_check ok on %r (%s)', pdf_part_s, extra)
        return True
    elif 'application/pdf' == guess_data_mime(start):
        LOG.debug('pdf_check ok on %r (%s) by Magic!', pdf_part_s, extra)
        return True

    if raise_exc:
        raise AssertionError('NOT PDF: %s (%r)\n%r'
                             % (extra, pdf_part_s, start))
    return False
def fix_mimetype(stream, typ):
    """Return a corrected MIME type for ``stream`` given its declared ``typ``.

    * ``application/x-zip-compressed`` is normalised to ``application/zip``.
    * If ``assert_pdf`` recognises the stream as a PDF, ``application/pdf``
      is returned whatever ``typ`` says.
    * Otherwise ``guess_data_mime(stream)`` is consulted; a truthy result
      that differs from ``typ`` replaces it.

    :param stream: the data to inspect, passed to ``assert_pdf`` and
        ``guess_data_mime``.
    :param typ: the MIME type the data was declared with.
    :returns: the MIME type to use for the data.
    """
    if 'application/x-zip-compressed' == typ:
        typ = 'application/zip'

    already_pdf = assert_pdf(stream, raise_exc=False)
    if already_pdf:
        # Only report a mismatch when the declared type actually differs.
        if typ != 'application/pdf':
            LOG.info('mimetype is %r, but seems to be a pdf already!', typ)
        return 'application/pdf'
    if typ == 'application/pdf' and already_pdf is False:
        # assert_pdf returns None when it could not check the object at all;
        # only a definite False justifies claiming the data is not a PDF.
        LOG.error('got %r as pdf, but it is not!', repr(stream)[:100])

    # NOTE(review): the original formatting was lost; the magic re-detection
    # is assumed to run for every non-PDF stream, not only for the
    # mislabelled-PDF case above -- confirm against the upstream file.
    typn = guess_data_mime(stream)
    if typn and typn != typ:
        LOG.info('typ=%s -> %s by magic!', typ, typn)
        return typn
    return typ