def pdf_do_file(afile):
    """Open the file at path afile and hand it to pdf_do_pdf.

    afile -- path of the file to open; also passed on as the source name

    An IOError while opening is logged via log_error and the function
    returns without processing. Returns None.
    """
    # NOTE(review): the file is opened in the default (text) mode, as before;
    # confirm whether pdf_do_pdf needs a binary ('rb') handle.
    try:
        fid = open(afile)
    except IOError:
        log_error('IOError', afile)
        # nothing was opened, so there is nothing to process or close
        return
    # close the handle even if processing raises
    try:
        pdf_do_pdf(fid, afile)
    finally:
        fid.close()
def types_data(data):
    """Classify an in-memory buffer by its detected MIME type.

    data -- buffer contents handed to magic.from_buffer

    Returns the result of types_filter for the detected MIME type. If
    detection raises IOError, the error is logged under the name '_data'
    and ('other', False) is returned instead.
    """
    try:
        detected = magic.from_buffer(data, mime=True)
    except IOError:
        log_error('IOError', '_data')
        return ('other', False)
    else:
        return types_filter(detected)
def types_file(afile):
    """Classify the file at path afile by its detected MIME type.

    afile -- path handed to magic.from_file

    Returns the result of types_filter for the detected MIME type. If
    detection raises IOError, the error is logged against afile and
    ('other', False) is returned instead.
    """
    fallback = ('other', False)
    try:
        detected = magic.from_file(afile, mime=True)
    except IOError:
        log_error('IOError', afile)
        return fallback
    return types_filter(detected)
def gz_do_file(afile):
    """Open the gzip file at path afile and process it with gz_do_gz.

    afile -- path of the gzip file; also passed on as the source name

    An IOError while opening is logged via log_error and the function
    returns without processing. Returns None.
    """
    try:
        agz = gzip.GzipFile(afile)
    except IOError:
        log_error('IOError', afile)
        return
    # close the handle even if processing raises
    try:
        gz_do_gz(agz, afile)
    finally:
        agz.close()
def zip_do_file(afile):
    """Open the zip archive at path afile and process it with zip_do_zip.

    afile -- path of the archive; also passed on as the source name

    A zipfile.BadZipfile while opening is logged via log_error and the
    function returns without processing. Returns None.
    """
    try:
        azip = zipfile.ZipFile(afile)
    except zipfile.BadZipfile:
        log_error('zipfile.BadZipFile', afile)
        return
    # close the archive even if processing raises
    try:
        zip_do_zip(azip, afile)
    finally:
        azip.close()
def types_file(afile):
    """Guess the type of the file at path afile.

    The MIME type reported by magic.from_file is passed, together with
    the file name, to types_find, whose result is returned. If detection
    raises IOError, the error is logged against afile and
    ('other', False) is returned instead.
    """
    try:
        detected = magic.from_file(afile, mime=True)
    except IOError:
        log_error('IOError', afile)
        return ('other', False)
    else:
        return types_find(detected, afile)
def tar_do_file(afile):
    """Open the tar archive at path afile and process it with tar_do_tar.

    afile -- path of the archive; also passed on as the source name

    A tarfile.TarError while opening is logged via log_error and the
    function returns without processing. Returns None.
    """
    try:
        atar = tarfile.open(afile)
    except tarfile.TarError:
        log_error('tarfile.TarError', afile)
        return
    # close the archive even if processing raises
    try:
        tar_do_tar(atar, afile)
    finally:
        atar.close()
def docx_do_file(afile):
    """Open the file at path afile as a zip archive and process it with docx_do_docx.

    afile -- path of the file; also passed on as the source name

    A zipfile.BadZipfile while opening is logged via log_error and the
    function returns without processing. Returns None.
    """
    try:
        azip = zipfile.ZipFile(afile)
    except zipfile.BadZipfile:
        log_error("zipfile.BadZipFile", afile)
        return
    # close the archive even if processing raises
    try:
        docx_do_docx(azip, afile)
    finally:
        azip.close()
def docx_do_data(data, afile):
    """Open in-memory bytes as a zip archive and process it with docx_do_docx.

    data  -- raw bytes of the archive, wrapped in io.BytesIO
    afile -- source name used for logging and passed on to docx_do_docx

    A zipfile.BadZipfile while opening is logged via log_error and the
    function returns without processing. Returns None.
    """
    filelike = io.BytesIO(data)
    try:
        azip = zipfile.ZipFile(filelike)
    except zipfile.BadZipfile:
        log_error("zipfile.BadZipFile", afile)
        return
    # close the archive even if processing raises
    try:
        docx_do_docx(azip, afile)
    finally:
        azip.close()
def zip_do_data(data, afile):
    """Open in-memory bytes as a zip archive and process it with zip_do_zip.

    data  -- raw bytes of the archive, wrapped in io.BytesIO
    afile -- source name used for logging and passed on to zip_do_zip

    A zipfile.BadZipfile while opening is logged via log_error and the
    function returns without processing. Returns None.
    """
    filelike = io.BytesIO(data)
    try:
        azip = zipfile.ZipFile(filelike)
    except zipfile.BadZipfile:
        log_error('zipfile.BadZipFile', afile)
        return
    # close the archive even if processing raises
    try:
        zip_do_zip(azip, afile)
    finally:
        azip.close()
def tar_do_data(data, afile):
    """Open in-memory bytes as a tar archive and process it with tar_do_tar.

    data  -- raw bytes of the archive, wrapped in io.BytesIO
    afile -- source name used for logging and passed on to tar_do_tar

    A tarfile.TarError while opening is logged via log_error and the
    function returns without processing. Returns None.
    """
    filelike = io.BytesIO(data)
    try:
        atar = tarfile.open(fileobj=filelike)
    except tarfile.TarError:
        log_error('tarfile.TarError', afile)
        return
    # close the archive even if processing raises
    try:
        tar_do_tar(atar, afile)
    finally:
        atar.close()
def text_do_file(afile):
    """Read the whole file at path afile and pass its contents to text_do_data.

    afile -- path of the file to read; also passed on as the source name

    An IOError while opening is logged via log_error and the function
    returns without processing. Returns None.
    """
    try:
        fid = open(afile)
    except IOError:
        log_error('IOError', afile)
        return
    # close the handle even if the read raises
    try:
        data = fid.read()
    finally:
        fid.close()
    text_do_data(data, afile)
def gz_do_data(data, afile):
    """Open in-memory bytes as a gzip stream and process it with gz_do_gz.

    data  -- raw gzip bytes, wrapped in io.BytesIO
    afile -- source name used for logging and passed on to gz_do_gz

    An IOError while opening is logged via log_error and the function
    returns without processing. Returns None.
    """
    filelike = io.BytesIO(data)
    try:
        agz = gzip.GzipFile(fileobj=filelike)
    except IOError:
        log_error('IOError', afile)
        return
    # close the stream even if processing raises
    try:
        gz_do_gz(agz, afile)
    finally:
        agz.close()
def rar_do_file(afile):
    """Open the rar archive at path afile and process it with rar_do_rar.

    afile -- path of the archive; also passed on as the source name

    A rarfile.BadRarFile while opening is logged via log_error and the
    function returns without processing. Returns None.

    Side effect: sets the module-level rarfile.PATH_SEP to '/'.
    """
    # fixes problems with default '\' separator
    rarfile.PATH_SEP = '/'
    try:
        arar = rarfile.RarFile(afile)
    except rarfile.BadRarFile:
        log_error('rarfile.BadRarFile', afile)
        return
    # close the archive even if processing raises
    try:
        rar_do_rar(arar, afile)
    finally:
        arar.close()
def types_data(data, afile=''):
    """Guess the type of an in-memory file.

    data  -- buffer contents handed to magic.from_buffer
    afile -- optional file name (as found in an archive or after
             decompression), forwarded to types_find

    Returns the result of types_find for the detected MIME type. If
    detection raises IOError, the error is logged under the name '_data'
    and ('other', False) is returned instead.
    """
    fallback = ('other', False)
    try:
        detected = magic.from_buffer(data, mime=True)
    except IOError:
        log_error('IOError', '_data')
        return fallback
    return types_find(detected, afile)
def gz_do_gz(agz, afile):
    """Read an open gzip stream and pass its payload on for processing.

    agz   -- GzipFile to read from
    afile -- source file name, used for logging and for labelling the payload

    An IOError while reading is logged via log_error and nothing is
    processed. The payload is classified with types_data and passed to
    do_data only when the returned keep flag is true. Returns None.
    """
    try:
        payload = agz.read()
    except IOError:
        log_error('IOError', afile)
        return

    kind, wanted = types_data(payload)
    if not wanted:
        return

    # a source name ending in .gz is labelled "<source>:<source without .gz>"
    stem, suffix = os.path.splitext(afile)
    label = (afile + ':' + stem) if suffix.lower() == '.gz' else afile
    do_data(kind, payload, label)
def gz_do_gz(agz, afile):
    """Decompress agz and dispatch the result through do_data.

    agz   -- GzipFile to read from
    afile -- source file name, used for logging and for naming the content

    A read failure (IOError) is logged via log_error and ends the call.
    The content is forwarded to do_data only if types_data reports it
    should be kept. Returns None.
    """
    try:
        content = agz.read()
    except IOError:
        log_error('IOError', afile)
        return

    (content_type, keep_it) = types_data(content)
    if keep_it:
        # default to the plain source name; if it carries a .gz extension,
        # append ":<name with the extension stripped>"
        name = afile
        base, extension = os.path.splitext(afile)
        if extension.lower() == '.gz':
            name = afile + ':' + base
        do_data(content_type, content, name)