def __init__(self, filename):
    """Read the leading bytes of ``filename`` and try to decode them as UTF-8.

    The file is opened in binary mode through
    ``compression_utils.get_fileobj_raw`` and at most ``SNIFF_PREFIX_BYTES``
    bytes are read from it.

    Attributes set on the instance:

    * ``filename`` - the path that was passed in.
    * ``compressed_format`` - first element of the pair returned by
      ``get_fileobj_raw``.
    * ``contents_header_bytes`` - the raw prefix that was read.
    * ``contents_header`` - the prefix decoded as UTF-8, or ``None`` when
      decoding failed.
    * ``truncated`` - ``True`` when exactly ``SNIFF_PREFIX_BYTES`` bytes were
      read (i.e. the read limit was reached).
    * ``non_utf8_error`` - the ``UnicodeDecodeError`` raised while decoding,
      or ``None``.
    * ``binary`` - ``True`` when the prefix is not valid UTF-8.

    :param filename: path of the file to inspect.
    """
    # Function-scope import so this block does not depend on the module's import list.
    import codecs

    non_utf8_error = None
    compressed_format = None
    contents_header_bytes = None
    contents_header = None  # Decoded form of the first SNIFF_PREFIX_BYTES of the file.
    truncated = False
    # A future direction to optimize sniffing even more for sniffers at the top of the list
    # is to lazy load contents_header based on what interface is requested. For instance instead
    # of returning a StringIO directly in string_io() return an object that reads the contents and
    # populates contents_header while providing a StringIO-like interface until the file is read
    # but then would fallback to native string_io()
    try:
        compressed_format, f = compression_utils.get_fileobj_raw(filename, "rb")
        try:
            contents_header_bytes = f.read(SNIFF_PREFIX_BYTES)
            truncated = len(contents_header_bytes) == SNIFF_PREFIX_BYTES
            # A truncated prefix can end in the middle of a multi-byte UTF-8
            # character. A strict one-shot decode would reject that and flag a
            # perfectly valid text file as binary, so decode incrementally and
            # only treat the data as final when the read limit was not reached.
            # With final=False a trailing incomplete sequence is held back and
            # left out of contents_header; invalid bytes anywhere else still
            # raise UnicodeDecodeError.
            decoder = codecs.getincrementaldecoder("utf-8")()
            contents_header = decoder.decode(contents_header_bytes, final=not truncated)
        finally:
            f.close()
    except UnicodeDecodeError as e:
        non_utf8_error = e

    self.truncated = truncated
    self.filename = filename
    self.non_utf8_error = non_utf8_error
    # NOTE: this is only a heuristic - anything that is not valid UTF-8 is
    # reported as binary, so text in another encoding is misclassified.
    self.binary = non_utf8_error is not None
    self.compressed_format = compressed_format
    self.contents_header = contents_header
    self.contents_header_bytes = contents_header_bytes
    self._file_size = None
def __init__(self, filename):
    """Capture the leading portion of ``filename`` for later sniffing.

    The file is opened through ``compression_utils.get_fileobj_raw`` and up
    to ``SNIFF_PREFIX_BYTES`` units are read from it. If opening or reading
    raises ``UnicodeDecodeError`` the file is flagged as binary and whatever
    had not been read yet stays ``None``.

    :param filename: path of the file to inspect.
    """
    # Possible later optimisation for sniffers near the top of the list: load
    # contents_header lazily depending on which interface is requested. For
    # example string_io() could hand back an object that reads the contents and
    # fills contents_header while exposing a StringIO-like interface until the
    # file has been read, and then fall back to the native string_io().
    detected_format = None
    prefix = None  # First SNIFF_PREFIX_BYTES of the file.
    hit_read_limit = False
    undecodable = False

    try:
        detected_format, handle = compression_utils.get_fileobj_raw(filename)
        try:
            prefix = handle.read(SNIFF_PREFIX_BYTES)
            # Reading exactly the limit means there may be more content.
            hit_read_limit = SNIFF_PREFIX_BYTES == len(prefix)
        finally:
            handle.close()
    except UnicodeDecodeError:
        undecodable = True

    self.filename = filename
    self.compressed_format = detected_format
    self.contents_header = prefix
    self.truncated = hit_read_limit
    self.binary = undecodable
    self._file_size = None
def assert_format_detected(self, path, expected_fmt, allowed_fmts=None):
    """Assert that ``path`` is detected as ``expected_fmt`` in every open mode.

    For each mode the file is opened through ``get_fileobj_raw`` and two
    things are checked: the reported format equals ``expected_fmt``, and a
    zero-length read returns ``bytes`` for binary modes and ``str`` otherwise.

    :param path: path handed to ``get_fileobj_raw``.
    :param expected_fmt: format value ``get_fileobj_raw`` is expected to report.
    :param allowed_fmts: passed through unchanged as the third positional
        argument of ``get_fileobj_raw``.
    :raises AssertionError: when the format or the read type does not match.
    """
    # NOTE(review): "U" was removed from the builtin open() in Python 3.11;
    # presumably get_fileobj_raw normalises it before opening - confirm.
    for mode in ["r", "rb", "rt", "U"]:
        expected_type = bytes if "b" in mode else str
        fmt, fh = get_fileobj_raw(path, mode, allowed_fmts)
        try:
            assert fmt == expected_fmt
            assert isinstance(fh.read(0), expected_type)
        finally:
            # Close the handle even when an assertion fails so the test does
            # not leak one open file per mode.
            fh.close()