def testSeekable(self): bz2f = BZ2File(BytesIO(self.DATA)) try: self.assertTrue(bz2f.seekable()) bz2f.read() self.assertTrue(bz2f.seekable()) finally: bz2f.close() self.assertRaises(ValueError, bz2f.seekable) bz2f = BZ2File(BytesIO(), mode="w") try: self.assertFalse(bz2f.seekable()) finally: bz2f.close() self.assertRaises(ValueError, bz2f.seekable) src = BytesIO(self.DATA) src.seekable = lambda: False bz2f = BZ2File(src) try: self.assertFalse(bz2f.seekable()) finally: bz2f.close() self.assertRaises(ValueError, bz2f.seekable)
def testSeekable(self): bz2f = BZ2File(BytesIO(self.DATA)) try: self.assertTrue(bz2f.seekable()) bz2f.read() self.assertTrue(bz2f.seekable()) finally: bz2f.close() self.assertRaises(ValueError, bz2f.seekable) bz2f = BZ2File(BytesIO(), "w") try: self.assertFalse(bz2f.seekable()) finally: bz2f.close() self.assertRaises(ValueError, bz2f.seekable) src = BytesIO(self.DATA) src.seekable = lambda: False bz2f = BZ2File(src) try: self.assertFalse(bz2f.seekable()) finally: bz2f.close() self.assertRaises(ValueError, bz2f.seekable)
def bdecode(f_or_data):
    """
    Decode a single bencoded object.

    The leading type byte is inspected without being consumed and used to
    look up a decoding function in TYPES; that function is then called with
    the stream and its result is returned.

    The parameter can be a file opened in bytes mode, bytes or a string
    (the last of which is encoded to bytes first).

    Returns None when the leading byte has no entry in TYPES; in that case
    one byte is consumed and checked against _TYPE_END.
    """
    stream = f_or_data
    if isinstance(stream, str):
        stream = stream.encode()
    if isinstance(stream, bytes):
        stream = BytesIO(stream)

    # TODO: looking at the first byte is the only step that needs readahead.
    if not stream.seekable():
        # peek returns an arbitrary amount of bytes, so we have to slice.
        first_byte = stream.peek(1)[:1]
    else:
        # Read the byte, then step back so the decoder sees it again.
        first_byte = stream.read(1)
        stream.seek(-1, SEEK_CUR)

    decoder = TYPES.get(first_byte)
    if decoder is None:
        # Used in dicts and lists to designate an end
        assert_btype(stream.read(1), _TYPE_END)
        return None
    return decoder(stream)
def prepare_rw_output_stream(output):
    """
    Return a stream that can be read from, written to and seeked.

    Intended for writing & updating signed files: when producing a
    signature, the PDF is rendered to a byte buffer with placeholder values
    for the signature data, or straight to the provided output stream if
    that stream is capable enough.

    The original ``output`` is returned when it is writable, readable and
    seekable. A fresh :class:`.BytesIO` is returned when ``output`` is
    ``None``, not readable or not seekable.

    :param output:
        A writable file-like object, or ``None``.
    :return:
        A file-like object that supports reading, writing and seeking.
    :raises IOError:
        If ``output`` is not ``None`` and not writable.
    """
    if output is None:
        return BytesIO()

    # Rationale for the explicit writability check:
    #  If the output buffer is not readable or not seekable, it's
    #  about to be replaced with a BytesIO instance, and in that
    #  case, the write error would only happen *after* the signing
    #  operations are done. We want to avoid that scenario.
    if not output.writable():
        raise IOError("Output buffer is not writable")  # pragma: nocover

    if output.seekable() and output.readable():
        return output
    return BytesIO()
def bdecode(f_or_data):
    """
    Decode one bencoded value from a stream, bytes or str.

    The type byte at the current position selects a decoding function from
    TYPES; that function receives the stream (with the type byte still
    unread) and its result is returned.

    A str argument is encoded to bytes, and bytes are wrapped in a BytesIO.
    If the type byte is not in TYPES, one byte is consumed, checked against
    _TYPE_END and None is returned.
    """

    def first_byte_of(stream):
        """Return the next byte of the stream without consuming it."""
        #TODO: this lookup is the only place that needs readahead.
        if stream.seekable():
            lead = stream.read(1)
            stream.seek(-1, SEEK_CUR)
            return lead
        #FIXME: muted bug!
        #peek returns a arbitrary amount of bytes, so we have to slice.
        return stream.peek(1)[:1]  # pylint: disable=no-member

    if isinstance(f_or_data, str):
        f_or_data = f_or_data.encode()
    if isinstance(f_or_data, bytes):
        f_or_data = BytesIO(f_or_data)

    handler = TYPES.get(first_byte_of(f_or_data))
    if handler is not None:
        return handler(f_or_data)

    #Used in dicts and lists to designate an end
    assert_btype(f_or_data.read(1), _TYPE_END)
    return None
def test_fp_callable_incomplete(self):
    """Loading truncated data from a stream that reports itself as
    non-seekable must raise DecoderException ('Insufficient input')."""
    obj = [123, b'something']
    encoded = self.bjddumpb(obj)

    # Cut off the tail of the encoding: the bytes of the trailing binary
    # value plus one more byte (per the original note: the whole of the last
    # token, binary data 'something', without its length).
    cut = len(obj[1]) + 1
    truncated = BytesIO(encoded[:-cut])
    truncated.seekable = lambda: False

    with self.assert_raises_regex(DecoderException, 'Insufficient input'):
        self.bjdload(truncated)
class SrlDocumentReader(object):
    """Sequential reader for binary values stored in an in-memory byte string."""

    def __init__(self, byte_str):
        super(SrlDocumentReader, self).__init__()
        self.stream = BytesIO(byte_str)
        self.stream.seek(0, os.SEEK_SET)

    def _read_unpack(self, fmt):
        '''
        First get the number of bytes that struct will need to unpack.
        Then read those number of bytes from the io stream. The current
        io stream position will advance by the number of bytes read.

        Returns the first unpacked value, or the sentinel 0 when fewer
        bytes than required were available.
        '''
        plen = struct.calcsize(fmt)
        b = self.stream.read(plen)
        if plen != len(b):
            return 0
        return struct.unpack(fmt, b)[0]

    def tell(self):
        """Return the current position in the stream."""
        return self.stream.tell()

    def seek(self, offset, wench=os.SEEK_CUR):
        """Move the stream position and return the new absolute position.

        ``wench`` is the usual ``whence`` argument (name kept for callers);
        note that it defaults to a seek relative to the current position.
        Returns 0 without moving when the stream is not seekable.
        """
        if not self.stream.seekable():
            return 0
        return self.stream.seek(offset, wench)

    def read_varint(self):
        """Read a little-endian base-128 varint (7 data bits per byte).

        Raises EOFError if the stream ends before the terminating byte;
        previously this surfaced as a TypeError from ``ord(b'')``.
        """
        shift = 0
        result = 0
        while True:
            chunk = self.stream.read(1)
            if not chunk:
                raise EOFError('unexpected end of stream while reading varint')
            i = chunk[0]
            result |= (i & 0x7f) << shift
            shift += 7
            # A clear high bit marks the last byte of the varint.
            if not (i & 0x80):
                break
        return result

    def read_uint32(self):
        """Read a little-endian unsigned 32-bit integer (0 on short read)."""
        return self._read_unpack('<I')

    def read_uint8(self):
        """Read an unsigned byte (0 on short read)."""
        return self._read_unpack('<B')

    def read_float(self):
        """Read a little-endian 32-bit float (0 on short read)."""
        return self._read_unpack('<f')

    def read_str(self, slen):
        """Read ``slen`` bytes and decode them as UTF-8.

        Returns '' when fewer than ``slen`` bytes were available; previously
        the short-read sentinel 0 was passed to ``bytes.decode`` and raised
        TypeError.
        """
        val = self._read_unpack('{0}s'.format(slen))
        if not isinstance(val, bytes):
            # _read_unpack signalled a short read with its 0 sentinel.
            return ''
        return val.decode('utf-8')
class SrlDocumentReader(object):
    """Reads struct-packed values, varints and strings from a byte string."""

    def __init__(self, byte_str):
        super(SrlDocumentReader, self).__init__()
        self.stream = BytesIO(byte_str)
        self.stream.seek(0, os.SEEK_SET)

    def _read_unpack(self, fmt):
        '''
        First get the number of bytes that struct will need to unpack.
        Then read those number of bytes from the io stream. The current
        io stream position will advance by the number of bytes read.

        A short read yields the sentinel value 0 instead of an exception.
        '''
        plen = struct.calcsize(fmt)
        b = self.stream.read(plen)
        if plen != len(b):
            return 0
        value = struct.unpack(fmt, b)[0]
        return value

    def tell(self):
        """Current absolute position of the underlying stream."""
        return self.stream.tell()

    def seek(self, offset, wench=os.SEEK_CUR):
        """Seek and return the new absolute position (0 if not seekable).

        ``wench`` plays the role of ``whence`` and defaults to SEEK_CUR.
        """
        if not self.stream.seekable():
            return 0
        return self.stream.seek(offset, wench)

    def read_varint(self):
        """Decode a varint: 7 payload bits per byte, least significant
        group first, high bit set on every byte except the last.

        Raises EOFError when the data ends mid-varint (this used to be an
        accidental TypeError from ``ord`` on an empty read).
        """
        shift = 0
        result = 0
        while True:
            raw = self.stream.read(1)
            if len(raw) != 1:
                raise EOFError('unexpected end of stream while reading varint')
            i = raw[0]
            result |= (i & 0x7f) << shift
            shift += 7
            if not (i & 0x80):
                break
        return result

    def read_uint32(self):
        """Little-endian unsigned 32-bit integer; 0 on short read."""
        fmt = '<I'
        return self._read_unpack(fmt)

    def read_uint8(self):
        """Unsigned byte; 0 on short read."""
        fmt = '<B'
        return self._read_unpack(fmt)

    def read_float(self):
        """Little-endian 32-bit float; 0 on short read."""
        fmt = '<f'
        return self._read_unpack(fmt)

    def read_str(self, slen):
        """Read ``slen`` bytes and return them decoded as UTF-8.

        On a short read the 0 sentinel from _read_unpack is mapped to ''
        rather than being handed to ``bytes.decode`` (which raised TypeError).
        """
        fmt = '{0}s'.format(slen)
        val = self._read_unpack(fmt)
        if isinstance(val, bytes):
            return val.decode('utf-8')
        return ''
def test_fp_multi(self): obj = {'a': 123, 'b': b'some raw content'} output = BytesIO() count = 10 # Seekable an non-seekable runs for _ in range(2): output.seek(0) for i in range(count): obj['c'] = i self.bjddump(obj, output) output.seek(0) for i in range(count): obj['c'] = i self.assertEqual(self.bjdload(output), obj) output.seekable = lambda: False
from io import BytesIO, StringIO

# BytesIO and StringIO are both classes in the io module: each allocates an
# in-memory buffer (binary or text mode) that can be used like a file object.
# When close() is called (here: on leaving the with-block) the buffer is
# released.
# getvalue() returns the whole content, regardless of the file pointer.
# Why use StringIO: disk operations are generally much slower than memory
# operations, so when memory is sufficient a common optimisation is to avoid
# touching the disk and cut down on disk I/O, which can greatly improve
# runtime efficiency.

with BytesIO() as bio:
    print(bio.readable(), bio.writable(), bio.seekable())
    bio.write(b'magede\nPython')
    bio.seek(0)
    print(bio.readline())
    print(bio.getvalue())

with StringIO() as sio:
    print(sio.readable(), sio.writable(), sio.seekable())
    sio.write('magedu\nPython')
    sio.seek(0)
    print(sio.readline())
    print(sio.getvalue())

# File-like objects: objects that can be operated on just like file objects.
from sys import stdout

f = stdout
print(type(f))
f.write('magedu.com')  # written to the console
class VCRHTTPResponse(HTTPResponse):
    """
    Stub response class that gets returned instead of a HTTPResponse

    The body is served from an in-memory BytesIO built from the recorded
    response, so no socket is involved and HTTPResponse.__init__ is
    deliberately not called.
    """

    def __init__(self, recorded_response):
        self.fp = None
        self.recorded_response = recorded_response
        self.reason = recorded_response["status"]["message"]
        self.status = self.code = recorded_response["status"]["code"]
        self.version = None
        self._content = BytesIO(self.recorded_response["body"]["string"])
        self._closed = False

        headers = self.recorded_response["headers"]
        # Since we are loading a response that has already been serialized, our
        # response is no longer chunked.  That means we don't want any
        # libraries trying to process a chunked response.  By removing the
        # transfer-encoding: chunked header, this should cause the downstream
        # libraries to process this as a non-chunked response.
        te_key = [h for h in headers.keys() if h.upper() == "TRANSFER-ENCODING"]
        if te_key:
            del headers[te_key[0]]
        self.headers = self.msg = parse_headers(headers)

        self.length = compat.get_header(self.msg, "content-length") or None

    @property
    def closed(self):
        # In Python 3 the value of self.closed cannot be assigned, so
        # self._closed is toggled instead and this property shadows the real
        # self.closed from the superclass.
        return self._closed

    def read(self, *args, **kwargs):
        return self._content.read(*args, **kwargs)

    def readall(self):
        """Return all remaining body bytes.

        BytesIO has no ``readall`` method (it belongs to RawIOBase), so the
        previous delegation raised AttributeError; ``read()`` with no size
        returns everything up to EOF.
        """
        return self._content.read()

    def readinto(self, *args, **kwargs):
        return self._content.readinto(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self._content.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self._content.readlines(*args, **kwargs)

    def seekable(self):
        return self._content.seekable()

    def tell(self):
        return self._content.tell()

    def isatty(self):
        return self._content.isatty()

    def seek(self, *args, **kwargs):
        return self._content.seek(*args, **kwargs)

    def close(self):
        """Mark the response as closed; the in-memory body is left untouched."""
        self._closed = True
        return True

    def getcode(self):
        return self.status

    def isclosed(self):
        return self.closed

    def info(self):
        return parse_headers(self.recorded_response["headers"])

    def getheaders(self):
        message = parse_headers(self.recorded_response["headers"])
        return list(compat.get_header_items(message))

    def getheader(self, header, default=None):
        """Return all values of ``header`` (case-insensitive) joined by
        ", ", or ``default`` when the header is absent."""
        values = [v for (k, v) in self.getheaders() if k.lower() == header.lower()]

        if values:
            return ", ".join(values)
        else:
            return default

    def readable(self):
        return self._content.readable()
class _BaseBinaryWrapper: def __init__(self, stream: Union[typing.BinaryIO, bytes] = b""): if isinstance(stream, bytes) or isinstance(stream, bytearray): self.stream = BytesIO(stream) else: self.stream = stream # Wrappings: def close(self) -> None: return self.stream.close() def flush(self) -> None: return self.stream.flush() def read(self, n: int = -1) -> AnyStr: return self.stream.read(n) def readable(self) -> bool: return self.stream.readable() def readline(self, limit: int = -1) -> AnyStr: return self.stream.readline(limit) def readlines(self, hint: int = -1) -> List[AnyStr]: return self.stream.readlines(hint) def write(self, s: Union[bytes, bytearray]) -> int: return self.stream.write(s) def writable(self) -> bool: return self.stream.writable() def writelines(self, lines: Iterable[AnyStr]) -> None: self.stream.writelines(lines) def seek(self, offset: int, whence: int = 0) -> int: return self.stream.seek(offset, whence) def seekable(self) -> bool: return self.stream.seekable() def tell(self) -> int: return self.stream.tell() def fileno(self) -> int: return self.stream.fileno() def __enter__(self): self.stream.__enter__() return self def __exit__(self, exc_type, exc_val, exc_tb): self.stream.__exit__(exc_type, exc_val, exc_tb) # helper functions def readall(self): self.stream.seek(0) return self.stream.read() def getvalue(self): if isinstance(self.stream, BytesIO): return self.stream.getvalue() pos = self.stream.tell() ret = self.readall() self.stream.seek(pos) return ret def align(self, alignment=4): if offset := (self.tell() % alignment): self.seek(self.tell() + alignment - offset)
class StreamIO(object):
    """
    Binary reader/writer that wraps a stream and packs/unpacks values with
    ``struct`` using a configurable byte order.

    The stream may be ``None`` (a new BytesIO is created), a bytes-like
    object (wrapped in a BytesIO), a file path (opened in binary mode) or
    any existing stream object.
    """
    stream = None
    endian = None
    # Class-level default only: reset() gives every instance its own dict.
    labels = {}

    # I/O functions
    read_func = None
    write_func = None

    # attributes
    can_seek = False
    can_tell = False

    def __init__(self, stream=None, endian: Endian = Endian.LITTLE):
        self.reset()
        self.set_stream(stream)
        self.set_endian(endian)
        self.set_io_funcs()

    # reset
    def reset(self) -> None:
        """Restore every attribute to its initial, stream-less state."""
        self.stream = None
        self.endian = None
        self.labels = {}
        self.read_func = None
        self.write_func = None
        self.can_seek = False
        self.can_tell = False

    # add with functionality
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    # shortcuts
    def __int__(self) -> int:
        return self.tell()

    def __len__(self) -> int:
        return self.length()

    def __bytes__(self) -> bytes:
        return self.getvalue()

    # In-place operators move the stream position. They must return self:
    # ``s += 4`` rebinds ``s`` to the return value, so returning None (as
    # before) replaced the stream object with None.
    def __iadd__(self, other: int) -> "StreamIO":
        self.seek(self.tell() + other)
        return self

    def __isub__(self, other: int) -> "StreamIO":
        self.seek(self.tell() - other)
        return self

    def __imul__(self, other: int) -> "StreamIO":
        self.seek(self.tell() * other)
        return self

    def __ifloordiv__(self, other: int) -> "StreamIO":
        self.seek(self.tell() // other)
        return self

    def __itruediv__(self, other: int) -> "StreamIO":
        # Floor division on purpose: a stream offset has to be an integer.
        self.seek(self.tell() // other)
        return self

    def __getitem__(self, key: int | slice):
        if isinstance(key, slice):
            # Open-ended slices default to the start / end of the stream
            # (the slice step is ignored).
            start = 0 if key.start is None else key.start
            stop = self.length() if key.stop is None else key.stop
            return self.read_bytes_at(start, stop - start)
        return self.read_byte_at(key)

    def __setitem__(self, key: int | slice, value: int | bytes | bytearray) -> int:
        if isinstance(key, slice):
            return self.write_bytes_at(0 if key.start is None else key.start, value)
        if isinstance(value, (bytes, bytearray)):
            # Covers empty and single-byte values as well.
            return self.write_bytes_at(key, value)
        return self.write_byte_at(key, value)

    # virtual file pointer
    @property
    def offset(self) -> int:
        return self.tell()

    @offset.setter
    def offset(self, value: int) -> None:
        self.seek(value)

    # utilities
    def set_stream(self, stream) -> None:
        """
        Set stream to read/write from/to
        :param stream: The stream to interact with
        :return: None
        """
        if stream is None:
            self.stream = BytesIO()
        elif isinstance(stream, (bytes, bytearray, memoryview)):
            self.stream = BytesIO(stream)
        elif isinstance(stream, str):
            # A path: update an existing file in place, otherwise create it.
            if isfile(stream):
                self.stream = open(stream, "r+b")
            else:
                self.stream = open(stream, "wb")
        else:
            self.stream = stream
        self.can_seek = self.stream.seekable()
        self.can_tell = self.stream.seekable()

    def set_endian(self, endian: Endian) -> None:
        """
        Set the endian you want to use for reading/writing data in the stream
        :param endian: LITTLE, BIG, NETWORK, or NATIVE
        :return: None
        """
        endian = int(endian)
        endians = ["<", ">", "!", "@"]
        # Out-of-range values are ignored and leave the current setting.
        if 0 <= endian < len(endians):
            self.endian = endians[endian]

    def set_read_func(self, name: str) -> None:
        """
        Set the function name in the stream of the read function
        :param name: The name of the read function
        :return: None
        """
        if hasattr(self.stream, name):
            self.read_func = getattr(self.stream, name)

    def set_write_func(self, name: str) -> None:
        """
        Set the function name in the stream of the write function
        :param name: The name of the write function
        :return: None
        """
        if hasattr(self.stream, name):
            self.write_func = getattr(self.stream, name)

    def set_io_funcs(self, read_name: str = "read", write_name: str = "write") -> None:
        """
        Set the read/write function names in the stream
        :param read_name: The name of the read function
        :param write_name: The name of the write function
        :return: None
        """
        self.set_read_func(read_name)
        self.set_write_func(write_name)

    def tell(self) -> int:
        """
        Tell the current position of the stream if supported
        :return: The position of the stream
        """
        if self.can_tell:
            return self.stream.tell()
        raise NotImplementedError("tell isn't implemented in the specified stream!")

    def seek(self, index: int, whence: int = SEEK_SET) -> int:
        """
        Jump to a position in the stream if supported
        :param index: The offset to jump to
        :param whence: Index is interpreted relative to the position indicated by whence (SEEK_SET, SEEK_CUR, and SEEK_END in io library)
        :return: The new absolute position
        """
        if self.can_seek:
            return self.stream.seek(index, whence)
        raise NotImplementedError("seek isn't implemented in the specified stream!")

    def seek_start(self) -> int:
        """
        Jump to the beginning of the stream if supported
        :return: The new absolute position
        """
        return self.stream.seek(0)

    def seek_end(self) -> int:
        """
        Jump to the end of the stream if supported
        :return: The new absolute position
        """
        return self.stream.seek(0, SEEK_END)

    def length(self) -> int:
        """
        Get the length of the stream if supported
        :return: The total length of the stream
        """
        loc = self.tell()
        self.seek_end()
        size = self.tell()
        self.seek(loc)
        return size

    def getvalue(self) -> bytes | bytearray:
        """
        Get the stream's output
        :return: The stream's data as bytes or bytearray
        """
        return self.stream.getvalue()

    def getbuffer(self) -> bytes | bytearray:
        """
        Get the stream's buffer
        :return: The stream's buffer as bytes or bytearray
        """
        return self.stream.getbuffer()

    def flush(self) -> None:
        """
        Write the data to the stream
        :return: None
        """
        return self.stream.flush()

    def close(self) -> None:
        """
        Close the stream
        :return: None
        """
        self.stream.close()

    # labeling
    def get_labels(self) -> list:
        return list(self.labels.keys())

    def label_exists(self, name: str) -> bool:
        return name in self.labels

    def get_label(self, name: str) -> int:
        return self.labels[name]

    def set_label(self, name: str, offset: int = None, overwrite: bool = True) -> int:
        """
        Remember an offset under a name
        :param name: The label name
        :param offset: The offset to store; the current position when None or negative
        :param overwrite: When False and the name is taken, a random suffix is appended to the name
        :return: The stored offset
        """
        if not overwrite and self.label_exists(name):
            name += ("_" + rand_str(4))
        if offset is not None and offset >= 0:
            loc = offset
        else:
            loc = self.tell()
        self.labels[name] = loc
        return loc

    def rename_label(self, old_name: str, new_name: str, overwrite: bool = True) -> bool:
        """
        Rename a label
        :return: True if the label existed and was renamed, False otherwise
        """
        assert old_name != new_name, "Old and new label names shouldn't be the same"

        if self.label_exists(old_name):
            value = self.get_label(old_name)
            self.del_label(old_name)
            self.set_label(new_name, value, overwrite)
            return True
        return False

    def goto_label(self, name: str) -> int:
        return self.seek(self.labels[name])

    def del_label(self, name: str) -> int:
        return self.labels.pop(name)

    # positioned access helper
    def _call_at(self, offset: int, ret: bool, func, *args):
        """Seek to offset, call func(*args), optionally seek back to where we were."""
        loc = self.tell()
        self.seek(offset)
        output = func(*args)
        if ret:
            self.seek(loc)
        return output

    # base I/O methods
    def read(self, num: int = None) -> bytes | bytearray:
        if num is None:
            return self.read_func()
        return self.read_func(num)

    def write(self, data: bytes | bytearray | int) -> int:
        # A bare int is written as one byte.
        if isinstance(data, int):
            data = bytes([data])
        return self.write_func(data)

    def stream_unpack(self, fmt: str) -> tuple | list:
        fmt = f"{self.endian}{fmt}"
        return unpack(fmt, self.read(calcsize(fmt)))

    def stream_pack(self, fmt: str, *values) -> int:
        fmt = f"{self.endian}{fmt}"
        return self.write(pack(fmt, *values))

    def stream_unpack_array(self, t: str, num: int) -> tuple | list:
        fmt = f"{self.endian}{num}{t}"
        return unpack(fmt, self.read(calcsize(fmt)))

    def stream_pack_array(self, t: str, *values) -> int:
        fmt = f"{self.endian}{len(values)}{t}"
        return self.write(pack(fmt, *values))

    # signed bytes
    def read_sbyte(self) -> int:
        (val,) = self.stream_unpack("b")
        return val

    def read_sbyte_at(self, offset: int, ret: bool = True) -> int:
        return self._call_at(offset, ret, self.read_sbyte)

    def read_sbytes(self, num: int) -> Tuple[int] | List[int]:
        return self.stream_unpack_array("b", num)

    def read_sbytes_at(self, offset: int, num: int, ret: bool = True) -> Tuple[int] | List[int]:
        return self._call_at(offset, ret, self.read_sbytes, num)

    def write_sbyte(self, value: int) -> int:
        return self.stream_pack("b", value)

    def write_sbyte_at(self, offset: int, value: int, ret: bool = True) -> int:
        return self._call_at(offset, ret, self.write_sbyte, value)

    def write_sbytes(self, values: bytes | bytearray) -> int:
        return self.stream_pack_array("b", *values)

    def write_sbytes_at(self, offset: int, values: bytes | bytearray, ret: bool = True) -> int:
        return self._call_at(offset, ret, self.write_sbytes, values)

    # unsigned bytes
    def read_byte(self) -> int:
        (val,) = self.stream_unpack("B")
        return val

    read_ubyte = read_byte

    def read_byte_at(self, offset: int, ret: bool = True) -> int:
        return self._call_at(offset, ret, self.read_byte)

    read_bytes = read
    read_ubytes = read

    def read_bytes_at(self, offset: int, num: int, ret: bool = True) -> bytes:
        return self._call_at(offset, ret, self.read_bytes, num)

    read_ubytes_at = read_bytes_at

    def write_byte(self, value: int):
        return self.stream_pack("B", value)

    write_ubyte = write_byte

    def write_byte_at(self, offset: int, value: int, ret: bool = True) -> int:
        return self._call_at(offset, ret, self.write_byte, value)

    write_bytes = write
    write_ubyte_at = write_byte_at

    def write_bytes_at(self, offset: int, values: bytes | bytearray, ret: bool = True) -> int:
        return self._call_at(offset, ret, self.write_bytes, values)

    write_ubytes_at = write_bytes_at

    def load_from_buffer(self, data: bytes | bytearray) -> int:
        return self.write_bytes(data)

    # boolean
    def read_bool(self) -> bool:
        (val,) = self.stream_unpack("?")
        return val

    def read_bool_array(self, num: int) -> Tuple[bool]:
        return self.stream_unpack_array("?", num)

    def write_bool(self, value: bool) -> int:
        return self.stream_pack("?", value)

    def write_bool_array(self, values: List[bool] | Tuple[bool]) -> int:
        return self.stream_pack_array("?", *values)

    # int16/short
    def read_int16(self) -> int:
        (val,) = self.stream_unpack("h")
        return val

    read_short = read_int16

    def read_int16_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("h", num)

    read_short_array = read_int16_array

    def write_int16(self, value: int) -> int:
        return self.stream_pack("h", value)

    write_short = write_int16

    def write_int16_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("h", *values)

    write_short_array = write_int16_array

    # uint16/ushort
    def read_uint16(self) -> int:
        (val,) = self.stream_unpack("H")
        return val

    read_ushort = read_uint16

    def read_uint16_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("H", num)

    read_ushort_array = read_uint16_array

    def write_uint16(self, value: int) -> int:
        return self.stream_pack("H", value)

    write_ushort = write_uint16

    def write_uint16_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("H", *values)

    write_ushort_array = write_uint16_array

    # int32/int/long
    def read_int32(self) -> int:
        (val,) = self.stream_unpack("i")
        return val

    read_int = read_int32
    read_long = read_int32

    def read_int32_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("i", num)

    read_int_array = read_int32_array
    read_long_array = read_int32_array

    def write_int32(self, value: int) -> int:
        return self.stream_pack("i", value)

    write_int = write_int32
    write_long = write_int32

    def write_int32_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("i", *values)

    write_int_array = write_int32_array
    write_long_array = write_int32_array

    # uint32/uint/ulong
    def read_uint32(self) -> int:
        (val,) = self.stream_unpack("I")
        return val

    read_uint = read_uint32
    read_ulong = read_uint32

    def read_uint32_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("I", num)

    read_uint_array = read_uint32_array
    read_ulong_array = read_uint32_array

    def write_uint32(self, value: int) -> int:
        return self.stream_pack("I", value)

    write_uint = write_uint32
    write_ulong = write_uint32

    def write_uint32_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("I", *values)

    write_uint_array = write_uint32_array
    write_ulong_array = write_uint32_array

    # int64/longlong
    def read_int64(self) -> int:
        return self.stream_unpack("q")[0]

    read_longlong = read_int64

    def read_int64_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("q", num)

    read_longlong_array = read_int64_array

    def write_int64(self, value: int) -> int:
        return self.stream_pack("q", value)

    write_longlong = write_int64

    def write_int64_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("q", *values)

    write_longlong_array = write_int64_array

    # uint64/ulonglong
    def read_uint64(self) -> int:
        (val,) = self.stream_unpack("Q")
        return val

    read_ulonglong = read_uint64

    def read_uint64_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("Q", num)

    read_ulonglong_array = read_uint64_array

    def write_uint64(self, value: int) -> int:
        return self.stream_pack("Q", value)

    write_ulonglong = write_uint64

    def write_uint64_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("Q", *values)

    write_ulonglong_array = write_uint64_array

    # float32/single
    def read_float32(self) -> float:
        (val,) = self.stream_unpack("f")
        return val

    read_single = read_float32

    def read_float32_array(self, num: int) -> Tuple[float]:
        return self.stream_unpack_array("f", num)

    read_single_array = read_float32_array

    def write_float32(self, value: float) -> int:
        return self.stream_pack("f", value)

    write_single = write_float32

    def write_float32_array(self, values: List[float] | Tuple[float]) -> int:
        return self.stream_pack_array("f", *values)

    write_single_array = write_float32_array

    # float64/double
    def read_float64(self) -> float:
        (val,) = self.stream_unpack("d")
        return val

    read_double = read_float64

    def read_float64_array(self, num: int) -> Tuple[float]:
        return self.stream_unpack_array("d", num)

    read_double_array = read_float64_array

    def write_float64(self, value: float) -> int:
        return self.stream_pack("d", value)

    write_double = write_float64

    def write_float64_array(self, values: List[float] | Tuple[float]) -> int:
        return self.stream_pack_array("d", *values)

    write_double_array = write_float64_array

    # varint
    def read_varint(self) -> int:
        shift = 0
        result = 0
        while True:
            i = self.read_byte()
            result |= (i & 0x7f) << shift
            shift += 7
            # A clear high bit marks the final byte.
            if not (i & 0x80):
                break
        return result

    def read_varint_array(self, num: int) -> Tuple[int]:
        return tuple(self.read_varint() for _ in range(num))

    def write_varint(self, num: int) -> int:
        """
        Write an unsigned varint (7 bits per byte, least significant group first)
        :param num: The non-negative value to write
        :return: Whatever the stream's write function returns
        """
        if num < 0:
            # A negative int never shifts down to 0, so the loop below
            # would run (and allocate) forever.
            raise ValueError("varint cannot encode negative numbers")
        buff = bytearray()
        while True:
            towrite = num & 0x7f
            num >>= 7
            if num:
                buff.append(towrite | 0x80)
            else:
                buff.append(towrite)
                break
        return self.write_bytes(bytes(buff))

    def write_varint_array(self, values: List[int] | Tuple[int]) -> int:
        return sum(self.write_varint(x) for x in values)

    # 7-bit encoded ints (used as the length prefix of strings)
    def read_int7(self) -> int:
        index = 0
        result = 0
        while True:
            byte_value = self.read_byte()
            result |= (byte_value & 0x7F) << (7 * index)
            if byte_value & 0x80 == 0:
                break
            index += 1
        return result

    def read_int7_array(self, num: int) -> Tuple[int]:
        return tuple(self.read_int7() for _ in range(num))

    def write_int7(self, value: int) -> int:
        data = bytearray()
        num = value
        while num >= 0x80:
            data.append((num | 0x80) & 0xFF)
            num >>= 7
        data.append(num & 0xFF)
        return self.write(bytes(data))

    def write_int7_array(self, values: List[int] | Tuple[int]) -> int:
        return sum(self.write_int7(x) for x in values)

    # strings
    def read_string(self, encoding: str = "UTF8") -> str:
        """Read a string prefixed with its byte length as a 7-bit encoded int."""
        str_size = self.read_int7()
        if str_size <= 0:
            return ""
        return self.read(str_size).decode(encoding)

    def read_c_string(self, encoding: str = "UTF8") -> str:
        """Read bytes up to (and consuming) a NUL terminator, or up to EOF."""
        output = bytearray()
        # An empty read means EOF: stop there instead of spinning forever
        # on an unterminated string.
        while (tmp := self.read(1)) not in (b"\x00", b""):
            output += tmp
        return output.decode(encoding)
class VerifiableStream(BinaryIO):
    """A write-then-read binary stream that produces an HMAC over its contents.

    The HMAC key is not supplied by the caller; a random nonce is generated
    and used as the key. This is intentional: these streams are meant to be
    used as part of model serialization, where the nonce and HMAC code are
    stored in a cryptographically signed metadata file. The HMAC therefore
    only shows that the stream's data has not changed; establishing the
    data's origin is the job of the metadata signature.

    Intended sequence of use:
    - instantiate the stream
    - write all data to the stream (it is not readable yet)
    - call "finalize()" and keep the returned nonce and HMAC code
    - read data from the stream (it is not writable any more)
    """

    def __init__(self):
        """Set up an empty, unfinalized stream keyed with a fresh random nonce."""
        self._finalized = False
        # The nonce is bytes -- be careful when combining it with strings.
        self._random_nonce = os.urandom(16)
        self._underlying_stream = BytesIO()
        self._hmac_state = hmac.new(self._random_nonce, digestmod=HASHER)

    def _ensure_finalized(self):
        """Fail with AssertionError unless finalize() has been called."""
        if self._finalized:
            return
        raise AssertionError("Expected the stream to be finalized, but it was not!")

    def _ensure_not_finalized(self):
        """Fail with AssertionError if finalize() has already been called."""
        if not self._finalized:
            return
        raise AssertionError("Expected the stream to not be finalized, but it was!")

    def finalize(self):
        """Switch the stream from its writing phase to its reading phase.

        Returns:
            tuple (nonce, HMAC code) (both of type string)
        """
        self._ensure_not_finalized()
        self._finalized = True

        digest = self._hmac_state.digest()
        return (
            _convert_base64_bytes_to_string(self._random_nonce),
            _convert_base64_bytes_to_string(digest),
        )

    # ---- writing: only before finalize() ---------------------------------

    def writable(self) -> bool:
        """Return True while unfinalized and the underlying stream is writable."""
        return (not self._finalized) and self._underlying_stream.writable()

    @validate(b=bytes)
    def write(self, b: bytes) -> int:
        """Append the bytes to the stream and feed them into the HMAC."""
        self._ensure_not_finalized()
        written = self._underlying_stream.write(b)
        self._hmac_state.update(b)
        return written

    def writelines(self, lines: Iterable[bytes]) -> None:
        """Write every item of ``lines`` via write()."""
        # write() performs the same check, but failing up front is safer.
        self._ensure_not_finalized()
        for chunk in lines:
            self.write(chunk)
        return None

    # ---- reading: only after finalize() ----------------------------------

    def readable(self) -> bool:
        """Return True once finalized and the underlying stream is readable."""
        return self._finalized and self._underlying_stream.readable()

    def read(self, size=None) -> bytes:
        """Read up to ``size`` bytes (everything remaining when None)."""
        self._ensure_finalized()
        return self._underlying_stream.read(size)

    def readall(self) -> bytes:
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(
            "`VerifiablStream` does not implement `readall` since the underlying BtytesIO does not "
            "implement it."
        )

    def readline(self, size=None) -> bytes:
        """Read a single line from the stream."""
        self._ensure_finalized()
        return self._underlying_stream.readline(size)

    def readlines(self, size=None) -> List[bytes]:
        """Read the remaining lines from the stream as a list."""
        self._ensure_finalized()
        return self._underlying_stream.readlines(size)

    def read1(self, size) -> bytes:
        """Delegate to the underlying stream's read1()."""
        self._ensure_finalized()
        return self._underlying_stream.read1(size)

    def readinto(self, b) -> Optional[int]:
        """Read bytes into the pre-allocated buffer ``b``."""
        self._ensure_finalized()
        return self._underlying_stream.readinto(b)

    def readinto1(self, b) -> Optional[int]:
        """Delegate to the underlying stream's readinto1()."""
        self._ensure_finalized()
        return self._underlying_stream.readinto1(b)

    # ---- seeking: only after finalize() ----------------------------------

    def seekable(self):
        """Return True once finalized and the underlying stream can seek."""
        return self._underlying_stream.seekable() if self._finalized else False

    def seek(self, *args, **kwargs) -> int:
        """Move the stream position and return the new position."""
        self._ensure_finalized()
        return self._underlying_stream.seek(*args, **kwargs)

    def truncate(self, size: Optional[int] = ...) -> None:
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(
            "`VerifiableStream` does not support truncation. It is too "
            "complicated to keep track of the hmac digests"
        )

    def close(self):
        """Close the underlying stream, discarding its data.

        Raises AssertionError when the stream has not been finalized.
        """
        if not self._finalized:
            raise AssertionError(
                "Attempting to close an unfinalized VerifiableStream. This is "
                "almost certainly a bug."
            )
        return self._underlying_stream.close()

    # ---- members that work in either phase -------------------------------

    def isatty(self) -> bool:
        """Report whether the underlying stream is a terminal."""
        return self._underlying_stream.isatty()

    @property
    def closed(self) -> bool:
        """True when the underlying stream has been closed."""
        return self._underlying_stream.closed

    def fileno(self) -> int:
        """Delegate to the underlying stream's fileno()."""
        # For BytesIO this raises UnsupportedOperation; letting BytesIO do
        # that is preferable to raising here.
        return self._underlying_stream.fileno()

    def mode(self) -> str:
        """Not available; always raises AssertionError."""
        # The underlying stream has no mode.
        raise AssertionError(
            "`VerifiableStream` does not have a mode. This is probably a bug in "
            "something assuming that the stream is a backed by a file"
        )

    def name(self) -> str:
        """Not available; always raises AssertionError."""
        # The underlying stream has no name.
        raise AssertionError(
            "`VerifiableStream` does not have a name. This is probably a bug in "
            "something assuming the stream is a file descriptor"
        )

    def flush(self) -> None:
        """Flush the underlying stream."""
        # For BytesIO this does nothing in practice.
        return self._underlying_stream.flush()

    def tell(self) -> int:
        """Return the current position in the underlying stream."""
        return self._underlying_stream.tell()

    # ---- context manager --------------------------------------------------

    def __enter__(self) -> "VerifiableStream":
        """Return the stream itself."""
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> bool:
        """Delegate to the underlying stream's __exit__.

        NOTE(review): this bypasses the finalized check performed by close().
        """
        return self._underlying_stream.__exit__(exc_type, exc_val, exc_tb)
class MemoryFile(object):
    """In-memory file node backed by a BytesIO buffer.

    Every path gets one shared lock from the class-level FILE_LOCKS table,
    so all MemoryFile objects created for the same path use the same lock.
    """

    kind = NodeKind.FILE
    FILE_LOCKS = defaultdict(threading.Lock)

    def __init__(self, path):
        self.path = path
        # reset() performs all remaining initialisation.
        self.reset()

    def reset(self):
        """Drop the content and restore timestamps, mode and lock."""
        self._data = BytesIO()
        self._line_reader = None
        self.mtime = self.ctime = time.time()
        self.mode = 0
        self.lock = self.FILE_LOCKS[self.path]

    def __len__(self):
        """Return the number of bytes stored, keeping the position intact.

        The old implementation seeked to ``-1`` from the end and added 1;
        BytesIO clamps that seek to 0 on an empty buffer, so an empty file
        reported a length of 1.
        """
        pos = self._data.tell()
        length = self._data.seek(0, 2)  # seek() returns the new position
        self._data.seek(pos)
        return length

    def size(self):
        """Return the number of bytes stored (same value as len(self))."""
        return len(self)

    def write(self, d):
        """Write ``d`` at the current position and update mtime."""
        self._data.write(d)
        self.mtime = time.time()

    def read(self, size=-1):
        return self._data.read(size)

    def seek(self, to, whence=0):
        """Move the position and return the new absolute position."""
        return self._data.seek(to, whence)

    def seekable(self):
        return self._data.seekable()

    def tell(self):
        return self._data.tell()

    def flush(self):
        return self._data.flush()

    def truncate(self, pos=None):
        return self._data.truncate(pos)

    def __unicode__(self):
        return self._data.getvalue().decode('utf-8')

    def __bytes__(self):
        return self._data.getvalue()

    def __str__(self):
        # NOTE: on Python 3 this is the repr-style text of the bytes
        # object (e.g. "b'abc'"), not the decoded content.
        return str(self._data.getvalue())

    def close(self):
        """Rewind to the start; the content is kept so the file can be reused."""
        self._line_reader = None
        self._data.seek(0)

    def readline(self):
        return self._data.readline()

    def readlines(self):
        """Yield the remaining lines one by one (this is a generator)."""
        yield from self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    next = __next__  # Python 2 iterator interface

    def __iter__(self):
        return self

    def items(self):
        """A file has no child entries; always returns an empty list."""
        return []