Exemple #1
0
    def testSeekable(self):
        """Check BZ2File.seekable() for three kinds of file objects.

        Covers a reader over a seekable buffer, a writer, and a reader over
        a buffer that reports itself as non-seekable. In every case calling
        seekable() after the file has been closed must raise ValueError.
        """
        # Reader over a seekable buffer: seekable both before and after a read.
        reader = BZ2File(BytesIO(self.DATA))
        with reader:
            self.assertTrue(reader.seekable())
            reader.read()
            self.assertTrue(reader.seekable())
        self.assertRaises(ValueError, reader.seekable)

        # Writer: expected to report that it cannot seek.
        writer = BZ2File(BytesIO(), mode="w")
        with writer:
            self.assertFalse(writer.seekable())
        self.assertRaises(ValueError, writer.seekable)

        # Reader whose underlying buffer claims to be non-seekable.
        raw = BytesIO(self.DATA)
        raw.seekable = lambda: False
        unseekable = BZ2File(raw)
        with unseekable:
            self.assertFalse(unseekable.seekable())
        self.assertRaises(ValueError, unseekable.seekable)
    def testSeekable(self):
        """Verify what seekable() reports while open and that it raises
        ValueError after close, for read mode, write mode and a source
        stream that says it is not seekable."""
        # Read mode on top of an ordinary in-memory buffer.
        read_file = BZ2File(BytesIO(self.DATA))
        with read_file:
            self.assertTrue(read_file.seekable())
            read_file.read()
            # Consuming the data must not change the answer.
            self.assertTrue(read_file.seekable())
        self.assertRaises(ValueError, read_file.seekable)

        # Write mode.
        write_file = BZ2File(BytesIO(), "w")
        with write_file:
            self.assertFalse(write_file.seekable())
        self.assertRaises(ValueError, write_file.seekable)

        # Read mode on top of a buffer patched to deny seeking.
        source = BytesIO(self.DATA)
        source.seekable = lambda: False
        patched_file = BZ2File(source)
        with patched_file:
            self.assertFalse(patched_file.seekable())
        self.assertRaises(ValueError, patched_file.seekable)
Exemple #3
0
def bdecode(f_or_data):
    """
    Decode one object from *f_or_data*.

    The first byte of the input is looked up in ``TYPES`` to find the
    decoding function for that type; that function is then called with the
    stream and its result is returned.

    The parameter can be a file opened in bytes mode, ``bytes`` /
    ``bytearray`` data, or a string (which is encoded to bytes first).

    Returns ``None`` when the next byte is not a known type byte. In that
    case one byte is consumed and checked against ``_TYPE_END`` through
    ``assert_btype`` (used in dicts and lists to designate an end).
    """
    if isinstance(f_or_data, str):
        f_or_data = f_or_data.encode()
    if isinstance(f_or_data, (bytes, bytearray)):
        f_or_data = BytesIO(f_or_data)

    # TODO: the following lookup is the only one that needs readahead.
    if f_or_data.seekable():
        first_byte = f_or_data.read(1)
        # Rewind only if a byte was actually read. At end of input read()
        # returns b"" and an unconditional seek(-1) would either step back
        # into data that was already consumed or fail with a negative seek.
        if first_byte:
            f_or_data.seek(-1, SEEK_CUR)
    else:
        # peek returns an arbitrary amount of bytes, so we have to slice.
        # NOTE(review): not every non-seekable stream offers peek(); callers
        # are presumably expected to pass a buffered reader - confirm.
        first_byte = f_or_data.peek(1)[:1]

    btype = TYPES.get(first_byte)
    if btype is not None:
        return btype(f_or_data)

    # Used in dicts and lists to designate an end.
    assert_btype(f_or_data.read(1), _TYPE_END)
    return None
Exemple #4
0
def prepare_rw_output_stream(output):
    """
    Return a stream that can be read, written and seeked.

    Meant for writing and then updating signed files: a PDF with
    placeholder signature data is rendered either to a byte buffer or,
    when the caller's stream is capable enough, directly to that stream.

    Behaviour:

    * ``None`` yields a fresh :class:`.BytesIO`.
    * A stream that is writable, seekable and readable is returned as-is.
    * A writable stream that is not seekable or not readable is replaced
      by a fresh :class:`.BytesIO`.
    * A stream that is not writable raises :class:`.IOError`.

    :param output:
        A writable file-like object, or ``None``.
    :return:
        A file-like object that supports reading, writing and seeking.
    """
    if output is None:
        return BytesIO()

    # Writability is checked explicitly and first: if the stream ends up
    # being swapped for a BytesIO below, a write failure on the original
    # would otherwise only surface after all signing work has been done.
    if not output.writable():
        raise IOError("Output buffer is not writable")  # pragma: nocover

    if output.seekable() and output.readable():
        return output
    return BytesIO()
Exemple #5
0
def bdecode(f_or_data):
    """
    bdecodes data by looking up the type byte,
    and using it to look up the respective decoding function,
    which in turn is used to return the decoded object

    The parameter can be a file opened in bytes mode,
    bytes / bytearray, or a string (the last of which will be encoded)

    Returns None when the next byte is not a known type byte; that byte is
    then consumed and checked against _TYPE_END via assert_btype.
    """
    if isinstance(f_or_data, str):
        f_or_data = f_or_data.encode()
    if isinstance(f_or_data, (bytes, bytearray)):
        f_or_data = BytesIO(f_or_data)

    #TODO: the following lookup is the only one that needs readahead.
    if f_or_data.seekable():
        first_byte = f_or_data.read(1)
        # At end of input nothing was read, so there is nothing to undo.
        # Seeking back regardless would re-expose the previous byte (or
        # raise on an empty stream because the position would go negative).
        if first_byte:
            f_or_data.seek(-1, SEEK_CUR)
    else:
        #FIXME: muted bug! peek() is not part of the generic stream
        #interface, so this branch only works for streams that provide it.
        #peek returns an arbitrary amount of bytes, so we have to slice.
        first_byte = f_or_data.peek(1)[:1]  # pylint: disable=no-member
    btype = TYPES.get(first_byte)
    if btype is not None:
        return btype(f_or_data)
    else: #Used in dicts and lists to designate an end
        assert_btype(f_or_data.read(1), _TYPE_END)
        return None
Exemple #6
0
    def test_fp_callable_incomplete(self):
        """Loading truncated data from a non-seekable stream must raise
        DecoderException with an 'Insufficient input' message."""
        obj = [123, b'something']
        encoded = self.bjddumpb(obj)
        # Chop the tail off the encoded form: the payload of the last
        # element plus one more byte, leaving the final token incomplete.
        trim = len(obj[1]) + 1
        output = BytesIO(encoded[:-trim])
        # Make the stream report itself as non-seekable.
        output.seekable = lambda: False

        with self.assert_raises_regex(DecoderException, 'Insufficient input'):
            self.bjdload(output)
Exemple #7
0
class SrlDocumentReader(object):
    """
    Sequential reader for binary fields stored in an in-memory byte string.

    Fixed-size fields are decoded with ``struct`` using little-endian
    formats. Running out of input never raises: numeric readers return 0,
    ``read_str`` returns an empty string and ``read_varint`` stops at the
    end of the data.
    """

    def __init__(self, byte_str):
        super(SrlDocumentReader, self).__init__()
        self.stream = BytesIO(byte_str)
        self.stream.seek(0, os.SEEK_SET)

    def _read_unpack(self, fmt):
        '''
        First get the number of bytes that struct will need to unpack. Then
        read those number of bytes from the io stream. The current io stream
        position will advance by the number of bytes read.

        Returns the first unpacked value, or 0 when fewer bytes than
        required were available.
        '''
        plen = struct.calcsize(fmt)
        b = self.stream.read(plen)

        if plen != len(b):
            return 0

        return struct.unpack(fmt, b)[0]

    def tell(self):
        '''Return the current position in the underlying stream.'''
        return self.stream.tell()

    def seek(self, offset, wench=os.SEEK_CUR):
        '''
        Move the stream position and return the new absolute position.
        By default the offset is relative to the current position.
        Returns 0 if the stream reports that it cannot seek.
        '''
        if not self.stream.seekable():
            return 0
        return self.stream.seek(offset, wench)

    def read_varint(self):
        '''
        Read a variable-length unsigned integer: 7 payload bits per byte,
        least significant group first, high bit set on every byte except
        the last. Stops early at end of input and returns what was
        accumulated so far (0 if nothing could be read).
        '''
        shift = 0
        result = 0
        while True:
            byte = self.stream.read(1)
            if not byte:
                # End of input: ord() on an empty read would raise a
                # TypeError that says nothing about truncated data.
                break
            i = ord(byte)
            result |= (i & 0x7f) << shift
            shift += 7
            if not (i & 0x80):
                break

        return result

    def read_uint32(self):
        '''Read a little-endian unsigned 32-bit integer (0 on short read).'''
        return self._read_unpack('<I')

    def read_uint8(self):
        '''Read an unsigned 8-bit integer (0 on short read).'''
        return self._read_unpack('<B')

    def read_float(self):
        '''Read a little-endian 32-bit float (0 on short read).'''
        return self._read_unpack('<f')

    def read_str(self, slen):
        '''
        Read ``slen`` bytes and decode them as UTF-8. Returns an empty
        string when fewer than ``slen`` bytes are left.
        '''
        val = self._read_unpack('{0}s'.format(slen))
        if not isinstance(val, bytes):
            # _read_unpack signalled a short read with its 0 sentinel,
            # which cannot be decoded.
            return ''
        return val.decode('utf-8')
Exemple #8
0
class SrlDocumentReader(object):
    """
    Reads typed values one after another from a byte string.

    Fixed-width values go through ``struct`` with little-endian formats.
    When the data runs out the readers do not raise: numbers come back as
    0, strings as ``''`` and a varint ends at the last available byte.
    """

    def __init__(self, byte_str):
        super(SrlDocumentReader, self).__init__()
        self.stream = BytesIO(byte_str)
        self.stream.seek(0, os.SEEK_SET)

    def _read_unpack(self, fmt):
        '''
        Work out how many bytes ``fmt`` needs, read exactly that many from
        the stream (advancing its position) and unpack them.

        Returns the first unpacked value; returns 0 if the stream held
        fewer bytes than ``fmt`` requires.
        '''
        plen = struct.calcsize(fmt)
        b = self.stream.read(plen)

        if plen != len(b):
            return 0

        value = struct.unpack(fmt, b)[0]
        return value

    def tell(self):
        '''Current absolute position in the stream.'''
        return self.stream.tell()

    def seek(self, offset, wench = os.SEEK_CUR):
        '''
        Reposition the stream; ``wench`` selects the reference point and
        defaults to the current position. Returns the new absolute
        position, or 0 when the stream is not seekable.
        '''
        if not self.stream.seekable():
            return 0
        return self.stream.seek(offset, wench)

    def read_varint(self):
        '''
        Decode a base-128 varint (low 7 bits first, bit 0x80 marks
        "more bytes follow"). If the input ends before the final byte, the
        bits gathered so far are returned instead of raising.
        '''
        shift = 0
        result = 0
        while True:
            chunk = self.stream.read(1)
            if not chunk:
                # Nothing left to read; ord(b'') would fail with TypeError.
                break
            i = ord(chunk)
            result |= (i & 0x7f) << shift
            shift += 7
            if not (i & 0x80):
                break

        return result

    def read_uint32(self):
        '''Unsigned 32-bit little-endian integer; 0 if input is short.'''
        fmt = '<I'
        return self._read_unpack(fmt)

    def read_uint8(self):
        '''Unsigned 8-bit integer; 0 if input is short.'''
        fmt = '<B'
        return self._read_unpack(fmt)

    def read_float(self):
        '''32-bit little-endian float; 0 if input is short.'''
        fmt = '<f'
        return self._read_unpack(fmt)

    def read_str(self, slen):
        '''
        UTF-8 string of ``slen`` bytes; ``''`` if fewer bytes remain.
        '''
        fmt = '{0}s'.format(slen)
        val = self._read_unpack(fmt)
        if not isinstance(val, bytes):
            # Short read: _read_unpack returned its integer sentinel, and
            # bytes.decode() cannot be applied to an int.
            return ''
        return val.decode('utf-8')
Exemple #9
0
    def test_fp_multi(self):
        """Dump several objects back to back into one stream and load them
        again, first with a seekable stream and then with the same stream
        reporting itself as non-seekable."""
        obj = {'a': 123, 'b': b'some raw content'}
        output = BytesIO()
        count = 10

        # Two passes: the stream is seekable in the first one and patched
        # to be non-seekable for the second.
        for _pass in range(2):
            output.seek(0)
            for index in range(count):
                obj['c'] = index
                self.bjddump(obj, output)

            output.seek(0)
            for index in range(count):
                obj['c'] = index
                loaded = self.bjdload(output)
                self.assertEqual(loaded, obj)

            output.seekable = lambda: False
Exemple #10
0
from io import BytesIO, StringIO
from sys import stdout

# BytesIO and StringIO are both classes in the io module. Each allocates an
# in-memory buffer (binary or text mode) that can be used like a file
# object; the buffer is released when close() is called - here that happens
# when the with-block ends.
# getvalue() returns the entire contents, regardless of where the file
# pointer currently is.
with BytesIO() as bio:
    print(bio.readable(), bio.writable(), bio.seekable())
    bio.write(b'magede\nPython')
    bio.seek(0)
    print(bio.readline())
    print(bio.getvalue())

# Why use StringIO: disk operations are generally much slower than memory
# operations. When there is enough memory, the usual optimisation is to
# avoid touching the disk and cut down on disk I/O, which can greatly
# improve how fast a program runs.
with StringIO() as sio:
    print(sio.readable(), sio.writable(), sio.seekable())
    sio.write('magedu\nPython')
    sio.seek(0)
    print(sio.readline())
    print(sio.getvalue())

# File-like objects: objects that can be operated on just like a file.
f = stdout
print(type(f))
f.write('magedu.com')  # written to the console
Exemple #11
0
class VCRHTTPResponse(HTTPResponse):
    """
    Stub response class that gets returned instead of a HTTPResponse.

    It is built from a recorded response dict (keys ``status``, ``headers``
    and ``body`` are read) and serves the recorded body from an in-memory
    buffer; the file-like methods simply delegate to that buffer.
    """

    def __init__(self, recorded_response):
        self.fp = None
        self.recorded_response = recorded_response
        self.reason = recorded_response["status"]["message"]
        self.status = self.code = recorded_response["status"]["code"]
        self.version = None
        self._content = BytesIO(self.recorded_response["body"]["string"])
        self._closed = False

        headers = self.recorded_response["headers"]
        # Since we are loading a response that has already been serialized, our
        # response is no longer chunked.  That means we don't want any
        # libraries trying to process a chunked response.  By removing the
        # transfer-encoding: chunked header, this should cause the downstream
        # libraries to process this as a non-chunked response.
        # Note that this deletes the header from the recorded dict itself.
        te_key = [h for h in headers.keys() if h.upper() == "TRANSFER-ENCODING"]
        if te_key:
            del headers[te_key[0]]
        self.headers = self.msg = parse_headers(headers)

        self.length = compat.get_header(self.msg, "content-length") or None

    @property
    def closed(self):
        # In python3, I can't change the value of self.closed.  So I'm
        # twiddling self._closed and using this property to shadow the real
        # self.closed from the superclass.
        return self._closed

    def read(self, *args, **kwargs):
        return self._content.read(*args, **kwargs)

    def readall(self):
        """Return every byte from the current position to the end."""
        # BytesIO is a buffered stream and has no readall() (that method
        # belongs to raw streams); an argument-less read() does the same.
        return self._content.read()

    def readinto(self, *args, **kwargs):
        return self._content.readinto(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self._content.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self._content.readlines(*args, **kwargs)

    def seekable(self):
        return self._content.seekable()

    def tell(self):
        return self._content.tell()

    def isatty(self):
        return self._content.isatty()

    def seek(self, *args, **kwargs):
        return self._content.seek(*args, **kwargs)

    def close(self):
        """Mark the response as closed; the body buffer itself stays open."""
        self._closed = True
        return True

    def getcode(self):
        return self.status

    def isclosed(self):
        return self.closed

    def info(self):
        return parse_headers(self.recorded_response["headers"])

    def getheaders(self):
        """Return the recorded headers as a list of header items."""
        message = parse_headers(self.recorded_response["headers"])
        return list(compat.get_header_items(message))

    def getheader(self, header, default=None):
        """
        Look up ``header`` case-insensitively. Multiple values are joined
        with ", "; ``default`` is returned when the header is absent.
        """
        values = [v for (k, v) in self.getheaders() if k.lower() == header.lower()]

        if values:
            return ", ".join(values)
        else:
            return default

    def readable(self):
        return self._content.readable()
Exemple #12
0
class _BaseBinaryWrapper:
    """Thin proxy around a binary stream plus a few convenience helpers.

    ``bytes`` / ``bytearray`` input is wrapped in a ``BytesIO``; any other
    object is stored and used as the stream unchanged.
    """

    def __init__(self, stream: Union[typing.BinaryIO, bytes] = b""):
        raw_data = isinstance(stream, (bytes, bytearray))
        self.stream = BytesIO(stream) if raw_data else stream

    # --- plain delegation to the wrapped stream ---

    def close(self) -> None:
        result = self.stream.close()
        return result

    def flush(self) -> None:
        result = self.stream.flush()
        return result

    def read(self, n: int = -1) -> AnyStr:
        data = self.stream.read(n)
        return data

    def readable(self) -> bool:
        return self.stream.readable()

    def readline(self, limit: int = -1) -> AnyStr:
        line = self.stream.readline(limit)
        return line

    def readlines(self, hint: int = -1) -> List[AnyStr]:
        lines = self.stream.readlines(hint)
        return lines

    def write(self, s: Union[bytes, bytearray]) -> int:
        written = self.stream.write(s)
        return written

    def writable(self) -> bool:
        return self.stream.writable()

    def writelines(self, lines: Iterable[AnyStr]) -> None:
        self.stream.writelines(lines)

    def seek(self, offset: int, whence: int = 0) -> int:
        position = self.stream.seek(offset, whence)
        return position

    def seekable(self) -> bool:
        return self.stream.seekable()

    def tell(self) -> int:
        return self.stream.tell()

    def fileno(self) -> int:
        return self.stream.fileno()

    def __enter__(self):
        # Enter the wrapped stream's context but hand back the wrapper.
        self.stream.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stream.__exit__(exc_type, exc_val, exc_tb)

    # --- helpers ---

    def readall(self):
        """Rewind to the start and return everything up to the end.

        The stream position is left at the end afterwards.
        """
        stream = self.stream
        stream.seek(0)
        return stream.read()

    def getvalue(self):
        """Return the full contents without moving the stream position."""
        stream = self.stream
        if isinstance(stream, BytesIO):
            return stream.getvalue()
        # Generic stream: remember where we are, read it all, go back.
        saved = stream.tell()
        contents = self.readall()
        stream.seek(saved)
        return contents

    def align(self, alignment=4):
        """Advance the position to the next multiple of ``alignment``.

        Does nothing when the position is already aligned.
        """
        remainder = self.tell() % alignment
        if remainder:
            self.seek(self.tell() + alignment - remainder)
Exemple #13
0
class StreamIO(object):
    """
    Wrapper around a binary stream that adds struct-based typed reads and
    writes, named offsets ("labels") and a few operator shortcuts.

    The stream can be given as ``None`` (a fresh ``BytesIO`` is created),
    bytes-like data (wrapped in ``BytesIO``), a path string (opened in
    binary mode) or an existing stream object, which is used as-is.
    """
    stream = None
    endian = None
    # Placeholder only: reset() gives every instance its own dict.
    labels = {}

    # I/O functions
    read_func = None
    write_func = None

    # attributes
    can_seek = False
    can_tell = False

    def __init__(self, stream=None, endian: Endian = Endian.LITTLE):
        self.reset()
        self.set_stream(stream)
        self.set_endian(endian)
        self.set_io_funcs()

    # reset
    def reset(self) -> None:
        """Clear all per-instance state back to its defaults."""
        self.stream = None
        self.endian = None
        self.labels = {}
        self.read_func = None
        self.write_func = None
        self.can_seek = False
        self.can_tell = False

    # add with functionality
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    # shortcuts
    def __int__(self) -> int:
        return self.tell()

    def __len__(self) -> int:
        return self.length()

    def __bytes__(self) -> bytes:
        return self.getvalue()

    # The in-place operators move the stream position. They must return
    # self: Python rebinds the left-hand name to whatever they return, so
    # returning None would replace the stream object with None.
    def __iadd__(self, other: int) -> "StreamIO":
        self.seek(self.tell() + other)
        return self

    def __isub__(self, other: int) -> "StreamIO":
        self.seek(self.tell() - other)
        return self

    def __imul__(self, other: int) -> "StreamIO":
        self.seek(self.tell() * other)
        return self

    def __ifloordiv__(self, other: int) -> "StreamIO":
        self.seek(self.tell() // other)
        return self

    def __itruediv__(self, other: int) -> "StreamIO":
        # Positions are integers, so true division also floors.
        self.seek(self.tell() // other)
        return self

    def __getitem__(self, key: int | slice):
        """
        ``s[i]`` reads one byte at offset ``i``; ``s[a:b]`` reads the bytes
        from ``a`` up to ``b``. A missing start means 0, a missing stop
        means the end of the stream. The slice step is ignored. The stream
        position is restored afterwards.
        """
        if isinstance(key, slice):
            start = 0 if key.start is None else key.start
            stop = self.length() if key.stop is None else key.stop
            return self.read_bytes_at(start, stop - start)
        return self.read_byte_at(key)

    def __setitem__(self, key: int | slice,
                    value: int | bytes | bytearray) -> int:
        """
        ``s[i] = v`` writes at offset ``i``; with a slice, the data is
        written at the slice start. The stream position is restored
        afterwards.
        """
        if isinstance(key, slice):
            return self.write_bytes_at(key.start, value)
        if isinstance(value, (bytes, bytearray)):
            if len(value) > 1:
                return self.write_bytes_at(key, value)
            return self.write_byte_at(key, value[0])
        return self.write_byte_at(key, value)

    # virtual file pointer
    @property
    def offset(self) -> int:
        return self.tell()

    @offset.setter
    def offset(self, value: int) -> None:
        self.seek(value)

    # utilities
    def set_stream(self, stream) -> None:
        """
        Set stream to read/write from/to
        :param stream: The stream to interact with
        :return: None
        """
        if stream is None:
            self.stream = BytesIO()
        elif isinstance(stream, (bytes, bytearray, memoryview)):
            self.stream = BytesIO(stream)
        elif isinstance(stream, str):
            # A string is a path: update an existing file in place,
            # otherwise create a new one.
            if isfile(stream):
                self.stream = open(stream, "r+b")
            else:
                self.stream = open(stream, "wb")
        else:
            self.stream = stream
        self.can_seek = self.stream.seekable()
        self.can_tell = self.stream.seekable()

    def set_endian(self, endian: Endian) -> None:
        """
        Set the endian you want to use for reading/writing data in the stream
        :param endian: LITTLE, BIG, NETWORK, or NATIVE
        :return: None
        """
        endian = int(endian)
        endians = ["<", ">", "!", "@"]
        # Values outside the table leave the current setting untouched.
        if endian in range(0, len(endians)):
            self.endian = endians[endian]

    def set_read_func(self, name: str) -> None:  #, *param_types):
        """
        Set the function name in the stream of the read function
        :param name: The name of the read function
        :return: None
        """
        if hasattr(self.stream, name):
            self.read_func = getattr(self.stream, name)

    def set_write_func(self, name: str) -> None:  #, *param_types):
        """
        Set the function name in the stream of the write function
        :param name: The name of the write function
        :return: None
        """
        if hasattr(self.stream, name):
            self.write_func = getattr(self.stream, name)

    def set_io_funcs(self,
                     read_name: str = "read",
                     write_name: str = "write") -> None:
        """
        Set the read/write function names in the stream
        :param read_name: The name of the read function
        :param write_name: The name of the write function
        :return: None
        """
        self.set_read_func(read_name)
        self.set_write_func(write_name)

    def tell(self) -> int:
        """
        Tell the current position of the stream if supported
        :return: The position of the stream
        """
        if self.can_tell:
            return self.stream.tell()
        raise NotImplementedError(
            "tell isn't implemented in the specified stream!")

    def seek(self, index: int, whence: int = SEEK_SET) -> int:
        """
        Jump to a position in the stream if supported
        :param index: The offset to jump to
        :param whence: Index is interpreted relative to the position indicated by whence (SEEK_SET, SEEK_CUR, and SEEK_END in io library)
        :return: The new absolute position
        """
        if self.can_seek:
            return self.stream.seek(index, whence)
        raise NotImplementedError(
            "seek isn't implemented in the specified stream!")

    def seek_start(self) -> int:
        """
        Jump to the beginning of the stream if supported
        :return: The new absolute position
        """
        return self.stream.seek(0)

    def seek_end(self) -> int:
        """
        Jump to the end of the stream if supported
        :return: The new absolute position
        """
        return self.stream.seek(0, SEEK_END)

    def length(self) -> int:
        """
        Get the length of the stream if supported
        :return: The total length of the stream
        """
        loc = self.tell()
        self.seek_end()
        size = self.tell()
        self.seek(loc)
        return size

    def getvalue(self) -> bytes | bytearray:
        """
        Get the stream's output
        :return: The stream's data as bytes or bytearray
        """
        return self.stream.getvalue()

    def getbuffer(self) -> bytes | bytearray:
        """
        Get the stream's buffer
        :return: The stream's buffer as bytes or bytearray
        """
        return self.stream.getbuffer()

    def flush(self) -> None:
        """
        Write the data to the stream
        :return: None
        """
        return self.stream.flush()

    def close(self) -> None:
        """
        Close the stream
        :return: None
        """
        self.stream.close()

    # labeling
    def get_labels(self) -> list:
        """Return the names of all labels."""
        return list(self.labels.keys())

    def label_exists(self, name: str) -> bool:
        return name in self.labels

    def get_label(self, name: str) -> int:
        return self.labels[name]

    def set_label(self,
                  name: str,
                  offset: int | None = None,
                  overwrite: bool = True) -> int:
        """
        Store ``offset`` (or the current position when it is ``None`` or
        negative) under ``name`` and return the stored location. With
        ``overwrite=False`` an existing name is kept and the new label gets
        a random suffix instead.
        """
        if not overwrite and self.label_exists(name):
            name += ("_" + rand_str(4))
        if offset is not None and offset >= 0:
            loc = offset
        else:
            loc = self.tell()
        self.labels[name] = loc
        return loc

    def rename_label(self,
                     old_name: str,
                     new_name: str,
                     overwrite: bool = True) -> bool:
        """
        Move the offset stored under ``old_name`` to ``new_name``.
        :return: True if the label existed and was renamed, otherwise False
        """
        assert old_name != new_name, "Old and new label names shouldn't be the same"

        if self.label_exists(old_name):
            value = self.get_label(old_name)
            self.del_label(old_name)
            self.set_label(new_name, value, overwrite)
            return True
        return False

    def goto_label(self, name: str) -> int:
        return self.seek(self.labels[name])

    def del_label(self, name: str) -> int:
        return self.labels.pop(name)

    # base I/O methods
    def read(self, num: int | None = None) -> bytes | bytearray:
        """Read ``num`` bytes, or call the read function with no argument
        when ``num`` is ``None``."""
        if num is None:
            return self.read_func()
        return self.read_func(num)

    def write(self, data: bytes | bytearray | int) -> int:
        """Write ``data``; a single integer is written as one byte."""
        if isinstance(data, int):
            data = bytes([data])
        return self.write_func(data)

    def stream_unpack(self, fmt: str) -> tuple | list:
        """Read and unpack ``fmt`` using the configured byte order."""
        fmt = f"{self.endian}{fmt}"
        return unpack(fmt, self.read(calcsize(fmt)))

    def stream_pack(self, fmt: str, *values) -> int:
        """Pack ``values`` as ``fmt`` with the configured byte order and
        write them."""
        fmt = f"{self.endian}{fmt}"
        return self.write(pack(fmt, *values))

    def stream_unpack_array(self, t: str, num: int) -> tuple | list:
        """Read ``num`` consecutive items of struct type ``t``."""
        fmt = f"{self.endian}{num}{t}"
        return unpack(fmt, self.read(calcsize(fmt)))

    def stream_pack_array(self, t: str, *values) -> int:
        """Write all ``values`` as consecutive items of struct type ``t``."""
        fmt = f"{self.endian}{len(values)}{t}"
        return self.write(pack(fmt, *values))

    # signed bytes
    # The *_at variants below all follow one pattern: jump to ``offset``,
    # perform the operation, then return to the previous position unless
    # ``ret`` is False.
    def read_sbyte(self) -> int:
        (val, ) = self.stream_unpack("b")
        return val

    def read_sbyte_at(self, offset: int, ret: bool = True) -> int:
        loc = self.tell()
        self.seek(offset)
        output = self.read_sbyte()
        if ret:
            self.seek(loc)
        return output

    def read_sbytes(self, num: int) -> Tuple[int] | List[int]:
        return self.stream_unpack_array("b", num)

    def read_sbytes_at(self,
                       offset: int,
                       num: int,
                       ret: bool = True) -> Tuple[int] | List[int]:
        loc = self.tell()
        self.seek(offset)
        output = self.read_sbytes(num)
        if ret:
            self.seek(loc)
        return output

    def write_sbyte(self, value: int) -> int:
        return self.stream_pack("b", value)

    def write_sbyte_at(self, offset: int, value: int, ret: bool = True) -> int:
        loc = self.tell()
        self.seek(offset)
        output = self.write_sbyte(value)
        if ret:
            self.seek(loc)
        return output

    def write_sbytes(self, values: bytes | bytearray) -> int:
        return self.stream_pack_array("b", *values)

    def write_sbytes_at(self,
                        offset: int,
                        values: bytes | bytearray,
                        ret: bool = True) -> int:
        loc = self.tell()
        self.seek(offset)
        output = self.write_sbytes(values)
        if ret:
            self.seek(loc)
        return output

    # unsigned bytes
    def read_byte(self) -> int:
        (val, ) = self.stream_unpack("B")
        return val

    read_ubyte = read_byte

    def read_byte_at(self, offset: int, ret: bool = True) -> int:
        loc = self.tell()
        self.seek(offset)
        output = self.read_byte()
        if ret:
            self.seek(loc)
        return output

    read_bytes = read
    read_ubytes = read

    def read_bytes_at(self, offset: int, num: int, ret: bool = True) -> bytes:
        loc = self.tell()
        self.seek(offset)
        output = self.read_bytes(num)
        if ret:
            self.seek(loc)
        return output

    read_ubytes_at = read_bytes_at

    def write_byte(self, value: int):
        return self.stream_pack("B", value)

    write_ubyte = write_byte

    def write_byte_at(self, offset: int, value: int, ret: bool = True) -> int:
        loc = self.tell()
        self.seek(offset)
        output = self.write_byte(value)
        if ret:
            self.seek(loc)
        return output

    write_bytes = write
    write_ubyte_at = write_byte_at

    def write_bytes_at(self,
                       offset: int,
                       values: bytes | bytearray,
                       ret: bool = True) -> int:
        loc = self.tell()
        self.seek(offset)
        output = self.write_bytes(values)
        if ret:
            self.seek(loc)
        return output

    write_ubytes_at = write_bytes_at

    def load_from_buffer(self, data: bytes | bytearray) -> int:
        return self.write_bytes(data)

    # boolean
    def read_bool(self) -> bool:
        (val, ) = self.stream_unpack("?")
        return val

    def read_bool_array(self, num: int) -> Tuple[bool]:
        return self.stream_unpack_array("?", num)

    def write_bool(self, value: bool) -> int:
        return self.stream_pack("?", value)

    def write_bool_array(self, values: List[bool] | Tuple[bool]) -> int:
        return self.stream_pack_array("?", *values)

    # int16/short
    def read_int16(self) -> int:
        (val, ) = self.stream_unpack("h")
        return val

    read_short = read_int16

    def read_int16_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("h", num)

    read_short_array = read_int16_array

    def write_int16(self, value: int) -> int:
        return self.stream_pack("h", value)

    write_short = write_int16

    def write_int16_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("h", *values)

    write_short_array = write_int16_array

    # uint16/ushort
    def read_uint16(self) -> int:
        (val, ) = self.stream_unpack("H")
        return val

    read_ushort = read_uint16

    def read_uint16_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("H", num)

    read_ushort_array = read_uint16_array

    def write_uint16(self, value: int) -> int:
        return self.stream_pack("H", value)

    write_ushort = write_uint16

    def write_uint16_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("H", *values)

    write_ushort_array = write_uint16_array

    # int32/int/long
    def read_int32(self) -> int:
        (val, ) = self.stream_unpack("i")
        return val

    read_int = read_int32
    read_long = read_int32

    def read_int32_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("i", num)

    read_int_array = read_int32_array
    read_long_array = read_int32_array

    def write_int32(self, value: int) -> int:
        return self.stream_pack("i", value)

    write_int = write_int32
    write_long = write_int32

    def write_int32_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("i", *values)

    write_int_array = write_int32_array
    write_long_array = write_int32_array

    # uint32/uint/ulong
    def read_uint32(self) -> int:
        (val, ) = self.stream_unpack("I")
        return val

    read_uint = read_uint32
    read_ulong = read_uint32

    def read_uint32_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("I", num)

    read_uint_array = read_uint32_array
    read_ulong_array = read_uint32_array

    def write_uint32(self, value: int) -> int:
        return self.stream_pack("I", value)

    write_uint = write_uint32
    write_ulong = write_uint32

    def write_uint32_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("I", *values)

    write_uint_array = write_uint32_array
    write_ulong_array = write_uint32_array

    # int64/longlong
    def read_int64(self) -> int:
        (val, ) = self.stream_unpack("q")
        return val

    read_longlong = read_int64

    def read_int64_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("q", num)

    read_longlong_array = read_int64_array

    def write_int64(self, value: int) -> int:
        return self.stream_pack("q", value)

    write_longlong = write_int64

    def write_int64_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("q", *values)

    write_longlong_array = write_int64_array

    # uint64/ulonglong
    def read_uint64(self) -> int:
        (val, ) = self.stream_unpack("Q")
        return val

    read_ulonglong = read_uint64

    def read_uint64_array(self, num: int) -> Tuple[int]:
        return self.stream_unpack_array("Q", num)

    read_ulonglong_array = read_uint64_array

    def write_uint64(self, value: int) -> int:
        return self.stream_pack("Q", value)

    write_ulonglong = write_uint64

    def write_uint64_array(self, values: List[int] | Tuple[int]) -> int:
        return self.stream_pack_array("Q", *values)

    write_ulonglong_array = write_uint64_array

    # float32/single
    def read_float32(self) -> float:
        (val, ) = self.stream_unpack("f")
        return val

    read_single = read_float32

    def read_float32_array(self, num: int) -> Tuple[float]:
        return self.stream_unpack_array("f", num)

    read_single_array = read_float32_array

    def write_float32(self, value: float) -> int:
        return self.stream_pack("f", value)

    write_single = write_float32

    def write_float32_array(self, values: List[float] | Tuple[float]) -> int:
        return self.stream_pack_array("f", *values)

    write_single_array = write_float32_array

    # float64/double
    def read_float64(self) -> float:
        (val, ) = self.stream_unpack("d")
        return val

    read_double = read_float64

    def read_float64_array(self, num: int) -> Tuple[float]:
        return self.stream_unpack_array("d", num)

    read_double_array = read_float64_array

    def write_float64(self, value: float) -> int:
        return self.stream_pack("d", value)

    write_double = write_float64

    def write_float64_array(self, values: List[float] | Tuple[float]) -> int:
        return self.stream_pack_array("d", *values)

    write_double_array = write_float64_array

    # varint
    def read_varint(self) -> int:
        """
        Read a base-128 integer: 7 payload bits per byte, lowest group
        first, bit 0x80 set on every byte except the last.
        """
        shift = 0
        result = 0
        while True:
            i = self.read_byte()
            result |= (i & 0x7f) << shift
            shift += 7
            if not (i & 0x80):
                break
        return result

    def read_varint_array(self, num: int) -> Tuple[int]:
        return tuple(self.read_varint() for _ in range(num))

    def write_varint(self, num: int) -> int:
        """
        Write ``num`` in the format read by :meth:`read_varint`.
        :raises ValueError: if ``num`` is negative
        """
        if num < 0:
            # Right-shifting a negative int never reaches zero, so the
            # encoding loop below would never terminate.
            raise ValueError("varint values must be non-negative")
        buff = bytearray()
        while True:
            towrite = num & 0x7f
            num >>= 7
            if num:
                buff.append(towrite | 0x80)
            else:
                buff.append(towrite)
                break
        return self.write_bytes(bytes(buff))

    def write_varint_array(self, values: List[int] | Tuple[int]) -> int:
        return sum(self.write_varint(x) for x in values)

    # strings
    def read_int7(self) -> int:
        """
        Read a 7-bit encoded integer (same byte layout as
        :meth:`read_varint`); used as the length prefix of strings.
        """
        index = 0
        result = 0
        while True:
            byte_value = self.read_byte()
            result |= (byte_value & 0x7F) << (7 * index)
            if byte_value & 0x80 == 0:
                break
            index += 1
        return result

    def read_int7_array(self, num: int) -> Tuple[int]:
        return tuple(self.read_int7() for _ in range(num))

    def write_int7(self, value: int) -> int:
        """Write ``value`` in the format read by :meth:`read_int7`."""
        data = bytearray()
        num = value
        while num >= 0x80:
            data.append((num | 0x80) & 0xFF)
            num >>= 7
        data.append(num & 0xFF)
        return self.write(bytes(data))

    def write_int7_array(self, values: List[int] | Tuple[int]) -> int:
        return sum(self.write_int7(x) for x in values)

    def read_string(self, encoding: str = "UTF8") -> str:
        """Read a string prefixed with its byte length as an int7."""
        str_size = self.read_int7()
        if str_size <= 0:
            return ""
        return self.read(str_size).decode(encoding)

    def read_c_string(self, encoding: str = "UTF8") -> str:
        """
        Read bytes up to (and consuming) the next NUL byte and decode them.
        Also stops at end of stream if no terminator is found.
        """
        output = bytearray()
        # An empty read means end of stream; without that check the loop
        # would spin forever on unterminated data.
        while (tmp := self.read(1)) not in (b"", b"\x00"):
            output += tmp
        return output.decode(encoding)
Exemple #14
0
class VerifiableStream(BinaryIO):
    """A binary stream whose contents can be verified to not have changed.

    The stream does not accept a HMAC key, but generates it randomly as a nonce. While unusual,
    this is intentional -- these streams are meant to be used as part of model serialization,
    where their nonces and HMAC codes are stored in a cryptographically signed metadata file.
    In other words, the HMAC simply ensures that stream's data has not changed, and does not
    guarantee the data's origin -- that's the metadata signature's job.

    The stream is meant to be used in the following sequence:
        - instantiate the stream
        - write all data to the stream (the stream is not readable yet!)
        - call "finalize()" on the stream, saving the returned nonce and HMAC code
        - read data from the stream (the stream is not writable any more!)

    Note that "finalize()" does not move the stream position: after writing, the position
    is at the end of the data, so callers presumably "seek(0)" before reading.
    """

    def __init__(self) -> None:
        """Create a new VerifiableStream with a random nonce."""
        self._finalized = False
        # this is bytes, be careful trying to add strings to it
        self._random_nonce = os.urandom(16)
        self._underlying_stream = BytesIO()
        self._hmac_state = hmac.new(self._random_nonce, digestmod=HASHER)

    def _ensure_finalized(self) -> None:
        """Raise an AssertionError if the stream has not already been finalized."""
        if not self._finalized:
            raise AssertionError(
                "Expected the stream to be finalized, but it was not!")

    def _ensure_not_finalized(self) -> None:
        """Raise an AssertionError if the stream has already been finalized."""
        if self._finalized:
            raise AssertionError(
                "Expected the stream to not be finalized, but it was!")

    def finalize(self):
        """Calculate the HMAC code for the stream, disable writing and enable reading.

        Returns:
            tuple (nonce, HMAC code)  (both of type string)

        Raises:
            AssertionError: if the stream was already finalized.
        """
        self._ensure_not_finalized()

        self._finalized = True

        nonce_string = _convert_base64_bytes_to_string(self._random_nonce)
        hmac_string = _convert_base64_bytes_to_string(
            self._hmac_state.digest())

        return nonce_string, hmac_string

    # methods for writing require that the stream not be finalized
    def writable(self) -> bool:
        """Return True if the stream is writable, and False otherwise."""
        return False if self._finalized else self._underlying_stream.writable()

    @validate(b=bytes)
    def write(self, b: bytes) -> int:
        """Write the given binary data to the stream, and include it in the HMAC calculation.

        Returns the value reported by the underlying stream's ``write``.
        Raises AssertionError if the stream is already finalized.
        """
        self._ensure_not_finalized()
        num_bytes = self._underlying_stream.write(b)
        self._hmac_state.update(b)
        return num_bytes

    def writelines(self, lines: Iterable[bytes]) -> None:
        """Write each item of ``lines`` to the stream via ``write``."""
        # technically done by `write` but doesn't hurt to be safe
        self._ensure_not_finalized()
        for line in lines:
            self.write(line)
        return None

    # methods for reading require that the stream is finalized
    def readable(self) -> bool:
        """Return True if the stream is readable, and False otherwise."""
        return self._underlying_stream.readable() if self._finalized else False

    def read(self, size: Optional[int] = None) -> bytes:
        """Read up to ``size`` bytes from the finalized stream (all remaining if None)."""
        self._ensure_finalized()
        return self._underlying_stream.read(size)

    def readall(self) -> bytes:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "`VerifiableStream` does not implement `readall` since the underlying BytesIO does not "
            "implement it.")

    def readline(self, size: Optional[int] = None) -> bytes:
        """Read a single line from the finalized stream."""
        self._ensure_finalized()
        return self._underlying_stream.readline(size)

    def readlines(self, size: Optional[int] = None) -> List[bytes]:
        """Read the remaining lines from the finalized stream."""
        self._ensure_finalized()
        return self._underlying_stream.readlines(size)

    def read1(self, size) -> bytes:
        """Read bytes from the finalized stream via the underlying ``read1``."""
        self._ensure_finalized()
        return self._underlying_stream.read1(size)

    def readinto(self, b) -> Optional[int]:
        """Read bytes from the finalized stream into the buffer ``b``."""
        self._ensure_finalized()
        return self._underlying_stream.readinto(b)

    def readinto1(self, b) -> Optional[int]:
        """Read bytes from the finalized stream into ``b`` via the underlying ``readinto1``."""
        self._ensure_finalized()
        return self._underlying_stream.readinto1(b)

    # seeking requires a finalized stream
    def seekable(self) -> bool:
        """Return True if the read pointer in the stream can be moved, and False otherwise."""
        return self._underlying_stream.seekable() if self._finalized else False

    def seek(self, *args, **kwargs) -> int:
        """Seek to a new position in the finalized stream. Return the new position."""
        self._ensure_finalized()
        return self._underlying_stream.seek(*args, **kwargs)

    def truncate(self, size: Optional[int] = ...) -> None:
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError(
            "`VerifiableStream` does not support truncation. It is too "
            "complicated to keep track of the hmac digests")

    def close(self) -> None:
        """Close the stream, discarding its data. Will raise an error if not finalized yet."""
        if not self._finalized:
            raise AssertionError(
                "Attempting to close an unfinalized VerifiableStream. This is "
                "almost certainly a bug.")
        return self._underlying_stream.close()

    # a bunch of attributes/methods that are always accessible
    def isatty(self) -> bool:
        """Determine whether this is a terminal (delegates to the underlying stream)."""
        return self._underlying_stream.isatty()

    @property
    def closed(self) -> bool:
        """Determine whether the underlying stream is closed."""
        return self._underlying_stream.closed

    def fileno(self) -> int:
        """Return the underlying file descriptor."""
        # this will technically raise UnsupportedOperation, but better to let BytesIO do that
        return self._underlying_stream.fileno()

    def mode(self) -> str:
        """Unsupported: the stream has no mode; always raises AssertionError."""
        # this doesn't exist for the underlying stream
        raise AssertionError(
            "`VerifiableStream` does not have a mode. This is probably a bug in "
            "something assuming that the stream is a backed by a file")

    def name(self) -> str:
        """Unsupported: the stream has no name; always raises AssertionError."""
        # this doesn't exist for the underlying stream
        raise AssertionError(
            "`VerifiableStream` does not have a name. This is probably a bug in "
            "something assuming the stream is a file descriptor")

    def flush(self) -> None:
        """Flush the underlying stream."""
        # this technically does nothing in BytesIO
        return self._underlying_stream.flush()

    def tell(self) -> int:
        """Return the current position in the underlying stream."""
        return self._underlying_stream.tell()

    # context manager methods
    def __enter__(self) -> "VerifiableStream":
        """Enter the context; returns the stream itself."""
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> bool:
        """Exit the context by delegating to the underlying stream's ``__exit__``.

        NOTE: this goes straight to the underlying stream, so the
        finalization check performed by ``close()`` is not applied here.
        """
        return self._underlying_stream.__exit__(exc_type, exc_val, exc_tb)
Exemple #15
0
class MemoryFile(object):
    """File-like node whose contents live in an in-memory ``BytesIO`` buffer.

    Besides the buffer, each instance tracks ``mtime``/``ctime`` timestamps,
    a numeric ``mode`` and a per-path lock taken from ``FILE_LOCKS``.
    """

    kind = NodeKind.FILE

    # One lock per path, shared by every MemoryFile created for that path.
    FILE_LOCKS = defaultdict(threading.Lock)

    def __init__(self, path):
        """Create an empty file for ``path``."""
        self.path = path
        self._init_state()

    def _init_state(self):
        """Install a fresh empty buffer and default metadata for ``self.path``."""
        self._data = BytesIO()
        self._line_reader = None
        self.mtime = self.ctime = time.time()
        self.mode = 0
        self.lock = self.FILE_LOCKS[self.path]

    def reset(self):
        """Discard the contents and restore the state of a newly created file."""
        self._init_state()

    def __len__(self):
        """Return the number of bytes stored, leaving the position untouched."""
        pos = self._data.tell()
        # Seek to the very end: BytesIO clamps a negative end-relative seek
        # to 0, so seeking to -1 and adding 1 reported 1 for an empty file.
        self._data.seek(0, 2)
        length = self._data.tell()
        self._data.seek(pos)
        return length

    def __bool__(self):
        """Always truthy, regardless of content length.

        ``__len__`` can now return 0 for an empty file; without this an empty
        file would become falsy, which it never was before.
        """
        return True

    __nonzero__ = __bool__  # Python 2 truthiness hook

    def size(self):
        """Return the number of bytes stored in the buffer."""
        return len(self._data.getvalue())

    def write(self, d):
        """Write ``d`` at the current position and update ``mtime``."""
        self._data.write(d)
        self.mtime = time.time()

    def read(self, size=-1):
        """Read up to ``size`` bytes from the current position (all if -1)."""
        return self._data.read(size)

    def seek(self, to, whence=0):
        """Move the buffer position; arguments are passed to ``BytesIO.seek``."""
        self._data.seek(to, whence)

    def seekable(self):
        """Report whether the underlying buffer supports seeking."""
        return self._data.seekable()

    def tell(self):
        """Return the current buffer position."""
        return self._data.tell()

    def flush(self):
        """Flush the underlying buffer."""
        return self._data.flush()

    def truncate(self, pos=None):
        """Truncate the buffer at ``pos`` (current position if None)."""
        return self._data.truncate(pos)

    def __unicode__(self):
        """Return the whole content decoded as UTF-8."""
        return self._data.getvalue().decode('utf-8')

    def __bytes__(self):
        """Return the whole content as bytes."""
        return self._data.getvalue()

    def __str__(self):
        """Return ``str()`` of the raw buffer value."""
        return str(self._data.getvalue())

    def close(self):
        """Rewind to the start and drop the line reader.

        The buffer itself is not closed, so the contents stay available.
        """
        self._line_reader = None
        self._data.seek(0)

    def readline(self):
        """Read one line from the current position."""
        return self._data.readline()

    def readlines(self):
        """Yield the remaining lines one by one (this is a generator)."""
        for line in self:
            yield line

    def __next__(self):
        """Return the next line, or raise StopIteration at end of data."""
        line = self.readline()
        if line:
            return line
        else:
            raise StopIteration

    next = __next__  # Python 2 iterator interface

    def __iter__(self):
        """The file is its own line iterator."""
        return self

    def items(self):
        """Return an empty list (presumably mirrors a container-node API; verify against callers)."""
        return []