def parse_code(stream: IO[bytes]) -> Code:
    """
    Parse one size-prefixed code entry: a vector of local declarations
    followed by an expression.

    Raises ``MalformedModule`` when the summed local count is greater than
    ``constants.UINT32_MAX`` or when the stream position after parsing does
    not land exactly on the position implied by the declared size.
    """
    size = parse_u32(stream)

    # The declared size is measured from the position right after the size
    # field itself.
    body_start = stream.tell()
    body_end_expected = body_start + size

    local_groups = parse_vector(parse_locals, stream)

    num_locals = sum(group.num for group in local_groups)
    if num_locals > constants.UINT32_MAX:
        raise MalformedModule(
            f"Number of locals exceeds u32: {num_locals} > "
            f"{constants.UINT32_MAX}"
        )

    expr = parse_expression(stream)

    body_end = stream.tell()
    if body_end != body_end_expected:
        actual_size = body_end - body_start
        raise MalformedModule(
            f"Declared code size does not match parsed size: Declared={size} "
            f"Actual={actual_size}"
        )

    # Each declaration carries a count and a value type; expand it into
    # `num` repeated entries of that type.
    local_types = tuple(
        group.valtype
        for group in local_groups
        for _ in range(group.num)
    )
    return Code(local_types, expr)
def parse_instruction(stream: IO[bytes]) -> Instruction:
    """
    Read one opcode byte from the stream and hand off to the parser for
    the matching instruction family.

    Raises ``MalformedModule`` when the byte is not a valid
    ``BinaryOpcode`` and a plain ``Exception`` when the stream is empty or
    the opcode belongs to none of the handled families.
    """
    raw = stream.read(1)

    # An empty read means there was no byte left to consume.
    try:
        opcode_value = raw[0]
    except IndexError:
        raise Exception("TODO: end of stream, what is the right exception here")

    try:
        opcode = BinaryOpcode(opcode_value)
    except ValueError:
        # The opcode byte has already been consumed, hence the `- 1`.
        raise MalformedModule(
            f"Unknown opcode: {hex(opcode_value)} found at position {stream.tell() - 1}"
        )

    # The family checks are evaluated in this fixed order; first match wins.
    if opcode.is_numeric:
        return parse_numeric_instruction(opcode, stream)
    if opcode.is_variable:
        return parse_variable_instruction(opcode, stream)
    if opcode.is_memory:
        return parse_memory_instruction(opcode, stream)
    if opcode.is_parametric:
        return parse_parametric_instruction(opcode, stream)
    if opcode.is_control:
        return parse_control_instruction(opcode, stream)

    raise Exception(f"Unhandled opcode: {opcode}")
def parse_null_byte(stream: IO[bytes]) -> None:
    """
    Consume one byte from the stream and require it to be ``0x00``.

    Raises ``ParseError`` when nothing could be read and
    ``MalformedModule`` when the byte read is anything other than zero.
    """
    byte = stream.read(1)

    # Nothing read: the stream ended before the expected byte.
    if not byte:
        raise ParseError("Unexpected end of stream")

    if byte != b'\x00':
        raise MalformedModule(f"TODO: expected 0x00 but got {hex(byte[0])}")
def parse_table_element_type(stream: IO[bytes]) -> Type[FunctionAddress]:
    """
    Parse the element type flag of a table.

    The only accepted flag is ``0x70``, which yields ``FunctionAddress``;
    any other value raises ``MalformedModule``.
    """
    type_flag = parse_single_byte(stream)

    if type_flag != 0x70:
        raise MalformedModule(
            f"Unrecognized table element type: {hex(type_flag)}")

    return FunctionAddress
def parse_mut(stream: IO[bytes]) -> Mutability:
    """
    Parse a mutability flag.

    One byte is obtained through ``parse_single_byte`` and converted with
    ``Mutability.from_byte``.

    Raises ``MalformedModule`` (chained to the underlying ``ValueError``)
    when ``Mutability.from_byte`` rejects the byte.
    """
    byte = parse_single_byte(stream)

    try:
        return Mutability.from_byte(byte)
    except ValueError as err:
        # The quote around the offending byte is closed here, and the
        # original ValueError is kept as the explicit cause.
        raise MalformedModule(
            f"Invalid byte while parsing mut. Got '{hex(byte)}': {err}"
        ) from err
def parse_null_byte(stream: io.BytesIO) -> None:
    """
    Consume one byte from the stream and require it to be ``0x00``.

    Raises a plain ``Exception`` when nothing could be read and
    ``MalformedModule`` when the byte read is anything other than zero.
    """
    byte = stream.read(1)

    # Nothing read: the stream ended before the expected byte.
    if not byte:
        raise Exception("Unexpected end of stream")

    if byte != b'\x00':
        raise MalformedModule(f"TODO: expected 0x00 but got {hex(byte[0])}")
def parse_valtype(stream: IO[bytes]) -> ValType:
    """
    Parse a value type.

    One byte is obtained through ``parse_single_byte`` and converted with
    ``ValType.from_byte``.

    Raises ``MalformedModule`` (chained to the underlying ``ValueError``)
    when ``ValType.from_byte`` rejects the byte.
    """
    byte = parse_single_byte(stream)

    try:
        return ValType.from_byte(byte)
    except ValueError as err:
        # The quote around the offending byte is closed here, and the
        # original ValueError is kept as the explicit cause.
        raise MalformedModule(
            f"Invalid byte while parsing valtype. Got '{hex(byte)}': {err}"
        ) from err
def parse_function_type(stream: IO[bytes]) -> FunctionType:
    """
    Parse a function type: the leading byte ``0x60`` followed by a vector
    of parameter value types and a vector of result value types.

    Raises ``MalformedModule`` when the leading byte is not ``0x60``.
    """
    flag = parse_single_byte(stream)

    if flag == 0x60:
        # Parameters are read before results.
        param_types = parse_vector(parse_valtype, stream)
        result_types = parse_vector(parse_valtype, stream)
        return FunctionType(param_types, result_types)

    raise MalformedModule(
        f"Invalid function type leading byte: {hex(flag)}")
def parse_u64(stream: IO[bytes]) -> numpy.uint64:
    """
    Parser for a single unsigned 64-bit integer

    The value is decoded with ``parse_unsigned_leb128``. Raises
    ``MalformedModule`` when more than 10 bytes were consumed or when the
    decoded value lies outside the unsigned 64-bit range.
    """
    offset_before = stream.tell()
    value = parse_unsigned_leb128(stream)
    byte_width = stream.tell() - offset_before

    if byte_width > 10:  # ceil(64 / 7)
        raise MalformedModule(
            f"encoded u64 exceeds maximum byte width: {byte_width} > 10")

    if 0 <= value < constants.UINT64_CEIL:
        return numpy.uint64(value)

    if value < 0:
        raise MalformedModule(f"decoded u64 was not positive: {value}")

    if value > constants.UINT64_MAX:
        raise MalformedModule(
            f"decoded u64 is greater than UINT64_MAX: {value} > 2**64 - 1")

    # Not expected to be reachable given the checks above.
    raise Exception("Invariant")
def parse_u32(stream: IO[bytes]) -> numpy.uint32:
    """
    Parser for a single unsigned 32-bit integer

    The value is decoded with ``parse_unsigned_leb128``. Raises
    ``MalformedModule`` when more than 5 bytes were consumed or when the
    decoded value falls outside
    ``[constants.UINT32_FLOOR, constants.UINT32_CEIL)``.
    """
    offset_before = stream.tell()
    value = parse_unsigned_leb128(stream)
    byte_width = stream.tell() - offset_before

    if byte_width > 5:  # ceil(32 / 7)
        raise MalformedModule(
            f"encoded u32 exceeds maximum byte width: {byte_width} > 5")

    if constants.UINT32_FLOOR <= value < constants.UINT32_CEIL:
        return numpy.uint32(value)

    if value < constants.UINT32_FLOOR:
        raise MalformedModule(f"decoded uin32 was not positive: {value}")

    if value > constants.UINT32_MAX:
        raise MalformedModule(
            f"decoded uin32 is greater than UINT32_MAX: {value} > 2**32 - 1")

    # Not expected to be reachable given the checks above.
    raise Exception("Invariant")
def parse_s32(stream: IO[bytes]) -> numpy.int32:
    """
    Parser for a single signed 32-bit integer

    The value is decoded with ``parse_signed_leb128``. Raises
    ``MalformedModule`` when more than 5 bytes were consumed or when the
    decoded value falls outside
    ``[constants.SINT32_MIN, constants.SINT32_CEIL)``.
    """
    offset_before = stream.tell()
    value = parse_signed_leb128(stream)
    byte_width = stream.tell() - offset_before

    if byte_width > 5:  # ceil(32 / 7)
        raise MalformedModule(
            f"encoded s32 exceeds maximum byte width: {byte_width} > 5")

    if constants.SINT32_MIN <= value < constants.SINT32_CEIL:
        return numpy.int32(value)

    if value < constants.SINT32_MIN:
        raise MalformedModule(
            f"decoded s32 is less than SINT32_MIN: {value} < -1 * 2**31")

    if value > constants.SINT32_MAX:
        raise MalformedModule(
            f"decoded s32 is greater than SINT32_MAX: {value} > 2**31 - 1")

    # Not expected to be reachable given the checks above.
    raise Exception("Invariant")
def parse_u32(stream: io.BytesIO) -> UInt32:
    """
    Parser for a single unsigned 32-bit integer

    The value is decoded with ``parse_unsigned_leb128``. Raises
    ``MalformedModule`` when more than 5 bytes were consumed or when the
    decoded value falls outside ``[0, constants.UINT32_CEIL)``.
    """
    start_pos = stream.tell()
    value = parse_unsigned_leb128(stream)
    end_pos = stream.tell()

    byte_width = end_pos - start_pos

    if byte_width > 5:  # ceil(32 / 7)
        # The reported limit must match the limit actually enforced (5);
        # it previously read "> 10".
        raise MalformedModule(
            f"encoded u32 exceeds maximum byte width: {byte_width} > 5"
        )
    elif 0 <= value < constants.UINT32_CEIL:
        return UInt32(value)
    elif value < 0:
        raise MalformedModule(
            f"decoded uin32 was not positive: {value}"
        )
    elif value > constants.UINT32_MAX:
        raise MalformedModule(
            f"decoded uin32 is greater than UINT32_MAX: {value} > 2**32 - 1"
        )
    else:
        # Not expected to be reachable given the checks above.
        raise Exception("Invariant")
def parse_s64(stream: io.BytesIO) -> SInt64:
    """
    Parser for a single signed 64-bit integer

    The value is decoded with ``parse_signed_leb128``. Raises
    ``MalformedModule`` when more than 10 bytes were consumed or when the
    decoded value falls outside
    ``[constants.SINT64_MIN, constants.SINT64_CEIL)``.
    """
    offset_before = stream.tell()
    value = parse_signed_leb128(stream)
    byte_width = stream.tell() - offset_before

    if byte_width > 10:  # ceil(64 / 7)
        raise MalformedModule(
            f"encoded s64 exceeds maximum byte width: {byte_width} > 10"
        )

    if constants.SINT64_MIN <= value < constants.SINT64_CEIL:
        return SInt64(value)

    if value < constants.SINT64_MIN:
        raise MalformedModule(
            f"decoded s64 is less than SINT64_MIN: {value} < -1 * 2**63"
        )

    if value > constants.SINT64_MAX:
        raise MalformedModule(
            f"decoded s64 is greater than SINT64_MAX: {value} > 2**63 - 1"
        )

    # Not expected to be reachable given the checks above.
    raise Exception("Invariant")
def parse_import_descriptor(stream: IO[bytes]) -> TImportDesc:
    """
    Parse the descriptor value for an Import.

    A leading flag byte selects the parser: ``0x00`` type index, ``0x01``
    table type, ``0x02`` memory type, ``0x03`` global type. Any other flag
    raises ``MalformedModule``.
    """
    type_flag = parse_single_byte(stream)

    if type_flag == 0x00:
        return parse_type_idx(stream)
    if type_flag == 0x01:
        return parse_table_type(stream)
    if type_flag == 0x02:
        return parse_memory_type(stream)
    if type_flag == 0x03:
        return parse_global_type(stream)

    raise MalformedModule(
        f"Unknown leading byte for import descriptor: {hex(type_flag)}")
def parse_magic(stream: IO[bytes]) -> Tuple[UInt8, UInt8, UInt8, UInt8]:
    """
    Read four single bytes and require them to equal ``MAGIC``.

    Returns ``MAGIC`` on success and raises ``MalformedModule`` otherwise.

    https://webassembly.github.io/spec/core/bikeshed/index.html#binary-magic
    """
    # Four consecutive single-byte reads, collected in stream order.
    actual = tuple(parse_single_byte(stream) for _ in range(4))

    if actual == MAGIC:
        return MAGIC

    raise MalformedModule(
        f"Invalid magic start bytes.  Got: "
        f"{tuple(hex(byte) for byte in actual)}"
    )
def parse_module(stream: IO[bytes]) -> Module:
    """
    Parse a complete module: the magic preamble, the version, and then all
    sections, which are assembled into a ``Module``.

    Raises ``MalformedModule`` when the function section and the code
    section do not contain the same number of entries.
    """
    # `parse_magic` both parses and validates the 4-byte *magic* preamble.
    # Its return value is not needed here, so it is discarded.
    parse_magic(stream)

    version = parse_version(stream)

    (
        _custom_sections,
        type_section,
        import_section,
        function_section,
        table_section,
        memory_section,
        global_section,
        export_section,
        start_section,
        element_segment_section,
        code_section,
        data_segment_section,
    ) = parse_sections(stream)

    # Each function is built from one type index and one code entry, so
    # the two sections must line up one-to-one.
    if len(function_section.types) != len(code_section.codes):
        raise MalformedModule(
            "Mismatched lengths of function section and code section. "
            f"function-types[{len(function_section.types)}] != "
            f"codes[{len(code_section.codes)}]")

    paired = zip(function_section.types, code_section.codes)
    functions = tuple(
        Function(func_type_idx, body.locals, body.expr)
        for func_type_idx, body in paired
    )

    return Module(
        version=version,
        types=type_section.function_types,
        funcs=functions,
        tables=table_section.tables,
        mems=memory_section.mems,
        globals=global_section.globals,
        elem=element_segment_section.element_segments,
        data=data_segment_section.data_segments,
        start=start_section.start,
        imports=import_section.imports,
        exports=export_section.exports,
    )
def parse_export_descriptor(stream: IO[bytes]) -> TExportDesc:
    """
    Parse the descriptor value for an Export

    A leading flag byte selects the parser: ``0x00`` function index,
    ``0x01`` table index, ``0x02`` memory index, ``0x03`` global index.
    Any other flag raises ``MalformedModule``.
    """
    flag = parse_single_byte(stream)

    if flag == 0x00:
        return parse_function_idx(stream)
    if flag == 0x01:
        return parse_table_idx(stream)
    if flag == 0x02:
        return parse_memory_idx(stream)
    if flag == 0x03:
        return parse_global_idx(stream)

    raise MalformedModule(
        f"Unregonized byte while parsing export descriptor: {hex(flag)}"
    )
def parse_magic(
    stream: IO[bytes]
) -> Tuple[numpy.uint8, numpy.uint8, numpy.uint8, numpy.uint8]:
    """
    Parser for the *magic* 4-byte preamble for a binary encoded Web Assembly
    module.

    Four single bytes are read and compared against ``MAGIC``; ``MAGIC`` is
    returned on a match and ``MalformedModule`` is raised otherwise.

    https://webassembly.github.io/spec/core/bikeshed/index.html#binary-magic
    """
    # Four consecutive single-byte reads, collected in stream order.
    actual = tuple(parse_single_byte(stream) for _ in range(4))

    if actual == MAGIC:
        return MAGIC

    raise MalformedModule(f"Invalid magic start bytes.  Got: "
                          f"{tuple(hex(byte) for byte in actual)}")