def test_proto_import_examples():
    """Import every example .proto file pulled from the protobuf repo and
    validate each typedef produced by the import."""
    config = Config()
    # try importing all the examples pulled from protobuf repo
    protofiles = glob.glob("tests/payloads/test_protos/*.proto")
    # an empty glob means we're running from the wrong directory and the
    # loop below would silently test nothing
    assert len(protofiles) != 0
    for target_file in protofiles:
        print("Testing file: %s" % target_file)
        typedef_map_out = protofile.import_proto(config,
                                                 input_filename=target_file)
        config.known_types = typedef_map_out
        # log once per file (loop-invariant), with lazy %-args so the
        # formatting cost is only paid when debug logging is enabled
        logging.debug("known messages: %s", config.known_types)
        for name, typedef in typedef_map_out.items():
            blackboxprotobuf.lib.validate_typedef(typedef, config=config)
def test_message_inverse(x):
    """Round-trip a (typedef, message) pair through the length-delimited
    encoder/decoder and verify the message comes back unchanged."""
    config = Config()
    typedef, message = x
    wire_bytes = length_delim.encode_lendelim_message(message, config, typedef)
    round_tripped, inferred_typedef, end_pos = length_delim.decode_lendelim_message(
        wire_bytes, config, typedef, 0)
    # record intermediates so hypothesis failure reports show them
    for item in (wire_bytes, typedef, inferred_typedef):
        note(item)
    assert isinstance(wire_bytes, bytearray)
    assert isinstance(round_tripped, dict)
    # the decoder must consume the whole buffer
    assert end_pos == len(wire_bytes)
    assert round_tripped == message
def test_message_json_inverse(x):
    """Encode a message, convert the bytes to JSON and back, then check the
    final decode still equals the original message."""
    config = Config()
    typedef, message = x
    wire_bytes = length_delim.encode_message(message, config, typedef)
    # binary -> JSON (the typedef may be refined in the process)
    as_json, refined_typedef = blackboxprotobuf.protobuf_to_json(
        wire_bytes, config=config, message_type=typedef)
    # JSON -> binary, driven by the refined typedef
    rewire_bytes = blackboxprotobuf.protobuf_from_json(
        as_json, config=config, message_type=refined_typedef)
    # binary -> dict, using the original typedef
    final_message, _final_typedef = blackboxprotobuf.decode_message(
        rewire_bytes, config=config, message_type=typedef)
    assert isinstance(wire_bytes, bytearray)
    assert isinstance(final_message, dict)
    assert final_message == message
def test_bytes_guess_inverse(x):
    """Encode raw bytes inside a wrapper message, decode with an empty
    typedef, and verify the payload is recovered when guessed as bytes."""
    config = Config()
    # nest the payload one level down so the decoder has to guess its type
    outer_typedef = {"1": {"type": "bytes"}}
    outer_message = {"1": x}
    wire_bytes = length_delim.encode_lendelim_message(outer_message, config,
                                                      outer_typedef)
    decoded, guessed_typedef, end_pos = length_delim.decode_lendelim_message(
        wire_bytes, config, {})
    # would like to fail if it guesses wrong, but sometimes it might parse
    # as a message — skip those inputs instead of failing
    assume(guessed_typedef["1"]["type"] == "bytes")
    assert isinstance(wire_bytes, bytearray)
    assert isinstance(decoded["1"], bytearray)
    assert end_pos == len(wire_bytes)
    assert decoded["1"] == x
def test_message_guess_inverse(x):
    """Nest a known message inside a wrapper, decode anonymously, and check
    the decoder guesses that the inner field is itself a message."""
    config = Config()
    inner_typedef, inner_message = x
    # wrap the message so the inner field's type has to be guessed
    outer_typedef = {"1": {"type": "message", "message_typedef": inner_typedef}}
    outer_message = {"1": inner_message}
    wire_bytes = length_delim.encode_lendelim_message(outer_message, config,
                                                      outer_typedef)
    note("Encoded length %d" % len(wire_bytes))
    decoded, guessed_typedef, end_pos = length_delim.decode_lendelim_message(
        wire_bytes, config, {})
    note(decoded)
    assert guessed_typedef["1"]["type"] == "message"
    assert isinstance(wire_bytes, bytearray)
    assert isinstance(decoded, dict)
    assert isinstance(decoded["1"], dict)
    assert end_pos == len(wire_bytes)
def test_proto_export_inverse(tmp_path, x, name):
    """Generate a proto file from a typedef and try to re-import it.

    This does not cover all possible proto files we want to try importing.
    Verifies the re-imported typedef keeps the same field numbers/types and
    that the message still encodes/decodes with either typedef.
    """
    config = Config()
    typedef, message = x
    with tempfile.NamedTemporaryFile(mode="r+",
                                     dir=str(tmp_path),
                                     suffix=".proto",
                                     delete=True) as outfile:
        typedef_map = {name: typedef}
        # write the typedef out as a .proto file, then read it back in
        protofile.export_proto(typedef_map, output_file=outfile)
        outfile.flush()
        outfile.seek(0)
        new_typedef_map = protofile.import_proto(config, input_file=outfile)
        config.known_types.update(new_typedef_map)
        # validate every typedef that came back from the import
        for name, typedef in new_typedef_map.items():
            blackboxprotobuf.validate_typedef(typedef, config=config)

        def _check_field_types(typedef1, typedef2):
            # Recursively assert typedef2 has the same field numbers and types
            # as typedef1. Nested messages may be inline ("message_typedef")
            # or referenced by name ("message_type_name"), resolved against
            # the enclosing typedef_map / new_typedef_map.
            for field_num in typedef1.keys():
                # make sure we don't drop keys
                assert field_num in typedef2
                assert typedef1[field_num]["type"] == typedef2[field_num][
                    "type"]
                if typedef1[field_num]["type"] == "message":
                    message_typedef1 = None
                    message_typedef2 = None
                    if "message_typedef" in typedef1[field_num]:
                        message_typedef1 = typedef1[field_num][
                            "message_typedef"]
                    elif "message_type_name" in typedef1[field_num]:
                        assert typedef1[field_num][
                            "message_type_name"] in typedef_map
                        message_typedef1 = typedef_map[typedef1[field_num]
                                                       ["message_type_name"]]
                    if "message_typedef" in typedef2[field_num]:
                        message_typedef2 = typedef2[field_num][
                            "message_typedef"]
                    elif "message_type_name" in typedef2[field_num]:
                        assert (typedef2[field_num]["message_type_name"]
                                in new_typedef_map)
                        message_typedef2 = new_typedef_map[
                            typedef2[field_num]["message_type_name"]]
                    _check_field_types(message_typedef1, message_typedef2)

        note(typedef_map)
        note(new_typedef_map)
        for name, typedef in typedef_map.items():
            _check_field_types(typedef, new_typedef_map[name])
            note(new_typedef_map[name])
            # try to actually encode a message with the typedef
            encode_forward = length_delim.encode_message(
                message, config, typedef_map[name])
            config.known_types = new_typedef_map
            encode_backward = length_delim.encode_message(
                message, config, new_typedef_map[name])
            # NOTE(review): decode_forward/decode_backward are never compared
            # or asserted — this only checks decoding doesn't raise. Confirm
            # whether an equality assertion was intended here.
            decode_forward, _, _ = length_delim.decode_message(
                encode_forward, config, new_typedef_map[name])
            decode_backward, _, _ = length_delim.decode_message(
                encode_backward, config, typedef_map[name])
def test_proto_decode(tmp_path, x, name):
    """Export a typedef to a .proto file, compile it with protoc, and check
    that the generated protobuf class decodes a message we encoded to the
    same field values."""
    config = Config()
    typedef, message = x
    with tempfile.NamedTemporaryFile(mode="w",
                                     dir=str(tmp_path),
                                     suffix=".proto",
                                     delete=True) as outfile:
        typedef_map = {name: typedef}
        encoded_message = length_delim.encode_message(message, config, typedef)
        note(typedef_map)
        basename = os.path.basename(outfile.name)
        # Export the protobuf file and compile it (basename[:-6] trims ".proto")
        protofile.export_proto(typedef_map,
                               output_file=outfile,
                               package=basename[:-6])
        py_out = str(tmp_path / "py_out")
        if os.path.exists(py_out):
            shutil.rmtree(py_out)
        os.mkdir(py_out)
        outfile.flush()
        # argument-list form instead of a shell string: no shell parsing of
        # the generated filename
        subprocess.check_call(
            ["/usr/bin/protoc", "--python_out", "./py_out", basename],
            cwd=str(tmp_path),
        )
        # Try to import the generated module; trim off ".proto"
        sys.path.insert(0, str(tmp_path) + "/py_out/")
        try:
            proto_module = __import__(basename[:-6] + "_pb2")
            del sys.path[0]
        except SyntaxError:
            # some generated field names are not valid python; skip those
            logging.debug("Caught syntax error in protoc import")
            return
        message_class = getattr(proto_module, name)
        note(encoded_message)
        my_message = message_class()
        my_message.ParseFromString(encoded_message)
        decoded_message = google.protobuf.json_format.MessageToDict(
            my_message, including_default_value_fields=True)
        note(message)
        note(decoded_message)
        note(
            google.protobuf.json_format.MessageToJson(
                my_message, including_default_value_fields=True))

        def _check_field_match(orig_value, new_value):
            # Compare one original value to its MessageToDict rendering.
            # MessageToDict stringifies 64-bit ints, base64-encodes bytes and
            # renders non-finite floats as "Infinity"/"NaN" strings.
            note(type(new_value))
            note(type(orig_value))
            if isinstance(orig_value, six.integer_types) and isinstance(
                    new_value, str):
                assert str(orig_value) == new_value
            elif isinstance(orig_value, bytes):
                assert orig_value == base64.b64decode(new_value)
            elif isinstance(new_value, dict):
                _check_message_match(orig_value, new_value)
            elif isinstance(orig_value, float):
                # normalize floats before comparing
                if isinstance(new_value, str):
                    if "Infinity" in new_value:
                        assert math.isinf(orig_value)
                    else:
                        assert new_value == "NaN"
                        # BUGFIX: isnan must check the original float;
                        # new_value is the string "NaN" at this point
                        assert math.isnan(orig_value)
                else:
                    # pack and unpack floats to try and normalize them
                    try:
                        orig_value_packed = struct.pack("<f", orig_value)
                        (orig_value, ) = struct.unpack("<f", orig_value_packed)
                        # BUGFIX: normalize new_value, not orig_value — the
                        # original code compared orig_value against itself,
                        # so the float32 comparison could never fail
                        new_value_packed = struct.pack("<f", new_value)
                        (new_value, ) = struct.unpack("<f", new_value_packed)
                        assert orig_value == new_value
                    except OverflowError:
                        # value doesn't fit in a float32; compare as float64
                        orig_value_packed = struct.pack("<d", orig_value)
                        (orig_value, ) = struct.unpack("<d", orig_value_packed)
                        new_value_packed = struct.pack("<d", new_value)
                        (new_value, ) = struct.unpack("<d", new_value_packed)
                        assert orig_value == new_value
            else:
                assert orig_value == new_value

        def _check_message_match(message_orig, message_new):
            # Walk every field MessageToDict produced and compare against the
            # original message, tolerating the exporter's "fieldN" key naming.
            for field_key, field_value in message_new.items():
                if field_key.startswith("field"):
                    field_key = field_key[5:]
                orig_value = message_orig[field_key]
                if isinstance(field_value, list):
                    if not isinstance(orig_value, list):
                        orig_value = [orig_value]
                    assert len(orig_value) == len(field_value)
                    for orig_item, new_item in zip(orig_value, field_value):
                        _check_field_match(orig_item, new_item)
                else:
                    _check_field_match(orig_value, field_value)

        # Check all the fields match each other
        _check_message_match(message, decoded_message)
def test_anon_decode(x):
    """Decode an encoded message with an empty typedef (fully anonymous
    decode) and verify the guessed fields and types line up with the
    original message."""
    config = Config()
    typedef, message = x
    encoded = length_delim.encode_lendelim_message(message, config, typedef)
    decoded, typedef_out, pos = length_delim.decode_lendelim_message(
        encoded, config, {}, 0)
    note("Original message: %r" % message)
    note("Decoded message: %r" % decoded)
    note("Original typedef: %r" % typedef)
    note("Decoded typedef: %r" % typedef_out)

    def check_message(orig, orig_typedef, new, new_typedef):
        # Recursively compare the original message/typedef to the anonymously
        # decoded version, coercing values where the decoder's type guess
        # legitimately differs from the original type.
        for field_number in set(orig.keys()) | set(new.keys()):
            # verify all fields are there
            assert field_number in orig
            assert field_number in orig_typedef
            assert field_number in new
            assert field_number in new_typedef
            orig_values = orig[field_number]
            new_values = new[field_number]
            orig_type = orig_typedef[field_number]["type"]
            new_type = new_typedef[field_number]["type"]
            note("Parsing field# %s" % field_number)
            note("orig_values: %r" % orig_values)
            note("new_values: %r" % new_values)
            note("orig_type: %s" % orig_type)
            note("new_type: %s" % new_type)
            # Fields might be lists. Just convert everything to a list
            if not isinstance(orig_values, list):
                orig_values = [orig_values]
                # if the original wasn't repeated, the decode shouldn't be either
                assert not isinstance(new_values, list)
                new_values = [new_values]
            # if the types don't match, then try to convert them
            if new_type == "message" and orig_type in ["bytes", "string"]:
                # if the type is a message, we want to convert the orig type
                # to a message. this isn't ideal, we'll be using the
                # unintended type, but best way to compare. Re-encoding a
                # message to binary might not keep the field order
                new_field_typedef = new_typedef[field_number][
                    "message_typedef"]
                for i, orig_value in enumerate(orig_values):
                    if orig_type == "bytes":
                        (
                            orig_values[i],
                            orig_field_typedef,
                            _,
                        ) = length_delim.decode_lendelim_message(
                            length_delim.encode_bytes(orig_value),
                            config,
                            new_field_typedef,
                        )
                    else:
                        # string value
                        (
                            orig_values[i],
                            orig_field_typedef,
                            _,
                        ) = length_delim.decode_lendelim_message(
                            length_delim.encode_string(orig_value),
                            config,
                            new_field_typedef,
                        )
                orig_typedef[field_number][
                    "message_typedef"] = orig_field_typedef
                orig_type = "message"
            if new_type == "string" and orig_type == "bytes":
                # our bytes were accidently valid string
                new_type = "bytes"
                for i, new_value in enumerate(new_values):
                    new_values[i], _ = length_delim.decode_bytes(
                        length_delim.encode_string(new_value), 0)
            # sort the lists with special handling for dicts
            # (repeated-field order is not guaranteed to survive the round trip)
            orig_values.sort(
                key=lambda x: x if not isinstance(x, dict) else x.items())
            new_values.sort(
                key=lambda x: x if not isinstance(x, dict) else x.items())
            for orig_value, new_value in zip(orig_values, new_values):
                if orig_type == "message":
                    # recurse into nested messages
                    check_message(
                        orig_value,
                        orig_typedef[field_number]["message_typedef"],
                        new_value,
                        new_typedef[field_number]["message_typedef"],
                    )
                else:
                    assert orig_value == new_value

    check_message(message, typedef, decoded, typedef_out)