예제 #1
0
def test_proto_import_examples():
    """Import every example ``.proto`` file and validate the resulting typedefs.

    The files are globbed from ``tests/payloads/test_protos`` relative to the
    current working directory, and the test fails if none are found. After
    each import the returned typedef map replaces ``config.known_types``
    before every typedef in the map is validated.
    """
    config = Config()
    # try importing all the examples pulled from protobuf repo
    protofiles = glob.glob("tests/payloads/test_protos/*.proto")
    # Print the working directory (the glob above is relative to it) without
    # spawning a shell, so this also works where ``pwd`` is not available.
    print(os.getcwd())
    assert len(protofiles) != 0
    for target_file in protofiles:
        print("Testing file: %s" % target_file)
        typedef_map_out = protofile.import_proto(config,
                                                 input_filename=target_file)
        config.known_types = typedef_map_out
        # The known types do not change while validating, so log them once per
        # file and let logging do the formatting lazily.
        logging.debug("known messages: %s", config.known_types)
        for typedef in typedef_map_out.values():
            blackboxprotobuf.lib.validate_typedef(typedef, config=config)
예제 #2
0
def test_message_inverse(x):
    """Check that a length-delimited encode followed by a decode is lossless.

    ``x`` is unpacked as a ``(typedef, message)`` pair. The message is encoded
    with its typedef and decoded with the same typedef starting at offset 0;
    the decoder must consume the whole buffer and return the original message.
    """
    config = Config()
    typedef, message = x

    encoded = length_delim.encode_lendelim_message(message, config, typedef)
    result = length_delim.decode_lendelim_message(encoded, config, typedef, 0)
    decoded, typedef_out, pos = result

    # Record the intermediate values so they show up in a failure report.
    for detail in (encoded, typedef, typedef_out):
        note(detail)

    assert isinstance(encoded, bytearray)
    assert isinstance(decoded, dict)
    # The decoder has to stop exactly at the end of the buffer.
    assert pos == len(encoded)
    assert message == decoded
예제 #3
0
def test_message_json_inverse(x):
    """Check that protobuf -> JSON -> protobuf round-trips a message.

    ``x`` is unpacked as a ``(typedef, message)`` pair. The message is encoded
    to bytes, converted to JSON, converted back to bytes using the typedef
    returned by the JSON conversion, and finally decoded with the original
    typedef. The decoded result must equal the original message.
    """
    config = Config()
    typedef, message = x

    encoded = length_delim.encode_message(message, config, typedef)

    # bytes -> JSON, keeping the typedef the conversion reports
    json_text, json_typedef = blackboxprotobuf.protobuf_to_json(
        encoded, config=config, message_type=typedef)
    # JSON -> bytes, driven by that reported typedef
    reencoded = blackboxprotobuf.protobuf_from_json(
        json_text, config=config, message_type=json_typedef)
    # bytes -> dict, driven by the original typedef again
    decoded, _final_typedef = blackboxprotobuf.decode_message(
        reencoded, config=config, message_type=typedef)

    assert isinstance(encoded, bytearray)
    assert isinstance(decoded, dict)
    assert message == decoded
예제 #4
0
def test_bytes_guess_inverse(x):
    """Check that a bytes field decoded without a typedef comes back unchanged.

    ``x`` is stored as field ``"1"`` of a wrapper message typed as ``bytes``.
    The wrapper is encoded with that typedef and decoded with an empty
    typedef, so the decoder has to guess the type of the field.
    """
    config = Config()
    # Nest the payload inside an outer message so its type must be guessed.
    outer_message = {"1": x}
    outer_typedef = {"1": {"type": "bytes"}}

    encoded = length_delim.encode_lendelim_message(outer_message, config,
                                                   outer_typedef)
    decoded = length_delim.decode_lendelim_message(encoded, config, {})
    value, typedef, pos = decoded

    # A wrong guess is not treated as a failure: the payload can sometimes be
    # parsed as a message, so such examples are discarded instead.
    assume(typedef["1"]["type"] == "bytes")

    assert isinstance(encoded, bytearray)
    assert isinstance(value["1"], bytearray)
    assert pos == len(encoded)
    assert value["1"] == x
예제 #5
0
def test_message_guess_inverse(x):
    """Check that a nested message is still guessed to be a message.

    ``x`` is unpacked as a ``(typedef, message)`` pair. The message is stored
    as field ``"1"`` of a wrapper message, encoded with the full typedef and
    then decoded with an empty typedef. Only the guessed type and the shape
    of the result are checked, not the decoded field values.
    """
    config = Config()
    type_def, message = x
    # Nest the message inside an outer message so its type must be guessed.
    outer_message = {"1": message}
    outer_typedef = {"1": {"message_typedef": type_def, "type": "message"}}

    encoded = length_delim.encode_lendelim_message(outer_message, config,
                                                   outer_typedef)
    note("Encoded length %d" % len(encoded))

    decoded = length_delim.decode_lendelim_message(encoded, config, {})
    value, decoded_type, pos = decoded
    note(value)

    assert decoded_type["1"]["type"] == "message"

    assert isinstance(encoded, bytearray)
    assert isinstance(value, dict)
    assert isinstance(value["1"], dict)
    assert pos == len(encoded)
예제 #6
0
def test_proto_export_inverse(tmp_path, x, name):
    """Generate a proto file and try to re-import it.

    This does not cover all possible proto files we want to try importing.

    ``x`` is unpacked as a ``(typedef, message)`` pair and ``name`` is the
    message name the typedef is exported under. The typedef is written to a
    temporary ``.proto`` file inside ``tmp_path``, read back, validated and
    compared field by field with the original. Finally the message is
    encoded and decoded with both the original and the re-imported typedef.
    """
    config = Config()
    typedef, message = x
    with tempfile.NamedTemporaryFile(mode="r+",
                                     dir=str(tmp_path),
                                     suffix=".proto",
                                     delete=True) as outfile:

        typedef_map = {name: typedef}

        protofile.export_proto(typedef_map, output_file=outfile)
        outfile.flush()

        # Rewind so the import reads what was just written.
        outfile.seek(0)
        new_typedef_map = protofile.import_proto(config, input_file=outfile)

        config.known_types.update(new_typedef_map)
        # validate
        # Loop variables use their own names so the ``name`` and ``typedef``
        # parameters, which are needed again below, are never rebound.
        for imported_typedef in new_typedef_map.values():
            blackboxprotobuf.validate_typedef(imported_typedef, config=config)

        def _resolve_message_typedef(field, type_map):
            """Return the nested typedef of a message field, or None.

            An inline ``message_typedef`` wins; otherwise a
            ``message_type_name`` is looked up in ``type_map``.
            """
            if "message_typedef" in field:
                return field["message_typedef"]
            if "message_type_name" in field:
                assert field["message_type_name"] in type_map
                return type_map[field["message_type_name"]]
            return None

        def _check_field_types(typedef1, typedef2):
            """Assert every field of ``typedef1`` exists in ``typedef2`` with
            the same type, recursing into message fields."""
            for field_num in typedef1.keys():
                # make sure we don't drop keys
                assert field_num in typedef2
                assert typedef1[field_num]["type"] == typedef2[field_num][
                    "type"]
                if typedef1[field_num]["type"] == "message":
                    message_typedef1 = _resolve_message_typedef(
                        typedef1[field_num], typedef_map)
                    message_typedef2 = _resolve_message_typedef(
                        typedef2[field_num], new_typedef_map)
                    # Fail with a clear message instead of recursing on None
                    # when a message field carries no typedef information.
                    assert message_typedef1 is not None, (
                        "original field %s has no message typedef" %
                        field_num)
                    assert message_typedef2 is not None, (
                        "imported field %s has no message typedef" %
                        field_num)

                    _check_field_types(message_typedef1, message_typedef2)

        note(typedef_map)
        note(new_typedef_map)
        for exported_name, exported_typedef in typedef_map.items():
            _check_field_types(exported_typedef,
                               new_typedef_map[exported_name])

        note(new_typedef_map[name])
        # try to actually encode a message with the typedef
        encode_forward = length_delim.encode_message(message, config,
                                                     typedef_map[name])

        config.known_types = new_typedef_map
        encode_backward = length_delim.encode_message(message, config,
                                                      new_typedef_map[name])

        # The decoded values are not compared; these calls only check that
        # each encoding can be decoded with the other typedef without raising.
        decode_forward, _, _ = length_delim.decode_message(
            encode_forward, config, new_typedef_map[name])
        decode_backward, _, _ = length_delim.decode_message(
            encode_backward, config, typedef_map[name])
예제 #7
0
def test_proto_decode(tmp_path, x, name):
    """Export to protobuf and try to decode a message we encoded with it.

    ``x`` is unpacked as a ``(typedef, message)`` pair and ``name`` is the
    message name the typedef is exported under. The typedef is exported to a
    temporary ``.proto`` file inside ``tmp_path`` and compiled to Python with
    ``/usr/bin/protoc``. The generated module is imported and used to parse
    the bytes produced by ``length_delim.encode_message``. The parsed message
    is converted to a dict and compared field by field with the original.

    If importing the generated module raises ``SyntaxError`` the test returns
    early without comparing anything.
    """
    config = Config()
    typedef, message = x
    with tempfile.NamedTemporaryFile(mode="w",
                                     dir=str(tmp_path),
                                     suffix=".proto",
                                     delete=True) as outfile:
        typedef_map = {name: typedef}

        encoded_message = length_delim.encode_message(message, config, typedef)

        note(typedef_map)
        basename = os.path.basename(outfile.name)
        # Trim off .proto
        package_name = os.path.splitext(basename)[0]

        # Export the protobuf file and compile it
        protofile.export_proto(typedef_map,
                               output_file=outfile,
                               package=package_name)

        py_out = str(tmp_path / "py_out")
        if os.path.exists(py_out):
            shutil.rmtree(py_out)
        os.mkdir(py_out)
        outfile.flush()
        # Pass an argument list so no shell is needed to split the command.
        subprocess.check_call(
            ["/usr/bin/protoc", "--python_out", "./py_out", basename],
            cwd=str(tmp_path),
        )

        # Try to import the file
        sys.path.insert(0, py_out)
        try:
            proto_module = __import__(package_name + "_pb2")
        except SyntaxError:
            logging.debug("Caught syntax error in protoc import")
            return
        finally:
            # Always restore sys.path, including when the import fails.
            if py_out in sys.path:
                sys.path.remove(py_out)

        message_class = getattr(proto_module, name)

        note(encoded_message)
        my_message = message_class()
        my_message.ParseFromString(encoded_message)

        decoded_message = google.protobuf.json_format.MessageToDict(
            my_message, including_default_value_fields=True)

        note(message)
        note(decoded_message)
        note(
            google.protobuf.json_format.MessageToJson(
                my_message, including_default_value_fields=True))

        def _roundtrip(fmt, value):
            """Pack and unpack ``value`` with ``fmt`` to round it to that width."""
            (normalized, ) = struct.unpack(fmt, struct.pack(fmt, value))
            return normalized

        def _check_field_match(orig_value, new_value):
            """Assert one original value matches its dict representation."""
            note(type(new_value))
            note(type(orig_value))
            if isinstance(orig_value, six.integer_types) and isinstance(
                    new_value, str):
                # integers may come back as strings
                assert str(orig_value) == new_value
            elif isinstance(orig_value, bytes):
                # bytes come back base64 encoded
                assert orig_value == base64.b64decode(new_value)
            elif isinstance(new_value, dict):
                _check_message_match(orig_value, new_value)
            elif isinstance(orig_value, float):
                # normalize floats
                if isinstance(new_value, str):
                    # non-finite floats come back as strings
                    if "Infinity" in new_value:
                        assert math.isinf(orig_value)
                    else:
                        assert new_value == "NaN"
                        # Test the original float: math.isnan() raises
                        # TypeError when handed the string itself.
                        assert math.isnan(orig_value)

                else:
                    # pack and unpack floats to try and normalize them
                    # Both sides are rounded the same way, and the inputs are
                    # left untouched so the fallback starts from the raw
                    # values.
                    try:
                        orig_normalized = _roundtrip("<f", orig_value)
                        new_normalized = _roundtrip("<f", new_value)
                    except OverflowError:
                        # does not fit in 32 bits, compare as doubles
                        orig_normalized = _roundtrip("<d", orig_value)
                        new_normalized = _roundtrip("<d", new_value)
                    assert orig_normalized == new_normalized

            else:
                assert orig_value == new_value

        def _check_message_match(message_orig, message_new):
            """Assert every field of ``message_new`` matches ``message_orig``."""
            for field_key, field_value in message_new.items():
                # strip the "field" prefix to get the original key
                if field_key.startswith("field"):
                    field_key = field_key[5:]
                orig_value = message_orig[field_key]
                if isinstance(field_value, list):
                    if not isinstance(orig_value, list):
                        orig_value = [orig_value]
                    assert len(orig_value) == len(field_value)
                    for orig_item, new_item in zip(orig_value, field_value):
                        _check_field_match(orig_item, new_item)
                else:
                    _check_field_match(orig_value, field_value)

        # Check all the fields match each other
        _check_message_match(message, decoded_message)
예제 #8
0
def test_anon_decode(x):
    """Encode a message with its typedef, decode it without one and compare.

    ``x`` is unpacked as a ``(typedef, message)`` pair. The message is decoded
    with an empty typedef, so field types are guessed. Where the guess
    differs from the original type in a known way (bytes/string guessed as a
    message, or bytes guessed as a string) one side is converted before the
    values are compared.

    Note that the comparison may rewrite values inside ``message`` and
    ``typedef`` in place while converting them.
    """
    config = Config()
    typedef, message = x
    encoded = length_delim.encode_lendelim_message(message, config, typedef)
    decoded, typedef_out, pos = length_delim.decode_lendelim_message(
        encoded, config, {}, 0)
    note("Original message: %r" % message)
    note("Decoded message: %r" % decoded)
    note("Original typedef: %r" % typedef)
    note("Decoded typedef: %r" % typedef_out)

    def _sort_key(value):
        """Sort key with special handling for dicts."""
        # NOTE(review): on Python 3 ``dict.items()`` views compare as sets
        # rather than ordering lexicographically, so this may not give dicts
        # a total order - confirm whether that matters here.
        return value.items() if isinstance(value, dict) else value

    def check_message(orig, orig_typedef, new, new_typedef):
        """Assert two messages hold the same fields and values, recursively."""
        for field_number in set(orig.keys()) | set(new.keys()):
            # verify all fields are there
            assert field_number in orig
            assert field_number in orig_typedef
            assert field_number in new
            assert field_number in new_typedef

            orig_values = orig[field_number]
            new_values = new[field_number]
            orig_type = orig_typedef[field_number]["type"]
            new_type = new_typedef[field_number]["type"]

            note("Parsing field# %s" % field_number)
            note("orig_values: %r" % orig_values)
            note("new_values: %r" % new_values)
            note("orig_type: %s" % orig_type)
            note("new_type: %s" % new_type)
            # Fields might be lists. Just convert everything to a list
            if not isinstance(orig_values, list):
                orig_values = [orig_values]
                assert not isinstance(new_values, list)
                new_values = [new_values]

            # if the types don't match, then try to convert them
            if new_type == "message" and orig_type in ["bytes", "string"]:
                # if the type is a message, we want to convert the orig type to a message
                # this isn't ideal, we'll be using the unintended type, but
                # best way to compare. Re-encoding a  message to binary might
                # not keep the field order
                new_field_typedef = new_typedef[field_number][
                    "message_typedef"]
                # Pick the encoder once; the rest of the conversion is the
                # same for bytes and string values.
                if orig_type == "bytes":
                    encode_value = length_delim.encode_bytes
                else:
                    encode_value = length_delim.encode_string
                for i, orig_value in enumerate(orig_values):
                    (
                        orig_values[i],
                        orig_field_typedef,
                        _,
                    ) = length_delim.decode_lendelim_message(
                        encode_value(orig_value),
                        config,
                        new_field_typedef,
                    )
                    orig_typedef[field_number][
                        "message_typedef"] = orig_field_typedef
                orig_type = "message"

            if new_type == "string" and orig_type == "bytes":
                # our bytes were accidentally a valid string
                new_type = "bytes"
                for i, new_value in enumerate(new_values):
                    new_values[i], _ = length_delim.decode_bytes(
                        length_delim.encode_string(new_value), 0)

            # zip() below stops at the shorter list, so a dropped or
            # duplicated value would otherwise go unnoticed.
            assert len(orig_values) == len(new_values)

            # sort the lists with special handling for dicts
            orig_values.sort(key=_sort_key)
            new_values.sort(key=_sort_key)
            for orig_value, new_value in zip(orig_values, new_values):
                if orig_type == "message":
                    check_message(
                        orig_value,
                        orig_typedef[field_number]["message_typedef"],
                        new_value,
                        new_typedef[field_number]["message_typedef"],
                    )
                else:
                    assert orig_value == new_value

    check_message(message, typedef, decoded, typedef_out)