Example #1
0
def walk_binary(binr):
    """Scan a binary blob for embedded serialized file descriptors.

    The scan looks for every occurrence of ``.proto`` / ``.protodevel``,
    checks that it is the tail of a length-delimited field 1 record (the
    descriptor's "name" field), then follows the records that come right
    after it for as long as their tag bytes belong to an expected,
    non-decreasing sequence. The byte range found this way is parsed as a
    ``FileDescriptorProto``.

    Args:
        binr: Either the raw bytes to scan, or a filesystem path (``str``,
            ``str`` subclass or ``os.PathLike``) of a file that is read
            in binary mode.

    Yields:
        The result of ``descpb_to_proto()`` for each descriptor found.
    """
    # Function-local import so this block does not depend on the file's
    # import section.
    import os

    if isinstance(binr, (str, os.PathLike)):
        with open(binr, 'rb') as fd:
            binr = fd.read()

    # Search for:
    # ".proto" or ".protodevel", as part of the "name" (1) field
    cursor = 0
    while cursor < len(binr):
        cursor = binr.find(b'.proto', cursor)

        if cursor == -1:
            break
        # Move past the suffix; every `continue` below therefore resumes
        # the search after this occurrence.
        cursor += len(b'.proto')
        if binr[cursor:cursor + 5] == b'devel':
            cursor += 5

        # Search back for the (1, length-delimited) marker, looking at
        # most 1024 bytes behind the end of the name.
        start = binr.rfind(b'\x0a', max(cursor - 1024, 0), cursor)

        # A name of exactly 10 bytes has 0x0a as its length byte too: if
        # the byte found is that length byte, the real tag sits one byte
        # earlier.
        if start > 0 and binr[start - 1] == 0x0a == (cursor - start - 1):
            start -= 1

        # Check whether length byte is coherent
        if start == -1:
            continue
        varint, end = _DecodeVarint(binr, start + 1)
        if cursor - end != varint:
            continue

        # Look just after for subsequent markers. Each byte is a tag
        # (field number << 3 | wire type): fields 2-9 and 12 as
        # length-delimited, fields 10 and 11 as varint.
        tags = b'\x12\x1a\x22\x2a\x32\x3a\x42\x4a\x50\x58\x62'
        # A name that ends exactly at the end of the buffer has nothing
        # after it; without the bounds check binr[cursor] would raise
        # IndexError.
        if cursor >= len(binr) or binr[cursor] not in tags:
            continue

        while cursor < len(binr) and binr[cursor] in tags:
            tag = binr[cursor]
            # Drop the tags that precede the current one, so that later
            # records can only use the same or a following tag.
            tags = tags[tags.index(tag):]

            varint, end = _DecodeVarint(binr, cursor + 1)
            if tag & 0b111 == 2:
                # Length-delimited: the varint is the payload size.
                cursor = end + varint
            else:
                # Varint field: the value itself was just consumed.
                cursor = end

        # Parse descriptor
        proto = FileDescriptorProto()
        proto.ParseFromString(binr[start:cursor])

        # Convert to ascii
        yield descpb_to_proto(proto)
Example #2
0
def nest_and_print_to_files(msg_path_to_obj, msg_to_referrers):
    """Nest referenced messages/enums under their referrers and emit files.

    This is a generator that works in four passes:

    1. Every group, and every message/enum that has exactly one eligible
       referrer, is handed to ``merge_and_rename()``.
    2. Candidates left over from pass 1 are handed to
       ``merge_and_rename()`` when they and the referrer's top-most
       container import each other, or when they share an enum value
       name with another enum of the same package.
    3. Everything that is still top-level gets its name normalized and
       is passed to ``fix_naming()``.
    4. Top-level definitions are grouped into one ``FileDescriptorProto``
       per output path, rendered with ``descpb_to_proto()`` and yielded.

    Args:
        msg_path_to_obj: Mapping of message/enum path to its descriptor
            object. Objects that are not ``DescriptorProto`` instances
            are handled as enums (their ``value`` entries are read). The
            ``name`` attribute of top-level objects may be reassigned.
        msg_to_referrers: Mapping of message/enum path to a list of
            ``(field, referrer, is_group)`` tuples. Each list must be
            non-empty, since its first entry is indexed.

    Yields:
        ``(name, text)`` tuples: ``name`` is the first item returned by
        ``descpb_to_proto()`` and ``text`` is the second item prefixed
        with a ``/** ... */`` header listing the messages of the file.
    """
    # Bookkeeping shared with merge_and_rename()/fix_naming().
    # NOTE(review): these helpers are not visible here; presumably they
    # are what fills msg_to_topmost, msg_to_newloc and newloc_to_msg.
    msg_to_topmost = {}
    msg_to_newloc = {}
    newloc_to_msg = {}
    # Reverse index: referrer -> list of the messages/enums it refers to.
    msg_to_imports = defaultdict(list)
    for msg, referrers in msg_to_referrers.items():
        for _, referrer, _ in referrers:
            msg_to_imports[referrer].append(msg)

    # Iterate over referred to messages/groups/enums.

    # Merge groups first:
    # (the key is the negated is_group flag of the first referrer, so
    # entries whose flag is true sort before the others)
    msg_to_referrers = OrderedDict(
        sorted(msg_to_referrers.items(), key=lambda x: -x[1][0][2]))

    # msg -> [(field, referrer), ...] candidates kept for the second pass.
    mergeable = {}
    # "<package>.<enum value name>" -> enums that declare such a value.
    enumfield_to_enums = defaultdict(set)
    # enum -> qualified value names it shares with at least one other enum.
    enum_to_dupfields = defaultdict(set)

    # Iterate over a copy of the mapping.
    # NOTE(review): presumably because merge_and_rename() may modify
    # msg_to_referrers while we iterate - confirm.
    for msg, referrers in dict(msg_to_referrers).items():
        msg_pkg = get_pkg(msg)
        msg_obj = msg_path_to_obj[msg]

        # Check for duplicate enum fields in the same package:
        if not isinstance(msg_obj, DescriptorProto):
            for enum_field in msg_obj.value:
                name = msg_pkg + '.' + enum_field.name
                enumfield_to_enums[name].add(msg)

                if len(enumfield_to_enums[name]) > 1:
                    for other_enum in enumfield_to_enums[name]:
                        enum_to_dupfields[other_enum].add(name)

        first_field = referrers[0]
        field, referrer, is_group = first_field

        # Check whether message/enum has exactly one reference in this
        # package:
        if not is_group:
            # A referrer is kept when all of the following hold:
            #  - it is in the same package as msg (or msg has no package),
            #  - its top-most container is not msg itself,
            #  - it is not a map entry,
            #  - if msg's path contains '$', the part before '$' in the
            #    last dotted component is the same for msg and referrer.
            in_pkg = [(field, referrer) for field, referrer, _ in referrers \
                      if (get_pkg(referrer) == msg_pkg or not msg_pkg) \
                      and msg_to_topmost.get(referrer, referrer) != msg \
                      and not msg_path_to_obj[referrer].options.map_entry \
                      and ('$' not in msg or msg.split('.')[-1].split('$')[0] == \
                                        referrer.split('.')[-1].split('$')[0])]

            # Count distinct referrers, not references.
            if len({i for _, i in in_pkg}) != 1:
                # It doesn't. Keep for the next step
                if in_pkg:
                    mergeable[msg] = in_pkg
                continue
            else:
                field, referrer = in_pkg[0]

        else:
            # A group is expected to have exactly one referrer.
            assert len(referrers) == 1

        merge_and_rename(msg, referrer, msg_pkg, is_group, msg_to_referrers,
                         msg_to_topmost, msg_to_newloc, msg_to_imports,
                         msg_path_to_obj, newloc_to_msg)

    # Try to fix recursive (mutual) imports, and conflicting enum field names.
    for msg, in_pkg in mergeable.items():
        duplicate_enumfields = enum_to_dupfields.get(msg, set())

        # Try the referrers whose (new) location has the fewest dots
        # first, and stop at the first one that triggers a merge.
        for field, referrer in sorted(
                in_pkg,
                key=lambda x: msg_to_newloc.get(x[1], x[1]).count('.')):
            top_referrer = msg_to_topmost.get(referrer, referrer)

            # Merge when msg and the referrer's top-most container refer
            # to each other (and that container is not msg itself), or
            # when msg has enum value names that clash with another enum.
            if (msg in msg_to_imports[top_referrer] and \
                top_referrer in msg_to_imports[msg] and \
                msg_to_topmost.get(referrer, referrer) != msg) or \
                duplicate_enumfields:

                merge_and_rename(msg, referrer, get_pkg(msg), False,
                                 msg_to_referrers, msg_to_topmost,
                                 msg_to_newloc, msg_to_imports,
                                 msg_path_to_obj, newloc_to_msg)
                break

        # msg no longer counts as a holder of its clashing value names.
        # When a single other enum remains for a name, that name is not
        # a duplicate for it any more (pop() also empties the set).
        for dupfield in duplicate_enumfields:
            siblings = enumfield_to_enums[dupfield]
            siblings.remove(msg)
            if len(siblings) == 1:
                enum_to_dupfields[siblings.pop()].remove(dupfield)

    for msg, msg_obj in msg_path_to_obj.items():
        # If we're a top-level message, enforce name transforms anyway
        if msg not in msg_to_topmost:
            # Keep what follows the last '$' and upper-case the first
            # character.
            new_name = msg_obj.name.split('$')[-1]
            new_name = new_name[0].upper() + new_name[1:]

            msg_pkg = get_pkg(msg)
            if msg_pkg:
                msg_pkg += '.'

            if new_name != msg_obj.name:
                # Append '_' for as long as the candidate qualified name
                # resolves (through newloc_to_msg, else as is) to a known
                # path that is still top-level.
                while newloc_to_msg.get(msg_pkg + new_name, msg_pkg + new_name) in msg_path_to_obj and \
                      newloc_to_msg.get(msg_pkg + new_name, msg_pkg + new_name) not in msg_to_topmost:
                    new_name += '_'
                msg_obj.name = new_name

            fix_naming(msg_obj, msg_pkg + new_name, msg, msg, msg_to_referrers,
                       msg_to_topmost, msg_to_newloc, msg_to_imports,
                       msg_path_to_obj, newloc_to_msg)

    # Turn messages into individual files and stringify.

    # Output path -> FileDescriptorProto, in order of first appearance.
    path_to_file = OrderedDict()
    # Output path -> message paths listed in that file's header comment.
    path_to_defines = defaultdict(list)

    for msg, msg_obj in msg_path_to_obj.items():
        if msg not in msg_to_topmost:
            # The file path is the part of the message path before the
            # first '$', with dots turned into directory separators.
            path = msg.split('$')[0].replace('.', '/') + '.proto'

            if path not in path_to_file:
                path_to_file[path] = FileDescriptorProto()
                path_to_file[path].syntax = 'proto2'
                path_to_file[path].package = get_pkg(msg)
                path_to_file[path].name = path
            file_obj = path_to_file[path]

            # Declare a dependency for each referred-to definition that
            # is top-level and lives in another file, once per file.
            for imported in msg_to_imports[msg]:
                import_path = imported.split('$')[0].replace('.',
                                                             '/') + '.proto'
                if import_path != path and imported not in msg_to_topmost:
                    if import_path not in file_obj.dependency:
                        file_obj.dependency.append(import_path)

            if isinstance(msg_obj, DescriptorProto):
                nested = file_obj.message_type.add()
            else:
                nested = file_obj.enum_type.add()
            nested.MergeFrom(msg_obj)

            # List the definition itself, then everything whose top-most
            # container is this definition, except paths with '$map'.
            path_to_defines[path].append(msg)
            path_to_defines[path] += [
                k for k, v in msg_to_topmost.items()
                if v == msg and '$map' not in k
            ]

    for path, file_obj in path_to_file.items():
        name, proto = descpb_to_proto(file_obj)
        # Build the "/** ... */" header: one " * "-prefixed line per
        # defined message path.
        header_lines = ['/**', 'Messages defined in this file:\n']
        header_lines += path_to_defines[path]
        yield name, '\n * '.join(header_lines) + '\n */\n\n' + proto