Exemple #1
0
 def setUp(self):
     """ Prepare a fresh fixture for each test

         Resets the TemporaryVariables and Label state, registers four
         symbols in a new SymbolTable and wraps that table in a
         CPLTransformer stored on self.transformer.
     """
     TemporaryVariables.reset()
     Label.reset()

     table = SymbolTable()
     # (name, type, third positional argument passed through unchanged)
     fixture_symbols = (
         ("a_int", Types.INT, 1),
         ("b_int", Types.INT, 1),
         ("c_float", Types.FLOAT, 2),
         ("d_float", Types.FLOAT, 2),
     )
     for name, symbol_type, extra in fixture_symbols:
         table.add_symbol(name, symbol_type, extra)

     self.transformer = CPLTransformer(table)
class SymbolTableGenerator(object):
    def __init__(self, elf_file):
        """ Parse the ELF file up front and keep the raw results

            elf_file:
                handed unchanged to ElfParser

            The ELF symbol table and the DWARF info are parsed here; the
            validated symbol table is only built on the first call to
            generate_symbol_table().
        """
        parser = ElfParser(elf_file)
        self._elf_parser = parser

        self._symbol_table = parser.parse_symbol_table()
        self._dwarf_info = parser.parse_dwarf_info()
        # filled in lazily by generate_symbol_table()
        self.valid_symbol_table = None

    """
    Public methods
    """

    def generate_symbol_table(self):
        """ build valid symbol table data structure
            compare symbol name and address from symbol table with dwarf info attributes
            valid symbol if address match and symbol name within dwarf attribute
            get the base type for all valid symbols
        """
        if self.valid_symbol_table is None:
            self.valid_symbol_table = SymbolTable()

            if self._symbol_table is None or self._dwarf_info is None:
                raise ValueError(
                    "Either Symbol Table or DWARF Info not populated")

            for entry in self._symbol_table:
                for abbrev in self._dwarf_info:
                    symbol_found = False
                    addr_found = False
                    for attr in self._dwarf_info[abbrev]["attr"]:
                        # There may be multiple symbol names so we do not match exact string
                        if isinstance(attr["desc"], str):
                            if attr["desc"] in entry.name:
                                if not symbol_found:
                                    symbol_found = True
                        # We do however, match address since its definitive
                        if isinstance(attr["desc"], int):
                            if attr["desc"] == entry.address:
                                if not addr_found:
                                    addr_found = True

                    # We only want symbols that have address in its DWARF attributes
                    # get symbol type also
                    if symbol_found and addr_found:
                        # time to go down rabbit holes...
                        base_offset, base_type, base_size = self._get_type(
                            abbrev)
                        if base_type == "struct":
                            # collect member info from struct
                            members = self._get_struct_or_union_members(
                                base_offset)
                            decoded_struct = self._decode_struct(members)
                            for member in decoded_struct:
                                # append struct info to its member
                                if member.data_type not in ["void", "ptr"]:
                                    member_name = "{0}.{1}".format(
                                        entry.name, member.name)
                                    member_address = entry.address + member.address
                                    data_type = self._decode_data_type(
                                        member.data_type, member.size)
                                    symbol_entry = Symbol(
                                        member_name, hex(member_address),
                                        str(member.size), data_type)
                                    self.valid_symbol_table.add_symbol(
                                        symbol_entry)
                        elif base_type in ["void", "ptr"]:
                            # do not include these in symbol table
                            pass
                        else:
                            # base types or pointers
                            entry.data_type = base_type
                            data_type = self._decode_data_type(
                                base_type, base_size)
                            symbol_entry = Symbol(entry.name,
                                                  hex(entry.address),
                                                  str(base_size), data_type)
                            self.valid_symbol_table.add_symbol(symbol_entry)
                        break

        return self.valid_symbol_table

    """
    Private methods
    """

    def _decode_struct(self, struct):
        """ decode all members in struct
            recursive call itself to decode structs within structs, if any exists

            struct:
                a list of Symbol objects or Symbol object

            return:
                a list of all members in struct
        """
        struct_list = []
        if isinstance(struct, list):
            for struct_member in struct:
                if isinstance(struct_member.data_type, list):
                    for member_list in struct_member.data_type:
                        members = self._decode_struct(member_list)
                        for member in members:
                            member_name = "{0}.{1}".format(
                                struct_member.name, member.name)
                            member_address = struct_member.address + member.address
                            member_symbol = Symbol(member_name, member_address,
                                                   member.size,
                                                   member.data_type)
                            struct_list.append(member_symbol)
                else:
                    struct_list.append(struct_member)
        elif isinstance(struct, Symbol):
            members = self._decode_struct(struct.data_type)
            for member in members:
                member_name = "{0}.{1}".format(struct.name, member.name)
                member_address = struct.address + member.address
                member_symbol = Symbol(member_name, member_address,
                                       member.size, member.data_type)
                struct_list.append(member_symbol)
            if not isinstance(struct.data_type, list):
                struct_list.append(struct)

        return struct_list

    @staticmethod
    def _decode_data_type(data_type, data_size):
        """ Given base type and size, translate to a generic set of
            data type enum
        """
        enum_type = ""
        sign = ""
        if "char" in data_type:
            # all data_type containing char normalized to char
            data_type = "char"
        elif "ptr" == data_type:
            # data size may vary
            data_type = "uintptr_t"
        elif data_type == "_Bool":
            data_type = "bool"
        elif "int" in data_type:
            size_to_data_type_map = {
                1: "int8_t",
                2: "int16_t",
                4: "int32_t",
                8: "int64_t"
            }
            if "unsigned" in data_type:
                sign = "u"
            data_type = size_to_data_type_map[data_size]
        elif data_type in ["float", "double"]:
            # IEEE 754: float: 4 bytes
            # IEEE 754: double: 8 bytes
            # data type unchanged
            pass
        else:
            raise ValueError("Unable to decode type: {0}".format(data_type))

        enum_type = "{0}{1}".format(sign, data_type)

        return DataType(enum_type)

    def _get_type(self, offset):
        """ get base symbol information
            recursive call to find base type, if needed

            offset:
                offset number in dwarf info to start search

            return:
                return tuple of base symbol information
        """
        symbol_offset = self._get_type_offset(offset)

        if symbol_offset:
            # we found the root
            symbol_type = self._get_description(symbol_offset, "DW_AT_name")
            symbol_size = self._get_description(symbol_offset,
                                                "DW_AT_byte_size")

            if self._is_struct_or_union(symbol_offset):
                # name the type struct for later processing
                symbol_type = "struct"
            elif self._is_pointer(symbol_offset):
                symbol_type = "ptr"
            elif not symbol_type:
                # name the type void for later processing
                symbol_type = "void"
            else:
                # base types
                pass
            # return tuple after we find base type
            return (symbol_offset, symbol_type, symbol_size)

    def _get_type_offset(self, offset):
        """ get descripton at offset for type

            offset:
                offset number in dwarf info

            return:
                offset location to find base type
        """
        if self._is_pointer(offset):
            # return early if pointer type
            return offset

        symbol_offset = self._get_description(offset, "DW_AT_type")

        if symbol_offset:
            return self._get_type_offset(symbol_offset)
        else:
            return offset

    def _get_struct_or_union_members(self, offset):
        """ get structure or union members starting at offset

            offset:
                offset number in dwarf info

            return:
                a list of Symbol object which contain struct member information
                it is possible for Symbol["type"] to be either a string or a list
                Symbol["type"] is a list if that member is a struct and ["type"] contains the struct member
        """
        struct_members = []

        for key, val in dropwhile(lambda x: x[0] != offset,
                                  self._dwarf_info.items()):
            # we start iterating from offset
            if val["offset"] == offset:
                # we skip it, we are interested in the struct members
                base_die_depth = val["depth"]
                continue

            if val["tag"] == "DW_TAG_member" and val[
                    "depth"] == base_die_depth + 1:
                member_name = self._get_description(key, "DW_AT_name")
                member_location_offset = self._get_description(
                    key, "DW_AT_data_member_location")
                if not member_location_offset:
                    # for union, since they do not have "DW_AT_data_member_location" type
                    member_location_offset = 0
                else:
                    member_location_offset = int(member_location_offset)
                base_member_offset, base_member_type, base_member_size = self._get_type(
                    key)
                if base_member_type == "struct":
                    # struct inside struct
                    base_member_type = self._get_struct_or_union_members(
                        base_member_offset)
                    # embed struct members in "type" key to be decoded

                new_struct_member = Symbol(member_name, member_location_offset,
                                           base_member_size, base_member_type)
                struct_members.append(new_struct_member)
            else:
                return struct_members

    def _is_struct_or_union(self, offset):
        """ check if struct or union tag exists at offset

            offset:
                offset number in dwarf info

            return:
                True if struct or union tag, else False
        """
        dwarf_tag = self._get_dwarf_tag(offset)
        return dwarf_tag in ["DW_TAG_structure_type", "DW_TAG_union_type"]

    def _is_pointer(self, offset):
        """ check if pointer tag exists at offset

            offset:
                offset number in dwarf info

            return:
                True if pointer tag, else False
        """
        dwarf_tag = self._get_dwarf_tag(offset)
        return (dwarf_tag == "DW_TAG_pointer_type")

    def _get_dwarf_tag(self, offset):
        """ check if abbrev tag exists at offset

            offset:
                offset number in dwarf info

            return:
                boolean value if tag matches
        """
        if self._dwarf_info is None:
            return

        tag = ""
        if offset in self._dwarf_info:
            tag = self._dwarf_info[offset]["tag"]

            if not tag:
                raise ValueError("Abbreviation tag not found")

        return tag

    def _get_description(self, offset, attr_type):
        """ get description for attribute type at offset

            offset:
                offset number in dwarf info

            attr_type:
                attribute type of interest

            return:
                return description for attribute type at offset if it exist
        """
        if self._dwarf_info is None:
            return

        if offset in self._dwarf_info:
            for attr in self._dwarf_info[offset]["attr"]:
                if attr["name"] == attr_type:
                    return attr["desc"]

        return None
Exemple #3
0
class ElfParser(object):
    def __init__(self, elf_file):
        """ Wrap elf_file in an ELFFile; nothing is parsed yet

            elf_file:
                handed unchanged to ELFFile
        """
        self._elf = ELFFile(elf_file)
        # caches, filled by parse_symbol_table() / parse_dwarf_info()
        self.symbol_table = self.dwarf_info = None

    """
    Public methods
    """

    def parse_symbol_table(self):
        """ build symbol table data structure

        :return: list of symbols
        """
        if self.symbol_table is None:
            self.symbol_table = SymbolTable()

            symbol_tables = [
                section for section in self._elf.iter_sections()
                if isinstance(section, SymbolTableSection)
            ]
            for section in symbol_tables:
                for symbol in section.iter_symbols():
                    if ((int(symbol["st_size"]) > 0)
                            and ("OBJECT" == describe_symbol_type(
                                symbol["st_info"]["type"]))):
                        symbol_entry = Symbol(symbol.name, symbol["st_value"],
                                              symbol["st_size"])
                        self.symbol_table.add_symbol(symbol_entry)

        return self.symbol_table

    def parse_dwarf_info(self):
        """ build dwarf info data structure

        :return: OrderedDict
        """
        if self.dwarf_info is None:
            self.dwarf_info = OrderedDict()

            logging.debug('Parsing DWARF Info...')
            dwarf_info = self._elf.get_dwarf_info()
            if not dwarf_info.has_debug_info:
                raise ValueError(
                    "Debug information not available in ELF file. \
                                    Symbol table will be empty")

            for cu in dwarf_info.iter_CUs():
                die_depth = 0
                for die in cu.iter_DIEs():

                    if die.is_null():
                        die_depth -= 1
                        continue

                    # abbreviation property of interest
                    abbreviation = OrderedDict()
                    abbreviation["depth"] = die_depth
                    abbreviation["offset"] = die.offset
                    abbreviation["code"] = die.abbrev_code
                    abbreviation["tag"] = die.tag if not die.is_null() else ""
                    abbreviation["attr"] = []

                    abbreviation_log_string = " <{0}><{1}>: Abbrev Number: {2} ({3})".format(
                        die_depth, hex(die.offset), die.abbrev_code, die.tag)
                    logging.debug(abbreviation_log_string)

                    for attr in itervalues(die.attributes):
                        description = self._get_attribute_description(
                            attr, die)

                        if description is not None:
                            attr_dict = OrderedDict()
                            attr_dict["offset"] = attr.offset
                            attr_dict["name"] = attr.name
                            attr_dict["desc"] = description
                            abbreviation["attr"].append(attr_dict)

                            log_description = hex(description) if isinstance(
                                description, int) else description
                            attribute_log_string = "    <{0}>   {1}: {2}".format(
                                hex(attr.offset), attr.name, log_description)
                            logging.debug(attribute_log_string)

                    if abbreviation["attr"]:
                        self.dwarf_info[die.offset] = abbreviation

                    if die.has_children:
                        die_depth += 1

        return self.dwarf_info

    """
    Private methods
    """

    def _get_attribute_description(self, attr, die):
        """ Use regex to parse attribute description (value)

        Only DW_AT_name, DW_AT_type, DW_AT_location,
        DW_AT_data_member_location and DW_AT_byte_size are handled.

        :return: None for any other attribute. For a handled attribute whose
                 textual description matches its pattern: an int parsed as
                 hexadecimal (DW_AT_type, DW_AT_location), an int parsed as
                 decimal (DW_AT_data_member_location, DW_AT_byte_size) or
                 the stripped name (DW_AT_name). When the pattern does not
                 match, the description text with trailing whitespace
                 removed.
        """
        # raw strings: "\w" and "\d" are not valid string escapes
        patterns = {
            "DW_AT_name": r"^([\w ]+\t)|: ([\w ]+\t)$",
            "DW_AT_type": r"^<(0x[\da-fA-F]+)>\t$",
            "DW_AT_location": r".*DW_OP_addr: ([\w]+)",
            "DW_AT_data_member_location": r"^([\d]+\t)$",
            "DW_AT_byte_size": r"^([\d]+\t)$",
        }

        description = describe_attr_value(attr, die, 0)

        pattern = patterns.get(attr.name)
        if pattern is None:
            # not an attribute of interest
            return None

        found = re.search(pattern, description)
        if found is None:
            return description.rstrip()

        groups = found.groups()
        if attr.name in ("DW_AT_type", "DW_AT_location"):
            return int(groups[0].rstrip(), 16)
        if attr.name in ("DW_AT_data_member_location", "DW_AT_byte_size"):
            return int(groups[0].rstrip())

        # DW_AT_name: the pattern has two alternatives, exactly one of
        # which captured the name
        return next(group for group in groups if group is not None).rstrip()
Exemple #4
0
# Do a second run of the assembly program
# Adds all @symbols to symbol table, with a corresponding address starting from 16
# If the address is already taken increment it by 1 until a free spot in RAM is found
# Generates all A and C-command mnemonics and binaries
while not assembly_program.is_parsed():
    current_command = assembly_program.advance()
    command_type = assembly_program.get_command_type(current_command)

    if command_type == 'A_COMMAND':
        # Everything after the first character of the command is the operand
        symbol = current_command[1:]

        # Check if the A_COMMAND is a decimal number or a @symbol (variable)
        if assembly_program.is_decimal(symbol):
            translated_program.append(convert_to_bin(symbol))
        elif not symbol_table.contains_symbol(symbol):
            # Symbol not seen before: register it, then emit its address
            symbol_table.add_symbol(symbol)
            translated_program.append(
                convert_to_bin(symbol_table.get_address(symbol)))
        else:
            # Symbol already in the table: emit the address stored for it
            translated_program.append(
                convert_to_bin(symbol_table.get_address(symbol)))

    else:  # It's a C_COMMAND
        dest_mnemonics = assembly_program.get_dest_mnemonics(current_command)
        comp_mnemonics = assembly_program.get_comp_mnemonics(current_command)
        jump_mnemonics = assembly_program.get_jump_mnemonics(current_command)

        # Generate a Translator object,
        # which contains the converted command mnemonics to bits
        mnemonics_to_bin = Translator(dest_mnemonics, comp_mnemonics,
                                      jump_mnemonics)