Exemple #1
0
def process_file(filename):
    """Print the location information found in the DWARF data of an ELF file.

    Every attribute of every DIE in every compile unit is inspected; for
    each attribute that carries location information, the owning DIE's
    name, tag and the decoded location (a single expression or a location
    list) are printed.

    :param filename: path of the ELF file to inspect.
    :return: None. Returns early (after printing a notice) when the file
        has no DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup.
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object.
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(
                            attr, CU['version']):
                        continue

                    # Not every DIE with a location has a DW_AT_name (the
                    # name may live on a referenced DIE, or not exist at
                    # all), so fall back to a placeholder instead of
                    # raising KeyError.
                    if 'DW_AT_name' in DIE.attributes:
                        var_name = DIE.attributes['DW_AT_name'].value
                    else:
                        var_name = '(no name)'
                    print(' Varname:%s' % (var_name))
                    print('   DIE %s. attr %s.' % (DIE.tag, attr.name))
                    loc = loc_parser.parse_from_attribute(
                        attr, CU['version'])
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print('      %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo,
                                           indent='      '))
 def _test_file(self, filename):
     """Parse every location-bearing attribute of a unit-test ELF file.

     The file is looked up under test/testfiles_for_unittests. Nothing is
     asserted explicitly: the check is that parsing does not raise.
     """
     path = os.path.join('test', 'testfiles_for_unittests', filename)
     with open(path, 'rb') as stream:
         elffile = ELFFile(stream)
         dwarf = elffile.get_dwarf_info()
         parser = LocationParser(dwarf.location_lists())
         for unit in dwarf.iter_CUs():
             version = unit['version']
             for entry in unit.iter_DIEs():
                 for attribute in entry.attributes.values():
                     if not LocationParser.attribute_has_location(
                             attribute, version):
                         continue
                     # This will crash on unpatched library on DIE at 0x9f
                     parser.parse_from_attribute(attribute, version)
def extract_string_variables(elf):
    """
    Collect the string (char) variables described by the DWARF info of
    an ELF file.

    Walks every DIE of every compile unit and keeps the variables that
    have a type, a location and pass is_die_var_const_char(). Returns a
    list of dicts with the keys 'name', 'addr' and 'die'; only variables
    whose described location matches DT_LOCATION_REGEX with a non-zero
    address are included.
    """
    dwarf_info = elf.get_dwarf_info()
    loc_parser = LocationParser(dwarf_info.location_lists())

    found = []

    for unit in dwarf_info.iter_CUs():
        for die in unit.iter_DIEs():
            if die.tag != 'DW_TAG_variable':
                continue

            attrs = die.attributes
            if 'DW_AT_type' not in attrs or 'DW_AT_location' not in attrs:
                continue
            if not is_die_var_const_char(unit, die):
                continue

            # The location attribute yields the variable's address.
            loc_attr = attrs['DW_AT_location']
            version = die.cu['version']
            if not loc_parser.attribute_has_location(loc_attr, version):
                continue

            loc = loc_parser.parse_from_attribute(loc_attr, version)
            if not isinstance(loc, LocationExpr):
                continue

            try:
                described = describe_DWARF_expr(loc.loc_expr,
                                                dwarf_info.structs)
                matched = DT_LOCATION_REGEX.match(described)
                if matched:
                    address = int(matched.group(1), 16)
                    if address > 0:
                        found.append({
                            'name': die.attributes['DW_AT_name'].value,
                            'addr': address,
                            'die': die,
                        })
            except KeyError:
                # Best effort: entries raising KeyError are skipped.
                pass

    return found
Exemple #4
0
def test_dwarfinfo(di):
    """Smoke-test the DIE table model against every DIE of a DWARFInfo.

    For each non-null DIE the model is (re)loaded, every attribute row is
    checked for sane key/form names, every cell is rendered with
    Qt.DisplayRole, and the attribute details are requested.
    """
    # Some global cache setup in line with the app proper
    di._ranges = None
    di._CUs = list(di.iter_CUs())
    di._locparser = None

    model = False
    root_index = QModelIndex()
    for CU in di._CUs:
        top_die = CU.get_top_DIE()
        if 'DW_AT_name' in top_die.attributes:
            raw_name = top_die.attributes['DW_AT_name'].value
            print("%s" % strip_path(
                raw_name.decode('utf-8', errors='ignore')))
        else:
            print("(no name)")
        CU._lineprogram = None
        CU._exprparser = None

        for die in CU.iter_DIEs():
            if die.is_null():
                continue
            assert die.tag.startswith('DW_TAG_')

            if not model:
                # With prefix, with low level data, decimal
                model = DIETableModel(die, True, True, False, True)
            else:
                model.display_DIE(die)

            row_count = model.rowCount(root_index)
            col_count = model.columnCount(root_index)
            attr_names = list(die.attributes.keys())
            # Assuming rows correspond to attributes;
            # if we introduce non-attribute metadata into the DIE table,
            # this will break
            for row in range(model.meta_count, row_count):
                key = attr_names[row - model.meta_count]
                attr = die.attributes[key]
                form = attr.form

                # Check the elftools' results first:
                # is the key/form interpreted properly?
                assert str(key).startswith('DW_AT_')
                assert str(form).startswith('DW_FORM_')

                # Check if attributes with locations are all found.
                # The converse is not true; on DWARF2, location
                # expressions can have form DW_FORM_block1
                if form == 'DW_FORM_exprloc':
                    assert LocationParser.attribute_has_location(
                        attr, CU['version'])

                # Now check the spell out logic
                for col in range(col_count):
                    model.data(model.index(row, col, root_index),
                               Qt.DisplayRole)
                details = model.get_attribute_details(
                    model.index(row, 0, root_index))
                if form == 'DW_FORM_section_offset':
                    assert details is not None
Exemple #5
0
 def get_attribute_details(self, index):
     """Build a detail table model for the attribute shown at a row.

     Returns a GenericTableModel for DW_AT_ranges, for attributes that
     carry location information, and for DW_AT_stmt_list. Returns None
     for metadata rows, for any other attribute, and for DW_AT_ranges
     when the file provides no range lists.
     """
     row = index.row()
     if row < self.meta_count:
         return None

     key = self.keys[row - self.meta_count]
     attr = self.attributes[key]

     if key == "DW_AT_ranges":
         dwarfinfo = self.die.dwarfinfo
         if not dwarfinfo._ranges:
             dwarfinfo._ranges = dwarfinfo.range_lists()
         if not dwarfinfo._ranges:  # Absent in the DWARF file
             return None
         ranges = dwarfinfo._ranges.get_range_list_at_offset(attr.value)
         # TODO: handle base addresses. Never seen those so far...
         cu_base = get_cu_base(self.die)
         range_rows = ((hex(cu_base + entry.begin_offset),
                        hex(cu_base + entry.end_offset))
                       for entry in ranges)
         return GenericTableModel(("Start offset", "End offset"), range_rows)

     if LocationParser.attribute_has_location(attr, self.die.cu['version']):
         # Either a single expression or a location list
         parsed = self.parse_location(attr)
         if isinstance(parsed, LocationExpr):
             commands = ((cmd,) for cmd in self.dump_expr(parsed.loc_expr))
             return GenericTableModel(("Command",), commands)

         cu_base = get_cu_base(self.die)
         if self.lowlevel:
             headers = ("Start offset", "End offset", "Expr bytes", "Expression")
             values = ((hex(cu_base + entry.begin_offset),
                        hex(cu_base + entry.end_offset),
                        ' '.join("%02x" % b for b in entry.loc_expr),
                        '; '.join(self.dump_expr(entry.loc_expr)))
                       for entry in parsed)
         else:
             headers = ("Start offset", "End offset", "Expression")
             values = ((hex(cu_base + entry.begin_offset),
                        hex(cu_base + entry.end_offset),
                        '; '.join(self.dump_expr(entry.loc_expr)))
                       for entry in parsed)
         return GenericTableModel(headers, values)

     if key == 'DW_AT_stmt_list':
         cu = self.die.cu
         if cu._lineprogram is None:
             cu._lineprogram = self.die.dwarfinfo.line_program_for_CU(cu)
         entries = cu._lineprogram.get_entries()
         files = cu._lineprogram.header.file_entry

         def flag(value):
             # Render a boolean-ish state field as a 'Y' marker or blank
             return 'Y' if value else ''

         def format_state(state):
             if state.file > 0:
                 file_name = files[state.file - 1].name.decode('utf-8', errors='ignore')
             else:
                 file_name = '(N/A)'
             return (hex(state.address),
                     file_name,
                     state.line,
                     flag(state.is_stmt),
                     flag(state.basic_block),
                     flag(state.end_sequence),
                     flag(state.prologue_end),
                     flag(state.epilogue_begin))

         # Only entries that carry a state are listed
         states = [format_state(entry.state) for entry in entries if entry.state]
         # TODO: low level flavor with extra details
         # TODO: commands vs states
         return GenericTableModel(
             ('Address', 'File', 'Line', 'Stmt', 'Basic block',
              'End seq', 'End prologue', 'Begin epilogue'),
             states)

     return None
Exemple #6
0
def get_func_bounds(filename, function_name):
    """Return the address bounds of a function described in DWARF info.

    Scans every DW_TAG_subprogram DIE of the ELF file and returns the
    bounds of the first one whose DW_AT_name equals ``function_name``.

    :param filename: path of the ELF file to inspect.
    :param function_name: name to compare against the raw DW_AT_name value.
        NOTE(review): depending on the pyelftools version that value may be
        bytes; confirm callers pass a matching type.
    :return: ``(low_addr, high_addr)``, or None when the file has no DWARF
        info or no matching function is found. A missing DW_AT_low_pc
        counts as 0 and a missing DW_AT_high_pc yields ``high == low``.
    """
    # DW_AT_high_pc is an absolute address when its form is of class
    # "address", and an offset from DW_AT_low_pc when it is of class
    # "constant". Decide from the form rather than by comparing magnitudes,
    # which misfires for functions whose size is >= their start address.
    constant_forms = ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4',
                      'DW_FORM_data8', 'DW_FORM_data16', 'DW_FORM_sdata',
                      'DW_FORM_udata', 'DW_FORM_implicit_const')

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return None

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # Kept from the original: configures the descriptions module for
        # this file's architecture (a module-global side effect).
        set_global_machine_arch(elffile.get_machine_arch())

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                attrs = DIE.attributes
                name_attr = attrs.get("DW_AT_name")
                fname = name_attr.value if name_attr is not None else ""
                if fname != function_name:
                    continue

                low_attr = attrs.get("DW_AT_low_pc")
                low_addr = low_attr.value if low_attr is not None else 0

                high_attr = attrs.get("DW_AT_high_pc")
                if high_attr is None:
                    high_addr = low_addr
                elif high_attr.form in constant_forms:
                    high_addr = low_addr + high_attr.value
                else:
                    high_addr = high_attr.value
                return (low_addr, high_addr)

    return None
Exemple #7
0
 def _test_file(self, filename):
     """Assert that every DW_FORM_exprloc attribute counts as a location.

     The file is looked up under test/testfiles_for_unittests; each
     attribute with form DW_FORM_exprloc must be accepted by
     LocationParser.attribute_has_location for its CU's DWARF version.
     """
     path = os.path.join('test', 'testfiles_for_unittests', filename)
     with open(path, 'rb') as stream:
         elffile = ELFFile(stream)
         dwarf = elffile.get_dwarf_info()
         for unit in dwarf.iter_CUs():
             version = unit['version']
             for entry in unit.iter_DIEs():
                 for name, attribute in entry.attributes.items():
                     if attribute.form != 'DW_FORM_exprloc':
                         continue
                     recognized = LocationParser.attribute_has_location(
                         attribute, version)
                     self.assertTrue(
                         recognized,
                         "Attribute %s not recognized as a location" % name)
Exemple #8
0
 def format_value(self, attr):
     """Render a DIE attribute value for display.

     The checks run in a fixed order: address, flag and reference forms
     first, then location attributes, then a handful of attributes with
     special formatting, and finally generic handling by value type
     (bytes, list, anything else).
     """
     key, val, form = attr.name, attr.value, attr.form

     def with_description(table):
         # "<number> <description>" when the value is a known key of the
         # table, otherwise the raw value unchanged
         return "%d %s" % (val, table[val]) if val in table else val

     if form == 'DW_FORM_addr' and isinstance(val, int):
         return hex(val)
     if form == 'DW_FORM_flag_present':
         return ''
     if form in ('DW_FORM_ref0', 'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref_addr'):
         # There are several other reference forms in the spec
         return "Ref: 0x%x" % val

     if LocationParser.attribute_has_location(attr, self.die.cu['version']):
         parsed = self.parse_location(attr)
         if isinstance(parsed, LocationExpr):
             return '; '.join(self.dump_expr(parsed.loc_expr))
         return "Loc list: 0x%x" % attr.value

     if key == 'DW_AT_language':
         return with_description(_DESCR_DW_LANG)
     if key == 'DW_AT_encoding':
         return with_description(_DESCR_DW_ATE)
     if key == 'DW_AT_accessibility':
         return with_description(_DESCR_DW_ACCESS)
     if key == 'DW_AT_inline':
         return with_description(_DESCR_DW_INL)

     if key == 'DW_AT_decl_file':
         cu = self.die.cu
         if cu._lineprogram is None:
             cu._lineprogram = self.die.dwarfinfo.line_program_for_CU(cu)
         if val > 0:
             file_name = cu._lineprogram.header.file_entry[val - 1].name
             return "%d: %s" % (val, file_name.decode('utf-8', errors='ignore'))
         return "0: (N/A)"

     if key == 'DW_AT_stmt_list':
         return 'LNP at 0x%x' % val

     if isinstance(val, bytes):
         if form in ('DW_FORM_strp', 'DW_FORM_string'):
             return val.decode('utf-8', errors='ignore')
         if val == b'':  # What's a good value for a blank blob?
             return '[]'
         return ' '.join("%02x" % b for b in val)  # Something like "01 ff 33 55"

     if isinstance(val, list):  # block1 comes across as this
         if val == []:
             return '[]'
         if isinstance(val[0], int):  # Assuming it's a byte array disguised as int array
             return ' '.join("%02x" % b for b in val)
         return str(val)  # List of something else

     return hex(val) if self.hex and isinstance(val, int) else str(val)
Exemple #9
0
def get_var_offset(filename, function_name, var_name):
    """Return the DW_OP_fbreg argument of a variable inside a function.

    Looks for a DW_TAG_subprogram DIE named ``function_name`` and, among
    its direct children tagged DW_TAG_variable or DW_TAG_formal_parameter,
    for one named ``var_name``. The first argument of the DW_OP_fbreg
    operation of that child's DW_AT_location expression is returned; 0 is
    returned when the child has no such operation. Returns None when the
    file has no DWARF info or nothing matches.
    """
    with open(filename, 'rb') as stream:
        elffile = ELFFile(stream)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the starting point for all DWARF processing.
        dwarfinfo = elffile.get_dwarf_info()

        # Location lists come from the .debug_loc section.
        location_lists = dwarfinfo.location_lists()

        # Needed by the descriptions module to decode register names.
        set_global_machine_arch(elffile.get_machine_arch())

        # Turns DIE attributes into location objects.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            for DIE in CU.iter_DIEs():
                # Only functions are of interest
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                fname = ""
                for attr in itervalues(DIE.attributes):
                    if attr.name == "DW_AT_name":
                        fname = attr.value
                if fname != function_name:
                    continue

                for child in DIE.iter_children():
                    if child.tag not in ("DW_TAG_variable",
                                         "DW_TAG_formal_parameter"):
                        continue

                    name_matches = False
                    frame_offset = 0
                    for attr in itervalues(child.attributes):
                        if attr.name == "DW_AT_name" and attr.value == var_name:
                            name_matches = True
                        # Only the location attribute is decoded further
                        if attr.name != "DW_AT_location":
                            continue
                        loc = loc_parser.parse_from_attribute(
                            attr, CU['version'])
                        if not isinstance(loc, LocationExpr):
                            continue
                        expr_parser = DWARFExprParser(dwarfinfo.structs)
                        for op in expr_parser.parse_expr(loc.loc_expr):
                            if op.op_name == 'DW_OP_fbreg':
                                frame_offset = op.args[0]

                    if name_matches:
                        return frame_offset
Exemple #10
0
 def parse_location(self, attr):
     """Parse the location carried by an attribute of the current DIE.

     The LocationParser is created on first use and cached on the
     DWARFInfo object as ``_locparser``.
     """
     dwarfinfo = self.die.dwarfinfo
     parser = dwarfinfo._locparser
     if parser is None:
         parser = LocationParser(dwarfinfo.location_lists())
         dwarfinfo._locparser = parser
     return parser.parse_from_attribute(attr, self.die.cu['version'])
Exemple #11
0
def do_address_range_matching(elf, symbol_dict, processed):
    """
    Match symbols to source files indirectly, through address ranges.

    Works on the subprogram DIEs left in processed["unmapped_dies"]: a
    source path is resolved for each of them (directly, or by following
    DW_AT_abstract_origin / DW_AT_specification references inside the
    same compile unit), and every still-unmapped symbol whose address
    lies within the DIE's address range is mapped to that path. This
    covers symbols whose names differ from the DIE name, e.g.
    "<func>.constprop.*" or mangled C++ names.

    Updates processed["mapped_symbols"], processed["mapped_addr"] and
    processed["unmapped_symbols"]. Does nothing when "unmapped_dies" is
    missing from processed.
    """
    if "unmapped_dies" not in processed:
        return

    mapped_symbols = processed["mapped_symbols"]
    mapped_addresses = processed["mapped_addr"]
    unmapped_symbols = processed["unmapped_symbols"]
    newly_mapped_syms = set()

    dwarfinfo = elf.get_dwarf_info()
    location_parser = LocationParser(dwarfinfo.location_lists())

    # Group DIEs by compile unit
    dies_by_cu = {}
    for die in processed["unmapped_dies"]:
        dies_by_cu.setdefault(die.cu, set()).add(die)

    reference_attrs = ("DW_AT_abstract_origin", "DW_AT_specification")

    for cu, cu_dies in dies_by_cu.items():
        lineprog = dwarfinfo.line_program_for_CU(cu)

        # Look-up table from DIE offset to DIE for this compile unit
        dies_by_offset = {entry.offset: entry for entry in cu.iter_DIEs()}

        for die in cu_dies:
            if die.tag != "DW_TAG_subprogram":
                continue

            # Has direct reference to file, so use it
            path = None
            if "DW_AT_decl_file" in die.attributes:
                path = get_die_filename(die, lineprog)

            # Otherwise follow indirect references until a DIE with a
            # direct reference to a file is found
            die_ptr = die
            while path is None:
                if die_ptr.tag != "DW_TAG_subprogram":
                    break

                ref_name = next(
                    (name for name in reference_attrs
                     if name in die_ptr.attributes),
                    None)
                if ref_name is None:
                    break

                target = die_ptr.attributes[ref_name].value
                target += die_ptr.cu.cu_offset

                # There is nothing to reference so no need to continue
                if target not in dies_by_offset:
                    break

                die_ptr = dies_by_offset[target]
                if "DW_AT_decl_file" in die_ptr.attributes:
                    path = get_die_filename(die_ptr, lineprog)

            # Nothing to map
            if path is None:
                continue

            low, high = get_die_mapped_address(die, location_parser,
                                               dwarfinfo)
            if low is None:
                continue

            for ums in unmapped_symbols:
                for one_sym in symbol_dict[ums]:
                    symaddr = one_sym["symbol"]["st_value"]
                    if symaddr in mapped_addresses:
                        continue
                    if low <= symaddr < high:
                        one_sym["mapped_files"].add(path)
                        mapped_addresses.add(symaddr)
                        newly_mapped_syms.add(ums)

    processed["mapped_symbols"] = mapped_symbols.union(newly_mapped_syms)
    processed["mapped_addr"] = mapped_addresses
    processed["unmapped_symbols"] = unmapped_symbols.difference(
        newly_mapped_syms)
Exemple #12
0
def do_simple_name_matching(elf, symbol_dict, processed):
    """
    Map symbols to source files by matching DIE names to symbol names.

    Sequentially walks the DIEs of every compile unit that has a line
    program, derives a symbol name for variables and subprograms, and
    records the DIE's file for the symbol that match_symbol_address()
    selects. DIEs that could not be mapped are collected for later
    passes.

    Updates processed["mapped_symbols"], processed["mapped_addr"],
    processed["unmapped_symbols"] and sets processed["unmapped_dies"].
    """
    mapped_symbols = processed["mapped_symbols"]
    mapped_addresses = processed["mapped_addr"]
    unmapped_symbols = processed["unmapped_symbols"]
    newly_mapped_syms = set()

    dwarfinfo = elf.get_dwarf_info()
    location_parser = LocationParser(dwarfinfo.location_lists())

    unmapped_dies = set()

    for compile_unit in dwarfinfo.iter_CUs():
        lineprog = dwarfinfo.line_program_for_CU(compile_unit)
        if lineprog is None:
            continue

        # Look at variables and subprograms (i.e. functions) only
        for die in compile_unit.iter_DIEs():
            attrs = die.attributes
            sym_name = None

            if die.tag == "DW_TAG_variable":
                # having "DW_AT_location" means this maps
                # to an actual address (e.g. not an extern)
                if "DW_AT_location" in attrs:
                    sym_name = die.get_full_path()

            elif die.tag == "DW_TAG_subprogram":
                if ("DW_AT_abstract_origin" in attrs
                        or "DW_AT_specification" in attrs):
                    # Refers to another DIE for its name
                    unmapped_dies.add(die)
                elif ("DW_AT_low_pc" in attrs
                        and attrs["DW_AT_low_pc"].value != 0):
                    # "DW_AT_low_pc" means it maps to an actual address;
                    # DW_AT_low_pc == 0 is a weak function which has
                    # been overridden
                    sym_name = die.get_full_path()

                # For mangled function names, the linkage name
                # is what appears in the symbol list
                if "DW_AT_linkage_name" in attrs:
                    sym_name = attrs["DW_AT_linkage_name"].value.decode()

            # Skip DIEs without a name or with no reference back to a file
            if sym_name is None or "DW_AT_decl_file" not in attrs:
                continue

            is_die_mapped = False
            if sym_name in symbol_dict:
                mapped_symbols.add(sym_name)
                symbol = match_symbol_address(symbol_dict[sym_name], die,
                                              location_parser, dwarfinfo)

                if symbol is not None:
                    symaddr = symbol["symbol"]["st_value"]
                    if symaddr not in mapped_addresses:
                        is_die_mapped = True
                        symbol["mapped_files"].add(
                            get_die_filename(die, lineprog))
                        mapped_addresses.add(symaddr)
                        newly_mapped_syms.add(sym_name)

            if not is_die_mapped:
                unmapped_dies.add(die)

    processed["mapped_symbols"] = mapped_symbols.union(newly_mapped_syms)
    processed["mapped_addr"] = mapped_addresses
    processed["unmapped_symbols"] = unmapped_symbols.difference(
        newly_mapped_syms)
    processed["unmapped_dies"] = unmapped_dies
Exemple #13
0
    def __init__(self, elf_file_path, inputfile, resultdir):
        """Extract DWARF data from an ELF file, then trace it with pin.

        :param elf_file_path: path (str) of the ELF file to analyse.
        :param inputfile: path of a file whose content is fed to the traced
            program's standard input, or None to run without input.
        :param resultdir: directory for the "<elf file name>.out" result
            path handed to extractfromtrace().

        Returns early, before running pin, when the ELF file carries no
        DWARF info.
        """
        self.elf_file_path = elf_file_path
        self.result_file_path = self.elf_file_path + ".type"
        self.inputfile = inputfile
        self.resultdir = resultdir
        # To save the basic information.
        self.base_type_map = {}
        self.addr2type_map = {}
        self.CU_TYPE = None
        self.functions = []
        self.global_var = []

        # To support extract the dwarf
        self.loc_parser = None
        self.CU = None
        self.dwarfinfo = None

        print('Processing file:', self.elf_file_path)
        with open(self.elf_file_path, 'rb') as f:
            elffile = ELFFile(f)

            if not elffile.has_dwarf_info():
                print('  file has no DWARF info')
                return

            # get_dwarf_info returns a DWARFInfo context object, which is the
            # starting point for all DWARF-based processing in pyelftools.
            self.dwarfinfo = elffile.get_dwarf_info()

            # The location lists are extracted by DWARFInfo from the .debug_loc
            # section, and returned here as a LocationLists object.
            location_lists = self.dwarfinfo.location_lists()

            # This is required for the descriptions module to correctly decode
            # register names contained in DWARF expressions.
            set_global_machine_arch(elffile.get_machine_arch())

            # Create a LocationParser object that parses the DIE attributes and
            # creates objects representing the actual location information.
            self.loc_parser = LocationParser(location_lists)

            for CU in self.dwarfinfo.iter_CUs():
                print('  Found a compile unit at offset %s, length %s' %
                      (CU.cu_offset, CU['unit_length']))
                self.CU = CU
                self.CU_TYPE = get_compile_unit_types(self.CU)

                for die in CU.iter_DIEs():
                    if die.tag == 'DW_TAG_subprogram':
                        print("")
                        self.process_subprogram(die)
                    elif die.tag == 'DW_TAG_variable' and 'DW_AT_external' in die.attributes:
                        print("")
                        self.process_global_var(die)

            pincmd = [
                '../pin/pin', '-t',
                '../TaintAnalysisWithPin/obj-ia32/taint.so', '--',
                elf_file_path
            ]
            print(pincmd)
            # Split on a text separator: the path is used as str above
            # (str concatenation), and str.split(b'/') raises TypeError on
            # Python 3.
            result = self.resultdir + '/' + elf_file_path.split(
                '/')[-1] + ".out"
            print(result)
            try:
                if inputfile is not None:
                    print("fead input")
                    with open(self.inputfile, 'rb') as stdin_source:
                        stdin_lines = stdin_source.readlines()
                    for line in stdin_lines:
                        print("Give Std Input:%s" % line)
                    process = subprocess.Popen(pincmd,
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE)
                    # Hand the whole input to communicate(): it writes
                    # stdin and drains stdout together, so a chatty child
                    # cannot block us, and it closes stdin itself.
                    trace = process.communicate(b''.join(stdin_lines))[0]
                else:
                    print("running without input")
                    trace = subprocess.check_output(pincmd)
                tracelist = loadtrace(trace)
                extractfromtrace(tracelist, self.global_var, self.functions,
                                 result)

            except subprocess.CalledProcessError as e:
                # Actually interpolate the error into the message
                print("run pin error(%s)" % e)
Exemple #14
0
class ElfDwarf:
    """
    :elf_file_path(must): the path of elf file
    :addr2type_map: map address to type
    :base_type_map: map id to type 
    """
    def __init__(self, elf_file_path, inputfile, resultdir):
        """Parse the DWARF info of an ELF file, then trace the binary under Pin.

        :elf_file_path: path of the ELF file to analyse; the same path is
            the program launched under Pin.
        :inputfile: path of a file whose content is fed to the traced
            program's standard input, or None to run it without input.
        :resultdir: directory used to build the result path
            ``<resultdir>/<last component of elf_file_path>.out`` that is
            handed to extractfromtrace.

        If the file carries no DWARF info the constructor returns early and
        Pin is never started.
        """
        self.elf_file_path = elf_file_path
        self.result_file_path = self.elf_file_path + ".type"
        self.inputfile = inputfile
        self.resultdir = resultdir
        # To save the basic information.
        self.base_type_map = {}
        self.addr2type_map = {}
        self.CU_TYPE = None
        # Filled by process_subprogram / process_global_var below.
        self.functions = []
        self.global_var = []

        # State shared with the process_* helpers while walking the DIEs.
        self.loc_parser = None
        self.CU = None
        self.dwarfinfo = None

        print('Processing file:', self.elf_file_path)
        with open(self.elf_file_path, 'rb') as f:
            elffile = ELFFile(f)

            if not elffile.has_dwarf_info():
                print('  file has no DWARF info')
                return

            # get_dwarf_info returns a DWARFInfo context object, which is the
            # starting point for all DWARF-based processing in pyelftools.
            self.dwarfinfo = elffile.get_dwarf_info()

            # The location lists are extracted by DWARFInfo from the .debug_loc
            # section, and returned here as a LocationLists object.
            location_lists = self.dwarfinfo.location_lists()

            # This is required for the descriptions module to correctly decode
            # register names contained in DWARF expressions.
            set_global_machine_arch(elffile.get_machine_arch())

            # Create a LocationParser object that parses the DIE attributes and
            # creates objects representing the actual location information.
            self.loc_parser = LocationParser(location_lists)

            for CU in self.dwarfinfo.iter_CUs():
                # DWARFInfo allows to iterate over the compile units contained in
                # the .debug_info section. CU is a CompileUnit object, with some
                # computed attributes (such as its offset in the section) and
                # a header which conforms to the DWARF standard. The access to
                # header elements is, as usual, via item-lookup.
                print('  Found a compile unit at offset %s, length %s' %
                      (CU.cu_offset, CU['unit_length']))
                self.CU = CU
                self.CU_TYPE = get_compile_unit_types(self.CU)

                # Dispatch on the DIE tag: subprograms and external variables
                # are recorded, every other DIE is ignored at this level.
                for die in CU.iter_DIEs():
                    if die.tag == 'DW_TAG_subprogram':
                        print("")
                        self.process_subprogram(die)
                    elif (die.tag == 'DW_TAG_variable'
                          and 'DW_AT_external' in die.attributes):
                        print("")
                        self.process_global_var(die)

            pincmd = [
                '../pin/pin', '-t',
                '../TaintAnalysisWithPin/obj-ia32/taint.so', '--',
                elf_file_path
            ]
            print(pincmd)
            # Split on a native string: a bytes separator raises TypeError
            # for a str path on Python 3.
            result = (self.resultdir + '/' + elf_file_path.split('/')[-1] +
                      ".out")
            print(result)
            try:
                if inputfile is not None:
                    print("fead input")
                    # Read the input before starting the child so a missing
                    # file cannot leave an orphaned Pin process behind.
                    with open(self.inputfile, 'rb') as stdin_file:
                        stdin_lines = stdin_file.readlines()
                    for line in stdin_lines:
                        print("Give Std Input:%s" % line)
                    process = subprocess.Popen(pincmd,
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE)
                    # communicate() sends the data, closes stdin and drains
                    # stdout; writing to stdin by hand first can deadlock
                    # once the child fills the stdout pipe.
                    trace = process.communicate(b''.join(stdin_lines))[0]
                else:
                    print("running without input")
                    trace = subprocess.check_output(pincmd)
                tracelist = loadtrace(trace)
                extractfromtrace(tracelist, self.global_var, self.functions,
                                 result)

            except subprocess.CalledProcessError as e:
                # Only the check_output() path raises this; report the cause
                # instead of the unformatted placeholder.
                print("run pin error(%s)" % e)

    def show_loclist(self, loclist, dwarfinfo, indent):
        """ Display a location list nicely, decoding the DWARF expressions
            contained within.
        """
        d = []
        for loc_entity in loclist:
            if isinstance(loc_entity, LocationEntry):
                d.append('%s <<%s>>' %
                         (loc_entity,
                          describe_DWARF_expr(loc_entity.loc_expr,
                                              dwarfinfo.structs)))
            else:
                d.append(str(loc_entity))
        return '\n'.join(indent + s for s in d)

    def process_global_var(self, DIE):
        """Record a global variable DIE in ``self.global_var``.

        An entry ``{"name", "size", "type_name", "offset", "breg"}`` is
        appended only when get_variable_size_and_name yields both a size and
        a type name and one of the DIE's attributes is a LocationExpr whose
        extracted expression contains an ``<op>:<hex value>`` part. The first
        such part wins. Nothing is recorded otherwise.

        :DIE: the variable DIE to inspect; uses ``self.CU``,
            ``self.CU_TYPE``, ``self.loc_parser`` and ``self.dwarfinfo``.
        """
        if 'DW_AT_name' in DIE.attributes:
            name = DIE.attributes['DW_AT_name'].value
        else:
            name = None

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return

        for attr in itervalues(DIE.attributes):
            # Only attributes carrying location information are of interest.
            if not self.loc_parser.attribute_has_location(
                    attr, self.CU['version']):
                continue
            loc = self.loc_parser.parse_from_attribute(attr,
                                                       self.CU['version'])
            # We either get a list (in case the attribute is a reference to
            # the .debug_loc section) or a LocationExpr object (in case the
            # attribute itself contains location information). Only the
            # latter is handled here.
            if not isinstance(loc, LocationExpr):
                continue
            dwarf_expr_dumper = extract_DWARF_expr(loc.loc_expr,
                                                   self.dwarfinfo.structs)
            for item in dwarf_expr_dumper._str_parts:
                index = item.find(':')
                if index == -1:
                    # No operand to parse; skip it, as
                    # process_subprogram_variable does, instead of slicing
                    # with -1 and failing in int().
                    continue
                baseregister = item[:index]
                # The value starts two characters after the colon and is
                # parsed as hexadecimal.
                offset = int(item[index + 2:], 16)
                print("%s:%s:%d:%s" %
                      (name, baseregister, offset, variable_type_name))
                # Append only now, so a failure above never leaves a
                # half-filled entry behind.
                self.global_var.append({
                    "name": name,
                    "size": variable_size,
                    "type_name": variable_type_name,
                    "offset": offset,
                    "breg": baseregister,
                })
                return

    def process_subprogram(self, subprogram_die):

        # Print name, start_address and DW_AT_frame_base of the current function
        # print(subprogram_die)
        self.functions.append({})
        if 'DW_AT_name' in subprogram_die.attributes:
            self.functions[-1]["name"] = subprogram_die.attributes[
                'DW_AT_name'].value
        else:
            print("Does not find function name")
            self.functions[-1]["name"] = None
        print("function name")
        print(self.functions[-1]["name"])
        # try:
        #     dw_at_frame_base = subprogram_die.attributes['DW_AT_frame_base']
        # except:
        #     # I am not sure if every subprogram has a DW_AT_frame_base
        #     print(subprogram_die)
        #     print("subprogram [%s]  has no a DW_AT_frame_base (and thus no stack variables (?)). Skipping." % self.functions[-1]['name'])
        #     self.functions.pop()
        #     return

        if subprogram_die.has_children:
            #print "subprogram [%s] has children!" % self.functions[-1]['name']
            self.functions[-1]["stack_variables"] = []

            # Print names of all variables that are children of the current DIE (the current function)
            for child in subprogram_die.iter_children():
                if child.tag == 'DW_TAG_variable' or child.tag == 'DW_TAG_formal_parameter':
                    self.process_subprogram_variable(child)

    def process_subprogram_variable(self, DIE):
        if self.functions[-1].get("stack_variables") is None:
            return
        self.functions[-1]["stack_variables"].append({})
        try:
            self.functions[-1]["stack_variables"][-1]["name"] = DIE.attributes[
                'DW_AT_name'].value
        except KeyError:
            #print "subprogram_variable_die has no attribute 'DW_AT_name'"
            self.functions[-1]["stack_variables"][-1]["name"] = None

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size != None and variable_type_name != None:
            self.functions[-1]["stack_variables"][-1]["size"] = variable_size
            self.functions[-1]["stack_variables"][-1][
                "type_name"] = variable_type_name
        else:
            self.functions[-1]["stack_variables"].pop()
            return
        for attr in itervalues(DIE.attributes):
            # Check if this attribute contains location information
            # pdb.set_trace()
            if self.loc_parser.attribute_has_location(attr,
                                                      self.CU['version']):
                # print('   DIE %s. attr %s.' % (DIE.tag, attr.name))
                loc = self.loc_parser.parse_from_attribute(
                    attr, self.CU['version'])
                # We either get a list (in case the attribute is a
                # reference to the .debug_loc section) or a LocationExpr
                # object (in case the attribute itself contains location
                # information).
                if isinstance(loc, LocationExpr):
                    dwarf_expr_dumper = extract_DWARF_expr(
                        loc.loc_expr, self.dwarfinfo.structs)
                    exp_info = dwarf_expr_dumper._str_parts
                    for item in exp_info:
                        print(item)
                        index = item.find(':')
                        if index == -1:
                            continue
                        baseregister = item[0:item.find(':')]
                        if baseregister == "DW_OP_addr":
                            offset = int(item[item.find(':') + 1:], 16)
                            print(
                                "%s:%s:%s:%d:%s" %
                                (self.functions[-1]["name"], self.functions[-1]
                                 ["stack_variables"][-1]["name"], baseregister,
                                 offset, self.functions[-1]["stack_variables"]
                                 [-1]["type_name"]))
                            self.functions[-1]["stack_variables"][-1][
                                "offset"] = offset
                            self.functions[-1]["stack_variables"][-1][
                                "breg"] = baseregister
                        else:
                            offset = int(item[item.find(':') + 1:])
                            print(
                                "%s:%s:%s:%d:%s" %
                                (self.functions[-1]["name"], self.functions[-1]
                                 ["stack_variables"][-1]["name"], baseregister,
                                 offset, self.functions[-1]["stack_variables"]
                                 [-1]["type_name"]))
                            self.functions[-1]["stack_variables"][-1][
                                "offset"] = offset
                            self.functions[-1]["stack_variables"][-1][
                                "breg"] = baseregister

        if "breg" not in self.functions[-1]["stack_variables"][-1]:
            self.functions[-1]["stack_variables"].pop()