def from_symbols(config: Config, symbols: SymbolDF,
                 tree_name: str) -> 'SourceTree':
    """Build a SourceTree called `tree_name` from a symbol DataFrame.

    Every row of `symbols` adds one symbol node, identified by the row's
    `cu`, `symbol` and `size` fields.  When the `report.demangle` option
    is set, the symbol name is first passed through
    `memdf.report.demangle`.  `calculate_sizes()` is invoked on the tree
    once all rows have been added.
    """
    result = SourceTree(tree_name)
    for entry in symbols.itertuples():
        name = entry.symbol
        # The option is looked up per row, exactly as the rows are visited.
        name = (memdf.report.demangle(name)
                if config['report.demangle'] else name)
        result.symbol_node(entry.cu, name, entry.size)
    result.calculate_sizes()
    return result
Example #2
0
def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
    """Fold FILE and ARM mode marker symbols into per-symbol columns.

    The table is scanned in row order (only if it has a 'type' column):

    - A 'FILE' symbol becomes the current file for the rows that follow.
      When `collect.prefix-file` is enabled its name is first passed to
      `simplify_source` with the `collect.prefix` patterns.
    - A 'NOTYPE' symbol starting with '$' becomes the current ARM mode if
      a mode is already active or the name is in `ARM_SPECIAL_SYMBOLS`.

    If any FILE symbol was seen, a 'file' column is added and the FILE
    rows are dropped.  If any ARM mode symbol was seen, an 'arm' column is
    added and every row whose name is one of the mode symbols is dropped.

    Note that the new columns are assigned on the frame passed in; the
    filtered result is what gets returned.
    """
    prefixes = (config.get_re('collect.prefix')
                if config['collect.prefix-file'] else None)

    file_column = []
    arm_column = []
    mode_markers = set()
    active_file = ''
    active_arm = ''
    saw_file = False

    if 'type' in symbols.columns:
        for row in symbols.itertuples():
            kind = row.type
            if kind == 'FILE':
                saw_file = True
                active_file = row.symbol
                if prefixes:
                    active_file = simplify_source(active_file, prefixes)
            elif kind == 'NOTYPE' and row.symbol.startswith('$'):
                # Once a mode is active, any further '$' symbol switches it.
                if active_arm or row.symbol in ARM_SPECIAL_SYMBOLS:
                    active_arm = row.symbol
                    mode_markers.add(active_arm)
            # Every row, markers included, records the state in effect.
            file_column.append(active_file)
            arm_column.append(active_arm)

    # Both columns must be attached before any row is filtered out, while
    # their lengths still match the table.
    if saw_file:
        symbols['file'] = file_column
    if active_arm:
        symbols['arm'] = arm_column

    if saw_file:
        is_file_row = symbols['type'] == 'FILE'
        symbols = symbols[~is_file_row]
    if active_arm:
        is_mode_marker = symbols.symbol.isin(mode_markers)
        symbols = symbols[~is_mode_marker]
    return symbols
Example #3
0
def read_symbols(config: Config, filename: str) -> SymbolDF:
    """Collect the symbol map of the binary `filename` by running bloaty.

    bloaty is asked for tab-separated output of compile units, sections
    and symbols, without demangling and with `-n 0` (no row limit, per
    bloaty's usage documentation).  The columns of interest are renamed to
    `cu`, `section`, `symbol` and `size`, and each `cu` value is passed
    through `simplify_source` with the `collect.prefix` patterns.

    Returns an empty SymbolDF when the tool could not be started or has
    no output pipe.
    """
    command = [
        'bloaty', '--tsv', '--demangle=none', '-n', '0', '-d',
        'compileunits,sections,symbols', filename
    ]
    process = memdf.util.subprocess.run_tool_pipe(config, command)
    if not (process and process.stdout):
        return SymbolDF()

    # bloaty column name -> name used by the rest of the tool.
    renames = {
        'compileunits': 'cu',
        'sections': 'section',
        'symbols': 'symbol',
        'vmsize': 'size',
    }
    stream = io.TextIOWrapper(process.stdout, newline=os.linesep)
    table = pd.read_table(stream,
                          usecols=list(renames),
                          dtype=SymbolDF.dtype,
                          na_filter=False)
    table = table.rename(columns=renames)

    prefixes = config.get_re('collect.prefix')

    def shorten(source):
        return simplify_source(source, prefixes)

    table['cu'] = table['cu'].apply(shorten)
    return table
Example #4
0
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols.

    The symbols of every section that is either listed in the
    `region.sections` configuration or has the SHF_ALLOC flag are walked in
    address order, and synthetic rows are generated for:

    - 'unused': uncovered space at the start of a section, after a symbol
      named in `symbol.free.start`, or before one named in
      `symbol.free.end`;
    - 'gap': any other uncovered space, including the space between the
      last symbol of a section and the end of that section;
    - 'overlap': space where a symbol starts before the previous one
      ended.  These rows carry a negative size.

    Synthetic rows copy section/file (and cu/input, when those columns
    exist) from the preceding symbol, falling back to the following one.
    The `symbols` table must therefore provide a 'file' column.

    Returns a dict holding an ExtentDF for each of 'gap', 'unused' and
    'overlap', plus, under `SymbolDF.name`, the original symbols merged
    with all synthetic rows and sorted by address.
    """

    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))

    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    columns = ['symbol', *extent_columns, 'type', 'bind']

    def filler(name, address, size, previous, current) -> List:
        """Build one synthetic row, with values in `columns` order."""
        # Attributes are inherited from the preceding symbol when there is
        # one, otherwise from the following symbol.
        donor = previous if previous else current
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            donor.section if donor else memdf.name.UNDEF,  # section
            donor.file if donor else '',  # file
        ]
        if need_cu:
            row.append(donor.cu if donor else '')
        if need_input:
            row.append(donor.input if donor else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current, from_address,
                 to_address) -> Tuple[str, List]:
        """Add a row for a unaccounted gap or unused space."""
        size = to_address - from_address
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current, from_address,
                     to_address) -> Tuple[str, List]:
        """Add a row for overlap."""
        # from_address is past to_address here, so size is negative; the
        # generated name is given the positive magnitude.
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size), from_address,
                       size, previous, current))

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for names in config.get('region.sections', {}).values():
        config_sections |= set(names)
    section_to_range = {}
    start_to_section = {}
    section_starts = [0]
    for s in sections.itertuples():
        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address, s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    section_starts.sort()

    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }

    def close_section(last_symbol, address, extent) -> None:
        """Fill from the end of the last symbol to the end of its section."""
        # An empty or missing range is falsy, which also protects the
        # extent[-1] lookup for zero-sized sections.
        if last_symbol and extent:
            section_end = extent[-1] + 1
            if address < section_end:
                use, row = fill_gap(last_symbol, last_symbol, address,
                                    section_end)
                new_symbols[use].append(row)

    section_range = None
    previous_symbol = None
    current_address = 0
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')

    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections: previous_symbol, if any, is
            # the last in its section.
            close_section(previous_symbol, current_address, section_range)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                use, row = fill_gap(previous_symbol, symbol, current_address,
                                    symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
        current_address = symbol.address + symbol.size
        previous_symbol = symbol

    # The loop only closes a section on meeting the first symbol of the next
    # one, so the final section would otherwise never get its trailing gap.
    close_section(previous_symbol, current_address, section_range)

    dfs = {
        use: SymbolDF(rows, columns=columns)
        for use, rows in new_symbols.items()
    }
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs