Example #1
0
def read_symbols(config: Config, filename: str) -> SymbolDF:
    """Load the symbol table of the binary `filename` by running bloaty.

    bloaty is invoked with TSV output and the data sources
    `compileunits,sections,symbols`; its columns are then renamed to
    `cu`, `section`, `symbol` and `size`, and every `cu` value is passed
    through `simplify_source` with the `collect.prefix` patterns from
    `config`.

    Returns an empty `SymbolDF` when the tool could not be started or
    produced no stdout pipe.
    """
    # bloaty column name -> column name used in the returned frame.
    renames = {
        'compileunits': 'cu',
        'sections': 'section',
        'symbols': 'symbol',
        'vmsize': 'size',
    }
    # NOTE(review): `-n 0` presumably lifts bloaty's row limit -- confirm
    # against the bloaty usage text.
    command = [
        'bloaty', '--tsv', '--demangle=none', '-n', '0', '-d',
        'compileunits,sections,symbols', filename
    ]
    tool = memdf.util.subprocess.run_tool_pipe(config, command)
    if not (tool and tool.stdout):
        return SymbolDF()

    # Parse the tool's output directly from its pipe, keeping only the
    # columns that are going to be renamed.
    text_stream = io.TextIOWrapper(process_stdout := tool.stdout,
                                   newline=os.linesep) \
        if False else io.TextIOWrapper(tool.stdout, newline=os.linesep)
    table = pd.read_table(text_stream,
                          usecols=list(renames),
                          dtype=SymbolDF.dtype,
                          na_filter=False)
    table.rename(columns=renames, inplace=True)

    source_prefixes = config.get_re('collect.prefix')

    def shorten(source):
        return simplify_source(source, source_prefixes)

    table['cu'] = table['cu'].apply(shorten)
    return table
Example #2
0
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols.

    Walks the symbols in address order and, for every section of interest,
    synthesizes extra rows describing address ranges that are not covered
    by exactly one symbol:

    - 'unused':  space before the first symbol of a section, after a symbol
                 named in config `symbol.free.start`, or before a symbol
                 named in config `symbol.free.end`.
    - 'gap':     any other space between the end of one symbol and the
                 start of the next (or the end of the section).
    - 'overlap': the range where a symbol starts before the previous symbol
                 has ended.

    A section is of interest when it is listed in any value of config
    `region.sections` or has the `SHF_ALLOC` flag set. Symbols of type
    'SECTION' or 'FILE', and symbols in other sections, are ignored while
    walking (but are still present in the returned symbol frame).

    Args:
        config: Provides `symbol.free.start`, `symbol.free.end` and
            `region.sections`.
        symbols: Symbol rows; the columns `symbol`, `address`, `size`,
            `section`, `file` and `type` are read, plus `cu` and `input`
            when present.
        sections: Section rows; the columns `section`, `flags`, `address`
            and `size` are read.

    Returns:
        A dict with keys 'gap', 'unused' and 'overlap', each an `ExtentDF`
        of the synthesized rows (extent columns only, with
        `attrs['name']` set to the key), plus `SymbolDF.name` mapping to a
        `SymbolDF` holding the input symbols and all synthesized rows,
        with missing values replaced by '' and sorted by address.
    """

    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))

    # The optional 'cu' and 'input' columns are carried through only when
    # the incoming symbols already have them.
    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    # Column order of the rows built by filler() below.
    columns = ['symbol', *extent_columns, 'type', 'bind']

    def filler(name, address, size, previous, current) -> List:
        """Build one synthesized symbol row, in `columns` order.

        Section, file, cu and input are copied from `previous` when it is
        set, otherwise from `current`, otherwise a placeholder is used.
        """
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            (previous.section if previous else
             current.section if current else memdf.name.UNDEF),  # section
            (previous.file
             if previous else current.file if current else ''),  # file
        ]
        if need_cu:
            row.append(
                previous.cu if previous else current.cu if current else '')
        if need_input:
            row.append(previous.input if previous else current.
                       input if current else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current, from_address,
                 to_address) -> Tuple[str, List]:
        """Add a row for an unaccounted gap or unused space.

        Returns the kind ('unused' or 'gap') together with the new row.
        `current` must not be None; `previous` is None at the start of a
        section, in which case the space counts as 'unused'.
        """
        size = to_address - from_address
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current, from_address,
                     to_address) -> Tuple[str, List]:
        """Add a row for overlap.

        The caller passes the end of the previous symbol as `from_address`
        and the (lower) start of the current symbol as `to_address`, so
        `size` is negative here. The name is built from the positive
        magnitude, while the row itself stores the negative size.
        NOTE(review): presumably the negative size lets summed sizes
        cancel the double-counted bytes -- confirm with consumers.
        """
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size), from_address,
                       size, previous, current))

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for _, s in config.get('region.sections', {}).items():
        config_sections |= set(s)
    section_to_range = {}
    start_to_section = {}
    # Seeded with 0 so that the bisect lookup in the main loop always finds
    # an entry at or below any non-negative symbol address.
    section_starts = [0]
    for s in sections.itertuples():
        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address, s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    section_starts.sort()

    # Synthesized rows, grouped by kind.
    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }
    # Walk state: the range of the section being walked, the last symbol
    # processed in it (None at the start of a section), and the first
    # address not yet accounted for.
    section_range = None
    previous_symbol = None
    current_address = 0
    # Only real symbols inside the sections of interest are walked, in
    # ascending address order.
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')

    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            # Look up the section with the greatest start address that is
            # <= symbol.address; skip the symbol if that is not the section
            # it claims to belong to.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections.
            # (An empty range is falsy, so zero-size sections are not
            # filled.)
            if previous_symbol and section_range:
                # previous_symbol is the last in its section.
                # section_range[-1] + 1 is the exclusive end address of the
                # section; fill whatever is left after the last symbol.
                if current_address < section_range[-1] + 1:
                    use, row = fill_gap(previous_symbol, previous_symbol,
                                        current_address, section_range[-1] + 1)
                    new_symbols[use].append(row)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                # Space between the accounted-for address and this symbol.
                use, row = fill_gap(previous_symbol, symbol, current_address,
                                    symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                # This symbol starts before the previous one ended.
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
        current_address = symbol.address + symbol.size
        previous_symbol = symbol
    # NOTE(review): trailing space is only filled when the walk switches to
    # another section, so any space after the final symbol of the last
    # section walked is not reported -- confirm whether that is intended.

    # Wrap the synthesized rows, merge them into the full symbol table, and
    # reduce the per-kind frames to their extent columns.
    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs