Example #1
def main(argv):
    status = 0
    try:

        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.collect.PREFIX_CONFIG,
            **memdf.collector.readelf.NM_CONFIG,
            **memdf.report.REPORT_CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **BLOCKLIST_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
        config = config.parse(argv)

        block_re: Optional[Pattern] = config.get_re('symbol.block')
        if block_re is None:
            logging.warning('No block list')
        else:
            frames = []
            for filename in config.get('args.inputs', []):
                ssdf = memdf.collector.readelf.read_sources(config, filename)
                frames.append(ssdf[ssdf.kind == 'U'])
            ssdf = pd.concat(frames)
            ssdf = ssdf[ssdf.symbol.str.fullmatch(block_re)]
            memdf.report.write_dfs(config, {'Symbols': ssdf})
    except Exception:
        # Re-raise unchanged; a bare raise preserves the original traceback.
        raise

    return status
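
A conventional harness for a main(argv) entry point like the one above; this invocation is an assumption, not part of the original example:

import sys

if __name__ == '__main__':
    # main() receives the arguments after the program name and returns
    # a process exit status.
    sys.exit(main(sys.argv[1:]))
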
Example #2
def postprocess_collected(config: Config, dfs: DFs) -> None:
    """Postprocess tables after reading all sources."""

    # Prune tables according to configuration options. This happens before
    # fill_holes() so that space of any pruned symbols will be accounted for,
    # and to avoid unnecessary work for pruned sections.
    for c in [SymbolDF, SectionDF]:
        if c.name in dfs:
            dfs[c.name] = memdf.select.select_configured(
                config, dfs[c.name], memdf.select.COLLECTED_CHOICES)

    # Account for space not used by any symbol, or by multiple symbols.
    if (SymbolDF.name in dfs and SectionDF.name in dfs
            and config.get('args.fill_holes', True)):
        dfs.update(fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name]))

    # Create synthetic columns (e.g. 'region') and prune tables
    # according to their configuration. This happens after fill_holes()
    # so that synthetic column values will be created for the gap symbols.
    for c in [SymbolDF, SectionDF]:
        if c.name in dfs:
            for column in memdf.select.SYNTHETIC_CHOICES:
                dfs[c.name] = memdf.select.synthesize_column(
                    config, dfs[c.name], column)
                dfs[c.name] = memdf.select.select_configured_column(
                    config, dfs[c.name], column)

    for df in dfs.values():
        if demangle := set((c for c in df.columns if c.endswith('symbol'))):
            df.attrs['demangle'] = demangle
        if hexify := set((c for c in df.columns if c.endswith('address'))):
            df.attrs['hexify'] = hexify
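
The final loop records per-table formatting hints in DataFrame.attrs. A minimal standalone sketch of the same walrus-plus-comprehension pattern, with made-up column names:

import pandas as pd

df = pd.DataFrame({'symbol': ['main'], 'address': [0x1000]})
# Empty sets are falsy, so nothing is recorded when no column matches.
if demangle := {c for c in df.columns if c.endswith('symbol')}:
    df.attrs['demangle'] = demangle
if hexify := {c for c in df.columns if c.endswith('address')}:
    df.attrs['hexify'] = hexify
print(df.attrs)  # {'demangle': {'symbol'}, 'hexify': {'address'}}
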
Example #3
def __init__(self, config: Config):
    self.config = config
    self.gh = Gh(config)
    db_file = config.get('database.file', ':memory:')
    self.db = memdf.sizedb.SizeDatabase(db_file,
                                        not config['database.readonly'])
    self.db.open()
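
The ':memory:' fallback means that when no database.file is configured the size database lives only for the current process; the name is sqlite's own convention:

import sqlite3

# ':memory:' is sqlite's reserved name for a transient in-memory database.
conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE build (id INTEGER PRIMARY KEY, size INTEGER)')
conn.close()
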
Example #4
def collect_files(config: Config,
                  files: Optional[List[str]] = None,
                  method: Optional[str] = None) -> DFs:
    """Read a filtered memory map from a set of files."""
    filenames = files if files else config.get('args.inputs', [])
    if method is None:
        method = config.get('collect.method', 'csv')
    frames: Dict[str, List[DF]] = {}
    for filename in filenames:
        dfs: DFs = FILE_READERS[method](config, filename, method)
        postprocess_file(config, dfs)
        for k, frame in dfs.items():
            if k not in frames:
                frames[k] = []
            frames[k].append(frame)
    dfs = {}
    for k, v in frames.items():
        dfs[k] = pd.concat(v, ignore_index=True)
    postprocess_collected(config, dfs)
    return dfs
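
collect_files() gathers one list of frames per table name and concatenates each list once at the end, rather than calling pd.concat inside the loop. The accumulation pattern in isolation (the inner data is a stand-in for the per-file readers; setdefault is equivalent to the explicit membership test above):

import pandas as pd
from typing import Dict, List

frames: Dict[str, List[pd.DataFrame]] = {}
for i in range(3):  # stand-in for the loop over input files
    dfs = {'symbol': pd.DataFrame({'size': [i]})}
    for k, frame in dfs.items():
        frames.setdefault(k, []).append(frame)
# A single concat per table keeps the work linear in the number of files.
tables = {k: pd.concat(v, ignore_index=True) for k, v in frames.items()}
print(len(tables['symbol']))  # 3: one row per "file"
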
Example #5
def postprocess_selections(config: Config, key: str, info: Mapping) -> None:
    """Resolve select/ignore command options."""
    split_size(config, key)
    choice, select = key.split('.')
    assert select == 'select'
    selections = config.get(key)
    if not config.getl([choice, 'ignore-all'], False):
        if defaults := config.getl([choice, 'default']):
            for i in config.getl([choice, 'ignore'], []):
                if i in defaults:
                    defaults.remove(i)
            selections += defaults
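
The resolution rule is: keep explicit selections and, unless ignore-all is set, add the defaults minus anything explicitly ignored. With plain lists (the section names are hypothetical):

selections = ['.text']
defaults = ['.data', '.bss']
ignore = ['.bss']

# Remove ignored entries from the defaults, then append what is left.
for i in ignore:
    if i in defaults:
        defaults.remove(i)
selections += defaults
print(selections)  # ['.text', '.data']
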
Example #6
def gh_open(config: Config) -> Optional[ghapi.core.GhApi]:
    """Return a GhApi, if so configured."""
    gh: Optional[ghapi.core.GhApi] = None
    if config['github.repository']:
        owner, repo = config.get('github.repository').split('/', 1)
        config.put('github.owner', owner)
        config.put('github.repo', repo)
        if not config['github.token']:
            config['github.token'] = os.environ.get('GITHUB_TOKEN')
            if not config['github.token']:
                logging.error('Missing --github-token')
                return None
        token = config['github.token']
        if token != 'SKIP':
            gh = ghapi.all.GhApi(owner=owner,
                                 repo=repo,
                                 token=config['github.token'])
    return gh
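
gh_open() falls back to the GITHUB_TOKEN environment variable when --github-token is absent, and treats the literal token 'SKIP' as configured-but-offline. A usage sketch, assuming a parsed Config as in the earlier examples (the repository value is illustrative):

config.put('github.repository', 'owner/repo')  # normally set on the command line
if (gh := gh_open(config)) is None:
    raise SystemExit('GitHub access is not configured')
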
Example #7
def main(argv):
    status = 0
    try:
        cfg = {
            **memdf.util.config.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **QUERY_CONFIG,
        }
        cfg['database.file']['argparse']['required'] = True

        config = Config().init(cfg)
        config.parse(argv)

        db = SizeDatabase(config['database.file'], writable=False)
        db.open()

        dfs = {}

        q = 0
        for title, key, values, info in config.get('queries', []):
            q += 1
            query = make_query(config, info)
            logging.debug('Option: %s', key)
            logging.debug('Title: %s', title)
            logging.debug('Query: %s', query.strip())
            logging.debug('With: %s', values)
            cur = db.execute(query, values)
            columns = [i[0] for i in cur.description]
            rows = cur.fetchall()
            if rows:
                df = pd.DataFrame(rows, columns=columns)
                df.attrs = {'name': f'query{q}', 'title': title}
                for f in info['sql'].get('postprocess', []):
                    df = f(config, df)
                dfs[df.attrs['name']] = df

        if build := config['query.build-sizes']:
            q += 1
            if (df := query_build_sizes(config, db, build)) is not None:
                dfs[df.attrs['name']] = df
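
The query loop above builds a DataFrame from an arbitrary SQL result by taking column names from cursor.description. Standalone, with the standard library's sqlite3:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE t (name TEXT, size INTEGER)')
conn.execute("INSERT INTO t VALUES ('main', 42)")
cur = conn.execute('SELECT name, size FROM t')
# cursor.description holds one 7-tuple per column; item [0] is the name.
columns = [i[0] for i in cur.description]
df = pd.DataFrame(cur.fetchall(), columns=columns)
print(df)  # one row: main, 42
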
Example #8
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols."""

    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))

    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    columns = ['symbol', *extent_columns, 'type', 'bind']

    def filler(name, address, size, previous, current) -> List:
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            (previous.section if previous else
             current.section if current else memdf.name.UNDEF),  # section
            (previous.file
             if previous else current.file if current else ''),  # file
        ]
        if need_cu:
            row.append(
                previous.cu if previous else current.cu if current else '')
        if need_input:
            row.append(previous.input if previous else
                       current.input if current else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current, from_address,
                 to_address) -> Tuple[str, List]:
        """Add a row for a unaccounted gap or unused space."""
        size = to_address - from_address
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current, from_address,
                     to_address) -> Tuple[str, List]:
        """Add a row for overlap."""
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size), from_address,
                       size, previous, current))

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for _, s in config.get('region.sections', {}).items():
        config_sections |= set(s)
    section_to_range = {}
    start_to_section = {}
    section_starts = [0]
    for s in sections.itertuples():
        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address, s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    section_starts.sort()

    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }
    section_range = None
    previous_symbol = None
    current_address = 0
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')

    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections.
            if previous_symbol and section_range:
                # previous_symbol is the last in its section.
                if current_address < section_range[-1] + 1:
                    use, row = fill_gap(previous_symbol, previous_symbol,
                                        current_address, section_range[-1] + 1)
                    new_symbols[use].append(row)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                use, row = fill_gap(previous_symbol, symbol, current_address,
                                    symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
        current_address = symbol.address + symbol.size
        previous_symbol = symbol

    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs
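
fill_holes() maps a symbol's address back to the section that contains it with bisect_right over the sorted section start addresses. In isolation (the addresses are made up; 0 is the same sentinel used above for addresses below every section):

import bisect

start_to_section = {0x1000: '.text', 0x8000: '.data'}
section_starts = [0, *sorted(start_to_section)]

def owning_section(address: int):
    # Find the greatest section start <= address.
    start = section_starts[bisect.bisect_right(section_starts, address) - 1]
    return start_to_section.get(start)

print(owning_section(0x1234))  # '.text'
print(owning_section(0x0042))  # None: below the first section
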
Example #9
def get_limit(config: Config, column: str, name: str) -> int:
    return config.getl([column, 'limit', name], config.get('report.limit', 0))
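
get_limit() prefers a per-column limit and falls back to the global report.limit. The layered lookup mimicked with nested dicts (the keys are placeholders for memdf's real configuration):

options = {'symbol': {'limit': {'size': 20}}}
report_limit = 0  # global fallback, standing in for report.limit

def get_limit_sketch(column: str, name: str) -> int:
    return options.get(column, {}).get('limit', {}).get(name, report_limit)

print(get_limit_sketch('symbol', 'size'))   # 20: the per-column limit wins
print(get_limit_sketch('section', 'size'))  # 0: falls back to the global
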
Example #10
def synthesize_region(config: Config, df: DF, column: str) -> DF:
    """Add a 'region' column derived from the 'section' column."""
    cmap = config.transpose_dictlist(config.get('region.sections', {}))
    memdf.util.pretty.debug(cmap)
    df[column] = df['section'].map(lambda x: cmap.get(x, memdf.name.UNKNOWN))
    return df
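
The region column is a per-section dictionary lookup with a fallback for unmapped sections. The same map-with-default in plain pandas (the mapping stands in for the inverted region.sections configuration):

import pandas as pd

# transpose_dictlist turns {'flash': ['.text'], 'ram': ['.data', '.bss']}
# into a section -> region mapping like this one:
cmap = {'.text': 'flash', '.data': 'ram', '.bss': 'ram'}
df = pd.DataFrame({'section': ['.text', '.bss', '.debug_info']})
df['region'] = df['section'].map(lambda x: cmap.get(x, 'UNKNOWN'))
print(df['region'].tolist())  # ['flash', 'ram', 'UNKNOWN']
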

def make_query(config: Config, info: Mapping) -> str:
    """Construct an SQL query string for a simple SQL query option."""
    args = {'where': '', 'order': '', 'limit': ''}
    if where := config.get('query.where'):
        if kw := info['sql'].get('where'):
            args['where'] = f'{kw} {where}'
    if order := (config.get('query.order') or info['sql'].get('order')):
        args['order'] = f'ORDER BY {order}'
    if limit := config.get('query.limit'):
        args['limit'] = f'LIMIT {limit}'
    return info['sql']['query'].format(**args)
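
make_query() fills the {where}, {order}, and {limit} placeholders in a query template; unused placeholders become empty strings, so the template remains valid SQL. For example (the template and option values are illustrative):

info = {'sql': {
    'query': 'SELECT name, size FROM symbol {where} {order} {limit}',
    'where': 'WHERE',
}}
args = {'where': 'WHERE size > 0',
        'order': 'ORDER BY size DESC',
        'limit': 'LIMIT 10'}
print(info['sql']['query'].format(**args))
# SELECT name, size FROM symbol WHERE size > 0 ORDER BY size DESC LIMIT 10
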


def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame:
    """Convert a DataFrame ‘time’ column from Unix timestamp to ISO."""
    df['time'] = df['time'].map(
        lambda t: datetime.datetime.utcfromtimestamp(t).isoformat())
    return df
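
Applied to an epoch-seconds column, the conversion yields ISO-8601 strings; the timestamps below are chosen for illustration (the unused _config argument can be anything):

import pandas as pd

df = pd.DataFrame({'time': [0, 1609459200]})
df = postprocess_df_time(None, df)
print(df['time'].tolist())
# ['1970-01-01T00:00:00', '2021-01-01T00:00:00']
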


def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame:
    """Given ‘parent_size’and ‘commit_size’ columns, add change columns."""