def main(argv):
    """Report input-file symbols that match the configured block list.

    Reads undefined symbols from each input file, filters them against
    the 'symbol.block' regular expression, and writes the matches via
    the configured report output.

    Returns:
        Process exit status (0 on success; exceptions propagate).
    """
    status = 0
    # NOTE: the original wrapped everything in
    # `try: ... except Exception as exception: raise exception`,
    # a no-op catch-and-reraise; removed — propagation is identical.
    config = Config().init({
        **memdf.util.config.CONFIG,
        **memdf.collect.PREFIX_CONFIG,
        **memdf.collector.readelf.NM_CONFIG,
        **memdf.report.REPORT_CONFIG,
        **memdf.report.OUTPUT_CONFIG,
        **BLOCKLIST_CONFIG,
    })
    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
    config = config.parse(argv)

    block_re: Optional[Pattern] = config.get_re('symbol.block')
    if block_re is None:
        logging.warning('No block list')
    else:
        # Collect the undefined ('U') symbols from every input file.
        frames = []
        for filename in config.get('args.inputs', []):
            ssdf = memdf.collector.readelf.read_sources(config, filename)
            frames.append(ssdf[ssdf.kind == 'U'])
        ssdf = pd.concat(frames)
        # Keep only the symbols whose whole name matches the block list.
        ssdf = ssdf[ssdf.symbol.str.fullmatch(block_re)]
        memdf.report.write_dfs(config, {'Symbols': ssdf})
    return status
def postprocess_collected(config: Config, dfs: DFs) -> None:
    """Postprocess tables after reading all sources."""
    # Prune tables according to configuration options. Done before
    # fill_holes() so that the space of any pruned symbols is still
    # accounted for, and to avoid needless work on pruned sections.
    for table in (SymbolDF, SectionDF):
        if table.name not in dfs:
            continue
        dfs[table.name] = memdf.select.select_configured(
            config, dfs[table.name], memdf.select.COLLECTED_CHOICES)

    # Account for space not used by any symbol, or used by several.
    have_both = SymbolDF.name in dfs and SectionDF.name in dfs
    if have_both and config.get('args.fill_holes', True):
        dfs.update(
            fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name]))

    # Create synthetic columns (e.g. 'region') and prune per their
    # configuration. Done after fill_holes() so the gap symbols also
    # receive synthetic column values.
    for table in (SymbolDF, SectionDF):
        if table.name not in dfs:
            continue
        for column in memdf.select.SYNTHETIC_CHOICES:
            synthesized = memdf.select.synthesize_column(
                config, dfs[table.name], column)
            dfs[table.name] = memdf.select.select_configured_column(
                config, synthesized, column)

    # Tag columns that later report stages should demangle or hexify.
    for df in dfs.values():
        if demangle := {c for c in df.columns if c.endswith('symbol')}:
            df.attrs['demangle'] = demangle
        if hexify := {c for c in df.columns if c.endswith('address')}:
            df.attrs['hexify'] = hexify
def __init__(self, config: Config):
    """Bind the configuration, a GitHub client, and an open size database."""
    self.config = config
    self.gh = Gh(config)
    # Default to an in-memory database when no file is configured;
    # the database is writable unless explicitly marked read-only.
    writable = not config['database.readonly']
    self.db = memdf.sizedb.SizeDatabase(
        config.get('database.file', ':memory:'), writable)
    self.db.open()
def collect_files(config: Config,
                  files: Optional[List[str]] = None,
                  method: Optional[str] = None) -> DFs:
    """Read a filtered memory map from a set of files.

    Args:
        config: Active configuration; supplies defaults for `files`
            (from 'args.inputs') and `method` (from 'collect.method').
        files: Input file names; falls back to the configured inputs.
        method: Reader key into FILE_READERS; defaults to the configured
            collection method, or 'csv' if unset.

    Returns:
        Postprocessed data frames, keyed by table name, concatenated
        across all input files.
    """
    filenames = files if files else config.get('args.inputs', [])
    if method is None:
        method = config.get('collect.method', 'csv')
    # Accumulate each table's per-file frames, then concatenate once.
    frames: Dict[str, List[DF]] = {}
    for filename in filenames:
        file_dfs: DFs = FILE_READERS[method](config, filename, method)
        postprocess_file(config, file_dfs)
        for name, frame in file_dfs.items():
            frames.setdefault(name, []).append(frame)
    dfs: DFs = {
        name: pd.concat(frame_list, ignore_index=True)
        for name, frame_list in frames.items()
    }
    postprocess_collected(config, dfs)
    return dfs
def postprocess_selections(config: Config, key: str, info: Mapping) -> None:
    """Resolve select/ignore command options.

    Merges configured defaults (minus any explicitly ignored entries)
    into the selection list stored under `key`.
    """
    # split_size() presumably normalizes the raw option value (e.g. splits
    # compound entries) before merging — confirm against its definition.
    split_size(config, key)
    choice, select = key.split('.')
    assert select == 'select'
    # NOTE: `selections` aliases the list object stored in the config,
    # so the `+=` below extends the stored list in place; do not rebind.
    selections = config.get(key)
    if not config.getl([choice, 'ignore-all'], False):
        if defaults := config.getl([choice, 'default']):
            # Remove explicitly ignored entries from the defaults
            # before folding them into the selection.
            for i in config.getl([choice, 'ignore']):
                if i in defaults:
                    defaults.remove(i)
            selections += defaults
def gh_open(config: Config) -> Optional[ghapi.core.GhApi]:
    """Return a GhApi, if so configured."""
    # No repository configured: nothing to open.
    if not config['github.repository']:
        return None
    owner, repo = config.get('github.repository').split('/', 1)
    config.put('github.owner', owner)
    config.put('github.repo', repo)
    if not config['github.token']:
        # Fall back to the conventional environment variable.
        config['github.token'] = os.environ.get('GITHUB_TOKEN')
        if not config['github.token']:
            logging.error('Missing --github-token')
            return None
    # A token of 'SKIP' records the owner/repo configuration without
    # actually opening an API connection.
    if config['github.token'] == 'SKIP':
        return None
    return ghapi.all.GhApi(owner=owner,
                           repo=repo,
                           token=config['github.token'])
def main(argv): status = 0 try: cfg = { **memdf.util.config.CONFIG, **memdf.util.sqlite.CONFIG, **memdf.report.OUTPUT_CONFIG, **QUERY_CONFIG, } cfg['database.file']['argparse']['required'] = True config = Config().init(cfg) config.parse(argv) db = SizeDatabase(config['database.file'], writable=False) db.open() dfs = {} q = 0 for title, key, values, info in config.get('queries', []): q += 1 query = make_query(config, info) logging.debug('Option: %s', key) logging.debug('Title: %s', title) logging.debug('Query: %s', query.strip()) logging.debug('With: %s', values) cur = db.execute(query, values) columns = [i[0] for i in cur.description] rows = cur.fetchall() if rows: df = pd.DataFrame(rows, columns=columns) df.attrs = {'name': f'query{q}', 'title': title} for f in info['sql'].get('postprocess', []): df = f(config, df) dfs[df.attrs['name']] = df if build := config['query.build-sizes']: q += 1 if (df := query_build_sizes(config, db, build)) is not None: dfs[df.attrs['name']] = df
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols.

    Walks the symbols of each configured/allocated section in address
    order, synthesizing 'gap', 'unused', and 'overlap' rows for address
    ranges not covered by exactly one symbol.

    Returns:
        A dict with one ExtentDF per synthetic kind ('gap', 'unused',
        'overlap') plus an updated SymbolDF including the new rows.
    """
    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))

    # Optional per-symbol columns are carried through only when present.
    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    columns = ['symbol', *extent_columns, 'type', 'bind']

    def filler(name, address, size, previous, current) -> List:
        """Build one synthetic symbol row, inheriting section/file/etc.
        from the neighboring symbol (previous preferred, else current)."""
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            (previous.section if previous else
             current.section if current else memdf.name.UNDEF),  # section
            (previous.file if previous else
             current.file if current else ''),  # file
        ]
        if need_cu:
            row.append(
                previous.cu if previous else current.cu if current else '')
        if need_input:
            row.append(previous.input if previous else current.
                       input if current else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current,
                 from_address, to_address) -> Tuple[str, List]:
        """Add a row for a unaccounted gap or unused space."""
        size = to_address - from_address
        # Space bounded by a configured free-marker symbol (or section
        # start) is 'unused'; anything else is an unexplained 'gap'.
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current,
                     from_address, to_address) -> Tuple[str, List]:
        """Add a row for overlap."""
        # from_address > to_address here, so size is negative.
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size),
                       from_address, size, previous, current))

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for _, s in config.get('region.sections', {}).items():
        config_sections |= set(s)
    section_to_range = {}
    start_to_section = {}
    section_starts = [0]
    for s in sections.itertuples():
        if ((s.section in config_sections)
                or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address,
                                                s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    # Sorted so bisect can map an address to its containing section below.
    section_starts.sort()

    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }
    section_range = None
    previous_symbol = None
    current_address = 0

    # Walk real symbols (not SECTION/FILE markers) of tracked sections,
    # in address order, tracking the next unaccounted address.
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')
    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections.
            if previous_symbol and section_range:
                # previous_symbol is the last in its section.
                if current_address < section_range[-1] + 1:
                    # Close out the tail of the previous section.
                    use, row = fill_gap(previous_symbol, previous_symbol,
                                        current_address,
                                        section_range[-1] + 1)
                    new_symbols[use].append(row)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                # Hole before this symbol.
                use, row = fill_gap(previous_symbol, symbol,
                                    current_address, symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                # This symbol starts before the previous one ended.
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
            current_address = symbol.address + symbol.size
        previous_symbol = symbol

    # Merge the synthetic rows back into the symbol table and also
    # return each kind as its own extent table.
    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs
def get_limit(config: Config, column: str, name: str) -> int:
    """Return the limit configured for `column`/`name`, falling back to
    the global 'report.limit' (0 meaning no limit is configured)."""
    fallback = config.get('report.limit', 0)
    return config.getl([column, 'limit', name], fallback)
def synthesize_region(config: Config, df: DF, column: str) -> DF:
    """Add a 'region' column derived from the 'section' column."""
    # Invert the configured region→sections mapping to section→region.
    section_to_region = config.transpose_dictlist(
        config.get('region.sections', {}))
    memdf.util.pretty.debug(section_to_region)
    df[column] = df['section'].map(
        lambda section: section_to_region.get(section, memdf.name.UNKNOWN))
    return df
def make_query(config: Config, info: Mapping) -> str: """Construct an SQL query string for a simple SQL query option.""" args = {'where': '', 'order': '', 'limit': ''} if where := config.get('query.where'): if kw := info['sql'].get('where'): args['where'] = f'{kw} {where}'
title = title.format(**args) else: values = tuple() if config['queries'] is None: config['queries'] = [] cast(list, config['queries']).append((title, key, values, info)) def make_query(config: Config, info: Mapping) -> str: """Construct an SQL query string for a simple SQL query option.""" args = {'where': '', 'order': '', 'limit': ''} if where := config.get('query.where'): if kw := info['sql'].get('where'): args['where'] = f'{kw} {where}' if order := (config.get('query.order') or info['sql'].get('order')): args['order'] = f'ORDER BY {order}' if limit := config.get('query.limit'): args['limit'] = f'LIMIT {limit}' return info['sql']['query'].format(**args) def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame: """Convert a DataFrame ‘time’ column from Unix timestamp to ISO.""" df['time'] = df['time'].map( lambda t: datetime.datetime.utcfromtimestamp(t).isoformat()) return df def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame: """Given ‘parent_size’and ‘commit_size’ columns, add change columns."""