def main(argv):
    status = 0
    try:
        config = Config().init(CONFIG)
        config.argparse.add_argument('inputs', metavar='FILE', nargs=2)
        config.parse(argv)
        config['args.fill_holes'] = False

        inputs = config.get('args.inputs')
        a_dfs = memdf.collect.collect_files(config, files=[inputs[0]])
        b_dfs = memdf.collect.collect_files(config, files=[inputs[1]])
        a_syms = a_dfs[SymbolDF.name].sort_values(by='symbol',
                                                  ignore_index=True)
        b_syms = b_dfs[SymbolDF.name].sort_values(by='symbol',
                                                  ignore_index=True)

        # TBD: Differences other than size, configurably.
        differences = []
        ai = a_syms.itertuples()
        bi = b_syms.itertuples()
        a = next(ai, None)
        b = next(bi, None)
        while a and b:
            if a.symbol < b.symbol:
                differences.append((-a.size, a.size, 0, a.symbol))
                a = next(ai, None)
                continue
            if a.symbol > b.symbol:
                differences.append((b.size, 0, b.size, b.symbol))
                b = next(bi, None)
                continue
            if a.size != b.size:
                differences.append((b.size - a.size, a.size, b.size,
                                    a.symbol))
            a = next(ai, None)
            b = next(bi, None)
        # Drain whichever side remains, including the tuple already fetched
        # when the other side ran out.
        if a:
            differences.append((-a.size, a.size, 0, a.symbol))
        for a in ai:
            differences.append((-a.size, a.size, 0, a.symbol))
        if b:
            differences.append((b.size, 0, b.size, b.symbol))
        for b in bi:
            differences.append((b.size, 0, b.size, b.symbol))

        df = pd.DataFrame(differences,
                          columns=['change', 'a-size', 'b-size', 'symbol'])
        if config['report.demangle']:
            # Demangle early to sort by demangled name.
            df['symbol'] = df['symbol'].apply(memdf.report.demangle)
            config['report.demangle'] = False
        df.sort_values(by=['change', 'symbol'],
                       ascending=[False, True],
                       inplace=True)

        memdf.report.write_dfs(config, {'Differences': df})

    except Exception as exception:
        status = 1
        raise exception

    return status
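# The merge above walks two symbol lists sorted by name, advancing only the
# side whose symbol compares smaller. A standalone sketch of the same merge,
# with made-up (symbol, size) pairs in place of SymbolDF rows, producing the
# same (change, a-size, b-size, symbol) tuples:
a_demo = [('bar', 10), ('foo', 4)]
b_demo = [('baz', 8), ('foo', 6)]
ai_demo, bi_demo = iter(a_demo), iter(b_demo)
diffs = []
a, b = next(ai_demo, None), next(bi_demo, None)
while a and b:
    if a[0] < b[0]:
        diffs.append((-a[1], a[1], 0, a[0]))
        a = next(ai_demo, None)
    elif a[0] > b[0]:
        diffs.append((b[1], 0, b[1], b[0]))
        b = next(bi_demo, None)
    else:
        if a[1] != b[1]:
            diffs.append((b[1] - a[1], a[1], b[1], a[0]))
        a, b = next(ai_demo, None), next(bi_demo, None)
assert diffs == [(-10, 10, 0, 'bar'), (8, 0, 8, 'baz'), (2, 4, 6, 'foo')]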
def postprocess_config(config: Config, _key: str, _info: Mapping) -> None:
    """Postprocess --github-repository."""
    if config['github.repository']:
        owner, repo = config.get('github.repository').split('/', 1)
        config.put('github.owner', owner)
        config.put('github.repo', repo)
        if not config['github.token']:
            config['github.token'] = os.environ.get('GITHUB_TOKEN')
            if not config['github.token']:
                logging.error('Missing --github-token')
def postprocess_output_metadata(config: Config, key: str) -> None:
    """For --output-metadata=KEY:VALUE list, convert to dictionary."""
    assert key == 'output.metadata'
    metadata = {}
    for s in config.get(key):
        if ':' in s:
            k, v = s.split(':', 1)
        else:
            k, v = s, True
        metadata[k] = v
    config.put(key, metadata)
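# A worked example of the conversion above, with hypothetical option values
# and without the Config wrapper: '--output-metadata=platform:efr32
# --output-metadata=tagged' arrives as a list and becomes a dictionary in
# which a bare key maps to True.
metadata_demo = {}
for s in ['platform:efr32', 'tagged']:
    if ':' in s:
        k, v = s.split(':', 1)
    else:
        k, v = s, True
    metadata_demo[k] = v
assert metadata_demo == {'platform': 'efr32', 'tagged': True}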
def main(argv):
    status = 0
    try:
        config = Config().init(CONFIG)
        config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
        config.parse(argv)
        config['collect.method'] = 'elftools'
        config['args.tag_inputs'] = True

        dfs = memdf.collect.collect_files(config)

        elf = {}
        for filename in config['args.inputs']:
            elf[filename] = {
                'elffile': ELFFile(open(filename, 'rb')),
                'section': {},
                'data': {},
                'limit': {},
            }

        with memdf.report.open_output(config) as fp:
            for i in dfs['gap'].itertuples():
                e = elf[i.input]
                if i.section in e['section']:
                    section = e['section'][i.section]
                    data = e['data'][i.section]
                    limit = e['limit'][i.section]
                else:
                    section = e['elffile'].get_section_by_name(i.section)
                    data = section.data()
                    limit = memdf.select.get_limit(config, 'section',
                                                   i.section)
                    e['section'][i.section] = section
                    e['data'][i.section] = data
                    e['limit'][i.section] = limit
                if limit and i.size < limit:
                    continue
                offset = i.address - section['sh_addr']
                assert section['sh_size'] == len(data)
                print('\n{:08X} length {} in section {} of {}'.format(
                    i.address, i.size, i.section, i.input),
                    file=fp)
                for line in hexdump(data, offset, i.size, i.address):
                    print(line, file=fp)

    except Exception as exception:
        status = 1
        raise exception

    return status
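# hexdump() is not defined in this excerpt. A minimal generator consistent
# with the call hexdump(data, offset, size, address) above might look like
# the following (hypothetical; the real line format may differ):
def hexdump_sketch(data: bytes, start: int, size: int, address: int = 0):
    """Yield hex+ASCII dump lines for data[start:start + size]."""
    end = start + size
    for i in range(start, end, 16):
        chunk = data[i:min(i + 16, end)]
        hexed = ' '.join(f'{b:02X}' for b in chunk)
        text = ''.join(chr(b) if 0x20 <= b < 0x7F else '.' for b in chunk)
        yield f'{address + i - start:08X}  {hexed:<47}  {text}'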
def main(argv):
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        dfs = {}
        with SizeDatabase(config) as db:
            db.read_inputs()
            dfs.update(report_matching_commits(db))
            dfs.update(report_queries(db))

        memdf.report.write_dfs(config,
                               dfs,
                               hierify=True,
                               title=True,
                               floatfmt='5.1f')

    except Exception as exception:
        raise exception

    return status
def select_and_ignore_config_desc(key: str) -> ConfigDescription:
    return {
        Config.group_map(key): {
            'group': 'select'
        },
        f'{key}.select': {
            'help':
                f'{key.capitalize()}(s) to process; otherwise all not ignored',
            'metavar': 'NAME',
            'default': [],
            'argparse': {
                'alias': [f'--{key}'],
            },
            'postprocess': postprocess_selections,
        },
        f'{key}.select-all': {
            'help': f'Select all {key}s',
            'default': False,
        },
        f'{key}.ignore': {
            'help': f'{key.capitalize()}(s) to ignore',
            'metavar': 'NAME',
            'default': [],
        },
        f'{key}.ignore-all': {
            'help': f'Ignore all {key}s unless explicitly selected',
            'default': False,
        },
    }
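# Illustration of the keys this factory generates for key='section'. The
# command-line spellings are determined by Config's key-to-argparse mapping,
# so the flags in these comments are assumptions, not guarantees:
#     section.select      -> --section NAME (repeatable)
#     section.select-all  -> process every section
#     section.ignore      -> section(s) to skip
#     section.ignore-all  -> skip all sections unless explicitly selected
section_desc_demo = select_and_ignore_config_desc('section')
assert 'section.select' in section_desc_demo
assert 'section.ignore-all' in section_desc_demo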
def main(argv):
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
            **DB_CONFIG,
            **REPORT_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        szc = SizeContext(config)
        szc.read_inputs()
        dfs = szc.report_matching_commits()

        memdf.report.write_dfs(config,
                               dfs,
                               hierify=True,
                               title=True,
                               floatfmt='5.1f')

    except Exception as exception:
        raise exception

    return status
def main(argv):
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.collect.PREFIX_CONFIG,
            **memdf.collector.readelf.NM_CONFIG,
            **memdf.report.REPORT_CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **BLOCKLIST_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
        config = config.parse(argv)

        block_re: Optional[Pattern] = config.get_re('symbol.block')
        if block_re is None:
            logging.warning('No block list')
        else:
            frames = []
            for filename in config.get('args.inputs', []):
                ssdf = memdf.collector.readelf.read_sources(config, filename)
                frames.append(ssdf[ssdf.kind == 'U'])
            ssdf = pd.concat(frames)
            ssdf = ssdf[ssdf.symbol.str.fullmatch(block_re)]
            memdf.report.write_dfs(config, {'Symbols': ssdf})

    except Exception as exception:
        raise exception

    return status
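# A tiny self-contained illustration of the fullmatch filter above, using
# made-up symbols and a made-up block-list pattern (Series.str.fullmatch
# accepts a compiled regex, as config.get_re returns one):
import re

import pandas as pd

blocklist_demo = pd.DataFrame({'symbol': ['printf', 'main', 'malloc'],
                               'kind': ['U', 'T', 'U']})
blocklist_demo = blocklist_demo[blocklist_demo.kind == 'U']
blocklist_demo = blocklist_demo[
    blocklist_demo.symbol.str.fullmatch(re.compile('printf|malloc'))]
# blocklist_demo now holds the undefined symbols 'printf' and 'malloc'.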
def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
    """Common argument parsing for collection tools."""
    config = Config().init({
        **memdf.util.config.CONFIG,
        **CONFIG,
        **config_desc,
    })
    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
    return config.parse(argv)
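# A minimal sketch of a collection tool built on parse_args(). The tool name
# is a placeholder; collect_files() and write_dfs() are the memdf entry
# points used by the main() functions elsewhere in this section, assumed
# importable here:
def example_tool_main(argv):
    config = parse_args({}, argv)
    dfs = memdf.collect.collect_files(config)
    memdf.report.write_dfs(config, dfs)
    return 0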
def main(argv):
    status = 0
    try:
        config = Config().init(CONFIG)
        config.argparse.add_argument('inputs', metavar='FILE', nargs=2)
        config.parse(argv)
        config['args.fill_holes'] = False

        inputs = config.get('args.inputs')
        a_dfs = memdf.collect.collect_files(config, files=[inputs[0]])
        b_dfs = memdf.collect.collect_files(config, files=[inputs[1]])
        a_syms = a_dfs[SymbolDF.name].sort_values(by='symbol')
        b_syms = b_dfs[SymbolDF.name].sort_values(by='symbol')

        # TBD: Differences other than size, configurably.
        differences = []
        ai = a_syms.itertuples()
        bi = b_syms.itertuples()
        # Prefetch one tuple from each side; each branch below advances only
        # the side it consumed, so neither iterator skips an element.
        a = next(ai, None)
        b = next(bi, None)
        while a is not None and b is not None:
            if a.symbol < b.symbol:
                differences.append((a.symbol, a.size, None))
                a = next(ai, None)
                continue
            if a.symbol > b.symbol:
                differences.append((b.symbol, None, b.size))
                b = next(bi, None)
                continue
            if a.size != b.size:
                differences.append((a.symbol, a.size, b.size))
            a = next(ai, None)
            b = next(bi, None)
        if a is not None:
            differences.append((a.symbol, a.size, None))
def main(argv):
    status = 0
    try:
        sqlite_config = memdf.util.sqlite.CONFIG
        sqlite_config['database.file']['argparse']['required'] = True
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **sqlite_config,
            **GITHUB_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        db = memdf.sizedb.SizeDatabase(config['database.file']).open()

        if gh := Gh(config):
            artifact_limit = config['github.limit-artifacts']
            artifacts_added = 0
            events = config['github.event']
            if not events:
                events = ['push']
            for a in gh.get_size_artifacts(label=config['github.label']):
                if events and a.event not in events:
                    logging.debug('Skipping %s artifact %d', a.event, a.id)
                    continue
                cur = db.execute('SELECT id FROM build WHERE artifact = ?',
                                 (a.id, ))
                if cur.fetchone():
                    logging.debug('Skipping known artifact %d', a.id)
                    continue
                blob = gh.download_artifact(a.id)
                if blob:
                    logging.info('Adding artifact %d %s %s %s %s', a.id,
                                 a.commit[:12], a.pr, a.event, a.group)
                    db.add_sizes_from_zipfile(io.BytesIO(blob),
                                              {'artifact': a.id})
                    db.commit()
                    artifacts_added += 1
                    if artifact_limit and artifact_limit <= artifacts_added:
                        break

        for filename in config['args.inputs']:
            db.add_sizes_from_file(filename)
            db.commit()
def main(argv):
    status = 0
    try:
        cfg = {
            **memdf.util.config.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **QUERY_CONFIG,
        }
        cfg['database.file']['argparse']['required'] = True
        config = Config().init(cfg)
        config.parse(argv)

        db = SizeDatabase(config['database.file'], writable=False)
        db.open()

        dfs = {}
        q = 0
        for title, key, values, info in config.get('queries', []):
            q += 1
            query = make_query(config, info)
            logging.debug('Option: %s', key)
            logging.debug('Title: %s', title)
            logging.debug('Query: %s', query.strip())
            logging.debug('With: %s', values)
            cur = db.execute(query, values)
            columns = [i[0] for i in cur.description]
            rows = cur.fetchall()
            if rows:
                df = pd.DataFrame(rows, columns=columns)
                df.attrs = {'name': f'query{q}', 'title': title}
                for f in info['sql'].get('postprocess', []):
                    df = f(config, df)
                dfs[df.attrs['name']] = df

        if build := config['query.build-sizes']:
            q += 1
            if (df := query_build_sizes(config, db, build)) is not None:
                dfs[df.attrs['name']] = df
def read_symbols(config: Config, filename: str) -> SymbolDF:
    """Read a binary's symbol map using bloaty."""
    column_map = {
        'compileunits': 'cu',
        'sections': 'section',
        'symbols': 'symbol',
        'vmsize': 'size',
    }
    process = memdf.util.subprocess.run_tool_pipe(config, [
        'bloaty', '--tsv', '--demangle=none', '-n', '0', '-d',
        'compileunits,sections,symbols', filename
    ])
    if not process or not process.stdout:
        return SymbolDF()
    df = pd.read_table(io.TextIOWrapper(process.stdout, newline=os.linesep),
                       usecols=list(column_map.keys()),
                       dtype=SymbolDF.dtype,
                       na_filter=False)
    df.rename(inplace=True, columns=column_map)
    prefixes = config.get_re('collect.prefix')
    df['cu'] = df['cu'].apply(lambda s: simplify_source(s, prefixes))
    return df
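# For reference, bloaty's tab-separated output names one column per '-d'
# data source plus size columns. A small runnable illustration of the
# usecols-and-rename step above, with an invented sample row:
import io as _io

import pandas as pd

_sample = ('compileunits\tsections\tsymbols\tvmsize\tfilesize\n'
           'src/main.cpp\t.text\tmain\t120\t120\n')
_demo = pd.read_table(_io.StringIO(_sample),
                      usecols=['compileunits', 'sections', 'symbols',
                               'vmsize'])
_demo.rename(inplace=True,
             columns={'compileunits': 'cu', 'sections': 'section',
                      'symbols': 'symbol', 'vmsize': 'size'})
assert list(_demo.columns) == ['cu', 'section', 'symbol', 'size']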
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Wrapper and utility functions around sqlite3"""

import sqlite3

from typing import List, Optional

from memdf import Config, ConfigDescription

CONFIG: ConfigDescription = {
    Config.group_def('database'): {
        'title': 'database options',
    },
    'database.file': {
        'help': 'Sqlite3 file',
        'metavar': 'FILENAME',
        'default': ':memory:',
        'argparse': {
            'alias': ['--db'],
        },
    },
}


class Database:
    """Wrapper and utility functions around sqlite3"""
from memdf.collector.util import simplify_source

PREFIX_CONFIG: ConfigDescription = {
    'collect.prefix': {
        'help': 'Strip PATH from the beginning of source file names',
        'metavar': 'PATH',
        'default': [],
        'argparse': {
            'alias': ['--prefix', '--strip-prefix'],
            'action': 'append',
        },
    },
}

CONFIG: ConfigDescription = {
    Config.group_def('input'): {
        'title': 'input options',
    },
    Config.group_def('tool'): {
        'title': 'external tool options',
    },
    Config.group_map('collect'): {
        'group': 'input'
    },
    **memdf.collector.bloaty.CONFIG,
    **memdf.collector.csv.CONFIG,
    **memdf.collector.elftools.CONFIG,
    **memdf.collector.readelf.CONFIG,
    'collect.method': {
        'help': 'Method of input processing',
        'metavar': 'METHOD',
def main(argv): status = 0 try: _, platform, config_name, target_name, binary, *args = argv except ValueError: program = pathlib.Path(argv[0]) logging.error( """ Usage: %s platform config target binary [output] [options] This is intended for use in github workflows. For other purposes, a general program for the same operations is %s/report_summary.py """, program.name, program.parent) return 1 try: config_file = pathlib.Path(platform) if config_file.is_file(): platform = config_file.stem else: config_file = (PLATFORM_CONFIG_DIR / platform).with_suffix('.cfg') output_base = f'{platform}-{config_name}-{target_name}-sizes.json' if args and not args[0].startswith('-'): out, *args = args output = pathlib.Path(out) if out.endswith('/') and not output.exists(): output.mkdir(parents=True) if output.is_dir(): output = output / output_base else: output = pathlib.Path(binary).parent / output_base config = Config().init({ **memdf.util.config.CONFIG, **memdf.collect.CONFIG, **memdf.select.CONFIG, **memdf.report.OUTPUT_CONFIG, **CONFIG, }) config.put('output.file', output) config.put('output.format', 'json_records') if config_file.is_file(): config.read_config_file(config_file) else: logging.warning('Missing config file: %s', config_file) config.parse([argv[0]] + args) config.put('output.metadata.platform', platform) config.put('output.metadata.config', config_name) config.put('output.metadata.target', target_name) config.put('output.metadata.time', config['timestamp']) config.put('output.metadata.input', binary) config.put('output.metadata.by', 'section') for key in ['event', 'hash', 'parent', 'pr', 'ref']: if value := config[key]: config.putl(['output', 'metadata', key], value) collected: DFs = memdf.collect.collect_files(config, [binary]) # Aggregate loaded segments, by writable (flash) or not (RAM). segments = collected[SegmentDF.name] segments['segment'] = segments.index segments['wr'] = ((segments['flags'] & 2) != 0).convert_dtypes( convert_boolean=False, convert_integer=True) segment_summary = segments[segments['type'] == 'PT_LOAD'][[ 'wr', 'size' ]].groupby('wr').aggregate(np.sum).reset_index().astype( {'size': np.int64}) segment_summary.attrs['name'] = "wr" sections = collected[SectionDF.name] sections = sections.join(on='segment', how='left', other=segments, rsuffix='-segment') section_summary = sections[['section', 'size', 'wr']].sort_values(by='section') section_summary.attrs['name'] = "section" summaries = { 'section': section_summary, 'memory': segment_summary, } # Write configured (json) report to the output file. memdf.report.write_dfs(config, summaries) # Write text report to stdout. memdf.report.write_dfs(config, summaries, sys.stdout, 'simple', floatfmt='.0f')
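# The 'flags & 2' test above checks the PF_W bit of the ELF program header
# flags (PF_X = 1, PF_W = 2, PF_R = 4): wr == 1 collects writable
# (RAM-resident) PT_LOAD segments and wr == 0 the read-only (flash-resident)
# ones. A toy run of the same groupby, with invented sizes:
import numpy as np
import pandas as pd

_segs = pd.DataFrame({'wr': [0, 0, 1], 'size': [0x3000, 0x500, 0x1200]})
_summary = _segs.groupby('wr').aggregate(np.sum).reset_index()
# _summary: wr == 0 totals 0x3500 (flash); wr == 1 totals 0x1200 (RAM)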
        'metavar': 'NAME',
        'default': [],
    },
    f'{key}.ignore-all': {
        'help': f'Ignore all {key}s unless explicitly selected',
        'default': False,
    },
}


SECTION_CONFIG = select_and_ignore_config_desc('section')
SYMBOL_CONFIG = select_and_ignore_config_desc('symbol')
REGION_CONFIG = select_and_ignore_config_desc('region')

CONFIG: ConfigDescription = {
    Config.group_def('select'): {
        'title': 'selection options',
    },
    **SECTION_CONFIG,
    **SYMBOL_CONFIG,
    **REGION_CONFIG,
}

COLLECTED_CHOICES = ['symbol', 'section']
SYNTHETIC_CHOICES = ['region']
SELECTION_CHOICES = COLLECTED_CHOICES + SYNTHETIC_CHOICES


def is_selected(config: Config, column, name) -> bool:
    """Test `name` against the configured selection criteria for `column`."""
    if config.getl([column, 'select-all']):
# """Fetch data from GitHub size artifacts.""" import io import logging import sys import memdf.sizedb import memdf.util.config import memdf.util.markdown import memdf.util.sqlite from memdf.util.github import Gh from memdf import Config, ConfigDescription GITHUB_CONFIG: ConfigDescription = { Config.group_def('github'): { 'title': 'github options', }, 'github.event': { 'help': 'Download only event type(s) (default ‘push’)', 'metavar': 'EVENT', 'default': [], 'argparse': { 'alias': ['--event'] }, }, 'github.limit-artifacts': { 'help': 'Download no more than COUNT artifacts', 'metavar': 'COUNT', 'default': 0, 'argparse': {
def postprocess_report_by(config: Config, key: str, _info: Mapping) -> None:
    """For --report-by=region, select all sections."""
    assert key == 'report.by'
    if config.get(key) == 'region':
        config.put('section.select-all', True)
from typing import (Any, Callable, Dict, List, Mapping, IO, Optional,
                    Protocol, Sequence, Union)

import cxxfilt  # type: ignore
import pandas as pd  # type: ignore

import memdf.df
import memdf.select
import memdf.util.pretty
from memdf import Config, ConfigDescription, DF, DFs
from memdf.util.config import ParseSizeAction

REPORT_DEMANGLE_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.demangle': {
        'help': 'Demangle C++ symbol names',
        'default': False,
        'argparse': {
            'alias': ['--demangle', '-C'],
            'action': 'store_true',
        },
    },
}

REPORT_LIMIT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
import datetime
import logging
import sys

from typing import cast, Dict, List, Mapping, Optional, Tuple

import pandas as pd  # type: ignore

import memdf.report
import memdf.util.config
import memdf.util.sqlite
from memdf.sizedb import SizeDatabase
from memdf import Config

QUERY_CONFIG = {
    Config.group_map('query'): {
        'group': 'output'
    },
    'report.increases': {
        'help': 'Highlight large increases',
        'metavar': 'PERCENT',
        'default': 0.0,
        'argparse': {
            'alias': ['--threshold'],
            'type': float,
        },
    },
    'query.where': {
        'help': 'SQL filter',
        'metavar': 'SQL-EXPR',
        'default': '',
def main(argv):
    status = 0

    try:
        _, platform, config_name, target_name, binary, *args = argv
    except ValueError:
        program = pathlib.Path(argv[0])
        logging.error(
            """
            Usage: %s platform config target binary [output] [options]

            This is intended for use in github workflows. For other purposes,
            a general program for the same operations is
            %s/report_summary.py
            """, program.name, program.parent)
        return 1

    try:
        config_file = pathlib.Path(platform)
        if config_file.is_file():
            platform = config_file.stem
        else:
            config_file = (PLATFORM_CONFIG_DIR / platform).with_suffix('.cfg')

        output_base = f'{platform}-{config_name}-{target_name}-sizes.json'
        if args and not args[0].startswith('-'):
            out, *args = args
            output = pathlib.Path(out)
            if out.endswith('/') and not output.exists():
                output.mkdir(parents=True)
            if output.is_dir():
                output = output / output_base
        else:
            output = pathlib.Path(binary).parent / output_base

        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.collect.CONFIG,
            **memdf.select.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **CONFIG,
        })
        config.put('output.file', output)
        config.put('output.format', 'json_records')
        if config_file.is_file():
            config.read_config_file(config_file)
        else:
            logging.warning('Missing config file: %s', config_file)
        config.parse([argv[0]] + args)

        config.put('output.metadata.platform', platform)
        config.put('output.metadata.config', config_name)
        config.put('output.metadata.target', target_name)
        config.put('output.metadata.time', config['timestamp'])
        config.put('output.metadata.input', binary)
        config.put('output.metadata.by', 'section')
        for key in ['event', 'hash', 'parent', 'pr']:
            if value := config[key]:
                config.putl(['output', 'metadata', key], value)

        collected: DFs = memdf.collect.collect_files(config, [binary])

        sections = collected[SectionDF.name]
        section_summary = sections[['section',
                                    'size']].sort_values(by='section')
        section_summary.attrs['name'] = "section"

        summaries = {
            'section': section_summary,
        }

        # Write configured (json) report to the output file.
        memdf.report.write_dfs(config, summaries)

        # Write text report to stdout.
        memdf.report.write_dfs(config, summaries, sys.stdout, 'simple')
from pathlib import Path
from typing import Dict, IO, Iterable, Optional, Union

import dateutil  # type: ignore
import fastcore  # type: ignore
import ghapi.all  # type: ignore
import pandas as pd  # type: ignore

import memdf.report
import memdf.util.config
import memdf.util.sqlite
from memdf import Config, ConfigDescription

GITHUB_CONFIG: ConfigDescription = {
    Config.group_def('github'): {
        'title': 'github options',
    },
    'github.token': {
        'help': 'Github API token, or "SKIP" to suppress connecting to github',
        'metavar': 'TOKEN',
        'default': '',
        'argparse': {
            'alias': ['--github-api-token', '--token'],
        },
    },
    'github.repository': {
        'help': 'Github repository',
        'metavar': 'OWNER/REPO',
        'default': '',
        'argparse': {