def main(argv):
    """Compare the symbols of two binaries and report size differences.

    Takes exactly two input files. Writes a report of symbols whose size
    differs between the two, including symbols present in only one input.
    Returns a process exit status (0 on success).
    """
    status = 0
    try:
        config = Config().init(CONFIG)
        config.argparse.add_argument('inputs', metavar='FILE', nargs=2)
        config.parse(argv)
        config['args.fill_holes'] = False

        inputs = config.get('args.inputs')

        a_dfs = memdf.collect.collect_files(config, files=[inputs[0]])
        b_dfs = memdf.collect.collect_files(config, files=[inputs[1]])

        a_syms = a_dfs[SymbolDF.name].sort_values(by='symbol',
                                                  ignore_index=True)
        b_syms = b_dfs[SymbolDF.name].sort_values(by='symbol',
                                                  ignore_index=True)

        # TBD: Differences other than size, configurably.
        # Merge join over the two symbol streams, both sorted by name.
        # Each difference is (change, a-size, b-size, symbol).
        differences = []
        ai = a_syms.itertuples()
        bi = b_syms.itertuples()
        a = next(ai, None)
        b = next(bi, None)
        while a is not None and b is not None:
            if a.symbol < b.symbol:
                # Present only in A: removed.
                differences.append((-a.size, a.size, 0, a.symbol))
                a = next(ai, None)
                continue
            if a.symbol > b.symbol:
                # Present only in B: added.
                differences.append((b.size, 0, b.size, b.symbol))
                b = next(bi, None)
                continue
            if a.size != b.size:
                differences.append((b.size - a.size, a.size, b.size, a.symbol))
            a = next(ai, None)
            b = next(bi, None)
        # Drain the remaining stream. The element already held in `a` (or
        # `b`) when the other stream ran out was previously dropped, because
        # `for a in ai` resumed after it; the while loops include it.
        while a is not None:
            differences.append((-a.size, a.size, 0, a.symbol))
            a = next(ai, None)
        while b is not None:
            differences.append((b.size, 0, b.size, b.symbol))
            b = next(bi, None)

        df = pd.DataFrame(differences,
                          columns=['change', 'a-size', 'b-size', 'symbol'])
        if config['report.demangle']:
            # Demangle early to sort by demangled name.
            df['symbol'] = df['symbol'].apply(memdf.report.demangle)
            config['report.demangle'] = False
        df.sort_values(by=['change', 'symbol'],
                       ascending=[False, True],
                       inplace=True)
        memdf.report.write_dfs(config, {'Differences': df})

    except Exception:
        status = 1
        # Bare `raise` preserves the original traceback.
        raise

    return status
def postprocess_config(config: Config, _key: str, _info: Mapping) -> None:
    """Postprocess --github-repository."""
    if not config['github.repository']:
        return
    # Split OWNER/REPO at the first slash only.
    owner, repo = config.get('github.repository').split('/', 1)
    config.put('github.owner', owner)
    config.put('github.repo', repo)
    if config['github.token']:
        return
    # Fall back to the conventional environment variable.
    config['github.token'] = os.environ.get('GITHUB_TOKEN')
    if not config['github.token']:
        logging.error('Missing --github-token')
Exemple #3
0
def postprocess_output_metadata(config: Config, key: str) -> None:
    """For --output-metadata=KEY:VALUE list, convert to dictionary."""
    assert key == 'output.metadata'
    # An entry without ':' becomes a bare flag with the value True;
    # later duplicates overwrite earlier ones, as in a plain loop.
    pairs = (entry.split(':', 1) if ':' in entry else [entry, True]
             for entry in config.get(key))
    config.put(key, {name: value for name, value in pairs})
def main(argv):
    """Hex-dump the gaps collected from the input ELF files.

    For each gap record, prints a header line and a hex dump of the
    corresponding bytes from the containing section, skipping gaps
    smaller than the configured per-section limit.
    Returns a process exit status (0 on success).
    """
    status = 0
    try:
        config = Config().init(CONFIG)
        config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
        config.parse(argv)
        config['collect.method'] = 'elftools'
        config['args.tag_inputs'] = True

        dfs = memdf.collect.collect_files(config)

        # Per-input cache of the ELF file plus, keyed by section name,
        # the section object, its raw bytes, and its size limit.
        # NOTE(review): the files opened here are never explicitly
        # closed; they live until process exit.
        elf = {}
        for filename in config['args.inputs']:
            elf[filename] = {
                'elffile': ELFFile(open(filename, 'rb')),
                'section': {},
                'data': {},
                'limit': {},
            }

        with memdf.report.open_output(config) as fp:
            for i in dfs['gap'].itertuples():
                e = elf[i.input]
                if i.section in e['section']:
                    section = e['section'][i.section]
                    data = e['data'][i.section]
                    limit = e['limit'][i.section]
                else:
                    # First gap in this section: fetch and cache.
                    section = e['elffile'].get_section_by_name(i.section)
                    data = section.data()
                    limit = memdf.select.get_limit(config, 'section',
                                                   i.section)
                    e['section'][i.section] = section
                    e['data'][i.section] = data
                    e['limit'][i.section] = limit
                if limit and i.size < limit:
                    continue
                offset = i.address - section['sh_addr']
                assert section['sh_size'] == len(data)
                print('\n{:08X} length {} in section {} of {}'.format(
                    i.address, i.size, i.section, i.input),
                      file=fp)
                # Distinct loop variable: the original reused `i` here,
                # shadowing the gap record mid-iteration.
                for line in hexdump(data, offset, i.size, i.address):
                    print(line, file=fp)

    except Exception:
        status = 1
        # Bare `raise` preserves the original traceback.
        raise

    return status
def main(argv):
    """Report size-database queries and matching-commit changes.

    Reads size records into the database, generates the configured
    reports, and writes them out. Returns a process exit status
    (0 on success).
    """
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        dfs = {}
        with SizeDatabase(config) as db:
            db.read_inputs()
            dfs.update(report_matching_commits(db))
            dfs.update(report_queries(db))

        memdf.report.write_dfs(config,
                               dfs,
                               hierify=True,
                               title=True,
                               floatfmt='5.1f')

    except Exception:
        # Match the sibling entry points: record failure, then re-raise
        # with the original traceback (previously `status` was left 0 and
        # `raise exception` was used).
        status = 1
        raise

    return status
Exemple #6
0
def select_and_ignore_config_desc(key: str) -> ConfigDescription:
    """Build the select/ignore option descriptions for `key`.

    Produces the `{key}.select`, `{key}.select-all`, `{key}.ignore`,
    and `{key}.ignore-all` options, all mapped into the 'select' group.
    """
    return {
        Config.group_map(key): {
            'group': 'select'
        },
        f'{key}.select': {
            'help':
            f'{key.capitalize()}(s) to process; otherwise all not ignored',
            'metavar': 'NAME',
            'default': [],
            'argparse': {
                'alias': [f'--{key}'],
            },
            'postprocess': postprocess_selections
        },
        f'{key}.select-all': {
            'help': f'Select all {key}s',
            'default': False,
        },
        # f-string for consistency with the other keys
        # (was `key + '.ignore'`).
        f'{key}.ignore': {
            'help': f'{key.capitalize()}(s) to ignore',
            'metavar': 'NAME',
            'default': [],
        },
        f'{key}.ignore-all': {
            'help': f'Ignore all {key}s unless explicitly selected',
            'default': False,
        },
    }
def main(argv):
    """Report size changes for matching commits via SizeContext."""
    status = 0
    try:
        # Assemble the full option description before initializing.
        description = {
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
            **DB_CONFIG,
            **REPORT_CONFIG,
        }
        config = Config().init(description)
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        size_context = SizeContext(config)
        size_context.read_inputs()
        reports = size_context.report_matching_commits()

        memdf.report.write_dfs(config, reports, hierify=True, title=True,
                               floatfmt='5.1f')

    except Exception as exception:
        raise exception

    return status
Exemple #8
0
def main(argv):
    """Report undefined symbols that match the configured block list."""
    status = 0
    try:

        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.collect.PREFIX_CONFIG,
            **memdf.collector.readelf.NM_CONFIG,
            **memdf.report.REPORT_CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **BLOCKLIST_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
        config = config.parse(argv)

        block_re: Optional[Pattern] = config.get_re('symbol.block')
        if block_re is None:
            logging.warning('No block list')
        else:
            # Keep only undefined ('U') symbols from each input.
            frames = [
                sources[sources.kind == 'U'] for sources in (
                    memdf.collector.readelf.read_sources(config, name)
                    for name in config.get('args.inputs', []))
            ]
            combined = pd.concat(frames)
            blocked = combined[combined.symbol.str.fullmatch(block_re)]
            memdf.report.write_dfs(config, {'Symbols': blocked})
    except Exception as exception:
        raise exception

    return status
Exemple #9
0
def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
    """Common argument parsing for collection tools."""
    # Later entries override earlier ones, so the tool-specific
    # description takes precedence over the shared defaults.
    description = dict(memdf.util.config.CONFIG)
    description.update(CONFIG)
    description.update(config_desc)
    config = Config().init(description)
    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
    return config.parse(argv)
Exemple #10
0
def main(argv):
    status = 0
    try:
        config = Config().init(CONFIG)
        config.argparse.add_argument('inputs', metavar='FILE', nargs=2)
        config.parse(argv)
        config['args.fill_holes'] = False

        inputs = config.get('args.inputs')

        a_dfs = memdf.collect.collect_files(config, files=[inputs[0]])
        b_dfs = memdf.collect.collect_files(config, files=[inputs[1]])

        a_syms = a_dfs[SymbolDF.name].sort_values(by='symbol')
        b_syms = b_dfs[SymbolDF.name].sort_values(by='symbol')

        # TBD: Differences other than size, configurably.
        differences = []
        ai = a_syms.itertuples()
        bi = b_syms.itertuples()
        while True:
            if (a := next(ai, None)) is None:
                break
            if (b := next(bi, None)) is None:
                differences.append((a.symbol, a.size, None))
                break
            if a.symbol < b.symbol:
                differences.append((a.symbol, a.size, None))
                a = next(ai, None)
                continue
            if a.symbol > b.symbol:
                differences.append((b.symbol, None, b.size))
                b = next(bi, None)
                continue
            if a.size != b.size:
                differences.append((a.symbol, a.size, b.size))
Exemple #11
0
def main(argv):
    status = 0
    try:
        sqlite_config = memdf.util.sqlite.CONFIG
        sqlite_config['database.file']['argparse']['required'] = True

        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **sqlite_config,
            **GITHUB_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        db = memdf.sizedb.SizeDatabase(config['database.file']).open()

        if gh := Gh(config):

            artifact_limit = config['github.limit-artifacts']
            artifacts_added = 0
            events = config['github.event']
            if not events:
                events = ['push']
            for a in gh.get_size_artifacts(label=config['github.label']):
                if events and a.event not in events:
                    logging.debug('Skipping %s artifact %d', a.event, a.id)
                    continue
                cur = db.execute('SELECT id FROM build WHERE artifact = ?',
                                 (a.id, ))
                if cur.fetchone():
                    logging.debug('Skipping known artifact %d', a.id)
                    continue
                blob = gh.download_artifact(a.id)
                if blob:
                    logging.info('Adding artifact %d %s %s %s %s', a.id,
                                 a.commit[:12], a.pr, a.event, a.group)
                    db.add_sizes_from_zipfile(io.BytesIO(blob),
                                              {'artifact': a.id})
                    db.commit()
                    artifacts_added += 1
                    if artifact_limit and artifact_limit <= artifacts_added:
                        break

        for filename in config['args.inputs']:
            db.add_sizes_from_file(filename)
            db.commit()
def main(argv):
    status = 0
    try:
        cfg = {
            **memdf.util.config.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **QUERY_CONFIG,
        }
        cfg['database.file']['argparse']['required'] = True

        config = Config().init(cfg)
        config.parse(argv)

        db = SizeDatabase(config['database.file'], writable=False)
        db.open()

        dfs = {}

        q = 0
        for title, key, values, info in config.get('queries', []):
            q += 1
            query = make_query(config, info)
            logging.debug('Option: %s', key)
            logging.debug('Title: %s', title)
            logging.debug('Query: %s', query.strip())
            logging.debug('With: %s', values)
            cur = db.execute(query, values)
            columns = [i[0] for i in cur.description]
            rows = cur.fetchall()
            if rows:
                df = pd.DataFrame(rows, columns=columns)
                df.attrs = {'name': f'query{q}', 'title': title}
                for f in info['sql'].get('postprocess', []):
                    df = f(config, df)
                dfs[df.attrs['name']] = df

        if build := config['query.build-sizes']:
            q += 1
            if (df := query_build_sizes(config, db, build)) is not None:
                dfs[df.attrs['name']] = df
Exemple #13
0
def read_symbols(config: Config, filename: str) -> SymbolDF:
    """Read a binary's symbol map using bloaty."""
    # Map bloaty's TSV column names to SymbolDF column names.
    column_map = {
        'compileunits': 'cu',
        'sections': 'section',
        'symbols': 'symbol',
        'vmsize': 'size',
    }
    command = [
        'bloaty', '--tsv', '--demangle=none', '-n', '0', '-d',
        'compileunits,sections,symbols', filename
    ]
    process = memdf.util.subprocess.run_tool_pipe(config, command)
    if not (process and process.stdout):
        # Tool unavailable or produced no output: empty result.
        return SymbolDF()
    stream = io.TextIOWrapper(process.stdout, newline=os.linesep)
    df = pd.read_table(stream,
                       usecols=list(column_map),
                       dtype=SymbolDF.dtype,
                       na_filter=False)
    df = df.rename(columns=column_map)
    prefixes = config.get_re('collect.prefix')
    df['cu'] = df['cu'].map(lambda source: simplify_source(source, prefixes))
    return df
Exemple #14
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Wrapper and utility functions around sqlite3"""

import sqlite3

from typing import List, Optional

from memdf import Config, ConfigDescription

# Command-line options for selecting the sqlite database file.
CONFIG: ConfigDescription = {
    Config.group_def('database'): {
        'title': 'database options',
    },
    'database.file': {
        'help': 'Sqlite3 file',
        'metavar': 'FILENAME',
        # In-memory database by default; tools that need persistence
        # mark this option required instead.
        'default': ':memory:',
        'argparse': {
            'alias': ['--db'],
        },
    },
}


class Database:
    """Wrapper and utility functions around sqlite3"""
Exemple #15
0
from memdf.collector.util import simplify_source

# Option for stripping build-tree prefixes from collected source paths.
PREFIX_CONFIG: ConfigDescription = {
    'collect.prefix': {
        'help': 'Strip PATH from the beginning of source file names',
        'metavar': 'PATH',
        'default': [],
        'argparse': {
            'alias': ['--prefix', '--strip-prefix'],
            'action': 'append',  # option may be given multiple times
        }
    },
}

CONFIG: ConfigDescription = {
    Config.group_def('input'): {
        'title': 'input options',
    },
    Config.group_def('tool'): {
        'title': 'external tool options',
    },
    Config.group_map('collect'): {
        'group': 'input'
    },
    **memdf.collector.bloaty.CONFIG,
    **memdf.collector.csv.CONFIG,
    **memdf.collector.elftools.CONFIG,
    **memdf.collector.readelf.CONFIG,
    'collect.method': {
        'help': 'Method of input processing',
        'metavar': 'METHOD',
def main(argv):
    status = 0

    try:
        _, platform, config_name, target_name, binary, *args = argv
    except ValueError:
        program = pathlib.Path(argv[0])
        logging.error(
            """
            Usage: %s platform config target binary [output] [options]

            This is intended for use in github workflows.
            For other purposes, a general program for the same operations is
            %s/report_summary.py

            """, program.name, program.parent)
        return 1

    try:
        config_file = pathlib.Path(platform)
        if config_file.is_file():
            platform = config_file.stem
        else:
            config_file = (PLATFORM_CONFIG_DIR / platform).with_suffix('.cfg')

        output_base = f'{platform}-{config_name}-{target_name}-sizes.json'
        if args and not args[0].startswith('-'):
            out, *args = args
            output = pathlib.Path(out)
            if out.endswith('/') and not output.exists():
                output.mkdir(parents=True)
            if output.is_dir():
                output = output / output_base
        else:
            output = pathlib.Path(binary).parent / output_base

        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.collect.CONFIG,
            **memdf.select.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **CONFIG,
        })
        config.put('output.file', output)
        config.put('output.format', 'json_records')
        if config_file.is_file():
            config.read_config_file(config_file)
        else:
            logging.warning('Missing config file: %s', config_file)
        config.parse([argv[0]] + args)

        config.put('output.metadata.platform', platform)
        config.put('output.metadata.config', config_name)
        config.put('output.metadata.target', target_name)
        config.put('output.metadata.time', config['timestamp'])
        config.put('output.metadata.input', binary)
        config.put('output.metadata.by', 'section')
        for key in ['event', 'hash', 'parent', 'pr', 'ref']:
            if value := config[key]:
                config.putl(['output', 'metadata', key], value)

        collected: DFs = memdf.collect.collect_files(config, [binary])

        # Aggregate loaded segments, by writable (flash) or not (RAM).
        segments = collected[SegmentDF.name]
        segments['segment'] = segments.index
        segments['wr'] = ((segments['flags'] & 2) != 0).convert_dtypes(
            convert_boolean=False, convert_integer=True)
        segment_summary = segments[segments['type'] == 'PT_LOAD'][[
            'wr', 'size'
        ]].groupby('wr').aggregate(np.sum).reset_index().astype(
            {'size': np.int64})
        segment_summary.attrs['name'] = "wr"

        sections = collected[SectionDF.name]
        sections = sections.join(on='segment',
                                 how='left',
                                 other=segments,
                                 rsuffix='-segment')
        section_summary = sections[['section', 'size',
                                    'wr']].sort_values(by='section')
        section_summary.attrs['name'] = "section"

        summaries = {
            'section': section_summary,
            'memory': segment_summary,
        }

        # Write configured (json) report to the output file.
        memdf.report.write_dfs(config, summaries)

        # Write text report to stdout.
        memdf.report.write_dfs(config,
                               summaries,
                               sys.stdout,
                               'simple',
                               floatfmt='.0f')
Exemple #17
0
            'metavar': 'NAME',
            'default': [],
        },
        f'{key}.ignore-all': {
            'help': f'Ignore all {key}s unless explicitly selected',
            'default': False,
        },
    }


# Select/ignore option descriptions for each selectable column kind.
SECTION_CONFIG = select_and_ignore_config_desc('section')
SYMBOL_CONFIG = select_and_ignore_config_desc('symbol')
REGION_CONFIG = select_and_ignore_config_desc('region')

# Combined selection options, all under the 'select' group.
CONFIG: ConfigDescription = {
    Config.group_def('select'): {
        'title': 'selection options',
    },
    **SECTION_CONFIG,
    **SYMBOL_CONFIG,
    **REGION_CONFIG,
}

# Columns collected directly from inputs vs. synthesized afterwards;
# together these form the valid selection choices.
COLLECTED_CHOICES = ['symbol', 'section']
SYNTHETIC_CHOICES = ['region']
SELECTION_CHOICES = COLLECTED_CHOICES + SYNTHETIC_CHOICES


def is_selected(config: Config, column, name) -> bool:
    """Test `name` against the configured selection criteria for `column`."""
    if config.getl([column, 'select-all']):
Exemple #18
0
#
"""Fetch data from GitHub size artifacts."""

import io
import logging
import sys

import memdf.sizedb
import memdf.util.config
import memdf.util.markdown
import memdf.util.sqlite
from memdf.util.github import Gh
from memdf import Config, ConfigDescription

GITHUB_CONFIG: ConfigDescription = {
    Config.group_def('github'): {
        'title': 'github options',
    },
    'github.event': {
        'help': 'Download only event type(s) (default ‘push’)',
        'metavar': 'EVENT',
        'default': [],
        'argparse': {
            'alias': ['--event']
        },
    },
    'github.limit-artifacts': {
        'help': 'Download no more than COUNT artifacts',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
Exemple #19
0
def postprocess_report_by(config: Config, key: str, info: Mapping) -> None:
    """For --report-by=region, select all sections."""
    assert key == 'report.by'
    # Regions are synthesized from sections, so a region report needs
    # every section collected regardless of explicit selection.
    if config.get(key) == 'region':
        # Removed a stray trailing comma that made this statement a
        # one-element tuple expression.
        config.put('section.select-all', True)
Exemple #20
0
from typing import (Any, Callable, Dict, List, Mapping, IO, Optional, Protocol,
                    Sequence, Union)

import cxxfilt  # type: ignore
import pandas as pd  # type: ignore

import memdf.df
import memdf.select
import memdf.util.pretty

from memdf import Config, ConfigDescription, DF, DFs
from memdf.util.config import ParseSizeAction

# Option description for demangling C++ symbol names in reports.
REPORT_DEMANGLE_CONFIG: ConfigDescription = {
    # Report options surface under the 'output' option group.
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.demangle': {
        'help': 'Demangle C++ symbol names',
        'default': False,
        'argparse': {
            'alias': ['--demangle', '-C'],
            'action': 'store_true',
        },
    },
}

REPORT_LIMIT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
import datetime
import logging
import sys

from typing import cast, Dict, List, Mapping, Optional, Tuple

import pandas as pd  # type: ignore

import memdf.report
import memdf.util.config
import memdf.util.sqlite
from memdf.sizedb import SizeDatabase
from memdf import Config

QUERY_CONFIG = {
    Config.group_map('query'): {
        'group': 'output'
    },
    'report.increases': {
        'help': 'Highlight large increases',
        'metavar': 'PERCENT',
        'default': 0.0,
        'argparse': {
            'alias': ['--threshold'],
            'type': float,
        },
    },
    'query.where': {
        'help': 'SQL filter',
        'metavar': 'SQL-EXPR',
        'default': '',
Exemple #22
0
def main(argv):
    status = 0

    try:
        _, platform, config_name, target_name, binary, *args = argv
    except ValueError:
        program = pathlib.Path(argv[0])
        logging.error(
            """
            Usage: %s platform config target binary [output] [options]

            This is intended for use in github workflows.
            For other purposes, a general program for the same operations is
            %s/report_summary.py

            """, program.name, program.parent)
        return 1

    try:
        config_file = pathlib.Path(platform)
        if config_file.is_file():
            platform = config_file.stem
        else:
            config_file = (PLATFORM_CONFIG_DIR / platform).with_suffix('.cfg')

        output_base = f'{platform}-{config_name}-{target_name}-sizes.json'
        if args and not args[0].startswith('-'):
            out, *args = args
            output = pathlib.Path(out)
            if out.endswith('/') and not output.exists():
                output.mkdir(parents=True)
            if output.is_dir():
                output = output / output_base
        else:
            output = pathlib.Path(binary).parent / output_base

        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.collect.CONFIG,
            **memdf.select.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **CONFIG,
        })
        config.put('output.file', output)
        config.put('output.format', 'json_records')
        if config_file.is_file():
            config.read_config_file(config_file)
        else:
            logging.warning('Missing config file: %s', config_file)
        config.parse([argv[0]] + args)

        config.put('output.metadata.platform', platform)
        config.put('output.metadata.config', config_name)
        config.put('output.metadata.target', target_name)
        config.put('output.metadata.time', config['timestamp'])
        config.put('output.metadata.input', binary)
        config.put('output.metadata.by', 'section')
        for key in ['event', 'hash', 'parent', 'pr']:
            if value := config[key]:
                config.putl(['output', 'metadata', key], value)

        collected: DFs = memdf.collect.collect_files(config, [binary])

        sections = collected[SectionDF.name]
        section_summary = sections[['section',
                                    'size']].sort_values(by='section')
        section_summary.attrs['name'] = "section"

        summaries = {
            'section': section_summary,
        }

        # Write configured (json) report to the output file.
        memdf.report.write_dfs(config, summaries)

        # Write text report to stdout.
        memdf.report.write_dfs(config, summaries, sys.stdout, 'simple')
Exemple #23
0
from pathlib import Path
from typing import Dict, IO, Iterable, Optional, Union

import dateutil  # type: ignore
import fastcore  # type: ignore
import ghapi.all  # type: ignore
import pandas as pd  # type: ignore

import memdf.report
import memdf.util.config
import memdf.util.sqlite
from memdf import Config, ConfigDescription

GITHUB_CONFIG: ConfigDescription = {
    Config.group_def('github'): {
        'title': 'github options',
    },
    'github.token': {
        'help': 'Github API token, or "SKIP" to suppress connecting to github',
        'metavar': 'TOKEN',
        'default': '',
        'argparse': {
            'alias': ['--github-api-token', '--token'],
        },
    },
    'github.repository': {
        'help': 'Github repostiory',
        'metavar': 'OWNER/REPO',
        'default': '',
        'argparse': {