Example #1
def plot_score_distribution(summary_df: pd.DataFrame):
    logger.debug('plot score distributions')

    summary_df = summary_df.sort_values(by='score')
    max_score = summary_df['max_score'].max()

    plt.clf()
    ax = plt.gca()
    try:
        # note: seaborn's distplot is deprecated since 0.11; histplot/displot
        # supersede it
        sns.distplot(summary_df[~summary_df['student_id'].duplicated(
            keep='first')]['score'],
                     rug=True,
                     fit=norm,
                     bins=int(max_score),
                     ax=ax)
    except LinAlgError as error:
        logger.warning(f'unable to plot score distribution: {error}')

    ax.set_xlim(0, max_score)
    ax.set_xlabel('score')
    ax.set_ylabel('share')
    ax.set_title('score distribution without duplicates (takes lower score)')
    plt.tight_layout()

    with io.BytesIO() as buffer:
        plt.savefig(buffer, format='svg', transparent=False)
        return buffer.getvalue()
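A note on usage: the function returns the rendered figure as raw SVG bytes rather than writing a file. A minimal sketch, assuming a hypothetical summary_df that carries just the columns the plot touches (in the project this frame comes from summarize_results, Example #3):

import pandas as pd

# hypothetical frame with only the columns plot_score_distribution uses
summary_df = pd.DataFrame({'student_id': [1, 2, 2],
                           'score': [3., 5., 4.],
                           'max_score': [5., 5., 5.]})

with open('score_distribution.svg', mode='wb') as f:
    f.write(plot_score_distribution(summary_df))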
Example #2
    def _apply_cases(self, state: Dict) -> List[Result]:
        state = state.copy()
        results = []

        # prepare import filter
        if_regex, if_blacklist = self._variables.get('IMPORT_FILTER',
                                                     (None, None))

        for i, case in enumerate(self._cases.values(), start=1):
            logger.debug(f'[{i}/{len(self._cases)}] execute {case}')

            with io.StringIO() as stdout, io.StringIO() as stderr:
                with ExitStack() as es:
                    if if_regex is not None:
                        es.enter_context(
                            import_filter(if_regex, blacklist=if_blacklist))
                    es.enter_context(capture_output(stdout, stderr))

                    achieved, msg = case(state)

                results.append(
                    Result(id=case.id,
                           label=case.label,
                           target=case.targets,
                           score=achieved,
                           score_max=case.score,
                           messages=[msg],
                           stdout=stdout.getvalue(),
                           stderr=stderr.getvalue()))

        logger.debug('testing completed')
        return results
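The Result record is constructed with keyword arguments only, so its shape can be read off the call site; the following dataclass is a sketch of what it presumably looks like (field names match the call above, field types are assumptions, not the project's actual definition):

from dataclasses import dataclass, field
from typing import List

@dataclass
class Result:  # inferred sketch, not autograde's real class
    id: str
    label: str
    target: List[str]
    score: float
    score_max: float
    messages: List[str] = field(default_factory=list)
    stdout: str = ''
    stderr: str = ''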
Example #3
def summarize_results(results,
                      filenames=None,
                      include_tasks=False) -> pd.DataFrame:
    task_names = []
    if include_tasks:
        task_keys = []
        for res in results[0].results:
            task_keys.append(res.id)
            # prefer the human-readable label, fall back to the target name
            task_names.append(str(res.label) if res.label else str(res.target))

    logger.debug(f'summarize {len(results)} results')

    header = [
        'student_id', 'last_name', 'first_name', 'score', 'max_score',
        'patches', 'checksum', *task_names, 'filename'
    ]

    if filenames is None:
        filenames = [''] * len(results)

    def row_factory():
        for r, filename in zip(results, filenames):
            stuff = []
            if include_tasks:
                for key in task_keys:
                    for task in r.results:
                        if task.id == key:
                            stuff.append(task.score)
                            break
                    else:
                        stuff.append(0)

            s = r.summary()
            for member in r.team_members:
                yield (member.student_id, member.last_name,
                       member.first_name, s.score, s.score_max,
                       len(r.applied_patches), r.checksum, *stuff, filename)

    summary_df = pd.DataFrame(row_factory(),
                              columns=header).sort_values(by='last_name')
    summary_df['multiple_submissions'] = summary_df['student_id'].duplicated(
        keep=False)

    if not math.isclose(summary_df['max_score'].std(), 0, abs_tol=1e-9):
        logger.warning('max scores do not appear to be consistent!')

    return summary_df
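The multiple_submissions column relies on duplicated(keep=False), which flags every occurrence of a repeated student_id rather than only the later ones; a quick illustration:

import pandas as pd

ids = pd.Series([1, 2, 2, 3])
print(ids.duplicated(keep=False).tolist())    # [False, True, True, False]
print(ids.duplicated(keep='first').tolist())  # [False, False, True, False]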
Example #4
    def execute(self, args=None):
        """
        Commandline interface for notebook test. Call with `--help` flag to get further information.

        :param args: optional arguments, uses `sys.argv` by default
        :return: number of failed tests
        """
        parser = argparse.ArgumentParser(
            description='run tests on jupyter notebook')

        parser.add_argument('notebook',
                            type=str,
                            help='the jupyter notebook to test')
        parser.add_argument('-t',
                            '--target',
                            type=str,
                            metavar='',
                            help='where to store results')
        parser.add_argument('-c',
                            '--context',
                            type=str,
                            metavar='',
                            help='context directory')
        parser.add_argument('-v',
                            '--verbose',
                            action='count',
                            default=0,
                            help='verbosity level')

        args = parser.parse_args(args)

        logger.setLevel(loglevel(args.verbose))
        logger.debug(f'args: {args}')

        results = self._grade_notebook(
            Path(args.notebook).absolute(),
            target_dir=Path(args.target).absolute() if args.target else None,
            context=Path(args.context).absolute() if args.context else None)

        return results.summary().failed
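Because parse_args receives the optional args list, the method can be driven from code as well as from a shell; a minimal sketch, assuming nbt is an instance of the class this method belongs to:

failed = nbt.execute(['notebook.ipynb', '-t', 'results/', '-vv'])
if failed:
    print(f'{failed} test case(s) failed')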
Example #5
def inject_patch(results: Results, path='.', prefix: str = 'results') -> Path:
    """Store results as patch in mounted results archive"""
    path = Path(path)
    ct = len(list(path.glob(f'{prefix}_patch*.json')))

    with cd(path):
        with open(f'{prefix}_patch_{ct + 1:02d}.json', mode='wt') as f:
            json.dump(results.to_dict(), f, indent=4)

        # update report if it exists
        if Path('report.html').exists():
            results = load_patched()
            logger.debug(f'update report for {results.checksum}')
            with open('report.html', mode='wt') as f:
                f.write(
                    render('report.html',
                           title='report',
                           id=results.checksum,
                           results={results.checksum: results},
                           summary=results.summary()))

    return path
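Patch files are numbered by counting the matches already present, so repeated audit passes stack up in sequence. A sketch, assuming results is a Results instance and mount_path points at the mounted archive:

inject_patch(results, mount_path)  # writes results_patch_01.json
inject_patch(results, mount_path)  # a later pass writes results_patch_02.json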
Example #6
    def run(path_nb_):
        if args.backend is None:
            cmd = [
                'python', f'"{path_tst}"', f'"{path_nb_}"', '-t',
                f'"{path_tgt}"', *(('-c', f'"{path_cxt}"') if path_cxt else
                                   ()),
                *(('-' + 'v' * args.verbose, ) if args.verbose > 0 else ())
            ]
        elif 'docker' in args.backend:
            cmd = [
                'docker', 'run', '-v', f'"{path_tst}:/autograde/test.py"',
                '-v', f'"{path_nb_}:/autograde/notebook.ipynb"', '-v',
                f'"{path_tgt}:/autograde/target"',
                *(('-v',
                   f'"{path_cxt}:/autograde/context:ro"') if path_cxt else ()),
                *(('-u',
                   str(os.geteuid())) if 'rootless' not in args.backend else
                  ()), args.tag,
                *(('-' + 'v' * args.verbose, ) if args.verbose > 0 else ())
            ]
        elif args.backend == 'podman':
            cmd = [
                'podman', 'run', '-v', f'"{path_tst}:/autograde/test.py"',
                '-v', f'"{path_nb_}:/autograde/notebook.ipynb"', '-v',
                f'"{path_tgt}:/autograde/target"',
                *(('-v', f'"{path_cxt}:/autograde/context"') if path_cxt else
                  ()), args.tag,
                *(('-' + 'v' * args.verbose, ) if args.verbose > 0 else ())
            ]
        else:
            raise ValueError(f'unknown backend: {args.backend}')

        logger.info(f'test: {path_nb_}')
        logger.debug('run: ' + ' '.join(cmd))

        return subprocess.call(' '.join(cmd), shell=True)
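Embedding quotes by hand and joining with shell=True is fragile once paths contain spaces or shell metacharacters. A hedged alternative to the snippet's manual quoting (a different technique, reusing path_tst and args.tag from the enclosing scope) is to keep the argument list unquoted:

import shlex
import subprocess

cmd = ['docker', 'run', '-v', f'{path_tst}:/autograde/test.py', args.tag]
# passing the list directly avoids the shell entirely; if a shell is really
# required, shlex.join(cmd) quotes each argument safely
subprocess.call(cmd)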
Example #7
def plot_fraud_matrix(sources: Dict[str, str]) -> bytes:
    logger.debug('apply fraud detection')
    hashes = sorted(sources)
    diffs = pd.DataFrame(np.nan, index=hashes, columns=hashes)

    for h in hashes:
        diffs.loc[h, h] = 1.

    for (ha, ca), (hb, cb) in combinations(sources.items(), 2):
        diffs.loc[ha, hb] = diffs.loc[hb, ha] = SequenceMatcher(a=ca,
                                                                b=cb).ratio()

    plt.clf()
    ax = sns.heatmap(diffs,
                     vmin=0.,
                     vmax=1.,
                     xticklabels=True,
                     yticklabels=True)
    ax.set_title('similarity of notebook code')

    with io.BytesIO() as buffer:
        plt.savefig(buffer, format='svg', transparent=False)
        return buffer.getvalue()
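SequenceMatcher.ratio() returns 2*M/T, where M is the number of matching characters and T the combined length of both strings, so 1.0 means identical sources; a worked example:

from difflib import SequenceMatcher

# 'print(' and ')' match on both sides (7 chars each), total length 16: 14/16
print(SequenceMatcher(a='print(1)', b='print(2)').ratio())  # 0.875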
Example #8
def cmd_summary(args):
    """Generate human & machine readable summary of results"""

    from autograde.cli.util import load_patched, render, list_results, merge_results, b64str, plot_fraud_matrix, \
        plot_score_distribution, summarize_results

    path = Path(args.result or Path.cwd()).expanduser().absolute()
    assert path.is_dir(), f'{path} is not a directory'
    include_similarities = args.similarities

    results = list()
    sources = dict()
    filenames = []
    for path_ in list_results(path):
        logger.debug(f'read {path_}')
        filenames.append(path_.absolute())

        with mount_tar(path_) as tar, cd(tar):
            r = load_patched()
            results.append(r)

            with open('code.py', mode='rt') as f:
                sources[r.checksum] = f.read()

    # merge results
    results_df = merge_results(results)
    logger.debug('store raw.csv')
    results_df.to_csv(path.joinpath('raw.csv'), index=False)

    # summarize results
    summary_df = summarize_results(results, filenames, include_tasks=True)
    logger.debug('store summary.csv')
    summary_df.to_csv(path.joinpath('summary.csv'), index=False)

    plots = dict(
        score_distribution=b64str(plot_score_distribution(summary_df)))
    if include_similarities:
        plots['similarities'] = b64str(plot_fraud_matrix(sources))

    logger.info('render summary.html')
    with open(path.joinpath('summary.html'), mode='wt') as f:
        f.write(
            render('summary.html',
                   title='summary',
                   summary=summary_df,
                   plots=plots))

    return 0
Example #9
def cmd_build(args):
    """Build autograde container image for specified backend"""
    if args.backend is None:
        logger.warning('no backend specified')
        return 1

    if args.requirements:
        with Path(args.requirements).open(mode='rt') as f:
            requirements = [
                line for line in map(str.strip, f.read().splitlines()) if line
            ]
    else:
        requirements = []

    with TemporaryDirectory() as tmp:
        logger.debug(f'copy source to {tmp}')
        shutil.copytree('.', tmp, dirs_exist_ok=True)

        if requirements:
            logger.info(f'add additional requirements: {requirements}')
            with Path(tmp).joinpath('requirements.txt').open(mode='w') as f:
                f.write('\n'.join(requirements))

        if 'docker' in args.backend:
            cmd = ['docker', 'build', '-t', args.tag, tmp]
        elif args.backend == 'podman':
            cmd = [
                'podman', 'build', '-t', args.tag, '--cgroup-manager=cgroupfs',
                tmp
            ]
        else:
            raise ValueError(f'unknown backend: {args.backend}')

        logger.debug('run: ' + ' '.join(cmd))
        return subprocess.run(cmd, capture_output=args.quiet).returncode
Example #10
def route_settings():
    settings.update(**request.form)
    logger.debug(f'update settings: {settings}')
    return redirect(request.referrer)
Example #11
def cmd_audit(args):
    """Launch a web interface for manually auditing test results"""
    import logging
    from flask import Flask, redirect, request
    import flask.cli as flask_cli
    from werkzeug.exceptions import HTTPException, InternalServerError

    from autograde.util import logger, parse_bool, timestamp_utc_iso, mount_tar

    with ExitStack() as exit_stack:
        # settings
        settings = AuditSettings()

        # mount & index all results
        mounts = OrderedDict()
        sources = dict()
        results = dict()
        for path in list_results(args.result):
            mount_path = Path(
                exit_stack.enter_context(mount_tar(path, mode='a')))

            r = load_patched(mount_path)
            results[r.checksum] = r
            mounts[r.checksum] = mount_path

            with mount_path.joinpath('code.py').open(mode='rt') as f:
                sources[r.checksum] = f.read()

        patched = set()
        next_ids = dict(zip(mounts, list(mounts)[1:]))
        prev_ids = {b: a for a, b in next_ids.items()}

        # create actual flask application
        app = Flask('autograde - audit')

        # monkey patching for nicer cli output
        flask_cli.show_server_banner = lambda *_, **__: logger.debug(
            'suppress flask banner')
        app.logger = logger
        logging.root = logger

        @app.errorhandler(Exception)
        def handle_error(error):
            logger.warning(f'{type(error)}: {error}')
            error = error if isinstance(
                error, HTTPException) else InternalServerError()
            return render('error.html', title='Oooops',
                          error=error), error.code

        @app.route('/')
        def route_root():
            return redirect('/audit')

        @app.route('/settings', methods=('POST', ))
        def route_settings():
            settings.update(**request.form)
            logger.debug(f'update settings: {settings}')
            return redirect(request.referrer)

        @app.route('/audit', strict_slashes=False)
        @app.route('/audit/<string:id>')
        def route_audit(id=None):
            return render('audit.html',
                          title='audit',
                          settings=settings,
                          results=results,
                          id=id,
                          prev_id=prev_ids.get(id),
                          next_id=next_ids.get(id),
                          patched=patched,
                          mounts=mounts)

        @app.route('/patch', methods=('POST', ))
        def route_patch():
            if (rid := request.form.get('id')) and (mount := mounts.get(rid)):
                scores = dict()
                comments = dict()
                r = deepcopy(results[rid])

                r.title = 'manual audit'
                r.timestamp = timestamp_utc_iso()

                # extract form data
                for key, value in request.form.items():
                    if key.startswith('score:'):
                        scores[key.split(':')[
                            -1]] = math.nan if value == '' else float(value)
                    elif key.startswith('comment:'):
                        comments[key.split(':')[-1]] = value

                # update results
                modification_flag = False
                for result in r.results:
                    score = scores.get(result.id)
                    if score is not None and not math.isclose(
                            score, result.score):
                        logger.debug(f'update score of result {result.id[:8]}')
                        result.score = score
                        modification_flag = True

                    if comment := comments.get(result.id):
                        logger.debug(
                            f'update messages of result {result.id[:8]}')
                        result.messages.append(
                            settings.format_comment(comment))
                        modification_flag = True
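The guard at the top of route_patch chains two assignment expressions: the mount lookup only runs when the form actually carries an id, and both names stay bound inside the block. The pattern in isolation:

form = {'id': 'abc123'}          # stand-in for request.form
mounts = {'abc123': '/tmp/m0'}   # stand-in for the mount index

if (rid := form.get('id')) and (mount := mounts.get(rid)):
    print(rid, mount)  # both lookups succeeded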
Example #12
def route_stop():
    # note: the 'werkzeug.server.shutdown' hook was removed in Werkzeug 2.1,
    # so this route only works with older Werkzeug versions
    if func := request.environ.get('werkzeug.server.shutdown'):
        logger.debug('shutdown werkzeug server')
        func()
Example #13
                    if comment := comments.get(result.id):
                        logger.debug(
                            f'update messages of result {result.id[:8]}')
                        result.messages.append(
                            settings.format_comment(comment))
                        modification_flag = True

                # patch results back
                if modification_flag:
                    # update state & persist patch
                    inject_patch(r, mount)
                    results[rid] = results[rid].patch(r)
                    patched.add(rid)
                else:
                    logger.debug('no modifications were made')

                if next_id := next_ids.get(rid):
                    return redirect(f'/audit/{next_id}#edit')

            return redirect('/audit')

        @app.route('/report/<string:id>')
        def route_report(id):
            return render('report.html',
                          title='report (preview)',
                          id=id,
                          results=results,
                          summary=results[id].summary())

        @app.route('/source/<string:id>')
Example #14
def exec_notebook(notebook,
                  file: TextIO = sys.stdout,
                  cell_timeout: float = 0.,
                  ignore_errors: bool = False,
                  variables: Dict = None):
    """
    Extract source code from jupyter notebook and execute it.

    :param notebook: file like with notebook data
    :param file: where to send stdout
    :param ignore_errors: whether or not errors will be forwarded or ignored
    :param cell_timeout: timeout for cell execution 0=∞
    :param variables: variables to be inserted into initial state
    :return: the state mutated by executed code
    """
    state = dict()
    variables = variables or {}
    state.update(deepcopy(variables))

    try:
        logger.debug('parse notebook')

        # when executed within a docker container, some minor warnings occur that we filter here
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            notebook = read(notebook, 4)
            shell = InteractiveShell.instance()

        # extract comment cells
        md_cells = [
            c.source for c in filter(lambda c: c.cell_type == 'markdown',
                                     notebook.cells)
        ]

        # prepare code cells for execution
        def _code_cells():
            yield 'injected: setup', INJECT_BEFORE, 0

            for i, cell in enumerate(
                    filter(lambda c: c.cell_type == 'code', notebook.cells)):
                # render code
                source = shell.input_transformer_manager.transform_cell(
                    cell.source)
                yield (
                    f'nb-{i+1}',
                    f'{source.strip()}\n\n# injected by test\ndump_figure()',
                    cell_timeout)

            yield 'injected: teardown', INJECT_AFTER, 0

        code_cells = list(_code_cells())

    except Exception as error:
        logger.error(f'unable to parse notebook: {error}')
        raise ValueError(error) from error

    # prepare import filter
    if_regex, if_blacklist = variables.get('IMPORT_FILTER', (None, None))

    # the log is supposed to be a valid, standalone python script
    print('#!/usr/bin/env python3', file=file)

    # actual code execution
    with ExitStack() as shadow_stack:
        for i, (label, code, timeout_) in enumerate(code_cells, start=1):
            state.update({
                '__LABEL__': deepcopy(label),
                '__PLOT_REGISTRY__': []
            })

            with io.StringIO() as stdout, io.StringIO() as stderr:
                logger.debug(
                    f'[{i}/{len(code_cells)}] execute cell ("{label}")')
                stopwatch = StopWatch()

                try:
                    with capture_output(stdout, stderr):
                        # actual execution that extends state
                        with ExitStack() as es:
                            if if_regex is not None and i > 1:
                                es.enter_context(
                                    import_filter(if_regex,
                                                  blacklist=if_blacklist))
                            es.enter_context(timeout(timeout_))
                            es.enter_context(stopwatch)

                            shadow_stack.enter_context(
                                shadowed_exec(code, state))

                except Exception as error:
                    # extend log with some meaningful error message
                    traceback.print_exception(type(error),
                                              error,
                                              error.__traceback__,
                                              file=stderr)

                    if not ignore_errors:
                        raise error

                finally:
                    # log code and output
                    with capture_output(file):
                        _label = f' CODE CELL {label} '
                        print(f'# {_label:-^78}')
                        print(str(code).strip())

                        print(
                            f"\n# EXECUTED IN {stopwatch.duration_rel()[-1]:.3}s"
                        )

                        stdout_s = stdout.getvalue()
                        if stdout_s:
                            print('# STDOUT')
                            print(as_py_comment(stdout_s, 4))

                        stderr_s = stderr.getvalue()
                        if stderr_s:
                            print('# STDERR')
                            print(as_py_comment(stderr_s, 4))

                        print('\n')

        # add markdown comments to state
        state['__COMMENTS__'] = md_cells

        # add artifact loader
        state['__ARTIFACTS__'] = ArtifactLoader()

        logger.debug('execution completed')
        yield state
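exec_notebook yields the final interpreter state, so it is evidently wrapped as a context manager upstream (Example #16 enters it through an ExitStack). A minimal usage sketch, assuming such a contextlib.contextmanager wrapper:

with open('notebook.ipynb', mode='rt') as nb, open('code.py', mode='wt') as log:
    with exec_notebook(nb, file=log, cell_timeout=30., ignore_errors=True) as state:
        print(state['__COMMENTS__'])  # markdown cells collected along the way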
Example #15
def cli(args=None):
    # environment variables
    verbosity = int(os.environ.get('AG_VERBOSITY', 0))
    container_backend = os.environ.get('AG_BACKEND', None)
    container_tag = os.environ.get('AG_TAG', 'autograde')

    # command line arguments
    parser = argparse.ArgumentParser(
        description='utility for grading jupyter notebooks',
        epilog='autograde on github: https://github.com/cssh-rwth/autograde',
        prog='autograde',
    )

    # global flags
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=verbosity,
                        help='verbosity level')
    parser.add_argument(
        '--backend',
        type=str,
        default=container_backend,
        choices=['docker', 'rootless-docker', 'podman'],
        metavar='',
        help=f'container backend to use, default is {container_backend}')
    parser.add_argument('--tag',
                        type=str,
                        default=container_tag,
                        metavar='',
                        help=f'container tag, default: "{container_tag}"')
    parser.set_defaults(func=cmd_version)

    subparsers = parser.add_subparsers(help='sub command help')

    # build sub command
    bld_parser = subparsers.add_parser('build', help=cmd_build.__doc__)
    bld_parser.add_argument('-r',
                            '--requirements',
                            type=Path,
                            default=None,
                            help='additional requirements to install')
    bld_parser.add_argument('-q',
                            '--quiet',
                            action='store_true',
                            help='mute output')
    bld_parser.set_defaults(func=cmd_build)

    # test sub command
    tst_parser = subparsers.add_parser('test', help=cmd_tst.__doc__)
    tst_parser.add_argument('test', type=str, help='autograde test script')
    tst_parser.add_argument('notebook',
                            type=str,
                            help='the jupyter notebook(s) to be tested')
    tst_parser.add_argument('-t',
                            '--target',
                            type=str,
                            metavar='',
                            help='where to store results')
    tst_parser.add_argument('-c',
                            '--context',
                            type=str,
                            metavar='',
                            help='context directory')
    tst_parser.set_defaults(func=cmd_tst)

    # patch sub command
    ptc_parser = subparsers.add_parser('patch', help=cmd_patch.__doc__)
    ptc_parser.add_argument('result',
                            type=str,
                            help='result archive(s) to be patched')
    ptc_parser.add_argument('patch',
                            type=str,
                            help='result archive(s) for patching')
    ptc_parser.set_defaults(func=cmd_patch)

    # audit sub command
    adt_parser = subparsers.add_parser('audit', help=cmd_audit.__doc__)
    adt_parser.add_argument('result',
                            type=str,
                            help='result archive(s) to audit')
    adt_parser.add_argument('-b',
                            '--bind',
                            type=str,
                            default='127.0.0.1',
                            help='host to bind to')
    adt_parser.add_argument('-p',
                            '--port',
                            type=int,
                            default=5000,
                            help='port')
    adt_parser.set_defaults(func=cmd_audit)

    # report sub command
    rpt_parser = subparsers.add_parser('report', help=cmd_report.__doc__)
    rpt_parser.add_argument('result',
                            type=str,
                            help='result archive(s) for creating the report')
    rpt_parser.set_defaults(func=cmd_report)

    # summary sub command
    sum_parser = subparsers.add_parser('summary', help=cmd_summary.__doc__)
    sum_parser.add_argument('--similarities',
                            action='store_true',
                            help='activate notebook similarity computation')
    sum_parser.add_argument('result',
                            type=str,
                            help='result archives to summarize')
    sum_parser.set_defaults(func=cmd_summary)

    # version sub command
    vrs_parser = subparsers.add_parser('version', help=cmd_version.__doc__)
    vrs_parser.set_defaults(func=cmd_version)

    args = parser.parse_args(args)

    logger.setLevel(loglevel(args.verbose))
    logger.debug(f'default encoding: {sys.getdefaultencoding()}')
    logger.debug(f'args: {args}')

    return args.func(args)
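Since cli also accepts an explicit argument list, subcommands can be invoked programmatically; for instance, the equivalent of running autograde summary --similarities results/ from a shell:

exit_code = cli(['summary', '--similarities', 'results/'])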
Example #16
    def _grade_notebook(self, nb_path, target_dir=None, context=None):
        target_dir = target_dir or os.getcwd()

        # prepare notebook
        with open(nb_path, mode='rb') as f:
            nb_data = f.read()

        nb_hash = sha256(nb_data).hexdigest()
        nb_hash_short = nb_hash[:8]

        with cd(target_dir):
            archive = Path(f'results_{nb_hash_short}.tar.xz')

            if archive.exists():
                logger.debug(f'remove existing {archive}')
                archive.unlink()

            with ExitStack() as exec_test_stack:
                tar = exec_test_stack.enter_context(
                    mount_tar(archive, mode='w:xz'))
                exec_test_stack.enter_context(cd(tar))

                # store copy of notebook
                logger.debug('dump copy of original notebook')
                with open('notebook.ipynb', mode='wb') as f:
                    f.write(nb_data)

                # prepare context and execute notebook
                with open('code.py', mode='wt') as c, cd('artifacts',
                                                         mkdir=True):
                    # prepare execution context in file system
                    if context is not None:
                        logger.debug(f'copy context files from: {context}')
                        shutil.copytree(context, '.', dirs_exist_ok=True)

                    # build index of all files known before execution
                    index = set()
                    for path in Path('.').glob('**/*'):
                        if path.is_file():
                            with path.open(mode='rb') as f:
                                index.add(sha256(f.read()).hexdigest())

                    # actual notebook execution
                    try:
                        logger.debug('execute notebook')
                        state = exec_test_stack.enter_context(
                            exec_notebook(io.StringIO(nb_data.decode('utf-8')),
                                          file=c,
                                          ignore_errors=True,
                                          cell_timeout=self._cell_timeout,
                                          variables=self._variables))

                    except ValueError:
                        state = {}

                    # remove files that haven't changed
                    artifacts = []
                    artifacts_excluded = []
                    for path in Path('.').glob('**/*'):
                        if path.is_file():
                            delete_flag = False
                            with path.open(mode='rb') as f:
                                if sha256(f.read()).hexdigest() in index:
                                    artifacts_excluded.append(str(path))
                                    delete_flag = True
                                else:
                                    artifacts.append(str(path))

                            if delete_flag:
                                path.unlink()

                # infer meta information
                group = list(
                    map(lambda m: TeamMember(**m),
                        state.get('team_members', [])))

                if not group:
                    logger.warning('could not find valid information about '
                                   f'team members in "{nb_path}"')

                # execute tests
                logger.debug('execute tests')
                results = Results(
                    title=self._title,
                    notebook=str(nb_path),
                    checksum=nb_hash,
                    team_members=group,
                    artifacts=sorted(artifacts),
                    excluded_artifacts=sorted(artifacts_excluded),
                    results=self._apply_cases(state))

                # store results as json
                logger.debug('dump results as json')
                with open('results.json', mode='wt') as f:
                    json.dump(results.to_dict(), fp=f, indent=4)

                # infer new, more readable name
                names = results.format_members(separator=',')
                archive_name_new = Path(
                    f'results_[{names}]_{nb_hash_short}.tar.xz')

            if archive_name_new.exists():
                logger.debug(f'remove existing {archive_name_new}')
                archive_name_new.unlink()

            archive.rename(archive_name_new)

        return results