Example #1
def result_str(
    msg: ResultMsg,
    *,
    as_json: bool,
    as_csv: bool,
    gpa_only: bool,
    show_paths: bool,
    show_ranks: bool,
) -> str:
    if gpa_only:
        return f"GPA: {msg.result.gpa()}"

    dict_result = msg.result.to_dict()

    if as_csv:
        return to_csv(dict_result, transcript=msg.transcript)

    if as_json:
        return json.dumps(dict_result)

    # JSON round-trip so that summarize() only sees plain JSON-compatible values
    # (tuples become lists, keys become strings)
    dict_result = json.loads(json.dumps(dict_result))

    return "\n" + "".join(
        summarize(
            result=dict_result,
            transcript=msg.transcript,
            count=msg.iters,
            avg_iter_ms=msg.avg_iter_ms,
            elapsed=pretty_ms(msg.elapsed_ms),
            show_paths=show_paths,
            show_ranks=show_ranks,
            claims=msg.result.keyed_claims(),
        ))
Example #2
def fetch__print_summary(args: argparse.Namespace, curs: Any) -> None:
    # language=PostgreSQL
    curs.execute("""
        SELECT run
             , min(ts AT TIME ZONE 'America/Chicago') AS first
             , max((ts + duration) AT TIME ZONE 'America/Chicago') AS last
             , extract(EPOCH FROM max((ts + duration)) - min(ts)) AS duration
             , count(*) AS total
             , sum(ok::integer) AS ok
             , sum((NOT ok)::integer) AS "not-ok"
             , (SELECT count(*) FROM queue WHERE run = r.run) AS queued
        FROM result r
        WHERE run > 0
          AND ts > now() - INTERVAL '1 week'
        GROUP BY run
        ORDER BY run DESC
    """)

    # 219: 2019-12-06 23:07 / 2019-12-07 04:40 [5h 32m 58.7s]; 6,997 total, 201 ok, 6,796 not-ok
    date_fmt = "%Y-%m-%d %H:%M"
    for row in curs.fetchall():
        first = row['first'].strftime(date_fmt)
        last = row['last'].strftime(date_fmt)
        duration = pretty_ms(row['duration'] * 1000, unit_count=2)
        queue_count = f", {row['queued']:,} queued" if row['queued'] else ''
        print(f"{row['run']}: {first} / {last} [{duration.ljust(10, ' ')}]; {row['total']:,} total, {row['ok']:,} ok, {row['not-ok']:,} not-ok{queue_count}")
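Every example on this page formats durations with pretty_ms, which is not part of these excerpts. The sketch below is a minimal, hypothetical stand-in; the parameter names unit_count and format_sub_ms are taken from the calls in these examples, but the project's real formatting rules may differ.

def pretty_ms(ms: float, *, unit_count: int = 3, format_sub_ms: bool = False) -> str:
    # Hypothetical stand-in: render a millisecond count as "5h 32m 58.7s"-style text.
    if format_sub_ms and ms < 1:
        return f"{ms:.3f}ms"
    total_seconds, ms_rem = divmod(ms, 1000)
    minutes, seconds = divmod(int(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    parts = []
    if hours:
        parts.append(f"{hours}h")
    if minutes:
        parts.append(f"{minutes}m")
    parts.append(f"{seconds + ms_rem / 1000:.1f}s")
    # unit_count keeps only the most significant units, e.g. "5h 32m" for unit_count=2.
    return ' '.join(parts[:unit_count])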
Example #3
File: audit.py  Project: fagan2888/auditor
def audit(*, area_spec: Dict, area_code: str, area_catalog: str, student: Dict,
          run_id: int, curs: psycopg2.extensions.cursor) -> None:
    args = Arguments()

    stnum = student['stnum']

    logger.info("auditing #%s against %s %s", stnum, area_catalog, area_code)
    with sentry_sdk.configure_scope() as scope:
        scope.user = {"id": stnum}

    curs.execute(
        """
        INSERT INTO result (  student_id,     area_code,     catalog,     run,     input_data, in_progress)
        VALUES             (%(student_id)s, %(area_code)s, %(catalog)s, %(run)s, %(student)s , true       )
        RETURNING id
    """, {
            "student_id": stnum,
            "area_code": area_code,
            "catalog": area_catalog,
            "run": run_id,
            "student": json.dumps(student)
        })

    row = curs.fetchone()
    result_id: int = cast(int, row[0])

    logger.info(f"result id = {result_id}")

    with sentry_sdk.configure_scope() as scope:
        scope.user = dict(id=stnum)
        scope.set_tag("area_code", area_code)
        scope.set_tag("catalog", area_catalog)
        scope.set_extra("result_id", result_id)

    try:
        for msg in run(args, area_spec=area_spec, student=student):
            if isinstance(msg, NoAuditsCompletedMsg):
                logger.critical('no audits completed')

            elif isinstance(msg, EstimateMsg):
                pass

            elif isinstance(msg, ProgressMsg):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)

                curs.execute(
                    """
                    UPDATE result
                    SET iterations = %(count)s, duration = interval %(elapsed)s
                    WHERE id = %(result_id)s
                """, {
                        "result_id": result_id,
                        "count": msg.iters,
                        "elapsed": f"{msg.elapsed_ms}ms"
                    })

                logger.info(f"{msg.iters:,} at {avg_iter_time} per audit")

            elif isinstance(msg, ResultMsg):
                result = msg.result.to_dict()

                curs.execute(
                    """
                    UPDATE result
                    SET iterations = %(total_count)s
                      , duration = interval %(elapsed)s
                      , per_iteration = interval %(avg_iter_time)s
                      , rank = %(rank)s
                      , max_rank = %(max_rank)s
                      , result = %(result)s::jsonb
                      , ok = %(ok)s
                      , ts = %(now)s
                      , gpa = %(gpa)s
                      , in_progress = false
                      , claimed_courses = %(claimed_courses)s::jsonb
                    WHERE id = %(result_id)s
                """,
                    {
                        "result_id": result_id,
                        "total_count": msg.iters,
                        "elapsed": f"{msg.elapsed_ms}ms",
                        "avg_iter_time": f"{msg.avg_iter_ms}ms",
                        "result": json.dumps(result),
                        "claimed_courses": json.dumps(
                            msg.result.keyed_claims()),
                        "rank": result["rank"],
                        "max_rank": result["max_rank"],
                        "gpa": result["gpa"],
                        "ok": result["ok"],
                        # we insert a Python now() instead of using the now() psql function
                        # because sql's now() is the start time of the transaction, and we
                        # want this to be the end of the transaction
                        "now": datetime.datetime.now(),
                    })

            else:
                logger.critical('unknown message %s', msg)

    except Exception as ex:
        sentry_sdk.capture_exception(ex)

        curs.execute(
            """
            UPDATE result
            SET in_progress = false, error = %(error)s
            WHERE id = %(result_id)s
        """, {
                "result_id": result_id,
                "error": json.dumps({"error": str(ex)})
            })
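
The run() generator drives every caller in these examples through the same small set of message classes. Their definitions are not included in the excerpts; the dataclasses below are only a sketch, with fields inferred from the attribute accesses across the examples (the real classes may carry more).

from dataclasses import dataclass
from typing import Any

@dataclass
class NoAuditsCompletedMsg:
    # yielded when no audit iterations completed
    pass

@dataclass
class EstimateMsg:
    estimate: int          # estimated number of solutions (printed in Example #6)

@dataclass
class ProgressMsg:
    iters: int             # iterations completed so far
    avg_iter_ms: float     # average time per iteration, in milliseconds
    elapsed_ms: float      # total elapsed time, in milliseconds
    best_rank: Any         # best rank seen so far

@dataclass
class ResultMsg:
    result: Any            # final result object (.to_dict(), .gpa(), .keyed_claims())
    transcript: Any        # transcript handed through to summarize()/to_csv()
    iters: int
    avg_iter_ms: float
    elapsed_ms: float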
Example #4
File: branch.py  Project: fagan2888/auditor
def branch(args: argparse.Namespace) -> None:
    fetch_if_needed(args)

    with sqlite_connect(args.db) as conn:
        print(f'clearing data for "{args.branch}"... ', end='', flush=True)
        conn.execute('DELETE FROM branch WHERE branch = ?', [args.branch])
        conn.execute('DELETE FROM branch_ip WHERE branch = ?', [args.branch])
        conn.commit()
        print('cleared')

    minimum_duration = parse_ms_str(args.minimum_duration)

    with sqlite_connect(args.db) as conn:
        results = conn.execute(
            '''
            SELECT
                count(duration) as count,
                coalesce(max(sum(duration) / :workers, max(duration)), 0) as duration_s
            FROM baseline
            WHERE duration < :min
                AND CASE WHEN :code IS NULL THEN 1 = 1 ELSE code = :code END
        ''', {
                'min': minimum_duration.sec(),
                'workers': args.workers,
                'code': args.filter
            })

        count, estimated_duration_s = results.fetchone()

        pretty_min = pretty_ms(minimum_duration.ms())
        pretty_dur = pretty_ms(estimated_duration_s * 1000)
        print(
            f'{count:,} audits under {pretty_min} each: ~{pretty_dur} with {args.workers:,} workers'
        )

        results = conn.execute(
            '''
            SELECT catalog, code
            FROM baseline
            WHERE duration < :min
                AND CASE WHEN :code IS NULL THEN 1 = 1 ELSE code = :code END
            GROUP BY catalog, code
        ''', {
                'min': minimum_duration.sec(),
                'code': args.filter
            })

        area_specs = load_areas(args, list(results))

        results = conn.execute(
            '''
            SELECT stnum, catalog, code
            FROM baseline
            WHERE duration < :min
                AND CASE WHEN :code IS NULL THEN 1 = 1 ELSE code = :code END
            ORDER BY duration DESC, stnum, catalog, code
        ''', {
                'min': minimum_duration.sec(),
                'code': args.filter
            })

        records = [(stnum, catalog, code) for stnum, catalog, code in results]

    print(f'running {len(records):,} audits...')

    with sqlite_connect(args.db) as conn:
        with ProcessPoolExecutor(max_workers=args.workers) as executor:
            futures = {
                executor.submit(
                    audit,
                    (stnum, catalog, code),
                    db=args.db,
                    area_spec=area_specs[f"{catalog}/{code}"],
                    timeout=float(minimum_duration.sec()),
                    run_id=args.branch,
                ): (stnum, catalog, code)
                for (stnum, catalog, code) in records
            }

            for future in tqdm.tqdm(as_completed(futures),
                                    total=len(futures),
                                    disable=None):
                stnum, catalog, code = futures[future]

                with sqlite_cursor(conn) as curs:
                    try:
                        db_args = future.result()
                    except TimeoutError as timeout:
                        print(timeout.args[0])
                        curs.execute(
                            '''
                            DELETE
                            FROM branch_ip
                            WHERE stnum = :stnum
                                AND catalog = :catalog
                                AND code = :code
                                AND branch = :branch
                        ''', timeout.args[1])
                        conn.commit()
                        continue
                    except Exception as exc:
                        print(
                            f'{stnum} {catalog} {code} generated an exception: {exc}'
                        )
                        continue

                    assert db_args is not None

                    try:
                        curs.execute(
                            '''
                            INSERT INTO branch (branch, stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, result)
                            VALUES (:run, :stnum, :catalog, :code, :iterations, :duration, :gpa, :ok, :rank, :max_rank, json(:result))
                        ''', db_args)

                        curs.execute(
                            '''
                            DELETE
                            FROM branch_ip
                            WHERE stnum = :stnum
                                AND catalog = :catalog
                                AND code = :code
                                AND branch = :run
                        ''', db_args)
                    except sqlite3.Error as ex:
                        print(db_args)
                        print(db_args['stnum'], db_args['catalog'],
                              db_args['code'], 'generated an exception', ex)
                        conn.rollback()
                        continue

                    conn.commit()
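
The audit callable submitted to the ProcessPoolExecutor here (and again in Example #7) is not the database-backed audit from Example #3: it receives a (stnum, catalog, code) tuple plus keyword arguments and returns the named parameters bound into the INSERT above. A hypothetical stub of that interface, inferred only from the submit() calls and the bound parameter names:

from typing import Any, Dict, Optional, Tuple

def audit(student_key: Tuple[str, str, str], *,
          db: str,
          area_spec: Dict,
          timeout: Optional[float],
          run_id: Any) -> Optional[Dict[str, Any]]:
    # Hypothetical worker stub: run one audit and return a dict of named SQL
    # parameters (stnum, catalog, code, iterations, duration, gpa, ok, rank,
    # max_rank, result, plus run/status/version where the INSERT uses them).
    ...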
Example #5
def main() -> int:  # noqa: C901
    DEFAULT_DIR = os.getenv('DP_STUDENT_DIR')

    parser = argparse.ArgumentParser()
    parser.add_argument('-w',
                        '--workers',
                        help="the number of worker processes to spawn",
                        default=os.cpu_count())
    parser.add_argument('--dir', default=DEFAULT_DIR)
    parser.add_argument(
        '--areas-dir',
        default=os.path.expanduser('~/Projects/degreepath-areas'))
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--invocation", action='store_true')
    parser.add_argument("-q", "--quiet", action='store_true')
    parser.add_argument("--paths",
                        dest='show_paths',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-paths",
                        dest='show_paths',
                        action='store_const',
                        const=False)
    parser.add_argument("--ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=False)
    parser.add_argument("--table", action='store_true')
    parser.add_argument("-n", default=1, type=int)

    cli_args = parser.parse_args()

    # deduplicate, then duplicate if requested
    data = sorted(
        set(tuple(stnum_code.strip().split())
            for stnum_code in sys.stdin)) * cli_args.n

    if not data:
        print('expects a list of "stnum catalog-year areacode" to stdin',
              file=sys.stderr)
        return 1

    if cli_args.table:
        print('stnum,catalog,area_code,gpa,rank,max', flush=True)

    for stnum, catalog, area_code in data:
        student_file = os.path.join(cli_args.dir, f"{stnum}.json")

        args = Arguments(print_all=False, transcript_only=cli_args.transcript)
        area_file = find_area(root=pathlib.Path(cli_args.areas_dir),
                              area_catalog=int(catalog.split('-')[0]),
                              area_code=area_code)

        if not area_file:
            print(
                f'could not find area spec for {area_code} at or below catalog {catalog}, under {cli_args.areas_dir}',
                file=sys.stderr)
            return 1

        if cli_args.invocation:
            print(
                f"python3 dp.py --student '{student_file}' --area '{area_file}'"
            )
            continue

        student = load_student(student_file)
        area_spec = load_area(area_file)

        if not cli_args.quiet and not cli_args.table:
            print(f"auditing #{student['stnum']} against {area_file}",
                  file=sys.stderr)

        try:
            for msg in run(args, area_spec=area_spec, student=student):
                if isinstance(msg, NoAuditsCompletedMsg):
                    print('no audits completed', file=sys.stderr)
                    return 2

                elif isinstance(msg, EstimateMsg):
                    print("estimate completed", file=sys.stderr)

                elif isinstance(msg, ProgressMsg):
                    if not cli_args.quiet:
                        avg_iter_time = pretty_ms(msg.avg_iter_ms,
                                                  format_sub_ms=True)
                        print(
                            f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})",
                            file=sys.stderr)

                elif isinstance(msg, ResultMsg):
                    result = json.loads(json.dumps(msg.result.to_dict()))
                    if cli_args.table:
                        avg_iter_time = pretty_ms(msg.avg_iter_ms,
                                                  format_sub_ms=True)
                        print(','.join([
                            stnum,
                            catalog,
                            area_code,
                            str(round(float(result['gpa']), 2)),
                            str(round(float(result['rank']), 2)),
                            str(round(float(result['max_rank']))),
                        ]),
                              flush=True)
                    else:
                        print("\n" + "".join(
                            summarize(
                                result=result,
                                transcript=msg.transcript,
                                count=msg.iters,
                                avg_iter_ms=msg.avg_iter_ms,
                                elapsed=pretty_ms(msg.elapsed_ms),
                                show_paths=cli_args.show_paths,
                                show_ranks=cli_args.show_ranks,
                                claims=msg.result.keyed_claims(),
                            )))

                else:
                    if not cli_args.quiet:
                        print('unknown message %s' % msg, file=sys.stderr)
                    return 1

        except Exception as ex:
            print(
                f"error during audit of #{student['stnum']} against {area_file}",
                file=sys.stderr)
            print(ex, file=sys.stderr)
            return 1

    return 0
Example #6
def main() -> int:  # noqa: C901
    parser = argparse.ArgumentParser()
    parser.add_argument("--area", dest="area_file")
    parser.add_argument("--student", dest="student_file")
    parser.add_argument("--loglevel",
                        dest="loglevel",
                        choices=("warn", "debug", "info", "critical"),
                        default="info")
    parser.add_argument("--json", action='store_true')
    parser.add_argument("--csv", action='store_true')
    parser.add_argument("--print-all", action='store_true')
    parser.add_argument("--stop-after", action='store', type=int)
    parser.add_argument("--progress-every",
                        action='store',
                        type=int,
                        default=1_000)
    parser.add_argument("--estimate", action='store_true')
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--gpa", action='store_true')
    parser.add_argument("--quiet", "-q", action='store_true')
    parser.add_argument("--tracemalloc-init", action='store_true')
    parser.add_argument("--tracemalloc-end", action='store_true')
    parser.add_argument("--tracemalloc-each", action='store_true')
    parser.add_argument("--paths",
                        dest='show_paths',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-paths",
                        dest='show_paths',
                        action='store_const',
                        const=False)
    parser.add_argument("--ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=False)
    cli_args = parser.parse_args()

    loglevel = getattr(logging, cli_args.loglevel.upper())
    logging.basicConfig(level=loglevel, format=logformat)

    if cli_args.estimate:
        os.environ['DP_ESTIMATE'] = '1'

    has_tracemalloc = cli_args.tracemalloc_init or cli_args.tracemalloc_end or cli_args.tracemalloc_each

    args = Arguments(
        gpa_only=cli_args.gpa,
        print_all=cli_args.print_all,
        progress_every=cli_args.progress_every,
        stop_after=cli_args.stop_after,
        transcript_only=cli_args.transcript,
        estimate_only=cli_args.estimate,
    )

    if has_tracemalloc:
        import tracemalloc
        tracemalloc.start()

    first_progress_message = True

    top_mem_items: Dict[str, Dict[int, float]] = defaultdict(dict)
    tracemalloc_index = 0

    student = load_students(cli_args.student_file)[0]
    area_spec = load_areas(cli_args.area_file)[0]

    if not cli_args.quiet:
        print(f"auditing #{student['stnum']} against {cli_args.area_file}",
              file=sys.stderr)

    for msg in run(args, student=student, area_spec=area_spec):
        if isinstance(msg, NoAuditsCompletedMsg):
            logger.critical('no audits completed')
            return 2

        elif isinstance(msg, EstimateMsg):
            if not cli_args.quiet:
                print(
                    f"{msg.estimate:,} estimated solution{'s' if msg.estimate != 1 else ''}",
                    file=sys.stderr)

        elif isinstance(msg, ProgressMsg):
            if (cli_args.tracemalloc_init
                    and first_progress_message) or cli_args.tracemalloc_each:
                snapshot = tracemalloc.take_snapshot()
                for k, v in process_top(snapshot):
                    top_mem_items[k][tracemalloc_index] = v
                tracemalloc_index += 1

            first_progress_message = False

            if not cli_args.quiet or (cli_args.tracemalloc_init
                                      or cli_args.tracemalloc_each):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                print(
                    f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})",
                    file=sys.stderr)

        elif isinstance(msg, ResultMsg):
            if not cli_args.quiet:
                print(
                    result_str(
                        msg,
                        as_json=cli_args.json,
                        as_csv=cli_args.csv,
                        gpa_only=cli_args.gpa,
                        show_paths=cli_args.show_paths,
                        show_ranks=cli_args.show_ranks,
                    ))

        else:
            if not cli_args.quiet:
                logger.critical('unknown message %s', msg)
            return 1

    if cli_args.tracemalloc_end:
        snapshot = tracemalloc.take_snapshot()
        for k, v in process_top(snapshot):
            top_mem_items[k][tracemalloc_index] = v

    if has_tracemalloc:
        longest = max(index for item in top_mem_items.values()
                      for index, datapoint in item.items())
        for tracemalloc_index in range(0, longest + 1):
            print(tracemalloc_index * 10_000, end='\t')

        for file, datapoints in top_mem_items.items():
            print(file, end='\t')
            for i in range(0, longest + 1):
                print(f"{datapoints.get(i, 0):.1f}", end='\t')
            print()

    return 0
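
The tracemalloc reporting above relies on a project helper process_top(snapshot) that yields (label, size) pairs for the table printed at the end. It is not shown in the excerpt; the sketch below uses only the standard tracemalloc API, and the per-source-line grouping and KiB unit are assumptions rather than the project's actual behavior.

import tracemalloc
from typing import Iterator, Tuple

def process_top(snapshot: tracemalloc.Snapshot, limit: int = 10) -> Iterator[Tuple[str, float]]:
    # Group allocations by source line and yield (location, size-in-KiB) pairs
    # for the largest allocators; grouping and unit are assumptions.
    for stat in snapshot.statistics('lineno')[:limit]:
        frame = stat.traceback[0]
        yield f"{frame.filename}:{frame.lineno}", stat.size / 1024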
Example #7
def run_batch(args: argparse.Namespace, *, baseline: bool) -> None:
    fetch_if_needed(args)

    with sqlite_connect(args.db) as conn, sqlite_transaction(conn):
        if baseline:
            print('clearing baseline data... ', end='', flush=True)
            conn.execute('DELETE FROM baseline')
        else:
            print(f'clearing data for "{args.branch}"... ', end='', flush=True)
            conn.execute('DELETE FROM branch WHERE branch = ?', [args.branch])
        print('cleared')

    minimum_duration = parse_ms_str(args.minimum_duration)

    with sqlite_connect(args.db) as conn:
        if baseline:
            results = conn.execute('''
                SELECT stnum, catalog, code, duration, catalog || '/' || code as area_key
                FROM server_data
                WHERE duration < :min
                ORDER BY duration DESC, stnum, catalog, code
            ''', {'min': minimum_duration.sec()})
        else:
            results = conn.execute('''
                SELECT stnum, catalog, code, duration, catalog || '/' || code as area_key
                FROM baseline
                WHERE duration < :min
                ORDER BY duration DESC, stnum, catalog, code
            ''', {'min': minimum_duration.sec()})

        records = [Record(**r) for r in results]

    if args.filter is not None:
        records = [r for r in records if r.code == args.filter]

    estimated_duration_s = sum(r.duration for r in records) / args.workers
    pretty_dur = pretty_ms(estimated_duration_s * 1000)
    pretty_min = pretty_ms(minimum_duration.ms())
    print(f'{len(records):,} audits under {pretty_min} each: ~{pretty_dur} with {args.workers:,} workers')

    if baseline and args.copy:
        with sqlite_transaction(conn):
            conn.execute('''
                INSERT INTO baseline (stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result)
                SELECT stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result
                FROM server_data
                WHERE duration < :min
            ''', {'min': minimum_duration.sec()})
        return

    area_codes = set((r.catalog, r.code) for r in records)
    area_specs = load_areas(args, [{"catalog": catalog, "code": code} for catalog, code in area_codes])

    remaining_records = list(records)
    print(f'running {len(records):,} audits...')

    timeout: Optional[float] = None
    if baseline:
        timeout = float(minimum_duration.sec()) * 2.5

    with \
            sqlite_connect(args.db) as conn, \
            sqlite_transaction(conn), \
            ProcessPoolExecutor(max_workers=args.workers) as executor:
        futures = {
            executor.submit(
                audit,
                (r.stnum, r.catalog, r.code),
                db=args.db,
                area_spec=area_specs[r.area_key],
                timeout=timeout,
                run_id=getattr(args, 'branch', 'None'),
            ): r
            for r in records
            if r.area_key in area_specs
        }

        pbar = tqdm.tqdm(total=len(futures), disable=None)

        upcoming = [f"{r.stnum}:{r.code}" for r in remaining_records[:args.workers]]
        pbar.set_description(', '.join(upcoming))

        for future in as_completed(futures):
            record = futures[future]

            try:
                remaining_records.remove(record)
                upcoming = [f"{r.stnum}:{r.code}" for r in remaining_records[:args.workers]]
            except ValueError:
                pass

            pbar.update(n=1)
            # pbar.write(f"completed ({record.stnum}, {record.code})")
            pbar.set_description(', '.join(upcoming))

            try:
                db_args = future.result()
            except TimeoutError as err:
                print(err.args[0])
                continue
            except Exception as exc:
                print(f'{record.stnum} {record.catalog} {record.code} generated an exception: {exc}')
                continue

            assert db_args is not None, f"{record.stnum}, {record.catalog}, {record.code} returned None"

            if baseline:
                conn.execute('''
                    INSERT INTO baseline (stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result, version)
                    VALUES (:stnum, :catalog, :code, :iterations, :duration, :gpa, :ok, :rank, :max_rank, :status, json(:result), :version)
                ''', db_args)
            else:
                conn.execute('''
                    INSERT INTO branch (branch, stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result, version)
                    VALUES (:run, :stnum, :catalog, :code, :iterations, :duration, :gpa, :ok, :rank, :max_rank, :status, json(:result), :version)
                ''', db_args)