예제 #1
0
def estimate(
    row: Tuple[str, str, str],
    *,
    data: Optional[Dict] = None,
    db: str,
    area_spec: Dict,
    run_id: str = '',
) -> Optional[int]:
    """Return the estimate reported for one ``(stnum, catalog, code)`` row.

    When ``data`` is truthy it is used as the student record.  Otherwise the
    student's ``input_data`` JSON is read from the ``server_data`` table of
    the SQLite database at ``db``.  The audit is then run with
    ``estimate_only=True`` and the ``estimate`` attribute of the first
    ``EstimateMsg`` is returned.

    ``run_id`` is accepted but not used here.

    Returns:
        The estimate, or ``None`` if the run yields no messages.

    Raises:
        AssertionError: if no database row matches ``row``, or if the run
            yields a message that is not an ``EstimateMsg``.
    """
    stnum, catalog, code = row

    if data:
        student = data
    else:
        with sqlite_connect(db, readonly=True) as conn:
            cursor = conn.execute('''
                SELECT input_data
                FROM server_data
                WHERE (stnum, catalog, code) = (?, ?, ?)
            ''', [stnum, catalog, code])

            found = cursor.fetchone()
            assert found is not None

            student = json.loads(found['input_data'])

    estimate_args = Arguments(estimate_only=True)
    for message in run(args=estimate_args, student=student, area_spec=area_spec):
        if isinstance(message, EstimateMsg):
            return message.estimate

        # any other message type is unexpected in estimate-only mode
        assert False, type(message)

    return None
예제 #2
0
def audit(
    row: Tuple[str, str, str],
    *,
    data: Optional[Dict] = None,
    db: str,
    area_spec: Dict,
    run_id: str = '',
    timeout: Optional[float] = None,
) -> Optional[Dict]:
    """Audit one ``(stnum, catalog, code)`` row and return a flat result record.

    When ``data`` is truthy it is used as the student record.  Otherwise the
    student's ``input_data`` JSON is read from the ``server_data`` table of
    the SQLite database at ``db``.  An estimate is computed first via
    ``estimate``; then the full audit runs until a ``ResultMsg`` arrives.

    Returns:
        A dict describing the result (run, identifiers, iteration count,
        duration in seconds, gpa, ok, rank, max_rank, the JSON-encoded result,
        status and version), or ``None`` if the run ends without a
        ``ResultMsg``.

    Raises:
        AssertionError: if no database row matches ``row`` or the estimate
            is ``None``.
        TimeoutError: if ``timeout`` is truthy and at least that many seconds
            have elapsed when a non-result message arrives.  The second
            exception argument is a dict identifying the cancelled audit.
    """
    stnum, catalog, code = row

    if data:
        student = data
    else:
        with sqlite_connect(db, readonly=True) as conn:
            cursor = conn.execute('''
                SELECT input_data
                FROM server_data
                WHERE (stnum, catalog, code) = (?, ?, ?)
            ''', [stnum, catalog, code])

            found = cursor.fetchone()
            assert found is not None

            student = json.loads(found['input_data'])

    estimate_count = estimate((stnum, catalog, code), data=student, db=db, area_spec=area_spec)
    assert estimate_count is not None

    # passed along with the TimeoutError so the caller can tell which audit was cancelled
    db_keys = dict(stnum=stnum, catalog=catalog, code=code, estimate=estimate_count, branch=run_id)

    started_at = time.perf_counter()

    for message in run(args=Arguments(), student=student, area_spec=area_spec):
        if not isinstance(message, ResultMsg):
            # the deadline is only checked when a non-result message arrives
            if timeout and time.perf_counter() - started_at >= timeout:
                raise TimeoutError(f'cancelling {" ".join(row)} after {time.perf_counter() - started_at}', db_keys)
            continue

        result = message.result.to_dict()
        record = {
            "run": run_id,
            "stnum": stnum,
            "catalog": catalog,
            "code": code,
            "iterations": message.iters,
            # elapsed_ms is converted to seconds
            "duration": message.elapsed_ms / 1000,
            "gpa": result["gpa"],
            "ok": result["ok"],
            "rank": result["rank"],
            "max_rank": result["max_rank"],
            "result": json.dumps(result, sort_keys=True),
            "status": result["status"],
            "version": message.version,
        }
        return record

    return None
예제 #3
0
파일: audit.py 프로젝트: fagan2888/auditor
def audit(*, area_spec: Dict, area_code: str, area_catalog: str, student: Dict,
          run_id: int, curs: psycopg2.extensions.cursor) -> None:
    """Audit ``student`` against ``area_spec``, recording progress in ``result``.

    A row is inserted into the ``result`` table with ``in_progress`` set to
    true before the audit starts.  Each ``ProgressMsg`` updates that row's
    iteration count and duration; the ``ResultMsg`` fills in the outcome
    columns and sets ``in_progress`` to false.

    Exceptions raised while consuming the audit's messages are reported to
    Sentry and stored as JSON in the row's ``error`` column instead of being
    propagated.  Exceptions from the initial INSERT are not caught.
    """
    args = Arguments()

    stnum = student['stnum']

    logger.info("auditing #%s against %s %s", stnum, area_catalog, area_code)
    with sentry_sdk.configure_scope() as scope:
        scope.user = dict(id=stnum)

    # create the row up front so later messages can update it by id
    new_row_params = {
        "student_id": stnum,
        "area_code": area_code,
        "catalog": area_catalog,
        "run": run_id,
        "student": json.dumps(student),
    }
    curs.execute(
        """
        INSERT INTO result (  student_id,     area_code,     catalog,     run,     input_data, in_progress)
        VALUES             (%(student_id)s, %(area_code)s, %(catalog)s, %(run)s, %(student)s , true       )
        RETURNING id
    """, new_row_params)

    inserted = curs.fetchone()
    result_id: int = cast(int, inserted[0])

    logger.info(f"result id = {result_id}")

    # tag subsequent Sentry events with the audit being processed
    with sentry_sdk.configure_scope() as scope:
        scope.user = {"id": stnum}
        scope.set_tag("area_code", area_code)
        scope.set_tag("catalog", area_catalog)
        scope.set_extra("result_id", result_id)

    try:
        for msg in run(args, area_spec=area_spec, student=student):
            if isinstance(msg, NoAuditsCompletedMsg):
                logger.critical('no audits completed')
                continue

            if isinstance(msg, EstimateMsg):
                # estimates are not recorded
                continue

            if isinstance(msg, ProgressMsg):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)

                progress_params = {
                    "result_id": result_id,
                    "count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                }
                curs.execute(
                    """
                    UPDATE result
                    SET iterations = %(count)s, duration = interval %(elapsed)s
                    WHERE id = %(result_id)s
                """, progress_params)

                logger.info(f"{msg.iters:,} at {avg_iter_time} per audit")
                continue

            if isinstance(msg, ResultMsg):
                result = msg.result.to_dict()

                final_params = {
                    "result_id": result_id,
                    "total_count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                    "avg_iter_time": f"{msg.avg_iter_ms}ms",
                    "result": json.dumps(result),
                    "claimed_courses": json.dumps(msg.result.keyed_claims()),
                    "rank": result["rank"],
                    "max_rank": result["max_rank"],
                    "gpa": result["gpa"],
                    "ok": result["ok"],
                    # the timestamp comes from Python rather than SQL's now():
                    # now() is the transaction's start time, and this should
                    # be the time at the end of the transaction instead
                    "now": datetime.datetime.now(),
                }
                curs.execute(
                    """
                    UPDATE result
                    SET iterations = %(total_count)s
                      , duration = interval %(elapsed)s
                      , per_iteration = interval %(avg_iter_time)s
                      , rank = %(rank)s
                      , max_rank = %(max_rank)s
                      , result = %(result)s::jsonb
                      , ok = %(ok)s
                      , ts = %(now)s
                      , gpa = %(gpa)s
                      , in_progress = false
                      , claimed_courses = %(claimed_courses)s::jsonb
                    WHERE id = %(result_id)s
                """,
                    final_params)
                continue

            logger.critical('unknown message %s', msg)

    except Exception as ex:
        sentry_sdk.capture_exception(ex)

        # store the failure on the row and clear its in-progress flag
        failure_params = {
            "result_id": result_id,
            "error": json.dumps({"error": str(ex)}),
        }
        curs.execute(
            """
            UPDATE result
            SET in_progress = false, error = %(error)s
            WHERE id = %(result_id)s
        """, failure_params)
예제 #4
0
def audit(
    *,
    area_spec: Dict,
    area_code: str,
    area_catalog: str,
    student: Dict,
    run_id: int,
    expires_at: Optional[str],
    link_only: bool,
    curs: psycopg2.extensions.cursor,
) -> Optional[int]:
    """Audit ``student`` against ``area_spec`` and insert the result row.

    Nothing is written until a ``ResultMsg`` arrives.  At that point, unless
    ``link_only`` is set, identical older results for the student/catalog/area
    are deleted and the student's active results for the area are deactivated.
    A new ``result`` row is then inserted with the next revision number.

    Returns:
        The id of the inserted row.  ``None`` if the run ends without a
        ``ResultMsg`` or if an exception is raised along the way; exceptions
        are logged and not propagated.
    """
    args = Arguments()

    stnum = student['stnum']

    logger.info("auditing #%s against %s %s", stnum, area_catalog, area_code)
    try:
        for msg in run(args, area_spec=area_spec, student=student):
            if isinstance(msg, NoAuditsCompletedMsg):
                logger.critical('no audits completed')
                continue

            if isinstance(msg, (EstimateMsg, ProgressMsg)):
                # neither estimates nor progress are recorded here
                continue

            if isinstance(msg, ResultMsg):
                result = msg.result.to_dict()
                result_str = json.dumps(result)

                if not link_only:
                    # remove earlier rows holding exactly this result
                    duplicate_params = {
                        "student_id": stnum,
                        "area_catalog": area_catalog,
                        "area_code": area_code,
                        "result": result_str,
                    }
                    curs.execute(
                        """
                        DELETE FROM result
                        WHERE student_id = %(student_id)s
                            AND catalog = %(area_catalog)s
                            AND area_code = %(area_code)s
                            AND result = %(result)s::jsonb
                    """, duplicate_params)

                    # mark every currently-active row for this area as inactive
                    deactivate_params = {
                        "student_id": stnum,
                        "area_code": area_code,
                    }
                    curs.execute(
                        """
                        UPDATE result
                        SET is_active = false
                        WHERE
                            student_id = %(student_id)s
                            AND area_code = %(area_code)s
                            AND is_active = true
                    """, deactivate_params)

                insert_params = {
                    "student_id": stnum,
                    "area_code": area_code,
                    "catalog": area_catalog,
                    "run": run_id,
                    "input_data": json.dumps(student),
                    "expires_at": expires_at,
                    "link_only": link_only,
                    # link-only results are stored inactive
                    "is_active": not link_only,
                    "total_count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                    "avg_iter_time": f"{msg.avg_iter_ms}ms",
                    "result": result_str,
                    "claimed_courses": json.dumps(msg.result.keyed_claims()),
                    "rank": result["rank"],
                    "max_rank": result["max_rank"],
                    "gpa": result["gpa"],
                    "ok": result["ok"],
                    "status": result["status"],
                    "student_name": student["name"],
                    "student_name_sort": student["name_sort"],
                    "student_classification": student["classification"],
                    # the literal string "None" is stored as NULL
                    "student_class": None if student["class"] == "None" else student["class"],
                    "result_version": result["version"],
                }

                # ts is filled by clock_timestamp() rather than now(): now()
                # is the start time of the transaction, whereas the wanted
                # value is the time the computation finished.
                # see https://stackoverflow.com/a/24169018
                curs.execute(
                    """
                    INSERT INTO result (
                        student_id,
                        area_code,
                        catalog,
                        run,
                        input_data,
                        expires_at,
                        link_only,
                        result_version,
                        iterations,
                        duration,
                        per_iteration,
                        rank,
                        max_rank,
                        result,
                        ok,
                        ts,
                        gpa,
                        claimed_courses,
                        status,
                        is_active,
                        revision,
                        student_classification,
                        student_class,
                        student_name,
                        student_name_sort
                    )
                    VALUES (
                        %(student_id)s,
                        %(area_code)s,
                        %(catalog)s,
                        %(run)s,
                        %(input_data)s,
                        %(expires_at)s,
                        %(link_only)s,
                        %(result_version)s,
                        %(total_count)s,
                        interval %(elapsed)s,
                        interval %(avg_iter_time)s,
                        %(rank)s,
                        %(max_rank)s,
                        %(result)s::jsonb,
                        %(ok)s,
                        clock_timestamp(),
                        %(gpa)s,
                        %(claimed_courses)s::jsonb,
                        %(status)s,
                        %(is_active)s,
                        coalesce((SELECT max(revision) FROM result WHERE student_id = %(student_id)s AND area_code = %(area_code)s), 0) + 1,
                        %(student_classification)s,
                        %(student_class)s,
                        nullif(%(student_name)s, ''),
                        nullif(%(student_name_sort)s, '')
                    )
                    RETURNING id
                """, insert_params)

                # RETURNING id yields the new row's id as the first column
                return curs.fetchone()[0]

            logger.critical('unknown message %s', msg)

    except Exception as ex:
        logger.error("error with student #%s, catalog %s, area %s: %s", stnum,
                     area_catalog, area_code, ex)

    return None
예제 #5
0
def main() -> int:  # noqa: C901
    """Audit every ``stnum catalog-year areacode`` triple read from stdin.

    Each non-blank stdin line is split on whitespace into a student number, a
    catalog and an area code.  The triples are de-duplicated, sorted, and
    repeated ``-n`` times.  For each triple the student file
    ``<dir>/<stnum>.json`` and the area spec located by ``find_area`` are
    loaded and audited.  The result is printed as a CSV row (``--table``) or
    as a summary; ``--invocation`` only prints the equivalent ``dp.py``
    command line instead of auditing.

    Returns:
        0 on success.
        1 if stdin holds no triples, no student directory is configured, an
        area spec cannot be found, an unknown message is produced, or an
        audit raises.
        2 if the audit reports that no audits completed.
    """
    default_dir = os.getenv('DP_STUDENT_DIR')

    parser = argparse.ArgumentParser()
    parser.add_argument('-w',
                        '--workers',
                        help="the number of worker processes to spawn",
                        default=os.cpu_count())
    parser.add_argument('--dir', default=default_dir)
    parser.add_argument(
        '--areas-dir',
        default=os.path.expanduser('~/Projects/degreepath-areas'))
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--invocation", action='store_true')
    parser.add_argument("-q", "--quiet", action='store_true')
    parser.add_argument("--paths",
                        dest='show_paths',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-paths",
                        dest='show_paths',
                        action='store_const',
                        const=False)
    parser.add_argument("--ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=False)
    parser.add_argument("--table", action='store_true')
    parser.add_argument("-n", default=1, type=int)

    cli_args = parser.parse_args()

    # deduplicate, then duplicate if requested.  Blank lines are skipped:
    # they would otherwise become empty tuples that cannot be unpacked below.
    data = sorted({
        tuple(line.split())
        for line in sys.stdin
        if line.strip()
    }) * cli_args.n

    if not data:
        print('expects a list of "stnum catalog-year areacode" to stdin',
              file=sys.stderr)
        return 1

    if cli_args.dir is None:
        # without this check os.path.join() below fails with a TypeError
        print('expects --dir or the DP_STUDENT_DIR environment variable to be set',
              file=sys.stderr)
        return 1

    if cli_args.table:
        print('stnum,catalog,area_code,gpa,rank,max', flush=True)

    for stnum, catalog, area_code in data:
        student_file = os.path.join(cli_args.dir, f"{stnum}.json")

        args = Arguments(print_all=False, transcript_only=cli_args.transcript)
        # only the part of the catalog before the first '-' is used, as an int
        area_file = find_area(root=pathlib.Path(cli_args.areas_dir),
                              area_catalog=int(catalog.split('-')[0]),
                              area_code=area_code)

        if not area_file:
            # print() does not interpolate %s placeholders the way logging
            # does, so the message is formatted here; it goes to stderr so it
            # cannot end up inside the --table CSV output.
            print(
                f'could not find area spec for {area_code} at or below '
                f'catalog {catalog}, under {cli_args.areas_dir}',
                file=sys.stderr)
            return 1

        if cli_args.invocation:
            print(
                f"python3 dp.py --student '{student_file}' --area '{area_file}'"
            )
            continue

        student = load_student(student_file)
        area_spec = load_area(area_file)

        if not cli_args.quiet and not cli_args.table:
            print(f"auditing #{student['stnum']} against {area_file}",
                  file=sys.stderr)

        try:
            for msg in run(args, area_spec=area_spec, student=student):
                if isinstance(msg, NoAuditsCompletedMsg):
                    print('no audits completed', file=sys.stderr)
                    return 2

                elif isinstance(msg, EstimateMsg):
                    print("estimate completed", file=sys.stderr)

                elif isinstance(msg, ProgressMsg):
                    if not cli_args.quiet:
                        avg_iter_time = pretty_ms(msg.avg_iter_ms,
                                                  format_sub_ms=True)
                        print(
                            f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})",
                            file=sys.stderr)

                elif isinstance(msg, ResultMsg):
                    # round-trip through JSON so the result holds only
                    # JSON-representable values
                    result = json.loads(json.dumps(msg.result.to_dict()))
                    if cli_args.table:
                        print(','.join([
                            stnum,
                            catalog,
                            area_code,
                            str(round(float(result['gpa']), 2)),
                            str(round(float(result['rank']), 2)),
                            str(round(float(result['max_rank']))),
                        ]),
                              flush=True)
                    else:
                        print("\n" + "".join(
                            summarize(
                                result=result,
                                transcript=msg.transcript,
                                count=msg.iters,
                                avg_iter_ms=msg.avg_iter_ms,
                                elapsed=pretty_ms(msg.elapsed_ms),
                                show_paths=cli_args.show_paths,
                                show_ranks=cli_args.show_ranks,
                                claims=msg.result.keyed_claims(),
                            )))

                else:
                    if not cli_args.quiet:
                        # an f-string is used because %-formatting would
                        # misbehave if the message object were a tuple
                        print(f'unknown message {msg}', file=sys.stderr)
                    return 1

        except Exception as ex:
            print(
                f"error during audit of #{student['stnum']} against {area_file}",
                file=sys.stderr)
            print(ex, file=sys.stderr)
            return 1

    return 0
예제 #6
0
def main() -> int:  # noqa: C901
    """Audit one student file against one area file from the command line.

    The first student loaded from ``--student`` is audited against the first
    area loaded from ``--area``.  Estimates and progress go to stderr; the
    result is printed to stdout unless ``--quiet`` is given.  The
    ``--tracemalloc-*`` flags record memory snapshots (on the first progress
    message, on every progress message, and/or after the run) and print them
    as a tab-separated table at the end.

    Returns:
        0 on success, 1 if an unknown message is produced, 2 if the audit
        reports that no audits completed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--area", dest="area_file")
    parser.add_argument("--student", dest="student_file")
    parser.add_argument("--loglevel",
                        dest="loglevel",
                        choices=("warn", "debug", "info", "critical"),
                        default="info")
    parser.add_argument("--json", action='store_true')
    parser.add_argument("--csv", action='store_true')
    parser.add_argument("--print-all", action='store_true')
    parser.add_argument("--stop-after", action='store', type=int)
    parser.add_argument("--progress-every",
                        action='store',
                        type=int,
                        default=1_000)
    parser.add_argument("--estimate", action='store_true')
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--gpa", action='store_true')
    parser.add_argument("--quiet", "-q", action='store_true')
    parser.add_argument("--tracemalloc-init", action='store_true')
    parser.add_argument("--tracemalloc-end", action='store_true')
    parser.add_argument("--tracemalloc-each", action='store_true')
    parser.add_argument("--paths",
                        dest='show_paths',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-paths",
                        dest='show_paths',
                        action='store_const',
                        const=False)
    parser.add_argument("--ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=True,
                        default=True)
    parser.add_argument("--no-ranks",
                        dest='show_ranks',
                        action='store_const',
                        const=False)
    cli_args = parser.parse_args()

    loglevel = getattr(logging, cli_args.loglevel.upper())
    logging.basicConfig(level=loglevel, format=logformat)

    if cli_args.estimate:
        os.environ['DP_ESTIMATE'] = '1'

    has_tracemalloc = cli_args.tracemalloc_init or cli_args.tracemalloc_end or cli_args.tracemalloc_each

    args = Arguments(
        gpa_only=cli_args.gpa,
        print_all=cli_args.print_all,
        progress_every=cli_args.progress_every,
        stop_after=cli_args.stop_after,
        transcript_only=cli_args.transcript,
        estimate_only=cli_args.estimate,
    )

    if has_tracemalloc:
        # imported only when requested
        import tracemalloc
        tracemalloc.start()

    first_progress_message = True

    # maps a key from process_top() to {snapshot index: value}
    top_mem_items: Dict[str, Dict[int, float]] = defaultdict(dict)
    tracemalloc_index = 0

    student = load_students(cli_args.student_file)[0]
    area_spec = load_areas(cli_args.area_file)[0]

    if not cli_args.quiet:
        print(f"auditing #{student['stnum']} against {cli_args.area_file}",
              file=sys.stderr)

    for msg in run(args, student=student, area_spec=area_spec):
        if isinstance(msg, NoAuditsCompletedMsg):
            logger.critical('no audits completed')
            return 2

        elif isinstance(msg, EstimateMsg):
            if not cli_args.quiet:
                print(
                    f"{msg.estimate:,} estimated solution{'s' if msg.estimate != 1 else ''}",
                    file=sys.stderr)

        elif isinstance(msg, ProgressMsg):
            if (cli_args.tracemalloc_init
                    and first_progress_message) or cli_args.tracemalloc_each:
                snapshot = tracemalloc.take_snapshot()
                for k, v in process_top(snapshot):
                    top_mem_items[k][tracemalloc_index] = v
                tracemalloc_index += 1

            first_progress_message = False

            # progress is still shown under --quiet when snapshots are being
            # taken on progress messages
            if not cli_args.quiet or (cli_args.tracemalloc_init
                                      or cli_args.tracemalloc_each):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                print(
                    f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})",
                    file=sys.stderr)

        elif isinstance(msg, ResultMsg):
            if not cli_args.quiet:
                print(
                    result_str(
                        msg,
                        as_json=cli_args.json,
                        as_csv=cli_args.csv,
                        gpa_only=cli_args.gpa,
                        show_paths=cli_args.show_paths,
                        show_ranks=cli_args.show_ranks,
                    ))

        else:
            if not cli_args.quiet:
                logger.critical('unknown message %s', msg)
            return 1

    if cli_args.tracemalloc_end:
        snapshot = tracemalloc.take_snapshot()
        for k, v in process_top(snapshot):
            top_mem_items[k][tracemalloc_index] = v

    # A tracemalloc flag can be set without any snapshot having been recorded
    # (e.g. the run finished before the first progress message).  There is
    # then nothing to tabulate, and max() would raise on the empty sequence.
    if has_tracemalloc and top_mem_items:
        longest = max(index for item in top_mem_items.values()
                      for index in item)

        for column in range(longest + 1):
            print(column * 10_000, end='\t')
        # end the header row so the first data row starts on its own line
        print()

        for file, datapoints in top_mem_items.items():
            print(file, end='\t')
            for i in range(longest + 1):
                # snapshots in which this key did not appear are shown as 0
                print(f"{datapoints.get(i, 0):.1f}", end='\t')
            print()

    return 0