def result_str(
    msg: ResultMsg,
    *,
    as_json: bool,
    as_csv: bool,
    gpa_only: bool,
    show_paths: bool,
    show_ranks: bool,
) -> str:
    if gpa_only:
        return f"GPA: {msg.result.gpa()}"

    dict_result = msg.result.to_dict()

    if as_csv:
        return to_csv(dict_result, transcript=msg.transcript)

    if as_json:
        return json.dumps(dict_result)

    # round-trip through JSON so that `summarize` sees plain JSON-native types
    dict_result = json.loads(json.dumps(dict_result))

    return "\n" + "".join(summarize(
        result=dict_result,
        transcript=msg.transcript,
        count=msg.iters,
        avg_iter_ms=msg.avg_iter_ms,
        elapsed=pretty_ms(msg.elapsed_ms),
        show_paths=show_paths,
        show_ranks=show_ranks,
        claims=msg.result.keyed_claims(),
    ))
def fetch__print_summary(args: argparse.Namespace, curs: Any) -> None:
    # language=PostgreSQL
    curs.execute("""
        SELECT run
             , min(ts AT TIME ZONE 'America/Chicago') AS first
             , max((ts + duration) AT TIME ZONE 'America/Chicago') AS last
             , extract(EPOCH FROM max((ts + duration)) - min(ts)) AS duration
             , count(*) AS total
             , sum(ok::integer) AS ok
             , sum((NOT ok)::integer) AS "not-ok"
             , (SELECT count(*) FROM queue WHERE run = r.run) AS queued
        FROM result r
        WHERE run > 0 AND ts > now() - INTERVAL '1 week'
        GROUP BY run
        ORDER BY run DESC
    """)

    # example output:
    # 219: 2019-12-06 23:07 / 2019-12-07 04:40 [5h 32m 58.7s]; 6,997 total, 201 ok, 6,796 not-ok
    date_fmt = "%Y-%m-%d %H:%M"

    for row in curs.fetchall():
        first = row['first'].strftime(date_fmt)
        last = row['last'].strftime(date_fmt)
        duration = pretty_ms(row['duration'] * 1000, unit_count=2)
        queue_count = f", {row['queued']:,} queued" if row['queued'] else ''
        print(f"{row['run']}: {first} / {last} [{duration.ljust(10, ' ')}]; {row['total']:,} total, {row['ok']:,} ok, {row['not-ok']:,} not-ok{queue_count}")
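# `pretty_ms` is imported from elsewhere in the package. A minimal sketch of the
# behaviour the call sites here rely on (a hypothetical stand-in, not the real
# implementation): render a millisecond count as "5h 32m 58.7s"-style text,
# optionally truncated to the `unit_count` largest units.
def pretty_ms_sketch(ms: float, *, unit_count: 'Optional[int]' = None, format_sub_ms: bool = False) -> str:
    from typing import Optional  # local import, to keep the sketch self-contained

    if format_sub_ms and ms < 1:
        return f"{ms:.3f}ms"  # keep sub-millisecond precision when asked to

    hours, rem = divmod(ms / 1000, 3600)
    minutes, seconds = divmod(rem, 60)

    parts = []
    if hours >= 1:
        parts.append(f"{int(hours)}h")
    if minutes >= 1:
        parts.append(f"{int(minutes)}m")
    parts.append(f"{seconds:.1f}s")

    return " ".join(parts if unit_count is None else parts[:unit_count])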
def audit(*, area_spec: Dict, area_code: str, area_catalog: str, student: Dict, run_id: int, curs: psycopg2.extensions.cursor) -> None:
    args = Arguments()
    stnum = student['stnum']

    logger.info("auditing #%s against %s %s", stnum, area_catalog, area_code)

    with sentry_sdk.configure_scope() as scope:
        scope.user = {"id": stnum}

    curs.execute("""
        INSERT INTO result (student_id, area_code, catalog, run, input_data, in_progress)
        VALUES (%(student_id)s, %(area_code)s, %(catalog)s, %(run)s, %(student)s, true)
        RETURNING id
    """, {
        "student_id": stnum,
        "area_code": area_code,
        "catalog": area_catalog,
        "run": run_id,
        "student": json.dumps(student),
    })

    row = curs.fetchone()
    result_id: int = cast(int, row[0])

    logger.info(f"result id = {result_id}")

    with sentry_sdk.configure_scope() as scope:
        scope.user = dict(id=stnum)
        scope.set_tag("area_code", area_code)
        scope.set_tag("catalog", area_catalog)
        scope.set_extra("result_id", result_id)

    try:
        for msg in run(args, area_spec=area_spec, student=student):
            if isinstance(msg, NoAuditsCompletedMsg):
                logger.critical('no audits completed')

            elif isinstance(msg, EstimateMsg):
                pass

            elif isinstance(msg, ProgressMsg):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)

                curs.execute("""
                    UPDATE result
                    SET iterations = %(count)s, duration = interval %(elapsed)s
                    WHERE id = %(result_id)s
                """, {
                    "result_id": result_id,
                    "count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                })

                logger.info(f"{msg.iters:,} at {avg_iter_time} per audit")

            elif isinstance(msg, ResultMsg):
                result = msg.result.to_dict()

                curs.execute("""
                    UPDATE result
                    SET iterations = %(total_count)s
                      , duration = interval %(elapsed)s
                      , per_iteration = interval %(avg_iter_time)s
                      , rank = %(rank)s
                      , max_rank = %(max_rank)s
                      , result = %(result)s::jsonb
                      , ok = %(ok)s
                      , ts = %(now)s
                      , gpa = %(gpa)s
                      , in_progress = false
                      , claimed_courses = %(claimed_courses)s::jsonb
                    WHERE id = %(result_id)s
                """, {
                    "result_id": result_id,
                    "total_count": msg.iters,
                    "elapsed": f"{msg.elapsed_ms}ms",
                    "avg_iter_time": f"{msg.avg_iter_ms}ms",
                    "result": json.dumps(result),
                    "claimed_courses": json.dumps(msg.result.keyed_claims()),
                    "rank": result["rank"],
                    "max_rank": result["max_rank"],
                    "gpa": result["gpa"],
                    "ok": result["ok"],
                    # we insert a Python now() instead of using the now() psql
                    # function, because sql's now() is the start time of the
                    # transaction, and we want this to be the end of the
                    # transaction
                    "now": datetime.datetime.now(),
                })

            else:
                logger.critical('unknown message %s', msg)

    except Exception as ex:
        sentry_sdk.capture_exception(ex)

        curs.execute("""
            UPDATE result
            SET in_progress = false, error = %(error)s
            WHERE id = %(result_id)s
        """, {
            "result_id": result_id,
            "error": json.dumps({"error": str(ex)}),
        })
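# An aside on the `now` parameter above: PostgreSQL itself distinguishes the
# transaction-start clock from the statement clock, so an alternative to
# passing a Python-side timestamp (a note only, not what this code does) would
# be clock_timestamp(), which returns the wall-clock time of the statement:
#
#   curs.execute("SELECT now(), clock_timestamp()")
#   txn_start, stmt_time = curs.fetchone()  # these differ within one transaction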
def branch(args: argparse.Namespace) -> None:
    fetch_if_needed(args)

    with sqlite_connect(args.db) as conn:
        print(f'clearing data for "{args.branch}"... ', end='', flush=True)
        conn.execute('DELETE FROM branch WHERE branch = ?', [args.branch])
        conn.execute('DELETE FROM branch_ip WHERE branch = ?', [args.branch])
        conn.commit()
        print('cleared')

    minimum_duration = parse_ms_str(args.minimum_duration)

    with sqlite_connect(args.db) as conn:
        # note: in SQLite, two-argument max(a, b) is the scalar maximum, so the
        # estimate below is the larger of (total work / workers) and the single
        # longest audit -- a lower bound on the parallel wall-clock time
        results = conn.execute('''
            SELECT count(duration) AS count,
                   coalesce(max(sum(duration) / :workers, max(duration)), 0) AS duration_s
            FROM baseline
            WHERE duration < :min
              AND CASE WHEN :code IS NULL THEN 1 = 1 ELSE code = :code END
        ''', {'min': minimum_duration.sec(), 'workers': args.workers, 'code': args.filter})

        count, estimated_duration_s = results.fetchone()

        pretty_min = pretty_ms(minimum_duration.ms())
        pretty_dur = pretty_ms(estimated_duration_s * 1000)
        print(f'{count:,} audits under {pretty_min} each: ~{pretty_dur} with {args.workers:,} workers')

        results = conn.execute('''
            SELECT catalog, code
            FROM baseline
            WHERE duration < :min
              AND CASE WHEN :code IS NULL THEN 1 = 1 ELSE code = :code END
            GROUP BY catalog, code
        ''', {'min': minimum_duration.sec(), 'code': args.filter})

        area_specs = load_areas(args, list(results))

        results = conn.execute('''
            SELECT stnum, catalog, code
            FROM baseline
            WHERE duration < :min
              AND CASE WHEN :code IS NULL THEN 1 = 1 ELSE code = :code END
            ORDER BY duration DESC, stnum, catalog, code
        ''', {'min': minimum_duration.sec(), 'code': args.filter})

        records = [(stnum, catalog, code) for stnum, catalog, code in results]

    print(f'running {len(records):,} audits...')

    with sqlite_connect(args.db) as conn:
        with ProcessPoolExecutor(max_workers=args.workers) as executor:
            futures = {
                executor.submit(
                    audit,
                    (stnum, catalog, code),
                    db=args.db,
                    area_spec=area_specs[f"{catalog}/{code}"],
                    timeout=float(minimum_duration.sec()),
                    run_id=args.branch,
                ): (stnum, catalog, code)
                for (stnum, catalog, code) in records
            }

            for future in tqdm.tqdm(as_completed(futures), total=len(futures), disable=None):
                stnum, catalog, code = futures[future]

                with sqlite_cursor(conn) as curs:
                    try:
                        db_args = future.result()
                    except TimeoutError as timeout:
                        print(timeout.args[0])
                        curs.execute('''
                            DELETE FROM branch_ip
                            WHERE stnum = :stnum AND catalog = :catalog AND code = :code AND branch = :branch
                        ''', timeout.args[1])
                        conn.commit()
                        continue
                    except Exception as exc:
                        print(f'{stnum} {catalog} {code} generated an exception: {exc}')
                        continue

                    assert db_args is not None

                    try:
                        curs.execute('''
                            INSERT INTO branch (branch, stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, result)
                            VALUES (:run, :stnum, :catalog, :code, :iterations, :duration, :gpa, :ok, :rank, :max_rank, json(:result))
                        ''', db_args)

                        curs.execute('''
                            DELETE FROM branch_ip
                            WHERE stnum = :stnum AND catalog = :catalog AND code = :code AND branch = :run
                        ''', db_args)
                    except sqlite3.Error as ex:
                        print(db_args)
                        print(db_args['stnum'], db_args['catalog'], db_args['code'], 'generated an exception', ex)
                        conn.rollback()
                        continue

                    conn.commit()
def main() -> int:  # noqa: C901
    DEFAULT_DIR = os.getenv('DP_STUDENT_DIR')

    parser = argparse.ArgumentParser()
    parser.add_argument('-w', '--workers', help="the number of worker processes to spawn", default=os.cpu_count())
    parser.add_argument('--dir', default=DEFAULT_DIR)
    parser.add_argument('--areas-dir', default=os.path.expanduser('~/Projects/degreepath-areas'))
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--invocation", action='store_true')
    parser.add_argument("-q", "--quiet", action='store_true')
    parser.add_argument("--paths", dest='show_paths', action='store_const', const=True, default=True)
    parser.add_argument("--no-paths", dest='show_paths', action='store_const', const=False)
    parser.add_argument("--ranks", dest='show_ranks', action='store_const', const=True, default=True)
    parser.add_argument("--no-ranks", dest='show_ranks', action='store_const', const=False)
    parser.add_argument("--table", action='store_true')
    parser.add_argument("-n", default=1, type=int)
    cli_args = parser.parse_args()

    # deduplicate, then duplicate if requested
    data = sorted(set(tuple(stnum_code.strip().split()) for stnum_code in sys.stdin)) * cli_args.n

    if not data:
        print('expects a list of "stnum catalog-year areacode" to stdin', file=sys.stderr)
        return 1

    if cli_args.table:
        print('stnum,catalog,area_code,gpa,rank,max', flush=True)

    for stnum, catalog, area_code in data:
        student_file = os.path.join(cli_args.dir, f"{stnum}.json")

        args = Arguments(print_all=False, transcript_only=cli_args.transcript)

        area_file = find_area(root=pathlib.Path(cli_args.areas_dir), area_catalog=int(catalog.split('-')[0]), area_code=area_code)

        if not area_file:
            print(f'could not find area spec for {area_code} at or below catalog {catalog}, under {cli_args.areas_dir}', file=sys.stderr)
            return 1

        if cli_args.invocation:
            print(f"python3 dp.py --student '{student_file}' --area '{area_file}'")
            continue

        student = load_student(student_file)
        area_spec = load_area(area_file)

        if not cli_args.quiet and not cli_args.table:
            print(f"auditing #{student['stnum']} against {area_file}", file=sys.stderr)

        try:
            for msg in run(args, area_spec=area_spec, student=student):
                if isinstance(msg, NoAuditsCompletedMsg):
                    print('no audits completed', file=sys.stderr)
                    return 2

                elif isinstance(msg, EstimateMsg):
                    print("estimate completed", file=sys.stderr)

                elif isinstance(msg, ProgressMsg):
                    if not cli_args.quiet:
                        avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                        print(f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})", file=sys.stderr)

                elif isinstance(msg, ResultMsg):
                    result = json.loads(json.dumps(msg.result.to_dict()))

                    if cli_args.table:
                        avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                        print(','.join([
                            stnum,
                            catalog,
                            area_code,
                            str(round(float(result['gpa']), 2)),
                            str(round(float(result['rank']), 2)),
                            str(round(float(result['max_rank']))),
                        ]), flush=True)
                    else:
                        print("\n" + "".join(summarize(
                            result=result,
                            transcript=msg.transcript,
                            count=msg.iters,
                            avg_iter_ms=msg.avg_iter_ms,
                            elapsed=pretty_ms(msg.elapsed_ms),
                            show_paths=cli_args.show_paths,
                            show_ranks=cli_args.show_ranks,
                            claims=msg.result.keyed_claims(),
                        )))

                else:
                    if not cli_args.quiet:
                        print('unknown message %s' % msg, file=sys.stderr)
                    return 1

        except Exception as ex:
            print(f"error during audit of #{student['stnum']} against {area_file}", file=sys.stderr)
            print(ex, file=sys.stderr)
            return 1

    return 0
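# Example invocation of the batch entry point above (hypothetical script name
# and paths), feeding one "stnum catalog-year areacode" triple per line on
# stdin and requesting CSV-style table output:
#
#   $ echo "123456 2019-20 150" | python3 batch.py --dir "$DP_STUDENT_DIR" --table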
def main() -> int:  # noqa: C901
    parser = argparse.ArgumentParser()
    parser.add_argument("--area", dest="area_file")
    parser.add_argument("--student", dest="student_file")
    parser.add_argument("--loglevel", dest="loglevel", choices=("warn", "debug", "info", "critical"), default="info")
    parser.add_argument("--json", action='store_true')
    parser.add_argument("--csv", action='store_true')
    parser.add_argument("--print-all", action='store_true')
    parser.add_argument("--stop-after", action='store', type=int)
    parser.add_argument("--progress-every", action='store', type=int, default=1_000)
    parser.add_argument("--estimate", action='store_true')
    parser.add_argument("--transcript", action='store_true')
    parser.add_argument("--gpa", action='store_true')
    parser.add_argument("--quiet", "-q", action='store_true')
    parser.add_argument("--tracemalloc-init", action='store_true')
    parser.add_argument("--tracemalloc-end", action='store_true')
    parser.add_argument("--tracemalloc-each", action='store_true')
    parser.add_argument("--paths", dest='show_paths', action='store_const', const=True, default=True)
    parser.add_argument("--no-paths", dest='show_paths', action='store_const', const=False)
    parser.add_argument("--ranks", dest='show_ranks', action='store_const', const=True, default=True)
    parser.add_argument("--no-ranks", dest='show_ranks', action='store_const', const=False)
    cli_args = parser.parse_args()

    loglevel = getattr(logging, cli_args.loglevel.upper())
    logging.basicConfig(level=loglevel, format=logformat)

    if cli_args.estimate:
        os.environ['DP_ESTIMATE'] = '1'

    has_tracemalloc = cli_args.tracemalloc_init or cli_args.tracemalloc_end or cli_args.tracemalloc_each

    args = Arguments(
        gpa_only=cli_args.gpa,
        print_all=cli_args.print_all,
        progress_every=cli_args.progress_every,
        stop_after=cli_args.stop_after,
        transcript_only=cli_args.transcript,
        estimate_only=cli_args.estimate,
    )

    if has_tracemalloc:
        import tracemalloc
        tracemalloc.start()

    first_progress_message = True
    top_mem_items: Dict[str, Dict[int, float]] = defaultdict(dict)
    tracemalloc_index = 0

    student = load_students(cli_args.student_file)[0]
    area_spec = load_areas(cli_args.area_file)[0]

    if not cli_args.quiet:
        print(f"auditing #{student['stnum']} against {cli_args.area_file}", file=sys.stderr)

    for msg in run(args, student=student, area_spec=area_spec):
        if isinstance(msg, NoAuditsCompletedMsg):
            logger.critical('no audits completed')
            return 2

        elif isinstance(msg, EstimateMsg):
            if not cli_args.quiet:
                print(f"{msg.estimate:,} estimated solution{'s' if msg.estimate != 1 else ''}", file=sys.stderr)

        elif isinstance(msg, ProgressMsg):
            if (cli_args.tracemalloc_init and first_progress_message) or cli_args.tracemalloc_each:
                snapshot = tracemalloc.take_snapshot()
                for k, v in process_top(snapshot):
                    top_mem_items[k][tracemalloc_index] = v
                tracemalloc_index += 1

            first_progress_message = False

            if not cli_args.quiet or (cli_args.tracemalloc_init or cli_args.tracemalloc_each):
                avg_iter_time = pretty_ms(msg.avg_iter_ms, format_sub_ms=True)
                print(f"{msg.iters:,} at {avg_iter_time} per audit (best: {msg.best_rank})", file=sys.stderr)

        elif isinstance(msg, ResultMsg):
            if not cli_args.quiet:
                print(result_str(
                    msg,
                    as_json=cli_args.json,
                    as_csv=cli_args.csv,
                    gpa_only=cli_args.gpa,
                    show_paths=cli_args.show_paths,
                    show_ranks=cli_args.show_ranks,
                ))

        else:
            if not cli_args.quiet:
                logger.critical('unknown message %s', msg)
            return 1

    if cli_args.tracemalloc_end:
        snapshot = tracemalloc.take_snapshot()
        for k, v in process_top(snapshot):
            top_mem_items[k][tracemalloc_index] = v

    if has_tracemalloc:
        longest = max(index for item in top_mem_items.values() for index, datapoint in item.items())

        # header row: one column per snapshot index, scaled by 10,000
        # (presumably the iteration count at which each snapshot was taken)
        for tracemalloc_index in range(0, longest + 1):
            print(tracemalloc_index * 10_000, end='\t')
        print()

        for file, datapoints in top_mem_items.items():
            print(file, end='\t')
            for i in range(0, longest + 1):
                print(f"{datapoints.get(i, 0):.1f}", end='\t')
            print()

    return 0
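# `process_top` is defined elsewhere in the package; a minimal sketch of the
# contract the loops above rely on -- mapping a tracemalloc snapshot to
# (filename, size-in-KiB) pairs for the heaviest allocation sites. This is a
# hypothetical stand-in, shown only to make the data flow concrete:
def process_top_sketch(snapshot: 'tracemalloc.Snapshot', limit: int = 10) -> 'Iterator[Tuple[str, float]]':
    from typing import Iterator, Tuple  # local imports, to keep the sketch self-contained
    import tracemalloc

    # group live allocations by source file and yield the top `limit`, in KiB
    for stat in snapshot.statistics('filename')[:limit]:
        yield stat.traceback[0].filename, stat.size / 1024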
def run_batch(args: argparse.Namespace, *, baseline: bool) -> None:
    fetch_if_needed(args)

    with sqlite_connect(args.db) as conn, sqlite_transaction(conn):
        if baseline:
            print('clearing baseline data... ', end='', flush=True)
            conn.execute('DELETE FROM baseline')
        else:
            print(f'clearing data for "{args.branch}"... ', end='', flush=True)
            conn.execute('DELETE FROM branch WHERE branch = ?', [args.branch])
        print('cleared')

    minimum_duration = parse_ms_str(args.minimum_duration)

    with sqlite_connect(args.db) as conn:
        if baseline:
            results = conn.execute('''
                SELECT stnum, catalog, code, duration, catalog || '/' || code AS area_key
                FROM server_data
                WHERE duration < :min
                ORDER BY duration DESC, stnum, catalog, code
            ''', {'min': minimum_duration.sec()})
        else:
            results = conn.execute('''
                SELECT stnum, catalog, code, duration, catalog || '/' || code AS area_key
                FROM baseline
                WHERE duration < :min
                ORDER BY duration DESC, stnum, catalog, code
            ''', {'min': minimum_duration.sec()})

        records = [Record(**r) for r in results]
        if args.filter is not None:
            records = [r for r in records if r.code == args.filter]

        estimated_duration_s = sum(r.duration for r in records) / args.workers
        pretty_dur = pretty_ms(estimated_duration_s * 1000)
        pretty_min = pretty_ms(minimum_duration.ms())
        print(f'{len(records):,} audits under {pretty_min} each: ~{pretty_dur} with {args.workers:,} workers')

        if baseline and args.copy:
            with sqlite_transaction(conn):
                conn.execute('''
                    INSERT INTO baseline (stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result)
                    SELECT stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result
                    FROM server_data
                    WHERE duration < :min
                ''', {'min': minimum_duration.sec()})
            return

        area_codes = set((r.catalog, r.code) for r in records)
        area_specs = load_areas(args, [{"catalog": catalog, "code": code} for catalog, code in area_codes])

    remaining_records = list(records)

    print(f'running {len(records):,} audits...')

    timeout: Optional[float] = None
    if baseline:
        timeout = float(minimum_duration.sec()) * 2.5

    with \
            sqlite_connect(args.db) as conn, \
            sqlite_transaction(conn), \
            ProcessPoolExecutor(max_workers=args.workers) as executor:
        futures = {
            executor.submit(
                audit,
                (r.stnum, r.catalog, r.code),
                db=args.db,
                area_spec=area_specs[r.area_key],
                timeout=timeout,
                run_id=getattr(args, 'branch', 'None'),
            ): r
            for r in records
            if r.area_key in area_specs
        }

        pbar = tqdm.tqdm(total=len(futures), disable=None)

        upcoming = [f"{r.stnum}:{r.code}" for r in remaining_records[:args.workers]]
        pbar.set_description(', '.join(upcoming))

        for future in as_completed(futures):
            record = futures[future]

            try:
                remaining_records.remove(record)
                upcoming = [f"{r.stnum}:{r.code}" for r in remaining_records[:args.workers]]
            except ValueError:
                pass

            pbar.update(n=1)
            # pbar.write(f"completed ({record.stnum}, {record.code})")
            pbar.set_description(', '.join(upcoming))

            try:
                db_args = future.result()
            except TimeoutError as err:
                print(err.args[0])
                continue
            except Exception as exc:
                print(f'{record.stnum} {record.catalog} {record.code} generated an exception: {exc}')
                continue

            assert db_args is not None, f"{record.stnum}, {record.catalog}, {record.code} returned None"

            if baseline:
                conn.execute('''
                    INSERT INTO baseline (stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result, version)
                    VALUES (:stnum, :catalog, :code, :iterations, :duration, :gpa, :ok, :rank, :max_rank, :status, json(:result), :version)
                ''', db_args)
            else:
                conn.execute('''
                    INSERT INTO branch (branch, stnum, catalog, code, iterations, duration, gpa, ok, rank, max_rank, status, result, version)
                    VALUES (:run, :stnum, :catalog, :code, :iterations, :duration, :gpa, :ok, :rank, :max_rank, :status, json(:result), :version)
                ''', db_args)
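# `Record` is defined elsewhere; from the SELECT columns and the attribute
# access in run_batch it plausibly looks like this NamedTuple (a sketch, not
# necessarily the repo's actual definition):
from typing import NamedTuple

class RecordSketch(NamedTuple):
    stnum: str
    catalog: str
    code: str
    duration: float  # seconds, as stored in the baseline/server_data tables
    area_key: str    # "catalog/code", the key into the loaded area_specs dict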