def get_submission_result(cls, submission=None, dataset=None, **kwargs):
    """Create a submission result."""
    task = None
    task = submission.task if submission is not None else task
    task = dataset.task if dataset is not None else task
    submission = submission if submission is not None \
        else cls.get_submission(task=task)
    dataset = dataset if dataset is not None \
        else cls.get_dataset(task=task)
    assert submission.task == dataset.task
    args = {
        "submission": submission,
        "dataset": dataset,
    }
    args.update(kwargs)
    submission_result = SubmissionResult(**args)
    return submission_result

def add_submission_result(self, submission=None, dataset=None, **kwargs):
    """Add a submission result."""
    task = None
    task = submission.task if submission is not None else task
    task = dataset.task if dataset is not None else task
    submission = submission \
        if submission is not None else self.add_submission(task=task)
    dataset = dataset \
        if dataset is not None else self.add_dataset(task=task)
    assert submission.task == dataset.task
    args = {
        "submission": submission,
        "dataset": dataset,
    }
    args.update(kwargs)
    submission_result = SubmissionResult(**args)
    self.session.add(submission_result)
    return submission_result

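# Illustrative usage sketch (not part of the helpers above): inside a test
# class that mixes in these helpers, a result with a consistent submission
# and dataset can be created in one call; any extra keyword arguments are
# passed straight to the SubmissionResult constructor. The names used below
# (add_submission_result, session) are the ones appearing in the helper
# itself; everything else is made up for illustration.
#
#     result = self.add_submission_result()
#     self.session.flush()
#     assert result.submission.task is result.dataset.task
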
import logging

from cms.io import PriorityQueue, QueueItem
from cms.db import Dataset, Evaluation, Submission, SubmissionResult, \
    Task, Testcase, UserTest, UserTestResult
from cms.grading.Job import CompilationJob, EvaluationJob


logger = logging.getLogger(__name__)


MAX_COMPILATION_TRIES = 3
MAX_EVALUATION_TRIES = 3
MAX_USER_TEST_COMPILATION_TRIES = 3
MAX_USER_TEST_EVALUATION_TRIES = 3

FILTER_SUBMISSION_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True)))
FILTER_SUBMISSION_RESULTS_TO_COMPILE = (
    (~SubmissionResult.filter_compiled()) &
    (SubmissionResult.compilation_tries < MAX_COMPILATION_TRIES))
FILTER_SUBMISSION_RESULTS_TO_EVALUATE = (
    SubmissionResult.filter_compilation_succeeded() &
    (~SubmissionResult.filter_evaluated()) &
    (SubmissionResult.evaluation_tries < MAX_EVALUATION_TRIES))

FILTER_USER_TEST_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True)))
FILTER_USER_TEST_RESULTS_TO_COMPILE = (
    (~UserTestResult.filter_compiled()) &
    (UserTestResult.compilation_tries < MAX_COMPILATION_TRIES))
FILTER_USER_TEST_RESULTS_TO_EVALUATE = (
    UserTestResult.filter_compilation_succeeded() &
    (~UserTestResult.filter_evaluated()) &
    (UserTestResult.evaluation_tries < MAX_EVALUATION_TRIES))

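# Hedged sketch (not taken from EvaluationService): one way these filter
# expressions could be combined in a SQLAlchemy query to list the submission
# results that still need compiling on a dataset that is either active or
# flagged for autojudging. The exact joins used by the real service may
# differ; the function name below is made up.
def sketch_results_to_compile(session):
    return session.query(SubmissionResult)\
        .join(Dataset, SubmissionResult.dataset_id == Dataset.id)\
        .join(Task, Dataset.task_id == Task.id)\
        .filter(FILTER_SUBMISSION_DATASETS_TO_JUDGE)\
        .filter(FILTER_SUBMISSION_RESULTS_TO_COMPILE)\
        .all()
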
def submissions_status(contest_id):
    """Returns a dictionary of statistics about the number of
    submissions on a specific status in the given contest.

    There are six statuses: evaluated, compilation failed,
    evaluating, compiling, maximum number of attempts of
    compilations reached, the same for evaluations. The last two
    should not happen and require a check from the admin.

    The status of a submission is checked on its result for the
    active dataset of its task.

    contest_id (int|None): counts are restricted to this contest,
        or None for no restrictions.

    return (dict): statistics on the submissions.

    """
    # TODO: at the moment this counts all submission results for
    # the live datasets. It is interesting to show also numbers
    # for the datasets with autojudge, and for all datasets.
    stats = {}
    with SessionGen() as session:
        base_query = session\
            .query(func.count(SubmissionResult.submission_id))\
            .select_from(SubmissionResult)\
            .join(Dataset)\
            .join(Task, Dataset.task_id == Task.id)\
            .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
        if contest_id is not None:
            base_query = base_query\
                .filter(Task.contest_id == contest_id)

        compiled = base_query.filter(SubmissionResult.filter_compiled())
        evaluated = compiled.filter(SubmissionResult.filter_evaluated())
        not_compiled = base_query.filter(
            not_(SubmissionResult.filter_compiled()))
        not_evaluated = compiled.filter(
            SubmissionResult.filter_compilation_succeeded(),
            not_(SubmissionResult.filter_evaluated()))

        queries = {}
        queries['compiling'] = not_compiled.filter(
            SubmissionResult.compilation_tries <
            EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
        queries['max_compilations'] = not_compiled.filter(
            SubmissionResult.compilation_tries >=
            EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
        queries['compilation_fail'] = base_query.filter(
            SubmissionResult.filter_compilation_failed())
        queries['evaluating'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries <
            EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
        queries['max_evaluations'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries >=
            EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
        queries['scoring'] = evaluated.filter(
            not_(SubmissionResult.filter_scored()))
        queries['scored'] = evaluated.filter(
            SubmissionResult.filter_scored())

        total_query = session\
            .query(func.count(Submission.id))\
            .select_from(Submission)\
            .join(Task, Submission.task_id == Task.id)
        if contest_id is not None:
            total_query = total_query\
                .filter(Task.contest_id == contest_id)
        queries['total'] = total_query

        stats = {}
        keys = list(queries.keys())
        results = queries[keys[0]].union_all(
            *(queries[key] for key in keys[1:])).all()
        for i, k in enumerate(keys):
            stats[k] = results[i][0]

    # Submissions that still have no result row for the active dataset are
    # counted as compiling: sum(stats.values()) already includes 'total',
    # so 2 * total - sum gives exactly the number of missing results.
    stats['compiling'] += 2 * stats['total'] - sum(stats.values())

    return stats

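# Purely illustrative example of the dictionary returned above (the counts
# are made up); note that 'compiling' also includes submissions that do not
# yet have a result row for the active dataset.
#
#     {'compiling': 2, 'max_compilations': 0, 'compilation_fail': 5,
#      'evaluating': 3, 'max_evaluations': 0, 'scoring': 1,
#      'scored': 120, 'total': 131}
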
def write_results(self, items):
    """Receive worker results from the cache and write them to the DB.

    Grouping results together by object (i.e., submission result
    or user test result) and type (compilation or evaluation)
    allows this method to talk less to the DB, for example by
    retrieving datasets and submission results only once instead
    of once for every result.

    items ([(operation, Result)]): the results received by ES but
        not yet written to the db.

    """
    logger.info("Starting commit process...")

    # Reorganize the results by submission/usertest result and
    # operation type (i.e., group together the testcase
    # evaluations for the same submission and dataset).
    by_object_and_type = defaultdict(list)
    for operation, result in items:
        t = (operation.type_, operation.object_id, operation.dataset_id)
        by_object_and_type[t].append((operation, result))

    with SessionGen() as session:
        for key, operation_results in by_object_and_type.items():
            type_, object_id, dataset_id = key

            dataset = Dataset.get_from_id(dataset_id, session)
            if dataset is None:
                logger.error("Could not find dataset %d in the database.",
                             dataset_id)
                continue

            # Get submission or user test results.
            if type_ in [ESOperation.COMPILATION, ESOperation.EVALUATION]:
                object_ = Submission.get_from_id(object_id, session)
                if object_ is None:
                    logger.error("Could not find submission %d "
                                 "in the database.", object_id)
                    continue
                object_result = object_.get_result_or_create(dataset)
            else:
                object_ = UserTest.get_from_id(object_id, session)
                if object_ is None:
                    logger.error("Could not find user test %d "
                                 "in the database.", object_id)
                    continue
                object_result = object_.get_result_or_create(dataset)

            self.write_results_one_object_and_type(
                session, object_result, operation_results)

        logger.info("Committing evaluations...")
        session.commit()

        num_testcases_per_dataset = dict()
        for type_, object_id, dataset_id in by_object_and_type.keys():
            if type_ == ESOperation.EVALUATION:
                if dataset_id not in num_testcases_per_dataset:
                    num_testcases_per_dataset[dataset_id] = session\
                        .query(func.count(Testcase.id))\
                        .filter(Testcase.dataset_id == dataset_id).scalar()
                num_evaluations = session\
                    .query(func.count(Evaluation.id)) \
                    .filter(Evaluation.dataset_id == dataset_id) \
                    .filter(Evaluation.submission_id == object_id).scalar()
                if num_evaluations == num_testcases_per_dataset[dataset_id]:
                    submission_result = SubmissionResult.get_from_id(
                        (object_id, dataset_id), session)
                    submission_result.set_evaluation_outcome()

        logger.info("Committing evaluation outcomes...")
        session.commit()

        logger.info("Ending operations for %s objects...",
                    len(by_object_and_type))
        for type_, object_id, dataset_id in by_object_and_type.keys():
            if type_ == ESOperation.COMPILATION:
                submission_result = SubmissionResult.get_from_id(
                    (object_id, dataset_id), session)
                self.compilation_ended(submission_result)
            elif type_ == ESOperation.EVALUATION:
                submission_result = SubmissionResult.get_from_id(
                    (object_id, dataset_id), session)
                if submission_result.evaluated():
                    self.evaluation_ended(submission_result)
            elif type_ == ESOperation.USER_TEST_COMPILATION:
                user_test_result = UserTestResult.get_from_id(
                    (object_id, dataset_id), session)
                self.user_test_compilation_ended(user_test_result)
            elif type_ == ESOperation.USER_TEST_EVALUATION:
                user_test_result = UserTestResult.get_from_id(
                    (object_id, dataset_id), session)
                self.user_test_evaluation_ended(user_test_result)

    logger.info("Done")

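# Illustrative shape of by_object_and_type after the grouping loop above
# (identifiers and counts are made up): keys are (operation type, object id,
# dataset id) tuples, values are the (operation, result) pairs to write.
#
#     {(ESOperation.EVALUATION, 42, 7): [(op_tc1, res_tc1), (op_tc2, res_tc2)],
#      (ESOperation.COMPILATION, 43, 7): [(op_comp, res_comp)]}
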
import logging

from cms.io import QueueItem
from cms.db import Dataset, Submission, SubmissionResult, \
    Task


logger = logging.getLogger(__name__)


FILTER_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_SUBMISSION_RESULTS_TO_SCORE = (
    (~SubmissionResult.filter_scored()) &
    ((SubmissionResult.filter_compilation_failed()) |
     (SubmissionResult.filter_evaluated()))
)


def get_operations(session):
    """Return all the operations to do for all submissions.

    session (Session): the database session to use.

    return ([ScoringOperation, float]): a list of operations and
        timestamps.

    """
    # Retrieve all the compilation operations for submissions

def submissions_status(self):
    """Returns a dictionary of statistics about the number of
    submissions on a specific status. There are seven statuses:
    evaluated, compilation failed, evaluating, compiling, maximum
    number of attempts of compilations reached, the same for
    evaluations, and finally 'I have no idea what's happening'.
    The last three should not happen and require a check from the
    admin.

    The status of a submission is checked on its result for the
    active dataset of its task.

    return (dict): statistics on the submissions.

    """
    # TODO: at the moment this counts all submission results for
    # the live datasets. It is interesting to show also numbers
    # for the datasets with autojudge, and for all datasets.
    stats = {}
    with SessionGen() as session:
        base_query = session\
            .query(func.count(SubmissionResult.submission_id))\
            .select_from(SubmissionResult)\
            .join(Dataset)\
            .join(Task, Dataset.task_id == Task.id)\
            .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
        if self.contest_id is not None:
            base_query = base_query\
                .filter(Task.contest_id == self.contest_id)

        compiled = base_query.filter(SubmissionResult.filter_compiled())
        evaluated = compiled.filter(SubmissionResult.filter_evaluated())
        not_compiled = base_query.filter(
            not_(SubmissionResult.filter_compiled()))
        not_evaluated = compiled.filter(
            SubmissionResult.filter_compilation_succeeded(),
            not_(SubmissionResult.filter_evaluated()))

        queries = {}
        queries['compiling'] = not_compiled.filter(
            SubmissionResult.compilation_tries <
            EvaluationService.MAX_COMPILATION_TRIES)
        queries['max_compilations'] = not_compiled.filter(
            SubmissionResult.compilation_tries >=
            EvaluationService.MAX_COMPILATION_TRIES)
        queries['compilation_fail'] = base_query.filter(
            SubmissionResult.filter_compilation_failed())
        queries['evaluating'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries <
            EvaluationService.MAX_EVALUATION_TRIES)
        queries['max_evaluations'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries >=
            EvaluationService.MAX_EVALUATION_TRIES)
        queries['scoring'] = evaluated.filter(
            not_(SubmissionResult.filter_scored()))
        queries['scored'] = evaluated.filter(
            SubmissionResult.filter_scored())
        queries['total'] = base_query

        stats = {}
        keys = queries.keys()
        results = queries[keys[0]].union_all(
            *(queries[key] for key in keys[1:])).all()
        for i in range(len(keys)):
            stats[keys[i]] = results[i][0]

    stats['invalid'] = 2 * stats['total'] - sum(stats.itervalues())

    return stats

logger = logging.getLogger(__name__)


MAX_COMPILATION_TRIES = 3
MAX_EVALUATION_TRIES = 3
MAX_USER_TEST_COMPILATION_TRIES = 3
MAX_USER_TEST_EVALUATION_TRIES = 3

FILTER_SUBMISSION_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_SUBMISSION_RESULTS_TO_COMPILE = (
    (~SubmissionResult.filter_compiled()) &
    (SubmissionResult.compilation_tries < MAX_COMPILATION_TRIES)
)
FILTER_SUBMISSION_RESULTS_TO_EVALUATE = (
    SubmissionResult.filter_compilation_succeeded() &
    (~SubmissionResult.filter_evaluated()) &
    (SubmissionResult.evaluation_tries < MAX_EVALUATION_TRIES)
)

FILTER_USER_TEST_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_USER_TEST_RESULTS_TO_COMPILE = (
    (~UserTestResult.filter_compiled()) &

def submissions_status(contest_id):
    """Returns a dictionary of statistics about the number of
    submissions on a specific status in the given contest.

    There are six statuses: evaluated, compilation failed,
    evaluating, compiling, maximum number of attempts of
    compilations reached, the same for evaluations. The last two
    should not happen and require a check from the admin.

    The status of a submission is checked on its result for the
    active dataset of its task.

    contest_id (int|None): counts are restricted to this contest,
        or None for no restrictions.

    return (dict): statistics on the submissions.

    """
    # TODO: at the moment this counts all submission results for
    # the live datasets. It is interesting to show also numbers
    # for the datasets with autojudge, and for all datasets.
    stats = {}
    with SessionGen() as session:
        base_query = session\
            .query(func.count(SubmissionResult.submission_id))\
            .select_from(SubmissionResult)\
            .join(Dataset)\
            .join(Task, Dataset.task_id == Task.id)\
            .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
        if contest_id is not None:
            base_query = base_query\
                .filter(Task.contest_id == contest_id)

        compiled = base_query.filter(SubmissionResult.filter_compiled())
        evaluated = compiled.filter(SubmissionResult.filter_evaluated())
        not_compiled = base_query.filter(
            not_(SubmissionResult.filter_compiled()))
        not_evaluated = compiled.filter(
            SubmissionResult.filter_compilation_succeeded(),
            not_(SubmissionResult.filter_evaluated()))

        queries = {}
        queries['compiling'] = not_compiled.filter(
            SubmissionResult.compilation_tries <
            EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
        queries['max_compilations'] = not_compiled.filter(
            SubmissionResult.compilation_tries >=
            EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
        queries['compilation_fail'] = base_query.filter(
            SubmissionResult.filter_compilation_failed())
        queries['evaluating'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries <
            EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
        queries['max_evaluations'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries >=
            EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
        queries['scoring'] = evaluated.filter(
            not_(SubmissionResult.filter_scored()))
        queries['scored'] = evaluated.filter(
            SubmissionResult.filter_scored())

        total_query = session\
            .query(func.count(Submission.id))\
            .select_from(Submission)\
            .join(Task, Submission.task_id == Task.id)
        if contest_id is not None:
            total_query = total_query\
                .filter(Task.contest_id == contest_id)
        queries['total'] = total_query

        stats = {}
        keys = list(iterkeys(queries))
        results = queries[keys[0]].union_all(
            *(queries[key] for key in keys[1:])).all()
        for i, k in enumerate(keys):
            stats[k] = results[i][0]

    stats['compiling'] += 2 * stats['total'] - sum(itervalues(stats))

    return stats

"""The ScoringService operation class, and related functions to compute sets of operations to do. """ import logging from cms.db import Dataset, Submission, SubmissionResult, \ Task from cms.io import QueueItem logger = logging.getLogger(__name__) FILTER_DATASETS_TO_JUDGE = ((Dataset.id == Task.active_dataset_id) | (Dataset.autojudge.is_(True))) FILTER_SUBMISSION_RESULTS_TO_SCORE = ((~SubmissionResult.filter_scored()) & ( (SubmissionResult.filter_compilation_failed()) | (SubmissionResult.filter_evaluated()))) def get_operations(session): """Return all the operations to do for all submissions. session (Session): the database session to use. return ([ScoringOperation, float]): a list of operations and timestamps. """ # Retrieve all the compilation operations for submissions # already having a result for a dataset to judge.
def submissions_status(self):
    """Returns a dictionary of statistics about the number of
    submissions on a specific status. There are seven statuses:
    evaluated, compilation failed, evaluating, compiling, maximum
    number of attempts of compilations reached, the same for
    evaluations, and finally 'I have no idea what's happening'.
    The last three should not happen and require a check from the
    admin.

    The status of a submission is checked on its result for the
    active dataset of its task.

    return (dict): statistics on the submissions.

    """
    # TODO: at the moment this counts all submission results for
    # the live datasets. It is interesting to show also numbers
    # for the datasets with autojudge, and for all datasets.
    stats = {}
    with SessionGen() as session:
        base_query = session\
            .query(func.count(SubmissionResult.submission_id))\
            .select_from(SubmissionResult)\
            .join(Dataset)\
            .join(Task, Dataset.task_id == Task.id)\
            .filter(Task.active_dataset_id == SubmissionResult.dataset_id)\
            .filter(Task.contest_id == self.contest_id)

        compiled = base_query.filter(SubmissionResult.filter_compiled())
        evaluated = compiled.filter(SubmissionResult.filter_evaluated())
        not_compiled = base_query.filter(
            not_(SubmissionResult.filter_compiled()))
        not_evaluated = compiled.filter(
            SubmissionResult.filter_compilation_succeeded(),
            not_(SubmissionResult.filter_evaluated()))

        queries = {}
        queries['compiling'] = not_compiled.filter(
            SubmissionResult.compilation_tries <
            EvaluationService.MAX_COMPILATION_TRIES)
        queries['max_compilations'] = not_compiled.filter(
            SubmissionResult.compilation_tries >=
            EvaluationService.MAX_COMPILATION_TRIES)
        queries['compilation_fail'] = base_query.filter(
            SubmissionResult.filter_compilation_failed())
        queries['evaluating'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries <
            EvaluationService.MAX_EVALUATION_TRIES)
        queries['max_evaluations'] = not_evaluated.filter(
            SubmissionResult.evaluation_tries >=
            EvaluationService.MAX_EVALUATION_TRIES)
        queries['scoring'] = evaluated.filter(
            not_(SubmissionResult.filter_scored()))
        queries['scored'] = evaluated.filter(
            SubmissionResult.filter_scored())
        queries['total'] = base_query

        stats = {}
        keys = queries.keys()
        results = queries[keys[0]].union_all(
            *(queries[key] for key in keys[1:])).all()
        for i in range(len(keys)):
            stats[keys[i]] = results[i][0]

    stats['invalid'] = 2 * stats['total'] - sum(stats.itervalues())

    return stats

import logging

from cms.db import Dataset, Submission, SubmissionResult, \
    Task
from cms.io import QueueItem


logger = logging.getLogger(__name__)


FILTER_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_SUBMISSION_RESULTS_TO_SCORE = (
    (~SubmissionResult.filter_scored()) &
    ((SubmissionResult.filter_compilation_failed()) |
     (SubmissionResult.filter_evaluated()))
)


def get_operations(session):
    """Return all the operations to do for all submissions.

    session (Session): the database session to use.

    return ([ScoringOperation, float]): a list of operations and
        timestamps.

    """
    # Retrieve all the compilation operations for submissions

def write_results(self, items):
    """Receive worker results from the cache and write them to the DB.

    Grouping results together by object (i.e., submission result
    or user test result) and type (compilation or evaluation)
    allows this method to talk less to the DB, for example by
    retrieving datasets and submission results only once instead
    of once for every result.

    items ([(operation, Result)]): the results received by ES but
        not yet written to the db.

    """
    logger.info("Starting commit process...")

    # Reorganize the results by submission/usertest result and
    # operation type (i.e., group together the testcase
    # evaluations for the same submission and dataset).
    by_object_and_type = defaultdict(list)
    for operation, result in items:
        t = (operation.type_, operation.object_id, operation.dataset_id)
        by_object_and_type[t].append((operation, result))

    with SessionGen() as session:
        for key, operation_results in by_object_and_type.items():
            type_, object_id, dataset_id = key

            dataset = Dataset.get_from_id(dataset_id, session)
            if dataset is None:
                logger.error("Could not find dataset %d in the database.",
                             dataset_id)
                continue

            # Get submission or user test results.
            if type_ in [ESOperation.COMPILATION, ESOperation.EVALUATION]:
                object_ = Submission.get_from_id(object_id, session)
                if object_ is None:
                    logger.error("Could not find submission %d "
                                 "in the database.", object_id)
                    continue
                object_result = object_.get_result_or_create(dataset)
            else:
                object_ = UserTest.get_from_id(object_id, session)
                if object_ is None:
                    logger.error("Could not find user test %d "
                                 "in the database.", object_id)
                    continue
                object_result = object_.get_result_or_create(dataset)

            self.write_results_one_object_and_type(
                session, object_result, operation_results)

        logger.info("Committing evaluations...")
        session.commit()

        num_testcases_per_dataset = dict()
        for type_, object_id, dataset_id in by_object_and_type.keys():
            if type_ == ESOperation.EVALUATION:
                if dataset_id not in num_testcases_per_dataset:
                    num_testcases_per_dataset[dataset_id] = session\
                        .query(func.count(Testcase.id))\
                        .filter(Testcase.dataset_id == dataset_id).scalar()
                num_evaluations = session\
                    .query(func.count(Evaluation.id)) \
                    .filter(Evaluation.dataset_id == dataset_id) \
                    .filter(Evaluation.submission_id == object_id).scalar()
                if num_evaluations == num_testcases_per_dataset[dataset_id]:
                    submission_result = SubmissionResult.get_from_id(
                        (object_id, dataset_id), session)
                    submission_result.set_evaluation_outcome()

        logger.info("Committing evaluation outcomes...")
        session.commit()

        logger.info("Ending operations for %s objects...",
                    len(by_object_and_type))
        for type_, object_id, dataset_id in by_object_and_type.keys():
            if type_ == ESOperation.COMPILATION:
                submission_result = SubmissionResult.get_from_id(
                    (object_id, dataset_id), session)
                self.compilation_ended(submission_result)
            elif type_ == ESOperation.EVALUATION:
                submission_result = SubmissionResult.get_from_id(
                    (object_id, dataset_id), session)
                if submission_result.evaluated():
                    self.evaluation_ended(submission_result)
            elif type_ == ESOperation.USER_TEST_COMPILATION:
                user_test_result = UserTestResult.get_from_id(
                    (object_id, dataset_id), session)
                self.user_test_compilation_ended(user_test_result)
            elif type_ == ESOperation.USER_TEST_EVALUATION:
                user_test_result = UserTestResult.get_from_id(
                    (object_id, dataset_id), session)
                self.user_test_evaluation_ended(user_test_result)

    logger.info("Done")

def compute_contest_metrics(sql_session):
    metrics = {}
    descs = {}

    sub_full_query = sql_session.query(Contest.name, Task.name, Team.code,
                                       User.username,
                                       func.count(Submission.id))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Submission, Submission.participation_id == Participation.id)\
        .join(Task, Task.id == Submission.task_id)\
        .group_by(Contest.id, Task.id, Team.id, User.id)
    sub_official_counts = sub_full_query.filter(Submission.official).all()
    sub_unofficial_counts = sub_full_query.filter(not_(
        Submission.official)).all()

    descs['submissions_total'] = ('gauge', 'status = official | unofficial')
    metrics['submissions_total'] = {}
    for cs, status in [(sub_official_counts, 'official'),
                       (sub_unofficial_counts, 'unofficial')]:
        for c in cs:
            cname, taskname, teamname, uname, count = c
            key = (('contest', cname), ('task', taskname),
                   ('team', teamname), ('user', uname), ('status', status))
            metrics['submissions_total'][key] = count

    res_full_query = sql_session.query(
        Contest.name, Task.name, Team.code, User.username,
        Dataset.description, Dataset.id == Task.active_dataset_id,
        Dataset.autojudge, func.count(SubmissionResult.submission_id))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Submission, Submission.participation_id == Participation.id)\
        .join(Task, Task.id == Submission.task_id)\
        .join(SubmissionResult,
              SubmissionResult.submission_id == Submission.id)\
        .join(Dataset, Dataset.id == SubmissionResult.dataset_id)\
        .group_by(Contest.id, Task.id, Team.id, User.id, Dataset.id)

    res_compiling_query = res_full_query.filter(
        not_(SubmissionResult.filter_compiled()))
    res_evaluating_query = res_full_query.filter(
        SubmissionResult.filter_compilation_succeeded(),
        not_(SubmissionResult.filter_evaluated()))
    res_evaluated_query = res_full_query.filter(
        SubmissionResult.filter_compilation_succeeded(),
        SubmissionResult.filter_evaluated())

    res_compiling_ok = res_compiling_query.filter(
        SubmissionResult.compilation_tries <
        EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)\
        .all()
    res_compiling_stop = res_compiling_query.filter(
        SubmissionResult.compilation_tries >=
        EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)\
        .all()
    res_compilation_failed = res_full_query.filter(
        SubmissionResult.filter_compilation_failed())\
        .all()
    res_evaluating_ok = res_evaluating_query.filter(
        SubmissionResult.evaluation_tries <
        EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)\
        .all()
    res_evaluating_stop = res_evaluating_query.filter(
        SubmissionResult.evaluation_tries >=
        EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)\
        .all()
    res_scoring = res_evaluated_query.filter(
        not_(SubmissionResult.filter_scored()))\
        .all()
    res_scored = res_evaluated_query.filter(
        SubmissionResult.filter_scored())\
        .all()

    judgements_list = [
        (res_compiling_ok, 'compiling'),
        (res_compiling_stop, 'stuck_in_compilation'),
        (res_compilation_failed, 'compilation_failed'),
        (res_evaluating_ok, 'evaluating'),
        (res_evaluating_stop, 'stuck_in_evaluation'),
        (res_scoring, 'scoring'),
        (res_scored, 'scored'),
    ]
    status_list = " | ".join(map(lambda l: l[1], judgements_list))
    descs['judgements_total'] = (
        'gauge',
        'status = {}\\ndataset_status = live | active | inactive'.format(
            status_list))
    metrics['judgements_total'] = {}
    for cs, status in judgements_list:
        for c in cs:
            cname, taskname, teamname, uname, \
                ds_desc, ds_live, ds_autojudge, count = c
            ds_status = get_dataset_status(ds_live, ds_autojudge)
            key = (('contest', cname), ('task', taskname),
                   ('team', teamname), ('user', uname),
                   ('dataset', ds_desc), ('dataset_status', ds_status),
                   ('status', status))
            metrics['judgements_total'][key] = count

    question_query = sql_session.query(Contest.name, Team.code,
                                       User.username,
                                       func.count(Question.id))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Question, Question.participation_id == Participation.id)\
        .group_by(Contest.id, Team.id, User.id)
    question_answered = question_query.filter(
        Question.reply_timestamp.isnot(None)).all()
    question_ignored = question_query.filter(Question.ignored.is_(True)).all()
    question_pending = question_query.filter(
        Question.reply_timestamp.is_(None),
        Question.ignored.is_(False)).all()

    question_list = [
        (question_answered, 'answered'),
        (question_ignored, 'ignored'),
        (question_pending, 'pending'),
    ]
    status_list = " | ".join(map(lambda l: l[1], question_list))
    descs['questions_total'] = ('gauge', 'status = {}'.format(status_list))
    metrics['questions_total'] = {}
    for qs, status in question_list:
        for q in qs:
            cname, tname, uname, count = q
            key = (('contest', cname), ('team', tname), ('user', uname),
                   ('status', status))
            metrics['questions_total'][key] = count

    evals = sql_session.query(
        Contest.name, Task.name, Team.code, User.username,
        Dataset.description, Dataset.id == Task.active_dataset_id,
        Dataset.autojudge,
        func.coalesce(func.sum(Evaluation.execution_wall_clock_time), 0.0))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Submission, Submission.participation_id == Participation.id)\
        .join(Task, Task.id == Submission.task_id)\
        .join(SubmissionResult,
              SubmissionResult.submission_id == Submission.id)\
        .join(Dataset, Dataset.id == SubmissionResult.dataset_id)\
        .join(Evaluation, Evaluation.submission_id == Submission.id)\
        .filter(Evaluation.dataset_id == Dataset.id)\
        .group_by(Contest.id, Team.id, User.id, Task.id, Dataset.id)\
        .all()

    descs['wall_clock_time_total'] = (
        'gauge', 'dataset_status = live | active | inactive')
    metrics['wall_clock_time_total'] = {}
    for e in evals:
        cname, taskname, teamname, uname, \
            ddesc, ds_live, ds_autojudge, wtime = e
        ds_status = get_dataset_status(ds_live, ds_autojudge)
        key = (('contest', cname), ('task', taskname), ('team', teamname),
               ('user', uname), ('dataset', ddesc),
               ('dataset_status', ds_status))
        metrics['wall_clock_time_total'][key] = wtime

    return (metrics, descs)

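# Hedged sketch (not part of compute_contest_metrics): how the returned
# (metrics, descs) pair could be rendered into a Prometheus-style text
# exposition, given that each descs entry is a (type, help) pair and each
# metrics entry maps label tuples to values. The real exporter that consumes
# this function may format its output differently; the name below is made up.
def render_metrics_text(metrics, descs):
    lines = []
    for name, series in metrics.items():
        metric_type, help_text = descs[name]
        lines.append("# HELP {} {}".format(name, help_text))
        lines.append("# TYPE {} {}".format(name, metric_type))
        for labels, value in series.items():
            label_str = ",".join(
                '{}="{}"'.format(label_name, label_value)
                for label_name, label_value in labels)
            lines.append("{}{{{}}} {}".format(name, label_str, value))
    return "\n".join(lines) + "\n"
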
def execute(self, entry):
    """Assign a score to a submission result.

    This is the core of ScoringService: here we retrieve the result
    from the database, check if it is in the correct status,
    instantiate its ScoreType, compute its score, store it back in
    the database and tell ProxyService to update RWS if needed.

    entry (QueueEntry): entry containing the operation to perform.

    """
    operation = entry.item
    with SessionGen() as session:
        # Obtain submission.
        submission = Submission.get_from_id(operation.submission_id,
                                            session)
        if submission is None:
            raise ValueError("Submission %d not found in the database." %
                             operation.submission_id)

        # Obtain dataset.
        dataset = Dataset.get_from_id(operation.dataset_id, session)
        if dataset is None:
            raise ValueError("Dataset %d not found in the database." %
                             operation.dataset_id)

        # Obtain submission result.
        submission_result = submission.get_result(dataset)

        # It means it was not even compiled (for some reason).
        if submission_result is None:
            raise ValueError(
                "Submission result %d(%d) was not found." %
                (operation.submission_id, operation.dataset_id))

        # Check if it's ready to be scored.
        if not submission_result.needs_scoring():
            if submission_result.scored():
                logger.info("Submission result %d(%d) is already scored.",
                            operation.submission_id, operation.dataset_id)
                return
            else:
                raise ValueError(
                    "The state of the submission result "
                    "%d(%d) doesn't allow scoring." %
                    (operation.submission_id, operation.dataset_id))

        # Instantiate the score type.
        score_type = get_score_type(dataset=dataset)

        # Compute score and fill it in the database.
        submission_result.score, \
            submission_result.score_details, \
            submission_result.public_score, \
            submission_result.public_score_details, \
            ranking_score_details = \
            score_type.compute_score(submission_result)
        submission_result.ranking_score_details = \
            json.dumps(ranking_score_details)

        task = submission.task
        participation = submission.participation
        relevant_submissions = session.query(SubmissionResult)\
            .join(SubmissionResult.submission)\
            .filter(Submission.participation_id == participation.id)\
            .filter(Submission.task_id == task.id)\
            .filter(SubmissionResult.dataset_id == dataset.id)\
            .filter(SubmissionResult.filter_scored())\
            .order_by(Submission.timestamp.asc())\
            .all()

        changed_task_results = []
        official_submissions = [
            s for s in relevant_submissions if s.submission.official
        ]
        official_ptr = 0
        for i in range(len(relevant_submissions)):
            sr = relevant_submissions[i]
            if official_ptr < len(official_submissions) and \
                    sr == official_submissions[official_ptr]:
                official_ptr += 1
            if sr.submission.timestamp >= submission.timestamp:
                old_data = (sr.task_score, sr.task_score_details,
                            sr.task_public_score,
                            sr.task_public_score_details,
                            sr.task_ranking_score_details)
                new_data = score_type.compute_total_score(
                    official_submissions[:official_ptr])
                new_data = new_data[:4] + (json.dumps(new_data[4]), )
                if old_data != new_data:
                    sr.task_score, \
                        sr.task_score_details, \
                        sr.task_public_score, \
                        sr.task_public_score_details, \
                        sr.task_ranking_score_details = \
                        new_data
                    changed_task_results.append(sr.submission_id)

        # Store it.
        session.commit()

        logger.metric("submission_scoring_time",
                      submission_id=submission.id,
                      dataset_id=submission_result.dataset_id,
                      language=submission.language,
                      task=submission.task_id,
                      participant=submission.participation_id,
                      value=(make_datetime() -
                             submission.timestamp).total_seconds())
        logger.info("Submission scored %d seconds after submission",
                    (make_datetime() - submission.timestamp)
                    .total_seconds())

        # If dataset is the active one, update RWS.
        if dataset is submission.task.active_dataset:
            if submission.id not in changed_task_results:
                logger.error("Submission was recently scored but "
                             "it isn't listed as submissions with "
                             "a task score change")
                changed_task_results.append(submission.id)
            for changed_submission_id in changed_task_results:
                self.proxy_service.submission_scored(
                    submission_id=changed_submission_id)

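# Illustrative walk-through of the recomputation loop above (identifiers made
# up): with official submissions O1, O2, O3 ordered by timestamp and O2 being
# the newly scored submission, the loop recomputes the task score of O2 from
# the official prefix [O1, O2] and of O3 from [O1, O2, O3]; any unofficial
# results not earlier than O2 are recomputed from the official prefix seen up
# to their position in the timestamp ordering.
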