Example #1
 def get_submission_result(cls, submission=None, dataset=None, **kwargs):
     """Create a submission result."""
     task = None
     task = submission.task if submission is not None else task
     task = dataset.task if dataset is not None else task
     submission = submission if submission is not None \
         else cls.get_submission(task=task)
     dataset = dataset if dataset is not None \
         else cls.get_dataset(task=task)
     assert submission.task == dataset.task
     args = {
         "submission": submission,
         "dataset": dataset,
     }
     args.update(kwargs)
     submission_result = SubmissionResult(**args)
     return submission_result
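
For orientation, here is a minimal sketch of how a test might call this factory. The host class name DatabaseMixin and the compilation_outcome keyword are placeholders chosen for illustration; neither is taken from the excerpt above.

# Hypothetical usage of the factory above; DatabaseMixin stands in for
# whatever class defines these classmethods, and compilation_outcome is
# assumed to be a valid SubmissionResult column.
submission = DatabaseMixin.get_submission()
result = DatabaseMixin.get_submission_result(
    submission=submission, compilation_outcome="ok")
assert result.submission is submission
assert result.dataset.task is submission.task
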
Example #2
 def add_submission_result(self, submission=None, dataset=None, **kwargs):
     """Add a submission result."""
     task = None
     task = submission.task if submission is not None else task
     task = dataset.task if dataset is not None else task
     submission = submission \
         if submission is not None else self.add_submission(task=task)
     dataset = dataset \
         if dataset is not None else self.add_dataset(task=task)
     assert submission.task == dataset.task
     args = {
         "submission": submission,
         "dataset": dataset,
     }
     args.update(kwargs)
     submission_result = SubmissionResult(**args)
     self.session.add(submission_result)
     return submission_result
Example #3
import logging

from cms.io import PriorityQueue, QueueItem
from cms.db import Dataset, Evaluation, Submission, SubmissionResult, \
    Task, Testcase, UserTest, UserTestResult
from cms.grading.Job import CompilationJob, EvaluationJob

logger = logging.getLogger(__name__)

MAX_COMPILATION_TRIES = 3
MAX_EVALUATION_TRIES = 3
MAX_USER_TEST_COMPILATION_TRIES = 3
MAX_USER_TEST_EVALUATION_TRIES = 3

FILTER_SUBMISSION_DATASETS_TO_JUDGE = ((Dataset.id == Task.active_dataset_id) |
                                       (Dataset.autojudge.is_(True)))
FILTER_SUBMISSION_RESULTS_TO_COMPILE = (
    (~SubmissionResult.filter_compiled()) &
    (SubmissionResult.compilation_tries < MAX_COMPILATION_TRIES))
FILTER_SUBMISSION_RESULTS_TO_EVALUATE = (
    SubmissionResult.filter_compilation_succeeded() &
    (~SubmissionResult.filter_evaluated()) &
    (SubmissionResult.evaluation_tries < MAX_EVALUATION_TRIES))

FILTER_USER_TEST_DATASETS_TO_JUDGE = ((Dataset.id == Task.active_dataset_id) |
                                      (Dataset.autojudge.is_(True)))
FILTER_USER_TEST_RESULTS_TO_COMPILE = (
    (~UserTestResult.filter_compiled()) &
    (UserTestResult.compilation_tries < MAX_COMPILATION_TRIES))
FILTER_USER_TEST_RESULTS_TO_EVALUATE = (
    UserTestResult.filter_compilation_succeeded() &
    (~UserTestResult.filter_evaluated()) &
    (UserTestResult.evaluation_tries < MAX_EVALUATION_TRIES))
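
The module-level filters above are plain SQLAlchemy boolean expressions built with the column operators ~, & and |, so they can be reused verbatim in any query. Below is a self-contained toy sketch of the same pattern, assuming SQLAlchemy 1.4 or newer; the Result model, MAX_TRIES and FILTER_TO_COMPILE are illustrative, not CMS code.

from sqlalchemy import Boolean, Column, Integer, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Result(Base):
    __tablename__ = "results"
    id = Column(Integer, primary_key=True)
    compiled = Column(Boolean, default=False)
    tries = Column(Integer, default=0)

MAX_TRIES = 3
# Reusable filter: not yet compiled and still within the retry budget.
FILTER_TO_COMPILE = (~Result.compiled) & (Result.tries < MAX_TRIES)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add_all([Result(compiled=False, tries=1),
                     Result(compiled=True, tries=1),
                     Result(compiled=False, tries=5)])
    session.commit()
    # The same expression object works in any query against Result.
    todo = session.query(Result).filter(FILTER_TO_COMPILE).all()
    assert len(todo) == 1
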
Example #4
    def submissions_status(contest_id):
        """Returns a dictionary of statistics about the number of
        submissions on a specific status in the given contest.

        There are six statuses: evaluated, compilation failed,
        evaluating, compiling, maximum number of attempts of
        compilations reached, the same for evaluations. The last two
        should not happen and require a check from the admin.

        The status of a submission is checked on its result for the
        active dataset of its task.

        contest_id (int|None): counts are restricted to this contest,
            or None for no restrictions.

        return (dict): statistics on the submissions.

        """
        # TODO: at the moment this counts all submission results for
        # the live datasets. It is interesting to show also numbers
        # for the datasets with autojudge, and for all datasets.
        stats = {}
        with SessionGen() as session:
            base_query = session\
                .query(func.count(SubmissionResult.submission_id))\
                .select_from(SubmissionResult)\
                .join(Dataset)\
                .join(Task, Dataset.task_id == Task.id)\
                .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
            if contest_id is not None:
                base_query = base_query\
                    .filter(Task.contest_id == contest_id)

            compiled = base_query.filter(SubmissionResult.filter_compiled())
            evaluated = compiled.filter(SubmissionResult.filter_evaluated())
            not_compiled = base_query.filter(
                not_(SubmissionResult.filter_compiled()))
            not_evaluated = compiled.filter(
                SubmissionResult.filter_compilation_succeeded(),
                not_(SubmissionResult.filter_evaluated()))

            queries = {}
            queries['compiling'] = not_compiled.filter(
                SubmissionResult.compilation_tries <
                EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
            queries['max_compilations'] = not_compiled.filter(
                SubmissionResult.compilation_tries >=
                EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
            queries['compilation_fail'] = base_query.filter(
                SubmissionResult.filter_compilation_failed())
            queries['evaluating'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries <
                EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
            queries['max_evaluations'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries >=
                EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
            queries['scoring'] = evaluated.filter(
                not_(SubmissionResult.filter_scored()))
            queries['scored'] = evaluated.filter(
                SubmissionResult.filter_scored())

            total_query = session\
                .query(func.count(Submission.id))\
                .select_from(Submission)\
                .join(Task, Submission.task_id == Task.id)
            if contest_id is not None:
                total_query = total_query\
                    .filter(Task.contest_id == contest_id)
            queries['total'] = total_query

            stats = {}
            keys = list(queries.keys())
            results = queries[keys[0]].union_all(
                *(queries[key] for key in keys[1:])).all()

        for i, k in enumerate(keys):
            stats[k] = results[i][0]
        stats['compiling'] += 2 * stats['total'] - sum(stats.values())

        return stats
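
Two details of this version are worth spelling out. The union_all call glues all of the per-status count queries into one statement, so a single round trip returns one count row per status, in the order the queries were passed, which is what the keys/results pairing relies on. The final adjustment works because every submission is counted once in 'total' and at most once in exactly one status bucket, so 2 * total - sum(values) is the number of submissions with no result row on the active dataset yet; this version folds them into 'compiling'. A tiny sketch with made-up numbers:

# Made-up counts, only to illustrate the accounting in the last line above.
stats = {'compiling': 3, 'max_compilations': 0, 'compilation_fail': 2,
         'evaluating': 1, 'max_evaluations': 0, 'scoring': 0,
         'scored': 10, 'total': 20}
# 16 submissions landed in some bucket; 4 have no result row yet.
missing = 2 * stats['total'] - sum(stats.values())   # 40 - 36 = 4
stats['compiling'] += missing
assert stats['compiling'] == 7
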
Example #5
    def write_results(self, items):
        """Receive worker results from the cache and writes them to the DB.

        Grouping results together by object (i.e., submission result
        or user test result) and type (compilation or evaluation)
        allows this method to talk less to the DB, for example by
        retrieving datasets and submission results only once instead
        of once for every result.

        items ([(operation, Result)]): the results received by ES but
            not yet written to the db.

        """
        logger.info("Starting commit process...")

        # Reorganize the results by submission/usertest result and
        # operation type (i.e., group together the testcase
        # evaluations for the same submission and dataset).
        by_object_and_type = defaultdict(list)
        for operation, result in items:
            t = (operation.type_, operation.object_id, operation.dataset_id)
            by_object_and_type[t].append((operation, result))

        with SessionGen() as session:
            for key, operation_results in by_object_and_type.items():
                type_, object_id, dataset_id = key

                dataset = Dataset.get_from_id(dataset_id, session)
                if dataset is None:
                    logger.error("Could not find dataset %d in the database.",
                                 dataset_id)
                    continue

                # Get submission or user test results.
                if type_ in [ESOperation.COMPILATION, ESOperation.EVALUATION]:
                    object_ = Submission.get_from_id(object_id, session)
                    if object_ is None:
                        logger.error(
                            "Could not find submission %d "
                            "in the database.", object_id)
                        continue
                    object_result = object_.get_result_or_create(dataset)
                else:
                    object_ = UserTest.get_from_id(object_id, session)
                    if object_ is None:
                        logger.error(
                            "Could not find user test %d "
                            "in the database.", object_id)
                        continue
                    object_result = object_.get_result_or_create(dataset)

                self.write_results_one_object_and_type(session, object_result,
                                                       operation_results)

            logger.info("Committing evaluations...")
            session.commit()

            num_testcases_per_dataset = dict()
            for type_, object_id, dataset_id in by_object_and_type.keys():
                if type_ == ESOperation.EVALUATION:
                    if dataset_id not in num_testcases_per_dataset:
                        num_testcases_per_dataset[dataset_id] = session\
                            .query(func.count(Testcase.id))\
                            .filter(Testcase.dataset_id == dataset_id).scalar()
                    num_evaluations = session\
                        .query(func.count(Evaluation.id)) \
                        .filter(Evaluation.dataset_id == dataset_id) \
                        .filter(Evaluation.submission_id == object_id).scalar()
                    if num_evaluations == num_testcases_per_dataset[
                            dataset_id]:
                        submission_result = SubmissionResult.get_from_id(
                            (object_id, dataset_id), session)
                        submission_result.set_evaluation_outcome()

            logger.info("Committing evaluation outcomes...")
            session.commit()

            logger.info("Ending operations for %s objects...",
                        len(by_object_and_type))
            for type_, object_id, dataset_id in by_object_and_type.keys():
                if type_ == ESOperation.COMPILATION:
                    submission_result = SubmissionResult.get_from_id(
                        (object_id, dataset_id), session)
                    self.compilation_ended(submission_result)
                elif type_ == ESOperation.EVALUATION:
                    submission_result = SubmissionResult.get_from_id(
                        (object_id, dataset_id), session)
                    if submission_result.evaluated():
                        self.evaluation_ended(submission_result)
                elif type_ == ESOperation.USER_TEST_COMPILATION:
                    user_test_result = UserTestResult.get_from_id(
                        (object_id, dataset_id), session)
                    self.user_test_compilation_ended(user_test_result)
                elif type_ == ESOperation.USER_TEST_EVALUATION:
                    user_test_result = UserTestResult.get_from_id(
                        (object_id, dataset_id), session)
                    self.user_test_evaluation_ended(user_test_result)

        logger.info("Done")
Example #6
import logging

from cms.io import QueueItem
from cms.db import Dataset, Submission, SubmissionResult, \
    Task


logger = logging.getLogger(__name__)


FILTER_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_SUBMISSION_RESULTS_TO_SCORE = (
    (~SubmissionResult.filter_scored()) & (
        (SubmissionResult.filter_compilation_failed()) |
        (SubmissionResult.filter_evaluated()))
)


def get_operations(session):
    """Return all the operations to do for all submissions.

    session (Session): the database session to use.

    return ([ScoringOperation, float]): a list of operations and
        timestamps.

    """
    # Retrieve all the compilation operations for submissions
Example #7
    def submissions_status(self):
        """Returns a dictionary of statistics about the number of
        submissions on a specific status. There are seven statuses:
        evaluated, compilation failed, evaluating, compiling, maximum
        number of attempts of compilations reached, the same for
        evaluations, and finally 'I have no idea what's
        happening'. The last three should not happen and require a
        check from the admin.

        The status of a submission is checked on its result for the
        active dataset of its task.

        return (dict): statistics on the submissions.

        """
        # TODO: at the moment this counts all submission results for
        # the live datasets. It is interesting to show also numbers
        # for the datasets with autojudge, and for all datasets.
        stats = {}
        with SessionGen() as session:
            base_query = session\
                .query(func.count(SubmissionResult.submission_id))\
                .select_from(SubmissionResult)\
                .join(Dataset)\
                .join(Task, Dataset.task_id == Task.id)\
                .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
            if self.contest_id is not None:
                base_query = base_query\
                    .filter(Task.contest_id == self.contest_id)

            compiled = base_query.filter(SubmissionResult.filter_compiled())
            evaluated = compiled.filter(SubmissionResult.filter_evaluated())
            not_compiled = base_query.filter(
                not_(SubmissionResult.filter_compiled()))
            not_evaluated = compiled.filter(
                SubmissionResult.filter_compilation_succeeded(),
                not_(SubmissionResult.filter_evaluated()))

            queries = {}
            queries['compiling'] = not_compiled.filter(
                SubmissionResult.compilation_tries <
                EvaluationService.MAX_COMPILATION_TRIES)
            queries['max_compilations'] = not_compiled.filter(
                SubmissionResult.compilation_tries >=
                EvaluationService.MAX_COMPILATION_TRIES)
            queries['compilation_fail'] = base_query.filter(
                SubmissionResult.filter_compilation_failed())
            queries['evaluating'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries <
                EvaluationService.MAX_EVALUATION_TRIES)
            queries['max_evaluations'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries >=
                EvaluationService.MAX_EVALUATION_TRIES)
            queries['scoring'] = evaluated.filter(
                not_(SubmissionResult.filter_scored()))
            queries['scored'] = evaluated.filter(
                SubmissionResult.filter_scored())
            queries['total'] = base_query

            stats = {}
            keys = list(queries.keys())
            results = queries[keys[0]].union_all(
                *(queries[key] for key in keys[1:])).all()

        for i, key in enumerate(keys):
            stats[key] = results[i][0]
        stats['invalid'] = 2 * stats['total'] - sum(stats.values())

        return stats
Example #8
import logging

from cms.db import Dataset, SubmissionResult, Task, UserTestResult


logger = logging.getLogger(__name__)


MAX_COMPILATION_TRIES = 3
MAX_EVALUATION_TRIES = 3
MAX_USER_TEST_COMPILATION_TRIES = 3
MAX_USER_TEST_EVALUATION_TRIES = 3


FILTER_SUBMISSION_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_SUBMISSION_RESULTS_TO_COMPILE = (
    (~SubmissionResult.filter_compiled()) &
    (SubmissionResult.compilation_tries < MAX_COMPILATION_TRIES)
)
FILTER_SUBMISSION_RESULTS_TO_EVALUATE = (
    SubmissionResult.filter_compilation_succeeded() &
    (~SubmissionResult.filter_evaluated()) &
    (SubmissionResult.evaluation_tries < MAX_EVALUATION_TRIES)
)


FILTER_USER_TEST_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_USER_TEST_RESULTS_TO_COMPILE = (
    (~UserTestResult.filter_compiled()) &
Example #9
    def submissions_status(contest_id):
        """Returns a dictionary of statistics about the number of
        submissions on a specific status in the given contest.

        There are six statuses: evaluated, compilation failed,
        evaluating, compiling, maximum number of attempts of
        compilations reached, the same for evaluations. The last two
        should not happen and require a check from the admin.

        The status of a submission is checked on its result for the
        active dataset of its task.

        contest_id (int|None): counts are restricted to this contest,
            or None for no restrictions.

        return (dict): statistics on the submissions.

        """
        # TODO: at the moment this counts all submission results for
        # the live datasets. It is interesting to show also numbers
        # for the datasets with autojudge, and for all datasets.
        stats = {}
        with SessionGen() as session:
            base_query = session\
                .query(func.count(SubmissionResult.submission_id))\
                .select_from(SubmissionResult)\
                .join(Dataset)\
                .join(Task, Dataset.task_id == Task.id)\
                .filter(Task.active_dataset_id == SubmissionResult.dataset_id)
            if contest_id is not None:
                base_query = base_query\
                    .filter(Task.contest_id == contest_id)

            compiled = base_query.filter(SubmissionResult.filter_compiled())
            evaluated = compiled.filter(SubmissionResult.filter_evaluated())
            not_compiled = base_query.filter(
                not_(SubmissionResult.filter_compiled()))
            not_evaluated = compiled.filter(
                SubmissionResult.filter_compilation_succeeded(),
                not_(SubmissionResult.filter_evaluated()))

            queries = {}
            queries['compiling'] = not_compiled.filter(
                SubmissionResult.compilation_tries <
                EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
            queries['max_compilations'] = not_compiled.filter(
                SubmissionResult.compilation_tries >=
                EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)
            queries['compilation_fail'] = base_query.filter(
                SubmissionResult.filter_compilation_failed())
            queries['evaluating'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries <
                EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
            queries['max_evaluations'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries >=
                EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)
            queries['scoring'] = evaluated.filter(
                not_(SubmissionResult.filter_scored()))
            queries['scored'] = evaluated.filter(
                SubmissionResult.filter_scored())

            total_query = session\
                .query(func.count(Submission.id))\
                .select_from(Submission)\
                .join(Task, Submission.task_id == Task.id)
            if contest_id is not None:
                total_query = total_query\
                    .filter(Task.contest_id == contest_id)
            queries['total'] = total_query

            stats = {}
            keys = list(queries.keys())
            results = queries[keys[0]].union_all(
                *(queries[key] for key in keys[1:])).all()

        for i, k in enumerate(keys):
            stats[k] = results[i][0]
        stats['compiling'] += 2 * stats['total'] - sum(stats.values())

        return stats
Example #10
"""The ScoringService operation class, and related functions to
compute sets of operations to do.

"""

import logging

from cms.db import Dataset, Submission, SubmissionResult, \
    Task
from cms.io import QueueItem

logger = logging.getLogger(__name__)

FILTER_DATASETS_TO_JUDGE = ((Dataset.id == Task.active_dataset_id) |
                            (Dataset.autojudge.is_(True)))
FILTER_SUBMISSION_RESULTS_TO_SCORE = ((~SubmissionResult.filter_scored()) & (
    (SubmissionResult.filter_compilation_failed()) |
    (SubmissionResult.filter_evaluated())))


def get_operations(session):
    """Return all the operations to do for all submissions.

    session (Session): the database session to use.

    return ([ScoringOperation, float]): a list of operations and
        timestamps.

    """
    # Retrieve all the compilation operations for submissions
    # already having a result for a dataset to judge.
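
For a sense of how these module-level filters are meant to be combined, here is a hedged sketch of a query selecting submission results that need a score on datasets worth judging. The join path is an assumption for illustration; the real body of get_operations is cut off in this excerpt.

def sketch_results_to_score(session):
    # Hypothetical combination of the two filters above; the actual
    # get_operations query in CMS may join differently.
    return session.query(SubmissionResult)\
        .join(Dataset, SubmissionResult.dataset_id == Dataset.id)\
        .join(Task, Dataset.task_id == Task.id)\
        .filter(FILTER_DATASETS_TO_JUDGE)\
        .filter(FILTER_SUBMISSION_RESULTS_TO_SCORE)\
        .all()
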
Example #11
    def submissions_status(self):
        """Returns a dictionary of statistics about the number of
        submissions on a specific status. There are seven statuses:
        evaluated, compilation failed, evaluating, compiling, maximum
        number of attempts of compilations reached, the same for
        evaluations, and finally 'I have no idea what's
        happening'. The last three should not happen and require a
        check from the admin.

        The status of a submission is checked on its result for the
        active dataset of its task.

        return (dict): statistics on the submissions.

        """
        # TODO: at the moment this counts all submission results for
        # the live datasets. It is interesting to show also numbers
        # for the datasets with autojudge, and for all datasets.
        stats = {}
        with SessionGen() as session:
            base_query = session\
                .query(func.count(SubmissionResult.submission_id))\
                .select_from(SubmissionResult)\
                .join(Dataset)\
                .join(Task, Dataset.task_id == Task.id)\
                .filter(Task.active_dataset_id == SubmissionResult.dataset_id)\
                .filter(Task.contest_id == self.contest_id)

            compiled = base_query.filter(SubmissionResult.filter_compiled())
            evaluated = compiled.filter(SubmissionResult.filter_evaluated())
            not_compiled = base_query.filter(
                not_(SubmissionResult.filter_compiled()))
            not_evaluated = compiled.filter(
                SubmissionResult.filter_compilation_succeeded(),
                not_(SubmissionResult.filter_evaluated()))

            queries = {}
            queries['compiling'] = not_compiled.filter(
                SubmissionResult.compilation_tries <
                EvaluationService.MAX_COMPILATION_TRIES)
            queries['max_compilations'] = not_compiled.filter(
                SubmissionResult.compilation_tries >=
                EvaluationService.MAX_COMPILATION_TRIES)
            queries['compilation_fail'] = base_query.filter(
                SubmissionResult.filter_compilation_failed())
            queries['evaluating'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries <
                EvaluationService.MAX_EVALUATION_TRIES)
            queries['max_evaluations'] = not_evaluated.filter(
                SubmissionResult.evaluation_tries >=
                EvaluationService.MAX_EVALUATION_TRIES)
            queries['scoring'] = evaluated.filter(
                not_(SubmissionResult.filter_scored()))
            queries['scored'] = evaluated.filter(
                SubmissionResult.filter_scored())
            queries['total'] = base_query

            stats = {}
            keys = list(queries.keys())
            results = queries[keys[0]].union_all(*(queries[key]
                                                   for key in keys[1:])).all()

        for i, key in enumerate(keys):
            stats[key] = results[i][0]
        stats['invalid'] = 2 * stats['total'] - sum(stats.values())

        return stats
Example #12
import logging

from cms.db import Dataset, Submission, SubmissionResult, \
    Task
from cms.io import QueueItem


logger = logging.getLogger(__name__)


FILTER_DATASETS_TO_JUDGE = (
    (Dataset.id == Task.active_dataset_id) |
    (Dataset.autojudge.is_(True))
)
FILTER_SUBMISSION_RESULTS_TO_SCORE = (
    (~SubmissionResult.filter_scored()) & (
        (SubmissionResult.filter_compilation_failed()) |
        (SubmissionResult.filter_evaluated()))
)


def get_operations(session):
    """Return all the operations to do for all submissions.

    session (Session): the database session to use.

    return ([ScoringOperation, float]): a list of operations and
        timestamps.

    """
    # Retrieve all the compilation operations for submissions
Example #13
    def write_results(self, items):
        """Receive worker results from the cache and writes them to the DB.

        Grouping results together by object (i.e., submission result
        or user test result) and type (compilation or evaluation)
        allows this method to talk less to the DB, for example by
        retrieving datasets and submission results only once instead
        of once for every result.

        items ([(operation, Result)]): the results received by ES but
            not yet written to the db.

        """
        logger.info("Starting commit process...")

        # Reorganize the results by submission/usertest result and
        # operation type (i.e., group together the testcase
        # evaluations for the same submission and dataset).
        by_object_and_type = defaultdict(list)
        for operation, result in items:
            t = (operation.type_, operation.object_id, operation.dataset_id)
            by_object_and_type[t].append((operation, result))

        with SessionGen() as session:
            for key, operation_results in by_object_and_type.items():
                type_, object_id, dataset_id = key

                dataset = Dataset.get_from_id(dataset_id, session)
                if dataset is None:
                    logger.error("Could not find dataset %d in the database.",
                                 dataset_id)
                    continue

                # Get submission or user test results.
                if type_ in [ESOperation.COMPILATION, ESOperation.EVALUATION]:
                    object_ = Submission.get_from_id(object_id, session)
                    if object_ is None:
                        logger.error("Could not find submission %d "
                                     "in the database.", object_id)
                        continue
                    object_result = object_.get_result_or_create(dataset)
                else:
                    object_ = UserTest.get_from_id(object_id, session)
                    if object_ is None:
                        logger.error("Could not find user test %d "
                                     "in the database.", object_id)
                        continue
                    object_result = object_.get_result_or_create(dataset)

                self.write_results_one_object_and_type(
                    session, object_result, operation_results)

            logger.info("Committing evaluations...")
            session.commit()

            num_testcases_per_dataset = dict()
            for type_, object_id, dataset_id in by_object_and_type.keys():
                if type_ == ESOperation.EVALUATION:
                    if dataset_id not in num_testcases_per_dataset:
                        num_testcases_per_dataset[dataset_id] = session\
                            .query(func.count(Testcase.id))\
                            .filter(Testcase.dataset_id == dataset_id).scalar()
                    num_evaluations = session\
                        .query(func.count(Evaluation.id)) \
                        .filter(Evaluation.dataset_id == dataset_id) \
                        .filter(Evaluation.submission_id == object_id).scalar()
                    if num_evaluations == num_testcases_per_dataset[dataset_id]:
                        submission_result = SubmissionResult.get_from_id(
                            (object_id, dataset_id), session)
                        submission_result.set_evaluation_outcome()

            logger.info("Committing evaluation outcomes...")
            session.commit()

            logger.info("Ending operations for %s objects...",
                        len(by_object_and_type))
            for type_, object_id, dataset_id in by_object_and_type.keys():
                if type_ == ESOperation.COMPILATION:
                    submission_result = SubmissionResult.get_from_id(
                        (object_id, dataset_id), session)
                    self.compilation_ended(submission_result)
                elif type_ == ESOperation.EVALUATION:
                    submission_result = SubmissionResult.get_from_id(
                        (object_id, dataset_id), session)
                    if submission_result.evaluated():
                        self.evaluation_ended(submission_result)
                elif type_ == ESOperation.USER_TEST_COMPILATION:
                    user_test_result = UserTestResult.get_from_id(
                        (object_id, dataset_id), session)
                    self.user_test_compilation_ended(user_test_result)
                elif type_ == ESOperation.USER_TEST_EVALUATION:
                    user_test_result = UserTestResult.get_from_id(
                        (object_id, dataset_id), session)
                    self.user_test_evaluation_ended(user_test_result)

        logger.info("Done")
Example #14
def compute_contest_metrics(sql_session):

    metrics = {}
    descs = {}

    sub_full_query = sql_session.query(Contest.name, Task.name, Team.code, User.username, func.count(Submission.id))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Submission, Submission.participation_id == Participation.id)\
        .join(Task, Task.id == Submission.task_id)\
        .group_by(Contest.id, Task.id, Team.id, User.id)

    sub_official_counts = sub_full_query.filter(Submission.official).all()
    sub_unofficial_counts = sub_full_query.filter(not_(
        Submission.official)).all()

    descs['submissions_total'] = ('gauge', 'status = official | unofficial')
    metrics['submissions_total'] = {}
    for cs, status in [(sub_official_counts, 'official'),
                       (sub_unofficial_counts, 'unofficial')]:
        for c in cs:
            cname, taskname, teamname, uname, count = c
            key = (('contest', cname), ('task', taskname), ('team', teamname),
                   ('user', uname), ('status', status))
            metrics['submissions_total'][key] = count

    res_full_query = sql_session.query(
        Contest.name, Task.name, Team.code, User.username, Dataset.description,
        Dataset.id == Task.active_dataset_id, Dataset.autojudge, func.count(SubmissionResult.submission_id))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Submission, Submission.participation_id == Participation.id)\
        .join(Task, Task.id == Submission.task_id)\
        .join(SubmissionResult, SubmissionResult.submission_id == Submission.id)\
        .join(Dataset, Dataset.id == SubmissionResult.dataset_id)\
        .group_by(Contest.id, Task.id, Team.id, User.id, Dataset.id)

    res_compiling_query = res_full_query.filter(
        not_(SubmissionResult.filter_compiled()))
    res_evaluating_query = res_full_query.filter(
        SubmissionResult.filter_compilation_succeeded(),
        not_(SubmissionResult.filter_evaluated()))
    res_evaluated_query = res_full_query.filter(
        SubmissionResult.filter_compilation_succeeded(),
        SubmissionResult.filter_evaluated())

    res_compiling_ok = res_compiling_query.filter(
        SubmissionResult.compilation_tries <
        EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)\
        .all()
    res_compiling_stop = res_compiling_query.filter(
        SubmissionResult.compilation_tries >=
        EvaluationService.EvaluationService.MAX_COMPILATION_TRIES)\
        .all()
    res_compilation_failed = res_full_query.filter(
        SubmissionResult.filter_compilation_failed())\
        .all()

    res_evaluating_ok = res_evaluating_query.filter(
        SubmissionResult.evaluation_tries <
        EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)\
        .all()
    res_evaluating_stop = res_evaluating_query.filter(
        SubmissionResult.evaluation_tries >=
        EvaluationService.EvaluationService.MAX_EVALUATION_TRIES)\
        .all()
    res_scoring = res_evaluated_query.filter(
        not_(SubmissionResult.filter_scored()))\
        .all()
    res_scored = res_evaluated_query.filter(
        SubmissionResult.filter_scored())\
        .all()

    judgements_list = [
        (res_compiling_ok, 'compiling'),
        (res_compiling_stop, 'stuck_in_compilation'),
        (res_compilation_failed, 'compilation_failed'),
        (res_evaluating_ok, 'evaluating'),
        (res_evaluating_stop, 'stuck_in_evaluation'),
        (res_scoring, 'scoring'),
        (res_scored, 'scored'),
    ]

    status_list = " | ".join(map(lambda l: l[1], judgements_list))

    descs['judgements_total'] = (
        'gauge',
        'status = {}\\ndataset_status = live | active | inactive'.format(
            status_list))
    metrics['judgements_total'] = {}
    for cs, status in judgements_list:
        for c in cs:
            cname, taskname, teamname, uname, ds_desc, ds_live, ds_autojudge, count = c
            ds_status = get_dataset_status(ds_live, ds_autojudge)
            key = (('contest', cname), ('task', taskname), ('team', teamname),
                   ('user', uname), ('dataset', ds_desc),
                   ('dataset_status', ds_status), ('status', status))
            metrics['judgements_total'][key] = count

    question_query = sql_session.query(Contest.name, Team.code, User.username, func.count(Question.id))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Question, Question.participation_id == Participation.id)\
        .group_by(Contest.id, Team.id, User.id)

    question_answered = question_query.filter(
        Question.reply_timestamp.isnot(None)).all()
    question_ignored = question_query.filter(Question.ignored.is_(True)).all()
    question_pending = question_query.filter(
        Question.reply_timestamp.is_(None), Question.ignored.is_(False)).all()

    question_list = [
        (question_answered, 'answered'),
        (question_ignored, 'ignored'),
        (question_pending, 'pending'),
    ]

    status_list = " | ".join(map(lambda l: l[1], question_list))

    descs['questions_total'] = ('gauge', 'status = {}'.format(status_list))
    metrics['questions_total'] = {}
    for qs, status in question_list:
        for q in qs:
            cname, tname, uname, count = q
            key = (('contest', cname), ('team', tname), ('user', uname),
                   ('status', status))
            metrics['questions_total'][key] = count

    evals = sql_session.query(
        Contest.name, Task.name, Team.code, User.username, Dataset.description,
        Dataset.id == Task.active_dataset_id, Dataset.autojudge, func.coalesce(func.sum(Evaluation.execution_wall_clock_time), 0.0))\
        .select_from(Participation)\
        .filter(not_(Participation.hidden))\
        .outerjoin(Team, Team.id == Participation.team_id)\
        .join(User, User.id == Participation.user_id)\
        .join(Contest, Contest.id == Participation.contest_id)\
        .join(Submission, Submission.participation_id == Participation.id)\
        .join(Task, Task.id == Submission.task_id)\
        .join(SubmissionResult, SubmissionResult.submission_id == Submission.id)\
        .join(Dataset, Dataset.id == SubmissionResult.dataset_id)\
        .join(Evaluation, Evaluation.submission_id == Submission.id)\
        .filter(Evaluation.dataset_id == Dataset.id)\
        .group_by(Contest.id, Team.id, User.id, Task.id, Dataset.id)\
        .all()

    descs['wall_clock_time_total'] = (
        'gauge', 'dataset_status = live | active | inactive')
    metrics['wall_clock_time_total'] = {}

    for e in evals:
        cname, taskname, teamname, uname, ddesc, ds_live, ds_autojudge, wtime = e
        ds_status = get_dataset_status(ds_live, ds_autojudge)
        key = (('contest', cname), ('task', taskname), ('team', teamname),
               ('user', uname), ('dataset', ddesc), ('dataset_status',
                                                     ds_status))
        metrics['wall_clock_time_total'][key] = wtime

    return (metrics, descs)
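
The function returns a metrics dict keyed by metric name, whose values map tuples of (label, value) pairs to numbers, plus a descs dict of (type, help) pairs. Below is a hypothetical renderer, not CMS's actual exporter, showing how that shape could be serialized into Prometheus-style exposition text; the cms_ prefix is an assumption.

def render_metrics(metrics, descs):
    # Illustrative only: turn the (metrics, descs) pair returned above
    # into Prometheus-style exposition text.
    lines = []
    for name, samples in metrics.items():
        mtype, help_text = descs.get(name, ("gauge", ""))
        lines.append("# HELP cms_{} {}".format(name, help_text))
        lines.append("# TYPE cms_{} {}".format(name, mtype))
        for labels, value in samples.items():
            label_str = ",".join(
                '{}="{}"'.format(k, v) for k, v in labels)
            lines.append("cms_{}{{{}}} {}".format(name, label_str, value))
    return "\n".join(lines) + "\n"
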
Example #15
    def execute(self, entry):
        """Assign a score to a submission result.

        This is the core of ScoringService: here we retrieve the result
        from the database, check if it is in the correct status,
        instantiate its ScoreType, compute its score, store it back in
        the database and tell ProxyService to update RWS if needed.

        entry (QueueEntry): entry containing the operation to perform.

        """
        operation = entry.item
        with SessionGen() as session:
            # Obtain submission.
            submission = Submission.get_from_id(operation.submission_id,
                                                session)
            if submission is None:
                raise ValueError("Submission %d not found in the database." %
                                 operation.submission_id)

            # Obtain dataset.
            dataset = Dataset.get_from_id(operation.dataset_id, session)
            if dataset is None:
                raise ValueError("Dataset %d not found in the database." %
                                 operation.dataset_id)

            # Obtain submission result.
            submission_result = submission.get_result(dataset)

            # It means it was not even compiled (for some reason).
            if submission_result is None:
                raise ValueError(
                    "Submission result %d(%d) was not found." %
                    (operation.submission_id, operation.dataset_id))

            # Check if it's ready to be scored.
            if not submission_result.needs_scoring():
                if submission_result.scored():
                    logger.info("Submission result %d(%d) is already scored.",
                                operation.submission_id, operation.dataset_id)
                    return
                else:
                    raise ValueError(
                        "The state of the submission result "
                        "%d(%d) doesn't allow scoring." %
                        (operation.submission_id, operation.dataset_id))

            # Instantiate the score type.
            score_type = get_score_type(dataset=dataset)

            # Compute score and fill it in the database.
            submission_result.score, \
                submission_result.score_details, \
                submission_result.public_score, \
                submission_result.public_score_details, \
                ranking_score_details = \
                score_type.compute_score(submission_result)
            submission_result.ranking_score_details = \
                json.dumps(ranking_score_details)

            task = submission.task
            participation = submission.participation
            relevant_submissions = session.query(SubmissionResult)\
                .join(SubmissionResult.submission)\
                .filter(Submission.participation_id == participation.id)\
                .filter(Submission.task_id == task.id) \
                .filter(SubmissionResult.dataset_id == dataset.id) \
                .filter(SubmissionResult.filter_scored())\
                .order_by(Submission.timestamp.asc())\
                .all()

            changed_task_results = []
            official_submissions = [
                s for s in relevant_submissions if s.submission.official
            ]
            official_ptr = 0
            for i in range(len(relevant_submissions)):
                sr = relevant_submissions[i]
                if official_ptr < len(official_submissions) and \
                        sr == official_submissions[official_ptr]:
                    official_ptr += 1
                if sr.submission.timestamp >= submission.timestamp:
                    old_data = (sr.task_score, sr.task_score_details,
                                sr.task_public_score,
                                sr.task_public_score_details,
                                sr.task_ranking_score_details)
                    new_data = score_type.\
                        compute_total_score(
                            official_submissions[:official_ptr]
                        )
                    new_data = new_data[:4] + (json.dumps(new_data[4]), )
                    if old_data != new_data:
                        sr.task_score, \
                            sr.task_score_details, \
                            sr.task_public_score, \
                            sr.task_public_score_details, \
                            sr.task_ranking_score_details = \
                            new_data
                        changed_task_results.append(sr.submission_id)
            # Store it.
            session.commit()

            logger.metric("submission_scoring_time",
                          submission_id=submission.id,
                          dataset_id=submission_result.dataset_id,
                          language=submission.language,
                          task=submission.task_id,
                          participant=submission.participation_id,
                          value=(make_datetime() -
                                 submission.timestamp).total_seconds())

            logger.info("Submission scored %d seconds after submission",
                        (make_datetime() -
                         submission.timestamp).total_seconds())

            # If dataset is the active one, update RWS.
            if dataset is submission.task.active_dataset:
                if submission.id not in changed_task_results:
                    logger.error("Submission was recently scored but "
                                 "it isn't listed as submissions with "
                                 "a task score change")
                    changed_task_results.append(submission.id)

                for changed_submission_id in changed_task_results:
                    self.proxy_service.submission_scored(
                        submission_id=changed_submission_id)