Example 1
File: Worker.py Project: Takt29/cms
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # In order to avoid a long-living connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        logger.info("Precaching files for contest %d.", contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            files = contest.enumerate_files(skip_submissions=True,
                                            skip_user_tests=True)
        for digest in files:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")

    @rpc_method
    def execute_job(self, job_dict):
        """Receive a group of jobs in a dict format and executes them
        one by one.

        job_dict (dict): a dictionary suitable to be imported from Job.

        """
        job = Job.import_from_dict_with_type(job_dict)

        if self.work_lock.acquire(False):

            try:
                logger.info("Starting job.",
                            extra={"operation": job.info})

                job.shard = self.shard

                task_type = get_task_type(job.task_type,
                                          job.task_type_parameters)
                task_type.execute_job(job, self.file_cacher)

                logger.info("Finished job.",
                            extra={"operation": job.info})

                return job.export_to_dict()

            except Exception:
                err_msg = "Worker failed."
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job, this should " \
                "not happen: check if there are more than one ES running, " \
                "or for bugs in ES."
            logger.warning(err_msg)
            raise JobException(err_msg)
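
The acquire(False) call above is what enforces the Worker's one-job-at-a-time policy: instead of queueing behind the lock, a concurrent request fails fast with a JobException. A minimal, self-contained sketch of the same guard pattern (the class and names are illustrative, not part of cms):

import gevent.lock


class SingleJobGuard(object):
    """Reject, rather than queue, overlapping requests."""

    def __init__(self):
        self.work_lock = gevent.lock.RLock()

    def run(self, work):
        # Non-blocking acquire: returns False immediately if the lock
        # is already held, instead of waiting for it.
        if self.work_lock.acquire(blocking=False):
            try:
                return work()
            finally:
                self.work_lock.release()
        else:
            raise RuntimeError("busy: another job is running")
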
Example 2
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()
        self._ignore_job = False

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We remember to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        self._ignore_job = True

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.load(digest)
        logger.info("Precaching finished.")

    @rpc_method
    def execute_job_group(self, job_group_dict):
        """Receive a group of jobs in a dict format and executes them
        one by one.

        job_group_dict (dict): a dictionary suitable to be imported
            from JobGroup.

        """
        job_group = JobGroup.import_from_dict(job_group_dict)

        if self.work_lock.acquire(False):

            try:
                self._ignore_job = False

                for k, job in job_group.jobs.items():
                    logger.info("Starting job.",
                                extra={"operation": job.info})

                    job.shard = self.shard

                    # FIXME This is actually kind of a workaround...
                    # The only TaskType that needs it is OutputOnly.
                    job._key = k

                    # FIXME We're creating a new TaskType for each Job
                    # even if, at the moment, a JobGroup always uses
                    # the same TaskType and the same parameters. Yet,
                    # this could change in the future, so the best
                    # solution is to keep a cache of TaskTypes objects
                    # (like ScoringService does with ScoreTypes, except
                    # that we cannot index by Dataset ID here...).
                    task_type = get_task_type(job.task_type,
                                              job.task_type_parameters)
                    task_type.execute_job(job, self.file_cacher)

                    logger.info("Finished job.",
                                extra={"operation": job.info})

                    if not job.success or self._ignore_job:
                        job_group.success = False
                        break
                else:
                    job_group.success = True

                return job_group.export_to_dict()

            except Exception:
                err_msg = "Worker failed."
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job group, this " \
                "should not happen: check if there are more than one ES " \
                "running, or for bugs in ES."
            logger.warning(err_msg)
            raise JobException(err_msg)

    @rpc_method
    def rpc_test(self, mes):
        logger.info(mes)
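
Note the for/else in execute_job_group: the else branch runs only when the loop completes without a break, so job_group.success becomes True exactly when every job succeeded and no interruption was requested. A standalone illustration of the construct, with sample data only:

def group_success(jobs):
    """Mimic the success logic of execute_job_group above."""
    for job in jobs:
        if not job["success"]:
            success = False
            break
    else:
        # Reached only when the loop finished without a break.
        success = True
    return success


print(group_success([{"success": True}, {"success": True}]))   # True
print(group_success([{"success": True}, {"success": False}]))  # False
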
Example 3
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard, fake_worker_time=None):
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()
        self._last_end_time = None
        self._total_free_time = 0
        self._total_busy_time = 0
        self._number_execution = 0

        self._fake_worker_time = fake_worker_time

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # In order to avoid a long-living connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        logger.info("Precaching files for contest %d.", contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            files = enumerate_files(session,
                                    contest,
                                    skip_submissions=True,
                                    skip_user_tests=True,
                                    skip_print_jobs=True)
        for digest in files:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")

    @rpc_method
    def execute_job_group(self, job_group_dict):
        """Receive a group of jobs in a list format and executes them one by
        one.

        job_group_dict ({}): a JobGroup exported to dict.

        return ({}): the same JobGroup in dict format, but containing
            the results.

        """
        start_time = time.time()
        job_group = JobGroup.import_from_dict(job_group_dict)

        if self.work_lock.acquire(False):
            try:
                logger.info("Starting job group.")
                for job in job_group.jobs:
                    logger.info("Starting job.", extra={"operation": job.info})

                    job.shard = self.shard

                    if self._fake_worker_time is None:
                        self._perform_job(job)
                    else:
                        self._fake_work(job)

                    logger.info("Finished job.", extra={"operation": job.info})

                logger.info("Finished job group.")
                return job_group.export_to_dict()

            except Exception as e:
                err_msg = "Worker failed: %s." % e
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self._finalize(start_time)
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job, this should " \
                "not happen: check if there are more than one ES running, " \
                "or for bugs in ES."
            logger.warning(err_msg)
            self._finalize(start_time)
            raise JobException(err_msg)

    def _perform_job(self, job):
        task_type = get_task_type(job.task_type, job.task_type_parameters)

        tries = 0
        MAX_TRIES = 5

        while tries < MAX_TRIES:
            try:
                task_type.execute_job(job, self.file_cacher)
            except TombstoneError:
                job.success = False
                job.plus = {"tombstone": True}
                return

            if not isinstance(job, EvaluationJob):
                return
            if job.plus is None:
                return

            execution_time = job.plus['execution_time']

            if job.text[0] != 'Execution timed out':
                if tries > 0:
                    logger.info("Took: %s (TL: %s)",
                                execution_time,
                                job.time_limit,
                                extra={"operation": job.info})
                    logger.info("Not a TLE anymore.",
                                extra={"operation": job.info})

                return

            logger.info("Took: %s (TL: %s)",
                        execution_time,
                        job.time_limit,
                        extra={"operation": job.info})

            if execution_time > 1.3 * job.time_limit:
                logger.info("Significant TLE. Not retrying.",
                            extra={"operation": job.info})
                return

            tries += 1
            logger.info("Slight TLE. Retrying (%s of %s)",
                        tries,
                        MAX_TRIES,
                        extra={"operation": job.info})

    def _fake_work(self, job):
        """Fill the job with fake success data after waiting for some time."""
        time.sleep(self._fake_worker_time)
        job.success = True
        job.text = ["ok"]
        job.plus = {
            "execution_time": self._fake_worker_time,
            "execution_wall_clock_time": self._fake_worker_time,
            "execution_memory": 1000,
        }
        if isinstance(job, CompilationJob):
            job.compilation_success = True
        elif isinstance(job, EvaluationJob):
            job.outcome = "1.0"

    def _finalize(self, start_time):
        end_time = time.time()
        busy_time = end_time - start_time
        free_time = 0.0
        if self._last_end_time is not None:
            free_time = start_time - self._last_end_time
        self._last_end_time = end_time
        self._total_busy_time += busy_time
        self._total_free_time += free_time
        ratio = self._total_busy_time * 100.0 / \
            (self._total_busy_time + self._total_free_time)
        avg_free_time = 0.0
        if self._number_execution > 0:
            avg_free_time = self._total_free_time / self._number_execution
        avg_busy_time = 0.0
        if self._number_execution > 0:
            avg_busy_time = self._total_busy_time / self._number_execution
        self._number_execution += 1
        logger.info(
            "Executed in %.3f after being free for %.3f; "
            "busyness is %.1f%%; avg free time is %.3f, "
            "avg busy time is %.3f.", busy_time, free_time, ratio,
            avg_free_time, avg_busy_time)
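
The retry loop in _perform_job embodies a heuristic for borderline time-limit verdicts: a run that times out but stays within 30% of the limit is treated as possible timing noise and re-executed, up to MAX_TRIES times, while a run clearly over the limit is accepted immediately. The decision logic in isolation (thresholds copied from the code above; the function name is ours):

MAX_TRIES = 5
SIGNIFICANT_TLE_FACTOR = 1.3  # mirrors the 1.3 * job.time_limit test


def should_retry(timed_out, execution_time, time_limit, tries):
    """Decide whether a borderline TLE result deserves another run."""
    if not timed_out:
        return False  # the result is usable as-is
    if execution_time > SIGNIFICANT_TLE_FACTOR * time_limit:
        return False  # clearly over the limit: not worth retrying
    return tries < MAX_TRIES  # slight TLE: retry while budget remains
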
Example 4
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard, fake_worker_time=None):
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()
        self._last_end_time = None
        self._total_free_time = 0
        self._total_busy_time = 0
        self._number_execution = 0

        self._fake_worker_time = fake_worker_time

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # In order to avoid a long-living connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        logger.info("Precaching files for contest %d.", contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            files = enumerate_files(session, contest, skip_submissions=True,
                                    skip_user_tests=True, skip_print_jobs=True)
        for digest in files:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")

    @rpc_method
    def execute_job_group(self, job_group_dict):
        """Receive a group of jobs in a list format and executes them one by
        one.

        job_group_dict ({}): a JobGroup exported to dict.

        return ({}): the same JobGroup in dict format, but containing
            the results.

        """
        start_time = time.time()
        job_group = JobGroup.import_from_dict(job_group_dict)

        if self.work_lock.acquire(False):
            try:
                logger.info("Starting job group.")
                for job in job_group.jobs:
                    logger.info("Starting job.",
                                extra={"operation": job.info})

                    job.shard = self.shard

                    if self._fake_worker_time is None:
                        task_type = get_task_type(job.task_type,
                                                  job.task_type_parameters)
                        try:
                            task_type.execute_job(job, self.file_cacher)
                        except TombstoneError:
                            job.success = False
                            job.plus = {"tombstone": True}
                    else:
                        self._fake_work(job)

                    logger.info("Finished job.",
                                extra={"operation": job.info})

                logger.info("Finished job group.")
                return job_group.export_to_dict()

            except Exception as e:
                err_msg = "Worker failed: %s." % e
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self._finalize(start_time)
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job, this should " \
                "not happen: check if there are more than one ES running, " \
                "or for bugs in ES."
            logger.warning(err_msg)
            self._finalize(start_time)
            raise JobException(err_msg)

    def _fake_work(self, job):
        """Fill the job with fake success data after waiting for some time."""
        time.sleep(self._fake_worker_time)
        job.success = True
        job.text = ["ok"]
        job.plus = {
            "execution_time": self._fake_worker_time,
            "execution_wall_clock_time": self._fake_worker_time,
            "execution_memory": 1000,
        }
        if isinstance(job, CompilationJob):
            job.compilation_success = True
        elif isinstance(job, EvaluationJob):
            job.outcome = "1.0"

    def _finalize(self, start_time):
        end_time = time.time()
        busy_time = end_time - start_time
        free_time = 0.0
        if self._last_end_time is not None:
            free_time = start_time - self._last_end_time
        self._last_end_time = end_time
        self._total_busy_time += busy_time
        self._total_free_time += free_time
        ratio = self._total_busy_time * 100.0 / \
            (self._total_busy_time + self._total_free_time)
        avg_free_time = 0.0
        if self._number_execution > 0:
            avg_free_time = self._total_free_time / self._number_execution
        avg_busy_time = 0.0
        if self._number_execution > 0:
            avg_busy_time = self._total_busy_time / self._number_execution
        self._number_execution += 1
        logger.info("Executed in %.3lf after free for %.3lf; "
                    "busyness is %.1lf%%; avg free time is %.3lf "
                    "avg busy time is %.3lf ",
                    busy_time, free_time, ratio, avg_free_time, avg_busy_time)
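
Here the TombstoneError handling is inlined in execute_job_group: a tombstoned file (one whose content was removed from storage) turns the job into a clean failure with plus={"tombstone": True} instead of aborting the whole group. The same guard as a standalone sketch (the exception class is redefined locally for illustration):

class TombstoneError(Exception):
    """Stand-in for cms's error for files whose content was deleted."""


def run_with_tombstone_guard(job, execute):
    """Run execute(job), converting a tombstone into a clean failure,
    mirroring the inline handler in execute_job_group above."""
    try:
        execute(job)
    except TombstoneError:
        job.success = False
        job.plus = {"tombstone": True}
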
Example 5
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()
        self._ignore_job = False

    @rpc_method
    def ignore_job(self):
        """RPC that inform the worker that its result for the current
        action will be discarded. The worker will try to return as
        soon as possible even if this means that the result are
        inconsistent.

        """
        # We remember to quit as soon as possible.
        logger.info("Trying to interrupt job as requested.")
        self._ignore_job = True

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # Lock is not needed if the admins correctly placed cache and
        # temp directories in the same filesystem. This is what
        # usually happens since they are children of the same,
        # cms-created, directory.
        logger.info("Precaching files for contest %d." % contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            for digest in contest.enumerate_files(skip_submissions=True,
                                                  skip_user_tests=True):
                self.file_cacher.load(digest)
        logger.info("Precaching finished.")

    @rpc_method
    def execute_job_group(self, job_group_dict):
        """Receive a group of jobs in a dict format and executes them
        one by one.

        job_group_dict (dict): a dictionary suitable to be imported
            from JobGroup.

        """
        job_group = JobGroup.import_from_dict(job_group_dict)

        if self.work_lock.acquire(False):

            try:
                self._ignore_job = False

                for k, job in job_group.jobs.items():
                    logger.info("Starting job.", extra={"operation": job.info})

                    job.shard = self.shard

                    # FIXME This is actually kind of a workaround...
                    # The only TaskType that needs it is OutputOnly.
                    job._key = k

                    # FIXME We're creating a new TaskType for each Job
                    # even if, at the moment, a JobGroup always uses
                    # the same TaskType and the same parameters. Yet,
                    # this could change in the future, so the best
                    # solution is to keep a cache of TaskTypes objects
                    # (like ScoringService does with ScoreTypes, except
                    # that we cannot index by Dataset ID here...).
                    task_type = get_task_type(job.task_type,
                                              job.task_type_parameters)
                    task_type.execute_job(job, self.file_cacher)

                    logger.info("Finished job.", extra={"operation": job.info})

                    if not job.success or self._ignore_job:
                        job_group.success = False
                        break
                else:
                    job_group.success = True

                return job_group.export_to_dict()

            except Exception:
                err_msg = "Worker failed."
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job group, this " \
                "should not happen: check if there are more than one ES " \
                "running, or for bugs in ES."
            logger.warning(err_msg)
            raise JobException(err_msg)
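
ignore_job is cooperative cancellation: the RPC merely sets a flag, and the job loop checks it after each job, so the job currently in the sandbox is never killed mid-way. The pattern reduced to its essentials (names local to this sketch):

class CancellableLoop(object):
    """Check a flag between units of work, as the Worker does between
    jobs; setting the flag never interrupts the unit in progress."""

    def __init__(self):
        self._ignore = False

    def cancel(self):
        # Called from elsewhere (e.g. another greenlet), like the
        # ignore_job RPC above.
        self._ignore = True

    def run(self, units):
        results = []
        for unit in units:
            results.append(unit())
            if self._ignore:
                break  # stop before starting the next unit
        return results
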
Example 6
class Worker(Service):
    """This service implement the possibility to compile and evaluate
    submissions in a sandbox. The instructions to follow for the
    operations are in the TaskType classes, while the sandbox is in
    the Sandbox module.

    """

    JOB_TYPE_COMPILATION = "compile"
    JOB_TYPE_EVALUATION = "evaluate"

    def __init__(self, shard):
        Service.__init__(self, shard)
        self.file_cacher = FileCacher(self)

        self.work_lock = gevent.lock.RLock()
        self._last_end_time = None
        self._total_free_time = 0
        self._total_busy_time = 0
        self._number_execution = 0

    @rpc_method
    def precache_files(self, contest_id):
        """RPC to ask the worker to precache of files in the contest.

        contest_id (int): the id of the contest

        """
        # In order to avoid a long-living connection, first fetch the
        # complete list of files and then download the files; since
        # this is just pre-caching, possible race conditions are not
        # dangerous
        logger.info("Precaching files for contest %d.", contest_id)
        with SessionGen() as session:
            contest = Contest.get_from_id(contest_id, session)
            files = contest.enumerate_files(skip_submissions=True,
                                            skip_user_tests=True)
        for digest in files:
            try:
                self.file_cacher.load(digest, if_needed=True)
            except KeyError:
                # No problem (at this stage) if we cannot find the
                # file
                pass

        logger.info("Precaching finished.")

    @rpc_method
    def execute_job(self, job_dict):
        """Receive a group of jobs in a dict format and executes them
        one by one.

        job_dict (dict): a dictionary suitable to be imported from Job.

        """
        start_time = time.time()
        job = Job.import_from_dict_with_type(job_dict)

        if self.work_lock.acquire(False):

            try:
                logger.info("Starting job.", extra={"operation": job.info})

                job.shard = self.shard

                task_type = get_task_type(job.task_type,
                                          job.task_type_parameters)
                task_type.execute_job(job, self.file_cacher)

                logger.info("Finished job.", extra={"operation": job.info})

                return job.export_to_dict()

            except Exception:
                err_msg = "Worker failed."
                logger.error(err_msg, exc_info=True)
                raise JobException(err_msg)

            finally:
                self._finalize(start_time)
                self.work_lock.release()

        else:
            err_msg = "Request received, but declined because of acquired " \
                "lock (Worker is busy executing another job, this should " \
                "not happen: check if there are more than one ES running, " \
                "or for bugs in ES."
            logger.warning(err_msg)
            self._finalize(start_time)
            raise JobException(err_msg)

    def _finalize(self, start_time):
        end_time = time.time()
        busy_time = end_time - start_time
        free_time = 0.0
        if self._last_end_time is not None:
            free_time = start_time - self._last_end_time
        self._last_end_time = end_time
        self._total_busy_time += busy_time
        self._total_free_time += free_time
        ratio = self._total_busy_time * 100.0 / \
            (self._total_busy_time + self._total_free_time)
        avg_free_time = 0.0
        if self._number_execution > 0:
            avg_free_time = self._total_free_time / self._number_execution
        avg_busy_time = 0.0
        if self._number_execution > 0:
            avg_busy_time = self._total_busy_time / self._number_execution
        self._number_execution += 1
        logger.info(
            "Executed in %.3f after being free for %.3f; "
            "busyness is %.1f%%; avg free time is %.3f, "
            "avg busy time is %.3f.", busy_time, free_time, ratio,
            avg_free_time, avg_busy_time)
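
_finalize keeps running totals of busy and free time and logs a busyness percentage. As a quick worked check: two executions busy for 2.0 s and 6.0 s, with 2.0 s idle between them, give a busyness of 8.0 / 10.0 = 80%. The arithmetic in isolation:

def busyness(busy_times, free_times):
    """Busyness percentage as computed in _finalize above."""
    total_busy = sum(busy_times)
    total_free = sum(free_times)
    return total_busy * 100.0 / (total_busy + total_free)


# Two executions: busy 2 s and 6 s, with 2 s free between them.
print(busyness([2.0, 6.0], [0.0, 2.0]))  # 80.0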