Example #1
0
    def get(self, *args, **kwargs):
        """
        Lists every known worker node for the requested scope

        Only the "all" scope is handled. For it, a dict with a single
        "worker_nodes" key is returned, holding one summary dict per node.
        Any other scope aborts the request with a 404 and returns None.
        """
        requested_scope = kwargs.get("scope")
        node_dao = daos.WorkerNodeDao(self.settings)

        # guard clause: bail out early on scopes that are not supported
        if requested_scope != "all":
            error_body = {
                "message":
                "scope {} has not been implemented yet".format(requested_scope)
            }
            self.abort(error_body, 404)
            return

        # a node counts as busy while it has a running job assigned
        summaries = [{
            "hostname": node.hostname,
            "jobs_processed": node.jobs_processed,
            "busy": (node.running_job_id is not None),
            "alive": node.is_alive,
        } for node in node_dao.find_all()]

        return {"worker_nodes": summaries}
Example #2
0
    def post(self, *args, **kwargs):
        """
        Registers a worker node over HTTP

        The worker id comes from the route keyword arguments and the hostname
        from the request body. One of three things happens:

        - no node with this id exists: a new node is inserted
        - a node exists but is not alive: it is marked alive again and its
          hostname and last_seen are refreshed
        - a node exists and is alive: the request is aborted with a 400

        Returns a dict carrying the heartbeat interval on success, or None
        after aborting.
        """
        worker_id = kwargs.get("worker_id")
        hostname = self.body.get("hostname")

        worker_node_dao = daos.WorkerNodeDao(self.settings)
        dup = worker_node_dao.find_by_id(worker_id)

        if dup is None:
            # only build a fresh record when one is actually going to be stored
            worker_node = models.WorkerNode(id_=worker_id,
                                            hostname=hostname,
                                            last_seen=get_time(),
                                            is_alive=True)
            logger.info("new worker {} joined on {}".format(
                worker_id, hostname))
            worker_node_dao.insert(worker_node)
        elif not dup.is_alive:
            # refresh hostname and last_seen along with the alive flag, so the
            # revived record does not keep the values from its previous life
            dup.hostname = hostname
            dup.last_seen = get_time()
            dup.is_alive = True
            logger.info("worker {} alive again on {}".format(
                worker_id, hostname))
            worker_node_dao.update(dup)
        else:
            # a live node already owns this id; refuse the registration
            msg = "worker id '{}' already exists".format(worker_id)
            logger.info(msg)
            self.abort({"message": msg}, status=400)
            return

        return {"heartbeat": self.get_flags()["heartbeat_interval"]}
Example #3
0
    def post(self, *args, **kwargs):
        """
        Lets a worker report the outcome of a grading job it was running
        """
        node_id = kwargs.get("worker_id")
        finished_job_id = self.body.get("grading_job_id")

        job_dao = daos.GradingJobDao(self.settings)
        grading_job = job_dao.find_by_id(finished_job_id)
        if not grading_job:
            self.abort({"message": "job with the given ID not found"})
            return

        # only a job that is currently STARTED may be completed
        current_state = grading_job.get_state()
        if current_state != models.GradingJobState.STARTED:
            state_msg = "job with id '{}' updated when in state '{}'".format(
                finished_job_id, current_state.value)
            logger.critical(state_msg)
            self.abort(
                {"message": "cannot update job that is not in STARTED state"})
            return

        node_dao = daos.WorkerNodeDao(self.settings)
        node = node_dao.find_by_id(node_id)
        if not node:
            unknown_msg = (
                "unknown node with ID '{}' successfully updated job".format(
                    node_id))
            logger.critical(unknown_msg)
            self.abort({"message": ""}, status=404)
            return

        # the node is free again and has just proven it is alive
        node.running_job_id = None
        node.is_alive = True
        node_dao.update(node)

        # record completion time and outcome on the job
        grading_job.finished_at = get_time()
        grading_job.results = self.body.get("results")
        grading_job.success = self.body.get("success")
        job_dao.update(grading_job)

        # persist the logs that were sent along with the result
        log_dao = daos.GradingJobLogDao(self.settings)
        log_fields = self.body.get("logs")
        log_entry = models.GradingJobLog(job_id=finished_job_id, **log_fields)
        log_dao.insert(log_entry)

        # hand the follow-up work to the IOLoop (thread safe callback)
        io_loop = tornado.ioloop.IOLoop.current()
        io_loop.add_callback(job_update_callback, self.settings,
                             finished_job_id, grading_job.run_id)
Example #4
0
    def post(self, *args, **kwargs):
        """
        Records a heartbeat sent by a worker over HTTP

        Aborts with a 404 when the worker id is unknown; otherwise refreshes
        the node's last_seen time and marks it alive.
        """
        node_id = kwargs.get("worker_id")

        node_dao = daos.WorkerNodeDao(self.settings)
        node = node_dao.find_by_id(node_id)
        if not node:
            unknown_msg = (
                "unknown node with ID '{}' successfully sent heartbeat".format(
                    node_id))
            logger.critical(unknown_msg)
            self.abort({"message": ""}, status=404)
            return

        seen_at = get_time()
        node.last_seen = seen_at
        node.is_alive = True
        node_dao.update(node)
Example #5
0
    def get(self, *args, **kwargs):
        """
        Allows workers to request their next grading job

        Aborts with a 404 when the worker id is unknown, with a 498 when the
        queue has no job to hand out, and with a 500 when the queue yields a
        job id for which no job record exists. On success the job is stamped
        with its start time and worker, the node is marked as running it, and
        a dict with the job id and its stages is returned.
        """
        worker_id = kwargs.get("worker_id")
        worker_node_dao = daos.WorkerNodeDao(self.settings)
        worker_node = worker_node_dao.find_by_id(worker_id)
        if not worker_node:
            logger.critical(
                "unknown node with ID '{}' successfully requested job".format(
                    worker_id))
            self.abort({"message": ""}, status=404)
            return

        # keep the try limited to the pull: that is the call whose Empty means
        # "nothing queued". An Empty escaping from any later step would be a
        # real failure and must not be reported as "no jobs available".
        try:
            grading_job_id = self.get_queue().pull()
        except Empty:
            self.abort({"message": "no jobs available"}, status=498)
            return

        self.get_stream_queue().update_job_state(
            grading_job_id, models.GradingJobState.STARTED.name)
        self.get_queue().update_all_job_positions(self.get_stream_queue())

        grading_job_dao = daos.GradingJobDao(self.settings)
        grading_job = grading_job_dao.find_by_id(grading_job_id)
        if not grading_job:
            logger.critical(
                "found job ID '{}' in queue, but job does not exist".format(
                    grading_job_id))
            self.abort(
                {"message": "a failure occurred while getting next job"},
                status=500)
            return

        # stamp the job with who is running it and since when
        grading_job.started_at = get_time()
        grading_job.worker_id = worker_id
        grading_job_dao.update(grading_job)

        # mirror the assignment on the worker node
        worker_node.running_job_id = grading_job_id
        worker_node.jobs_processed += 1
        worker_node.is_alive = True
        worker_node_dao.update(worker_node)

        return {
            "grading_job_id": grading_job_id,
            "stages": grading_job.stages
        }
Example #6
0
    def handler_register(self, hostname):
        """
        Registers this websocket connection as a worker node

        Does nothing when self.worker_id is None. Otherwise one of:

        - no node with this id exists: a new node is inserted with
          use_ws=True
        - a node exists but is not alive: it is revived with the given
          hostname, a fresh last_seen and use_ws=True
        - a node exists and is alive: {"success": False} is sent and the
          connection is closed with code 1002

        On success the handler is marked as registered, stored in the ws
        connection map under its worker id, {"success": True} is sent, and
        worker_schedule_job is queued on the current IOLoop.
        """
        if self.worker_id is None:
            return

        worker_node_dao = daos.WorkerNodeDao(self.settings)

        dup = worker_node_dao.find_by_id(self.worker_id)

        if dup is None:
            self.worker_node = models.WorkerNode(
                id_=self.worker_id,
                hostname=hostname,
                last_seen=get_time(),
                is_alive=True,
                use_ws=True,
            )
            logger.info("new worker '{}' joined on '{}'".format(
                self.worker_id, hostname))
            worker_node_dao.insert(self.worker_node)
        elif not dup.is_alive:
            self.worker_node = dup
            self.worker_node.hostname = hostname
            self.worker_node.last_seen = get_time()
            self.worker_node.is_alive = True
            # flag the revived record as a websocket worker, just like a newly
            # created one; setting only the handler attribute left the stored
            # node with whatever use_ws value it had before
            # NOTE(review): assumes WorkerNode keeps the constructor's use_ws
            # argument under the attribute of the same name - confirm
            self.worker_node.use_ws = True
            # NOTE(review): handler-level flag kept in case other code reads it
            self.use_ws = True
            logger.info("worker '{}' alive again on '{}'".format(
                self.worker_id, hostname))
            worker_node_dao.update(self.worker_node)
        else:
            # a live node already owns this id; reject and drop the connection
            msg = "worker id '{}' already exists".format(self.worker_id)
            logger.info(msg)
            self.send({"success": False})
            self.close(reason=msg, code=1002)
            return

        self.registered = True
        self.get_ws_conn_map()[self.worker_id] = self

        self.send({"success": True})

        # trigger schedule event
        tornado.ioloop.IOLoop.current().add_callback(worker_schedule_job,
                                                     self.settings)
Example #7
0
    def on_ping(self, data):
        """
        Treats a websocket ping as a heartbeat from this worker

        Ping messages serve the same purpose as the heartbeat requests sent
        by normal http workers. The node's last_seen time is refreshed when
        the worker is initialized and known; otherwise the problem is only
        logged.
        """
        if self.worker_id is None:
            logger.critical("worker is not initialized")
            return

        node_dao = daos.WorkerNodeDao(self.settings)
        node = node_dao.find_by_id(self.worker_id)

        if node:
            node.last_seen = get_time()
            node_dao.update(node)
            return

        # nothing to refresh: the id does not match any stored node
        unknown_msg = (
            "unknown ws node with ID '{}' successfully sent heartbeat".format(
                self.worker_id))
        logger.critical(unknown_msg)
Example #8
0
    def handler_job_result(self, grading_job_id, success, results, logs):
        """
        Accepts the result of a grading job submitted over the websocket

        The connection is closed with code 1002 when the worker has not
        registered, the job does not exist, the job is not in STARTED state,
        or the worker node is unknown. Otherwise the node's running job is
        cleared, the job is finished with the given results, the logs are
        stored, and both job_update_callback and worker_schedule_job are
        queued on the current IOLoop.
        """
        if not self.registered:
            early_msg = "worker '{}' submitted before registering".format(
                self.worker_id)
            logger.info(early_msg)
            self.close(reason="submitting before registering", code=1002)
            return

        job_dao = daos.GradingJobDao(self.settings)
        grading_job = job_dao.find_by_id(grading_job_id)

        if not grading_job:
            self.close(reason="job with the given ID not found", code=1002)
            return

        current_state = grading_job.get_state()

        # only a job that is currently STARTED may be completed
        if current_state != models.GradingJobState.STARTED:
            state_msg = "job with id '{}' updated when in state '{}'".format(
                grading_job_id, current_state.value)
            logger.critical(state_msg)
            self.close(reason="cannot update job that is not in STARTED state",
                       code=1002)
            return

        node_dao = daos.WorkerNodeDao(self.settings)
        node = node_dao.find_by_id(self.worker_id)

        if not node:
            msg = "unknown worker '{}' successfully updated job".format(
                self.worker_id)
            logger.critical(msg)
            self.close(reason=msg, code=1002)
            return

        submitted_msg = "worker '{}' submitted job result for job '{}'".format(
            self.worker_id, grading_job_id)
        logger.info(submitted_msg)

        # the node no longer has a job assigned
        node.running_job_id = None
        node_dao.update(node)

        # record completion time and outcome on the job
        grading_job.finished_at = get_time()
        grading_job.results = results
        grading_job.success = success
        job_dao.update(grading_job)

        # persist the submitted logs
        log_dao = daos.GradingJobLogDao(self.settings)
        log_entry = models.GradingJobLog(job_id=grading_job_id, **logs)
        log_dao.insert(log_entry)

        # thread safe callback
        io_loop = tornado.ioloop.IOLoop.current()
        io_loop.add_callback(job_update_callback, self.settings,
                             grading_job_id, grading_job.run_id)

        # trigger schedule event
        scheduler_loop = tornado.ioloop.IOLoop.current()
        scheduler_loop.add_callback(worker_schedule_job, self.settings)