def get(self, *args, **kwargs):
    """
    Reports the position of a grading job within its course's queue

    Reads ``job_id`` and ``course_id`` from the route keyword arguments.
    Aborts and returns nothing when the job cannot be found, when the queue
    has no entry for the course, or when the queue reports position -1 for
    the job. Otherwise returns ``{"position": <position>}``.
    """
    grading_job_id = kwargs.get("job_id")

    grading_job_dao = daos.GradingJobDao(self.settings)
    if grading_job_dao.find_by_id(grading_job_id) is None:
        self.abort({"message": "grading job with the given ID not found"})
        return

    course_id = kwargs["course_id"]
    queue = self.settings["QUEUE"]

    if not queue.contains_key(course_id):
        self.abort({
            "message": f"{course_id} does not exist as a course in the queue"
        })
        return

    queue_position = queue.get_position_in_queue(course_id, grading_job_id)
    if queue_position == -1:
        self.abort({
            "message": f"{grading_job_id} has already passed through the queue"
        })
        # Stop here like every other abort path; without this return the
        # handler would still hand back {"position": -1} after aborting.
        return

    return {"position": queue_position}
def job_update_callback(settings, grading_job_id, grading_run_id):
    """
    Follows up on a grading job whose result has been recorded

    Publishes the job's outcome on the stream queue and closes its stream,
    then moves the owning grading run forward (or fails it) according to the
    type of the job. Every inconsistency found along the way is logged at
    critical level and ends the callback without touching the run.
    """
    finished_job = daos.GradingJobDao(settings).find_by_id(grading_job_id)
    if finished_job is None:
        logger.critical("cannot update non-existent job with ID '{}'".format(
            grading_job_id))
        return

    run_dao = daos.GradingRunDao(settings)
    owning_run = run_dao.find_by_id(grading_run_id)
    if owning_run is None:
        logger.critical("cannot update non-existent run with ID '{}'".format(
            grading_run_id))
        return

    if owning_run.finished_at is not None:
        logger.critical(
            "cannot update run with ID '{}' (already finished)".format(
                grading_run_id))
        return

    # announce the outcome to stream listeners, then close the job's stream
    stream_queue = settings["STREAM_QUEUE"]
    outcome = (GradingRunState.FINISHED
               if finished_job.success else GradingRunState.FAILED)
    stream_queue.update_job_state(finished_job.id, outcome.name)
    stream_queue.send_close_event(finished_job.id)

    def advance_or_fail():
        # a successful job lets the run continue; a failed one fails the run
        if finished_job.success:
            continue_grading_run(settings, owning_run)
        else:
            fail_grading_run(settings, owning_run)

    job_type = finished_job.type

    if job_type == GradingJobType.PRE_PROCESSING:
        advance_or_fail()
        return

    if job_type == GradingJobType.POST_PROCESSING:
        # post-processing must not complete while student jobs are pending
        if owning_run.student_jobs_left != 0:
            logger.critical(
                "post-processing job finished when {} student jobs remain".
                format(owning_run.student_jobs_left))
            return
        advance_or_fail()
        return

    if job_type == GradingJobType.STUDENT:
        if owning_run.student_jobs_left <= 0:
            logger.critical(
                "student job finished when {} student jobs remain".format(
                    owning_run.student_jobs_left))
            return

        owning_run.student_jobs_left -= 1
        run_dao.update(owning_run)

        # the run only moves on once the final student job has reported in
        if owning_run.student_jobs_left == 0:
            continue_grading_run(settings, owning_run)
        return

    logger.critical("cannot update run with last job type '{}'".format(
        finished_job.type))
def post(self, *args, **kwargs):
    """
    Lets a worker report the outcome of the grading job it was running

    The job named in the request body must exist and be in the STARTED state,
    and the worker named in the route must be known; otherwise the request is
    aborted. On success the worker's running job is cleared, the job's
    results are stored along with its logs, and the run-level follow-up is
    scheduled on the IOLoop.
    """
    worker_id = kwargs.get("worker_id")
    payload = self.body
    job_id = payload.get("grading_job_id")

    job_dao = daos.GradingJobDao(self.settings)
    job = job_dao.find_by_id(job_id)
    if not job:
        self.abort({"message": "job with the given ID not found"})
        return

    current_state = job.get_state()
    if current_state != models.GradingJobState.STARTED:
        logger.critical(
            "job with id '{}' updated when in state '{}'".format(
                job_id, current_state.value))
        self.abort(
            {"message": "cannot update job that is not in STARTED state"})
        return

    node_dao = daos.WorkerNodeDao(self.settings)
    node = node_dao.find_by_id(worker_id)
    if not node:
        logger.critical(
            "unknown node with ID '{}' successfully updated job".format(
                worker_id))
        self.abort({"message": ""}, status=404)
        return

    # the worker is free again and has just proven it is alive
    node.running_job_id = None
    node.is_alive = True
    node_dao.update(node)

    # record the outcome on the job itself
    job.finished_at = get_time()
    job.results = payload.get("results")
    job.success = payload.get("success")
    job_dao.update(job)

    # persist the logs that came with the result
    log_entry = models.GradingJobLog(job_id=job_id, **payload.get("logs"))
    daos.GradingJobLogDao(self.settings).insert(log_entry)

    # hand the run-level bookkeeping to the IOLoop
    io_loop = tornado.ioloop.IOLoop.current()
    io_loop.add_callback(job_update_callback, self.settings, job_id,
                         job.run_id)
def _prepare_next_job(settings, grading_run, global_job_environ,
                      runtime_job_environ, job_stages, job_type):
    """
    Creates and stores a grading job for the given run, returning its ID

    The job is inserted first so that it has an ID; that ID is then written
    into ``runtime_job_environ`` under ``GRADING_JOB_ID`` (the caller's dict
    is modified in place) before the stages are set and the job is saved
    again.
    """
    job_dao = daos.GradingJobDao(settings)

    new_job = models.GradingJob(
        job_type=job_type,
        run_id=grading_run.id,
        queued_at=get_time(),
    )

    # the insert result carries the ID assigned to the new record
    insert_result = job_dao.insert(new_job)
    new_job.id = str(insert_result.inserted_id)

    runtime_job_environ["GRADING_JOB_ID"] = new_job.id
    new_job.set_stages(job_stages, global_job_environ, runtime_job_environ)
    job_dao.update(new_job)

    return new_job.id
def get(self, *args, **kwargs):
    """
    Allows workers to request their next grading job

    Aborts with status 404 when the worker named by the ``worker_id`` route
    argument is unknown, with status 498 when pulling from the queue raises
    ``Empty``, and with status 500 when the pulled job ID has no matching
    job record. Otherwise marks the job as started by this worker, updates
    the worker node, and returns ``{"grading_job_id": ..., "stages": ...}``.
    """
    worker_id = kwargs.get("worker_id")

    worker_node_dao = daos.WorkerNodeDao(self.settings)
    worker_node = worker_node_dao.find_by_id(worker_id)
    if not worker_node:
        logger.critical(
            "unknown node with ID '{}' successfully requested job".format(
                worker_id))
        self.abort({"message": ""}, status=404)
        return

    # Only the pull itself is allowed to signal an empty queue. Keeping the
    # try this narrow means an Empty raised by a later step cannot be
    # misreported as "no jobs available" after a job was already dequeued.
    try:
        grading_job_id = self.get_queue().pull()
    except Empty:
        self.abort({"message": "no jobs available"}, status=498)
        return

    # publish the state change and refresh the positions of remaining jobs
    self.get_stream_queue().update_job_state(
        grading_job_id, models.GradingJobState.STARTED.name)
    self.get_queue().update_all_job_positions(self.get_stream_queue())

    grading_job_dao = daos.GradingJobDao(self.settings)
    grading_job = grading_job_dao.find_by_id(grading_job_id)
    if not grading_job:
        logger.critical(
            "found job ID '{}' in queue, but job does not exist".format(
                grading_job_id))
        self.abort(
            {"message": "a failure occurred while getting next job"},
            status=500)
        return

    # record which worker picked the job up and when
    grading_job.started_at = get_time()
    grading_job.worker_id = worker_id
    grading_job_dao.update(grading_job)

    # mirror the assignment on the worker node
    worker_node.running_job_id = grading_job_id
    worker_node.jobs_processed += 1
    worker_node.is_alive = True
    worker_node_dao.update(worker_node)

    return {
        "grading_job_id": grading_job_id,
        "stages": grading_job.stages,
    }
def get(self, *args, **kwargs):
    """
    Reports the state of a grading run and of each job that belongs to it

    Aborts when no run matches the ``run_id`` route argument. Otherwise
    returns the run's state plus ``{job_id: job_state}`` maps for the
    pre-processing job, the post-processing job (each ``None`` when there is
    no such job) and the student jobs.
    """
    run_id = kwargs.get("run_id")

    run = daos.GradingRunDao(self.settings).find_by_id(run_id)
    if run is None:
        self.abort({"message": "grading run with the given ID not found"})
        return

    jobs = daos.GradingJobDao(self.settings).find_by_run_id(run_id)
    job_types = models.GradingJobType

    def first_of_type(wanted):
        # first job of the requested type, or None when there is none
        return next((j for j in jobs if j.type == wanted), None)

    def states_by_id(selected):
        # [jobs] -> { job_id: job_state }
        return {j.id: j.get_state().value for j in selected}

    pre_job = first_of_type(job_types.PRE_PROCESSING)
    post_job = first_of_type(job_types.POST_PROCESSING)
    student_jobs = (j for j in jobs if j.type == job_types.STUDENT)

    response = {"state": run.state.value}
    response["pre_processing_job_state"] = (
        states_by_id([pre_job]) if pre_job else None)
    response["post_processing_job_state"] = (
        states_by_id([post_job]) if post_job else None)
    response["student_jobs_state"] = states_by_id(student_jobs)
    return response
def get(self, *args, **kwargs):
    """
    Reports the environment variables used by each job of a grading run

    Aborts when no run matches the ``run_id`` route argument. Otherwise
    returns, for the pre-processing job, the post-processing job (each
    ``None`` when there is no such job) and the student jobs, a map of
    ``{job_id: {variable: [values]}}`` where the values are collected across
    all stages of the job.
    """
    run_id = kwargs.get("run_id")

    run = daos.GradingRunDao(self.settings).find_by_id(run_id)
    if run is None:
        self.abort({"message": "grading run with the given ID not found"})
        return

    jobs = daos.GradingJobDao(self.settings).find_by_run_id(run_id)
    job_types = models.GradingJobType

    def env_by_job_id(selected):
        # Builds { job_id: { variable: [distinct values over all stages] } }
        result = {}
        for job in selected:
            values_by_key = {}
            for stage in job.stages:
                stage_env = stage["env"]
                for key in stage_env:
                    values_by_key.setdefault(key, set()).add(stage_env[key])
            # sets are not JSON serializable, so hand back lists instead
            result[job.id] = {
                key: list(values) for key, values in values_by_key.items()
            }
        return result

    def first_of_type(wanted):
        # first job of the requested type, or None when there is none
        return next((j for j in jobs if j.type == wanted), None)

    pre_job = first_of_type(job_types.PRE_PROCESSING)
    post_job = first_of_type(job_types.POST_PROCESSING)
    student_jobs = (j for j in jobs if j.type == job_types.STUDENT)

    # the pre- and post-processing jobs are optional
    pre_env = None if pre_job is None else env_by_job_id([pre_job])
    post_env = None if post_job is None else env_by_job_id([post_job])

    # student jobs always produce a map, so no existence check is made
    student_env = env_by_job_id(student_jobs)

    return {
        "pre_processing_env": pre_env,
        "post_processing_env": post_env,
        "student_env": student_env,
    }
def handler_job_result(self, grading_job_id, success, results, logs):
    """
    Handles a job result submitted by the worker on this connection

    The connection is closed with code 1002 when the worker has not
    registered, the job is unknown, the job is not in the STARTED state, or
    the worker node cannot be found. Otherwise the worker's running job is
    cleared, the job's outcome and logs are stored, and both the run-level
    follow-up and a scheduling pass are queued on the IOLoop.
    """
    if not self.registered:
        logger.info("worker '{}' submitted before registering".format(
            self.worker_id))
        self.close(reason="submitting before registering", code=1002)
        return

    job_dao = daos.GradingJobDao(self.settings)
    job = job_dao.find_by_id(grading_job_id)
    if not job:
        self.close(reason="job with the given ID not found", code=1002)
        return

    current_state = job.get_state()
    if current_state != models.GradingJobState.STARTED:
        logger.critical(
            "job with id '{}' updated when in state '{}'".format(
                grading_job_id, current_state.value))
        self.close(reason="cannot update job that is not in STARTED state",
                   code=1002)
        return

    node_dao = daos.WorkerNodeDao(self.settings)
    node = node_dao.find_by_id(self.worker_id)
    if not node:
        failure = "unknown worker '{}' successfully updated job".format(
            self.worker_id)
        logger.critical(failure)
        self.close(reason=failure, code=1002)
        return

    logger.info("worker '{}' submitted job result for job '{}'".format(
        self.worker_id, grading_job_id))

    # the worker no longer has a job assigned
    node.running_job_id = None
    node_dao.update(node)

    # record the outcome on the job itself
    job.finished_at = get_time()
    job.results = results
    job.success = success
    job_dao.update(job)

    # persist the logs that came with the result
    log_entry = models.GradingJobLog(job_id=grading_job_id, **logs)
    daos.GradingJobLogDao(self.settings).insert(log_entry)

    io_loop = tornado.ioloop.IOLoop.current()
    # run-level bookkeeping for the finished job
    io_loop.add_callback(job_update_callback, self.settings, grading_job_id,
                         job.run_id)
    # then trigger a scheduling pass
    io_loop.add_callback(worker_schedule_job, self.settings)