def test_execute_job_subsequent_success(self):
    """Executes three successful jobs, then other three.

    Verifies that the worker asks for the right task type for each
    batch of jobs and that the fake task type is invoked once per job.
    """
    # First batch of three successful jobs.
    jobs_a, calls_a = TestWorker.new_jobs(3, prefix="a")
    task_type_a = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_a)

    for job in jobs_a:
        Job.import_from_dict_with_type(
            self.service.execute_job(job.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_a)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(task_type_a.call_count, 3)

    # Second, independent batch of three successful jobs.
    jobs_b, calls_b = TestWorker.new_jobs(3, prefix="b")
    task_type_b = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_b)

    for job in jobs_b:
        Job.import_from_dict_with_type(
            self.service.execute_job(job.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_b)
    self.assertEqual(task_type_b.call_count, 3)
def test_execute_job_subsequent_locked(self):
    """Executes a long job, then another one that should fail
    because of the lock.

    """
    # Because of how gevent works, the interval here can be very small.
    task_type = FakeTaskType([0.01])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    jobs_a, calls_a = TestWorker.new_jobs(1, prefix="a")
    jobs_b, calls_b = TestWorker.new_jobs(1, prefix="b")

    def run_long_job():
        # Executes the first (long-running) job inside a greenlet.
        Job.import_from_dict_with_type(
            self.service.execute_job(jobs_a[0].export_to_dict()))

    long_job_greenlet = gevent.spawn(run_long_job)
    gevent.sleep(0)  # To ensure we call jobgroup_a first.

    # The second job must be refused while the lock is held.
    with self.assertRaises(JobException):
        Job.import_from_dict_with_type(
            self.service.execute_job(jobs_b[0].export_to_dict()))

    long_job_greenlet.get()
    self.assertNotIn(calls_b[0],
                     cms.service.Worker.get_task_type.mock_calls)
    cms.service.Worker.get_task_type.assert_has_calls(calls_a)
def test_execute_job_success(self):
    """Executes three successful jobs.

    Checks that the task type is fetched for each job and executed
    exactly three times.
    """
    jobs, calls = TestWorker.new_jobs(3)
    task_type = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    for job in jobs:
        Job.import_from_dict_with_type(
            self.service.execute_job(job.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(task_type.call_count, 3)
def test_execute_job_tasktype_raise(self):
    """Executes two jobs raising exceptions.

    Each task-type failure must surface as a JobException from the
    worker, and the worker must remain usable for the next job.
    """
    jobs, unused_calls = TestWorker.new_jobs(2)
    task_type = FakeTaskType([Exception(), Exception()])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    for job in jobs:
        with self.assertRaises(JobException):
            Job.import_from_dict_with_type(
                self.service.execute_job(job.export_to_dict()))

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(cms.service.Worker.get_task_type.call_count, 2)
    self.assertEqual(task_type.call_count, 2)
def execute_job(self, job_dict):
    """Deserialize a job, execute it under the work lock, and return it.

    job_dict (dict): a dictionary suitable to be imported from Job.

    return (dict): the executed job, exported back to a dict.

    raise (JobException): if the lock is already held, or if the
        task type raises during execution.
    """
    job = Job.import_from_dict_with_type(job_dict)

    if self.work_lock.acquire(False):
        try:
            logger.operation = "job '%s'" % (job.info)
            logger.info("Request received")

            job.shard = self.shard
            self.task_type = get_task_type(job, self.file_cacher)
            self.task_type.execute_job()
            logger.info("Request finished.")
            return job.export_to_dict()
        # Narrowed from a bare `except:` so that system-exiting
        # exceptions (KeyboardInterrupt, GreenletExit, ...) are not
        # converted into a JobException.
        except Exception:
            err_msg = "Worker failed on operation `%s'" % logger.operation
            logger.error("%s\n%s" % (err_msg, traceback.format_exc()))
            raise JobException(err_msg)
        finally:
            # Always reset per-job state and release the lock, even on
            # failure, so the worker can accept the next request.
            self.task_type = None
            self.session = None
            logger.operation = ""
            self.work_lock.release()
    else:
        err_msg = "Request '%s' received, " \
            "but declined because of acquired lock" % \
            (job.info)
        logger.warning(err_msg)
        raise JobException(err_msg)
def enqueue(self, operation, priority, timestamp, job=None):
    """Push an operation in the queue.

    Push an operation in the operation queue if the submission is
    not already in the queue or assigned to a worker.

    operation (ESOperation|list): the operation to put in the queue.
    priority (int): the priority of the operation.
    timestamp (datetime|float): the time of the submission.

    return (bool): True if pushed, False if not.

    """
    # Normalize the two accepted input formats.
    if not isinstance(timestamp, datetime):
        timestamp = datetime.utcfromtimestamp(timestamp)
    if isinstance(operation, list):
        operation = ESOperation.from_list(operation)

    # Refuse operations that are already queued or assigned.
    already_known = (operation in self.get_executor()
                     or operation in self.pending)
    if already_known:
        return False

    if isinstance(job, dict):
        operation.job = Job.import_from_dict_with_type(job)
    else:
        logger.info("Operation without job!")

    # enqueue() returns the number of successful pushes.
    pushed = super(QueueService, self).enqueue(
        operation, priority, timestamp)
    return pushed > 0
def run(self):
    from cms.grading.Job import Job
    from cms.grading.tasktypes import get_task_type

    task_type = get_task_type(self.job.task_type,
                              self.job.task_type_parameters)
    # Crazy workaround to clone the job
    cloned_job = Job.import_from_dict_with_type(
        self.job.export_to_dict())
    task_type.execute_job(cloned_job, self.file_cacher)
    self.result.log['job'] = cloned_job.export_to_dict()
def run(self):
    from cms.grading.Job import Job
    from cms.grading.tasktypes import get_task_type

    task_type = get_task_type(self.job.task_type,
                              self.job.task_type_parameters)
    # Crazy workaround to clone the job
    cloned_job = Job.import_from_dict_with_type(
        self.job.export_to_dict())
    task_type.execute_job(cloned_job, self.file_cacher)
    # Don't save the result if the sandbox failed.
    if not cloned_job.success:
        self.result.badfail = True
    self.result.log['job'] = cloned_job.export_to_dict()
def test_execute_job_failure_releases_lock(self):
    """After a failure, the worker should be able to accept
    another job.

    """
    # A single job whose task type raises.
    jobs_a, calls_a = TestWorker.new_jobs(1)
    task_type_a = FakeTaskType([Exception()])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_a)

    with self.assertRaises(JobException):
        Job.import_from_dict_with_type(
            self.service.execute_job(jobs_a[0].export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_a)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(task_type_a.call_count, 1)

    # The worker must still process subsequent jobs normally.
    jobs_b, calls_b = TestWorker.new_jobs(3)
    task_type_b = FakeTaskType([True, True, True])
    cms.service.Worker.get_task_type = Mock(return_value=task_type_b)

    for job in jobs_b:
        Job.import_from_dict_with_type(
            self.service.execute_job(job.export_to_dict()))

    cms.service.Worker.get_task_type.assert_has_calls(calls_b)
    self.assertEqual(task_type_b.call_count, 3)
def test_execute_job_failure(self):
    """Executes two unsuccessful jobs.

    An unsuccessful job does not raise; it is returned with
    success == False.
    """
    jobs, unused_calls = TestWorker.new_jobs(2)
    task_type = FakeTaskType([False, False])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    results = []
    for job in jobs:
        results.append(Job.import_from_dict_with_type(
            self.service.execute_job(job.export_to_dict())))

    self.assertFalse(any(job.success for job in results))
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(cms.service.Worker.get_task_type.call_count, 2)
    self.assertEqual(task_type.call_count, 2)
def test_execute_job_failure(self):
    """Executes two unsuccessful jobs.

    An unsuccessful job does not raise; it is returned with
    success == False.
    """
    jobs, unused_calls = TestWorker.new_jobs(2)
    task_type = FakeTaskType([False, False])
    cms.service.Worker.get_task_type = Mock(return_value=task_type)

    results = []
    for job in jobs:
        results.append(
            Job.import_from_dict_with_type(
                self.service.execute_job(job.export_to_dict())))

    self.assertFalse(any(job.success for job in results))
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(cms.service.Worker.get_task_type.call_count, 2)
    self.assertEqual(task_type.call_count, 2)
def execute_job(self, job_dict):
    """Receive a group of jobs in a dict format and executes them
    one by one.

    job_dict (dict): a dictionary suitable to be imported from Job.

    return (dict): the executed job, exported back to a dict.

    raise (JobException): if the lock is already held, or if the
        task type raises during execution.
    """
    start_time = time.time()
    job = Job.import_from_dict_with_type(job_dict)

    if self.work_lock.acquire(False):
        try:
            logger.info("Starting job.", extra={"operation": job.info})

            job.shard = self.shard

            task_type = get_task_type(job.task_type,
                                      job.task_type_parameters)
            task_type.execute_job(job, self.file_cacher)

            logger.info("Finished job.", extra={"operation": job.info})

            return job.export_to_dict()
        # Narrowed from a bare `except:` so that system-exiting
        # exceptions are not converted into a JobException.
        except Exception:
            err_msg = "Worker failed."
            logger.error(err_msg, exc_info=True)
            raise JobException(err_msg)
        finally:
            self._finalize(start_time)
            self.work_lock.release()
    else:
        # Closed the previously unbalanced parenthesis in this message.
        err_msg = "Request received, but declined because of acquired " \
            "lock (Worker is busy executing another job, this should " \
            "not happen: check if there are more than one ES running, " \
            "or for bugs in ES)."
        logger.warning(err_msg)
        self._finalize(start_time)
        raise JobException(err_msg)
def first_call():
    # Run the first (long) job through the worker; closes over `self`
    # and `jobs_a` from the enclosing test method. The re-import of the
    # returned dict only validates that it round-trips as a Job.
    Job.import_from_dict_with_type(
        self.service.execute_job(jobs_a[0].export_to_dict()))
def finish(self):
    from cms.grading.Job import Job
    # Rebuild the Job object from the serialized copy kept in the log.
    self.result.job = Job.import_from_dict_with_type(
        self.result.log['job'])
def write_result(self, operation, job):
    """Receive worker results from QS and writes them to the DB.

    operation (dict): operation performed, exported as dict.
    job (dict): job containing the result, exported as dict.

    return ((bool, list)): whether the result was written, and the
        list of follow-up operations, each serialized as
        [op_dict, priority, seconds_since_epoch, job_dict].
    """
    logger.debug("Starting commit process...")
    operation = ESOperation.from_dict(operation)
    job = Job.import_from_dict_with_type(job)

    def _serialize(ops):
        # Serialize (operation, priority, timestamp, job) tuples into
        # the wire format returned to the caller. Factored out because
        # it was duplicated at both return sites.
        return [[op.to_dict(),
                 priority,
                 (timestamp - EvaluationService.EPOCH).total_seconds(),
                 job_]
                for op, priority, timestamp, job_ in ops]

    with SessionGen() as session:
        type_ = operation.type_
        object_id = operation.object_id
        dataset_id = operation.dataset_id

        dataset = session.query(Dataset)\
            .filter(Dataset.id == dataset_id)\
            .options(joinedload(Dataset.testcases))\
            .first()
        if dataset is None:
            logger.error("Could not find dataset %d in the database.",
                         dataset_id)
            return False, []

        # Get submission or user test, and their results.
        if type_ in [ESOperation.COMPILATION, ESOperation.EVALUATION]:
            object_ = Submission.get_from_id(object_id, session)
            if object_ is None:
                logger.error("Could not find submission %d "
                             "in the database.", object_id)
                return False, []
            object_result = object_.get_result_or_create(dataset)
        else:
            object_ = UserTest.get_from_id(object_id, session)
            object_result = object_.get_result_or_create(dataset)

        logger.info("Writing result to db for %s", operation)
        new_operations = []
        try:
            new_operations = self.write_results_one_row(
                session, object_result, operation, job)
        except IntegrityError:
            logger.warning(
                "Integrity error while inserting worker result.",
                exc_info=True)
            # This is not an error condition, as the result is already
            # in the DB.
            return True, []

        logger.debug("Committing evaluations...")
        session.commit()

        # If we collected some new operations to do while writing
        # the results, it means we had to invalidate the submission.
        # We return immediately since we already have all the operations
        # we need to do next. (Truthiness test instead of `!= []`.)
        if new_operations:
            return True, _serialize(new_operations)

        if type_ == ESOperation.EVALUATION:
            if len(object_result.evaluations) == len(dataset.testcases):
                object_result.set_evaluation_outcome()
                logger.debug("Committing evaluation outcomes...")
                session.commit()

        logger.info("Ending operations...")
        if type_ == ESOperation.COMPILATION:
            new_operations = self.compilation_ended(object_result)
        elif type_ == ESOperation.EVALUATION:
            if object_result.evaluated():
                new_operations = self.evaluation_ended(object_result)
        elif type_ == ESOperation.USER_TEST_COMPILATION:
            new_operations = \
                self.user_test_compilation_ended(object_result)
        elif type_ == ESOperation.USER_TEST_EVALUATION:
            new_operations = \
                self.user_test_evaluation_ended(object_result)

        logger.debug("Done")
        return True, _serialize(new_operations)
def action_finished(self, data, plus, error=None):
    """Callback from a worker, to signal that is finished some
    action (compilation or evaluation).

    data (dict): a dictionary that describes a Job instance.
    plus (tuple): the tuple (type_,
                             object_id,
                             dataset_id,
                             testcase_codename,
                             side_data=(priority, timestamp),
                             shard_of_worker)

    """
    # Unpack the plus tuple. It's built in the RPC call to Worker's
    # execute_job method inside WorkerPool.acquire_worker.
    type_, object_id, dataset_id, testcase_codename, _, \
        shard = plus

    # Restore operation from its fields.
    operation = ESOperation(type_, object_id, dataset_id,
                            testcase_codename)

    # We notify the pool that the worker is available again for
    # further work (no matter how the current request turned out,
    # even if the worker encountered an error). If the pool
    # informs us that the data produced by the worker has to be
    # ignored (by returning True) we interrupt the execution of
    # this method and do nothing because in that case we know the
    # operation has returned to the queue and perhaps already been
    # reassigned to another worker.
    if self.get_executor().pool.release_worker(shard):
        logger.info("Ignored result from worker %s as requested.", shard)
        return

    job_success = True
    if error is not None:
        logger.error("Received error from Worker: `%s'.", error)
        job_success = False
    else:
        try:
            job = Job.import_from_dict_with_type(data)
        # Narrowed from a bare `except:` so that system-exiting
        # exceptions are not silently recorded as a failed job.
        except Exception:
            logger.error("Couldn't build Job for data %s.", data,
                         exc_info=True)
            job_success = False
        else:
            if not job.success:
                logger.error("Worker %s signaled action not successful.",
                             shard)
                job_success = False

    logger.info("`%s' completed. Success: %s.", operation, job_success)

    # We get the submission from DB and update it.
    with SessionGen() as session:
        dataset = Dataset.get_from_id(dataset_id, session)
        if dataset is None:
            logger.error("Could not find dataset %d in the database.",
                         dataset_id)
            return

        # TODO Try to move this 4-cases if-clause into a method of
        # ESOperation: I'd really like ES itself not to care about
        # which type of operation it's handling.
        if type_ == ESOperation.COMPILATION:
            submission = Submission.get_from_id(object_id, session)
            if submission is None:
                logger.error("Could not find submission %d "
                             "in the database.", object_id)
                return
            submission_result = submission.get_result(dataset)
            if submission_result is None:
                logger.info("Couldn't find submission %d(%d) "
                            "in the database. Creating it.",
                            object_id, dataset_id)
                submission_result = \
                    submission.get_result_or_create(dataset)
            if job_success:
                job.to_submission(submission_result)
            else:
                submission_result.compilation_tries += 1
            session.commit()
            self.compilation_ended(submission_result)

        elif type_ == ESOperation.EVALUATION:
            submission = Submission.get_from_id(object_id, session)
            if submission is None:
                logger.error("Could not find submission %d "
                             "in the database.", object_id)
                return
            submission_result = submission.get_result(dataset)
            if submission_result is None:
                logger.error("Couldn't find submission %d(%d) "
                             "in the database.", object_id, dataset_id)
                return
            if job_success:
                job.to_submission(submission_result)
            else:
                submission_result.evaluation_tries += 1

            # Submission evaluation will be ended only when
            # evaluation for each testcase is available.
            evaluation_complete = (len(
                submission_result.evaluations) == len(dataset.testcases))
            if evaluation_complete:
                submission_result.set_evaluation_outcome()
            session.commit()
            if evaluation_complete:
                self.evaluation_ended(submission_result)

        elif type_ == ESOperation.USER_TEST_COMPILATION:
            user_test = UserTest.get_from_id(object_id, session)
            if user_test is None:
                logger.error("Could not find user test %d "
                             "in the database.", object_id)
                return
            user_test_result = user_test.get_result(dataset)
            if user_test_result is None:
                logger.error("Couldn't find user test %d(%d) "
                             "in the database. Creating it.",
                             object_id, dataset_id)
                user_test_result = \
                    user_test.get_result_or_create(dataset)
            if job_success:
                job.to_user_test(user_test_result)
            else:
                user_test_result.compilation_tries += 1
            session.commit()
            self.user_test_compilation_ended(user_test_result)

        elif type_ == ESOperation.USER_TEST_EVALUATION:
            user_test = UserTest.get_from_id(object_id, session)
            if user_test is None:
                logger.error("Could not find user test %d "
                             "in the database.", object_id)
                return
            user_test_result = user_test.get_result(dataset)
            if user_test_result is None:
                logger.error("Couldn't find user test %d(%d) "
                             "in the database.", object_id, dataset_id)
                return
            if job_success:
                job.to_user_test(user_test_result)
            else:
                user_test_result.evaluation_tries += 1
            session.commit()
            self.user_test_evaluation_ended(user_test_result)

        else:
            logger.error("Invalid operation type %r.", type_)
            return
def action_finished(self, data, plus, error=None):
    """Callback from a worker, to signal that is finished some
    action (compilation or evaluation).

    data (dict): a dictionary that describes a Job instance.
    plus (tuple): the tuple (type_,
                             object_id,
                             dataset_id,
                             testcase_codename,
                             side_data=(priority, timestamp),
                             shard_of_worker)

    """
    # Unpack the plus tuple. It's built in the RPC call to Worker's
    # execute_job method inside WorkerPool.acquire_worker.
    type_, object_id, dataset_id, testcase_codename, _, \
        shard = plus

    # Restore operation from its fields.
    operation = ESOperation(
        type_, object_id, dataset_id, testcase_codename)

    # We notify the pool that the worker is available again for
    # further work (no matter how the current request turned out,
    # even if the worker encountered an error). If the pool
    # informs us that the data produced by the worker has to be
    # ignored (by returning True) we interrupt the execution of
    # this method and do nothing because in that case we know the
    # operation has returned to the queue and perhaps already been
    # reassigned to another worker.
    if self.get_executor().pool.release_worker(shard):
        logger.info("Ignored result from worker %s as requested.", shard)
        return

    job_success = True
    if error is not None:
        logger.error("Received error from Worker: `%s'.", error)
        job_success = False
    else:
        try:
            job = Job.import_from_dict_with_type(data)
        # Narrowed from a bare `except:` so that system-exiting
        # exceptions are not silently recorded as a failed job.
        except Exception:
            logger.error("[action_finished] Couldn't build Job for "
                         "data %s.", data, exc_info=True)
            job_success = False
        else:
            if not job.success:
                logger.error("Worker %s signaled action "
                             "not successful.", shard)
                job_success = False

    logger.info("Operation `%s' for submission %s completed. Success: %s.",
                operation, object_id, job_success)

    # We get the submission from DB and update it.
    with SessionGen() as session:
        dataset = Dataset.get_from_id(dataset_id, session)
        if dataset is None:
            logger.error("[action_finished] Could not find "
                         "dataset %d in the database.", dataset_id)
            return

        # TODO Try to move this 4-cases if-clause into a method of
        # ESOperation: I'd really like ES itself not to care about
        # which type of operation it's handling.
        if type_ == ESOperation.COMPILATION:
            submission = Submission.get_from_id(object_id, session)
            if submission is None:
                logger.error("[action_finished] Could not find "
                             "submission %d in the database.",
                             object_id)
                return
            submission_result = submission.get_result(dataset)
            if submission_result is None:
                logger.info("[action_finished] Couldn't find "
                            "submission %d(%d) in the database. "
                            "Creating it.", object_id, dataset_id)
                submission_result = \
                    submission.get_result_or_create(dataset)
            if job_success:
                job.to_submission(submission_result)
            else:
                submission_result.compilation_tries += 1
            session.commit()
            self.compilation_ended(submission_result)

        elif type_ == ESOperation.EVALUATION:
            submission = Submission.get_from_id(object_id, session)
            if submission is None:
                logger.error("[action_finished] Could not find "
                             "submission %d in the database.",
                             object_id)
                return
            submission_result = submission.get_result(dataset)
            if submission_result is None:
                logger.error("[action_finished] Couldn't find "
                             "submission %d(%d) in the database.",
                             object_id, dataset_id)
                return
            if job_success:
                job.to_submission(submission_result)
            else:
                submission_result.evaluation_tries += 1

            # Submission evaluation will be ended only when
            # evaluation for each testcase is available.
            evaluation_complete = (len(submission_result.evaluations) ==
                                   len(dataset.testcases))
            if evaluation_complete:
                submission_result.set_evaluation_outcome()
            session.commit()
            if evaluation_complete:
                self.evaluation_ended(submission_result)

        elif type_ == ESOperation.USER_TEST_COMPILATION:
            user_test = UserTest.get_from_id(object_id, session)
            if user_test is None:
                logger.error("[action_finished] Could not find "
                             "user test %d in the database.",
                             object_id)
                return
            user_test_result = user_test.get_result(dataset)
            if user_test_result is None:
                logger.error("[action_finished] Couldn't find "
                             "user test %d(%d) in the database. "
                             "Creating it.", object_id, dataset_id)
                user_test_result = \
                    user_test.get_result_or_create(dataset)
            if job_success:
                job.to_user_test(user_test_result)
            else:
                user_test_result.compilation_tries += 1
            session.commit()
            self.user_test_compilation_ended(user_test_result)

        elif type_ == ESOperation.USER_TEST_EVALUATION:
            user_test = UserTest.get_from_id(object_id, session)
            if user_test is None:
                logger.error("[action_finished] Could not find "
                             "user test %d in the database.",
                             object_id)
                return
            user_test_result = user_test.get_result(dataset)
            if user_test_result is None:
                logger.error("[action_finished] Couldn't find "
                             "user test %d(%d) in the database.",
                             object_id, dataset_id)
                return
            if job_success:
                job.to_user_test(user_test_result)
            else:
                user_test_result.evaluation_tries += 1
            session.commit()
            self.user_test_evaluation_ended(user_test_result)

        else:
            logger.error("Invalid operation type %r.", type_)
            return
def action_finished(self, data, plus, error=None):
    """Callback from a worker, to signal that is finished some
    action (compilation or evaluation).

    data (dict): a dictionary that describes a Job instance.
    plus (tuple): the tuple (job_type,
                             object_id,
                             side_data=(priority, timestamp),
                             shard_of_worker)

    """
    # TODO - The next two comments are in the wrong place and
    # really little understandable anyway.

    # We notify the pool that the worker is free (even if it
    # replied with an error), but if the pool wants to disable the
    # worker, it's because it already assigned its job to someone
    # else, so we discard the data from the worker.
    job_type, object_id, side_data, shard = plus

    # If worker was ignored, do nothing.
    if self.pool.release_worker(shard):
        return

    job_success = True
    if error is not None:
        logger.error("Received error from Worker: `%s'." % error)
        job_success = False
    else:
        try:
            job = Job.import_from_dict_with_type(data)
        # Narrowed from a bare `except:` so that system-exiting
        # exceptions are not silently recorded as a failed job.
        except Exception:
            logger.error("[action_finished] Couldn't build Job for data"
                         " %s." % (data))
            job_success = False
        else:
            if not job.success:
                logger.error("Worker %s signaled action "
                             "not successful." % shard)
                job_success = False

    _, timestamp = side_data

    # Log job_success rather than data["success"]: when `error` is set
    # (or the Job could not be built) `data` may be None or lack the
    # "success" key, which made this log line itself raise.
    logger.info("Action %s for submission %s completed. Success: %s." %
                (job_type, object_id, job_success))

    # We get the submission from DB and update it.
    with SessionGen(commit=False) as session:
        if job_type == EvaluationService.JOB_TYPE_COMPILATION:
            submission = Submission.get_from_id(object_id, session)
            if submission is None:
                logger.error("[action_finished] Couldn't find "
                             "submission %d in the database."
                             % object_id)
                return
            submission.compilation_tries += 1
            if job_success:
                submission.compilation_outcome = \
                    "ok" if job.compilation_success else "fail"
                submission.compilation_text = job.text
                submission.compilation_shard = job.shard
                submission.compilation_sandbox = ":".join(job.sandboxes)
                for executable in job.executables.itervalues():
                    submission.executables[executable.filename] = \
                        executable
                    session.add(executable)
            self.compilation_ended(submission)

        elif job_type == EvaluationService.JOB_TYPE_EVALUATION:
            submission = Submission.get_from_id(object_id, session)
            if submission is None:
                logger.error("[action_finished] Couldn't find "
                             "submission %s in the database."
                             % object_id)
                return
            submission.evaluation_tries += 1
            if job_success:
                submission.evaluation_outcome = "ok"
                for test_number, info in job.evaluations.iteritems():
                    evaluation = Evaluation(
                        text=info["text"],
                        outcome=info["outcome"],
                        num=test_number,
                        memory_used=info["plus"].get(
                            "memory_used", None),
                        execution_time=info["plus"].get(
                            "execution_time", None),
                        execution_wall_clock_time=info["plus"].get(
                            "execution_wall_clock_time", None),
                        evaluation_shard=job.shard,
                        evaluation_sandbox=":".join(info["sandboxes"]),
                        submission=submission)
                    session.add(evaluation)
            self.evaluation_ended(submission)

        elif job_type == EvaluationService.JOB_TYPE_TEST_COMPILATION:
            user_test = UserTest.get_from_id(object_id, session)
            if user_test is None:
                logger.error("[action_finished] Couldn't find "
                             "user test %d in the database."
                             % object_id)
                return
            user_test.compilation_tries += 1
            if job_success:
                user_test.compilation_outcome = \
                    "ok" if job.compilation_success else "fail"
                user_test.compilation_text = job.text
                user_test.compilation_shard = job.shard
                user_test.compilation_sandbox = ":".join(job.sandboxes)
                for executable in job.executables.itervalues():
                    ut_executable = UserTestExecutable.import_from_dict(
                        executable.export_to_dict())
                    user_test.executables[ut_executable.filename] = \
                        ut_executable
                    session.add(ut_executable)
            self.user_test_compilation_ended(user_test)

        elif job_type == EvaluationService.JOB_TYPE_TEST_EVALUATION:
            user_test = UserTest.get_from_id(object_id, session)
            if user_test is None:
                logger.error("[action_finished] Couldn't find "
                             "user test %d in the database."
                             % object_id)
                return
            user_test.evaluation_tries += 1
            if job_success:
                try:
                    [evaluation] = job.evaluations.values()
                except ValueError:
                    logger.error(
                        "[action_finished] I expected the job "
                        "for a user test to contain a single "
                        "evaluation, while instead it has %d."
                        % (len(job.evaluations.values())))
                    return
                user_test.evaluation_outcome = "ok"
                user_test.evaluation_shard = job.shard
                user_test.output = evaluation["output"]
                user_test.evaluation_text = evaluation["text"]
                user_test.evaluation_sandbox = \
                    ":".join(evaluation["sandboxes"])
                # Removed the stray trailing commas that previously
                # stored these two values as 1-element tuples.
                user_test.memory_used = \
                    evaluation["plus"].get("memory_used", None)
                user_test.execution_time = \
                    evaluation["plus"].get("execution_time", None)
            self.user_test_evaluation_ended(user_test)

        else:
            logger.error("Invalid job type %r." % (job_type))
            return

        session.commit()