def create_job_exe_model(self, framework_id, when):
    """Creates and returns a scheduled job execution model

    :param framework_id: The scheduling framework ID
    :type framework_id: string
    :param when: The start time
    :type when: :class:`datetime.datetime`
    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    """

    job_exe = JobExecution()
    job_exe.job_id = self._queue.job_id
    job_exe.job_type_id = self._queue.job_type_id
    job_exe.recipe_id = self._queue.recipe_id
    job_exe.batch_id = self._queue.batch_id
    job_exe.exe_num = self._queue.exe_num
    job_exe.timeout = self._queue.timeout
    job_exe.input_file_size = self._queue.input_file_size
    job_exe.configuration = self.configuration.get_dict()
    job_exe.queued = self._queue.queued

    if self.is_canceled:
        job_exe.node_id = None
        job_exe.resources = NodeResources().get_json().get_dict()
        job_exe.started = None
    else:
        job_exe.node_id = self._scheduled_node_id
        job_exe.resources = self._scheduled_resources.get_json().get_dict()
        job_exe.started = when

    job_exe.set_cluster_id(framework_id, self._queue.job_id, self._queue.exe_num)
    return job_exe
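
# --- Illustrative sketch, not part of the code above -------------------------
# A minimal example of how a scheduler loop might use create_job_exe_model() to
# persist a batch of scheduled executions. The save_scheduled_job_exes() helper,
# the scheduled_job_exes argument, and the bulk_create call are assumptions for
# illustration; only create_job_exe_model() itself comes from the method above.
from django.utils import timezone

from job.models import JobExecution  # path taken from the docstring's :rtype:


def save_scheduled_job_exes(scheduled_job_exes, framework_id):
    """Creates and saves JobExecution models for a batch of scheduled executions"""

    started = timezone.now()
    job_exe_models = [scheduled.create_job_exe_model(framework_id, started) for scheduled in scheduled_job_exes]
    # A single bulk INSERT keeps the scheduler's database round trips low
    JobExecution.objects.bulk_create(job_exe_models)
    return job_exe_models
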
def create_running_job_exe(agent_id='agent_1', job_type=None, job=None, node=None, timeout=None,
                           input_file_size=10.0, queued=None, started=None, resources=None, priority=None,
                           num_exes=1):
    """Creates a running job execution for unit testing

    :returns: The running job execution
    :rtype: :class:`job.execution.job_exe.RunningJobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status='RUNNING', input_file_size=input_file_size, num_exes=num_exes)
    job_type = job.job_type

    # Configuration that occurs at queue time
    input_files = {}
    input_file_ids = job.get_job_data().get_input_file_ids()
    if input_file_ids:
        for input_file in ScaleFile.objects.get_files_for_queued_jobs(input_file_ids):
            input_files[input_file.id] = input_file
    exe_config = QueuedExecutionConfigurator(input_files).configure_queued_job(job)

    job_exe = JobExecution()
    job_exe.set_cluster_id('1234', job.id, job.num_exes)
    job_exe.job = job
    job_exe.job_type = job_type
    job_exe.exe_num = job.num_exes
    if not node:
        node = node_utils.create_node()
    job_exe.node = node
    if not timeout:
        timeout = job.timeout
    job_exe.timeout = timeout
    job_exe.input_file_size = input_file_size
    if not resources:
        resources = job.get_resources()
    job_exe.resources = resources.get_json().get_dict()
    job_exe.configuration = exe_config.get_dict()
    if not queued:
        queued = when
    job_exe.queued = queued
    if not started:
        started = when + datetime.timedelta(seconds=1)
    job_exe.started = started
    job_exe.save()

    if not priority:
        priority = job.priority

    # Configuration that occurs at schedule time
    workspaces = {}
    for workspace in Workspace.objects.all():
        workspaces[workspace.name] = workspace
    secret_config = ScheduledExecutionConfigurator(workspaces).configure_scheduled_job(job_exe, job_type,
                                                                                       job_type.get_job_interface(),
                                                                                       'INFO')
    return RunningJobExecution(agent_id, job_exe, job_type, secret_config, priority)
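
# --- Illustrative sketch, not part of the code above -------------------------
# A minimal example of calling create_running_job_exe() from a unit test. The
# test class and assertions are assumptions for illustration; the helper and its
# RunningJobExecution return type come from the function above.
from django.test import TransactionTestCase

from job.models import JobExecution  # path taken from the docstring's :rtype:


class TestCreateRunningJobExe(TransactionTestCase):

    def test_defaults_create_one_execution(self):
        """Tests that the helper saves exactly one JobExecution for the backing job"""

        running_job_exe = create_running_job_exe(agent_id='agent_1', input_file_size=25.0)

        self.assertIsNotNone(running_job_exe)
        self.assertEqual(JobExecution.objects.count(), 1)
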
def create_job_exe_model(self, framework_id, when):
    """Creates and returns a scheduled job execution model

    :param framework_id: The scheduling framework ID
    :type framework_id: string
    :param when: The start time
    :type when: :class:`datetime.datetime`
    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    """

    job_exe = JobExecution()
    job_exe.job_id = self._queue.job_id
    job_exe.job_type_id = self._queue.job_type_id
    job_exe.recipe_id = self._queue.recipe_id
    job_exe.batch_id = self._queue.batch_id
    job_exe.exe_num = self._queue.exe_num
    job_exe.timeout = self._queue.timeout
    job_exe.docker_image = self._queue.docker_image
    job_exe.input_file_size = self._queue.input_file_size
    job_exe.configuration = self.configuration.get_dict()
    job_exe.queued = self._queue.queued

    if self.is_canceled:
        # A canceled execution never ran on a node, so record no node, empty resources, and no start time
        job_exe.node_id = None
        job_exe.resources = NodeResources().get_json().get_dict()
        job_exe.started = None
    else:
        job_exe.node_id = self._scheduled_node_id
        job_exe.resources = self._scheduled_resources.get_json().get_dict()
        job_exe.started = when

    job_exe.set_cluster_id(framework_id, self._queue.job_id, self._queue.exe_num)

    if self.required_resources.gpus > 0:
        # Assign the GPUs that were reserved for this job on the scheduled node
        if not GPUManager.assign_gpus_for_job(job_exe.node_id, job_exe.job_id, self.required_resources.gpus):
            logger.error('Job %s was unable to assign %s reserved GPUs on node %s. This should not be possible; '
                         'something has gone wrong with the GPU reservations.', job_exe.job_id,
                         self.required_resources.gpus, job_exe.node_id)

    return job_exe
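
# --- Illustrative sketch, not part of the code above -------------------------
# GPUManager's implementation is not shown in this excerpt. The class below is
# one plausible shape of the reserve/assign bookkeeping that
# assign_gpus_for_job() implies; the class name, dictionary layout, and method
# signatures are all assumptions for illustration, not Scale's actual API.
class SimpleGPUTracker(object):
    """Tracks reserved and assigned GPUs per node"""

    def __init__(self):
        self._reserved = {}  # {node_id: GPUs reserved but not yet assigned}
        self._assigned = {}  # {(node_id, job_id): GPUs assigned to the job}

    def reserve_gpus_for_node(self, node_id, count):
        """Reserves GPUs on a node ahead of scheduling"""

        self._reserved[node_id] = self._reserved.get(node_id, 0) + count

    def assign_gpus_for_job(self, node_id, job_id, count):
        """Moves reserved GPUs to a job, returning False if the reservation is missing"""

        if self._reserved.get(node_id, 0) < count:
            # Mirrors the error branch above: the request and the reservation disagree
            return False
        self._reserved[node_id] -= count
        self._assigned[(node_id, job_id)] = count
        return True
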
def create_job_exe(job_type=None, job=None, exe_num=None, node=None, timeout=None, input_file_size=10.0, queued=None,
                   started=None, status='RUNNING', error=None, ended=None, output=None, task_results=None):
    """Creates a job_exe model for unit testing; may also create job_exe_end and job_exe_output models depending on
    status

    :returns: The job_exe model
    :rtype: :class:`job.models.JobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status=status, input_file_size=input_file_size)
    job_type = job.job_type

    job_exe = JobExecution()
    job_exe.job = job
    job_exe.job_type = job_type
    if not exe_num:
        exe_num = job.num_exes
    job_exe.exe_num = exe_num
    job_exe.set_cluster_id('1234', job.id, job_exe.exe_num)
    if not node:
        node = node_utils.create_node()
    job_exe.node = node
    if not timeout:
        timeout = job.timeout
    job_exe.timeout = timeout
    job_exe.input_file_size = input_file_size
    job_exe.resources = job.get_resources().get_json().get_dict()
    job_exe.configuration = ExecutionConfiguration().get_dict()
    if not queued:
        queued = when
    job_exe.queued = queued
    if not started:
        started = when + datetime.timedelta(seconds=1)
    job_exe.started = started
    job_exe.save()

    # Terminal statuses also get a job_exe_end model
    if status in ['COMPLETED', 'FAILED', 'CANCELED']:
        job_exe_end = JobExecutionEnd()
        job_exe_end.job_exe_id = job_exe.id
        job_exe_end.job = job_exe.job
        job_exe_end.job_type = job_exe.job_type
        job_exe_end.exe_num = job_exe.exe_num
        if not task_results:
            task_results = TaskResults()
        job_exe_end.task_results = task_results.get_dict()
        job_exe_end.status = status
        if status == 'FAILED' and not error:
            error = error_test_utils.create_error()
        job_exe_end.error = error
        job_exe_end.node = node
        job_exe_end.queued = queued
        job_exe_end.started = started
        job_exe_end.seed_started = task_results.get_task_started('main')
        job_exe_end.seed_ended = task_results.get_task_ended('main')
        if not ended:
            ended = started + datetime.timedelta(seconds=1)
        job_exe_end.ended = ended
        job_exe_end.save()

    # Completed executions, or an explicitly provided output, also get a job_exe_output model
    if status == 'COMPLETED' or output:
        job_exe_output = JobExecutionOutput()
        job_exe_output.job_exe_id = job_exe.id
        job_exe_output.job = job_exe.job
        job_exe_output.job_type = job_exe.job_type
        job_exe_output.exe_num = job_exe.exe_num
        if not output:
            output = JobResults()
        job_exe_output.output = output.get_dict()
        job_exe_output.save()

    return job_exe
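
# --- Illustrative sketch, not part of the code above -------------------------
# A minimal example of calling create_job_exe() for a COMPLETED execution. The
# test class and assertions are assumptions for illustration; the helper and the
# JobExecutionEnd / JobExecutionOutput models it creates come from the function
# above.
from django.test import TransactionTestCase

from job.models import JobExecutionEnd, JobExecutionOutput  # paths assumed to match JobExecution


class TestCreateJobExe(TransactionTestCase):

    def test_completed_execution_creates_end_and_output(self):
        """Tests that a COMPLETED status also produces end and output models"""

        job_exe = create_job_exe(status='COMPLETED')

        self.assertTrue(JobExecutionEnd.objects.filter(job_exe_id=job_exe.id).exists())
        self.assertTrue(JobExecutionOutput.objects.filter(job_exe_id=job_exe.id).exists())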