Example #1
0
    def create_job_exe_model(self, framework_id, when):
        """Builds the job execution model for this scheduled execution

        The model is populated from this execution's queue entry and configuration. It is only constructed and returned
        here; this method does not save it. A canceled execution gets no node, no start time and the JSON dict of a
        default-constructed :class:`NodeResources`.

        :param framework_id: The scheduling framework ID
        :type framework_id: string
        :param when: The start time
        :type when: :class:`datetime.datetime`
        :returns: The job execution model
        :rtype: :class:`job.models.JobExecution`
        """

        queue = self._queue

        # Fields carried over from the queue entry
        exe_model = JobExecution()
        exe_model.job_id = queue.job_id
        exe_model.job_type_id = queue.job_type_id
        exe_model.recipe_id = queue.recipe_id
        exe_model.batch_id = queue.batch_id
        exe_model.exe_num = queue.exe_num
        exe_model.timeout = queue.timeout
        exe_model.input_file_size = queue.input_file_size
        exe_model.configuration = self.configuration.get_dict()
        exe_model.queued = queue.queued

        # Node, resources and start time depend on whether the execution was canceled before being scheduled
        if self.is_canceled:
            node_id, resources, started = None, NodeResources(), None
        else:
            node_id, resources, started = self._scheduled_node_id, self._scheduled_resources, when
        exe_model.node_id = node_id
        exe_model.resources = resources.get_json().get_dict()
        exe_model.started = started

        exe_model.set_cluster_id(framework_id, queue.job_id, queue.exe_num)
        return exe_model
Example #2
0
def create_running_job_exe(agent_id='agent_1', job_type=None, job=None, node=None, timeout=None, input_file_size=10.0,
                           queued=None, started=None, resources=None, priority=None, num_exes=1):
    """Creates a running job execution for unit testing

    A job execution model is built, saved, and then wrapped in a running job execution. Any optional argument that is
    left falsy is replaced by a fallback, as described per parameter below.

    :param agent_id: The agent ID passed to the running job execution
    :param job_type: The job type used if a job has to be created; afterwards the job's own job type is always used
    :param job: The job to execute; if falsy, a job with status RUNNING is created
    :param node: The node for the execution; if falsy, a node is created via node_utils
    :param timeout: The execution timeout; if falsy, the job's timeout is used
    :param input_file_size: The input file size set on the execution (and on the job, if one is created)
    :param queued: The queued time; if falsy, the current time is used
    :param started: The start time; if falsy, one second after the current time is used
    :param resources: The execution resources; if falsy, the job's resources are used
    :param priority: The priority passed to the running job execution; if falsy, the job's priority is used
    :param num_exes: The number of executions given to the job, if one is created
    :returns: The running job execution
    :rtype: :class:`job.execution.job_exe.RunningJobExecution`
    """

    now = timezone.now()
    job = job or create_job(job_type=job_type, status='RUNNING', input_file_size=input_file_size, num_exes=num_exes)
    job_type = job.job_type

    # Queue-time configuration: the input files are only queried when the job data lists any
    file_ids = job.get_job_data().get_input_file_ids()
    files_by_id = {}
    if file_ids:
        queued_files = ScaleFile.objects.get_files_for_queued_jobs(file_ids)
        files_by_id = {scale_file.id: scale_file for scale_file in queued_files}
    queued_config = QueuedExecutionConfigurator(files_by_id).configure_queued_job(job)

    exe_model = JobExecution()
    exe_model.set_cluster_id('1234', job.id, job.num_exes)
    exe_model.job = job
    exe_model.job_type = job_type
    exe_model.exe_num = job.num_exes
    exe_model.node = node or node_utils.create_node()
    exe_model.timeout = timeout or job.timeout
    exe_model.input_file_size = input_file_size
    exe_model.resources = (resources or job.get_resources()).get_json().get_dict()
    exe_model.configuration = queued_config.get_dict()
    exe_model.queued = queued or now
    exe_model.started = started or (now + datetime.timedelta(seconds=1))
    exe_model.save()

    priority = priority or job.priority

    # Schedule-time configuration, using every workspace keyed by name
    workspaces_by_name = {workspace.name: workspace for workspace in Workspace.objects.all()}
    configurator = ScheduledExecutionConfigurator(workspaces_by_name)
    secrets_config = configurator.configure_scheduled_job(exe_model, job_type, job_type.get_job_interface(), 'INFO')
    return RunningJobExecution(agent_id, exe_model, job_type, secrets_config, priority)
Example #3
0
    def create_job_exe_model(self, framework_id, when):
        """Builds the job execution model for this scheduled execution and assigns any required GPUs

        The model is populated from this execution's queue entry and configuration. It is only constructed and returned
        here; this method does not save it. A canceled execution gets no node, no start time and the JSON dict of a
        default-constructed :class:`NodeResources`. If the required resources include GPUs, they are assigned through
        :class:`GPUManager`, and an error is logged when that assignment reports failure.

        :param framework_id: The scheduling framework ID
        :type framework_id: string
        :param when: The start time
        :type when: :class:`datetime.datetime`
        :returns: The job execution model
        :rtype: :class:`job.models.JobExecution`
        """

        queue = self._queue

        # Fields carried over from the queue entry
        exe_model = JobExecution()
        exe_model.job_id = queue.job_id
        exe_model.job_type_id = queue.job_type_id
        exe_model.recipe_id = queue.recipe_id
        exe_model.batch_id = queue.batch_id
        exe_model.exe_num = queue.exe_num
        exe_model.timeout = queue.timeout
        exe_model.docker_image = queue.docker_image
        exe_model.input_file_size = queue.input_file_size
        exe_model.configuration = self.configuration.get_dict()
        exe_model.queued = queue.queued

        # Node, resources and start time depend on whether the execution was canceled before being scheduled
        if self.is_canceled:
            node_id, resources, started = None, NodeResources(), None
        else:
            node_id, resources, started = self._scheduled_node_id, self._scheduled_resources, when
        exe_model.node_id = node_id
        exe_model.resources = resources.get_json().get_dict()
        exe_model.started = started

        exe_model.set_cluster_id(framework_id, queue.job_id, queue.exe_num)

        # NOTE(review): this also runs for canceled executions, where node_id is None - confirm that GPUManager
        # handles a missing node as intended
        gpus_needed = self.required_resources.gpus
        if gpus_needed > 0:
            assigned = GPUManager.assign_gpus_for_job(exe_model.node_id, exe_model.job_id, gpus_needed)
            if not assigned:
                logger.error("Job %s was unable to assign %s reserved GPUs on node %s. "
                             "Note: this is not supposed to be able to happen. "
                             "something has gone horribly wrong.",
                             exe_model.job_id, gpus_needed, exe_model.node_id)

        return exe_model
Example #4
0
def create_job_exe(job_type=None, job=None, exe_num=None, node=None, timeout=None, input_file_size=10.0, queued=None,
                   started=None, status='RUNNING', error=None, ended=None, output=None, task_results=None):
    """Creates a job_exe model for unit testing, may also create job_exe_end and job_exe_output models depending on
    status

    A job_exe_end model is saved when status is COMPLETED, FAILED or CANCELED. A job_exe_output model is saved when
    status is COMPLETED or when output is provided. Any optional argument that is left falsy is replaced by a
    fallback, as described per parameter below.

    :param job_type: The job type used if a job has to be created; afterwards the job's own job type is always used
    :param job: The job to execute; if falsy, a job with the given status is created
    :param exe_num: The execution number; if falsy, the job's num_exes is used
    :param node: The node for the execution; if falsy, a node is created via node_utils
    :param timeout: The execution timeout; if falsy, the job's timeout is used
    :param input_file_size: The input file size set on the execution (and on the job, if one is created)
    :param queued: The queued time; if falsy, the current time is used
    :param started: The start time; if falsy, one second after the current time is used
    :param status: The execution status, which decides whether end and output models are created
    :param error: The error for the job_exe_end model; if falsy and status is FAILED, an error is created
    :param ended: The end time for the job_exe_end model; if falsy, one second after the start time is used
    :param output: The execution output; if falsy and status is COMPLETED, an empty JobResults is used
    :param task_results: The task results for the job_exe_end model; if falsy, an empty TaskResults is used
    :returns: The job_exe model
    :rtype: :class:`job.models.JobExecution`
    """

    now = timezone.now()
    job = job or create_job(job_type=job_type, status=status, input_file_size=input_file_size)
    job_type = job.job_type

    exe_model = JobExecution()
    exe_model.job = job
    exe_model.job_type = job_type
    exe_model.exe_num = exe_num or job.num_exes
    exe_model.set_cluster_id('1234', job.id, exe_model.exe_num)
    node = node or node_utils.create_node()
    exe_model.node = node
    exe_model.timeout = timeout or job.timeout
    exe_model.input_file_size = input_file_size
    exe_model.resources = job.get_resources().get_json().get_dict()
    exe_model.configuration = ExecutionConfiguration().get_dict()
    queued = queued or now
    exe_model.queued = queued
    started = started or (now + datetime.timedelta(seconds=1))
    exe_model.started = started
    exe_model.save()

    # Finished executions also get an end model that mirrors the execution's identifying fields
    if status in ('COMPLETED', 'FAILED', 'CANCELED'):
        end_model = JobExecutionEnd()
        end_model.job_exe_id = exe_model.id
        end_model.job = exe_model.job
        end_model.job_type = exe_model.job_type
        end_model.exe_num = exe_model.exe_num
        task_results = task_results or TaskResults()
        end_model.task_results = task_results.get_dict()
        end_model.status = status
        if status == 'FAILED' and not error:
            error = error_test_utils.create_error()
        end_model.error = error
        end_model.node = node
        end_model.queued = queued
        end_model.started = started
        end_model.seed_started = task_results.get_task_started('main')
        end_model.seed_ended = task_results.get_task_ended('main')
        end_model.ended = ended or (started + datetime.timedelta(seconds=1))
        end_model.save()

    # Completed executions, or any execution given explicit output, also get an output model
    if status == 'COMPLETED' or output:
        output_model = JobExecutionOutput()
        output_model.job_exe_id = exe_model.id
        output_model.job = exe_model.job
        output_model.job_type = exe_model.job_type
        output_model.exe_num = exe_model.exe_num
        output_model.output = (output or JobResults()).get_dict()
        output_model.save()

    return exe_model