Example 1
def create_running_job_exe(agent_id='agent_1', job_type=None, job=None, node=None, timeout=None, input_file_size=10.0,
                           queued=None, started=None, resources=None, priority=None, num_exes=1):
    """Creates a running job execution for unit testing

    :returns: The running job execution
    :rtype: :class:`job.execution.job_exe.RunningJobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status='RUNNING', input_file_size=input_file_size, num_exes=num_exes)
    job_type = job.job_type

    # Configuration that occurs at queue time
    input_files = {}
    input_file_ids = job.get_job_data().get_input_file_ids()
    if input_file_ids:
        for input_file in ScaleFile.objects.get_files_for_queued_jobs(input_file_ids):
            input_files[input_file.id] = input_file
    exe_config = QueuedExecutionConfigurator(input_files).configure_queued_job(job)

    job_exe = JobExecution()
    job_exe.set_cluster_id('1234', job.id, job.num_exes)
    job_exe.job = job
    job_exe.job_type = job_type
    job_exe.exe_num = job.num_exes
    if not node:
        node = node_utils.create_node()
    job_exe.node = node
    if not timeout:
        timeout = job.timeout
    job_exe.timeout = timeout
    job_exe.input_file_size = input_file_size
    if not resources:
        resources = job.get_resources()
    job_exe.resources = resources.get_json().get_dict()
    job_exe.configuration = exe_config.get_dict()
    if not queued:
        queued = when
    job_exe.queued = queued
    if not started:
        started = when + datetime.timedelta(seconds=1)
    job_exe.started = started
    job_exe.save()

    if not priority:
        priority = job.priority

    # Configuration that occurs at schedule time
    workspaces = {}
    for workspace in Workspace.objects.all():
        workspaces[workspace.name] = workspace
    secret_config = ScheduledExecutionConfigurator(workspaces).configure_scheduled_job(job_exe, job_type,
                                                                                       job_type.get_job_interface(), 'INFO')
    return RunningJobExecution(agent_id, job_exe, job_type, secret_config, priority)
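
A minimal usage sketch for the helper above, assuming the seed job type fixture from Example 3 (job.test.utils.create_seed_job_type); the import path is inferred from that example rather than stated here.

# Hedged sketch: build a RunningJobExecution for a scheduler unit test.
# The job.test.utils path is an assumption based on the test helpers used in Example 3.
from job.test import utils as job_test_utils

job_type = job_test_utils.create_seed_job_type()
running_exe = create_running_job_exe(agent_id='agent_1', job_type=job_type, num_exes=1)
# running_exe is the job.execution.job_exe.RunningJobExecution noted in the docstring above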
Example 2
    def queue_jobs(self, jobs, requeue=False, priority=None):
        """Queues the given jobs. The caller must have obtained model locks on the job models in an atomic transaction.
        Any jobs that are not in a valid status for being queued, are without job input, or are superseded will be
        ignored.

        :param jobs: The job models to put on the queue
        :type jobs: list
        :param requeue: Whether this is a re-queue (True) or a first queue (False)
        :type requeue: bool
        :param priority: An optional argument to reset the jobs' priority when they are queued
        :type priority: int
        :returns: The list of job IDs that were successfully QUEUED
        :rtype: list
        """

        when_queued = timezone.now()

        # Set job models to QUEUED
        queued_job_ids = Job.objects.update_jobs_to_queued(jobs,
                                                           when_queued,
                                                           requeue=requeue)
        if not queued_job_ids:
            return queued_job_ids  # Done if nothing was queued

        # Retrieve the related job_type, job_type_rev, and batch models for the queued jobs
        queued_jobs = Job.objects.get_jobs_with_related(queued_job_ids)

        # Query for all input files of the queued jobs
        input_files = {}
        input_file_ids = set()
        for job in queued_jobs:
            input_file_ids.update(job.get_job_data().get_input_file_ids())
        if input_file_ids:
            for input_file in ScaleFile.objects.get_files_for_queued_jobs(
                    input_file_ids):
                input_files[input_file.id] = input_file

        # Bulk create queue models
        queues = []
        configurator = QueuedExecutionConfigurator(input_files)
        for job in queued_jobs:
            config = configurator.configure_queued_job(job)

            manifest = None
            if JobInterfaceSunset.is_seed_dict(job.job_type.manifest):
                manifest = SeedManifest(job.job_type.manifest)

            if priority:
                queued_priority = priority
            elif job.priority:
                queued_priority = job.priority
            elif job.batch and job.batch.get_configuration().priority:
                queued_priority = job.batch.get_configuration().priority
            else:
                queued_priority = job.job_type.get_job_configuration().priority

            queue = Queue()
            queue.job_type_id = job.job_type_id
            queue.job_id = job.id
            queue.recipe_id = job.recipe_id
            queue.batch_id = job.batch_id
            queue.exe_num = job.num_exes
            queue.input_file_size = job.input_file_size if job.input_file_size else 0.0
            queue.is_canceled = False
            queue.priority = queued_priority
            queue.timeout = manifest.get_timeout() if manifest else job.timeout
            queue.interface = job.get_job_interface().get_dict()
            queue.configuration = config.get_dict()
            queue.resources = job.get_resources().get_json().get_dict()
            queue.queued = when_queued
            queues.append(queue)

        if queues:
            self.bulk_create(queues)

        return queued_job_ids
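
The docstring above requires the caller to hold model locks inside an atomic transaction before calling queue_jobs. The sketch below shows one way a caller might satisfy that contract; the import paths, the select_for_update() lock, and the assumption that queue_jobs is exposed on Queue's default manager are inferred from this example rather than confirmed by it.

# Hedged sketch of the calling contract, assuming Job and Queue live in job.models
# and queue.models and that queue_jobs is a method on Queue's default manager.
from django.db import transaction

from job.models import Job
from queue.models import Queue

def requeue_jobs_with_priority(job_ids, new_priority):
    with transaction.atomic():
        # Take row locks on the job models inside the atomic block, as the docstring requires
        locked_jobs = list(Job.objects.select_for_update().filter(id__in=job_ids).order_by('id'))
        return Queue.objects.queue_jobs(locked_jobs, requeue=True, priority=new_priority)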
Example 3
    def setUp(self):
        django.setup()

        cmd = 'command'
        cmd_args = 'run test'
        timeout = 60

        workspace = storage_utils.create_workspace(base_url="http://test.com/")
        self.file_1 = storage_utils.create_file(workspace=workspace,
                                                file_path="path/1/file1.txt")
        self.file_2 = storage_utils.create_file(workspace=workspace,
                                                file_path="path/2/file2.txt")
        self.file_3 = storage_utils.create_file(workspace=workspace,
                                                file_path="path/3/file3.txt")
        input_files = {
            self.file_1.id: self.file_1,
            self.file_2.id: self.file_2,
            self.file_3.id: self.file_3
        }

        manifest = job_utils.create_seed_manifest(command='command run test')
        imm = copy.deepcopy(manifest)
        imm['job']['jobVersion'] = '1.0.1'
        imm['job']['interface']['inputs']['files'].append(
            {'name': 'INPUT_METADATA_MANIFEST'})

        self.seed_job_type = job_utils.create_seed_job_type(manifest=manifest)
        self.seed_job_type_metadata = job_utils.create_seed_job_type(
            manifest=imm)
        self.event = TriggerEvent.objects.create_trigger_event(
            'TEST', None, {}, now())
        self.seed_job = job_utils.create_job(job_type=self.seed_job_type,
                                             event=self.event,
                                             status='RUNNING')

        self.data_dict = {
            'json': {
                'input_1': 'my_val'
            },
            'files': {
                'input_2': [self.file_1.id],
                'input_3': [self.file_2.id, self.file_3.id]
            }
        }
        self.seed_job_meta = job_utils.create_job(
            job_type=self.seed_job_type_metadata,
            event=self.event,
            input=self.data_dict,
            status='RUNNING')
        config = {
            'output_workspaces': {
                'default': storage_utils.create_workspace().name
            }
        }
        self.seed_exe = job_utils.create_job_exe(job=self.seed_job,
                                                 status='RUNNING',
                                                 timeout=timeout,
                                                 queued=now(),
                                                 configuration=config)

        configurator = QueuedExecutionConfigurator(input_files)
        exe_config = configurator.configure_queued_job(self.seed_job_meta)
        self.seed_exe_meta = job_utils.create_job_exe(
            job=self.seed_job_meta,
            status='RUNNING',
            timeout=timeout,
            queued=now(),
            configuration=exe_config.get_dict())
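
A test method like the following sketch could sit alongside the setUp above; the 'input_files' key it checks is an assumption about the ExecutionConfiguration dict layout, so the assertion is illustrative only.

    def test_input_files_in_queued_configuration(self):
        # Hedged sketch: re-run the queued configurator and inspect its dict output.
        # The 'input_files' key is an assumed part of the configuration schema.
        configurator = QueuedExecutionConfigurator({self.file_1.id: self.file_1,
                                                    self.file_2.id: self.file_2,
                                                    self.file_3.id: self.file_3})
        exe_config = configurator.configure_queued_job(self.seed_job_meta)
        config_dict = exe_config.get_dict()
        self.assertIn('input_files', config_dict)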
Example 4
    def populate_queue(apps, schema_editor):
        from job.execution.configuration.json.exe_config import ExecutionConfiguration

        # Go through all of the queued job models and re-populate the queue table
        when_queued = now()
        Job = apps.get_model('job', 'Job')
        JobType = apps.get_model('job', 'JobType')
        JobExecution = apps.get_model('job', 'JobExecution')
        FileAncestryLink = apps.get_model('product', 'FileAncestryLink')
        Queue = apps.get_model('queue', 'Queue')
        ScaleFile = apps.get_model('storage', 'ScaleFile')
        Workspace = apps.get_model('storage', 'Workspace')

        # Attach needed methods to Job model
        Job.get_job_data = get_job_data
        Job.get_job_interface = job_get_job_interface
        Job.get_resources = job_get_resources

        # Attach needed methods to JobType model
        JobType.get_job_interface = job_type_get_job_interface
        JobType.get_resources = job_type_get_resources
        JobType.get_secrets_key = get_secrets_key

        total_count = Job.objects.filter(status='QUEUED').count()
        print('Populating new queue table for %s queued jobs' % total_count)
        done_count = 0
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print('Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent))
            batch_end = done_count + batch_size
            job_qry = Job.objects.filter(status='QUEUED').select_related(
                'job_type', 'job_type_rev')
            job_qry = job_qry.order_by('id')[done_count:batch_end]

            # Query for all input files
            input_files = {}
            input_file_ids = set()
            for job in job_qry:
                input_file_ids.update(job.get_job_data().get_input_file_ids())

            if input_file_ids:
                qry = ScaleFile.objects.select_related('workspace').filter(
                    id__in=input_file_ids)
                for input_file in qry.only('id', 'file_type', 'file_path',
                                           'is_deleted',
                                           'workspace__name').iterator():
                    input_files[input_file.id] = input_file

            # Bulk create queue models
            queues = []
            configurator = QueuedExecutionConfigurator(input_files)
            for job in job_qry:
                config = configurator.configure_queued_job(job)

                queue = Queue()
                queue.job_type = job.job_type
                queue.job = job
                queue.exe_num = job.num_exes
                queue.input_file_size = job.disk_in_required if job.disk_in_required else 0.0
                queue.is_canceled = False
                queue.priority = job.priority
                queue.timeout = job.timeout
                queue.interface = job.get_job_interface().get_dict()
                queue.configuration = config.get_dict()
                queue.resources = job.get_resources().get_json().get_dict()
                queue.queued = when_queued
                queues.append(queue)

            if not queues:
                return []

            Queue.objects.bulk_create(queues)
            done_count += batch_size
        print('All %s jobs completed' % total_count)

        total_count = JobExecution.objects.filter(status='QUEUED').count()
        print('Updating file ancestry links for %s queued job executions' % total_count)
        done_count = 0
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print('Completed %s of %s queued job executions (%f%%)' % (done_count, total_count, percent))
            batch_end = done_count + batch_size
            job_exe_qry = JobExecution.objects.filter(status='QUEUED').defer(
                'configuration', 'resources')
            job_exe_qry = job_exe_qry.order_by('id')[done_count:batch_end]

            job_exe_ids = []
            for job_exe in job_exe_qry:
                job_exe_ids.append(job_exe.id)

            FileAncestryLink.objects.filter(job_exe_id__in=job_exe_ids).update(
                job_exe_id=None)

            done_count += batch_size
        print('All file ancestry links for %s queued job executions completed' % total_count)
        print('Deleting %s queued job executions...' % total_count)
        JobExecution.objects.filter(status='QUEUED').delete()
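
For context, a data-migration function like populate_queue is normally wired into its migration module with a RunPython operation; the sketch below uses standard Django API, with a placeholder dependency since the real one is not shown above.

from django.db import migrations

class Migration(migrations.Migration):

    dependencies = [
        ('queue', '0001_initial'),  # placeholder; the actual dependency is not shown in this example
    ]

    operations = [
        # populate_queue is the forward function from the example above; no reverse step
        migrations.RunPython(populate_queue, migrations.RunPython.noop),
    ]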