def create_running_job_exe(agent_id='agent_1', job_type=None, job=None, node=None, timeout=None, input_file_size=10.0,
                           queued=None, started=None, resources=None, priority=None, num_exes=1):
    """Creates a running job execution for unit testing

    :returns: The running job execution
    :rtype: :class:`job.execution.job_exe.RunningJobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status='RUNNING', input_file_size=input_file_size, num_exes=num_exes)
    job_type = job.job_type

    # Configuration that occurs at queue time
    input_files = {}
    input_file_ids = job.get_job_data().get_input_file_ids()
    if input_file_ids:
        for input_file in ScaleFile.objects.get_files_for_queued_jobs(input_file_ids):
            input_files[input_file.id] = input_file
    exe_config = QueuedExecutionConfigurator(input_files).configure_queued_job(job)

    job_exe = JobExecution()
    job_exe.set_cluster_id('1234', job.id, job.num_exes)
    job_exe.job = job
    job_exe.job_type = job_type
    job_exe.exe_num = job.num_exes
    if not node:
        node = node_utils.create_node()
    job_exe.node = node
    if not timeout:
        timeout = job.timeout
    job_exe.timeout = timeout
    job_exe.input_file_size = input_file_size
    if not resources:
        resources = job.get_resources()
    job_exe.resources = resources.get_json().get_dict()
    job_exe.configuration = exe_config.get_dict()
    if not queued:
        queued = when
    job_exe.queued = queued
    if not started:
        started = when + datetime.timedelta(seconds=1)
    job_exe.started = started
    job_exe.save()

    if not priority:
        priority = job.priority

    # Configuration that occurs at schedule time
    workspaces = {}
    for workspace in Workspace.objects.all():
        workspaces[workspace.name] = workspace
    secret_config = ScheduledExecutionConfigurator(workspaces).configure_scheduled_job(job_exe, job_type,
                                                                                       job_type.get_job_interface(),
                                                                                       'INFO')
    return RunningJobExecution(agent_id, job_exe, job_type, secret_config, priority)
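
# Usage sketch (an illustration, not part of the original utilities): a unit test
# can build a fully wired running execution in one call, overriding only the
# fields it cares about. The agent ID, priority, and execution count below are
# arbitrary placeholder values.
def _example_running_job_exe():
    running_exe = create_running_job_exe(agent_id='agent_007', priority=50, num_exes=2)
    # A JobExecution row has been saved to the test database; the returned object
    # is the in-memory scheduler-side representation built from it.
    return running_exe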
def queue_jobs(self, jobs, requeue=False, priority=None):
    """Queues the given jobs. The caller must have obtained model locks on the job models in an atomic
    transaction. Any jobs that are not in a valid status for being queued, are without job input, or are
    superseded will be ignored.

    :param jobs: The job models to put on the queue
    :type jobs: list
    :param requeue: Whether this is a re-queue (True) or a first queue (False)
    :type requeue: bool
    :param priority: An optional argument to reset the jobs' priority when they are queued
    :type priority: int
    :returns: The list of job IDs that were successfully QUEUED
    :rtype: list
    """

    when_queued = timezone.now()

    # Set job models to QUEUED
    queued_job_ids = Job.objects.update_jobs_to_queued(jobs, when_queued, requeue=requeue)
    if not queued_job_ids:
        return queued_job_ids  # Done if nothing was queued

    # Retrieve the related job_type, job_type_rev, and batch models for the queued jobs
    queued_jobs = Job.objects.get_jobs_with_related(queued_job_ids)

    # Query for all input files of the queued jobs
    input_files = {}
    input_file_ids = set()
    for job in queued_jobs:
        input_file_ids.update(job.get_job_data().get_input_file_ids())
    if input_file_ids:
        for input_file in ScaleFile.objects.get_files_for_queued_jobs(input_file_ids):
            input_files[input_file.id] = input_file

    # Bulk create queue models
    queues = []
    configurator = QueuedExecutionConfigurator(input_files)
    for job in queued_jobs:
        config = configurator.configure_queued_job(job)
        manifest = None
        if JobInterfaceSunset.is_seed_dict(job.job_type.manifest):
            manifest = SeedManifest(job.job_type.manifest)

        if priority:
            queued_priority = priority
        elif job.priority:
            queued_priority = job.priority
        elif job.batch and job.batch.get_configuration().priority:
            queued_priority = job.batch.get_configuration().priority
        else:
            queued_priority = job.job_type.get_job_configuration().priority

        queue = Queue()
        queue.job_type_id = job.job_type_id
        queue.job_id = job.id
        queue.recipe_id = job.recipe_id
        queue.batch_id = job.batch_id
        queue.exe_num = job.num_exes
        queue.input_file_size = job.input_file_size if job.input_file_size else 0.0
        queue.is_canceled = False
        queue.priority = queued_priority
        queue.timeout = manifest.get_timeout() if manifest else job.timeout
        queue.interface = job.get_job_interface().get_dict()
        queue.configuration = config.get_dict()
        queue.resources = job.get_resources().get_json().get_dict()
        queue.queued = when_queued
        queues.append(queue)

    if queues:
        self.bulk_create(queues)
    return queued_job_ids
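
# Caller-side sketch (an assumption based on the docstring's locking contract, not
# code from the original module): the job models must be locked inside an atomic
# transaction before queue_jobs() is called. select_for_update() provides the row
# locks, and ordering by ID gives every caller a consistent lock order, which
# avoids deadlocks between concurrent queuers. job_ids is a hypothetical input.
def _example_queue_with_locks(job_ids):
    from django.db import transaction

    with transaction.atomic():
        jobs = list(Job.objects.select_for_update().filter(id__in=job_ids).order_by('id'))
        return Queue.objects.queue_jobs(jobs, requeue=False, priority=None)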
def setUp(self):
    django.setup()

    cmd = 'command'
    cmd_args = 'run test'
    timeout = 60

    workspace = storage_utils.create_workspace(base_url="http://test.com/")
    self.file_1 = storage_utils.create_file(workspace=workspace, file_path="path/1/file1.txt")
    self.file_2 = storage_utils.create_file(workspace=workspace, file_path="path/2/file2.txt")
    self.file_3 = storage_utils.create_file(workspace=workspace, file_path="path/3/file3.txt")
    input_files = {self.file_1.id: self.file_1, self.file_2.id: self.file_2, self.file_3.id: self.file_3}

    manifest = job_utils.create_seed_manifest(command='%s %s' % (cmd, cmd_args))
    imm = copy.deepcopy(manifest)
    imm['job']['jobVersion'] = '1.0.1'
    imm['job']['interface']['inputs']['files'].append({'name': 'INPUT_METADATA_MANIFEST'})
    self.seed_job_type = job_utils.create_seed_job_type(manifest=manifest)
    self.seed_job_type_metadata = job_utils.create_seed_job_type(manifest=imm)
    self.event = TriggerEvent.objects.create_trigger_event('TEST', None, {}, now())
    self.seed_job = job_utils.create_job(job_type=self.seed_job_type, event=self.event, status='RUNNING')

    self.data_dict = {'json': {'input_1': 'my_val'},
                      'files': {'input_2': [self.file_1.id], 'input_3': [self.file_2.id, self.file_3.id]}}
    self.seed_job_meta = job_utils.create_job(job_type=self.seed_job_type_metadata, event=self.event,
                                              input=self.data_dict, status='RUNNING')

    config = {'output_workspaces': {'default': storage_utils.create_workspace().name}}
    self.seed_exe = job_utils.create_job_exe(job=self.seed_job, status='RUNNING', timeout=timeout, queued=now(),
                                             configuration=config)

    configurator = QueuedExecutionConfigurator(input_files)
    exe_config = configurator.configure_queued_job(self.seed_job_meta)
    self.seed_exe_meta = job_utils.create_job_exe(job=self.seed_job_meta, status='RUNNING', timeout=timeout,
                                                  queued=now(), configuration=exe_config.get_dict())
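
# Example test sketch (hypothetical, not from the original class): the fixtures
# above let a test regenerate the queued configuration for the metadata job and
# compare it against what was stored on the execution. The method name is an
# assumption, and exact equality assumes configure_queued_job() is deterministic
# for a given job.
def test_configuration_is_regenerable(self):
    configurator = QueuedExecutionConfigurator({self.file_1.id: self.file_1, self.file_2.id: self.file_2,
                                                self.file_3.id: self.file_3})
    exe_config = configurator.configure_queued_job(self.seed_job_meta)
    self.assertEqual(exe_config.get_dict(), self.seed_exe_meta.configuration)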
def populate_queue(apps, schema_editor):
    from job.execution.configuration.json.exe_config import ExecutionConfiguration
    # Assumed import: configure_queued_job() below requires QueuedExecutionConfigurator,
    # which is defined in queue.models
    from queue.models import QueuedExecutionConfigurator

    # Go through all of the queued job models and re-populate the queue table
    when_queued = now()

    Job = apps.get_model('job', 'Job')
    JobType = apps.get_model('job', 'JobType')
    JobExecution = apps.get_model('job', 'JobExecution')
    FileAncestryLink = apps.get_model('product', 'FileAncestryLink')
    Queue = apps.get_model('queue', 'Queue')
    ScaleFile = apps.get_model('storage', 'ScaleFile')
    Workspace = apps.get_model('storage', 'Workspace')

    # Attach needed methods to Job model
    Job.get_job_data = get_job_data
    Job.get_job_interface = job_get_job_interface
    Job.get_resources = job_get_resources

    # Attach needed methods to JobType model
    JobType.get_job_interface = job_type_get_job_interface
    JobType.get_resources = job_type_get_resources
    JobType.get_secrets_key = get_secrets_key

    total_count = Job.objects.filter(status='QUEUED').count()
    print('Populating new queue table for %s queued jobs' % str(total_count))
    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print('Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent))
        batch_end = done_count + batch_size

        job_qry = Job.objects.filter(status='QUEUED').select_related('job_type', 'job_type_rev')
        job_qry = job_qry.order_by('id')[done_count:batch_end]

        # Query for all input files
        input_files = {}
        input_file_ids = set()
        for job in job_qry:
            input_file_ids.update(job.get_job_data().get_input_file_ids())
        if input_file_ids:
            qry = ScaleFile.objects.select_related('workspace').filter(id__in=input_file_ids)
            for input_file in qry.only('id', 'file_type', 'file_path', 'is_deleted', 'workspace__name').iterator():
                input_files[input_file.id] = input_file

        # Bulk create queue models
        queues = []
        configurator = QueuedExecutionConfigurator(input_files)
        for job in job_qry:
            config = configurator.configure_queued_job(job)
            queue = Queue()
            queue.job_type = job.job_type
            queue.job = job
            queue.exe_num = job.num_exes
            queue.input_file_size = job.disk_in_required if job.disk_in_required else 0.0
            queue.is_canceled = False
            queue.priority = job.priority
            queue.timeout = job.timeout
            queue.interface = job.get_job_interface().get_dict()
            queue.configuration = config.get_dict()
            queue.resources = job.get_resources().get_json().get_dict()
            queue.queued = when_queued
            queues.append(queue)
        if queues:
            Queue.objects.bulk_create(queues)

        done_count += batch_size
    print('All %s jobs completed' % str(total_count))

    total_count = JobExecution.objects.filter(status='QUEUED').count()
    print('Updating file ancestry links for %s queued job executions' % str(total_count))
    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print('Completed %s of %s queued job executions (%f%%)' % (done_count, total_count, percent))
        batch_end = done_count + batch_size

        job_exe_qry = JobExecution.objects.filter(status='QUEUED').defer('configuration', 'resources')
        job_exe_qry = job_exe_qry.order_by('id')[done_count:batch_end]

        job_exe_ids = []
        for job_exe in job_exe_qry:
            job_exe_ids.append(job_exe.id)
        FileAncestryLink.objects.filter(job_exe_id__in=job_exe_ids).update(job_exe_id=None)

        done_count += batch_size
    print('All file ancestry links for %s queued job executions completed' % str(total_count))

    print('Deleting %s queued job executions...' % str(total_count))
    JobExecution.objects.filter(status='QUEUED').delete()
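
# Wiring sketch (an assumption, not the project's actual migration header): a data
# migration like populate_queue is registered through migrations.RunPython. The
# dependency tuple below is a placeholder; the real app label and migration name
# in the project differ.
from django.db import migrations


class Migration(migrations.Migration):

    dependencies = [
        ('queue', '0001_initial'),  # placeholder; real dependency differs
    ]

    operations = [
        migrations.RunPython(populate_queue),
    ]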