def queue_new_job(self, job_type, data, event, configuration=None):
    """Creates a new job for the given type and data. The new job is immediately
    placed on the queue. The new job, job_exe, and queue models are saved in the
    database in an atomic transaction.

    :param job_type: The type of the new job to create and queue
    :type job_type: :class:`job.models.JobType`
    :param data: The job data to run on
    :type data: :class:`job.configuration.data.job_data.JobData`
    :param event: The event that triggered the creation of this job
    :type event: :class:`trigger.models.TriggerEvent`
    :param configuration: The optional initial job configuration
    :type configuration: :class:`job.configuration.configuration.job_configuration.JobConfiguration`
    :returns: The new queued job
    :rtype: :class:`job.models.Job`

    :raises job.configuration.data.exceptions.InvalidData: If the job data is invalid
    """

    new_job = Job.objects.create_job(job_type, event)

    # Fall back to an empty configuration when none (or a falsy one) was provided
    new_job.configuration = (configuration or JobConfiguration()).get_dict()
    new_job.save()

    # The job does not exist outside this transaction yet, so no lock is needed here
    Job.objects.populate_job_data(new_job, data)

    self._queue_jobs([new_job])
    return new_job
def test_init_validation(self):
    """Tests the validation done in __init__"""

    # The minimal acceptable configuration must construct without raising
    JobConfiguration()

    def duplicate_workspaces():
        # Fresh list each call so no configuration shares mutable state
        return [{'name': 'name1', 'mode': 'ro'}, {'name': 'name1', 'mode': 'ro'}]

    # A duplicate workspace name in the pre-task, job-task, or post-task
    # must each be rejected as invalid
    invalid_configs = [
        {'pre_task': {'workspaces': duplicate_workspaces()}, 'job_task': {'workspaces': []}},
        {'job_task': {'workspaces': duplicate_workspaces()}},
        {'post_task': {'workspaces': duplicate_workspaces()}, 'job_task': {'workspaces': []}},
    ]
    for config in invalid_configs:
        self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)
def create_job_exe(job_type=None, job=None, status='RUNNING', configuration=None, error=None,
                   command_arguments='test_arg', timeout=None, node=None, created=None, queued=None,
                   started=None, pre_started=None, pre_completed=None, job_started=None, job_completed=None,
                   post_started=None, post_completed=None, ended=None, last_modified=None):
    """Creates a job execution model for unit testing

    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    """

    when = timezone.now()

    # Any argument left unset (or falsy) gets a sensible default for testing
    job = job or create_job(job_type=job_type)
    configuration = configuration or JobConfiguration().get_dict()
    timeout = timeout or job.timeout
    node = node or node_utils.create_node()
    created = created or when
    queued = queued or when
    started = started or when
    last_modified = last_modified or when

    return JobExecution.objects.create(job=job, status=status, error=error, configuration=configuration,
                                       command_arguments=command_arguments, timeout=timeout, node=node,
                                       created=created, queued=queued, started=started, pre_started=pre_started,
                                       pre_completed=pre_completed, job_started=job_started,
                                       job_completed=job_completed, post_started=post_started,
                                       post_completed=post_completed, ended=ended, last_modified=last_modified)
def populate_job_configuration(apps, schema_editor): from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW from job.configuration.data.job_data import JobData # Go through all of the job models that have job data and populate their configuration Job = apps.get_model('job', 'Job') ScaleFile = apps.get_model('storage', 'ScaleFile') Workspace = apps.get_model('storage', 'Workspace') total_count = Job.objects.all().count() workspaces = {} for workspace in Workspace.objects.all().iterator(): workspaces[workspace.id] = workspace print 'Populating new configuration field for %s jobs' % str( total_count) done_count = 0 batch_size = 1000 while done_count < total_count: percent = (float(done_count) / float(total_count)) * 100.00 print 'Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent) batch_end = done_count + batch_size for job in Job.objects.select_related('job_type').order_by( 'id')[done_count:batch_end]: # Ignore jobs that don't have their job data populated yet if not job.data: continue data = JobData(job.data) input_file_ids = data.get_input_file_ids() input_files = ScaleFile.objects.filter( id__in=input_file_ids).select_related( 'workspace').iterator() input_workspaces = set() for input_file in input_files: input_workspaces.add(input_file.workspace.name) configuration = JobConfiguration() for name in input_workspaces: configuration.add_job_task_workspace(name, MODE_RO) if not job.job_type.is_system: for name in input_workspaces: configuration.add_pre_task_workspace(name, MODE_RO) # We add input workspaces to post task so it can perform a parse results move if requested by the # job's results manifest configuration.add_post_task_workspace(name, MODE_RW) for workspace_id in data.get_output_workspace_ids(): workspace = workspaces[workspace_id] if workspace.name not in input_workspaces: configuration.add_post_task_workspace( workspace.name, MODE_RW) elif job.job_type.name == 'scale-ingest': ingest_id = 
data.get_property_values(['Ingest ID' ])['Ingest ID'] from ingest.models import Ingest ingest = Ingest.objects.select_related('workspace').get( id=ingest_id) configuration.add_job_task_workspace( ingest.workspace.name, MODE_RW) job.configuration = configuration.get_dict() job.save() done_count += batch_size print 'All %s jobs completed' % str(total_count)