@transaction.atomic  # assumed wrapper for the atomic behavior the docstring describes; needs `from django.db import transaction`
def _start_ingest_task(self, ingest):
    """Starts a task for the given ingest in an atomic transaction

    :param ingest: The ingest model
    :type ingest: :class:`ingest.models.Ingest`
    """

    logger.info('Creating ingest task for %s', ingest.file_name)

    # Create new ingest job and mark ingest as QUEUED
    ingest_job_type = Ingest.objects.get_ingest_job_type()
    data = JobData()
    data.add_property_input('Ingest ID', str(ingest.id))
    desc = {'strike_id': self.strike_id, 'file_name': ingest.file_name}
    when = ingest.transfer_ended if ingest.transfer_ended else now()
    event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, desc, when)
    job_configuration = JobConfiguration()
    if ingest.workspace:
        job_configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)
    if ingest.new_workspace:
        job_configuration.add_job_task_workspace(ingest.new_workspace.name, MODE_RW)
    ingest_job = Queue.objects.queue_new_job(ingest_job_type, data, event, job_configuration)

    ingest.job = ingest_job
    ingest.status = 'QUEUED'
    ingest.save()

    logger.info('Successfully created ingest task for %s', ingest.file_name)
@transaction.atomic  # assumed wrapper for the atomic save behavior the docstring describes; needs `from django.db import transaction`
def queue_new_job(self, job_type, data, event, configuration=None):
    """Creates a new job for the given type and data. The new job is immediately placed on the queue. The new
    job, job_exe, and queue models are saved in the database in an atomic transaction.

    :param job_type: The type of the new job to create and queue
    :type job_type: :class:`job.models.JobType`
    :param data: The job data to run on
    :type data: :class:`job.configuration.data.job_data.JobData`
    :param event: The event that triggered the creation of this job
    :type event: :class:`trigger.models.TriggerEvent`
    :param configuration: The optional initial job configuration
    :type configuration: :class:`job.configuration.configuration.job_configuration.JobConfiguration`
    :returns: The new queued job
    :rtype: :class:`job.models.Job`

    :raises job.configuration.data.exceptions.InvalidData: If the job data is invalid
    """

    job = Job.objects.create_job(job_type, event)
    if not configuration:
        configuration = JobConfiguration()
    job.configuration = configuration.get_dict()
    job.save()

    # No lock needed for this job since it doesn't exist outside this transaction yet
    Job.objects.populate_job_data(job, data)

    self._queue_jobs([job])

    return job
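# Illustrative only: a minimal sketch of calling queue_new_job() through the Queue manager,
# mirroring the ingest example above. The job type name 'my-job-type', the trigger event type
# 'EXAMPLE', and the workspace name 'raw' are hypothetical, not part of the source.
def _example_queue_new_job():
    job_type = JobType.objects.get(name='my-job-type')  # hypothetical job type lookup
    data = JobData()
    data.add_property_input('Ingest ID', '1')  # property input, as in _start_ingest_task above
    event = TriggerEvent.objects.create_trigger_event('EXAMPLE', None, {}, now())
    configuration = JobConfiguration()
    configuration.add_job_task_workspace('raw', MODE_RW)  # hypothetical workspace name

    # Returns the new job.models.Job, which is now sitting on the queue
    return Queue.objects.queue_new_job(job_type, data, event, configuration)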
def test_populate_default_job_settings(self):
    """Tests the addition of default settings to the configuration."""

    job_config = JobConfiguration()
    config_dict = {
        'version': '1.0',
        'default_settings': {
            'setting_name': 'some_val',
            'setting2': 'other_val'
        }
    }
    job_exe = MagicMock()
    job_exe.get_job_type_configuration.return_value = JobTypeConfiguration(config_dict)

    job_config.populate_default_job_settings(job_exe)

    populated_config = job_config.get_dict()
    populated_settings = populated_config['job_task']['settings']
    populated_setting_values = [x.values() for x in populated_settings]
    results_dict = {x[0]: x[1] for x in populated_setting_values}

    self.assertTrue(results_dict == config_dict['default_settings'])
def test_init_validation(self):
    """Tests the validation done in __init__"""

    # Try minimal acceptable configuration
    JobConfiguration()

    # Duplicate workspace name in pre-task
    config = {'pre_task': {'workspaces': [{'name': 'name1', 'mode': 'ro'}, {'name': 'name1', 'mode': 'ro'}]},
              'job_task': {'workspaces': []}}
    self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)

    # Duplicate workspace name in job-task
    config = {'job_task': {'workspaces': [{'name': 'name1', 'mode': 'ro'}, {'name': 'name1', 'mode': 'ro'}]}}
    self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)

    # Duplicate workspace name in post-task
    config = {'post_task': {'workspaces': [{'name': 'name1', 'mode': 'ro'}, {'name': 'name1', 'mode': 'ro'}]},
              'job_task': {'workspaces': []}}
    self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)
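# Illustrative only: a configuration dict that should pass __init__ validation, based on the
# schema the tests above exercise (per-task 'workspaces' lists with unique names and 'ro'/'rw'
# modes; 'version' appears optional, since the configs above omit it). The workspace names
# 'input_ws' and 'output_ws' are hypothetical.
def test_init_valid_configuration(self):
    config = {'pre_task': {'workspaces': [{'name': 'input_ws', 'mode': 'ro'}]},
              'job_task': {'workspaces': [{'name': 'input_ws', 'mode': 'ro'},
                                          {'name': 'output_ws', 'mode': 'rw'}]},
              'post_task': {'workspaces': [{'name': 'output_ws', 'mode': 'rw'}]}}
    JobConfiguration(config)  # no InvalidJobConfiguration expected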
def create_job_exe(job_type=None, job=None, status='RUNNING', configuration=None, error=None,
                   command_arguments='test_arg', timeout=None, node=None, created=None, queued=None,
                   started=None, pre_started=None, pre_completed=None, job_started=None, job_completed=None,
                   post_started=None, post_completed=None, ended=None, last_modified=None):
    """Creates a job execution model for unit testing

    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type)
    if not configuration:
        configuration = JobConfiguration().get_dict()
    if not timeout:
        timeout = job.timeout
    if not node:
        node = node_utils.create_node()
    if not created:
        created = when
    if not queued:
        queued = when
    if not started:
        started = when
    if not last_modified:
        last_modified = when

    return JobExecution.objects.create(job=job, status=status, error=error, configuration=configuration,
                                       command_arguments=command_arguments, timeout=timeout, node=node,
                                       created=created, queued=queued, started=started,
                                       pre_started=pre_started, pre_completed=pre_completed,
                                       job_started=job_started, job_completed=job_completed,
                                       post_started=post_started, post_completed=post_completed,
                                       ended=ended, last_modified=last_modified)
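# Illustrative only: a minimal sketch of using create_job_exe() in a unit test, relying on the
# defaults the helper fills in. The test name and assertions are hypothetical.
def test_create_job_exe_defaults(self):
    job_exe = create_job_exe(status='COMPLETED', ended=timezone.now())
    self.assertEqual(job_exe.status, 'COMPLETED')
    self.assertEqual(job_exe.command_arguments, 'test_arg')  # default from the helper's signature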
def test_successful(self, mock_get_dict):
    """Tests calling JobConfiguration.convert_configuration() successfully."""

    mock_get_dict.return_value = self.job_configuration_dict
    job_configuration = JobConfiguration.convert_configuration(self.job_configuration_dict)
    self.assertEqual(job_configuration['version'], '1.1')
    self.assertFalse(job_configuration['job_task']['settings'])
def populate_job_configuration(apps, schema_editor):
    from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
    from job.configuration.data.job_data import JobData

    # Go through all of the job models that have job data and populate their configuration
    Job = apps.get_model('job', 'Job')
    ScaleFile = apps.get_model('storage', 'ScaleFile')
    Workspace = apps.get_model('storage', 'Workspace')

    total_count = Job.objects.all().count()
    workspaces = {}
    for workspace in Workspace.objects.all().iterator():
        workspaces[workspace.id] = workspace

    print('Populating new configuration field for %s jobs' % str(total_count))
    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print('Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent))
        batch_end = done_count + batch_size

        for job in Job.objects.select_related('job_type').order_by('id')[done_count:batch_end]:
            # Ignore jobs that don't have their job data populated yet
            if not job.data:
                continue

            data = JobData(job.data)
            input_file_ids = data.get_input_file_ids()
            input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related('workspace').iterator()
            input_workspaces = set()
            for input_file in input_files:
                input_workspaces.add(input_file.workspace.name)

            configuration = JobConfiguration()
            for name in input_workspaces:
                configuration.add_job_task_workspace(name, MODE_RO)

            if not job.job_type.is_system:
                for name in input_workspaces:
                    configuration.add_pre_task_workspace(name, MODE_RO)
                    # We add input workspaces to post task so it can perform a parse results move if requested
                    # by the job's results manifest
                    configuration.add_post_task_workspace(name, MODE_RW)
                for workspace_id in data.get_output_workspace_ids():
                    workspace = workspaces[workspace_id]
                    if workspace.name not in input_workspaces:
                        configuration.add_post_task_workspace(workspace.name, MODE_RW)
            elif job.job_type.name == 'scale-ingest':
                ingest_id = data.get_property_values(['Ingest ID'])['Ingest ID']
                from ingest.models import Ingest
                ingest = Ingest.objects.select_related('workspace').get(id=ingest_id)
                configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

            job.configuration = configuration.get_dict()
            job.save()

        done_count += batch_size

    print('All %s jobs completed' % str(total_count))
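# Illustrative only: how a data-migration function like the one above is typically wired into
# a Django Migration class via migrations.RunPython. The app label and predecessor migration
# name are hypothetical.
from django.db import migrations

class Migration(migrations.Migration):

    dependencies = [
        ('job', '0001_initial'),  # hypothetical predecessor migration
    ]

    operations = [
        migrations.RunPython(populate_job_configuration),
    ]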