def queue_new_job(self, job_type, data, event, configuration=None):
    """Creates a job of the given type, attaches its data and places it on the queue right away.

    The job model is created and saved with its configuration, populated with the given job data and then
    handed to ``self._queue_jobs``. No transaction is opened inside this method body, so the atomic behavior
    must be established outside of it.

    :param job_type: The type of the new job to create and queue
    :type job_type: :class:`job.models.JobType`
    :param data: The job data to run on
    :type data: :class:`job.configuration.data.job_data.JobData`
    :param event: The event that triggered the creation of this job
    :type event: :class:`trigger.models.TriggerEvent`
    :param configuration: The optional initial job configuration, an empty one is used when omitted
    :type configuration: :class:`job.configuration.configuration.job_configuration.JobConfiguration`
    :returns: The new queued job
    :rtype: :class:`job.models.Job`

    :raises job.configuration.data.exceptions.InvalidData: If the job data is invalid
    """

    new_job = Job.objects.create_job(job_type, event)

    # Fall back to a freshly constructed configuration when the caller did not supply a usable one
    job_config = configuration if configuration else JobConfiguration()
    new_job.configuration = job_config.get_dict()
    new_job.save()

    # The job is not visible outside of this transaction yet, so it can be populated without a lock
    Job.objects.populate_job_data(new_job, data)
    self._queue_jobs([new_job])

    return new_job
def queue_new_job(self, job_type, data, event, configuration=None):
    """Builds a brand new job from a job type and job data and queues it immediately.

    Steps performed: create the job model for the triggering event, store the configuration dict on it,
    save it, populate its job data and pass it to ``self._queue_jobs``. The transaction that makes these
    saves atomic is not opened in this method body.

    :param job_type: The type of the new job to create and queue
    :type job_type: :class:`job.models.JobType`
    :param data: The job data to run on
    :type data: :class:`job.configuration.data.job_data.JobData`
    :param event: The event that triggered the creation of this job
    :type event: :class:`trigger.models.TriggerEvent`
    :param configuration: The optional initial job configuration
    :type configuration: :class:`job.configuration.configuration.job_configuration.JobConfiguration`
    :returns: The new queued job
    :rtype: :class:`job.models.Job`

    :raises job.configuration.data.exceptions.InvalidData: If the job data is invalid
    """

    queued_job = Job.objects.create_job(job_type, event)

    # A missing configuration is replaced by a default-constructed one
    effective_config = configuration or JobConfiguration()
    queued_job.configuration = effective_config.get_dict()
    queued_job.save()

    # Locking is unnecessary here: nothing outside this transaction can see the job yet
    Job.objects.populate_job_data(queued_job, data)

    jobs_to_queue = [queued_job]
    self._queue_jobs(jobs_to_queue)
    return queued_job
def test_populate_default_job_settings(self):
    """Tests the addition of default settings to the configuration."""

    config_dict = {
        'version': '1.0',
        'default_settings': {
            'setting_name': 'some_val',
            'setting2': 'other_val',
        },
    }

    # Stand-in job execution whose job type configuration carries the default settings above
    job_exe = MagicMock()
    job_exe.get_job_type_configuration.return_value = JobTypeConfiguration(config_dict)

    job_config = JobConfiguration()
    job_config.populate_default_job_settings(job_exe)

    populated_settings = job_config.get_dict()['job_task']['settings']

    # Each populated setting is a dict; its first value is used as the key and its second as the value.
    # list() keeps the values indexable when dict.values() returns a view instead of a list.
    # NOTE(review): this depends on the value ordering of each setting dict; looking the two entries up
    # by their key names would be more robust - confirm the key names against JobConfiguration.
    results_dict = {}
    for setting in populated_settings:
        setting_values = list(setting.values())
        results_dict[setting_values[0]] = setting_values[1]

    # assertEqual reports the differing entries on failure, unlike assertTrue(a == b)
    self.assertEqual(results_dict, config_dict['default_settings'])
def populate_job_configuration(apps, schema_editor):
    """Fills in the configuration field of every existing job that already has job data.

    Jobs are visited in ID order in batches of 1000, printing progress before each batch. For each job a
    JobConfiguration is built that lists the workspaces of the job's input files (and, for non-system jobs,
    its output workspaces), and the resulting dict is saved on the job.

    :param apps: Registry used to look up the Job, ScaleFile and Workspace models
    :param schema_editor: Not used by this function
    """

    from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
    from job.configuration.data.job_data import JobData

    Job = apps.get_model("job", "Job")
    ScaleFile = apps.get_model("storage", "ScaleFile")
    Workspace = apps.get_model("storage", "Workspace")

    total_count = Job.objects.all().count()
    # All workspaces keyed by ID, used to resolve the output workspace IDs found in the job data
    workspaces = {ws.id: ws for ws in Workspace.objects.all().iterator()}

    # A parenthesised single-argument print produces the same output as the print statement
    print("Populating new configuration field for %s jobs" % str(total_count))

    batch_size = 1000
    done_count = 0
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print("Completed %s of %s jobs (%f%%)" % (done_count, total_count, percent))

        batch_end = done_count + batch_size
        ordered_jobs = Job.objects.select_related("job_type").order_by("id")
        for job in ordered_jobs[done_count:batch_end]:
            # Jobs whose job data has not been populated yet are left untouched
            if not job.data:
                continue

            data = JobData(job.data)
            file_query = ScaleFile.objects.filter(id__in=data.get_input_file_ids())
            input_workspaces = {
                input_file.workspace.name
                for input_file in file_query.select_related("workspace").iterator()
            }

            configuration = JobConfiguration()
            for name in input_workspaces:
                configuration.add_job_task_workspace(name, MODE_RO)

            if not job.job_type.is_system:
                for name in input_workspaces:
                    configuration.add_pre_task_workspace(name, MODE_RO)
                    # The post task also gets the input workspaces so that it can perform a parse results
                    # move when the job's results manifest asks for one
                    configuration.add_post_task_workspace(name, MODE_RW)
                for workspace_id in data.get_output_workspace_ids():
                    output_name = workspaces[workspace_id].name
                    if output_name not in input_workspaces:
                        configuration.add_post_task_workspace(output_name, MODE_RW)
            elif job.job_type.name == "scale-ingest":
                ingest_id = data.get_property_values(["Ingest ID"])["Ingest ID"]
                # Imported lazily, only once an ingest job is actually encountered
                from ingest.models import Ingest
                ingest = Ingest.objects.select_related("workspace").get(id=ingest_id)
                configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

            job.configuration = configuration.get_dict()
            job.save()

        done_count += batch_size

    print("All %s jobs completed" % str(total_count))
def populate_job_configuration(apps, schema_editor):
    """Populates the new configuration field on existing job models.

    Only jobs with job data are updated. Non-system jobs get their input workspaces on the pre, job and post
    tasks plus any additional output workspaces on the post task; system jobs only get their input
    workspaces on the job task, and jobs of type ``scale-ingest`` additionally get the ingest's workspace.
    Work is done in ID-ordered batches of 1000 jobs with a progress line printed per batch.

    :param apps: Registry used to look up the Job, ScaleFile and Workspace models
    :param schema_editor: Not used by this function
    """

    from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
    from job.configuration.data.job_data import JobData

    # Look up the models that this data population works with
    Job = apps.get_model('job', 'Job')
    ScaleFile = apps.get_model('storage', 'ScaleFile')
    Workspace = apps.get_model('storage', 'Workspace')

    total_count = Job.objects.all().count()

    # Cache every workspace by ID so output workspace IDs can be resolved without further queries
    workspaces = dict((cached.id, cached) for cached in Workspace.objects.all().iterator())

    # A parenthesised single-argument print produces the same output as the print statement
    print('Populating new configuration field for %s jobs' % str(total_count))

    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print('Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent))

        batch = Job.objects.select_related('job_type').order_by('id')[done_count:done_count + batch_size]
        for job in batch:
            if not job.data:
                # Job data has not been populated yet, so there is nothing to derive a configuration from
                continue

            data = JobData(job.data)
            input_file_ids = data.get_input_file_ids()
            input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related('workspace')
            input_workspaces = set()
            for scale_file in input_files.iterator():
                input_workspaces.add(scale_file.workspace.name)

            configuration = JobConfiguration()
            for workspace_name in input_workspaces:
                configuration.add_job_task_workspace(workspace_name, MODE_RO)

            if job.job_type.is_system:
                if job.job_type.name == 'scale-ingest':
                    property_values = data.get_property_values(['Ingest ID'])
                    ingest_id = property_values['Ingest ID']
                    # Imported lazily, only once an ingest job is actually encountered
                    from ingest.models import Ingest
                    ingest = Ingest.objects.select_related('workspace').get(id=ingest_id)
                    configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)
            else:
                for workspace_name in input_workspaces:
                    configuration.add_pre_task_workspace(workspace_name, MODE_RO)
                    # Input workspaces are also given to the post task so that it can perform a parse
                    # results move if the job's results manifest requests it
                    configuration.add_post_task_workspace(workspace_name, MODE_RW)
                for workspace_id in data.get_output_workspace_ids():
                    output_workspace = workspaces[workspace_id]
                    if output_workspace.name in input_workspaces:
                        # Already added to the post task as an input workspace
                        continue
                    configuration.add_post_task_workspace(output_workspace.name, MODE_RW)

            job.configuration = configuration.get_dict()
            job.save()

        done_count += batch_size

    print('All %s jobs completed' % str(total_count))