def populate_job_configuration(apps, schema_editor): from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW from job.configuration.data.job_data import JobData # Go through all of the job models that have job data and populate their configuration Job = apps.get_model("job", "Job") ScaleFile = apps.get_model("storage", "ScaleFile") Workspace = apps.get_model("storage", "Workspace") total_count = Job.objects.all().count() workspaces = {} for workspace in Workspace.objects.all().iterator(): workspaces[workspace.id] = workspace print "Populating new configuration field for %s jobs" % str(total_count) done_count = 0 batch_size = 1000 while done_count < total_count: percent = (float(done_count) / float(total_count)) * 100.00 print "Completed %s of %s jobs (%f%%)" % (done_count, total_count, percent) batch_end = done_count + batch_size for job in Job.objects.select_related("job_type").order_by("id")[done_count:batch_end]: # Ignore jobs that don't have their job data populated yet if not job.data: continue data = JobData(job.data) input_file_ids = data.get_input_file_ids() input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related("workspace").iterator() input_workspaces = set() for input_file in input_files: input_workspaces.add(input_file.workspace.name) configuration = JobConfiguration() for name in input_workspaces: configuration.add_job_task_workspace(name, MODE_RO) if not job.job_type.is_system: for name in input_workspaces: configuration.add_pre_task_workspace(name, MODE_RO) # We add input workspaces to post task so it can perform a parse results move if requested by the # job's results manifest configuration.add_post_task_workspace(name, MODE_RW) for workspace_id in data.get_output_workspace_ids(): workspace = workspaces[workspace_id] if workspace.name not in input_workspaces: configuration.add_post_task_workspace(workspace.name, MODE_RW) elif job.job_type.name == "scale-ingest": ingest_id = data.get_property_values(["Ingest ID"])["Ingest ID"] from ingest.models import Ingest ingest = Ingest.objects.select_related("workspace").get(id=ingest_id) configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW) job.configuration = configuration.get_dict() job.save() done_count += batch_size print "All %s jobs completed" % str(total_count)
def populate_job_configuration(apps, schema_editor): from job.execution.configuration.json.exe_config import ExecutionConfiguration, MODE_RO, MODE_RW from job.configuration.data.job_data import JobData # Go through all of the job models that have job data and populate their configuration Job = apps.get_model('job', 'Job') ScaleFile = apps.get_model('storage', 'ScaleFile') Workspace = apps.get_model('storage', 'Workspace') total_count = Job.objects.all().count() workspaces = {} for workspace in Workspace.objects.all().iterator(): workspaces[workspace.id] = workspace print 'Populating new configuration field for %s jobs' % str(total_count) done_count = 0 batch_size = 1000 while done_count < total_count: percent = (float(done_count) / float(total_count)) * 100.00 print 'Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent) batch_end = done_count + batch_size for job in Job.objects.select_related('job_type').order_by('id')[done_count:batch_end]: # Ignore jobs that don't have their job data populated yet if not job.data: continue data = JobData(job.data) input_file_ids = data.get_input_file_ids() input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related('workspace').iterator() input_workspaces = set() for input_file in input_files: input_workspaces.add(input_file.workspace.name) configuration = ExecutionConfiguration() for name in input_workspaces: configuration.add_job_task_workspace(name, MODE_RO) if not job.job_type.is_system: for name in input_workspaces: configuration.add_pre_task_workspace(name, MODE_RO) # We add input workspaces to post task so it can perform a parse results move if requested by the # job's results manifest configuration.add_post_task_workspace(name, MODE_RW) for workspace_id in data.get_output_workspace_ids(): workspace = workspaces[workspace_id] if workspace.name not in input_workspaces: configuration.add_post_task_workspace(workspace.name, MODE_RW) elif job.job_type.name == 'scale-ingest': ingest_id = data.get_property_values(['Ingest ID'])['Ingest ID'] from ingest.models import Ingest ingest = Ingest.objects.select_related('workspace').get(id=ingest_id) configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW) job.configuration = configuration.get_dict() job.save() done_count += batch_size print 'All %s jobs completed' % str(total_count)