def test_convert_1_1_to_current(self):
    """Tests converting execution configuration 1.1 to current"""

    old_dict = {'version': '1.1',
                'job_task': {'settings': [{'name': 'setting_1', 'value': 'value_1'}],
                             'workspaces': [{'name': 'name1', 'mode': 'ro'}]}}
    exe_config = ExecutionConfiguration(old_dict)
    new_dict = exe_config.get_dict()
    self.assertEqual(new_dict['version'], '2.0')
    # Version 1.1 will auto-create pre and post tasks
    self.assertEqual(3, len(new_dict['tasks']))
    self.assertEqual('main', new_dict['tasks'][1]['type'])
    self.assertEqual(1, len(new_dict['tasks'][1]['settings']))
    self.assertEqual('value_1', new_dict['tasks'][1]['settings']['setting_1'])
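# For reference, the assertions above imply a converted 2.0 configuration shaped roughly like the
# sketch below. Only the version, the task count and order, and the main task's settings are
# actually asserted; the pre/post task contents and the workspace conversion are illustrative:
#
# {
#     'version': '2.0',
#     'tasks': [
#         {'type': 'pre', ...},
#         {'type': 'main',
#          'settings': {'setting_1': 'value_1'},
#          'workspaces': {'name1': {'mode': 'ro'}}},
#         {'type': 'post', ...},
#     ]
# }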
def create_queue(job_type=None, priority=1, timeout=3600, cpus_required=1.0, mem_required=512.0,
                 disk_in_required=200.0, disk_out_required=100.0, disk_total_required=300.0,
                 gpus_required=0, queued=None):
    """Creates a queue model for unit testing

    :param job_type: The job type
    :type job_type: :class:`job.models.JobType`
    :param priority: The priority
    :type priority: int
    :param timeout: The timeout
    :type timeout: int
    :param cpus_required: The number of CPUs required
    :type cpus_required: float
    :param mem_required: The memory required in MiB
    :type mem_required: float
    :param disk_in_required: The input disk space required in MiB
    :type disk_in_required: float
    :param disk_out_required: The output disk space required in MiB
    :type disk_out_required: float
    :param disk_total_required: The total disk space required in MiB
    :type disk_total_required: float
    :param gpus_required: The number of GPUs required
    :type gpus_required: float
    :param queued: The time the execution was queued, defaults to the current time
    :type queued: :class:`datetime.datetime`
    :returns: The queue model
    :rtype: :class:`queue.models.Queue`
    """

    # A signature default of queued=timezone.now() would be evaluated once at import time,
    # giving every queue the same timestamp, so the default is resolved per call here instead
    if queued is None:
        queued = timezone.now()

    job = job_test_utils.create_job(job_type=job_type, status='QUEUED')
    resources = NodeResources([Cpus(cpus_required), Mem(mem_required), Disk(disk_total_required),
                               Gpus(gpus_required)])
    return Queue.objects.create(job_type=job.job_type, job=job, exe_num=job.num_exes, priority=priority,
                                timeout=timeout, input_file_size=disk_in_required,
                                interface=job.get_job_interface().get_dict(),
                                configuration=ExecutionConfiguration().get_dict(),
                                resources=resources.get_json().get_dict(), queued=queued)
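# Example usage in a unit test (a sketch; all keyword arguments beyond those shown keep their
# defaults):
#
#     queue = create_queue(priority=100, cpus_required=2.0)
#     self.assertEqual(queue.priority, 100)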
def get_execution_configuration(self):
    """Returns the execution configuration for this queued job

    :returns: The execution configuration for this queued job
    :rtype: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    """

    return ExecutionConfiguration(self.configuration, do_validate=False)
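# Example usage (a sketch; the 'tasks' access assumes a configuration populated by
# QueuedExecutionConfigurator, which creates the 'main' task as shown further below):
#
#     exe_config = queue.get_execution_configuration()
#     main_task = exe_config.get_dict()['tasks'][0]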
def test_init_validation(self):
    """Tests the validation done in __init__"""

    # Try minimal acceptable configuration
    ExecutionConfiguration()

    # Invalid version
    config = {'version': 'BAD'}
    self.assertRaises(InvalidExecutionConfiguration, ExecutionConfiguration, config)
def test_determine_error(self):
    """Tests that a pre-task successfully determines the correct error"""

    scale_errors = [ScaleDatabaseError(), ScaleIOError(), ScaleOperationalError(), MissingSetting('')]

    for scale_error in scale_errors:
        config = ExecutionConfiguration()
        config.create_tasks(['pre'])
        config.set_task_ids(self.job_exe.get_cluster_id())
        task = PreTask('agent_1', self.job_exe, self.job_exe.job_type, config)
        update = job_test_utils.create_task_status_update(task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
        task.update(update)
        update = job_test_utils.create_task_status_update(task.id, task.agent_id, TaskStatusUpdate.FAILED, now(),
                                                          exit_code=scale_error.exit_code)
        error = task.determine_error(update)
        self.assertEqual(scale_error.error_name, error.name)
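# Each iteration of the loop above follows the same pattern; a single case reduces to this sketch,
# which shows that determine_error() maps the failed task's exit code back to the corresponding
# Scale error:
#
#     scale_error = ScaleDatabaseError()
#     update = job_test_utils.create_task_status_update(task.id, task.agent_id,
#                                                       TaskStatusUpdate.FAILED, now(),
#                                                       exit_code=scale_error.exit_code)
#     self.assertEqual(task.determine_error(update).name, scale_error.error_name)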
def init_with_database(self):
    """Initializes the job execution metrics with the execution history from the database"""

    oldest_time = self._finished_metrics_over_time.time_blocks[0].start
    blank_config = ExecutionConfiguration()
    for job_exe_end in JobExecutionEnd.objects.get_recent_job_exe_end_metrics(oldest_time):
        running_job_exe = RunningJobExecution('', job_exe_end.job_exe, job_exe_end.job_type, blank_config, 0)
        running_job_exe._set_final_status(job_exe_end.status, job_exe_end.ended, job_exe_end.error)
        self._finished_metrics.add_job_execution(running_job_exe)
        self._finished_metrics_over_time.add_job_execution(running_job_exe)
def create_job_exe(job_type=None, job=None, exe_num=None, node=None, timeout=None, input_file_size=10.0, queued=None,
                   started=None, status='RUNNING', error=None, ended=None, output=None, task_results=None):
    """Creates a job_exe model for unit testing, may also create job_exe_end and job_exe_output models depending on
    status

    :returns: The job_exe model
    :rtype: :class:`job.models.JobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status=status, input_file_size=input_file_size)
    job_type = job.job_type

    job_exe = JobExecution()
    job_exe.job = job
    job_exe.job_type = job_type
    if not exe_num:
        exe_num = job.num_exes
    job_exe.exe_num = exe_num
    job_exe.set_cluster_id('1234', job.id, job_exe.exe_num)
    if not node:
        node = node_utils.create_node()
    job_exe.node = node
    if not timeout:
        timeout = job.timeout
    job_exe.timeout = timeout
    job_exe.input_file_size = input_file_size
    job_exe.resources = job.get_resources().get_json().get_dict()
    job_exe.configuration = ExecutionConfiguration().get_dict()
    if not queued:
        queued = when
    job_exe.queued = queued
    if not started:
        started = when + datetime.timedelta(seconds=1)
    job_exe.started = started
    job_exe.save()

    if status in ['COMPLETED', 'FAILED', 'CANCELED']:
        job_exe_end = JobExecutionEnd()
        job_exe_end.job_exe_id = job_exe.id
        job_exe_end.job = job_exe.job
        job_exe_end.job_type = job_exe.job_type
        job_exe_end.exe_num = job_exe.exe_num
        if not task_results:
            task_results = TaskResults()
        job_exe_end.task_results = task_results.get_dict()
        job_exe_end.status = status
        if status == 'FAILED' and not error:
            error = error_test_utils.create_error()
        job_exe_end.error = error
        job_exe_end.node = node
        job_exe_end.queued = queued
        job_exe_end.started = started
        job_exe_end.seed_started = task_results.get_task_started('main')
        job_exe_end.seed_ended = task_results.get_task_ended('main')
        if not ended:
            ended = started + datetime.timedelta(seconds=1)
        job_exe_end.ended = ended
        job_exe_end.save()

    if status == 'COMPLETED' or output:
        job_exe_output = JobExecutionOutput()
        job_exe_output.job_exe_id = job_exe.id
        job_exe_output.job = job_exe.job
        job_exe_output.job_type = job_exe.job_type
        job_exe_output.exe_num = job_exe.exe_num
        if not output:
            output = JobResults()
        job_exe_output.output = output.get_dict()
        job_exe_output.save()

    return job_exe
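# Example usage (a sketch): the status argument determines which related models get created
#
#     running_exe = create_job_exe()                     # job_exe model only
#     failed_exe = create_job_exe(status='FAILED')       # also creates job_exe_end with an error
#     finished_exe = create_job_exe(status='COMPLETED')  # also creates job_exe_end and job_exe_output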
def configure_queued_job(self, job):
    """Creates and returns an execution configuration for the given queued job. The given job model should have its
    related job_type, job_type_rev, and batch models populated.

    :param job: The queued job model
    :type job: :class:`job.models.Job`
    :returns: The execution configuration for the queued job
    :rtype: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    """

    config = ExecutionConfiguration()
    data = job.get_job_data()

    # Add input file meta-data
    input_files_dict = self._create_input_file_dict(data)
    config.set_input_files(input_files_dict)

    # Set up env vars for job's input data
    input_values = data.get_injected_input_values(input_files_dict)
    env_vars = data.get_injected_env_vars(input_files_dict)

    task_workspaces = {}
    if job.job_type.is_system:
        # Add any workspaces needed for this system job
        task_workspaces = QueuedExecutionConfigurator._system_job_workspaces(job)
    else:
        # Set any output workspaces needed
        if 'version' in job.input and job.input['version'] == '1.0':
            # Set output workspaces using legacy job data
            self._cache_workspace_names(data.get_output_workspace_ids())
            output_workspaces = {}
            for output, workspace_id in data.get_output_workspaces().items():
                output_workspaces[output] = self._cached_workspace_names[workspace_id]
            config.set_output_workspaces(output_workspaces)
        else:
            # Set output workspaces from job configuration
            output_workspaces = {}
            job_config = job.job_type.get_job_configuration()
            interface = JobInterfaceSunset.create(job.job_type.manifest, do_validate=False)
            for output_name in interface.get_file_output_names():
                output_workspace = job_config.get_output_workspace(output_name)
                if output_workspace:
                    output_workspaces[output_name] = output_workspace
            config.set_output_workspaces(output_workspaces)

    # Create main task with fields populated from input data
    args = job.get_job_interface().get_injected_command_args(input_values, env_vars)
    config.create_tasks(['main'])
    config.add_to_task('main', args=args, env_vars=env_vars, workspaces=task_workspaces)
    return config
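# For a non-system job, the returned configuration has a structure like this sketch (field values
# are illustrative; only the structure follows from the code above and from the 2.0 schema used in
# the create_copy() test below):
#
# {
#     'version': '2.0',
#     'input_files': {'INPUT_1': [{...}]},
#     'output_workspaces': {'OUTPUT_1': 'workspace-name'},
#     'tasks': [{'type': 'main', 'args': 'foo ${INPUT_1} ${JOB_OUTPUT_DIR}',
#                'env_vars': {...}, 'workspaces': {}}]
# }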
def populate_job_configuration(apps, schema_editor):
    from job.execution.configuration.json.exe_config import ExecutionConfiguration, MODE_RO, MODE_RW
    from job.configuration.data.job_data import JobData

    # Go through all of the job models that have job data and populate their configuration
    Job = apps.get_model('job', 'Job')
    ScaleFile = apps.get_model('storage', 'ScaleFile')
    Workspace = apps.get_model('storage', 'Workspace')
    total_count = Job.objects.all().count()
    workspaces = {}
    for workspace in Workspace.objects.all().iterator():
        workspaces[workspace.id] = workspace

    print('Populating new configuration field for %s jobs' % str(total_count))
    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print('Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent))
        batch_end = done_count + batch_size

        for job in Job.objects.select_related('job_type').order_by('id')[done_count:batch_end]:
            # Ignore jobs that don't have their job data populated yet
            if not job.data:
                continue
            data = JobData(job.data)
            input_file_ids = data.get_input_file_ids()
            input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related('workspace').iterator()
            input_workspaces = set()
            for input_file in input_files:
                input_workspaces.add(input_file.workspace.name)

            configuration = ExecutionConfiguration()
            for name in input_workspaces:
                configuration.add_job_task_workspace(name, MODE_RO)

            if not job.job_type.is_system:
                for name in input_workspaces:
                    configuration.add_pre_task_workspace(name, MODE_RO)
                    # We add input workspaces to the post task so it can perform a parse results move if
                    # requested by the job's results manifest
                    configuration.add_post_task_workspace(name, MODE_RW)
                for workspace_id in data.get_output_workspace_ids():
                    workspace = workspaces[workspace_id]
                    if workspace.name not in input_workspaces:
                        configuration.add_post_task_workspace(workspace.name, MODE_RW)
            elif job.job_type.name == 'scale-ingest':
                ingest_id = data.get_property_values(['Ingest ID'])['Ingest ID']
                from ingest.models import Ingest
                ingest = Ingest.objects.select_related('workspace').get(id=ingest_id)
                configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

            job.configuration = configuration.get_dict()
            job.save()

        done_count += batch_size
    print('All %s jobs completed' % str(total_count))
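# The migration builds 1.x-style configurations whose structure matches the old_dict used in the
# 1.1 conversion test above (a sketch; workspace names are illustrative and the exact version
# string comes from the legacy schema default):
#
# {
#     'version': '1.1',
#     'pre_task': {'workspaces': [{'name': 'input-wksp', 'mode': 'ro'}]},
#     'job_task': {'workspaces': [{'name': 'input-wksp', 'mode': 'ro'}]},
#     'post_task': {'workspaces': [{'name': 'input-wksp', 'mode': 'rw'},
#                                  {'name': 'output-wksp', 'mode': 'rw'}]},
# }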
def test_create_copy(self):
    """Tests the create_copy() method"""

    config = {'version': '2.0',
              'input_files': {
                  'INPUT_1': [{'id': 1234, 'type': 'PRODUCT', 'workspace_name': 'wksp-name',
                               'workspace_path': 'the/workspace/path/file.json',
                               'local_file_name': 'file_abcdfeg.json', 'is_deleted': False}]
              },
              'output_workspaces': {'OUTPUT_1': 'WORKSPACE_1'},
              'tasks': [
                  {'task_id': 'task-1234', 'type': 'main', 'resources': {'cpu': 1.0},
                   'args': 'foo ${INPUT_1} ${JOB_OUTPUT_DIR}',
                   'env_vars': {'ENV_VAR_NAME': 'ENV_VAR_VALUE'},
                   'workspaces': {'WORKSPACE_NAME': {'mode': 'ro'}},
                   'mounts': {'MOUNT_NAME': 'MOUNT_VOLUME_NAME'},
                   'settings': {'SETTING_NAME': 'SETTING_VALUE'},
                   'volumes': {
                       'VOLUME_NAME_1': {'container_path': '/the/container/path', 'mode': 'ro', 'type': 'host',
                                         'host_path': '/the/host/path'},
                       'VOLUME_NAME_2': {'container_path': '/the/other/container/path', 'mode': 'rw',
                                         'type': 'volume', 'driver': 'SUPER_DRIVER_5000',
                                         'driver_opts': {'turbo': 'yes-pleez'}}
                   },
                   'docker_params': [{'flag': 'hello', 'value': 'scale'}]}
              ]}
    exe_config = ExecutionConfiguration(config)
    copy = exe_config.create_copy()
    self.assertDictEqual(copy.get_dict(), config)