def test_cleanup_all(self):
    self.mock_job.id = 1
    self.mock_job.metadata.name = 'job_1'
    mock_config_map = Mock()
    mock_config_map.metadata.name = 'config_map_1'
    mock_pvc = Mock()
    mock_pvc.metadata.name = 'pvc_1'
    mock_cluster_api = Mock()
    mock_cluster_api.list_jobs.return_value = [self.mock_job]
    mock_cluster_api.list_config_maps.return_value = [mock_config_map]
    mock_cluster_api.list_persistent_volume_claims.return_value = [mock_pvc]
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.cleanup_all()
    mock_cluster_api.delete_job.assert_called_with('job_1')
    mock_cluster_api.delete_config_map.assert_called_with('config_map_1')
    mock_cluster_api.delete_persistent_volume_claim.assert_called_with('pvc_1')
    mock_cluster_api.list_persistent_volume_claims.assert_called_with(
        label_selector='bespin-job=true,bespin-job-id=1')
    mock_cluster_api.list_jobs.assert_called_with(
        label_selector='bespin-job=true,bespin-job-id=1')
    mock_cluster_api.list_config_maps.assert_called_with(
        label_selector='bespin-job=true,bespin-job-id=1')

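# A minimal sketch of the cleanup contract pinned down by the test above; the
# real JobManager implementation may differ in details such as how the label
# selector is built:
#
#     def cleanup_all(self):
#         selector = 'bespin-job=true,bespin-job-id={}'.format(self.job.id)
#         for job in self.cluster_api.list_jobs(label_selector=selector):
#             self.cluster_api.delete_job(job.metadata.name)
#         for config_map in self.cluster_api.list_config_maps(label_selector=selector):
#             self.cluster_api.delete_config_map(config_map.metadata.name)
#         for pvc in self.cluster_api.list_persistent_volume_claims(label_selector=selector):
#             self.cluster_api.delete_persistent_volume_claim(pvc.metadata.name)
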
def test_create_stage_data_persistent_volumes(self):
    manager = JobManager(cluster_api=Mock(), config=Mock(), job=self.mock_job)
    manager.create_stage_data_persistent_volumes(stage_data_size_in_g=10)
    manager.cluster_api.create_persistent_volume_claim.assert_has_calls([
        call('job-data-51-jpb',
             storage_class_name=manager.storage_class_name,
             storage_size_in_g=10,
             labels=self.expected_metadata_labels)
    ])

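# Resource names throughout these tests follow a '<step>-<job id>-<suffix>'
# pattern (e.g. 'job-data-51-jpb'); the '51' and 'jpb' values presumably come
# from the self.mock_job fixture configured in setUp (not shown in this section).
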
def test_cleanup_run_workflow_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.cleanup_run_workflow_job()
    mock_cluster_api.delete_job.assert_called_with('run-workflow-51-jpb')
    # the volumes must survive this step so later steps can organize and save the output
    mock_cluster_api.delete_persistent_volume_claim.assert_not_called()

def test_make_job_labels(self):
    manager = JobManager(cluster_api=Mock(), config=Mock(), job=self.mock_job)
    expected_label_dict = {
        'bespin-job': 'true',
        'bespin-job-id': '51',
        'bespin-job-step': 'stage_data'
    }
    self.assertEqual(
        manager.make_job_labels(job_step_type=JobStepTypes.STAGE_DATA),
        expected_label_dict)

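# A minimal sketch of the labeling scheme this test expects, assuming
# JobStepTypes values are the plain step strings; the real implementation
# may differ:
#
#     def make_job_labels(self, job_step_type):
#         return {
#             'bespin-job': 'true',
#             'bespin-job-id': str(self.job.id),
#             'bespin-job-step': job_step_type,
#         }
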
def test_cleanup_stage_data_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock()
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.cleanup_stage_data_job()
    mock_cluster_api.delete_job.assert_called_with('stage-data-51-jpb')
    mock_cluster_api.delete_config_map.assert_called_with('stage-data-51-jpb')

def test_cleanup_organize_output_project_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.cleanup_organize_output_project_job()
    mock_cluster_api.delete_config_map.assert_called_with('organize-output-51-jpb')
    mock_cluster_api.delete_job.assert_called_with('organize-output-51-jpb')

def test_cleanup_record_output_project_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.cleanup_record_output_project_job()
    mock_cluster_api.delete_job.assert_called_with('record-output-project-51-jpb')
    mock_cluster_api.delete_persistent_volume_claim.assert_has_calls(
        [call('output-data-51-jpb')])

def test_read_record_output_project_details_pod_not_found(self):
    mock_cluster_api = Mock()
    mock_cluster_api.list_pods.return_value = []
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    with self.assertRaises(ValueError) as raised_exception:
        manager.read_record_output_project_details()
    self.assertEqual(str(raised_exception.exception),
                     'Incorrect number of pods for record output step: 0')

def test_create_run_workflow_persistent_volumes(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.create_run_workflow_persistent_volumes()
    mock_cluster_api.create_persistent_volume_claim.assert_called_with(
        'output-data-51-jpb',
        storage_class_name='nfs',
        storage_size_in_g=3,
        labels=self.expected_metadata_labels)

def test_read_record_output_project_details_missing_fields(self):
    mock_cluster_api = Mock()
    mock_pod = Mock()
    mock_pod.metadata.name = 'mypod'
    mock_pod.metadata.annotations = {'project_id': '123'}
    mock_cluster_api.list_pods.return_value = [mock_pod]
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    with self.assertRaises(ValueError) as raised_exception:
        manager.read_record_output_project_details()
    self.assertEqual(str(raised_exception.exception),
                     'Missing readme_file_id in pod annotations: mypod')

    mock_pod.metadata.annotations = {'readme_file_id': '456'}
    with self.assertRaises(ValueError) as raised_exception:
        manager.read_record_output_project_details()
    self.assertEqual(str(raised_exception.exception),
                     'Missing project_id in pod annotations: mypod')

    mock_pod.metadata.annotations = {}
    with self.assertRaises(ValueError) as raised_exception:
        manager.read_record_output_project_details()
    self.assertEqual(str(raised_exception.exception),
                     'Missing project_id in pod annotations: mypod')

def test_cleanup_save_output_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.cleanup_save_output_job()
    mock_cluster_api.delete_job.assert_called_with('save-output-51-jpb')
    mock_cluster_api.delete_config_map.assert_called_with('save-output-51-jpb')
    # delete the job data volume once the running workflow completes and its output is saved
    # (assert_has_calls takes no message argument; passing one would silently be
    # treated as any_order, so the explanation lives in this comment instead)
    mock_cluster_api.delete_persistent_volume_claim.assert_has_calls([
        call('job-data-51-jpb'),
    ])

def test_create_record_output_project_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    mock_config.record_output_project_settings.service_account_name = 'annotation-writer-sa'
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.create_record_output_project_job()
    args, kwargs = mock_cluster_api.create_job.call_args
    name, batch_spec = args
    self.assertEqual(name, 'record-output-project-51-jpb')  # job name
    self.assertEqual(batch_spec.name, 'record-output-project-51-jpb')  # job spec name
    self.assertEqual(batch_spec.service_account_name,
                     'annotation-writer-sa')  # service account to use for the job
    self.assertEqual(batch_spec.labels['bespin-job-id'], '51')  # Bespin job id stored in a label
    self.assertEqual(batch_spec.labels['bespin-job-step'],
                     'record_output_project')  # store the job step in a label
    job_container = batch_spec.container
    self.assertEqual(job_container.name, 'record-output-project-51-jpb')  # container name
    self.assertEqual(job_container.image_name,
                     self.mock_job.k8s_settings.record_output_project.image_name,
                     'record output project image name is based on a job setting')
    self.assertEqual(job_container.command, ['sh'],
                     'record output project base command is sh')
    self.assertEqual(job_container.args,
                     ['/bespin/output-data/annotate_project_details.sh'],
                     'runs annotate_project_details script')
    self.assertEqual(job_container.env_dict['MY_POD_NAME'].field_path,
                     'metadata.name',
                     'record output project receives pod name in MY_POD_NAME')
    self.assertEqual(len(job_container.volumes), 1)
    job_data_volume = job_container.volumes[0]
    self.assertEqual(job_data_volume.name, 'output-data-51-jpb')
    self.assertEqual(job_data_volume.mount_path, '/bespin/output-data')
    self.assertEqual(job_data_volume.volume_claim_name, 'output-data-51-jpb')
    self.assertEqual(job_data_volume.read_only, True)

def test_read_record_output_project_details(self):
    mock_cluster_api = Mock()
    mock_pod = Mock()
    mock_pod.metadata.annotations = {
        'project_id': '123',
        'readme_file_id': '456'
    }
    mock_cluster_api.list_pods.return_value = [mock_pod]
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    project_id, readme_file_id = manager.read_record_output_project_details()
    self.assertEqual(project_id, '123')
    self.assertEqual(readme_file_id, '456')
    mock_cluster_api.list_pods.assert_called_with(
        label_selector='bespin-job=true,bespin-job-id=51,bespin-job-step=record_output_project')

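# A minimal sketch of the behavior the three read_record_output_project_details
# tests pin down; 'self.label_selector' is an assumed helper holding the
# 'bespin-job=true,bespin-job-id=<id>' selector, and the real implementation
# may differ:
#
#     def read_record_output_project_details(self):
#         selector = '{},bespin-job-step=record_output_project'.format(self.label_selector)
#         pods = self.cluster_api.list_pods(label_selector=selector)
#         if len(pods) != 1:
#             raise ValueError('Incorrect number of pods for record output step: {}'.format(len(pods)))
#         annotations = pods[0].metadata.annotations
#         for key in ['project_id', 'readme_file_id']:
#             if key not in annotations:
#                 raise ValueError('Missing {} in pod annotations: {}'.format(key, pods[0].metadata.name))
#         return annotations['project_id'], annotations['readme_file_id']
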
class K8sJobActions(BaseJobActions):
    """
    Used by K8sLando to handle messages at a job specific context.
    """
    def __init__(self, settings):
        super(K8sJobActions, self).__init__(settings)
        self.cluster_api = settings.get_cluster_api()
        self.bespin_job = self.job_api.get_job()
        self.manager = JobManager(self.cluster_api, settings.config, self.bespin_job)

    def _set_job_state(self, state):
        # Keep cached bespin_job state up to date
        super(K8sJobActions, self)._set_job_state(state)
        self.bespin_job.state = state

    def _set_job_step(self, step):
        # Keep cached bespin_job step up to date
        super(K8sJobActions, self)._set_job_step(step)
        self.bespin_job.step = step

    def job_is_at_state_and_step(self, state, step):
        return self.bespin_job.state == state and self.bespin_job.step == step

    def start_job(self, payload):
        """
        Request from user to start running a job. This starts a job to stage user input data into a volume.
        :param payload: StartJobPayload: contains job_id we should start
        """
        self._set_job_state(JobStates.RUNNING)
        self._set_job_step(JobSteps.CREATE_VM)
        input_files = self.job_api.get_input_files()
        input_files_size_in_g = self._calculate_input_data_size_in_g(input_files)
        # The stage data volume contains the workflow, job order, file metadata, and the user's input files.
        stage_data_volume_size_in_g = self.config.base_stage_data_volume_size_in_g + input_files_size_in_g
        self._show_status("Creating stage data persistent volumes")
        self.manager.create_stage_data_persistent_volumes(stage_data_volume_size_in_g)
        self.perform_staging_step(input_files)

    @staticmethod
    def _calculate_input_data_size_in_g(input_files):
        total_bytes = 0
        for dds_file in input_files.dds_files:
            total_bytes += dds_file.size
        for url_file in input_files.url_files:
            total_bytes += url_file.size
        return math.ceil(float(total_bytes) / (1024.0 * 1024.0 * 1024.0))

    def perform_staging_step(self, input_files):
        self._set_job_step(JobSteps.STAGING)
        self._show_status("Creating Stage data job")
        job = self.manager.create_stage_data_job(input_files)
        self._show_status("Launched stage data job: {}".format(job.metadata.name))

    def stage_job_complete(self, payload):
        """
        Message from worker that the staging job step is complete and successful.
        Cleans up the stage data job, creates the volumes for running the workflow,
        and launches the run workflow job.
        :param payload: JobStepCompletePayload: contains job id and vm_instance_name(unused)
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING, JobSteps.STAGING):
            # ignore request to perform incompatible step
            logging.info("Ignoring request to run job:{} wrong step/state".format(self.job_id))
            return
        self._set_job_step(JobSteps.RUNNING)
        self._show_status("Cleaning up after stage data")
        self.manager.cleanup_stage_data_job()
        self._show_status("Creating volumes for running workflow")
        self.manager.create_run_workflow_persistent_volumes()
        self.run_workflow_job()

    def run_workflow_job(self):
        self._show_status("Creating run workflow job")
        job = self.manager.create_run_workflow_job()
        self._show_status("Launched run workflow job: {}".format(job.metadata.name))

    def run_job_complete(self, payload):
        """
        Message from worker that the run job step is complete and successful.
        Cleans up the run workflow job and proceeds to organizing the output project.
        :param payload: JobStepCompletePayload: contains job id and vm_instance_name(unused)
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING, JobSteps.RUNNING):
            # ignore request to perform incompatible step
            logging.info("Ignoring request to store output for job:{} wrong step/state".format(self.job_id))
            return
        self.manager.cleanup_run_workflow_job()
        self.organize_output_project()

    def organize_output_project(self):
        self._set_job_step(JobSteps.ORGANIZE_OUTPUT_PROJECT)
        self._show_status("Creating organize output project job")
        methods_document = self.job_api.get_workflow_methods_document(
            self.bespin_job.workflow.methods_document)
        methods_content = None
        if methods_document:
            methods_content = methods_document.content
        job = self.manager.create_organize_output_project_job(methods_content)
        self._show_status("Launched organize output project job: {}".format(job.metadata.name))

    def organize_output_complete(self, payload):
        if not self.job_is_at_state_and_step(JobStates.RUNNING, JobSteps.ORGANIZE_OUTPUT_PROJECT):
            # ignore request to perform incompatible step
            logging.info("Ignoring request to organize output project for job:{} wrong step/state"
                         .format(self.job_id))
            return
        self.manager.cleanup_organize_output_project_job()
        self.save_output()

    def save_output(self):
        store_output_data = self.job_api.get_store_output_job_data()
        self._set_job_step(JobSteps.STORING_JOB_OUTPUT)
        self._show_status("Creating store output job")
        job = self.manager.create_save_output_job(store_output_data.share_dds_ids)
        self._show_status("Launched save output job: {}".format(job.metadata.name))

    def store_job_output_complete(self, payload):
        """
        Message from worker that the store output job step is complete and successful.
        Records information about the resulting output project and frees cloud resources.
        :param payload: JobStepCompletePayload: contains job id and vm_instance_name(unused)
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING, JobSteps.STORING_JOB_OUTPUT):
            # ignore request to perform incompatible step
            logging.info("Ignoring request to cleanup for job:{} wrong step/state".format(self.job_id))
            return
        self.manager.cleanup_save_output_job()
        self._set_job_step(JobSteps.RECORD_OUTPUT_PROJECT)
        self._show_status("Creating record output project job")
        job = self.manager.create_record_output_project_job()
        self._show_status("Launched record output project job: {}".format(job.metadata.name))

    def record_output_project_complete(self, payload):
        """
        Records the output project id and readme file id read from the record output project pod.
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING, JobSteps.RECORD_OUTPUT_PROJECT):
            # ignore request to perform incompatible step
            logging.info("Ignoring request to cleanup for job:{} wrong step/state".format(self.job_id))
            return
        project_id, readme_file_id = self.manager.read_record_output_project_details()
        self._show_status("Saving project id {} and readme id {}.".format(project_id, readme_file_id))
        self.job_api.save_project_details(project_id, readme_file_id)
        self.manager.cleanup_record_output_project_job()
        self._show_status("Marking job finished")
        self._set_job_step(JobSteps.NONE)
        self._set_job_state(JobStates.FINISHED)

    def restart_job(self, payload):
        """
        Request from user to resume running a job. It will resume based on the value of
        bespin_job.step returned from the job api.
        Canceled jobs will always restart from the beginning.
        :param payload: RestartJobPayload: contains job_id we should restart
        """
        full_restart = False
        if self.bespin_job.state != JobStates.CANCELED:
            self.manager.cleanup_jobs_and_config_maps()
            if self.bespin_job.step == JobSteps.STAGING:
                self._set_job_state(JobStates.RUNNING)
                input_files = self.job_api.get_input_files()
                self.perform_staging_step(input_files)
            elif self.bespin_job.step == JobSteps.RUNNING:
                self._set_job_state(JobStates.RUNNING)
                self.run_workflow_job()
            elif self.bespin_job.step == JobSteps.ORGANIZE_OUTPUT_PROJECT:
                self._set_job_state(JobStates.RUNNING)
                self.organize_output_project()
            elif self.bespin_job.step == JobSteps.STORING_JOB_OUTPUT:
                self._set_job_state(JobStates.RUNNING)
                self.save_output()
            elif self.bespin_job.step == JobSteps.RECORD_OUTPUT_PROJECT:
                self.cannot_restart_step_error(step_name="record output project")
            else:
                full_restart = True
        else:
            full_restart = True
        if full_restart:
            self.manager.cleanup_all()
            self.start_job(None)

    def cancel_job(self, payload):
        """
        Request from user to cancel a running job.
        Sets the state to canceled and deletes the associated jobs, config maps, and pvcs.
        :param payload: CancelJobPayload: contains job id we should cancel
        """
        self._set_job_step(JobSteps.NONE)
        self._set_job_state(JobStates.CANCELED)
        self._show_status("Canceling job")
        self.manager.cleanup_all()

    def stage_job_error(self, payload):
        """
        Message from watcher that the staging job had an error
        :param payload: JobStepErrorPayload: info about error
        """
        self._job_step_failed("Staging job failed", payload)

    def run_job_error(self, payload):
        """
        Message from watcher that the run workflow job had an error
        :param payload: JobStepErrorPayload: info about error
        """
        self._job_step_failed("Running job failed", payload)

    def organize_output_error(self, payload):
        """
        Message from watcher that the organize output project job had an error
        :param payload: JobStepErrorPayload: info about error
        """
        self._job_step_failed("Organize output job failed", payload)

    def store_job_output_error(self, payload):
        """
        Message from watcher that the store output job had an error
        :param payload: JobStepErrorPayload: info about error
        """
        self._job_step_failed("Storing job output failed", payload)

    def record_output_project_error(self, payload):
        self._job_step_failed("Recording output project failed", payload)

    def _job_step_failed(self, message, payload):
        self._set_job_state(JobStates.ERRORED)
        self._show_status(message)
        self._log_error(message=payload.message)

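# For reference, the happy-path message flow through the actions above is:
# start_job -> stage_job_complete -> run_job_complete -> organize_output_complete
# -> store_job_output_complete -> record_output_project_complete (job FINISHED).
# _calculate_input_data_size_in_g rounds up to whole gigabytes: for example, a
# job with a 1.5 GiB DukeDS file and a 0.2 GiB URL file yields math.ceil(1.7) == 2,
# and start_job adds config.base_stage_data_volume_size_in_g on top of that when
# sizing the stage data volume.
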
def test_create_stage_data_job_packed_workflow(self):
    mock_cluster_api = Mock()
    mock_config = Mock()
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    mock_input_files = Mock(
        dds_files=[Mock(destination_path='file1.txt', file_id='myid')])
    manager.create_stage_data_job(input_files=mock_input_files)

    # it should have created a config map of what needs to be staged
    config_map_payload = {
        'stagedata.json': json.dumps({
            "items": [{
                "type": "url",
                "source": "someurl.cwl",
                "dest": "/bespin/job-data/workflow/someurl.cwl"
            }, {
                "type": "write",
                "source": {
                    "threads": 2
                },
                "dest": "/bespin/job-data/job-order.json"
            }, {
                "type": "DukeDS",
                "source": "myid",
                "dest": "/bespin/job-data/file1.txt"
            }]
        })
    }
    mock_cluster_api.create_config_map.assert_called_with(
        name='stage-data-51-jpb',
        data=config_map_payload,
        labels=self.expected_metadata_labels)

    # it should have created a job
    args, kwargs = mock_cluster_api.create_job.call_args
    name, batch_spec = args
    self.assertEqual(name, 'stage-data-51-jpb')  # job name
    self.assertEqual(batch_spec.name, 'stage-data-51-jpb')  # job spec name
    self.assertEqual(batch_spec.labels['bespin-job-id'], '51')  # Bespin job id stored in a label
    self.assertEqual(batch_spec.labels['bespin-job-step'], 'stage_data')  # store the job step in a label
    job_container = batch_spec.container
    self.assertEqual(job_container.name, 'stage-data-51-jpb')  # container name
    self.assertEqual(job_container.image_name,
                     self.mock_job.k8s_settings.stage_data.image_name,
                     'stage data image name is based on a job setting')
    self.assertEqual(job_container.command,
                     self.mock_job.k8s_settings.stage_data.base_command,
                     'stage data command is based on a job setting')
    self.assertEqual(
        job_container.args,
        ['/bespin/config/stagedata.json',
         '/bespin/job-data/workflow-input-files-metadata.json'],
        'stage data command should receive config file as an argument')
    self.assertEqual(
        job_container.env_dict, {'DDSCLIENT_CONF': '/etc/ddsclient/config'},
        'DukeDS environment variable should point to the config mapped config file')
    self.assertEqual(job_container.requested_cpu,
                     self.mock_job.k8s_settings.stage_data.cpus,
                     'stage data requested cpu is based on a job setting')
    self.assertEqual(job_container.requested_memory,
                     self.mock_job.k8s_settings.stage_data.memory,
                     'stage data requested memory is based on a job setting')
    self.assertEqual(len(job_container.volumes), 3)

    user_data_volume = job_container.volumes[0]
    self.assertEqual(user_data_volume.name, 'job-data-51-jpb')
    self.assertEqual(user_data_volume.mount_path, '/bespin/job-data')
    self.assertEqual(user_data_volume.volume_claim_name, 'job-data-51-jpb')
    self.assertEqual(user_data_volume.read_only, False)

    config_map_volume = job_container.volumes[1]
    self.assertEqual(config_map_volume.name, 'stage-data-51-jpb')
    self.assertEqual(config_map_volume.mount_path, '/bespin/config')
    self.assertEqual(config_map_volume.config_map_name, 'stage-data-51-jpb')
    self.assertEqual(config_map_volume.source_key, 'stagedata.json')
    self.assertEqual(config_map_volume.source_path, 'stagedata.json')

    secret_volume = job_container.volumes[2]
    self.assertEqual(secret_volume.name, 'data-store-51-jpb')
    self.assertEqual(secret_volume.mount_path, '/etc/ddsclient')
    self.assertEqual(secret_volume.secret_name,
                     mock_config.data_store_settings.secret_name,
                     'name of DukeDS secret is based on a config setting')

def test_create_run_workflow_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.create_run_workflow_job()

    # it should have created a job to run the workflow with several volumes mounted
    args, kwargs = mock_cluster_api.create_job.call_args
    name, batch_spec = args
    self.assertEqual(name, 'run-workflow-51-jpb')  # job name
    self.assertEqual(batch_spec.name, 'run-workflow-51-jpb')  # job spec name
    self.assertEqual(batch_spec.labels['bespin-job-id'], '51')  # Bespin job id stored in a label
    self.assertEqual(batch_spec.labels['bespin-job-step'], 'run_workflow')  # store the job step in a label
    job_container = batch_spec.container
    self.assertEqual(job_container.name, 'run-workflow-51-jpb')  # container name
    self.assertEqual(job_container.image_name,
                     self.mock_job.k8s_settings.run_workflow.image_name,
                     'run workflow image name is based on job settings')
    expected_bash_command = 'cwltool --cachedir /bespin/output-data/tmpout/ ' \
                            '--outdir /bespin/output-data/results/ ' \
                            '--max-ram 1G --max-cores 2 ' \
                            '--usage-report /bespin/output-data/job-51-jpb-resource-usage.json ' \
                            '--stdout /bespin/output-data/bespin-workflow-output.json ' \
                            '--stderr /bespin/output-data/bespin-workflow-output.log ' \
                            '/bespin/job-data/workflow/someurl.cwl#main ' \
                            '/bespin/job-data/job-order.json'.split(' ')
    self.assertEqual(job_container.command, expected_bash_command,
                     'run workflow command combines job settings and staged files')
    self.assertEqual(job_container.env_dict['CALRISSIAN_POD_NAME'].field_path,
                     'metadata.name',
                     'We should store the pod name in a CALRISSIAN_POD_NAME environment variable')
    self.assertEqual(job_container.requested_cpu,
                     self.mock_job.k8s_settings.run_workflow.cpus,
                     'run workflow requested cpu is based on a job setting')
    self.assertEqual(job_container.requested_memory,
                     self.mock_job.k8s_settings.run_workflow.memory,
                     'run workflow requested memory is based on a job setting')
    self.assertEqual(len(job_container.volumes), 3)

    job_data_volume = job_container.volumes[0]
    self.assertEqual(job_data_volume.name, 'job-data-51-jpb')
    self.assertEqual(job_data_volume.mount_path, '/bespin/job-data')
    self.assertEqual(job_data_volume.volume_claim_name, 'job-data-51-jpb')
    self.assertEqual(job_data_volume.read_only, True,
                     'job data should be a read only volume')

    output_data_volume = job_container.volumes[1]
    self.assertEqual(output_data_volume.name, 'output-data-51-jpb')
    self.assertEqual(output_data_volume.mount_path, '/bespin/output-data')
    self.assertEqual(output_data_volume.volume_claim_name, 'output-data-51-jpb')
    self.assertEqual(output_data_volume.read_only, False)

    system_data_volume = job_container.volumes[2]
    self.assertEqual(system_data_volume.name, 'system-data-51-jpb')
    self.assertEqual(system_data_volume.mount_path,
                     mock_config.run_workflow_settings.system_data_volume.mount_path,
                     'mount path for the system volume is based on a config setting')
    self.assertEqual(system_data_volume.volume_claim_name,
                     mock_config.run_workflow_settings.system_data_volume.volume_claim_name,
                     'pvc name for the system volume is based on a config setting')
    self.assertEqual(system_data_volume.read_only, True,
                     'system data should be read only for running workflow')

def test_create_organize_output_project_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.create_organize_output_project_job(methods_document_content='markdown')

    # it should have created a job to organize the output project with several volumes mounted
    args, kwargs = mock_cluster_api.create_job.call_args
    name, batch_spec = args
    self.assertEqual(name, 'organize-output-51-jpb')  # job name
    self.assertEqual(batch_spec.name, 'organize-output-51-jpb')  # job spec name
    self.assertEqual(batch_spec.labels['bespin-job-id'], '51')  # Bespin job id stored in a label
    self.assertEqual(batch_spec.labels['bespin-job-step'], 'organize_output')  # store the job step in a label
    job_container = batch_spec.container
    self.assertEqual(job_container.name, 'organize-output-51-jpb')  # container name
    self.assertEqual(job_container.image_name,
                     self.mock_job.k8s_settings.organize_output.image_name,
                     'organize output image name is based on job settings')
    self.assertEqual(job_container.command,
                     self.mock_job.k8s_settings.organize_output.base_command,
                     'organize output command is based on job settings')
    self.assertEqual(job_container.requested_cpu,
                     self.mock_job.k8s_settings.organize_output.cpus,
                     'organize output requested cpu is based on a job setting')
    self.assertEqual(job_container.requested_memory,
                     self.mock_job.k8s_settings.organize_output.memory,
                     'organize output requested memory is based on a job setting')
    mock_cluster_api.create_config_map.assert_called_with(
        name='organize-output-51-jpb',
        data={
            'organizeoutput.json': json.dumps({
                "bespin_job_id": '51',
                "destination_dir": "/bespin/output-data/results",
                "downloaded_workflow_path": "/bespin/job-data/workflow/someurl.cwl",
                "workflow_to_read": "/bespin/job-data/workflow/someurl.cwl",
                "workflow_type": "packed",
                "job_order_path": "/bespin/job-data/job-order.json",
                "bespin_workflow_stdout_path": "/bespin/output-data/bespin-workflow-output.json",
                "bespin_workflow_stderr_path": "/bespin/output-data/bespin-workflow-output.log",
                "methods_template": "markdown",
                "additional_log_files": ["/bespin/output-data/job-51-jpb-resource-usage.json"]
            })
        },
        labels={
            'bespin-job': 'true',
            'bespin-job-id': '51'
        })
    self.assertEqual(len(job_container.volumes), 3)

    job_data_volume = job_container.volumes[0]
    self.assertEqual(job_data_volume.name, 'job-data-51-jpb')
    self.assertEqual(job_data_volume.mount_path, '/bespin/job-data')
    self.assertEqual(job_data_volume.volume_claim_name, 'job-data-51-jpb')
    self.assertEqual(job_data_volume.read_only, True,
                     'job data should be a read only volume')

    output_data_volume = job_container.volumes[1]
    self.assertEqual(output_data_volume.name, 'output-data-51-jpb')
    self.assertEqual(output_data_volume.mount_path, '/bespin/output-data')
    self.assertEqual(output_data_volume.volume_claim_name, 'output-data-51-jpb')
    self.assertEqual(output_data_volume.read_only, False)

    config_map_volume = job_container.volumes[2]
    self.assertEqual(config_map_volume.name, 'organize-output-51-jpb')
    self.assertEqual(config_map_volume.mount_path, '/bespin/config')
    self.assertEqual(config_map_volume.config_map_name, 'organize-output-51-jpb')
    self.assertEqual(config_map_volume.source_key, 'organizeoutput.json')
    self.assertEqual(config_map_volume.source_path, 'organizeoutput.json')

def test_create_save_output_job(self):
    mock_cluster_api = Mock()
    mock_config = Mock(storage_class_name='nfs')
    manager = JobManager(cluster_api=mock_cluster_api, config=mock_config, job=self.mock_job)
    manager.create_save_output_job(share_dds_ids=['123', '456'])

    # it should have created a config map describing what needs to be saved
    config_map_payload = {
        'saveoutput.json': json.dumps({
            "destination": "Bespin myworkflow v1 myjob 2019-03-11",
            "readme_file_path": "results/docs/README.md",
            "paths": ["/bespin/output-data/results"],
            "share": {
                "dds_user_ids": ["123", "456"]
            },
            "activity": {
                "name": "myjob - Bespin Job 51",
                "description": "Bespin Job 51 - Workflow myworkflow v1",
                "started_on": "",
                "ended_on": "",
                "input_file_versions_json_path": "/bespin/job-data/workflow-input-files-metadata.json",
                "workflow_output_json_path": "/bespin/output-data/bespin-workflow-output.json"
            }
        })
    }
    mock_cluster_api.create_config_map.assert_called_with(
        name='save-output-51-jpb',
        data=config_map_payload,
        labels=self.expected_metadata_labels)

    # it should have created a job
    args, kwargs = mock_cluster_api.create_job.call_args
    name, batch_spec = args
    self.assertEqual(name, 'save-output-51-jpb')  # job name
    self.assertEqual(batch_spec.name, 'save-output-51-jpb')  # job spec name
    self.assertEqual(batch_spec.labels['bespin-job-id'], '51')  # Bespin job id stored in a label
    self.assertEqual(batch_spec.labels['bespin-job-step'], 'save_output')  # store the job step in a label
    job_container = batch_spec.container
    self.assertEqual(job_container.name, 'save-output-51-jpb')  # container name
    self.assertEqual(job_container.image_name,
                     self.mock_job.k8s_settings.save_output.image_name,
                     'save output image name is based on a job setting')
    self.assertEqual(job_container.command,
                     self.mock_job.k8s_settings.save_output.base_command,
                     'save output command is based on a job setting')
    self.assertEqual(
        job_container.args,
        ['/bespin/config/saveoutput.json',
         '/bespin/output-data/annotate_project_details.sh'],
        'save output command should receive config file and output filenames as arguments')
    self.assertEqual(
        job_container.env_dict, {'DDSCLIENT_CONF': '/etc/ddsclient/config'},
        'DukeDS environment variable should point to the config mapped config file')
    self.assertEqual(job_container.requested_cpu,
                     self.mock_job.k8s_settings.save_output.cpus,
                     'save output requested cpu is based on a job setting')
    self.assertEqual(job_container.requested_memory,
                     self.mock_job.k8s_settings.save_output.memory,
                     'save output requested memory is based on a job setting')
    self.assertEqual(len(job_container.volumes), 4)

    job_data_volume = job_container.volumes[0]
    self.assertEqual(job_data_volume.name, 'job-data-51-jpb')
    self.assertEqual(job_data_volume.mount_path, '/bespin/job-data')
    self.assertEqual(job_data_volume.volume_claim_name, 'job-data-51-jpb')
    self.assertEqual(job_data_volume.read_only, True)

    output_data_volume = job_container.volumes[1]
    self.assertEqual(output_data_volume.name, 'output-data-51-jpb')
    self.assertEqual(output_data_volume.mount_path, '/bespin/output-data')
    self.assertEqual(output_data_volume.volume_claim_name, 'output-data-51-jpb')
    self.assertEqual(output_data_volume.read_only,
                     False)  # writable so we can write project_details file

    config_map_volume = job_container.volumes[2]
    self.assertEqual(config_map_volume.name, 'stage-data-51-jpb')
    self.assertEqual(config_map_volume.mount_path, '/bespin/config')
    self.assertEqual(config_map_volume.config_map_name, 'save-output-51-jpb')
    self.assertEqual(config_map_volume.source_key, 'saveoutput.json')
    self.assertEqual(config_map_volume.source_path, 'saveoutput.json')

    secret_volume = job_container.volumes[3]
    self.assertEqual(secret_volume.name, 'data-store-51-jpb')
    self.assertEqual(secret_volume.mount_path, '/etc/ddsclient')
    self.assertEqual(secret_volume.secret_name,
                     mock_config.data_store_settings.secret_name,
                     'name of DukeDS secret is based on a config setting')