Example #1
    def test_cleanup_all(self):
        self.mock_job.id = 1
        self.mock_job.metadata.name = 'job_1'
        mock_config_map = Mock()
        mock_config_map.metadata.name = 'config_map_1'
        mock_pvc = Mock()
        mock_pvc.metadata.name = 'pvc_1'

        mock_cluster_api = Mock()
        mock_cluster_api.list_jobs.return_value = [self.mock_job]
        mock_cluster_api.list_config_maps.return_value = [mock_config_map]
        mock_cluster_api.list_persistent_volume_claims.return_value = [
            mock_pvc
        ]
        mock_config = Mock(storage_class_name='nfs')

        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)
        manager.cleanup_all()

        mock_cluster_api.delete_job.assert_called_with('job_1')
        mock_cluster_api.delete_config_map.assert_called_with('config_map_1')
        mock_cluster_api.delete_persistent_volume_claim.assert_called_with(
            'pvc_1')

        mock_cluster_api.list_persistent_volume_claims.assert_called_with(
            label_selector='bespin-job=true,bespin-job-id=1')
        mock_cluster_api.list_jobs.assert_called_with(
            label_selector='bespin-job=true,bespin-job-id=1')
        mock_cluster_api.list_config_maps.assert_called_with(
            label_selector='bespin-job=true,bespin-job-id=1')
Example #2
    def test_create_stage_data_persistent_volumes(self):
        manager = JobManager(cluster_api=Mock(),
                             config=Mock(),
                             job=self.mock_job)
        manager.create_stage_data_persistent_volumes(stage_data_size_in_g=10)
        manager.cluster_api.create_persistent_volume_claim.assert_has_calls([
            call('job-data-51-jpb',
                 storage_class_name=manager.storage_class_name,
                 storage_size_in_g=10,
                 labels=self.expected_metadata_labels)
        ])
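The call shape this test pins down suggests an implementation along these lines; the job-data volume name ('job-data-51-jpb' above) and the make_metadata_labels helper are assumptions, not taken from the source.

    # Hypothetical sketch of create_stage_data_persistent_volumes.
    def create_stage_data_persistent_volumes(self, stage_data_size_in_g):
        self.cluster_api.create_persistent_volume_claim(
            self.job_data_volume_name,  # assumed to yield 'job-data-51-jpb'
            storage_class_name=self.storage_class_name,
            storage_size_in_g=stage_data_size_in_g,
            labels=self.make_metadata_labels())  # assumed helper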
Example #3
    def test_cleanup_run_workflow_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.cleanup_run_workflow_job()

        mock_cluster_api.delete_job.assert_called_with('run-workflow-51-jpb')
        mock_cluster_api.delete_persistent_volume_claim.assert_not_called()
Example #4
    def test_make_job_labels(self):
        manager = JobManager(cluster_api=Mock(),
                             config=Mock(),
                             job=self.mock_job)
        expected_label_dict = {
            'bespin-job': 'true',
            'bespin-job-id': '51',
            'bespin-job-step': 'stage_data'
        }
        self.assertEqual(
            manager.make_job_labels(job_step_type=JobStepTypes.STAGE_DATA),
            expected_label_dict)
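The expected dict fully determines make_job_labels; a minimal sketch, assuming JobStepTypes values are plain strings and the job id lives on self.job.id:

    # Hypothetical sketch consistent with the expected labels above.
    def make_job_labels(self, job_step_type):
        return {
            'bespin-job': 'true',
            'bespin-job-id': str(self.job.id),
            'bespin-job-step': job_step_type
        }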
Example #5
    def test_cleanup_stage_data_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock()
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.cleanup_stage_data_job()

        mock_cluster_api.delete_job.assert_called_with('stage-data-51-jpb')
        mock_cluster_api.delete_config_map.assert_called_with(
            'stage-data-51-jpb')
Example #6
    def test_cleanup_organize_output_project_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.cleanup_organize_output_project_job()

        mock_cluster_api.delete_config_map.assert_called_with(
            'organize-output-51-jpb')
        mock_cluster_api.delete_job.assert_called_with(
            'organize-output-51-jpb')
Example #7
    def test_cleanup_record_output_project_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.cleanup_record_output_project_job()

        mock_cluster_api.delete_job.assert_called_with(
            'record-output-project-51-jpb')
        mock_cluster_api.delete_persistent_volume_claim.assert_has_calls(
            [call('output-data-51-jpb')])
Example #8
    def test_read_record_output_project_details_pod_not_found(self):
        mock_cluster_api = Mock()
        mock_cluster_api.list_pods.return_value = []
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        with self.assertRaises(ValueError) as raised_exception:
            manager.read_record_output_project_details()

        self.assertEqual(str(raised_exception.exception),
                         'Incorrect number of pods for record output step: 0')
Example #9
    def test_create_run_workflow_persistent_volumes(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.create_run_workflow_persistent_volumes()

        mock_cluster_api.create_persistent_volume_claim.assert_called_with(
            'output-data-51-jpb',
            storage_class_name='nfs',
            storage_size_in_g=3,
            labels=self.expected_metadata_labels)
Example #10
    def test_read_record_output_project_details_missing_fields(self):
        mock_cluster_api = Mock()
        mock_pod = Mock()
        mock_pod.metadata.name = 'mypod'
        mock_pod.metadata.annotations = {'project_id': '123'}
        mock_cluster_api.list_pods.return_value = [mock_pod]
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        with self.assertRaises(ValueError) as raised_exception:
            manager.read_record_output_project_details()
        self.assertEqual(str(raised_exception.exception),
                         'Missing readme_file_id in pod annotations: mypod')

        mock_pod.metadata.annotations = {'readme_file_id': '456'}
        with self.assertRaises(ValueError) as raised_exception:
            manager.read_record_output_project_details()
        self.assertEqual(str(raised_exception.exception),
                         'Missing project_id in pod annotations: mypod')

        mock_pod.metadata.annotations = {}
        with self.assertRaises(ValueError) as raised_exception:
            manager.read_record_output_project_details()
        self.assertEqual(str(raised_exception.exception),
                         'Missing project_id in pod annotations: mypod')
Example #11
    def test_cleanup_save_output_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.cleanup_save_output_job()

        mock_cluster_api.delete_job.assert_called_with('save-output-51-jpb')
        mock_cluster_api.delete_config_map.assert_called_with(
            'save-output-51-jpb')
        # delete job data volume once the run workflow job completes
        # (assert_has_calls takes no message argument, so the intent is
        # recorded here as a comment)
        mock_cluster_api.delete_persistent_volume_claim.assert_has_calls([
            call('job-data-51-jpb'),
        ])
Example #12
    def test_create_record_output_project_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        mock_config.record_output_project_settings.service_account_name = 'annotation-writer-sa'
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.create_record_output_project_job()

        args, kwargs = mock_cluster_api.create_job.call_args
        name, batch_spec = args
        self.assertEqual(name, 'record-output-project-51-jpb')  # job name
        self.assertEqual(batch_spec.name,
                         'record-output-project-51-jpb')  # job spec name
        self.assertEqual(
            batch_spec.service_account_name,
            'annotation-writer-sa')  # service account to use for the job
        self.assertEqual(batch_spec.labels['bespin-job-id'],
                         '51')  # Bespin job id stored in a label
        self.assertEqual(
            batch_spec.labels['bespin-job-step'],
            'record_output_project')  # store the job step in a label
        job_container = batch_spec.container
        self.assertEqual(job_container.name,
                         'record-output-project-51-jpb')  # container name
        self.assertEqual(
            job_container.image_name,
            self.mock_job.k8s_settings.record_output_project.image_name,
            'record output project image name is based on a job setting')
        self.assertEqual(job_container.command, ['sh'],
                         'record output project base command is sh')
        self.assertEqual(job_container.args,
                         ['/bespin/output-data/annotate_project_details.sh'],
                         'runs annotate_project_details script')
        self.assertEqual(
            job_container.env_dict['MY_POD_NAME'].field_path, 'metadata.name',
            'record output project receives pod name in MY_POD_NAME')
        self.assertEqual(len(job_container.volumes), 1)

        job_data_volume = job_container.volumes[0]
        self.assertEqual(job_data_volume.name, 'output-data-51-jpb')
        self.assertEqual(job_data_volume.mount_path, '/bespin/output-data')
        self.assertEqual(job_data_volume.volume_claim_name,
                         'output-data-51-jpb')
        self.assertEqual(job_data_volume.read_only, True)
Example #13
    def test_read_record_output_project_details(self):
        mock_cluster_api = Mock()
        mock_pod = Mock()
        mock_pod.metadata.annotations = {
            'project_id': '123',
            'readme_file_id': '456'
        }
        mock_cluster_api.list_pods.return_value = [mock_pod]
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        project_id, readme_file_id = manager.read_record_output_project_details(
        )

        self.assertEqual(project_id, '123')
        self.assertEqual(readme_file_id, '456')
        mock_cluster_api.list_pods.assert_called_with(
            label_selector=
            'bespin-job=true,bespin-job-id=51,bespin-job-step=record_output_project'
        )
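Taken together with Examples #8 and #10, this test pins down read_record_output_project_details: exactly one pod must match the record output project label selector, and both annotations must be present. A sketch under those assumptions (the _record_output_selector helper is assumed, standing in for the literal selector asserted above):

    # Hypothetical sketch combining the behavior asserted in
    # Examples #8, #10 and #13.
    def read_record_output_project_details(self):
        pods = self.cluster_api.list_pods(
            label_selector=self._record_output_selector())
        if len(pods) != 1:
            raise ValueError(
                'Incorrect number of pods for record output step: {}'.format(
                    len(pods)))
        pod = pods[0]
        for key in ['project_id', 'readme_file_id']:
            if key not in pod.metadata.annotations:
                raise ValueError('Missing {} in pod annotations: {}'.format(
                    key, pod.metadata.name))
        annotations = pod.metadata.annotations
        return annotations['project_id'], annotations['readme_file_id']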
Example #14
    def __init__(self, settings):
        super(K8sJobActions, self).__init__(settings)
        self.cluster_api = settings.get_cluster_api()
        self.bespin_job = self.job_api.get_job()
        self.manager = JobManager(self.cluster_api, settings.config,
                                  self.bespin_job)
Example #15
class K8sJobActions(BaseJobActions):
    """
    Used by K8sLando to handle messages in a job-specific context.
    """
    def __init__(self, settings):
        super(K8sJobActions, self).__init__(settings)
        self.cluster_api = settings.get_cluster_api()
        self.bespin_job = self.job_api.get_job()
        self.manager = JobManager(self.cluster_api, settings.config,
                                  self.bespin_job)

    def _set_job_state(self, state):
        # Keep cached bespin_job state up to date
        super(K8sJobActions, self)._set_job_state(state)
        self.bespin_job.state = state

    def _set_job_step(self, step):
        # Keep cached bespin_job step up to date
        super(K8sJobActions, self)._set_job_step(step)
        self.bespin_job.step = step

    def job_is_at_state_and_step(self, state, step):
        return self.bespin_job.state == state and self.bespin_job.step == step

    def start_job(self, payload):
        """
        Request from user to start running a job. This starts a job to stage user input data into a volume.
        :param payload:StartJobPayload contains job_id we should start
        """
        self._set_job_state(JobStates.RUNNING)
        self._set_job_step(JobSteps.CREATE_VM)

        input_files = self.job_api.get_input_files()
        input_files_size_in_g = self._calculate_input_data_size_in_g(
            input_files)
        # The stage data volume contains the workflow, job order, file metadata, and the user's input files.
        stage_data_volume_size_in_g = self.config.base_stage_data_volume_size_in_g + input_files_size_in_g
        self._show_status("Creating stage data persistent volumes")
        self.manager.create_stage_data_persistent_volumes(
            stage_data_volume_size_in_g)

        self.perform_staging_step(input_files)

    @staticmethod
    def _calculate_input_data_size_in_g(input_files):
        total_bytes = 0
        for dds_file in input_files.dds_files:
            total_bytes += dds_file.size
        for url_file in input_files.url_files:
            total_bytes += url_file.size
        return math.ceil(float(total_bytes) / (1024.0 * 1024.0 * 1024.0))

    def perform_staging_step(self, input_files):
        self._set_job_step(JobSteps.STAGING)
        self._show_status("Creating Stage data job")
        job = self.manager.create_stage_data_job(input_files)
        self._show_status("Launched stage data job: {}".format(
            job.metadata.name))

    def stage_job_complete(self, payload):
        """
        Message from worker that the staging job step is complete and successful.
        Sets the job state to RUNNING and puts the run job message into the queue for the worker.
        :param payload: JobStepCompletePayload: contains job id and vm_instance_name(unused)
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING,
                                             JobSteps.STAGING):
            # ignore request to perform incompatible step
            logging.info(
                "Ignoring request to run job:{} wrong step/state".format(
                    self.job_id))
            return
        self._set_job_step(JobSteps.RUNNING)
        self._show_status("Cleaning up after stage data")
        self.manager.cleanup_stage_data_job()

        self._show_status("Creating volumes for running workflow")
        self.manager.create_run_workflow_persistent_volumes()

        self.run_workflow_job()

    def run_workflow_job(self):
        self._show_status("Creating run workflow job")
        job = self.manager.create_run_workflow_job()
        self._show_status("Launched run workflow job: {}".format(
            job.metadata.name))

    def run_job_complete(self, payload):
        """
        Message from worker that the run job step is complete and successful.
        Sets the job state to STORING_OUTPUT and puts the store output message into the queue for the worker.
        :param payload: JobStepCompletePayload: contains job id and vm_instance_name(unused)
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING,
                                             JobSteps.RUNNING):
            # ignore request to perform incompatible step
            logging.info(
                "Ignoring request to store output for job:{} wrong step/state".
                format(self.job_id))
            return
        self.manager.cleanup_run_workflow_job()
        self.organize_output_project()

    def organize_output_project(self):
        self._set_job_step(JobSteps.ORGANIZE_OUTPUT_PROJECT)
        self._show_status("Creating organize output project job")
        methods_document = self.job_api.get_workflow_methods_document(
            self.bespin_job.workflow.methods_document)
        methods_content = None
        if methods_document:
            methods_content = methods_document.content
        job = self.manager.create_organize_output_project_job(methods_content)
        self._show_status("Launched organize output project job: {}".format(
            job.metadata.name))

    def organize_output_complete(self, payload):
        if not self.job_is_at_state_and_step(JobStates.RUNNING,
                                             JobSteps.ORGANIZE_OUTPUT_PROJECT):
            # ignore request to perform incompatible step
            logging.info(
                "Ignoring request to organize output project for job:{} wrong step/state"
                .format(self.job_id))
            return
        self.manager.cleanup_organize_output_project_job()
        self.save_output()

    def save_output(self):
        store_output_data = self.job_api.get_store_output_job_data()
        self._set_job_step(JobSteps.STORING_JOB_OUTPUT)
        self._show_status("Creating store output job")
        job = self.manager.create_save_output_job(
            store_output_data.share_dds_ids)
        self._show_status("Launched save output job: {}".format(
            job.metadata.name))

    def store_job_output_complete(self, payload):
        """
        Message from worker that the store output job step is complete and successful.
        Records information about the resulting output project and frees cloud resources.
        :param payload: JobStepCompletePayload: contains job id and vm_instance_name(unused)
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING,
                                             JobSteps.STORING_JOB_OUTPUT):
            # ignore request to perform incompatible step
            logging.info(
                "Ignoring request to cleanup for job:{} wrong step/state".
                format(self.job_id))
            return

        self.manager.cleanup_save_output_job()
        self._set_job_step(JobSteps.RECORD_OUTPUT_PROJECT)
        self._show_status("Creating record output project job")
        job = self.manager.create_record_output_project_job()
        self._show_status("Launched record output project job: {}".format(
            job.metadata.name))

    def record_output_project_complete(self, payload):
        """
        Records the output project id and readme file id read from the record output project pod annotations.
        """
        if not self.job_is_at_state_and_step(JobStates.RUNNING,
                                             JobSteps.RECORD_OUTPUT_PROJECT):
            # ignore request to perform incompatible step
            logging.info(
                "Ignoring request to cleanup for job:{} wrong step/state".
                format(self.job_id))
            return
        project_id, readme_file_id = self.manager.read_record_output_project_details(
        )
        self._show_status("Saving project id {} and readme id {}.".format(
            project_id, readme_file_id))
        self.job_api.save_project_details(project_id, readme_file_id)
        self.manager.cleanup_record_output_project_job()

        self._show_status("Marking job finished")
        self._set_job_step(JobSteps.NONE)
        self._set_job_state(JobStates.FINISHED)

    def restart_job(self, payload):
        """
        Request from user to resume running a job. It will resume based on the value of bespin_job.step
        returned from the job api. Canceled jobs always restart from the beginning.
        :param payload:RestartJobPayload contains job_id we should restart
        """
        full_restart = False
        if self.bespin_job.state != JobStates.CANCELED:
            self.manager.cleanup_jobs_and_config_maps()
            if self.bespin_job.step == JobSteps.STAGING:
                self._set_job_state(JobStates.RUNNING)
                input_files = self.job_api.get_input_files()
                self.perform_staging_step(input_files)
            elif self.bespin_job.step == JobSteps.RUNNING:
                self._set_job_state(JobStates.RUNNING)
                self.run_workflow_job()
            elif self.bespin_job.step == JobSteps.ORGANIZE_OUTPUT_PROJECT:
                self._set_job_state(JobStates.RUNNING)
                self.organize_output_project()
            elif self.bespin_job.step == JobSteps.STORING_JOB_OUTPUT:
                self._set_job_state(JobStates.RUNNING)
                self.save_output()
            elif self.bespin_job.step == JobSteps.RECORD_OUTPUT_PROJECT:
                self.cannot_restart_step_error(
                    step_name="record output project")
            else:
                full_restart = True
        else:
            full_restart = True

        if full_restart:
            self.manager.cleanup_all()
            self.start_job(None)

    def cancel_job(self, payload):
        """
        Request from user to cancel a running job.
        Sets the job state to CANCELED and deletes the associated jobs, config maps, and PVCs.
        :param payload: CancelJobPayload: contains job id we should cancel
        """
        self._set_job_step(JobSteps.NONE)
        self._set_job_state(JobStates.CANCELED)
        self._show_status("Canceling job")
        self.manager.cleanup_all()

    def stage_job_error(self, payload):
        """
        Message from watcher that the staging job had an error
        :param payload:JobStepErrorPayload: info about error
        """
        self._job_step_failed("Staging job failed", payload)

    def run_job_error(self, payload):
        """
        Message from watcher that the run workflow job had an error
        :param payload:JobStepErrorPayload: info about error
        """
        self._job_step_failed("Running job failed", payload)

    def organize_output_error(self, payload):
        """
        Message from watcher that the organize output project job had an error
        :param payload:JobStepErrorPayload: info about error
        """
        self._job_step_failed("Organize output job failed", payload)

    def store_job_output_error(self, payload):
        """
        Message from watcher that the store output project job had an error
        :param payload:JobStepErrorPayload: info about error
        """
        self._job_step_failed("Storing job output failed", payload)

    def record_output_project_error(self, payload):
        self._job_step_failed("Recording output project failed", payload)

    def _job_step_failed(self, message, payload):
        self._set_job_state(JobStates.ERRORED)
        self._show_status(message)
        self._log_error(message=payload.message)
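As a quick check on _calculate_input_data_size_in_g above: file sizes are summed in bytes and rounded up to whole gigabytes, so a hypothetical 1.5 GiB of input requests 2 G. The file sizes below are made up for illustration.

    # Hypothetical usage of _calculate_input_data_size_in_g.
    from unittest.mock import Mock

    input_files = Mock(
        dds_files=[Mock(size=1024 ** 3)],        # 1 GiB DukeDS file
        url_files=[Mock(size=512 * 1024 ** 2)])  # 0.5 GiB URL file
    assert K8sJobActions._calculate_input_data_size_in_g(input_files) == 2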
Example #16
    def test_create_stage_data_job_packed_workflow(self):
        mock_cluster_api = Mock()
        mock_config = Mock()
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)
        mock_input_files = Mock(
            dds_files=[Mock(destination_path='file1.txt', file_id='myid')])
        manager.create_stage_data_job(input_files=mock_input_files)

        # it should have created a config map of what needs to be staged
        config_map_payload = {
            'stagedata.json':
            json.dumps({
                "items": [{
                    "type": "url",
                    "source": "someurl.cwl",
                    "dest": "/bespin/job-data/workflow/someurl.cwl"
                }, {
                    "type": "write",
                    "source": {
                        "threads": 2
                    },
                    "dest": "/bespin/job-data/job-order.json"
                }, {
                    "type": "DukeDS",
                    "source": "myid",
                    "dest": "/bespin/job-data/file1.txt"
                }]
            })
        }
        mock_cluster_api.create_config_map.assert_called_with(
            name='stage-data-51-jpb',
            data=config_map_payload,
            labels=self.expected_metadata_labels)

        # it should have created a job
        args, kwargs = mock_cluster_api.create_job.call_args
        name, batch_spec = args
        self.assertEqual(name, 'stage-data-51-jpb')  # job name
        self.assertEqual(batch_spec.name, 'stage-data-51-jpb')  # job spec name
        self.assertEqual(batch_spec.labels['bespin-job-id'],
                         '51')  # Bespin job id stored in a label
        self.assertEqual(batch_spec.labels['bespin-job-step'],
                         'stage_data')  # store the job step in a label
        job_container = batch_spec.container
        self.assertEqual(job_container.name,
                         'stage-data-51-jpb')  # container name
        self.assertEqual(job_container.image_name,
                         self.mock_job.k8s_settings.stage_data.image_name,
                         'stage data image name is based on a job setting')
        self.assertEqual(job_container.command,
                         self.mock_job.k8s_settings.stage_data.base_command,
                         'stage data command is based on a job setting')
        self.assertEqual(
            job_container.args, [
                '/bespin/config/stagedata.json',
                '/bespin/job-data/workflow-input-files-metadata.json'
            ], 'stage data command should receive config file as an argument')
        self.assertEqual(
            job_container.env_dict,
            {'DDSCLIENT_CONF': '/etc/ddsclient/config'},
            'DukeDS environment variable should point to the config mapped config file'
        )
        self.assertEqual(
            job_container.requested_cpu,
            self.mock_job.k8s_settings.stage_data.cpus,
            'stage data requested cpu is based on a job setting')
        self.assertEqual(
            job_container.requested_memory,
            self.mock_job.k8s_settings.stage_data.memory,
            'stage data requested memory is based on a job setting')
        self.assertEqual(len(job_container.volumes), 3)

        user_data_volume = job_container.volumes[0]
        self.assertEqual(user_data_volume.name, 'job-data-51-jpb')
        self.assertEqual(user_data_volume.mount_path, '/bespin/job-data')
        self.assertEqual(user_data_volume.volume_claim_name, 'job-data-51-jpb')
        self.assertEqual(user_data_volume.read_only, False)

        config_map_volume = job_container.volumes[1]
        self.assertEqual(config_map_volume.name, 'stage-data-51-jpb')
        self.assertEqual(config_map_volume.mount_path, '/bespin/config')
        self.assertEqual(config_map_volume.config_map_name,
                         'stage-data-51-jpb')
        self.assertEqual(config_map_volume.source_key, 'stagedata.json')
        self.assertEqual(config_map_volume.source_path, 'stagedata.json')

        secret_volume = job_container.volumes[2]
        self.assertEqual(secret_volume.name, 'data-store-51-jpb')
        self.assertEqual(secret_volume.mount_path, '/etc/ddsclient')
        self.assertEqual(secret_volume.secret_name,
                         mock_config.data_store_settings.secret_name,
                         'name of DukeDS secret is based on a config setting')
Example #17
    def test_create_run_workflow_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.create_run_workflow_job()

        # it should have created a job to run the workflow with several volumes mounted
        args, kwargs = mock_cluster_api.create_job.call_args
        name, batch_spec = args
        self.assertEqual(name, 'run-workflow-51-jpb')  # job name
        self.assertEqual(batch_spec.name,
                         'run-workflow-51-jpb')  # job spec name
        self.assertEqual(batch_spec.labels['bespin-job-id'],
                         '51')  # Bespin job id stored in a label
        self.assertEqual(batch_spec.labels['bespin-job-step'],
                         'run_workflow')  # store the job step in a label
        job_container = batch_spec.container
        self.assertEqual(job_container.name,
                         'run-workflow-51-jpb')  # container name
        self.assertEqual(job_container.image_name,
                         self.mock_job.k8s_settings.run_workflow.image_name,
                         'run workflow image name is based on job settings')
        expected_bash_command = 'cwltool --cachedir /bespin/output-data/tmpout/ ' \
                                '--outdir /bespin/output-data/results/ ' \
                                '--max-ram 1G --max-cores 2 ' \
                                '--usage-report /bespin/output-data/job-51-jpb-resource-usage.json ' \
                                '--stdout /bespin/output-data/bespin-workflow-output.json ' \
                                '--stderr /bespin/output-data/bespin-workflow-output.log ' \
                                '/bespin/job-data/workflow/someurl.cwl#main ' \
                                '/bespin/job-data/job-order.json'.split(' ')
        self.assertEqual(
            job_container.command, expected_bash_command,
            'run workflow command combines job settings and staged files')
        self.assertEqual(
            job_container.env_dict['CALRISSIAN_POD_NAME'].field_path,
            'metadata.name',
            'We should store the pod name in a CALRISSIAN_POD_NAME environment variable'
        )
        self.assertEqual(
            job_container.requested_cpu,
            self.mock_job.k8s_settings.run_workflow.cpus,
            'run workflow requested cpu is based on a job setting')
        self.assertEqual(
            job_container.requested_memory,
            self.mock_job.k8s_settings.run_workflow.memory,
            'run workflow requested memory is based on a job setting')

        self.assertEqual(len(job_container.volumes), 3)

        job_data_volume = job_container.volumes[0]
        self.assertEqual(job_data_volume.name, 'job-data-51-jpb')
        self.assertEqual(job_data_volume.mount_path, '/bespin/job-data')
        self.assertEqual(job_data_volume.volume_claim_name, 'job-data-51-jpb')
        self.assertEqual(job_data_volume.read_only, True,
                         'job data should be a read only volume')

        output_data_volume = job_container.volumes[1]
        self.assertEqual(output_data_volume.name, 'output-data-51-jpb')
        self.assertEqual(output_data_volume.mount_path, '/bespin/output-data')
        self.assertEqual(output_data_volume.volume_claim_name,
                         'output-data-51-jpb')
        self.assertEqual(output_data_volume.read_only, False)

        system_data_volume = job_container.volumes[2]
        self.assertEqual(system_data_volume.name, 'system-data-51-jpb')
        self.assertEqual(
            system_data_volume.mount_path,
            mock_config.run_workflow_settings.system_data_volume.mount_path,
            'mount path for the system volume is based on a config setting')
        self.assertEqual(
            system_data_volume.volume_claim_name, mock_config.
            run_workflow_settings.system_data_volume.volume_claim_name,
            'pvc name for the system volume is based on a config setting')
        self.assertEqual(
            system_data_volume.read_only, True,
            'system data should be read only for running workflow')
Example #18
    def test_create_organize_output_project_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.create_organize_output_project_job(
            methods_document_content='markdown')

        # it should have created a job to run the workflow with several volumes mounted
        args, kwargs = mock_cluster_api.create_job.call_args
        name, batch_spec = args
        self.assertEqual(name, 'organize-output-51-jpb')  # job name
        self.assertEqual(batch_spec.name,
                         'organize-output-51-jpb')  # job spec name
        self.assertEqual(batch_spec.labels['bespin-job-id'],
                         '51')  # Bespin job id stored in a label
        self.assertEqual(batch_spec.labels['bespin-job-step'],
                         'organize_output')  # store the job step in a label
        job_container = batch_spec.container
        self.assertEqual(job_container.name,
                         'organize-output-51-jpb')  # container name
        self.assertEqual(
            job_container.image_name,
            self.mock_job.k8s_settings.organize_output.image_name,
            'organize output image name is based on job settings')
        self.assertEqual(
            job_container.command,
            self.mock_job.k8s_settings.organize_output.base_command,
            'organize output command is based on job settings')
        self.assertEqual(
            job_container.requested_cpu,
            self.mock_job.k8s_settings.organize_output.cpus,
            'organize output requested cpu is based on a job setting')
        self.assertEqual(
            job_container.requested_memory,
            self.mock_job.k8s_settings.organize_output.memory,
            'organize output requested memory is based on a job setting')

        mock_cluster_api.create_config_map.assert_called_with(
            name='organize-output-51-jpb',
            data={
                'organizeoutput.json':
                json.dumps({
                    "bespin_job_id":
                    '51',
                    "destination_dir":
                    "/bespin/output-data/results",
                    "downloaded_workflow_path":
                    "/bespin/job-data/workflow/someurl.cwl",
                    "workflow_to_read":
                    "/bespin/job-data/workflow/someurl.cwl",
                    "workflow_type":
                    "packed",
                    "job_order_path":
                    "/bespin/job-data/job-order.json",
                    "bespin_workflow_stdout_path":
                    "/bespin/output-data/bespin-workflow-output.json",
                    "bespin_workflow_stderr_path":
                    "/bespin/output-data/bespin-workflow-output.log",
                    "methods_template":
                    "markdown",
                    "additional_log_files":
                    ["/bespin/output-data/job-51-jpb-resource-usage.json"]
                })
            },
            labels={
                'bespin-job': 'true',
                'bespin-job-id': '51'
            })

        self.assertEqual(len(job_container.volumes), 3)

        job_data_volume = job_container.volumes[0]
        self.assertEqual(job_data_volume.name, 'job-data-51-jpb')
        self.assertEqual(job_data_volume.mount_path, '/bespin/job-data')
        self.assertEqual(job_data_volume.volume_claim_name, 'job-data-51-jpb')
        self.assertEqual(job_data_volume.read_only, True,
                         'job data should be a read only volume')

        output_data_volume = job_container.volumes[1]
        self.assertEqual(output_data_volume.name, 'output-data-51-jpb')
        self.assertEqual(output_data_volume.mount_path, '/bespin/output-data')
        self.assertEqual(output_data_volume.volume_claim_name,
                         'output-data-51-jpb')
        self.assertEqual(output_data_volume.read_only, False)

        config_map_volume = job_container.volumes[2]
        self.assertEqual(config_map_volume.name, 'organize-output-51-jpb')
        self.assertEqual(config_map_volume.mount_path, '/bespin/config')
        self.assertEqual(config_map_volume.config_map_name,
                         'organize-output-51-jpb')
        self.assertEqual(config_map_volume.source_key, 'organizeoutput.json')
        self.assertEqual(config_map_volume.source_path, 'organizeoutput.json')
Example #19
    def test_create_save_output_job(self):
        mock_cluster_api = Mock()
        mock_config = Mock(storage_class_name='nfs')
        manager = JobManager(cluster_api=mock_cluster_api,
                             config=mock_config,
                             job=self.mock_job)

        manager.create_save_output_job(share_dds_ids=['123', '456'])

        # it should have created a config map of what needs to be staged
        config_map_payload = {
            'saveoutput.json':
            json.dumps({
                "destination": "Bespin myworkflow v1 myjob 2019-03-11",
                "readme_file_path": "results/docs/README.md",
                "paths": ["/bespin/output-data/results"],
                "share": {
                    "dds_user_ids": ["123", "456"]
                },
                "activity": {
                    "name":
                    "myjob - Bespin Job 51",
                    "description":
                    "Bespin Job 51 - Workflow myworkflow v1",
                    "started_on":
                    "",
                    "ended_on":
                    "",
                    "input_file_versions_json_path":
                    "/bespin/job-data/workflow-input-files-metadata.json",
                    "workflow_output_json_path":
                    "/bespin/output-data/bespin-workflow-output.json"
                }
            })
        }
        mock_cluster_api.create_config_map.assert_called_with(
            name='save-output-51-jpb',
            data=config_map_payload,
            labels=self.expected_metadata_labels)

        # it should have created a job
        args, kwargs = mock_cluster_api.create_job.call_args
        name, batch_spec = args
        self.assertEqual(name, 'save-output-51-jpb')  # job name
        self.assertEqual(batch_spec.name,
                         'save-output-51-jpb')  # job spec name
        self.assertEqual(batch_spec.labels['bespin-job-id'],
                         '51')  # Bespin job id stored in a label
        self.assertEqual(batch_spec.labels['bespin-job-step'],
                         'save_output')  # store the job step in a label
        job_container = batch_spec.container
        self.assertEqual(job_container.name,
                         'save-output-51-jpb')  # container name
        self.assertEqual(job_container.image_name,
                         self.mock_job.k8s_settings.save_output.image_name,
                         'save output image name is based on a job setting')
        self.assertEqual(job_container.command,
                         self.mock_job.k8s_settings.save_output.base_command,
                         'save output command is based on a job setting')
        self.assertEqual(
            job_container.args, [
                '/bespin/config/saveoutput.json',
                '/bespin/output-data/annotate_project_details.sh'
            ],
            'save output command should receive config file and output filenames as arguments'
        )
        self.assertEqual(
            job_container.env_dict,
            {'DDSCLIENT_CONF': '/etc/ddsclient/config'},
            'DukeDS environment variable should point to the config mapped config file'
        )
        self.assertEqual(job_container.requested_cpu,
                         self.mock_job.k8s_settings.save_output.cpus,
                         'save output requested cpu is based on a job setting')
        self.assertEqual(
            job_container.requested_memory,
            self.mock_job.k8s_settings.save_output.memory,
            'save output requested memory is based on a job setting')
        self.assertEqual(len(job_container.volumes), 4)

        job_data_volume = job_container.volumes[0]
        self.assertEqual(job_data_volume.name, 'job-data-51-jpb')
        self.assertEqual(job_data_volume.mount_path, '/bespin/job-data')
        self.assertEqual(job_data_volume.volume_claim_name, 'job-data-51-jpb')
        self.assertEqual(job_data_volume.read_only, True)

        job_data_volume = job_container.volumes[1]
        self.assertEqual(job_data_volume.name, 'output-data-51-jpb')
        self.assertEqual(job_data_volume.mount_path, '/bespin/output-data')
        self.assertEqual(job_data_volume.volume_claim_name,
                         'output-data-51-jpb')
        self.assertEqual(
            job_data_volume.read_only,
            False)  # writable so we can write project_details file

        config_map_volume = job_container.volumes[2]
        self.assertEqual(config_map_volume.name, 'stage-data-51-jpb')
        self.assertEqual(config_map_volume.mount_path, '/bespin/config')
        self.assertEqual(config_map_volume.config_map_name,
                         'save-output-51-jpb')
        self.assertEqual(config_map_volume.source_key, 'saveoutput.json')
        self.assertEqual(config_map_volume.source_path, 'saveoutput.json')

        secret_volume = job_container.volumes[3]
        self.assertEqual(secret_volume.name, 'data-store-51-jpb')
        self.assertEqual(secret_volume.mount_path, '/etc/ddsclient')
        self.assertEqual(secret_volume.secret_name,
                         mock_config.data_store_settings.secret_name,
                         'name of DukeDS secret is based on a config setting')