Example #1
    def _retrieve_logs(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        # Load the named scheduler configuration (default "scheduler") into
        # the global config_manager.
        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        # The loaded config names the deployment class for this scheduler.
        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        elif job_status == "queued":
            self._cli._fail_with_message(
                "Error: Job `{}` is queued and has not produced any logs".format(job_id)
            )
        else:
            logs = job_deployment.get_job_logs()
            print(logs)
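
Every handler in these examples resolves a deployment class out of config_manager and then drives the same small interface. A minimal sketch of a class satisfying that interface, assuming only the constructor shape and method names visible in the snippets (the real scheduler plugins are considerably more involved):

class SketchJobDeployment:
    """Hypothetical deployment exposing the surface the CLI handlers call."""

    def __init__(self, job_id, job_source_bundle, project_name):
        # Instantiated as job_deployment_class(job_id, None, None) above;
        # the names of the two None parameters are assumptions.
        self._job_id = job_id

    def get_job_status(self):
        # None for an unknown job, otherwise a status string such as
        # "queued", "running", "pending", or "completed".
        return "completed"

    def get_job_logs(self):
        return ""

    def stop_running_job(self):
        return True

    def cancel_jobs(self, job_ids):
        # Example #4 indexes the result by job id, so return a mapping of
        # job_id -> success flag.
        return {job_id: True for job_id in job_ids}

    def get_job_archive(self):
        return True

    @classmethod
    def clear_queue(cls):
        # Example #5 calls this on the class itself and expects the number
        # of dequeued jobs back.
        return 0
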
Example #2
    def test_loads_config_into_config_manager_when_config_present(self):
        from foundations_local_docker_scheduler_plugin.config.scheduler import translate

        self._set_up_config()
        load(self.config_name)
        self.mock_config_listing.update_config_manager_with_config.assert_called_with(
            self.config_name, translate)
Example #3
    def _stop(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        try:
            job_status = job_deployment.get_job_status()

            if job_status is None:
                self._cli._fail_with_message(
                    "Error: Job `{}` does not exist for environment `{}`".format(
                        job_id, env_name
                    )
                )
            elif job_status == "queued":
                self._cli._fail_with_message(
                    "Error: Job `{}` is queued and cannot be stopped".format(job_id)
                )
            elif job_status == "completed":
                self._cli._fail_with_message(
                    "Error: Job `{}` is completed and cannot be stopped".format(job_id)
                )
            else:
                if job_deployment.stop_running_job():
                    print("Stopped running job {}".format(job_id))
                else:
                    print("Error stopping job {}".format(job_id))
        except AttributeError:
            print("The specified scheduler does not support this functionality")
Example #4
    def _delete_job(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        elif job_status in ("queued", "running", "pending"):
            self._cli._fail_with_message(
                "Error: Job `{}` has status `{}` and cannot be deleted".format(
                    job_id, job_status
                )
            )
        else:
            if job_deployment.cancel_jobs([job_id])[job_id]:
                print(f"Job {job_id} successfully deleted")
            else:
                print(
                    f"Could not completely delete job {job_id}. Please make sure that the job bundle exists under ~/.foundations/job_data/"
                )
Example #5
    def _clear_queue(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]

        try:
            num_jobs_dequeued = job_deployment_class.clear_queue()
            print("Removed {} job(s) from queue".format(num_jobs_dequeued))
        except AttributeError:
            print("The specified scheduler does not support this functionality")
Example #6
    def _retrieve_artifacts(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        if arguments.save_dir is None:
            arguments.save_dir = os.path.join(current_directory, str(job_id))

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        else:
            if job_deployment.get_job_archive():
                print(f"Successfully retrieved Job {job_id} from archive store")
            else:
                print(f"Error: Could not download Job {job_id}")
Example #7
def submit(arguments):
    from foundations_core_cli.job_submission.config import load
    from foundations_core_cli.job_submission.deployment import deploy
    from foundations_core_cli.job_submission.logs import stream_job_logs
    from foundations_internal.change_directory import ChangeDirectory
    from foundations_contrib.global_state import config_manager, log_manager
    from foundations_contrib.set_job_resources import set_job_resources
    from jsonschema import validate
    import os
    import os.path
    import yaml

    current_directory = os.getcwd()
    with ChangeDirectory(arguments.job_directory or current_directory):
        load(arguments.scheduler_config or 'scheduler')

        job_config = {}
        if os.path.exists('job.config.yaml'):
            with open('job.config.yaml') as file:
                job_config = yaml.load(file.read(), Loader=yaml.FullLoader)

        # validate(instance=job_config, schema=_job_schema)

        job_resource_args = {}

        if 'log_level' in job_config:
            config_manager['log_level'] = job_config['log_level']
        if 'worker' in job_config:
            config_manager['worker_container_overrides'].update(
                job_config['worker'])
        if 'num_gpus' in job_config:
            job_resource_args['num_gpus'] = job_config['num_gpus']
        if 'ram' in job_config:
            job_resource_args['ram'] = job_config['ram']

        logger = log_manager.get_logger(__name__)

        if arguments.command:
            config_manager['worker_container_overrides'][
                'args'] = arguments.command
            if not os.path.exists(arguments.command[0]):
                logger.warning(
                    f"Hey, seems like your command '{arguments.command[0]}' is not an existing file in your current directory. If you are using Atlas's advanced custom docker image functionality and know what you are doing, you can ignore this message."
                )
        else:
            logger.warning('No command was specified.')

        if arguments.num_gpus is not None:
            job_resource_args['num_gpus'] = arguments.num_gpus
        if arguments.ram is not None:
            job_resource_args['ram'] = arguments.ram
        set_job_resources(**job_resource_args)

        from foundations.global_state import current_foundations_context
        try:
            cur_job_id = current_foundations_context().pipeline_context(
            ).file_name
        except ValueError:
            cur_job_id = None

        deployment = deploy(
            arguments.project_name or job_config.get('project_name'),
            arguments.entrypoint or job_config.get('entrypoint'),
            arguments.params or job_config.get('params'))

        if arguments.stream_job_logs:
            try:
                stream_job_logs(deployment)
            except KeyboardInterrupt:
                pass

        if cur_job_id is not None:
            current_foundations_context().pipeline_context(
            ).file_name = cur_job_id

        return deployment
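
submit only reads a handful of attributes from its arguments object, so a stand-in namespace is enough to sketch an invocation (all values below are hypothetical):

import types

arguments = types.SimpleNamespace(
    job_directory=None,        # run from the current directory
    scheduler_config=None,     # fall back to the "scheduler" config
    command=["main.py"],       # worker container override
    num_gpus=None,             # defer resources to job.config.yaml
    ram=None,
    project_name="example-project",
    entrypoint="main.py",
    params=None,
    stream_job_logs=False,
)
deployment = submit(arguments)
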
    def test_does_not_print_error_when_config_present(self):
        self._set_up_config()
        load(self.config_name)
        self.print_mock.assert_not_called()
    def test_prints_warning_message_when_config_missing(self):
        load(self.config_name)
        self.print_mock.assert_called_with(
            f"Could not find submission configuration with name: `{self.config_name}`"
        )
    def test_exits_when_config_missing(self):
        load(self.config_name)
        self.exit_mock.assert_called_with(1)
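
Taken together, the tests pin down load's contract: when the named submission configuration exists, push it into the config manager via the scheduler-specific translate function; when it does not, print a warning and exit with status 1. A behavioural sketch (not the actual implementation; config_listing and its lookup method are stand-ins for the mocked collaborators):

import sys

def load_sketch(config_name, config_listing, translate):
    if config_listing.config_path(config_name) is not None:  # hypothetical lookup
        config_listing.update_config_manager_with_config(config_name, translate)
    else:
        print(f"Could not find submission configuration with name: `{config_name}`")
        sys.exit(1)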