Example #1
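All of the examples below lean on foundations_internal.change_directory.ChangeDirectory, whose implementation is not part of this listing. As a point of reference, here is a minimal sketch of a context manager with the behavior the examples assume (enter: save the current working directory and move into the target; exit: always move back). The class name is hypothetical.

import os

class ChangeDirectorySketch:
    """Hypothetical stand-in for foundations_internal.change_directory.ChangeDirectory."""

    def __init__(self, directory):
        self._directory = directory
        self._previous_directory = None

    def __enter__(self):
        # Remember where we were, then move into the target directory
        self._previous_directory = os.getcwd()
        os.chdir(self._directory)
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        # Restore the original working directory even if the body raised
        os.chdir(self._previous_directory)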
    def _retrieve_logs(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        elif job_status == "queued":
            self._cli._fail_with_message(
                "Error: Job `{}` is queued and has not produced any logs".format(job_id)
            )
        else:
            logs = job_deployment.get_job_logs()
            print(logs)
Example #2
    def unbundle(self, path_to_save):
        import tarfile
        from distutils.dir_util import mkpath
        from foundations_internal.change_directory import ChangeDirectory

        with tarfile.open(self.job_archive(), "r:gz") as tar:
            mkpath(path_to_save)
            with ChangeDirectory(path_to_save):
                tar.extractall()
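The pattern above (create the destination, then extract from inside it so relative archive members land in the right place) can be exercised end-to-end with the standard library alone. Everything below is illustrative scaffolding, not part of the Foundations codebase:

import os
import tarfile
import tempfile

with tempfile.TemporaryDirectory() as workspace:
    archive_path = os.path.join(workspace, "job.tgz")
    payload_path = os.path.join(workspace, "results.txt")
    with open(payload_path, "w") as payload:
        payload.write("job output")

    # Bundle: write the payload into a gzipped tarball
    with tarfile.open(archive_path, "w:gz") as tar:
        tar.add(payload_path, arcname="results.txt")

    # Unbundle: create the target directory and extract from inside it,
    # mirroring the mkpath + ChangeDirectory + extractall sequence above
    target = os.path.join(workspace, "extracted")
    os.makedirs(target, exist_ok=True)
    previous = os.getcwd()
    os.chdir(target)
    try:
        with tarfile.open(archive_path, "r:gz") as tar:
            tar.extractall()
    finally:
        os.chdir(previous)

    assert os.path.exists(os.path.join(target, "results.txt"))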
Example #3
    def test_reset_clears_config_paths(self):
        from foundations_internal.change_directory import ChangeDirectory
        from foundations_contrib.config_manager import ConfigManager

        config_manager = ConfigManager()

        with ChangeDirectory('test/fixtures/single_config'):
            config_manager.config()

        config_manager.reset()
        self.assertEqual([], config_manager.config_paths())
Example #4
    def test_load_multiple_config_from_yaml(self):
        from foundations_internal.change_directory import ChangeDirectory
        from foundations_contrib.config_manager import ConfigManager

        with ChangeDirectory('test/fixtures/multiple_configs'):
            config = ConfigManager().config()
            self._assert_is_subset(
                {
                    'title': 'test config',
                    'value': 'different value'
                }, config)
Example #5
    def test_load_config_from_yaml(self):
        from foundations_internal.change_directory import ChangeDirectory
        from foundations_contrib.config_manager import ConfigManager

        with ChangeDirectory('test/fixtures/single_config'):
            config = ConfigManager().config()
            self._assert_is_subset(
                {
                    'title': 'test config',
                    'value': 'this exists as a test'
                }, config)
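The three tests above read YAML from fixture directories that are not included in this listing. Reconstructed from the assertions alone, a minimal file under test/fixtures/single_config (exact filename unknown) would need nothing more than the two asserted keys; a sketch:

import yaml

# Hypothetical fixture contents; only the two asserted keys are known.
fixture_text = """
title: test config
value: this exists as a test
"""

config = yaml.safe_load(fixture_text)
assert config == {'title': 'test config', 'value': 'this exists as a test'}

test_load_multiple_config_from_yaml implies that test/fixtures/multiple_configs layers several such files, with a later one overriding 'value' to 'different value'.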
Example #6
    def _test_command_that_loads_parameters_in_directory(
            self, command, script_directory, expected_loaded_parameters):
        from foundations_internal.change_directory import ChangeDirectory

        import subprocess
        import json
        import os

        with self.unset_foundations_home():
            env = self._update_environment_with_home_directory() if os.getenv(
                'RUNNING_ON_CI', False) else {}
            env = {**os.environ, **env}

            with ChangeDirectory(script_directory):
                completed_process = subprocess.run(command,
                                                   stdout=subprocess.PIPE,
                                                   env=env)
                process_output = completed_process.stdout.decode().strip().split('\n')
                print(process_output)

            if os.getenv('RUNNING_ON_CI', False):
                import re
                from foundations_local_docker_scheduler_plugin.job_deployment import JobDeployment
                from foundations_contrib.global_state import config_manager

                job_id_regex = re.search('Job \'(.+?)\' has completed.',
                                         process_output[-1])
                self.assertIsNotNone(job_id_regex)
                job_id = job_id_regex.group(1)

                # Creating a fake job deployment as a quick interface to grab its logs
            config_manager.config()['scheduler_url'] = f"http://{os.environ['LOCAL_DOCKER_SCHEDULER_HOST']}:5000"
                job = JobDeployment(job_id, None, None)
                process_output = job.get_job_logs().split('\n')

            params_json = process_output[-2]
            job_id = process_output[-3]
            project_name = self.project_name
            result_parameters = json.loads(params_json)

            self.assertEqual(expected_loaded_parameters, result_parameters)
            self._assert_flattened_parameter_keys_in_project_job_parameter_names_set(
                project_name, expected_loaded_parameters)
            self._assert_flattened_parameter_values_for_job_in_job_parameters(
                job_id, expected_loaded_parameters)
            self._assert_flattened_parameter_keys_in_project_input_parameter_names_set(
                project_name, expected_loaded_parameters)
            if expected_loaded_parameters:
                self._assert_flattened_parameter_names_for_job_in_job_input_parameters(
                    job_id, expected_loaded_parameters)
Example #7
    def _stop(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        try:
            job_status = job_deployment.get_job_status()

            if job_status is None:
                self._cli._fail_with_message(
                    "Error: Job `{}` does not exist for environment `{}`".format(
                        job_id, env_name
                    )
                )
            elif job_status == "queued":
                self._cli._fail_with_message(
                    "Error: Job `{}` is queued and cannot be stopped".format(job_id)
                )
            elif job_status == "completed":
                self._cli._fail_with_message(
                    "Error: Job `{}` is completed and cannot be stopped".format(job_id)
                )
            else:
                if job_deployment.stop_running_job():
                    print("Stopped running job {}".format(job_id))
                else:
                    print("Error stopping job {}".format(job_id))
        except AttributeError:
            print("The specified scheduler does not support this functionality")
Example #8
    def _delete_job(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        elif job_status in ("queued", "running", "pending"):
            self._cli._fail_with_message(
                "Error: Job `{}` has status `{}` and cannot be deleted".format(
                    job_id, job_status
                )
            )
        else:
            if job_deployment.cancel_jobs([job_id])[job_id]:
                print(f"Job {job_id} successfully deleted")
            else:
                print(
                    f"Could not completely delete job {job_id}. Please make sure that the job bundle exists under ~/.foundations/job_data/"
                )
Example #9
    def _clear_queue(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        current_directory = os.getcwd()

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]

        try:
            num_jobs_dequeued = job_deployment_class.clear_queue()
            print("Removed {} job(s) from queue".format(num_jobs_dequeued))
        except AttributeError:
            print("The specified scheduler does not support this functionality")
Example #10
    def _retrieve_artifacts(self):
        from foundations_contrib.global_state import config_manager
        from foundations_core_cli.job_submission.config import load
        from foundations_internal.change_directory import ChangeDirectory
        import os

        arguments = self._cli.arguments()

        env_name = arguments.scheduler_config
        job_id = arguments.job_id
        current_directory = os.getcwd()

        if arguments.save_dir is None:
            arguments.save_dir = os.path.join(current_directory, str(job_id))

        with ChangeDirectory(current_directory):
            load(arguments.scheduler_config or "scheduler")

        job_deployment_class = config_manager["deployment_implementation"][
            "deployment_type"
        ]
        job_deployment = job_deployment_class(job_id, None, None)

        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        else:
            if job_deployment.get_job_archive():
                print(f"Successfully retrieved Job {job_id} from archive store")
            else:
                print(f"Error: Could not download Job {job_id}")
Example #11
    def _test_command_that_loads_parameters_in_directory_for_python(
            self, command, script_directory, expected_loaded_parameters,
            check_for_warning):
        from foundations_internal.change_directory import ChangeDirectory

        import subprocess
        import json

        env = self._update_environment_with_home_directory()

        with ChangeDirectory(script_directory):
            env = None if check_for_warning else env
            completed_process = subprocess.run(command,
                                               stdout=subprocess.PIPE,
                                               env=env)
            process_output = completed_process.stdout.decode()

        warnings, _, params_json = process_output.strip().rpartition('\n')
        if check_for_warning:
            self.assertIn('Script not run with Foundations.', warnings)

        result_parameters = json.loads(params_json)
        self.assertEqual(expected_loaded_parameters, result_parameters)
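rpartition('\n') splits at the last newline only, so params_json is exactly the final line of output and warnings is everything printed before it. For instance, with the warning the test checks for (the parameter payload here is made up):

process_output = 'Script not run with Foundations.\n{"learning_rate": 0.01}'
warnings, _, params_json = process_output.strip().rpartition('\n')
assert warnings == 'Script not run with Foundations.'
assert params_json == '{"learning_rate": 0.01}'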
Example #12
def submit(arguments):
    from foundations_core_cli.job_submission.config import load
    from foundations_core_cli.job_submission.deployment import deploy
    from foundations_core_cli.job_submission.logs import stream_job_logs
    from foundations_internal.change_directory import ChangeDirectory
    from foundations_contrib.global_state import config_manager, log_manager
    from foundations_contrib.set_job_resources import set_job_resources
    from jsonschema import validate
    import os
    import os.path
    import yaml

    current_directory = os.getcwd()
    with ChangeDirectory(arguments.job_directory or current_directory):
        load(arguments.scheduler_config or 'scheduler')

        job_config = {}
        if os.path.exists('job.config.yaml'):
            with open('job.config.yaml') as file:
                job_config = yaml.load(file.read(), Loader=yaml.FullLoader)

        # validate(instance=job_config, schema=_job_schema)

        job_resource_args = {}

        if 'log_level' in job_config:
            config_manager['log_level'] = job_config['log_level']
        if 'worker' in job_config:
            config_manager['worker_container_overrides'].update(
                job_config['worker'])
        if 'num_gpus' in job_config:
            job_resource_args['num_gpus'] = job_config['num_gpus']
        if 'ram' in job_config:
            job_resource_args['ram'] = job_config['ram']

        logger = log_manager.get_logger(__name__)

        if arguments.command:
            config_manager['worker_container_overrides'][
                'args'] = arguments.command
            if not os.path.exists(arguments.command[0]):
                logger.warning(
                    f"Hey, seems like your command '{arguments.command[0]}' is not an existing file in your current directory. If you are using Atlas's advanced custom docker image functionality and know what you are doing, you can ignore this message."
                )
        else:
            logger.warning('No command was specified.')

        if arguments.num_gpus is not None:
            job_resource_args['num_gpus'] = arguments.num_gpus
        if arguments.ram is not None:
            job_resource_args['ram'] = arguments.ram
        set_job_resources(**job_resource_args)

        from foundations.global_state import current_foundations_context
        try:
            cur_job_id = current_foundations_context().pipeline_context().file_name
        except ValueError:
            cur_job_id = None

        deployment = deploy(
            arguments.project_name or job_config.get('project_name'),
            arguments.entrypoint or job_config.get('entrypoint'),
            arguments.params or job_config.get('params'))

        if arguments.stream_job_logs:
            try:
                stream_job_logs(deployment)
            except KeyboardInterrupt:
                pass

        if cur_job_id is not None:
            current_foundations_context().pipeline_context().file_name = cur_job_id

        return deployment
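submit() reads exactly seven keys out of job.config.yaml: project_name, entrypoint, params, log_level, worker, num_gpus and ram. A file exercising all of them might look like the sketch below; every value, and the worker override key, is illustrative rather than taken from Foundations documentation.

import yaml

example_job_config = """
project_name: my-project     # fallback for arguments.project_name
entrypoint: main.py          # fallback for arguments.entrypoint
params: params.yaml          # fallback for arguments.params (illustrative value)
log_level: INFO              # copied into config_manager['log_level']
num_gpus: 0                  # forwarded to set_job_resources
ram: 2048                    # forwarded to set_job_resources
worker:                      # update()d into worker_container_overrides
  image: my-registry/worker:latest
"""

job_config = yaml.load(example_job_config, Loader=yaml.FullLoader)
assert set(job_config) == {'project_name', 'entrypoint', 'params',
                           'log_level', 'num_gpus', 'ram', 'worker'}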