def _retrieve_logs(self):
    from foundations_contrib.global_state import config_manager
    from foundations_core_cli.job_submission.config import load
    from foundations_internal.change_directory import ChangeDirectory
    import os

    arguments = self._cli.arguments()
    env_name = arguments.scheduler_config
    job_id = arguments.job_id
    current_directory = os.getcwd()

    with ChangeDirectory(current_directory):
        load(arguments.scheduler_config or "scheduler")

    job_deployment_class = config_manager["deployment_implementation"][
        "deployment_type"
    ]
    job_deployment = job_deployment_class(job_id, None, None)

    job_status = job_deployment.get_job_status()

    if job_status is None:
        self._cli._fail_with_message(
            "Error: Job `{}` does not exist for environment `{}`".format(
                job_id, env_name
            )
        )
    elif job_status == "queued":
        self._cli._fail_with_message(
            "Error: Job `{}` is queued and has not produced any logs".format(job_id)
        )
    else:
        logs = job_deployment.get_job_logs()
        print(logs)
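# Every snippet in this file wraps filesystem-relative work in
# foundations_internal.change_directory.ChangeDirectory. Its real implementation is not
# shown here; below is only a minimal sketch, assuming it chdirs on entry and restores
# the previous working directory on exit. The name change_directory_sketch is hypothetical.
import os
from contextlib import contextmanager


@contextmanager
def change_directory_sketch(path):
    original_directory = os.getcwd()  # remember where we started
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(original_directory)  # always restore, even if the body raises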
def unbundle(self, path_to_save):
    import tarfile
    from distutils.dir_util import mkpath
    from foundations_internal.change_directory import ChangeDirectory

    with tarfile.open(self.job_archive(), "r:gz") as tar:
        mkpath(path_to_save)
        with ChangeDirectory(path_to_save):
            tar.extractall()
def test_reset_clears_config_paths(self):
    from foundations_internal.change_directory import ChangeDirectory

    config_manager = ConfigManager()

    with ChangeDirectory('test/fixtures/single_config'):
        config_manager.config()

    config_manager.reset()
    self.assertEqual([], config_manager.config_paths())
def test_load_multiple_config_from_yaml(self):
    from foundations_internal.change_directory import ChangeDirectory

    with ChangeDirectory('test/fixtures/multiple_configs'):
        config = ConfigManager().config()

    self._assert_is_subset(
        {'title': 'test config', 'value': 'different value'}, config)
def test_load_config_from_yaml(self):
    from foundations_internal.change_directory import ChangeDirectory

    with ChangeDirectory('test/fixtures/single_config'):
        config = ConfigManager().config()

    self._assert_is_subset(
        {'title': 'test config', 'value': 'this exists as a test'}, config)
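# A minimal sketch of the fixture these ConfigManager tests assume: a directory with a
# single YAML config file that ConfigManager picks up from the working directory. The
# file name pattern and exact layout are assumptions, not taken from the tests above.
import os
import yaml

os.makedirs('test/fixtures/single_config', exist_ok=True)
with open('test/fixtures/single_config/example.config.yaml', 'w') as file:
    yaml.safe_dump({'title': 'test config', 'value': 'this exists as a test'}, file)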
def _test_command_that_loads_parameters_in_directory(
        self, command, script_directory, expected_loaded_parameters):
    from foundations_internal.change_directory import ChangeDirectory
    import subprocess
    import json
    import os
    import os.path as path

    with self.unset_foundations_home():
        env = self._update_environment_with_home_directory() if os.getenv('RUNNING_ON_CI', False) else {}
        env = {**os.environ, **env}

        with ChangeDirectory(script_directory):
            completed_process = subprocess.run(command, stdout=subprocess.PIPE, env=env)

        process_output = completed_process.stdout.decode().strip().split('\n')
        print(process_output)

        if os.getenv('RUNNING_ON_CI', False):
            import re
            from foundations_local_docker_scheduler_plugin.job_deployment import JobDeployment
            from foundations_contrib.global_state import config_manager

            job_id_regex = re.search('Job \'(.+?)\' has completed.', process_output[-1])
            self.assertIsNotNone(job_id_regex)
            job_id = job_id_regex.group(1)

            # Creating a fake job deployment as a quick interface to grab its logs
            config_manager.config()['scheduler_url'] = f"http://{os.environ['LOCAL_DOCKER_SCHEDULER_HOST']}:5000"
            job = JobDeployment(job_id, None, None)
            process_output = job.get_job_logs().split('\n')

        params_json = process_output[-2]
        job_id = process_output[-3]
        project_name = self.project_name

        result_parameters = json.loads(params_json)
        self.assertEqual(expected_loaded_parameters, result_parameters)

        self._assert_flattened_parameter_keys_in_project_job_parameter_names_set(
            project_name, expected_loaded_parameters)
        self._assert_flattened_parameter_values_for_job_in_job_parameters(
            job_id, expected_loaded_parameters)
        self._assert_flattened_parameter_keys_in_project_input_parameter_names_set(
            project_name, expected_loaded_parameters)

        if expected_loaded_parameters:
            self._assert_flattened_parameter_names_for_job_in_job_input_parameters(
                job_id, expected_loaded_parameters)
def _stop(self):
    from foundations_contrib.global_state import config_manager
    from foundations_core_cli.job_submission.config import load
    from foundations_internal.change_directory import ChangeDirectory
    import os

    arguments = self._cli.arguments()
    env_name = arguments.scheduler_config
    job_id = arguments.job_id
    current_directory = os.getcwd()

    with ChangeDirectory(current_directory):
        load(arguments.scheduler_config or "scheduler")

    job_deployment_class = config_manager["deployment_implementation"][
        "deployment_type"
    ]
    job_deployment = job_deployment_class(job_id, None, None)

    try:
        job_status = job_deployment.get_job_status()

        if job_status is None:
            self._cli._fail_with_message(
                "Error: Job `{}` does not exist for environment `{}`".format(
                    job_id, env_name
                )
            )
        elif job_status == "queued":
            self._cli._fail_with_message(
                "Error: Job `{}` is queued and cannot be stopped".format(job_id)
            )
        elif job_status == "completed":
            self._cli._fail_with_message(
                "Error: Job `{}` is completed and cannot be stopped".format(job_id)
            )
        else:
            if job_deployment.stop_running_job():
                print("Stopped running job {}".format(job_id))
            else:
                print("Error stopping job {}".format(job_id))
    except AttributeError:
        print("The specified scheduler does not support this functionality")
def _delete_job(self):
    from foundations_contrib.global_state import config_manager
    from foundations_core_cli.job_submission.config import load
    from foundations_internal.change_directory import ChangeDirectory
    import os

    arguments = self._cli.arguments()
    env_name = arguments.scheduler_config
    job_id = arguments.job_id
    current_directory = os.getcwd()

    with ChangeDirectory(current_directory):
        load(arguments.scheduler_config or "scheduler")

    job_deployment_class = config_manager["deployment_implementation"][
        "deployment_type"
    ]
    job_deployment = job_deployment_class(job_id, None, None)

    job_status = job_deployment.get_job_status()

    if job_status is None:
        self._cli._fail_with_message(
            "Error: Job `{}` does not exist for environment `{}`".format(
                job_id, env_name
            )
        )
    elif job_status in ("queued", "running", "pending"):
        self._cli._fail_with_message(
            "Error: Job `{}` has status `{}` and cannot be deleted".format(
                job_id, job_status
            )
        )
    else:
        if job_deployment.cancel_jobs([job_id])[job_id]:
            print(f"Job {job_id} successfully deleted")
        else:
            print(
                f"Could not completely delete job {job_id}. Please make sure that the job bundle exists under ~/.foundations/job_data/"
            )
def _clear_queue(self):
    from foundations_contrib.global_state import config_manager
    from foundations_core_cli.job_submission.config import load
    from foundations_internal.change_directory import ChangeDirectory
    import os

    arguments = self._cli.arguments()
    current_directory = os.getcwd()

    with ChangeDirectory(current_directory):
        load(arguments.scheduler_config or "scheduler")

    job_deployment_class = config_manager["deployment_implementation"][
        "deployment_type"
    ]

    try:
        num_jobs_dequeued = job_deployment_class.clear_queue()
        print("Removed {} job(s) from queue".format(num_jobs_dequeued))
    except AttributeError:
        print("The specified scheduler does not support this functionality")
def _retrieve_artifacts(self):
    from foundations_contrib.global_state import config_manager
    from foundations_core_cli.job_submission.config import load
    from foundations_internal.change_directory import ChangeDirectory
    import os

    arguments = self._cli.arguments()
    env_name = arguments.scheduler_config
    job_id = arguments.job_id
    current_directory = os.getcwd()

    if arguments.save_dir is None:
        arguments.save_dir = os.path.join(current_directory, str(job_id))

    with ChangeDirectory(current_directory):
        load(arguments.scheduler_config or "scheduler")

    job_deployment_class = config_manager["deployment_implementation"][
        "deployment_type"
    ]
    job_deployment = job_deployment_class(job_id, None, None)

    job_status = job_deployment.get_job_status()

    if job_status is None:
        self._cli._fail_with_message(
            "Error: Job `{}` does not exist for environment `{}`".format(
                job_id, env_name
            )
        )
    else:
        if job_deployment.get_job_archive():
            print(f"Successfully retrieved Job {job_id} from archive store")
        else:
            print(f"Error: Could not download Job {job_id}")
def _test_command_that_loads_parameters_in_directory_for_python(
        self, command, script_directory, expected_loaded_parameters, check_for_warning):
    from foundations_internal.change_directory import ChangeDirectory
    import subprocess
    import json
    import os.path as path

    env = self._update_environment_with_home_directory()

    with ChangeDirectory(script_directory):
        env = None if check_for_warning else env
        completed_process = subprocess.run(command, stdout=subprocess.PIPE, env=env)
        process_output = completed_process.stdout.decode()
        warnings, _, params_json = process_output.strip().rpartition('\n')

    if check_for_warning:
        self.assertIn('Script not run with Foundations.', warnings)

    result_parameters = json.loads(params_json)
    self.assertEqual(expected_loaded_parameters, result_parameters)
def submit(arguments):
    from foundations_core_cli.job_submission.config import load
    from foundations_core_cli.job_submission.deployment import deploy
    from foundations_core_cli.job_submission.logs import stream_job_logs
    from foundations_internal.change_directory import ChangeDirectory
    from foundations_contrib.global_state import config_manager, log_manager
    from foundations_contrib.set_job_resources import set_job_resources
    from jsonschema import validate
    import yaml  # needed for reading job.config.yaml below
    import os
    import os.path

    current_directory = os.getcwd()
    with ChangeDirectory(arguments.job_directory or current_directory):
        load(arguments.scheduler_config or 'scheduler')

        job_config = {}
        if os.path.exists('job.config.yaml'):
            with open('job.config.yaml') as file:
                job_config = yaml.load(file.read(), Loader=yaml.FullLoader)

        # validate(instance=job_config, schema=_job_schema)

        job_resource_args = {}

        if 'log_level' in job_config:
            config_manager['log_level'] = job_config['log_level']
        if 'worker' in job_config:
            config_manager['worker_container_overrides'].update(job_config['worker'])
        if 'num_gpus' in job_config:
            job_resource_args['num_gpus'] = job_config['num_gpus']
        if 'ram' in job_config:
            job_resource_args['ram'] = job_config['ram']

        logger = log_manager.get_logger(__name__)

        if arguments.command:
            config_manager['worker_container_overrides']['args'] = arguments.command
            if not os.path.exists(arguments.command[0]):
                logger.warning(
                    f"Hey, seems like your command '{arguments.command[0]}' is not an existing file in your current directory. If you are using Atlas's advanced custom docker image functionality and know what you are doing, you can ignore this message."
                )
        else:
            logger.warning('No command was specified.')

        if arguments.num_gpus is not None:
            job_resource_args['num_gpus'] = arguments.num_gpus
        if arguments.ram is not None:
            job_resource_args['ram'] = arguments.ram
        set_job_resources(**job_resource_args)

        from foundations.global_state import current_foundations_context
        try:
            cur_job_id = current_foundations_context().pipeline_context().file_name
        except ValueError:
            cur_job_id = None

        deployment = deploy(
            arguments.project_name or job_config.get('project_name'),
            arguments.entrypoint or job_config.get('entrypoint'),
            arguments.params or job_config.get('params'))

        if arguments.stream_job_logs:
            try:
                stream_job_logs(deployment)
            except KeyboardInterrupt:
                pass

        if cur_job_id is not None:
            current_foundations_context().pipeline_context().file_name = cur_job_id

        return deployment
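# A minimal sketch of the job.config.yaml structure that submit() reads above. Only keys
# actually referenced in submit() are shown; every value here is hypothetical.
import yaml

example_job_config = {
    'project_name': 'my-project',               # used when arguments.project_name is not given
    'entrypoint': 'main.py',                    # used when arguments.entrypoint is not given
    'params': {'learning_rate': 0.01},          # used when arguments.params is not given
    'log_level': 'INFO',                        # copied into config_manager['log_level']
    'num_gpus': 0,                              # forwarded to set_job_resources
    'ram': 4,                                   # forwarded to set_job_resources
    'worker': {'args': ['python', 'main.py']},  # merged into worker_container_overrides
}

with open('job.config.yaml', 'w') as file:
    yaml.safe_dump(example_job_config, file)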