def _at_exit_callback():
    """Upload the current job's artifacts and publish its final status.

    Pushes a ``FailedJob`` message when the module-level
    ``_exception_happened`` flag is truthy, otherwise a ``CompleteJob``
    message.
    """
    from foundations_contrib.global_state import (
        current_foundations_job,
        message_router,
    )
    from foundations_contrib.archiving.upload_artifacts import upload_artifacts
    from foundations_events.producers.jobs import CompleteJob
    from foundations_events.producers.jobs import FailedJob

    global _exception_happened

    upload_artifacts(current_foundations_job().job_id)

    if not _exception_happened:
        CompleteJob(message_router, current_foundations_job()).push_message()
        return

    # The failure payload carries no exception text or traceback.
    failure_details = {"type": Exception, "exception": "", "traceback": []}
    failed_job = FailedJob(
        message_router,
        current_foundations_job(),
        failure_details,
    )
    failed_job.push_message()
def _log_param_in_running_job(key, value):
    """Record parameter ``key`` with ``value`` for the current job.

    Registers the key in the project's parameter and input-parameter sets,
    then stores the value and the input-parameter name against the job id.
    """
    from foundations_contrib.global_state import current_foundations_job, redis_connection

    job = current_foundations_job()
    project = job.project_name
    current_job_id = job.job_id

    # Project-level bookkeeping first, then job-level data.
    _insert_parameter_name_into_projects_params_set(redis_connection, project, key)
    _insert_input_parameter_name_into_projects_input_params_set(
        redis_connection, project, key
    )
    _insert_parameter_value_into_job_run_data(
        redis_connection, current_job_id, key, value
    )
    _insert_input_parameter_name_into_job_input_parameter_data(
        redis_connection, current_job_id, key
    )
def set_up(self):
    """Give the current job a fresh UUID under the 'default' project and queue it."""
    from uuid import uuid4
    from foundations_events.producers.jobs import QueueJob
    from foundations_contrib.global_state import message_router, current_foundations_job

    foundations.set_project_name('default')

    # A new random id per test keeps runs from sharing job state.
    self._job_id = str(uuid4())
    job = current_foundations_job()
    job.job_id = self._job_id

    QueueJob(message_router, job).push_message()
def _log_metric_in_running_job(key, value):
    """Publish a ``MetricLogged`` message for ``key``/``value`` on the current job."""
    from foundations_contrib.global_state import message_router, current_foundations_job
    from foundations_events.producers.metric_logged import MetricLogged

    job = current_foundations_job()
    producer = MetricLogged(
        message_router,
        job.project_name,
        job.job_id,
        key,
        value,
    )
    producer.push_message()
def set_job_resources(num_gpus=0, ram=None):
    """
    Specifies the resources to run a job with. The available amount will greatly depend on what is available on the infrastructure that the Foundations job orchestrator is set up on.

    Arguments:
        num_gpus {int} -- The number of GPUs to run the job with. Set to 0 to run with CPU resources instead. Defaults to 0.
        ram {number} -- The amount of ram in GB to use while running the job. Must be greater than 0 or None. If None, no limit will be set. Defaults to None.

    Returns:
        - This function doesn't return a value.

    Raises:
        ValueError -- If either the RAM or GPU quantity is an invalid value (ex: less than 0, or of a type that cannot be interpreted as a quantity).

    Notes:
        Setting the resources for a job from a given notebook or driver file will cause any additional jobs (ex: hyperparameter search)
        deployed from the same file and using the same process to use the same resources, unless specified otherwise.
        Calling set_job_resources() with no arguments requests 0 GPUs (CPU only) and no RAM limit.
    """
    # NOTE(review): earlier documentation suggested set_job_resources(1, None)
    # restores the defaults; confirm against the job's actual default resources.

    if ram is not None:
        try:
            ram_is_invalid = ram <= 0
        except TypeError:
            # Values that cannot be compared with a number (e.g. a string) are
            # reported as ValueError, matching the documented contract.
            ram_is_invalid = True

        if ram_is_invalid:
            raise ValueError(
                'Invalid RAM quantity. Please provide a RAM quantity greater than zero.'
            )

    if not isinstance(num_gpus, int) or num_gpus < 0:
        raise ValueError(
            'Invalid GPU quantity. Please provide a non-negative integer GPU quantity.'
        )

    # Both values are validated before the current job is touched, so an
    # invalid call leaves the previously configured resources in place.
    job_resources = JobResources(num_gpus, ram)
    current_foundations_job().job_resources = job_resources
def deploy(project_name, entrypoint, params):
    """Configure the current job and hand it to ``deploy_job``.

    Arguments:
        project_name -- Project to deploy under; when None, the name of the
            current working directory is used.
        entrypoint -- Script stored as ``script_to_run`` in the run-script
            environment configuration.
        params -- Job parameters; when not None they are written as JSON to
            ``foundations_job_parameters.json`` in the working directory.

    Returns:
        Whatever ``deploy_job`` returns.
    """
    import os
    import os.path as path
    import json
    from foundations_contrib.job_deployer import deploy_job
    from foundations_contrib.global_state import (
        current_foundations_job,
        redis_connection,
        config_manager,
    )

    resolved_project = project_name
    if resolved_project is None:
        resolved_project = path.basename(os.getcwd())

    job = current_foundations_job()
    job.project_name = resolved_project

    run_script_environment = {
        "script_to_run": entrypoint,
        "enable_stages": False,
    }
    config_manager["run_script_environment"] = run_script_environment

    job.user_name = _get_user_name_from_token()

    if params is not None:
        with open("foundations_job_parameters.json", "w+") as params_file:
            json.dump(params, params_file)

    return deploy_job(job, None, {})
def test_ram_set_less_than_or_equal_to_zero_does_not_actually_set_job_resources(self):
    """An invalid RAM value raises ValueError and leaves the job's resources at the default."""
    with self.assertRaises(ValueError):
        set_job_resources(self.num_gpus, self.invalid_ram)

    resources_after_failure = current_foundations_job().job_resources
    self.assertEqual(self.default_job_resources, resources_after_failure)
def _config():
    """Point every archive setting at a per-run local directory and fix the job id.

    Sets the current job id to ``"integration-test-job"`` and configures all
    archive implementations, plus the archive listing, to use a local
    file-system location under ``<cwd>/tmp`` that is unique to this call.
    """
    from uuid import uuid4
    from os import getcwd
    from foundations_contrib.global_state import (
        config_manager,
        current_foundations_job,
    )
    from foundations_contrib.local_file_system_pipeline_archive import LocalFileSystemPipelineArchive
    from foundations_contrib.local_file_system_pipeline_listing import LocalFileSystemPipelineListing

    # ensure a job uuid is set
    current_foundations_job().job_id = "integration-test-job"

    # a fresh uuid per call keeps test runs in separate directories
    run_uuid = uuid4()
    archive_root = getcwd() + "/tmp/archives_{}".format(run_uuid)

    # the same implementation dict is shared by every archive type below
    archive_implementation = {
        "archive_type": LocalFileSystemPipelineArchive,
        "constructor_arguments": [archive_root],
    }

    config_manager["archive_listing_implementation"] = {
        "archive_listing_type": LocalFileSystemPipelineListing,
        "constructor_arguments": [archive_root],
    }

    archive_config_keys = (
        "persisted_data_archive_implementation",
        "provenance_archive_implementation",
        "job_source_archive_implementation",
        "artifact_archive_implementation",
        "miscellaneous_archive_implementation",
    )
    for config_key in archive_config_keys:
        config_manager[config_key] = archive_implementation
def log_warning_if_not_running_in_job(function_if_running_in_job, *args):
    """Call ``function_if_running_in_job(*args)`` inside a running job; otherwise warn once.

    Outside a running job the function is not called. A warning is logged
    unless the log manager reports that it has already been printed, and the
    log manager is then told that it has been printed.
    """
    from foundations_contrib.global_state import log_manager, current_foundations_job

    if current_foundations_job().is_in_running_job():
        function_if_running_in_job(*args)
        return

    if log_manager.foundations_not_running_warning_printed():
        return

    log_manager.get_logger(__name__).warning('Script not run with Foundations.')
    log_manager.set_foundations_not_running_warning_printed()
def _set_tags(klass, job_name, tags):
    """Set each entry of ``tags`` via ``foundations.set_tag`` while the current job id is ``job_name``.

    The current Foundations job's ``job_id`` is set to ``job_name`` for the
    duration of the tagging (presumably so ``set_tag`` targets that job --
    confirm against ``set_tag``) and is reset to None afterwards.

    Arguments:
        klass -- First positional argument; not used by this function.
        job_name -- Value assigned to the current job's ``job_id`` while tagging.
        tags {dict} -- Mapping of tag key to tag value, or None to set no tags.
    """
    from foundations_contrib.global_state import current_foundations_job
    from foundations import set_tag

    foundations_job = current_foundations_job()
    foundations_job.job_id = job_name

    try:
        if tags is not None:
            for key, value in tags.items():
                set_tag(key, value)
    finally:
        # Clear the temporary job id even when set_tag raises, so a failure
        # here does not leave the shared job object pointing at job_name.
        foundations_job.job_id = None
def save_artifact(filepath, key=None):
    """Save the file at ``filepath`` as an artifact of the current job.

    Arguments:
        filepath -- Path of the file to save.
        key -- Optional key passed through to the artifact saver.

    When not inside a running job, a warning is logged and nothing is saved.
    """
    from foundations_contrib.global_state import log_manager, current_foundations_job

    logger = log_manager.get_logger(__name__)
    job = current_foundations_job()

    if job.is_in_running_job():
        _ArtifactSaver(logger, filepath, job.job_id, key).save_artifact()
    else:
        logger.warning('Cannot save artifact outside of job.')
def create_syncable_directory(key, directory_path=None, source_job_id=None):
    """Build a ``SyncableDirectory`` for ``key`` tied to the current job.

    Arguments:
        key -- Key passed to ``SyncableDirectory``.
        directory_path -- Local directory to use; a new temporary directory is
            created with ``mkdtemp`` when None.
        source_job_id -- Job id to use as the source; falls back to the
            current job id when falsy.

    Returns:
        The constructed ``SyncableDirectory``.
    """
    from foundations.artifacts.syncable_directory import SyncableDirectory
    from foundations_contrib.global_state import current_foundations_job
    from tempfile import mkdtemp

    local_path = mkdtemp() if directory_path is None else directory_path

    # Reading the job id may raise ValueError; treat that as "no job id".
    try:
        current_job_id = current_foundations_job().job_id
    except ValueError:
        current_job_id = None

    effective_source_id = source_job_id or current_job_id
    return SyncableDirectory(key, local_path, current_job_id, effective_source_id)
def set_up_job_environment():
    """Prepare the process to run as a Foundations job.

    Marks the configuration as a deployment, logs the configuration at debug
    level, sets the job state, pushes ``QueueJob`` then ``RunJob`` messages,
    registers the at-exit callback and installs exception handling.
    """
    from foundations_events.producers.jobs import QueueJob
    from foundations_events.producers.jobs import RunJob
    from foundations_contrib.global_state import (
        current_foundations_job,
        message_router,
        config_manager,
    )
    import atexit

    config_manager["_is_deployment"] = True

    logger = _get_logger()
    logger.debug(
        f"Foundations has been run with the following configuration:\n"
        f"{yaml.dump(config_manager.config(), default_flow_style=False)}")

    job = current_foundations_job()
    _set_job_state(job)

    # Announce the job as queued, then as running, in that order.
    for producer_type in (QueueJob, RunJob):
        producer_type(message_router, job).push_message()

    atexit.register(_at_exit_callback)
    _set_up_exception_handling()
def _configure():
    """Set the current job's id to the fixed value 'integration-test-job'."""
    from foundations_contrib.global_state import current_foundations_job

    job = current_foundations_job()
    job.job_id = 'integration-test-job'
def _set_job_id(self, job_id):
    """Assign ``job_id`` to the current Foundations job."""
    from foundations_contrib.global_state import current_foundations_job

    job = current_foundations_job()
    job.job_id = job_id
def test_set_job_resources_ram_defaults_to_none(self):
    """Omitting ``ram`` stores job resources whose RAM is None."""
    set_job_resources(num_gpus=self.num_gpus)

    expected = JobResources(self.num_gpus, None)
    actual = current_foundations_job().job_resources
    self.assertEqual(expected, actual)
def test_gpu_set_to_negative_value_not_actually_set_job_resources(self):
    """A negative GPU count raises ValueError and leaves the job's resources at the default."""
    with self.assertRaises(ValueError):
        set_job_resources(self.negative_gpus, self.ram)

    resources_after_failure = current_foundations_job().job_resources
    self.assertEqual(self.default_job_resources, resources_after_failure)
def test_ram_set_to_none_is_valid_configuration(self):
    """Passing ``ram=None`` explicitly is accepted and stored as given."""
    set_job_resources(self.num_gpus, None)

    actual = current_foundations_job().job_resources
    expected = JobResources(num_gpus=self.num_gpus, ram=None)
    self.assertEqual(expected, actual)
def test_set_job_resources_sets_job_resources_object_in_current_foundations_job(self):
    """Valid GPU and RAM values are stored on the current job as job resources."""
    set_job_resources(self.num_gpus, self.ram)

    stored_resources = current_foundations_job().job_resources
    self.assertEqual(self.job_resources, stored_resources)
def tear_down(self):
    """Reset the current job's resources after each test."""
    job = current_foundations_job()
    job.reset_job_resources()
def test_set_job_resources_num_gpus_defaults_to_zero(self):
    """Omitting ``num_gpus`` stores job resources with a GPU count of 0."""
    set_job_resources(ram=self.ram)

    expected = JobResources(0, self.ram)
    actual = current_foundations_job().job_resources
    self.assertEqual(expected, actual)
import foundations
import json

from foundations_contrib.global_state import current_foundations_job

# Load the job parameters, then print the current job id followed by the
# parameters serialized as JSON (one value per line).
params = foundations.load_parameters()

print(current_foundations_job().job_id)
print(json.dumps(params))
def _foundations_job(self):
    """Return the current Foundations job from global state."""
    from foundations_contrib.global_state import current_foundations_job

    job = current_foundations_job()
    return job
import os

import foundations
from foundations_contrib.global_state import current_foundations_job, message_router
from foundations_events.producers.jobs import RunJob

foundations.set_project_name('default')

# The job id is read from the ACCEPTANCE_TEST_JOB_ID environment variable.
job_id = os.environ['ACCEPTANCE_TEST_JOB_ID']
current_foundations_job().job_id = job_id

# Mark the job as running before logging a metric against it.
run_job = RunJob(message_router, current_foundations_job())
run_job.push_message()

foundations.log_metric('key', 'value')
print('Hello World!')