def setup_method(self):
    # provide mountable tmp directory for docker
    tempfile.tempdir = "/tmp" if not platform.system() == "Windows" else None
    test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                    tempfile.gettempdir())
    self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
    self.project = ProjectController(self.temp_dir)
    self.file_collection = FileCollectionController(self.temp_dir)
class ProjectController(BaseController):
    """ProjectController inherits from BaseController and manages business logic related to the
    project. Currently, one model is associated with each project.

    Methods
    -------
    init(name, description)
        Initialize the project repository as a new model or update the existing project
    cleanup()
        Remove all datmo references from the current repository. NOTE: THIS WILL DELETE ALL DATMO WORK
    status()
        Give the user a picture of the status of the project, snapshots, and tasks
    """

    def __init__(self):
        super(ProjectController, self).__init__()

    def init(self, name, description):
        """Initialize the project

        This function will initialize the project, or reinitialize it if the
        project is already initialized.

        Parameters
        ----------
        name : str
        description : str

        Returns
        -------
        bool
        """
        is_new_model = False
        old_model = self.model
        if not self.model:
            is_new_model = True
        try:
            # Always validate inputs to the init function
            validate("create_project", {
                "name": name,
                "description": description
            })
            # Initialize File Driver if needed
            if not self.file_driver.is_initialized:
                self.file_driver.init()
            # Initialize the dal
            if not self.dal.is_initialized:
                self.dal.init()
            # Initialize Code Driver if needed
            if not self.code_driver.is_initialized:
                self.code_driver.init()
            # Initialize Environment Driver if needed
            if not self.environment_driver.is_initialized:
                self.environment_driver.init()
            # Initialize the config JSON store
            self.config_store = JSONStore(
                os.path.join(self.home,
                             Config().datmo_directory_name, ".config"))
            # Create model if new, else update
            if is_new_model:
                _ = self.dal.model.create(
                    Model({
                        "name": name,
                        "description": description
                    }))
            else:
                self._model = self.dal.model.update({
                    "id": self.model.id,
                    "name": name,
                    "description": description
                })
            # Connect Environment Driver if needed
            # (not required but will warn if not present)
            try:
                if not self.environment_driver.is_connected:
                    self.environment_driver.connect()
            except EnvironmentConnectFailed:
                self.logger.warning(
                    __("warn", "controller.general.environment.failed"))
            # Build the initial default Environment (NOT NECESSARY)
            # self.environment_driver.build_image(tag="datmo-" + \
            #                                     self.model.name)
            return True
        except Exception:
            # If any error occurred with a new model, ensure no initialize occurs and raise the previous error
            # If any error occurred with an existing model, ensure no updates were made and raise the previous error
            if is_new_model:
                self.cleanup()
            else:
                self._model = self.dal.model.update({
                    "id": old_model.id,
                    "name": old_model.name,
                    "description": old_model.description
                })
            raise

    def cleanup(self):
        """Cleans the project structure completely

        Notes
        -----
        This function will not error out but will gracefully exit, since
        it is used in cases where init fails as a check against
        mid-initialized projects

        Returns
        -------
        bool
        """
        if not self.is_initialized:
            self.logger.warning(
                __("warn", "controller.project.cleanup.not_init"))
        # Remove Datmo environment_driver references, give warning if error
        image_id = None  # guard so the removal block below cannot hit a NameError
        try:
            # Obtain image id before cleaning up, if it exists
            images = self.environment_driver.list_images(name="datmo-" + \
                                                         self.model.name)
            image_id = images[0].id if images else None
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))
        # Remove Datmo code_driver references, give warning if error
        try:
            if self.code_driver.is_initialized:
                for ref in self.code_driver.list_refs():
                    self.code_driver.delete_ref(ref)
        except Exception:
            self.logger.warning(__("warn", "controller.project.cleanup.code"))
        # Remove hidden Datmo file structure, give warning if error
        try:
            self.file_driver.delete_hidden_datmo_file_structure()
        except (FileIOError, PathDoesNotExist):
            self.logger.warning(
                __("warn", "controller.project.cleanup.files"))
        try:
            if image_id:
                # Remove image created during init
                self.environment_driver.remove_image(
                    image_id_or_name=image_id, force=True)
                # Remove any dangling images (optional)
                # Stop and remove all running environments with image_id
                self.environment_driver.stop_remove_containers_by_term(
                    image_id, force=True)
        except Exception:
            self.logger.warning(
                __("warn", "controller.project.cleanup.environment"))
        return True

    def status(self):
        """Return the project status information if initialized

        Returns
        -------
        status_dict : dict
            dictionary with project metadata and config
        current_snapshot : datmo.core.entity.snapshot.Snapshot
            snapshot object of the current state of the repo if present else None
        latest_snapshot_user_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated by the user if present else None
        latest_snapshot_auto_generated : datmo.core.entity.snapshot.Snapshot
            snapshot object of the latest snapshot generated automatically by datmo if present else None
        unstaged_code : bool
            True if code has unstaged changes
        unstaged_environment : bool
            True if environment has unstaged changes
        unstaged_files : bool
            True if files have unstaged changes
        """
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.project.status"))
        # TODO: Add in note when environment is not setup or initialized
        # Add in project metadata
        status_dict = self.model.to_dictionary().copy()
        # Find all project settings
        status_dict["config"] = self.config_store.to_dict()
        # Find the latest snapshot generated by the user
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": True
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_user_generated = descending_snapshots[
            0] if descending_snapshots else None
        # Find the latest snapshot generated automatically by datmo
        descending_snapshots = self.dal.snapshot.query(
            {
                "visible": False
            }, sort_key="created_at", sort_order="descending")
        latest_snapshot_auto_generated = descending_snapshots[
            0] if descending_snapshots else None
        # TODO: add in latest run
        self.code_controller = CodeController()
        try:
            unstaged_code = self.code_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_code = True
        self.environment_controller = EnvironmentController()
        try:
            unstaged_environment = self.environment_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_environment = True
        self.file_collection_controller = FileCollectionController()
        try:
            unstaged_files = self.file_collection_controller.check_unstaged_changes()
        except UnstagedChanges:
            unstaged_files = True
        # If it exists, obtain the current snapshot; if there are unstaged changes, it will be None
        self.snapshot_controller = SnapshotController()
        try:
            current_snapshot = self.snapshot_controller.current_snapshot()
        except UnstagedChanges:
            current_snapshot = None
        return status_dict, current_snapshot, latest_snapshot_user_generated, latest_snapshot_auto_generated, \
            unstaged_code, unstaged_environment, unstaged_files
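
# Illustrative usage sketch (not part of the original module): how a caller
# might drive ProjectController end to end. It assumes Config().set_home()
# is how the project home is selected (as the tests in this repo do) and
# that `project_dir` is a writable directory; the helper name
# `_example_project_status` is hypothetical.
def _example_project_status(project_dir):
    Config().set_home(project_dir)
    project_controller = ProjectController()
    project_controller.init("example-project", "example description")
    # status() returns a 7-tuple, as documented above
    (status_dict, current_snapshot, latest_user_snapshot, latest_auto_snapshot,
     unstaged_code, unstaged_environment, unstaged_files) = \
        project_controller.status()
    # Summarize the unstaged state across code, environment, and files
    any_unstaged = unstaged_code or unstaged_environment or unstaged_files
    return status_dict, current_snapshot, any_unstaged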
class TestFileCollectionController():
    def setup_method(self):
        # provide mountable tmp directory for docker
        tempfile.tempdir = "/tmp" if not platform.system() == "Windows" else None
        test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                        tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        Config().set_home(self.temp_dir)
        self.project_controller = ProjectController()
        self.file_collection_controller = FileCollectionController()

    def teardown_method(self):
        pass

    def __setup(self):
        # Create the files in the project files directory
        dirpath1 = os.path.join(
            self.file_collection_controller.file_driver.files_directory,
            "dirpath1")
        os.makedirs(dirpath1)
        filepath1 = os.path.join(
            self.file_collection_controller.file_driver.files_directory,
            "filepath1")
        with open(filepath1, "wb") as _:
            pass
        return filepath1, dirpath1

    def test_current_file_collection(self):
        self.project_controller.init("test3", "test description")
        _ = self.__setup()
        # Test failure because of unstaged changes
        failed = False
        try:
            self.file_collection_controller.current_file_collection()
        except UnstagedChanges:
            failed = True
        assert failed
        # Test success with created files
        file_collection_obj = self.file_collection_controller.create([])
        current_file_collection_obj = self.file_collection_controller.current_file_collection()
        assert current_file_collection_obj == file_collection_obj

    def test_create(self):
        self.project_controller.init("test3", "test description")
        # Test failure: creation of a collection fails if no paths are given
        failed = False
        try:
            self.file_collection_controller.create()
        except TypeError:
            failed = True
        assert failed
        # Test create success with paths
        paths = self.__setup()
        file_collection_obj = self.file_collection_controller.create(paths)
        assert file_collection_obj
        assert file_collection_obj.id
        assert file_collection_obj.path
        assert file_collection_obj.driver_type
        assert file_collection_obj.filehash == "74be16979710d4c4e7c6647856088456"
        # Test create success without paths (should be the same as previous)
        file_collection_obj_1 = self.file_collection_controller.create([])
        assert file_collection_obj_1 == file_collection_obj
        assert file_collection_obj_1.id == file_collection_obj.id
        assert file_collection_obj_1.path == file_collection_obj.path
        assert file_collection_obj_1.driver_type == file_collection_obj.driver_type
        assert file_collection_obj_1.filehash == file_collection_obj.filehash
        # Test create success with paths again (should be the same as previous)
        file_collection_obj_2 = self.file_collection_controller.create(paths)
        assert file_collection_obj_2 == file_collection_obj_1
        assert file_collection_obj_2.id == file_collection_obj_1.id
        assert file_collection_obj_2.path == file_collection_obj_1.path
        assert file_collection_obj_2.driver_type == file_collection_obj_1.driver_type
        assert file_collection_obj_2.filehash == file_collection_obj_1.filehash
        # Test file collection with empty paths (should be the same as previous)
        file_collection_obj_3 = self.file_collection_controller.create([])
        assert file_collection_obj_3 == file_collection_obj_2
        assert file_collection_obj_3.id == file_collection_obj_2.id
        assert file_collection_obj_3.path == file_collection_obj_2.path
        assert file_collection_obj_3.driver_type == file_collection_obj_2.driver_type
        assert file_collection_obj_3.filehash == file_collection_obj_2.filehash

    def test_list(self):
        self.project_controller.init("test4", "test description")
        paths_1 = self.__setup()
        filepath2 = os.path.join(self.file_collection_controller.home,
                                 "filepath2")
        with open(filepath2, "wb") as f:
            f.write(to_bytes("test" + "\n"))
        paths_2 = [filepath2]
        file_collection_obj_1 = self.file_collection_controller.create(paths_1)
        file_collection_obj_2 = self.file_collection_controller.create(paths_2)
        # List all file collections and ensure they exist
        result = self.file_collection_controller.list()
        assert len(result) == 2 and \
            file_collection_obj_1 in result and \
            file_collection_obj_2 in result

    def test_delete(self):
        self.project_controller.init("test5", "test description")
        paths = self.__setup()
        file_collection_obj = self.file_collection_controller.create(paths)
        # Delete the file collection in the project
        result = self.file_collection_controller.delete(file_collection_obj.id)
        # Check if file collection retrieval throws error
        thrown = False
        try:
            self.file_collection_controller.dal.file_collection.get_by_id(
                file_collection_obj.id)
        except EntityNotFound:
            thrown = True
        assert result == True and \
            thrown == True

    def test_exists_file(self):
        self.project_controller.init("test6", "test description")
        paths = self.__setup()
        file_collection_obj = self.file_collection_controller.create(paths)
        # Check for file_collection_id
        result = self.file_collection_controller.exists(
            file_collection_id=file_collection_obj.id)
        assert result
        # Check for file_hash in file_collection
        result = self.file_collection_controller.exists(
            file_hash=file_collection_obj.filehash)
        assert result
        # Check for an invalid file_collection_id
        result = self.file_collection_controller.exists(
            file_collection_id="test_file_collection_id")
        assert not result

    def test_calculate_project_files_hash(self):
        self.project_controller.init("test7", "test description")
        filepath1, dirpath1 = self.__setup()
        # Test the hash for 1 blank filepath and 1 empty directory
        result = self.file_collection_controller._calculate_project_files_hash()
        assert result == "74be16979710d4c4e7c6647856088456"

    def test_has_unstaged_changes(self):
        self.project_controller.init("test8", "test description")
        # Create the files in the project files directory
        paths = self.__setup()
        # Test when there are unstaged changes
        result = self.file_collection_controller._has_unstaged_changes()
        assert result
        # Save the file collection
        self.file_collection_controller.create(paths)
        # Test when there are no unstaged changes
        result = self.file_collection_controller._has_unstaged_changes()
        assert not result
        # Change the file contents
        with open(paths[0], "wb") as f:
            f.write(to_bytes("hello"))
        # Test when there are unstaged changes again
        result = self.file_collection_controller._has_unstaged_changes()
        assert result

    def test_check_unstaged_changes(self):
        self.project_controller.init("test9", "test description")
        # Create the files in the project files directory
        paths = self.__setup()
        # Test when there are unstaged changes
        failed = False
        try:
            _ = self.file_collection_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed
        # Save the file collection
        self.file_collection_controller.create(paths)
        # Test when there are no unstaged changes
        result = self.file_collection_controller.check_unstaged_changes()
        assert not result
        # Change the file contents
        with open(paths[0], "wb") as f:
            f.write(to_bytes("hello"))
        # Test when there are unstaged changes again
        failed = False
        try:
            _ = self.file_collection_controller.check_unstaged_changes()
        except UnstagedChanges:
            failed = True
        assert failed
        # Test when there are no files (should be staged)
        os.remove(paths[0])
        shutil.rmtree(paths[1])
        result = self.file_collection_controller.check_unstaged_changes()
        assert not result

    def test_checkout(self):
        self.project_controller.init("test9", "test description")
        # Create the files in the project files directory
        paths = self.__setup()
        # Create a file collection to checkout to with paths
        file_collection_obj = self.file_collection_controller.create(paths)
        # Checkout succeeds when there are no unstaged changes
        result = self.file_collection_controller.checkout(
            file_collection_obj.id)
        assert result
        current_hash = self.file_collection_controller._calculate_project_files_hash()
        assert current_hash == "74be16979710d4c4e7c6647856088456"
        assert file_collection_obj.filehash == current_hash
        # Check the filenames as well because the hash does not take them into account
        assert os.path.isfile(paths[0])
        # Change file contents to make it unstaged
        with open(paths[0], "wb") as f:
            f.write(to_bytes("hello"))
        # Checkout fails when there are unstaged changes
        failed = False
        try:
            _ = self.file_collection_controller.checkout(
                file_collection_obj.id)
        except UnstagedChanges:
            failed = True
        assert failed
        # Create a new file collection with paths
        file_collection_obj_1 = self.file_collection_controller.create(paths)
        # Checkout back succeeds when there are no unstaged changes
        result = self.file_collection_controller.checkout(
            file_collection_obj.id)
        assert result
        current_hash = self.file_collection_controller._calculate_project_files_hash()
        assert current_hash == "74be16979710d4c4e7c6647856088456"
        assert file_collection_obj.filehash == current_hash
        assert file_collection_obj_1.filehash != current_hash
        # Check the filenames as well because the hash does not take them into account
        assert os.path.isfile(paths[0])
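
# Note: the test class above follows pytest conventions (a bare class with
# setup_method/teardown_method and plain asserts), so it can be run with,
# for example: `pytest -q -k TestFileCollectionController`. The
# TEST_DATMO_DIR environment variable read in setup_method redirects the
# temporary project home (e.g. to a docker-mountable path).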
class EnvironmentController(BaseController):
    """EnvironmentController inherits from BaseController and manages business logic related to the
    environment.

    Methods
    -------
    create(dictionary)
        Create an environment within the project
    build(id)
        Build the environment for use within the project
    list()
        List all environments within the project
    delete(id)
        Delete the specified environment from the project
    """

    def __init__(self):
        super(EnvironmentController, self).__init__()
        self.file_collection = FileCollectionController()
        self.spinner = Spinner()
        if not self.is_initialized:
            raise ProjectNotInitialized(
                __("error", "controller.environment.__init__"))

    def get_supported_environments(self):
        """Get all the supported environments

        Returns
        -------
        list
            list of all available environments
        """
        return self.environment_driver.get_supported_environments()

    def setup(self, options, save_hardware_file=True):
        """Create a pre-defined supported environment and add it to the project environment directory

        The user can build on top of the pre-defined environment and create new ones of their own

        Parameters
        ----------
        options : dict
            can include the following values:

            name : str
                the name to be used to specify a supported environment
        save_hardware_file : bool, optional
            boolean to save hardware file along with other files
            (default is True to save the file and create distinct hashes based on software and hardware)

        Returns
        -------
        Environment
            returns an object representing the environment created

        Raises
        ------
        UnstagedChanges
            if unstaged changes exist in the environment, setup will fail
        """
        # Check unstaged changes before trying to setup
        try:
            self.check_unstaged_changes()
        except UnstagedChanges:
            raise UnstagedChanges(
                __("error", "controller.environment.setup.unstaged",
                   self.file_driver.environment_directory))
        try:
            _ = self.environment_driver.setup(
                options,
                definition_path=self.file_driver.environment_directory)
        except Exception:
            raise
        create_dict = {
            "name": options['name'],
            "description": "supported base environment created by datmo"
        }
        return self.create(create_dict, save_hardware_file=save_hardware_file)

    def create(self, dictionary, save_hardware_file=True):
        """Create an environment

        Parameters
        ----------
        dictionary : dict
            optional values to populate required environment entity args

            paths : list, optional
                list of absolute or relative filepaths and/or dirpaths to collect with destination names
                (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")
                (default if none provided is to pull from the project environment folder and project
                root; if none are found, a default definition is created)
            name : str, optional
                name of the environment
                (default is None)
            description : str, optional
                description of the environment
                (default is None)
        save_hardware_file : bool
            boolean to save hardware file along with other files
            (default is True to save the file and create distinct hashes based on software and hardware)

        Returns
        -------
        Environment
            returns an object representing the environment created

        Raises
        ------
        EnvironmentDoesNotExist
            if there is no environment found after given parameters and defaults are checked
        PathDoesNotExist
            if any source paths provided do not exist
        """
        # Validate Inputs
        create_dict = {"model_id": self.model.id}
        create_dict["driver_type"] = self.environment_driver.type
        validate("create_environment", dictionary)

        # Create temp environment folder
        _temp_env_dir = get_datmo_temp_path(self.home)

        # Step 1: Populate a path list from the user inputs in a format compatible
        # with the input of the File Collection create function
        paths = []
        # a. add in user given paths as is if they exist
        if "paths" in dictionary and dictionary['paths']:
            paths.extend(dictionary['paths'])
        # b. if the project environment directory exists AND no paths exist, add in absolute paths
        if not paths and os.path.isdir(self.file_driver.environment_directory):
            paths.extend([
                os.path.join(self.file_driver.environment_directory, filepath)
                for filepath in list_all_filepaths(
                    self.file_driver.environment_directory)
            ])
        # c. add in the default environment definition filepath as specified by the environment driver
        # if the path exists and NO OTHER PATHS exist
        src_environment_filename = self.environment_driver.get_default_definition_filename()
        src_environment_filepath = os.path.join(self.home,
                                                src_environment_filename)
        _, environment_filename = os.path.split(src_environment_filepath)
        create_dict['definition_filename'] = environment_filename
        if not paths and os.path.exists(src_environment_filepath):
            paths.append(src_environment_filepath)

        # Step 2: Check existing paths and create files as needed to populate the
        # full environment within the temporary directory
        paths = self._setup_compatible_environment(
            create_dict,
            paths,
            _temp_env_dir,
            save_hardware_file=save_hardware_file)

        # Step 3: Pass in all paths for the environment to the file collection create
        # If PathDoesNotExist is found for any source paths, then error
        if not paths:
            raise EnvironmentDoesNotExist()
        try:
            file_collection_obj = self.file_collection.create(paths)
        except PathDoesNotExist as e:
            raise PathDoesNotExist(
                __("error", "controller.environment.create.filepath.dne",
                   str(e)))

        # Step 4: Add file collection information to create dict and check unique hash
        create_dict['file_collection_id'] = file_collection_obj.id
        create_dict['unique_hash'] = file_collection_obj.filehash
        # Check if the unique hash already exists.
        # If it does, DO NOT CREATE a new Environment and return the existing Environment object
        results = self.dal.environment.query(
            {"unique_hash": file_collection_obj.filehash})
        if results:
            # Clean up the temporary directory before returning the existing environment
            shutil.rmtree(_temp_env_dir)
            return results[0]

        # Step 5: Delete the temporary directory
        shutil.rmtree(_temp_env_dir)

        # Step 6: Add optional arguments to the Environment entity
        for optional_arg in ["name", "description"]:
            if optional_arg in dictionary:
                create_dict[optional_arg] = dictionary[optional_arg]

        # Step 7: Create environment and return
        return self.dal.environment.create(Environment(create_dict))

    def build(self, environment_id):
        """Build environment from definition file

        Parameters
        ----------
        environment_id : str
            environment object id to build

        Returns
        -------
        bool
            returns True if success

        Raises
        ------
        EnvironmentDoesNotExist
            if the specified Environment does not exist.
        """
        self.environment_driver.init()
        if not self.exists(environment_id):
            raise EnvironmentDoesNotExist(
                __("error", "controller.environment.build", environment_id))
        environment_obj = self.dal.environment.get_by_id(environment_id)
        file_collection_obj = self.dal.file_collection.\
            get_by_id(environment_obj.file_collection_id)
        # TODO: Check hardware info here if different from creation time
        # Build the Environment with the driver
        datmo_definition_filepath = os.path.join(
            self.home, file_collection_obj.path,
            "datmo" + environment_obj.definition_filename)
        try:
            self.spinner.start()
            result = self.environment_driver.build(
                environment_id, path=datmo_definition_filepath)
        finally:
            self.spinner.stop()
        return result

    def run(self, environment_id, options, log_filepath):
        """Run and log an instance of the environment with the options given

        Parameters
        ----------
        environment_id : str
        options : dict
            can include the following values:

            command : list, optional
            ports : list, optional
                Here are some example ports used for common applications.
                * 'jupyter notebook' - 8888
                * flask API - 5000
                * tensorboard - 6006
                An example input for the above would be
                ["8888:8888", "5000:5000", "6006:6006"], which maps the
                running host port (right) to that of the environment (left)
            name : str, optional
            volumes : dict, optional
            mem_limit : str, optional
            gpu : bool, default False
            detach : bool, optional
            stdin_open : bool, optional
            tty : bool, optional
        log_filepath : str
            filepath to the log file

        Returns
        -------
        return_code : int
            system return code for container and logs
        run_id : str
            identification for the run of the environment
        logs : str
            string version of output logs for the container
        """
        self.environment_driver.init()
        # TODO: Check hardware info here if different from creation time
        final_return_code, run_id, logs = \
            self.environment_driver.run(environment_id, options, log_filepath)
        return final_return_code, run_id, logs

    def list(self):
        # TODO: Add time filters
        return self.dal.environment.query({})

    def update(self, environment_id, name=None, description=None):
        """Update the environment metadata"""
        if not self.exists(environment_id):
            raise EnvironmentDoesNotExist()
        update_environment_input_dict = {"id": environment_id}
        if name:
            update_environment_input_dict['name'] = name
        if description:
            update_environment_input_dict['description'] = description
        return self.dal.environment.update(update_environment_input_dict)

    def delete(self, environment_id):
        """Delete all traces of an environment

        Parameters
        ----------
        environment_id : str
            environment object id to remove

        Returns
        -------
        bool
            True if success

        Raises
        ------
        EnvironmentDoesNotExist
            if the specified Environment does not exist.
        """
        self.environment_driver.init()
        if not self.exists(environment_id):
            raise EnvironmentDoesNotExist(
                __("error", "controller.environment.delete", environment_id))
        # Remove file collection
        environment_obj = self.dal.environment.get_by_id(environment_id)
        file_collection_deleted = self.file_collection.delete(
            environment_obj.file_collection_id)
        # Remove artifacts associated with the environment_driver
        environment_artifacts_removed = self.environment_driver.remove(
            environment_id, force=True)
        # Delete environment_driver object
        delete_success = self.dal.environment.delete(environment_obj.id)
        return file_collection_deleted and environment_artifacts_removed and \
            delete_success

    def stop(self, run_id=None, match_string=None, all=False):
        """Stop the trace of a running environment

        Parameters
        ----------
        run_id : str, optional
            stop environment with specific run id
            (default is None, which means it is not used)
        match_string : str, optional
            stop environment with a string to match the environment name
            (default is None, which means it is not used)
        all : bool, optional
            stop all environments

        Notes
        -----
        The user must provide exactly one of the above; if multiple or none
        are given, the function will error

        Returns
        -------
        bool
            True if success

        Raises
        ------
        RequiredArgumentMissing
        TooManyArgumentsFound
        """
        self.environment_driver.init()
        if not (run_id or match_string or all):
            raise RequiredArgumentMissing()
        if sum(map(bool, [run_id, match_string, all])) > 1:
            raise TooManyArgumentsFound()
        stop_success = False
        if run_id:
            # Stop the instance (e.g. container) running via the environment driver (e.g. docker)
            stop_success = self.environment_driver.stop(run_id, force=True)
        if match_string:
            # Stop all tasks matching the string given
            stop_success = self.environment_driver.stop_remove_containers_by_term(
                term=match_string, force=True)
        if all:
            # Stop all tasks associated with the enclosed project
            all_match_string = "datmo-task-" + self.model.id
            stop_success = self.environment_driver.stop_remove_containers_by_term(
                term=all_match_string, force=True)
        return stop_success

    def exists(self, environment_id=None, environment_unique_hash=None):
        """Returns a boolean for whether the environment exists

        Parameters
        ----------
        environment_id : str
            environment id to check for
        environment_unique_hash : str
            unique hash for the environment to check for

        Returns
        -------
        bool
            True if exists else False
        """
        if environment_id:
            environment_objs = self.dal.environment.query(
                {"id": environment_id})
        elif environment_unique_hash:
            environment_objs = self.dal.environment.query(
                {"unique_hash": environment_unique_hash})
        else:
            raise ArgumentError()
        env_exists = False
        if environment_objs:
            env_exists = True
        return env_exists

    def check_unstaged_changes(self):
        """Checks if there exist any unstaged changes for the environment in the project
        environment directory.

        Returns
        -------
        bool
            False if there are no unstaged changes

        Raises
        ------
        EnvironmentNotInitialized
            if the environment is not initialized (must initialize first)
        UnstagedChanges
            if unstaged changes exist in the environment
        """
        if not self.is_initialized:
            raise EnvironmentNotInitialized()
        # Check if unstaged changes exist
        if self._has_unstaged_changes():
            raise UnstagedChanges()
        return False

    def checkout(self, environment_id):
        """Checkout to a specific environment id

        Parameters
        ----------
        environment_id : str
            environment id to checkout to

        Returns
        -------
        bool
            True if success

        Raises
        ------
        EnvironmentNotInitialized
            if the environment is not initialized (must initialize first)
        EnvironmentDoesNotExist
            if the environment id does not exist
        UnstagedChanges
            if unstaged changes exist in the environment
        """
        if not self.is_initialized:
            raise EnvironmentNotInitialized()
        if not self.exists(environment_id):
            raise EnvironmentDoesNotExist(
                __("error", "controller.environment.checkout_env",
                   environment_id))
        # Check if unstaged changes exist
        if self._has_unstaged_changes():
            raise UnstagedChanges()
        # Check if the environment hash is the same as the current one
        results = self.dal.environment.query({"id": environment_id})
        environment_obj = results[0]
        environment_hash = environment_obj.unique_hash
        if self._calculate_project_environment_hash() == environment_hash:
            return True
        # Remove all content from the project environment directory
        for file in os.listdir(self.file_driver.environment_directory):
            file_path = os.path.join(self.file_driver.environment_directory,
                                     file)
            try:
                if os.path.isfile(file_path):
                    os.remove(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print(e)
        # Add in files for that environment id
        file_collection_obj = self.dal.file_collection.\
            get_by_id(environment_obj.file_collection_id)
        environment_definition_path = os.path.join(self.home,
                                                   file_collection_obj.path)
        # Copy to a temp folder and remove files that are datmo specific
        _temp_env_dir = get_datmo_temp_path(self.home)
        self.file_driver.copytree(environment_definition_path, _temp_env_dir)
        for filename in self.environment_driver.get_datmo_definition_filenames():
            os.remove(os.path.join(_temp_env_dir, filename))
        # Copy from the temp folder to the project environment directory
        self.file_driver.copytree(_temp_env_dir,
                                  self.file_driver.environment_directory)
        shutil.rmtree(_temp_env_dir)
        return True

    def _setup_compatible_environment(self,
                                      create_dict,
                                      paths,
                                      directory,
                                      save_hardware_file=True):
        """Setup a compatible environment from user paths. Creates the necessary datmo files if
        they are not already present

        Parameters
        ----------
        create_dict : dict
            dictionary for entity creation, this is mutated in the function (not returned)
        paths : list
            list of absolute or relative filepaths and/or dirpaths to collect with destination names
            (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")
        directory : str
            path of directory to save additional files to
        save_hardware_file : bool
            boolean to save hardware file along with other files
            (default is True to save the file and create distinct hashes based on software and hardware)

        Returns
        -------
        paths : list
            returns the input paths with the paths of the new files created appended
        """
        # a. look for the default definition; if not present, add it to the directory and to paths
        original_definition_filepath = ""
        if all(create_dict['definition_filename'] not in path
               for path in paths):
            self.environment_driver.create_default_definition(directory)
            original_definition_filepath = os.path.join(
                directory, create_dict['definition_filename'])
            paths.append(original_definition_filepath)
        else:
            for idx, path in enumerate(paths):
                if create_dict['definition_filename'] in path:
                    src_path, dest_path = parse_path(path)
                    original_definition_filepath = src_path
        # b. use the default definition to create a datmo definition in the directory, and add to paths
        datmo_definition_filepath = \
            os.path.join(directory, "datmo" + create_dict['definition_filename'])
        if not os.path.isfile(datmo_definition_filepath):
            _, original_definition_filepath, datmo_definition_filepath = \
                self.environment_driver.create(
                    path=original_definition_filepath,
                    output_path=datmo_definition_filepath)
        paths.append(datmo_definition_filepath)
        # c. get the hardware info and save it to the entity; if save_hardware_file is True
        # then save it to file and add it to the paths
        create_dict['hardware_info'] = self.environment_driver.get_hardware_info()
        if save_hardware_file:
            hardware_info_filepath = os.path.join(directory, "hardware_info")
            _ = JSONStore(
                hardware_info_filepath,
                initial_dict=create_dict['hardware_info'])
            paths.append(hardware_info_filepath)
        return paths

    def _calculate_project_environment_hash(self, save_hardware_file=True):
        """Return the environment hash from contents in the project environment directory

        Parameters
        ----------
        save_hardware_file : bool
            include the hardware info file within the hash

        Returns
        -------
        str
            unique hash of the project environment directory
        """
        # Populate paths from the project environment directory
        paths = []
        if os.path.isdir(self.file_driver.environment_directory):
            paths.extend([
                os.path.join(self.file_driver.environment_directory, filepath)
                for filepath in list_all_filepaths(
                    self.file_driver.environment_directory)
            ])
        # Create a temp dir to save any additional files necessary
        _temp_dir = get_datmo_temp_path(self.home)
        # Setup a compatible environment and add the created paths
        paths = self._setup_compatible_environment(
            {
                "definition_filename":
                    self.environment_driver.get_default_definition_filename()
            },
            paths,
            _temp_dir,
            save_hardware_file=save_hardware_file)
        # Create a new temp directory
        _temp_dir_2 = get_datmo_temp_path(self.home)
        # Hash the paths of the environment with a different temp dir
        dirhash = self.file_driver.calculate_hash_paths(paths, _temp_dir_2)
        # Remove both temporary directories
        shutil.rmtree(_temp_dir)
        shutil.rmtree(_temp_dir_2)
        return dirhash

    def _has_unstaged_changes(self):
        """Return whether there are unstaged changes"""
        env_hash = self._calculate_project_environment_hash()
        env_hash_no_hardware = self._calculate_project_environment_hash(
            save_hardware_file=False)
        environment_files = list_all_filepaths(
            self.file_driver.environment_directory)
        if self.exists(environment_unique_hash=env_hash) or self.exists(
                environment_unique_hash=env_hash_no_hardware
        ) or not environment_files:
            return False
        return True
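
# Illustrative usage sketch (not part of the original module): the typical
# setup -> build -> run -> stop flow for EnvironmentController. The option
# values below (the environment name "xgboost:cpu", the command, the port
# mapping, and the log path) are made-up placeholders; only the method
# signatures come from the class above.
def _example_environment_lifecycle():
    environment_controller = EnvironmentController()
    # Pick one of the pre-defined environments supported by the driver
    environment_obj = environment_controller.setup({"name": "xgboost:cpu"})
    environment_controller.build(environment_obj.id)
    # Per the run() docstring above, the host port is on the right side of
    # each "env:host" mapping
    return_code, run_id, logs = environment_controller.run(
        environment_obj.id, {
            "command": ["python", "train.py"],
            "ports": ["8888:8888"],
            "detach": True
        }, "/tmp/env_run.log")
    # Exactly one of run_id / match_string / all may be passed to stop()
    environment_controller.stop(run_id=run_id)
    return return_code, logs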
class EnvironmentController(BaseController):
    """EnvironmentController inherits from BaseController and manages business logic related to the
    environment.

    Parameters
    ----------
    home : str
        home path of the project

    Methods
    -------
    create(dictionary)
        Create an environment within the project
    build(id)
        Build the environment for use within the project
    list()
        List all environments within the project
    delete(id)
        Delete the specified environment from the project
    """

    def __init__(self, home):
        super(EnvironmentController, self).__init__(home)
        self.file_collection = FileCollectionController(home)

    def create(self, dictionary):
        """Create an environment

        Parameters
        ----------
        dictionary : dict
            optional values to populate required environment entity args

            definition_filepath : str, optional
                absolute filepath to the environment definition file
                (default is to use the driver default filepath)
            hardware_info : dict, optional
                information about the environment hardware
                (default is to extract hardware from the platform currently running)
            language : str, optional
                programming language used
                (default is None, which allows the Driver to determine the default)

            optional values to populate optional environment entity args

            description : str, optional
                description of the environment
                (default is blank)

        Returns
        -------
        Environment
            returns an object representing the environment created

        Raises
        ------
        RequiredArgumentMissing
            if any required arguments above are not provided.
        """
        # Validate Inputs
        create_dict = {
            "model_id": self.model.id,
        }
        create_dict["driver_type"] = self.environment_driver.type
        create_dict["language"] = dictionary.get("language", None)

        if "definition_filepath" in dictionary and dictionary['definition_filepath']:
            original_definition_filepath = dictionary['definition_filepath']
            # Split up the given path and save the definition filename
            definition_path, definition_filename = \
                os.path.split(original_definition_filepath)
            create_dict['definition_filename'] = definition_filename
            # Create the datmo environment definition in the same dir as the definition filepath
            datmo_definition_filepath = \
                os.path.join(definition_path, "datmo" + definition_filename)
            _, _, _, requirements_filepath = self.environment_driver.create(
                path=dictionary['definition_filepath'],
                output_path=datmo_definition_filepath)
        else:
            # If a path is not given, then only use the language to create a default environment
            # Use the default create to find the environment definition
            _, original_definition_filepath, datmo_definition_filepath, requirements_filepath = \
                self.environment_driver.create(language=create_dict['language'])
            # Split up the default path obtained to save the definition name
            definition_path, definition_filename = \
                os.path.split(original_definition_filepath)
            create_dict['definition_filename'] = definition_filename

        hardware_info_filepath = self._store_hardware_info(
            dictionary, create_dict, definition_path)

        # Add all environment files to the collection:
        # definition path, datmo definition path, hardware info
        filepaths = [
            original_definition_filepath, datmo_definition_filepath,
            hardware_info_filepath
        ]
        if requirements_filepath:
            filepaths.append(requirements_filepath)
        file_collection_obj = self.file_collection.create(filepaths)
        create_dict['file_collection_id'] = file_collection_obj.id

        # Delete temporary files once transferred into the file collection
        if requirements_filepath:
            os.remove(requirements_filepath)
        os.remove(original_definition_filepath)
        os.remove(datmo_definition_filepath)
        os.remove(hardware_info_filepath)

        create_dict['unique_hash'] = file_collection_obj.filehash
        # Check if the unique hash already exists.
        # If it does, DO NOT CREATE a new Environment and return the existing Environment object
        results = self.dal.environment.query(
            {"unique_hash": file_collection_obj.filehash})
        if results:
            return results[0]

        # Optional args for Environment entity
        for optional_arg in ["description"]:
            if optional_arg in dictionary:
                create_dict[optional_arg] = dictionary[optional_arg]

        # Create environment and return
        return self.dal.environment.create(Environment(create_dict))

    def _store_hardware_info(self, dictionary, create_dict, definition_path):
        if "hardware_info" in dictionary:
            create_dict['hardware_info'] = dictionary['hardware_info']
        else:
            # Extract hardware info of the container (currently taken from the system platform)
            # TODO: extract hardware information directly from the container
            (system, node, release, version, machine,
             processor) = platform.uname()
            create_dict['hardware_info'] = {
                'system': system,
                'node': node,
                'release': release,
                'version': version,
                'machine': machine,
                'processor': processor
            }
        # Create the hardware info file in the definition path
        hardware_info_filepath = os.path.join(definition_path,
                                              "hardware_info")
        _ = JSONStore(
            hardware_info_filepath, initial_dict=create_dict['hardware_info'])
        return hardware_info_filepath

    def build(self, environment_id):
        """Build environment from definition file

        Parameters
        ----------
        environment_id : str
            environment object id to build

        Returns
        -------
        bool
            returns True if success

        Raises
        ------
        PathDoesNotExist
            if the specified Environment does not exist.
        """
        environment_obj = self.dal.environment.get_by_id(environment_id)
        if not environment_obj:
            raise PathDoesNotExist(
                __("error", "controller.environment.build", environment_id))
        file_collection_obj = self.dal.file_collection.\
            get_by_id(environment_obj.file_collection_id)
        # TODO: Check hardware info here if different from creation time
        # Build the Environment with the driver
        datmo_definition_filepath = os.path.join(
            self.home, file_collection_obj.path,
            "datmo" + environment_obj.definition_filename)
        result = self.environment_driver.build(
            environment_id, path=datmo_definition_filepath)
        return result

    def run(self, environment_id, options, log_filepath):
        """Run and log an instance of the environment with the options given

        Parameters
        ----------
        environment_id : str
        options : dict
            can include the following values:

            command : list, optional
            ports : list, optional
                Here are some example ports used for common applications.
                * 'jupyter notebook' - 8888
                * flask API - 5000
                * tensorboard - 6006
                An example input for the above would be
                ["8888:8888", "5000:5000", "6006:6006"], which maps the
                running host port (right) to that of the environment (left)
            name : str, optional
            volumes : dict, optional
            detach : bool, optional
            stdin_open : bool, optional
            tty : bool, optional
        log_filepath : str
            filepath to the log file

        Returns
        -------
        return_code : int
            system return code for container and logs
        run_id : str
            identification for the run of the environment
        logs : str
            string version of output logs for the container
        """
        # TODO: Check hardware info here if different from creation time
        final_return_code, run_id, logs = \
            self.environment_driver.run(environment_id, options, log_filepath)
        return final_return_code, run_id, logs

    def list(self):
        # TODO: Add time filters
        return self.dal.environment.query({})

    def delete(self, environment_id):
        """Delete all traces of an environment

        Parameters
        ----------
        environment_id : str
            environment object id to remove

        Returns
        -------
        bool
            True if success

        Raises
        ------
        PathDoesNotExist
            if the specified Environment does not exist.
        """
        environment_obj = self.dal.environment.get_by_id(environment_id)
        if not environment_obj:
            raise PathDoesNotExist(
                __("error", "controller.environment.delete", environment_id))
        # Remove file collection
        file_collection_deleted = self.file_collection.delete(
            environment_obj.file_collection_id)
        # Remove artifacts associated with the environment_driver
        environment_artifacts_removed = self.environment_driver.remove(
            environment_id, force=True)
        # Delete environment_driver object
        delete_success = self.dal.environment.delete(environment_obj.id)
        return file_collection_deleted and environment_artifacts_removed and \
            delete_success

    def stop(self, run_id=None, match_string=None, all=False):
        """Stop the trace of a running environment

        Parameters
        ----------
        run_id : str, optional
            stop environment with specific run id
            (default is None, which means it is not used)
        match_string : str, optional
            stop environment with a string to match the environment name
            (default is None, which means it is not used)
        all : bool, optional
            stop all environments

        Notes
        -----
        The user must provide exactly one of the above; if multiple or none
        are given, the function will error

        Returns
        -------
        bool
            True if success

        Raises
        ------
        RequiredArgumentMissing
        TooManyArgumentsFound
        """
        if not (run_id or match_string or all):
            raise RequiredArgumentMissing()
        if sum(map(bool, [run_id, match_string, all])) > 1:
            raise TooManyArgumentsFound()
        stop_success = False
        if run_id:
            # Stop the instance (e.g. container) running via the environment driver (e.g. docker)
            stop_success = self.environment_driver.stop(run_id, force=True)
        if match_string:
            # Stop all tasks matching the string given
            stop_success = self.environment_driver.stop_remove_containers_by_term(
                term=match_string, force=True)
        if all:
            # Stop all tasks associated with the enclosed project
            all_match_string = "datmo-task-" + self.model.id
            stop_success = self.environment_driver.stop_remove_containers_by_term(
                term=all_match_string, force=True)
        return stop_success
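
# Illustrative usage sketch for this older, home-scoped variant of the
# controller (not part of the original module): creating an environment from
# an explicit definition file and building it. The paths and the helper name
# are hypothetical; note that unlike the newer controller above, this API
# takes `home` in the constructor and a `definition_filepath` key in create().
def _example_legacy_environment(home):
    environment_controller = EnvironmentController(home)
    environment_obj = environment_controller.create({
        "definition_filepath": os.path.join(home, "Dockerfile"),
        "description": "example environment"
    })
    environment_controller.build(environment_obj.id)
    return environment_obj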
class SnapshotController(BaseController): """SnapshotController inherits from BaseController and manages business logic related to snapshots Parameters ---------- home : str home path of the project Attributes ---------- code : datmo.core.controller.code.code.CodeController file_collection : datmo.core.controller.file.file_collection.FileCollectionController environment : datmo.core.controller.environment.environment.EnvironmentController Methods ------- create(dictionary) Create a snapshot within the project checkout(id) Checkout to a specific snapshot within the project list(session_id=None) List all snapshots present within the project based on given filters delete(id) Delete the snapshot specified from the project """ def __init__(self): super(SnapshotController, self).__init__() self.code = CodeController() self.file_collection = FileCollectionController() self.environment = EnvironmentController() if not self.is_initialized: raise ProjectNotInitialized( __("error", "controller.snapshot.__init__")) def create(self, dictionary): """Create snapshot object Parameters ---------- dictionary : dict for each of the 5 key components, this function will search for one of the variables below starting from the top. Default functionality is described below for each component as well for reference if none of the variables are given. code : code_id : str, optional code reference associated with the snapshot; if not provided will look to inputs below for code creation commit_id : str, optional commit id provided by the user if already available Default ------- commits will be taken and code created via the CodeController and are added to the snapshot at the time of snapshot creation environment : environment_id : str, optional id for environment used to create snapshot environment_paths : list, optional list of absolute or relative filepaths and/or dirpaths to collect with destination names (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir") Default ------- default environment files will be searched and environment will be created with the EnvironmentController and added to the snapshot at the time of snapshot creation file_collection : file_collection_id : str, optional file collection associated with the snapshot paths : list, optional list of absolute or relative filepaths and/or dirpaths to collect with destination names (e.g. "/path/to/file:hello", "/path/to/file2", "/path/to/dir:newdir") Default ------- paths will be considered empty ([]), and the FileCollectionController will create a blank FileCollection that is empty. config : config : dict, optional key, value pairs of configurations config_filepath : str, optional absolute filepath to configuration parameters file config_filename : str, optional name of file with configuration parameters Default ------- config will be considered empty ({}) and saved to the snapshot stats : stats : dict, optional key, value pairs of metrics and statistics stats_filepath : str, optional absolute filepath to stats parameters file stats_filename : str, optional name of file with metrics and statistics. 
Default ------- stats will be considered empty ({}) and saved to the snapshot for the remaining optional arguments it will search for them in the input dictionary message : str long description of snapshot session_id : str, optional session id within which snapshot is created, will overwrite default if given task_id : str, optional task id associated with snapshot label : str, optional short description of snapshot visible : bool, optional True if visible to user via list command else False Returns ------- datmo.core.entity.snapshot.Snapshot snapshot object with all relevant parameters Raises ------ RequiredArgumentMissing if required arguments are not given by the user FileIOError if files are not present or there is an error in File IO """ # Validate Inputs create_dict = { "model_id": self.model.id, "session_id": self.current_session.id, } validate("create_snapshot", dictionary) # Message must be present if "message" in dictionary: create_dict['message'] = dictionary['message'] else: raise RequiredArgumentMissing( __("error", "controller.snapshot.create.arg", "message")) # Code setup self._code_setup(dictionary, create_dict) # Environment setup self._env_setup(dictionary, create_dict) # File setup self._file_setup(dictionary, create_dict) # Config setup self._config_setup(dictionary, create_dict) # Stats setup self._stats_setup(dictionary, create_dict) # If snapshot object with required args already exists, return it # DO NOT create a new snapshot with the same required arguments results = self.dal.snapshot.query({ "model_id": create_dict["model_id"], "code_id": create_dict['code_id'], "environment_id": create_dict['environment_id'], "file_collection_id": create_dict['file_collection_id'], "config": create_dict['config'], "stats": create_dict['stats'] }) if results: return results[0] # Optional args for Snapshot entity optional_args = ["task_id", "label", "visible"] for optional_arg in optional_args: if optional_arg in dictionary: create_dict[optional_arg] = dictionary[optional_arg] # Create snapshot and return return self.dal.snapshot.create(Snapshot(create_dict)) def create_from_task(self, message, task_id, label=None, config=None, stats=None): """Create snapshot from a completed task. 
# TODO: enable create from task DURING a run Parameters ---------- message : str long description of snapshot task_id : str task object to use to create snapshot label: str, optional short description of snapshot config : dict, optional key, value pairs of configurations stats : dict, optional key, value pairs of metrics and statistics Returns ------- datmo.core.entity.snapshot.Snapshot snapshot object with all relevant parameters Raises ------ TaskNotComplete if task specified has not been completed """ validate( "create_snapshot_from_task", { "message": message, "task_id": task_id, "label": label, "config": config, "stats": stats }) task_obj = self.dal.task.get_by_id(task_id) if not task_obj.status and not task_obj.after_snapshot_id: raise TaskNotComplete( __("error", "controller.snapshot.create_from_task", str(task_obj.id))) after_snapshot_obj = self.dal.snapshot.get_by_id( task_obj.after_snapshot_id) snapshot_update_dict = { "id": task_obj.after_snapshot_id, "message": message, "visible": True } if label: snapshot_update_dict["label"] = label if config: snapshot_update_dict["config"] = config if stats: snapshot_update_dict["stats"] = stats else: # Append to any existing stats already present snapshot_update_dict["stats"] = {} if after_snapshot_obj.stats is not None: snapshot_update_dict["stats"].update(after_snapshot_obj.stats) if task_obj.results is not None: snapshot_update_dict["stats"].update(task_obj.results) if snapshot_update_dict["stats"] == {}: snapshot_update_dict["stats"] = None return self.dal.snapshot.update(snapshot_update_dict) def checkout(self, snapshot_id): # Get snapshot object snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id) code_obj = self.dal.code.get_by_id(snapshot_obj.code_id) file_collection_obj = self.dal.file_collection.\ get_by_id(snapshot_obj.file_collection_id) environment_obj = self.dal.environment. 
\ get_by_id(snapshot_obj.environment_id) # check for unstaged changes in code self.code_driver.check_unstaged_changes() # check for unstaged changes in environment self.environment.check_unstaged_changes() # check for unstaged changes in file self.file_collection.check_unstaged_changes() # Checkout code_driver to the relevant commit ref code_checkout_success = self.code_driver.checkout_ref( code_obj.commit_id) # Checkout environment_driver to relevant environment id environment_checkout_success = self.environment.checkout( environment_obj.id) # Checkout file_driver to relevant file collection id file_checkout_success = self.file_collection.checkout( file_collection_obj.id) return (code_checkout_success and environment_checkout_success and file_checkout_success) def list(self, session_id=None, visible=None, sort_key=None, sort_order=None): query = {} if session_id: try: self.dal.session.get_by_id(session_id) except EntityNotFound: raise SessionDoesNotExist( __("error", "controller.snapshot.list", session_id)) query['session_id'] = session_id if visible is not None and isinstance(visible, bool): query['visible'] = visible return self.dal.snapshot.query(query, sort_key, sort_order) def update(self, snapshot_id, config=None, stats=None, message=None, label=None, visible=None): """Update the snapshot metadata""" if not snapshot_id: raise RequiredArgumentMissing( __("error", "controller.snapshot.delete.arg", "snapshot_id")) update_snapshot_input_dict = {'id': snapshot_id} validate( "update_snapshot", { "config": config, "stats": stats, "message": message, "label": label, "visible": visible }) if config is not None: update_snapshot_input_dict['config'] = config if stats is not None: update_snapshot_input_dict['stats'] = stats if message is not None: update_snapshot_input_dict['message'] = message if label is not None: update_snapshot_input_dict['label'] = label if visible is not None: update_snapshot_input_dict['visible'] = visible return self.dal.snapshot.update(update_snapshot_input_dict) def get(self, snapshot_id): """Get snapshot object and return Parameters ---------- snapshot_id : str id for the snapshot you would like to get Returns ------- datmo.core.entity.snapshot.Snapshot core snapshot object Raises ------ DoesNotExist snapshot does not exist """ try: return self.dal.snapshot.get_by_id(snapshot_id) except EntityNotFound: raise DoesNotExist() def get_files(self, snapshot_id, mode="r"): """Get list of file objects for snapshot id. Parameters ---------- snapshot_id : str id for the snapshot you would like to get file objects for mode : str file open mode (default is "r" to open file for read) Returns ------- list list of python file objects Raises ------ DoesNotExist snapshot object does not exist """ try: snapshot_obj = self.dal.snapshot.get_by_id(snapshot_id) except EntityNotFound: raise DoesNotExist() file_collection_obj = self.dal.file_collection.get_by_id( snapshot_obj.file_collection_id) return self.file_driver.get_collection_files( file_collection_obj.filehash, mode=mode) def delete(self, snapshot_id): """Delete all traces of a snapshot Parameters ---------- snapshot_id : str id for the snapshot to remove Returns ------- bool True if success Raises ------ RequiredArgumentMissing if the provided snapshot_id is None """ if not snapshot_id: raise RequiredArgumentMissing( __("error", "controller.snapshot.delete.arg", "snapshot_id")) return self.dal.snapshot.delete(snapshot_id) def _code_setup(self, incoming_dictionary, create_dict): """ Set the code_id by using: 1. code_id 2. 
    def _code_setup(self, incoming_dictionary, create_dict):
        """Set the code_id by one of the following:
            1. code_id
            2. commit_id string, which creates a new code_id
            3. create a new code id

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "code_id" in incoming_dictionary:
            create_dict['code_id'] = incoming_dictionary['code_id']
        elif "commit_id" in incoming_dictionary:
            create_dict['code_id'] = self.code.\
                create(commit_id=incoming_dictionary['commit_id']).id
        else:
            create_dict['code_id'] = self.code.create().id

    def _env_setup(self, incoming_dictionary, create_dict):
        """Set the environment_id by one of the following:
            1. environment_id
            2. environment_paths, which creates a new environment
            3. create a default environment

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "environment_id" in incoming_dictionary:
            create_dict['environment_id'] = incoming_dictionary[
                'environment_id']
        elif "environment_paths" in incoming_dictionary:
            create_dict['environment_id'] = self.environment.create({
                "paths": incoming_dictionary['environment_paths']
            }).id
        else:
            # create some default environment
            create_dict['environment_id'] = self.environment.\
                create({}).id

    def _file_setup(self, incoming_dictionary, create_dict):
        """Checks the user inputs and uses the file collection controller
        to obtain the file collection id and create the necessary collection

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity
        """
        if "file_collection_id" in incoming_dictionary:
            create_dict['file_collection_id'] = incoming_dictionary[
                'file_collection_id']
        elif "paths" in incoming_dictionary:
            # transform file paths to file_collection_id
            create_dict['file_collection_id'] = self.file_collection.\
                create(incoming_dictionary['paths']).id
        else:
            # create some default file collection
            create_dict['file_collection_id'] = self.file_collection.\
                create([]).id

    def _config_setup(self, incoming_dictionary, create_dict):
        """Fills in snapshot config by one of the following:
            1. config = JSON object
            2. config_filepath = some location where a json file exists
            3. config_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """
        if "config" in incoming_dictionary:
            create_dict['config'] = incoming_dictionary['config']
        elif "config_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['config_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_config"))
            # If the path exists, transform the file into a config dict
            config_json_driver = JSONStore(
                incoming_dictionary['config_filepath'])
            create_dict['config'] = config_json_driver.to_dict()
        elif "config_filename" in incoming_dictionary:
            config_filename = incoming_dictionary['config_filename']
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
        else:
            config_filename = "config.json"
            create_dict['config'] = self._find_in_filecollection(
                config_filename, create_dict['file_collection_id'])
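    # Illustrative example of the resolution order (hypothetical values,
    # not executed): given
    #   incoming_dictionary = {"commit_id": "abc123",
    #                          "paths": ["data/features.csv"],
    #                          "config": {"learning_rate": 0.01}}
    # the setup helpers above populate create_dict roughly as
    #   {"code_id": <id of new code record for commit "abc123">,
    #    "environment_id": <id of a default environment>,
    #    "file_collection_id": <id of a collection for the given paths>,
    #    "config": {"learning_rate": 0.01}}
    # i.e. explicit ids win, otherwise new entities are created on the fly.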
    def _stats_setup(self, incoming_dictionary, create_dict):
        """Fills in snapshot stats by one of the following:
            1. stats = JSON object
            2. stats_filepath = some location where a json file exists
            3. stats_filename = just the file name

        Parameters
        ----------
        incoming_dictionary : dict
            dictionary for the create function defined above
        create_dict : dict
            dictionary for creating the Snapshot entity

        Raises
        ------
        FileIOError
        """
        if "stats" in incoming_dictionary:
            create_dict['stats'] = incoming_dictionary['stats']
        elif "stats_filepath" in incoming_dictionary:
            if not os.path.isfile(incoming_dictionary['stats_filepath']):
                raise FileIOError(
                    __("error", "controller.snapshot.create.file_stat"))
            # If the path exists, transform the file into a stats dict
            stats_json_driver = JSONStore(
                incoming_dictionary['stats_filepath'])
            create_dict['stats'] = stats_json_driver.to_dict()
        elif "stats_filename" in incoming_dictionary:
            stats_filename = incoming_dictionary['stats_filename']
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])
        else:
            stats_filename = "stats.json"
            create_dict['stats'] = self._find_in_filecollection(
                stats_filename, create_dict['file_collection_id'])

    def _find_in_filecollection(self, file_to_find, file_collection_id):
        """Attempts to find a file within the file collection

        Returns
        -------
        dict
            output dictionary of the JSON file
        """
        file_collection_obj = self.file_collection.dal.file_collection.\
            get_by_id(file_collection_id)
        file_collection_path = \
            self.file_collection.file_driver.get_collection_path(
                file_collection_obj.filehash)
        # find all of the possible paths where the file could exist
        possible_paths = [os.path.join(self.home, file_to_find)] + \
            [os.path.join(self.home, item[0], file_to_find)
             for item in os.walk(file_collection_path)]
        existing_possible_paths = [
            possible_path for possible_path in possible_paths
            if os.path.isfile(possible_path)
        ]
        if not existing_possible_paths:
            # TODO: Add some info / warning that no file was found
            # create some default stats
            return {}
        # If any such path exists, transform the file into a dict
        json_file = JSONStore(existing_possible_paths[0])
        return json_file.to_dict()
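# A self-contained sketch (not part of datmo) of the lookup strategy used by
# _find_in_filecollection above: look in the project root first, then in
# every directory under the collection path, and fall back to an empty dict
# when no file is found. All names below are illustrative; only the
# standard library is used.
import json
import os


def find_json_in_tree(filename, root_dir, search_dir):
    """Return the parsed JSON of the first matching file, else {}."""
    # candidate locations, in priority order
    candidates = [os.path.join(root_dir, filename)] + [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, _filenames in os.walk(search_dir)
    ]
    for candidate in candidates:
        if os.path.isfile(candidate):
            with open(candidate) as f:
                return json.load(f)
    return {}


# e.g. find_json_in_tree("stats.json", "/project",
#                        "/project/.datmo/collections/<hash>")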
class TestFileCollectionController():
    def setup_method(self):
        # provide mountable tmp directory for docker
        tempfile.tempdir = "/tmp" if not platform.system(
        ) == "Windows" else None
        test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                        tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.project = ProjectController(self.temp_dir)
        self.file_collection = FileCollectionController(self.temp_dir)

    def teardown_method(self):
        pass

    def test_create(self):
        self.project.init("test3", "test description")
        # Test failed creation of collection (filepaths are required)
        failed = False
        try:
            self.file_collection.create()
        except Exception:
            failed = True
        assert failed
        # Test successful creation of collection
        self.file_collection.file_driver.create("dirpath1", directory=True)
        self.file_collection.file_driver.create("filepath1")
        dirpath1 = os.path.join(self.file_collection.home, "dirpath1")
        filepath1 = os.path.join(self.file_collection.home, "filepath1")
        filepaths = [filepath1, dirpath1]

        file_collection_obj = self.file_collection.create(filepaths)
        assert file_collection_obj
        assert file_collection_obj.id
        assert file_collection_obj.path
        assert file_collection_obj.driver_type

        # Test that a collection with the same filepaths/filehash
        # returns the same object
        file_collection_obj_2 = self.file_collection.create(filepaths)
        assert file_collection_obj_2 == file_collection_obj

    def test_list(self):
        self.project.init("test4", "test description")
        self.file_collection.file_driver.create("dirpath1", directory=True)
        self.file_collection.file_driver.create("filepath1")
        dirpath1 = os.path.join(self.file_collection.home, "dirpath1")
        filepath1 = os.path.join(self.file_collection.home, "filepath1")
        filepaths_1 = [filepath1, dirpath1]

        filepath2 = os.path.join(self.file_collection.home, "filepath2")
        with open(filepath2, "w") as f:
            f.write(to_unicode("test" + "\n"))
        filepaths_2 = [filepath2]

        file_collection_obj_1 = self.file_collection.create(filepaths_1)
        file_collection_obj_2 = self.file_collection.create(filepaths_2)

        # List all file collections and ensure they exist
        result = self.file_collection.list()

        assert len(result) == 2 and \
            file_collection_obj_1 in result and \
            file_collection_obj_2 in result

    def test_delete(self):
        self.project.init("test5", "test description")
        # Test successful creation of collection
        self.file_collection.file_driver.create("dirpath1", directory=True)
        self.file_collection.file_driver.create("filepath1")
        dirpath1 = os.path.join(self.file_collection.home, "dirpath1")
        filepath1 = os.path.join(self.file_collection.home, "filepath1")
        filepaths = [filepath1, dirpath1]
        file_collection_obj = self.file_collection.create(filepaths)

        # Delete the file collection in the project
        result = self.file_collection.delete(file_collection_obj.id)

        # Check that file collection retrieval now throws EntityNotFound
        thrown = False
        try:
            self.file_collection.dal.file_collection.get_by_id(
                file_collection_obj.id)
        except EntityNotFound:
            thrown = True

        assert result and thrown
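# A possible companion test (a sketch, not from the original suite): the
# snapshot controller's _file_setup falls back to create([]) for a default
# collection, so creating an empty collection should also yield a valid
# entity. Setup mirrors the tests above; the exact behavior of create([])
# is assumed from its use in _file_setup.
class TestFileCollectionControllerDefaults():
    def setup_method(self):
        # provide mountable tmp directory for docker
        tempfile.tempdir = "/tmp" if not platform.system(
        ) == "Windows" else None
        test_datmo_dir = os.environ.get('TEST_DATMO_DIR',
                                        tempfile.gettempdir())
        self.temp_dir = tempfile.mkdtemp(dir=test_datmo_dir)
        self.project = ProjectController(self.temp_dir)
        self.file_collection = FileCollectionController(self.temp_dir)

    def test_create_default_collection(self):
        self.project.init("test6", "test description")
        # an empty paths list should still produce a persisted collection
        file_collection_obj = self.file_collection.create([])
        assert file_collection_obj
        assert file_collection_obj.id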