def _calculate_project_files_hash(self):
    """Return the file hash of the file collections filepaths for
    project files directory

    Collects all filepaths within the project files directory (if it
    exists) and hashes them via the file driver, using a temporary
    directory as scratch space.

    Returns
    -------
    str
        unique hash of the project files directory
    """
    # Populate paths from the project files directory
    paths = []
    if os.path.isdir(self.file_driver.files_directory):
        paths.extend([
            os.path.join(self.file_driver.files_directory, filepath)
            for filepath in list_all_filepaths(
                self.file_driver.files_directory)
        ])
    # Create a temp dir to use for calculating the hash
    _temp_dir = get_datmo_temp_path(self.home)
    try:
        # Hash the paths of the files
        dirhash = self.file_driver.calculate_hash_paths(paths, _temp_dir)
    finally:
        # Always remove the temporary directory, even if hashing raises
        # (previously the temp dir leaked on failure)
        shutil.rmtree(_temp_dir)
    return dirhash
def _get_tracked_files(self):
    """Return list of tracked files relative to the root directory

    Walks all files under the root and excludes the datmo directory
    (.datmo), the git directory (.git), and any paths matched by the
    patterns in the .datmoignore file.

    TODO: add general list of directories to ignore here (should be
    passed in by higher level code)

    Returns
    -------
    list
        list of filepaths relative to the root of the repo
    """
    all_files = set(list_all_filepaths(self.root))

    def _matched(patterns, syntax='gitwildmatch'):
        # Set of root-relative paths matching the given ignore patterns
        spec = pathspec.PathSpec.from_lines(syntax, patterns)
        return set(spec.match_tree(self.root))

    # Exclude the .datmo/ folder and all contents within it
    excluded = _matched([self._datmo_directory_name])
    # Exclude the .git/ folder and all contents within it
    excluded |= _matched([".git"])
    # Exclude the ignore file itself plus anything it lists, if present
    excluded.add(".datmoignore")
    if os.path.isfile(os.path.join(self.root, ".datmoignore")):
        with open(self._datmo_ignore_filepath, "r") as f:
            excluded |= _matched(f, syntax='gitignore')
    return list(all_files - excluded)
def _has_unstaged_changes(self):
    """Return whether there are unstaged changes"""
    file_hash = self._calculate_project_files_hash()
    files = list_all_filepaths(self.file_driver.files_directory)
    # No unstaged changes when the hash already exists in the db,
    # or when the files directory is empty
    already_staged = self.exists(file_hash=file_hash)
    return not (already_staged or not files)
def create(self, paths):
    """Create a FileCollection

    Parameters
    ----------
    paths : list
        list of absolute or relative filepaths and/or dirpaths to collect
        with destination names
        (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")

    Returns
    -------
    FileCollection
        an object representing the collection of files

    Raises
    ------
    RequiredArgumentMissing
        if any arguments needed for FileCollection are not provided
    """
    # TODO: Validate Inputs
    create_dict = {
        "model_id": self.model.id,
    }
    # Populate a path list from the user inputs compatible with the file driver.
    # Work on a local copy so the caller's list argument is never mutated
    # (the original extended the caller's list in place).
    paths = list(paths) if paths else []
    # a. add in user given paths as is if they exist (already within paths)
    # b. if there are NO paths found from input AND project files directory
    if not paths and os.path.isdir(self.file_driver.files_directory):
        paths.extend([
            os.path.join(self.file_driver.files_directory, filepath)
            for filepath in list_all_filepaths(
                self.file_driver.files_directory)
        ])
    # Parse paths to create collection and add in filehash
    create_dict['filehash'], create_dict['file_path_map'], create_dict['directory_path_map'] = \
        self.file_driver.create_collection(paths)
    # If file collection with filehash exists, return it
    results = self.dal.file_collection.query({
        "filehash": create_dict['filehash']
    })
    if results:
        return results[0]
    # Add in path of the collection created above
    create_dict['path'] = self.file_driver.get_relative_collection_path(
        create_dict['filehash'])
    # Add in driver_type of the relative collection path
    create_dict['driver_type'] = self.file_driver.type
    # Create file collection and return
    return self.dal.file_collection.create(FileCollection(create_dict))
def _has_unstaged_changes(self):
    """Return whether there are unstaged changes"""
    hash_with_hw = self._calculate_project_environment_hash()
    hash_without_hw = self._calculate_project_environment_hash(
        save_hardware_file=False)
    environment_files = list_all_filepaths(
        self.file_driver.environment_directory)
    # Staged when either hash variant already exists in the db;
    # an empty environment directory also counts as "no changes"
    staged = (self.exists(environment_unique_hash=hash_with_hw)
              or self.exists(environment_unique_hash=hash_without_hw))
    return not staged and bool(environment_files)
def test_calculate_commit_hash(self):
    self.__setup()
    # Test if the hash matches the test file
    tracked_filepaths = self.file_code_manager._get_tracked_files()
    result = self.file_code_manager._calculate_commit_hash(
        tracked_filepaths)
    code_dir = self.file_code_manager._code_filepath
    temp_dir_path = os.path.join(code_dir, os.listdir(code_dir)[0])
    # Assert temp directory was created and populated correctly
    assert all(item in tracked_filepaths
               for item in list_all_filepaths(temp_dir_path))
    # Assert the correct commit hash was returned
    assert result == "69a329523ce1ec88bf63061863d9cb14"
def test_list_all_filepaths(self):
    # Build a top-level file and a nested file to exercise recursion
    top_file = os.path.join(self.temp_dir, "test.txt")
    sub_dir = os.path.join(self.temp_dir, "test_dir")
    nested_file = os.path.join(sub_dir, "test.txt")
    with open(top_file, "wb") as f:
        f.write(to_bytes("test" + "\n"))
    os.makedirs(sub_dir)
    with open(nested_file, "wb") as f:
        f.write(to_bytes("test" + "\n"))
    # List all paths (relative to the temp directory)
    result = list_all_filepaths(self.temp_dir)
    assert len(result) == 2
    assert "test.txt" in result
    assert os.path.join("test_dir", "test.txt") in result
def _calculate_project_environment_hash(self, save_hardware_file=True):
    """Return the environment hash from contents in project environment directory.
    If environment_directory not present then will assume it is empty

    Parameters
    ----------
    save_hardware_file : bool
        include the hardware info file within the hash

    Returns
    -------
    str
        unique hash of the project environment directory
    """
    # Populate paths from the project environment directory
    paths = []
    if os.path.isdir(self.environment_driver.environment_directory_path):
        paths.extend([
            os.path.join(
                self.environment_driver.environment_directory_path,
                filepath) for filepath in list_all_filepaths(
                    self.environment_driver.environment_directory_path)
        ])
    # Create a temp dir to save any additional files necessary
    _temp_dir = get_datmo_temp_path(self.home)
    # Second temp dir is created only after environment setup succeeds
    _temp_dir_2 = None
    try:
        # Setup compatible environment and create add paths
        paths = self._setup_compatible_environment(
            {
                "definition_filename":
                    self.environment_driver.get_default_definition_filename()
            },
            paths,
            _temp_dir,
            save_hardware_file=save_hardware_file)
        # Create new temp directory
        _temp_dir_2 = get_datmo_temp_path(self.home)
        # Hash the paths of the environment with a different temp dir
        dirhash = self.file_driver.calculate_hash_paths(paths, _temp_dir_2)
    finally:
        # Always remove both temporary directories, even on failure
        # (previously both leaked if setup or hashing raised)
        shutil.rmtree(_temp_dir)
        if _temp_dir_2 is not None:
            shutil.rmtree(_temp_dir_2)
    return dirhash
def create(self, dictionary, save_hardware_file=True):
    """Create an environment

    Parameters
    ----------
    dictionary : dict
        optional values to populate required environment entity args
            paths : list, optional
                list of absolute or relative filepaths and/or dirpaths to collect
                with destination names
                (e.g. "/path/to/file>hello", "/path/to/file2", "/path/to/dir>newdir")
                (default if none provided is to pull from project environment folder
                and project root. If none found create default definition)
            name : str, optional
                name of the environment (default is None)
            description : str, optional
                description of the environment (default is None)
    save_hardware_file : bool
        boolean to save hardware file along with other files
        (default is True to save the file and create distinct hashes based on
        software and hardware)

    Returns
    -------
    Environment
        returns an object representing the environment created

    Raises
    ------
    EnvironmentDoesNotExist
        if there is no environment found after given parameters and defaults
        are checked
    PathDoesNotExist
        if any source paths provided do not exist
    """
    # Validate Inputs
    create_dict = {"model_id": self.model.id}
    create_dict["driver_type"] = self.environment_driver.type
    validate("create_environment", dictionary)
    # Create temp environment folder
    _temp_env_dir = get_datmo_temp_path(self.home)
    try:
        # Step 1: Populate a path list from the user inputs in a format
        # compatible with the input of the File Collection create function
        paths = []
        # a. add in user given paths as is if they exist
        if "paths" in dictionary and dictionary['paths']:
            paths.extend(dictionary['paths'])
        # b. if there exists project environment directory AND no paths exist,
        # add in absolute paths
        if not paths and os.path.isdir(self.file_driver.environment_directory):
            paths.extend([
                os.path.join(self.file_driver.environment_directory, filepath)
                for filepath in list_all_filepaths(
                    self.file_driver.environment_directory)
            ])
        # c. add in default environment definition filepath as specified by
        # the environment driver if path exists and NO OTHER PATHS exist
        src_environment_filename = \
            self.environment_driver.get_default_definition_filename()
        src_environment_filepath = os.path.join(self.home,
                                                src_environment_filename)
        _, environment_filename = os.path.split(src_environment_filepath)
        create_dict['definition_filename'] = environment_filename
        if not paths and os.path.exists(src_environment_filepath):
            paths.append(src_environment_filepath)
        # Step 2: Check existing paths and create files as needed to populate
        # the full environment within the temporary directory
        paths = self._setup_compatible_environment(
            create_dict,
            paths,
            _temp_env_dir,
            save_hardware_file=save_hardware_file)
        # Step 3: Pass in all paths for the environment to the file collection
        # create. If PathDoesNotExist is found for any source paths, then error
        if not paths:
            raise EnvironmentDoesNotExist()
        try:
            file_collection_obj = self.file_collection.create(paths)
        except PathDoesNotExist as e:
            raise PathDoesNotExist(
                __("error", "controller.environment.create.filepath.dne",
                   str(e)))
        # Step 4: Add file collection information to create dict and check
        # unique hash
        create_dict['file_collection_id'] = file_collection_obj.id
        create_dict['unique_hash'] = file_collection_obj.filehash
        # Check if unique hash is unique or not.
        # If not, DO NOT CREATE Environment and return existing object
        results = self.dal.environment.query(
            {"unique_hash": file_collection_obj.filehash})
        if results:
            return results[0]
    finally:
        # Step 5: Always delete the temporary directory. Previously it leaked
        # on the early-return path above and whenever an exception was raised.
        shutil.rmtree(_temp_env_dir)
    # Step 6: Add optional arguments to the Environment entity
    for optional_arg in ["name", "description"]:
        if optional_arg in dictionary:
            create_dict[optional_arg] = dictionary[optional_arg]
    # Step 7: Create environment and return
    return self.dal.environment.create(Environment(create_dict))