def test_hash_files_or_dirs_with_file():
    """Hashing the same, unchanged file twice must produce equal hashes."""
    with tempfile.NamedTemporaryFile() as tmp:
        tmp.write("hashme".encode())
        # The write above is buffered; flush so that anything opening the
        # file by its path sees "hashme" rather than an empty file.
        tmp.flush()

        file_url = f"file:///{tmp.name}"
        hash1 = hash_files_or_dirs([file_url])
        # compute hash again with no change to file
        hash2 = hash_files_or_dirs([file_url])
        assert hash1 == hash2
def test_hash_files_or_dirs_unsorted_input_list():
    """The hash must not depend on the order in which paths are listed."""
    with tempfile.NamedTemporaryFile() as tmp1:
        tmp1.write("hashme".encode())
        # Flush buffered bytes so the content is visible when the file is
        # read through its path.
        tmp1.flush()
        with tempfile.NamedTemporaryFile() as tmp2:
            tmp2.write("hashme".encode())
            tmp2.flush()

            hash1 = hash_files_or_dirs([tmp1.name, tmp2.name])
            # same two paths, reversed order
            hash2 = hash_files_or_dirs([tmp2.name, tmp1.name])
            assert hash1 == hash2
def test_hash_files_or_dirs_with_directory():
    """Hashing an untouched directory twice must produce equal hashes."""
    with tempfile.TemporaryDirectory() as workdir:
        Path(workdir, "test.txt").write_text("foo bar")

        first_hash = hash_files_or_dirs([workdir])
        # Nothing in the directory changes between the two calls.
        second_hash = hash_files_or_dirs([workdir])

    assert first_hash == second_hash
def test_hash_files_or_dirs_rename_file():
    """Renaming a file inside a directory must change the directory hash."""
    with tempfile.TemporaryDirectory() as workdir:
        original = Path(workdir, "test.txt")
        original.write_text("foo bar")
        hash_before = hash_files_or_dirs([workdir])

        # Same content, different file name.
        original.rename(Path(workdir, "test1.txt"))
        hash_after = hash_files_or_dirs([workdir])

    assert hash_before != hash_after
def test_hash_files_or_dirs_change_file_content():
    """Rewriting a file's content must change the directory hash."""
    with tempfile.TemporaryDirectory() as workdir:
        target = Path(workdir, "test.txt")
        target.write_text("foo bar")
        hash_before = hash_files_or_dirs([workdir])

        # Same file name, different content.
        target.write_text("new text")
        hash_after = hash_files_or_dirs([workdir])

    assert hash_before != hash_after
def test_hash_files_or_dirs_add_new_file():
    """Adding a second file to a directory must change the directory hash."""
    with tempfile.TemporaryDirectory() as workdir:
        Path(workdir, "test.txt").write_text("foo bar")
        hash_before = hash_files_or_dirs([workdir])

        # Existing file is left alone; a new sibling appears next to it.
        Path(workdir, "test2.txt").write_text("test test")
        hash_after = hash_files_or_dirs([workdir])

    assert hash_before != hash_after
def _generate_code_upload_path(self) -> "str | None":
    """Generate an upload path for local training scripts based on their content.

    The estimator's ``source_dir`` is used when it is set; ``entry_point`` is
    consulted only when ``source_dir`` is unset. The chosen location is hashed
    together with the estimator's ``dependencies``.

    Returns:
        ``"{self.name}-{hash}"`` truncated to at most 1024 characters when the
        chosen location has no URL scheme or uses the ``file`` scheme.
        ``None`` when neither ``source_dir`` nor ``entry_point`` is set, or
        when the chosen location uses any other scheme.
    """
    from sagemaker.workflow.utilities import hash_files_or_dirs

    estimator = self.estimator

    # source_dir takes precedence: a set-but-remote source_dir yields None
    # without falling back to entry_point.
    code_location = estimator.source_dir or estimator.entry_point
    if not code_location:
        return None

    if urlparse(code_location).scheme not in ("", "file"):
        return None

    # Treat a missing dependency list as empty instead of failing on
    # list + None.
    dependencies = estimator.dependencies or []
    code_hash = hash_files_or_dirs([code_location] + dependencies)
    return f"{self.name}-{code_hash}"[:1024]