def _handle_artifacts(self, model, nemo_file_folder):
    """Copy every artifact registered on ``model`` into ``nemo_file_folder``.

    Artifacts whose ``path_type`` is ``LOCAL_PATH`` are copied from disk under
    a fresh ``<uuid4 hex>_<basename>`` name and their ``hashed_path`` is set to
    ``"nemo:<new name>"``. Artifacts whose ``path_type`` is ``TAR_PATH`` are
    collected and, when the model has a restoration path, copied out of that
    restoration source under their existing name.

    The process working directory is never changed: source files are addressed
    by joining their name onto the extraction directory.

    Args:
        model: object exposing ``artifacts`` (mapping of config path to
            artifact item) and ``model_guid``.
        nemo_file_folder: directory the artifacts are copied into.

    Raises:
        FileNotFoundError: a ``LOCAL_PATH`` artifact does not exist on disk.
        ValueError: an artifact has a path type other than ``LOCAL_PATH`` or
            ``TAR_PATH``.
    """
    tarfile_artifacts = []
    app_state = AppState()
    for conf_path, artiitem in model.artifacts.items():
        if artiitem.path_type == model_utils.ArtifactPathType.LOCAL_PATH:
            if not os.path.exists(artiitem.path):
                raise FileNotFoundError(f"Artifact {conf_path} not found at location: {artiitem.path}")

            # Generate a new unique artifact name and copy it to nemo_file_folder.
            # The uuid4 hex prefix keeps equally named files from colliding.
            artifact_base_name = os.path.basename(artiitem.path)
            artifact_uniq_name = f"{uuid.uuid4().hex}_{artifact_base_name}"
            shutil.copy2(artiitem.path, os.path.join(nemo_file_folder, artifact_uniq_name))

            # Update artifacts registry
            artiitem.hashed_path = "nemo:" + artifact_uniq_name
            model.artifacts[conf_path] = artiitem
        elif artiitem.path_type == model_utils.ArtifactPathType.TAR_PATH:
            # Process all tarfile artifacts in one go, so preserve the key-value pair.
            tarfile_artifacts.append((conf_path, artiitem))
        else:
            raise ValueError("Directly referencing artifacts from other nemo files isn't supported yet")

    # Tarfile artifacts live inside the source the model was restored from;
    # without a restoration path there is nothing to pull them out of.
    model_metadata = app_state.get_model_metadata_from_guid(model.model_guid)
    restoration_path = model_metadata.restoration_path
    if not tarfile_artifacts or restoration_path is None:
        return

    def copy_tarfile_artifacts(source_dir):
        # Copy each collected artifact from source_dir and re-register it.
        for conf_path, artiitem in tarfile_artifacts:
            if 'nemo:' in artiitem.path:
                artifact_base_name = artiitem.path.split('nemo:')[1]
            else:
                artifact_base_name = os.path.basename(artiitem.path)
            # No need to hash here: tarfile artifacts keep the name they already have.
            artifact_uniq_name = artifact_base_name
            # Address the source explicitly instead of relying on os.chdir().
            shutil.copy2(
                os.path.join(source_dir, artifact_base_name),
                os.path.join(nemo_file_folder, artifact_uniq_name),
            )

            # Update artifacts registry
            new_artiitem = model_utils.ArtifactItem()
            new_artiitem.path = "nemo:" + artifact_uniq_name
            new_artiitem.path_type = model_utils.ArtifactPathType.TAR_PATH
            model.artifacts[conf_path] = new_artiitem

    if os.path.isdir(restoration_path):
        # The model was restored from an already-extracted directory, so the
        # artifacts can be read from it directly.
        # NOTE(review): assumes _unpack_nemo_file only accepts archive files - confirm.
        copy_tarfile_artifacts(restoration_path)
    else:
        with tempfile.TemporaryDirectory() as archive_dir:
            self._unpack_nemo_file(path2file=restoration_path, out_folder=archive_dir)
            copy_tarfile_artifacts(archive_dir)
def test_mock_save_to_restore_chained(self):
    """Save and restore a model three times in a row, relocating the archive each time.

    Every hop saves into one temporary directory, copies the archive into the
    next one and restores from the copy. The directory a hop saved into is
    discarded before the following restore, so each restore only has the copied
    archive available.
    """
    # The second temporary file is opened but not otherwise used by this test.
    with tempfile.NamedTemporaryFile('w') as data_file, tempfile.NamedTemporaryFile('w') as _spare_file:
        # Give the artifact file some content to find again after restoring.
        data_file.writelines(["*****\n"])
        data_file.flush()

        # Point the mock config at the artifact file.
        cfg = _mock_model_config()
        cfg.model.temp_file = data_file.name

        # Build the model the chain starts from.
        model = MockModel(cfg=cfg.model, trainer=None).to('cpu')
        assert model.temp_file == data_file.name

        def save_copy(model, save_folder, restore_folder):
            # Save into save_folder, then hand back a copy placed in restore_folder.
            saved_archive = os.path.join(save_folder, f"{model.__class__.__name__}.nemo")
            model.save_to(save_path=saved_archive)
            relocated_archive = os.path.join(restore_folder, f"{model.__class__.__name__}.nemo")
            shutil.copy(saved_archive, relocated_archive)
            return relocated_archive

        model_cls = model.__class__

        # Save test: each inner directory goes away before the next restore.
        with tempfile.TemporaryDirectory() as level4:
            with tempfile.TemporaryDirectory() as level3:
                with tempfile.TemporaryDirectory() as level2:
                    with tempfile.TemporaryDirectory() as level1:
                        path = save_copy(model, level1, level2)
                    model_copy2 = model_cls.restore_from(path)
                    path = save_copy(model_copy2, level2, level3)
                model_copy3 = model_cls.restore_from(path)
                path = save_copy(model_copy3, level3, level4)
            model_copy = model_cls.restore_from(path)

        # Restore test: the artifact content survived all three hops.
        assert model_copy.temp_data == ["*****\n"]

        # AppState test: the final copy is registered under its own guid and
        # remembers the last archive it was restored from.
        metadata = AppState().get_model_metadata_from_guid(model_copy.model_guid)
        assert metadata.guid != model.model_guid
        assert metadata.restoration_path == path
def test_restore_from_save_restore_connector_extracted_dir(self):
    """Restore through a connector whose ``model_extracted_dir`` is already populated.

    The archive is saved, unpacked into a separate directory and then deleted
    together with its folder. ``restore_from`` is still handed the stale
    archive path and is expected to use the pre-extracted directory instead.
    """

    class MySaveRestoreConnector(save_restore_connector.SaveRestoreConnector):
        def save_to(self, model, save_path: str):
            # Redirect every save to an "_XYZ"-suffixed archive name.
            super().save_to(model, save_path.replace(".nemo", "_XYZ.nemo"))

    class MockModelV2(MockModel):
        pass

    with tempfile.TemporaryDirectory() as extracted_tempdir:
        with tempfile.TemporaryDirectory() as tmpdir:
            # Update config
            cfg = _mock_model_config()

            # Create the model and save it through the custom connector.
            requested_path = os.path.join(tmpdir, 'save_custom.nemo')
            source_model = MockModel(cfg=cfg.model, trainer=None)
            source_model._save_restore_connector = MySaveRestoreConnector()
            source_model.save_to(requested_path)

            # The connector rewrote the file name on the way out.
            nemo_filepath = os.path.join(tmpdir, 'save_custom_XYZ.nemo')
            assert os.path.exists(nemo_filepath)

            # Extract the archive up front, before restore_from is ever called.
            connector = MySaveRestoreConnector()
            MySaveRestoreConnector._unpack_nemo_file(nemo_filepath, extracted_tempdir)
            assert get_size(extracted_tempdir) > 0

        # Leaving the inner scope removed tmpdir and the archive in it; only
        # the extracted directory is left. Point the connector at it.
        connector.model_extracted_dir = extracted_tempdir

        # The "old" nemo_filepath is passed on purpose even though it no
        # longer exists.
        restored_model = MockModelV2.restore_from(nemo_filepath, save_restore_connector=connector)

    assert type(restored_model) is MockModelV2
    assert type(restored_model._save_restore_connector) is MySaveRestoreConnector

    # Both models must carry the right restoration information.
    appstate = AppState()
    source_metadata = appstate.get_model_metadata_from_guid(source_model.model_guid)
    assert source_metadata.restoration_path is None

    restored_metadata = appstate.get_model_metadata_from_guid(restored_model.model_guid)
    assert restored_metadata.restoration_path is not None

    # The recorded restore path is the pre-extracted directory, regardless of
    # the stale archive path that was handed to restore_from.
    assert extracted_tempdir in restored_metadata.restoration_path
    assert extracted_tempdir not in nemo_filepath
    assert not os.path.exists(nemo_filepath)

    # Parameter equality between the source model and the restored one.
    source_model = source_model.to('cpu')
    restored_model = restored_model.to('cpu')

    source_weights = source_model.state_dict()
    restored_weights = restored_model.state_dict()
    for source_key, restored_key in zip(source_weights, restored_weights):
        mean_abs_diff = (source_weights[source_key] - restored_weights[restored_key]).abs().mean()
        assert mean_abs_diff < 1e-6