コード例 #1
0
    def _handle_artifacts(self, model, nemo_file_folder):
        tarfile_artifacts = []
        app_state = AppState()
        for conf_path, artiitem in model.artifacts.items():
            if artiitem.path_type == model_utils.ArtifactPathType.LOCAL_PATH:
                if not os.path.exists(artiitem.path):
                    raise FileNotFoundError(f"Artifact {conf_path} not found at location: {artiitem.path}")

                # Generate new uniq artifact name and copy it to nemo_file_folder
                # Note uuid.uuid4().hex is guaranteed to be 32 character long
                artifact_base_name = os.path.basename(artiitem.path)
                artifact_uniq_name = f"{uuid.uuid4().hex}_{artifact_base_name}"
                shutil.copy2(artiitem.path, os.path.join(nemo_file_folder, artifact_uniq_name))

                # Update artifacts registry
                artiitem.hashed_path = "nemo:" + artifact_uniq_name
                model.artifacts[conf_path] = artiitem

            elif artiitem.path_type == model_utils.ArtifactPathType.TAR_PATH:
                # process all tarfile artifacts in one go, so preserve key-value pair
                tarfile_artifacts.append((conf_path, artiitem))

            else:
                raise ValueError(f"Directly referencing artifacts from other nemo files isn't supported yet")

        # Process current tarfile artifacts by unpacking the previous tarfile and extract the artifacts
        # that are currently required.
        model_metadata = app_state.get_model_metadata_from_guid(model.model_guid)
        if len(tarfile_artifacts) > 0 and model_metadata.restoration_path is not None:
            # Need to step into nemo archive to extract file
            # Get path where the command is executed - the artifacts will be "retrieved" there
            # (original .nemo behavior)
            cwd = os.getcwd()
            try:
                # Step into the nemo archive to try and find the file
                with tempfile.TemporaryDirectory() as archive_dir:
                    self._unpack_nemo_file(path2file=model_metadata.restoration_path, out_folder=archive_dir)
                    os.chdir(archive_dir)
                    for conf_path, artiitem in tarfile_artifacts:
                        # Get basename and copy it to nemo_file_folder
                        if 'nemo:' in artiitem.path:
                            artifact_base_name = artiitem.path.split('nemo:')[1]
                        else:
                            artifact_base_name = os.path.basename(artiitem.path)
                        # no need to hash here as we are in tarfile_artifacts which are already hashed
                        artifact_uniq_name = artifact_base_name
                        shutil.copy2(artifact_base_name, os.path.join(nemo_file_folder, artifact_uniq_name))

                        # Update artifacts registry
                        new_artiitem = model_utils.ArtifactItem()
                        new_artiitem.path = "nemo:" + artifact_uniq_name
                        new_artiitem.path_type = model_utils.ArtifactPathType.TAR_PATH
                        model.artifacts[conf_path] = new_artiitem
            finally:
                # change back working directory
                os.chdir(cwd)
コード例 #2
0
    def test_mock_save_to_restore_chained(self):
        with tempfile.NamedTemporaryFile(
                'w') as empty_file, tempfile.NamedTemporaryFile(
                    'w') as empty_file2:
            # Write some data
            empty_file.writelines(["*****\n"])
            empty_file.flush()

            # Update config + create ,pde;s
            cfg = _mock_model_config()
            cfg.model.temp_file = empty_file.name

            # Create models
            model = MockModel(cfg=cfg.model, trainer=None)
            model = model.to('cpu')

            assert model.temp_file == empty_file.name

            def save_copy(model, save_folder, restore_folder):
                # Where model will be saved
                model_save_path = os.path.join(
                    save_folder, f"{model.__class__.__name__}.nemo")
                model.save_to(save_path=model_save_path)
                # Where model will be restored from
                model_restore_path = os.path.join(
                    restore_folder, f"{model.__class__.__name__}.nemo")
                shutil.copy(model_save_path, model_restore_path)
                return model_restore_path

            # Save test
            with tempfile.TemporaryDirectory() as level4:
                with tempfile.TemporaryDirectory() as level3:
                    with tempfile.TemporaryDirectory() as level2:
                        with tempfile.TemporaryDirectory() as level1:
                            path = save_copy(model, level1, level2)
                        model_copy2 = model.__class__.restore_from(path)
                        path = save_copy(model_copy2, level2, level3)
                    model_copy3 = model.__class__.restore_from(path)
                    path = save_copy(model_copy3, level3, level4)
                model_copy = model.__class__.restore_from(path)

        # Restore test
        assert model_copy.temp_data == ["*****\n"]

        # AppState test
        appstate = AppState()
        metadata = appstate.get_model_metadata_from_guid(model_copy.model_guid)
        assert metadata.guid != model.model_guid
        assert metadata.restoration_path == path
コード例 #3
0
ファイル: test_save_restore.py プロジェクト: NVIDIA/NeMo
    def test_restore_from_save_restore_connector_extracted_dir(self):
        class MySaveRestoreConnector(
                save_restore_connector.SaveRestoreConnector):
            def save_to(self, model, save_path: str):
                save_path = save_path.replace(".nemo", "_XYZ.nemo")
                super().save_to(model, save_path)

        class MockModelV2(MockModel):
            pass

        with tempfile.TemporaryDirectory() as extracted_tempdir:
            with tempfile.TemporaryDirectory() as tmpdir:
                # Update config
                cfg = _mock_model_config()

                # Create model
                save_path = os.path.join(tmpdir, 'save_custom.nemo')
                model_with_custom_connector = MockModel(cfg=cfg.model,
                                                        trainer=None)
                model_with_custom_connector._save_restore_connector = MySaveRestoreConnector(
                )
                model_with_custom_connector.save_to(save_path)

                nemo_filepath = os.path.join(tmpdir, 'save_custom_XYZ.nemo')
                assert os.path.exists(nemo_filepath)

                # extract the contents to this dir apriori
                # simulate by extracting now before calling restore_from
                connector = MySaveRestoreConnector()
                MySaveRestoreConnector._unpack_nemo_file(
                    nemo_filepath, extracted_tempdir)
                assert get_size(extracted_tempdir) > 0

            # delete the old directory and preserve only the new extracted directory (escape scope of old dir)

            # next, set the model's extracted directory path
            connector.model_extracted_dir = extracted_tempdir

            # note, we pass in the "old" nemo_filepath, stored somewhere other than the extracted directory
            # this nemo_filepath is no longer valid, and has been deleted.
            restored_model = MockModelV2.restore_from(
                nemo_filepath, save_restore_connector=connector)
        assert type(restored_model) == MockModelV2
        assert type(
            restored_model._save_restore_connector) == MySaveRestoreConnector

        # assert models have correct restoration information and paths
        appstate = AppState()
        original_metadata = appstate.get_model_metadata_from_guid(
            model_with_custom_connector.model_guid)
        assert original_metadata.restoration_path is None

        restored_metadata = appstate.get_model_metadata_from_guid(
            restored_model.model_guid)
        assert restored_metadata.restoration_path is not None

        # assert that the restore path was the path of the pre-extracted directory
        # irrespective of whether an old `nemo_filepath` (which doesnt exist anymore) was passed to restore_from.
        assert extracted_tempdir in restored_metadata.restoration_path
        assert extracted_tempdir not in nemo_filepath
        assert not os.path.exists(nemo_filepath)

        # test for parameter equality
        model_with_custom_connector = model_with_custom_connector.to('cpu')
        restored_model = restored_model.to('cpu')

        original_state_dict = model_with_custom_connector.state_dict()
        restored_state_dict = restored_model.state_dict()
        for orig, restored in zip(original_state_dict.keys(),
                                  restored_state_dict.keys()):
            assert (original_state_dict[orig] -
                    restored_state_dict[restored]).abs().mean() < 1e-6