def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> None: dataset_name = "test-dataset" config = DummyModel() config.local_dataset = None config.azure_dataset_id = "" azure_config = get_default_azure_config() runner = MLRunner(config, azure_config=azure_config) # If the model has neither local_dataset or azure_dataset_id, mount_or_download_dataset should fail. # This mounting call must happen before any other operations on the container, because already the model # creation may need access to the dataset. with pytest.raises(ValueError) as ex: runner.setup() assert ex.value.args[0] == "The model must contain either local_dataset or azure_dataset_id." runner.project_root = test_output_dirs.root_dir # Pointing the model to a dataset folder that does not exist should raise an Exception fake_folder = runner.project_root / "foo" runner.container.local_dataset = fake_folder with pytest.raises(FileNotFoundError): runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset) # If the local dataset folder exists, mount_or_download_dataset should not do anything. fake_folder.mkdir() local_dataset = runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset) assert local_dataset == fake_folder # Pointing the model to a dataset in Azure should trigger a download runner.container.local_dataset = None runner.container.azure_dataset_id = dataset_name with logging_section("Starting download"): result_path = runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset) # Download goes into <project_root> / "datasets" / "test_dataset" expected_path = runner.project_root / fixed_paths.DATASETS_DIR_NAME / dataset_name assert result_path == expected_path assert result_path.is_dir() dataset_csv = Path(result_path) / DATASET_CSV_FILE_NAME assert dataset_csv.is_file() # Check that each individual file in the dataset is present for folder in [1, *range(10, 20)]: sub_folder = result_path / str(folder) sub_folder.is_dir() for file in ["ct", "esophagus", "heart", "lung_l", "lung_r", "spinalcord"]: f = (sub_folder / file).with_suffix(".nii.gz") assert f.is_file()
def test_download_azureml_dataset( test_output_dirs: OutputFolderForTests) -> None: dataset_name = "test-dataset" config = ModelConfigBase(should_validate=False) azure_config = get_default_azure_config() runner = MLRunner(config, azure_config) runner.project_root = test_output_dirs.root_dir # If the model has neither local_dataset or azure_dataset_id, mount_or_download_dataset should fail. with pytest.raises(ValueError): runner.mount_or_download_dataset() # Pointing the model to a dataset folder that does not exist should raise an Exception fake_folder = runner.project_root / "foo" runner.model_config.local_dataset = fake_folder with pytest.raises(FileNotFoundError): runner.mount_or_download_dataset() # If the local dataset folder exists, mount_or_download_dataset should not do anything. fake_folder.mkdir() local_dataset = runner.mount_or_download_dataset() assert local_dataset == fake_folder # Pointing the model to a dataset in Azure should trigger a download runner.model_config.local_dataset = None runner.model_config.azure_dataset_id = dataset_name with logging_section("Starting download"): result_path = runner.mount_or_download_dataset() # Download goes into <project_root> / "datasets" / "test_dataset" expected_path = runner.project_root / fixed_paths.DATASETS_DIR_NAME / dataset_name assert result_path == expected_path assert result_path.is_dir() dataset_csv = Path(result_path) / DATASET_CSV_FILE_NAME assert dataset_csv.is_file() # Check that each individual file in the dataset is present for folder in [1, *range(10, 20)]: sub_folder = result_path / str(folder) sub_folder.is_dir() for file in [ "ct", "esophagus", "heart", "lung_l", "lung_r", "spinalcord" ]: f = (sub_folder / file).with_suffix(".nii.gz") assert f.is_file()