def test_run_container_with_plain_lightning_in_situ(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can train a plain Lightning model, without any additional methods defined, end-to-end.
    """
    runner = default_runner()
    local_dataset = test_output_dirs.root_dir / "dataset"
    local_dataset.mkdir()
    args = ["", "--model=DummyContainerWithPlainLightning",
            "--model_configs_namespace=Tests.ML.configs",
            f"--output_to={test_output_dirs.root_dir}",
            f"--local_dataset={local_dataset}"]
    with mock.patch("sys.argv", args):
        runner.run()
    assert isinstance(runner.lightning_container, DummyContainerWithPlainLightning)
    # Test if the outputs folder is relative to the folder that we specified via the commandline
    runner.lightning_container.outputs_folder.relative_to(test_output_dirs.root_dir)
    results = runner.lightning_container.outputs_folder
    # Test if all the files that are written during inference exist.
    assert not (results / "on_inference_start.txt").is_file()
    assert (results / "test_step.txt").is_file()

def test_run_container_in_situ(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can get the config loader to load a Lightning container model, and then train locally.
    """
    runner = default_runner()
    local_dataset = test_output_dirs.root_dir / "dataset"
    local_dataset.mkdir()
    args = ["", "--model=DummyContainerWithModel",
            "--model_configs_namespace=Tests.ML.configs",
            f"--output_to={test_output_dirs.root_dir}",
            f"--local_dataset={local_dataset}"]
    with mock.patch("sys.argv", args):
        loaded_config, actual_run = runner.run()
    assert actual_run is None
    assert isinstance(runner.lightning_container, DummyContainerWithModel)
    # Test if the outputs folder is relative to the folder that we specified via the commandline
    runner.lightning_container.outputs_folder.relative_to(test_output_dirs.root_dir)
    results = runner.lightning_container.outputs_folder
    # Test that the setup method has been called
    assert runner.lightning_container.local_dataset is not None
    assert (runner.lightning_container.local_dataset / "setup.txt").is_file()
    # Test if all the files that are written during inference exist. Data for all 3 splits must be processed
    assert (results / "on_inference_start.txt").is_file()
    assert (results / "on_inference_end.txt").is_file()
    for mode in ModelExecutionMode:
        assert (results / f"on_inference_start_{mode.value}.txt").is_file()
        assert (results / f"on_inference_end_{mode.value}.txt").is_file()
        step_results = results / f"inference_step_{mode.value}.txt"
        assert step_results.is_file()
        # We should have one line per data item, and there are around 6 of them
        result_lines = [line for line in step_results.read_text().splitlines() if line.strip()]
        assert len(result_lines) >= 5
    metrics_per_split = pd.read_csv(results / "metrics_per_split.csv")
    # Training should have reduced the MSE to pretty much zero.
    expected = pd.read_csv(StringIO("""Split,MSE
Test,1e-7
Val,1e-7
Train,1e-7"""))
    pd.testing.assert_frame_equal(metrics_per_split, expected, check_less_precise=True)
    # Test if we have an args file that lists all parameters
    args_file = (results / ARGS_TXT).read_text()
    assert "Container:" in args_file
    assert "adam_betas" in args_file
    # Report generation must run
    assert (results / "create_report.txt").is_file()

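# The two tests above assert on marker files such as "test_step.txt" and "on_inference_start.txt".
# The sketch below is purely illustrative: it shows how a plain LightningModule can record hook
# invocations as empty files, which is the pattern those assertions rely on. The class name is
# hypothetical; the real dummy containers live in Tests.ML.configs and may differ in detail.
from pathlib import Path

from pytorch_lightning import LightningModule


class _MarkerWritingModuleSketch(LightningModule):
    """Hypothetical module that records hook invocations as empty marker files (illustration only)."""

    def __init__(self, outputs_folder: Path) -> None:
        super().__init__()
        self.outputs_folder = outputs_folder

    def test_step(self, batch, batch_idx):  # type: ignore
        # Touching a marker file is what lets the tests above verify that inference actually ran.
        (self.outputs_folder / "test_step.txt").touch()
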
def test_load_invalid_container() -> None:
    """
    Test if loading a container fails if one of the parameters is not valid.
    """
    DummyContainerWithParameters()
    runner = default_runner()
    args = ["", "--model=DummyContainerWithParameters",
            "--number_of_cross_validation_splits=1",
            "--model_configs_namespace=Tests.ML.configs"]
    with pytest.raises(ValueError) as ex:
        with mock.patch("sys.argv", args):
            runner.parse_and_load_model()
    assert "At least two splits required to perform cross validation, but got 1" in str(ex)

def test_override_azure_config_from_container() -> None:
    # Arguments partly to be set in AzureConfig, and partly in the container.
    args = ["", "--model", DummyContainerWithAzureConfigOverrides.__name__,
            "--model_configs_namespace", "Tests.ML.test_lightning_containers",
            "--container_subscription_id", "cli-container-subscription-id",
            "--subscription_id", "cli-subscription-id",
            "--tenant_id", "cli-tenant-id",
            "--application_id", "cli-application-id",
            "--experiment_name", "cli-experiment-name",
            "--workspace_name", "cli-workspace-name"]
    with mock.patch("sys.argv", args):
        runner: Runner = default_runner()
        runner.parse_and_load_model()
    assert runner.azure_config is not None
    assert runner.lightning_container is not None

    # Current AzureConfig parameter priority is as follows:
    # 1. Container
    # 2. CLI
    # 3. YAML
    # 4. AzureConfig defaults

    # ==== Parameters declared in the container ====
    # Unique container parameters can be set from the CLI, then override AzureConfig
    assert runner.azure_config.subscription_id \
           == runner.lightning_container.container_subscription_id \
           == "cli-container-subscription-id"

    # If the container declares a clashing parameter, the CLI value will be
    # consumed by the original AzureConfig
    assert runner.azure_config.application_id == "cli-application-id"
    assert runner.lightning_container.application_id == "default-container-application-id"

    # However, it may then be overridden by the container default; this should be
    # avoided to prevent unexpected behaviour
    assert runner.azure_config.tenant_id \
           == runner.lightning_container.tenant_id \
           == "default-container-tenant-id"

    # ==== Parameters declared only in AzureConfig ====
    # Hard-coded overrides ignore the CLI value
    assert runner.azure_config.experiment_name == "hardcoded-experiment-name"
    # AzureConfig parameters not overridden in the container can still be set from the CLI
    assert runner.azure_config.workspace_name == "cli-workspace-name"

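# For context, a purely illustrative sketch of the container exercised by the test above. The real
# DummyContainerWithAzureConfigOverrides is defined elsewhere in this module; the field names below
# mirror the assertions in the test, but the exact mechanism by which container values are copied
# into AzureConfig is an assumption here, so the sketch is kept in comments only:
#
#     class DummyContainerWithAzureConfigOverrides(LightningContainer):
#         # Container-only parameter, settable via "--container_subscription_id"; its value ends up
#         # in AzureConfig.subscription_id (container priority beats the CLI "--subscription_id").
#         container_subscription_id = param.String(default="")
#         # Clashing parameters: for tenant_id the container default wins over the CLI, while for
#         # application_id the CLI value stays in AzureConfig and the default stays in the container.
#         tenant_id = param.String(default="default-container-tenant-id")
#         application_id = param.String(default="default-container-application-id")
#         # experiment_name is hard-coded into AzureConfig ("hardcoded-experiment-name"), so its CLI
#         # value is ignored; workspace_name is not overridden and keeps the CLI value.
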
def test_run_fastmri_container(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can run the fastMRI model end-to-end. This takes about 2min on a CPU machine,
    hence it is only run in AzureML.
    """
    runner = default_runner()
    dataset_dir = test_output_dirs.root_dir / "dataset"
    dataset_dir.mkdir(parents=True)
    args = ["", "--model=FastMriOnRandomData",
            f"--output_to={test_output_dirs.root_dir}",
            "--model_configs_namespace=Tests.ML.configs"]
    with mock.patch("sys.argv", args):
        loaded_config, run_info = runner.run()
    assert isinstance(run_info, AzureRunInfo)
    from Tests.ML.configs.fastmri_random import FastMriOnRandomData
    assert isinstance(runner.lightning_container, FastMriOnRandomData)

def test_load_container_with_arguments() -> None:
    """
    Test if we can load a container and override a value in it via the commandline. Parameters can
    only be set at container level, not at model level.
    """
    DummyContainerWithParameters()
    runner = default_runner()
    args = ["", "--model=DummyContainerWithParameters",
            "--container_param=param1",
            "--model_configs_namespace=Tests.ML.configs"]
    with mock.patch("sys.argv", args):
        runner.parse_and_load_model()
    assert isinstance(runner.lightning_container, DummyContainerWithParameters)
    assert runner.lightning_container.container_param == "param1"
    # Overriding model parameters should not work
    args = ["", "--model=DummyContainerWithParameters",
            "--model_param=param2",
            "--model_configs_namespace=Tests.ML.configs"]
    with pytest.raises(ValueError) as ex:
        with mock.patch("sys.argv", args):
            runner.parse_and_load_model()
    assert "model_param" in str(ex)

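# NOTE: the parametrize decorator for the following test is not part of this excerpt, but the test
# needs one to supply container_name. The values below are an assumption, re-using container names
# exercised elsewhere in this file; the original test may parametrize over a different set.
@pytest.mark.parametrize("container_name", ["DummyContainerWithModel",
                                            "DummyContainerWithPlainLightning"])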
def test_submit_container_to_azureml(container_name: str) -> None:
    """
    Test if we can get the config loader to load a Lightning container model, and get it through the
    AzureML submission process.
    """
    runner = default_runner()
    mock_run = Run.get_context()
    args = ["", f"--model={container_name}",
            "--azureml=True",
            "--model_configs_namespace=Tests.ML.configs"]
    with mock.patch("sys.argv", args):
        with mock.patch("InnerEye.Azure.azure_config.AzureConfig.get_dataset_consumption",
                        return_value=MockDatasetConsumption):
            with mock.patch("azureml.core.Experiment.submit", return_value=mock_run):
                loaded_config, actual_run = runner.run()
    assert actual_run == mock_run
    assert isinstance(runner.lightning_container, LightningContainer)