Example #1
def model_train_unittest(config: Optional[DeepLearningConfig],
                         dirs: OutputFolderForTests,
                         checkpoint_handler: Optional[CheckpointHandler] = None,
                         lightning_container: Optional[LightningContainer] = None) -> \
        Tuple[StoringLogger, CheckpointHandler]:
    """
    A shortcut for running model training in the unit test suite. It runs training for the given config, with the
    default checkpoint handler initialized to point to the test output folder specified in dirs.
    :param config: The configuration of the model to train.
    :param dirs: The test fixture that provides an output folder for the test.
    :param checkpoint_handler: The checkpoint handler to use for training. If not provided, a default handler
    pointing to the test output folder is created.
    :param lightning_container: An optional LightningContainer object that will be passed through to the training
    routine.
    :return: A tuple of (StoringLogger, CheckpointHandler) from the completed training run.
    """
    runner = MLRunner(model_config=config, container=lightning_container)
    # Setup sets random seeds before model creation and stores the created model in the container.
    # The container initialized in this way is then used for training below.
    # For all tests running in AzureML, we need to skip the downloading of datasets that would otherwise happen,
    # because all unit test configs come with their own local dataset already.
    runner.setup(use_mount_or_download_dataset=False)
    if checkpoint_handler is None:
        azure_config = get_default_azure_config()
        checkpoint_handler = CheckpointHandler(azure_config=azure_config,
                                               container=runner.container,
                                               project_root=dirs.root_dir)
    _, storing_logger = model_train(checkpoint_handler=checkpoint_handler,
                                    container=runner.container)
    return storing_logger, checkpoint_handler  # type: ignore
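For context, a hedged sketch of how a test might call this helper. DummyModel, set_output_to and get_checkpoints_to_test appear elsewhere in these examples; the test name and the assertions are illustrative only, and the sketch assumes DummyModel is wired to a local test dataset as elsewhere in the suite.
def test_model_train_unittest_usage(test_output_dirs: OutputFolderForTests) -> None:
    # Illustrative only: assumes DummyModel points at a local test dataset.
    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    storing_logger, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
    # The returned handler can be used to locate checkpoints for a later inference step.
    assert storing_logger is not None
    assert len(checkpoint_handler.get_checkpoints_to_test()) >= 1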
Example #2
def test_non_image_encoder(
        test_output_dirs: OutputFolderForTests,
        hidden_layer_num_feature_channels: Optional[int]) -> None:
    """
    Test if we can build a simple MLP model that only feeds off non-image features.
    """
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    dataset_contents = _get_fake_dataset_contents()
    (dataset_folder / DATASET_CSV_FILE_NAME).write_text(dataset_contents)
    config = NonImageEncoder(
        should_validate=False,
        hidden_layer_num_feature_channels=hidden_layer_num_feature_channels)
    config.local_dataset = dataset_folder
    config.set_output_to(test_output_dirs.root_dir)
    config.max_batch_grad_cam = 1
    config.validate()
    # run model training
    _, checkpoint_handler = model_train_unittest(
        config, output_folder=test_output_dirs)
    # run model inference
    runner = MLRunner(config)
    runner.setup()
    runner.model_inference_train_and_test(
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    assert config.get_total_number_of_non_imaging_features() == 18
Example #3
def test_model_name_is_set(test_output_dirs: OutputFolderForTests) -> None:
    container = DummyContainerWithModel()
    container.local_dataset = test_output_dirs.root_dir
    runner = MLRunner(model_config=None, container=container)
    runner.setup()
    expected_name = "DummyContainerWithModel"
    assert runner.container._model_name == expected_name
    assert expected_name in str(runner.container.outputs_folder)
Example #4
def _create_container(
        extra_local_dataset_paths: List[Path] = [],
        extra_azure_dataset_ids: List[str] = []) -> LightningContainer:
    # Note: test_output_dirs is taken from the enclosing test function's scope;
    # this helper is defined as a nested function inside that test.
    container = DummyContainerWithModel()
    container.local_dataset = test_output_dirs.root_dir
    container.extra_local_dataset_paths = extra_local_dataset_paths  # type: ignore
    container.extra_azure_dataset_ids = extra_azure_dataset_ids
    runner = MLRunner(model_config=None, container=container)
    runner.setup()
    return runner.container
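A hedged usage sketch of how the enclosing test could call this helper, assuming it receives the test_output_dirs fixture; the folder name is purely illustrative.
# Inside the enclosing test that receives the test_output_dirs fixture:
extra_folder = test_output_dirs.root_dir / "extra_dataset"  # illustrative folder name
extra_folder.mkdir(exist_ok=True)
container = _create_container(extra_local_dataset_paths=[extra_folder])
assert container.extra_local_dataset_paths == [extra_folder]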
Example #5
def test_optim_params1(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if the optimizer parameters are read correctly for InnerEye configs.
    """
    model = DummyModel()
    model.set_output_to(test_output_dirs.root_dir)
    runner = MLRunner(model_config=model)
    runner.setup()
    lightning_model = runner.container.model
    optim, _ = lightning_model.configure_optimizers()
    assert optim[0].param_groups[0]["lr"] == 1e-3
Example #6
def test_regression_test(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that the file comparison for regression tests is actually called in the workflow.
    """
    container = DummyContainerWithModel()
    container.local_dataset = test_output_dirs.root_dir
    container.regression_test_folder = Path(str(uuid.uuid4().hex))
    runner = MLRunner(container=container)
    runner.setup()
    with pytest.raises(ValueError) as ex:
        runner.run()
    assert "Folder with expected files does not exist" in str(ex)
Example #7
def test_optim_params2(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if the optimizer parameters are read correctly for containers.
    """
    container = DummyContainerWithModel()
    container.local_dataset = test_output_dirs.root_dir
    runner = MLRunner(model_config=None, container=container)
    runner.setup()
    lightning_model = runner.container.model
    optim, _ = lightning_model.configure_optimizers()
    expected_lr = 1e-1
    assert container.l_rate == expected_lr
    assert optim[0].param_groups[0]["lr"] == expected_lr
Example #8
def test_container_hooks(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if the hooks before training are called at the right place and in the right order.
    """
    container = DummyContainerWithHooks()
    container.local_dataset = test_output_dirs.root_dir
    runner = MLRunner(model_config=None, container=container)
    runner.setup()
    runner.run()
    # The hooks in DummyContainerWithHooks itself check that the hooks are called in the right order. Here,
    # only check that they have all been called.
    for file in [
            "global_rank_zero.txt", "local_rank_zero.txt", "all_ranks.txt"
    ]:
        assert (runner.container.outputs_folder /
                file).is_file(), f"Missing file: {file}"
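For reference, a hedged sketch of what DummyContainerWithHooks could look like. The hook names below are an assumption about the LightningContainer pre-training hooks this test exercises and should be checked against the InnerEye version in use; HelloRegression is used purely as a trivial stand-in model.
class DummyContainerWithHooks(LightningContainer):
    def create_model(self):
        # Any trivial model would do here; HelloRegression is only an illustration.
        return HelloRegression()

    def before_training_on_global_rank_zero(self) -> None:
        # Runs exactly once per job, before the other two hooks.
        (self.outputs_folder / "global_rank_zero.txt").touch()

    def before_training_on_local_rank_zero(self) -> None:
        # Runs once per node; the global-rank-zero hook must already have fired.
        assert (self.outputs_folder / "global_rank_zero.txt").is_file()
        (self.outputs_folder / "local_rank_zero.txt").touch()

    def before_training_on_all_ranks(self) -> None:
        # Runs on every rank, after both hooks above.
        assert (self.outputs_folder / "local_rank_zero.txt").is_file()
        (self.outputs_folder / "all_ranks.txt").touch()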
Example #9
def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> None:
    dataset_name = "test-dataset"
    config = DummyModel()
    config.local_dataset = None
    config.azure_dataset_id = ""
    azure_config = get_default_azure_config()
    runner = MLRunner(config, azure_config=azure_config)
    # If the model has neither local_dataset nor azure_dataset_id, mount_or_download_dataset should fail.
    # This mounting call must happen before any other operations on the container, because already the model
    # creation may need access to the dataset.
    with pytest.raises(ValueError) as ex:
        runner.setup()
    assert ex.value.args[0] == "The model must contain either local_dataset or azure_dataset_id."
    runner.project_root = test_output_dirs.root_dir

    # Pointing the model to a dataset folder that does not exist should raise an Exception
    fake_folder = runner.project_root / "foo"
    runner.container.local_dataset = fake_folder
    with pytest.raises(FileNotFoundError):
        runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset)

    # If the local dataset folder exists, mount_or_download_dataset should not do anything.
    fake_folder.mkdir()
    local_dataset = runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset)
    assert local_dataset == fake_folder

    # Pointing the model to a dataset in Azure should trigger a download
    runner.container.local_dataset = None
    runner.container.azure_dataset_id = dataset_name
    with logging_section("Starting download"):
        result_path = runner.mount_or_download_dataset(runner.container.azure_dataset_id,
                                                       runner.container.local_dataset)
    # Download goes into <project_root> / "datasets" / "test-dataset"
    expected_path = runner.project_root / fixed_paths.DATASETS_DIR_NAME / dataset_name
    assert result_path == expected_path
    assert result_path.is_dir()
    dataset_csv = Path(result_path) / DATASET_CSV_FILE_NAME
    assert dataset_csv.is_file()
    # Check that each individual file in the dataset is present
    for folder in [1, *range(10, 20)]:
        sub_folder = result_path / str(folder)
        assert sub_folder.is_dir()
        for file in ["ct", "esophagus", "heart", "lung_l", "lung_r", "spinalcord"]:
            f = (sub_folder / file).with_suffix(".nii.gz")
            assert f.is_file()
Example #10
def test_file_system_with_subfolders(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if a subfolder can be created within the output folder structure, for use with cross validation.
    """
    model = DummyModel()
    model.set_output_to(test_output_dirs.root_dir)
    container = InnerEyeContainer(model)
    # File system should be copied from model config to container
    assert container.file_system_config == model.file_system_config
    runner = MLRunner(model_config=model)
    runner.setup()
    assert str(runner.container.outputs_folder).endswith(model.model_name)
    output_subfolder = "foo"
    expected_folder = runner.container.outputs_folder / output_subfolder
    runner = MLRunner(model_config=model, output_subfolder=output_subfolder)
    runner.setup()
    assert runner.container.outputs_folder == expected_folder
Example #11
def test_model_inference_on_single_run(test_output_dirs: OutputFolderForTests) -> None:
    fallback_run_id = FALLBACK_HELLO_CONTAINER_RUN

    files_to_check = ["test_mse.txt", "test_mae.txt"]

    training_run = get_most_recent_run(fallback_run_id_for_local_execution=fallback_run_id)
    all_training_files = training_run.get_file_names()
    for file in files_to_check:
        assert f"outputs/{file}" in all_training_files, f"{file} is missing"
    training_folder = test_output_dirs.root_dir / "training"
    training_folder.mkdir()
    training_files = [training_folder / file for file in files_to_check]
    for file, download_path in zip(files_to_check, training_files):
        training_run.download_file(f"outputs/{file}", output_file_path=str(download_path))

    container = HelloContainer()
    container.set_output_to(test_output_dirs.root_dir)
    container.model_id = get_most_recent_model_id(fallback_run_id_for_local_execution=fallback_run_id)
    azure_config = get_default_azure_config()
    azure_config.train = False
    ml_runner = MLRunner(container=container, azure_config=azure_config, project_root=test_output_dirs.root_dir)
    ml_runner.setup()
    ml_runner.run()

    inference_files = [container.outputs_folder / file for file in files_to_check]
    for inference_file in inference_files:
        assert inference_file.exists(), f"{inference_file} is missing"

    for training_file, inference_file in zip(training_files, inference_files):
        training_lines = training_file.read_text().splitlines()
        inference_lines = inference_file.read_text().splitlines()
        # We expect all the files we are reading to have a single float value
        assert len(training_lines) == 1
        train_value = float(training_lines[0].strip())
        assert len(inference_lines) == 1
        inference_value = float(inference_lines[0].strip())
        assert inference_value == pytest.approx(train_value, 1e-6)
Example #12
def _test_mount_for_lightning_container(test_output_dirs: OutputFolderForTests,
                                        is_offline_run: bool,
                                        local_dataset: Optional[Path],
                                        azure_dataset: str,
                                        is_lightning_model: bool) -> LightningContainer:
    config: Optional[DeepLearningConfig] = None
    container: Optional[LightningContainer] = None
    if is_lightning_model:
        container = DummyContainerWithDatasets()
        container.azure_dataset_id = azure_dataset
        container.local_dataset = local_dataset
    else:
        config = DummyModel()
        config.azure_dataset_id = azure_dataset
        config.local_dataset = local_dataset
    # The legacy InnerEye models require an existing dataset.csv file in the dataset folder; create that here.
    download_path = test_output_dirs.root_dir / "downloaded"
    mount_path = test_output_dirs.root_dir / "mounted"
    if not is_lightning_model:
        train_and_test_data = "train_and_test_data"
        for path in [download_path, mount_path, test_output_dirs.root_dir]:
            # If destination folder exists, delete content to ensure consistency and avoid 'FileExistsError'
            if (path / train_and_test_data).is_dir():
                shutil.rmtree(path / train_and_test_data)

            # Create the directory structure and copy the data
            shutil.copytree(full_ml_test_data_path(train_and_test_data), path / train_and_test_data)
            # Copy the dataset.csv file
            shutil.copy(full_ml_test_data_path(DATASET_CSV_FILE_NAME), path / DATASET_CSV_FILE_NAME)

    with mock.patch("InnerEye.ML.run_ml.MLRunner.is_offline_run", is_offline_run):
        with mock.patch("InnerEye.ML.run_ml.download_dataset", return_value=download_path):
            with mock.patch("InnerEye.ML.run_ml.try_to_mount_input_dataset", return_value=mount_path):
                runner = MLRunner(config, container=container,
                                  azure_config=None, project_root=test_output_dirs.root_dir)
                runner.setup()
                return runner.container
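A hedged usage sketch of the helper above, for the simplest case of an offline run with an existing local dataset; the test name is illustrative and the expectation assumes that, with a local dataset already present, no download or mount is attempted and the container keeps pointing at that folder.
def test_mount_offline_with_local_dataset(test_output_dirs: OutputFolderForTests) -> None:
    container = _test_mount_for_lightning_container(test_output_dirs,
                                                    is_offline_run=True,
                                                    local_dataset=test_output_dirs.root_dir,
                                                    azure_dataset="",
                                                    is_lightning_model=True)
    # With a local dataset already present, the container should keep pointing at that folder.
    assert container.local_dataset == test_output_dirs.root_dir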