Example #1
def test_innereyecontainer_setup_passes_on_allow_incomplete_labels(
        test_output_dirs: OutputFolderForTests,
        allow_partial_ground_truth: bool) -> None:
    """
    Test that InnerEyeContainer.setup passes on the correct value of allow_incomplete_labels to
    full_image_dataset.convert_channels_to_file_paths.

    :param test_output_dirs: Test fixture.
    :param allow_partial_ground_truth: The value to set allow_incomplete_labels to; the test checks that it is
        passed through.
    """
    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    config.allow_incomplete_labels = allow_partial_ground_truth
    container = InnerEyeContainer(config)

    def mocked_convert_channels_to_file_paths(
            _: List[str], __: pd.DataFrame, ___: Path, ____: str,
            allow_incomplete_labels: bool) -> Tuple[List[Optional[Path]], str]:
        paths: List[Optional[Path]] = []
        failed_channel_info = ''
        assert allow_incomplete_labels == allow_partial_ground_truth
        return paths, failed_channel_info

    with mock.patch("InnerEye.ML.lightning_base.convert_channels_to_file_paths"
                    ) as convert_channels_to_file_paths_mock:
        convert_channels_to_file_paths_mock.side_effect = mocked_convert_channels_to_file_paths
        container.setup()
        convert_channels_to_file_paths_mock.assert_called()
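
Example #1 takes a bare boolean argument, so in the source repository it is presumably driven by a pytest parametrize decorator. A self-contained sketch of that pattern (the test name, values, and body below are illustrative stand-ins, not taken from the repository):

import pytest

# The decorator supplies each value in turn as the `allow_partial_ground_truth` argument.
@pytest.mark.parametrize("allow_partial_ground_truth", [True, False])
def test_allow_incomplete_labels_flag(allow_partial_ground_truth: bool) -> None:
    # Stand-in for the body of Example #1, which asserts that the flag is passed through
    # to convert_channels_to_file_paths.
    assert isinstance(allow_partial_ground_truth, bool)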
Example #2
def test_run_scoring(test_output_dirs: OutputFolderForTests,
                     is_ensemble: bool) -> None:
    """
    Run the scoring script on an image file.
    This test lives outside the normal Tests folder because it imports "score.py" from the repository root folder.
    If we switched to using InnerEye as a package, we would have to handle this import specially.
    The inference run here is on a 1-channel model, whereas test_register_and_score_model works with a 2-channel
    model.
    """
    seed_everything(42)
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"
    image_size = (40, 40, 40)
    test_crop_size = image_size
    dummy_config = DummyModel()
    dummy_config.test_crop_size = test_crop_size
    dummy_config.inference_stride_size = (10, 10, 10)
    dummy_config.inference_batch_size = 10
    create_model_and_store_checkpoint(dummy_config, checkpoint)
    all_paths = [checkpoint] * 2 if is_ensemble else [checkpoint]
    inference_pipeline, dummy_config = create_inference_pipeline(dummy_config,
                                                                 all_paths,
                                                                 use_gpu=False)
    image_with_header = io_util.load_nifti_image(test_image)
    image_with_header.image = image_with_header.image[
        :image_size[0], :image_size[1], :image_size[2]]
    result = run_inference([image_with_header, image_with_header],
                           inference_pipeline, dummy_config)
    assert image_with_header.image.shape == result.shape  # type: ignore
    print(f"Unique result values: {np.unique(result)}")
    assert np.all(result == 1)
Example #3
def test_score_check_spacing() -> None:
    config = DummyModel()
    config.dataset_expected_spacing_xyz = (1.0, 1.0, 3.0)
    image_with_header = io_util.load_nifti_image(img_nii_path)
    spacing_xyz = reverse_tuple_float3(image_with_header.header.spacing)
    assert is_spacing_valid(spacing_xyz, config.dataset_expected_spacing_xyz)
    assert is_spacing_valid(spacing_xyz, (1, 1, 3.01))
    assert not is_spacing_valid(spacing_xyz, (1, 1, 3.2))
Example #4
def model_config(
    slice_exclusion_rules: List[SliceExclusionRule],
    summed_probability_rules: List[SummedProbabilityRule]
) -> SegmentationModelBase:
    test_config = DummyModel()
    test_config.slice_exclusion_rules = slice_exclusion_rules
    test_config.summed_probability_rules = summed_probability_rules
    test_config.ground_truth_ids = ground_truth_ids
    return test_config
Example #5
def test_optim_params1(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if the optimizer parameters are read correctly for InnerEye configs.
    """
    model = DummyModel()
    model.set_output_to(test_output_dirs.root_dir)
    runner = MLRunner(model_config=model)
    runner.setup()
    lightning_model = runner.container.model
    optim, _ = lightning_model.configure_optimizers()
    assert optim[0].param_groups[0]["lr"] == 1e-3
Example #6
def test_construct_loss_function() -> None:
    model_config = DummyModel()
    model_config.loss_type = SegmentationLoss.Mixture
    # Weights deliberately do not sum to 1.0.
    weights = [1.5, 0.5]
    model_config.mixture_loss_components = [
        MixtureLossComponent(weights[0], SegmentationLoss.CrossEntropy, 0.2),
        MixtureLossComponent(weights[1], SegmentationLoss.SoftDice, 0.1)]
    loss_fn = ModelTrainingStepsForSegmentation.construct_loss_function(model_config)
    assert isinstance(loss_fn, MixtureLoss)
    assert len(loss_fn.components) == len(weights)
    assert loss_fn.components[0][0] == weights[0] / sum(weights)
    assert loss_fn.components[1][0] == weights[1] / sum(weights)
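
The assertions in Example #6 imply that MixtureLoss normalizes the component weights so that they sum to one. A standalone sketch of that arithmetic (plain Python, not the InnerEye implementation):

# Weight normalization implied by the assertions above.
weights = [1.5, 0.5]
normalized = [w / sum(weights) for w in weights]
assert normalized == [0.75, 0.25]  # matches loss_fn.components[0][0] and loss_fn.components[1][0]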
Example #7
def test_score_image_dicom_mock_run_store(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out run and store functions.

    This mocks out run_inference and store_as_ubyte_nifti so that init_from_model_inference_json
    is tested in addition to the tests in test_score_image_dicom_mock_all.

    :param test_output_dirs: Test output directories.
    """
    mock_segmentation = {'mock_segmentation': True}
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=azure_config,
                         project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = test_output_dirs.root_dir / "temp_pack_dicom_series" / "dicom_series.zip"
    zip_known_dicom_series(zipped_dicom_series_path)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True,
        model_id="Dummy:1")

    with mock.patch('score.run_inference',
                    return_value=mock_segmentation) as mock_run_inference:
        with mock.patch(
                'score.store_as_ubyte_nifti',
                return_value=HNSEGMENTATION_FILE) as mock_store_as_ubyte_nifti:
            segmentation = score_image(score_pipeline_config)
            assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED,
                                     model_folder)

    mock_run_inference.assert_called()
    mock_store_as_ubyte_nifti.assert_called()
Example #8
def test_save_and_load_state_dict(lr_scheduler_type: LRSchedulerType) -> None:
    def object_dict_same(lr1: SchedulerWithWarmUp,
                         lr2: SchedulerWithWarmUp) -> bool:
        """
        Tests to see if two LRScheduler objects are the same.
        This ignores lambdas if one of the schedulers is LambdaLR, since lambdas are not stored to the state dict.
        """
        # ignore the _scheduler and _warmup objects, compare those separately
        dict1 = {
            key: val
            for key, val in lr1.__dict__.items()
            if key != "_scheduler" and key != "_warmup"
        }
        dict2 = {
            key: val
            for key, val in lr2.__dict__.items()
            if key != "_scheduler" and key != "_warmup"
        }

        # see if the underlying scheduler object is the same
        scheduler1_dict = {
            key: val
            for key, val in lr1._scheduler.__dict__.items()
            if key != "lr_lambdas"
        }
        scheduler2_dict = {
            key: val
            for key, val in lr2._scheduler.__dict__.items()
            if key != "lr_lambdas"
        }
        warmup1_dict = lr1._warmup.__dict__
        warmup2_dict = lr2._warmup.__dict__
        return dict1 == dict2 and scheduler1_dict == scheduler2_dict and warmup1_dict == warmup2_dict

    config = DummyModel(num_epochs=10,
                        l_rate_scheduler=lr_scheduler_type,
                        l_rate_exponential_gamma=0.9,
                        l_rate_step_gamma=0.9,
                        l_rate_step_step_size=2,
                        l_rate_multi_step_gamma=0.9,
                        l_rate_multi_step_milestones=[3, 5, 7],
                        l_rate_polynomial_gamma=0.9,
                        l_rate_warmup=LRWarmUpType.Linear,
                        l_rate_warmup_epochs=4)
    lr_scheduler_1, optimizer = _create_lr_scheduler_and_optimizer(config)

    lr_scheduler_1.step()
    # This is not supported functionality - we are doing this just to change _scheduler from its default state
    lr_scheduler_1._scheduler.step()
    lr_scheduler_1._scheduler.step()

    state_dict = lr_scheduler_1.state_dict()

    lr_scheduler_2, _ = _create_lr_scheduler_and_optimizer(config, optimizer)

    assert not object_dict_same(lr_scheduler_1, lr_scheduler_2)

    lr_scheduler_2.load_state_dict(state_dict)

    assert object_dict_same(lr_scheduler_1, lr_scheduler_2)
Example #9
def test_run_scoring(is_ensemble: bool) -> None:
    checkpoints_paths = checkpoint_full_paths * 2 if is_ensemble else checkpoint_full_paths
    dummy_config = DummyModel()
    inference_pipeline, dummy_config = create_inference_pipeline(dummy_config, checkpoints_paths, use_gpu=False)
    image_with_header = io_util.load_nifti_image(test_image)
    result = run_inference([image_with_header, image_with_header], inference_pipeline, dummy_config)
    assert np.all(result == 1)
    assert image_with_header.image.shape == result.shape  # type: ignore
Example #10
def test_create_lr_scheduler_last_epoch() -> None:
    """
    Test that the lr scheduler is initialized to the correct epoch.
    """
    l_rate = 1e-3
    gamma = 0.5
    total_epochs = 5
    expected_lrs_per_epoch = [l_rate * (gamma**i) for i in range(total_epochs)]
    config = DummyModel()
    config.l_rate = l_rate
    config.l_rate_scheduler = LRSchedulerType.Step
    config.l_rate_step_step_size = 1
    config.l_rate_step_gamma = gamma
    # create lr scheduler
    initial_scheduler, initial_optimizer = _create_lr_scheduler_and_optimizer(
        config)
    # check lr scheduler initialization step
    initial_epochs = 3
    assert np.allclose(enumerate_scheduler(initial_scheduler, initial_epochs),
                       expected_lrs_per_epoch[:initial_epochs])
    # create lr scheduler for recovery checkpoint
    config.start_epoch = initial_epochs
    recovery_scheduler, recovery_optimizer = _create_lr_scheduler_and_optimizer(
        config)
    # Both the scheduler and the optimizer need to be loaded from the checkpoint.
    recovery_scheduler.load_state_dict(initial_scheduler.state_dict())
    recovery_optimizer.load_state_dict(initial_optimizer.state_dict())
    assert recovery_scheduler.last_epoch == config.start_epoch
    # Check that the lr scheduler initialization matches the checkpoint epoch,
    # as training will resume from start_epoch + 1 in this case
    assert np.allclose(enumerate_scheduler(recovery_scheduler, 2),
                       expected_lrs_per_epoch[initial_epochs:])
Example #11
def test_download_azureml_dataset(test_output_dirs: OutputFolderForTests) -> None:
    dataset_name = "test-dataset"
    config = DummyModel()
    config.local_dataset = None
    config.azure_dataset_id = ""
    azure_config = get_default_azure_config()
    runner = MLRunner(config, azure_config=azure_config)
    # If the model has neither local_dataset nor azure_dataset_id, mount_or_download_dataset should fail.
    # This mounting call must happen before any other operations on the container, because already the model
    # creation may need access to the dataset.
    with pytest.raises(ValueError) as ex:
        runner.setup()
    assert ex.value.args[0] == "The model must contain either local_dataset or azure_dataset_id."
    runner.project_root = test_output_dirs.root_dir

    # Pointing the model to a dataset folder that does not exist should raise an Exception
    fake_folder = runner.project_root / "foo"
    runner.container.local_dataset = fake_folder
    with pytest.raises(FileNotFoundError):
        runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset)

    # If the local dataset folder exists, mount_or_download_dataset should not do anything.
    fake_folder.mkdir()
    local_dataset = runner.mount_or_download_dataset(runner.container.azure_dataset_id, runner.container.local_dataset)
    assert local_dataset == fake_folder

    # Pointing the model to a dataset in Azure should trigger a download
    runner.container.local_dataset = None
    runner.container.azure_dataset_id = dataset_name
    with logging_section("Starting download"):
        result_path = runner.mount_or_download_dataset(runner.container.azure_dataset_id,
                                                       runner.container.local_dataset)
    # Download goes into <project_root> / "datasets" / "test-dataset"
    expected_path = runner.project_root / fixed_paths.DATASETS_DIR_NAME / dataset_name
    assert result_path == expected_path
    assert result_path.is_dir()
    dataset_csv = Path(result_path) / DATASET_CSV_FILE_NAME
    assert dataset_csv.is_file()
    # Check that each individual file in the dataset is present
    for folder in [1, *range(10, 20)]:
        sub_folder = result_path / str(folder)
        assert sub_folder.is_dir()
        for file in ["ct", "esophagus", "heart", "lung_l", "lung_r", "spinalcord"]:
            f = (sub_folder / file).with_suffix(".nii.gz")
            assert f.is_file()
Example #12
def test_file_system_with_subfolders(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if a subfolder can be created within the output folder structure, for use with cross validation.
    """
    model = DummyModel()
    model.set_output_to(test_output_dirs.root_dir)
    container = InnerEyeContainer(model)
    # File system should be copied from model config to container
    assert container.file_system_config == model.file_system_config
    runner = MLRunner(model_config=model)
    runner.setup()
    assert str(runner.container.outputs_folder).endswith(model.model_name)
    output_subfolder = "foo"
    expected_folder = runner.container.outputs_folder / output_subfolder
    runner = MLRunner(model_config=model, output_subfolder=output_subfolder)
    runner.setup()
    assert runner.container.outputs_folder == expected_folder
Example #13
def test_score_image_dicom_mock_run(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out only the run scoring function.

    This mocks out run_inference so that store_as_ubyte_nifti
    is tested in addition to the tests in test_score_image_dicom_mock_run_store.

    :param test_output_dirs: Test output directories.
    """
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=azure_config,
                         project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = zip_dicom_series(model_folder)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)

    image_with_header = io_util.load_nifti_image(HNSEGMENTATION_FILE)

    with mock.patch(
            'score.run_inference',
            return_value=image_with_header.image) as mock_run_inference:
        segmentation = score_image(score_pipeline_config)
        assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED,
                                 model_folder)

    mock_run_inference.assert_called()
Example #14
def test_model_name_for_innereye_container() -> None:
    """
    Test if the InnerEye container picks up the name of the model correctly. The name will impact the output folder
    structure that is created.
    """
    expected_name = "DummyModel"
    model = DummyModel()
    assert model.model_name == expected_name
    container = InnerEyeContainer(model)
    assert container.model_name == expected_name
Example #15
def test_run_ml_with_segmentation_model(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of segmentation models, when it is started together via run_ml.
    """
    config = DummyModel()
    config.num_dataload_workers = 0
    config.restrict_subjects = "1"
    # Increasing the test crop size should not have any effect on the results.
    # This guards against a bug in an earlier version of the code where the wrong execution mode was used to
    # compute the expected mask size at training time.
    config.test_crop_size = (75, 75, 75)
    config.inference_on_train_set = False
    config.inference_on_val_set = True
    config.inference_on_test_set = True
    config.set_output_to(test_output_dirs.root_dir)
    azure_config = get_default_azure_config()
    azure_config.train = True
    MLRunner(config, azure_config=azure_config).run()
Example #16
def test_download_model_weights(
        test_output_dirs: OutputFolderForTests) -> None:
    # Download a sample ResNet model from a URL given in the PyTorch docs
    # The downloaded model does not match the architecture, which is okay since we are only testing the download here.

    model_config = DummyModel(weights_url=EXTERNAL_WEIGHTS_URL_EXAMPLE)
    manage_recovery = get_default_checkpoint_handler(
        model_config=model_config, project_root=test_output_dirs.root_dir)
    result_path = manage_recovery.download_weights()
    assert result_path.is_file()
Example #17
def test_visualize_commandline1() -> None:
    """
    Test for a bug in commandline processing: the model configuration was always overwritten with all the default
    values of each field in Config, rather than with only the overrides specified on the commandline.
    """
    default_config = DummyModel()
    old_photonorm = default_config.norm_method
    old_random_seed = default_config.get_effective_random_seed()
    new_dataset = "new_dataset"
    assert default_config.azure_dataset_id != new_dataset
    with mock.patch("sys.argv", ["", f"--azure_dataset_id={new_dataset}"]):
        updated_config, runner_config, _ = get_configs(default_config, yaml_file_path=fixed_paths.SETTINGS_YAML_FILE)
    assert updated_config.azure_dataset_id == new_dataset
    # These two values were not specified on the commandline, and should be at their original values.
    assert updated_config.norm_method == old_photonorm
    assert updated_config.get_effective_random_seed() == old_random_seed
    # Credentials and variables should have been picked up from yaml files
    assert len(runner_config.datasets_container) > 0
Example #18
def test_single_element() -> None:
    config = DummyModel()
    element = ModelTrainingStepsForSegmentation.construct_non_mixture_loss_function(
        config, SegmentationLoss.CrossEntropy, power=None)
    mixture = MixtureLoss([(1.0, element)])
    target = torch.tensor([[[0, 0, 1], [1, 1, 0]]], dtype=torch.float32)
    logits = torch.tensor([[[-1e9, -1e9, 0], [0, 0, 0]]], dtype=torch.float32)
    # Extract class indices
    element_loss = element(logits, target)
    mixture_loss = mixture(logits, target)
    assert torch.isclose(element_loss, mixture_loss)
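
The logits of -1e9 used here (and in test_two_elements, Example #24 below) effectively pin the softmax output to a hard assignment, presumably so that the losses see an unambiguous prediction. A tiny standalone illustration of that effect:

import torch

# Very negative logits drive their softmax probabilities to zero, leaving all mass on the last entry.
probs = torch.softmax(torch.tensor([-1e9, -1e9, 0.0]), dim=0)
assert torch.allclose(probs, torch.tensor([0.0, 0.0, 1.0]))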
Example #19
def create_data_loaders(train_config: DummyModel) -> None:
    train_config.train_batch_size = 1
    train_config.local_dataset = base_path
    # create the dataset splits
    dataset_splits = train_config.get_dataset_splits()
    # create the data loaders
    data_loaders = train_config.create_data_loaders()
    train_loader = data_loaders[ModelExecutionMode.TRAIN]
    val_loader = data_loaders[ModelExecutionMode.VAL]

    def check_patient_id_in_dataset(loader: DataLoader, split: pd.DataFrame) -> None:
        subjects = list(split.subject.unique())
        for i, x in enumerate(loader):
            sample_from_loader = CroppedSample.from_dict(x)
            assert isinstance(sample_from_loader.metadata, list)
            assert len(sample_from_loader.metadata) == 1
            assert sample_from_loader.metadata[0].patient_id in subjects

    # check if the subjects in the dataloaders are the same in the corresponding dataset splits
    for loader, split in [(train_loader, dataset_splits.train), (val_loader, dataset_splits.val)]:
        check_patient_id_in_dataset(loader, split)
Example #20
def test_multistep_lr() -> None:
    l_rate = 0.3
    config = DummyModel(l_rate_scheduler=LRSchedulerType.MultiStep,
                        l_rate=l_rate,
                        l_rate_multi_step_gamma=0.1,
                        num_epochs=10,
                        l_rate_multi_step_milestones=[2],
                        l_rate_warmup=LRWarmUpType.Linear,
                        l_rate_warmup_epochs=5)

    def check_warmup(expected: List[float]) -> None:
        scheduler, _ = _create_lr_scheduler_and_optimizer(config)
        actual = enumerate_scheduler(scheduler, 4)
        assert actual == expected

    # No warmup: multi-step LR with milestone after 2 epochs
    original_schedule = [l_rate, l_rate, l_rate * 0.1, l_rate * 0.1]
    config.l_rate_warmup = LRWarmUpType.Linear
    config.l_rate_warmup_epochs = 0
    check_warmup(original_schedule)

    # 1 epoch warmup: linear function up to the initial learning rate gives a warmup value of half the initial LR
    config.l_rate_warmup_epochs = 1
    check_warmup([l_rate * 0.5] + original_schedule[:3])

    # 2 epochs warmup
    config.l_rate_warmup_epochs = 2
    check_warmup([l_rate / 3, l_rate * 2 / 3] + original_schedule[:2])
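
The warmup values asserted in check_warmup follow a linear ramp that reaches the base learning rate one step after the last warmup epoch, i.e. lr_i = l_rate * (i + 1) / (warmup_epochs + 1) for i < warmup_epochs. A quick check of the arithmetic behind the two cases above (a sketch of the expected numbers, not of the scheduler implementation):

# Linear warmup arithmetic implied by the assertions in Example #20.
l_rate = 0.3
for warmup_epochs in (1, 2):
    warmup = [l_rate * (i + 1) / (warmup_epochs + 1) for i in range(warmup_epochs)]
    print(warmup_epochs, warmup)  # warmup_epochs=1 gives [l_rate / 2]; warmup_epochs=2 gives [l_rate / 3, 2 * l_rate / 3]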
Example #21
def test_resume_from_saved_state(lr_scheduler_type: LRSchedulerType,
                                 warmup_epochs: int) -> None:
    """
    Tests that the LR scheduler, when restarted from a given epoch, continues as expected.
    """
    restart_from_epoch = 4
    config = DummyModel(num_epochs=7,
                        l_rate_scheduler=lr_scheduler_type,
                        l_rate_exponential_gamma=0.9,
                        l_rate_step_gamma=0.9,
                        l_rate_step_step_size=2,
                        l_rate_multi_step_gamma=0.9,
                        l_rate_multi_step_milestones=[3, 5, 7],
                        l_rate_polynomial_gamma=0.9,
                        l_rate_warmup=LRWarmUpType.Linear
                        if warmup_epochs > 0 else LRWarmUpType.NoWarmUp,
                        l_rate_warmup_epochs=warmup_epochs)
    # This scheduler mimics what happens if we train for the full set of epochs
    scheduler_all_epochs, _ = _create_lr_scheduler_and_optimizer(config)
    expected_lr_list = enumerate_scheduler(scheduler_all_epochs,
                                           config.num_epochs)

    # Create a scheduler where training will be recovered
    scheduler1, optimizer1 = _create_lr_scheduler_and_optimizer(config)
    # Scheduler 1 is only run for 4 epochs, and then "restarted" to train the rest of the epochs.
    result_lr_list = enumerate_scheduler(scheduler1, restart_from_epoch)
    # resume state: This just means setting start_epoch in the config
    config.start_epoch = restart_from_epoch
    scheduler_resume, optimizer_resume = _create_lr_scheduler_and_optimizer(
        config)
    # Load a "checkpoint" for both scheduler and optimizer
    scheduler_resume.load_state_dict(scheduler1.state_dict())
    optimizer_resume.load_state_dict(optimizer1.state_dict())
    result_lr_list.extend(
        enumerate_scheduler(scheduler_resume,
                            config.num_epochs - restart_from_epoch))
    print(f"Actual   schedule: {result_lr_list}")
    print(f"Expected schedule: {expected_lr_list}")
    assert len(result_lr_list) == len(expected_lr_list)
    assert np.allclose(result_lr_list, expected_lr_list)
Example #22
def test_cosine_decay_function() -> None:
    """
    Tests the cosine LR decay function at (pi/2) and verifies that the value is correct.
    """
    config = DummyModel(l_rate_scheduler=LRSchedulerType.Cosine,
                        num_epochs=10,
                        min_l_rate=0.0)

    # create lr scheduler
    test_epoch = 5
    lr_scheduler, _ = _create_lr_scheduler_and_optimizer(config)
    for _ in range(test_epoch):
        lr_scheduler.step()
    assert lr_scheduler.get_last_lr()[0] == 0.5 * config.l_rate
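
The assertion in Example #22 matches the standard cosine annealing curve lr(t) = min_lr + 0.5 * (base_lr - min_lr) * (1 + cos(pi * t / T)): with min_l_rate = 0, T = 10 and t = 5 the cosine term is zero, leaving exactly half the base rate. A small numeric check of the formula (standalone sketch; the base LR value is illustrative, not read from the config):

import math

def cosine_lr(base_lr: float, min_lr: float, t: int, total: int) -> float:
    # Standard cosine annealing curve (the schedule used by PyTorch's CosineAnnealingLR).
    return min_lr + 0.5 * (base_lr - min_lr) * (1 + math.cos(math.pi * t / total))

assert math.isclose(cosine_lr(1e-3, 0.0, 5, 10), 0.5 * 1e-3)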
Example #23
def _test_mount_for_lightning_container(test_output_dirs: OutputFolderForTests,
                                        is_offline_run: bool,
                                        local_dataset: Optional[Path],
                                        azure_dataset: str,
                                        is_lightning_model: bool) -> LightningContainer:
    config: Optional[DeepLearningConfig] = None
    container: Optional[LightningContainer] = None
    if is_lightning_model:
        container = DummyContainerWithDatasets()
        container.azure_dataset_id = azure_dataset
        container.local_dataset = local_dataset
    else:
        config = DummyModel()
        config.azure_dataset_id = azure_dataset
        config.local_dataset = local_dataset
    # The legacy InnerEye models require an existing dataset_csv file present in the dataset folder. Create that.
    download_path = test_output_dirs.root_dir / "downloaded"
    mount_path = test_output_dirs.root_dir / "mounted"
    if not is_lightning_model:
        train_and_test_data = "train_and_test_data"
        for path in [download_path, mount_path, test_output_dirs.root_dir]:
            # If destination folder exists, delete content to ensure consistency and avoid 'FileExistsError'
            if (path / train_and_test_data).is_dir():
                shutil.rmtree(path / train_and_test_data)

            # Create the directory structure and copy the data
            shutil.copytree(full_ml_test_data_path(train_and_test_data), path / train_and_test_data)
            # Copy 'data.csv' file
            shutil.copy(full_ml_test_data_path(DATASET_CSV_FILE_NAME), path / DATASET_CSV_FILE_NAME)

    with mock.patch("InnerEye.ML.run_ml.MLRunner.is_offline_run", is_offline_run):
        with mock.patch("InnerEye.ML.run_ml.download_dataset", return_value=download_path):
            with mock.patch("InnerEye.ML.run_ml.try_to_mount_input_dataset", return_value=mount_path):
                runner = MLRunner(config, container=container,
                                  azure_config=None, project_root=test_output_dirs.root_dir)
                runner.setup()
                return runner.container
Example #24
def test_two_elements() -> None:
    config = DummyModel()
    element1 = ModelTrainingStepsForSegmentation.construct_non_mixture_loss_function(
        config, SegmentationLoss.CrossEntropy, power=None)
    element2 = ModelTrainingStepsForSegmentation.construct_non_mixture_loss_function(
        config, SegmentationLoss.SoftDice, power=None)
    weight1, weight2 = 0.3, 0.7
    mixture = MixtureLoss([(weight1, element1), (weight2, element2)])
    target = torch.tensor([[[0, 0, 1], [1, 1, 0]]], dtype=torch.float32)
    logits = torch.tensor([[[-1e9, -1e9, 0], [0, 0, 0]]], dtype=torch.float32)
    # Extract class indices
    element1_loss = element1(logits, target)
    element2_loss = element2(logits, target)
    mixture_loss = mixture(logits, target)
    assert torch.isclose(weight1 * element1_loss + weight2 * element2_loss,
                         mixture_loss)
Example #25
def test_model_inference_train_and_test(
        test_output_dirs: OutputFolderForTests, perform_cross_validation: bool,
        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows; disable it for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()

    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()

    # To make it seem like there was a training run before this, copy checkpoints into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path("checkpoints")
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))

    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    result, _, _ = MLRunner(config).model_inference_train_and_test(
        checkpoint_handler=checkpoint_handler)
    if result is None:
        raise ValueError("Error result cannot be None")
    assert isinstance(result, InferenceMetricsForSegmentation)
    for key, _ in result.epochs.items():
        epoch_folder_name = common_util.epoch_folder_name(key)
        for folder in [
                ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value,
                ModelExecutionMode.TEST.value
        ]:
            results_folder = config.outputs_folder / epoch_folder_name / folder
            folder_exists = results_folder.is_dir()
            if folder in [
                    ModelExecutionMode.TRAIN.value,
                    ModelExecutionMode.VAL.value
            ]:
                if perform_training_set_inference:
                    assert folder_exists
            else:
                assert folder_exists
Example #26
def test_lr_monotonically_decreasing_function(
        lr_scheduler_type: LRSchedulerType) -> None:
    """
    Tests that the LR schedule produced by the scheduler is monotonically non-increasing.
    """
    config = DummyModel(num_epochs=10,
                        l_rate_scheduler=lr_scheduler_type,
                        l_rate_exponential_gamma=0.9,
                        l_rate_step_gamma=0.9,
                        l_rate_step_step_size=1,
                        l_rate_multi_step_gamma=0.9,
                        l_rate_multi_step_milestones=[3, 5, 7],
                        l_rate_polynomial_gamma=0.9)

    def non_increasing(L: List) -> bool:
        return all(x >= y for x, y in zip(L, L[1:]))

    # create lr scheduler
    lr_scheduler, _ = _create_lr_scheduler_and_optimizer(config)
    lr_list = enumerate_scheduler(lr_scheduler, config.num_epochs)
    assert non_increasing(lr_list)
Example #27
def test_model_inference_train_and_test(
        test_output_dirs: TestOutputDirectories,
        perform_cross_validation: bool,
        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows; disable it for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()

    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()

    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path("checkpoints")
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    result, _, _ = MLRunner(config).model_inference_train_and_test()
    if result is None:
        raise ValueError("Error result cannot be None")
    assert isinstance(result, InferenceMetricsForSegmentation)
    for key, _ in result.epochs.items():
        epoch_folder_name = common_util.epoch_folder_name(key)
        for folder in [
                ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value,
                ModelExecutionMode.TEST.value
        ]:
            results_folder = config.outputs_folder / epoch_folder_name / folder
            folder_exists = results_folder.is_dir()
            if folder in [
                    ModelExecutionMode.TRAIN.value,
                    ModelExecutionMode.VAL.value
            ]:
                if perform_training_set_inference:
                    assert folder_exists
            else:
                assert folder_exists
Example #28
def test_model_inference_train_and_test(
        test_output_dirs: OutputFolderForTests, perform_cross_validation: bool,
        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows; disable it for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()

    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()

    checkpoint_path = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    create_model_and_store_checkpoint(config, checkpoint_path)
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    result, _, _ = MLRunner(config).model_inference_train_and_test(
        checkpoint_handler=checkpoint_handler)
    if result is None:
        raise ValueError("Error result cannot be None")
    assert isinstance(result, InferenceMetricsForSegmentation)
    epoch_folder_name = common_util.BEST_EPOCH_FOLDER_NAME
    for folder in [
            ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value,
            ModelExecutionMode.TEST.value
    ]:
        results_folder = config.outputs_folder / epoch_folder_name / folder
        folder_exists = results_folder.is_dir()
        if folder in [
                ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value
        ]:
            if perform_training_set_inference:
                assert folder_exists
        else:
            assert folder_exists
Example #29
def test_is_offline_cross_val_parent_run(offline_parent_cv_run: bool) -> None:
    train_config = DummyModel()
    train_config.number_of_cross_validation_splits = 2 if offline_parent_cv_run else 0
    assert MLRunner(train_config).is_offline_cross_val_parent_run(
    ) == offline_parent_cv_run
Example #30
def default_config() -> ModelConfigBase:
    config = DummyModel()
    config.set_output_to(str(full_ml_test_data_path("outputs")))
    return config