コード例 #1
0
def test_register_model_invalid() -> None:
    ws = get_default_workspace()
    config = get_model_loader().create_model_config_from_name("Lung")
    with pytest.raises(Exception):
        ml_runner = MLRunner(config, None)
        ml_runner.register_segmentation_model(
            run=Run.get_context(),
            workspace=ws,
            best_epoch=0,
            best_epoch_dice=0,
            checkpoint_paths=checkpoint_paths,
            model_proc=ModelProcessing.DEFAULT)
    with pytest.raises(Exception):
        ml_runner = MLRunner(config, get_default_azure_config())
        ml_runner.register_segmentation_model(
            best_epoch=0,
            best_epoch_dice=0,
            checkpoint_paths=checkpoint_paths,
            model_proc=ModelProcessing.DEFAULT)
コード例 #2
0
def test_register_model_skip() -> None:
    """
    If the AzureML workspace can't be read, model registration should be skipped and return None.
    """
    checkpoint_paths = [
        full_ml_test_data_path('checkpoints') / '1_checkpoint.pth.tar'
    ]
    config = get_model_loader().create_model_config_from_name("Lung")
    ml_runner = MLRunner(config, None)
    raises = mock.Mock()
    raises.side_effect = Exception
    with mock.patch.object(AzureConfig, 'get_workspace', raises):
        model, deployment_result = ml_runner.register_segmentation_model(
            model_description="",
            checkpoint_paths=checkpoint_paths,
            model_proc=ModelProcessing.DEFAULT)
    assert model is None
    assert deployment_result is None
コード例 #3
0
def test_register_and_score_model(is_ensemble: bool,
                                  dataset_expected_spacing_xyz: Any,
                                  model_outside_package: bool,
                                  test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully
    """
    ws = get_default_workspace()
    # Get an existing config as template
    loader = get_model_loader("Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage" if model_outside_package else "BasicModel2Epochs"
    )
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    # copy checkpoints into the outputs (simulating a run)
    stored_checkpoints = full_ml_test_data_path(os.path.join("train_and_test_data", "checkpoints"))
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    paths = [config.checkpoint_folder / "1_checkpoint.pth.tar"]
    checkpoints = paths * 2 if is_ensemble else paths
    model = None
    model_path = None
    # Mocking to get the source from the current directory
    # the score.py and python_wrapper.py cannot be moved inside the InnerEye package, which will be the
    # only code running (if these tests are run on the package).
    with mock.patch('InnerEye.Common.fixed_paths.repository_root_directory',
                    return_value=tests_root_directory().parent):
        try:
            tags = {"model_name": config.model_name}
            azure_config = get_default_azure_config()
            if model_outside_package:
                azure_config.extra_code_directory = "Tests"  # contains DummyModel
            deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(cfg.model_name), azure_cfg.docker_shm_size)
            ml_runner = MLRunner(config, azure_config, model_deployment_hook=deployment_hook)
            model, deployment_path, deployment_details = ml_runner.register_segmentation_model(
                workspace=ws,
                tags=tags,
                best_epoch=0,
                best_epoch_dice=0,
                checkpoint_paths=checkpoints,
                model_proc=ModelProcessing.DEFAULT)
            assert model is not None
            model_path = Path(model.get_model_path(model.name, model.version, ws))
            assert (model_path / fixed_paths.ENVIRONMENT_YAML_FILE_NAME).exists()
            assert (model_path / Path("InnerEye/ML/runner.py")).exists()
            assert deployment_path == Path(config.model_name)
            assert deployment_details == azure_config.docker_shm_size

            # move test data into the data folder to simulate an actual run
            train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

            img_channel_1_name = "id1_channel1.nii.gz"
            img_channel_1_path = train_and_test_data_dir / img_channel_1_name
            img_channel_2_name = "id1_channel2.nii.gz"
            img_channel_2_path = train_and_test_data_dir / img_channel_2_name

            # download the registered model and test that we can run the score pipeline on it
            model_root = Path(model.download(str(test_output_dirs.root_dir)))
            # create a dummy datastore to store model checkpoints and image data
            # this simulates the code shapshot being executed in a real run
            test_datastore = test_output_dirs.root_dir / "test_datastore"
            shutil.move(
                str(model_root / "test_outputs"),
                str(test_datastore / RELATIVE_TEST_OUTPUTS_PATH)
            )
            data_root = test_datastore / DEFAULT_DATA_FOLDER
            os.makedirs(data_root)
            shutil.copy(str(img_channel_1_path), data_root)
            shutil.copy(str(img_channel_2_path), data_root)

            # run score pipeline as a separate process using the python_wrapper.py code to simulate a real run
            return_code = SubprocessConfig(process="python", args=[
                str(model_root / "python_wrapper.py"),
                "--spawnprocess=python",
                str(model_root / "score.py"),
                f"--data-folder={str(test_datastore)}",
                f"--test_image_channels={img_channel_1_name},{img_channel_2_name}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

            # check that the process completed as expected
            assert return_code == 0
            expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
            assert expected_segmentation_path.exists()

            # sanity check the resulting segmentation
            expected_shape = get_nifti_shape(img_channel_1_path)
            image_header = get_unit_image_header()
            assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [0], np.ubyte)

        finally:
            # delete the registered model, and any downloaded artifacts
            shutil.rmtree(test_output_dirs.root_dir)
            if model and model_path:
                model.delete()
                shutil.rmtree(model_path)
コード例 #4
0
def test_register_and_score_model(
        is_ensemble: bool, dataset_expected_spacing_xyz: Any,
        model_outside_package: bool,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully
    """
    # We are creating checkpoints on the fly in this test, writing a randomly initialized model.
    set_random_seed(0)
    # Get an existing config as template
    loader = get_model_loader(
        "Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage"
        if model_outside_package else "BasicModel2Epochs")
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    checkpoints_absolute = []
    model_and_info = ModelAndInfo(
        config=config, model_execution_mode=ModelExecutionMode.TRAIN)
    model_and_info.create_model()
    model_and_info.create_optimizer()
    checkpoints_absolute.append(model_and_info.save_checkpoint(epoch=10))
    if is_ensemble:
        checkpoints_absolute.append(model_and_info.save_checkpoint(epoch=20))
    checkpoints_relative = [
        f.relative_to(config.checkpoint_folder) for f in checkpoints_absolute
    ]
    azureml_model = None
    # Simulate a project root: We can't derive that from the repository root because that might point
    # into Python's package folder
    project_root = Path(__file__).parent.parent
    # Double-check that we are at the right place, by testing for a file that would quite certainly not be found
    # somewhere else
    assert (project_root / fixed_paths.SCORE_SCRIPT).is_file()
    try:
        azure_config = get_default_azure_config()
        if model_outside_package:
            azure_config.extra_code_directory = "Tests"  # contains BasicModel2EpochsOutsidePackage
        deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(
            cfg.model_name), azure_cfg.docker_shm_size)
        ml_runner = MLRunner(config,
                             azure_config,
                             project_root=project_root,
                             model_deployment_hook=deployment_hook)
        registration_result = ml_runner.register_segmentation_model(
            model_description="",
            checkpoint_paths=checkpoints_absolute,
            model_proc=ModelProcessing.DEFAULT)
        assert registration_result is not None
        azureml_model, deployment_result = registration_result
        assert azureml_model is not None
        assert deployment_result == (Path(config.model_name),
                                     azure_config.docker_shm_size)

        # download the registered model and test that we can run the score pipeline on it
        model_root = Path(
            azureml_model.download(str(test_output_dirs.root_dir)))
        # The model needs to contain score.py at the root, the (merged) environment definition,
        # and the inference config.
        expected_files = [
            *fixed_paths.SCRIPTS_AT_ROOT,
            fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
            fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
            "InnerEye/ML/runner.py",
        ]
        # All checkpoints go into their own folder
        expected_files.extend(
            str(Path(CHECKPOINT_FOLDER) / c) for c in checkpoints_relative)
        for expected_file in expected_files:
            assert (model_root /
                    expected_file).is_file(), f"File {expected_file} missing"

        # create a dummy datastore to store the image data
        test_datastore = test_output_dirs.root_dir / "test_datastore"
        # move test data into the data folder to simulate an actual run
        train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
        img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
        data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
        data_root.mkdir(parents=True)
        for f in img_files:
            shutil.copy(str(train_and_test_data_dir / f), str(data_root))

        # run score pipeline as a separate process
        python_executable = sys.executable
        [return_code1,
         stdout1] = SubprocessConfig(process=python_executable,
                                     args=["--version"
                                           ]).spawn_and_monitor_subprocess()
        assert return_code1 == 0
        print(f"Executing Python version {stdout1[0]}")
        return_code, stdout2 = SubprocessConfig(
            process=python_executable,
            args=[
                str(model_root / fixed_paths.SCORE_SCRIPT),
                f"--data_folder={str(data_root)}",
                f"--image_files={img_files[0]},{img_files[1]}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

        # check that the process completed as expected
        assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
        expected_segmentation_path = Path(
            model_root) / DEFAULT_RESULT_IMAGE_NAME
        assert expected_segmentation_path.exists(
        ), f"Result file not found: {expected_segmentation_path}"

        # sanity check the resulting segmentation
        expected_shape = get_nifti_shape(train_and_test_data_dir /
                                         img_files[0])
        image_header = get_unit_image_header()
        assert_nifti_content(str(expected_segmentation_path), expected_shape,
                             image_header, [3], np.ubyte)

    finally:
        # delete the registered model
        if azureml_model:
            azureml_model.delete()