def test_copy_child_paths_to_folder(is_ensemble: bool,
                                    extra_code_directory: str,
                                    test_output_dirs: OutputFolderForTests) -> None:
    azure_config = AzureConfig(extra_code_directory=extra_code_directory)
    fake_model = SegmentationModelBase(should_validate=False)
    fake_model.set_output_to(test_output_dirs.root_dir)
    # To simulate ensemble models, there are two checkpoints, one in the root dir and one in a folder
    checkpoints_absolute, checkpoints_relative = create_checkpoints(fake_model, is_ensemble)
    # Simulate a project root: We can't derive that from the repository root because that might point
    # into Python's package folder
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
    expected_files = [
        fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/runner.py",
        "InnerEye/ML/model_testing.py",
        "InnerEye/Common/fixed_paths.py",
        "InnerEye/Common/common_util.py",
    ]
    for r in checkpoints_relative:
        expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
    for expected_file in expected_files:
        assert (model_folder / expected_file).is_file(), f"File missing: {expected_file}"
    trm = model_folder / "TestsOutsidePackage/test_register_model.py"
    if extra_code_directory:
        assert trm.is_file()
    else:
        assert not trm.is_file()
def test_experiment_name() -> None:
    c = AzureConfig()
    c.build_branch = "branch"
    c.get_git_information()
    assert create_experiment_name(c) == "branch"
    c.experiment_name = "foo"
    assert create_experiment_name(c) == "foo"
def __init__(self,
             model_config: ModelConfigBase,
             azure_config: Optional[AzureConfig] = None,
             project_root: Optional[Path] = None,
             post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
             model_deployment_hook: Optional[ModelDeploymentHookSignature] = None) -> None:
    """
    Driver class to run a ML experiment. Note that the project root argument MUST be supplied when using
    InnerEye as a package!
    :param model_config: Model related configurations
    :param azure_config: Azure related configurations
    :param project_root: Project root. This should only be omitted if calling run_ml from the test suite.
    Supplying it is crucial when using InnerEye as a package or submodule!
    :param post_cross_validation_hook: A function to call after waiting for completion of cross validation runs.
    The function is called with the model configuration and the path to the downloaded and merged metrics files.
    :param model_deployment_hook: an optional function for deploying a model in an application-specific way.
    If present, it should take a model config (SegmentationModelBase), an AzureConfig, and an AzureML Model
    as arguments, and return an optional Path and a further object of any type.
    """
    self.model_config = model_config
    self.azure_config: AzureConfig = azure_config or AzureConfig()
    self.project_root: Path = project_root or fixed_paths.repository_root_directory()
    self.post_cross_validation_hook = post_cross_validation_hook
    self.model_deployment_hook = model_deployment_hook
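# Illustrative sketch only, not part of the original source: the docstring above states that a
# post_cross_validation_hook is called with the model configuration and the path to the downloaded and
# merged metrics files. A minimal hook matching that calling convention could look like the following;
# the function name and body are hypothetical, and imports are assumed to match the surrounding snippets.
def example_post_cross_validation_hook(config: ModelConfigBase, merged_metrics_path: Path) -> None:
    # Placeholder for application-specific reporting on the merged cross validation metrics file.
    logging.info(f"Cross validation finished for {config.model_name}, merged metrics at {merged_metrics_path}")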
def test_score_image_dicom_mock_none(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works. Finally, there is no mocking and full image scoring is run
    using the PassThroughModel.

    :param test_output_dirs: Test output directories.
    """
    model_config = PassThroughModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = zip_dicom_series(model_folder)
    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)
    segmentation = score_image(score_pipeline_config)
    assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
def test_build_config(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that json with build information is created correctly.
    """
    config = AzureConfig(
        build_number=42,
        build_user="user",
        build_branch="branch",
        build_source_id="00deadbeef",
        build_source_author="author",
        tag="tag",
        model="model")
    result_location = ExperimentResultLocation(azure_job_name="job")
    net_json = build_information_to_dot_net_json(config, result_location)
    expected = '{"BuildNumber": 42, "BuildRequestedFor": "user", "BuildSourceBranchName": "branch", ' \
               '"BuildSourceVersion": "00deadbeef", "BuildSourceAuthor": "author", "ModelName": "model", ' \
               '"ResultsContainerName": null, "ResultsUri": null, "DatasetFolder": null, "DatasetFolderUri": null, ' \
               '"AzureBatchJobName": "job"}'
    assert expected == net_json
    result_folder = test_output_dirs.root_dir / "buildinfo"
    build_information_to_dot_net_json_file(config, result_location, folder=result_folder)
    result_file = result_folder / BUILDINFORMATION_JSON
    assert result_file.exists()
    assert result_file.read_text() == expected
def parse_and_load_model(self) -> ParserResult:
    """
    Parses the command line arguments, and creates configuration objects for the model itself, and for the
    Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
    parser output from parsing the model commandline arguments.
    Raises a ValueError if no "model" argument is provided on the commandline.
    """
    # Create a parser that will understand only the args we need for an AzureConfig
    parser1 = create_runner_parser()
    parser_result = parse_args_and_add_yaml_variables(parser1,
                                                      yaml_config_file=self.yaml_config_file,
                                                      project_root=self.project_root,
                                                      fail_on_unknown_args=False)
    azure_config = AzureConfig(**parser_result.args)
    azure_config.project_root = self.project_root
    self.azure_config = azure_config
    self.model_config = None
    if not azure_config.model:
        raise ValueError("Parameter 'model' needs to be set to tell InnerEye which model to run.")
    model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
    # Create the model as per the "model" commandline option. This can return either a built-in config
    # of type DeepLearningConfig, or a LightningContainer.
    config_or_container = model_config_loader.create_model_config_from_name(model_name=azure_config.model)

    def parse_overrides_and_apply(c: object, previous_parser_result: ParserResult) -> ParserResult:
        assert isinstance(c, GenericConfig)
        parser = type(c).create_argparser()
        # For each parser, feed in the unknown settings from the previous parser. All commandline args should
        # be consumed by name, hence fail if there is something that is still unknown.
        parser_result = parse_arguments(parser,
                                        settings_from_yaml=previous_parser_result.unknown_settings_from_yaml,
                                        args=previous_parser_result.unknown,
                                        fail_on_unknown_args=True)
        # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
        c.apply_overrides(parser_result.known_settings_from_yaml)
        c.apply_overrides(parser_result.overrides)
        c.validate()
        return parser_result

    # Now create a parser that understands overrides at model/container level.
    parser_result = parse_overrides_and_apply(config_or_container, parser_result)
    if isinstance(config_or_container, LightningContainer):
        self.lightning_container = config_or_container
    elif isinstance(config_or_container, ModelConfigBase):
        # Built-in InnerEye models use a fake container
        self.model_config = config_or_container
        self.lightning_container = InnerEyeContainer(config_or_container)
    else:
        raise ValueError(f"Don't know how to handle a loaded configuration of type {type(config_or_container)}")
    if azure_config.extra_code_directory:
        exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
        logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
    else:
        logging.info("extra_code_directory is unset")
    return parser_result
def get_configs(default_model_config: SegmentationModelBase,
                yaml_file_path: Path) -> Tuple[SegmentationModelBase, AzureConfig, Dict]:
    parser_result = create_parser(yaml_file_path)
    args = parser_result.args
    runner_config = AzureConfig(**args)
    logging_to_stdout(args["log_level"])
    config = default_model_config or ModelConfigLoader().create_model_config_from_name(runner_config.model)
    config.apply_overrides(parser_result.overrides, should_validate=False)
    return config, runner_config, args
def __init__(self,
             project_root: Path,
             yaml_config_file: Path,
             post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
             model_deployment_hook: Optional[ModelDeploymentHookSignature] = None):
    self.project_root = project_root
    self.yaml_config_file = yaml_config_file
    self.post_cross_validation_hook = post_cross_validation_hook
    self.model_deployment_hook = model_deployment_hook
    # model_config and azure_config are placeholders for now, and are set properly when command line args are
    # parsed.
    self.model_config: Optional[DeepLearningConfig] = None
    self.azure_config: AzureConfig = AzureConfig()
    self.lightning_container: LightningContainer = None  # type: ignore
def parse_and_load_model(self) -> Optional[ParserResult]:
    """
    Parses the command line arguments, and creates configuration objects for the model itself, and for the
    Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
    parser output from parsing the model commandline arguments.
    If no "model" argument is provided on the commandline, self.model_config will be set to None, and the return
    value is None.
    """
    # Create a parser that will understand only the args we need for an AzureConfig
    parser1 = create_runner_parser()
    parser1_result = parse_args_and_add_yaml_variables(parser1,
                                                       yaml_config_file=self.yaml_config_file,
                                                       project_root=self.project_root,
                                                       args=self.command_line_args,
                                                       fail_on_unknown_args=False)
    azure_config = AzureConfig(**parser1_result.args)
    azure_config.project_root = self.project_root
    self.azure_config = azure_config
    self.model_config = None  # type: ignore
    if not azure_config.model:
        return None
    model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser1_result.args)
    # Create the model as per the "model" commandline option
    model_config = model_config_loader.create_model_config_from_name(model_name=azure_config.model)
    # This model will be either a classification model or a segmentation model. Those have different
    # fields that could be overridden on the command line. Create a parser that understands the fields we need
    # for the actual model type. We feed this parser with the YAML settings and commandline arguments that the
    # first parser did not recognize.
    parser2 = type(model_config).create_argparser()
    parser2_result = parse_arguments(parser2,
                                     settings_from_yaml=parser1_result.unknown_settings_from_yaml,
                                     args=parser1_result.unknown,
                                     fail_on_unknown_args=True)
    # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
    model_config.apply_overrides(parser1_result.unknown_settings_from_yaml)
    model_config.apply_overrides(parser2_result.overrides)
    model_config.validate()
    # Set the file system related configs, they might be affected by the overrides that were applied.
    logging.info("Creating the adjusted output folder structure.")
    model_config.create_filesystem(self.project_root)
    if azure_config.extra_code_directory:
        exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
        logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
    else:
        logging.info("extra_code_directory is unset")
    self.model_config = model_config
    return parser2_result
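# Illustrative sketch only, not part of the original source: one possible way to drive parse_and_load_model
# from code, assuming the enclosing class is the InnerEye Runner and using the constructor below that accepts
# explicit command_line_args. The argument values are hypothetical.
runner = Runner(project_root=Path.cwd(),
                yaml_config_file=fixed_paths.SETTINGS_YAML_FILE,
                command_line_args=["--model=Lung", "--train=False"])
parser_result = runner.parse_and_load_model()
if parser_result is None:
    # No "--model" argument was given, so no model config was loaded (see the docstring above).
    logging.warning("No model was specified on the commandline.")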
def __init__(self,
             project_root: Path,
             yaml_config_file: Path,
             post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
             model_deployment_hook: Optional[ModelDeploymentHookSignature] = None,
             command_line_args: Optional[List[str]] = None):
    self.project_root = project_root
    self.yaml_config_file = yaml_config_file
    self.post_cross_validation_hook = post_cross_validation_hook
    self.model_deployment_hook = model_deployment_hook
    self.command_line_args = command_line_args
    # model_config and azure_config are placeholders for now, and are set properly when command line args are
    # parsed.
    self.model_config: ModelConfigBase = ModelConfigBase(azure_dataset_id="")
    self.azure_config: AzureConfig = AzureConfig()
def test_score_image_dicom_mock_run_store(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out run and store functions.

    This mocks out run_inference and store_as_ubyte_nifti so that init_from_model_inference_json is tested in
    addition to the tests in test_score_image_dicom_mock_all.

    :param test_output_dirs: Test output directories.
    """
    mock_segmentation = {'mock_segmentation': True}
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = test_output_dirs.root_dir / "temp_pack_dicom_series" / "dicom_series.zip"
    zip_known_dicom_series(zipped_dicom_series_path)
    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True,
        model_id="Dummy:1")
    with mock.patch('score.run_inference', return_value=mock_segmentation) as mock_run_inference:
        with mock.patch('score.store_as_ubyte_nifti',
                        return_value=HNSEGMENTATION_FILE) as mock_store_as_ubyte_nifti:
            segmentation = score_image(score_pipeline_config)
            assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
    mock_run_inference.assert_called()
    mock_store_as_ubyte_nifti.assert_called()
def __init__(self,
             project_root: Path,
             yaml_config_file: Path,
             post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
             model_deployment_hook: Optional[ModelDeploymentHookSignature] = None):
    self.project_root = project_root
    self.yaml_config_file = yaml_config_file
    self.post_cross_validation_hook = post_cross_validation_hook
    self.model_deployment_hook = model_deployment_hook
    # model_config and azure_config are placeholders for now, and are set properly when command line args are
    # parsed.
    self.model_config: Optional[DeepLearningConfig] = None
    self.azure_config: AzureConfig = AzureConfig()
    self.lightning_container: LightningContainer = None  # type: ignore
    # This field stores the MLRunner object that has been created in the most recent call to the run() method.
    self.ml_runner: Optional[MLRunner] = None
def __init__(self,
             model_config: ModelConfigBase,
             azure_config: Optional[AzureConfig] = None,
             project_root: Optional[Path] = None,
             model_deployment_hook: Optional[ModelDeploymentHookSignature] = None) -> None:
    """
    Driver class to run a ML experiment. Note that the project root argument MUST be supplied when using
    InnerEye as a package!
    :param model_config: Model related configurations
    :param azure_config: Azure related configurations
    :param project_root: Project root. This should only be omitted if calling run_ml from the test suite.
    Supplying it is crucial when using InnerEye as a package or submodule!
    :param model_deployment_hook: an optional function for deploying a model in an application-specific way
    """
    self.model_config = model_config
    self.azure_config: AzureConfig = azure_config or AzureConfig()
    self.project_root: Path = project_root or fixed_paths.repository_root_directory()
    self.model_deployment_hook = model_deployment_hook
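# Illustrative sketch only, not part of the original source: per the docstrings in the constructors above,
# a model_deployment_hook receives the model config (SegmentationModelBase), the AzureConfig, and the
# registered AzureML Model, and returns an optional Path plus a further object of any type. The names and
# body below are hypothetical.
from typing import Any, Optional, Tuple
from azureml.core import Model

def example_model_deployment_hook(model_config: SegmentationModelBase,
                                  azure_config: AzureConfig,
                                  model: Model) -> Tuple[Optional[Path], Any]:
    # Placeholder for application-specific deployment, for example handing the registered model over to a
    # separate inference service. Here we only log the registered model's name and return no artifacts.
    logging.info(f"Deployment hook called for AzureML model {model.name}")
    return None, None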
def test_score_image_dicom_mock_run(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out only the run scoring function.

    This mocks out run_inference so that store_as_ubyte_nifti is tested in addition to the tests in
    test_score_image_dicom_mock_run_store.

    :param test_output_dirs: Test output directories.
    """
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = zip_dicom_series(model_folder)
    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)
    image_with_header = io_util.load_nifti_image(HNSEGMENTATION_FILE)
    with mock.patch('score.run_inference', return_value=image_with_header.image) as mock_run_inference:
        segmentation = score_image(score_pipeline_config)
        assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
    mock_run_inference.assert_called()
def test_get_child_paths(is_ensemble: bool, extra_code_directory: str) -> None:
    checkpoints = checkpoint_paths * 2 if is_ensemble else checkpoint_paths
    path_to_root = tests_root_directory().parent
    azure_config = AzureConfig(extra_code_directory=extra_code_directory)
    fake_model = ModelConfigBase(azure_dataset_id="fake_dataset_id")
    ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=path_to_root)
    child_paths = ml_runner.get_child_paths(checkpoints)
    assert fixed_paths.ENVIRONMENT_YAML_FILE_NAME in child_paths
    assert fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME in child_paths
    assert str(Path("InnerEye/ML/runner.py")) in child_paths
    assert str(Path("InnerEye/ML/model_testing.py")) in child_paths
    assert str(Path("InnerEye/Common/fixed_paths.py")) in child_paths
    assert str(Path("InnerEye/Common/common_util.py")) in child_paths
    trm = str(Path("TestsOutsidePackage/test_register_model.py"))
    if extra_code_directory:
        assert trm in child_paths
    else:
        assert trm not in child_paths
    assert all([x.relative_to(path_to_root) for x in checkpoints])
def main() -> None:
    parser = create_runner_parser(SegmentationModelBase)
    parser_result = parse_args_and_add_yaml_variables(parser, fail_on_unknown_args=True)
    surface_distance_config = SurfaceDistanceConfig.parse_args()

    azure_config = AzureConfig(**parser_result.args)
    config_model = azure_config.model
    if config_model is None:
        raise ValueError("The name of the model to train must be given in the --model argument.")

    model_config = ModelConfigLoader().create_model_config_from_name(config_model)
    model_config.apply_overrides(parser_result.overrides, should_validate=True)
    execution_mode = surface_distance_config.execution_mode

    run_mode = surface_distance_config.run_mode
    if run_mode == SurfaceDistanceRunType.IOV:
        ct_path = Path("outputs") / SurfaceDistanceRunType.IOV.value.lower() / "ct.nii.gz"
        ct = load_nifti_image(ct_path).image
    else:
        ct = None
    annotators = [annotator.strip() for annotator in surface_distance_config.annotators]
    extended_annotators = annotators + [surface_distance_config.model_name]
    outlier_range = surface_distance_config.outlier_range
    predictions = load_predictions(run_mode, azure_config, model_config, execution_mode, extended_annotators,
                                   outlier_range)
    segmentations = [load_nifti_image(Path(pred_seg.segmentation_path)) for pred_seg in predictions]
    img_shape = segmentations[0].image.shape
    # transpose spacing to match image which is transposed in io_util
    voxel_spacing = segmentations[0].header.spacing[::-1]

    overall_gold_standard = np.zeros(img_shape)
    sds_for_annotator = sd_util.initialise_surface_distance_dictionary(extended_annotators, img_shape)

    plane = surface_distance_config.plane
    output_img_dir = Path(surface_distance_config.output_img_dir)

    subject_id: Optional[int] = None
    for prediction, pred_seg_w_header in zip(predictions, segmentations):
        subject_id = prediction.subject_id
        structure_name = prediction.structure_name
        annotator = prediction.annotator
        pred_segmentation = pred_seg_w_header.image
        if run_mode == SurfaceDistanceRunType.OUTLIERS:
            try:
                ground_truth = sd_util.load_ground_truth_from_run(model_config, surface_distance_config,
                                                                  subject_id, structure_name)
            except FileNotFoundError as e:
                logging.warning(e)
                continue
        elif run_mode == SurfaceDistanceRunType.IOV:
            ground_truth = sd_util.get_annotations_and_majority_vote(model_config, annotators, structure_name)
        else:
            raise ValueError(f'Unrecognised run mode: {run_mode}. Expected either IOV or OUTLIERS')

        binary_prediction_mask = multi_label_array_to_binary(pred_segmentation, 2)[1]
        # For comparison, plot gold standard vs predicted segmentation
        segmentation_and_groundtruth_plot(binary_prediction_mask, ground_truth, subject_id, structure_name,
                                          plane, output_img_dir, annotator=annotator)

        if run_mode == SurfaceDistanceRunType.IOV:
            overall_gold_standard += ground_truth

        # Calculate and plot surface distance
        sds_full = sd_util.calculate_surface_distances(ground_truth, binary_prediction_mask, list(voxel_spacing))
        surface_distance_ground_truth_plot(ct, ground_truth, sds_full, subject_id, structure_name, plane,
                                           output_img_dir, annotator=annotator)

        if annotator is not None:
            sds_for_annotator[annotator] += sds_full

    # Plot all structures SDs for each annotator
    if run_mode == SurfaceDistanceRunType.IOV and subject_id is not None:
        for annotator, sds in sds_for_annotator.items():
            num_classes = int(np.amax(np.unique(overall_gold_standard)))
            binarised_gold_standard = multi_label_array_to_binary(overall_gold_standard, num_classes)[1:].sum(axis=0)
            surface_distance_ground_truth_plot(ct, binarised_gold_standard, sds, subject_id, 'All', plane,
                                               output_img_dir, annotator=annotator)
def test_create_runner_parser(with_config: bool) -> None:
    """
    Test parsing of commandline arguments: From arguments to the runner, can we reconstruct arguments for
    AzureConfig and for the model config? Check that default and non-default arguments are set correctly and
    recognized as default/non-default.
    """
    azure_parser = create_runner_parser(SegmentationModelBase if with_config else None)
    args_list = ["--model=Lung", "--train=False", "--l_rate=100.0",
                 "--unknown=1", "--subscription_id", "Test1", "--tenant_id=Test2",
                 "--application_id", "Test3", "--log_level=INFO",
                 # Normally we don't use extra index URLs in InnerEye, hence this won't be set in YAML.
                 "--pip_extra_index_url=foo"]
    with mock.patch("sys.argv", [""] + args_list):
        parser_result = parse_args_and_add_yaml_variables(azure_parser,
                                                          yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
    azure_config = AzureConfig(**parser_result.args)

    # These values have been set on the commandline, to values that are not the parser defaults.
    non_default_args = {
        "train": False,
        "model": "Lung",
        "subscription_id": "Test1",
        "application_id": "Test3",
    }
    for prop, value in non_default_args.items():
        assert prop in parser_result.args, f"Property {prop} missing in args"
        assert parser_result.args[prop] == value, f"Property {prop} does not have the expected value"
        assert getattr(azure_config, prop) == value, f"Property {prop} not in object"
        assert parser_result.overrides[prop] == value, \
            f"Property {prop} has a non-default value, and should be recognized as such."

    # log_level is set on the commandline, to a value that is equal to the default. It should be recognized as an
    # override.
    log_level = "log_level"
    assert log_level in parser_result.args
    assert parser_result.args[log_level] == "INFO"
    assert log_level in parser_result.overrides
    assert parser_result.overrides[log_level] == "INFO"

    # These next variables should have been read from YAML. They should be in the args dictionary and in the
    # object, but not in the list of overrides.
    from_yaml = {
        "workspace_name": "InnerEye-DeepLearning",
        "azureml_datastore": "innereyedatasets",
    }
    for prop, value in from_yaml.items():
        assert prop in parser_result.args, f"Property {prop} missing in args"
        assert parser_result.args[prop] == value, f"Property {prop} does not have the expected value"
        assert getattr(azure_config, prop) == value, f"Property {prop} not in object"
        assert prop not in parser_result.overrides, f"Property {prop} should not be listed as having a " \
                                                    f"non-default value"

    assert "unknown" not in parser_result.args
    l_rate = "l_rate"
    if with_config:
        assert l_rate in parser_result.args
        assert parser_result.args[l_rate] == 100.0
        assert parser_result.unknown == ["--unknown=1"]
    else:
        assert l_rate not in parser_result.args
        assert parser_result.unknown == ["--l_rate=100.0", "--unknown=1"]
def test_azureml_submit_constant() -> None:
    """
    Make sure the config has the 'submit to azureml' key.
    """
    azure_config = AzureConfig()
    assert hasattr(azure_config, AZURECONFIG_SUBMIT_TO_AZUREML)
def test_validate() -> None:
    with pytest.raises(ValueError) as ex:
        AzureConfig(only_register_model=True)
    assert ex.value.args[0] == "If only_register_model is set, must also provide a valid run_recovery_id"