def test_submit_for_inference(test_output_dirs: OutputFolderForTests) -> None:
    """
    Execute the submit_for_inference script on the model that was recently trained. This starts an AzureML job,
    and downloads the segmentation. Then check if the segmentation was actually produced.
    :param test_output_dirs: Test output directories.
    """
    model = get_most_recent_model()
    image_file = fixed_paths_for_tests.full_ml_test_data_path() / "train_and_test_data" / "id1_channel1.nii.gz"
    assert image_file.exists(), f"Image file not found: {image_file}"
    settings_file = fixed_paths.SETTINGS_YAML_FILE
    assert settings_file.exists(), f"Settings file not found: {settings_file}"
    azure_config = AzureConfig.from_yaml(
        settings_file, project_root=fixed_paths.repository_root_directory())
    # Read the name of the branch from environment, so that the inference experiment is also listed alongside
    # all other AzureML runs that belong to the current PR.
    build_branch = os.environ.get("BUILD_BRANCH", None)
    experiment_name = to_azure_friendly_string(
        build_branch) if build_branch else "model_inference"
    azure_config.get_git_information()
    args = [
        "--image_file",
        str(image_file), "--model_id", model.id, "--settings",
        str(settings_file), "--download_folder",
        str(test_output_dirs.root_dir), "--cluster", "training-nc12",
        "--experiment", experiment_name
    ]
    seg_path = test_output_dirs.root_dir / DEFAULT_RESULT_IMAGE_NAME
    assert not seg_path.exists(), f"Result file {seg_path} should not yet exist"
    submit_for_inference.main(
        args, project_root=fixed_paths.repository_root_directory())
    assert seg_path.exists(), f"Result file {seg_path} was not created"
Example #2
def test_amlignore() -> None:
    """
    Test if the private settings files are excluded from getting into the AML snapshot.
    """
    amlignore = repository_root_directory(".amlignore")
    assert amlignore.is_file()
    ignored = amlignore.read_text()
    private_settings = repository_root_directory(PRIVATE_SETTINGS_FILE)
    if private_settings.is_file():
        assert PRIVATE_SETTINGS_FILE in ignored, f"{PRIVATE_SETTINGS_FILE} is not in .amlignore"
    test_variables = repository_root_directory(PROJECT_SECRETS_FILE)
    if test_variables.is_file():
        assert PROJECT_SECRETS_FILE in ignored, f"{PROJECT_SECRETS_FILE} is not in .amlignore"
def generate_classification_multilabel_notebook(
        result_notebook: Path,
        config: ScalarModelBase,
        train_metrics: Optional[Path] = None,
        val_metrics: Optional[Path] = None,
        test_metrics: Optional[Path] = None) -> Path:
    """
    Creates a reporting notebook for a multilabel classification model, using the given training, validation,
    and test set metrics. This report adds metrics specific to the multilabel task, and is meant to be used in
    addition to the standard report created for all classification models.
    Returns the report file after HTML conversion.
    """

    notebook_params = \
        {
            'innereye_path': str(fixed_paths.repository_root_directory()),
            'train_metrics_csv': str_or_empty(train_metrics),
            'val_metrics_csv': str_or_empty(val_metrics),
            'test_metrics_csv': str_or_empty(test_metrics),
            "config": codecs.encode(pickle.dumps(config), "base64").decode()
        }
    template = Path(__file__).absolute().parent / "classification_multilabel_report.ipynb"
    return generate_notebook(template,
                             notebook_params=notebook_params,
                             result_notebook=result_notebook)
Example #4
def test_parsing_with_custom_yaml(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if additional model or Azure config settings can be read correctly from YAML files.
    """
    yaml_file = test_output_dirs.root_dir / "custom.yml"
    yaml_file.write_text("""variables:
  tenant_id: 'foo'
  start_epoch: 7
  random_seed: 1
""")
    # Arguments partly to be set in AzureConfig, and partly in model config.
    args = [
        "", "--tenant_id=bar", "--model", "Lung", "--num_epochs", "42",
        "--random_seed", "2"
    ]
    with mock.patch("sys.argv", args):
        runner = Runner(project_root=fixed_paths.repository_root_directory(),
                        yaml_config_file=yaml_file)
        loader_result = runner.parse_and_load_model()
    assert loader_result is not None
    assert runner.azure_config is not None
    # Settings in Azure config: tenant_id is present in both yaml and command line, and the latter should be used.
    assert runner.azure_config.tenant_id == "bar"
    # Settings in model config: start_epoch is only in yaml
    assert runner.model_config.start_epoch == 7
    # Settings in model config: num_epochs is only on commandline
    assert runner.model_config.num_epochs == 42
    # Settings in model config: random_seed is both in yaml and command line, the latter should be used
    assert runner.model_config.get_effective_random_seed() == 2
    assert loader_result.overrides == {"num_epochs": 42, "random_seed": 2}
def test_submit_for_inference(test_output_dirs: OutputFolderForTests) -> None:
    """
    Execute the submit_for_inference script on the model that was recently trained. This starts an AzureML job,
    and downloads the segmentation. Then check if the segmentation was actually produced.
    :param test_output_dirs: Test output directories.
    """
    model = get_most_recent_model(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    assert PYTHON_ENVIRONMENT_NAME in model.tags, "Environment name not present in model properties"
    # Both parts of this test rely on the same model that was trained in a previous run. If the two parts were run
    # as independent tests (via pytest.mark.parametrize), get_most_recent_model in the second part would pick up
    # the AML run that the first part submitted, hence the explicit loop here.
    for use_dicom in [False, True]:
        if use_dicom:
            size = (64, 64, 64)
            spacing = (1., 1., 2.5)
            image_file = test_output_dirs.root_dir / "temp_pack_dicom_series" / "dicom_series.zip"
            scratch_folder = test_output_dirs.root_dir / "temp_dicom_series"
            zip_random_dicom_series(size, spacing, image_file, scratch_folder)
        else:
            image_file = fixed_paths_for_tests.full_ml_test_data_path() / "train_and_test_data" / "id1_channel1.nii.gz"
        assert image_file.exists(), f"Image file not found: {image_file}"
        settings_file = fixed_paths.SETTINGS_YAML_FILE
        assert settings_file.exists(), f"Settings file not found: {settings_file}"
        args = ["--image_file", str(image_file),
                "--model_id", model.id,
                "--settings", str(settings_file),
                "--download_folder", str(test_output_dirs.root_dir),
                "--cluster", "training-nc12",
                "--experiment", get_experiment_name_from_environment() or "model_inference",
                "--use_dicom", str(use_dicom)]
        download_file = DEFAULT_RESULT_ZIP_DICOM_NAME if use_dicom else DEFAULT_RESULT_IMAGE_NAME
        seg_path = test_output_dirs.root_dir / download_file
        assert not seg_path.exists(), f"Result file {seg_path} should not yet exist"
        submit_for_inference.main(args, project_root=fixed_paths.repository_root_directory())
        assert seg_path.exists(), f"Result file {seg_path} was not created"
def test_recovery_on_2_nodes(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that a training run can be resumed from a previous 2-node run: both nodes should load the recovery
    checkpoint, and the resumed run should complete.
    """
    args_list = ["--model", "BasicModel2EpochsMoreData",
                 "--azureml", "True",
                 "--num_nodes", "2",
                 "--run_recovery_id",
                 str(get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_2NODE_RUN)),
                 "--num_epochs", "4",
                 "--wait_for_completion", "True",
                 "--cluster", "training-nc12",
                 "--experiment", get_experiment_name_from_environment() or "recovery_on_2_nodes",
                 "--tag", "recovery_on_2_nodes"
                 ]
    script = str(repository_root_directory() / "InnerEye" / "ML" / "runner.py")
    # Submission of the recovery job will try to exit the process, catch that and check the submitted run.
    with pytest.raises(SystemExit):
        with mock.patch("sys.argv", [script] + args_list):
            main()
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_2NODE_RUN)
    assert run.status == RunStatus.COMPLETED
    # There are two nodes, so there should be one log file per node.
    log0_txt = get_job_log_file(run, index=0)
    log1_txt = get_job_log_file(run, index=1)
    assert "Downloading multiple files from run" in log0_txt
    assert "Downloading multiple files from run" not in log1_txt
    assert "Loading checkpoint that was created at (epoch = 2, global_step = 2)" in log0_txt
    assert "Loading checkpoint that was created at (epoch = 2, global_step = 2)" in log1_txt
def generate_classification_notebook(
        result_notebook: Path,
        config: ScalarModelBase,
        train_metrics: Optional[Path] = None,
        val_metrics: Optional[Path] = None,
        test_metrics: Optional[Path] = None) -> Path:
    """
    Creates a reporting notebook for a classification model, using the given training, validation, and test set metrics.
    Returns the report file after HTML conversion.
    """

    notebook_params = \
        {
            'innereye_path': str(fixed_paths.repository_root_directory()),
            'train_metrics_csv': str_or_empty(train_metrics),
            'val_metrics_csv': str_or_empty(val_metrics),
            'test_metrics_csv': str_or_empty(test_metrics),
            "config": codecs.encode(pickle.dumps(config), "base64").decode(),
            "is_crossval_report": False
        }
    template = Path(__file__).absolute().parent / "classification_crossval_report.ipynb"
    return generate_notebook(template,
                             notebook_params=notebook_params,
                             result_notebook=result_notebook)
Example #8
    def generate_custom_report(self, report_dir: Path,
                               model_proc: ModelProcessing) -> Path:
        """
        Generate a custom report for the CovidDataset Hierarchical model. At the moment, this report will read the
        file model_output.csv generated for the training, validation or test sets and compute a 4 class accuracy
        and confusion matrix based on this.
        :param report_dir: Directory report is to be written to
        :param model_proc: Whether this is a single or ensemble model (model_output.csv will be located in different
        paths for single vs ensemble runs.)
        """
        def get_output_csv_path(mode: ModelExecutionMode) -> Path:
            p = get_best_epoch_results_path(mode=mode, model_proc=model_proc)
            return self.outputs_folder / p / MODEL_OUTPUT_CSV

        train_metrics = get_output_csv_path(ModelExecutionMode.TRAIN)
        val_metrics = get_output_csv_path(ModelExecutionMode.VAL)
        test_metrics = get_output_csv_path(ModelExecutionMode.TEST)

        notebook_params = \
            {
                'innereye_path': str(fixed_paths_innereye.repository_root_directory()),
                'train_metrics_csv': str_or_empty(train_metrics),
                'val_metrics_csv': str_or_empty(val_metrics),
                'test_metrics_csv': str_or_empty(test_metrics),
                "config": codecs.encode(pickle.dumps(self), "base64").decode(),
                "is_crossval_report": False
            }
        template = Path(__file__).absolute().parent.parent / "reports" / "CovidHierarchicalModelReport.ipynb"
        return generate_notebook(
            template,
            notebook_params=notebook_params,
            result_notebook=report_dir /
            get_ipynb_report_name(f"{self.model_category.value}_hierarchical"))
Example #9
def test_cross_validation_for_lighting_container_models_is_supported() -> None:
    """
    Prior to https://github.com/microsoft/InnerEye-DeepLearning/pull/483 we raised an exception in
    runner.run when cross validation was attempted on a lightning container. This test checks that
    we do not raise the exception anymore, and instead pass on a cross validation hyperdrive config
    to azure_runner's submit_to_azureml method.
    """
    args_list = [
        "--model=HelloContainer", "--number_of_cross_validation_splits=5",
        "--azureml=True"
    ]
    mock_run = mock.MagicMock()
    mock_run.id = "foo"
    mock_run.experiment.name = "bar"
    with mock.patch("sys.argv", [""] + args_list):
        runner = Runner(project_root=fixed_paths.repository_root_directory(),
                        yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
        with mock.patch(
                "health_azure.himl.submit_run",
                return_value=mock_run) as create_and_submit_experiment_patch:
            with pytest.raises(SystemExit):
                runner.run()
            assert runner.lightning_container.model_name == 'HelloContainer'
            assert runner.lightning_container.number_of_cross_validation_splits == 5
            script_run_config_arg = create_and_submit_experiment_patch.call_args[1]["script_run_config"]
            assert isinstance(script_run_config_arg, HyperDriveConfig)
def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test starting a classification model via the commandline runner. Test if we can provide overrides
    for parameters that live inside the DeepLearningConfig, and ones that are specific to classification models.
    :param test_output_dirs: Test output directories.
    """
    set_from_commandline = 12345
    scalar1 = '["label"]'
    model_name = "DummyClassification"
    initial_config = ModelConfigLoader[ScalarModelBase]().create_model_config_from_name(model_name)
    assert initial_config.non_image_feature_channels == []
    output_root = str(test_output_dirs.root_dir)
    args = [
        "", "--model", model_name, "--train", "True", "--random_seed",
        str(set_from_commandline), "--non_image_feature_channels", scalar1,
        "--output_to", output_root, "--max_num_gpus", "1"
    ]
    with mock.patch("sys.argv", args):
        config, _ = runner.run(
            project_root=fixed_paths.repository_root_directory(),
            yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
    assert isinstance(config, ScalarModelBase)
    assert config.model_name == "DummyClassification"
    assert config.get_effective_random_seed() == set_from_commandline
    assert config.non_image_feature_channels == ["label"]
    assert str(config.outputs_folder).startswith(output_root)
    assert (config.logs_folder / runner.LOG_FILE_NAME).exists()
Example #11
def default_runner() -> Runner:
    """
    Create an InnerEye Runner object with the default settings, pointing to the repository root and
    default settings files.
    """
    return Runner(project_root=repository_root_directory(),
                  yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
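
A minimal usage sketch for default_runner, mirroring how other snippets in this listing drive the Runner through a
mocked sys.argv; the "--model=Lung" argument and the parse_and_load_model call are borrowed from those snippets and
are illustrative assumptions, not part of the original example.

def test_default_runner_parses_lung_model() -> None:
    # Hypothetical check: the default Runner should parse a minimal command line for the Lung model.
    runner = default_runner()
    with mock.patch("sys.argv", ["", "--model=Lung"]):
        loader_result = runner.parse_and_load_model()
    assert loader_result is not None
    assert runner.azure_config is not None
    assert runner.azure_config.model == "Lung"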
Example #12
def get_default_azure_config() -> AzureConfig:
    """
    Gets the Azure-related configuration options, using the default settings file settings.yaml.
    """
    return AzureConfig.from_yaml(
        yaml_file_path=fixed_paths.SETTINGS_YAML_FILE,
        project_root=fixed_paths.repository_root_directory())
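
A hedged companion sketch: in this listing, the default AzureConfig is mainly used to reach the AzureML workspace
(compare get_most_recent_model further down). The helper name below is hypothetical; Workspace comes from azureml.core.

from azureml.core import Workspace

def get_default_workspace() -> Workspace:
    """Hypothetical helper: resolve the AzureML workspace that the default settings file points to."""
    return get_default_azure_config().get_workspace()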
def generate_classification_notebook(
        result_notebook: Path,
        train_metrics: Optional[Path] = None,
        val_metrics: Optional[Path] = None,
        test_metrics: Optional[Path] = None,
        dataset_csv_path: Optional[Path] = None,
        dataset_subject_column: Optional[str] = None,
        dataset_file_column: Optional[str] = None) -> Path:
    """
    Creates a reporting notebook for a classification model, using the given training, validation, and test set metrics.
    Returns the report file after HTML conversion.
    """

    notebook_params = \
        {
            'innereye_path': str(fixed_paths.repository_root_directory()),
            'train_metrics_csv': str_or_empty(train_metrics),
            'val_metrics_csv': str_or_empty(val_metrics),
            'test_metrics_csv': str_or_empty(test_metrics),
            'dataset_csv_path': str_or_empty(dataset_csv_path),
            "dataset_subject_column": str_or_empty(dataset_subject_column),
            "dataset_file_column": str_or_empty(dataset_file_column)
        }
    template = Path(__file__).absolute().parent / "classification_report.ipynb"
    return generate_notebook(template,
                             notebook_params=notebook_params,
                             result_notebook=result_notebook)
def download_from_model_or_find_default(model_sas_urls: Dict[str, str],
                                        base_name: str,
                                        dir_path: Path) -> Path:
    """
    Identifies all files in the model that have the given name, and downloads them. If any such files exist,
    return the first one. If no file of the desired name is present in the model, try to copy the file from the
    present repository (the latter should only happen for malformed legacy models).
    :param model_sas_urls: The files making up the model, as a mapping from file name to a URL with
    an SAS token.
    :param base_name: The file name of the files to download.
    :param dir_path: The folder into which the files will be written. All downloaded files will keep the relative
    path that they also have in the model.
    :return: The absolute path to the desired file.
    """
    files = download_files_from_model(model_sas_urls, base_name, dir_path)
    if files:
        return files[0]
    logging.warning(
        f"The model does not contain any file with name '{base_name}'. Trying to find it in "
        "the repository root.")
    default_run_scoring = fixed_paths.repository_root_directory() / base_name
    if not default_run_scoring.exists():
        raise ValueError(
            f"No file with name {base_name} available in the repository.")
    result = dir_path / base_name
    shutil.copyfile(str(default_run_scoring), str(result))
    return result
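
A hypothetical call site for download_from_model_or_find_default, assuming the model files have already been
enumerated into a name-to-SAS-URL mapping; the helper name and the "score.py" file name are illustrative
placeholders only.

def fetch_scoring_script(model_sas_urls: Dict[str, str], download_dir: Path) -> Path:
    # Hypothetical helper: prefer the copy stored inside the model, fall back to the repository copy otherwise.
    return download_from_model_or_find_default(model_sas_urls, base_name="score.py", dir_path=download_dir)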
def test_get_secrets() -> None:
    """
    Test that secrets can always be retrieved correctly from the environment.
    When running on the local dev box, the secrets are read from a secrets file in the repository root directory
    and written to the environment, from where they are retrieved later.
    When running in Azure, the secrets would be set as environment variables directly
    in the build definition.
    """
    print("Environment variables:")
    for env_variable, value in os.environ.items():
        print("{}: {}".format(env_variable, value))
    secrets_handler = SecretsHandling(
        project_root=fixed_paths.repository_root_directory())
    secrets = secrets_handler.get_secrets_from_environment_or_file(
        SECRETS_IN_ENVIRONMENT)
    for name in SECRETS_IN_ENVIRONMENT:
        assert name in secrets, "No value found for {}".format(name)
        assert secrets[name] is not None, "Value for {} is empty".format(name)
        # Variable names should automatically be converted to uppercase when using get_secret:
        assert secrets_handler.get_secret_from_environment(
            name=name.lower()) is not None
    no_such_variable = "no_such_variable"
    with pytest.raises(ValueError):
        secrets_handler.get_secret_from_environment(name=no_such_variable)
    assert secrets_handler.get_secret_from_environment(
        name=no_such_variable, allow_missing=True) is None
Example #16
 def __init__(
     self,
     model_config: ModelConfigBase,
     azure_config: Optional[AzureConfig] = None,
     project_root: Optional[Path] = None,
     post_cross_validation_hook: Optional[
         PostCrossValidationHookSignature] = None,
     model_deployment_hook: Optional[ModelDeploymentHookSignature] = None
 ) -> None:
     """
     Driver class to run an ML experiment. Note that the project root argument MUST be supplied when using InnerEye
     as a package!
     :param model_config: Model related configurations
     :param azure_config: Azure related configurations
     :param project_root: Project root. This should only be omitted if calling run_ml from the test suite. Supplying
     it is crucial when using InnerEye as a package or submodule!
     :param post_cross_validation_hook: A function to call after waiting for completion of cross validation runs.
     The function is called with the model configuration and the path to the downloaded and merged metrics files.
     :param model_deployment_hook: an optional function for deploying a model in an application-specific way.
     If present, it should take a model config (SegmentationModelBase), an AzureConfig, and an AzureML
     Model as arguments, and return an optional Path and a further object of any type.
     """
     self.model_config = model_config
     self.azure_config: AzureConfig = azure_config or AzureConfig()
     self.project_root: Path = project_root or fixed_paths.repository_root_directory()
     self.post_cross_validation_hook = post_cross_validation_hook
     self.model_deployment_hook = model_deployment_hook
Example #17
 def __init__(self, **params: Any) -> None:
     self._model_name = type(self).__name__
     # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here.
     self._datasets_for_training: Optional[Dict[ModelExecutionMode, Any]] = None
     self._datasets_for_inference: Optional[Dict[ModelExecutionMode, Any]] = None
     super().__init__(throw_if_unknown_param=True, **params)
     logging.info("Creating the default output folder structure.")
     self.create_filesystem(fixed_paths.repository_root_directory())
Example #18
def namespace_to_path(namespace: str, root: PathOrString = repository_root_directory()) -> Path:
    """
    Given a namespace (in form A.B.C) and an optional root directory R, create a path R/A/B/C
    :param namespace: Namespace to convert to path
    :param root: Path to prefix (default is project root)
    :return: The path R/A/B/C corresponding to the namespace.
    """
    return Path(root, *namespace.split("."))
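
A small illustrative check of the conversion, with a made-up root directory; the namespace "Tests.ML.configs" is the
one used elsewhere in this listing.

# Hypothetical usage: namespace A.B.C under root /repo becomes the path /repo/A/B/C.
assert namespace_to_path("Tests.ML.configs", root=Path("/repo")) == Path("/repo/Tests/ML/configs")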
Example #19
def test_create_ml_runner_args(is_default_namespace: bool,
                               test_output_dirs: TestOutputDirectories,
                               is_offline_run: bool) -> None:
    """Test round trip parsing of commandline arguments:
    From arguments to the Azure runner to the arguments of the ML runner, checking that
    whatever is passed on can be correctly parsed."""
    logging_to_stdout()
    model_name = "Lung"
    outputs_folder = Path(test_output_dirs.root_dir)
    project_root = fixed_paths.repository_root_directory()
    if is_default_namespace:
        model_configs_namespace = None
    else:
        model_configs_namespace = "Tests.ML.configs"
        model_name = "DummyModel"

    args_list = [
        f"--model={model_name}", "--train=True", "--l_rate=100.0",
        "--norm_method=Simple Norm", "--subscription_id", "Test1",
        "--tenant_id=Test2", "--application_id", "Test3",
        "--datasets_storage_account=Test4", "--datasets_container", "Test5",
        "--pytest_mark", "gpu", f"--output_to={outputs_folder}"
    ]
    if not is_default_namespace:
        args_list.append(
            f"--model_configs_namespace={model_configs_namespace}")

    with mock.patch("sys.argv", [""] + args_list):
        with mock.patch(
                "InnerEye.ML.deep_learning_config.is_offline_run_context",
                return_value=is_offline_run):
            runner = Runner(project_root=project_root,
                            yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
            runner.parse_and_load_model()
            azure_config = runner.azure_config
            model_config = runner.model_config
    assert azure_config.datasets_storage_account == "Test4"
    assert azure_config.model == model_name
    assert model_config.l_rate == 100.0
    assert model_config.norm_method == PhotometricNormalizationMethod.SimpleNorm
    if is_offline_run:
        # The actual output folder must be a subfolder of the folder given on the commandline. The folder will contain
        # a timestamp, that will start with the year number, hence will start with 20...
        assert str(model_config.outputs_folder).startswith(
            str(outputs_folder / "20"))
        assert model_config.logs_folder == (model_config.outputs_folder /
                                            DEFAULT_LOGS_DIR_NAME)
    else:
        # For runs inside AzureML, the output folder is the project root (the root of the folders that are
        # included in the snapshot). The "outputs_to" argument will be ignored.
        assert model_config.outputs_folder == (project_root /
                                               DEFAULT_AML_UPLOAD_DIR)
        assert model_config.logs_folder == (project_root /
                                            DEFAULT_LOGS_DIR_NAME)

    assert not hasattr(model_config, "datasets_storage_account")
    assert azure_config.pytest_mark == "gpu"
Example #20
def path_to_namespace(path: Path, root: PathOrString = repository_root_directory()) -> str:
    """
    Given a path (in form R/A/B/C) and an optional root directory R, create a namespace A.B.C.
    If root is provided, then path must be a relative child to it.
    :param path: Path to convert to namespace
    :param root: Path prefix to remove from namespace (default is project root)
    :return: The namespace A.B.C corresponding to the path.
    """
    return ".".join([Path(x).stem for x in path.relative_to(root).parts])
Example #21
def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test starting a classification model via the commandline runner. Test if we can provide overrides
    for parameters that live inside the DeepLearningConfig, and ones that are specific to classification models.
    :param test_output_dirs: Test output directories.
    """
    set_from_commandline = 12345
    scalar1 = '["label"]'
    model_name = "DummyClassification"
    initial_config = ModelConfigLoader().create_model_config_from_name(
        model_name)
    assert initial_config.non_image_feature_channels == []
    output_root = str(test_output_dirs.root_dir)
    args = [
        "",
        "--model",
        model_name,
        "--train",
        "True",
        "--random_seed",
        str(set_from_commandline),
        "--non_image_feature_channels",
        scalar1,
        "--output_to",
        output_root,
        "--max_num_gpus",
        "1",
        "--recovery_checkpoint_save_interval",
        "2",
        "--recovery_checkpoints_save_last_k",
        "2",
        "--num_epochs",
        "6",
    ]
    with mock.patch("sys.argv", args):
        config, _ = runner.run(
            project_root=fixed_paths.repository_root_directory(),
            yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
    assert isinstance(config, ScalarModelBase)
    assert config.model_name == "DummyClassification"
    assert config.get_effective_random_seed() == set_from_commandline
    assert config.non_image_feature_channels == ["label"]
    assert str(config.outputs_folder).startswith(output_root)
    assert (config.logs_folder / LOG_FILE_NAME).exists()
    # Check that we saved one checkpoint every second epoch, that we kept only the last 2 of those, and that
    # last.ckpt has been renamed to best.ckpt
    assert len(os.listdir(config.checkpoint_folder)) == 3
    assert (config.checkpoint_folder /
            str(RECOVERY_CHECKPOINT_FILE_NAME + "_epoch=3" +
                CHECKPOINT_SUFFIX)).exists()
    assert (config.checkpoint_folder /
            str(RECOVERY_CHECKPOINT_FILE_NAME + "_epoch=5" +
                CHECKPOINT_SUFFIX)).exists()
    assert (config.checkpoint_folder /
            BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).exists()
def get_most_recent_model() -> Model:
    """
    Returns the AzureML Model that was registered by the most recent training run, identified via the model_id
    tag on that run.
    """
    most_recent_run = get_most_recent_run()
    azure_config = AzureConfig.from_yaml(
        fixed_paths.SETTINGS_YAML_FILE,
        project_root=fixed_paths.repository_root_directory())
    workspace = azure_config.get_workspace()
    run = fetch_run(workspace, most_recent_run)
    tags = run.get_tags()
    model_id = tags.get(MODEL_ID_KEY_NAME, None)
    assert model_id, f"No model_id tag was found on run {most_recent_run}"
    return Model(workspace=workspace, id=model_id)
Example #23
def test_read_yaml_file_into_args(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if the arguments for specifying the YAML config file with storage account, etc.
    are correctly wired up.
    """
    empty_yaml = Path(test_output_dirs.root_dir) / "nothing.yaml"
    empty_yaml.write_text("variables:\n")
    with mock.patch("sys.argv", ["", "--model=Lung"]):
        # Default behaviour: Application ID (service principal) should be picked up from YAML
        runner1 = Runner(project_root=fixed_paths.repository_root_directory(),
                         yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
        runner1.parse_and_load_model()
        assert len(runner1.azure_config.application_id) > 0
        # When specifying a dummy YAML file that does not contain the application ID, it should not
        # be set.
        runner2 = Runner(project_root=fixed_paths.repository_root_directory(),
                         yaml_config_file=empty_yaml)
        runner2.parse_and_load_model()
        assert runner2.azure_config.application_id == ""
Example #24
 def create_filesystem(self, project_root: Path = fixed_paths.repository_root_directory()) -> None:
     """
     Creates new file system settings (outputs folder, logs folder) based on the information stored in the
     present object. If any of the folders do not yet exist, they are created.
     :param project_root: The root folder for the codebase that triggers the training run.
     """
     self.file_system_config = DeepLearningFileSystemConfig.create(
         project_root=project_root,
         model_name=self.model_name,
         is_offline_run=self.is_offline_run,
         output_to=self.output_to
     )
 def __init__(self, **params: Any) -> None:
     self._model_name = type(self).__name__
     # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here.
     self._datasets_for_training: Optional[Dict[ModelExecutionMode, Any]] = None
     self._datasets_for_inference: Optional[Dict[ModelExecutionMode, Any]] = None
     self.recovery_start_epoch = 0
     super().__init__(throw_if_unknown_param=True, **params)
     logging.info("Creating the default output folder structure.")
     self.create_filesystem(fixed_paths.repository_root_directory())
     # Disable the PL progress bar because all InnerEye models have their own console output
     self.pl_progress_bar_refresh_rate = 0
     self.pretraining_run_checkpoints: Optional[Any] = None
Example #26
def test_read_yaml_file_into_args(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if the arguments for specifying the YAML config file with storage account, etc.
    are correctly wired up.
    """
    empty_yaml = test_output_dirs.root_dir / "nothing.yaml"
    empty_yaml.write_text("variables:\n")
    with mock.patch("sys.argv", ["", "--model=Lung"]):
        # Default behaviour: tenant_id should be picked up from YAML
        runner1 = Runner(project_root=fixed_paths.repository_root_directory(),
                         yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
        runner1.parse_and_load_model()
        assert len(runner1.azure_config.application_id) > 0
        assert len(runner1.azure_config.resource_group) > 0
        # When specifying a dummy YAML file that does not contain any settings, no information in AzureConfig should
        # be set. Some settings are read from a private settings file, most notably application ID, which should
        # be present on people's local dev boxes. Hence, only assert on `resource_group` here.
        runner2 = Runner(project_root=fixed_paths.repository_root_directory(),
                         yaml_config_file=empty_yaml)
        runner2.parse_and_load_model()
        assert runner2.azure_config.resource_group == ""
def test_add_submodules_to_path() -> None:
    # Take a copy, so that the finally block can actually restore the original entries.
    original_sys_path = list(sys.path)
    try:
        fastmri_folder = repository_root_directory() / "fastMRI"
        fastmri_str = str(fastmri_folder)
        assert fastmri_folder.is_dir()
        if fastmri_str in sys.path:
            sys.path.remove(fastmri_str)
        add_submodules_to_path()
        assert fastmri_str in sys.path
    finally:
        sys.path = original_sys_path
Example #28
def add_folder_to_sys_path_if_needed(folder_under_repo_root: str) -> None:
    """
    Checks if the Python paths in sys.path already contain the given folder, which is expected to be relative
    to the repository root. If that folder is not yet in sys.path, add it.
    """
    full_folder = repository_root_directory() / folder_under_repo_root
    for path_str in sys.path:
        path = Path(path_str)
        if path == full_folder:
            return
    print(f"Adding {full_folder} to sys.path")
    sys.path.append(str(full_folder))
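
A hedged usage example based on the fastMRI submodule test earlier in this listing; the idempotence follows from the
early return above.

# Hypothetical usage: a second call is a no-op because the folder is already on sys.path.
add_folder_to_sys_path_if_needed("fastMRI")
add_folder_to_sys_path_if_needed("fastMRI")
assert any(Path(p) == repository_root_directory() / "fastMRI" for p in sys.path)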
Example #29
 def get_path_to_checkpoint(self, epoch: int) -> Path:
     """
     Returns the full path to a checkpoint, given an epoch.
     :param epoch: the epoch number
     :return: path to the checkpoint for the given epoch
     """
     return create_checkpoint_path(
         path=fixed_paths.repository_root_directory() /
         self.checkpoint_folder,
         epoch=epoch)
Example #30
def test_framework_version(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if the Pytorch framework version can be read correctly from the current environment file.
    """
    environment_file = fixed_paths.repository_root_directory(
        ENVIRONMENT_YAML_FILE_NAME)
    assert environment_file.is_file(), "Environment file must be present"
    conda_dep = CondaDependencies(
        conda_dependencies_file_path=environment_file)
    framework = pytorch_version_from_conda_dependencies(conda_dep)
    # If this fails, it is quite likely that the AzureML SDK is behind pytorch, and does not yet know about a
    # new version of pytorch that we are using here.
    assert framework is not None