Example #1
 def run_in_situ(self, azure_run_info: AzureRunInfo) -> None:
     """
     Actually run the AzureML job; this method will typically run on an Azure VM.
     :param azure_run_info: Contains all information about the present run in AzureML, in particular where the
     datasets are mounted.
     """
     # Only set the logging level now. Usually, when we set logging to DEBUG, we want diagnostics about the model
     # build itself, but not the tons of debug information that AzureML submissions create.
     # Suppress the logging from all processes but the one for GPU 0 on each node, to make log files more readable
     logging_to_stdout(
         self.azure_config.log_level if is_local_rank_zero() else "ERROR")
     package_setup_and_hacks()
     if is_global_rank_zero():
         self.print_git_tags()
     # For the PR build in AzureML, we can either run pytest, or run training of the simple PR model. Running
     # both only works when using DDP_spawn, but that has the side effect of messing up the memory consumption
     # of the large models.
     if self.azure_config.pytest_mark:
         outputs_folder = Path.cwd() / fixed_paths.DEFAULT_AML_UPLOAD_DIR
         pytest_passed, results_file_path = run_pytest(
             self.azure_config.pytest_mark, outputs_folder)
         if not pytest_passed:
             # Terminate if pytest has failed. This makes the smoke test in
             # PR builds fail if pytest fails.
             pytest_failures = f"Not all PyTest tests passed. See {results_file_path}"
             raise ValueError(pytest_failures)
     else:
         # Set environment variables for multi-node training if needed. This function will terminate early
         # if it detects that it is not in a multi-node environment.
         set_environment_variables_for_multi_node()
         self.ml_runner = self.create_ml_runner()
         self.ml_runner.setup(azure_run_info)
         self.ml_runner.run()
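
The rank-aware log level chosen above (full verbosity only on local rank 0, errors everywhere else) can be reproduced with the standard logging module alone. A minimal sketch, not the repository's logging_to_stdout helper, assuming the distributed launcher exposes the local rank via the LOCAL_RANK environment variable:

import logging
import os
import sys

def configure_rank_aware_logging(log_level: str = "DEBUG") -> None:
    # Keep the requested level on local rank 0 only; demote all other ranks to ERROR
    # so that multi-GPU log files stay readable.
    is_rank_zero = os.environ.get("LOCAL_RANK", "0") == "0"
    effective_level = log_level if is_rank_zero else "ERROR"
    logging.basicConfig(stream=sys.stdout, level=getattr(logging, effective_level))
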
Example #2
def test_rnn_classifier_via_config_2(test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results = model_train(config)

    actual_train_loss = results.train_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    actual_val_loss = results.val_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
    assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) \
           == config.get_total_number_of_save_epochs()
    assert np.allclose(results.optimal_temperature_scale_values_per_checkpoint_epoch, [0.97], rtol=0.1)
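
The synthetic labels above follow a simple majority rule: a sequence is labelled positive when more than half of its entries are 1. A tiny worked instance of that rule (values chosen only for illustration):

import numpy as np

# A length-9 sequence with six 1s gets label True, because 6 > 9 // 2 == 4.
inputs = np.array([1, 1, 0, 1, 1, 0, 1, 1, 0])
assert np.sum(inputs) > (len(inputs) // 2)
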
Example #3
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)

    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    model_training_result, checkpoint_handler = model_train_unittest(
        config, dirs=test_output_dirs)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]

    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]

    actual_train_loss = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(
        is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LEARNING_RATE.value)

    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(
        config,
        ModelExecutionMode.TRAIN,
        checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
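
The loss and learning-rate assertions above use pytest.approx on whole lists, with an absolute tolerance (abs=) for the losses and a relative tolerance (rel=) for the learning rates. A small self-contained illustration of those tolerance semantics, with arbitrary values:

import pytest

# abs= allows a fixed absolute deviation per element, rel= a deviation proportional to
# the expected value; both are applied element-wise when lists are compared.
assert [0.705931, 0.698664] == pytest.approx([0.7059312, 0.6986638], abs=1e-6)
assert [1e-4, 9.99971e-5] == pytest.approx([1.000001e-4, 9.99978e-5], rel=1e-5)
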
Example #4
def test_rnn_classifier_via_config_1(
        use_combined_model: bool, imaging_feature_type: ImagingFeatureType,
        combine_hidden_state: bool, use_encoder_layer_norm: bool,
        use_mean_teacher_model: bool,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.use_mixed_precision = True
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](
        images=np.random.uniform(0, 1, SCAN_SIZE),
        segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats',
                    return_value=image_and_seg):
        model_train(
            config,
            get_default_checkpoint_handler(
                model_config=config, project_root=test_output_dirs.root_dir))
Example #5
def test_rnn_classifier_via_config_1(
        use_combined_model: bool, imaging_feature_type: ImagingFeatureType,
        combine_hidden_state: bool, use_encoder_layer_norm: bool,
        use_mean_teacher_model: bool,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    # This fails with 16bit precision, saying "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are
    # unsafe to autocast. Many models use a sigmoid layer right before the binary cross entropy layer. In this case,
    # combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits or
    # torch.nn.BCEWithLogitsLoss.  binary_cross_entropy_with_logits and BCEWithLogits are safe to autocast."
    config.use_mixed_precision = False
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](
        images=np.random.uniform(0, 1, SCAN_SIZE),
        segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats',
                    return_value=image_and_seg):
        model_train(
            config,
            get_default_checkpoint_handler(
                model_config=config, project_root=test_output_dirs.root_dir))
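
The comment above quotes the PyTorch autocast error verbatim; the suggested fix is to feed raw logits into a loss that fuses the sigmoid and the binary cross entropy. A minimal standalone sketch of that pattern (not taken from the model code in these examples):

import torch

logits = torch.randn(4, 1)                     # raw model outputs, no sigmoid applied
targets = torch.randint(0, 2, (4, 1)).float()  # binary labels
# Unsafe under autocast: torch.nn.BCELoss()(torch.sigmoid(logits), targets)
# Safe under autocast: the sigmoid is folded into the loss itself.
loss = torch.nn.BCEWithLogitsLoss()(logits, targets)
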
Example #6
def test_run_ml_with_classification_model(
        test_output_dirs: OutputFolderForTests,
        number_of_offline_cross_validation_splits: int,
        model_name: str) -> None:
    """
    Test training and testing of classification models, when it is started together via run_ml.
    """
    logging_to_stdout()
    azure_config = get_default_azure_config()
    azure_config.train = True
    config: ScalarModelBase = ModelConfigLoader[ScalarModelBase]() \
        .create_model_config_from_name(model_name)
    config.number_of_cross_validation_splits = number_of_offline_cross_validation_splits
    config.set_output_to(test_output_dirs.root_dir)
    # Trying to run DDP from the test suite hangs, hence restrict to single GPU.
    config.max_num_gpus = 1
    MLRunner(config, azure_config).run()
    _check_offline_cross_validation_output_files(config)

    if config.perform_cross_validation:
        # Test that the result files can be correctly picked up by the cross validation routine.
        # For that, we point the downloader to the local results folder. The core download method
        # recognizes run_recovery_id == None as the signal to read from the local_run_results folder.
        config_and_files = get_config_and_results_for_offline_runs(config)
        result_files = config_and_files.files
        # One file for VAL and one for TRAIN for each child run
        assert len(result_files
                   ) == config.get_total_number_of_cross_validation_runs() * 2
        for file in result_files:
            assert file.execution_mode != ModelExecutionMode.TEST
            assert file.dataset_csv_file is not None
            assert file.dataset_csv_file.exists()
            assert file.metrics_file is not None
            assert file.metrics_file.exists()
Example #7
def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results, _ = model_train_unittest(config, dirs=test_output_dirs)

    actual_train_loss = results.get_metric(is_training=True, metric_type=MetricType.LOSS.value)[-1]
    actual_val_loss = results.get_metric(is_training=False, metric_type=MetricType.LOSS.value)[-1]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
Example #8
def main() -> None:
    """
    Main function.
    """
    logging_to_stdout()
    config = ReportStructureExtremesConfig.parse_args()
    report_structure_extremes(config.dataset, config.yaml_file)
Example #9
def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        results = model_train(config)
        assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) \
               == config.get_total_number_of_save_epochs()
Example #10
 def run(self) -> Tuple[Optional[DeepLearningConfig], Optional[Run]]:
     """
     The main entry point for training and testing models from the commandline. This chooses a model to train
     via a commandline argument, runs training or testing, and writes all required info to disk and logs.
     :return: If submitting to AzureML, returns the model configuration that was used for training,
     including commandline overrides applied (if any).
     """
     # Usually, when we set logging to DEBUG, we want diagnostics about the model
     # build itself, but not the tons of debug information that AzureML submissions create.
     logging_to_stdout(logging.INFO if is_local_rank_zero() else "ERROR")
     initialize_rpdb()
     user_agent.append(azure_util.INNEREYE_SDK_NAME, azure_util.INNEREYE_SDK_VERSION)
     self.parse_and_load_model()
     if self.lightning_container.perform_cross_validation:
         if self.model_config is None:
              raise NotImplementedError("Cross validation for LightningContainer models is not yet supported.")
         # force hyperdrive usage if performing cross validation
         self.azure_config.hyperdrive = True
     run_object: Optional[Run] = None
     if self.azure_config.azureml:
         run_object = self.submit_to_azureml()
     else:
         self.run_in_situ()
     if self.model_config is None:
         return self.lightning_container, run_object
     return self.model_config, run_object
Example #11
def test_run_ml_with_sequence_model(use_combined_model: bool,
                                    imaging_feature_type: ImagingFeatureType,
                                    test_output_dirs: TestOutputDirectories) -> None:
    """
    Test training and testing of sequence models, when it is started together via run_ml.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model, imaging_feature_type,
                              should_validate=False, sequence_target_positions=[2, 10])
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    config.num_epochs = 1
    config.max_batch_grad_cam = 1

    # make sure we are testing with at least one sequence position that will not exist
    # to ensure correct handling of sequences that do not contain all the expected target positions
    assert max(config.sequence_target_positions) > config.dataset_data_frame[config.sequence_column].astype(float).max()

    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        azure_config = get_default_azure_config()
        azure_config.train = True
        MLRunner(config, azure_config).run()
Example #12
def test_compare_folder_against_run(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can compare that a set of files exists in an AML run.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    run = get_most_recent_run(
        fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    file1 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_FOLDER / \
            FINAL_MODEL_FOLDER / MODEL_INFERENCE_JSON_FILE_NAME
    create_folder_and_write_text(
        file1, '{"model_name": "BasicModel2Epochs", "checkpoint_paths": ['
        '"checkpoints/last.ckpt"], '
        '"model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}'
    )
    with mock.patch("InnerEye.ML.baselines_util.RUN_CONTEXT", run):
        # First comparison only on the .json file should pass
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                        actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_FOLDER / no_such_file
        create_folder_and_write_text(file2, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                            actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison that failed previously, without mocking the RUN_CONTEXT. This should now
    # realize that the present run is an offline run, and skip the comparison
    compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                    actual=Path.cwd(),
                                    csv_relative_tolerance=0.0)
Example #13
def test_is_completed_single_run() -> None:
    """
    Test if we can correctly check run status for a single run.
    :return:
    """
    logging_to_stdout()
    workspace = get_default_workspace()
    get_run_and_check(get_most_recent_run_id(), True, workspace)
Example #14
def test_model_config_loader() -> None:
    logging_to_stdout(log_level=logging.DEBUG)
    default_loader = get_model_loader()
    assert default_loader.create_model_config_from_name("BasicModel2Epochs") is not None
    with pytest.raises(ValueError):
        default_loader.create_model_config_from_name("DummyModel")
    loader_including_tests = get_model_loader(namespace="Tests.ML.configs")
    assert loader_including_tests.create_model_config_from_name("BasicModel2Epochs") is not None
    assert loader_including_tests.create_model_config_from_name("DummyModel") is not None
Example #15
def test_create_ml_runner_args(is_default_namespace: bool,
                               test_output_dirs: TestOutputDirectories,
                               is_offline_run: bool) -> None:
    """Test round trip parsing of commandline arguments:
    From arguments to the Azure runner to the arguments of the ML runner, checking that
    whatever is passed on can be correctly parsed."""
    logging_to_stdout()
    model_name = "Lung"
    outputs_folder = Path(test_output_dirs.root_dir)
    project_root = fixed_paths.repository_root_directory()
    if is_default_namespace:
        model_configs_namespace = None
    else:
        model_configs_namespace = "Tests.ML.configs"
        model_name = "DummyModel"

    args_list = [
        f"--model={model_name}", "--train=True", "--l_rate=100.0",
        "--norm_method=Simple Norm", "--subscription_id", "Test1",
        "--tenant_id=Test2", "--application_id", "Test3",
        "--datasets_storage_account=Test4", "--datasets_container", "Test5",
        "--pytest_mark", "gpu", f"--output_to={outputs_folder}"
    ]
    if not is_default_namespace:
        args_list.append(
            f"--model_configs_namespace={model_configs_namespace}")

    with mock.patch("sys.argv", [""] + args_list):
        with mock.patch(
                "InnerEye.ML.deep_learning_config.is_offline_run_context",
                return_value=is_offline_run):
            runner = Runner(project_root=project_root,
                            yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
            runner.parse_and_load_model()
            azure_config = runner.azure_config
            model_config = runner.model_config
    assert azure_config.datasets_storage_account == "Test4"
    assert azure_config.model == model_name
    assert model_config.l_rate == 100.0
    assert model_config.norm_method == PhotometricNormalizationMethod.SimpleNorm
    if is_offline_run:
        # The actual output folder must be a subfolder of the folder given on the commandline. The folder name
        # contains a timestamp that starts with the year, hence starts with "20".
        assert str(model_config.outputs_folder).startswith(
            str(outputs_folder / "20"))
        assert model_config.logs_folder == (model_config.outputs_folder /
                                            DEFAULT_LOGS_DIR_NAME)
    else:
        # For runs inside AzureML, the output folder is the project root (the root of the folders that are
        # included in the snapshot). The "output_to" argument will be ignored.
        assert model_config.outputs_folder == (project_root /
                                               DEFAULT_AML_UPLOAD_DIR)
        assert model_config.logs_folder == (project_root /
                                            DEFAULT_LOGS_DIR_NAME)

    assert not hasattr(model_config, "datasets_storage_account")
    assert azure_config.pytest_mark == "gpu"
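
The test above drives the runner's argument parsing by patching sys.argv. The same pattern works against a plain argparse parser; a minimal sketch with a hypothetical parser and flags (not the runner's full argument set):

import argparse
from unittest import mock

parser = argparse.ArgumentParser()
parser.add_argument("--model")
parser.add_argument("--l_rate", type=float)
# parse_args() reads sys.argv[1:] when called without arguments, so patching sys.argv
# (index 0 is the program name) exercises the parser exactly like a real invocation.
with mock.patch("sys.argv", ["", "--model=Lung", "--l_rate=100.0"]):
    args = parser.parse_args()
assert args.model == "Lung" and args.l_rate == 100.0
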
Example #16
def test_config_loader_on_lightning_container() -> None:
    """
    Test if the config loader can load a model that is neither classification nor segmentation.
    """
    # First test if the container can be instantiated at all (it is tricky to get that right when the inheritance structure changes)
    DummyContainerWithParameters()
    logging_to_stdout(log_level=logging.DEBUG)
    model = model_loader_including_tests.create_model_config_from_name("DummyContainerWithParameters")
    assert model is not None
Example #17
def test_is_completed_ensemble_run() -> None:
    """
    Test if we can correctly check run status and status of child runs for an ensemble run.
    :return:
    """
    logging_to_stdout()
    workspace = get_default_workspace()
    run_id = get_most_recent_run_id(
        fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    get_run_and_check(run_id, True, workspace)
Example #18
def main(settings_yaml_file: Optional[Path] = None,
         project_root: Optional[Path] = None) -> None:
    """
    Main function.
    """
    logging_to_stdout()
    config = ReportStructureExtremesConfig.parse_args()
    azure_config = AzureConfig.from_yaml(yaml_file_path=settings_yaml_file or config.settings,
                                         project_root=project_root)
    report_structure_extremes(config.dataset, azure_config)
Example #19
def main(args: Optional[List[str]] = None, project_root: Optional[Path] = None) -> None:
    """
    Main function.
    """
    logging_to_stdout()
    inference_config = SubmitForInferenceConfig.parse_args(args)
    settings = inference_config.settings or fixed_paths.SETTINGS_YAML_FILE
    azure_config = AzureConfig.from_yaml(settings, project_root=project_root)
    if inference_config.cluster:
        azure_config.cluster = inference_config.cluster
    submit_for_inference(inference_config, azure_config)
Example #20
def get_configs(
        default_model_config: SegmentationModelBase, yaml_file_path: Path
) -> Tuple[SegmentationModelBase, AzureConfig, Dict]:
    parser_result = create_parser(yaml_file_path)
    args = parser_result.args
    runner_config = AzureConfig(**args)
    logging_to_stdout(args["log_level"])
    config = default_model_config or ModelConfigLoader(
    ).create_model_config_from_name(runner_config.model)
    config.apply_overrides(parser_result.overrides, should_validate=False)
    return config, runner_config, args
Example #21
def plot_cross_validation(config: PlotCrossValidationConfig) -> Path:
    """
    Collects results from an AzureML cross validation run, and writes aggregate metrics files.
    :param config: The settings for plotting cross validation results.
    :return:
    """
    logging_to_stdout(logging.INFO)
    with logging_section("Downloading cross-validation results"):
        result_files, root_folder = download_crossval_result_files(config)
    config_and_files = OfflineCrossvalConfigAndFiles(config=config, files=result_files)
    with logging_section("Plotting cross-validation results"):
        plot_cross_validation_from_files(config_and_files, root_folder)
    return root_folder
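
logging_section is used here as a context manager that brackets a phase of work with log messages. Its actual implementation is not shown in these examples; an assumed sketch of such a helper might look like this:

import logging
from contextlib import contextmanager
from typing import Iterator

@contextmanager
def logging_section(message: str) -> Iterator[None]:
    # Assumed behaviour: announce the start of a section, run the enclosed block,
    # then announce that the section has finished.
    logging.info("**** STARTING: %s", message)
    yield
    logging.info("**** FINISHED: %s", message)
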
Example #22
 def run_in_situ(self) -> None:
     """
     Actually run the AzureML job; this method will typically run on an Azure VM.
     """
     # Only set the logging level now. Usually, when we set logging to DEBUG, we want diagnostics about the model
     # build itself, but not the tons of debug information that AzureML submissions create.
     logging_to_stdout(self.azure_config.log_level)
     suppress_logging_noise()
     pytest_failed = False
     training_failed = False
     pytest_passed = True
     # Ensure that model training and pytest both get executed in all cases, so that we see a full set of
     # test results in each PR
     outputs_folder = self.model_config.outputs_folder
     try:
         logging_to_file(self.model_config.logs_folder / LOG_FILE_NAME)
         try:
             self.create_ml_runner().run()
         except Exception as ex:
             print_exception(ex, "Model training/testing failed.")
             training_failed = True
         if self.azure_config.pytest_mark:
             try:
                 pytest_passed, results_file_path = run_pytest(
                     self.azure_config.pytest_mark, outputs_folder)
                 if not pytest_passed:
                     logging.error(
                         f"Not all PyTest tests passed. See {results_file_path}"
                     )
             except Exception as ex:
                 print_exception(ex, "Unable to run PyTest.")
                 pytest_failed = True
     finally:
         # wait for aggregation if required, and only if the training actually succeeded.
         if not training_failed and self.model_config.should_wait_for_other_cross_val_child_runs(
         ):
             self.wait_for_cross_val_runs_to_finish_and_aggregate()
         disable_logging_to_file()
     message = []
     if training_failed:
         message.append("Training failed")
     if pytest_failed:
         message.append("Unable to run Pytest")
     if not pytest_passed:
         message.append("At least 1 test in Pytest failed")
     # Terminate if pytest or model training has failed. This makes the smoke test in
     # PR builds fail if pytest fails.
     if message:
         raise ValueError(
             f"One component of the training pipeline failed: {'. '.join(message)}"
         )
Example #23
def test_compare_folder_against_parent_run(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can compare that a set of files exists in an AML run.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    parent_run = get_most_recent_run(
        fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    file1 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_PARENT_FOLDER / \
            CROSSVAL_RESULTS_FOLDER / "Test_outliers.txt"
    create_folder_and_write_text(
        file1, """

=== METRIC: Dice ===

No outliers found

=== METRIC: HausdorffDistance_mm ===

No outliers found""")
    with mock.patch("InnerEye.ML.baselines_util.PARENT_RUN_CONTEXT",
                    parent_run):
        # No plain files to compare. The file Test_outliers.txt should be compared and found to match.
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                        actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
        create_folder_and_write_text(file1, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                            actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.CONTENTS_MISMATCH}: {CROSSVAL_RESULTS_FOLDER}/{file1.name}" in message
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_PARENT_FOLDER / no_such_file
        create_folder_and_write_text(file2, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                            actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison without mocking the PARENT_RUN_CONTEXT. This should now
    # fail because there is no parent run to compare against.
    with pytest.raises(ValueError) as ex:
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                        actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
    assert "no (parent) run to compare against" in str(ex)
Example #24
def test_run_ml_with_multi_label_sequence_model(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of sequence models that predicts at multiple time points,
    when it is started via run_ml.
    """
    logging_to_stdout()
    config = ToyMultiLabelSequenceModel(should_validate=False)
    assert config.get_target_indices() == [1, 2, 3]
    expected_prediction_targets = [
        f"{SEQUENCE_POSITION_HUE_NAME_PREFIX} {x}" for x in ["01", "02", "03"]
    ]
    _target_indices = config.get_target_indices()
    assert _target_indices is not None
    assert len(_target_indices) == len(expected_prediction_targets)
    metrics_dict = create_metrics_dict_for_scalar_models(config)
    assert metrics_dict.get_hue_names(
        include_default=False) == expected_prediction_targets
    config.set_output_to(test_output_dirs.root_dir)
    # Create a fake dataset directory to make config validation pass
    config.local_dataset = test_output_dirs.root_dir
    config.dataset_data_frame = _get_multi_label_sequence_dataframe()
    config.pre_process_dataset_dataframe()
    config.num_epochs = 1
    config.max_batch_grad_cam = 1
    azure_config = get_default_azure_config()
    azure_config.train = True
    MLRunner(config, azure_config).run()
    # The metrics file should have one entry per epoch per subject per prediction target,
    # for all the 3 prediction targets.
    metrics_file = config.outputs_folder / "Train" / SUBJECT_METRICS_FILE_NAME
    assert metrics_file.exists()
    metrics = pd.read_csv(metrics_file)
    assert LoggingColumns.Patient.value in metrics
    assert LoggingColumns.Epoch.value in metrics
    assert LoggingColumns.Hue.value in metrics
    assert metrics[LoggingColumns.Hue.value].unique().tolist(
    ) == expected_prediction_targets
    group_by_subject = metrics.groupby(
        by=[LoggingColumns.Patient.value, LoggingColumns.Epoch.value])
    expected_prediction_target_lengths = [3, 2, 3, 3]
    for i, x in enumerate(group_by_subject):
        assert len(x[1]) == expected_prediction_target_lengths[i]
    group_by_subject_and_target = metrics.groupby(by=[
        LoggingColumns.Patient.value, LoggingColumns.Epoch.value,
        LoggingColumns.Hue.value
    ])
    for _, group in group_by_subject_and_target:
        assert len(group) == 1
Example #25
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)

    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    config.test_start_epoch = 2
    config.test_step_epochs = 2
    config.test_diff_epochs = 2
    expected_epochs = [2, 4]
    assert config.get_test_epochs() == expected_epochs

    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=Path(test_output_dirs.root_dir))
    model_training_result = model_training.model_train(
        config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]

    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]

    def extract_loss(results: List[MetricsDict]) -> List[float]:
        return [d.values()[MetricType.LOSS.value][0] for d in results]

    actual_train_loss = extract_loss(
        model_training_result.train_results_per_epoch)
    actual_val_loss = extract_loss(model_training_result.val_results_per_epoch)
    actual_learning_rates = list(
        flatten(model_training_result.learning_rates_per_epoch))

    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_learning_rates == pytest.approx(expected_learning_rates,
                                                  rel=1e-5)
    test_results = model_testing.model_test(
        config,
        ModelExecutionMode.TRAIN,
        checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == expected_epochs
Example #26
def plot_cross_validation(config: PlotCrossValidationConfig) -> Path:
    """
    Collects results from an AzureML cross validation run, and writes aggregate metrics files.
    For an ensemble run, this also asserts that there are N+1 data files available; otherwise the analysis only
    concerns the cross validation runs, and checks that the number of files is N.
    :param config: The settings for plotting cross validation results.
    :return: The path with all cross validation result files.
    """
    logging_to_stdout(logging.INFO)
    with logging_section("Downloading cross-validation results"):
        result_files, root_folder = download_crossval_result_files(config)
    config_and_files = OfflineCrossvalConfigAndFiles(config=config,
                                                     files=result_files)
    with logging_section("Plotting cross-validation results"):
        plot_cross_validation_from_files(config_and_files, root_folder)
    return root_folder
Example #27
def test_compare_files_text(test_output_dirs: OutputFolderForTests,
                            file_extension: str) -> None:
    """
    Checks the basic code to compare the contents of two text files.
    :param test_output_dirs:
    :param file_extension: The extension of the file to create.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    expected = test_output_dirs.root_dir / f"expected{file_extension}"
    actual = test_output_dirs.root_dir / "actual.does_not_matter"
    # Make sure that we test different line endings - the files should still match
    create_folder_and_write_text(expected, "Line1\r\nLine2")
    create_folder_and_write_text(actual, "Line1\nLine2")
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_text("does_not_match")
    assert compare_files(expected=expected,
                         actual=actual) == baselines_util.CONTENTS_MISMATCH
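
The test above expects two files that differ only in their line endings (\r\n versus \n) to compare as equal. One way such a comparison can be implemented, shown as an assumed sketch rather than the actual compare_files code:

from pathlib import Path

def text_contents_match(expected: Path, actual: Path) -> bool:
    # splitlines() treats \n, \r\n and \r alike, so only the logical line content is
    # compared and platform-specific line endings are ignored.
    return expected.read_text().splitlines() == actual.read_text().splitlines()
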
Example #28
def test_image_encoder_with_segmentation(
        test_output_dirs: OutputFolderForTests, encode_channels_jointly: bool,
        aggregation_type: AggregationType,
        imaging_feature_type: ImagingFeatureType) -> None:
    """
    Test if the image encoder networks can be trained on segmentations from HDF5.
    """
    logging_to_stdout()
    set_random_seed(0)
    scan_size = (6, 64, 60)
    dataset_contents = """subject,channel,path,label
    S1,week0,scan1.h5,
    S1,week1,scan2.h5,True
    S2,week0,scan3.h5,
    S2,week1,scan4.h5,False
    S3,week0,scan5.h5,
    S3,week1,scan6.h5,True
    S4,week0,scan7.h5,
    S4,week1,scan8.h5,True
    """
    config = ImageEncoder(encode_channels_jointly=encode_channels_jointly,
                          imaging_feature_type=imaging_feature_type,
                          should_validate=False,
                          aggregation_type=aggregation_type,
                          scan_size=scan_size)
    # This fails with 16bit precision, saying "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are
    # unsafe to autocast. Many models use a sigmoid layer right before the binary cross entropy layer. In this case,
    # combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits or
    # torch.nn.BCEWithLogitsLoss.  binary_cross_entropy_with_logits and BCEWithLogits are safe to autocast."
    config.use_mixed_precision = False
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = 1
    config.local_dataset = Path()
    config.dataset_data_frame = pd.read_csv(StringIO(dataset_contents),
                                            sep=",",
                                            dtype=str)
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](
        images=np.zeros(scan_size, dtype=np.float32),
        segmentations=np.ones(scan_size, dtype=np.uint8))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats',
                    return_value=image_and_seg):
        azure_config = get_default_azure_config()
        azure_config.train = True
        MLRunner(config, azure_config).run()
Example #29
def test_is_completed() -> None:
    """
    Test if we can correctly check run status and status of child runs.
    :return:
    """
    logging_to_stdout()
    workspace = get_default_workspace()

    def get_run_and_check(run_id: str, expected: bool) -> None:
        run = fetch_run(workspace, run_id)
        status = is_run_and_child_runs_completed(run)
        assert status == expected

    get_run_and_check(DEFAULT_RUN_RECOVERY_ID, True)
    get_run_and_check(DEFAULT_ENSEMBLE_RUN_RECOVERY_ID, True)
    # This Hyperdrive run has 1 failing child run, the parent run completed successfully.
    get_run_and_check(
        "refs_pull_326_merge:HD_d123f042-ca58-4e35-9a64-48d71c5f63a7", False)
Example #30
def test_compare_files_binary(test_output_dirs: OutputFolderForTests,
                              file_extension: str) -> None:
    """
    Checks the comparison of files that are not recognized as text files, for example images.
    :param test_output_dirs:
    :param file_extension: The extension of the file to create.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    expected = test_output_dirs.root_dir / f"expected{file_extension}"
    actual = test_output_dirs.root_dir / "actual.does_not_matter"
    data1 = bytes([1, 2, 3])
    data2 = bytes([4, 5, 6])
    expected.write_bytes(data1)
    actual.write_bytes(data1)
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_bytes(data2)
    assert compare_files(expected=expected,
                         actual=actual) == baselines_util.CONTENTS_MISMATCH