def generate_report(config: DeepLearningConfig, best_epoch: int, model_proc: ModelProcessing) -> None:
        logging.info("Saving report in html")
        if config.model_category not in [ModelCategory.Segmentation, ModelCategory.Classification]:
            return

        try:
            def get_epoch_path(mode: ModelExecutionMode) -> Path:
                p = get_epoch_results_path(best_epoch, mode=mode, model_proc=model_proc)
                return config.outputs_folder / p / METRICS_FILE_NAME

            path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
            path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
            path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)

            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME \
                if model_proc == ModelProcessing.ENSEMBLE_CREATION else config.outputs_folder
            if config.model_category == ModelCategory.Segmentation:
                generate_segmentation_notebook(result_notebook=output_dir / REPORT_IPYNB,
                                               train_metrics=path_to_best_epoch_train,
                                               val_metrics=path_to_best_epoch_val,
                                               test_metrics=path_to_best_epoch_test)
            else:
                if isinstance(config, ScalarModelBase):
                    generate_classification_notebook(result_notebook=output_dir / REPORT_IPYNB,
                                                     train_metrics=path_to_best_epoch_train,
                                                     val_metrics=path_to_best_epoch_val,
                                                     test_metrics=path_to_best_epoch_test,
                                                     dataset_csv_path=config.local_dataset / DATASET_CSV_FILE_NAME
                                                                        if config.local_dataset else None,
                                                     dataset_subject_column=config.subject_column,
                                                     dataset_file_column=config.image_file_column)
                else:
                    logging.info(f"Cannot create report for config of type {type(config)}.")
        except Exception as ex:
            print_exception(ex, "Failed to generated reporting notebook.")
Example #2
 def plot_cross_validation_and_upload_results(self) -> Path:
     from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
         plot_cross_validation, unroll_aggregate_metrics
     # perform aggregation as cross val splits are now ready
     plot_crossval_config = crossval_config_from_model_config(
         self.model_config)
     plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[
         RUN_RECOVERY_ID_KEY_NAME]
     plot_crossval_config.outputs_directory = self.model_config.outputs_folder
     plot_crossval_config.settings_yaml_file = self.yaml_config_file
     cross_val_results_root = plot_cross_validation(plot_crossval_config)
     if self.post_cross_validation_hook:
         self.post_cross_validation_hook(self.model_config,
                                         cross_val_results_root)
     # upload results to the parent run's outputs. Normally, we use blobxfer for that, but here we want
     # to ensure that the files are visible inside the AzureML UI.
     PARENT_RUN_CONTEXT.upload_folder(name=CROSSVAL_RESULTS_FOLDER,
                                      path=str(cross_val_results_root))
     if self.model_config.is_scalar_model:
         try:
             aggregates = pd.read_csv(cross_val_results_root /
                                      METRICS_AGGREGATES_FILE)
             unrolled_aggregate_metrics = unroll_aggregate_metrics(
                 aggregates)
             for m in unrolled_aggregate_metrics:
                 PARENT_RUN_CONTEXT.log(m.metric_name, m.metric_value)
         except Exception as ex:
             print_exception(
                 ex,
                 "Unable to log metrics to Hyperdrive parent run.",
                 logger_fn=logging.warning)
     return cross_val_results_root
 def create_and_set_torch_datasets(self,
                                   for_training: bool = True,
                                   for_inference: bool = True) -> None:
     """
     Creates and sets torch datasets for all model execution modes, and stores them in the
     self._datasets_for_training and self._datasets_for_inference fields.
     It also calls the hook to compute statistics for the train/val/test datasets.
     """
     # For models other than segmentation models, it is easier to create both training and inference datasets
     # in one go, ignoring the arguments.
     if self._datasets_for_training is None and self._datasets_for_inference is None:
         datasets = self.create_torch_datasets(self.get_dataset_splits())
         self._datasets_for_training = {
             mode: datasets[mode]
             for mode in [ModelExecutionMode.TRAIN, ModelExecutionMode.VAL]
         }
         self._datasets_for_inference = datasets
         for split, dataset in datasets.items():
             logging.info(
                 f"{split.value}: {len(dataset)} subjects. Detailed status: {dataset.status}"
             )
         if self.dataset_stats_hook:
             try:
                 self.dataset_stats_hook(datasets)
             except Exception as ex:
                 print_exception(
                     ex,
                     message=
                     "Error while calling the hook for computing dataset statistics."
                 )
    def generate_report(config: DeepLearningConfig, best_epoch: int,
                        model_proc: ModelProcessing) -> None:
        logging.info("Saving report in html")
        if not config.is_segmentation_model:
            return

        try:

            def get_epoch_path(mode: ModelExecutionMode) -> Path:
                p = get_epoch_results_path(best_epoch,
                                           mode=mode,
                                           model_proc=model_proc)
                return config.outputs_folder / p / METRICS_FILE_NAME

            path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
            path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
            path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)

            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME \
                if model_proc == ModelProcessing.ENSEMBLE_CREATION else config.outputs_folder
            generate_segmentation_notebook(
                result_notebook=output_dir / REPORT_IPYNB,
                train_metrics=path_to_best_epoch_train,
                val_metrics=path_to_best_epoch_val,
                test_metrics=path_to_best_epoch_test)
        except Exception as ex:
            print_exception(ex, "Failed to generated reporting notebook.")
def test_print_exception() -> None:
    """
    A test that just throws an exception and allows checking that the diagnostics appear at the right level.
    You need to inspect the test output manually.
    """
    try:
        raise ValueError("foo")
    except Exception as ex:
        print_exception(ex, "Message")
Example #6
 def try_compare_scores_against_baselines(self, model_proc: ModelProcessing) -> None:
     """
     Attempt to compare scores against baseline scores and to create scatterplots, if possible.
     """
     if not isinstance(self.model_config, SegmentationModelBase):  # keep type checker happy
         return
     try:
         from InnerEye.ML.baselines_util import compare_scores_against_baselines
         with logging_section("Comparing scores against baselines"):
             compare_scores_against_baselines(self.model_config, self.azure_config, model_proc)
     except Exception as ex:
         print_exception(ex, "Model baseline comparison failed.")
Example #7
 def run_in_situ(self) -> None:
     """
     Actually run the AzureML job; this method will typically run on an Azure VM.
     """
     # Only set the logging level now. Usually, when we set logging to DEBUG, we want diagnostics about the model
     # build itself, but not the tons of debug information that AzureML submissions create.
     logging_to_stdout(self.azure_config.log_level)
     suppress_logging_noise()
     pytest_failed = False
     training_failed = False
     pytest_passed = True
     # Ensure that both model training and pytest get executed in all cases, so that we see a full set of
     # test results in each PR
     outputs_folder = self.model_config.outputs_folder
     try:
         logging_to_file(self.model_config.logs_folder / LOG_FILE_NAME)
         try:
             self.create_ml_runner().run()
         except Exception as ex:
             print_exception(ex, "Model training/testing failed.")
             training_failed = True
         if self.azure_config.pytest_mark:
             try:
                 pytest_passed, results_file_path = run_pytest(
                     self.azure_config.pytest_mark, outputs_folder)
                 if not pytest_passed:
                     logging.error(
                         f"Not all PyTest tests passed. See {results_file_path}"
                     )
             except Exception as ex:
                 print_exception(ex, "Unable to run PyTest.")
                 pytest_failed = True
     finally:
         # wait for aggregation if required, and only if the training actually succeeded.
         if not training_failed and self.model_config.should_wait_for_other_cross_val_child_runs(
         ):
             self.wait_for_cross_val_runs_to_finish_and_aggregate()
         disable_logging_to_file()
     message = []
     if training_failed:
         message.append("Training failed")
     if pytest_failed:
         message.append("Unable to run Pytest")
     if not pytest_passed:
         message.append("At least 1 test in Pytest failed")
     # Terminate if pytest or model training has failed. This makes the smoke test in
     # PR builds fail if pytest fails.
     if message:
         raise ValueError(
             f"One component of the training pipeline failed: {'. '.join(message)}"
         )
    def generate_report(self, model_proc: ModelProcessing) -> None:
        config = self.model_config
        if config.model_category not in [ModelCategory.Segmentation, ModelCategory.Classification]:
            logging.info(f"No reporting available for a model with category {config.model_category}")
            return
        logging.info("Saving report in HTML")
        try:
            def get_epoch_path(mode: ModelExecutionMode) -> Path:
                p = get_epoch_results_path(mode=mode, model_proc=model_proc)
                return config.outputs_folder / p / SUBJECT_METRICS_FILE_NAME

            path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
            path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
            path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)

            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME \
                if model_proc == ModelProcessing.ENSEMBLE_CREATION else config.outputs_folder

            reports_dir = output_dir / reports_folder
            if not reports_dir.exists():
                reports_dir.mkdir(exist_ok=False)

            if config.model_category == ModelCategory.Segmentation:
                generate_segmentation_notebook(
                    result_notebook=reports_dir / get_ipynb_report_name(config.model_category.value),
                    train_metrics=path_to_best_epoch_train,
                    val_metrics=path_to_best_epoch_val,
                    test_metrics=path_to_best_epoch_test)
            else:
                if isinstance(config, ScalarModelBase) and not isinstance(config, SequenceModelBase):
                    generate_classification_notebook(
                        result_notebook=reports_dir / get_ipynb_report_name(config.model_category.value),
                        config=config,
                        train_metrics=path_to_best_epoch_train,
                        val_metrics=path_to_best_epoch_val,
                        test_metrics=path_to_best_epoch_test)

                    if len(config.class_names) > 1:
                        generate_classification_multilabel_notebook(
                            result_notebook=reports_dir / get_ipynb_report_name(f"{config.model_category.value}_multilabel"),
                            config=config,
                            train_metrics=path_to_best_epoch_train,
                            val_metrics=path_to_best_epoch_val,
                            test_metrics=path_to_best_epoch_test)
                else:
                    logging.info(f"Cannot create report for config of type {type(config)}.")
        except Exception as ex:
            print_exception(ex, "Failed to generated reporting notebook.")
            raise
Example #9
def download_dataset(azure_dataset_id: str, target_folder: Path,
                     azure_config: AzureConfig) -> Path:
    """
    Downloads or checks for an existing dataset on the executing machine. The dataset specified by azure_dataset_id
    is downloaded from the AzureML dataset attached to the given AzureML workspace, into a subfolder of the
    `target_folder` that has the same name as the dataset. If such a folder already exists and contains a
    dataset.csv file, no download is started.
    :param azure_dataset_id: The name of a dataset that is registered in the AzureML workspace.
    :param target_folder: The folder into which the dataset should be downloaded.
    :param azure_config: All Azure-related configuration options.
    :return: A path on the local machine that contains the dataset.
    """
    workspace = azure_config.get_workspace()
    try:
        downloaded_via_blobxfer = download_dataset_via_blobxfer(
            dataset_id=azure_dataset_id,
            azure_config=azure_config,
            target_folder=target_folder)
        if downloaded_via_blobxfer:
            return downloaded_via_blobxfer
    except Exception as ex:
        print_exception(ex, message="Unable to download dataset via blobxfer.")
    logging.info("Trying to download dataset via AzureML datastore now.")
    azure_dataset = get_or_create_dataset(workspace, azure_dataset_id)
    if not isinstance(azure_dataset, FileDataset):
        raise ValueError(
            f"Expected to get a FileDataset, but got {type(azure_dataset)}")
    # The downloaded dataset may already exist from a previous run.
    expected_dataset_path = target_folder / azure_dataset_id
    expected_dataset_file = expected_dataset_path / DATASET_CSV_FILE_NAME
    logging.info(
        f"Model training will use dataset '{azure_dataset_id}' in Azure.")
    if expected_dataset_path.is_dir() and expected_dataset_file.is_file():
        logging.info(
            f"The dataset appears to be downloaded already in {expected_dataset_path}. Skipping."
        )
        return expected_dataset_path
    logging.info(
        "Starting to download the dataset - WARNING, this could take very long!"
    )
    with logging_section("Downloading dataset"):
        azure_dataset.download(target_path=str(expected_dataset_path),
                               overwrite=False)
    logging.info(
        f"Azure dataset '{azure_dataset_id}' is now available in {expected_dataset_path}"
    )
    return expected_dataset_path
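
A hypothetical call site for the function above; the dataset name, the target folder, and the pre-built AzureConfig instance are placeholders rather than values taken from the repository.

import logging
from pathlib import Path

# Illustrative usage only: "my_dataset" is a placeholder dataset name, and
# azure_config is assumed to be an already constructed AzureConfig instance.
local_path = download_dataset(azure_dataset_id="my_dataset",
                              target_folder=Path("datasets"),
                              azure_config=azure_config)
logging.info(f"Dataset is available locally at {local_path}")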
Example #10
 def run_in_situ(self) -> None:
     """
     Actually run the AzureML job; this method will typically run on an Azure VM.
     """
     # Only set the logging level now. Usually, when we set logging to DEBUG, we want diagnostics about the model
     # build itself, but not the tons of debug information that AzureML submissions create.
     logging_to_stdout(self.azure_config.log_level)
     suppress_logging_noise()
     error_messages = []
     # For the PR build in AzureML, we can either run pytest, or the training of the simple PR model. Running both
     # only works when using DDP_spawn, but that has the side effect of messing up the memory consumption of the
     # large models.
     if self.azure_config.pytest_mark:
         try:
             outputs_folder = Path.cwd() / fixed_paths.DEFAULT_AML_UPLOAD_DIR
             pytest_passed, results_file_path = run_pytest(self.azure_config.pytest_mark, outputs_folder)
             if not pytest_passed:
                 pytest_failures = f"Not all PyTest tests passed. See {results_file_path}"
                 logging.error(pytest_failures)
                 error_messages.append(pytest_failures)
         except Exception as ex:
             print_exception(ex, "Unable to run PyTest.")
             error_messages.append(f"Unable to run PyTest: {ex}")
     else:
         # Set environment variables for multi-node training if needed.
         # In particular, the multi-node environment variables should NOT be set in single node
         # training, otherwise this might lead to errors with the c10 distributed backend
         # (https://github.com/microsoft/InnerEye-DeepLearning/issues/395)
         if self.azure_config.num_nodes > 1:
             set_environment_variables_for_multi_node()
         try:
             logging_to_file(self.model_config.logs_folder / LOG_FILE_NAME)
             try:
                 self.create_ml_runner().run()
             except Exception as ex:
                 print_exception(ex, "Model training/testing failed.")
                 error_messages.append(f"Training failed: {ex}")
         finally:
             disable_logging_to_file()
     # Terminate if pytest or model training has failed. This makes the smoke test in
     # PR builds fail if pytest fails.
     if error_messages:
         raise ValueError(
             f"At least one component of the runner failed: {os.linesep} {os.linesep.join(error_messages)}")