def set_multiprocessing_start_method(self) -> None:
    """
    Set the (PyTorch) multiprocessing start method.
    """
    method = self.model_config.multiprocessing_start_method
    if is_windows():
        if method != MultiprocessingStartMethod.spawn:
            logging.warning(f"Cannot set multiprocessing start method to '{method.name}' "
                            "because only 'spawn' is available in Windows")
    else:
        logging.info(f"Setting multiprocessing start method to '{method.name}'")
        torch.multiprocessing.set_start_method(method.name, force=True)
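# --- Hedged illustration (not part of the original module) ---
# A minimal, self-contained sketch of the platform-dependent start-method choice made in
# set_multiprocessing_start_method above. The helper name below is hypothetical; the only
# facts assumed are that Windows supports only 'spawn', while Linux also offers 'fork' and
# 'forkserver', and that torch.multiprocessing.set_start_method(..., force=True) applies
# the chosen method.
import platform

import torch.multiprocessing


def choose_start_method_sketch(requested: str) -> str:
    """Return a start method that can actually be applied on the current platform."""
    if platform.system() == "Windows" and requested != "spawn":
        # Windows only supports 'spawn'; fall back instead of raising.
        return "spawn"
    return requested


if __name__ == "__main__":
    torch.multiprocessing.set_start_method(choose_start_method_sketch("forkserver"), force=True)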
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        results = model_train(config)
        assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) \
               == config.get_total_number_of_save_epochs()


@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build")
@pytest.mark.parametrize(["use_combined_model", "imaging_feature_type"],
                         [(False, ImagingFeatureType.Image),
                          (True, ImagingFeatureType.Image),
                          (True, ImagingFeatureType.Segmentation),
                          (True, ImagingFeatureType.ImageAndSegmentation)])
def test_run_ml_with_sequence_model(use_combined_model: bool,
                                    imaging_feature_type: ImagingFeatureType,
                                    test_output_dirs: TestOutputDirectories) -> None:
    """
    Test training and testing of sequence models, when it is started together via run_ml.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model, imaging_feature_type,
                              should_validate=False, sequence_target_positions=[2, 10])
    config.set_output_to(test_output_dirs.root_dir)
def test_train_classification_model(class_name: str, test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from training and
    testing. Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.class_names = [class_name]
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
    expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
    # Ensure that all metrics are computed on both training and validation set
    assert len(model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
                   MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                   MetricType.AREA_UNDER_PR_CURVE,
                   MetricType.AREA_UNDER_ROC_CURVE,
                   MetricType.CROSS_ENTROPY,
                   MetricType.LOSS,
                   MetricType.SECONDS_PER_BATCH,
                   MetricType.SECONDS_PER_EPOCH,
                   MetricType.SUBJECT_COUNT]:
        assert metric.value in model_training_result.train_results_per_epoch[0], f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[0], f"{metric.value} not in validation"
    actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN, checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = [0.636085, 0.735952]
    assert test_results.metrics.values(class_name)[MetricType.CROSS_ENTROPY.value] == \
           pytest.approx(expected_metrics, abs=1e-5)
    # Run detailed logs file check only on CPU, it will contain slightly different metrics on GPU, but here
    # we want to mostly assert that the files look reasonable
    if machine_has_gpu:
        return
    # Check epoch_metrics.csv
    epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
    # Auto-format will break the long header line, hence the strange way of writing it!
    expected_epoch_metrics = \
        f"{LoggingColumns.Loss.value},{LoggingColumns.CrossEntropy.value}," \
        f"{LoggingColumns.AccuracyAtThreshold05.value},{LoggingColumns.LearningRate.value}," + \
        f"{LoggingColumns.AreaUnderRocCurve.value}," \
        f"{LoggingColumns.AreaUnderPRCurve.value}," \
        f"{LoggingColumns.AccuracyAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalsePositiveRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalseNegativeRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.OptimalThreshold.value}," \
        f"{LoggingColumns.SubjectCount.value},{LoggingColumns.Epoch.value}," \
        f"{LoggingColumns.CrossValidationSplitIndex.value}\n" + \
        """0.6866141557693481,0.6866141557693481,0.5,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,0,-1
0.6864652633666992,0.6864652633666992,0.5,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,1,-1
0.6863163113594055,0.6863162517547607,0.5,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,2,-1
0.6861673593521118,0.6861673593521118,0.5,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,3,-1
"""
    check_log_file(epoch_metrics_path, expected_epoch_metrics, ignore_columns=[])
    # Check metrics.csv: This contains the per-subject per-epoch model outputs
    # Randomization comes out slightly different on Windows, hence only execute the test on Linux
    if common_util.is_windows():
        return
    metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / SUBJECT_METRICS_FILE_NAME
    metrics_expected = \
        f"""epoch,subject,prediction_target,model_output,label,data_split,cross_validation_split_index
0,S2,{class_name},0.529514,1,Train,-1
0,S4,{class_name},0.521659,0,Train,-1
1,S4,{class_name},0.521482,0,Train,-1
1,S2,{class_name},0.529475,1,Train,-1
2,S4,{class_name},0.521305,0,Train,-1
2,S2,{class_name},0.529437,1,Train,-1
3,S2,{class_name},0.529399,1,Train,-1
3,S4,{class_name},0.521128,0,Train,-1
"""
    check_log_file(metrics_path, metrics_expected, ignore_columns=[])
    # Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
    # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
    inference_metrics_path = config.outputs_folder / get_epoch_results_path(ModelExecutionMode.TRAIN) / \
                             SUBJECT_METRICS_FILE_NAME
    inference_metrics_expected = \
        f"""prediction_target,subject,model_output,label,cross_validation_split_index,data_split
{class_name},S2,0.5293986201286316,1.0,-1,Train
{class_name},S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])
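# --- Hedged illustration (not the project's check_log_file implementation) ---
# One plausible way to compare a metrics CSV on disk against an expected CSV string while
# ignoring machine-dependent columns such as timing; all names here are assumptions made
# for the sketch only.
from io import StringIO
from pathlib import Path
from typing import List

import pandas as pd


def compare_csv_ignoring_columns_sketch(actual_file: Path, expected_csv: str, ignore_columns: List[str]) -> None:
    actual = pd.read_csv(actual_file).drop(columns=ignore_columns, errors="ignore")
    expected = pd.read_csv(StringIO(expected_csv)).drop(columns=ignore_columns, errors="ignore")
    # check_exact=False tolerates tiny floating point differences between runs.
    pd.testing.assert_frame_equal(actual, expected, check_exact=False)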
def compare_files(actual: List[Path], expected: List[str]) -> None:
    assert len(actual) == len(expected)
    for (f, e) in zip(actual, expected):
        assert f.exists()
        full_expected = full_ml_test_data_path(e)
        assert full_expected.exists()
        assert str(f).endswith(e)
        # To update the stored results, uncomment this line:
        # full_expected.write_bytes(f.read_bytes())
        assert file_as_bytes(f) == file_as_bytes(full_expected)


@pytest.mark.skipif(common_util.is_windows(), reason="Rendering of the graph is slightly different on Linux")
def test_plot_normalization_result(test_output_dirs: TestOutputDirectories) -> None:
    """
    Tests plotting of before/after histograms in photometric normalization.
    :return:
    """
    size = (3, 3, 3)
    image = np.zeros((1,) + size)
    for i, (z, y, x) in enumerate(itertools.product(range(size[0]), range(size[1]), range(size[2]))):
        image[0, z, y, x] = i
    labels = np.zeros((2,) + size)
    labels[1, 1, 1, 1] = 1
    sample = Sample(image=image,
import pandas as pd
import pytest

from InnerEye.Common.common_util import is_windows
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
from InnerEye.ML.dataset.scalar_sample import ScalarItem
from InnerEye.ML.scalar_config import ScalarModelBase
from InnerEye.ML.utils import ml_util


@pytest.mark.parametrize("num_dataload_workers", [0, 1])
@pytest.mark.parametrize("shuffle", [False, True])
@pytest.mark.skipif(is_windows(),
                    reason="This test runs fine on local Windows boxes, but leads to odd timeouts in Azure")
def test_dataloader_speed(test_output_dirs: OutputFolderForTests,
                          num_dataload_workers: int,
                          shuffle: bool) -> None:
    """
    Test how dataloaders work when using multiple processes.
    """
    ml_util.set_random_seed(0)
    # The dataset should only contain the file name stem, without extension.
    csv_string = StringIO("""subject,channel,path,value,scalar1
S1,image,4be9beed-5861-fdd2-72c2-8dd89aadc1ef
S1,label,,True,1.0
S2,image,6ceacaf8-abd2-ffec-2ade-d52afd6dd1be
S2,label,,True,2.0
    expected_files = [config.checkpoint_folder / run_to_recover.id / expected_checkpoint_file]
    checkpoint_paths = run_recovery.get_checkpoint_paths(1)
    if is_ensemble:
        assert len(run_recovery.checkpoints_roots) == len(expected_files)
        assert all([(x in [y.parent for y in expected_files]) for x in run_recovery.checkpoints_roots])
        assert len(checkpoint_paths) == len(expected_files)
        assert all([x in expected_files for x in checkpoint_paths])
    else:
        assert len(checkpoint_paths) == 1
        assert checkpoint_paths[0] == expected_files[0]
    assert all([expected_file.exists() for expected_file in expected_files])


@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on the windows build")
def test_download_checkpoints_hyperdrive_run(test_output_dirs: OutputFolderForTests,
                                             runner_config: AzureConfig) -> None:
    output_dir = test_output_dirs.root_dir
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)
    runner_config.run_recovery_id = DEFAULT_ENSEMBLE_RUN_RECOVERY_ID
    child_runs = fetch_child_runs(run=fetch_run(runner_config.get_workspace(), DEFAULT_ENSEMBLE_RUN_RECOVERY_ID))
    # recover child runs separately also to test hyperdrive child run recovery functionality
    expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
    for child in child_runs:
        expected_files = [config.checkpoint_folder / child.id / expected_checkpoint_file]
        run_recovery = RunRecovery.download_checkpoints_from_recovery_run(runner_config, config, child)
        assert all([x in expected_files for x in run_recovery.get_checkpoint_paths(epoch=1)])
        assert all([expected_file.exists() for expected_file in expected_files])
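# --- Hedged illustration (hypothetical helper, not part of RunRecovery) ---
# Sketch of how the expected per-child checkpoint locations used in the assertions above
# could be composed: one folder per recovered child run id underneath the config's
# checkpoint folder, each holding the epoch checkpoint file.
from pathlib import Path
from typing import List


def expected_recovery_files_sketch(checkpoint_folder: Path, child_run_ids: List[str], file_name: str) -> List[Path]:
    return [checkpoint_folder / run_id / file_name for run_id in child_run_ids]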
class WorkflowParams(param.Parameterized):
    """
    This class contains all parameters that affect how the whole training and testing workflow is executed.
    """
    random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
    number_of_cross_validation_splits: int = param.Integer(0, bounds=(0, None),
                                                           doc="Number of cross validation splits for k-fold "
                                                               "cross validation")
    cross_validation_split_index: int = param.Integer(DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, bounds=(-1, None),
                                                      doc="The index of the cross validation fold this model is "
                                                          "associated with when performing k-fold cross validation")
    perform_training_set_inference: bool = \
        param.Boolean(False,
                      doc="If True, run full image inference on the training set at the end of training. If False and "
                          "perform_validation_and_test_set_inference is True (default), only run inference on "
                          "validation and test set. If both flags are False do not run inference.")
    perform_validation_and_test_set_inference: bool = \
        param.Boolean(True,
                      doc="If True (default), run full image inference on validation and test set after training.")
    weights_url: str = param.String(doc="If provided, a url from which weights will be downloaded and used for model "
                                        "initialization.")
    local_weights_path: Optional[Path] = param.ClassSelector(class_=Path, default=None, allow_None=True,
                                                             doc="The path to the weights to use for model "
                                                                 "initialization, when training outside AzureML.")
    generate_report: bool = param.Boolean(default=True,
                                          doc="If True (default), write a modelling report in HTML format. "
                                              "If False, do not write that report.")
    # The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and
    # "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux
    # can reduce the chance of stuck jobs.
    multiprocessing_start_method: MultiprocessingStartMethod = \
        param.ClassSelector(class_=MultiprocessingStartMethod,
                            default=(MultiprocessingStartMethod.spawn if is_windows()
                                     else MultiprocessingStartMethod.fork),
                            doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
                                "fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
                                "Set to forkserver as a possible remedy for stuck jobs.")
    monitoring_interval_seconds: int = param.Integer(0, doc="Seconds delay between logging GPU/CPU resource "
                                                            "statistics. If 0 or less, do not log any resource "
                                                            "statistics.")

    def validate(self) -> None:
        if self.weights_url and self.local_weights_path:
            raise ValueError("Cannot specify both local_weights_path and weights_url.")
        if self.number_of_cross_validation_splits == 1:
            raise ValueError("At least two splits required to perform cross validation, but got "
                             f"{self.number_of_cross_validation_splits}. To train without cross validation, set "
                             "number_of_cross_validation_splits=0.")
        if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index:
            raise ValueError(f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, "
                             f"which is invalid for CV with {self.number_of_cross_validation_splits} splits.")
        elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1:
            raise ValueError(f"Cross validation split index must be -1 for a non cross validation run, "
                             f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
                             f"and cross_validation_split_index={self.cross_validation_split_index}")

    @property
    def is_offline_run(self) -> bool:
        """
        Returns True if the run is executing outside AzureML, or False if inside AzureML.
        """
        return is_offline_run_context(RUN_CONTEXT)

    @property
    def perform_cross_validation(self) -> bool:
        """
        True if cross validation will be performed as part of the training procedure.
        :return:
        """
        return self.number_of_cross_validation_splits > 1

    def get_effective_random_seed(self) -> int:
        """
        Returns the random seed set as part of this configuration. If the configuration corresponds
        to a cross validation split, then the cross validation fold index will be added to the
        set random seed in order to return the effective random seed.
        :return:
        """
        seed = self.random_seed
        if self.perform_cross_validation:
            # offset the random seed based on the cross validation split index so each
            # fold has a different initial random state.
            seed += self.cross_validation_split_index
        return seed
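# --- Hedged usage sketch (illustrative values only) ---
# How the cross-validation parameters above interact: with 5 splits and fold index 2, the
# configuration validates, reports that cross validation is active, and offsets the random
# seed by the fold index so that each fold starts from a different initial random state.
def _workflow_params_seed_example() -> None:
    params = WorkflowParams(random_seed=42,
                            number_of_cross_validation_splits=5,
                            cross_validation_split_index=2)
    params.validate()
    assert params.perform_cross_validation
    assert params.get_effective_random_seed() == 42 + 2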
                                     use_mixed_precision=True)
    lightning_model = create_lightning_model(config)
    assert isinstance(lightning_model, SegmentationLightning)
    pipeline = InferencePipeline(model=lightning_model, model_config=config)
    image = np.random.uniform(-1, 1, (1,) + size)
    result = pipeline.predict_and_post_process_whole_image(image, mask=np.ones(size), voxel_spacing_mm=(1, 1, 1))
    # All posteriors and segmentations must have the size of the input image
    for p in [*result.posteriors, result.segmentation]:
        assert p.shape == size
        # Check that all results are not NaN. In particular, if stride size is not adjusted
        # correctly, the results would be partially NaN.
        image_util.check_array_range(p)


@pytest.mark.skipif(is_windows(), reason="Too slow on windows")
def test_inference_on_too_small_image() -> None:
    """
    Running inference on a simplified Unet model when the input image is too small along an axis.
    """
    with pytest.raises(ValueError) as ex:
        run_inference_on_unet((5, 10, 64))
    assert "input image must have at least a size of (16, 16, 16)" in str(ex)


@pytest.mark.skipif(is_windows(), reason="Too slow on windows")
@pytest.mark.parametrize("size", [(26, 20, 50), (16, 16, 16)])
def test_inference_on_small_image(size: TupleInt3) -> None:
    """
    Test case for a failure at test time: Inference failed when the image was smaller than the test_crop_size.
    Try with different size, one that has
def test_train_classification_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from training and
    testing. Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
    expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
    # Ensure that all metrics are computed on both training and validation set
    assert len(model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
                   MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                   MetricType.AREA_UNDER_PR_CURVE,
                   MetricType.AREA_UNDER_ROC_CURVE,
                   MetricType.CROSS_ENTROPY,
                   MetricType.LOSS,
                   # For unknown reasons, we don't get seconds_per_batch for the training data.
                   # MetricType.SECONDS_PER_BATCH,
                   MetricType.SECONDS_PER_EPOCH,
                   MetricType.SUBJECT_COUNT,
                   ]:
        assert metric.value in model_training_result.train_results_per_epoch[0], f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[0], f"{metric.value} not in validation"
    actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN, checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = [0.636085, 0.735952]
    assert test_results.metrics.values()[MetricType.CROSS_ENTROPY.value] == \
           pytest.approx(expected_metrics, abs=1e-5)
    # Run detailed logs file check only on CPU, it will contain slightly different metrics on GPU, but here
    # we want to mostly assert that the files look reasonable
    if machine_has_gpu:
        return
    # Check epoch_metrics.csv
    epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
    # Auto-format will break the long header line, hence the strange way of writing it!
    expected_epoch_metrics = \
        "loss,cross_entropy,accuracy_at_threshold_05,seconds_per_epoch,learning_rate," + \
        "area_under_roc_curve,area_under_pr_curve,accuracy_at_optimal_threshold," \
        "false_positive_rate_at_optimal_threshold,false_negative_rate_at_optimal_threshold," \
        "optimal_threshold,subject_count,epoch,cross_validation_split_index\n" + \
        """0.6866141557693481,0.6866141557693481,0.5,0,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,0,-1
0.6864652633666992,0.6864652633666992,0.5,0,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,1,-1
0.6863163113594055,0.6863162517547607,0.5,0,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,2,-1
0.6861673593521118,0.6861673593521118,0.5,0,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,3,-1
"""
    # We cannot compare columns like "seconds_per_epoch" because timing will obviously vary between machines.
    # Column must still be present, though.
    check_log_file(epoch_metrics_path, expected_epoch_metrics,
                   ignore_columns=[LoggingColumns.SecondsPerEpoch.value])
    # Check metrics.csv: This contains the per-subject per-epoch model outputs
    # Randomization comes out slightly different on Windows, hence only execute the test on Linux
    if common_util.is_windows():
        return
    metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / SUBJECT_METRICS_FILE_NAME
    metrics_expected = \
        """prediction_target,epoch,subject,model_output,label,cross_validation_split_index,data_split
Default,0,S2,0.5295137763023376,1.0,-1,Train
Default,0,S4,0.5216594338417053,0.0,-1,Train
Default,1,S4,0.5214819312095642,0.0,-1,Train
Default,1,S2,0.5294750332832336,1.0,-1,Train
Default,2,S2,0.5294366478919983,1.0,-1,Train
Default,2,S4,0.5213046073913574,0.0,-1,Train
Default,3,S2,0.5293986201286316,1.0,-1,Train
Default,3,S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(metrics_path, metrics_expected, ignore_columns=[])
    # Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
    # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
    inference_metrics_path = config.outputs_folder / get_epoch_results_path(ModelExecutionMode.TRAIN) / \
                             SUBJECT_METRICS_FILE_NAME
    inference_metrics_expected = \
        """prediction_target,subject,model_output,label,cross_validation_split_index,data_split
Default,S2,0.5293986201286316,1.0,-1,Train
Default,S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])
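# --- Hedged cross-check (illustrative only, assumes the CSV layouts shown above) ---
# The per-subject outputs of the last training epoch in metrics.csv should coincide with
# the outputs that model_test writes for the TRAIN split; a helper like this could extract
# them for comparison. The function name is hypothetical.
from pathlib import Path
from typing import Dict

import pandas as pd


def _last_epoch_outputs_sketch(metrics_csv: Path) -> Dict[str, float]:
    df = pd.read_csv(metrics_csv)
    last_epoch = df[df["epoch"] == df["epoch"].max()]
    return dict(zip(last_epoch["subject"], last_epoch["model_output"]))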
class DeepLearningConfig(GenericConfig, CudaAwareConfig):
    """
    A class that holds all settings that are shared across segmentation models and regression/classification models.
    """
    _model_category: ModelCategory = param.ClassSelector(class_=ModelCategory,
                                                         doc="The high-level model category described by this config.")
    _model_name: str = param.String(None, doc="The human readable name of the model (for example, Liver). This is "
                                              "usually set from the class name.")
    random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.")
    azure_dataset_id: str = param.String(doc="If provided, the ID of the dataset to use. This dataset must exist as a "
                                             "folder of the same name in the 'datasets' "
                                             "container in the datasets storage account.")
    local_dataset: Optional[Path] = param.ClassSelector(class_=Path, default=None, allow_None=True,
                                                        doc="The path of the dataset to use, when training is running "
                                                            "outside Azure.")
    num_dataload_workers: int = param.Integer(8, bounds=(0, None),
                                              doc="The number of data loading workers (processes). When set to 0, "
                                                  "data loading is running in the same process (no process startup "
                                                  "cost, hence good for use in unit testing. However, it "
                                                  "does not give the same result as running with 1 worker process)")
    shuffle: bool = param.Boolean(True, doc="If true, the dataset will be shuffled randomly during training.")
    num_epochs: int = param.Integer(100, bounds=(1, None), doc="Number of epochs to train.")
    start_epoch: int = param.Integer(0, bounds=(0, None),
                                     doc="The first epoch to train. Set to 0 to start a new training. Set to a value "
                                         "larger than zero for starting from a checkpoint.")
    l_rate: float = param.Number(1e-4, doc="The initial learning rate", bounds=(0, None))
    _min_l_rate: float = param.Number(0.0, doc="The minimum learning rate for the Polynomial and Cosine schedulers.",
                                      bounds=(0.0, None))
    l_rate_scheduler: LRSchedulerType = param.ClassSelector(default=LRSchedulerType.Polynomial,
                                                            class_=LRSchedulerType, instantiate=False,
                                                            doc="Learning rate decay method (Cosine, Polynomial, "
                                                                "Step, MultiStep or Exponential)")
    l_rate_exponential_gamma: float = param.Number(0.9, doc="Controls the rate of decay for the Exponential "
                                                            "LR scheduler.")
    l_rate_step_gamma: float = param.Number(0.1, doc="Controls the rate of decay for the Step LR scheduler.")
    l_rate_step_step_size: int = param.Integer(50, bounds=(0, None), doc="The step size for Step LR scheduler")
    l_rate_multi_step_gamma: float = param.Number(0.1, doc="Controls the rate of decay for the "
                                                           "MultiStep LR scheduler.")
    l_rate_multi_step_milestones: Optional[List[int]] = param.List(None, bounds=(1, None), allow_None=True, class_=int,
                                                                   doc="The milestones for MultiStep decay.")
    l_rate_polynomial_gamma: float = param.Number(1e-4, doc="Controls the rate of decay for the "
                                                            "Polynomial LR scheduler.")
    l_rate_warmup: LRWarmUpType = param.ClassSelector(default=LRWarmUpType.NoWarmUp, class_=LRWarmUpType,
                                                      instantiate=False,
                                                      doc="The type of learning rate warm up to use. "
                                                          "Can be NoWarmUp (default) or Linear.")
    l_rate_warmup_epochs: int = param.Integer(0, bounds=(0, None),
                                              doc="Number of warmup epochs (linear warmup) before the "
                                                  "scheduler starts decaying the learning rate. "
                                                  "For example, if you are using MultiStepLR with "
                                                  "milestones [50, 100, 200] and warmup epochs = 100, warmup "
                                                  "will last for 100 epochs and the first decay of LR "
                                                  "will happen on epoch 150")
    optimizer_type: OptimizerType = param.ClassSelector(default=OptimizerType.Adam, class_=OptimizerType,
                                                        instantiate=False, doc="The optimizer_type to use")
    opt_eps: float = param.Number(1e-4, doc="The epsilon parameter of RMSprop or Adam")
    rms_alpha: float = param.Number(0.9, doc="The alpha parameter of RMSprop")
    adam_betas: TupleFloat2 = param.NumericTuple((0.9, 0.999), length=2,
                                                 doc="The betas parameter of Adam, default is (0.9, 0.999)")
    momentum: float = param.Number(0.6, doc="The momentum parameter of the optimizers")
    weight_decay: float = param.Number(1e-4, doc="The weight decay used to control L2 regularization")
    save_start_epoch: int = param.Integer(100, bounds=(0, None),
                                          doc="Save epoch checkpoints only when epoch is "
                                              "larger or equal to this value.")
    save_step_epochs: int = param.Integer(50, bounds=(0, None),
                                          doc="Save epoch checkpoints when epoch number is a "
                                              "multiple of save_step_epochs")
    train_batch_size: int = param.Integer(4, bounds=(0, None),
                                          doc="The number of crops that make up one minibatch during training.")
    detect_anomaly: bool = param.Boolean(False, doc="If true, test gradients for anomalies (NaN or Inf) during "
                                                    "training.")
    use_mixed_precision: bool = param.Boolean(False, doc="If true, mixed precision training is activated during "
                                                         "training.")
    use_model_parallel: bool = param.Boolean(False, doc="If true, neural network model is partitioned across all "
                                                        "available GPUs to fit in a large model. It shall not be used "
                                                        "together with data parallel.")
    test_diff_epochs: Optional[int] = param.Integer(None, doc="Number of different epochs of the same model to test",
                                                    allow_None=True)
    test_step_epochs: Optional[int] = param.Integer(None, doc="How many epochs to move for each test", allow_None=True)
    test_start_epoch: Optional[int] = param.Integer(None, doc="The first epoch on which testing should run.",
                                                    allow_None=True)
    monitoring_interval_seconds: int = param.Integer(0, doc="Seconds delay between logging GPU/CPU resource "
                                                            "statistics. If 0 or less, do not log any resource "
                                                            "statistics.")
    number_of_cross_validation_splits: int = param.Integer(0, bounds=(0, None),
                                                           doc="Number of cross validation splits for k-fold "
                                                               "cross validation")
    cross_validation_split_index: int = param.Integer(DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, bounds=(-1, None),
                                                      doc="The index of the cross validation fold this model is "
                                                          "associated with when performing k-fold cross validation")
    file_system_config: DeepLearningFileSystemConfig = param.ClassSelector(default=DeepLearningFileSystemConfig(),
                                                                           class_=DeepLearningFileSystemConfig,
                                                                           instantiate=False,
                                                                           doc="File system related configs")
    pin_memory: bool = param.Boolean(True, doc="Value of pin_memory argument to DataLoader")
    _overrides: Dict[str, Any] = param.Dict(instantiate=True,
                                            doc="Model config properties that were overridden from the commandline")
    restrict_subjects: Optional[str] = \
        param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). "
                         "If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where "
                         "i, j and k are integers, modify just the corresponding sets (i for train, j for val, k for "
                         "test). If any of i, j or k are missing or are negative, do not modify the corresponding "
                         "set. Thus a value of 20,,5 means limit training set to 20, keep validation set as is, and "
                         "limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added "
                         "to that set.",
                     allow_None=True)
    perform_training_set_inference: bool = \
        param.Boolean(False,
                      doc="If False (default), run full image inference on validation and test set after training. "
                          "If True, also run full image inference on the training set")
    perform_validation_and_test_set_inference: bool = \
        param.Boolean(True,
                      doc="If True (default), run full image inference on validation and test set after training.")
    _metrics_data_frame_loggers: MetricsDataframeLoggers = param.ClassSelector(default=None,
                                                                               class_=MetricsDataframeLoggers,
                                                                               instantiate=False,
                                                                               doc="Data frame loggers for this model "
                                                                                   "config")
    _dataset_data_frame: Optional[DataFrame] = \
        param.DataFrame(default=None,
                        doc="The dataframe that contains the dataset for the model. This is usually read from disk "
                            "from dataset.csv")
    _use_gpu: Optional[bool] = param.Boolean(None,
                                             doc="If true, a CUDA capable GPU with at least 1 device is available. "
                                                 "If None, the use_gpu property has not yet been called.")
    avoid_process_spawn_in_data_loaders: bool = \
        param.Boolean(is_windows(),
                      doc="If True, use a data loader logic that avoids spawning new processes at the "
                          "start of each epoch. This speeds up training on both Windows and Linux, but "
                          "on Linux, inference is currently disabled as the data loaders hang. "
                          "If False, use the default data loader logic that starts new processes for "
                          "each epoch.")
    # The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and
    # "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux
    # can reduce the chance of stuck jobs.
    multiprocessing_start_method: MultiprocessingStartMethod = \
        param.ClassSelector(class_=MultiprocessingStartMethod,
                            default=(MultiprocessingStartMethod.spawn if is_windows()
                                     else MultiprocessingStartMethod.fork),
                            doc="Method to be used to start child processes in pytorch. Should be one of forkserver, "
                                "fork or spawn. If not specified, fork is used on Linux and spawn on Windows. "
                                "Set to forkserver as a possible remedy for stuck jobs.")
    output_to: Optional[str] = \
        param.String(default=None,
                     doc="If provided, the run outputs will be written to the given folder. If not provided, outputs "
                         "will go into a subfolder of the project root folder.")
    max_batch_grad_cam: int = param.Integer(default=0,
                                            doc="Max number of validation batches for which to save gradCam images. "
                                                "By default visualizations are saved for all images "
                                                "in the validation set")
    label_smoothing_eps: float = param.Number(0.0, bounds=(0.0, 1.0),
                                              doc="Target smoothing value for label smoothing")
    log_to_parent_run: bool = param.Boolean(default=False,
                                            doc="If true, hyperdrive child runs will log their metrics "
                                                "to their parent run.")
    use_imbalanced_sampler_for_training: bool = param.Boolean(default=False,
                                                              doc="If True, use an imbalanced sampler during "
                                                                  "training.")
    drop_last_batch_in_training: bool = param.Boolean(default=False,
                                                      doc="If True, drop the last incomplete batch during "
                                                          "training. If all batches are complete, no batch gets "
                                                          "dropped. If False, keep all batches.")
    log_summaries_to_files: bool = param.Boolean(default=True,
                                                 doc="If True, model summaries are logged to files in "
                                                     "logs/model_summaries; if False, to stdout or driver log")
    mean_teacher_alpha: float = param.Number(bounds=(0, 1), allow_None=True, default=None,
                                             doc="If this value is set, the mean teacher model will be computed. "
                                                 "Currently only supported for scalar models. In this case, we only "
                                                 "report metrics and cross-validation results for "
                                                 "the mean teacher model. Likewise the model used for inference "
                                                 "is the mean teacher model. The student model is only used for "
                                                 "training. Alpha is the momentum term for weight updates of the mean "
                                                 "teacher model. After each training step the mean teacher model "
                                                 "weights are updated using mean_teacher_weight = "
                                                 "alpha * (mean_teacher_weight) + (1-alpha) * (current_student_weights). ")

    def __init__(self, **params: Any) -> None:
        self._model_name = type(self).__name__
        # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here.
        self._datasets_for_training: Optional[Dict[ModelExecutionMode, Any]] = None
        self._datasets_for_inference: Optional[Dict[ModelExecutionMode, Any]] = None
        super().__init__(throw_if_unknown_param=True, **params)
        logging.info("Creating the default output folder structure.")
        self.create_filesystem(fixed_paths.repository_root_directory())

    def validate(self) -> None:
        """
        Validates the parameters stored in the present object.
        """
        if len(self.adam_betas) < 2:
            raise ValueError("The adam_betas parameter should be the coefficients used for computing running "
                             "averages of gradient and its square")
        if self.azure_dataset_id is None and self.local_dataset is None:
            raise ValueError("Either of local_dataset or azure_dataset_id must be set.")
        if self.number_of_cross_validation_splits == 1:
            raise ValueError(f"At least two splits required to perform cross validation, found "
                             f"number_of_cross_validation_splits={self.number_of_cross_validation_splits}")
        if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index:
            raise ValueError(f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, "
                             f"which is invalid for CV with {self.number_of_cross_validation_splits} splits.")
        elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1:
            raise ValueError(f"Cross validation split index must be -1 for a non cross validation run, "
                             f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
                             f"and cross_validation_split_index={self.cross_validation_split_index}")
        if self.l_rate_scheduler == LRSchedulerType.MultiStep:
            if not self.l_rate_multi_step_milestones:
                raise ValueError("Must specify l_rate_multi_step_milestones to use LR scheduler MultiStep")
            if sorted(set(self.l_rate_multi_step_milestones)) != self.l_rate_multi_step_milestones:
                raise ValueError("l_rate_multi_step_milestones must be a strictly increasing list")
            if self.l_rate_multi_step_milestones[0] <= 0:
                raise ValueError("l_rate_multi_step_milestones cannot be negative or 0.")

    @property
    def model_name(self) -> str:
        """
        Gets the human readable name of the model (e.g., Liver). This is usually set from the class name.
        :return: A model name as a string.
        """
        return self._model_name

    @property
    def model_category(self) -> ModelCategory:
        """
        Gets the high-level model category that this configuration object represents (segmentation or scalar output).
        """
        return self._model_category

    @property
    def is_segmentation_model(self) -> bool:
        """
        Returns True if the present model configuration belongs to the high-level category ModelCategory.Segmentation.
        """
        return self.model_category == ModelCategory.Segmentation

    @property
    def is_scalar_model(self) -> bool:
        """
        Returns True if the present model configuration belongs to the high-level category ModelCategory.Scalar,
        i.e. for Classification or Regression models.
        """
        return self.model_category.is_scalar

    @property
    def compute_grad_cam(self) -> bool:
        return self.max_batch_grad_cam > 0

    @property
    def min_l_rate(self) -> float:
        return self._min_l_rate

    @min_l_rate.setter
    def min_l_rate(self, value: float) -> None:
        if value > self.l_rate:
            raise ValueError("l_rate must be >= min_l_rate, found: {}, {}".format(self.l_rate, value))
        self._min_l_rate = value

    @property
    def outputs_folder(self) -> Path:
        """Gets the full path in which the model outputs should be stored."""
        return self.file_system_config.outputs_folder

    @property
    def logs_folder(self) -> Path:
        """Gets the full path in which the model logs should be stored."""
        return self.file_system_config.logs_folder

    @property
    def checkpoint_folder(self) -> str:
        """Gets the full path in which the model checkpoints should be stored during training."""
        return str(self.outputs_folder / CHECKPOINT_FOLDER)

    @property
    def visualization_folder(self) -> Path:
        """Gets the full path in which the visualizations notebooks should be saved during training."""
        return self.outputs_folder / VISUALIZATION_FOLDER

    @property
    def perform_cross_validation(self) -> bool:
        """
        True if cross validation will be performed as part of the training procedure.
        :return:
        """
        return self.number_of_cross_validation_splits > 1

    @property
    def overrides(self) -> Optional[Dict[str, Any]]:
        return self._overrides

    @property
    def dataset_data_frame(self) -> Optional[DataFrame]:
        """
        Gets the pandas data frame that the model uses.
        :return:
        """
        return self._dataset_data_frame

    @dataset_data_frame.setter
    def dataset_data_frame(self, data_frame: Optional[DataFrame]) -> None:
        """
        Sets the pandas data frame that the model uses.
        :param data_frame: The data frame to set.
        """
        self._dataset_data_frame = data_frame

    @property
    def metrics_data_frame_loggers(self) -> MetricsDataframeLoggers:
        """
        Gets the metrics data frame loggers for this config.
        :return:
        """
        return self._metrics_data_frame_loggers

    def set_output_to(self, output_to: PathOrString) -> None:
        """
        Adjusts the file system settings in the present object such that all outputs are written to the given folder.
        :param output_to: The absolute path to a folder that should contain the outputs.
        """
        if isinstance(output_to, Path):
            output_to = str(output_to)
        self.output_to = output_to
        self.create_filesystem()

    def create_filesystem(self, project_root: Path = fixed_paths.repository_root_directory()) -> None:
        """
        Creates new file system settings (outputs folder, logs folder) based on the information stored in the
        present object. If any of the folders do not yet exist, they are created.
        :param project_root: The root folder for the codebase that triggers the training run.
        """
        self.file_system_config = DeepLearningFileSystemConfig.create(project_root=project_root,
                                                                      model_name=self.model_name,
                                                                      is_offline_run=self.is_offline_run,
                                                                      output_to=self.output_to)

    def create_dataframe_loggers(self) -> None:
        """
        Initializes the metrics loggers that are stored in self._metrics_data_frame_loggers
        :return:
        """
        self._metrics_data_frame_loggers = MetricsDataframeLoggers(outputs_folder=self.outputs_folder)

    def should_load_checkpoint_for_training(self) -> bool:
        """Returns true if start epoch > 0, that is, if an existing checkpoint is used to continue training."""
        return self.start_epoch > 0

    def should_save_epoch(self, epoch: int) -> bool:
        """Returns True if the present epoch should be saved, as per the save_start_epoch and save_step_epochs
        settings. Epoch writing starts with the first epoch that is >= save_start_epoch, and that is evenly
        divisible by save_step_epochs. A checkpoint is always written for the last epoch (num_epochs), such that it
        is easy to overwrite num_epochs on the commandline without having to change the test parameters at the same
        time.
        :param epoch: The current epoch. The first epoch is assumed to be 1."""
        should_save_epoch = epoch >= self.save_start_epoch and epoch % self.save_step_epochs == 0
        is_last_epoch = epoch == self.num_epochs
        return should_save_epoch or is_last_epoch

    def get_train_epochs(self) -> List[int]:
        """
        Returns the epochs for which training will be performed.
        :return:
        """
        return list(range(self.start_epoch + 1, self.num_epochs + 1))

    def get_total_number_of_training_epochs(self) -> int:
        """
        Returns the number of epochs for which a model will be trained.
        :return:
        """
        return len(self.get_train_epochs())

    def get_total_number_of_save_epochs(self) -> int:
        """
        Returns the number of epochs for which a model checkpoint will be saved.
        :return:
        """
        return len(list(filter(self.should_save_epoch, self.get_train_epochs())))

    def get_total_number_of_validation_epochs(self) -> int:
        """
        Returns the number of epochs for which a model will be validated.
        :return:
        """
        return self.get_total_number_of_training_epochs()

    def get_test_epochs(self) -> List[int]:
        """
        Returns the list of epochs for which the model should be evaluated on full images in the test set.
        These are all epochs starting at self.test_start_epoch, in intervals of self.test_step_epochs.
        The last training epoch is always included. If either of the self.test_* fields is missing (set to None),
        only the last training epoch is returned.
        :return:
        """
        test_epochs = {self.num_epochs}
        if self.test_diff_epochs is not None and self.test_start_epoch is not None and \
                self.test_step_epochs is not None:
            for j in range(self.test_diff_epochs):
                epoch = self.test_start_epoch + self.test_step_epochs * j
                if epoch > self.num_epochs:
                    break
                test_epochs.add(epoch)
        return sorted(test_epochs)

    def get_path_to_checkpoint(self, epoch: int) -> Path:
        """
        Returns the full path to a checkpoint given an epoch.
        :param epoch: the epoch number
        :return: path to a checkpoint given an epoch
        """
        return create_checkpoint_path(path=fixed_paths.repository_root_directory() / self.checkpoint_folder,
                                      epoch=epoch)

    def get_effective_random_seed(self) -> int:
        """
        Returns the random seed set as part of this configuration. If the configuration corresponds
        to a cross validation split, then the cross validation fold index will be added to the
        set random seed in order to return the effective random seed.
        :return:
        """
        seed = self.random_seed
        if self.perform_cross_validation:
            # offset the random seed based on the cross validation split index so each
            # fold has a different initial random state.
            seed += self.cross_validation_split_index
        return seed

    @property  # type: ignore
    def use_gpu(self) -> bool:  # type: ignore
        """
        Returns True if a CUDA capable GPU is present and should be used, False otherwise.
        """
        if self._use_gpu is None:
            # Use a local import here because we don't want the whole file to depend on pytorch.
            from InnerEye.ML.utils.ml_util import is_gpu_available
            self._use_gpu = is_gpu_available()
        return self._use_gpu

    @use_gpu.setter
    def use_gpu(self, value: bool) -> None:
        """
        Sets the flag that controls the use of the GPU. Raises a ValueError if the value is True, but no GPU is
        present.
        """
        if value:
            # Use a local import here because we don't want the whole file to depend on pytorch.
            from InnerEye.ML.utils.ml_util import is_gpu_available
            if not is_gpu_available():
                raise ValueError("Can't set use_gpu to True if there is no CUDA capable GPU present.")
        self._use_gpu = value

    @property
    def use_data_parallel(self) -> bool:
        """
        Data parallel is used if GPUs are usable and the number of CUDA devices is greater than 1.
        :return:
        """
        _devices = self.get_cuda_devices()
        return _devices is not None and len(_devices) > 1

    def write_args_file(self, root: Optional[Path] = None) -> None:
        """
        Writes the current config to disk. The file is written either to the given folder, or if omitted,
        to the default outputs folder.
        """
        dst = (root or self.outputs_folder) / ARGS_TXT
        dst.write_text(data=str(self))

    def should_wait_for_other_cross_val_child_runs(self) -> bool:
        """
        Returns True if the current run is an online run and is the 0th cross validation split.
        In this case, this will be the run that will wait for all other child runs to finish in order
        to aggregate their results.
        :return:
        """
        return (not self.is_offline_run) and self.cross_validation_split_index == 0

    @property
    def is_offline_run(self) -> bool:
        """
        Returns True if the run is executing outside AzureML, or False if inside AzureML.
        """
        return is_offline_run_context(RUN_CONTEXT)

    @property
    def compute_mean_teacher_model(self) -> bool:
        """
        Returns True if the mean teacher model should be computed.
        """
        return self.mean_teacher_alpha is not None

    def __str__(self) -> str:
        """Returns a string describing the present object, as a list of key == value pairs."""
        arguments_str = "\nArguments:\n"
        property_dict = vars(self)
        keys = sorted(property_dict)
        for key in keys:
            arguments_str += "\t{:18}: {}\n".format(key, property_dict[key])
        return arguments_str
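# --- Hedged worked example (plain Python, no model config needed) ---
# The checkpoint rule implemented by should_save_epoch above: with save_start_epoch=2,
# save_step_epochs=2 and num_epochs=5, epochs 2 and 4 satisfy the step rule and epoch 5 is
# saved because it is the last epoch. The helper name is hypothetical.
def _epochs_that_would_be_saved(save_start_epoch: int, save_step_epochs: int, num_epochs: int) -> List[int]:
    def should_save(epoch: int) -> bool:
        return (epoch >= save_start_epoch and epoch % save_step_epochs == 0) or epoch == num_epochs

    return [e for e in range(1, num_epochs + 1) if should_save(e)]


assert _epochs_that_would_be_saved(2, 2, 5) == [2, 4, 5]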
from InnerEye.Azure.azure_util import is_running_on_azure_agent
from InnerEye.Common.common_util import is_windows
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.config import SegmentationModelBase, equally_weighted_classes
from InnerEye.ML.dataset.sample import PatientMetadata, Sample
from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.image_util import get_unit_image_header
from InnerEye.ML.utils.io_util import load_nifti_image
from InnerEye.ML.utils.ml_util import set_random_seed
from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops
from Tests.ML.util import assert_binary_files_match, assert_file_exists


@pytest.mark.skipif(is_windows(), reason="Plotting output is not consistent across platforms.")
@pytest.mark.parametrize("labels_to_boundary", [True, False])
def test_visualize_patch_sampling(test_output_dirs: OutputFolderForTests,
                                  labels_to_boundary: bool) -> None:
    """
    Tests if patch sampling and producing diagnostic images works as expected.
    :param test_output_dirs:
    :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that
    crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries.
    """
    set_random_seed(0)
    shape = (10, 30, 30)
    foreground_classes = ["fg"]
    class_weights = equally_weighted_classes(foreground_classes)
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(2, 10, 10),
    assert "Combination of input arguments is not recognized" in str(ex)


def compare_files(actual: List[Path], expected: List[str]) -> None:
    assert len(actual) == len(expected)
    for (f, e) in zip(actual, expected):
        assert f.exists()
        full_expected = full_ml_test_data_path(e)
        assert full_expected.exists()
        assert str(f).endswith(e)
        # To update the stored results, uncomment this line:
        # full_expected.write_bytes(f.read_bytes())
        assert file_as_bytes(f) == file_as_bytes(full_expected)


@pytest.mark.skipif(common_util.is_windows(), reason="Rendering of the graph is slightly different on Linux")
def test_plot_normalization_result(test_output_dirs: OutputFolderForTests) -> None:
    """
    Tests plotting of before/after histograms in photometric normalization.
    :return:
    """
    size = (3, 3, 3)
    image = np.zeros((1,) + size)
    for i, (z, y, x) in enumerate(itertools.product(range(size[0]), range(size[1]), range(size[2]))):
        image[0, z, y, x] = i
    labels = np.zeros((2,) + size)
    labels[1, 1, 1, 1] = 1
    sample = Sample(image=image,
                    labels=labels,
                    mask=np.ones(size),
from InnerEye.Common.output_directories import OutputFolderForTests
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.configs.classification.DummyMulticlassClassification import DummyMulticlassClassification
from InnerEye.ML.configs.classification.GlaucomaPublic import GlaucomaPublic
from InnerEye.ML.dataset.scalar_dataset import ScalarDataset
from InnerEye.ML.metrics_dict import MetricsDict
from InnerEye.ML.reports.classification_report import ReportedScalarMetrics, get_correct_and_misclassified_examples, \
    get_image_filepath_from_subject_id, get_image_labels_from_subject_id, get_image_outputs_from_subject_id, \
    get_k_best_and_worst_performing, get_labels_and_predictions, get_metric, get_metrics_table_for_prediction_target, \
    plot_image_from_filepath
from InnerEye.ML.reports.notebook_report import generate_classification_crossval_notebook, \
    generate_classification_notebook
from InnerEye.ML.scalar_config import ScalarModelBase


@pytest.mark.skipif(is_windows(), reason="Random timeout errors on windows.")
def test_generate_classification_report(test_output_dirs: OutputFolderForTests) -> None:
    reports_folder = Path(__file__).parent
    test_metrics_file = reports_folder / "test_metrics_classification.csv"
    val_metrics_file = reports_folder / "val_metrics_classification.csv"

    config = ScalarModelBase(label_value_column="label",
                             image_file_column="filePath",
                             subject_column="subject")
    config.local_dataset = test_output_dirs.root_dir / "dataset"
    config.local_dataset.mkdir()
    dataset_csv = config.local_dataset / "dataset.csv"
    image_file_name = "image.npy"
    dataset_csv.write_text("subject,filePath,label\n"
                           f"0,0_{image_file_name},0\n"
def _test_model_train(output_dirs: OutputFolderForTests,
                      image_channels: Any,
                      ground_truth_ids: Any,
                      no_mask_channel: bool = False) -> None:
    def _check_patch_centers(diagnostics_per_epoch: List[np.ndarray], should_equal: bool) -> None:
        patch_centers_epoch1 = diagnostics_per_epoch[0]
        assert len(diagnostics_per_epoch) > 1, "Not enough data to check patch centers, need at least 2"
        for diagnostic in diagnostics_per_epoch[1:]:
            assert np.array_equal(patch_centers_epoch1, diagnostic) == should_equal

    def _check_voxel_count(results_per_epoch: List[Dict[str, float]],
                           expected_voxel_count_per_epoch: List[float],
                           prefix: str) -> None:
        assert len(results_per_epoch) == len(expected_voxel_count_per_epoch)
        for epoch, (results, voxel_count) in enumerate(zip(results_per_epoch, expected_voxel_count_per_epoch)):
            # In the test data, both structures "region" and "region_1" are read from the same nifti file, hence
            # their voxel counts must be identical.
            for structure in ["region", "region_1"]:
                assert results[f"{MetricType.VOXEL_COUNT.value}/{structure}"] == pytest.approx(voxel_count, abs=1e-2), \
                    f"{prefix} voxel count mismatch for '{structure}' epoch {epoch}"

    def _mean(a: List[float]) -> float:
        return sum(a) / len(a)

    def _mean_list(lists: List[List[float]]) -> List[float]:
        return list(map(_mean, lists))

    logging_to_stdout(log_level=logging.DEBUG)
    train_config = DummyModel()
    train_config.local_dataset = base_path
    train_config.set_output_to(output_dirs.root_dir)
    train_config.image_channels = image_channels
    train_config.ground_truth_ids = ground_truth_ids
    train_config.mask_id = None if no_mask_channel else train_config.mask_id
    train_config.random_seed = 42
    train_config.class_weights = [0.5, 0.25, 0.25]
    train_config.store_dataset_sample = no_mask_channel
    train_config.check_exclusive = False

    if machine_has_gpu:
        expected_train_losses = [0.4554231, 0.4550124]
        expected_val_losses = [0.4553894, 0.4553061]
    else:
        expected_train_losses = [0.4554231, 0.4550112]
        expected_val_losses = [0.4553893, 0.4553061]
    loss_absolute_tolerance = 1e-6
    expected_learning_rates = [train_config.l_rate, 5.3589e-4]

    model_training_result, _ = model_train_unittest(train_config, output_folder=output_dirs)
    assert isinstance(model_training_result, StoringLogger)
    # Check that all metrics from the BatchTimeCallback are present
    # TODO: re-enable once the BatchTimeCallback is fixed
    # for epoch, epoch_results in model_training_result.results_per_epoch.items():
    #     for prefix in [TRAIN_PREFIX, VALIDATION_PREFIX]:
    #         for metric_type in [BatchTimeCallback.EPOCH_TIME,
    #                             BatchTimeCallback.BATCH_TIME + " avg",
    #                             BatchTimeCallback.BATCH_TIME + " max",
    #                             BatchTimeCallback.EXCESS_LOADING_TIME]:
    #             expected = BatchTimeCallback.METRICS_PREFIX + prefix + metric_type
    #             assert expected in epoch_results, f"Expected {expected} in results for epoch {epoch}"
    #             # Excess loading time can be zero because that only measure batches over the threshold
    #             if metric_type != BatchTimeCallback.EXCESS_LOADING_TIME:
    #                 value = epoch_results[expected]
    #                 assert isinstance(value, float)
    #                 assert value > 0.0, f"Time for {expected} should be > 0"

    actual_train_losses = model_training_result.get_train_metric(MetricType.LOSS.value)
    actual_val_losses = model_training_result.get_val_metric(MetricType.LOSS.value)
    print("actual_train_losses = {}".format(actual_train_losses))
    print("actual_val_losses = {}".format(actual_val_losses))

    def assert_all_close(metric: str, expected: List[float], **kwargs: Any) -> None:
        actual = model_training_result.get_train_metric(metric)
        assert np.allclose(actual, expected, **kwargs), f"Mismatch for {metric}: Got {actual}, expected {expected}"

    # check to make sure training batches are NOT all the same across epochs
    _check_patch_centers(model_training_result.train_diagnostics, should_equal=False)
    # check to make sure validation batches are all the same across epochs
    _check_patch_centers(model_training_result.val_diagnostics, should_equal=True)
    assert_all_close(MetricType.SUBJECT_COUNT.value, [3.0, 3.0])
    assert_all_close(MetricType.LEARNING_RATE.value, expected_learning_rates, rtol=1e-6)

    if is_windows():
        # Randomization comes out slightly different on Windows. Skip the rest of the detailed checks.
        return

    # Simple regression test: Voxel counts should be the same in both epochs on the validation set,
    # and be the same across 'region' and 'region_1' because they derive from the same Nifti files.
    # The following values are read off directly from the results of compute_dice_across_patches in the training loop.
    # This checks that averages are computed correctly, and that metric computers are reset after each epoch.
    train_voxels = [[82765.0, 83212.0, 82740.0], [82831.0, 82647.0, 83255.0]]
    val_voxels = [[82765.0, 83212.0], [82765.0, 83212.0]]
    _check_voxel_count(model_training_result.train_results_per_epoch(), _mean_list(train_voxels), "Train")
    _check_voxel_count(model_training_result.val_results_per_epoch(), _mean_list(val_voxels), "Val")

    assert np.allclose(actual_train_losses, expected_train_losses, atol=loss_absolute_tolerance), "Train losses"
    assert np.allclose(actual_val_losses, expected_val_losses, atol=loss_absolute_tolerance), "Val losses"
    # Check that the metric we track for Hyperdrive runs is actually written.
    assert TrackedMetrics.Val_Loss.value.startswith(VALIDATION_PREFIX)
    tracked_metric = TrackedMetrics.Val_Loss.value[len(VALIDATION_PREFIX):]
    for val_result in model_training_result.val_results_per_epoch():
        assert tracked_metric in val_result

    # The following values are read off directly from the results of compute_dice_across_patches in the
    # training loop. Results are slightly different for GPU, hence use a larger tolerance there.
    dice_tolerance = 1e-3 if machine_has_gpu else 4.5e-4
    train_dice_region = [[0.0, 0.0, 0.0], [0.0376, 0.0343, 0.1017]]
    train_dice_region1 = [[0.4845, 0.4814, 0.4829], [0.4822, 0.4747, 0.4426]]
    # There appears to be some amount of non-determinism here: When using a tolerance of 1e-4, we get occasional
    # test failures on Linux in the cloud (not on Windows, not on AzureML). Unclear where it comes from. Even when
    # failing here, the losses match up to the expected tolerance.
    assert_all_close("Dice/region", _mean_list(train_dice_region), atol=dice_tolerance)
    assert_all_close("Dice/region_1", _mean_list(train_dice_region1), atol=dice_tolerance)
    expected_average_dice = [_mean(train_dice_region[i] + train_dice_region1[i])  # type: ignore
                             for i in range(len(train_dice_region))]
    assert_all_close("Dice/AverageAcrossStructures", expected_average_dice, atol=dice_tolerance)

    # check output files/directories
    assert train_config.outputs_folder.is_dir()
    assert train_config.logs_folder.is_dir()
    # Tensorboard event files go into a Lightning subfolder (Pytorch Lightning default)
    assert (train_config.logs_folder / "Lightning").is_dir()
    assert len([(train_config.logs_folder / "Lightning").glob("events*")]) == 1

    assert train_config.num_epochs == 2
    # Checkpoint folder
    assert train_config.checkpoint_folder.is_dir()
    actual_checkpoints = list(train_config.checkpoint_folder.rglob("*.ckpt"))
    assert len(actual_checkpoints) == 1, f"Actual checkpoints: {actual_checkpoints}"
    assert (train_config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()
    assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()

    # Patch visualization: There should be 3 slices for each of the 2 subjects
    sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
    assert sampling_folder.is_dir()
    assert train_config.show_patch_sampling > 0
    assert len(list(sampling_folder.rglob("*.png"))) == 3 * train_config.show_patch_sampling

    # Test for saving of example images
    assert train_config.example_images_folder.is_dir() if train_config.store_dataset_sample else True
    example_files = list(train_config.example_images_folder.rglob("*.*"))
    assert len(example_files) == (3 * 2 * 2 if train_config.store_dataset_sample else 0)  # images x epochs x patients
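# --- Hedged arithmetic check (values copied from the regression data above) ---
# _check_voxel_count compares against the per-epoch mean of the voxel counts, e.g. the
# first training epoch [82765.0, 83212.0, 82740.0] averages to roughly 82905.67.
import pytest

train_voxels_epoch1 = [82765.0, 83212.0, 82740.0]
assert sum(train_voxels_epoch1) / len(train_voxels_epoch1) == pytest.approx(82905.6667, abs=1e-3)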
class DeepLearningConfig(WorkflowParams, DatasetParams, OutputParams, OptimizerParams,
                         TrainerParams, GenericConfig):
    """
    A class that holds all settings that are shared across segmentation models and regression/classification models.
    """
    _model_category: ModelCategory = param.ClassSelector(class_=ModelCategory,
                                                         doc="The high-level model category described by this config.")
    num_dataload_workers: int = param.Integer(2, bounds=(0, None),
                                              doc="The number of data loading workers (processes). When set to 0, "
                                                  "data loading runs in the same process (no process startup cost, "
                                                  "hence useful for unit testing), but it does not give the same "
                                                  "result as running with 1 worker process.")
    shuffle: bool = param.Boolean(True,
                                  doc="If true, the dataset will be shuffled randomly during training.")
    train_batch_size: int = param.Integer(4, bounds=(0, None),
                                          doc="The number of crops that make up one minibatch during training.")
    use_model_parallel: bool = param.Boolean(False,
                                             doc="If true, the neural network model is partitioned across all "
                                                 "available GPUs so that a large model can fit. It must not be used "
                                                 "together with data parallelism.")
    pin_memory: bool = param.Boolean(True, doc="Value of pin_memory argument to DataLoader")
    restrict_subjects: Optional[str] = \
        param.String(doc="Use at most this number of subjects for train, val, or test set (must be > 0 or None). "
                         "If None, do not modify the train, val, or test sets. If a string of the form 'i,j,k' where "
                         "i, j and k are integers, modify just the corresponding sets (i for train, j for val, k for "
                         "test). If any of i, j or k are missing or are negative, do not modify the corresponding "
                         "set. Thus a value of 20,,5 means limit training set to 20, keep validation set as is, and "
                         "limit test set to 5. If any of i,j,k is '+', discarded members of the other sets are added "
                         "to that set.",
                     allow_None=True)
    _dataset_data_frame: Optional[DataFrame] = \
        param.DataFrame(default=None,
                        doc="The dataframe that contains the dataset for the model. This is usually read from disk "
                            "from dataset.csv")
    avoid_process_spawn_in_data_loaders: bool = \
        param.Boolean(is_windows(), doc="If True, use a data loader logic that avoids spawning new processes at the "
                                        "start of each epoch. This speeds up training on both Windows and Linux, "
                                        "but on Linux, inference is currently disabled as the data loaders hang. "
                                        "If False, use the default data loader logic that starts new processes for "
                                        "each epoch.")
    max_batch_grad_cam: int = param.Integer(default=0, doc="Max number of validation batches for which "
                                                           "to save gradCam images. By default "
                                                           "visualizations are saved for all images "
                                                           "in the validation set")
    label_smoothing_eps: float = param.Number(0.0, bounds=(0.0, 1.0),
                                              doc="Target smoothing value for label smoothing")
    log_to_parent_run: bool = param.Boolean(default=False,
                                            doc="If true, hyperdrive child runs will log their metrics "
                                                "to their parent run.")
    use_imbalanced_sampler_for_training: bool = param.Boolean(default=False,
                                                              doc="If True, use an imbalanced sampler during training.")
    drop_last_batch_in_training: bool = param.Boolean(default=False,
                                                      doc="If True, drop the last incomplete batch during "
                                                          "training. If all batches are complete, no batch gets "
                                                          "dropped. 
If False, keep all batches.") log_summaries_to_files: bool = param.Boolean( default=True, doc= "If True, model summaries are logged to files in logs/model_summaries; " "if False, to stdout or driver log") mean_teacher_alpha: float = param.Number( bounds=(0, 1), allow_None=True, default=None, doc="If this value is set, the mean teacher model will be computed. " "Currently only supported for scalar models. In this case, we only " "report metrics and cross-validation results for " "the mean teacher model. Likewise the model used for inference " "is the mean teacher model. The student model is only used for " "training. Alpha is the momentum term for weight updates of the mean " "teacher model. After each training step the mean teacher model " "weights are updated using mean_teacher_" "weight = alpha * (mean_teacher_weight) " " + (1-alpha) * (current_student_weights). ") #: Name of the csv file providing information on the dataset to be used. dataset_csv: str = param.String( DATASET_CSV_FILE_NAME, doc= "Name of the CSV file providing information on the dataset to be used. " "For segmentation models, this file must contain at least the fields: `subject`, `channel`, `filePath`." ) def __init__(self, **params: Any) -> None: self._model_name = type(self).__name__ # This should be annotated as torch.utils.data.Dataset, but we don't want to import torch here. self._datasets_for_training: Optional[Dict[ModelExecutionMode, Any]] = None self._datasets_for_inference: Optional[Dict[ModelExecutionMode, Any]] = None self.recovery_start_epoch = 0 super().__init__(throw_if_unknown_param=True, **params) logging.info("Creating the default output folder structure.") self.create_filesystem(fixed_paths.repository_root_directory()) # Disable the PL progress bar because all InnerEye models have their own console output self.pl_progress_bar_refresh_rate = 0 self.extra_downloaded_run_id: Optional[Any] = None def validate(self) -> None: """ Validates the parameters stored in the present object. """ WorkflowParams.validate(self) OptimizerParams.validate(self) if self.azure_dataset_id is None and self.local_dataset is None: raise ValueError( "Either of local_dataset or azure_dataset_id must be set.") @property def model_category(self) -> ModelCategory: """ Gets the high-level model category that this configuration objects represents (segmentation or scalar output). """ return self._model_category @property def is_segmentation_model(self) -> bool: """ Returns True if the present model configuration belongs to the high-level category ModelCategory.Segmentation. """ return self.model_category == ModelCategory.Segmentation @property def is_scalar_model(self) -> bool: """ Returns True if the present model configuration belongs to the high-level category ModelCategory.Scalar i.e. for Classification or Regression models. """ return self.model_category.is_scalar @property def compute_grad_cam(self) -> bool: return self.max_batch_grad_cam > 0 @property def dataset_data_frame(self) -> Optional[DataFrame]: """ Gets the pandas data frame that the model uses. :return: """ return self._dataset_data_frame @dataset_data_frame.setter def dataset_data_frame(self, data_frame: Optional[DataFrame]) -> None: """ Sets the pandas data frame that the model uses. :param data_frame: The data frame to set. """ self._dataset_data_frame = data_frame def get_train_epochs(self) -> List[int]: """ Returns the epochs for which training will be performed. 
:return: """ return list(range(self.recovery_start_epoch + 1, self.num_epochs + 1)) def get_total_number_of_training_epochs(self) -> int: """ Returns the number of epochs for which a model will be trained. :return: """ return len(self.get_train_epochs()) def get_total_number_of_validation_epochs(self) -> int: """ Returns the number of epochs for which a model will be validated. :return: """ return self.get_total_number_of_training_epochs() @property def compute_mean_teacher_model(self) -> bool: """ Returns True if the mean teacher model should be computed. """ return self.mean_teacher_alpha is not None def __str__(self) -> str: """Returns a string describing the present object, as a list of key: value strings.""" arguments_str = "\nArguments:\n" # Avoid callable params, the bindings that are printed out can be humongous. # Avoid dataframes skip_params = { name for name, value in self.param.params().items() if isinstance(value, (param.Callable, param.DataFrame)) } for key, value in self.param.get_param_values(): if key not in skip_params: arguments_str += f"\t{key:40}: {value}\n" return arguments_str def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]: """ By default, uses torch.load to read and return the state dict from the checkpoint file, and does no modification of the checkpoint file. Overloading this function: When weights_url or local_weights_path is set, the file downloaded may not be in the exact format expected by the model's load_state_dict() - for example, pretrained Imagenet weights for networks may have mismatched layer names in different implementations. In such cases, you can overload this function to extract the state dict from the checkpoint. NOTE: The model checkpoint will be loaded using the torch function load_state_dict() with argument strict=False, so extra care needs to be taken to check that the state dict is valid. Check the logs for warnings related to missing and unexpected keys. See https://pytorch.org/tutorials/beginner/saving_loading_models.html#warmstarting-model-using-parameters -from-a-different-model for an explanation on why strict=False is useful when loading parameters from other models. :param path_to_checkpoint: Path to the checkpoint file. :return: Dictionary with model and optimizer state dicts. The dict should have at least the following keys: 1. Key ModelAndInfo.MODEL_STATE_DICT_KEY and value set to the model state dict. 2. Key ModelAndInfo.EPOCH_KEY and value set to the checkpoint epoch. Other (optional) entries corresponding to keys ModelAndInfo.OPTIMIZER_STATE_DICT_KEY and ModelAndInfo.MEAN_TEACHER_STATE_DICT_KEY are also supported. """ return load_checkpoint(path_to_checkpoint=path_to_checkpoint, use_gpu=self.use_gpu)
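
# Illustrative sketch (not part of InnerEye) of the exponential moving average update that the
# mean_teacher_alpha parameter above describes. Plain float dictionaries stand in for model weights
# so that the arithmetic stays visible; the helper name is an assumption made for this example.
def mean_teacher_update(teacher_weights: Dict[str, float],
                        student_weights: Dict[str, float],
                        alpha: float) -> Dict[str, float]:
    # mean_teacher_weight = alpha * mean_teacher_weight + (1 - alpha) * current_student_weight
    return {name: alpha * teacher_weights[name] + (1 - alpha) * student_weights[name]
            for name in teacher_weights}


# Example: with alpha = 0.9, a teacher weight of 1.0 and a student weight of 0.0 move to
# 0.9 * 1.0 + 0.1 * 0.0 = 0.9 after a single training step.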
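
# Minimal sketch of overloading load_checkpoint_and_modify, as its docstring suggests, for a checkpoint
# whose state dict keys carry a "module." prefix (for example, one saved from a DataParallel wrapper).
# The subclass name, the prefix and the fallback epoch are assumptions for this example only, and the
# imports of torch and ModelAndInfo are assumed to be available in the surrounding module.
class PretrainedWeightsConfig(DeepLearningConfig):
    def load_checkpoint_and_modify(self, path_to_checkpoint: Path) -> Dict[str, Any]:
        checkpoint = torch.load(path_to_checkpoint, map_location="cpu")
        # Some checkpoints nest the weights under "state_dict"; fall back to the full dict otherwise.
        state_dict = checkpoint.get("state_dict", checkpoint)
        remapped = {key[len("module."):] if key.startswith("module.") else key: value
                    for key, value in state_dict.items()}
        return {ModelAndInfo.MODEL_STATE_DICT_KEY: remapped,
                ModelAndInfo.EPOCH_KEY: checkpoint.get("epoch", 0)}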
def _test_model_train(output_dirs: OutputFolderForTests, image_channels: Any, ground_truth_ids: Any, no_mask_channel: bool = False) -> None: def _check_patch_centers(diagnostics_per_epoch: List[np.ndarray], should_equal: bool) -> None: patch_centers_epoch1 = diagnostics_per_epoch[0] assert len( diagnostics_per_epoch ) > 1, "Not enough data to check patch centers, need at least 2" for diagnostic in diagnostics_per_epoch[1:]: assert np.array_equal(patch_centers_epoch1, diagnostic) == should_equal def _check_voxel_count(results_per_epoch: List[Dict[str, float]], expected_voxel_count_per_epoch: List[float], prefix: str) -> None: assert len(results_per_epoch) == len(expected_voxel_count_per_epoch) for epoch, (results, voxel_count) in enumerate( zip(results_per_epoch, expected_voxel_count_per_epoch)): # In the test data, both structures "region" and "region_1" are read from the same nifti file, hence # their voxel counts must be identical. for structure in ["region", "region_1"]: assert results[f"{MetricType.VOXEL_COUNT.value}/{structure}"] == pytest.approx(voxel_count, abs=1e-2), \ f"{prefix} voxel count mismatch for '{structure}' epoch {epoch}" def _mean(a: List[float]) -> float: return sum(a) / len(a) def _mean_list(lists: List[List[float]]) -> List[float]: return list(map(_mean, lists)) logging_to_stdout(log_level=logging.DEBUG) train_config = DummyModel() train_config.local_dataset = base_path train_config.set_output_to(output_dirs.root_dir) train_config.image_channels = image_channels train_config.ground_truth_ids = ground_truth_ids train_config.mask_id = None if no_mask_channel else train_config.mask_id train_config.random_seed = 42 train_config.class_weights = [0.5, 0.25, 0.25] train_config.store_dataset_sample = True train_config.recovery_checkpoint_save_interval = 1 if machine_has_gpu: expected_train_losses = [0.4553468, 0.454904] expected_val_losses = [0.4553881, 0.4553041] else: expected_train_losses = [0.4553469, 0.4548947] expected_val_losses = [0.4553880, 0.4553041] loss_absolute_tolerance = 1e-6 expected_learning_rates = [train_config.l_rate, 5.3589e-4] checkpoint_handler = get_default_checkpoint_handler( model_config=train_config, project_root=Path(output_dirs.root_dir)) model_training_result = model_training.model_train( train_config, checkpoint_handler=checkpoint_handler) assert isinstance(model_training_result, ModelTrainingResults) def assert_all_close(metric: str, expected: List[float], **kwargs: Any) -> None: actual = model_training_result.get_training_metric(metric) assert np.allclose( actual, expected, **kwargs ), f"Mismatch for {metric}: Got {actual}, expected {expected}" # check to make sure training batches are NOT all the same across epochs _check_patch_centers(model_training_result.train_diagnostics, should_equal=False) # check to make sure validation batches are all the same across epochs _check_patch_centers(model_training_result.val_diagnostics, should_equal=True) assert_all_close(MetricType.SUBJECT_COUNT.value, [3.0, 3.0]) assert_all_close(MetricType.LEARNING_RATE.value, expected_learning_rates, rtol=1e-6) if is_windows(): # Randomization comes out slightly different on Windows. Skip the rest of the detailed checks. return # Simple regression test: Voxel counts should be the same in both epochs on the validation set, # and be the same across 'region' and 'region_1' because they derive from the same Nifti files. 
    # The following values are read off directly from the results of compute_dice_across_patches in the training
    # loop. This checks that averages are computed correctly, and that metric computers are reset after each epoch.
    train_voxels = [[83092.0, 83212.0, 82946.0], [83000.0, 82881.0, 83309.0]]
    val_voxels = [[82765.0, 83212.0], [82765.0, 83212.0]]
    _check_voxel_count(model_training_result.train_results_per_epoch, _mean_list(train_voxels), "Train")
    _check_voxel_count(model_training_result.val_results_per_epoch, _mean_list(val_voxels), "Val")
    actual_train_losses = model_training_result.get_training_metric(MetricType.LOSS.value)
    actual_val_losses = model_training_result.get_validation_metric(MetricType.LOSS.value)
    print("actual_train_losses = {}".format(actual_train_losses))
    print("actual_val_losses = {}".format(actual_val_losses))
    assert np.allclose(actual_train_losses, expected_train_losses, atol=loss_absolute_tolerance), "Train losses"
    assert np.allclose(actual_val_losses, expected_val_losses, atol=loss_absolute_tolerance), "Val losses"
    # Check that the metric we track for Hyperdrive runs is actually written.
    assert TrackedMetrics.Val_Loss.value.startswith(VALIDATION_PREFIX)
    tracked_metric = TrackedMetrics.Val_Loss.value[len(VALIDATION_PREFIX):]
    for val_result in model_training_result.val_results_per_epoch:
        assert tracked_metric in val_result
    # The following values are read off directly from the results of compute_dice_across_patches in the
    # training loop. Results are slightly different on CPU, hence use a larger tolerance there.
    dice_tolerance = 1e-4 if machine_has_gpu else 4.5e-4
    train_dice_region = [[0.0, 0.0, 4.0282e-04], [0.0309, 0.0334, 0.0961]]
    train_dice_region1 = [[0.4806, 0.4800, 0.4832], [0.4812, 0.4842, 0.4663]]
    # There appears to be some amount of non-determinism here: When using a tolerance of 1e-4, we get occasional
    # test failures on Linux in the cloud (not on Windows, not on AzureML). Unclear where it comes from. Even when
    # failing here, the losses match up to the expected tolerance.
assert_all_close("Dice/region", _mean_list(train_dice_region), atol=dice_tolerance) assert_all_close("Dice/region_1", _mean_list(train_dice_region1), atol=dice_tolerance) expected_average_dice = [ _mean(train_dice_region[i] + train_dice_region1[i]) # type: ignore for i in range(len(train_dice_region)) ] assert_all_close("Dice/AverageAcrossStructures", expected_average_dice, atol=dice_tolerance) # check output files/directories assert train_config.outputs_folder.is_dir() assert train_config.logs_folder.is_dir() # Tensorboard event files go into a Lightning subfolder (Pytorch Lightning default) assert (train_config.logs_folder / "Lightning").is_dir() assert len([(train_config.logs_folder / "Lightning").glob("events*")]) == 1 assert train_config.num_epochs == 2 # Checkpoint folder assert train_config.checkpoint_folder.is_dir() actual_checkpoints = list(train_config.checkpoint_folder.rglob("*.ckpt")) assert len( actual_checkpoints) == 2, f"Actual checkpoints: {actual_checkpoints}" assert (train_config.checkpoint_folder / RECOVERY_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file() assert (train_config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file() assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file() assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file() assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file() # Path visualization: There should be 3 slices for each of the 2 subjects sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER assert sampling_folder.is_dir() assert train_config.show_patch_sampling > 0 assert len(list(sampling_folder.rglob( "*.png"))) == 3 * train_config.show_patch_sampling # Time per epoch: Test that we have all these times logged. model_training_result.get_training_metric( MetricType.SECONDS_PER_EPOCH.value) model_training_result.get_validation_metric( MetricType.SECONDS_PER_EPOCH.value) model_training_result.get_validation_metric( MetricType.SECONDS_PER_BATCH.value) model_training_result.get_training_metric( MetricType.SECONDS_PER_BATCH.value)
    which are provided as input into the computational graph.
    """
    loader = cropping_dataset.as_data_loader(shuffle=True, batch_size=2,
                                             num_dataload_workers=num_dataload_workers)
    for i, item in enumerate(loader):
        item = CroppedSample.from_dict(item)
        assert item.image.numpy().dtype == ImageDataType.IMAGE.value
        assert item.labels.numpy().dtype == ImageDataType.SEGMENTATION.value
        assert item.mask.numpy().dtype == ImageDataType.MASK.value
        assert item.mask_center_crop.numpy().dtype == ImageDataType.MASK.value
        assert item.labels_center_crop.numpy().dtype == ImageDataType.SEGMENTATION.value


@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build")
def test_cropping_dataset_padding(cropping_dataset: CroppingDataset, num_dataload_workers: int) -> None:
    """
    Tests that the cropping dataset pads images, labels, and mask up to the requested crop size
    when the crop is larger than the original image, using zero padding.
    """
    cropping_dataset.args.crop_size = (300, 300, 300)
    cropping_dataset.args.padding_mode = PaddingMode.Zero
    loader = cropping_dataset.as_data_loader(shuffle=True, batch_size=2, num_dataload_workers=1)
    for i, item in enumerate(loader):
from InnerEye.Common.common_util import is_windows from InnerEye.Common.output_directories import TestOutputDirectories from InnerEye.ML.config import SegmentationModelBase, equally_weighted_classes from InnerEye.ML.dataset.sample import PatientMetadata, Sample from InnerEye.ML.plotting import resize_and_save, scan_with_transparent_overlay from InnerEye.ML.utils import io_util from InnerEye.ML.utils.image_util import get_unit_image_header from InnerEye.ML.utils.io_util import load_nifti_image from InnerEye.ML.utils.ml_util import set_random_seed from InnerEye.ML.visualizers.patch_sampling import visualize_random_crops from Tests.ML.util import assert_binary_files_match, assert_file_exists, is_running_on_azure from Tests.fixed_paths_for_tests import full_ml_test_data_path @pytest.mark.skipif( is_windows(), reason="Plotting output is not consistent across platforms.") @pytest.mark.parametrize("labels_to_boundary", [True, False]) def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories, labels_to_boundary: bool) -> None: """ Tests if patch sampling and producing diagnostic images works as expected. :param test_output_dirs: :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries. """ set_random_seed(0) shape = (10, 30, 30) foreground_classes = ["fg"] class_weights = equally_weighted_classes(foreground_classes) config = SegmentationModelBase(should_validate=False, crop_size=(2, 10, 10),
def check_log_file(path: Path, expected_csv: str, ignore_columns: List[str]) -> None:
    df_expected = pd.read_csv(StringIO(expected_csv))
    df_epoch_metrics_actual = pd.read_csv(path)
    for ignore_column in ignore_columns:
        assert ignore_column in df_epoch_metrics_actual
        # Timing columns cannot be compared because they vary from machine to machine.
        del df_epoch_metrics_actual[ignore_column]
        if ignore_column in df_expected:
            del df_expected[ignore_column]
    pd.testing.assert_frame_equal(df_expected, df_epoch_metrics_actual, check_less_precise=True)


@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows")
@pytest.mark.parametrize("model_name", ["DummyClassification", "DummyRegression"])
@pytest.mark.parametrize("number_of_offline_cross_validation_splits", [2])
@pytest.mark.parametrize("number_of_cross_validation_splits_per_fold", [2])
def test_run_ml_with_classification_model(test_output_dirs: TestOutputDirectories,
                                          number_of_offline_cross_validation_splits: int,
                                          number_of_cross_validation_splits_per_fold: int,
                                          model_name: str) -> None:
    """
    Test training and testing of classification models, when it is started together via run_ml.
    """
    logging_to_stdout()
    azure_config = get_default_azure_config()
    azure_config.train = True
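
# Usage sketch for check_log_file (the file name, column names and values below are made up for
# illustration): timing columns exist in the actual metrics file but are dropped from both frames
# before the comparison, so only the stable metrics are asserted on.
def _check_log_file_example() -> None:
    import tempfile
    expected_csv = "epoch,loss,seconds_per_epoch\n1,0.69,99.0\n"
    with tempfile.TemporaryDirectory() as tmp:
        actual_path = Path(tmp) / "epoch_metrics.csv"
        actual_path.write_text("epoch,loss,seconds_per_epoch\n1,0.69,12.3\n")
        check_log_file(actual_path, expected_csv, ignore_columns=["seconds_per_epoch"])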
class WorkflowParams(param.Parameterized): """ This class contains all parameters that affect how the whole training and testing workflow is executed. """ random_seed: int = param.Integer(42, doc="The seed to use for all random number generators.") number_of_cross_validation_splits: int = param.Integer(0, bounds=(0, None), doc="Number of cross validation splits for k-fold cross " "validation") cross_validation_split_index: int = param.Integer(DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, bounds=(-1, None), doc="The index of the cross validation fold this model is " "associated with when performing k-fold cross validation") inference_on_train_set: Optional[bool] = \ param.Boolean(None, doc="If set, enable/disable full image inference on training set after training.") inference_on_val_set: Optional[bool] = \ param.Boolean(None, doc="If set, enable/disable full image inference on validation set after training.") inference_on_test_set: Optional[bool] = \ param.Boolean(None, doc="If set, enable/disable full image inference on test set after training.") ensemble_inference_on_train_set: Optional[bool] = \ param.Boolean(None, doc="If set, enable/disable full image inference on the training set after ensemble training.") ensemble_inference_on_val_set: Optional[bool] = \ param.Boolean(None, doc="If set, enable/disable full image inference on validation set after ensemble training.") ensemble_inference_on_test_set: Optional[bool] = \ param.Boolean(None, doc="If set, enable/disable full image inference on test set after ensemble training.") weights_url: List[str] = param.List(default=[], class_=str, doc="If provided, a set of urls from which checkpoints will be downloaded" "and used for inference.") local_weights_path: List[Path] = param.List(default=[], class_=Path, doc="A list of checkpoints paths to use for inference, " "when the job is running outside Azure.") model_id: str = param.String(default="", doc="A model id string in the form 'model name:version' " "to use a registered model for inference.") generate_report: bool = param.Boolean(default=True, doc="If True (default), write a modelling report in HTML format. If False," "do not write that report.") pretraining_run_recovery_id: str = param.String(default=None, allow_None=True, doc="Extra run recovery id to download checkpoints from," "for custom modules (e.g. for loading pretrained weights)." "The downloaded RunRecovery object will be available in" "pretraining_run_checkpoints.") # The default multiprocessing start_method in both PyTorch and the Python standard library is "fork" for Linux and # "spawn" (the only available method) for Windows. There is some evidence that using "forkserver" on Linux # can reduce the chance of stuck jobs. multiprocessing_start_method: MultiprocessingStartMethod = \ param.ClassSelector(class_=MultiprocessingStartMethod, default=(MultiprocessingStartMethod.spawn if is_windows() else MultiprocessingStartMethod.fork), doc="Method to be used to start child processes in pytorch. Should be one of forkserver, " "fork or spawn. If not specified, fork is used on Linux and spawn on Windows. " "Set to forkserver as a possible remedy for stuck jobs.") monitoring_interval_seconds: int = param.Integer(0, doc="Seconds delay between logging GPU/CPU resource " "statistics. If 0 or less, do not log any resource " "statistics.") regression_test_folder: Optional[Path] = \ param.ClassSelector(class_=Path, default=None, allow_None=True, doc="A path to a folder that contains a set of files. 
At the end of training and " "model evaluation, all files given in that folder must be present in the job's output " "folder, and their contents must match exactly. When running in AzureML, you need to " "ensure that this folder is part of the snapshot that gets uploaded. The path should " "be relative to the repository root directory.") regression_test_csv_tolerance: float = \ param.Number(default=0.0, allow_None=False, doc="When comparing CSV files during regression tests, use this value as the maximum allowed " "relative difference of actual and expected results. Default: 0.0 (must match exactly)") def validate(self) -> None: if sum([bool(param) for param in [self.weights_url, self.local_weights_path, self.model_id]]) > 1: raise ValueError("Cannot specify more than one of local_weights_path, weights_url or model_id.") if self.model_id: if len(self.model_id.split(":")) != 2: raise ValueError( f"model_id should be in the form 'model_name:version', got {self.model_id}") if self.number_of_cross_validation_splits == 1: raise ValueError("At least two splits required to perform cross validation, but got " f"{self.number_of_cross_validation_splits}. To train without cross validation, set " "number_of_cross_validation_splits=0.") if 0 < self.number_of_cross_validation_splits <= self.cross_validation_split_index: raise ValueError(f"Cross validation split index is out of bounds: {self.cross_validation_split_index}, " f"which is invalid for CV with {self.number_of_cross_validation_splits} splits.") elif self.number_of_cross_validation_splits == 0 and self.cross_validation_split_index != -1: raise ValueError(f"Cross validation split index must be -1 for a non cross validation run, " f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} " f"and cross_validation_split_index={self.cross_validation_split_index}") def is_inference_required(self, model_proc: ModelProcessing, data_split: ModelExecutionMode) -> bool: """ Returns True if inference is required for this model_proc (single or ensemble) and data_split (Train/Val/Test). :param model_proc: Whether we are testing an ensemble or single model. :param data_split: Indicates which of the 3 sets (training, test, or validation) is being processed. :return: True if inference required. """ settings = { ModelProcessing.DEFAULT: { ModelExecutionMode.TRAIN: self.inference_on_train_set, ModelExecutionMode.TEST: self.inference_on_test_set, ModelExecutionMode.VAL: self.inference_on_val_set, }, ModelProcessing.ENSEMBLE_CREATION: { ModelExecutionMode.TRAIN: self.ensemble_inference_on_train_set, ModelExecutionMode.TEST: self.ensemble_inference_on_test_set, ModelExecutionMode.VAL: self.ensemble_inference_on_val_set, } } inference_option = settings[model_proc][data_split] if inference_option is not None: return inference_option # Defaults for when to run inference in the absence of any command line switches. # This depends on ModelProcessing, perform_cross_validation, and ModelExecutionMode. # If the current combination of these three parameters is not in this data structure, # then default to False. 
        defaults: Dict[ModelProcessing, Dict[bool, Dict[ModelExecutionMode, bool]]] = {
            ModelProcessing.DEFAULT: {
                False: {
                    ModelExecutionMode.TRAIN: False,
                    ModelExecutionMode.VAL: False,
                    ModelExecutionMode.TEST: True,
                }
            },
            ModelProcessing.ENSEMBLE_CREATION: {
                True: {
                    ModelExecutionMode.TRAIN: False,
                    ModelExecutionMode.VAL: False,
                    ModelExecutionMode.TEST: True,
                }
            }
        }
        try:
            return defaults[model_proc][self.perform_cross_validation][data_split]
        except KeyError:
            return False

    @property
    def is_offline_run(self) -> bool:
        """
        Returns True if the run is executing outside AzureML, or False if inside AzureML.
        """
        return is_offline_run_context(RUN_CONTEXT)

    @property
    def perform_cross_validation(self) -> bool:
        """
        True if cross validation will be performed as part of the training procedure.
        """
        return self.number_of_cross_validation_splits > 1

    def get_effective_random_seed(self) -> int:
        """
        Returns the random seed set as part of this configuration. If the configuration corresponds
        to a cross validation split, the cross validation fold index is added to the configured
        random seed so that the effective seed differs per fold.
        """
        seed = self.random_seed
        if self.perform_cross_validation:
            # Offset the random seed based on the cross validation split index, so that each
            # fold has a different initial random state.
            seed += self.cross_validation_split_index
        return seed
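
# Illustrative check (not part of the test suite) of the inference defaults and the effective random
# seed defined above, using a WorkflowParams instance with default settings (no cross validation).
def _workflow_defaults_example() -> None:
    config = WorkflowParams()
    # Without any inference_on_* switches, only the test set is inferred for a single model ...
    assert config.is_inference_required(ModelProcessing.DEFAULT, ModelExecutionMode.TEST)
    assert not config.is_inference_required(ModelProcessing.DEFAULT, ModelExecutionMode.VAL)
    # ... and the ensemble defaults only apply when cross validation is active, so the lookup
    # misses and inference is skipped.
    assert not config.is_inference_required(ModelProcessing.ENSEMBLE_CREATION, ModelExecutionMode.TEST)
    # Without cross validation, the effective seed is just the configured random_seed.
    assert config.get_effective_random_seed() == config.random_seed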