def test_config_str() -> None:
    """
    Check if dataframe fields are omitted from the string conversion of a config object.
    """
    config = DeepLearningConfig()
    df = DataFrame(columns=["foobar"], data=[1.0, 2.0])
    config.dataset_data_frame = df
    s = str(config)
    assert "foobar" not in s, f"Incorrect output: {s}"
def test_should_save_epoch(save_start_epoch: int,
                           save_step_epochs: int,
                           num_epochs: int,
                           expected_true: List[int],
                           verify_up_to_epoch: int) -> None:
    train_config = DeepLearningConfig(save_start_epoch=save_start_epoch,
                                      save_step_epochs=save_step_epochs,
                                      num_epochs=num_epochs,
                                      should_validate=False)
    for epoch in expected_true:
        assert train_config.should_save_epoch(epoch), "Epoch {} should be saved".format(epoch)
    expected_false = set(range(1, verify_up_to_epoch + 1)) - set(expected_true)
    for epoch in expected_false:
        assert not train_config.should_save_epoch(epoch), "Epoch {} should not be saved".format(epoch)
def __init__(self, config: DeepLearningConfig, *args: Any, **kwargs: Any) -> None:
    super().__init__(*args, **kwargs)
    self.outputs_folder = config.outputs_folder
    self.checkpoint_folder = config.checkpoint_folder
    self.model: DeviceAwareModule = DeviceAwareModule()
    # These two will be set later in set_optimizer_and_scheduler.
    # The ddp_spawn accelerator only works if the model configuration object is
    # not stored in here. Hence, need to do operations that require a full config
    # in a way that does not require storing the config.
    self.optimizer: Optional[Optimizer] = None
    self.l_rate_scheduler: Optional[_LRScheduler] = None
    self.cross_validation_split_index = config.cross_validation_split_index
    self.effective_random_seed = config.get_effective_random_seed()
    # This should be re-assigned on the outside, to a logger that is hooked up with the Trainer object.
    self.storing_logger = StoringLogger()
    # This will be initialized correctly in epoch_start
    self.random_state: Optional[RandomStateSnapshot] = None
    # Training loggers
    self.train_metrics_folder = self.outputs_folder / ModelExecutionMode.TRAIN.value
    self.val_metrics_folder = self.outputs_folder / ModelExecutionMode.VAL.value
    fixed_logger_columns = {LoggingColumns.CrossValidationSplitIndex.value: config.cross_validation_split_index}
    self.train_epoch_metrics_logger = DataframeLogger(self.train_metrics_folder / EPOCH_METRICS_FILE_NAME,
                                                      fixed_columns=fixed_logger_columns)
    self.val_epoch_metrics_logger = DataframeLogger(self.val_metrics_folder / EPOCH_METRICS_FILE_NAME,
                                                    fixed_columns=fixed_logger_columns)
    # Stores information about the checkpoint that created this model, if any.
    self.checkpoint_loading_message = ""
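The constructor above deliberately defers optimizer and scheduler creation to a set_optimizer_and_scheduler step that is not part of this excerpt. As a rough, hypothetical sketch only (the actual InnerEye implementation may use a factory that reads further optimizer settings from the config; Adam is assumed here), that step would look roughly like this:

# Hypothetical sketch, not the actual InnerEye method: build optimizer and LR
# scheduler from the config after construction, so the config object itself
# never has to be stored on the module (keeps ddp_spawn pickling happy).
def set_optimizer_and_scheduler(self, config: DeepLearningConfig) -> None:
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.l_rate)
    self.l_rate_scheduler = SchedulerWithWarmUp(config, self.optimizer)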
def test_get_total_number_of_training_epochs() -> None:
    c = DeepLearningConfig(num_epochs=2, should_validate=False)
    assert c.get_total_number_of_training_epochs() == 2
    c = DeepLearningConfig(num_epochs=10, should_validate=False)
    # Fake recovering training
    c.recovery_start_epoch = 2
    assert c.get_total_number_of_training_epochs() == 8
def test_use_gpu_flag(use_gpu_override: bool) -> None:
    config = DeepLearningConfig(should_validate=False)
    # On a model that does not have a use_gpu_override, the use_gpu flag should return True exactly when a GPU is
    # actually present.
    assert config.use_gpu == machine_has_gpu
    if machine_has_gpu:
        # If a GPU is present, the use_gpu flag should exactly return whatever the override says
        # (we can run in CPU mode even on a GPU)
        config.use_gpu = use_gpu_override
        assert config.use_gpu == use_gpu_override
    else:
        if use_gpu_override:
            # We are on a machine without a GPU, but the override says we should use the GPU: fail.
            with pytest.raises(ValueError) as ex:
                config.use_gpu = use_gpu_override
            assert "use_gpu to True if there is not CUDA capable GPU present" in str(ex)
        else:
            config.use_gpu = use_gpu_override
            assert config.use_gpu == use_gpu_override
def crossval_config_from_model_config(train_config: DeepLearningConfig) -> PlotCrossValidationConfig:
    """
    Creates a configuration for plotting cross validation results that populates some key fields from the given
    model training configuration.
    :param train_config: The model training configuration from which the key fields are copied.
    :return: A PlotCrossValidationConfig object populated from the training configuration.
    """
    # Default to the last epoch for segmentation models. For classification models, the epoch does not need to be
    # specified because datafiles contain results for all epochs.
    epoch = train_config.num_epochs if train_config.is_segmentation_model else None
    return PlotCrossValidationConfig(
        run_recovery_id=None,
        model_category=train_config.model_category,
        epoch=epoch,
        should_validate=False,
        number_of_cross_validation_splits=train_config.get_total_number_of_cross_validation_runs())
def test_lr_scheduler_with_warmup(warmup_epochs: int, expected_values: List[float]) -> None:
    """
    Check that warmup is applied correctly to a multistep scheduler.
    """
    initial_lr = 1
    optimizer = torch.optim.Adam([torch.ones(2, 2, requires_grad=True)], lr=initial_lr)
    config = DeepLearningConfig(l_rate=initial_lr,
                                l_rate_scheduler=LRSchedulerType.MultiStep,
                                l_rate_multi_step_milestones=[2, 4],
                                l_rate_multi_step_gamma=0.5,
                                l_rate_warmup_epochs=warmup_epochs,
                                l_rate_warmup=LRWarmUpType.Linear,
                                should_validate=False)
    scheduler = SchedulerWithWarmUp(config, optimizer)
    lrs = enumerate_scheduler(scheduler, 4)
    assert lrs == expected_values
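The enumerate_scheduler helper used in the test above is not shown in this excerpt. A minimal sketch of what it could look like, assuming it simply steps the scheduler for the requested number of epochs and records the learning rate that was in effect at each step (the name and return type are inferred from the call site; the real helper may differ):

# Minimal sketch, not the actual InnerEye helper: collect the LR for each of
# `epochs` scheduler steps so a test can compare it against expected values.
def enumerate_scheduler(scheduler: _LRScheduler, epochs: int) -> List[float]:
    lrs = []
    for _ in range(epochs):
        lrs.append(scheduler.get_last_lr()[0])  # LR in effect for the current epoch
        scheduler.step()
    return lrs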
def visualize_random_crops_for_dataset(config: DeepLearningConfig, output_folder: Optional[Path] = None) -> None:
    """
    For segmentation models only: This function generates visualizations of the effect of sampling random patches
    for training. Visualizations are stored in both Nifti format, and as 3 PNG thumbnail files, in the output folder.
    :param config: The model configuration.
    :param output_folder: The folder in which the visualizations should be written. If not provided, use a subfolder
    "patch_sampling" in the model's default output folder.
    """
    if not isinstance(config, SegmentationModelBase):
        return
    dataset_splits = config.get_dataset_splits()
    # Load a sample using the full image data loader
    full_image_dataset = FullImageDataset(config, dataset_splits.train)
    output_folder = output_folder or config.outputs_folder / PATCH_SAMPLING_FOLDER
    count = min(config.show_patch_sampling, len(full_image_dataset))
    for sample_index in range(count):
        sample = full_image_dataset.get_samples_at_index(index=sample_index)[0]
        visualize_random_crops(sample, config, output_folder=output_folder)
def get_recovery_path_test(config: DeepLearningConfig,
                           run_recovery: Optional[RunRecovery],
                           epoch: int) -> Optional[List[Path]]:
    """
    Decides the checkpoint path to use for inference/registration. If a run recovery object is used, use the
    checkpoint from there. If this checkpoint does not exist, or a run recovery object is not supplied, use the
    checkpoints from the current run.
    :param config: The model configuration.
    :param run_recovery: Optional run recovery object.
    :param epoch: Epoch to recover.
    :return: Constructed checkpoint path to recover from.
    """
    if run_recovery:
        checkpoint_paths = run_recovery.get_checkpoint_paths(epoch)
        checkpoint_exists = []
        # Discard any checkpoint paths that do not exist - they will make inference/registration fail.
        # This can happen when some child runs fail; it may still be worth running inference
        # or registering the model.
        for path in checkpoint_paths:
            if path.is_file():
                checkpoint_exists.append(path)
            else:
                logging.warning(f"Could not recover checkpoint path {path}")
        if len(checkpoint_exists) > 0:
            return checkpoint_exists
        logging.warning(f"Using checkpoints from current run, "
                        f"could not find any run recovery checkpoints for epoch {epoch}")
    # At this point we have not found the checkpoint(s) in the run being recovered. This is probably because
    # the epoch is from the current run, which has been doing more training, so we look for it there.
    checkpoint_path = config.get_path_to_checkpoint(epoch)
    if not checkpoint_path.is_file():
        logging.warning(f"Could not find checkpoint at path {checkpoint_path}")
        return None
    return [checkpoint_path]
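For illustration, a hypothetical call site could use the None return value to skip inference entirely. The variables and the run_inference entry point below are placeholders, not the actual InnerEye code:

# Hypothetical usage sketch: prefer recovered checkpoints, fall back to the
# current run, and bail out gracefully if nothing usable exists on disk.
checkpoint_paths = get_recovery_path_test(config=config, run_recovery=run_recovery, epoch=config.num_epochs)
if checkpoint_paths is None:
    logging.warning("Skipping inference because no checkpoint could be found.")
else:
    run_inference(checkpoint_paths)  # placeholder for the actual inference entry point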
def test_validate() -> None:
    # DeepLearningConfig cannot be initialized with neither of these parameters set
    with pytest.raises(ValueError) as ex:
        DeepLearningConfig(local_dataset=None, azure_dataset_id=None)
    assert ex.value.args[0] == "Either of local_dataset or azure_dataset_id must be set."
    # The following should be okay
    DeepLearningConfig(local_dataset=Path("foo"))
    DeepLearningConfig(azure_dataset_id="bar")
    # DeepLearningConfig cannot be initialized with both these parameters set
    with pytest.raises(ValueError) as ex:
        DeepLearningConfig(local_dataset=Path("foo"),
                           local_weights_path=Path("foo"),
                           weights_url="bar")
    assert ex.value.args[0] == "Cannot specify both local_weights_path and weights_url."
    # The following should be okay
    DeepLearningConfig(local_dataset=Path("foo"), local_weights_path=Path("foo"))
    DeepLearningConfig(local_dataset=Path("foo"), weights_url="bar")
def test_get_total_number_of_training_epochs() -> None:
    c = DeepLearningConfig(num_epochs=2, should_validate=False)
    assert c.get_total_number_of_training_epochs() == 2
    c = DeepLearningConfig(num_epochs=10, start_epoch=5, should_validate=False)
    assert c.get_total_number_of_training_epochs() == 5
def test_get_test_epochs() -> None:
    """
    Test if the creation of the list of epochs for model testing will always contain at least the last training epoch.
    """
    c = DeepLearningConfig(num_epochs=2, test_start_epoch=100, test_diff_epochs=2, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [2]
    c = DeepLearningConfig(num_epochs=100, test_start_epoch=100, test_diff_epochs=2, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100]
    c = DeepLearningConfig(num_epochs=150, test_start_epoch=100, test_diff_epochs=2, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100, 110, 150]
    c = DeepLearningConfig(num_epochs=100, test_start_epoch=100, test_diff_epochs=0, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100]
    c = DeepLearningConfig(num_epochs=100, test_start_epoch=200, test_diff_epochs=None, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100]
def test_get_test_epochs() -> None:
    """
    Test if the creation of the list of epochs for model testing will always contain at least the last training epoch.
    """
    c = DeepLearningConfig(num_epochs=2, test_start_epoch=100, test_diff_epochs=2, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [2]
    c = DeepLearningConfig(num_epochs=100, test_start_epoch=100, test_diff_epochs=2, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100]
    c = DeepLearningConfig(num_epochs=150, test_start_epoch=100, test_diff_epochs=2, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100, 110, 150]
    c = DeepLearningConfig(num_epochs=100, test_start_epoch=100, test_diff_epochs=0, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100]
    c = DeepLearningConfig(num_epochs=100, test_start_epoch=200, test_diff_epochs=None, test_step_epochs=10,
                           should_validate=False)
    assert c.get_test_epochs() == [100]
    c = DeepLearningConfig(num_epochs=100, epochs_to_test=[1, 3, 5], should_validate=False)
    assert c.get_test_epochs() == [1, 3, 5, 100]
    # epochs_to_test should have precedence over (test_start_epoch, test_diff_epochs and test_step_epochs)
    c = DeepLearningConfig(num_epochs=150, epochs_to_test=[1, 3, 5], test_start_epoch=100, test_diff_epochs=2,
                           test_step_epochs=10, should_validate=False)
    assert c.get_test_epochs() == [1, 3, 5, 150]
# ------------------------------------------------------------------------------------------
#  Copyright (c) Microsoft Corporation. All rights reserved.
#  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
import pytest

from InnerEye.ML.deep_learning_config import DeepLearningConfig, TrainerParams
from InnerEye.ML.lightning_container import LightningContainer
from Tests.ML.util import machine_has_gpu


@pytest.mark.cpu_and_gpu
@pytest.mark.parametrize("config", [DeepLearningConfig(should_validate=False), LightningContainer()])
def test_use_gpu_flag(config: TrainerParams) -> None:
    """
    Test that the use_gpu flag is set correctly on both InnerEye configs and containers.
    This checks for a bug in an earlier version where it was off for containers only.
    """
    # With the default settings, the use_gpu flag should return True exactly when a GPU is
    # actually present.
    assert config.use_gpu == machine_has_gpu
    if machine_has_gpu:
        # If there is a GPU present, only setting max_num_gpus to 0 should make the use_gpu flag False.
        config.max_num_gpus = -1
        assert config.use_gpu
        config.max_num_gpus = 1
        assert config.use_gpu
# ------------------------------------------------------------------------------------------
import pytest

from InnerEye.Common import common_util
from InnerEye.ML.deep_learning_config import DeepLearningConfig, TrainerParams
from InnerEye.ML.lightning_container import LightningContainer
from Tests.ML.util import machine_has_gpu


@pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build")
@pytest.mark.cpu_and_gpu
@pytest.mark.parametrize("config", [DeepLearningConfig(), LightningContainer()])
def test_use_gpu_flag(config: TrainerParams) -> None:
    """
    Test that the use_gpu flag is set correctly on both InnerEye configs and containers.
    This checks for a bug in an earlier version where it was off for containers only.
    """
    # With the default settings, the use_gpu flag should return True exactly when a GPU is
    # actually present.
    assert config.use_gpu == machine_has_gpu
    if machine_has_gpu:
        # If there is a GPU present, only setting max_num_gpus to 0 should make the use_gpu flag False.
        config.max_num_gpus = -1
        assert config.use_gpu
        config.max_num_gpus = 1
        assert config.use_gpu
        config.max_num_gpus = 0
        assert not config.use_gpu