def test_get_checkpoints_to_test(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)

    # Set a local_weights_path to get checkpoint from. Model has not trained and no run recovery provided,
    # so the local weights should be used ignoring any epochs to test
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    create_checkpoint_file(local_weights_path)
    checkpoint_handler.container.local_weights_path = [local_weights_path]
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    checkpoint_and_paths = checkpoint_handler.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0] == local_weights_path

    checkpoint_handler.additional_training_done()
    checkpoint_handler.container.checkpoint_folder.mkdir(parents=True)

    # Create a checkpoint file to make it seem like training has happened
    expected_checkpoint = config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    expected_checkpoint.touch()
    checkpoint_and_paths = checkpoint_handler.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0] == expected_checkpoint

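# The tests in this file call a `create_checkpoint_file` helper that lives in the shared
# test utilities and is not shown here. A minimal sketch of what such a helper could look
# like (an illustrative assumption, not the repository's exact code): all the tests need
# is a file that torch.load can deserialize.
import torch
from pathlib import Path


def _create_checkpoint_file_sketch(file: Path) -> None:
    # Hypothetical minimal payload; any dict that torch.load can read back would do.
    weights = {'state_dict': {'dummy_layer.weight': torch.zeros((2, 2))}}
    torch.save(weights, str(file))
    assert torch.load(str(file)), "The checkpoint file that was just written should be readable"
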
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None

    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]
    actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN, checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)

def test_get_recovery_path_train(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    assert checkpoint_handler.get_recovery_or_checkpoint_path_train() is None

def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    # This fails with 16bit precision, saying "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are
    # unsafe to autocast. Many models use a sigmoid layer right before the binary cross entropy layer. In this case,
    # combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits or
    # torch.nn.BCEWithLogitsLoss. binary_cross_entropy_with_logits and BCEWithLogits are safe to autocast."
    config.use_mixed_precision = False
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        model_train(config, get_default_checkpoint_handler(model_config=config,
                                                           project_root=test_output_dirs.root_dir))

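# `_get_mock_sequence_dataset` supplies the data frame that the sequence models train on.
# Judging from test_rnn_classifier_via_config_2 below, it parses CSV text with the columns
# subject,index,feature,label. A hedged sketch, assuming pandas; the default contents used
# when no argument is given are not shown in this file, so a placeholder stands in for them:
from io import StringIO
from typing import Optional
import pandas as pd


def _get_mock_sequence_dataset_sketch(dataset_contents: Optional[str] = None) -> pd.DataFrame:
    if dataset_contents is None:
        # Placeholder default; the real helper presumably ships its own fixed CSV contents.
        dataset_contents = "subject,index,feature,label\nS0,0,1,True\nS0,1,0,True\n"
    return pd.read_csv(StringIO(dataset_contents), dtype=str)
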
def test_get_recovery_path_train(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.outputs_folder.mkdir()
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    assert checkpoint_handler.get_recovery_path_train() is None

    # weights from local_weights_path and weights_url will be modified if needed and stored at this location
    expected_path = checkpoint_handler.output_params.outputs_folder / WEIGHTS_FILE

    # Set a weights_url to get checkpoint from
    checkpoint_handler.azure_config.run_recovery_id = ""
    checkpoint_handler.container.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.local_weights_path == expected_path
    assert checkpoint_handler.get_recovery_path_train() == expected_path

    # Set a local_weights_path to get checkpoint from
    checkpoint_handler.container.weights_url = ""
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    create_checkpoint_file(local_weights_path)
    checkpoint_handler.container.local_weights_path = local_weights_path
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.local_weights_path == expected_path
    assert checkpoint_handler.get_recovery_path_train() == expected_path

def test_use_checkpoint_paths_or_urls(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)

    # No checkpoint handling options set.
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert not checkpoint_handler.run_recovery
    assert not checkpoint_handler.trained_weights_paths

    # Set a weights_url: the file should be downloaded into the checkpoint folder, keeping the URL's filename
    checkpoint_handler.azure_config.run_recovery_id = ""
    checkpoint_handler.container.weights_url = [EXTERNAL_WEIGHTS_URL_EXAMPLE]
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    expected_download_path = checkpoint_handler.output_params.checkpoint_folder / MODEL_WEIGHTS_DIR_NAME / \
                             os.path.basename(urlparse(EXTERNAL_WEIGHTS_URL_EXAMPLE).path)
    assert checkpoint_handler.trained_weights_paths[0] == expected_download_path
    assert checkpoint_handler.trained_weights_paths[0].is_file()

    # Set a local_weights_path
    checkpoint_handler.container.weights_url = []
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    create_checkpoint_file(local_weights_path)
    checkpoint_handler.container.local_weights_path = [local_weights_path]
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.trained_weights_paths[0] == local_weights_path
    assert checkpoint_handler.trained_weights_paths[0].is_file()

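# The expected download location above is derived from the URL's final path component.
# With the torchvision ResNet18 URL that these tests appear to use (see the comment in
# test_get_local_weights_path_or_download further down), the derivation works like this;
# the URL literal here is an assumption for illustration:
import os
from urllib.parse import urlparse

_example_url = "https://download.pytorch.org/models/resnet18-5c106cde.pth"
assert os.path.basename(urlparse(_example_url).path) == "resnet18-5c106cde.pth"
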
def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.use_mixed_precision = True
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        model_train(config, get_default_checkpoint_handler(model_config=config,
                                                           project_root=test_output_dirs.root_dir))

def test_use_local_weights_file(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.outputs_folder.mkdir()

    # No checkpoint handling options set.
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert not checkpoint_handler.run_recovery
    assert not checkpoint_handler.local_weights_path

    # weights from local_weights_path and weights_url will be modified if needed and stored at this location
    expected_path = checkpoint_handler.model_config.outputs_folder / WEIGHTS_FILE

    # Set a weights_url
    checkpoint_handler.azure_config.run_recovery_id = ""
    config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.local_weights_path == expected_path
    assert checkpoint_handler.local_weights_path.is_file()

    # Set a local_weights_path
    config.weights_url = ""
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    create_checkpoint_file(local_weights_path)
    config.local_weights_path = local_weights_path
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.local_weights_path == expected_path

def test_download_checkpoints_from_single_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)

    # No checkpoint handling options set.
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    # Set a run recovery object - non ensemble
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.run_recovery

    expected_checkpoint_root = config.checkpoint_folder / run_recovery_id.split(":")[1]
    expected_paths = [create_recovery_checkpoint_path(path=expected_checkpoint_root),
                      expected_checkpoint_root / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX]
    assert checkpoint_handler.run_recovery.checkpoints_roots == [expected_checkpoint_root]
    for path in expected_paths:
        assert path.is_file()

def test_recover_training_mean_teacher_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Tests that training can be recovered from a previous checkpoint.
    """
    config = DummyClassification()
    config.mean_teacher_alpha = 0.999
    config.recovery_checkpoint_save_interval = 1
    config.set_output_to(test_output_dirs.root_dir / "original")
    os.makedirs(str(config.outputs_folder))

    original_checkpoint_folder = config.checkpoint_folder

    # First round of training
    config.num_epochs = 2
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    model_train(config, checkpoint_handler=checkpoint_handler)
    assert len(list(config.checkpoint_folder.glob("*.*"))) == 2

    # Restart training from the previous run
    config.start_epoch = 2
    config.num_epochs = 3
    config.set_output_to(test_output_dirs.root_dir / "recovered")
    os.makedirs(str(config.outputs_folder))
    # Make it seem like run recovery objects have been downloaded
    checkpoint_root = config.checkpoint_folder / "old_run"
    shutil.copytree(str(original_checkpoint_folder), str(checkpoint_root))
    checkpoint_handler.run_recovery = RunRecovery([checkpoint_root])
    model_train(config, checkpoint_handler=checkpoint_handler)

    # Remove recovery checkpoints
    shutil.rmtree(checkpoint_root)
    assert len(list(config.checkpoint_folder.glob("*.*"))) == 2

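# `RunRecovery` is constructed above with a single list of checkpoint folders. Judging
# from its use in this file (`RunRecovery([checkpoint_root])` here, and the
# `.checkpoints_roots` attribute in test_download_checkpoints_from_single_run), its core
# is little more than a container holding one checkpoint root folder per recovered
# (child) run. A hedged sketch of that shape, not the library's full class:
from dataclasses import dataclass
from pathlib import Path
from typing import List


@dataclass
class _RunRecoverySketch:
    checkpoints_roots: List[Path]
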
def test_get_local_weights_path_or_download(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)

    # If the model has neither local_weights_path nor weights_url set, this should fail.
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_local_checkpoints_path_or_download()
    assert "none of model_id, local_weights_path or weights_url is set in the model config." in ex.value.args[0]

    # If local_weights_path is set, get_local_checkpoints_path_or_download should not do anything.
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    create_checkpoint_file(local_weights_path)
    checkpoint_handler.container.local_weights_path = [local_weights_path]
    returned_weights_path = checkpoint_handler.get_local_checkpoints_path_or_download()
    assert local_weights_path == returned_weights_path[0]

    # Pointing the model to a URL should trigger a download
    checkpoint_handler.container.local_weights_path = []
    checkpoint_handler.container.weights_url = [EXTERNAL_WEIGHTS_URL_EXAMPLE]
    downloaded_weights = checkpoint_handler.get_local_checkpoints_path_or_download()
    expected_path = checkpoint_handler.output_params.checkpoint_folder / MODEL_WEIGHTS_DIR_NAME / \
                    os.path.basename(urlparse(EXTERNAL_WEIGHTS_URL_EXAMPLE).path)
    assert len(downloaded_weights) == 1
    assert downloaded_weights[0].is_file()
    assert expected_path == downloaded_weights[0]

    # Try again, should not re-download
    modified_time = downloaded_weights[0].stat().st_mtime
    downloaded_weights_new = checkpoint_handler.get_local_checkpoints_path_or_download()
    assert len(downloaded_weights_new) == 1
    assert downloaded_weights_new[0].stat().st_mtime == modified_time

def test_get_checkpoints_to_test_single_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    # Set a run recovery object and mark training as done: the checkpoint to test should then be
    # the last checkpoint of the current run
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.additional_training_done()
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    checkpoint_and_paths = checkpoint_handler.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0] == config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX

    # Create the checkpoint file to make it seem like training has happened
    expected_checkpoint = config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    expected_checkpoint.touch()
    checkpoint_and_paths = checkpoint_handler.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0] == expected_checkpoint

def test_get_checkpoints_to_test(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.outputs_folder.mkdir()
    manage_recovery = get_default_checkpoint_handler(model_config=config,
                                                     project_root=test_output_dirs.root_dir)

    # Set a local_weights_path to get checkpoint from. Model has not trained and no run recovery provided,
    # so the local weights should be used ignoring any epochs to test
    config.epochs_to_test = [1, 2]
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    stored_checkpoint = create_checkpoint_path(full_ml_test_data_path("checkpoints"), epoch=1)
    shutil.copyfile(str(stored_checkpoint), local_weights_path)
    config.local_weights_path = local_weights_path
    manage_recovery.discover_and_download_checkpoints_from_previous_runs()
    checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0].epoch == 0
    assert checkpoint_and_paths[0].checkpoint_paths == [manage_recovery.model_config.outputs_folder / WEIGHTS_FILE]

    # Now set a run recovery object and set the start epoch to 1, so we get one epoch from
    # run recovery and one from the training checkpoints
    manage_recovery.azure_config.run_recovery_id = DEFAULT_RUN_RECOVERY_ID
    config.start_epoch = 1
    manage_recovery.additional_training_done()
    manage_recovery.discover_and_download_checkpoints_from_previous_runs()

    # Copy checkpoint to make it seem like training has happened
    stored_checkpoint = create_checkpoint_path(path=full_ml_test_data_path("checkpoints"), epoch=1)
    expected_checkpoint = create_checkpoint_path(path=config.checkpoint_folder, epoch=2)
    shutil.copyfile(str(stored_checkpoint), str(expected_checkpoint))

    checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 2
    assert checkpoint_and_paths[0].epoch == 1
    assert checkpoint_and_paths[0].checkpoint_paths == [
        create_checkpoint_path(path=config.checkpoint_folder / DEFAULT_RUN_RECOVERY_ID.split(":")[1], epoch=1)]
    assert checkpoint_and_paths[1].epoch == 2
    assert checkpoint_and_paths[1].checkpoint_paths == [create_checkpoint_path(path=config.checkpoint_folder, epoch=2)]

    # This epoch does not exist
    config.epochs_to_test = [3]
    checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()
    assert checkpoint_and_paths is None

def test_download_recovery_checkpoints_from_ensemble_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert "has child runs" in str(ex)

def test_download_model_weights(test_output_dirs: OutputFolderForTests) -> None:
    # Download a sample ResNet model from a URL given in the Pytorch docs
    # The downloaded model does not match the architecture, which is okay since we are only testing the download here.
    model_config = DummyModel(weights_url=EXTERNAL_WEIGHTS_URL_EXAMPLE)
    manage_recovery = get_default_checkpoint_handler(model_config=model_config,
                                                     project_root=test_output_dirs.root_dir)
    result_path = manage_recovery.download_weights()
    assert result_path.is_file()

def test_download_checkpoints_from_hyperdrive_child_runs(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    hyperdrive_run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    checkpoint_handler.download_checkpoints_from_hyperdrive_child_runs(hyperdrive_run)
    expected_checkpoints = [config.checkpoint_folder / OTHER_RUNS_SUBDIR_NAME / str(i)
                            / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX for i in range(2)]
    checkpoint_paths = checkpoint_handler.get_best_checkpoints()
    assert checkpoint_paths
    assert len(checkpoint_paths) == 2
    assert set(expected_checkpoints) == set(checkpoint_paths)

def test_get_best_checkpoint_single_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)

    # We have not set a run_recovery, nor have we trained, so this should fail to get a checkpoint
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_best_checkpoint()
    assert "no run recovery object provided and no training has been done in this run" in ex.value.args[0]

    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    # We have set a run_recovery_id now, so this should work: Should download all checkpoints that are available
    # in the run, into a subfolder of the checkpoint folder
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    expected_checkpoint = config.checkpoint_folder / run_recovery_id.split(":")[1] \
                          / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    checkpoint_paths = checkpoint_handler.get_best_checkpoint()
    assert checkpoint_paths
    assert len(checkpoint_paths) == 1
    assert expected_checkpoint == checkpoint_paths[0]

    # From now on, the checkpoint handler will think that the run was started from epoch 1. We should pick up
    # the best checkpoint from the current run, or from the run recovery if no checkpoints have been written
    # in the resumed run.
    checkpoint_handler.additional_training_done()

    # Go back to the non-ensemble run recovery
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    config.start_epoch = 1

    # There is no checkpoint in the current run - use the one from run_recovery
    checkpoint_paths = checkpoint_handler.get_best_checkpoint()
    expected_checkpoint = config.checkpoint_folder / run_recovery_id.split(":")[1] \
                          / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    assert checkpoint_paths
    assert len(checkpoint_paths) == 1
    assert checkpoint_paths[0] == expected_checkpoint

    # Create a checkpoint to make it look like training has happened and a better checkpoint was written
    expected_checkpoint = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    expected_checkpoint.touch()
    checkpoint_paths = checkpoint_handler.get_best_checkpoint()
    assert checkpoint_paths
    assert len(checkpoint_paths) == 1
    assert expected_checkpoint == checkpoint_paths[0]

def test_get_recovery_path_train_single_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    # Run recovery should succeed: the recovery checkpoint is expected in the checkpoint folder
    expected_path = get_recovery_checkpoint_path(path=config.checkpoint_folder)
    assert checkpoint_handler.get_recovery_or_checkpoint_path_train() == expected_path

def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)

    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    config.test_start_epoch = 2
    config.test_step_epochs = 2
    config.test_diff_epochs = 2
    expected_epochs = [2, 4]
    assert config.get_test_epochs() == expected_epochs

    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None

    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]

    def extract_loss(results: List[MetricsDict]) -> List[float]:
        return [d.values()[MetricType.LOSS.value][0] for d in results]

    actual_train_loss = extract_loss(model_training_result.train_results_per_epoch)
    actual_val_loss = extract_loss(model_training_result.val_results_per_epoch)
    actual_learning_rates = list(flatten(model_training_result.learning_rates_per_epoch))

    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_learning_rates == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN, checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == expected_epochs

def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results = model_train(config, get_default_checkpoint_handler(model_config=config,
                                                                 project_root=test_output_dirs.root_dir))

    actual_train_loss = results.train_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    actual_val_loss = results.val_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
    assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) == \
           config.get_total_number_of_save_epochs()
    assert np.allclose(results.optimal_temperature_scale_values_per_checkpoint_epoch, [0.97], rtol=0.1)

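# A quick worked example of the labeling rule used when generating the toy dataset above:
# a sequence is labeled positive exactly when strictly more than half of its entries are 1.
import numpy as np

_seq = np.array([1, 1, 0, 1, 0, 1, 1, 0, 1])  # 6 ones out of 9 entries
assert np.sum(_seq) > (len(_seq) // 2)  # 6 > 4, so the label is True
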
def test_non_image_encoder(test_output_dirs: OutputFolderForTests,
                           hidden_layer_num_feature_channels: Optional[int]) -> None:
    """
    Test if we can build a simple MLP model that only feeds off non-image features.
    """
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    dataset_contents = _get_fake_dataset_contents()
    (dataset_folder / DATASET_CSV_FILE_NAME).write_text(dataset_contents)
    config = NonImageEncoder(should_validate=False,
                             hidden_layer_num_feature_channels=hidden_layer_num_feature_channels)
    config.local_dataset = dataset_folder
    config.max_batch_grad_cam = 1
    config.validate()
    # Run model training
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    model_train(config, checkpoint_handler=checkpoint_handler)
    # Run model inference
    MLRunner(config).model_inference_train_and_test(checkpoint_handler=checkpoint_handler)
    assert config.get_total_number_of_non_imaging_features() == 18

def test_get_recovery_path_train(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.outputs_folder.mkdir()
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    assert checkpoint_handler.get_recovery_path_train() is None

    # weights from local_weights_path and weights_url will be modified if needed and stored at this location
    expected_path = checkpoint_handler.model_config.outputs_folder / WEIGHTS_FILE

    # Set a weights_url to get checkpoint from
    checkpoint_handler.azure_config.run_recovery_id = ""
    config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.local_weights_path == expected_path
    config.start_epoch = 0
    assert checkpoint_handler.get_recovery_path_train() == expected_path
    # Can't resume training from an external checkpoint
    config.start_epoch = 20
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_recovery_path_train()
    assert "Start epoch is > 0, but no run recovery object has been provided to resume training." in ex.value.args[0]

    # Set a local_weights_path to get checkpoint from
    config.weights_url = ""
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    create_checkpoint_file(local_weights_path)
    config.local_weights_path = local_weights_path
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.local_weights_path == expected_path
    config.start_epoch = 0
    assert checkpoint_handler.get_recovery_path_train() == expected_path
    # Can't resume training from an external checkpoint
    config.start_epoch = 20
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_recovery_path_train()
    assert "Start epoch is > 0, but no run recovery object has been provided to resume training." in ex.value.args[0]

def test_model_inference_train_and_test(test_output_dirs: OutputFolderForTests,
                                        perform_cross_validation: bool,
                                        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows, disable that for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()
    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()

    # To make it seem like there was a training run before this, copy checkpoints into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path("checkpoints")
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))

    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    result, _, _ = MLRunner(config).model_inference_train_and_test(checkpoint_handler=checkpoint_handler)
    if result is None:
        raise ValueError("Error result cannot be None")
    assert isinstance(result, InferenceMetricsForSegmentation)

    for key, _ in result.epochs.items():
        epoch_folder_name = common_util.epoch_folder_name(key)
        for folder in [ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value, ModelExecutionMode.TEST.value]:
            results_folder = config.outputs_folder / epoch_folder_name / folder
            folder_exists = results_folder.is_dir()
            if folder in [ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value]:
                # Train/Val results are only written when training set inference is requested
                if perform_training_set_inference:
                    assert folder_exists
            else:
                assert folder_exists

def test_download_model_from_ensemble_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    # No checkpoint handling options set.
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    model_id = get_most_recent_model_id(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)

    # Set a model_id that points to an ensemble model
    checkpoint_handler.container.model_id = model_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.trained_weights_paths

    expected_model_root = config.checkpoint_folder / MODEL_WEIGHTS_DIR_NAME / FINAL_ENSEMBLE_MODEL_FOLDER
    model_inference_config = read_model_inference_config(expected_model_root / MODEL_INFERENCE_JSON_FILE_NAME)
    expected_paths = [expected_model_root / x for x in model_inference_config.checkpoint_paths]

    assert len(checkpoint_handler.trained_weights_paths) == len(expected_paths)
    assert set(checkpoint_handler.trained_weights_paths) == set(expected_paths)
    for path in expected_paths:
        assert path.is_file()

def test_recover_training_mean_teacher_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Tests that training can be recovered from a previous checkpoint.
    """
    config = DummyClassification()
    config.mean_teacher_alpha = 0.999
    config.autosave_every_n_val_epochs = 1
    config.set_output_to(test_output_dirs.root_dir / "original")
    os.makedirs(str(config.outputs_folder))

    original_checkpoint_folder = config.checkpoint_folder

    # First round of training
    config.num_epochs = 4
    model_train_unittest(config, output_folder=test_output_dirs)
    assert len(list(config.checkpoint_folder.glob("*.*"))) == 1
    assert (config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()

    # Restart training from the previous run
    config.num_epochs = 3
    config.set_output_to(test_output_dirs.root_dir / "recovered")
    os.makedirs(str(config.outputs_folder))
    # Make it seem like run recovery objects have been downloaded
    checkpoint_root = config.checkpoint_folder / "old_run"
    shutil.copytree(str(original_checkpoint_folder), str(checkpoint_root))

    # Create a new checkpoint handler and set run_recovery to the copied checkpoints
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.run_recovery = RunRecovery([checkpoint_root])
    model_train_unittest(config, output_folder=test_output_dirs, checkpoint_handler=checkpoint_handler)
    # Remove recovery checkpoints
    shutil.rmtree(checkpoint_root)
    assert len(list(config.checkpoint_folder.glob("*.ckpt"))) == 1

def test_get_recovery_path_train_single_run(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    # We have not set a start_epoch, but we are trying to use run_recovery, so this should fail
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_recovery_path_train()
    assert "Run recovery set, but start epoch is 0" in ex.value.args[0]

    # Run recovery with start epoch provided should succeed
    config.start_epoch = 20
    expected_path = create_recovery_checkpoint_path(path=config.checkpoint_folder / run_recovery_id.split(":")[1])
    assert checkpoint_handler.get_recovery_path_train() == expected_path

def test_get_local_weights_path_or_download(test_output_dirs: OutputFolderForTests) -> None:
    config = ModelConfigBase(should_validate=False)
    manage_recovery = get_default_checkpoint_handler(model_config=config,
                                                     project_root=test_output_dirs.root_dir)

    # If the model has neither local_weights_path nor weights_url set, this should fail.
    with pytest.raises(ValueError) as ex:
        manage_recovery.get_local_weights_path_or_download()
    assert "neither local_weights_path nor weights_url is set in the model config" in ex.value.args[0]

    # If local_weights_path is set, get_local_weights_path_or_download should not do anything.
    local_weights_path = manage_recovery.project_root / "exist.pth"
    local_weights_path.touch()
    manage_recovery.model_config.local_weights_path = local_weights_path
    returned_weights_path = manage_recovery.get_local_weights_path_or_download()
    assert local_weights_path == returned_weights_path

    # Pointing the model to a URL should trigger a download
    config.local_weights_path = None
    config.weights_url = EXTERNAL_WEIGHTS_URL_EXAMPLE
    downloaded_weights = manage_recovery.get_local_weights_path_or_download()
    # Download goes into <project_root> / "modelweights" / "resnet18-5c106cde.pth"
    expected_path = manage_recovery.project_root / MODEL_WEIGHTS_DIR_NAME / \
                    os.path.basename(urlparse(EXTERNAL_WEIGHTS_URL_EXAMPLE).path)
    assert downloaded_weights
    assert downloaded_weights.is_file()
    assert expected_path == downloaded_weights

    # Try again, should not re-download
    modified_time = downloaded_weights.stat().st_mtime
    downloaded_weights_new = manage_recovery.get_local_weights_path_or_download()
    assert downloaded_weights_new
    assert downloaded_weights_new.stat().st_mtime == modified_time

def test_model_inference_train_and_test(test_output_dirs: OutputFolderForTests,
                                        perform_cross_validation: bool,
                                        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows, disable that for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()
    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()

    checkpoint_path = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    create_model_and_store_checkpoint(config, checkpoint_path)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    result, _, _ = MLRunner(config).model_inference_train_and_test(checkpoint_handler=checkpoint_handler)
    if result is None:
        raise ValueError("Error result cannot be None")
    assert isinstance(result, InferenceMetricsForSegmentation)

    epoch_folder_name = common_util.BEST_EPOCH_FOLDER_NAME
    for folder in [ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value, ModelExecutionMode.TEST.value]:
        results_folder = config.outputs_folder / epoch_folder_name / folder
        folder_exists = results_folder.is_dir()
        if folder in [ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value]:
            # Train/Val results are only written when training set inference is requested
            if perform_training_set_inference:
                assert folder_exists
        else:
            assert folder_exists

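# `create_model_and_store_checkpoint` replaces the copied-checkpoint setup of the older
# version of this test (further up) with a directly written checkpoint. A hedged sketch of
# the idea, assuming a plain torch.save of a model's state_dict; the real helper presumably
# goes through the Lightning model class that the config creates, so treat this as an
# illustration only:
import torch
from pathlib import Path


def _create_model_and_store_checkpoint_sketch(model: torch.nn.Module, checkpoint_path: Path) -> None:
    checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
    # 'epoch' mirrors the counter that PyTorch Lightning checkpoints carry.
    torch.save({'epoch': 1, 'state_dict': model.state_dict()}, str(checkpoint_path))
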
def _test_model_train(output_dirs: OutputFolderForTests,
                      image_channels: Any,
                      ground_truth_ids: Any,
                      no_mask_channel: bool = False) -> None:
    def _check_patch_centers(diagnostics_per_epoch: List[np.ndarray], should_equal: bool) -> None:
        patch_centers_epoch1 = diagnostics_per_epoch[0]
        assert len(diagnostics_per_epoch) > 1, "Not enough data to check patch centers, need at least 2"
        for diagnostic in diagnostics_per_epoch[1:]:
            assert np.array_equal(patch_centers_epoch1, diagnostic) == should_equal

    def _check_voxel_count(results_per_epoch: List[Dict[str, float]],
                           expected_voxel_count_per_epoch: List[float],
                           prefix: str) -> None:
        assert len(results_per_epoch) == len(expected_voxel_count_per_epoch)
        for epoch, (results, voxel_count) in enumerate(zip(results_per_epoch, expected_voxel_count_per_epoch)):
            # In the test data, both structures "region" and "region_1" are read from the same nifti file, hence
            # their voxel counts must be identical.
            for structure in ["region", "region_1"]:
                assert results[f"{MetricType.VOXEL_COUNT.value}/{structure}"] == pytest.approx(voxel_count, abs=1e-2), \
                    f"{prefix} voxel count mismatch for '{structure}' epoch {epoch}"

    def _mean(a: List[float]) -> float:
        return sum(a) / len(a)

    def _mean_list(lists: List[List[float]]) -> List[float]:
        return list(map(_mean, lists))

    logging_to_stdout(log_level=logging.DEBUG)
    train_config = DummyModel()
    train_config.local_dataset = base_path
    train_config.set_output_to(output_dirs.root_dir)
    train_config.image_channels = image_channels
    train_config.ground_truth_ids = ground_truth_ids
    train_config.mask_id = None if no_mask_channel else train_config.mask_id
    train_config.random_seed = 42
    train_config.class_weights = [0.5, 0.25, 0.25]
    train_config.store_dataset_sample = True
    train_config.recovery_checkpoint_save_interval = 1

    if machine_has_gpu:
        expected_train_losses = [0.4553468, 0.454904]
        expected_val_losses = [0.4553881, 0.4553041]
    else:
        expected_train_losses = [0.4553469, 0.4548947]
        expected_val_losses = [0.4553880, 0.4553041]
    loss_absolute_tolerance = 1e-6
    expected_learning_rates = [train_config.l_rate, 5.3589e-4]

    checkpoint_handler = get_default_checkpoint_handler(model_config=train_config,
                                                        project_root=Path(output_dirs.root_dir))
    model_training_result = model_training.model_train(train_config, checkpoint_handler=checkpoint_handler)
    assert isinstance(model_training_result, ModelTrainingResults)

    def assert_all_close(metric: str, expected: List[float], **kwargs: Any) -> None:
        actual = model_training_result.get_training_metric(metric)
        assert np.allclose(actual, expected, **kwargs), f"Mismatch for {metric}: Got {actual}, expected {expected}"

    # Check to make sure training batches are NOT all the same across epochs
    _check_patch_centers(model_training_result.train_diagnostics, should_equal=False)
    # Check to make sure validation batches are all the same across epochs
    _check_patch_centers(model_training_result.val_diagnostics, should_equal=True)
    assert_all_close(MetricType.SUBJECT_COUNT.value, [3.0, 3.0])
    assert_all_close(MetricType.LEARNING_RATE.value, expected_learning_rates, rtol=1e-6)

    if is_windows():
        # Randomization comes out slightly different on Windows. Skip the rest of the detailed checks.
        return

    # Simple regression test: Voxel counts should be the same in both epochs on the validation set,
    # and be the same across 'region' and 'region_1' because they derive from the same Nifti files.
    # The following values are read off directly from the results of compute_dice_across_patches in the training loop.
    # This checks that averages are computed correctly, and that metric computers are reset after each epoch.
    train_voxels = [[83092.0, 83212.0, 82946.0], [83000.0, 82881.0, 83309.0]]
    val_voxels = [[82765.0, 83212.0], [82765.0, 83212.0]]
    _check_voxel_count(model_training_result.train_results_per_epoch, _mean_list(train_voxels), "Train")
    _check_voxel_count(model_training_result.val_results_per_epoch, _mean_list(val_voxels), "Val")

    actual_train_losses = model_training_result.get_training_metric(MetricType.LOSS.value)
    actual_val_losses = model_training_result.get_validation_metric(MetricType.LOSS.value)
    print("actual_train_losses = {}".format(actual_train_losses))
    print("actual_val_losses = {}".format(actual_val_losses))
    assert np.allclose(actual_train_losses, expected_train_losses, atol=loss_absolute_tolerance), "Train losses"
    assert np.allclose(actual_val_losses, expected_val_losses, atol=loss_absolute_tolerance), "Val losses"

    # Check that the metric we track for Hyperdrive runs is actually written.
    assert TrackedMetrics.Val_Loss.value.startswith(VALIDATION_PREFIX)
    tracked_metric = TrackedMetrics.Val_Loss.value[len(VALIDATION_PREFIX):]
    for val_result in model_training_result.val_results_per_epoch:
        assert tracked_metric in val_result

    # The following values are read off directly from the results of compute_dice_across_patches in the
    # training loop. Results are slightly different for CPU, hence use a larger tolerance there.
    dice_tolerance = 1e-4 if machine_has_gpu else 4.5e-4
    train_dice_region = [[0.0, 0.0, 4.0282e-04], [0.0309, 0.0334, 0.0961]]
    train_dice_region1 = [[0.4806, 0.4800, 0.4832], [0.4812, 0.4842, 0.4663]]
    # There appears to be some amount of non-determinism here: When using a tolerance of 1e-4, we get occasional
    # test failures on Linux in the cloud (not on Windows, not on AzureML). Unclear where it comes from. Even when
    # failing here, the losses match up to the expected tolerance.
    assert_all_close("Dice/region", _mean_list(train_dice_region), atol=dice_tolerance)
    assert_all_close("Dice/region_1", _mean_list(train_dice_region1), atol=dice_tolerance)
    expected_average_dice = [_mean(train_dice_region[i] + train_dice_region1[i])  # type: ignore
                             for i in range(len(train_dice_region))]
    assert_all_close("Dice/AverageAcrossStructures", expected_average_dice, atol=dice_tolerance)

    # Check output files/directories
    assert train_config.outputs_folder.is_dir()
    assert train_config.logs_folder.is_dir()
    # Tensorboard event files go into a Lightning subfolder (Pytorch Lightning default)
    assert (train_config.logs_folder / "Lightning").is_dir()
    assert len(list((train_config.logs_folder / "Lightning").glob("events*"))) == 1

    assert train_config.num_epochs == 2
    # Checkpoint folder
    assert train_config.checkpoint_folder.is_dir()
    actual_checkpoints = list(train_config.checkpoint_folder.rglob("*.ckpt"))
    assert len(actual_checkpoints) == 2, f"Actual checkpoints: {actual_checkpoints}"
    assert (train_config.checkpoint_folder / RECOVERY_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()
    assert (train_config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()
    assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()

    # Patch sampling visualization: There should be 3 slices for each of the 2 subjects
    sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
    assert sampling_folder.is_dir()
    assert train_config.show_patch_sampling > 0
    assert len(list(sampling_folder.rglob("*.png"))) == 3 * train_config.show_patch_sampling

    # Time per epoch: Test that we have all these times logged.
    model_training_result.get_training_metric(MetricType.SECONDS_PER_EPOCH.value)
    model_training_result.get_validation_metric(MetricType.SECONDS_PER_EPOCH.value)
    model_training_result.get_validation_metric(MetricType.SECONDS_PER_BATCH.value)
    model_training_result.get_training_metric(MetricType.SECONDS_PER_BATCH.value)

def test_train_classification_model(class_name: str, test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from training
    and testing. Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.class_names = [class_name]
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None

    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
    expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
    # Ensure that all metrics are computed on both training and validation set
    assert len(model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
                   MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                   MetricType.AREA_UNDER_PR_CURVE,
                   MetricType.AREA_UNDER_ROC_CURVE,
                   MetricType.CROSS_ENTROPY,
                   MetricType.LOSS,
                   MetricType.SECONDS_PER_BATCH,
                   MetricType.SECONDS_PER_EPOCH,
                   MetricType.SUBJECT_COUNT]:
        assert metric.value in model_training_result.train_results_per_epoch[0], f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[0], f"{metric.value} not in validation"

    actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"

    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN, checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = [0.636085, 0.735952]
    assert test_results.metrics.values(class_name)[MetricType.CROSS_ENTROPY.value] == \
           pytest.approx(expected_metrics, abs=1e-5)

    # Run the detailed logs file check only on CPU, it will contain slightly different metrics on GPU, but here
    # we want to mostly assert that the files look reasonable
    if machine_has_gpu:
        return
    # Check epoch_metrics.csv
    epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
    # Auto-format will break the long header line, hence the strange way of writing it!
    expected_epoch_metrics = \
        f"{LoggingColumns.Loss.value},{LoggingColumns.CrossEntropy.value}," \
        f"{LoggingColumns.AccuracyAtThreshold05.value},{LoggingColumns.LearningRate.value}," + \
        f"{LoggingColumns.AreaUnderRocCurve.value}," \
        f"{LoggingColumns.AreaUnderPRCurve.value}," \
        f"{LoggingColumns.AccuracyAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalsePositiveRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalseNegativeRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.OptimalThreshold.value}," \
        f"{LoggingColumns.SubjectCount.value},{LoggingColumns.Epoch.value}," \
        f"{LoggingColumns.CrossValidationSplitIndex.value}\n" + \
        """0.6866141557693481,0.6866141557693481,0.5,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,0,-1
0.6864652633666992,0.6864652633666992,0.5,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,1,-1
0.6863163113594055,0.6863162517547607,0.5,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,2,-1
0.6861673593521118,0.6861673593521118,0.5,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,3,-1
"""
    check_log_file(epoch_metrics_path, expected_epoch_metrics, ignore_columns=[])

    # Check metrics.csv: This contains the per-subject per-epoch model outputs
    # Randomization comes out slightly different on Windows, hence only execute the test on Linux
    if common_util.is_windows():
        return
    metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / SUBJECT_METRICS_FILE_NAME
    metrics_expected = \
        f"""epoch,subject,prediction_target,model_output,label,data_split,cross_validation_split_index
0,S2,{class_name},0.529514,1,Train,-1
0,S4,{class_name},0.521659,0,Train,-1
1,S4,{class_name},0.521482,0,Train,-1
1,S2,{class_name},0.529475,1,Train,-1
2,S4,{class_name},0.521305,0,Train,-1
2,S2,{class_name},0.529437,1,Train,-1
3,S2,{class_name},0.529399,1,Train,-1
3,S4,{class_name},0.521128,0,Train,-1
"""
    check_log_file(metrics_path, metrics_expected, ignore_columns=[])

    # Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
    # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
    inference_metrics_path = config.outputs_folder / get_epoch_results_path(ModelExecutionMode.TRAIN) / \
                             SUBJECT_METRICS_FILE_NAME
    inference_metrics_expected = \
        f"""prediction_target,subject,model_output,label,cross_validation_split_index,data_split
{class_name},S2,0.5293986201286316,1.0,-1,Train
{class_name},S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])

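# `check_log_file` compares a CSV file on disk against expected CSV contents given as a
# string. A hedged sketch of such a comparison, assuming pandas; the real helper may handle
# `ignore_columns` and numeric tolerances differently, so treat this as an illustration:
from io import StringIO
from pathlib import Path
from typing import List
import pandas as pd


def _check_log_file_sketch(path: Path, expected_csv: str, ignore_columns: List[str]) -> None:
    df_expected = pd.read_csv(StringIO(expected_csv)).drop(columns=ignore_columns)
    df_actual = pd.read_csv(path).drop(columns=ignore_columns)
    # check_exact=False gives a tolerance-based comparison for floating point columns.
    pd.testing.assert_frame_equal(df_expected, df_actual, check_exact=False)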