@pytest.mark.parametrize("use_mixed_precision", [False, True])  # parametrization assumed
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    model_training_result, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]
    actual_train_loss = model_training_result.get_metric(is_training=True,
                                                         metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False,
                                                       metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True,
                                                 metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
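# The tests in this file compare whole lists of per-epoch metrics in a single assertion.
# This works because pytest.approx compares sequences element-wise within the given
# tolerance, as this small self-contained demonstration shows:
def test_pytest_approx_compares_sequences() -> None:
    # abs=1e-6 tolerates floating point rounding, such as 0.1 + 0.2 != 0.3 exactly.
    assert [0.1 + 0.2, 1.0] == pytest.approx([0.3, 1.0], abs=1e-6)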
@pytest.mark.parametrize("use_mixed_precision", [False, True])  # parametrization assumed
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    config.test_start_epoch = 2
    config.test_step_epochs = 2
    config.test_diff_epochs = 2
    expected_epochs = [2, 4]
    assert config.get_test_epochs() == expected_epochs
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]

    def extract_loss(results: List[MetricsDict]) -> List[float]:
        return [d.values()[MetricType.LOSS.value][0] for d in results]

    actual_train_loss = extract_loss(model_training_result.train_results_per_epoch)
    actual_val_loss = extract_loss(model_training_result.val_results_per_epoch)
    # `flatten` is not defined in this excerpt; a stand-in sketch follows this test.
    actual_learning_rates = list(flatten(model_training_result.learning_rates_per_epoch))
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_learning_rates == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == expected_epochs
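# `flatten` is used above but not defined in this excerpt. Below is a minimal stand-in
# with the one-level-flattening behavior the tests rely on; this is an assumption about
# the real helper, which may instead come from a library such as more_itertools.
from typing import Iterable, Iterator, TypeVar

T = TypeVar("T")


def flatten_sketch(nested: Iterable[Iterable[T]]) -> Iterator[T]:
    """Yields the elements of each inner iterable in order, e.g. [[1, 2], [3]] -> 1, 2, 3."""
    for inner in nested:
        yield from inner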
@pytest.mark.parametrize("use_mixed_precision", [False, True])  # parametrization assumed
def test_train_classification_model(test_output_dirs: TestOutputDirectories,
                                    use_mixed_precision: bool) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from
    training and testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    config.test_start_epoch = 2
    config.test_step_epochs = 2
    config.test_diff_epochs = 2
    expected_epochs = [2, 4]
    assert config.get_test_epochs() == expected_epochs
    model_training_result = model_training.model_train(config)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    use_mixed_precision_and_gpu = use_mixed_precision and machine_has_gpu
    # The training loss comes out the same with and without mixed precision; only the
    # validation loss differs slightly.
    if use_mixed_precision_and_gpu:
        expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
        expected_val_loss = [0.737039, 0.736721, 0.736339, 0.735957]
    else:
        expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
        expected_val_loss = [0.737061, 0.736690, 0.736321, 0.735952]

    def extract_loss(results: List[MetricsDict]) -> List[float]:
        return [d.values()[MetricType.LOSS.value][0] for d in results]

    actual_train_loss = extract_loss(model_training_result.train_results_per_epoch)
    actual_val_loss = extract_loss(model_training_result.val_results_per_epoch)
    actual_learning_rates = list(flatten(model_training_result.learning_rates_per_epoch))
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_learning_rates == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == expected_epochs
    if use_mixed_precision_and_gpu:
        expected_metrics = {
            2: [0.635942, 0.736691],
            4: [0.636085, 0.735952],
        }
    else:
        expected_metrics = {
            2: [0.635941, 0.736690],
            4: [0.636084, 0.735952],
        }
    for epoch in expected_epochs:
        assert test_results.epochs[epoch].values()[MetricType.CROSS_ENTROPY.value] == \
               pytest.approx(expected_metrics[epoch], abs=1e-6)
    # Run detailed logs file check only on CPU, it will contain slightly different metrics
    # on GPU, but here we want to mostly assert that the files look reasonable
    if not machine_has_gpu:
        # Check log EPOCH_METRICS_FILE_NAME
        epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
        # Auto-format will break the long header line, hence the strange way of writing it!
        expected_epoch_metrics = \
            "loss,cross_entropy,accuracy_at_threshold_05,seconds_per_batch,seconds_per_epoch,learning_rate," + \
            "area_under_roc_curve,area_under_pr_curve,accuracy_at_optimal_threshold," \
            "false_positive_rate_at_optimal_threshold,false_negative_rate_at_optimal_threshold," \
            "optimal_threshold,subject_count,epoch,cross_validation_split_index\n" + \
            """0.6866141557693481,0.6866141557693481,0.5,0,0,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,1,-1
0.6864652633666992,0.6864652633666992,0.5,0,0,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,2,-1
0.6863163113594055,0.6863162517547607,0.5,0,0,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,3,-1
0.6861673593521118,0.6861673593521118,0.5,0,0,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,4,-1
"""
        # Timing columns cannot be compared across machines, hence ignore their values.
        check_log_file(epoch_metrics_path, expected_epoch_metrics,
                       ignore_columns=[LoggingColumns.SecondsPerBatch.value,
                                       LoggingColumns.SecondsPerEpoch.value])
        # Check log METRICS_FILE_NAME
        metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / METRICS_FILE_NAME
        metrics_expected = \
            """prediction_target,epoch,subject,model_output,label,cross_validation_split_index,data_split
Default,1,S4,0.5216594338417053,0.0,-1,Train
Default,1,S2,0.5295137763023376,1.0,-1,Train
Default,2,S4,0.5214819312095642,0.0,-1,Train
Default,2,S2,0.5294750332832336,1.0,-1,Train
Default,3,S4,0.5213046073913574,0.0,-1,Train
Default,3,S2,0.5294366478919983,1.0,-1,Train
Default,4,S4,0.5211275815963745,0.0,-1,Train
Default,4,S2,0.5293986201286316,1.0,-1,Train
"""
        check_log_file(metrics_path, metrics_expected, ignore_columns=[])
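# The log-file checks in this file rely on a `check_log_file` helper that is not part of
# this excerpt. Below is a minimal sketch of its apparent contract, assuming CSV contents
# and pandas; the real helper may differ. Columns listed in `ignore_columns` must be
# present in both files, but their (machine-dependent) values are not compared.
from io import StringIO
from pathlib import Path
from typing import List

import pandas as pd


def check_log_file_sketch(path: Path, expected_csv: str, ignore_columns: List[str]) -> None:
    actual_df = pd.read_csv(path)
    expected_df = pd.read_csv(StringIO(expected_csv))
    for column in ignore_columns:
        # The column must exist, even though its values vary between machines.
        assert column in actual_df.columns
        assert column in expected_df.columns
        actual_df = actual_df.drop(columns=column)
        expected_df = expected_df.drop(columns=column)
    # check_like=True ignores column ordering when comparing the two frames.
    pd.testing.assert_frame_equal(actual_df, expected_df, check_like=True)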
@pytest.mark.parametrize("class_name", [MetricsDict.DEFAULT_HUE_KEY, "foo"])  # values assumed
def test_train_classification_model(class_name: str, test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from
    training and testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.class_names = [class_name]
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
    expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
    # Ensure that all metrics are computed on both training and validation set
    assert len(model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
                   MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                   MetricType.AREA_UNDER_PR_CURVE,
                   MetricType.AREA_UNDER_ROC_CURVE,
                   MetricType.CROSS_ENTROPY,
                   MetricType.LOSS,
                   MetricType.SECONDS_PER_BATCH,
                   MetricType.SECONDS_PER_EPOCH,
                   MetricType.SUBJECT_COUNT]:
        assert metric.value in model_training_result.train_results_per_epoch[0], \
            f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[0], \
            f"{metric.value} not in validation"
    actual_train_loss = model_training_result.get_metric(is_training=True,
                                                         metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False,
                                                       metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True,
                                                 metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = [0.636085, 0.735952]
    assert test_results.metrics.values(class_name)[MetricType.CROSS_ENTROPY.value] == \
           pytest.approx(expected_metrics, abs=1e-5)
    # Run detailed logs file check only on CPU, it will contain slightly different metrics
    # on GPU, but here we want to mostly assert that the files look reasonable
    if machine_has_gpu:
        return
    # Check epoch_metrics.csv
    epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
    # Auto-format will break the long header line, hence the strange way of writing it!
    expected_epoch_metrics = \
        f"{LoggingColumns.Loss.value},{LoggingColumns.CrossEntropy.value}," \
        f"{LoggingColumns.AccuracyAtThreshold05.value},{LoggingColumns.LearningRate.value}," + \
        f"{LoggingColumns.AreaUnderRocCurve.value}," \
        f"{LoggingColumns.AreaUnderPRCurve.value}," \
        f"{LoggingColumns.AccuracyAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalsePositiveRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalseNegativeRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.OptimalThreshold.value}," \
        f"{LoggingColumns.SubjectCount.value},{LoggingColumns.Epoch.value}," \
        f"{LoggingColumns.CrossValidationSplitIndex.value}\n" + \
        """0.6866141557693481,0.6866141557693481,0.5,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,0,-1
0.6864652633666992,0.6864652633666992,0.5,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,1,-1
0.6863163113594055,0.6863162517547607,0.5,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,2,-1
0.6861673593521118,0.6861673593521118,0.5,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,3,-1
"""
    check_log_file(epoch_metrics_path, expected_epoch_metrics, ignore_columns=[])
    # Check metrics.csv: This contains the per-subject per-epoch model outputs
    # Randomization comes out slightly different on Windows, hence only execute the test on Linux
    if common_util.is_windows():
        return
    metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / SUBJECT_METRICS_FILE_NAME
    metrics_expected = \
        f"""epoch,subject,prediction_target,model_output,label,data_split,cross_validation_split_index
0,S2,{class_name},0.529514,1,Train,-1
0,S4,{class_name},0.521659,0,Train,-1
1,S4,{class_name},0.521482,0,Train,-1
1,S2,{class_name},0.529475,1,Train,-1
2,S4,{class_name},0.521305,0,Train,-1
2,S2,{class_name},0.529437,1,Train,-1
3,S2,{class_name},0.529399,1,Train,-1
3,S4,{class_name},0.521128,0,Train,-1
"""
    check_log_file(metrics_path, metrics_expected, ignore_columns=[])
    # Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
    # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
    inference_metrics_path = config.outputs_folder / get_epoch_results_path(ModelExecutionMode.TRAIN) / \
                             SUBJECT_METRICS_FILE_NAME
    inference_metrics_expected = \
        f"""prediction_target,subject,model_output,label,cross_validation_split_index,data_split
{class_name},S2,0.5293986201286316,1.0,-1,Train
{class_name},S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])
def test_train_classification_multilabel_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of multilabel classification models, asserting on the individual
    results from training and testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = DummyMulticlassClassification()
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.699870228767395, 0.6239662170410156, 0.551329493522644, 0.4825132489204407]
    expected_val_loss = [0.6299371719360352, 0.5546272993087769, 0.4843321740627289, 0.41909298300743103]
    # Ensure that all metrics are computed on both training and validation set
    assert len(model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    # Per-class metrics are logged with a "metric/class_name" key.
    for class_name in config.class_names:
        for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
                       MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                       MetricType.AREA_UNDER_PR_CURVE,
                       MetricType.AREA_UNDER_ROC_CURVE,
                       MetricType.CROSS_ENTROPY]:
            assert f'{metric.value}/{class_name}' in model_training_result.train_results_per_epoch[0], \
                f"{metric.value} not in training"
            assert f'{metric.value}/{class_name}' in model_training_result.val_results_per_epoch[0], \
                f"{metric.value} not in validation"
    for metric in [MetricType.LOSS,
                   MetricType.SECONDS_PER_EPOCH,
                   MetricType.SUBJECT_COUNT]:
        assert metric.value in model_training_result.train_results_per_epoch[0], \
            f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[0], \
            f"{metric.value} not in validation"
    actual_train_loss = model_training_result.get_metric(is_training=True,
                                                         metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False,
                                                       metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True,
                                                 metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = {
        MetricType.CROSS_ENTROPY: [1.3996, 5.2966, 1.4020, 0.3553, 0.6908],
        MetricType.ACCURACY_AT_THRESHOLD_05: [0.0000, 0.0000, 0.0000, 1.0000, 1.0000]
    }
    for i, class_name in enumerate(config.class_names):
        for metric in expected_metrics.keys():
            assert expected_metrics[metric][i] == pytest.approx(
                test_results.metrics.get_single_metric(metric_name=metric, hue=class_name), 1e-4)

    def get_epoch_path(mode: ModelExecutionMode) -> Path:
        p = get_epoch_results_path(mode=mode)
        return config.outputs_folder / p / SUBJECT_METRICS_FILE_NAME

    path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
    path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
    path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)
    generate_classification_notebook(result_notebook=config.outputs_folder /
                                                     get_ipynb_report_name(config.model_category.value),
                                     config=config,
                                     train_metrics=path_to_best_epoch_train,
                                     val_metrics=path_to_best_epoch_val,
                                     test_metrics=path_to_best_epoch_test)
    assert (config.outputs_folder / get_html_report_name(config.model_category.value)).exists()
    report_name_multilabel = f"{config.model_category.value}_multilabel"
    generate_classification_multilabel_notebook(result_notebook=config.outputs_folder /
                                                                get_ipynb_report_name(report_name_multilabel),
                                                config=config,
                                                train_metrics=path_to_best_epoch_train,
                                                val_metrics=path_to_best_epoch_val,
                                                test_metrics=path_to_best_epoch_test)
    assert (config.outputs_folder / get_html_report_name(report_name_multilabel)).exists()
    # Nested helper: `config`, `checkpoint_handler` and `model_proc` are captured from the
    # enclosing function's scope.
    def run_model_test(data_split: ModelExecutionMode) -> Optional[InferenceMetrics]:
        return model_test(config,
                          data_split=data_split,
                          checkpoint_handler=checkpoint_handler,
                          model_proc=model_proc)
@pytest.mark.parametrize("mean_teacher_model", [True, False])  # parametrization assumed
def test_recover_testing_from_run_recovery(mean_teacher_model: bool,
                                           test_output_dirs: OutputFolderForTests) -> None:
    """
    Checks that inference results are the same whether from a checkpoint in the same run, from a
    run recovery, or from a local_weights_path param.
    """
    # Train for 4 epochs
    config = DummyClassification()
    if mean_teacher_model:
        config.mean_teacher_alpha = 0.999
    config.set_output_to(test_output_dirs.root_dir / "original")
    os.makedirs(str(config.outputs_folder))
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    train_results = model_train(config, checkpoint_handler=checkpoint_handler)
    assert len(train_results.learning_rates_per_epoch) == config.num_epochs
    # Run inference on this
    test_results = model_test(config=config,
                              data_split=ModelExecutionMode.TEST,
                              checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == [config.num_epochs]
    # Mimic using a run recovery and see if it is the same
    config_run_recovery = DummyClassification()
    if mean_teacher_model:
        config_run_recovery.mean_teacher_alpha = 0.999
    config_run_recovery.set_output_to(test_output_dirs.root_dir / "run_recovery")
    os.makedirs(str(config_run_recovery.outputs_folder))
    checkpoint_handler_run_recovery = get_default_checkpoint_handler(
        model_config=config_run_recovery,
        project_root=test_output_dirs.root_dir)
    # make it seem like run recovery objects have been downloaded
    checkpoint_root = config_run_recovery.checkpoint_folder / "recovered"
    shutil.copytree(str(config.checkpoint_folder), str(checkpoint_root))
    checkpoint_handler_run_recovery.run_recovery = RunRecovery([checkpoint_root])
    test_results_run_recovery = model_test(config_run_recovery,
                                           data_split=ModelExecutionMode.TEST,
                                           checkpoint_handler=checkpoint_handler_run_recovery)
    assert isinstance(test_results_run_recovery, InferenceMetricsForClassification)
    assert list(test_results_run_recovery.epochs.keys()) == [config.num_epochs]
    assert test_results.epochs[config.num_epochs].values()[MetricType.CROSS_ENTROPY.value] == \
           test_results_run_recovery.epochs[config.num_epochs].values()[MetricType.CROSS_ENTROPY.value]
    # Run inference with the local checkpoints
    config_local_weights = DummyClassification()
    if mean_teacher_model:
        config_local_weights.mean_teacher_alpha = 0.999
    config_local_weights.set_output_to(test_output_dirs.root_dir / "local_weights_path")
    os.makedirs(str(config_local_weights.outputs_folder))
    local_weights_path = test_output_dirs.root_dir / "local_weights_file.pth"
    shutil.copyfile(str(create_checkpoint_path(config.checkpoint_folder, epoch=config.num_epochs)),
                    local_weights_path)
    config_local_weights.local_weights_path = local_weights_path
    checkpoint_handler_local_weights = get_default_checkpoint_handler(
        model_config=config_local_weights,
        project_root=test_output_dirs.root_dir)
    checkpoint_handler_local_weights.discover_and_download_checkpoints_from_previous_runs()
    test_results_local_weights = model_test(config_local_weights,
                                            data_split=ModelExecutionMode.TEST,
                                            checkpoint_handler=checkpoint_handler_local_weights)
    assert isinstance(test_results_local_weights, InferenceMetricsForClassification)
    # Weights loaded via local_weights_path are registered as epoch 0.
    assert list(test_results_local_weights.epochs.keys()) == [0]
    assert test_results.epochs[config.num_epochs].values()[MetricType.CROSS_ENTROPY.value] == \
           test_results_local_weights.epochs[0].values()[MetricType.CROSS_ENTROPY.value]
def test_train_classification_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from
    training and testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
    expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
    # Ensure that all metrics are computed on both training and validation set
    assert len(model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    for metric in [MetricType.ACCURACY_AT_THRESHOLD_05,
                   MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                   MetricType.AREA_UNDER_PR_CURVE,
                   MetricType.AREA_UNDER_ROC_CURVE,
                   MetricType.CROSS_ENTROPY,
                   MetricType.LOSS,
                   # For unknown reasons, we don't get seconds_per_batch for the training data.
                   # MetricType.SECONDS_PER_BATCH,
                   MetricType.SECONDS_PER_EPOCH,
                   MetricType.SUBJECT_COUNT,
                   ]:
        assert metric.value in model_training_result.train_results_per_epoch[0], \
            f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[0], \
            f"{metric.value} not in validation"
    actual_train_loss = model_training_result.get_metric(is_training=True,
                                                         metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False,
                                                       metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True,
                                                 metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = [0.636085, 0.735952]
    assert test_results.metrics.values()[MetricType.CROSS_ENTROPY.value] == \
           pytest.approx(expected_metrics, abs=1e-5)
    # Run detailed logs file check only on CPU, it will contain slightly different metrics
    # on GPU, but here we want to mostly assert that the files look reasonable
    if machine_has_gpu:
        return
    # Check epoch_metrics.csv
    epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
    # Auto-format will break the long header line, hence the strange way of writing it!
    expected_epoch_metrics = \
        "loss,cross_entropy,accuracy_at_threshold_05,seconds_per_epoch,learning_rate," + \
        "area_under_roc_curve,area_under_pr_curve,accuracy_at_optimal_threshold," \
        "false_positive_rate_at_optimal_threshold,false_negative_rate_at_optimal_threshold," \
        "optimal_threshold,subject_count,epoch,cross_validation_split_index\n" + \
        """0.6866141557693481,0.6866141557693481,0.5,0,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,0,-1
0.6864652633666992,0.6864652633666992,0.5,0,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,1,-1
0.6863163113594055,0.6863162517547607,0.5,0,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,2,-1
0.6861673593521118,0.6861673593521118,0.5,0,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,3,-1
"""
    # We cannot compare columns like "seconds_per_epoch" because timing will obviously vary between machines.
    # Column must still be present, though.
    check_log_file(epoch_metrics_path, expected_epoch_metrics,
                   ignore_columns=[LoggingColumns.SecondsPerEpoch.value])
    # Check metrics.csv: This contains the per-subject per-epoch model outputs
    # Randomization comes out slightly different on Windows, hence only execute the test on Linux
    if common_util.is_windows():
        return
    metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / SUBJECT_METRICS_FILE_NAME
    metrics_expected = \
        """prediction_target,epoch,subject,model_output,label,cross_validation_split_index,data_split
Default,0,S2,0.5295137763023376,1.0,-1,Train
Default,0,S4,0.5216594338417053,0.0,-1,Train
Default,1,S4,0.5214819312095642,0.0,-1,Train
Default,1,S2,0.5294750332832336,1.0,-1,Train
Default,2,S2,0.5294366478919983,1.0,-1,Train
Default,2,S4,0.5213046073913574,0.0,-1,Train
Default,3,S2,0.5293986201286316,1.0,-1,Train
Default,3,S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(metrics_path, metrics_expected, ignore_columns=[])
    # Check log METRICS_FILE_NAME inside of the folder epoch_004/Train, which is written when we run model_test.
    # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
    inference_metrics_path = config.outputs_folder / get_epoch_results_path(ModelExecutionMode.TRAIN) / \
                             SUBJECT_METRICS_FILE_NAME
    inference_metrics_expected = \
        """prediction_target,subject,model_output,label,cross_validation_split_index,data_split
Default,S2,0.5293986201286316,1.0,-1,Train
Default,S4,0.5211275815963745,0.0,-1,Train
"""
    check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])
@pytest.mark.parametrize("mean_teacher_model", [True, False])  # parametrization assumed
def test_recover_testing_from_run_recovery(mean_teacher_model: bool,
                                           test_output_dirs: OutputFolderForTests) -> None:
    """
    Checks that inference results are the same whether from a checkpoint in the same run, from a
    run recovery, or from a local_weights_path param.
    """
    # Train for 4 epochs
    config = DummyClassification()
    if mean_teacher_model:
        config.mean_teacher_alpha = 0.999
    config.set_output_to(test_output_dirs.root_dir / "original")
    os.makedirs(str(config.outputs_folder))
    train_results, checkpoint_handler = model_train_unittest(config, output_folder=test_output_dirs)
    assert len(train_results.train_results_per_epoch()) == config.num_epochs
    # Run inference on this
    test_results = model_test(config=config,
                              data_split=ModelExecutionMode.TEST,
                              checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    assert isinstance(test_results, InferenceMetricsForClassification)
    # Mimic using a run recovery and see if it is the same
    config_run_recovery = DummyClassification()
    if mean_teacher_model:
        config_run_recovery.mean_teacher_alpha = 0.999
    config_run_recovery.set_output_to(test_output_dirs.root_dir / "run_recovery")
    os.makedirs(str(config_run_recovery.outputs_folder))
    checkpoint_handler_run_recovery = get_default_checkpoint_handler(
        model_config=config_run_recovery,
        project_root=test_output_dirs.root_dir)
    # make it seem like run recovery objects have been downloaded
    checkpoint_root = config_run_recovery.checkpoint_folder / "recovered"
    shutil.copytree(str(config.checkpoint_folder), str(checkpoint_root))
    checkpoint_handler_run_recovery.run_recovery = RunRecovery([checkpoint_root])
    test_results_run_recovery = model_test(
        config_run_recovery,
        data_split=ModelExecutionMode.TEST,
        checkpoint_paths=checkpoint_handler_run_recovery.get_checkpoints_to_test())
    assert isinstance(test_results_run_recovery, InferenceMetricsForClassification)
    assert test_results.metrics.values()[MetricType.CROSS_ENTROPY.value] == \
           test_results_run_recovery.metrics.values()[MetricType.CROSS_ENTROPY.value]
    # Run inference with the local checkpoints
    config_local_weights = DummyClassification()
    if mean_teacher_model:
        config_local_weights.mean_teacher_alpha = 0.999
    config_local_weights.set_output_to(test_output_dirs.root_dir / "local_weights_path")
    os.makedirs(str(config_local_weights.outputs_folder))
    local_weights_path = test_output_dirs.root_dir / "local_weights_file.pth"
    shutil.copyfile(str(config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX),
                    local_weights_path)
    config_local_weights.local_weights_path = [local_weights_path]
    checkpoint_handler_local_weights = get_default_checkpoint_handler(
        model_config=config_local_weights,
        project_root=test_output_dirs.root_dir)
    checkpoint_handler_local_weights.download_recovery_checkpoints_or_weights()
    test_results_local_weights = model_test(
        config_local_weights,
        data_split=ModelExecutionMode.TEST,
        checkpoint_paths=checkpoint_handler_local_weights.get_checkpoints_to_test())
    assert isinstance(test_results_local_weights, InferenceMetricsForClassification)
    assert test_results.metrics.values()[MetricType.CROSS_ENTROPY.value] == \
           test_results_local_weights.metrics.values()[MetricType.CROSS_ENTROPY.value]
    # Nested helper: `config`, `run_recovery` and `model_proc` are captured from the
    # enclosing function's scope.
    def run_model_test(data_split: ModelExecutionMode) -> Optional[InferenceMetrics]:
        return model_test(config,
                          data_split=data_split,
                          run_recovery=run_recovery,
                          model_proc=model_proc)
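    # Hypothetical usage of the helper above, for illustration only; the enclosing runner
    # code is not part of this excerpt and the variable names below are assumptions:
    # val_metrics = run_model_test(ModelExecutionMode.VAL)
    # test_metrics = run_model_test(ModelExecutionMode.TEST)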