Code example #1
def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.use_mixed_precision = True
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the image loading function (load_image_in_known_formats) that is called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        model_train_unittest(config, dirs=test_output_dirs)
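The essential trick in this test is patching the image loader so that no real scan files are needed; every dataset access receives the same random array. Below is a minimal, self-contained sketch of that pattern using only unittest.mock and numpy; the ScanDataset class and its load_scan method are hypothetical stand-ins for the real dataset and for load_image_in_known_formats.

from unittest import mock

import numpy as np


class ScanDataset:
    """Toy dataset that loads a scan from disk for every item."""

    def __init__(self, paths):
        self.paths = paths

    def __getitem__(self, index):
        # In the real test, this loader call is what gets patched.
        return self.load_scan(self.paths[index])

    @staticmethod
    def load_scan(path):
        raise IOError(f"this sketch never reads a real file: {path}")


fake_scan = np.random.uniform(0, 1, (4, 4, 4))
# Replace the loader so that every call returns the fake scan, exactly like
# mock.patch(..., return_value=image_and_seg) in the test above.
with mock.patch.object(ScanDataset, "load_scan", return_value=fake_scan):
    dataset = ScanDataset(paths=["scan1.npy"])
    assert dataset[0].shape == (4, 4, 4)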
Code example #2
def test_run_model_with_invalid_trainer_arguments(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that invalid trainer_arguments in a LightningContainer are rejected when they are passed to the trainer.
    """
    container = DummyContainerWithInvalidTrainerArguments()
    with pytest.raises(Exception) as ex:
        model_train_unittest(config=None, output_folder=test_output_dirs, lightning_container=container)
    assert "no_such_argument" in str(ex)
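For reference, pytest.raises returns an ExceptionInfo object; the raised exception itself lives in ex.value, which is the more precise thing to inspect than str(ex). A small self-contained sketch of the same pattern (the build_trainer function is invented for illustration):

import pytest


def build_trainer(**kwargs) -> None:
    # Stand-in for the container/trainer wiring exercised in the test above.
    allowed = {"max_epochs"}
    for name in kwargs:
        if name not in allowed:
            raise TypeError(f"unexpected keyword argument '{name}'")


def test_invalid_argument_is_reported() -> None:
    with pytest.raises(TypeError) as ex:
        build_trainer(no_such_argument=1)
    # str(ex) also contains the message, but ex.value is the exception itself.
    assert "no_such_argument" in str(ex.value)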
Code example #3
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)

    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    model_training_result, checkpoint_handler = model_train_unittest(
        config, dirs=test_output_dirs)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]

    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]

    actual_train_loss = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(
        is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LEARNING_RATE.value)

    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(
        config,
        ModelExecutionMode.TRAIN,
        checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
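The loss comparisons above use an absolute tolerance, whereas the learning-rate comparison uses a relative one; a short sketch of how pytest.approx treats the two:

import pytest

# Absolute tolerance: |actual - expected| <= abs
assert 0.705932 == pytest.approx(0.705931, abs=1e-5)

# Relative tolerance: |actual - expected| <= rel * |expected|,
# the natural choice for quantities spanning magnitudes, such as learning rates.
assert 9.9993e-05 == pytest.approx(9.99930e-05, rel=1e-5)

# Lists are compared element-wise.
assert [0.1, 0.2] == pytest.approx([0.1000001, 0.2000001], abs=1e-5)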
Code example #4
def test_non_image_encoder(
        test_output_dirs: OutputFolderForTests,
        hidden_layer_num_feature_channels: Optional[int]) -> None:
    """
    Test if we can build a simple MLP model that only feeds off non-image features.
    """
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    dataset_contents = _get_fake_dataset_contents()
    (dataset_folder / DATASET_CSV_FILE_NAME).write_text(dataset_contents)
    config = NonImageEncoder(
        should_validate=False,
        hidden_layer_num_feature_channels=hidden_layer_num_feature_channels)
    config.local_dataset = dataset_folder
    config.set_output_to(test_output_dirs.root_dir)
    config.max_batch_grad_cam = 1
    config.validate()
    # run model training
    _, checkpoint_handler = model_train_unittest(
        config, output_folder=test_output_dirs)
    # run model inference
    runner = MLRunner(config)
    runner.setup()
    runner.model_inference_train_and_test(
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    assert config.get_total_number_of_non_imaging_features() == 18
Code example #5
def test_autosave_checkpoints(test_output_dirs: OutputFolderForTests,
                              num_epochs: int) -> None:
    """
    Tests that all autosave checkpoints are cleaned up after training.
    """
    # Lightning does not overwrite checkpoints in-place. Rather, it writes "autosave.ckpt",
    # then "autosave-v1.ckpt" and deletes "autosave.ckpt", then "autosave.ckpt" again and deletes "autosave-v1.ckpt".
    # All of those checkpoints should be cleaned up after training; only the best checkpoint should remain.
    config = DummyClassification()
    config.autosave_every_n_val_epochs = 1
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = num_epochs
    model_train_unittest(config, output_folder=test_output_dirs)
    assert len(list(config.checkpoint_folder.glob("*.*"))) == 1
    assert (config.checkpoint_folder /
            LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()
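The assertions at the end only inspect file names in the checkpoint folder. The sketch below simulates the autosave rotation described in the comment above with plain pathlib and tempfile; the file names, including "last.ckpt" standing in for LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX, are assumptions for illustration.

import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    checkpoint_folder = Path(tmp)
    # Simulate the autosave rotation described above.
    (checkpoint_folder / "autosave.ckpt").touch()
    (checkpoint_folder / "autosave-v1.ckpt").touch()
    (checkpoint_folder / "last.ckpt").touch()  # name is an assumption

    # Cleanup that training is expected to perform before finishing:
    for autosave in checkpoint_folder.glob("autosave*"):
        autosave.unlink()

    assert len(list(checkpoint_folder.glob("*.*"))) == 1
    assert (checkpoint_folder / "last.ckpt").is_file()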
Code example #6
def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results, _ = model_train_unittest(config, dirs=test_output_dirs)

    actual_train_loss = results.get_metric(is_training=True, metric_type=MetricType.LOSS.value)[-1]
    actual_val_loss = results.get_metric(is_training=False, metric_type=MetricType.LOSS.value)[-1]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
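The synthetic dataset labels each sequence by majority vote over its 0/1 entries, and the p=[1/3, 2/3] sampling makes positive labels the more frequent class. A stand-alone sketch of the same generation logic (the helper name is illustrative):

import numpy as np


def make_sequence_dataset(num_sequences: int = 100, seed: int = 123) -> str:
    """Builds a CSV of variable-length 0/1 sequences, labelled by majority vote."""
    rng = np.random.RandomState(seed)
    rows = ["subject,index,feature,label"]
    for subject in range(num_sequences):
        sequence_length = rng.choice([9, 10, 11, 12])
        inputs = rng.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            rows.append(f"S{subject},{i},{value},{label}")
    return "\n".join(rows) + "\n"


csv_text = make_sequence_dataset()
labels = {line.split(",")[0]: line.split(",")[3] for line in csv_text.splitlines()[1:]}
# With p(1) = 2/3, well over half of the sequences end up with a True label.
assert sum(v == "True" for v in labels.values()) > 50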
Code example #7
def test_recover_training_mean_teacher_model(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Tests that training can be recovered from a previous checkpoint.
    """
    config = DummyClassification()
    config.mean_teacher_alpha = 0.999
    config.autosave_every_n_val_epochs = 1
    config.set_output_to(test_output_dirs.root_dir / "original")
    os.makedirs(str(config.outputs_folder))

    original_checkpoint_folder = config.checkpoint_folder

    # First round of training
    config.num_epochs = 4
    model_train_unittest(config, output_folder=test_output_dirs)
    assert len(list(config.checkpoint_folder.glob("*.*"))) == 1
    assert (config.checkpoint_folder /
            LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()

    # Restart training from previous run
    config.num_epochs = 3
    config.set_output_to(test_output_dirs.root_dir / "recovered")
    os.makedirs(str(config.outputs_folder))
    # make it seem like run recovery objects have been downloaded
    checkpoint_root = config.checkpoint_folder / "old_run"
    shutil.copytree(str(original_checkpoint_folder), str(checkpoint_root))

    # Create a new checkpoint handler and set run_recovery to the copied checkpoints
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    checkpoint_handler.run_recovery = RunRecovery([checkpoint_root])

    model_train_unittest(config,
                         output_folder=test_output_dirs,
                         checkpoint_handler=checkpoint_handler)
    # remove recovery checkpoints
    shutil.rmtree(checkpoint_root)
    assert len(list(config.checkpoint_folder.glob("*.ckpt"))) == 1
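The "recovery" in this test is nothing more than copying the first run's checkpoint folder into a sub-folder of the new run and pointing the checkpoint handler at it. A stripped-down sketch of that file-system dance with the standard library only (folder and file names are illustrative):

import shutil
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    # Pretend this is the checkpoint folder of the original run.
    original_checkpoints = root / "original" / "checkpoints"
    original_checkpoints.mkdir(parents=True)
    (original_checkpoints / "last.ckpt").write_bytes(b"fake checkpoint")

    # New run: make it look as if recovery checkpoints were downloaded.
    recovered_checkpoints = root / "recovered" / "checkpoints"
    recovered_checkpoints.mkdir(parents=True)
    checkpoint_root = recovered_checkpoints / "old_run"
    shutil.copytree(original_checkpoints, checkpoint_root)
    assert (checkpoint_root / "last.ckpt").is_file()

    # After the recovered training finishes, the copied checkpoints are removed
    # and only the new run's own checkpoint should remain.
    (recovered_checkpoints / "last.ckpt").write_bytes(b"new checkpoint")
    shutil.rmtree(checkpoint_root)
    assert len(list(recovered_checkpoints.glob("*.ckpt"))) == 1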
Code example #8
def test_recovery_e2e(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test restarting a training run: train a small model for 5 epochs, then continue training to epoch 10 from the results
    of the first training run.
    """
    model_config = DummyClassification()
    model_config.set_output_to(test_output_dirs.root_dir)
    num_epochs_1 = 5
    model_config.num_epochs = num_epochs_1
    storing_logger_1, checkpoint_handler = model_train_unittest(
        model_config, output_folder=test_output_dirs)
    # Logger should have results for epochs 0..4
    assert list(storing_logger_1.epochs) == list(range(num_epochs_1))
    # Now restart the job, train to epoch 10
    num_epochs_2 = 10
    model_config.num_epochs = num_epochs_2
    storing_logger_2, _ = model_train_unittest(
        model_config,
        output_folder=test_output_dirs,
        checkpoint_handler=checkpoint_handler)
    # Logger should have results only for epochs 5..9
    assert list(storing_logger_2.epochs) == list(
        range(num_epochs_1, num_epochs_2))
Code example #9
def _test_model_train(output_dirs: OutputFolderForTests,
                      image_channels: Any,
                      ground_truth_ids: Any,
                      no_mask_channel: bool = False) -> None:
    def _check_patch_centers(diagnostics_per_epoch: List[np.ndarray],
                             should_equal: bool) -> None:
        patch_centers_epoch1 = diagnostics_per_epoch[0]
        assert len(
            diagnostics_per_epoch
        ) > 1, "Not enough data to check patch centers, need at least 2"
        for diagnostic in diagnostics_per_epoch[1:]:
            assert np.array_equal(patch_centers_epoch1,
                                  diagnostic) == should_equal

    def _check_voxel_count(results_per_epoch: List[Dict[str, float]],
                           expected_voxel_count_per_epoch: List[float],
                           prefix: str) -> None:
        assert len(results_per_epoch) == len(expected_voxel_count_per_epoch)
        for epoch, (results, voxel_count) in enumerate(
                zip(results_per_epoch, expected_voxel_count_per_epoch)):
            # In the test data, both structures "region" and "region_1" are read from the same nifti file, hence
            # their voxel counts must be identical.
            for structure in ["region", "region_1"]:
                assert results[f"{MetricType.VOXEL_COUNT.value}/{structure}"] == pytest.approx(voxel_count, abs=1e-2), \
                    f"{prefix} voxel count mismatch for '{structure}' epoch {epoch}"

    def _mean(a: List[float]) -> float:
        return sum(a) / len(a)

    def _mean_list(lists: List[List[float]]) -> List[float]:
        return list(map(_mean, lists))

    logging_to_stdout(log_level=logging.DEBUG)
    train_config = DummyModel()
    train_config.local_dataset = base_path
    train_config.set_output_to(output_dirs.root_dir)
    train_config.image_channels = image_channels
    train_config.ground_truth_ids = ground_truth_ids
    train_config.mask_id = None if no_mask_channel else train_config.mask_id
    train_config.random_seed = 42
    train_config.class_weights = [0.5, 0.25, 0.25]
    train_config.store_dataset_sample = no_mask_channel
    train_config.check_exclusive = False

    if machine_has_gpu:
        expected_train_losses = [0.4554231, 0.4550124]
        expected_val_losses = [0.4553894, 0.4553061]
    else:
        expected_train_losses = [0.4554231, 0.4550112]
        expected_val_losses = [0.4553893, 0.4553061]
    loss_absolute_tolerance = 1e-6
    expected_learning_rates = [train_config.l_rate, 5.3589e-4]

    model_training_result, _ = model_train_unittest(train_config,
                                                    output_folder=output_dirs)
    assert isinstance(model_training_result, StoringLogger)
    # Check that all metrics from the BatchTimeCallback are present
    # # TODO: re-enable once the BatchTimeCallback is fixed
    # for epoch, epoch_results in model_training_result.results_per_epoch.items():
    #     for prefix in [TRAIN_PREFIX, VALIDATION_PREFIX]:
    #         for metric_type in [BatchTimeCallback.EPOCH_TIME,
    #                             BatchTimeCallback.BATCH_TIME + " avg",
    #                             BatchTimeCallback.BATCH_TIME + " max",
    #                             BatchTimeCallback.EXCESS_LOADING_TIME]:
    #             expected = BatchTimeCallback.METRICS_PREFIX + prefix + metric_type
    #             assert expected in epoch_results, f"Expected {expected} in results for epoch {epoch}"
    #             # Excess loading time can be zero because that only measures batches over the threshold
    #             if metric_type != BatchTimeCallback.EXCESS_LOADING_TIME:
    #                 value = epoch_results[expected]
    #                 assert isinstance(value, float)
    #                 assert value > 0.0, f"Time for {expected} should be > 0"

    actual_train_losses = model_training_result.get_train_metric(
        MetricType.LOSS.value)
    actual_val_losses = model_training_result.get_val_metric(
        MetricType.LOSS.value)
    print("actual_train_losses = {}".format(actual_train_losses))
    print("actual_val_losses = {}".format(actual_val_losses))

    def assert_all_close(metric: str, expected: List[float],
                         **kwargs: Any) -> None:
        actual = model_training_result.get_train_metric(metric)
        assert np.allclose(
            actual, expected, **kwargs
        ), f"Mismatch for {metric}: Got {actual}, expected {expected}"

    # check to make sure training batches are NOT all the same across epochs
    _check_patch_centers(model_training_result.train_diagnostics,
                         should_equal=False)
    # check to make sure validation batches are all the same across epochs
    _check_patch_centers(model_training_result.val_diagnostics,
                         should_equal=True)
    assert_all_close(MetricType.SUBJECT_COUNT.value, [3.0, 3.0])
    assert_all_close(MetricType.LEARNING_RATE.value,
                     expected_learning_rates,
                     rtol=1e-6)

    if is_windows():
        # Randomization comes out slightly different on Windows. Skip the rest of the detailed checks.
        return

    # Simple regression test: Voxel counts should be the same in both epochs on the validation set,
    # and be the same across 'region' and 'region_1' because they derive from the same Nifti files.
    # The following values are read off directly from the results of compute_dice_across_patches in the training loop.
    # This checks that averages are computed correctly, and that metric computers are reset after each epoch.
    train_voxels = [[82765.0, 83212.0, 82740.0], [82831.0, 82647.0, 83255.0]]
    val_voxels = [[82765.0, 83212.0], [82765.0, 83212.0]]
    _check_voxel_count(model_training_result.train_results_per_epoch(),
                       _mean_list(train_voxels), "Train")
    _check_voxel_count(model_training_result.val_results_per_epoch(),
                       _mean_list(val_voxels), "Val")

    assert np.allclose(actual_train_losses,
                       expected_train_losses,
                       atol=loss_absolute_tolerance), "Train losses"
    assert np.allclose(actual_val_losses,
                       expected_val_losses,
                       atol=loss_absolute_tolerance), "Val losses"
    # Check that the metric we track for Hyperdrive runs is actually written.
    assert TrackedMetrics.Val_Loss.value.startswith(VALIDATION_PREFIX)
    tracked_metric = TrackedMetrics.Val_Loss.value[len(VALIDATION_PREFIX):]
    for val_result in model_training_result.val_results_per_epoch():
        assert tracked_metric in val_result

    # The following values are read off directly from the results of compute_dice_across_patches in the
    # training loop. Results are slightly different for GPU, hence use a larger tolerance there.
    dice_tolerance = 1e-3 if machine_has_gpu else 4.5e-4
    train_dice_region = [[0.0, 0.0, 0.0], [0.0376, 0.0343, 0.1017]]
    train_dice_region1 = [[0.4845, 0.4814, 0.4829], [0.4822, 0.4747, 0.4426]]
    # There appears to be some amount of non-determinism here: when using a tolerance of 1e-4, we get occasional
    # test failures on Linux in the cloud (not on Windows, not on AzureML). It is unclear where this comes from.
    # Even when failing here, the losses match up to the expected tolerance.
    assert_all_close("Dice/region",
                     _mean_list(train_dice_region),
                     atol=dice_tolerance)
    assert_all_close("Dice/region_1",
                     _mean_list(train_dice_region1),
                     atol=dice_tolerance)
    expected_average_dice = [
        _mean(train_dice_region[i] + train_dice_region1[i])  # type: ignore
        for i in range(len(train_dice_region))
    ]
    assert_all_close("Dice/AverageAcrossStructures",
                     expected_average_dice,
                     atol=dice_tolerance)

    # check output files/directories
    assert train_config.outputs_folder.is_dir()
    assert train_config.logs_folder.is_dir()

    # Tensorboard event files go into a "Lightning" subfolder (PyTorch Lightning default)
    assert (train_config.logs_folder / "Lightning").is_dir()
    assert len(list((train_config.logs_folder / "Lightning").glob("events*"))) == 1

    assert train_config.num_epochs == 2
    # Checkpoint folder
    assert train_config.checkpoint_folder.is_dir()
    actual_checkpoints = list(train_config.checkpoint_folder.rglob("*.ckpt"))
    assert len(
        actual_checkpoints) == 1, f"Actual checkpoints: {actual_checkpoints}"
    assert (train_config.checkpoint_folder /
            LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).is_file()
    assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
    assert (train_config.outputs_folder /
            STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
    assert (train_config.outputs_folder /
            STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()

    # Patch sampling visualization: there should be 3 slices for each of the 2 subjects
    sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
    assert sampling_folder.is_dir()
    assert train_config.show_patch_sampling > 0
    assert len(list(sampling_folder.rglob(
        "*.png"))) == 3 * train_config.show_patch_sampling

    # Test for saving of example images
    if train_config.store_dataset_sample:
        assert train_config.example_images_folder.is_dir()
    example_files = list(train_config.example_images_folder.rglob("*.*"))
    # images x epochs x patients
    assert len(example_files) == (3 * 2 * 2 if train_config.store_dataset_sample else 0)
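The train_dice_region[i] + train_dice_region1[i] expression in the test concatenates two per-patch lists (hence the type: ignore), so the expected "AverageAcrossStructures" value for an epoch is simply the mean over all patch values of both structures. A short sketch of that arithmetic:

from typing import List


def _mean(a: List[float]) -> float:
    return sum(a) / len(a)


train_dice_region = [[0.0, 0.0, 0.0], [0.0376, 0.0343, 0.1017]]
train_dice_region1 = [[0.4845, 0.4814, 0.4829], [0.4822, 0.4747, 0.4426]]

# "+" concatenates the two per-patch lists of an epoch, so the expected
# average across structures is the mean over all six patch values of that epoch.
expected_average_dice = [
    _mean(train_dice_region[i] + train_dice_region1[i])
    for i in range(len(train_dice_region))
]
assert abs(expected_average_dice[0] - (0.4845 + 0.4814 + 0.4829) / 6) < 1e-12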
Code example #10
def test_recover_testing_from_run_recovery(
        mean_teacher_model: bool,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Checks that inference results are the same whether from a checkpoint in the same run, from a run recovery or from a
    local_weights_path param.
    """
    # Train for 4 epochs
    config = DummyClassification()
    if mean_teacher_model:
        config.mean_teacher_alpha = 0.999
    config.set_output_to(test_output_dirs.root_dir / "original")
    os.makedirs(str(config.outputs_folder))

    train_results, checkpoint_handler = model_train_unittest(
        config, output_folder=test_output_dirs)
    assert len(train_results.train_results_per_epoch()) == config.num_epochs

    # Run inference on this
    test_results = model_test(
        config=config,
        data_split=ModelExecutionMode.TEST,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    assert isinstance(test_results, InferenceMetricsForClassification)

    # Mimic using a run recovery and see if it is the same
    config_run_recovery = DummyClassification()
    if mean_teacher_model:
        config_run_recovery.mean_teacher_alpha = 0.999
    config_run_recovery.set_output_to(test_output_dirs.root_dir /
                                      "run_recovery")
    os.makedirs(str(config_run_recovery.outputs_folder))

    checkpoint_handler_run_recovery = get_default_checkpoint_handler(
        model_config=config_run_recovery,
        project_root=test_output_dirs.root_dir)
    # make it seem like run recovery objects have been downloaded
    checkpoint_root = config_run_recovery.checkpoint_folder / "recovered"
    shutil.copytree(str(config.checkpoint_folder), str(checkpoint_root))
    checkpoint_handler_run_recovery.run_recovery = RunRecovery(
        [checkpoint_root])
    test_results_run_recovery = model_test(
        config_run_recovery,
        data_split=ModelExecutionMode.TEST,
        checkpoint_paths=checkpoint_handler_run_recovery.get_checkpoints_to_test())
    assert isinstance(test_results_run_recovery,
                      InferenceMetricsForClassification)
    assert test_results.metrics.values()[MetricType.CROSS_ENTROPY.value] == \
           test_results_run_recovery.metrics.values()[MetricType.CROSS_ENTROPY.value]

    # Run inference with the local checkpoints
    config_local_weights = DummyClassification()
    if mean_teacher_model:
        config_local_weights.mean_teacher_alpha = 0.999
    config_local_weights.set_output_to(test_output_dirs.root_dir /
                                       "local_weights_path")
    os.makedirs(str(config_local_weights.outputs_folder))

    local_weights_path = test_output_dirs.root_dir / "local_weights_file.pth"
    shutil.copyfile(
        str(config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX),
        local_weights_path)
    config_local_weights.local_weights_path = [local_weights_path]

    checkpoint_handler_local_weights = get_default_checkpoint_handler(
        model_config=config_local_weights,
        project_root=test_output_dirs.root_dir)
    checkpoint_handler_local_weights.download_recovery_checkpoints_or_weights()
    test_results_local_weights = model_test(
        config_local_weights,
        data_split=ModelExecutionMode.TEST,
        checkpoint_paths=checkpoint_handler_local_weights.get_checkpoints_to_test())
    assert isinstance(test_results_local_weights,
                      InferenceMetricsForClassification)
    assert test_results.metrics.values()[MetricType.CROSS_ENTROPY.value] == \
           test_results_local_weights.metrics.values()[MetricType.CROSS_ENTROPY.value]
Code example #11
def test_image_encoder(
        test_output_dirs: OutputFolderForTests, encode_channels_jointly: bool,
        use_non_imaging_features: bool,
        kernel_size_per_encoding_block: Optional[Union[TupleInt3,
                                                       List[TupleInt3]]],
        stride_size_per_encoding_block: Optional[Union[TupleInt3,
                                                       List[TupleInt3]]],
        reduction_factor: float, expected_num_reduced_features: int,
        aggregation_type: AggregationType) -> None:
    """
    Test if the image encoder networks can be trained without errors (including GradCam computation and data
    augmentation).
    """
    logging_to_stdout()
    set_random_seed(0)
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    scan_size = (6, 64, 60)
    scan_files: List[str] = []
    for s in range(4):
        random_scan = np.random.uniform(0, 1, scan_size)
        scan_file_name = f"scan{s + 1}{NumpyFile.NUMPY.value}"
        np.save(str(dataset_folder / scan_file_name), random_scan)
        scan_files.append(scan_file_name)

    dataset_contents = """subject,channel,path,label,numerical1,numerical2,categorical1,categorical2
S1,week0,scan1.npy,,1,10,Male,Val1
S1,week1,scan2.npy,True,2,20,Female,Val2
S2,week0,scan3.npy,,3,30,Female,Val3
S2,week1,scan4.npy,False,4,40,Female,Val1
S3,week0,scan1.npy,,5,50,Male,Val2
S3,week1,scan3.npy,True,6,60,Male,Val2
"""
    (dataset_folder / "dataset.csv").write_text(dataset_contents)
    numerical_columns = ["numerical1", "numerical2"] if use_non_imaging_features else []
    categorical_columns = ["categorical1", "categorical2"] if use_non_imaging_features else []
    non_image_feature_channels = get_non_image_features_dict(default_channels=["week1", "week0"],
                                                             specific_channels={"categorical2": ["week1"]}) \
        if use_non_imaging_features else {}
    config_for_dataset = ScalarModelBase(
        local_dataset=dataset_folder,
        image_channels=["week0", "week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=non_image_feature_channels,
        numerical_columns=numerical_columns,
        categorical_columns=categorical_columns,
        should_validate=False)
    config_for_dataset.read_dataset_into_dataframe_and_pre_process()

    dataset = ScalarDataset(
        config_for_dataset,
        sample_transform=ScalarItemAugmentation(
            ImageTransformationPipeline(
                [RandomAffine(10), ColorJitter(0.2)],
                use_different_transformation_per_channel=True)))
    assert len(dataset) == 3

    config = ImageEncoder(
        encode_channels_jointly=encode_channels_jointly,
        should_validate=False,
        numerical_columns=numerical_columns,
        categorical_columns=categorical_columns,
        non_image_feature_channels=non_image_feature_channels,
        categorical_feature_encoder=config_for_dataset.categorical_feature_encoder,
        encoder_dimensionality_reduction_factor=reduction_factor,
        aggregation_type=aggregation_type,
        scan_size=(6, 64, 60))

    if kernel_size_per_encoding_block:
        config.kernel_size_per_encoding_block = kernel_size_per_encoding_block
    if stride_size_per_encoding_block:
        config.stride_size_per_encoding_block = stride_size_per_encoding_block

    config.set_output_to(test_output_dirs.root_dir)
    config.max_batch_grad_cam = 1
    model = create_model_with_temperature_scaling(config)
    input_size: List[Tuple] = [(len(config.image_channels), *scan_size)]
    if use_non_imaging_features:
        input_size.append(
            (config.get_total_number_of_non_imaging_features(), ))

        # The original (unreduced) number of output channels is
        # num_initial_channels * (num_encoder_blocks - 1) = 4 * (3 - 1) = 8
        if encode_channels_jointly:
            # reduced_num_channels + num_non_img_features
            assert model.final_num_feature_channels == expected_num_reduced_features + \
                   config.get_total_number_of_non_imaging_features()
        else:
            # num_img_channels * reduced_num_channels + num_non_img_features
            assert model.final_num_feature_channels == len(config.image_channels) * expected_num_reduced_features + \
                   config.get_total_number_of_non_imaging_features()

    summarizer = ModelSummary(model)
    summarizer.generate_summary(input_sizes=input_size)
    config.local_dataset = dataset_folder
    config.validate()
    model_train_unittest(config, dirs=test_output_dirs)
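The dataset CSV above has one row per (subject, channel), so three subjects with two channels each collapse into the three items asserted for ScalarDataset. A small sketch that checks this grouping with pandas alone (assuming pandas is available; the column names are the ones from the CSV above):

import io

import pandas as pd

dataset_contents = """subject,channel,path,label,numerical1,numerical2,categorical1,categorical2
S1,week0,scan1.npy,,1,10,Male,Val1
S1,week1,scan2.npy,True,2,20,Female,Val2
S2,week0,scan3.npy,,3,30,Female,Val3
S2,week1,scan4.npy,False,4,40,Female,Val1
S3,week0,scan1.npy,,5,50,Male,Val2
S3,week1,scan3.npy,True,6,60,Male,Val2
"""
df = pd.read_csv(io.StringIO(dataset_contents), dtype={"subject": str})
# One dataset item per subject, each built from its "week0" and "week1" rows.
assert df.groupby("subject").size().to_dict() == {"S1": 2, "S2": 2, "S3": 2}
# The label is only present on the "week1" (label) channel.
assert df[df["channel"] == "week1"]["label"].notna().all()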
Code example #12
def test_train_classification_model(
        class_name: str, test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from training and
    testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.class_names = config.target_names = [class_name]
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result, checkpoint_handler = model_train_unittest(
        config, dirs=test_output_dirs)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
    expected_val_loss = [0.737061, 0.736691, 0.736321, 0.735952]
    # Ensure that all metrics are computed on both training and validation set
    train_results_per_epoch = model_training_result.train_results_per_epoch()
    val_results_per_epoch = model_training_result.val_results_per_epoch()
    assert len(train_results_per_epoch) == config.num_epochs
    assert len(val_results_per_epoch) == config.num_epochs
    assert len(train_results_per_epoch[0]) >= 11
    assert len(val_results_per_epoch[0]) >= 11

    for metric in [
            MetricType.ACCURACY_AT_THRESHOLD_05,
            MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
            MetricType.AREA_UNDER_PR_CURVE, MetricType.AREA_UNDER_ROC_CURVE,
            MetricType.CROSS_ENTROPY, MetricType.LOSS,
            MetricType.SECONDS_PER_BATCH, MetricType.SECONDS_PER_EPOCH,
            MetricType.SUBJECT_COUNT
    ]:
        assert metric.value in train_results_per_epoch[
            0], f"{metric.value} not in training"
        assert metric.value in val_results_per_epoch[
            0], f"{metric.value} not in validation"

    actual_train_loss = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(
        is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss,
                                              abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss,
                                            abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates,
                                      rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(
        config,
        ModelExecutionMode.TRAIN,
        checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    expected_metrics = [0.636085, 0.735952]
    assert test_results.metrics.values(class_name)[MetricType.CROSS_ENTROPY.value] == \
           pytest.approx(expected_metrics, abs=1e-5)
    # Run the detailed log file checks only on CPU: they will contain slightly different metrics on GPU, but here
    # we mostly want to assert that the files look reasonable
    if machine_has_gpu:
        return

    # Check epoch_metrics.csv
    epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
    # Auto-format will break the long header line, hence the strange way of writing it!
    expected_epoch_metrics = \
        f"{LoggingColumns.Loss.value},{LoggingColumns.CrossEntropy.value}," \
        f"{LoggingColumns.AccuracyAtThreshold05.value},{LoggingColumns.LearningRate.value}," + \
        f"{LoggingColumns.AreaUnderRocCurve.value}," \
        f"{LoggingColumns.AreaUnderPRCurve.value}," \
        f"{LoggingColumns.AccuracyAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalsePositiveRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.FalseNegativeRateAtOptimalThreshold.value}," \
        f"{LoggingColumns.OptimalThreshold.value}," \
        f"{LoggingColumns.SubjectCount.value},{LoggingColumns.Epoch.value}," \
        f"{LoggingColumns.CrossValidationSplitIndex.value}\n" + \
        """0.6866141557693481,0.6866141557693481,0.5,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,0,-1	
        0.6864652633666992,0.6864652633666992,0.5,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,1,-1	
        0.6863163113594055,0.6863162517547607,0.5,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,2,-1	
        0.6861673593521118,0.6861673593521118,0.5,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,3,-1	
        """
    check_log_file(epoch_metrics_path,
                   expected_epoch_metrics,
                   ignore_columns=[])
    # Check metrics.csv: This contains the per-subject per-epoch model outputs
    # Randomization comes out slightly different on Windows, hence only execute the test on Linux
    if common_util.is_windows():
        return
    metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / SUBJECT_METRICS_FILE_NAME
    metrics_expected = \
        f"""epoch,subject,prediction_target,model_output,label,data_split,cross_validation_split_index
0,S2,{class_name},0.529514,1,Train,-1
0,S4,{class_name},0.521659,0,Train,-1
1,S4,{class_name},0.521482,0,Train,-1
1,S2,{class_name},0.529475,1,Train,-1
2,S4,{class_name},0.521305,0,Train,-1
2,S2,{class_name},0.529437,1,Train,-1
3,S2,{class_name},0.529399,1,Train,-1
3,S4,{class_name},0.521128,0,Train,-1
"""
    check_log_file(metrics_path, metrics_expected, ignore_columns=[])
    # Check the SUBJECT_METRICS_FILE_NAME log inside the folder best_validation_epoch/Train, which is written when we run model_test.
    # Normally, we would run it on the Test and Val splits, but for convenience we test on the train split here.
    inference_metrics_path = config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TRAIN) / \
                             SUBJECT_METRICS_FILE_NAME
    inference_metrics_expected = \
        f"""prediction_target,subject,model_output,label,epoch,cross_validation_split_index,data_split
{class_name},S2,0.5293986201286316,1.0,{BEST_EPOCH_FOLDER_NAME},-1,Train
{class_name},S4,0.5211275815963745,0.0,{BEST_EPOCH_FOLDER_NAME},-1,Train
"""
    check_log_file(inference_metrics_path,
                   inference_metrics_expected,
                   ignore_columns=[])

    inference_model_output_path = config.outputs_folder / get_best_epoch_results_path(ModelExecutionMode.TRAIN) / \
                                  model_testing.MODEL_OUTPUT_CSV
    inference_model_output_expected = \
        f"""subject,prediction_target,label,model_output,cross_validation_split_index
S2,{class_name},1.000000,0.529399,-1
S4,{class_name},0.000000,0.521128,-1"""
    check_log_file(inference_model_output_path,
                   inference_model_output_expected,
                   ignore_columns=[])
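check_log_file is a helper from the InnerEye test suite; its exact behavior is not shown here. Purely to illustrate the kind of comparison it performs, the sketch below compares two CSV strings while dropping ignored columns and tolerating tiny float differences. This is not the real helper, and it assumes a reasonably recent pandas with atol support in assert_frame_equal.

import io
from typing import List

import pandas as pd


def compare_csv_text(actual_csv: str, expected_csv: str,
                     ignore_columns: List[str]) -> None:
    """Hypothetical stand-in for check_log_file: compares two CSV strings,
    dropping the given columns and allowing small floating point differences."""
    actual = pd.read_csv(io.StringIO(actual_csv)).drop(columns=ignore_columns)
    expected = pd.read_csv(io.StringIO(expected_csv)).drop(columns=ignore_columns)
    pd.testing.assert_frame_equal(actual, expected, check_exact=False, atol=1e-5)


expected = "epoch,subject,model_output\n0,S2,0.529514\n0,S4,0.521659\n"
actual = "epoch,subject,model_output\n0,S2,0.529515\n0,S4,0.521658\n"
compare_csv_text(actual, expected, ignore_columns=[])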
Code example #13
def test_train_classification_multilabel_model(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of a multilabel classification model, asserting on the individual results from training and
    testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = DummyMulticlassClassification()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result, checkpoint_handler = model_train_unittest(
        config, dirs=test_output_dirs)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [
        0.699870228767395, 0.6239662170410156, 0.551329493522644,
        0.4825132489204407
    ]
    expected_val_loss = [
        0.6299371719360352, 0.5546272993087769, 0.4843321740627289,
        0.41909298300743103
    ]
    # Ensure that all metrics are computed on both training and validation set
    train_results_per_epoch = model_training_result.train_results_per_epoch()
    val_results_per_epoch = model_training_result.val_results_per_epoch()
    assert len(train_results_per_epoch) == config.num_epochs
    assert len(val_results_per_epoch) == config.num_epochs
    assert len(train_results_per_epoch[0]) >= 11
    assert len(val_results_per_epoch[0]) >= 11
    for class_name in config.class_names:
        for metric in [
                MetricType.ACCURACY_AT_THRESHOLD_05,
                MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                MetricType.AREA_UNDER_PR_CURVE,
                MetricType.AREA_UNDER_ROC_CURVE, MetricType.CROSS_ENTROPY
        ]:
            assert f'{metric.value}/{class_name}' in train_results_per_epoch[
                0], f"{metric.value} not in training"
            assert f'{metric.value}/{class_name}' in val_results_per_epoch[
                0], f"{metric.value} not in validation"
    for metric in [
            MetricType.LOSS, MetricType.SECONDS_PER_EPOCH,
            MetricType.SUBJECT_COUNT
    ]:
        assert metric.value in train_results_per_epoch[
            0], f"{metric.value} not in training"
        assert metric.value in val_results_per_epoch[
            0], f"{metric.value} not in validation"

    actual_train_loss = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(
        is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss,
                                              abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss,
                                            abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates,
                                      rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(
        config,
        ModelExecutionMode.TRAIN,
        checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)

    expected_metrics = {
        MetricType.CROSS_ENTROPY: [1.3996, 5.2966, 1.4020, 0.3553, 0.6908],
        MetricType.ACCURACY_AT_THRESHOLD_05:
        [0.0000, 0.0000, 0.0000, 1.0000, 1.0000]
    }

    for i, class_name in enumerate(config.class_names):
        for metric in expected_metrics.keys():
            assert expected_metrics[metric][i] == pytest.approx(
                test_results.metrics.get_single_metric(metric_name=metric,
                                                       hue=class_name), 1e-4)

    def get_epoch_path(mode: ModelExecutionMode) -> Path:
        p = get_best_epoch_results_path(mode=mode)
        return config.outputs_folder / p / SUBJECT_METRICS_FILE_NAME

    path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
    path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
    path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)
    generate_classification_notebook(
        result_notebook=config.outputs_folder /
        get_ipynb_report_name(config.model_category.value),
        config=config,
        train_metrics=path_to_best_epoch_train,
        val_metrics=path_to_best_epoch_val,
        test_metrics=path_to_best_epoch_test)
    assert (config.outputs_folder /
            get_html_report_name(config.model_category.value)).exists()

    report_name_multilabel = f"{config.model_category.value}_multilabel"
    generate_classification_multilabel_notebook(
        result_notebook=config.outputs_folder /
        get_ipynb_report_name(report_name_multilabel),
        config=config,
        train_metrics=path_to_best_epoch_train,
        val_metrics=path_to_best_epoch_val,
        test_metrics=path_to_best_epoch_test)
    assert (config.outputs_folder /
            get_html_report_name(report_name_multilabel)).exists()