Example #1
def test_unet2d_encode(num_patches: int,
                       num_channels: int,
                       num_output_channels: int,
                       is_downsampling: bool,
                       image_shape: TupleInt2) -> None:
    """
    Test if the Encode block of a UNet3D works correctly when passing in kernels that only operate in X and Y.
    """
    set_random_seed(1234)
    layer = UNet3D.UNetEncodeBlock((num_channels, num_output_channels),
                                   kernel_size=(1, 3, 3),
                                   downsampling_stride=(1, 2, 2) if is_downsampling else 1)
    input_shape = (num_patches, num_channels) + (1,) + image_shape
    input = torch.rand(*input_shape).float()
    output = layer(input)

    def output_image_size(input_image_size: int) -> int:
        # If max pooling is added, it is done with a kernel size of 2, shrinking the image by a factor of 2
        image_shrink_factor = 2 if is_downsampling else 1
        return input_image_size // image_shrink_factor

    # Expected output shape:
    # The first dimension (patches) should be retained unchanged.
    # We should get as many output channels as requested.
    # The UNet is defined as running over degenerate 3D images with Z=1; this should be preserved.
    # The two trailing dimensions are the adjusted image dimensions.
    expected_output_shape = (num_patches, num_output_channels, 1,
                             output_image_size(image_shape[0]), output_image_size(image_shape[1]))
    assert output.shape == expected_output_shape
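# The test above receives its sizes as arguments. A minimal sketch of the pytest parametrization it
# presumably relies on; the concrete values below are illustrative assumptions and are not taken from
# the original test suite.
import pytest

@pytest.mark.parametrize("num_patches", [1, 3])
@pytest.mark.parametrize("num_channels", [1])
@pytest.mark.parametrize("num_output_channels", [2])
@pytest.mark.parametrize("is_downsampling", [False, True])
@pytest.mark.parametrize("image_shape", [(32, 48)])
def test_unet2d_encode_sketch(num_patches, num_channels, num_output_channels,
                              is_downsampling, image_shape):
    # Delegates to the test body above for each parameter combination.
    test_unet2d_encode(num_patches, num_channels, num_output_channels, is_downsampling, image_shape)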
Example #2
def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results, _ = model_train_unittest(config, dirs=test_output_dirs)

    actual_train_loss = results.get_metric(is_training=True, metric_type=MetricType.LOSS.value)[-1]
    actual_val_loss = results.get_metric(is_training=False, metric_type=MetricType.LOSS.value)[-1]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
    def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]:
        pipeline = create_inference_pipeline(config=config,
                                             checkpoint_paths=checkpoint_paths)

        if pipeline is None:
            return None

        # for mypy
        assert isinstance(pipeline, ScalarInferencePipelineBase)

        ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing")
        ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
            shuffle=False,
            batch_size=1,
            num_dataload_workers=0
        )

        logging.info(f"Starting to evaluate model on {data_split.value} set.")
        metrics_dict = create_metrics_dict_for_scalar_models(config)
        for sample in ds:
            result = pipeline.predict(sample)
            model_output = result.posteriors
            label = result.labels.to(device=model_output.device)
            sample_id = result.subject_ids[0]
            compute_scalar_metrics(metrics_dict,
                                   subject_ids=[sample_id],
                                   model_output=model_output,
                                   labels=label,
                                   loss_type=config.loss_type)
            logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

        average = metrics_dict.average(across_hues=False)
        logging.info(average.to_string())

        return metrics_dict
Example #4
def test_rnn_classifier_via_config_2(test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results = model_train(config)

    actual_train_loss = results.train_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    actual_val_loss = results.val_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
    assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) \
           == config.get_total_number_of_save_epochs()
    assert np.allclose(results.optimal_temperature_scale_values_per_checkpoint_epoch, [0.97], rtol=0.1)
Example #5
    def test_epoch(
            test_epoch: int,
            run_recovery: Optional[RunRecovery]) -> Optional[MetricsDict]:
        pipeline = create_inference_pipeline(config, test_epoch, run_recovery)

        if pipeline is None:
            return None

        # for mypy
        assert isinstance(pipeline, ScalarInferencePipelineBase)

        ml_util.set_random_seed(config.get_effective_random_seed(),
                                "Model Testing")
        ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
            shuffle=False, batch_size=1, num_dataload_workers=0)

        logging.info(
            f"Starting to evaluate model from epoch {test_epoch} on {data_split.value} set."
        )
        metrics_dict = create_metrics_dict_from_config(config)
        for sample in ds:
            result = pipeline.predict(sample)
            # Since batch size is 1, we only have 1 item in each of the fields in result
            sample_id, label_gpu, model_output = result.subject_ids[
                0], result.labels, result.model_outputs

            compute_scalar_metrics(metrics_dict, [sample_id], model_output,
                                   label_gpu, config.loss_type)
            logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

        average = metrics_dict.average(across_hues=False)
        logging.info(average.to_string())

        return metrics_dict
Example #6
def test_unet2d_decode(num_patches: int,
                       image_shape: TupleInt3) -> None:
    """
    Test if the Decode block of a UNet3D creates tensors of the expected size when the kernels only operate in
    X and Y.
    """
    set_random_seed(1234)
    num_input_channels = image_shape[0]
    num_output_channels = num_input_channels // 2
    upsample_layer = UNet2D.UNetDecodeBlock((num_input_channels, num_output_channels),
                                            upsample_kernel_size=(1, 4, 4),
                                            upsampling_stride=(1, 2, 2))
    encode_layer = UNet2D.UNetEncodeBlockSynthesis(channels=(num_output_channels, num_output_channels),
                                                   kernel_size=(1, 3, 3))

    dim_z = 1
    input_shape = (num_patches, num_input_channels, dim_z, image_shape[1], image_shape[2])
    input_tensor = torch.rand(*input_shape).float()
    skip_connection = torch.zeros((num_patches, num_output_channels, dim_z, image_shape[1] * 2, image_shape[2] * 2))
    output = encode_layer(upsample_layer(input_tensor), skip_connection)

    def output_image_size(i: int) -> int:
        return image_shape[i] * 2

    # Expected output shape:
    # The first dimension (patches) should be retained unchanged.
    # We should get as many output channels as requested.
    # The UNet is defined as running over degenerate 3D images with Z=1; this should be preserved.
    # The two trailing dimensions are the adjusted image dimensions.
    expected_output_shape = (num_patches, num_output_channels, dim_z, output_image_size(1), output_image_size(2))
    assert output.shape == expected_output_shape
def test_set_temperature() -> None:
    """
    Test to make sure a temperature scale parameter that optimizes calibration of the logits is learnt
    """
    ml_util.set_random_seed(0)
    ece_loss_fn = ECELoss(activation=torch.nn.functional.sigmoid)
    loss_fn = BCEWithLogitsLoss()
    model: ModelWithTemperature = ModelWithTemperature(
        model=IdentityModel(),
        temperature_scaling_config=TemperatureScalingConfig(lr=0.1,
                                                            max_iter=10))
    # Temperature should not be learnt during model training
    assert model.temperature.requires_grad is False

    def criterion_fn(
            _logits: torch.Tensor,
            _labels: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        return loss_fn(_logits, _labels), ece_loss_fn(_logits, _labels)

    labels = torch.rand(size=(5, 1))
    logits = torch.ones_like(labels)

    before_loss, before_ece = criterion_fn(logits, labels)
    optimal_temperature = model.set_temperature(logits,
                                                labels,
                                                criterion_fn,
                                                use_gpu=False)
    after_loss, after_ece = criterion_fn(model(logits), labels)
    assert after_loss.item() < before_loss.item()
    assert after_ece.item() < before_ece.item()
    assert np.isclose(optimal_temperature, 1.44, rtol=0.1)
    assert model.temperature.requires_grad is False
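# For reference, a minimal sketch of the scaling that a wrapper like ModelWithTemperature is assumed
# to apply: the wrapped model's logits are divided by a single learned temperature parameter. This is
# the standard temperature scaling recipe, not a copy of the InnerEye implementation.
import torch

class TemperatureScalerSketch(torch.nn.Module):
    """Illustrative only: divides logits by a learnable temperature (T > 1 softens, T < 1 sharpens)."""

    def __init__(self) -> None:
        super().__init__()
        # Kept out of the main training graph, mirroring the requires_grad=False assertion above.
        self.temperature = torch.nn.Parameter(torch.ones(1), requires_grad=False)

    def forward(self, logits: torch.Tensor) -> torch.Tensor:
        return logits / self.temperature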
def set_random_seed_for_dataloader_worker(worker_id: int) -> None:
    """
    Set the seed for the random number generators of Python and numpy.
    """
    # Derive the seed from torch.initial_seed() plus the worker_id offset, taking the result
    # modulo 2**32 because numpy requires seeds to fit into 32 bits.
    random_seed = (torch.initial_seed() + worker_id) % (2**32)
    ml_util.set_random_seed(random_seed, f"Data loader worker ({worker_id})")
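# A sketch of how a function like the one above is typically wired into a PyTorch DataLoader via
# worker_init_fn, so that every worker process gets a distinct but reproducible seed. The dataset
# below is a placeholder for illustration only.
import torch
from torch.utils.data import DataLoader, TensorDataset

placeholder_dataset = TensorDataset(torch.arange(16, dtype=torch.float32))
loader = DataLoader(placeholder_dataset,
                    batch_size=4,
                    num_workers=2,
                    worker_init_fn=set_random_seed_for_dataloader_worker)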
Example #9
def test_cropped_sample(use_mask: bool) -> None:
    ml_util.set_random_seed(1)
    image_size = [4] * 3
    crop_size = (2, 2, 2)
    center_size = (1, 1, 1)

    # create small image sample for random cropping
    image = np.random.uniform(size=[1] + image_size)
    labels = np.zeros(shape=[2] + image_size)
    # Two foreground points in the corners at (0, 0, 0) and (3, 3, 3)
    labels[0] = 1
    labels[0, 0, 0, 0] = 0
    labels[0, 3, 3, 3] = 0
    labels[1, 0, 0, 0] = 1
    labels[1, 3, 3, 3] = 1
    crop_slicer: Optional[slice]
    if use_mask:
        # If mask is used, the cropping center point should be inside the mask.
        # Create a mask that has exactly 1 point of overlap with the labels,
        # that point must then be the center
        mask = np.zeros(shape=image_size, dtype=ImageDataType.MASK.value)
        mask[3, 3, 3] = 1
        expected_center: Optional[List[int]] = [3, 3, 3]
        crop_slicer = slice(2, 4)
    else:
        mask = np.ones(shape=image_size, dtype=ImageDataType.MASK.value)
        expected_center = None
        crop_slicer = None

    sample = Sample(image=image,
                    labels=labels,
                    mask=mask,
                    metadata=DummyPatientMetadata)

    for _ in range(0, 100):
        cropped_sample = CroppingDataset.create_random_cropped_sample(
            sample=sample,
            crop_size=crop_size,
            center_size=center_size,
            class_weights=[0, 1])

        if expected_center is not None:
            assert list(cropped_sample.center_indices
                        ) == expected_center  # type: ignore
            assert np.array_equal(
                cropped_sample.image, sample.image[:, crop_slicer, crop_slicer,
                                                   crop_slicer])
            assert np.array_equal(
                cropped_sample.labels, sample.labels[:, crop_slicer,
                                                     crop_slicer, crop_slicer])
            assert np.array_equal(
                cropped_sample.mask, sample.mask[crop_slicer, crop_slicer,
                                                 crop_slicer])
        else:
            # The crop center point must be any point that has a positive foreground label
            center = cropped_sample.center_indices
            print("Center point chosen: {}".format(center))
            assert labels[1, center[0], center[1], center[2]] != 0
Example #10
def create_valid_image() -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    ml_util.set_random_seed(1)
    valid_image_4d = np.random.uniform(size=((5, ) + image_size)) * 10
    valid_mask = np.random.randint(2, size=image_size)
    class_assignments = np.random.randint(2, size=image_size)
    valid_labels = np.zeros((number_of_classes, ) + image_size)
    for c in range(number_of_classes):
        valid_labels[c, class_assignments == c] = 1
    return valid_image_4d, valid_labels, valid_mask
def test_mean_teacher_model() -> None:
    """
    Test training and weight updates of the mean teacher model computation.
    """
    def _get_parameters_of_model(
            model: Union[torch.nn.Module, DataParallelModel]) -> Any:
        """
        Returns the iterator of model parameters
        """
        if isinstance(model, DataParallelModel):
            return model.module.parameters()
        else:
            return model.parameters()

    config = DummyClassification()
    config.num_epochs = 1
    # Set the train batch size to be arbitrarily big to ensure we have only one training step
    # i.e. one mean teacher update.
    config.train_batch_size = 100
    # Train without mean teacher
    model_train(config)

    # Retrieve the weight after one epoch
    model = create_model_with_temperature_scaling(config)
    print(config.get_path_to_checkpoint(1))
    _ = model_util.load_checkpoint(model, config.get_path_to_checkpoint(1))
    model_weight = next(_get_parameters_of_model(model))

    # Get the starting weight of the mean teacher model
    ml_util.set_random_seed(config.get_effective_random_seed())
    _ = create_model_with_temperature_scaling(config)
    mean_teach_model = create_model_with_temperature_scaling(config)
    initial_weight_mean_teacher_model = next(
        _get_parameters_of_model(mean_teach_model))

    # Now train with mean teacher and check the update of the weight
    alpha = 0.999
    config.mean_teacher_alpha = alpha
    model_train(config)

    # Retrieve weight of mean teacher model saved in the checkpoint
    mean_teacher_model = create_model_with_temperature_scaling(config)
    _ = model_util.load_checkpoint(
        mean_teacher_model,
        config.get_path_to_checkpoint(1, for_mean_teacher_model=True))
    result_weight = next(_get_parameters_of_model(mean_teacher_model))
    # Retrieve the associated student weight
    _ = model_util.load_checkpoint(model, config.get_path_to_checkpoint(1))
    student_model_weight = next(_get_parameters_of_model(model))

    # Assert that the student weight corresponds to the weight of a simple training without mean teacher
    # computation
    assert student_model_weight.allclose(model_weight)

    # Check the update of the parameters
    assert torch.all(alpha * initial_weight_mean_teacher_model +
                     (1 - alpha) * student_model_weight == result_weight)
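# The final assertion above checks an exponential moving average (EMA) update of the teacher weights.
# A minimal sketch of that update rule applied parameter-wise; this is a generic EMA helper under the
# stated alpha convention, not the InnerEye implementation.
import torch

def update_mean_teacher_sketch(teacher: torch.nn.Module, student: torch.nn.Module, alpha: float) -> None:
    """Illustrative: teacher_param = alpha * teacher_param + (1 - alpha) * student_param."""
    with torch.no_grad():
        for teacher_param, student_param in zip(teacher.parameters(), student.parameters()):
            teacher_param.mul_(alpha).add_(student_param, alpha=1 - alpha)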
Example #12
def test_valid_class_weights(class_weights: List[float]) -> None:
    """
    Produce a large number of crops and make sure the crop center class proportions respect class weights
    """
    ml_util.set_random_seed(1)
    num_classes = len(valid_labels)
    image = np.zeros_like(valid_image_4d)
    labels = np.zeros_like(valid_labels)
    class0, class1, class2 = non_empty_classes = [0, 2, 4]
    labels[class0] = 1
    labels[class0][3, 3, 3] = 0
    labels[class0][3, 2, 3] = 0
    labels[class1][3, 3, 3] = 1
    labels[class2][3, 2, 3] = 1

    mask = np.ones_like(valid_mask)
    sample = Sample(image=image,
                    labels=labels,
                    mask=mask,
                    metadata=DummyPatientMetadata)

    crop_size = (1, 1, 1)
    total_crops = 200
    sampled_label_center_distribution = np.zeros(num_classes)

    # If there is no class that has a non-zero weight and is present in the sample, there is no valid
    # way to select a class, so we expect an exception to be thrown.
    if class_weights is not None and sum(class_weights[c]
                                         for c in non_empty_classes) == 0:
        with pytest.raises(ValueError):
            augmentation.random_crop(sample, crop_size, class_weights)
        return

    for _ in range(0, total_crops):
        crop_sample, center, _ = augmentation.random_crop(
            sample, crop_size, class_weights)
        sampled_class = list(labels[:, center[0], center[1],
                                    center[2]]).index(1)
        sampled_label_center_distribution[sampled_class] += 1

    sampled_label_center_distribution /= total_crops

    if class_weights is None:
        weight = 1.0 / len(non_empty_classes)
        expected_label_center_distribution = [
            weight if c in non_empty_classes else 0.0
            for c in range(number_of_classes)
        ]
    else:
        total = sum(class_weights[c] for c in non_empty_classes)
        expected_label_center_distribution = [
            class_weights[c] / total if c in non_empty_classes else 0.0
            for c in range(number_of_classes)
        ]
    assert np.allclose(sampled_label_center_distribution,
                       expected_label_center_distribution,
                       atol=0.1)
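# A sketch of the weighted center-class selection that this test exercises: restrict the weights to
# classes that actually occur in the sample, renormalize, then draw. It mirrors the expected
# distribution computed above and the ValueError branch; it is an illustration, not the
# augmentation.random_crop implementation.
import numpy as np

def choose_center_class_sketch(class_weights, non_empty_classes):
    """Illustrative: pick the crop center class proportionally to its weight among non-empty classes."""
    weights = np.array([class_weights[c] for c in non_empty_classes], dtype=float)
    if weights.sum() == 0:
        raise ValueError("No class with a positive weight is present in the sample.")
    return int(np.random.choice(non_empty_classes, p=weights / weights.sum()))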
Example #13
def test_standardize_features() -> None:
    """
    Test if the non-image features can be normalized to mean 0, std 1.
    """
    set_random_seed(1234)
    expected_mean = torch.tensor([[123, 2, 3], [4, 5, 6]])
    expected_std = torch.tensor([[0, 2, 3], [3, 4, 4]])
    feature_size = (2, 3)
    sequences: List[ClassificationItemSequence] = []
    for s in range(1000):
        items = []
        seq_length = torch.randint(low=3, high=6, size=(1, )).item()
        for i in range(seq_length):  # type: ignore
            # All features are random Gaussian, apart from feature 0 which is constant.
            # Normalization must be able to deal with constant features when dividing by standard deviation.
            features = torch.randn(size=feature_size, dtype=torch.float32) * expected_std + expected_mean
            # Randomly put some infinite values in the vector
            if torch.rand(1) > 0.9:
                features[s % 2, s % 3] = np.inf
            features[0, 0] = expected_mean[0, 0]
            item = ScalarItem(metadata=GeneralSampleMetadata(id="foo"),
                              numerical_non_image_features=features,
                              categorical_non_image_features=features,
                              label=torch.tensor([]),
                              images=torch.tensor([]),
                              segmentations=torch.tensor([]))
            items.append(item)
        sequences.append(ClassificationItemSequence(id="foo", items=items))
    mean_std = FeatureStatistics.from_data_sources(sequences)
    assert mean_std.mean.shape == feature_size
    assert mean_std.std.shape == feature_size

    assert_tensors_equal(mean_std.mean, expected_mean, 0.07)
    assert_tensors_equal(mean_std.std, expected_std, 0.07)

    # After normalization, mean should be 0, and std should be 1.
    standardized_seq = mean_std.standardize(sequences)
    mean_std_from_standardized = FeatureStatistics.from_data_sources(
        standardized_seq)
    # After normalization, the mean should be 0, apart from the constant feature, which should be left untouched,
    # hence its mean is the original feature value.
    expected_mean_from_standardized = torch.zeros(feature_size)
    expected_mean_from_standardized[0, 0] = expected_mean[0, 0]
    expected_std_from_standardized = torch.ones(feature_size)
    expected_std_from_standardized[0, 0] = 0.0
    assert_tensors_equal(mean_std_from_standardized.mean,
                         expected_mean_from_standardized,
                         abs=1e-5)
    assert_tensors_equal(mean_std_from_standardized.std,
                         expected_std_from_standardized,
                         abs=1e-5)
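# A minimal sketch of the standardization this test exercises, including the guard for constant
# features whose standard deviation is zero (those are left untouched, as asserted above). This is an
# illustration of the behavior, not the FeatureStatistics implementation.
import torch

def standardize_sketch(features: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    """Illustrative: returns (features - mean) / std, leaving constant (std == 0) features unchanged."""
    safe_std = torch.where(std == 0, torch.ones_like(std), std)
    standardized = (features - mean) / safe_std
    return torch.where(std == 0, features, standardized)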
def on_validation_epoch_start(self) -> None:
    """
    Stores the state of all random number generators, and resets them all to a fixed seed. This is done to ensure
    that any randomization when loading validation data is consistent during training. In particular, this ensures
    that drawing random patches for segmentation model training is giving a validation set that does not fluctuate.
    """
    # Store the random number generator state, so that the next training epoch starts from here.
    self.random_state = RandomStateSnapshot.snapshot_random_state()
    # reset the random state for validation, so that we get consistent behaviour when drawing random patches
    # when validating segmentation models.
    seed = self.effective_random_seed
    set_random_seed(seed, "Validation")
def test_cropping_dataset_has_reproducible_randomness(cropping_dataset: CroppingDataset,
                                                      num_dataload_workers: int) -> None:
    cropping_dataset.dataset_indices = [1, 2] * 2
    expected_center_indices = None
    for k in range(3):
        ml_util.set_random_seed(1)
        loader = cropping_dataset.as_data_loader(shuffle=True, batch_size=4,
                                                 num_dataload_workers=num_dataload_workers)
        for i, item in enumerate(loader):
            item = CroppedSample.from_dict(sample=item)
            if expected_center_indices is None:
                expected_center_indices = item.center_indices
            else:
                assert np.array_equal(expected_center_indices, item.center_indices)
Example #16
def main(args: CheckPatchSamplingConfig) -> None:
    # Identify paths to inputs and outputs
    commandline_args = {
        "train_batch_size": 1,
        "local_dataset": Path(args.local_dataset)
    }
    output_folder = Path(args.output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # Create a config file
    config = ModelConfigLoader[SegmentationModelBase](
    ).create_model_config_from_name(args.model_name,
                                    overrides=commandline_args)
    config.show_patch_sampling = args.number_samples
    ml_util.set_random_seed(config.random_seed)
    visualize_random_crops_for_dataset(config, output_folder=output_folder)
def on_validation_epoch_start(self) -> None:
    """
    Stores the state of all random number generators, and resets them all to a fixed seed. This is done to ensure
    that any randomization when loading validation data is consistent during training. In particular, this ensures
    that drawing random patches for segmentation model training is giving a validation set that does not fluctuate.
    """
    self.val_timers.reset()
    # In Lightning, the validation epoch is running "inside" the training. If we get here, it means that training
    # is done for this epoch, even though the on_training_epoch hook has not yet been called.
    self.train_timers.epoch_end()
    # Store the random number generator state, so that the next training epoch starts from here.
    self.random_state = RandomStateSnapshot.snapshot_random_state()
    # reset the random state for validation, so that we get consistent behaviour when drawing random patches
    # when validating segmentation models.
    seed = self.effective_random_seed
    set_random_seed(seed, "Validation")
def test_visualize_patch_sampling_2d(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Tests if patch sampling works for 2D images.
    :param test_output_dirs: Directory fixture to write the test output files to.
    """
    set_random_seed(0)
    shape = (1, 20, 30)
    foreground_classes = ["fg"]
    class_weights = equally_weighted_classes(foreground_classes)
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(1, 5, 10),
                                   class_weights=class_weights)
    image = np.random.rand(1, *shape).astype(np.float32) * 1000
    mask = np.ones(shape)
    labels = np.zeros((len(class_weights), ) + shape)
    labels[1, 0, 8:12, 5:25] = 1
    labels[0] = 1 - labels[1]
    output_folder = Path(test_output_dirs.root_dir)
    image_header = None
    sample = Sample(image=image,
                    mask=mask,
                    labels=labels,
                    metadata=PatientMetadata(patient_id='123',
                                             image_header=image_header))
    heatmap = visualize_random_crops(sample,
                                     config,
                                     output_folder=output_folder)
    expected_folder = full_ml_test_data_path("patch_sampling")
    expected_heatmap = expected_folder / "sampling_2d.npy"
    # To update the stored results, uncomment this line:
    # np.save(str(expected_heatmap), heatmap)
    assert np.allclose(heatmap, np.load(
        str(expected_heatmap))), "Patch sampling created a different heatmap."
    assert len(list(output_folder.rglob("*.nii.gz"))) == 0
    assert len(list(output_folder.rglob("*.png"))) == 1
    actual_file = output_folder / "123_sampled_patches.png"
    assert_file_exists(actual_file)
    expected = expected_folder / "sampling_2d.png"
    # To update the stored results, uncomment this line:
    # expected.write_bytes(actual_file.read_bytes())
    if not is_running_on_azure():
        # When running on the Azure build agents, it appears that the bounding box of the images
        # is slightly different than on local runs, even with equal dpi settings.
        # It says: Image sizes don't match: actual (685, 469), expected (618, 424)
        # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
        assert_binary_files_match(actual_file, expected)
Example #19
def test_image_encoder_with_segmentation(
        test_output_dirs: OutputFolderForTests, encode_channels_jointly: bool,
        aggregation_type: AggregationType,
        imaging_feature_type: ImagingFeatureType) -> None:
    """
    Test if the image encoder networks can be trained on segmentations from HDF5.
    """
    logging_to_stdout()
    set_random_seed(0)
    scan_size = (6, 64, 60)
    dataset_contents = """subject,channel,path,label
    S1,week0,scan1.h5,
    S1,week1,scan2.h5,True
    S2,week0,scan3.h5,
    S2,week1,scan4.h5,False
    S3,week0,scan5.h5,
    S3,week1,scan6.h5,True
    S4,week0,scan7.h5,
    S4,week1,scan8.h5,True
    """
    config = ImageEncoder(encode_channels_jointly=encode_channels_jointly,
                          imaging_feature_type=imaging_feature_type,
                          should_validate=False,
                          aggregation_type=aggregation_type,
                          scan_size=scan_size)
    # This fails with 16bit precision, saying "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are
    # unsafe to autocast. Many models use a sigmoid layer right before the binary cross entropy layer. In this case,
    # combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits or
    # torch.nn.BCEWithLogitsLoss.  binary_cross_entropy_with_logits and BCEWithLogits are safe to autocast."
    config.use_mixed_precision = False
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = 1
    config.local_dataset = Path()
    config.dataset_data_frame = pd.read_csv(StringIO(dataset_contents),
                                            sep=",",
                                            dtype=str)
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](
        images=np.zeros(scan_size, dtype=np.float32),
        segmentations=np.ones(scan_size, dtype=np.uint8))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats',
                    return_value=image_and_seg):
        azure_config = get_default_azure_config()
        azure_config.train = True
        MLRunner(config, azure_config).run()
Example #20
def test_basic_layer_forward_and_backward_pass() -> None:
    set_random_seed(1234)
    layer = BasicLayer(channels=(input_channels, output_channels),
                       kernel_size=5,
                       padding=PaddingMode.NoPadding,
                       dilation=1)

    output_tensor = layer(input_tensor)
    criterion = MSELoss()
    loss = torch.sqrt(criterion(output_tensor, label_tensor))
    loss.backward()

    # Verify that output tensor has no negative values after the ReLU operation
    assert np.all(output_tensor.detach().numpy() >= 0.0)

    # Verify the loss value (assertion value is computed without the in-place operation)
    # The loss value is verified for both relu_in_place=True and relu_in_place=False cases.
    assert loss.item() == pytest.approx(0.6774, abs=1e-04)
def test_plot_overlay(test_output_dirs: OutputFolderForTests,
                      dimension: int) -> None:
    set_random_seed(0)
    shape = (10, 30, 30)
    image = np.random.rand(*shape).astype(np.float32) * 1000
    mask = np.zeros(shape).flatten()
    for i in range(len(mask)):
        mask[i] = i
    mask = mask.reshape(shape)
    plt.figure()
    scan_with_transparent_overlay(image, mask, dimension, shape[dimension] // 2, spacing=(1.0, 1.0, 1.0))
    file = Path(test_output_dirs.root_dir) / "plot.png"
    resize_and_save(5, 5, file)
    assert file.exists()
    expected = full_ml_test_data_path("patch_sampling") / f"overlay_{dimension}.png"
    # To update the stored results, uncomment this line:
    # expected.write_bytes(file.read_bytes())
    assert_binary_files_match(file, expected)
Example #22
def test_image_encoder_with_segmentation(
        test_output_dirs: OutputFolderForTests, encode_channels_jointly: bool,
        aggregation_type: AggregationType,
        imaging_feature_type: ImagingFeatureType) -> None:
    """
    Test if the image encoder networks can be trained on segmentations from HDF5.
    """
    logging_to_stdout()
    set_random_seed(0)
    scan_size = (6, 64, 60)
    dataset_contents = """subject,channel,path,label
    S1,week0,scan1.h5,
    S1,week1,scan2.h5,True
    S2,week0,scan3.h5,
    S2,week1,scan4.h5,False
    S3,week0,scan5.h5,
    S3,week1,scan6.h5,True
    S4,week0,scan7.h5,
    S4,week1,scan8.h5,True
    """
    config = ImageEncoder(encode_channels_jointly=encode_channels_jointly,
                          imaging_feature_type=imaging_feature_type,
                          should_validate=False,
                          aggregation_type=aggregation_type,
                          scan_size=scan_size)
    config.use_mixed_precision = True
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = 1
    config.local_dataset = Path()
    config.dataset_data_frame = pd.read_csv(StringIO(dataset_contents),
                                            sep=",",
                                            dtype=str)
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](
        images=np.zeros(scan_size, dtype=np.float32),
        segmentations=np.ones(scan_size, dtype=np.uint8))
    with mock.patch("InnerEye.ML.run_ml.is_offline_run_context",
                    return_value=True):
        with mock.patch(
                'InnerEye.ML.utils.io_util.load_image_in_known_formats',
                return_value=image_and_seg):
            azure_config = get_default_azure_config()
            azure_config.train = True
            MLRunner(config, azure_config=azure_config).run()
Example #23
def test_dataparallel_criterion(use_mixed_precision: bool) -> None:
    set_random_seed(1)
    num_batches = torch.cuda.device_count()
    array_shape = [num_batches, 2, 8, 4, 8]
    segmentation = torch.rand(array_shape).cuda()
    ground_truth = torch.rand(array_shape).cuda()
    if use_mixed_precision:
        segmentation = segmentation.to(dtype=torch.float16)
    target_loss_values = torch.zeros(num_batches).cuda()

    # Reference: sequential computation without any multi-GPU parallelisation
    for ii in range(num_batches):
        loss_fn = SoftDiceLoss()
        loss = loss_fn(output=segmentation[ii:ii + 1],
                       target=ground_truth[ii:ii + 1])
        target_loss_values[ii] = loss

    # Use parallel criterion
    parallel_loss_fn = DataParallelCriterion(
        loss_fn,
        device_ids=list(range(torch.cuda.device_count())),
        use_mixed_precision=use_mixed_precision)
    segmentation_as_parallel = [
        segmentation[ii:ii + 1].to("cuda:{}".format(ii))
        for ii in range(num_batches)
    ]
    computed_loss_values = \
        parallel_loss_fn(segmentation_as_parallel[0], ground_truth) if num_batches == 1 \
            else parallel_loss_fn(segmentation_as_parallel, ground_truth)
    assert isinstance(computed_loss_values, torch.Tensor)
    if num_batches == 1:
        target_loss_values = target_loss_values[0]
    diff = (target_loss_values - computed_loss_values).abs().sum().item()
    # Even with autocast turned on, the result tensor always comes back as float32, and we can't run any more
    # detailed asserts on it. Best thing to do is to check if there are signs of lower precision computation.
    if use_mixed_precision:
        assert diff > 2e-5
    else:
        assert diff < 1e-10
def test_random_state_snapshot() -> None:
    """
    Test get and reset all random states via RandomStateSnapshot classes.
    """
    def _get_random_ints_from_libs(
    ) -> Tuple[List[int], np.ndarray, torch.Tensor]:
        _python_random = [random.randint(0, 100) for _ in range(0, 20)]
        _numpy_random = np.random.randint(0, 100, 20)
        _torch_random = torch.randint(0, 100, (20, 1))
        return _python_random, _numpy_random, _torch_random

    # set the random state
    ml_util.set_random_seed(0)
    # take a snapshot of the random state in its original state
    random_state = RandomStateSnapshot.snapshot_random_state()
    # create random numbers using python, numpy, and torch
    original_python_random, original_numpy_random, original_torch_random = _get_random_ints_from_libs(
    )
    # re-set the random state
    ml_util.set_random_seed(0)

    # validate that the current random state is accurately captured
    assert random.getstate() == random_state.random_state
    for i, x in enumerate(np.random.get_state()):
        assert np.array_equal(x, random_state.numpy_random_state[i])
    assert torch.equal(torch.random.get_rng_state(),
                       random_state.torch_random_state)
    assert random_state.torch_cuda_random_state is None

    # change the random state
    ml_util.set_random_seed(10)
    # create random numbers using python, numpy, and torch
    new_python_random, new_numpy_random, new_torch_random = _get_random_ints_from_libs(
    )
    # check that a new state was used to create these random numbers
    assert not new_python_random == original_python_random
    assert not np.array_equal(new_numpy_random, original_numpy_random)
    assert not torch.equal(new_torch_random, original_torch_random)

    # restore the original random state
    random_state.restore_random_state()
    # get restored random variables
    restored_python_random, restored_numpy_random, restored_torch_random = _get_random_ints_from_libs(
    )
    # check restored variables match the original
    assert restored_python_random == original_python_random
    assert np.array_equal(restored_numpy_random, original_numpy_random)
    assert torch.equal(restored_torch_random, original_torch_random)
Example #25
def segmentation_model_test_epoch(
        config: SegmentationModelBase,
        data_split: ModelExecutionMode,
        test_epoch: int,
        results_folder: Path,
        epoch_and_split: str,
        run_recovery: Optional[RunRecovery] = None) -> Optional[List[float]]:
    """
    The main testing loop for a given epoch. It loads the model and datasets, then proceeds to test the model.
    Returns a list with an entry for each image in the dataset. The entry is the average Dice score,
    where the average is taken across all non-background structures in the image.
    :param test_epoch: The last trained epoch of the model.
    :param config: The arguments which specify all required information.
    :param data_split: Is the model evaluated on train, test, or validation set?
    :param results_folder: The folder where to store the results
    :param epoch_and_split: A string that should uniquely identify the epoch and the data split (train/val/test).
    :param run_recovery: Run recovery data if applicable.
    :raises TypeError: If the arguments are of the wrong type.
    :raises ValueError: When there are issues loading the model.
    :return: A list with the mean dice score (across all structures apart from background) for each image.
    """
    ml_util.set_random_seed(config.get_effective_random_seed(),
                            "Model testing")
    results_folder = Path(results_folder)
    results_folder.mkdir(exist_ok=True)

    test_dataframe = config.get_dataset_splits()[data_split]
    test_csv_path = results_folder / STORED_CSV_FILE_NAMES[data_split]
    test_dataframe.to_csv(path_or_buf=test_csv_path, index=False)
    logging.info("Results directory: {}".format(results_folder))
    logging.info(
        f"Starting evaluation of model {config.model_name} on {epoch_and_split}"
    )

    # Write the dataset id and ground truth ids into the results folder
    store_run_information(results_folder, config.azure_dataset_id,
                          config.ground_truth_ids, config.image_channels)

    ds = config.get_torch_dataset_for_inference(data_split)

    inference_pipeline = create_inference_pipeline(config=config,
                                                   epoch=test_epoch,
                                                   run_recovery=run_recovery)

    if inference_pipeline is None:
        # This will happen if there is no checkpoint for the given epoch, in either the recovered run (if any) or
        # the current one.
        return None

    # for mypy
    assert isinstance(inference_pipeline, FullImageInferencePipelineBase)

    # Deploy the trained model on a set of images and store output arrays.
    for sample_index, sample in enumerate(ds, 1):
        logging.info(f"Predicting for image {sample_index} of {len(ds)}...")
        sample = Sample.from_dict(sample=sample)
        inference_result = inference_pipeline.predict_and_post_process_whole_image(
            image_channels=sample.image,
            mask=sample.mask,
            patient_id=sample.patient_id,
            voxel_spacing_mm=sample.metadata.image_header.spacing)
        store_inference_results(inference_result=inference_result,
                                config=config,
                                results_folder=results_folder,
                                image_header=sample.metadata.image_header)

    # Evaluate model generated segmentation maps.
    num_workers = min(cpu_count(), len(ds))
    with Pool(processes=num_workers) as pool:
        pool_outputs = pool.map(
            partial(evaluate_model_predictions,
                    config=config,
                    dataset=ds,
                    results_folder=results_folder), range(len(ds)))

    average_dice = list()
    metrics_writer = MetricsPerPatientWriter()
    for (patient_metadata, metrics_for_patient) in pool_outputs:
        # Add the Dice score for the foreground classes, stored in the default hue
        metrics.add_average_foreground_dice(metrics_for_patient)
        average_dice.append(
            metrics_for_patient.get_single_metric(MetricType.DICE))
        # Structure names do not include the background class (index 0)
        for structure_name in config.ground_truth_ids:
            dice_for_struct = metrics_for_patient.get_single_metric(
                MetricType.DICE, hue=structure_name)
            hd_for_struct = metrics_for_patient.get_single_metric(
                MetricType.HAUSDORFF_mm, hue=structure_name)
            md_for_struct = metrics_for_patient.get_single_metric(
                MetricType.MEAN_SURFACE_DIST_mm, hue=structure_name)
            metrics_writer.add(patient=str(patient_metadata.patient_id),
                               structure=structure_name,
                               dice=dice_for_struct,
                               hausdorff_distance_mm=hd_for_struct,
                               mean_distance_mm=md_for_struct)

    metrics_writer.to_csv(results_folder / METRICS_FILE_NAME)
    metrics_writer.save_aggregates_to_csv(results_folder /
                                          METRICS_AGGREGATES_FILE)
    if config.is_plotting_enabled:
        plt.figure()
        boxplot_per_structure(metrics_writer.to_data_frame(),
                              column_name=MetricsFileColumns.DiceNumeric.value,
                              title=f"Dice score for {epoch_and_split}")
        # The box plot file will be written to the output directory. AzureML will pick that up, and display
        # on the run overview page, without having to log to the run context.
        plotting.resize_and_save(5, 4, results_folder / BOXPLOT_FILE)
        plt.close()
    logging.info(
        f"Finished evaluation of model {config.model_name} on {epoch_and_split}"
    )

    return average_dice
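# The per-image score returned above is an average Dice across foreground structures. For reference,
# a minimal sketch of the Dice coefficient for a single binary structure; this is the textbook
# definition, not the metric implementation used by evaluate_model_predictions.
import numpy as np

def dice_coefficient_sketch(prediction: np.ndarray, ground_truth: np.ndarray) -> float:
    """Illustrative: 2 * |P and G| / (|P| + |G|) for binary masks; returns 1.0 when both are empty."""
    prediction = prediction.astype(bool)
    ground_truth = ground_truth.astype(bool)
    denominator = prediction.sum() + ground_truth.sum()
    if denominator == 0:
        return 1.0
    return 2.0 * np.logical_and(prediction, ground_truth).sum() / denominator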
def test_dataloader_speed(test_output_dirs: OutputFolderForTests,
                          num_dataload_workers: int, shuffle: bool) -> None:
    """
    Test how dataloaders work when using multiple processes.
    """
    ml_util.set_random_seed(0)
    # The dataset should only contain the file name stem, without extension.
    csv_string = StringIO("""subject,channel,path,value,scalar1
S1,image,4be9beed-5861-fdd2-72c2-8dd89aadc1ef
S1,label,,True,1.0
S2,image,6ceacaf8-abd2-ffec-2ade-d52afd6dd1be
S2,label,,True,2.0
S3,image,61bc9d73-9fbb-bd7d-c06b-eeffbafabcc4
S3,label,,False,3.0
S4,image,61bc9d73-9fbb-bd7d-c06b-eeffbafabcc4
S4,label,,False,3.0
""")
    args = ScalarModelBase(image_channels=[],
                           label_channels=["label"],
                           label_value_column="value",
                           non_image_feature_channels=["label"],
                           numerical_columns=["scalar1"],
                           num_dataload_workers=num_dataload_workers,
                           num_dataset_reader_workers=num_dataload_workers,
                           avoid_process_spawn_in_data_loaders=True,
                           should_validate=False)
    dataset = ScalarDataset(args,
                            data_frame=pd.read_csv(csv_string, dtype=str))
    assert len(dataset) == 4
    num_epochs = 2
    total_start_time = time.time()
    loader = dataset.as_data_loader(shuffle=shuffle, batch_size=1)
    # The order in which items are expected in each epoch, when applying shuffling, and using 1 dataloader worker
    # This was determined before making any changes to the dataloader logic
    # (that is, when the as_data_loader method returns an instance of DataLoader, rather than RepeatDataLoader)
    expected_item_order = [
        ["S2", "S1", "S4", "S3"],
        ["S4", "S3", "S1", "S2"],
    ]
    for epoch in range(num_epochs):
        actual_item_order = []
        print(f"Starting epoch {epoch}")
        epoch_start_time = time.time()
        item_start_time = time.time()
        for i, item_dict in enumerate(loader):
            item_load_time = time.time() - item_start_time
            item = ScalarItem.from_dict(item_dict)
            # noinspection PyTypeChecker
            sample_id = item.metadata[0].id  # type: ignore
            print(
                f"Loading item {i} with ID = {sample_id} in {item_load_time:0.8f} sec"
            )
            if shuffle:
                actual_item_order.append(sample_id)
            else:
                assert sample_id == f"S{i + 1}"
            if not (epoch == 0 and i == 0):
                assert item_load_time < 0.1, f"We should only see significant item load times in the first batch " \
                                             f"of the first epoch, but got loading time of {item_load_time:0.2f} sec" \
                                             f" in epoch {epoch} batch {i}"
            # Sleep a bit so that the worker process can fill in items
            if num_dataload_workers > 0:
                time.sleep(0.05)
            item_start_time = time.time()
        if shuffle and num_dataload_workers == 1:
            assert actual_item_order == expected_item_order[
                epoch], f"Item in wrong order for epoch {epoch}"
        total_epoch_time = time.time() - epoch_start_time
        print(f"Total time for epoch {epoch}: {total_epoch_time} sec")
    total_time = time.time() - total_start_time
    print(f"Total time for all epochs: {total_time} sec")
def model_train(config: ModelConfigBase,
                checkpoint_handler: CheckpointHandler) -> ModelTrainingResults:
    """
    The main training loop. It creates the model, dataset, optimizer_type, and criterion, then proceeds
    to train the model. If a checkpoint was specified, then it loads the checkpoint before resuming training.

    :param config: The arguments which specify all required information.
    :param checkpoint_handler: Checkpoint handler object to find checkpoint paths for model initialization
    :raises TypeError: If the arguments are of the wrong type.
    :raises ValueError: When there are issues loading a previous checkpoint.
    """
    # Save the dataset files for later use in cross validation analysis
    config.write_dataset_files()

    # set the random seed for all libraries
    ml_util.set_random_seed(config.get_effective_random_seed(),
                            "Patch visualization")
    # Visualize how patches are sampled for segmentation models. This changes the random generator, but we don't
    # want training to depend on how many patients we visualized, and hence set the random seed again right after.
    with logging_section(
            "Visualizing the effect of sampling random crops for training"):
        visualize_random_crops_for_dataset(config)
    ml_util.set_random_seed(config.get_effective_random_seed(),
                            "Model training")

    logging.debug("Creating the PyTorch model.")

    # Create the train loader and validation loader to load images from the dataset
    data_loaders = config.create_data_loaders()

    # Get the path to the checkpoint to recover from
    checkpoint_path = checkpoint_handler.get_recovery_path_train()

    models_and_optimizer = ModelAndInfo(
        config=config,
        model_execution_mode=ModelExecutionMode.TRAIN,
        checkpoint_path=checkpoint_path)

    # Create the main model
    # If continuing from a previous run at a specific epoch, then load the previous model.
    model_loaded = models_and_optimizer.try_create_model_and_load_from_checkpoint(
    )
    if not model_loaded:
        raise ValueError(
            "There was no checkpoint file available for the model for given start_epoch {}"
            .format(config.start_epoch))

    # Print out a detailed breakdown of layers, memory consumption and time.
    generate_and_print_model_summary(config, models_and_optimizer.model)

    # Move model to GPU and adjust for multiple GPUs
    models_and_optimizer.adjust_model_for_gpus()

    # Create the mean teacher model and move to GPU
    if config.compute_mean_teacher_model:
        mean_teacher_model_loaded = models_and_optimizer.try_create_mean_teacher_model_load_from_checkpoint_and_adjust(
        )
        if not mean_teacher_model_loaded:
            raise ValueError(
                "There was no checkpoint file available for the mean teacher model "
                f"for given start_epoch {config.start_epoch}")

    # Create optimizer
    models_and_optimizer.create_optimizer()
    if checkpoint_handler.should_load_optimizer_checkpoint():
        optimizer_loaded = models_and_optimizer.try_load_checkpoint_for_optimizer(
        )
        if not optimizer_loaded:
            raise ValueError(
                f"There was no checkpoint file available for the optimizer for given start_epoch "
                f"{config.start_epoch}")

    # Create checkpoint directory for this run if it doesn't already exist
    logging.info(f"Models are saved at {config.checkpoint_folder}")
    if not config.checkpoint_folder.is_dir():
        config.checkpoint_folder.mkdir()

    # Create the SummaryWriters for Tensorboard
    writers = create_summary_writers(config)
    config.create_dataframe_loggers()

    # Create LR scheduler
    l_rate_scheduler = SchedulerWithWarmUp(config,
                                           models_and_optimizer.optimizer)

    # Training loop
    logging.info("Starting training")
    train_results_per_epoch, val_results_per_epoch, learning_rates_per_epoch = [], [], []

    resource_monitor = None
    if config.monitoring_interval_seconds > 0:
        # initialize and start GPU monitoring
        diagnostics_events = config.logs_folder / "diagnostics"
        logging.info(
            f"Starting resource monitor, outputting to {diagnostics_events}")
        resource_monitor = ResourceMonitor(
            interval_seconds=config.monitoring_interval_seconds,
            tensorboard_folder=diagnostics_events)
        resource_monitor.start()

    gradient_scaler = GradScaler(
    ) if config.use_gpu and config.use_mixed_precision else None
    optimal_temperature_scale_values = []
    for epoch in config.get_train_epochs():
        logging.info("Starting epoch {}".format(epoch))
        save_epoch = config.should_save_epoch(
            epoch) and models_and_optimizer.optimizer is not None

        # store the learning rates used for each epoch
        epoch_lrs = l_rate_scheduler.get_last_lr()
        learning_rates_per_epoch.append(epoch_lrs)

        train_val_params: TrainValidateParameters = \
            TrainValidateParameters(data_loader=data_loaders[ModelExecutionMode.TRAIN],
                                    model=models_and_optimizer.model,
                                    mean_teacher_model=models_and_optimizer.mean_teacher_model,
                                    epoch=epoch,
                                    optimizer=models_and_optimizer.optimizer,
                                    gradient_scaler=gradient_scaler,
                                    epoch_learning_rate=epoch_lrs,
                                    summary_writers=writers,
                                    dataframe_loggers=config.metrics_data_frame_loggers,
                                    in_training_mode=True)
        training_steps = create_model_training_steps(config, train_val_params)
        train_epoch_results = train_or_validate_epoch(training_steps)
        train_results_per_epoch.append(train_epoch_results.metrics)

        metrics.validate_and_store_model_parameters(writers.train, epoch,
                                                    models_and_optimizer.model)
        # Run without adjusting weights on the validation set
        train_val_params.in_training_mode = False
        train_val_params.data_loader = data_loaders[ModelExecutionMode.VAL]
        # if temperature scaling is enabled then do not save validation metrics for the checkpoint epochs
        # as these will be re-computed after performing temperature scaling on the validation set.
        if isinstance(config, SequenceModelBase):
            train_val_params.save_metrics = not (
                save_epoch and config.temperature_scaling_config)

        training_steps = create_model_training_steps(config, train_val_params)
        val_epoch_results = train_or_validate_epoch(training_steps)
        val_results_per_epoch.append(val_epoch_results.metrics)

        if config.is_segmentation_model:
            metrics.store_epoch_stats_for_segmentation(
                config.outputs_folder, epoch, epoch_lrs,
                train_epoch_results.metrics, val_epoch_results.metrics)

        if save_epoch:
            # perform temperature scaling if required
            if isinstance(
                    config,
                    SequenceModelBase) and config.temperature_scaling_config:
                optimal_temperature, scaled_val_results = \
                    temperature_scaling_steps(config, train_val_params, val_epoch_results)
                optimal_temperature_scale_values.append(optimal_temperature)
                # overwrite the metrics for the epoch with the metrics from the temperature scaled model
                val_results_per_epoch[-1] = scaled_val_results.metrics

            models_and_optimizer.save_checkpoint(epoch)

        # Updating the learning rate should happen at the end of the training loop, so that the
        # initial learning rate will be used for the very first epoch.
        l_rate_scheduler.step()

    model_training_results = ModelTrainingResults(
        train_results_per_epoch=train_results_per_epoch,
        val_results_per_epoch=val_results_per_epoch,
        learning_rates_per_epoch=learning_rates_per_epoch,
        optimal_temperature_scale_values_per_checkpoint_epoch=
        optimal_temperature_scale_values)

    logging.info("Finished training")

    # Since we have trained the model further, let the checkpoint_handler object know so it can handle
    # checkpoints correctly.
    checkpoint_handler.additional_training_done()

    # Upload visualization directory to AML run context to be able to see it
    # in the Azure UI.
    if config.max_batch_grad_cam > 0 and config.visualization_folder.exists():
        RUN_CONTEXT.upload_folder(name=VISUALIZATION_FOLDER,
                                  path=str(config.visualization_folder))

    writers.close_all()
    config.metrics_data_frame_loggers.close_all()
    if resource_monitor:
        # stop the resource monitoring process
        logging.info(
            "Shutting down the resource monitor process. Aggregate resource utilization:"
        )
        for name, value in resource_monitor.read_aggregate_metrics():
            logging.info(f"{name}: {value}")
            if not is_offline_run_context(RUN_CONTEXT):
                RUN_CONTEXT.log(name, value)
        resource_monitor.kill()

    return model_training_results
def train_or_validate_epoch(
        training_steps: ModelTrainingStepsBase
) -> ModelOutputsAndMetricsForEpoch:
    """
    Trains or validates the model for one epoch.
    :param training_steps: Training pipeline to use.
    :returns: The results for training or validation. Result type depends on the type of model that is trained.
    """
    epoch_start_time = time()
    training_random_state = None
    train_val_params = training_steps.train_val_params
    config = training_steps.model_config
    if not train_val_params.in_training_mode:
        # take the snapshot of the existing random state
        training_random_state = RandomStateSnapshot.snapshot_random_state()
        # reset the random state for validation
        ml_util.set_random_seed(config.get_effective_random_seed(),
                                "Model validation")

    status_string = "training" if train_val_params.in_training_mode else "validation"
    item_start_time = time()
    num_load_time_warnings = 0
    num_load_time_exceeded = 0
    num_batches = 0
    total_extra_load_time = 0.0
    total_load_time = 0.0
    model_outputs_epoch = []
    for batch_index, sample in enumerate(train_val_params.data_loader):
        item_finish_time = time()
        item_load_time = item_finish_time - item_start_time
        # Having slow minibatch loading is OK in the very first batch of every epoch, where processes
        # are spawned. Later, the load time should be close to zero.
        if batch_index == 0:
            logging.info(
                f"Loaded the first minibatch of {status_string} data in {item_load_time:0.2f} sec."
            )
        elif item_load_time > MAX_ITEM_LOAD_TIME_SEC:
            num_load_time_exceeded += 1
            total_extra_load_time += item_load_time
            if num_load_time_warnings < MAX_LOAD_TIME_WARNINGS:
                logging.warning(
                    f"Loading {status_string} minibatch {batch_index} took {item_load_time:0.2f} sec. "
                    f"This can mean that there are not enough data loader worker processes, or that there "
                    f"is a "
                    f"performance problem in loading. This warning will be printed at most "
                    f"{MAX_LOAD_TIME_WARNINGS} times.")
                num_load_time_warnings += 1
        model_outputs_minibatch = training_steps.forward_and_backward_minibatch(
            sample, batch_index, train_val_params.epoch)
        model_outputs_epoch.append(model_outputs_minibatch)
        train_finish_time = time()
        logging.debug(
            f"Epoch {train_val_params.epoch} {status_string} batch {batch_index}: "
            f"Loaded in {item_load_time:0.2f}sec, "
            f"{status_string} in {(train_finish_time - item_finish_time):0.2f}sec. "
            f"Loss = {model_outputs_minibatch.loss}")
        total_load_time += item_finish_time - item_start_time
        num_batches += 1
        item_start_time = time()

    # restore the training random state when validation has finished
    if training_random_state is not None:
        training_random_state.restore_random_state()

    epoch_time_seconds = time() - epoch_start_time
    logging.info(
        f"Epoch {train_val_params.epoch} {status_string} took {epoch_time_seconds:0.2f} sec, "
        f"of which waiting for next minibatch took {total_load_time:0.2f} sec total. {num_batches} "
        "minibatches in total.")
    if num_load_time_exceeded > 0:
        logging.warning(
            "The dataloaders were not fast enough to always supply the next batch in less than "
            f"{MAX_ITEM_LOAD_TIME_SEC}sec.")
        logging.warning(
            f"In this epoch, {num_load_time_exceeded} out of {num_batches} batches exceeded the load time "
            f"threshold. The total loading time for the slow batches was {total_extra_load_time:0.2f}sec."
        )

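    # When metrics should not be stored for this epoch (for example, a checkpoint epoch whose validation
    # metrics will be re-computed after temperature scaling), return an empty MetricsDict instead.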
    _metrics = training_steps.get_epoch_results_and_store(epoch_time_seconds) \
        if train_val_params.save_metrics else MetricsDict()
    return ModelOutputsAndMetricsForEpoch(
        metrics=_metrics,
        model_outputs=model_outputs_epoch,
        is_train=train_val_params.in_training_mode)
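
# train_or_validate_epoch snapshots the global random state before validation and restores it
# afterwards, so that validation does not perturb the training RNG stream. A minimal sketch of
# that pattern, assuming only the Python, NumPy and CPU PyTorch generators matter (the real
# RandomStateSnapshot helper may also cover additional generators such as CUDA):
import random

import numpy as np
import torch


class SimpleRandomStateSnapshot:
    """Captures the current global RNG state and can restore it later."""

    def __init__(self) -> None:
        self.python_state = random.getstate()
        self.numpy_state = np.random.get_state()
        self.torch_state = torch.get_rng_state()

    def restore(self) -> None:
        random.setstate(self.python_state)
        np.random.set_state(self.numpy_state)
        torch.set_rng_state(self.torch_state)


# Usage: snapshot before reseeding for validation, restore once validation has finished.
# snapshot = SimpleRandomStateSnapshot()
# torch.manual_seed(42)
# ... run validation ...
# snapshot.restore()

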
def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
                                  labels_to_boundary: bool) -> None:
    """
    Tests if patch sampling and the production of diagnostic images work as expected.
    :param test_output_dirs: Test fixture that provides the output folder for this test.
    :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that
    crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries.
    """
    set_random_seed(0)
    shape = (10, 30, 30)
    foreground_classes = ["fg"]
    class_weights = equally_weighted_classes(foreground_classes)
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(2, 10, 10),
                                   class_weights=class_weights)
    image = np.random.rand(1, *shape).astype(np.float32) * 1000
    mask = np.ones(shape)
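    # Build a one-hot label map with one channel per class (background plus a single foreground class).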
    labels = np.zeros((len(class_weights), ) + shape)
    if labels_to_boundary:
        # Generate foreground labels in such a way that a patch centered around a foreground pixel would
        # reach outside of the image.
        labels[1, 4:8, 3:27, 3:27] = 1
    else:
        labels[1, 4:8, 15:18, 15:18] = 1
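    # The background channel is the complement of the foreground channel.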
    labels[0] = 1 - labels[1]
    output_folder = Path(test_output_dirs.root_dir)
    image_header = get_unit_image_header()
    sample = Sample(image=image,
                    mask=mask,
                    labels=labels,
                    metadata=PatientMetadata(patient_id='123',
                                             image_header=image_header))
    expected_folder = full_ml_test_data_path("patch_sampling")
    heatmap = visualize_random_crops(sample,
                                     config,
                                     output_folder=output_folder)
    expected_heatmap = expected_folder / ("sampled_to_boundary.npy"
                                          if labels_to_boundary else
                                          "sampled_center.npy")
    # To update the stored results, uncomment this line:
    # np.save(str(expected_heatmap), heatmap)
    assert np.allclose(heatmap, np.load(
        str(expected_heatmap))), "Patch sampling created a different heatmap."
    f1 = output_folder / "123_ct.nii.gz"
    assert_file_exists(f1)
    f2 = output_folder / "123_sampled_patches.nii.gz"
    assert_file_exists(f2)
    thumbnails = [
        "123_sampled_patches_dim0.png",
        "123_sampled_patches_dim1.png",
        "123_sampled_patches_dim2.png",
    ]
    for f in thumbnails:
        assert_file_exists(output_folder / f)

    expected = expected_folder / ("sampled_to_boundary.nii.gz"
                                  if labels_to_boundary else
                                  "sampled_center.nii.gz")
    # To update test results:
    # shutil.copy(str(f2), str(expected))
    expected_image = io_util.load_nifti_image(expected)
    actual_image = io_util.load_nifti_image(f2)
    assert np.allclose(expected_image.image, actual_image.image), \
        "Patch sampling created a different sampled patches image."
    if labels_to_boundary:
        for f in thumbnails:
            # Uncomment this line to update test results
            # (expected_folder / f).write_bytes((output_folder / f).read_bytes())
            if not is_running_on_azure():
                # When running on the Azure build agents, it appears that the bounding box of the images
                # is slightly different than on local runs, even with equal dpi settings.
                # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
                assert_binary_files_match(output_folder / f,
                                          expected_folder / f)
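
# For reference, a rough sketch of how a patch-sampling heatmap of the kind checked above can be
# accumulated. The uniform choice of crop positions is an illustrative assumption only:
# visualize_random_crops draws crops according to the class weights and mask, so the real heatmap
# is concentrated around the foreground labels.
import numpy as np


def sampling_heatmap(image_shape, crop_size, num_crops=100, seed=0):
    """Counts how often each voxel is covered when drawing random crops of the given size."""
    rng = np.random.default_rng(seed)
    heatmap = np.zeros(image_shape, dtype=np.int32)
    for _ in range(num_crops):
        # Choose a crop start so that the crop stays fully inside the image.
        start = [rng.integers(0, s - c + 1) for s, c in zip(image_shape, crop_size)]
        crop = tuple(slice(st, st + c) for st, c in zip(start, crop_size))
        heatmap[crop] += 1
    return heatmap
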
Example #30
def test_image_encoder(test_output_dirs: OutputFolderForTests,
                       encode_channels_jointly: bool,
                       use_non_imaging_features: bool,
                       kernel_size_per_encoding_block: Optional[Union[TupleInt3, List[TupleInt3]]],
                       stride_size_per_encoding_block: Optional[Union[TupleInt3, List[TupleInt3]]],
                       reduction_factor: float,
                       expected_num_reduced_features: int,
                       aggregation_type: AggregationType) -> None:
    """
    Test if the image encoder networks can be trained without errors (including GradCam computation and data
    augmentation).
    """
    logging_to_stdout()
    set_random_seed(0)
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    scan_size = (6, 64, 60)
    scan_files: List[str] = []
    for s in range(4):
        random_scan = np.random.uniform(0, 1, scan_size)
        scan_file_name = f"scan{s + 1}{NumpyFile.NUMPY.value}"
        np.save(str(dataset_folder / scan_file_name), random_scan)
        scan_files.append(scan_file_name)

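    # The label column is only populated for week1 rows, matching label_channels=["week1"] below.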
    dataset_contents = """subject,channel,path,label,numerical1,numerical2,categorical1,categorical2
S1,week0,scan1.npy,,1,10,Male,Val1
S1,week1,scan2.npy,True,2,20,Female,Val2
S2,week0,scan3.npy,,3,30,Female,Val3
S2,week1,scan4.npy,False,4,40,Female,Val1
S3,week0,scan1.npy,,5,50,Male,Val2
S3,week1,scan3.npy,True,6,60,Male,Val2
"""
    (dataset_folder / "dataset.csv").write_text(dataset_contents)
    numerical_columns = ["numerical1", "numerical2"] if use_non_imaging_features else []
    categorical_columns = ["categorical1", "categorical2"] if use_non_imaging_features else []
    non_image_feature_channels = get_non_image_features_dict(default_channels=["week1", "week0"],
                                                             specific_channels={"categorical2": ["week1"]}) \
        if use_non_imaging_features else {}
    config_for_dataset = ScalarModelBase(
        local_dataset=dataset_folder,
        image_channels=["week0", "week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=non_image_feature_channels,
        numerical_columns=numerical_columns,
        categorical_columns=categorical_columns,
        should_validate=False
    )
    config_for_dataset.read_dataset_into_dataframe_and_pre_process()

    dataset = ScalarDataset(config_for_dataset,
                            sample_transforms=ScalarItemAugmentation(
                                RandAugmentSlice(is_transformation_for_segmentation_maps=False)))
    assert len(dataset) == 3

    config = ImageEncoder(
        encode_channels_jointly=encode_channels_jointly,
        should_validate=False,
        numerical_columns=numerical_columns,
        categorical_columns=categorical_columns,
        non_image_feature_channels=non_image_feature_channels,
        categorical_feature_encoder=config_for_dataset.categorical_feature_encoder,
        encoder_dimensionality_reduction_factor=reduction_factor,
        aggregation_type=aggregation_type,
        scan_size=(6, 64, 60)
    )

    if kernel_size_per_encoding_block:
        config.kernel_size_per_encoding_block = kernel_size_per_encoding_block
    if stride_size_per_encoding_block:
        config.stride_size_per_encoding_block = stride_size_per_encoding_block

    config.set_output_to(test_output_dirs.root_dir)
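    # Compute GradCam visualizations for at most one batch, so that this code path is exercised by the test.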
    config.max_batch_grad_cam = 1
    model = create_model_with_temperature_scaling(config)
    input_size: List[Tuple] = [(len(config.image_channels), *scan_size)]
    if use_non_imaging_features:
        input_size.append((config.get_total_number_of_non_imaging_features(),))

        # Original number of output channels (unreduced) is
        # num initial channels * (num encoder blocks - 1) = 4 * (3 - 1) = 8
        if encode_channels_jointly:
            # reduced_num_channels + num_non_img_features
            assert model.final_num_feature_channels == expected_num_reduced_features + \
                   config.get_total_number_of_non_imaging_features()
        else:
            # num_img_channels * reduced_num_channels + num_non_img_features
            assert model.final_num_feature_channels == len(config.image_channels) * expected_num_reduced_features + \
                   config.get_total_number_of_non_imaging_features()

    summarizer = ModelSummary(model)
    summarizer.generate_summary(input_sizes=input_size)
    config.local_dataset = dataset_folder
    config.validate()
    model_train(config, checkpoint_handler=get_default_checkpoint_handler(model_config=config,
                                                                          project_root=Path(test_output_dirs.root_dir)))
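
# The assertions above encode a simple channel count: joint encoding of all image channels yields
# a single reduced feature vector, separate encoding yields one per image channel, and the
# non-imaging features are appended in either case. A small illustrative helper (the function name
# and arguments are assumptions, not part of the library):
def expected_feature_channels(num_image_channels: int,
                              reduced_channels_per_encoder: int,
                              num_non_imaging_features: int,
                              encode_channels_jointly: bool) -> int:
    image_features = reduced_channels_per_encoder if encode_channels_jointly \
        else num_image_channels * reduced_channels_per_encoder
    return image_features + num_non_imaging_features


# For example, with 2 image channels, 8 reduced channels and 5 non-imaging features:
# joint encoding gives 8 + 5 = 13 features, separate encoding gives 2 * 8 + 5 = 21.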