def test_anomaly_detection(value_to_insert: float, in_training_mode: bool) -> None:
    """
    Test anomaly detection for the segmentation forward pass.

    The loss criterion is replaced by a stub that always returns `value_to_insert`, and the same value is
    written into the first voxel of the input image. If the value is NaN or infinite, the forward pass is
    expected to raise a RuntimeError whose message reports the offending loss value. For any other value, the
    forward pass must complete without raising.

    :param value_to_insert: The value to insert in the image and to return as the loss
        (nan, inf, or a valid float)
    :param in_training_mode: If true, run the segmentation forward pass in training mode, otherwise use the
        settings for running on the validation set.
    """
    image_size = [1, 1, 4, 4, 4]
    labels_size = [1, 2, 4, 4, 4]
    mask_size = [1, 4, 4, 4]
    crop_size = (4, 4, 4)
    inference_stride_size = (2, 2, 2)
    ground_truth_ids = ["Lung"]

    # image to run inference on
    image = torch.from_numpy(np.random.uniform(size=image_size).astype(ImageDataType.IMAGE.value))
    # labels for criterion
    labels = torch.from_numpy(np.random.uniform(size=labels_size).astype(ImageDataType.SEGMENTATION.value))
    # random binary mask (uniform samples rounded to 0 or 1)
    mask = torch.from_numpy(np.round(np.random.uniform(size=mask_size)).astype(dtype=ImageDataType.MASK.value))

    config = SegmentationModelBase(crop_size=crop_size,
                                   inference_stride_size=inference_stride_size,
                                   image_channels=["ct"],
                                   ground_truth_ids=ground_truth_ids,
                                   should_validate=False,
                                   detect_anomaly=True)

    model_and_info = ModelAndInfo(config=config,
                                  model_execution_mode=ModelExecutionMode.TRAIN,
                                  checkpoint_path=None)
    # Inject a small stand-in model directly, rather than having ModelAndInfo build one from the config.
    model_and_info._model: BaseModel = SimpleModel(1, [1], 2, 2)  # type: ignore
    model_and_info.create_summary_and_adjust_model_for_gpus()
    model_and_info.try_create_optimizer_and_load_from_checkpoint()
    config.use_gpu = False

    model = model_and_info.model
    optimizer = model_and_info.optimizer

    # Stub loss criterion: ignores its inputs and always yields the value under test.
    criterion = lambda x, y: torch.tensor(value_to_insert, requires_grad=True)
    pipeline = SegmentationForwardPass(model,
                                       config,
                                       batch_size=1,
                                       optimizer=optimizer,
                                       in_training_mode=in_training_mode,
                                       criterion=criterion)
    image[0, 0, 0, 0, 0] = value_to_insert
    if np.isnan(value_to_insert) or np.isinf(value_to_insert):
        with pytest.raises(RuntimeError) as ex:
            pipeline.forward_pass_patches(patches=image, mask=mask, labels=labels)
        # Check the message of the raised exception itself: `ex` is pytest's ExceptionInfo wrapper, and its
        # string form is not guaranteed to contain the full, unmodified exception message.
        assert f"loss computation returned {value_to_insert}" in str(ex.value)
    else:
        pipeline.forward_pass_patches(patches=image, mask=mask, labels=labels)
def test_amp_activated(use_model_parallel: bool,
                       execution_mode: ModelExecutionMode,
                       use_mixed_precision: bool) -> None:
    """
    Tests the effect of the mixed precision flag and the model parallel flag on the segmentation forward pass.

    With model parallelism enabled, adjusting the model for GPUs is expected to fail with a
    NotImplementedError, because SimpleModel does not implement model partitioning; the test ends there.
    Otherwise, the logits returned by the loss computation must be float16 when mixed precision is on and
    float32 when it is off, and a full forward/backward pass must run without raising.

    :param use_model_parallel: Value for the `use_model_parallel` field of the model configuration.
    :param execution_mode: The execution mode that the ModelAndInfo object is created for.
    :param use_mixed_precision: Value for the `use_mixed_precision` field of the model configuration. If true,
        a GradScaler is also handed to the forward pass.
    """
    assert machine_has_gpu, "This test must be executed on a GPU machine."
    assert torch.cuda.device_count() > 1, "This test must be executed on a multi-GPU machine"

    # Random image, labels, and binary mask to run forward and backward passes on.
    input_image = torch.from_numpy(
        np.random.uniform(size=[1, 1, 4, 4, 4]).astype(ImageDataType.IMAGE.value))
    target_labels = torch.from_numpy(
        np.random.uniform(size=[1, 2, 4, 4, 4]).astype(ImageDataType.SEGMENTATION.value))
    random_mask = torch.from_numpy(
        np.round(np.random.uniform(size=[1, 4, 4, 4])).astype(dtype=ImageDataType.MASK.value))

    model_config = SegmentationModelBase(crop_size=(4, 4, 4),
                                         image_channels=["ct"],
                                         ground_truth_ids=["Lung"],
                                         use_mixed_precision=use_mixed_precision,
                                         use_model_parallel=use_model_parallel,
                                         should_validate=False)
    assert model_config.use_gpu

    model_and_info = ModelAndInfo(config=model_config,
                                  model_execution_mode=execution_mode,
                                  checkpoint_path=None)
    model_and_info._model = SimpleModel(1, [1], 2, 2)  # type: ignore

    # Move the model to the GPU. This is mostly to avoid issues with AMP, which has trouble
    # with first using a GPU model and later using a CPU-based one.
    try:
        model_and_info.create_summary_and_adjust_model_for_gpus()
    except NotImplementedError as ex:
        if not use_model_parallel:
            raise ValueError(f"Expected this call to succeed, but got: {ex}")
        # The SimpleModel does not implement model partitioning, and should hence fail at this step.
        assert "Model partitioning is not implemented" in str(ex)
        return

    model_and_info.try_create_optimizer_and_load_from_checkpoint()
    gpu_model = model_and_info.model
    model_optimizer = model_and_info.optimizer

    # This is the same logic spelt out in adjust_model_for_gpus
    expect_data_parallel = execution_mode == ModelExecutionMode.TRAIN or not use_model_parallel
    if expect_data_parallel:
        assert isinstance(gpu_model, DataParallelModel)

    scaler = GradScaler() if use_mixed_precision else None
    # Stub loss criterion: ignores its inputs and returns a constant zero loss on the GPU.
    zero_loss = lambda x, y: torch.tensor([0.0], requires_grad=True).cuda()
    pipeline = SegmentationForwardPass(gpu_model,
                                       model_config,
                                       batch_size=1,
                                       optimizer=model_optimizer,
                                       gradient_scaler=scaler,
                                       criterion=zero_loss)
    logits, _ = pipeline._compute_loss(input_image, target_labels)

    # When using DataParallel, we expect to get a list of tensors back, one per GPU.
    if expect_data_parallel:
        assert isinstance(logits, list)
    first_logit = logits[0] if expect_data_parallel else logits

    expected_dtype = torch.float16 if use_mixed_precision else torch.float32
    assert first_logit.dtype == expected_dtype

    # Verify that forward and backward passes do not throw an exception
    pipeline._forward_pass(patches=input_image, mask=random_mask, labels=target_labels)