def test_segmentation_to_one_hot(use_gpu: bool, input_on_gpu: bool) -> None:
    """
    Checks segmentation_to_one_hot on a segmentation tensor that is filled with a single class value:
    the result must have the expected shape, dtype and device, and only the channels that correspond
    to that class may be non-zero.

    :param use_gpu: Passed through to segmentation_to_one_hot as its second argument.
    :param input_on_gpu: If True, the input segmentation is created on the "cuda" device, otherwise on "cpu".
    """
    # Settings to test on large scale:
    # B = 16
    # C = 2
    # dim = (50, 400, 400)
    B = 2
    C = 3
    dim = (4, 1, 2)
    input_size = (B, C) + dim
    actual_class = 5
    # This is deliberately replicated from get_datatype_for_image_tensors
    dtype = torch.float16 if is_gpu_available() else torch.float32
    device = "cuda" if input_on_gpu else "cpu"
    seg = torch.ones(input_size, dtype=torch.uint8, device=device) * actual_class
    start_time = time.time()
    one_hot = segmentation_to_one_hot(seg, use_gpu, result_dtype=dtype)
    elapsed = time.time() - start_time
    print(f"Computed one-hot in {elapsed:0.2f}sec")
    assert one_hot.shape == (B, C * HDF5_NUM_SEGMENTATION_CLASSES) + dim
    assert one_hot.dtype == dtype
    # The result must be on the same device as the input. In particular, that means we can feed in a CPU
    # tensor, do the computation on the GPU, and still get back a CPU tensor.
    assert seg.device == one_hot.device
    for i in range(C * HDF5_NUM_SEGMENTATION_CLASSES):
        # Within each group of HDF5_NUM_SEGMENTATION_CLASSES output channels, only the channel at
        # offset actual_class should be all ones; every other channel should be all zeros.
        if i % HDF5_NUM_SEGMENTATION_CLASSES == actual_class:
            expected = torch.ones((B,) + dim, device=one_hot.device)
            description = "ones"
        else:
            expected = torch.zeros((B,) + dim, device=one_hot.device)
            description = "zeros"
        # Compare in float32 because the result may be float16.
        assert one_hot[:, i, ...].float().allclose(expected), f"Dimension {i} should have all {description}"
def use_gpu(self) -> bool:  # type: ignore
    """
    Gets the flag that says whether a CUDA capable GPU is present and should be used.

    If the flag stored in self._use_gpu is still None, it is initialized from is_gpu_available()
    on this first access. Any value that was stored before is returned unchanged.
    """
    if self._use_gpu is not None:
        return self._use_gpu
    # Keep this import local: the rest of the file should not depend on pytorch.
    from InnerEye.ML.utils.ml_util import is_gpu_available
    self._use_gpu = is_gpu_available()
    return self._use_gpu
def use_gpu(self) -> bool:
    """
    Says whether a GPU should be used: True only if self.max_num_gpus is not 0 and
    is_gpu_available() reports a GPU. With self.max_num_gpus == 0, the result is False and the
    GPU check is not executed at all.
    """
    gpus_permitted = self.max_num_gpus != 0
    if gpus_permitted:
        # Local import, only executed when the GPU check is actually needed.
        from InnerEye.ML.utils.ml_util import is_gpu_available
        return is_gpu_available()
    return False
def use_gpu(self, value: bool) -> None:
    """
    Stores the flag that controls the use of the GPU in self._use_gpu.

    :param value: The new value of the flag.
    :raises ValueError: If value is True, but is_gpu_available() reports that no GPU is present.
    """
    gpu_requested = bool(value)
    if gpu_requested:
        # Keep this import local: the rest of the file should not depend on pytorch.
        from InnerEye.ML.utils.ml_util import is_gpu_available
        gpu_present = is_gpu_available()
        if not gpu_present:
            raise ValueError("Can't set use_gpu to True if there is not CUDA capable GPU present.")
    # The value is stored as given, the check above only validates it.
    self._use_gpu = value
def run(self) -> None:
    """
    Periodically logs GPU and CPU utilization to a TensorBoard file and, when not in an offline run,
    to the AzureML run context.

    If self._interval_seconds is not larger than 0, a warning is logged and the method returns without
    logging any metrics. Otherwise the method loops forever, sleeping self._interval_seconds between
    two measurements; the loop itself has no exit condition.
    """
    if self._interval_seconds <= 0:
        logging.warning(
            "Resource monitoring requires an interval that is larger than 0 seconds, but "
            "got: {}. Exiting.".format(self._interval_seconds))
        # Really stop here, as the warning announces. Without this, an interval of 0 would turn
        # the loop below into a busy loop, and a negative interval would make time.sleep raise.
        return
    logging.info("Process ({}) started with pid: {}".format(self.name, self.pid))
    # create the TB writers and AML run context for this process
    writer = tensorboardX.SummaryWriter(self._tb_log_file_path)
    run_context = Run.get_context()
    is_offline_run = is_offline_run_context(run_context)
    current_iteration = 0

    def log_to_azure_and_tb(label: str, value: float) -> None:
        # Reads the current value of current_iteration from the enclosing scope on every call.
        writer.add_scalar(label, value, current_iteration)
        if not is_offline_run:
            run_context.log(label, value)

    # The GPU check is done only once, before entering the loop.
    gpu_available = is_gpu_available()
    while True:
        if gpu_available:
            gpus: List[GPU] = GPUtil.getGPUs()
            # statistics.mean raises on empty data, hence only log when at least one GPU was found.
            if gpus:
                for gpu in gpus:
                    log_to_azure_and_tb('Diagnostics/GPU_{}_Load_Percent'.format(gpu.id),
                                        gpu.load * 100)
                    log_to_azure_and_tb('Diagnostics/GPU_{}_MemUtil_Percent'.format(gpu.id),
                                        gpu.memoryUtil * 100)
                # log the average GPU usage
                log_to_azure_and_tb('Diagnostics/Average_GPU_Load_Percent',
                                    statistics.mean(gpu.load for gpu in gpus) * 100)
                log_to_azure_and_tb('Diagnostics/Average_GPU_MemUtil_Percent',
                                    statistics.mean(gpu.memoryUtil for gpu in gpus) * 100)
        # log the CPU util
        log_to_azure_and_tb('Diagnostics/CPU_Util_Percent', psutil.cpu_percent(interval=None))
        log_to_azure_and_tb('Diagnostics/CPU_MemUtil_Percent', psutil.virtual_memory()[2])
        current_iteration += 1
        # pause the thread for the requested delay
        time.sleep(self._interval_seconds)
def load_checkpoint(model: torch.nn.Module,
                    path_to_checkpoint: Path,
                    optimizer: Optional[Optimizer] = None,
                    optimizer_to_gpu: Optional[bool] = False) -> Optional[int]:
    """
    Restores the model weights, and optionally the optimizer state, from a checkpoint file.
    The provided model must match the stored model.

    :param model: The network to load the weights into. If it is a torch.nn.DataParallel object, the weights
    are loaded into the wrapped module. Must have the same architecture as the stored model.
    :param path_to_checkpoint: The path to the checkpoint file.
    :param optimizer: The optimizer used for training. If given, its state is restored from the 'opt_dict'
    entry of the checkpoint.
    :param optimizer_to_gpu: If true, the tensors that are direct values of the stored optimizer
    dictionary are moved to the GPU before the optimizer state is restored.
    :return: The checkpoint epoch if loaded and None if not loaded
    """
    if not path_to_checkpoint.is_file():
        logging.warning(f'No checkpoint found at {path_to_checkpoint} current working dir {os.getcwd()}')
        return None

    logging.info(f"Loading checkpoint {path_to_checkpoint}")
    # For model debugging, allow loading a GPU trained model onto the CPU. This will clearly only work
    # if the model is small.
    if is_gpu_available():
        map_location = None
    else:
        map_location = 'cpu'
    checkpoint = torch.load(str(path_to_checkpoint), map_location=map_location)

    # A DataParallel object holds the actual network in its .module attribute.
    target_module = model.module if isinstance(model, torch.nn.DataParallel) else model
    target_module.load_state_dict(checkpoint['state_dict'])

    if optimizer is not None:
        opt_dict = checkpoint['opt_dict']
        if optimizer_to_gpu:
            # https://github.com/pytorch/pytorch/issues/2830
            # Only the direct values of the dictionary are inspected, nested containers are left as they are.
            for entry_name in list(opt_dict):
                entry = opt_dict[entry_name]
                if isinstance(entry, torch.Tensor):
                    opt_dict[entry_name] = entry.cuda()
        optimizer.load_state_dict(opt_dict)

    epoch = checkpoint['epoch']
    logging.info("Loaded checkpoint (epoch: {})".format(epoch))
    return epoch
def run(self) -> None:
    """
    Monitoring loop of the process: in each iteration, updates the GPU metrics (if a GPU is available),
    logs CPU load and memory utilization to TensorBoard, increments self.step, stores the metrics to
    file, and then sleeps for self._interval_seconds.

    If self._interval_seconds is not larger than 0, a warning is logged and self.kill() is called
    before anything else happens.
    """
    interval_is_valid = self._interval_seconds > 0
    if not interval_is_valid:
        logging.warning("Resource monitoring requires an interval that is larger than 0 seconds, but "
                        f"got: {self._interval_seconds}. Exiting.")
        # NOTE(review): if self.kill() returns, execution continues with the code below - confirm
        # that the call is guaranteed to terminate the process.
        self.kill()
    logging.info(f"Process '{self.name}' started with pid: {self.pid}")
    # The GPU check is done only once, before entering the loop.
    has_gpu = is_gpu_available()
    while True:
        if has_gpu:
            gpu_list = GPUtil.getGPUs()
            self.update_metrics(gpu_list)
        # log the CPU utilization
        cpu_load_percent = psutil.cpu_percent(interval=None)
        self.log_to_tensorboard('CPU/Load_Percent', cpu_load_percent)
        # Element 2 of the psutil.virtual_memory() result is what gets logged as memory utilization.
        memory_percent = psutil.virtual_memory()[2]
        self.log_to_tensorboard('CPU/MemUtil_Percent', memory_percent)
        self.step += 1
        self.store_to_file()
        # pause the thread for the requested delay
        time.sleep(self._interval_seconds)
config.local_dataset = Path() config.dataset_data_frame = pd.read_csv(StringIO(dataset_contents), sep=",", dtype=str) # Patch the load_images function that will be called once we access a dataset item image_and_seg = ImageAndSegmentations[np.ndarray](images=np.zeros(scan_size, dtype=np.float32), segmentations=np.ones(scan_size, dtype=np.uint8)) with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg): azure_config = get_default_azure_config() azure_config.train = True MLRunner(config, azure_config).run() # No further asserts here because the models are still in experimental state. Most errors would come # from having invalid model architectures, which would throw runtime errors during training. # Verified manually that the cross entropy on the Val set that appears during training, and the # cross entropy when running on the Val set in test mode are the same. @pytest.mark.parametrize("use_gpu", [True, False] if is_gpu_available() else [False]) @pytest.mark.parametrize("input_on_gpu", [True, False] if is_gpu_available() else [False]) @pytest.mark.gpu def test_segmentation_to_one_hot(use_gpu: bool, input_on_gpu: bool) -> None: # Settings to test on large scale: # B = 16 # C = 2 # dim = (50, 400, 400) B = 2 C = 3 dim = (4, 1, 2) input_size = (B, C) + dim actual_class = 5 # This is deliberately replicated from get_datatype_for_image_tensors dtype = torch.float16 if is_gpu_available() else torch.float32 device = "cuda" if input_on_gpu else "cpu"
# ------------------------------------------------------------------------------------------ from typing import Any, List import pytest import torch from torch import Tensor from InnerEye.Common import common_util from InnerEye.ML.models.architectures.base_model import BaseModel, CropSizeConstraints from InnerEye.ML.models.losses.soft_dice import SoftDiceLoss from InnerEye.ML.models.parallel.data_parallel import DataParallelCriterion from InnerEye.ML.models.parallel.model_parallel import group_layers_with_balanced_memory, \ move_to_device, partition_layers from InnerEye.ML.utils.ml_util import is_gpu_available, set_random_seed no_gpu = not is_gpu_available() no_or_single_gpu = not torch.cuda.is_available( ) or torch.cuda.device_count() <= 1 class SimpleModel(BaseModel): """ A simple neural network model to test model parallelisation functions. """ def __init__(self, input_channels: Any, channels: Any, n_classes: int, kernel_size: int): # minimum crop size: Network first reduces size by 4, then halves, then multiplies by 2 and adds 1 # 64 -> 62 -> 30 -> 61 -> 61 super().__init__( name='SimpleModel', input_channels=input_channels,
from InnerEye.Common.type_annotations import PathOrString, TupleInt3
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.dataset.full_image_dataset import PatientDatasetSource
from InnerEye.ML.dataset.sample import PatientMetadata, Sample
from InnerEye.ML.photometric_normalization import PhotometricNormalization
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.io_util import ImageHeader, ImageWithHeader
from InnerEye.ML.utils.ml_util import is_gpu_available
from Tests.fixed_paths_for_tests import full_ml_test_data_path

TEST_CHANNEL_IDS = ["channel1", "channel2"]
TEST_MASK_ID = "mask"
TEST_GT_ID = "region"

# The GPU check is evaluated once, when the module is imported.
machine_has_gpu = is_gpu_available()
no_gpu_available = not machine_has_gpu


def create_dataset_csv_file(csv_string: str, dst: str) -> Path:
    """
    Writes the given text to a file "dataset.csv" inside of the folder dst.

    :param csv_string: The full contents of the file to write.
    :param dst: The folder in which the file is created.
    :return: The folder dst, as a Path object.
    """
    csv_file = Path(dst) / "dataset.csv"
    csv_file.write_text(csv_string)
    return Path(dst)


def content_mismatch(actual: Any, expected: Any) -> str:
    """
    Builds the error message for a content mismatch, showing first the actual and then the expected value.
    """
    message_template = "Content mismatch. \nActual:\n {}\nExpected:\n {}"
    return message_template.format(actual, expected)
with mock.patch("InnerEye.ML.run_ml.is_offline_run_context", return_value=True): with mock.patch( 'InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg): azure_config = get_default_azure_config() azure_config.train = True MLRunner(config, azure_config=azure_config).run() # No further asserts here because the models are still in experimental state. Most errors would come # from having invalid model architectures, which would throw runtime errors during training. # Verified manually that the cross entropy on the Val set that appears during training, and the # cross entropy when running on the Val set in test mode are the same. @pytest.mark.parametrize("use_gpu", [True, False] if is_gpu_available() else [False]) @pytest.mark.parametrize("input_on_gpu", [True, False] if is_gpu_available() else [False]) @pytest.mark.gpu def test_segmentation_to_one_hot(use_gpu: bool, input_on_gpu: bool) -> None: # Settings to test on large scale: # B = 16 # C = 2 # dim = (50, 400, 400) B = 2 C = 3 dim = (4, 1, 2) input_size = (B, C) + dim actual_class = 5 # This is deliberately replicated from get_datatype_for_image_tensors dtype = torch.float16 if is_gpu_available() else torch.float32