def __init__(self, plans_file, fold, tasks, tags, output_folder_dict=None, dataset_directory_dict=None,
             batch_dice=True, stage=None, unpack_data=True, deterministic=True, fp16=False):
    """Initialise the trainer through the parent class and install a Dice + CE loss.

    Every argument is handed to the parent constructor unchanged: ``plans_file``,
    ``fold``, ``tasks`` and ``tags`` positionally, all remaining ones by keyword.
    Afterwards ``self.loss`` is set to a ``DC_and_CE_loss`` whose first config dict
    carries ``batch_dice`` (read back from ``self.batch_dice``), ``smooth=1e-5`` and
    ``do_bg=False``; the second config dict is empty.
    """
    parent_kwargs = dict(
        output_folder_dict=output_folder_dict,
        dataset_directory_dict=dataset_directory_dict,
        batch_dice=batch_dice,
        stage=stage,
        unpack_data=unpack_data,
        deterministic=deterministic,
        fp16=fp16,
    )
    super().__init__(plans_file, fold, tasks, tags, **parent_kwargs)

    # self.batch_dice is read from the instance, i.e. after the parent constructor ran.
    dice_config = {'batch_dice': self.batch_dice, 'smooth': 1e-5, 'do_bg': False}
    ce_config = {}
    self.loss = DC_and_CE_loss(dice_config, ce_config)
def update_loss(self):
    """Rebuild ``self.loss`` with CE/Dice weights chosen from the current epoch.

    Schedule (the two weights always add up to 2):

    * ``epoch <= 500``: cross-entropy only (``weight_ce=2``, ``weight_dice=0``).
    * ``500 < epoch <= 750``: linear hand-over from CE to Dice.
    * ``750 < epoch <= self.max_num_epochs``: Dice only (``weight_ce=0``, ``weight_dice=2``).

    The chosen weights are written to the log, a new ``DC_and_CE_loss`` is created
    and then wrapped in ``MultipleOutputLoss2`` together with ``self.ds_loss_weights``.

    :raises RuntimeError: if the epoch is outside all of the ranges above.
    """
    epoch = self.epoch

    if epoch <= 500:
        weight_ce, weight_dice = 2, 0
    elif epoch <= 750:
        # Amount of weight that has moved from CE to Dice so far (0 -> 2 over 250 epochs).
        shifted = 2 / 250 * (epoch - 500)
        weight_ce, weight_dice = 2 - shifted, 0 + shifted
    elif epoch <= self.max_num_epochs:
        weight_ce, weight_dice = 0, 2
    else:
        raise RuntimeError("Invalid epoch: %d" % epoch)

    self.print_to_log_file("weight ce", weight_ce, "weight dice", weight_dice)

    dice_config = {'batch_dice': self.batch_dice, 'smooth': 1e-5, 'do_bg': False}
    self.loss = DC_and_CE_loss(dice_config, {}, weight_ce=weight_ce, weight_dice=weight_dice)
    # Wrap the freshly built loss together with the per-output weights.
    self.loss = MultipleOutputLoss2(self.loss, self.ds_loss_weights)
def __init__(self, plans_file, fold, output_folder=None, dataset_directory=None, batch_dice=True, stage=None,
             unpack_data=True, deterministic=True, fp16=False):
    """Initialise the trainer through the parent class and use a Dice + CE loss with ``smooth=0``.

    All nine arguments are forwarded positionally, in signature order, to the parent
    constructor. ``self.loss`` is then replaced by a ``DC_and_CE_loss`` whose first
    config dict holds ``batch_dice`` (taken from ``self.batch_dice``), ``smooth=0``
    and ``do_bg=False``; the second config dict is empty.
    """
    parent_args = (plans_file, fold, output_folder, dataset_directory, batch_dice, stage,
                   unpack_data, deterministic, fp16)
    super().__init__(*parent_args)

    dice_config = {'batch_dice': self.batch_dice, 'smooth': 0, 'do_bg': False}
    ce_config = {}
    self.loss = DC_and_CE_loss(dice_config, ce_config)
def __init__(self, plans_file, fold, output_folder=None, dataset_directory=None, batch_dice=True, stage=None,
             unpack_data=True, deterministic=True, fp16=False, lam=2, gpu="0", save_dir=None):
    """
    Store the constructor arguments, reset all plan-derived attributes and set training defaults.

    :param plans_file: the pkl file generated by preprocessing. This file will determine all design choices
    :param fold: can be either [0 ... 5) for cross-validation, 'all' to train on all available training data or
        None if you wish to load some checkpoint and do inference only
    :param output_folder: where to store parameters, plot progress and to the validation
    :param dataset_directory: the parent directory in which the preprocessed Task data is stored. This is
        required because the split information is stored in this directory. For running prediction only this
        input is not required and may be set to None
    :param batch_dice: compute dice loss for each sample and average over all samples in the batch or pretend
        the batch is a pseudo volume?
    :param stage: The plans file may contain several stages (used for lowres / highres / pyramid). Stage must
        be specified for training: if stage 1 exists then stage 1 is the high resolution stage, otherwise it's 0
    :param unpack_data: if False, npz preprocessed data will not be unpacked to npy. This consumes less space
        but is considerably slower! Running unpack_data=False with 2d should never be done!
    :param deterministic: forwarded to the parent constructor
    :param fp16: forwarded to the parent constructor
    :param lam: forwarded to the parent constructor
    :param gpu: forwarded to the parent constructor
    :param save_dir: forwarded to the parent constructor

    IMPORTANT: If you inherit from nnUNetTrainer and the init args change then you need to redefine
    self.init_args in your init accordingly. Otherwise checkpoints won't load properly!
    """
    super(nnUNetTrainer, self).__init__(deterministic, fp16, lam, gpu, save_dir)

    self.unpack_data = unpack_data
    # NOTE(review): lam, gpu and save_dir are accepted above but are not part of init_args - confirm that
    # whatever consumes init_args is expected to fall back to their defaults.
    self.init_args = (plans_file, fold, output_folder, dataset_directory, batch_dice, stage, unpack_data,
                      deterministic, fp16)

    # values taken directly from the constructor arguments
    self.stage = stage
    self.experiment_name = self.__class__.__name__
    self.plans_file = plans_file
    self.output_folder = output_folder
    self.dataset_directory = dataset_directory
    self.output_folder_base = self.output_folder
    self.fold = fold

    self.plans = None

    # When running inference only, self.dataset_directory may be set (due to checkpoint loading) without
    # being relevant, so the ground-truth folder is only derived when the directory actually exists.
    dataset_dir_exists = self.dataset_directory is not None and isdir(self.dataset_directory)
    self.gt_niftis_folder = join(self.dataset_directory, "gt_segmentations") if dataset_dir_exists else None

    # set in self.initialize()
    self.folder_with_preprocessed_data = None

    # data loaders
    self.dl_tr = None
    self.dl_val = None

    # loaded automatically from plans_file
    self.num_input_channels = None
    self.num_classes = None
    self.net_pool_per_axis = None
    self.patch_size = None
    self.batch_size = None
    self.threeD = None
    self.base_num_features = None
    self.intensity_properties = None
    self.normalization_schemes = None
    self.net_num_pool_op_kernel_sizes = None
    self.net_conv_kernel_sizes = None

    self.basic_generator_patch_size = None
    self.data_aug_params = None
    self.transpose_forward = None
    self.transpose_backward = None

    self.batch_dice = batch_dice
    dice_config = {'batch_dice': self.batch_dice, 'smooth': 1e-5, 'do_bg': False, 'square': False}
    self.loss = DC_and_CE_loss(dice_config, {})

    # accumulators for the online evaluation
    self.online_eval_foreground_dc = []
    self.online_eval_tp = []
    self.online_eval_fp = []
    self.online_eval_fn = []

    self.classes = None
    self.do_dummy_2D_aug = None
    self.use_mask_for_norm = None
    self.only_keep_largest_connected_component = None
    self.min_region_size_per_class = None
    self.min_size_per_class = None

    self.inference_pad_border_mode = "constant"
    self.inference_pad_kwargs = {'constant_values': 0}

    # Called only after fold and the output folder attributes above have been assigned.
    self.update_fold(fold)
    self.pad_all_sides = None

    # optimisation defaults
    self.lr_scheduler_eps = 1e-3
    self.lr_scheduler_patience = 30
    self.initial_lr = 3e-4
    self.weight_decay = 3e-5

    self.oversample_foreground_percent = 0.33
def find_2d_configuration():
    """Run a reference 2D FabiansUNet configuration on the GPU and print its approximate VRAM figure.

    The function builds a 2D ``FabiansUNet`` with fixed hyperparameters, runs 20
    SGD training steps on random input / random labels (requires a CUDA device,
    everything is moved with ``.cuda()``) and finally prints the value returned by
    ``FabiansUNet.compute_approx_vram_consumption`` for the same hyperparameters.
    It takes no arguments and returns ``None``; its only outputs are the prints.
    """
    # lets compute a reference for 2D
    # we select hyperparameters here so that we get approximately the same patch size as we would get with the
    # regular unet. This is just my choice. You can do whatever you want
    # These default hyperparameters will then be used by the experiment planner

    # since this is more parameter intensive than the UNet, we will test a configuration that has a lot of
    # parameters, therefore we copy the UNet configuration for Task003_Liver
    cudnn.deterministic = False
    cudnn.benchmark = True

    patch_size = (512, 512)
    max_num_features = 512
    num_modalities = 1
    num_classes = 3
    batch_size = 12

    # now we fiddle with the network specific hyperparameters until everything just barely fits into a titanx
    blocks_per_stage_encoder = FabiansUNet.default_blocks_per_stage_encoder
    blocks_per_stage_decoder = FabiansUNet.default_blocks_per_stage_decoder
    initial_num_features = 30

    # we need to add a [1, 1] for the res unet because in this implementation all stages of the encoder can
    # have a stride
    pool_op_kernel_sizes = [[1, 1],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2]]

    conv_op_kernel_sizes = [[3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3]]

    # The same slices are needed for building the network and for the VRAM estimate; compute them once.
    num_stages = len(conv_op_kernel_sizes)
    encoder_blocks = blocks_per_stage_encoder[:num_stages]
    decoder_blocks = blocks_per_stage_decoder[:num_stages - 1]

    unet = FabiansUNet(num_modalities, initial_num_features, encoder_blocks, 2,
                       pool_op_kernel_sizes, conv_op_kernel_sizes,
                       get_default_network_config(2, dropout_p=None), num_classes,
                       decoder_blocks, False, False,
                       max_features=max_num_features).cuda()

    optimizer = SGD(unet.parameters(), lr=0.1, momentum=0.95)
    loss = DC_and_CE_loss({'batch_dice': True, 'smooth': 1e-5, 'do_bg': False}, {})

    dummy_input = torch.rand((batch_size, num_modalities, *patch_size)).cuda()
    # Random integer labels; the upper clamp bound follows num_classes instead of a hard-coded 2 so the
    # labels stay valid if num_classes above is edited.
    dummy_gt = (torch.rand((batch_size, 1, *patch_size)) * num_classes).round().clamp_(
        0, num_classes - 1).cuda().long()

    for step in range(20):
        optimizer.zero_grad()
        skips = unet.encoder(dummy_input)
        print([skip.shape for skip in skips])
        output = unet.decoder(skips)

        loss_value = loss(output, dummy_gt)
        loss_value.backward()

        optimizer.step()
        if step == 0:
            # release cached blocks once, after the first iteration
            torch.cuda.empty_cache()

    # that should do. Now take the network hyperparameters and insert them in
    # FabiansUNet.compute_approx_vram_consumption
    # whatever number this spits out, save it to FabiansUNet.use_this_for_batch_size_computation_2D
    print(FabiansUNet.compute_approx_vram_consumption(patch_size, initial_num_features, max_num_features,
                                                      num_modalities, num_classes, pool_op_kernel_sizes,
                                                      encoder_blocks, decoder_blocks, 2, batch_size))
unet.compute_reference_for_vram_consumption_3d() unet.compute_reference_for_vram_consumption_2d() dummy_input = torch.rand((batch_size, 4, *patch_size)).cuda() dummy_gt = (torch.rand( (batch_size, 1, *patch_size)) * 4).round().clamp_(0, 3).cuda().long() optimizer.zero_grad() skips = unet.encoder(dummy_input) print([i.shape for i in skips]) loss = DC_and_CE_loss( { 'batch_dice': True, 'smooth': 1e-5, 'smooth_in_nom': True, 'do_bg': False, 'rebalance_weights': None, 'background_weight': 1 }, {}) output = unet.decoder(skips) l = loss(output, dummy_gt) l.backward() optimizer.step() import hiddenlayer as hl g = hl.build_graph(unet, dummy_input) g.save("/home/fabian/test.pdf") """conv_op_kernel_sizes = ((3, 3, 3),
import torch.optim as optim import torch.nn.functional as F from torch.autograd import Variable from nnunet.training.loss_functions.dice_loss import DC_and_CE_loss, SoftDiceLoss # ------- Loss function from nnunet ------------------------------------------------------ try: dice_args = { "batch_dice": False, "do_bg": True, "smooth": 1., "square": False } ce_args = {"weight": None} dice_and_ce_loss = DC_and_CE_loss(dice_args, ce_args) except NameError: pass from functools import reduce from utils import lr_scheduler import utils.loss as dloss from utils.adversarial_attacks import FGSM_Attack from utils1 import * def train_nll(args, epoch, model, trainLoader, optimizer, trainF, config, scheduler): model.train() nProcessed = 0 nTrain = len(trainLoader.dataset)
def find_2d_configuration():
    """Exercise a fixed 2D FabiansPreActUNet configuration under mixed precision on the GPU.

    Builds the network with hard-coded hyperparameters, prints the value returned by
    ``compute_approx_vram_consumption`` for them, runs 10 SGD steps on random
    input / random labels using ``autocast`` and ``GradScaler`` (a CUDA device is
    required, everything is moved with ``.cuda()``), and finally saves an
    architecture graph built with ``hiddenlayer`` to ``/home/fabian/test_arch.pdf``.
    Takes no arguments and returns ``None``.
    """
    cudnn.benchmark = True
    cudnn.deterministic = False

    # seven stages: 3x3 convolutions everywhere, no downsampling in the first stage
    conv_op_kernel_sizes = ((3, 3),) * 7
    pool_op_kernel_sizes = ((1, 1),) + ((2, 2),) * 6

    patch_size = (256, 256)
    base_num_features = 32
    input_modalities = 4
    blocks_per_stage_encoder = (1, 3, 4, 6, 6, 6, 6)
    blocks_per_stage_decoder = (2, 2, 2, 2, 2, 2)
    feat_map_mult_on_downscale = 2
    num_classes = 5
    max_features = 512
    batch_size = 50

    network_config = get_default_network_config(2, dropout_p=None)
    unet = FabiansPreActUNet(input_modalities, base_num_features, blocks_per_stage_encoder,
                             feat_map_mult_on_downscale, pool_op_kernel_sizes, conv_op_kernel_sizes,
                             network_config, num_classes, blocks_per_stage_decoder, True, False,
                             max_features=max_features)
    unet = unet.cuda()

    scaler = GradScaler()
    optimizer = SGD(unet.parameters(), lr=0.1, momentum=0.95)

    vram_estimate = unet.compute_approx_vram_consumption(patch_size, base_num_features, max_features,
                                                         input_modalities, num_classes,
                                                         pool_op_kernel_sizes, blocks_per_stage_encoder,
                                                         blocks_per_stage_decoder,
                                                         feat_map_mult_on_downscale, batch_size)
    print(vram_estimate)

    loss = DC_and_CE_loss({'batch_dice': True, 'smooth': 1e-5, 'do_bg': False}, {})

    input_shape = (batch_size, input_modalities, *patch_size)
    target_shape = (batch_size, 1, *patch_size)
    dummy_input = torch.rand(input_shape).cuda()
    # random integer labels in [0, num_classes - 1]
    dummy_gt = (torch.rand(target_shape) * num_classes).round().clamp_(0, num_classes - 1).cuda().long()

    for _ in range(10):
        optimizer.zero_grad()

        # NOTE(review): the backward pass and optimizer step are kept inside the autocast context here;
        # the PyTorch AMP documentation recommends wrapping only the forward pass and loss - confirm intent.
        with autocast():
            skips = unet.encoder(dummy_input)
            print([skip.shape for skip in skips])
            # only the first decoder output is used for the loss
            output = unet.decoder(skips)[0]

            l = loss(output, dummy_gt)
            print(l.item())
            scaler.scale(l).backward()
            scaler.step(optimizer)
            scaler.update()

    with autocast():
        import hiddenlayer as hl
        g = hl.build_graph(unet, dummy_input, transforms=None)
        g.save("/home/fabian/test_arch.pdf")