def forward(self, predictions, target):
    """Return the VAE objective: reconstruction loss plus KL term.

    Args:
        predictions: Triple unpacked as ``(recon_x, mu, logvar)``.
        target: Tensor the reconstruction is compared with. It is first passed
            through ``self.pre_maxpool`` when that attribute is not ``None``.

    Returns:
        ``self.reconstruction_loss(recon_x, target)`` plus the KL term.

    Side effects:
        Logs both loss terms with ``log_scalar`` and the (possibly pooled)
        target with ``log_image``.
    """
    reconstruction, mean, log_variance = predictions

    pool = self.pre_maxpool
    if pool is not None:
        target = pool(target)

    reconstruction_term = self.reconstruction_loss(reconstruction, target)

    # KL term, see Appendix B of
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    #   -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_summand = 1 + log_variance - mean.pow(2) - log_variance.exp()
    kl_term = -0.5 * torch.sum(kl_summand)

    log_scalar("VAE_BCE_loss", reconstruction_term)
    log_scalar("VAE_KLD_loss", kl_term)
    log_image("maxpooled_target", target)

    return reconstruction_term + kl_term
def forward(self, predictions, all_targets):
    """Accumulate the masked affinity loss over all configured predictions.

    Args:
        predictions: One prediction tensor or a list/tuple of them. When
            ``self.train_glia_mask`` is set, the trailing entries are treated
            as glia predictions and are split off before the affinity loop.
        all_targets: One target tensor or a list/tuple of them. The caller's
            container is never modified: the method works on a private list.

    Returns:
        The sum of the glia loss (when enabled) and one loss term per entry
        of ``self.predictions_specs``. This is the integer ``0`` when nothing
        contributes.

    Raises:
        ValueError: If an ``"affs_channels"`` spec is neither ``str`` nor
            ``list``.

    Side effects:
        Logs images/scalars through ``log_image`` / ``log_scalar`` and runs
        ``gc.collect()`` before returning.
    """
    if not isinstance(predictions, (list, tuple)):
        predictions = [predictions]
    # Work on a private list: the glia branch replaces entries, which would
    # fail on a tuple and would silently alter a caller-owned list.
    if isinstance(all_targets, (list, tuple)):
        all_targets = list(all_targets)
    else:
        all_targets = [all_targets]

    loss = 0

    # ----------------------------
    # Predict glia mask:
    # ----------------------------
    if self.train_glia_mask:
        assert not self.target_has_various_masks, "To be implemented"
        frg_kwargs = self.model.models[-1].foreground_prediction_kwargs
        if frg_kwargs is None:
            # Legacy:
            nb_glia_preds = 1
            nb_glia_targets = [0]
        else:
            nb_glia_preds = len(frg_kwargs)
            nb_glia_targets = [frg_kwargs[dpth]["nb_target"] for dpth in frg_kwargs]

        # An explicit split index keeps ``nb_glia_preds == 0`` from turning
        # ``[-0:]`` / ``[:-0]`` into "everything" / "nothing".
        split = max(len(predictions) - nb_glia_preds, 0)
        all_glia_preds = predictions[split:]
        predictions = predictions[:split]

        # The glia channel (last one) is removed from each target only once,
        # even if several glia predictions point to the same target.
        glia_channels = {}
        loss_glia = 0
        for counter, (glia_pred, nb_tar) in enumerate(zip(all_glia_preds, nb_glia_targets)):
            if nb_tar not in glia_channels:
                glia_channels[nb_tar] = all_targets[nb_tar][:, [-1]]
                all_targets[nb_tar] = all_targets[nb_tar][:, :-1]
            glia_target = glia_channels[nb_tar]

            assert self.target_has_label_segm
            gt_segm = all_targets[nb_tar][:, [0]]

            glia_target = auto_crop_tensor_to_shape(glia_target, glia_pred.shape)
            gt_segm = auto_crop_tensor_to_shape(gt_segm, glia_pred.shape)
            # TODO: generalize ignore label:
            valid_mask = (gt_segm != 0).float()
            glia_pred = glia_pred * valid_mask
            glia_target = glia_target * valid_mask
            # Warnings raised while evaluating the loss are recorded, not shown.
            with warnings.catch_warnings(record=True):
                loss_glia_new = data_parallel(self.loss, (glia_pred, glia_target), self.devices).mean()
            loss_glia = loss_glia + loss_glia_new
            log_image("glia_target_d{}".format(counter), glia_target)
            log_image("glia_pred_d{}".format(counter), glia_pred)
        loss = loss + loss_glia
        log_scalar("loss_glia", loss_glia)

    spatial_all = (slice(None), slice(None))
    for counter, nb_pred in enumerate(self.predictions_specs):
        assert len(predictions) > nb_pred
        pred = predictions[nb_pred]
        # TODO: add precrop_pred?

        pred_specs = self.predictions_specs[nb_pred]
        target = all_targets[pred_specs.get("target", 0)]

        target_dws_fact = pred_specs.get("target_dws_fact", None)
        if target_dws_fact is not None:
            assert isinstance(target_dws_fact, list) and len(target_dws_fact) == 3
            # Strided sub-sampling of the three trailing axes.
            target = target[spatial_all + tuple(slice(None, None, dws) for dws in target_dws_fact)]

        target = auto_crop_tensor_to_shape(target, pred.shape, ignore_channel_and_batch_dims=True)

        # Drop the leading non-affinity channels (1, or 2 with various masks).
        if self.target_has_label_segm:
            first_aff_channel = 2 if self.target_has_various_masks else 1
            target = target[:, first_aff_channel:]
        assert target.shape[1] % 2 == 0, "Target should include both affinities and masks"

        # First half of the channels: affinities; second half: valid masks.
        nb_channels = target.shape[1] // 2

        affs_channels = pred_specs.get("affs_channels", None)
        if affs_channels is None:
            affs_slice = slice(None)
        elif isinstance(affs_channels, str):
            affs_slice = parse_data_slice(affs_channels)[0]
        elif isinstance(affs_channels, list):
            # TODO: make as a tuple???
            affs_slice = affs_channels
        else:
            raise ValueError("The passed affinities channels are not compatible")

        gt_affs = target[:, :nb_channels][:, affs_slice]
        assert gt_affs.shape[1] == pred.shape[1], "Prediction has a wrong number of offset channels"

        valid_pixels = target[:, nb_channels:][:, affs_slice]

        # Invert affinities for Dice loss: (1 boundary, 0 otherwise)
        gt_affs = 1. - gt_affs

        pred = pred * valid_pixels
        gt_affs = gt_affs * valid_pixels

        with warnings.catch_warnings(record=True):
            loss_new = data_parallel(self.loss, (pred, gt_affs), self.devices).mean()
        loss = loss + loss_new
        log_scalar("loss_sparse_d{}".format(counter), loss_new)

    # TODO: use Callback from Roman to run it every N iterations
    gc.collect()
    return loss
def forward(self, prediction, target):
    """Weighted sum of a class loss (``self.sd``) and a residual loss (``self.l1``).

    Channel layout as indexed below, with ``D = self.n_directions`` and
    ``C = self.n_channels``:

    * ``prediction[:, :D * C]``: class scores, ``C`` consecutive channels per
      direction.
    * ``prediction[:, D * C:]``: residuals, compared with ``target[:, D:D * C]``.
    * ``target[:, D * C:]``: class targets, same layout as the class scores.
      The first ``C - 1`` channels of every direction also gate the residual
      loss.

    Args:
        prediction: 5-D tensor ``(batch, channels, z, y, x)``.
        target: 5-D tensor laid out as described above.

    Returns:
        ``self.weights[0] * loss1 + self.weights[1] * loss2``.

    Side effects:
        Increments ``self.log_counter``; logs images on every 100th call and
        scalars when ``self.log`` is truthy. The tensors are edited in place.
        With ``exclude_borders == 'auto'`` both are multiplied by border
        masks. The last class channel of ``target`` is left inverted
        (``1 - x``) when ``reshape`` returns a view; the same inversion on
        ``prediction`` is undone before returning.

    All helper tensors are created on ``prediction.device``, so the method
    also works on CPU or a non-default GPU, and the batch size is no longer
    assumed to be 1.
    """
    # Author's note on the prediction shape:
    # [batch, n_dir*n_channels*2, z, y, x]; e.g. [1, 64, 12, 324, 324]
    n_dir = self.n_directions
    n_cls = self.n_channels
    n_class_ch = n_dir * n_cls
    last_cls = n_cls - 1
    device = prediction.device

    log_now = self.log_counter % 100 == 0
    self.log_counter += 1

    # exclude the spatial borders of the volume due to lacking context for the network
    # Not protected against 'bad' slicing
    # NOTE(review): this removes b voxels at the start but b + 1 at the end
    # of every axis; confirm the asymmetry is intended.
    if isinstance(self.exclude_borders, list):
        b = self.exclude_borders
        crop = (Ellipsis,) + tuple(slice(b[k], -(b[k] + 1)) for k in range(3))
        prediction = prediction[crop]
        target = target[crop]

    if log_now:
        log_image('target_unmasked', target)

    if self.exclude_borders == 'auto':
        batch = prediction.shape[0]
        spatial = tuple(prediction.shape[2:])
        mask_res = np.ones((batch, (n_cls - 1) * n_dir, *spatial))
        mask_class = np.ones((batch, n_class_ch, *spatial))
        for i in range(n_dir):
            angle = i / n_dir * 2 * np.pi
            xoffset = -int(np.cos(angle) * self.max_dist)
            yoffset = -int(np.sin(angle) * self.max_dist)
            xslice = slice(xoffset - 1, None) if xoffset < 0 else slice(None, xoffset + 1)
            yslice = slice(yoffset - 1, None) if yoffset < 0 else slice(None, yoffset + 1)

            # Class channels are grouped per direction ...
            class_block = slice(n_cls * i, n_cls * (i + 1))
            mask_class[:, class_block, :, yslice, :] = 0
            mask_class[:, class_block, :, :, xslice] = 0
            # ... while residual channels are interleaved across directions.
            mask_res[:, i::n_dir, :, yslice, :] = 0
            mask_res[:, i::n_dir, :, :, xslice] = 0

        mask_class = torch.as_tensor(mask_class, dtype=torch.float32, device=device)
        mask_res = torch.as_tensor(mask_res, dtype=torch.float32, device=device)

        prediction[:, :n_class_ch] *= mask_class
        prediction[:, n_class_ch:] *= mask_res
        target[:, n_class_ch:] *= mask_class
        target[:, n_dir:n_class_ch] *= mask_res

        if log_now:
            log_image('mask_class', mask_class)
            log_image('mask_res', mask_res)
            log_image('pred', prediction)
            log_image('target_masked', target)

    # Regroup to (batch, class, direction, z, y, x) for the class loss.
    grouped_shape = (prediction.shape[0], n_dir, n_cls, *prediction.shape[2:])
    one = prediction[:, :n_class_ch].reshape(grouped_shape)
    one = one.permute(0, 2, 1, 3, 4, 5)
    one_target = target[:, n_class_ch:].reshape(grouped_shape)
    one_target = one_target.permute(0, 2, 1, 3, 4, 5)

    # Invert the last class (x -> 1 - x) in place, for both tensors.
    one[:, last_cls] *= -1
    one[:, last_cls] += 1
    one_target[:, last_cls] *= -1
    one_target[:, last_cls] += 1

    if log_now:
        log_image('one', one)
        log_image('one_target', one_target)
        log_image('one_new', one)

    loss1 = self.sd(one, one_target)

    # Undo the inversion on the prediction side only.
    one[:, last_cls] *= -1
    one[:, last_cls] += 1

    res_pred = prediction[:, n_class_ch:]
    res_tar = target[:, n_dir:n_class_ch]

    # Gate every residual channel with the matching class target; per
    # direction, the first C - 1 class channels are used.
    mask_quant = torch.empty(res_tar.shape, device=device)
    for i in range(n_dir):
        first = n_class_ch + i * n_cls
        mask_quant[:, i::n_dir] = target[:, first:first + n_cls - 1]

    loss2 = self.l1(res_pred * mask_quant, res_tar * mask_quant)

    # NOTE(review): in the whitespace-collapsed source this guard follows a
    # bare '#'; it is read here as an active ``if log_now:``. Confirm.
    if log_now:
        log_image('res_pred', res_pred)
        log_image('res_tar', res_tar)
        log_image('mask_quant', mask_quant)
        log_image('res_pred_masked', res_pred * mask_quant)
        log_image('res_tar_masked', res_tar * mask_quant)

    if self.log:
        log_scalar('SorensenDiceLoss', self.weights[0] * loss1)
        log_scalar('L1Loss', self.weights[1] * loss2)

    return self.weights[0] * loss1 + self.weights[1] * loss2
def forward(self, all_predictions, target):
    """Compute the patch-based mask loss, plus optional glia, sparse and IoU terms.

    Steps, in order:

    1. When ``self.model.return_input`` is set, split the trailing
       ``number_multiscale_inputs`` entries off ``all_predictions`` (they are
       only used for plotting).
    2. When ``self.train_glia_mask`` is set, split off the trailing glia
       predictions and add their loss; otherwise drop the last prediction.
    3. Optionally add ``self.sparse_multilevelDiceLoss`` and ``self.IoU_loss``.
    4. For every (patch-net index, prediction index) pair: crop prediction and
       target, sample embedding vectors and matching target patches, decode
       the embeddings with the patch model and add ``self.loss`` between the
       decoded patches and the inverted "me-mask" targets.

    Args:
        all_predictions: Sequence of prediction tensors, sliced and indexed
            as described above.
        target: Sequence of target tensors, or a single tensor. Channel 0 is
            compared with ``self.ignore_label``; channel 1 is compared with
            ``self.glia_label`` and ``self.boundary_label``.

    Returns:
        The accumulated loss.

    Side effects:
        Logs through ``log_image`` / ``log_scalar``, prints a message when a
        level has no valid patch, draws random numbers from ``np.random`` and
        runs ``gc.collect()`` before returning.

    NOTE(review): the nesting of this method was reconstructed from a
    whitespace-collapsed source; please confirm it against the original,
    in particular the extent of the ``if self.train_sparse_loss:`` body.
    """
    mdl_kwargs = self.model_kwargs
    ptch_kwargs = mdl_kwargs["patchNet_kwargs"]
    nb_inputs = mdl_kwargs.get("number_multiscale_inputs")

    # print([(pred.shape[-3], pred.shape[-2], pred.shape[-1]) for pred in all_predictions])
    # print([(targ.shape[-3], targ.shape[-2], targ.shape[-1]) for targ in target])

    # Plot some patches with the raw:
    if self.model.return_input:
        raw_inputs = all_predictions[-nb_inputs:]
        all_predictions = all_predictions[:-nb_inputs]

    loss = 0

    # # ----------------------------
    # # Predict glia mask:
    # # ----------------------------
    if self.train_glia_mask:
        assert self.glia_label is not None

        frg_kwargs = self.model.foreground_prediction_kwargs
        if frg_kwargs is None:
            # Legacy:
            nb_glia_preds = 1
            nb_glia_targets = [0]
        else:
            nb_glia_preds = len(frg_kwargs)
            nb_glia_targets = [
                frg_kwargs[dpth]["nb_target"] for dpth in frg_kwargs
            ]

        # The glia predictions are the last ``nb_glia_preds`` entries.
        all_glia_preds = all_predictions[-nb_glia_preds:]
        all_predictions = all_predictions[:-nb_glia_preds]
        loss_glia = 0
        for counter, glia_pred, nb_tar in zip(range(len(all_glia_preds)),
                                              all_glia_preds,
                                              nb_glia_targets):
            # Binary glia target from channel 1, validity from channel 0.
            glia_target = (target[nb_tar][:, [1]] == self.glia_label).float()
            valid_mask = (target[nb_tar][:, [0]] != self.ignore_label).float()

            glia_target = auto_crop_tensor_to_shape(
                glia_target, glia_pred.shape)
            valid_mask = auto_crop_tensor_to_shape(valid_mask,
                                                   glia_pred.shape)
            glia_pred = glia_pred * valid_mask
            glia_target = glia_target * valid_mask
            # Warnings raised while evaluating the loss are recorded, not shown.
            with warnings.catch_warnings(record=True) as w:
                loss_glia_cur = data_parallel(self.loss,
                                              (glia_pred, glia_target),
                                              self.devices).mean()
            loss_glia = loss_glia + loss_glia_cur
            log_image("glia_target_d{}".format(counter), glia_target)
            log_image("glia_pred_d{}".format(counter), glia_pred)
        loss = loss + loss_glia
        log_scalar("loss_glia", loss_glia)
    else:
        # The last prediction is discarded when the glia mask is not trained.
        # NOTE(review): ``pop`` needs a list and, when no slicing happened
        # above, removes the entry from the caller's own list -- confirm.
        glia_pred = all_predictions.pop(-1)

    if self.train_sparse_loss:
        loss = loss + self.sparse_multilevelDiceLoss(
            all_predictions, target)
        # Delete affinities from targets:
        target = [tar[:, :2].int() for tar in target]

    # IoU loss:
    if self.add_IoU_loss:
        assert self.boundary_label is None, "Not implemented"
        assert self.indx_trained_patchNets is None
        loss = loss + self.IoU_loss(all_predictions, target)

    # By default patch net ``k`` is trained on prediction ``k``.
    if self.indx_trained_patchNets is None:
        nb_preds = len(all_predictions)
        assert len(ptch_kwargs) == nb_preds
        indx_trained_patchNets = zip(range(nb_preds), range(nb_preds))
    else:
        indx_trained_patchNets = self.indx_trained_patchNets

    # ----------------------------
    # Loss on patches:
    # ----------------------------
    for nb_patch_net, nb_pr in indx_trained_patchNets:
        # ----------------------------
        # Initializations:
        # ----------------------------
        pred = all_predictions[nb_pr]
        kwargs = ptch_kwargs[nb_patch_net]
        if isinstance(target, (list, tuple)):
            assert "nb_target" in kwargs, "Multiple targets passed. Target should be specified"
            gt_segm = target[kwargs["nb_target"]]
        else:
            gt_segm = target

        # Collect options from config:
        patch_shape_input = kwargs.get("patch_size")
        assert all(i % 2 == 1 for i in patch_shape_input), "Patch should be odd"
        patch_dws_fact = kwargs.get("patch_dws_fact", [1, 1, 1])
        stride = tuple(kwargs.get("patch_stride", [1, 1, 1]))
        pred_dws_fact = kwargs.get("pred_dws_fact", [1, 1, 1])
        # print(nb_patch_net, patch_dws_fact, pred_dws_fact)
        precrop_pred = kwargs.get("precrop_pred", None)
        limit_nb_patches = kwargs.get("limit_nb_patches", None)
        # NOTE(review): this import is executed on every loop iteration.
        from segmfriends.utils.various import parse_data_slice
        if precrop_pred is not None:
            precrop_pred_slice = (
                slice(None), slice(None)) + parse_data_slice(precrop_pred)
            pred = pred[precrop_pred_slice]

        # NOTE(review): ``central_shape`` is only referenced by commented-out
        # code further down.
        central_shape = tuple(kwargs.get("central_shape", [1, 3, 3]))
        max_random_crop = tuple(kwargs.get("max_random_crop", [0, 5, 5]))
        # Patch extent measured on the target grid.
        if self.fix_bug_multiscale_patches:
            real_patch_shape = tuple(
                pt * fc - fc + 1
                for pt, fc in zip(patch_shape_input, patch_dws_fact))
        else:
            real_patch_shape = tuple(
                pt * fc for pt, fc in zip(patch_shape_input, patch_dws_fact))

        full_target_shape = gt_segm.shape[-3:]
        assert all([
            i <= j for i, j in zip(real_patch_shape, full_target_shape)
        ]), "Real-sized patch is too large!"

        # ----------------------------
        # Deduce crop size of the prediction and select target patches accordingly:
        # ----------------------------
        # print(pred.shape, full_target_shape, pred_dws_fact, real_patch_shape)
        crop_slice_targets, crop_slice_prediction = get_slicing_crops(
            pred.shape[2:], full_target_shape, pred_dws_fact,
            real_patch_shape)
        # print(crop_slice_prediction, crop_slice_targets, nb_patch_net)
        gt_segm = gt_segm[crop_slice_targets]
        pred = pred[crop_slice_prediction]
        full_target_shape = gt_segm.shape[-3:]

        # # ----------------------------
        # # Plot some random patches with associated raw patch:
        # # ----------------------------
        if self.model.return_input and nb_patch_net < 5:
            # raw = raw_inputs[kwargs["nb_target"]][crop_slice_targets]
            # FIXME: raw is not correct for deeper ones
            raw = raw_inputs[0][crop_slice_targets]
            raw_to_plot, gt_labels_to_plot, gt_masks_to_plot, pred_emb_to_plot = [], [], [], []
            for n in range(40):
                # Select a random pixel and define sliding-window crop slices:
                selected_coord = [
                    np.random.randint(shp) for shp in pred.shape[2:]
                ]
                # selected_coord[0] = 4 # For plots, get always 4
                full_patch_slice = (slice(None), slice(0, 1)) + tuple(
                    slice(selected_coord[i],
                          selected_coord[i] + real_patch_shape[i])
                    for i in range(len(selected_coord)))
                emb_slice = (slice(None), slice(0, 1)) + tuple(
                    slice(
                        selected_coord[i] + int(real_patch_shape[i] / 2),
                        selected_coord[i] + int(real_patch_shape[i] / 2) + 1)
                    for i in range(len(selected_coord)))
                pred_center_coord = [
                    int(selected_coord[i] / pred_dws_fact[i])
                    for i in range(len(selected_coord))
                ]
                emb_slice_pred = (slice(None), slice(None)) + tuple(
                    slice(pred_center_coord[i], pred_center_coord[i] + 1)
                    for i in range(len(selected_coord)))

                # Collect data for current sliding window:
                center_label = gt_segm[emb_slice]
                center_label_repeated = center_label.repeat(
                    1, 1, *real_patch_shape)
                gt_patch_labels = gt_segm[full_patch_slice]
                gt_masks_to_plot.append(
                    gt_patch_labels != center_label_repeated)
                gt_labels_to_plot.append(gt_patch_labels)
                # ignore_mask_patch = (gt_patch_labels == 0)
                pred_emb_to_plot.append(pred[emb_slice_pred])
                raw_to_plot.append(raw[full_patch_slice])

            # Highlight center pixel:
            raw_to_plot = torch.cat(raw_to_plot, dim=0)
            center_pixel_coord = (slice(None), 0) + tuple(
                int(shp / 2) for shp in real_patch_shape)
            raw_to_plot[center_pixel_coord] = raw_to_plot.min() - 1.

            gt_labels_to_plot = torch.cat(gt_labels_to_plot, dim=0)
            gt_masks_to_plot = torch.cat(gt_masks_to_plot, dim=0)
            pred_emb_to_plot = torch.cat(pred_emb_to_plot, dim=0)

            # Decode embeddings:
            ptch_num = kwargs["patchNet_number"]
            pred_patch_to_plot = data_parallel(
                self.model.patch_models[ptch_num],
                pred_emb_to_plot[:, :, 0, 0, 0], self.devices)

            # Downscale and rescale targets:
            down_sc_slice = (slice(None), slice(None)) + tuple(
                slice(int(dws_fact / 2), None, dws_fact)
                for dws_fact in patch_dws_fact)
            gt_masks_to_plot = torch.nn.functional.interpolate(
                gt_masks_to_plot[down_sc_slice].float(),
                scale_factor=tuple(patch_dws_fact))
            pred_patch_to_plot = torch.nn.functional.interpolate(
                pred_patch_to_plot, scale_factor=tuple(patch_dws_fact))

            gt_masks_to_plot = 1. - gt_masks_to_plot
            if patch_dws_fact[1] <= 6:
                pred_patch_to_plot = 1. - pred_patch_to_plot

            log_image("raw_patch_l{}".format(nb_patch_net), raw_to_plot)
            log_image("gt_label_patch_l{}".format(nb_patch_net), gt_labels_to_plot)
            log_image("gt_mask_patch_l{}".format(nb_patch_net), gt_masks_to_plot)
            log_image("pred_patch_l{}".format(nb_patch_net), pred_patch_to_plot)

        # # ----------------------------
        # # Patch-Loss:
        # # ----------------------------
        if kwargs.get("skip_standard_patch_loss", False):
            continue

        # If multiple strides were given, process all of them:
        all_strides = stride if isinstance(stride[0], list) else [stride]
        if limit_nb_patches is not None:
            all_limit_nb_patches = limit_nb_patches if isinstance(
                limit_nb_patches[0], list) else [limit_nb_patches]
        else:
            all_limit_nb_patches = [None for _ in all_strides]

        # NOTE(review): the loop rebinds ``stride`` and ``limit_nb_patches``
        # (and, below, ``crop_slice_targets``) from the enclosing scope.
        for nb_stride, stride, limit_nb_patches in zip(
                range(len(all_strides)), all_strides, all_limit_nb_patches):
            # ----------------------------
            # Get some random prediction embeddings:
            # ----------------------------
            pred_strides = get_prediction_strides(pred_dws_fact, stride)
            pred_patches, crop_slice_pred, nb_patches = extract_patches_torch_new(
                pred, (1, 1, 1),
                stride=pred_strides,
                max_random_crop=max_random_crop)

            # Try to get some raw patches:
            # TODO: the factor is simply the level in the UNet
            # get_slicing_crops(pred.shape[2:], full_target_shape, [1,1,1], real_patch_shape)

            # ----------------------------
            # Collect gt_segm patches and corresponding center labels:
            # ----------------------------
            # Reuse the start offsets returned for the prediction patches.
            crop_slice_targets = tuple(
                slice(sl.start, None) for sl in crop_slice_pred)
            gt_patches, _, _ = extract_patches_torch_new(
                gt_segm,
                real_patch_shape,
                stride=stride,
                crop_slice=crop_slice_targets,
                limit_patches_to=nb_patches)
            # Keep only channel 0 of the target patches.
            gt_patches = gt_patches[:, [0]]

            # Make sure to crop some additional border and get the centers correctly:
            # TODO: this can be now easily done by cropping the gt_patches...
            crop_slice_center_labels = (slice(None), slice(None)) + tuple(
                slice(slc.start + int(sh / 2), slc.stop)
                for slc, sh in zip(crop_slice_targets[2:], real_patch_shape))
            target_at_patch_center, _, _ = extract_patches_torch_new(
                gt_segm, (1, 1, 1),
                stride=stride,
                crop_slice=crop_slice_center_labels,
                limit_patches_to=nb_patches)
            # Get GT and other masks separately:
            label_at_patch_center = target_at_patch_center[:, [0]]
            mask_at_patch_center = target_at_patch_center[:, [1]]

            # ----------------------------
            # Ignore patches on the boundary or involving ignore-label:
            # ----------------------------
            # Ignore pixels involving ignore-labels:
            ignore_masks = (gt_patches == self.ignore_label)
            valid_patches = (label_at_patch_center != self.ignore_label)

            assert self.boundary_label is not None, "Old boundary method is deprecated"
            # # Exclude a patch from training if the central region contains more than one gt label
            # # (i.e. it is really close to a boundary):
            # central_crop = (slice(None), slice(None)) + convert_central_shape_to_crop_slice(gt_patches.shape[-3:], central_shape)
            # mean_central_crop_labels = gt_patches[central_crop].mean(dim=-1, keepdim=True) \
            #     .mean(dim=-2, keepdim=True) \
            #     .mean(dim=-3, keepdim=True)
            #
            # valid_patches = valid_patches & (mean_central_crop_labels == center_labels)
            # is_on_boundary_mask = None
            patch_is_on_boundary = (
                mask_at_patch_center == self.boundary_label).repeat(
                    1, 1, *real_patch_shape)

            # Ignore patches that represent a glia:
            if not self.train_patches_on_glia:
                assert self.glia_label is not None
                # print("Glia: ", (mask_at_patch_center != self.glia_label).min())
                valid_patches = valid_patches & (mask_at_patch_center != self.glia_label)

            # Delete redundant patches from batch:
            valid_batch_indices = np.argwhere(
                valid_patches[:, 0, 0, 0, 0].cpu().detach().numpy())[:, 0]
            # ``limit_nb_patches`` is read as ``[limit, 'number' | 'factor']``.
            if limit_nb_patches is not None:
                limit = limit_nb_patches[0]
                if limit_nb_patches[1] == 'number':
                    if valid_batch_indices.shape[0] > limit:
                        valid_batch_indices = np.random.choice(
                            valid_batch_indices, limit, replace=False)
                elif limit_nb_patches[1] == 'factor':
                    assert limit <= 1. and limit >= 0.
                    valid_batch_indices = np.random.choice(
                        valid_batch_indices,
                        int(limit * valid_batch_indices.shape[0]),
                        replace=False)
            if valid_batch_indices.shape[0] == 0:
                print(
                    "ZERO valid patches at level {}!".format(nb_patch_net))
                # Avoid problems if all patches are invalid and torch complains that autograd cannot be performed:
                loss += pred_patches.sum() * 0.
                continue

            # ----------------------------
            # Compute the actual (inverted) MeMasks targets: (0 is me, 1 are the others)
            # best targets for Dice loss (usually more me than others)
            # ----------------------------
            center_labels_repeated = label_at_patch_center.repeat(
                1, 1, *real_patch_shape)
            me_masks = gt_patches != center_labels_repeated

            if patch_is_on_boundary is not None:
                # If on boundary, we make (inverted) me_masks completely 1 (split from everything)
                me_masks = me_masks | patch_is_on_boundary

            # Downscale MeMasks using MaxPooling (preserve narrow processes):
            # moreover, during the maxPool, better shrink me mask than expanding (avoid merge predictions)
            # NOTE(review): ``maxpool`` is built here but never applied below;
            # the downscaling is done by the strided ``down_sc_slice``.
            if all(fctr == 1 for fctr in patch_dws_fact):
                maxpool = Identity()
            else:
                maxpool = nn.MaxPool3d(kernel_size=patch_dws_fact,
                                       stride=patch_dws_fact,
                                       padding=0)

            # Downscaling patch:
            down_sc_slice = (slice(None), slice(None)) + tuple(
                slice(int(dws_fact / 2), None, dws_fact)
                for dws_fact in patch_dws_fact)

            # Final targets:
            patch_targets = me_masks[valid_batch_indices].float(
            )[down_sc_slice]
            patch_ignore_masks = ignore_masks[valid_batch_indices][
                down_sc_slice].byte()

            # Invert MeMasks:
            # best targets for Dice loss are: meMask == 0; others == 1
            # FIXME: generalize
            if patch_dws_fact[1] > 6:
                patch_targets = 1. - patch_targets

            assert valid_batch_indices.max() < pred_patches.shape[
                0], "Something went wrong, more target patches were collected than predicted: {} targets vs {} pred...".format(
                    valid_batch_indices.max(), pred_patches.shape[0])
            pred_embed = pred_patches[valid_batch_indices]
            # Drop the three trailing singleton axes of the (1, 1, 1) patches.
            pred_embed = pred_embed[:, :, 0, 0, 0]

            # ----------------------------
            # Expand embeddings to patches using PatchNet models:
            # ----------------------------
            if "model_number" in kwargs:
                # FIXME: update this crap
                # In this case we are training a stacked model:
                mdl_num = kwargs["model_number"]
                ptch_num = kwargs["patchNet_number"]
                expanded_patches = data_parallel(
                    self.model.models[mdl_num].patch_models[ptch_num],
                    pred_embed, self.devices)
            else:
                expanded_patches = data_parallel(
                    self.model.patch_models[nb_patch_net], pred_embed,
                    self.devices)
            # print(expanded_patches.shape)
            assert expanded_patches.shape[
                1] == 1, "PatchNet should output only a one-channel mask!"

            # Some logs:
            if nb_stride == 0:
                log_image("ptc_trg_l{}".format(nb_patch_net), patch_targets)
                log_image("ptc_pred_l{}".format(nb_patch_net), expanded_patches)
                # log_image("ptc_ign_l{}".format(nb_patch_net), patch_ignore_masks)
                log_scalar("avg_targets_l{}".format(nb_patch_net), patch_targets.float().mean())

            # Train only checkerboard pattern:
            if self.apply_checkerboard:
                checkerboard = np.zeros(patch_shape_input)
                # Verticals:
                center_coord = [int(sh / 2) for sh in patch_shape_input]
                checkerboard[:, center_coord[1], :] = 1
                checkerboard[:, :, center_coord[2]] = 1
                # Two diagonals:
                indices = np.indices(patch_shape_input)
                checkerboard[indices[1] == indices[2]] = 1
                checkerboard[indices[1] == (patch_shape_input[2] - indices[2] - 1)] = 1
                # Reduce z-context:
                z_mask = np.zeros_like(checkerboard)
                z_mask[center_coord[0]] = 1
                for z in range(patch_shape_input[0]):
                    offs = abs(center_coord[0] - z)
                    if offs != 0:
                        z_mask[z, offs:-offs, offs:-offs] = 1
                checkerboard[np.logical_not(z_mask)] = 0
                # Expand channels and wrap:
                checkerboard = torch.from_numpy(checkerboard).cuda(
                    patch_ignore_masks.get_device()).float()
                checkerboard = checkerboard.unsqueeze(0).unsqueeze(0)
                checkerboard = checkerboard.repeat(
                    *patch_ignore_masks.shape[:2], 1, 1, 1)

            # ----------------------------
            # Apply ignore mask and compute loss:
            # ----------------------------
            patch_valid_masks = 1. - patch_ignore_masks.float()
            if self.apply_checkerboard:
                patch_valid_masks = patch_valid_masks * checkerboard
            expanded_patches = expanded_patches * patch_valid_masks
            patch_targets = patch_targets * patch_valid_masks
            with warnings.catch_warnings(record=True) as w:
                loss_unet = data_parallel(
                    self.loss, (expanded_patches, patch_targets.float()),
                    self.devices).mean()
            loss = loss + loss_unet
            if nb_stride == 0:
                log_scalar("loss_l{}".format(nb_patch_net), loss_unet)
                log_scalar("nb_patches_l{}".format(nb_patch_net), expanded_patches.shape[0])

    # print("Loss done, memory {}", torch.cuda.memory_allocated(0)/1000000)
    # TODO: use Callback from Roman to run it every N iterations
    gc.collect()
    return loss
def forward(self, input, target):
    """
    Forward pass of the loss. It returns a weighted linear combination of
    three losses: the Sorensen-Dice loss on the affinity branch, the
    topological loss and the Sorensen-Dice loss on the boundary branch.

    Args:
        input: Indexable pair; ``input[0]`` is the affinity prediction and
            ``input[1]`` the boundary prediction.
        target: Indexable pair; ``target[0]`` is handed to ``prepare_target``
            and ``target[1]`` is the boundary target of the topological part.

    Returns:
        The weighted sum of the active loss terms. A term is added only when
        its weight exceeds ``1e-6``; the two boundary terms are additionally
        skipped while ``self.pretraining`` is truthy.

    Side effects:
        Logs the three unweighted terms with ``log_scalar`` and writes timing
        and weighted values to ``self.log``.

    The boundary losses are moved to the device of ``input[0]`` instead of
    the default CUDA device, so the method also runs on CPU or on a
    non-default GPU.
    """
    # target for topological part
    boundary = target[1]
    # do some data engineering
    target, boundary_mask, boundary_contour = prepare_target(target[0])

    # One flag for both the computation and the weighting of the boundary
    # terms, so that they can never disagree.
    with_topology = not self.pretraining

    loss = 0.0
    topological_loss = 0.0
    topo_sorensen_dice_loss = 0.0

    if with_topology:
        out_device = input[0].device

        # create boundary map and boundary probabilities from input and target tensors
        start = time.time()
        b_map = boundary[0, 0, :, :, :].float().detach().cpu()
        maximum = torch.max(b_map)
        # Binarize: 1 where the map reaches its maximum, 0 elsewhere.
        boundary_map = torch.where(b_map < maximum, torch.tensor(0.0), torch.tensor(1.0)).detach()
        boundary_prob = input[1][0, 0, :, :, :].cpu() * boundary_mask.cpu() + boundary_contour
        boundary_prob_ax = torch.stack([boundary_prob], dim=0)
        boundary_map_ax = torch.stack([boundary_map], dim=0)

        # Losses on the boundary branch
        topological_loss = self.TopoLoss(boundary_prob, boundary_map).to(out_device)
        end = time.time()
        self.log.info(
            'Time elapsed to calculate topological features and gradient: ' + str(end - start) + ' s')

        # We remove the boundary pixels in a distance of 3 pixels to the
        # boundary because they interfere with the proper calculation of the
        # gradients due to boundary artifacts.
        topo_sorensen_dice_loss = boundary_map_ax.shape[1] + self.SDLoss(
            boundary_prob_ax[:, :, 3:-3, 3:-3],
            boundary_map_ax[:, :, 3:-3, 3:-3]).to(out_device)

    # Sorensen-Dice loss on affinity channel
    sorensen_dice_loss = target.shape[1] + self.SDLoss(input[0], target)

    # logging of the different losses in tensorboardX
    log_scalar('training_loss/SorensenDiceAffinityBranch', sorensen_dice_loss)
    log_scalar('training_loss/SorensenDiceBoundaryBranch', topo_sorensen_dice_loss)
    log_scalar('training_loss/TopologicalLoss', topological_loss)

    if self.SD_weight > 1e-6:
        loss += self.SD_weight * sorensen_dice_loss
    if with_topology and self.topo_weight > 1e-6:
        loss += self.topo_weight * topological_loss
    if with_topology and self.topo_SD_weight > 1e-6:
        loss += self.topo_SD_weight * topo_sorensen_dice_loss

    # ``str()`` is kept on purpose: it preserves the existing log text.
    self.log.info('Affinity SD Loss (weighted): ' + str(self.SD_weight * sorensen_dice_loss))
    self.log.info('Boundary SD Loss (weighted): ' + str(self.topo_SD_weight * topo_sorensen_dice_loss))
    self.log.info('Topological Loss (weighted): ' + str(self.topo_weight * topological_loss))
    self.log.info("Loss: " + str(loss))

    return loss
def forward(self, all_predictions, target):
    """
    Compute the patch-based mask reconstruction loss over all mask decoders.

    For every prediction, a set of embedding vectors is sampled on a strided
    grid and decoded into masks by the matching decoder in
    ``self.model.mask_decoders``. The decoded masks are compared (via
    ``self.loss``) with binary masks derived from the ground-truth patches
    centred at the same locations. The per-decoder / per-stride losses are
    summed.

    :param all_predictions: sequence of 5D prediction tensors, one per mask
        decoder. If ``self.model.return_input`` is set, the trailing
        ``self.model.number_multiscale_inputs`` entries are the raw inputs
        and are stripped before the loss is computed.
    :param target: sequence of target tensors. Only the first two channels
        are kept: channel 0 is used as label segmentation, channel 1 as an
        additional mask that is compared with ``self.boundary_label``.
    :return: the accumulated loss (the int ``0`` if there are no predictions).
    :raises NotImplementedError: if ``self.train_sparse_loss`` is set.
    """
    mdl = self.model
    nb_inputs = mdl.number_multiscale_inputs

    if mdl.return_input:
        # Strip the raw inputs appended to the predictions. The end index is
        # computed explicitly because `[:-0]` would return an empty sequence.
        all_predictions = all_predictions[:len(all_predictions) - nb_inputs]

    loss = 0
    if self.train_sparse_loss:
        raise NotImplementedError("The sparse multi-level Dice loss is not implemented")

    # Delete affinities from targets:
    target = [tar[:, :2].int() for tar in target]

    # ----------------------------
    # Loss on patches:
    # ----------------------------
    for mask_dec_indx, pred in enumerate(all_predictions):
        # ----------------------------
        # Initializations:
        # ----------------------------
        mask_dec = mdl.mask_decoders[mask_dec_indx]
        gt_segm = target[mask_dec.target_index]

        # Collect options from config:
        mask_shape = mask_dec.mask_shape
        mask_dws_fact = mask_dec.mask_dws_fact
        sample_strides = mask_dec.sample_strides
        pred_dws_fact = mask_dec.pred_dws_fact
        crop_slice_prediction = mask_dec.crop_slice_prediction
        limit_nb_decoded_masks_to = mask_dec.limit_nb_decoded_masks_to
        max_random_crop = mask_dec.max_random_crop

        if crop_slice_prediction is not None:
            precrop_pred_slice = (slice(None), slice(None)) + parse_data_slice(crop_slice_prediction)
            pred = pred[precrop_pred_slice]

        # Patch shape in full-resolution target voxels:
        real_shape_mask = tuple(pt * fc for pt, fc in zip(mask_shape, mask_dws_fact))
        full_target_shape = gt_segm.shape[-3:]
        assert all(i <= j for i, j in zip(real_shape_mask, full_target_shape)), "Real-sized patch is too large!"

        # ----------------------------
        # Deduce crop size of the prediction and select target patches accordingly:
        # ----------------------------
        # TODO: explain better what is going on here
        crop_slice_targets, crop_slice_prediction = get_slicing_crops(pred.shape[2:], full_target_shape,
                                                                      pred_dws_fact, real_shape_mask)
        gt_segm = gt_segm[crop_slice_targets]
        pred = pred[crop_slice_prediction]

        # ----------------------------
        # Patch-Loss:
        # ----------------------------
        # If multiple strides were given, process all of them:
        if not isinstance(sample_strides[0], list):
            sample_strides = [sample_strides]
        if limit_nb_decoded_masks_to is None:
            limit_nb_decoded_masks_to = [None] * len(sample_strides)
        elif not isinstance(limit_nb_decoded_masks_to[0], list):
            # NOTE(review): a single limit combined with several strides makes
            # the `zip` below stop after the first stride -- confirm intended.
            limit_nb_decoded_masks_to = [limit_nb_decoded_masks_to]

        for nb_stride, (smpl_stride, max_nb_masks) in enumerate(zip(sample_strides,
                                                                    limit_nb_decoded_masks_to)):
            # ----------------------------
            # Get some random prediction embeddings:
            # ----------------------------
            prediction_strides = get_prediction_strides(pred_dws_fact, smpl_stride)
            selected_embeddings, crop_slice_pred, nb_selected_masks = extract_patches_torch(
                pred, (1, 1, 1), stride=prediction_strides, max_random_crop=max_random_crop)

            # ----------------------------
            # Collect gt_segm patches and corresponding center labels:
            # ----------------------------
            crop_slice_targets = tuple(slice(sl.start, None) for sl in crop_slice_pred)
            gt_patches, _, _ = extract_patches_torch(gt_segm, real_shape_mask, stride=smpl_stride,
                                                     apply_specific_crop_slice=crop_slice_targets,
                                                     limit_patches_nb_to=nb_selected_masks)
            gt_patches = gt_patches[:, [0]]

            # Make sure to crop some additional border and get the centers correctly:
            # TODO: this can be now easily done by cropping the gt_patches...
            crop_slice_center_labels = (slice(None), slice(None)) + tuple(
                slice(slc.start + int(sh / 2), slc.stop)
                for slc, sh in zip(crop_slice_targets[2:], real_shape_mask))
            target_at_patch_center, _, _ = extract_patches_torch(gt_segm, (1, 1, 1), stride=smpl_stride,
                                                                 apply_specific_crop_slice=crop_slice_center_labels,
                                                                 limit_patches_nb_to=nb_selected_masks)

            # Get GT and other masks separately:
            label_at_patch_center = target_at_patch_center[:, [0]]
            mask_at_patch_center = target_at_patch_center[:, [1]]

            # ----------------------------
            # Ignore patches on the boundary or involving ignore-label:
            # ----------------------------
            # Ignore pixels involving ignore-labels:
            ignore_masks = (gt_patches == self.ignore_label)
            valid_patches = (label_at_patch_center != self.ignore_label)

            patch_is_on_boundary = None
            if self.boundary_label is not None:
                patch_is_on_boundary = (mask_at_patch_center == self.boundary_label).repeat(
                    1, 1, *real_shape_mask)

            # Delete non-valid patches from batch:
            valid_batch_indices = np.argwhere(valid_patches[:, 0, 0, 0, 0].cpu().detach().numpy())[:, 0]
            if max_nb_masks is not None:
                # `max_nb_masks` is a pair (limit, mode), mode being 'number' or 'factor':
                limit = max_nb_masks[0]
                if max_nb_masks[1] == 'number':
                    if valid_batch_indices.shape[0] > limit:
                        valid_batch_indices = np.random.choice(valid_batch_indices, limit, replace=False)
                elif max_nb_masks[1] == 'factor':
                    assert 0. <= limit <= 1.
                    valid_batch_indices = np.random.choice(valid_batch_indices,
                                                           int(limit * valid_batch_indices.shape[0]),
                                                           replace=False)

            if valid_batch_indices.shape[0] == 0:
                # Avoid problems if all patches are invalid and
                # torch complaining that autograd cannot be performed:
                loss = loss + selected_embeddings.sum() * 0.
                print("ZERO valid patches at level {}".format(mask_dec_indx))
                continue

            # ----------------------------
            # Compute the actual (inverted) MeMasks targets: (0 is me, 1 are the others)
            # best targets for Dice loss (usually more me than others)
            # ----------------------------
            center_labels_repeated = label_at_patch_center.repeat(1, 1, *real_shape_mask)
            target_me_masks = gt_patches != center_labels_repeated

            if patch_is_on_boundary is not None:
                # If on boundary, we make (inverted) me_masks completely 1 (split from everything)
                target_me_masks = target_me_masks | patch_is_on_boundary

            # Downscaling patches:
            down_sc_slice = (slice(None), slice(None)) + tuple(
                slice(int(dws_fact / 2), None, dws_fact) for dws_fact in mask_dws_fact)

            # Final targets:
            target_me_masks = target_me_masks[valid_batch_indices].float()[down_sc_slice]
            ignore_masks = ignore_masks[valid_batch_indices][down_sc_slice].byte()

            # Invert MeMasks:
            # best targets for Dice loss are: meMask == 0; others == 1
            # TODO: generalize
            if mask_dws_fact[1] > 6:
                target_me_masks = 1. - target_me_masks

            assert valid_batch_indices.max() < selected_embeddings.shape[
                0], "Something went wrong, more target patches were collected than those predicted: {} targets vs {} pred...".format(
                valid_batch_indices.max(), selected_embeddings.shape[0])
            selected_embeddings = selected_embeddings[valid_batch_indices]
            # Embeddings were extracted as (1, 1, 1) patches: drop the spatial axes.
            selected_embeddings = selected_embeddings[:, :, 0, 0, 0]

            # ----------------------------
            # Decode the actual predicted using the decoder models:
            # ----------------------------
            decoded_masks = data_parallel(mask_dec, selected_embeddings, self.devices)
            assert decoded_masks.shape[1] == 1, "MaskDecoder should output only single-channel masks!"

            # Some logs (only for the first stride of each decoder):
            if nb_stride == 0:
                log_image("ptc_trg_l{}".format(mask_dec_indx), target_me_masks)
                log_image("ptc_pred_l{}".format(mask_dec_indx), decoded_masks)
                log_scalar("avg_targets_l{}".format(mask_dec_indx), target_me_masks.float().mean())

            # ----------------------------
            # Apply ignore mask and compute loss:
            # ----------------------------
            valid_masks = 1. - ignore_masks.float()
            decoded_masks = decoded_masks * valid_masks
            target_me_masks = target_me_masks * valid_masks
            # Warnings raised while computing the loss are recorded and discarded:
            with warnings.catch_warnings(record=True):
                reconstruction_loss = data_parallel(self.loss, (decoded_masks, target_me_masks.float()),
                                                    self.devices).mean()
            loss = loss + reconstruction_loss
            if nb_stride == 0:
                log_scalar("loss_l{}".format(mask_dec_indx), reconstruction_loss)
                log_scalar("nb_patches_l{}".format(mask_dec_indx), decoded_masks.shape[0])

    gc.collect()
    return loss