def test_msd_gradients():
    """Check the gradients of ``MSDModule`` numerically with ``gradcheck``.

    The input and the network are both converted to double precision, and
    every network parameter is overwritten with random values before the
    check is run.
    """
    t.manual_seed(1)
    dtype = t.double
    size = (11, 13)
    batch_sz = 2

    for depth in (9,):
        print(f"Depth: {depth}")

        # Width and channel counts are all tied to the batch size.
        c_in = batch_sz
        c_out = batch_sz
        width = batch_sz

        noise = t.randn(batch_sz, c_in, *size, dtype=dtype)
        x = Variable(noise).cuda()
        x.requires_grad = True

        net = MSDModule(c_in, c_out, depth, width)
        net.double()

        # Replace all parameters by random values of the same shape/dtype.
        for param in net.parameters():
            param.data = t.randn_like(param.data)

        assert net is not None

        gradcheck(net, [x], raise_exception=True)
def __init__(self, c_in, c_out, depth, width, dilations=None):
    """Create a new MSDModel base class.

    .. note::
        Do not initialize MSDModel directly. Use
        :class:`~msd_pytorch.msd_segmentation_model.MSDSegmentationModel` or
        :class:`~msd_pytorch.msd_regression_model.MSDRegressionModel` instead.

    :param c_in: The number of input channels.
    :param c_out: The number of output channels.
    :param depth: The depth of the MSD network.
    :param width: The width of the MSD network.
    :param dilations: `list(int)`

        A list of dilations to use. When omitted (or ``None``), a fresh
        list ``[1, 2, ..., 10]`` is used. A good alternative is
        ``[1, 2, 4, 8]``. The dilations are repeated when there are more
        layers than supplied dilations.
    :returns:
    :rtype:
    """
    # A list literal as default argument would be created once and then
    # shared (via ``self.dilations``) by every model that relies on the
    # default. Build a new list per instance instead.
    if dilations is None:
        dilations = list(range(1, 11))

    self.c_in, self.c_out = c_in, c_out
    self.depth, self.width = depth, width
    self.dilations = dilations

    # This part of the network can be used to renormalize the
    # input and output data. Its parameters are saved when the
    # network is saved.
    self.scale_in = scaling_module(c_in, c_in)
    self.scale_out = scaling_module(c_out, c_out)
    self.msd = MSDModule(c_in, c_out, depth, width, dilations)
def test_parameters_change():
    """Ensure that every parameter of ``MSDModule`` changes during training.

    For each tested depth a network is trained for two Adam steps on random
    data with an L1 loss. Afterwards each named parameter must differ from
    its initial value and the last loss must not be (approximately) zero.
    """
    t.manual_seed(1)
    size = (30, 30)

    for batch_sz in [1]:
        for depth in range(0, 20, 6):
            c_in = c_out = width = batch_sz
            x = Variable(t.randn(batch_sz, c_in, *size)).cuda()
            target = Variable(t.randn(batch_sz, c_out, *size)).cuda()
            assert x.data.is_cuda

            net = MSDModule(c_in, c_out, depth, width)
            assert net is not None

            # Snapshot of the initial parameter values.
            params0 = {n: p.data.clone() for n, p in net.named_parameters()}

            # Train for two iterations. The convolution weights in
            # the MSD layers are not updated after the first
            # training step because the final 1x1 convolution
            # weights are zero.
            optimizer = optim.Adam(net.parameters())
            optimizer.zero_grad()
            criterion = nn.L1Loss()
            for _ in range(2):
                y = net(x)
                assert y is not None
                loss = criterion(y, target)
                loss.backward()
                optimizer.step()

            # Compare every parameter against its snapshot.
            for name, p1 in dict(net.named_parameters()).items():
                p0 = params0[name]
                d = (p0 - p1.data.clone()).abs().sum().item()
                assert d > 0.0, (
                    f"Parameter {name} left unchanged: \n"
                    f"Initial value: {p0}\n"
                    f"Current value: {p1}\n"
                    f"Gradient: {p1.grad}\n"
                )

            # Check that the loss is not zero
            assert loss.abs().item() != approx(0.0)
def __init__(self, c_in, c_out, depth, width, conv3d=False, reflect=True):
    """Build the fixed normalization layer, the MSD network and its optimizer.

    :param c_in: The number of input channels.
    :param c_out: The number of output channels.
    :param depth: Passed on to ``MSDModule`` as its depth.
    :param width: Passed on to ``MSDModule`` as its width.
    :param conv3d: Passed on to ``MSDModule`` as ``conv3d``.
    :param reflect: Passed on to ``MSDModule`` as ``reflect``.
    """
    super(msdSegModule, self).__init__()
    self.c_in = c_in
    self.c_out = c_out
    self.depth, self.width = depth, width
    self.criterion = nn.NLLLoss2d()

    # This part of the network can be used to renormalize the
    # input data. Its parameters are saved when the network is
    # saved.
    # NOTE(review): this layer (and the 1x1 convolution below) is always
    # a Conv2d, also when conv3d=True -- confirm that is intended.
    net_fixed = nn.Conv2d(c_in, c_in, 1)
    net_fixed.bias.requires_grad = False
    net_fixed.bias.data.zero_()
    net_fixed.weight.requires_grad = False
    # Initialize to the identity mapping. Filling the complete
    # (c_in, c_in, 1, 1) weight with ones would make every output channel
    # the sum of all input channels as soon as c_in > 1.
    net_fixed.weight.data.zero_()
    for channel in range(c_in):
        net_fixed.weight.data[channel, channel].fill_(1)
    self.net_fixed = net_fixed

    # The rest of the network has parameters that are updated
    # during training.
    self.net_msd = MSDModule(
        c_in, c_out, depth, width, msd_dilation, reflect=reflect, conv3d=conv3d
    )
    # The trained part ends in a Softmax (not a LogSoftmax) over the
    # channel dimension.
    net_trained = nn.Sequential(
        self.net_msd, nn.Conv2d(c_out, c_out, 1), nn.Softmax(dim=1)
    )
    self.net = nn.Sequential(net_fixed, net_trained)
    self.net.cuda()

    # Only the trained part is handed to the optimizer.
    self.optimizer = optim.Adam(net_trained.parameters())
def __init__(self, n_dual, depth, width, dilations):
    """Set up the concatenation layer and the MSD block of the dual network.

    The MSD block receives ``n_dual + 2`` input channels and produces
    ``n_dual`` output channels.
    """
    super(DualMSDNet, self).__init__()

    n_channels = n_dual + 2
    self.n_dual = n_dual
    self.n_channels = n_channels

    self.input_concat_layer = ConcatenateLayer()

    block_kwargs = {
        "c_in": n_channels,
        "c_out": n_dual,
        "depth": depth,
        "width": width,
        "dilations": dilations,
    }
    self.block = MSDModule(**block_kwargs)
def __init__(self, n_primal, depth, width, dilations):
    """Set up the concatenation layer and the MSD block of the primal network.

    The MSD block receives ``n_primal + 1`` input channels and produces
    ``n_primal`` output channels.
    """
    super(PrimalMSDNet, self).__init__()

    n_channels = n_primal + 1
    self.n_primal = n_primal
    self.n_channels = n_channels

    self.input_concat_layer = ConcatenateLayer()

    block_kwargs = {
        "c_in": n_channels,
        "c_out": n_primal,
        "depth": depth,
        # number of channels per convolution
        "width": width,
        "dilations": dilations,
    }
    self.block = MSDModule(**block_kwargs)
def __init__(self, c_in, c_out, depth, width, dilations=None, ndim=2):
    """Create a new MSDModel base class.

    .. note::
        Do not initialize MSDModel directly. Use
        :class:`~msd_pytorch.msd_segmentation_model.MSDSegmentationModel` or
        :class:`~msd_pytorch.msd_regression_model.MSDRegressionModel` instead.

    :param c_in: The number of input channels.
    :param c_out: The number of output channels.
    :param depth: The depth of the MSD network.
    :param width: The width of the MSD network.
    :param dilations: `list(int)`

        A list of dilations to use. When omitted (or ``None``), a fresh
        list ``[1, 2, ..., 10]`` is used. A good alternative is
        ``[1, 2, 4, 8]``. The dilations are repeated when there are more
        layers than supplied dilations.
    :param ndim: `int`

        The dimension of the convolutions. 2D convolutions are used by
        default. 3D is also possible.
    :raises ValueError: When ``ndim`` is neither 2 nor 3.
    :returns:
    :rtype:
    """
    # A list literal as default argument would be created once and then
    # shared (via ``self.dilations``) by every model that relies on the
    # default. Build a new list per instance instead.
    if dilations is None:
        dilations = list(range(1, 11))

    self.c_in, self.c_out = c_in, c_out
    self.depth, self.width = depth, width
    self.dilations = dilations
    self.ndim = ndim

    if ndim not in [2, 3]:
        raise ValueError(
            f"Expected 2D or 3D convolutions (ndim=2 or 3). Got: {ndim}")

    # This part of the network can be used to renormalize the
    # input and output data. Its parameters are saved when the
    # network is saved.
    conv3d = ndim == 3
    self.scale_in = scaling_module(c_in, conv3d=conv3d)
    self.scale_out = scaling_module(c_out, conv3d=conv3d)
    self.msd = MSDModule(c_in, c_out, depth, width, dilations, ndim=ndim)
def test_reflect():
    """Run a freshly constructed ``MSDModule`` on random CUDA input.

    The output must have ``c_out`` channels and the same spatial size as the
    input, and its absolute sum must be approximately zero.
    """
    batch_sz = 1
    c_in = 2
    c_out = 3
    depth = 11
    width = 3
    size = (20, 20)

    x = t.randn(batch_sz, c_in, *size).cuda()
    target = t.randn(batch_sz, c_out, *size).cuda()

    net = MSDModule(c_in, c_out, depth, width)
    output = net(Variable(x))

    # `target` only serves as a reference for the expected output shape.
    assert target.shape == output.shape

    total = output.data.abs().sum().item()
    assert total == approx(0)
def test_msd_gradients():
    """Check the gradients of a CUDA ``MSDModule`` numerically.

    Input and network use double precision; ``gradcheck`` is run with
    ``atol=1e-4`` and ``rtol=1e-3``.
    """
    t.manual_seed(1)
    dtype = t.double
    size = (11, 13)
    batch_sz = 2

    for depth in (9,):
        print(f"Depth: {depth}")

        # Width and channel counts are all tied to the batch size.
        c_in = batch_sz
        c_out = batch_sz
        width = batch_sz

        noise = t.randn(batch_sz, c_in, *size, dtype=dtype)
        x = Variable(noise).cuda()
        x.requires_grad = True

        net = MSDModule(c_in, c_out, depth, width).cuda()
        net.double()

        # The weights of the final layer are initialized to zero by
        # default. This makes it trivial to pass gradcheck. Therefore,
        # we reinitialize all weights randomly.
        for param in net.parameters():
            param.data = t.randn_like(param.data)

        gradcheck(net, [x], raise_exception=True, atol=1e-4, rtol=1e-3)
def test_with_tail():
    """Chain an ``MSDModule`` with a 1x1 convolution and backpropagate once.

    After a forward pass and an MSE-loss backward pass, the absolute sum of
    the output must not be approximately zero.
    """
    batch_sz = 1
    c_in = 2
    c_out = 3
    depth = 11
    width = 3
    size = (20, 20)

    x = t.randn(batch_sz, c_in, *size).cuda()
    target = Variable(t.randn(batch_sz, 1, *size).cuda())

    # The tail maps the c_out MSD output channels to a single channel.
    msd = MSDModule(c_in, c_out, depth, width)
    tail = nn.Conv2d(c_out, 1, 1)
    net = nn.Sequential(msd, tail)
    net.cuda()

    output = net(Variable(x))
    criterion = nn.MSELoss()
    loss = criterion(output, target)
    loss.backward()

    assert output.abs().sum().item() != approx(0.0)
def test_zero_depth_network():
    """Constructing an ``MSDModule`` with zero depth or zero width must fail.

    Both configurations are expected to raise ``ValueError``.
    """
    invalid_configs = (
        {"depth": 0, "width": 1},
        {"depth": 1, "width": 0},
    )
    for config in invalid_configs:
        with pytest.raises(ValueError):
            MSDModule(1, 1, **config)
def test_msd_module_3D():
    """Run an untrained ``MSDModule`` with ``ndim=3`` on a 7x7x7 volume of ones."""
    c_in, c_out, depth, width = 2, 3, 5, 1
    net = MSDModule(c_in, c_out, depth, width, ndim=3).cuda()

    volume = t.ones(1, c_in, 7, 7, 7).cuda()

    # The final layer is initialized with zeros. Therefore the output
    # of an untrained network must always be zero.
    output_sum = net(volume).sum().item()
    assert output_sum == 0.0
class msdSegModule(nn.Module):
    """Segmentation network built around an ``MSDModule``.

    The network consists of a fixed (non-trained) 1x1 convolution that
    renormalizes the input, followed by the trained part: the MSD module,
    a 1x1 convolution and a Softmax over the channel dimension. The class
    also owns the Adam optimizer for the trained part and offers helpers
    for training, validation, saving and loading.

    NOTE(review): ``train`` below shadows ``nn.Module.train(mode)``; calling
    ``.eval()`` / ``.train()`` in the usual PyTorch sense on this object
    will therefore not work. The name is kept for backward compatibility.
    """

    def __init__(self, c_in, c_out, depth, width, conv3d=False, reflect=True):
        """Build the fixed layer, the MSD network and its optimizer.

        :param c_in: The number of input channels.
        :param c_out: The number of output channels.
        :param depth: Passed on to ``MSDModule`` as its depth.
        :param width: Passed on to ``MSDModule`` as its width.
        :param conv3d: Passed on to ``MSDModule`` as ``conv3d``.
        :param reflect: Passed on to ``MSDModule`` as ``reflect``.
        """
        super(msdSegModule, self).__init__()
        self.c_in = c_in
        self.c_out = c_out
        self.depth, self.width = depth, width
        self.criterion = nn.NLLLoss2d()

        # This part of the network can be used to renormalize the
        # input data. Its parameters are saved when the network is
        # saved.
        # NOTE(review): this layer (and the 1x1 convolution below) is always
        # a Conv2d, also when conv3d=True -- confirm that is intended.
        net_fixed = nn.Conv2d(c_in, c_in, 1)
        net_fixed.bias.requires_grad = False
        net_fixed.weight.requires_grad = False
        self.net_fixed = net_fixed
        # Start out as the identity mapping.
        self._set_fixed_scaling(scale=1, offset=0)

        # The rest of the network has parameters that are updated
        # during training.
        self.net_msd = MSDModule(
            c_in, c_out, depth, width, msd_dilation,
            reflect=reflect, conv3d=conv3d,
        )
        # The trained part ends in a Softmax (not a LogSoftmax) over the
        # channel dimension.
        net_trained = nn.Sequential(
            self.net_msd, nn.Conv2d(c_out, c_out, 1), nn.Softmax(dim=1)
        )
        self.net = nn.Sequential(net_fixed, net_trained)
        self.net.cuda()

        # Only the trained part is handed to the optimizer.
        self.optimizer = optim.Adam(net_trained.parameters())

    def _set_fixed_scaling(self, scale, offset):
        """Make ``net_fixed`` compute ``scale * x + offset`` per channel.

        The weight is set to ``scale`` on the channel diagonal and zero
        elsewhere. Filling the whole (c_in, c_in, 1, 1) weight with one value
        would make every output channel a sum over all input channels.
        """
        weight = self.net_fixed.weight.data
        weight.zero_()
        for channel in range(self.c_in):
            weight[channel, channel].fill_(float(scale))
        self.net_fixed.bias.data.fill_(float(offset))

    def set_normalization(self, dataloader):
        """Set the fixed input layer from the statistics of ``dataloader``.

        The mean and the mean of squares are averaged over the batches
        (every batch gets equal weight), and a single global mean and
        standard deviation are derived from them.

        :param dataloader: iterable with ``len()`` yielding
            ``(data_in, _)`` pairs; only ``data_in`` is used.
        """
        mean = 0.0
        mean_of_squares = 0.0
        for (data_in, _) in dataloader:
            mean += float(data_in.mean())
            mean_of_squares += float(data_in.pow(2).mean())
        mean /= len(dataloader)
        mean_of_squares /= len(dataloader)
        std = np.sqrt(mean_of_squares - mean ** 2)

        # The input data should be roughly normally distributed after
        # passing through net_fixed. The layer computes w * x + b, so
        # (x - mean) / std requires w = 1 / std and b = -mean / std.
        self._set_fixed_scaling(scale=1 / std, offset=-mean / std)

    def set_input(self, data):
        """Store ``data`` (moved to the GPU) as the current network input."""
        assert self.c_in == data.shape[1]
        self.input = Variable(data.cuda())

    def set_target(self, data):
        """Store ``data`` (moved to the GPU) as the current target."""
        # The class labels must reside on the GPU
        data = data.cuda()
        self.target = Variable(data)

    def forward(self, x=None, target=None):
        """Compute ``self.output`` and ``self.loss``; nothing is returned.

        :param x: optional new input; when ``None`` the previously set input
            is reused.
        :param target: optional new target; when ``None`` a target of zeros
            with the shape of the output is used.
        """
        if x is not None:
            self.set_input(x)
        self.output = self.net(self.input)

        if target is not None:
            self.set_target(target)
        else:
            self.set_target(t.zeros(self.output.shape))

        self.loss = crossEntropy2d_sum(self.output, self.target)

    def predict(self, x=None):
        """Return the network output for ``x`` (or for the stored input)."""
        if x is not None:
            self.set_input(x)
        return self.net(self.input)

    def learn(self, x=None, target=None):
        """Perform one optimization step on ``(x, target)``."""
        self.forward(x, target)
        self.optimizer.zero_grad()
        self.loss.backward()
        self.optimizer.step()

    def print_training(self, epoch=0, loss=0.0, printTime=True):
        """Print epoch, loss and the current time.

        ``printTime`` is accepted for compatibility but is not used.
        """
        print('Epoch: {}'.format(epoch),
              'Loss: {}'.format(loss),
              'Time: {}'.format(str(datetime.datetime.now())))

    def train(self, dataloader, num_epochs, target_key='merged_masks',
              savefigures=False, num_fig=10, save_dir='.'):
        """Train the network and return the mean training loss per epoch.

        :param dataloader: iterable with ``len()`` yielding dict-like samples
            with an ``'images'`` entry and a ``target_key`` entry.
        :param num_epochs: number of passes over ``dataloader``.
        :param target_key: key of the target in each sample.
        :param savefigures: when true, periodically save output, input and
            target images as well as the network to ``save_dir``.
        :param num_fig: approximate number of times figures are saved.
        :param save_dir: directory for figures and network snapshots.
        :returns: list with the mean loss of every epoch.
        """
        loss_list = []
        if savefigures:
            print('The figures during training are saved in: {}'.format(
                save_dir))

        # Clamp the interval to at least one epoch: the plain
        # ``num_epochs // num_fig`` is zero when num_epochs < num_fig and
        # would make the modulo below raise ZeroDivisionError.
        fig_interval = max(1, num_epochs // max(1, num_fig))

        for epoch in range(num_epochs):
            training_loss = 0
            for sample in dataloader:
                self.learn(sample['images'], sample[target_key])
                training_loss += self.get_loss()
            epoch_loss = training_loss / len(dataloader)
            loss_list.append(epoch_loss)
            self.print_training(epoch, epoch_loss)

            if savefigures and epoch % fig_interval == 0:
                self.save_output(
                    os.path.join(save_dir, 'output_{}.png'.format(epoch)))
                self.save_input(
                    os.path.join(save_dir, 'input_{}.png'.format(epoch)))
                self.save_target(
                    os.path.join(save_dir, 'target_{}.png'.format(epoch)))
                self.save_network(save_dir,
                                  'msdNet_{}.pytorch'.format(epoch))
        return loss_list

    def validate(self, dataloader, target_key='merged_masks'):
        """Return the mean loss over ``dataloader`` without training.

        Only the forward pass is evaluated; no optimizer step is taken, so
        the network parameters are left untouched by validation.
        """
        validation_loss = 0
        for sample in dataloader:
            self.forward(sample['images'], sample[target_key])
            validation_loss += self.get_loss()
        return validation_loss / len(dataloader)

    def print(self):
        """Print the complete network."""
        print(self.net)

    def get_loss(self):
        """Return the sum of the most recently computed loss."""
        return self.loss.data.sum()

    def get_output(self):
        """Return the most recently computed output."""
        return self.output

    def get_network_path(self, save_dir, fname):
        """Return the path of file ``fname`` inside ``save_dir``."""
        save_path = os.path.join(save_dir, fname)
        return save_path

    def save_network(self, save_dir, fname):
        """Save the network state to ``save_dir/fname`` and return the path.

        ``save_dir`` is created when it does not exist yet.
        """
        save_path = self.get_network_path(save_dir, fname)
        os.makedirs(save_dir, exist_ok=True)
        # Clear the L and G buffers before saving:
        self.net_msd.clear_buffers()
        t.save(self.net.state_dict(), save_path)
        return save_path

    def load_network(self, save_dir='.', fname=None, save_file=None):
        """Load network parameters from storage.

        :param save_dir: directory that contains the saved network.
        :param fname: file name of the saved network inside ``save_dir``.
        :param save_file: a file path or stream-like object that
            overrides ``save_dir`` and ``fname``.
        :returns:
        :rtype:
        """
        if save_file is None:
            save_file = self.get_network_path(save_dir, fname)
        self.net.load_state_dict(t.load(save_file))
        self.net.cuda()

    def save_output(self, filename):
        """Save the current output via ``image_save``."""
        imarr = self.output.data.squeeze().cpu().numpy()
        image_save(filename, imarr, 'output')

    def save_input(self, filename):
        """Save the current input via ``image_save``."""
        imarr = self.input.data.squeeze().cpu().numpy()
        image_save(filename, imarr, 'input')

    def save_target(self, filename):
        """Save the current target via ``image_save``."""
        imarr = self.target.data.squeeze().cpu().numpy()
        image_save(filename, imarr, 'target')

    def save_diff(self, filename):
        """Save the absolute difference of target and output."""
        imarr = t.abs(self.target - self.output).data.squeeze().cpu().numpy()
        image_save(filename, imarr, 'diff')

    def save_heatmap(self, filename):
        """Save the L buffer, scaled per channel by the final weights.

        NOTE(review): ``self.net`` is an ``nn.Sequential`` and has no ``L``
        or ``c_final`` attribute, so they are read from ``self.net_msd``.
        That ``c_final`` lives on the MSD module is an assumption -- confirm.
        """
        L = self.net_msd.L.clone()
        C = self.net_msd.c_final.weight.data
        for i, c in enumerate(C.squeeze().tolist()):
            L[:, i, :, :].mul_(c)
        tvu.save_image(L[:, 1:, :, :].transpose(0, 1), filename, nrow=10)

    def save_g(self, filename):
        """Save the G buffer of the MSD module as an image grid."""
        tvu.save_image(self.net_msd.G[:, 1:, :, :].transpose(0, 1),
                       filename, nrow=10)