def smooth_attn(attn_maps, kernel_size=3):
    # Box-filter smoothing of a (bs, c, d, h, w) attention volume.
    # torch.conv3d is the functional op (not a module), so build the averaging
    # kernel directly and smooth each channel independently.
    bs, c, d, h, w = attn_maps.size()
    kernel = torch.ones(1, 1, kernel_size, kernel_size, kernel_size,
                        device=attn_maps.device, dtype=attn_maps.dtype)
    kernel /= kernel.numel()
    smoothed = torch.conv3d(attn_maps.reshape(bs * c, 1, d, h, w), kernel,
                            padding=kernel_size // 2)
    return smoothed.view(bs, c, d, h, w)
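# Hypothetical usage of smooth_attn above (shapes chosen purely for
# illustration); with an odd kernel_size the padding keeps the volume's
# spatial size unchanged.
import torch

attn = torch.rand(2, 4, 8, 16, 16)             # (bs, c, d, h, w)
print(smooth_attn(attn, kernel_size=3).shape)  # torch.Size([2, 4, 8, 16, 16])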
def function_hook(input, weight, bias, *args, **kwargs):
    # Rebuild an nn.Conv2d/nn.Conv3d module that mirrors the functional call,
    # run the actual convolution, and hand both to forward_hook.
    base = nn.Conv2d if input.dim() == 4 else nn.Conv3d

    class Convolution(base):
        def __init__(self, weight, bias, stride, padding, dilation, groups):
            super().__init__(in_channels=input.shape[1],
                             out_channels=weight.shape[0],
                             kernel_size=weight.shape[2:],
                             stride=stride,
                             padding=padding,
                             dilation=dilation,
                             groups=groups,
                             bias=bias is not None)
            params = {'weight': weight}
            if bias is not None:
                params['bias'] = bias
            self.load_state_dict(params)

    if input.dim() == 4:
        output = torch.conv2d(input.tensor(), weight, bias, *args, **kwargs)
    elif input.dim() == 5:
        output = torch.conv3d(input.tensor(), weight, bias, *args, **kwargs)
    return forward_hook(Convolution(weight, bias, *args, **kwargs), (input, ), output)
def conv3d_weight(input, weight_size, grad_output, stride=1, padding=0,
                  dilation=1, groups=1, bias=None):
    r"""Computes the gradient of conv3d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iT x iH x iW)
        weight_size: Shape of the weight gradient tensor
        grad_output: output gradient tensor (minibatch x out_channels x oT x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        bias: optional bias tensor (out_channels). Default: None

    Examples::

        >>> input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
        >>> weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
        >>> output = F.conv3d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv3d_weight(input, weight.shape, grad_output)
    """
    stride = _triple(stride)
    padding = _triple(padding)
    dilation = _triple(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    grad_output = grad_output.repeat(1, in_channels // groups, 1, 1, 1)
    grad_output = grad_output.contiguous().view(
        grad_output.shape[0] * grad_output.shape[1], 1, grad_output.shape[2],
        grad_output.shape[3], grad_output.shape[4])

    input = input.contiguous().view(1, input.shape[0] * input.shape[1],
                                    input.shape[2], input.shape[3],
                                    input.shape[4])

    grad_weight = torch.conv3d(input, grad_output, bias, dilation, padding,
                               stride, in_channels * min_batch)

    grad_weight = grad_weight.contiguous().view(
        min_batch, grad_weight.shape[1] // min_batch, grad_weight.shape[2],
        grad_weight.shape[3], grad_weight.shape[4])

    return grad_weight.sum(dim=0).view(
        in_channels // groups, out_channels, grad_weight.shape[2],
        grad_weight.shape[3], grad_weight.shape[4]).transpose(0, 1).narrow(
            2, 0, weight_size[2]).narrow(3, 0, weight_size[3]).narrow(
                4, 0, weight_size[4])
def deltaE1(self):
    a = 1 / 2
    b = 1 / 3
    kernel = torch.tensor([[[b, a, b], [0, 1, 0], [b, a, b]],
                           [[a, 1, a], [1, 0, 1], [a, 1, a]],
                           [[b, a, b], [0, 1, 0], [b, a, b]]]).view(
                               (1, 1, 3, 3, 3))
    return torch.conv3d(self.grid, -self.J * kernel, padding=1) * self.grid
def get_outer_band_mask(self, tensor):
    channels = tensor.shape[1]
    kernel = self.get_band_width_kernel().to(tensor.device)
    band = torch.conv3d((tensor >= self.mask_cut[0]).float(),
                        kernel.expand(channels, 1, -1, -1, -1),
                        padding=self.band_width // 2,
                        groups=channels)
    mask = (band > 0).float() - (tensor >= self.mask_cut[0]).float()
    return mask
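# Standalone sketch of the same trick, independent of the class above and with
# assumed values: threshold a volume, dilate the binary mask with an all-ones
# conv3d kernel, and keep only the voxels the dilation adds, i.e. a band just
# outside the original mask.
import torch

band_width = 3
volume = torch.rand(1, 1, 16, 16, 16)
inside = (volume >= 0.5).float()
kernel = torch.ones(1, 1, band_width, band_width, band_width)
dilated = (torch.conv3d(inside, kernel, padding=band_width // 2) > 0).float()
outer_band = dilated - inside  # 1 only on the shell around the mask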
def conv3d_weight(input, weight_size, grad_output, stride=1, padding=0,
                  dilation=1, groups=1):
    r"""Computes the gradient of conv3d with respect to the weight of the convolution.

    Args:
        input: input tensor of shape (minibatch x in_channels x iT x iH x iW)
        weight_size: Shape of the weight gradient tensor
        grad_output: output gradient tensor (minibatch x out_channels x oT x oH x oW)
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1

    Examples::

        >>> input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
        >>> weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
        >>> output = F.conv3d(input, weight)
        >>> grad_output = torch.randn(output.shape)
        >>> grad_weight = torch.autograd.grad(output, weight, grad_output)
        >>> F.grad.conv3d_weight(input, weight.shape, grad_output)
    """
    stride = _triple(stride)
    padding = _triple(padding)
    dilation = _triple(dilation)
    in_channels = input.shape[1]
    out_channels = grad_output.shape[1]
    min_batch = input.shape[0]

    input = input.detach()
    weight = torch.empty(weight_size, dtype=input.dtype, device=input.device,
                         requires_grad=True)
    with torch.enable_grad():
        result = torch.conv3d(input, weight, None, stride, padding, dilation,
                              groups)
    result.backward(grad_output)
    return weight.grad
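# Quick self-check sketch for the autograd-based conv3d_weight above, reusing
# the shapes from its docstring; the manual result should match what
# torch.autograd.grad reports (the tolerance here is an assumption).
import torch
import torch.nn.functional as F

input = torch.randn(2, 8, 10, 10, 20, requires_grad=True)
weight = torch.randn(4, 8, 2, 3, 3, requires_grad=True)
output = F.conv3d(input, weight)
grad_output = torch.randn(output.shape)
(expected,) = torch.autograd.grad(output, weight, grad_output)
print(torch.allclose(expected,
                     conv3d_weight(input, weight.shape, grad_output),
                     atol=1e-4))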
def _pairwise_distances(self, x, y, single_kernel=False):
    device = x.device
    kernels = self.distance_kernels.to(device)
    if single_kernel:
        kernels = kernels[:-1].sum(dim=0, keepdim=True)

    # Compute distances to y points
    distances_to_y = torch.conv3d(y.float().expand(1, 1, -1, -1, -1),
                                  kernels, padding=self.radius)[0]

    # Remove zero points from x
    relevant_distances = distances_to_y.permute(
        1, 2, 3, 0)[x.nonzero(as_tuple=True)]

    # Compute distances from convolution values
    all_distances = torch.zeros_like(relevant_distances)
    indices = relevant_distances.nonzero(as_tuple=True)
    all_distances[all_distances == 0] = self.d_max.to(device)
    all_distances[indices] = self.distances[indices[1]].to(device)
    return all_distances
def update(self, x):
    # Prepare the inputs
    y = self.similarity(x, self.weight)
    t = self.teacher_signal
    if t is not None:
        t = t.unsqueeze(2).unsqueeze(3) * torch.ones_like(y, device=y.device)
    y = y.permute(0, 2, 3, 1).contiguous().view(-1, self.weight.size(0))
    if t is not None:
        t = t.permute(0, 2, 3, 1).contiguous().view(-1, self.weight.size(0))
    x_unf = unfold_map2d(x, self.weight.size(2), self.weight.size(3))
    x_unf = x_unf.permute(0, 2, 3, 1, 4).contiguous().view(y.size(0), 1, -1)

    # Random abstention
    if self.random_abstention:
        abst_prob = self.victories_count / (self.victories_count.max() +
                                            y.size(0) / y.size(1)).clamp(1)
        scores = y * (torch.rand_like(abst_prob, device=y.device) >=
                      abst_prob).float().unsqueeze(0)
    else:
        scores = y

    # Competition. The returned winner_mask is a bitmap telling where a neuron won and where one lost.
    if self.competitive:
        if t is not None:
            scores *= t
        winner_mask = (scores == scores.max(1, keepdim=True)[0]).float()
        if self.random_abstention:  # Update statistics if using random abstention
            winner_mask_sum = winner_mask.sum(0)  # Number of inputs over which a neuron won
            self.victories_count += winner_mask_sum
            self.victories_count -= self.victories_count.min().item()
    else:
        winner_mask = torch.ones_like(y, device=y.device)

    # Lateral feedback
    if self.lfb_on:
        lfb_kernel = self.lfb_kernel
        if self.lfb_value == self.LFB_DoG or self.lfb_value == self.LFB_DoE:
            # Difference of Gaussians/Exponentials (mexican hat shaped function)
            lfb_kernel = 2 * lfb_kernel - lfb_kernel.pow(0.5)
        lfb_in = F.pad(winner_mask.view(-1, *self.out_size), self.pad)
        if self.out_size.size(0) == 1:
            lfb_out = torch.conv1d(lfb_in.unsqueeze(1),
                                   lfb_kernel.unsqueeze(0).unsqueeze(1))
        elif self.out_size.size(0) == 2:
            lfb_out = torch.conv2d(lfb_in.unsqueeze(1),
                                   lfb_kernel.unsqueeze(0).unsqueeze(1))
        else:
            lfb_out = torch.conv3d(lfb_in.unsqueeze(1),
                                   lfb_kernel.unsqueeze(0).unsqueeze(1))
        lfb_out = lfb_out.clamp(-1, 1).view_as(y)
    else:
        lfb_out = winner_mask
        if self.competitive:
            lfb_out[lfb_out == 0] = self.lfb_value
        elif t is not None:
            lfb_out = t

    # Compute step modulation coefficient
    r = lfb_out  # RULE_BASE
    if self.weight_upd_rule == self.RULE_HEBB:
        r *= y

    # Compute delta
    r_abs = r.abs()
    r_sign = r.sign()
    delta_w = r_abs.unsqueeze(2) * (r_sign.unsqueeze(2) * x_unf -
                                    self.weight.view(1, self.weight.size(0), -1))

    # Since we use batches of inputs, we need to aggregate the different update steps of each kernel in a unique
    # update. We do this by taking the weighted average of the steps, the weights being the r coefficients that
    # determine the length of each step
    r_sum = r_abs.sum(0)
    r_sum += (r_sum == 0).float()  # Prevent divisions by zero
    delta_w_avg = (delta_w * r_abs.unsqueeze(2)).sum(0) / r_sum.unsqueeze(1)

    # Apply delta
    self.weight += self.eta * delta_w_avg.view_as(self.weight)

    # LFB kernel shrinking and LR schedule
    if self.lfb_on:
        self.lfb_kernel = self.lfb_kernel.pow(self.alpha)
    if self.lr_schedule is not None:
        self.eta = self.lr_schedule(self.eta)
def deltaE0(self):
    kernel = torch.tensor([[[0., 0, 0], [0, 1, 0], [0, 0, 0]],
                           [[0., 1, 0], [1, 0, 1], [0, 1, 0]],
                           [[0., 0, 0], [0, 1, 0], [0, 0, 0]]]).view(
                               (1, 1, 3, 3, 3))
    return torch.conv3d(self.grid, -self.J * kernel, padding=1) * self.grid
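# Standalone sketch of the idea behind deltaE0/deltaE1 above, under assumed
# shapes: for a +/-1 spin grid of shape (1, 1, D, H, W) and a scalar coupling
# J, convolving with the nearest-neighbour kernel gives each site's
# interaction energy -J * s_i * (sum of its six neighbours); zero padding acts
# as open boundaries.
import torch

J = 1.0
spins = torch.randint(0, 2, (1, 1, 8, 8, 8)).float() * 2 - 1
kernel = torch.tensor([[[0., 0, 0], [0, 1, 0], [0, 0, 0]],
                       [[0., 1, 0], [1, 0, 1], [0, 1, 0]],
                       [[0., 0, 0], [0, 1, 0], [0, 0, 0]]]).view(1, 1, 3, 3, 3)
local_energy = torch.conv3d(spins, -J * kernel, padding=1) * spins
total_energy = local_energy.sum() / 2  # each bond is counted from both ends
print(total_energy)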
def conv3d(input, *args, **kwargs):
    return torch.conv3d(input.q, *args, **kwargs)
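# Hypothetical usage of the thin wrapper above: any object exposing the raw
# tensor as a `.q` attribute (the Wrapped class here is made up purely for
# illustration) is unwrapped before the call is forwarded to torch.conv3d.
import torch

class Wrapped:
    def __init__(self, q):
        self.q = q

x = Wrapped(torch.rand(1, 1, 4, 8, 8))
w = torch.rand(2, 1, 3, 3, 3)
print(conv3d(x, w, padding=1).shape)  # torch.Size([1, 2, 4, 8, 8])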
def forward(self, x):
    x = x.view(-1, self.in_channels, n_angle, *x.shape[-2:])
    con = torch.conv3d(x, self.weight, self.bias, groups=self.groups)
    return con.view(-1, self.out_channels * n_angle, *x.shape[-2:])
import random
import torch

# Assumed setup for the sizes this excerpt uses but never defines (n, iC, oC,
# T, H, W, kT); they are drawn at random here, mirroring kH and kW below.
n, iC, oC = random.randint(1, 4), random.randint(1, 4), random.randint(1, 4)
T, H, W = random.randint(6, 12), random.randint(6, 12), random.randint(6, 12)
kT = random.randint(2, 6)

kH = random.randint(2, 6)
kW = random.randint(2, 6)
input = torch.rand(n, iC, T, H, W)
kernel = torch.rand(oC, iC, kT, kH, kW)
bias = torch.rand(oC)

oH = H - kH + 1
oW = W - kW + 1
oT = T - kT + 1

# Naive reference implementation: slide the kernel over every output location
# and sum the elementwise products.
out = torch.zeros((n, oC, oT, oH, oW))
for t in range(oT):
    for row in range(oH):
        for col in range(oW):
            # input[:, :, t:t+kT, row:row+kH, col:col+kW] ==> (n, iC, kT, kH, kW)
            # kernel ==> (oC, iC, kT, kH, kW)
            this_input = input[:, :, t:t+kT, row:row+kH, col:col+kW].unsqueeze(1)  # (n, 1, iC, kT, kH, kW)
            this_kernel = kernel.unsqueeze(0)  # (1, oC, iC, kT, kH, kW)
            out[:, :, t, row, col] = torch.sum(this_input * this_kernel,
                                               (-1, -2, -3, -4))

out1 = torch.conv3d(input, kernel, bias)

# Add the bias to the looped result so that both outputs include it.
for b in range(oC):
    out[:, b, :, :, :] = out[:, b, :, :, :].add(bias[b])
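# A natural follow-up check (the tolerance here is an assumption): the naive
# looped result should match torch.conv3d up to floating-point error.
print(torch.allclose(out, out1, atol=1e-4))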