def sp(self, i, t_feats, s_feats, margins, pooling_kernel='max'):
    """Simple pooling for channel reduction (max or average).

    The teacher channels of stage ``i`` are reordered with
    ``self.guided_inds[i]``, cut into consecutive groups of ``sc`` channels
    (``sc`` = student channel count) and the groups are pooled together so the
    result has ``sc`` channels.

    Args:
        i: index of the feature stage to process.
        t_feats: sequence of teacher feature maps; ``t_feats[i]`` is 4-D.
        s_feats: sequence of student feature maps; ``s_feats[i]`` is 4-D and
            supplies the target channel count and spatial size.
        margins: tensor indexed on dim 1 with the same channel indices as the
            teacher features.
        pooling_kernel: ``'avg'`` pools the groups with
            ``F.adaptive_avg_pool3d``; any other value (default ``'max'``)
            uses ``F.adaptive_max_pool3d``.

    Returns:
        Tuple ``(t, m)``: the pooled teacher features and pooled margins.
    """
    _, sc, h, w = s_feats[i].shape
    _, tc, _, _ = t_feats[i].shape
    # Start offset of the incomplete trailing group (only reached when tc % sc != 0).
    tail_start = (tc // sc) * sc
    if pooling_kernel == 'avg':
        pool3d = F.adaptive_avg_pool3d
    else:
        pool3d = F.adaptive_max_pool3d

    t_groups = []
    m_groups = []
    for c in range(0, tc, sc):
        # The leftover group is dropped when ignore indices exist or shaving is on.
        if c == tail_start and (len(self.ignore_inds) > 0 or self.shave):
            continue
        selected = self.guided_inds[i][c:c + sc].detach()
        t_groups.append(t_feats[i][:, selected, :, :])
        m_groups.append(margins[:, selected, :, :])

    # Stack groups on a new axis (dim 2) and pool that axis down to size 1.
    t = torch.stack(t_groups, dim=2)
    m = torch.stack(m_groups, dim=2)
    t = pool3d(t, (1, h, w)).squeeze(2)
    m = pool3d(m, (1, 1, 1)).squeeze(2)
    return t, m
def forward(self, x):
    """Adaptively pool ``x`` to ``self.output_size`` according to ``self.method``.

    ``'avg'`` returns the average pool, ``'max'`` returns the max pool, and any
    other value returns the element-wise sum of both.
    """
    if self.method == 'avg':
        return F.adaptive_avg_pool3d(x, self.output_size)
    # The second branch previously re-tested 'avg', so 'max' fell through to the sum.
    if self.method == 'max':
        return F.adaptive_max_pool3d(x, self.output_size)
    avg_pooled = F.adaptive_avg_pool3d(x, self.output_size)
    max_pooled = F.adaptive_max_pool3d(x, self.output_size)
    return avg_pooled + max_pooled
def forward_multiframe(self, x, pool=True):
    """Run ``self.feature_extraction`` on every frame of a (B, T, C, H, W) clip.

    Frames are folded into the batch axis for the extractor, then unfolded and
    permuted to (B, C', T, H', W').  With ``pool=False`` that tensor is returned
    as is.  Otherwise it is globally pooled according to ``self.pool_type``
    ('avgpool' / 'maxpool'), optionally passed through ``self.fc`` when
    ``self.with_fc`` is set, and returned reshaped to (B, -1, 1, 1).
    """
    batch, frames, chans, height, width = x.size()
    stacked = x.contiguous().view(batch * frames, chans, height, width)
    feats = self.feature_extraction(stacked)

    _, chans, height, width = feats.size()
    feats = feats.view(batch, frames, chans, height, width).permute(0, 2, 1, 3, 4)
    if not pool:
        return feats

    if self.pool_type == 'avgpool':
        feats = F.adaptive_avg_pool3d(feats, 1)
    elif self.pool_type == 'maxpool':
        feats = F.adaptive_max_pool3d(feats, 1)

    if self.with_fc:
        feats = self.fc(feats.view(feats.size(0), -1))
    return feats.view(feats.size(0), -1, 1, 1)
def forward(self, x):
    """Process channel slices of ``x`` at decreasing spatial resolution and add the result to ``x``.

    After the optional ``conv_down``/``bn1``/``relu`` reduction, slice ``i`` of
    width ``self.in_gran_channel`` is max-pooled to (T, H // 2**i, W // 2**i),
    passed through three modules from ``self.axial_gran`` in the sequence chosen
    by ``self.order``, and interpolated back to (T, H, W).  The slices are
    concatenated on dim 1, passed through ``conv_up`` and ``bn2``, and added to
    the input.

    Raises:
        NotImplementedError: if ``self.order`` is not one of
            'hwt', 'wht', 'wth', 'twh'.
    """
    identity = x
    if self.inter_channel is not None:
        x = self.relu(self.bn1(self.conv_down(x)))

    # Offsets into each group of three axial modules.  Judging by the original
    # local names, 0/1/2 are the h/w/t modules (assumption, not verified here).
    plain_sequences = {
        'wht': (1, 0, 2),
        'wth': (1, 2, 0),
        'twh': (2, 1, 0),
    }
    width = self.in_gran_channel
    processed = []
    for level in range(self.granularity):
        piece = x[:, level * width:(level + 1) * width, ...]
        _, _, T, H, W = piece.shape
        shrink = 2 ** level
        piece = F.adaptive_max_pool3d(piece, (T, H // shrink, W // shrink))

        base = level * 3
        if self.order == 'hwt':
            # This order calls the modules with an extra flag and receives
            # (tensor, extra) pairs; the extra output is discarded.
            for offset in (0, 1, 2):
                piece, _ = self.axial_gran[base + offset](piece, True)
        elif self.order in plain_sequences:
            for offset in plain_sequences[self.order]:
                piece = self.axial_gran[base + offset](piece)
        else:
            raise NotImplementedError

        processed.append(F.interpolate(piece, size=(T, H, W)))

    merged = torch.cat(processed, dim=1)
    merged = self.bn2(self.conv_up(merged))
    return identity + merged
def forward(self, x):
    """Scale ``x`` by a sigmoid gate built from pooled descriptors.

    For every entry of ``self.pool_types`` a descriptor of ``x`` is computed
    and passed through ``self.mlp``; the MLP outputs are summed, squashed with
    a sigmoid and broadcast over the two trailing dims of ``x``.

    Supported pool types:
        * ``'avg'`` / ``'max'``: adaptive 3-D pooling of the (C, H, W) volume
          to ``(self.groups, 1, 1)``.
        * ``'lp'``: L2 pooling over the full spatial extent.
        * ``'lse'``: ``logsumexp_2d(x)``.

    Raises:
        ValueError: if ``x`` is not 4-D, ``self.pool_types`` is empty, or it
            contains an unsupported entry.  Previously an unknown entry either
            hit an unbound local or silently re-added the previous descriptor.
    """
    if x.dim() != 4:
        raise ValueError("expected a 4-D input, got {} dims".format(x.dim()))

    channel_att_sum = None
    for pool_type in self.pool_types:
        if pool_type == 'avg' or pool_type == 'max':
            pool3d = F.adaptive_avg_pool3d if pool_type == 'avg' else F.adaptive_max_pool3d
            # A leading singleton lets the 3-D pool treat (C, H, W) as the volume.
            pooled = pool3d(x.unsqueeze(0), (self.groups, 1, 1)).squeeze(0)
        elif pool_type == 'lp':
            spatial = (x.size(2), x.size(3))
            pooled = F.lp_pool2d(x, 2, spatial, stride=spatial)
        elif pool_type == 'lse':
            pooled = logsumexp_2d(x)
        else:
            raise ValueError("unsupported pool type: {!r}".format(pool_type))

        channel_att_raw = self.mlp(pooled)
        if channel_att_sum is None:
            channel_att_sum = channel_att_raw
        else:
            channel_att_sum = channel_att_sum + channel_att_raw

    if channel_att_sum is None:
        raise ValueError("pool_types must contain at least one pool type")

    # torch.sigmoid replaces the deprecated F.sigmoid.
    scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
    return x * scale
def forward_multiframe_feat_emb(self, x, pool=True):
    """Embed every frame of a (B, C, T, H, W) clip with backbone + classifier.

    Frames are folded into the batch axis, passed through ``self.backbone`` and
    ``self.classifier(..., pool=False)``, and reshaped back to
    (B, C', T, H', W').

    Returns:
        The per-frame embedding alone when ``pool`` is false; otherwise the
        tuple ``(embedding, pooled)`` where ``pooled`` is the global max over
        (T, H', W') with shape (B, C').
    """
    batch, chans, frames, height, width = x.size()
    per_frame = x.permute(0, 2, 1, 3, 4).contiguous()
    per_frame = per_frame.view(batch * frames, chans, height, width)

    emb = self.classifier(self.backbone(per_frame), pool=False)
    _, chans, height, width = emb.size()
    emb = emb.view(batch, frames, chans, height, width).permute(0, 2, 1, 3, 4)
    if not pool:
        return emb

    pooled = F.adaptive_max_pool3d(emb, 1)
    pooled = pooled.view(batch, pooled.size(1))
    return emb, pooled
def forward(self, x):
    """Gate ``x`` with an attention volume built from avg- and max-pooled copies.

    The (C, H, W) volume of each sample is adaptively pooled to
    ``(self.group, H // 4, W // 4)`` with both average and max pooling, the two
    results go through ``self.conv2_1`` / ``self.conv2_2``, are summed,
    resized back to (C, H, W) with nearest-neighbour interpolation and squashed
    with a sigmoid before multiplying ``x``.
    """
    _, c, h, w = x.size()
    c2, h2, w2 = self.group, h // 4, w // 4

    # A leading singleton lets the 3-D pools treat (C, H, W) as the volume.
    # Pooling does not modify its input, so no defensive clone is needed.
    volume = x.unsqueeze(0)
    y1 = self.conv2_1(F.adaptive_avg_pool3d(volume, (c2, h2, w2)).squeeze(0))
    y2 = self.conv2_2(F.adaptive_max_pool3d(volume, (c2, h2, w2)).squeeze(0))

    y3 = (y1 + y2).unsqueeze(0)
    # F.interpolate / torch.sigmoid replace the deprecated F.upsample / F.sigmoid.
    y3 = F.interpolate(y3, size=(c, h, w)).squeeze(0)
    y3 = torch.sigmoid(y3)
    return y3 * x
def apply(features: Tensor, proposal_bboxes: Tensor, proposal_batch_indices: Tensor, mode: Mode) -> Tensor:
    """Crop every proposal from a 5-D feature volume and max-pool it to (T, 7, 7).

    Box coordinates (x0, y0, x1, y1) are multiplied by 1/16, rounded and
    clamped so that each crop keeps at least one cell inside the feature map.
    The crop always spans the full temporal axis.

    Raises:
        ValueError: if ``mode`` is not ``Pooler.Mode.POOLING``.
    """
    _, _, feature_map_t, feature_map_height, feature_map_width = features.shape
    scale = 1 / 16
    output_size = (feature_map_t, 7, 7)

    if mode == Pooler.Mode.POOLING:
        def to_cell(coordinate):
            # Image-space coordinate -> rounded feature-map index.
            return round(coordinate.item() * scale)

        rois = []
        for box, batch_index in zip(proposal_bboxes, proposal_batch_indices):
            # Start indices are clamped to [0, size - 1], end indices to [1, size].
            x0 = max(min(to_cell(box[0]), feature_map_width - 1), 0)
            y0 = max(min(to_cell(box[1]), feature_map_height - 1), 0)
            x1 = max(min(to_cell(box[2]) + 1, feature_map_width), 1)
            y1 = max(min(to_cell(box[3]) + 1, feature_map_height), 1)
            roi = features[batch_index, :, :, y0:y1, x0:x1]
            rois.append(F.adaptive_max_pool3d(input=roi, output_size=output_size))
        pool = torch.stack(rois, dim=0)
    else:
        raise ValueError
    return pool
def forward(self, f, inputs, proposals):
    """Crop each proposal from feature map ``f`` and max-pool it to a fixed size.

    Args:
        f: 5-D feature map indexed as ``f[b, :, z, y, x]``.
        inputs: network input; only ``inputs.shape[2:]`` (depth, height, width)
            is used, and it is stored on ``self.DEPTH/HEIGHT/WIDTH``.
        proposals: iterable of 1-D tensors laid out as
            ``[batch_idx, _, cz, cy, cx, sz, sy, sx]`` in input coordinates.

    Returns:
        Tensor stacking one crop per proposal, each pooled to
        ``self.rcnn_crop_size``.

    The clipping bounds are now built on the proposal's own device instead of a
    hard-coded ``.cuda()``, so the layer also works on CPU and on non-default
    GPUs.
    """
    self.DEPTH, self.HEIGHT, self.WIDTH = inputs.shape[2:]
    # Feature-map extent per axis; int() truncates like the former float -> long cast.
    extent = [int(dim / self.scale) for dim in (self.DEPTH, self.HEIGHT, self.WIDTH)]

    crops = []
    for p in proposals:
        b = int(p[0])
        center = p[2:5]
        side_length = p[5:8]
        c0 = center - side_length / 2  # lower corner
        c1 = c0 + side_length  # upper corner

        # Corresponding cell range on the down-sampled feature map.
        c0 = (c0 / self.scale).floor().long()
        c1 = (c1 / self.scale).ceil().long()

        # Clip the box to the feature map.
        upper = torch.tensor(extent, dtype=torch.long, device=c1.device)
        c0 = torch.clamp(c0, min=0)
        c1 = torch.min(c1, upper)

        # A zero-size slice should never happen; report it for debugging.
        if bool(((c1 - c0) < 1).any()):
            print(p)
            print('c0:', c0, ', c1:', c1)

        z0, y0, x0 = c0.tolist()
        z1, y1, x1 = c1.tolist()
        crop = f[b, :, z0:z1, y0:y1, x0:x1]
        crops.append(F.adaptive_max_pool3d(crop, self.rcnn_crop_size))

    return torch.stack(crops)
def forward(self, x):
    """Gate ``x`` with attention computed from concatenated avg/max pooled volumes.

    The (C, H, W) volume of each sample is adaptively pooled to
    (C // 4, H // 4, W // 4) with both average and max pooling; the results
    are concatenated on dim 1, passed through ``self.conv2``, resized back to
    (C, H, W) with nearest-neighbour interpolation and squashed with a sigmoid
    before multiplying ``x``.
    """
    _, c, h, w = x.size()
    c2, h2, w2 = c // 4, h // 4, w // 4

    # A leading singleton lets the 3-D pools treat (C, H, W) as the volume.
    volume = x.unsqueeze(0)
    y1 = F.adaptive_avg_pool3d(volume, (c2, h2, w2)).squeeze(0)
    y2 = F.adaptive_max_pool3d(volume, (c2, h2, w2)).squeeze(0)

    # The debug print of the concatenated size was removed from the hot path.
    y3 = self.conv2(torch.cat((y1, y2), 1))
    # F.interpolate / torch.sigmoid replace the deprecated F.upsample / F.sigmoid.
    y3 = F.interpolate(y3.unsqueeze(0), size=(c, h, w)).squeeze(0)
    y3 = torch.sigmoid(y3)
    return y3 * x
def forward(self, x):
    """Apply three conv+ReLU stages, global 3-D max pooling, and the ``fc`` head."""
    feats = x
    for conv in (self.conv1, self.conv2, self.conv3):
        feats = F.relu(conv(feats))
    pooled = F.adaptive_max_pool3d(feats, (1, 1, 1))
    flat = pooled.view(pooled.shape[0], -1)
    return self.fc(flat)
def forward(self, x):
    """Encode ``x``, max-pool the two trailing dims away, and gate with ``self.ln``.

    The encoder output is pooled to size 1 on its last two dims (the first
    pooled dim keeps its size), flattened to (batch, -1), and multiplied
    element-wise by ``self.ln`` applied to that same flattened tensor.
    """
    n_samples = x.shape[0]
    encoded = self.encoder(x)
    pooled = F.adaptive_max_pool3d(encoded, (None, 1, 1))
    flat = pooled.view((n_samples, -1))
    gate = self.ln(flat)
    return flat * gate
def forward(self, x):
    """Convolve the short and long streams and fuse short into long laterally.

    Args:
        x: pair ``(x_short, x_long)`` of 5-D tensors.

    Returns:
        Tuple ``(x_short, x_long)`` after convolution and normalisation; unless
        ``self.no_lateral`` is set, ``x_long`` also has the pooled, projected
        and normalised short stream added to it.

    The lateral resize check now compares against the full (T, H, W) size of
    ``x_long``.  It previously indexed ``x_long[0]`` and so compared (T, H, W)
    with (H, W), which made the trilinear resize run on every call.
    """
    x_short, x_long = x
    x_short = self.conv_short(x_short)
    x_long = self.conv_long(x_long)

    if not self.no_lateral:
        if self.pool == 'soft':
            # Pools to the tensor's own size (an identity); kept as in the
            # original, which had the soft-pool variant disabled at this spot.
            x_short2long = F.adaptive_max_pool3d(x_short, tuple(x_short.size()[-3:]))
        else:
            x_short2long = F.avg_pool3d(x_short, kernel_size=(1, 2, 2), stride=(1, 2, 2))

        if x_short2long.shape[2] > 2:
            x_short2long = temporal_cossim_pool(x_short2long)

        target_size = tuple(x_long.size()[2:])
        if tuple(x_short2long.size()[2:]) != target_size:
            x_short2long = F.interpolate(x_short2long, size=target_size, mode='trilinear')
        x_short2long = self.conv_short2long(x_short2long)

    # The call order on norm_long is preserved (x_long first, then the lateral).
    x_long = self.norm_long(x_long)
    x_short = self.norm_short(x_short)
    if not self.no_lateral:
        x_short2long = self.norm_long(x_short2long)
        x_long = torch.add(x_long, x_short2long)
    return (x_short, x_long)
def pool(self, inputs: Tensor, target_shape: List[int]) -> Tensor:
    """Adaptive max pooling whose dimensionality follows ``len(target_shape)``.

    Raises:
        RuntimeError: if ``target_shape`` does not have 1, 2 or 3 entries.
    """
    pool_by_rank = {
        1: F.adaptive_max_pool1d,
        2: F.adaptive_max_pool2d,
        3: F.adaptive_max_pool3d,
    }
    pool_fn = pool_by_rank.get(len(target_shape))
    if pool_fn is None:
        raise RuntimeError(f"Invalid target_shape: {target_shape}")
    return pool_fn(inputs, target_shape)
def forward(self, x):
    """Extract features, apply in-place ReLU, optionally pool globally, and flatten.

    ``self.ddata_pool`` selects global average ('avg') or max ('max') pooling;
    any other value skips pooling and flattens the activations directly.
    """
    activated = F.relu(self.features(x), inplace=True)
    global_pools = {
        'avg': F.adaptive_avg_pool3d,
        'max': F.adaptive_max_pool3d,
    }
    pool_fn = global_pools.get(self.ddata_pool)
    if pool_fn is not None:
        activated = pool_fn(activated, (1, 1, 1))
    return torch.flatten(activated, 1)
def forward(self, x):
    """Re-weight ``x`` with the product of three per-axis attention maps.

    Three branches (``dconv1``/``hconv1``/``wconv1``) are each pooled so that
    only one of the last three axes keeps its size, projected by the matching
    ``*conv2`` and normalised (sigmoid when ``self.middle_norm == 'sig'``,
    otherwise softmax over dim 1).  Their product goes through ``self.fuse``
    and a sigmoid, and multiplies ``x``.
    """
    branch_d = self.dconv1(x)
    branch_h = self.hconv1(x)
    branch_w = self.wconv1(x)

    if self.pooling == 'max':
        pool3d = F.adaptive_max_pool3d
    else:
        pool3d = F.adaptive_avg_pool3d
    branch_d = pool3d(branch_d, (x.shape[2], 1, 1))
    branch_h = pool3d(branch_h, (1, x.shape[3], 1))
    branch_w = pool3d(branch_w, (1, 1, x.shape[4]))

    if self.middle_norm == 'sig':
        branch_d = self.sig(self.dconv2(branch_d))
        branch_h = self.sig(self.hconv2(branch_h))
        branch_w = self.sig(self.wconv2(branch_w))
    else:
        branch_d = F.softmax(self.dconv2(branch_d), 1)
        branch_h = F.softmax(self.hconv2(branch_h), 1)
        branch_w = F.softmax(self.wconv2(branch_w), 1)

    attention = self.sig(self.fuse(branch_d * branch_h * branch_w))
    return x * attention
def forward(self, X):
    """Pairwise pooling over ``self.boost_factor`` equally sized groups of points.

    ``X`` is (b, n, din).  The ``n`` points are split into ``d`` groups of
    ``m = n // d`` points; for every ordered pair inside a group the two
    feature vectors are concatenated and passed through ``self.L``.  The
    pairwise responses are reduced with average-then-max pooling (when
    ``self.sym_pool_max``) or max-then-average pooling, passed through
    ``self.F`` and finally reduced across groups by ``self.BoostPool``.

    ``m`` is computed with integer division: true division yields a float in
    Python 3, which ``view``/``expand`` and the pooling sizes reject.
    """
    b, n, din = X.size()
    d = self.boost_factor
    m = n // d
    assert m * d == n, "number of points must be divisible by boost_factor"

    Xr = X.view(b, d, 1, m, din).expand(b, d, m, m, din)
    Xrc = torch.cat((Xr, Xr.transpose(2, 3)), dim=-1)  # b x d x m x m x 2*din
    G = self.L.forward(Xrc)  # b x d x m x m x K
    k = self.dims[-1]
    if self.sym_pool_max:
        # average over each point's partners, then max across points
        Pr = Functional.adaptive_avg_pool3d(G, (m, 1, k)).squeeze(-2)  # b x d x m x K
        P = Functional.adaptive_max_pool2d(Pr, (1, k)).squeeze(-2)  # b x d x K
    else:
        # max over each point's partners, then average across points
        Pr = Functional.adaptive_max_pool3d(G, (m, 1, k)).squeeze(-2)  # b x d x m x K
        P = Functional.adaptive_avg_pool2d(Pr, (1, k)).squeeze(-2)  # b x d x K
    Y = self.F.forward(P)  # b x d x C
    Y = self.BoostPool.forward(Y).squeeze(-2)  # b x C
    return Y
def forward_multiframe(self, x, pool=True):
    """Run ``self.features`` on every frame of a (B, C, T, H, W) clip.

    Returns the per-frame features as (B, C', T, H', W') when ``pool`` is
    false; otherwise the features are globally pooled according to
    ``self.pool_type`` ('avgpool' / 'maxpool') and returned as (B, C').
    """
    batch, chans, frames, height, width = x.size()
    per_frame = x.permute(0, 2, 1, 3, 4).contiguous()
    per_frame = per_frame.view(batch * frames, chans, height, width)

    feats = self.features(per_frame)
    _, chans, height, width = feats.size()
    feats = feats.view(batch, frames, chans, height, width).permute(0, 2, 1, 3, 4)
    if not pool:
        return feats

    if self.pool_type == 'avgpool':
        feats = F.adaptive_avg_pool3d(feats, 1)
    elif self.pool_type == 'maxpool':
        feats = F.adaptive_max_pool3d(feats, 1)
    return feats.view(batch, chans)
def forward_multiframe_feat_emb(self, x, pool=True):
    """Embed every frame of a (B, C, T, H, W) clip with ``features`` then ``fc``.

    Returns:
        The per-frame embedding (B, C', T, H', W') alone when ``pool`` is
        false; otherwise the tuple ``(embedding, img)`` where ``img`` is the
        embedding globally pooled according to ``self.pool_type``
        ('avgpool' / 'maxpool') and reshaped to (B, C').
    """
    batch, chans, frames, height, width = x.size()
    per_frame = x.permute(0, 2, 1, 3, 4).contiguous()
    per_frame = per_frame.view(batch * frames, chans, height, width)

    emb = self.fc(self.features(per_frame))
    _, chans, height, width = emb.size()
    emb = emb.view(batch, frames, chans, height, width).permute(0, 2, 1, 3, 4)
    if not pool:
        return emb

    # for evaluation (sound source separation)
    if self.pool_type == 'avgpool':
        img = F.adaptive_avg_pool3d(emb, 1)
    elif self.pool_type == 'maxpool':
        img = F.adaptive_max_pool3d(emb, 1)
    img = img.view(batch, chans)
    return emb, img
def forward(self, f, inputs, proposals):
    """Crop each proposal from feature map ``f`` and max-pool it to a fixed size.

    Args:
        f: 5-D feature map indexed as ``f[b, :, z, y, x]``.
        inputs: network input; only ``inputs.shape[2:]`` (depth, height, width)
            is used, and it is stored on ``self.DEPTH/HEIGHT/WIDTH``.
        proposals: iterable of 1-D tensors laid out as
            ``[batch_idx, _, cz, cy, cx, sz, sy, sx]`` in input coordinates.

    Returns:
        Tensor stacking one crop per proposal, each pooled to
        ``self.rcnn_crop_size``.

    The clipping bounds are now created on the proposal's own device rather
    than via a hard-coded ``.cuda()``, so the layer also runs on CPU and on
    non-default GPUs.
    """
    self.DEPTH, self.HEIGHT, self.WIDTH = inputs.shape[2:]
    # Feature-map extent per axis; int() truncates like the former float -> long cast.
    fmap_extent = [int(dim / self.scale) for dim in (self.DEPTH, self.HEIGHT, self.WIDTH)]

    crops = []
    for p in proposals:
        b = int(p[0])
        center = p[2:5]
        side_length = p[5:8]

        # left bottom corner
        c0 = center - side_length / 2
        # right upper corner
        c1 = c0 + side_length

        # corresponding point on the downsampled feature map
        c0 = (c0 / self.scale).floor().long()
        c1 = (c1 / self.scale).ceil().long()

        # clip the box so that 0 <= (z0, y0, x0) and (z1, y1, x1) <= feature-map extent
        upper = torch.tensor(fmap_extent, dtype=torch.long, device=c1.device)
        c0 = torch.clamp(c0, min=0)
        c1 = torch.min(c1, upper)

        # This should never happen; report the degenerate box for debugging.
        if bool(((c1 - c0) < 1).any()):
            print(p)
            print('c0:', c0, ', c1:', c1)

        z0, y0, x0 = c0.tolist()
        z1, y1, x1 = c1.tolist()
        crop = f[b, :, z0:z1, y0:y1, x0:x1]
        crops.append(F.adaptive_max_pool3d(crop, self.rcnn_crop_size))

    return torch.stack(crops)
def forward(self, x):
    """Gate ``x`` with the sum of an avg-pooled and a max-pooled attention volume.

    The (C, H, W) volume of each sample is adaptively pooled to
    (C // 4, H // 4, W // 4) with average and with max pooling.  Each pooled
    volume is passed through the shared ``self.conv2``, resized back to
    (C, H, W) with nearest-neighbour interpolation and squashed with a
    sigmoid; the two gates are summed and multiplied with ``x``.
    """
    _, c, h, w = x.size()
    pooled_size = (c // 4, h // 4, w // 4)
    # A leading singleton lets the 3-D pools treat (C, H, W) as the volume.
    volume = x.unsqueeze(0)

    def gate(pooled):
        # Project, resize to the input volume and squash into (0, 1).
        # F.interpolate / torch.sigmoid replace the deprecated F.upsample / F.sigmoid.
        projected = self.conv2(pooled.squeeze(0)).unsqueeze(0)
        resized = F.interpolate(projected, size=(c, h, w)).squeeze(0)
        return torch.sigmoid(resized)

    y1 = gate(F.adaptive_avg_pool3d(volume, pooled_size))
    y2 = gate(F.adaptive_max_pool3d(volume, pooled_size))
    return (y1 + y2) * x
def test_adaptive_max_pool3d(self):
    """Smoke-test ``F.adaptive_max_pool3d`` with indices on a CUDA tensor of ``self.dtype``."""
    volume = torch.randn(1, 16, 16, 32, 32, device='cuda', dtype=self.dtype)
    # Only checks that the call runs; the result is not inspected.
    F.adaptive_max_pool3d(volume, output_size=5, return_indices=True)
def conv_soft_argmax3d(input: torch.Tensor,
                       kernel_size: Tuple[int, int, int] = (3, 3, 3),
                       stride: Tuple[int, int, int] = (1, 1, 1),
                       padding: Tuple[int, int, int] = (1, 1, 1),
                       temperature: Union[torch.Tensor, float] = torch.tensor(1.0),
                       normalized_coordinates: bool = False,
                       eps: float = 1e-8,
                       output_value: bool = True,
                       strict_maxima_bonus: float = 0.0) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    r"""Function that computes the convolutional spatial Soft-Argmax 3D over the windows
    of a given input heatmap. Function has two outputs: argmax coordinates and the softmaxpooled heatmap values
    themselves.
    On each window, the function computed is:

    .. math::
             ijk(X) = \frac{\sum{(i,j,k)} * exp(x / T)  \in X} {\sum{exp(x / T)  \in X}}

    .. math::
             val(X) = \frac{\sum{x * exp(x / T)  \in X}} {\sum{exp(x / T)  \in X}}

    where T is temperature.

    Args:
        input (torch.Tensor): the input heatmap; must be 5-dimensional (BxCxDxHxW).
        kernel_size (Tuple[int,int,int]): size of the window
        stride (Tuple[int,int,int]): stride of the window.
        padding (Tuple[int,int,int]): input zero padding
        temperature (torch.Tensor): factor to apply to input. Default is 1.
        normalized_coordinates (bool): whether to return the coordinates normalized in the range of [-1, 1].
                                       Otherwise, it will return the coordinates in the range of the input shape.
                                       Default is False.
        eps (float): small value to avoid zero division. Default is 1e-8.
        output_value (bool): if True, val is output as well; if False, only the coordinates are returned
        strict_maxima_bonus (float): pixels, which are strict maxima will score (1 + strict_maxima_bonus) * value.
                                     This is needed for mimic behavior of strict NMS in classic local features

    Returns:
        The coordinates tensor alone when ``output_value`` is False, otherwise the
        tuple ``(coordinates, values)``.

    Raises:
        TypeError: if ``input`` is not a tensor.
        ValueError: if ``input`` is not 5-dimensional or ``temperature`` is not positive.

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C, 3, D_{out}, H_{out}, W_{out})`, :math:`(N, C, D_{out}, H_{out}, W_{out})`, where

         .. math::
             D_{out} = \left\lfloor\frac{D_{in}  + 2 \times \text{padding}[0] -
             (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

         .. math::
             H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[1] -
             (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

         .. math::
             W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[2] -
             (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor

    Examples:
        >>> input = torch.randn(20, 16, 3, 50, 32)
        >>> nms_coords, nms_val = conv_soft_argmax3d(input, (3, 3, 3), (1, 2, 2), (0, 1, 1))
    """
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}"
                        .format(type(input)))
    if not len(input.shape) == 5:
        raise ValueError("Invalid input shape, we expect BxCxDxHxW. Got: {}"
                         .format(input.shape))

    if temperature <= 0:
        raise ValueError("Temperature should be positive float or tensor. Got: {}"
                         .format(temperature))

    b, c, d, h, w = input.shape
    kx, ky, kz = kernel_size
    device: torch.device = input.device
    dtype: torch.dtype = input.dtype
    # Fold channels into the batch so every channel is processed as its own
    # single-channel volume.
    input = input.view(b * c, 1, d, h, w)

    center_kernel: torch.Tensor = _get_center_kernel3d(kx, ky, kz, device).to(dtype)
    window_kernel: torch.Tensor = _get_window_grid_kernel3d(kx, ky, kz, device).to(dtype)

    # applies exponential normalization trick
    # https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    # https://github.com/pytorch/pytorch/blob/bcb0bb7e0e03b386ad837015faba6b4b16e3bfb9/aten/src/ATen/native/SoftMax.cpp#L44
    x_max = F.adaptive_max_pool3d(input, (1, 1, 1))

    # max is detached to prevent undesired backprop loops in the graph
    x_exp = ((input - x_max.detach()) / temperature).exp()

    # avg_pool3d returns window means; multiplying by the window volume turns
    # them back into window sums.
    pool_coef: float = float(kx * ky * kz)

    # softmax denominator
    den = pool_coef * F.avg_pool3d(x_exp.view_as(input), kernel_size,
                                   stride=stride, padding=padding) + eps

    # We need to output also coordinates
    # Pooled window center coordinates
    grid_global: torch.Tensor = create_meshgrid3d(
        d, h, w, False, device=device).to(dtype).permute(0, 4, 1, 2, 3)

    grid_global_pooled = F.conv3d(grid_global,
                                  center_kernel,
                                  stride=stride,
                                  padding=padding)

    # Coordinates of maxima residual to window center
    # prepare kernel
    coords_max: torch.Tensor = F.conv3d(x_exp,
                                        window_kernel,
                                        stride=stride,
                                        padding=padding)

    coords_max = coords_max / den.expand_as(coords_max)
    coords_max = coords_max + grid_global_pooled.expand_as(coords_max)
    # [:,:, 0, ...] is depth (scale)
    # [:,:, 1, ...] is x
    # [:,:, 2, ...] is y

    if normalized_coordinates:
        coords_max = normalize_pixel_coordinates3d(coords_max.permute(0, 2, 3, 4, 1), d, h, w)
        coords_max = coords_max.permute(0, 4, 1, 2, 3)

    # Back B*C -> (b, c)
    coords_max = coords_max.view(b, c, 3, coords_max.size(2), coords_max.size(3), coords_max.size(4))

    if not output_value:
        return coords_max
    # Softmax-weighted value of every window (the val(X) formula above).
    x_softmaxpool = pool_coef * F.avg_pool3d(x_exp.view(input.size()) * input,
                                             kernel_size,
                                             stride=stride,
                                             padding=padding) / den
    if strict_maxima_bonus > 0:
        in_levels: int = input.size(2)
        out_levels: int = x_softmaxpool.size(2)
        skip_levels: int = (in_levels - out_levels) // 2
        strict_maxima: torch.Tensor = F.avg_pool3d(kornia.feature.nms3d(input, kernel_size), 1, stride, 0)
        # NOTE(review): the upper bound is out_levels - skip_levels, so when
        # skip_levels > 0 this slice holds fewer than out_levels entries and
        # relies on broadcasting (or fails) in the multiplication below --
        # confirm whether in_levels - skip_levels was intended.
        strict_maxima = strict_maxima[:, :, skip_levels:out_levels - skip_levels]
        x_softmaxpool *= 1.0 + strict_maxima_bonus * strict_maxima
    x_softmaxpool = x_softmaxpool.view(b,
                                       c,
                                       x_softmaxpool.size(2),
                                       x_softmaxpool.size(3),
                                       x_softmaxpool.size(4))
    return coords_max, x_softmaxpool
def forward(self, x):
    """Max-pool ``x`` to a (7, 6, 5) grid, then to a single value per channel."""
    coarse = F.adaptive_max_pool3d(x, output_size=(7, 6, 5))
    return F.adaptive_max_pool3d(coarse, output_size=1)
def forward(self, x):
    """Concatenate global average and global max pooling of ``x`` along dim 1."""
    mean_feat = F.adaptive_avg_pool3d(x, 1)
    peak_feat = F.adaptive_max_pool3d(x, 1)
    return torch.cat((mean_feat, peak_feat), dim=1)
def forward(self, input: Tensor) -> Tensor:
    """Pass ``input`` through ``self.quant_handle`` and adaptively max-pool it.

    The pool uses ``self.output_size`` and ``self.return_indices``.
    """
    prepared = self.quant_handle(input)
    return F.adaptive_max_pool3d(
        prepared,
        output_size=self.output_size,
        return_indices=self.return_indices,
    )
def forward(self, x):
    # Builds softmax-normalised mixing matrices p and q (and, when self.tk > 0,
    # a temporal matrix tm) from pooled features of conv1(x), applies them to x
    # by matrix multiplication, and finishes with conv2.
    # Assumes x is (n, c, t, h, w) with x.size(3) == h -- TODO confirm.

    # matrix_size >= self.s; the two are equal e.g. when x.size(3) is a
    # multiple of self.s.
    scale_times = x.size(3) // self.s
    matrix_size = x.size(3) // scale_times
    out = self.conv1(x)
    n, _, t, h, w = out.size()
    # rp keeps matrix_size bins on dim 3 and collapses dim 4; cp does the opposite.
    rp = F.adaptive_max_pool3d(out, (t, matrix_size, 1))
    cp = F.adaptive_max_pool3d(out, (t, 1, matrix_size))
    if matrix_size == self.s:
        p = self.conv_p(rp).view(n, self.k, self.s, self.s, t)
        q = self.conv_q(cp).view(n, self.k, self.s, self.s, t)
    else:
        # The conv outputs hold several s x s windows (last view dim).  Window i
        # is accumulated onto the diagonal block [i:i+s, i:i+s] of a
        # matrix_size x matrix_size matrix; cells covered by more than one
        # window are averaged via `count` (uncovered cells divide by 1).
        ones = x.new_ones((1, 1, matrix_size, matrix_size, 1), requires_grad=False)
        p = x.new_zeros(n, self.k, matrix_size, matrix_size, t)
        p_out = self.conv_p(rp).view(n, self.k, self.s, self.s, t, -1)
        count = x.new_zeros((1, 1, matrix_size, matrix_size, 1), requires_grad=False)
        for i in range(p_out.size(5)):
            p[:, :, i:self.s + i, i:self.s + i, :] += p_out[:, :, :, :, :, i]
            count[:, :, i:self.s + i, i:self.s + i, :] += 1
        count = torch.where(count > 0, count, ones)
        p /= count
        q = x.new_zeros(n, self.k, matrix_size, matrix_size, t)
        # NOTE(review): the window count is hard-coded to 2 here while p_out
        # infers it with -1 -- confirm this asymmetry is intended.
        q_out = self.conv_q(cp).view(n, self.k, self.s, self.s, t, 2)
        count = x.new_zeros((1, 1, matrix_size, matrix_size, 1), requires_grad=False)
        for i in range(q_out.size(5)):
            q[:, :, i:self.s + i, i:self.s + i, :] += q_out[:, :, :, :, :, i]
            count[:, :, i:self.s + i, i:self.s + i, :] += 1
        count = torch.where(count > 0, count, ones)
        q /= count
    # p is normalised over dim 3, q over dim 2.
    p = F.softmax(p, dim=3)
    q = F.softmax(q, dim=2)
    # Repeat each of the k matrices for x.size(1) // k channels, then move the
    # t axis in front of the two matrix axes.
    p = p.view(n, self.k, 1, matrix_size, matrix_size, t).expand(
        n, self.k, x.size(1) // self.k, matrix_size, matrix_size, t).contiguous()
    p = p.view(n, x.size(1), matrix_size, matrix_size, t).permute(0, 1, 4, 2, 3).contiguous()
    q = q.view(n, self.k, 1, matrix_size, matrix_size, t).expand(
        n, self.k, x.size(1) // self.k, matrix_size, matrix_size, t).contiguous()
    q = q.view(n, x.size(1), matrix_size, matrix_size, t).permute(0, 1, 4, 2, 3).contiguous()
    # resize_mat is defined elsewhere; presumably it enlarges the matrices by
    # the given factor so they match h and w -- verify.
    p = self.resize_mat(p, h // matrix_size)
    q = self.resize_mat(q, w // matrix_size)
    # Left-multiply by p and right-multiply by q on the last two dims of x.
    y = p.matmul(x)
    y = y.matmul(q)
    if self.tk > 0:
        # Temporal mixing: a ts x ts matrix per group, softmax-normalised on its
        # last dim, resized and applied along the t axis of y.
        tp = F.adaptive_avg_pool3d(out, (self.ts, 1, 1))
        tm = self.conv_t(tp).view(n, self.tk, self.ts, self.ts)
        tm = F.softmax(tm, dim=3)
        tm = tm.view(n, self.tk, 1, 1, 1, self.ts, self.ts).expand(
            n, self.tk, x.size(1) // self.tk, h, w, self.ts, self.ts).contiguous()
        tm = tm.view(n, x.size(1), h * w, self.ts, self.ts)
        tm = self.resize_mat(tm, t // self.ts)
        tm = tm.view(n, x.size(1), h, w, t, t)
        y = y.permute(0, 1, 3, 4, 2).contiguous().view(n, x.size(1), h, w, t, 1)
        y = tm.matmul(y).squeeze(-1).permute(0, 1, 4, 2, 3).contiguous()
    y = self.conv2(y)
    return y
def pool(self, input):
    """Global 3-D max pooling: reduce the three trailing dims of ``input`` to size 1."""
    pooled = F.adaptive_max_pool3d(input, output_size=1)
    return pooled
def forward(self, x):
    """Run ``self.layers`` in order, globally max-pool, and return log-softmax of ``self.out``."""
    hidden = x
    for layer in self.layers:
        hidden = layer(hidden)
    pooled = F.adaptive_max_pool3d(hidden, 1)
    flat = pooled.view(pooled.size(0), -1)
    logits = self.out(flat)
    return F.log_softmax(logits, dim=-1)
def forward(self, x):
    """Encoder/decoder over a (batch, seq, z, h, w) sequence of frames.

    Frames are folded into the batch axis for all 2-D convolutions.  The first
    two encoder blocks additionally reshape to (batch, seq, c, h, w) for
    ``conv3d_1`` / ``conv3d_2``.  On the decoder path every skip tensor taken
    from before a 3-D convolution is max-pooled over its sequence axis before
    being concatenated with the 2x up-sampled deeper features.
    """
    batch, _, _, _, _ = x.size()

    def conv_bn_relu(conv, bn, tensor):
        # conv -> batch norm -> ReLU
        return F.relu(bn(conv(tensor)))

    def as_clip(frames):
        # (batch * seq, c, h, w) -> (batch, seq, c, h, w)
        return frames.view(batch, -1, frames.size(1), frames.size(2), frames.size(3))

    def as_frames(clip):
        # (batch, seq, c, h, w) -> (batch * seq, c, h, w)
        return clip.view(-1, clip.size(2), clip.size(3), clip.size(4)).contiguous()

    def collapse_seq(frames):
        # Max over the sequence axis, returning (batch, c, h, w).
        clip = as_clip(frames).permute(0, 2, 1, 3, 4).contiguous()
        clip = F.adaptive_max_pool3d(clip, (1, None, None))
        clip = clip.permute(0, 2, 1, 3, 4).contiguous()
        return as_frames(clip)

    def up_and_cat(deep, skip):
        # Double the spatial size of `deep` and concatenate `skip` on channels.
        return torch.cat((F.interpolate(deep, scale_factor=(2, 2)), skip), dim=1)

    x = x.view(-1, x.size(-3), x.size(-2), x.size(-1))
    x = conv_bn_relu(self.conv_pre_1, self.bn_pre_1, x)
    x = conv_bn_relu(self.conv_pre_2, self.bn_pre_2, x)

    # -------------------------------- Encoder Path --------------------------------
    # -- STC block 1
    x_1 = conv_bn_relu(self.conv1_1, self.bn1_1, x)
    x_1 = conv_bn_relu(self.conv1_2, self.bn1_2, x_1)
    x_1 = self.conv3d_1(as_clip(x_1).contiguous())
    x_1 = as_frames(x_1)

    # -- STC block 2
    x_2 = conv_bn_relu(self.conv2_1, self.bn2_1, x_1)
    x_2 = conv_bn_relu(self.conv2_2, self.bn2_2, x_2)
    x_2 = self.conv3d_2(as_clip(x_2).contiguous())
    x_2 = as_frames(x_2)  # the original comment notes seq = 1 from here on

    # -- STC block 3
    x_3 = conv_bn_relu(self.conv3_1, self.bn3_1, x_2)
    x_3 = conv_bn_relu(self.conv3_2, self.bn3_2, x_3)

    # -- STC block 4
    x_4 = conv_bn_relu(self.conv4_1, self.bn4_1, x_3)
    x_4 = conv_bn_relu(self.conv4_2, self.bn4_2, x_4)

    # -------------------------------- Decoder Path --------------------------------
    x_5 = conv_bn_relu(self.conv5_1, self.bn5_1, up_and_cat(x_4, x_3))
    x_5 = conv_bn_relu(self.conv5_2, self.bn5_2, x_5)

    x_2 = collapse_seq(x_2)
    x_6 = conv_bn_relu(self.conv6_1, self.bn6_1, up_and_cat(x_5, x_2))
    x_6 = conv_bn_relu(self.conv6_2, self.bn6_2, x_6)

    x_1 = collapse_seq(x_1)
    x_7 = conv_bn_relu(self.conv7_1, self.bn7_1, up_and_cat(x_6, x_1))
    x_7 = conv_bn_relu(self.conv7_2, self.bn7_2, x_7)

    x = collapse_seq(x)
    x_8 = conv_bn_relu(self.conv8_1, self.bn8_1, up_and_cat(x_7, x))
    res_x = conv_bn_relu(self.conv8_2, self.bn8_2, x_8)
    return res_x