def __init__(self, opt):
    """Assemble the generator: a head conv block, one body stage and a tanh tail.

    ``opt`` is read for ``nfc`` (feature width), ``nc_im`` (image channels),
    ``ker_size``, ``padd_size``, ``num_layer`` and ``train_mode``.
    """
    super(GrowingGenerator, self).__init__()

    self.opt = opt
    width = int(opt.nfc)
    kernel = (3, opt.ker_size, opt.ker_size)

    # One-voxel zero border on every face of the volume.
    self._pad = nn.ConstantPad3d((1, 1, 1, 1, 1, 1), 0.)

    # Block-level zero border: ``num_layer - 1`` voxels per face in
    # "generation" mode, ``num_layer`` voxels otherwise.
    if opt.train_mode == "generation":
        block_border = (opt.num_layer - 1,) * 6
    else:
        block_border = (opt.num_layer,) * 6
    self._pad_block = nn.ConstantPad3d(block_border, 0.)

    self.head = Conv3dBlock(opt.nc_im, width, kernel, opt.padd_size, opt, generator=True)

    # The body starts with a single stage made of ``num_layer`` conv blocks.
    self.body = torch.nn.ModuleList([])
    first_stage = nn.Sequential()
    for idx in range(opt.num_layer):
        first_stage.add_module(
            'block%d' % (idx),
            Conv3dBlock(width, width, kernel, opt.padd_size, opt, generator=True),
        )
    self.body.append(first_stage)

    self.tail = nn.Sequential(
        nn.Conv3d(width, opt.nc_im, kernel_size=kernel, padding=opt.padd_size),
        nn.Tanh(),
    )
def __init__(self, nf=16):
    """Create the encoder, bottleneck and decoder layers.

    :param nf: base channel count; deeper stages use 2x, 4x and 8x of it.

    The trailing ``a-b`` notes are the original author's size annotations
    (presumably spatial extent before/after the layer -- TODO confirm).
    """
    super(ITN3D, self).__init__()
    c1, c2, c4, c8 = nf, nf * 2, nf * 4, nf * 8

    # --- encoder: one plain conv followed by three stride-2 convs ---
    self.conv0 = nn.Conv3d(1, c1, kernel_size=3, padding=1)  # 64-64
    self.bn0 = nn.BatchNorm3d(c1)
    self.conv1 = nn.Conv3d(c1, c2, kernel_size=3, padding=1, stride=2)  # 64-32
    self.bn1 = nn.BatchNorm3d(c2)
    self.conv2 = nn.Conv3d(c2, c4, kernel_size=3, padding=1, stride=2)  # 32-16
    self.bn2 = nn.BatchNorm3d(c4)
    self.conv3 = nn.Conv3d(c4, c8, kernel_size=3, padding=1, stride=2)  # 16-8
    self.bn3 = nn.BatchNorm3d(c8)

    # --- bottleneck: two width-preserving convs ---
    self.bottleneck0 = nn.Conv3d(c8, c8, kernel_size=3, padding=1)  # 8-8
    self.bnb0 = nn.BatchNorm3d(c8)
    self.bottleneck1 = nn.Conv3d(c8, c8, kernel_size=3, padding=1)  # 8-8
    self.bnb1 = nn.BatchNorm3d(c8)

    # --- decoder: upsample, explicit zero pad, unpadded conv, dropout ---
    # The doubled input widths of up22/up12/output leave room for a
    # concatenated tensor of equal width.
    self.up31 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=False)  # 8-16
    self.pad3 = nn.ConstantPad3d(1, 0)
    self.up32 = nn.Conv3d(c8, c4, kernel_size=3, padding=0)
    self.drop3 = nn.Dropout(0.5)

    self.up21 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=False)  # 16-32
    self.pad2 = nn.ConstantPad3d(1, 0)
    self.up22 = nn.Conv3d(c4 + c4, c2, kernel_size=3, padding=0)
    self.drop2 = nn.Dropout(0.5)

    self.up11 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=False)  # 32-64
    self.pad1 = nn.ConstantPad3d(1, 0)
    self.up12 = nn.Conv3d(c2 + c2, c1, kernel_size=3, padding=0)
    self.drop1 = nn.Dropout(0.5)

    # --- single-channel output head ---
    self.pad0 = nn.ConstantPad3d(1, 0)
    self.output = nn.Conv3d(c1 + c1, 1, kernel_size=3, padding=0)
def __init__(self, input_nc, output_nc, n_residual_blocks=9):
    """Build the 3D generator as one ``nn.Sequential`` stored in ``self.model``.

    Layout: 7x7x7 stem, two stride-2 downsampling convs, ``n_residual_blocks``
    residual blocks, two transposed-conv upsampling stages, 7x7x7 tanh output.
    """
    super(Generator3D, self).__init__()

    # Stem. Zero padding stands in for reflection padding.
    layers = [
        nn.ConstantPad3d(3, value=0),
        nn.Conv3d(input_nc, 64, 7),
        nn.InstanceNorm3d(64),
        nn.ReLU(inplace=True),
    ]

    # Downsampling: channel count doubles at each of the two stages.
    channels = 64
    for _ in range(2):
        doubled = channels * 2
        layers.extend([
            nn.Conv3d(channels, doubled, 3, stride=2, padding=1),
            nn.InstanceNorm3d(doubled),
            nn.ReLU(inplace=True),
        ])
        channels = doubled

    # Residual trunk at the widest channel count.
    layers.extend(ResidualBlock3D(channels) for _ in range(n_residual_blocks))

    # Upsampling: channel count halves at each of the two stages.
    for _ in range(2):
        halved = channels // 2
        layers.extend([
            nn.ConvTranspose3d(channels, halved, 3, stride=2, padding=1, output_padding=1),
            nn.InstanceNorm3d(halved),
            nn.ReLU(inplace=True),
        ])
        channels = halved

    # Output head. Zero padding stands in for reflection padding.
    layers.extend([
        nn.ConstantPad3d(3, value=0),
        nn.Conv3d(64, output_nc, 7),
        nn.Tanh(),
    ])

    self.model = nn.Sequential(*layers)
def __init__(self, in_features):
    """Create the two-conv residual branch; channel count stays ``in_features``."""
    super(ResidualBlock3D, self).__init__()

    # Each 3x3x3 conv is preceded by a one-voxel zero pad (used instead of
    # reflection padding). Only the first conv is followed by a ReLU.
    self.conv_block = nn.Sequential(
        nn.ConstantPad3d(1, value=0),
        nn.Conv3d(in_features, in_features, 3),
        nn.InstanceNorm3d(in_features),
        nn.ReLU(inplace=True),
        nn.ConstantPad3d(1, value=0),
        nn.Conv3d(in_features, in_features, 3),
        nn.InstanceNorm3d(in_features),
    )
def __init__(self, inplanes, planes, stride=1, downsample=None):
    """Set up the two 3x3x3 conv/BN pairs plus the zero-pad helpers of the block.

    ``downsample`` is stored as given (``None`` or a module applied to the
    identity branch -- usage is outside this method).
    """
    super(BasicBlock, self).__init__()

    # Main branch.
    self.conv1 = conv3x3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm3d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3x3(planes, planes)
    self.bn2 = nn.BatchNorm3d(planes)

    self.downsample = downsample
    self.stride = stride

    # ConstantPad3d takes (left, right, top, bottom, front, back).
    self.zero_pad_I = nn.ConstantPad3d((2,) * 6, 0)
    self.zero_pad_Id = nn.ConstantPad3d((3,) * 6, 0)
def __init__(self, in_channels, out_channels, kernel_size=(2, 3, 3), dilation=(1, 1, 1), bias=False):
    """Create a pad -> Conv3d -> BatchNorm3d -> ReLU stack padded only on the left in time.

    ``kernel_size`` and ``dilation`` are indexed as (time, height, width).
    """
    super().__init__()
    assert len(kernel_size) == 3, 'kernel_size must be a 3-tuple.'

    def receptive_span(axis):
        # Extra extent covered by the dilated kernel along ``axis``.
        return (kernel_size[axis] - 1) * dilation[axis]

    time_pad = receptive_span(0)
    height_pad = receptive_span(1) // 2
    width_pad = receptive_span(2) // 2

    # All temporal padding goes in front (none behind); height and width
    # are padded symmetrically.
    self.pad = nn.ConstantPad3d(
        padding=(width_pad, width_pad, height_pad, height_pad, time_pad, 0),
        value=0,
    )
    self.conv = nn.Conv3d(
        in_channels,
        out_channels,
        kernel_size,
        dilation=dilation,
        stride=1,
        padding=0,
        bias=bias,
    )
    self.norm = nn.BatchNorm3d(out_channels)
    self.activation = nn.ReLU(inplace=True)
def __init__(self, input_ch, output_ch, first_conv_stride=1):
    """Build the residual branch and, when the channel count changes, a side branch.

    ``self.varying_size`` records whether ``input_ch`` differs from ``output_ch``.
    """
    super(ResidualBlock, self).__init__()

    main_layers = [
        nn.ZeroPad2d(1),
        nn.Conv2d(input_ch, output_ch, 3, stride=first_conv_stride, bias=False),
        nn.BatchNorm2d(output_ch),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(1),
        nn.Conv2d(output_ch, output_ch, 3, bias=False),
        nn.BatchNorm2d(output_ch),
    ]

    self.varying_size = input_ch != output_ch
    if self.varying_size:
        # Author's note: the reference work does not state which pooling is
        # used to down-sample the identity mapping (max or average); max
        # pooling is used here. It also does not state where the extra
        # channels go; zeros are appended behind the original channels,
        # which is believed not to matter much.
        self.side_block = nn.Sequential(
            nn.ZeroPad2d(1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Last pad pair acts on the third-from-last axis: append zeros.
            nn.ConstantPad3d((0, 0, 0, 0, 0, output_ch - input_ch), value=0.),
        )

    self.block = nn.Sequential(*main_layers)
def __init__(self, in_planes, planes, stride=1, option='A'):
    """Create the two conv/BN pairs and pick the shortcut for the residual sum.

    ``option`` selects the shortcut used when the shape changes:
    'A' strided slicing plus zero channel padding, 'B' 1x1 conv + BN,
    'C' average pooling plus zero channel padding.

    :raises ValueError: if the shape changes and ``option`` is not A/B/C.
    """
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    # Identity shortcut unless the spatial size or the width changes.
    self.shortcut = nn.Sequential()
    if stride == 1 and in_planes == planes:
        return

    if option == 'A':
        # For CIFAR10 the ResNet paper uses option A: take every second
        # pixel and zero-pad the channel axis on both sides.
        side = planes // 4
        self.shortcut = LambdaLayer(
            lambda x: F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, side, side)))
    elif option == 'B':
        widened = self.expansion * planes
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, widened, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(widened),
        )
    elif option == 'C':
        # Following Madry's WideResNet32: pool, then append zero channels.
        self.shortcut = nn.Sequential(
            nn.AvgPool2d(stride, stride),
            nn.ConstantPad3d([0, 0, 0, 0, 0, planes - in_planes], 0),
        )
    else:
        raise ValueError('Unknown option.')
def _make_layer(self, layer_gates, block, planes, blocks, stride=1, conv_downsample=False):
    """Stack ``blocks`` instances of ``block`` into one ``nn.Sequential``.

    The first block receives ``stride`` and, when the stride is not 1 or the
    width changes, a downsample module: a strided 1x1 conv if
    ``conv_downsample`` is set, otherwise 2x2 average pooling followed by
    zero padding of ``int(self.inplanes / 2)`` channels on each side.
    ``self.inplanes`` is updated to the layer's output width.
    """
    outplanes = planes * block.expansion
    shape_changes = stride != 1 or self.inplanes != outplanes

    downsample = None
    if shape_changes and conv_downsample:
        downsample = nn.Conv2d(self.inplanes, outplanes, kernel_size=1, stride=stride, bias=False)
    elif shape_changes:
        # Parameter-free alternative to the conv: pool, then zero-pad the
        # channel axis symmetrically.
        pad_amount = int(self.inplanes / 2)
        downsample = nn.Sequential(
            nn.AvgPool2d(2),
            nn.ConstantPad3d((0, 0, 0, 0, pad_amount, pad_amount), 0),
        )

    first = block(layer_gates[0], self.inplanes, planes, stride, downsample, conv_downsample)
    self.inplanes = outplanes
    remaining = [block(layer_gates[i], self.inplanes, planes) for i in range(1, blocks)]
    return nn.Sequential(first, *remaining)
def __init__(self, kernel_size, stride=None, padding='SAME'):
    """Create a 3D max-pool, plus an explicit zero-pad module in 'SAME' mode.

    NOTE(review): ``self.pad`` and ``self.padding_shape`` are only defined
    when ``padding == 'SAME'``; confirm that callers never use another mode.
    """
    super(MaxPool3dTFPadding, self).__init__()

    if padding == 'SAME':
        # The pad amounts come from the project helper for this kernel/stride.
        self.padding_shape = hp.get_padding_shape(kernel_size, stride)
        self.pad = nn.ConstantPad3d(self.padding_shape, 0)

    self.pool = nn.MaxPool3d(kernel_size, stride, ceil_mode=True)
def __init__(self):
    """Create the frequency/spatial encoders, the GRU + attention stage and the decoders."""
    super(Deasfn, self).__init__()
    self.in_channels = 30

    # Five parallel branches whose first kernel spans 5, 10, ..., 25 steps
    # along the depth axis; each is zero-padded in front by (span - 1).
    self.FreqEncoder = nn.ModuleList()
    for scale in range(1, 6):
        span = 5 * scale
        branch = nn.Sequential(
            nn.ConstantPad3d((0, 0, 0, 0, span - 1, 0), 0),
            nn.Conv3d(30, 64, kernel_size=(span, 1, 1), padding=0),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True),
            nn.Conv3d(64, 128, kernel_size=(25, 1, 1)),
            nn.BatchNorm3d(128),
            nn.ReLU(inplace=True),
        )
        self.FreqEncoder.append(branch)

    self.SpatialEncoder = nn.Sequential(
        self.make_layer(ResidualBlock3D, 128, 4),
        nn.AvgPool3d((25, 1, 1)),
    )

    # Width shared by the hidden state, GRU, attention and decoder input:
    # 128 channels per frequency branch plus one more group of 128.
    fused = (len(self.FreqEncoder) + 1) * 128

    self.hidden = Parameter(torch.randn(1, 1, fused))
    self.rnn = torch.nn.GRU(input_size=fused, hidden_size=fused)
    self.attention_block = AttentionLayer(fused)

    self.decoder = nn.Sequential(
        ConvBlock(fused, 79, 3, 2),
        ConvBlock(79, 38, 3, 1),
        ConvBlock(38, 19, 3, 1),
    )
    # Two output heads on top of the shared 19-channel decoder features.
    self.DecoderJH = nn.Sequential(
        ConvBlock(19, 19, 3, 1),
        ConvBlock(19, 19, 3, 1),
        nn.Conv2d(19, 19, kernel_size=3, stride=1, padding=1, bias=False),
    )
    self.DecoderPAF = nn.Sequential(
        ConvBlock(19, 38, 3, 1),
        ConvBlock(38, 38, 3, 1),
        nn.Conv2d(38, 38, kernel_size=3, stride=1, padding=1, bias=False),
    )
def inference2d(self, image):
    """Run the 2D model over a volume in fixed-size batches of slices.

    ``image`` has its leading axis squeezed and axis 2 used as the slice
    axis, so it is assumed to be laid out as (1, C, D, H, W) -- TODO confirm.
    If the slice count is not a multiple of ``self.args.val_batch_size``,
    zero slices are added in front and the matching predictions are dropped
    again afterwards.

    Returns the predictions transposed back to channel-first order with the
    leading axis restored.
    """
    batch_size = self.args.val_batch_size
    batch_modulo = image.shape[2] % batch_size
    if batch_modulo != 0:
        batch_pad = batch_size - batch_modulo
        # The third pad pair acts on axis -3 (the slice axis): pad in front.
        image = nn.ConstantPad3d((0, 0, 0, 0, batch_pad, 0), 0)(image)
        mark_step(self.args.run_lazy_mode)

    # Slices become the batch axis of the 2D model.
    image = torch.transpose(image.squeeze(0), 0, 1)
    preds_shape = (image.shape[0], self.n_class + 1, *image.shape[2:])
    starts = range(0, image.shape[0] - batch_size + 1, batch_size)

    if self.args.hpus:
        # Predictions are concatenated rather than written into a
        # preallocated buffer, with a mark_step after every batch.
        preds = None
        for start in starts:
            pred = self.model(image[start:start + batch_size])
            # Identity test: ``==`` against None is not a reliable check
            # once ``preds`` holds a tensor.
            preds = pred if preds is None else torch.cat((preds, pred), dim=0)
            mark_step(self.args.run_lazy_mode)
        if batch_modulo != 0:
            preds = preds[batch_pad:]
            mark_step(self.args.run_lazy_mode)
    else:
        preds = torch.zeros(preds_shape, dtype=image.dtype, device=image.device)
        for start in starts:
            end = start + batch_size
            pred = self.model(image[start:end])
            preds[start:end] = pred.data
        if batch_modulo != 0:
            preds = preds[batch_pad:]

    return torch.transpose(preds, 0, 1).unsqueeze(0)
def __init__(self, embedding_size=128):
    """Create the fixed input pad, the five-stage 3D conv encoder and two linear heads.

    :param embedding_size: output width of both ``dense`` and ``dense_var``.
    """
    super().__init__()

    # Asymmetric zero pad, given as (left, right, top, bottom, front, back).
    self.pad = nn.ConstantPad3d((2, 3, 9, 10, 2, 3), 0)

    def conv(c_in, c_out, stride):
        # 3x3x3 convolution with one voxel of padding.
        return nn.Conv3d(c_in, c_out, kernel_size=3, stride=stride, padding=1)

    # Each stage is a stride-2 conv followed by a stride-1 conv. The third
    # stage applies BatchNorm before its last ReLU; the others after it.
    self.conv = nn.Sequential(
        conv(1, 16, 2), nn.ReLU(),
        conv(16, 16, 1), nn.ReLU(),
        nn.BatchNorm3d(16),

        conv(16, 32, 2), nn.ReLU(),
        conv(32, 32, 1), nn.ReLU(),
        nn.BatchNorm3d(32),

        conv(32, 64, 2), nn.ReLU(),
        conv(64, 64, 1),
        nn.BatchNorm3d(64),
        nn.ReLU(),

        conv(64, 128, 2), nn.ReLU(),
        conv(128, 128, 1), nn.ReLU(),
        nn.BatchNorm3d(128),

        conv(128, 256, 2), nn.ReLU(),
        conv(256, 256, 1), nn.ReLU(),
        nn.BatchNorm3d(256),
    )

    self.dense = nn.Linear(256, embedding_size)
    self.dense_var = nn.Linear(256, embedding_size)
def __init__(self, in_planes, planes, stride=1, option='B'):
    """Create the two conv/BN pairs and the shortcut module of the block.

    ``option`` is stored on the instance. When the shape changes, 'A' uses a
    zero channel pad as shortcut and 'B' a 1x1 conv + BN; any other value
    leaves the empty ``nn.Sequential`` in place.
    """
    super(BasicBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    self.shortcut = nn.Sequential()
    self.option = option

    shape_changes = stride != 1 or in_planes != planes
    if shape_changes and option == 'A':
        # For CIFAR10 the ResNet paper uses option A. This module only pads
        # the channel axis; any spatial subsampling (x[:, :, ::2, ::2]) has
        # to happen outside it -- TODO confirm against forward().
        side = planes // 4
        self.shortcut = nn.ConstantPad3d((0, 0, 0, 0, side, side), 0)
    if shape_changes and option == 'B':
        widened = self.expansion * planes
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_planes, widened, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(widened),
        )
def __init__(self, input_ch, output_ch, first_conv_stride=1):
    """Build the residual branch and, for strided blocks, a pooled side branch.

    ``self.varying_size`` is True exactly when ``first_conv_stride > 1``.
    """
    super(ResidualBlock, self).__init__()

    self.block = nn.Sequential(
        nn.ZeroPad2d(1),
        nn.Conv2d(input_ch, output_ch, 3, stride=first_conv_stride, bias=False),
        nn.BatchNorm2d(output_ch),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(1),
        nn.Conv2d(output_ch, output_ch, 3, bias=False),
        nn.BatchNorm2d(output_ch),
    )

    self.varying_size = first_conv_stride > 1
    if self.varying_size:
        self.side_block = nn.Sequential(
            nn.ZeroPad2d(1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # ConstantPad3d takes (left, right, top, bottom, front, back) and
            # states where zeros are added; here only "back" is non-zero, so
            # zeros are appended along the third-from-last axis.
            nn.ConstantPad3d((0, 0, 0, 0, 0, output_ch - input_ch), value=0.),
        )
def __init__(self, in_channels, kernel_size):
    """Create a 1x1x1 compression conv followed by a 'same'-padded 3D conv.

    Both convs output ``in_channels // 2`` channels, which is also stored as
    ``self.out_channels``. ``kernel_size`` is (kt, ks1, ks2).
    """
    assert len(
        kernel_size
    ) == 3, "Kernel size should have shape (kt, ks1, ks2), got {}".format(
        kernel_size)

    half = in_channels // 2
    n_inplanes = half
    out_channels = half
    self.out_channels = out_channels

    def split(extent):
        # Leading/trailing pad for one axis; the trailing side gets the
        # extra voxel when the kernel extent is even.
        return floor((extent - 1) / 2), ceil((extent - 1) / 2)

    kt, ks1, ks2 = kernel_size
    # Explicit padding (rather than Conv3d's ``padding`` argument) so odd-
    # and even-sized kernels both keep the input size.
    # Order: left, right, top, bottom, front, back.
    same_padding = split(ks2) + split(ks1) + split(kt)

    super(LocalContext, self).__init__(
        # feature compression
        nn.Conv3d(in_channels, n_inplanes, (1, 1, 1), bias=False),
        nn.BatchNorm3d(n_inplanes),
        nn.ReLU(inplace=True),
        # spatial & temporal convolution
        nn.ConstantPad3d(same_padding, 0),
        nn.Conv3d(n_inplanes, out_channels, kernel_size, bias=False),
        nn.BatchNorm3d(out_channels),
        nn.ReLU(inplace=True),
    )
def __init__(self, opts: mo.Unit3DOptions):
    """Create the conv unit: optional explicit pad, Conv3d, optional BN and ReLU.

    :raises ValueError: if ``opts.padding`` is not one of the accepted
        names, or is accepted by the first check but is neither 'SAME' nor
        'VALID'.
    """
    super(Unit3D, self).__init__()
    self.pad = None
    self.batch3d = None
    self.activation = None

    if opts.padding not in ['SAME', 'VALID', 'TEMPORAL_VALID']:
        raise ValueError(f'padding should be in [VALID, SAME, TEMPORAL_VALID] but got {opts.padding}.')

    padding_shape = hp.get_padding_shape(opts.kernel_size, opts.stride)

    def make_conv(**extra):
        # Every variant shares channels, kernel, stride and bias settings.
        return nn.Conv3d(opts.in_channels,
                         opts.out_channels,
                         opts.kernel_size,
                         stride=opts.stride,
                         bias=opts.use_bias,
                         **extra)

    if opts.padding == 'SAME':
        simplify_pad, pad_size = hp.simplify_padding(padding_shape)
        if simplify_pad:
            # The padding can be expressed through the conv's own argument.
            self.conv3d = make_conv(padding=pad_size)
        else:
            # Otherwise pad explicitly and run an unpadded conv.
            self.pad = nn.ConstantPad3d(padding_shape, 0)
            self.conv3d = make_conv()
    elif opts.padding == 'VALID':
        self.conv3d = make_conv(padding=0)
    else:
        raise ValueError(f'Padding should be in [VALID|SAME] but got {opts.padding}')

    if opts.use_bn:
        self.batch3d = nn.BatchNorm3d(opts.out_channels)
    if opts.activation == 'relu':
        self.activation = nn.ReLU()
def equalize_dimensions(self, x, upsampled_data):
    """Zero-pad the last three axes of ``x`` by its size difference to ``upsampled_data``.

    For each axis the absolute size difference is split into a leading and
    a trailing part; when it is odd, the trailing side gets the extra element.
    """
    def split(axis):
        # (leading, trailing) pad amounts for one axis.
        total = abs(x.shape[axis] - upsampled_data.shape[axis])
        leading = total // 2
        return leading, total - leading

    # ConstantPad3d expects the last axis first.
    amounts = split(-1) + split(-2) + split(-3)
    return nn.ConstantPad3d(amounts, 0)(x)
def __init__(self, input_dim, output_dim, kernel_size, stride, padding=0, norm='none', activation='relu', pad_type='zero'):
    """Create a zero pad, an optional norm, an optional activation and a Conv3d.

    ``norm`` is one of 'bn', 'in', 'ln', 'adain', 'none', 'sn' ('sn' wraps
    the conv in SpectralNorm and uses no norm layer). ``activation`` is one
    of 'relu', 'lrelu', 'prelu', 'selu', 'tanh', 'none'. ``pad_type`` is
    accepted but not read here; padding is always constant zero.
    """
    super(Conv3dBlock, self).__init__()
    self.use_bias = True

    # initialize padding
    self.pad = nn.ConstantPad3d(padding, 0)

    # initialize normalization (factories are lazy so only the selected
    # class is looked up)
    norm_dim = output_dim
    norm_factories = {
        'bn': lambda: nn.BatchNorm3d(norm_dim),
        'in': lambda: nn.InstanceNorm3d(norm_dim),
        'ln': lambda: LayerNorm(norm_dim),
        'adain': lambda: AdaptiveInstanceNorm3d(norm_dim),
    }
    if norm in norm_factories:
        self.norm = norm_factories[norm]()
    elif norm in ('none', 'sn'):
        self.norm = None
    else:
        assert 0, "Unsupported normalization: {}".format(norm)

    # initialize activation
    activation_factories = {
        'relu': lambda: nn.ReLU(inplace=True),
        'lrelu': lambda: nn.LeakyReLU(0.2, inplace=True),
        'prelu': lambda: nn.PReLU(),
        'selu': lambda: nn.SELU(inplace=True),
        'tanh': lambda: nn.Tanh(),
    }
    if activation in activation_factories:
        self.activation = activation_factories[activation]()
    elif activation == 'none':
        self.activation = None
    else:
        assert 0, "Unsupported activation: {}".format(activation)

    # initialize convolution
    conv = nn.Conv3d(input_dim, output_dim, kernel_size, stride, bias=self.use_bias)
    self.conv = SpectralNorm(conv) if norm == 'sn' else conv
def __init__(self, in_channels, out_channels, use_conv3d=True, use_refl=True, need_last_nolin=True):
    """Create the layers of the unpacking block.

    :param in_channels: input channels of the 2D convolution.
    :param out_channels: target channel count; the 2D conv produces
        ``4 * out_channels`` channels, divided by the number of 3D conv
        channels when ``use_conv3d`` is set.
    :param use_conv3d: also create the 4-channel 3D convolution and its ELU.
    :param use_refl: use reflection (2D) / replication (3D) padding instead
        of zero padding.
    :param need_last_nolin: stored on the instance for use elsewhere.
    """
    super(UnPackBlock, self).__init__()
    self.useconv3d = use_conv3d
    self.need_last_nolin = need_last_nolin
    self.nolin = nn.ELU(inplace=True)

    if use_conv3d:
        self.conv3d_channels = 4
        self.conv3d = nn.Conv3d(1, self.conv3d_channels, 3, bias=True)
        self.nolin3d = nn.ELU(inplace=True)
        # ``np.int`` was only an alias of the builtin and no longer exists
        # in current NumPy; ``int`` truncates identically.
        conv2d_channels = int(4 * out_channels / self.conv3d_channels)
        self.conv2d = nn.Conv2d(in_channels, conv2d_channels, 3, bias=True)
        self.norm2d = nn.GroupNorm(16, conv2d_channels, 1e-10)
    else:
        self.conv2d = nn.Conv2d(in_channels, out_channels * 4, 3, bias=True)
        self.norm2d = nn.GroupNorm(16, out_channels * 4, 1e-10)

    # The convs above use no padding of their own; these modules supply it.
    if use_refl:
        self.pad2d = nn.ReflectionPad2d(1)
        self.pad3d = nn.ReplicationPad3d(1)
    else:
        self.pad2d = nn.ZeroPad2d(1)
        self.pad3d = nn.ConstantPad3d(1, 0)

    self.upsample3d = DepthToSpace(2)
def __init__(self):
    """Create the five conv groups (``g1``..``g5``) and the fully connected head."""
    super(c3d, self).__init__()

    def conv_relu(c_in, c_out):
        # 3x3x3 convolution with one voxel of padding, followed by ReLU.
        return [nn.Conv3d(c_in, c_out, kernel_size=3, padding=1), nn.ReLU()]

    def pool(kernel):
        # Max-pool whose stride equals its kernel.
        return nn.MaxPool3d(kernel_size=kernel, stride=kernel)

    # 1st layer group: pools only over the last two axes.
    self.g1 = nn.Sequential(*conv_relu(3, 64), pool((1, 2, 2)))
    # 2nd layer group
    self.g2 = nn.Sequential(*conv_relu(64, 128), pool((2, 2, 2)))
    # 3rd layer group
    self.g3 = nn.Sequential(
        *conv_relu(128, 256), *conv_relu(256, 256), pool((2, 2, 2)))
    # 4th layer group
    self.g4 = nn.Sequential(
        *conv_relu(256, 512), *conv_relu(512, 512), pool((2, 2, 2)))
    # 5th layer group: one extra zero column and row before pooling.
    self.g5 = nn.Sequential(
        *conv_relu(512, 512),
        *conv_relu(512, 512),
        nn.ConstantPad3d((0, 1, 0, 1, 0, 0), 0.),
        pool((2, 2, 2)))

    # FC layers group: ends in a single output value.
    self.fcs = nn.Sequential(
        nn.Linear(307200, 8192),
        nn.ReLU(),
        nn.Linear(8192, 4096),
        nn.ReLU(),
        nn.Linear(4096, 1))
def __init__(self, retina, scale, depth, channels=False, flatten=True):
    """
    Layer defining spatial glimpses

    :param retina: side length of the (square) retina
    :param scale: factor by which the retina grows with each depth level
    :param depth: number of stacked, growing patches
    :param channels: the input image has more than one channel
    :param flatten: return a flattened array instead of an image patch
    """
    super(SpatialGlimpse, self).__init__()

    # The stored retina size is rounded down to an even number.
    self.retina = retina - retina % 2
    self.scale = scale
    self.depth = depth
    self.flatten = flatten
    self.hard_tanh = nn.Hardtanh()

    # Half the retina, scaled up to the largest depth level.
    self.pad_width = int(self.retina / 2) * scale**(depth - 1)
    self.padding = (nn.ConstantPad3d(self.pad_width, 0.)
                    if channels else nn.ZeroPad2d(self.pad_width))

    # NOTE(review): the pooling target uses the raw ``retina`` argument,
    # not the even-rounded ``self.retina`` -- confirm this is intended.
    self.scaler = nn.AdaptiveAvgPool2d((retina, retina))

    print('pad_width:', self.pad_width)
    print('scale:', self.scale)
    print('depth:', self.depth)
    print('retina:', self.retina, '\n')
def same_padding_3d(images, ksizes, strides=(1, 1, 1), rates=(1, 1, 1)):
    """Zero-pad a 5-D tensor so a following conv yields ceil(size / stride) per axis.

    ``ksizes``, ``strides`` and ``rates`` (dilations) are indexed as
    (depth, rows, cols). When the padding of an axis is odd, the extra
    element goes to the trailing side (back / bottom / right).
    """
    assert len(images.size()) == 5
    batch_size, channel, depth, rows, cols = images.size()

    def split(extent, axis):
        # Total padding needed along one axis, split into (leading, trailing).
        out_extent = (extent + strides[axis] - 1) // strides[axis]
        effective_k = (ksizes[axis] - 1) * rates[axis] + 1
        total = max(0, (out_extent - 1) * strides[axis] + effective_k - extent)
        leading = int(total / 2.)
        return leading, total - leading

    padding_front, padding_back = split(depth, 0)
    padding_top, padding_bottom = split(rows, 1)
    padding_left, padding_right = split(cols, 2)

    # ConstantPad3d expects the last axis first.
    paddings = nn.ConstantPad3d(
        (padding_left, padding_right, padding_top, padding_bottom,
         padding_front, padding_back), 0)
    return paddings(images)
def __init__(self, kernel_size, stride, return_indices=False, return_pad=False):
    """Create the max-pool and a placeholder zero-pad module, and store the settings.

    ``self.pad`` starts with zero padding on every side.
    """
    super(PadMaxPool3d, self).__init__()

    # Plain settings.
    self.kernel_size = kernel_size
    self.stride = stride

    # Sub-modules.
    self.pool = nn.MaxPool3d(kernel_size, stride, return_indices=return_indices)
    self.pad = nn.ConstantPad3d(padding=0, value=0)

    # Output flags.
    self.return_indices = return_indices
    self.return_pad = return_pad
def forward(self, video, audio):
    """Pad both sequences to ``MAX_SEQ_LEN`` along axis 1, mix, flatten and combine them.

    Both inputs are 3-D (they are permuted with three axes). The pad amount
    comes from ``video.shape[1]`` and is applied to ``audio`` as well, so the
    two are presumably equally long -- TODO confirm.
    """
    # Zeros are appended at the end of axis 1 only.
    missing = MAX_SEQ_LEN - video.shape[1]
    pad_to_max = nn.ConstantPad3d((0, 0, 0, missing, 0, 0), 0)

    video_mixed = self._video_linear_combination(pad_to_max(video).permute(0, 2, 1))
    audio_mixed = self._audio_linear_combination(pad_to_max(audio).permute(0, 2, 1))

    # Flatten everything but the batch axis before dropout.
    video_flat = self._dropout(video_mixed.reshape(video_mixed.shape[0], -1))
    audio_flat = self._dropout(audio_mixed.reshape(audio_mixed.shape[0], -1))

    return self._impl(video_flat, audio_flat)
def __init__(self, inplanes, planes, stride=1, downsample=None):
    """Create the 1x1x1 -> 3x3x3 -> 1x1x1 bottleneck convs with their BatchNorms.

    The block outputs ``planes * 4`` channels. ``downsample`` is stored as
    given for use on the identity branch (usage is outside this method).
    """
    super(Bottleneck, self).__init__()
    expanded = planes * 4

    # Reduce.
    self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False)
    self.bn1 = nn.BatchNorm3d(planes)
    # Spatial conv; carries the stride and a padding of 2 per axis.
    self.conv2 = nn.Conv3d(planes,
                           planes,
                           kernel_size=3,
                           stride=stride,
                           padding=(2, 2, 2),
                           bias=False)
    self.bn2 = nn.BatchNorm3d(planes)
    # Expand.
    self.conv3 = nn.Conv3d(planes, expanded, kernel_size=1, bias=False)
    self.bn3 = nn.BatchNorm3d(expanded)

    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride

    # Two separate one-voxel zero-pad modules with identical settings.
    self.down_pad = nn.ConstantPad3d((1,) * 6, 0)
    self.normal_pad = nn.ConstantPad3d((1,) * 6, 0)
def __init__(self, omegaConf: DictConfig):
    """Read the layer settings from the config and build the conv stack ``self.m``.

    Every layer is an explicit zero pad followed by an unpadded
    ``HexaConv3d``; all layers but the last are followed by an in-place ReLU.
    """
    super(Conv3dModel, self).__init__()

    # The config has already been validated by OmegaConf at this point, so
    # it can be turned into plain containers for use with other libraries.
    conf = OmegaConf.to_container(omegaConf)
    assert isinstance(conf, dict)

    self.data_input_size = conf['data_input_size']
    self.data_output_size = conf['data_output_size']
    self.num_layers = conf['num_layers']

    # Each per-layer setting is extended to all layers, then checked.
    self.kernel_size = self._extend_for_multilayer(conf['kernel_size'])
    self.check_kernel_size()
    self.h_chan_dim = self._extend_for_multilayer(conf['h_chan_dim'])
    self.check_h_chan_dim()
    self.strides = self._extend_for_multilayer(conf['strides'])
    self.check_strides()
    self.paddings = self._extend_for_multilayer(conf['paddings'])
    self.check_paddings()

    layers: List[nn.Module] = []

    # Hidden layers: the first reads the data channels, later ones read the
    # previous hidden width.
    for i in range(self.num_layers - 1):
        in_channels = self.data_input_size[0] if i == 0 else self.h_chan_dim[i - 1]
        layers += [
            nn.ConstantPad3d(self.paddings[i], 0),
            HexaConv3d(in_channels,
                       self.h_chan_dim[i],
                       self.kernel_size[i],
                       stride=self.strides[i],
                       padding=0),
            nn.ReLU(True),
        ]

    # Output layer: no activation.
    layers += [
        nn.ConstantPad3d(self.paddings[-1], 0),
        HexaConv3d(self.h_chan_dim[-1],
                   self.data_output_size[0],
                   self.kernel_size[-1],
                   stride=self.strides[-1],
                   padding=0),
    ]

    self.m = nn.Sequential(*layers)
def get_pad_operation(self):
    """Set ``self.pad_op`` to a zero pad of ``dilation * (kernel_size // 2)`` per axis.

    Handles ``self.op`` equal to 'Conv2d' or 'Conv3d'; for any other value
    ``self.pad_op`` is left untouched.
    """
    def half_span(axis):
        # One-sided pad for ``axis`` of the (dilated) kernel.
        return (self.dilation[axis]) * (self.kernel_size[axis] // 2)

    if self.op in ['Conv2d']:
        left_right, top_bottom = half_span(1), half_span(0)
        self.pad_op = nn.ConstantPad2d(
            (left_right, left_right, top_bottom, top_bottom), 0)

    if self.op in ['Conv3d']:
        left_right, top_bottom = half_span(2), half_span(1)
        front_back = half_span(0)  # depth dimension
        self.pad_op = nn.ConstantPad3d(
            (left_right, left_right, top_bottom, top_bottom, front_back, front_back), 0)
def sobelLayer(input):
    """Apply the 3D Sobel kernel to ``input`` and squash the response with tanh.

    The volume is padded by one voxel of value -1 on every face, convolved
    with the kernel from ``create3DsobelFilter`` (result divided by 4),
    reduced with an L2 norm over axis 1, scaled by ``3 / channels`` and
    passed through ``tanh(x) * 2 - 1``.
    """
    border = nn.ConstantPad3d((1, 1, 1, 1, 1, 1), -1)
    sobel_kernel = create3DsobelFilter()
    squash = nn.Tanh()

    padded = border(input)
    response = F.conv3d(padded, sobel_kernel, padding=0, groups=1) / 4

    # Only the channel count of the response is needed.
    _, channels, _, _, _ = response.size()
    magnitude = torch.norm(response, 2, 1, True) / channels * 3

    return squash(magnitude) * 2 - 1
def __init__(self, block_num, num, in_channels, filter_num, stride=1, increase_dim=False):
    """
    Initialize the block.

    :param block_num: id of the enclosing big Block (contains 8 residual blocks).
    :param num: id of this residual block inside the big Block.
    :param in_channels: number of input channels.
    :param filter_num: two-element sequence with the output channel counts
        of the two conv layers of this block.
    :param stride: conv stride.
    :param increase_dim: whether this block increases the dimension; if so,
        the first conv's stride is doubled and a pool + channel-pad pair is
        created.
    """
    super(Block, self).__init__()
    self.block_num = block_num
    self.num = num
    self.increase_dim = increase_dim
    self.conv1_stride = self.conv2_stride = stride

    self.bn1 = nn.BatchNorm2d(in_channels)
    # First BIB layer is located between 'bn1' and 'conv1'.
    if increase_dim:
        self.conv1_stride *= 2
    self.conv1 = nn.Conv2d(in_channels,
                           filter_num[0],
                           kernel_size=3,
                           stride=self.conv1_stride,
                           padding=1,
                           bias=False)
    self.relu1 = nn.ReLU(inplace=True)

    self.bn2 = nn.BatchNorm2d(filter_num[0])
    # Second BIB layer is located between 'bn2' and 'conv2'.
    self.conv2 = nn.Conv2d(filter_num[0],
                           filter_num[1],
                           kernel_size=3,
                           stride=self.conv2_stride,
                           padding=1,
                           bias=False)
    # In-place computation saves (GPU) memory and avoids repeatedly
    # allocating and freeing buffers. It overwrites its input, so it is
    # used wherever that causes no error.
    self.relu2 = nn.ReLU(inplace=True)

    if increase_dim:
        # 2x2 average pooling plus in_channels // 2 zero channels on each
        # side of the channel axis.
        self.avgpool = nn.AvgPool2d(2)
        self.pad = nn.ConstantPad3d(
            (0, 0, 0, 0, in_channels // 2, in_channels // 2), 0.)
    else:
        self.avgpool = None
        self.pad = None
    # A trailing bare ``self.parameters()`` call was dropped: it only
    # created an iterator and discarded it.