def __init__(self):
    """Assemble the network: stem, residual groups, pooling, up-paths and heads.

    Sub-modules are registered in this order: ``preBlock``, ``forw1``..``forw4``,
    ``back2``/``back3``, the pooling/unpooling layers, ``path1``/``path2``,
    ``drop`` and finally the three 1x1x1 convolutional heads.
    """
    super(Net, self).__init__()

    # The earliest layers see the largest feature maps, so the stem uses plain
    # convolutions instead of residual blocks to keep memory usage down.
    stem_width = 24
    self.preBlock = nn.Sequential(
        nn.Conv3d(1, stem_width, kernel_size=3, padding=1),
        nn.BatchNorm3d(stem_width),
        nn.ReLU(inplace=True),
        nn.Conv3d(stem_width, stem_width, kernel_size=3, padding=1),
        nn.BatchNorm3d(stem_width),
        nn.ReLU(inplace=True))

    forw_depths = [2, 2, 3, 3]
    back_depths = [3, 3]
    self.featureNum_forw = [24, 32, 64, 64, 64]
    self.featureNum_back = [128, 64, 64]

    # Encoder groups: only the first block of a group changes the channel count.
    for group, depth in enumerate(forw_depths):
        width_in = self.featureNum_forw[group]
        width_out = self.featureNum_forw[group + 1]
        stack = [PostRes(width_in if k == 0 else width_out, width_out)
                 for k in range(depth)]
        setattr(self, 'forw' + str(group + 1), nn.Sequential(*stack))

    # Decoder groups: the first block takes the sum of a decoder width and an
    # encoder width, plus three extra channels for the first group only
    # (where those three channels come from is not visible in this method).
    for group, depth in enumerate(back_depths):
        width_out = self.featureNum_back[group]
        extra = 3 if group == 0 else 0
        merged_in = (self.featureNum_back[group + 1]
                     + self.featureNum_forw[group + 2]
                     + extra)
        stack = [PostRes(merged_in if k == 0 else width_out, width_out)
                 for k in range(depth)]
        setattr(self, 'back' + str(group + 2), nn.Sequential(*stack))

    # Four 2x max-pool layers that also return indices, and two unpool layers.
    for idx in range(1, 5):
        setattr(self, 'maxpool' + str(idx),
                nn.MaxPool3d(kernel_size=2, stride=2, return_indices=True))
    for idx in range(1, 3):
        setattr(self, 'unmaxpool' + str(idx),
                nn.MaxUnpool3d(kernel_size=2, stride=2))

    def make_up_path():
        # Learned 2x up-sampling followed by normalisation and activation.
        return nn.Sequential(
            nn.ConvTranspose3d(64, 64, kernel_size=2, stride=2),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True))

    self.path1 = make_up_path()
    self.path2 = make_up_path()
    self.drop = nn.Dropout3d(p=0.5, inplace=False)

    anchor_count = len(config['anchors'])

    def make_head(out_channels):
        # Two 1x1x1 convolutions with a ReLU in between.
        return nn.Sequential(
            nn.Conv3d(self.featureNum_back[0], 64, kernel_size=1),
            nn.ReLU(),
            nn.Conv3d(64, out_channels, kernel_size=1))

    self.output = make_head(5 * anchor_count)
    self.nodule_output = make_head(anchor_count)
    self.regress_output = make_head(4 * anchor_count)

    # Bias of the last nodule_output conv is set to -log((1 - p) / p), p = 0.01.
    prior = 0.01
    focal_bias = -math.log((1.0 - prior) / prior)
    self.nodule_output[2].bias.data.fill_(focal_bias)

    self.use_dropout = True
def _conv_unit(layer, channels, batch_norm):
    """Return ``[layer, optional BatchNorm3d(channels), ReLU]`` as a list."""
    unit = [layer]
    if batch_norm:
        unit.append(nn.BatchNorm3d(channels))
    unit.append(nn.ReLU(True))
    return unit


def get3DNet(paras):
    """Build the encoder and decoder of a 3D convolutional autoencoder.

    Args:
        paras: ``None`` (same as ``{'structure': 'default'}``) or a dict with
            a ``'structure'`` key, one of:

            * ``'default'``    -- four convs, one 2x max-pool, no normalisation.
            * ``'default_BN'`` -- same as ``'default'`` with BatchNorm3d after
              every convolution except the last decoder one.
            * ``'debug'``      -- small two-conv encoder.
            * ``'decreasing'`` -- repeated conv + 2x pooling that doubles the
              channel count each step; reads the optional keys
              ``'decreasing_layer_num'`` (default 3), ``'batchNorm'``
              (default True) and ``'root_feature_num'`` (default 32).

    Returns:
        tuple: ``(encoder, decoder)``, both ``nn.Sequential``.

    Raises:
        KeyError: if ``paras`` is a dict without a ``'structure'`` key.
        ValueError: if the structure name is not one of those listed above.
    """
    structure = 'default' if paras is None else paras['structure']

    if structure in ('default', 'default_BN'):
        # Both variants share one layout; only the BatchNorm layers differ.
        use_bn = structure == 'default_BN'
        encoder_layers = (
            _conv_unit(nn.Conv3d(1, 8, 3, stride=1, padding=1), 8, use_bn)
            + _conv_unit(nn.Conv3d(8, 16, 3, stride=1, padding=1), 16, use_bn)
            + [nn.MaxPool3d(2, stride=2)]
            + _conv_unit(nn.Conv3d(16, 32, 3, stride=1, padding=1), 32, use_bn)
            + _conv_unit(nn.Conv3d(32, 32, 3, stride=1, padding=1), 32, use_bn)
        )
        decoder_layers = (
            _conv_unit(nn.ConvTranspose3d(32, 16, 2, stride=2, padding=0),
                       16, use_bn)
            + _conv_unit(nn.Conv3d(16, 8, 3, stride=1, padding=1), 8, use_bn)
            + [nn.Conv3d(8, 1, 3, stride=1, padding=1), nn.Tanh()]
        )
    elif structure == 'debug':
        encoder_layers = [
            nn.Conv3d(1, 8, 3, stride=1, padding=1),
            nn.ReLU(True),
            nn.MaxPool3d(2, stride=2),
            nn.Conv3d(8, 16, 3, stride=1, padding=1),
            nn.ReLU(True),
        ]
        decoder_layers = [
            nn.ConvTranspose3d(16, 8, 2, stride=2, padding=0),
            nn.ReLU(True),
            nn.Conv3d(8, 1, 3, stride=1, padding=1),
            nn.Tanh(),
        ]
    elif structure == 'decreasing':
        downlayer_num = paras.get('decreasing_layer_num', 3)
        batchNorm = paras.get('batchNorm', True)
        root_feature_num = paras.get('root_feature_num', 32)

        # Down path: an input conv, then (conv doubling channels, pool) steps.
        encoder_layers = [
            nn.Conv3d(1, root_feature_num, 3, stride=1, padding=1)]
        for layer_idx in range(downlayer_num):
            in_feature_num = (2 ** layer_idx) * root_feature_num
            out_feature_num = in_feature_num * 2
            encoder_layers.extend(_conv_unit(
                nn.Conv3d(in_feature_num, out_feature_num, 3,
                          stride=1, padding=1),
                out_feature_num, batchNorm))
            encoder_layers.append(nn.MaxPool3d(2, stride=2))

        # Up path: each transposed conv halves the channels and doubles the
        # spatial size; a final conv maps back to a single channel.
        decoder_layers = []
        out_feature_num = (2 ** downlayer_num) * root_feature_num
        for layer_idx in range(downlayer_num):
            in_feature_num = (2 ** (downlayer_num - layer_idx)) * root_feature_num
            # Integer division keeps the channel count an exact int.
            out_feature_num = in_feature_num // 2
            decoder_layers.extend(_conv_unit(
                nn.ConvTranspose3d(in_feature_num, out_feature_num, 2,
                                   stride=2, padding=0),
                out_feature_num, batchNorm))
        decoder_layers.append(
            nn.Conv3d(out_feature_num, 1, 3, stride=1, padding=1))
    else:
        # Previously this fell through to an UnboundLocalError at `return`.
        raise ValueError(
            "Unknown structure '{}'; expected 'default', 'default_BN', "
            "'debug' or 'decreasing'.".format(structure))

    encoder = nn.Sequential(*encoder_layers)
    decoder = nn.Sequential(*decoder_layers)
    return encoder, decoder
def __init__(self):
    """Create the 3D convolutional encoder and the 2D convolutional decoders.

    The output widths of the decoder heads depend on the module-level name
    ``num_points`` (``pos_conv_3`` emits ``num_points * 3`` channels).
    """
    super(CustomModel, self).__init__()

    self.upsample = nn.Upsample(scale_factor=2, mode='bilinear')
    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()

    # Encoder. Each row is (in, out, cubic kernel edge, stride, padding).
    # The trailing notes give the per-axis output size assuming a 144-voxel
    # input edge, and the parameter count out * (in * k^3 + 1).
    encoder_specs = (
        (3, 3, 9, 1, 0),        # 136, 3*(3*9^3+1)     = 6.5K
        (3, 44, 10, 2, 0),      # 64,  44*(3*10^3+1)   = 132K
        (44, 32, 6, 2, 2),      # 32,  32*(44*6^3+1)   = 305K
        (32, 76, 6, 2, 2),      # 16,  76*(32*6^3+1)   = 525K
        (76, 288, 3, 1, 1),     # 16,  288*(76*3^3+1)  = 590K
        (288, 150, 4, 2, 1),    # 8,   150*(288*4^3+1) = 2.7M
        (150, 512, 3, 1, 1),    # 8,   512*(150*3^3+1) = 2.1M
    )
    for number, (c_in, c_out, edge, step, pad) in enumerate(encoder_specs, 1):
        setattr(self, 'down_conv_' + str(number),
                nn.Conv3d(in_channels=c_in,
                          out_channels=c_out,
                          kernel_size=(edge, edge, edge),
                          stride=step,
                          padding=pad))
    # Pools an 8x8x8 window down to one value per channel.
    self.down_pool_8 = nn.MaxPool3d(kernel_size=8)

    # Decoder trunk: two linear layers followed by two 3x3 2D convolutions.
    # NOTE(review): 4096 = 256 * 4 * 4, so up_linear_2's output is presumably
    # reshaped to 256 channels before up_conv_3 -- confirm in forward().
    self.up_linear_1 = nn.Linear(in_features=512, out_features=1024)
    self.up_linear_2 = nn.Linear(in_features=1024, out_features=4096)

    def conv3x3(c_in, c_out):
        return nn.Conv2d(in_channels=c_in, out_channels=c_out,
                         kernel_size=3, stride=1, padding=1)

    def conv1x1(c_in, c_out):
        return nn.Conv2d(in_channels=c_in, out_channels=c_out,
                         kernel_size=1, stride=1, padding=0)

    self.up_conv_3 = conv3x3(256, 512)
    self.up_conv_4 = conv3x3(512, 512)

    # Position decoder: 512 -> 512 -> 512 -> num_points * 3 channels.
    self.pos_conv_1 = conv1x1(512, 512)
    self.pos_conv_2 = conv1x1(512, 512)
    self.pos_conv_3 = conv1x1(512, num_points * 3)

    # Seed decoder: 512 -> 512 -> 512 -> 3 channels.
    self.seed_conv_1 = conv1x1(512, 512)
    self.seed_conv_2 = conv1x1(512, 512)
    self.seed_conv_3 = conv1x1(512, 3)
def __init__(self, outer_nc, inner_nc, input_nc=None, dim=3,
             submodule=None, outermost=False, innermost=False,
             norm='batch_norm', use_dropout=False):
    """Construct a Unet submodule with skip connections.

    Parameters:
        outer_nc (int) -- the number of filters in the outer conv layer
        inner_nc (int) -- the number of filters in the inner conv layer
        input_nc (int) -- the number of channels in input images/features;
                          defaults to outer_nc when None
        dim (int)      -- spatial data dimension, 2 or 3
        submodule (UnetSkipConnectionBlock) -- previously defined submodule,
                          placed between the down and up paths (ignored when
                          innermost is True)
        outermost (bool) -- if this module is the outermost module
        innermost (bool) -- if this module is the innermost module
        norm (str)     -- name of the normalization layer; any string
                          containing 'batch' or 'instance' (case-insensitive)
        use_dropout (bool) -- append nn.Dropout(0.5); only applied to
                          intermediate (neither outermost nor innermost) blocks

    Raises:
        ValueError: if dim is not 2 or 3, or norm names no known layer.
    """
    super().__init__()
    self.outermost = outermost

    # Pick the 2D or 3D flavour of every layer type once.
    if dim == 2:
        batch_norm_cls, instance_norm_cls = nn.BatchNorm2d, nn.InstanceNorm2d
        pool_cls, upconv_cls = nn.MaxPool2d, nn.ConvTranspose2d
    elif dim == 3:
        batch_norm_cls, instance_norm_cls = nn.BatchNorm3d, nn.InstanceNorm3d
        pool_cls, upconv_cls = nn.MaxPool3d, nn.ConvTranspose3d
    else:
        raise ValueError("Dimension must be 2 or 3.")

    norm_name = norm.lower()
    if 'batch' in norm_name:
        norm_layer = batch_norm_cls
    elif 'instance' in norm_name:
        norm_layer = instance_norm_cls
    else:
        raise ValueError("Norm layer '" + norm + "' unknown.")

    use_bias = 'instance' in norm_name
    if input_nc is None:
        input_nc = outer_nc

    # One activation instance is shared by every block built below.
    downrelu = nn.LeakyReLU(0.2, inplace=True)

    if outermost:
        # The outermost up-convolution keeps the default bias setting.
        upconv = upconv_cls(inner_nc * 2, outer_nc,
                            kernel_size=4, stride=2, padding=1)
        down = [
            self._block(input_nc, inner_nc, 'outer_block', norm_layer,
                        downrelu, dim),
            pool_cls(kernel_size=2),
        ]
        up = [
            upconv,
            self._block(outer_nc, outer_nc, 'outer_block', norm_layer,
                        nn.Softmax(dim=1), dim),
        ]
        model = down + [submodule] + up
    elif innermost:
        # No pooling, up-convolution or submodule at the bottleneck.
        down = [self._block(input_nc, inner_nc, 'inner_block1', norm_layer,
                            downrelu, dim)]
        up = [self._block(inner_nc, outer_nc, 'inner_block2', norm_layer,
                          downrelu, dim)]
        model = down + up
    else:
        upconv = upconv_cls(inner_nc * 2, outer_nc,
                            kernel_size=4, stride=2, padding=1, bias=use_bias)
        down = [
            self._block(input_nc, inner_nc, 'inner_block', norm_layer,
                        downrelu, dim),
            pool_cls(kernel_size=2),
        ]
        up = [
            upconv,
            self._block(outer_nc, outer_nc, 'outer_block', norm_layer,
                        downrelu, dim),
        ]
        model = down + [submodule] + up
        if use_dropout:
            model = model + [nn.Dropout(0.5)]

    self.model = nn.Sequential(*model)
def __init__(self,
             num_classes,
             depth,
             pretrained=None,
             pretrained_base=True,
             feat_ext=False,
             num_stages=4,
             spatial_strides=(1, 2, 2, 2),
             temporal_strides=(1, 1, 1, 1),
             dilations=(1, 1, 1, 1),
             out_indices=(0, 1, 2, 3),
             conv1_kernel_t=1,
             conv1_stride_t=1,
             pool1_kernel_t=1,
             pool1_stride_t=1,
             style='pytorch',
             frozen_stages=-1,
             inflate_freq=(0, 0, 1, 1),
             inflate_stride=(1, 1, 1, 1),
             inflate_style='3x1x1',
             nonlocal_stages=(-1, ),
             nonlocal_freq=(0, 0, 0, 0),
             nonlocal_cfg=None,
             bn_eval=False,
             bn_frozen=False,
             partial_bn=False,
             with_cp=False,
             dropout_ratio=0.5,
             init_std=0.01):
    """Build the 3D ResNet stem, residual stages and classification head.

    Args:
        num_classes (int): output size of the final linear layer.
        depth (int): key into ``self.arch_settings`` selecting the block type
            and the number of blocks per stage.
        pretrained: stored as-is; when falsy the ``fc`` layer is initialised
            here (normal weights with std ``init_std``, zero bias).
        num_stages (int): number of residual stages to build, 1..4.
        spatial_strides, temporal_strides, dilations: per-stage settings;
            each must have exactly ``num_stages`` entries.
        out_indices: stage indices; the largest must be below ``num_stages``.
        conv1_kernel_t, conv1_stride_t: temporal kernel/stride of the stem conv.
        pool1_kernel_t, pool1_stride_t: temporal kernel/stride of the stem pool.
        inflate_freq, nonlocal_freq: per-stage value, or one int that is
            repeated for every stage.
        nonlocal_stages: stages that receive ``nonlocal_cfg``.
        dropout_ratio (float): dropout probability; 0 disables dropout.
        init_std (float): std used for the ``fc`` weight initialisation.

        The remaining arguments (``pretrained_base``, ``feat_ext``, ``style``,
        ``frozen_stages``, ``inflate_style``, ``bn_eval``, ``bn_frozen``,
        ``partial_bn``, ``with_cp``) are stored on the instance or forwarded
        to ``make_res_layer``. ``inflate_stride`` is accepted but not used in
        this constructor.

    Raises:
        KeyError: if ``depth`` is not in ``self.arch_settings``.
    """
    super(ResNet_SlowFast, self).__init__()
    if depth not in self.arch_settings:
        raise KeyError('invalid depth {} for resnet'.format(depth))

    self.num_classes = num_classes
    self.depth = depth
    self.pretrained = pretrained
    self.pretrained_base = pretrained_base
    self.num_stages = num_stages
    assert 1 <= num_stages <= 4
    self.spatial_strides = spatial_strides
    self.temporal_strides = temporal_strides
    self.dilations = dilations
    assert len(spatial_strides) == len(temporal_strides) == len(
        dilations) == num_stages
    self.out_indices = out_indices
    assert max(out_indices) < num_stages
    self.style = style
    self.frozen_stages = frozen_stages
    # A single int applies the same frequency to every stage.
    self.inflate_freqs = inflate_freq if not isinstance(
        inflate_freq, int) else (inflate_freq, ) * num_stages
    self.inflate_style = inflate_style
    self.nonlocal_stages = nonlocal_stages
    self.nonlocal_freqs = nonlocal_freq if not isinstance(
        nonlocal_freq, int) else (nonlocal_freq, ) * num_stages
    self.nonlocal_cfg = nonlocal_cfg
    self.bn_eval = bn_eval
    self.bn_frozen = bn_frozen
    self.partial_bn = partial_bn
    self.with_cp = with_cp
    self.feat_ext = feat_ext
    self.dropout_ratio = dropout_ratio
    self.init_std = init_std

    self.block, stage_blocks = self.arch_settings[depth]
    self.stage_blocks = stage_blocks[:num_stages]
    self.inplanes = 64

    # Stem: spatial 7x7 conv with a configurable temporal kernel, then pooling.
    self.conv1 = nn.Conv3d(3,
                           64,
                           kernel_size=(conv1_kernel_t, 7, 7),
                           stride=(conv1_stride_t, 2, 2),
                           padding=((conv1_kernel_t - 1) // 2, 3, 3),
                           bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(pool1_kernel_t, 3, 3),
                                stride=(pool1_stride_t, 2, 2),
                                padding=(pool1_kernel_t // 2, 1, 1))

    # Residual stages, registered as layer1, layer2, ...
    self.res_layers = []
    for i, num_blocks in enumerate(self.stage_blocks):
        planes = 64 * 2**i
        res_layer = make_res_layer(
            self.block,
            self.inplanes,
            planes,
            num_blocks,
            spatial_stride=spatial_strides[i],
            temporal_stride=temporal_strides[i],
            dilation=dilations[i],
            style=self.style,
            inflate_freq=self.inflate_freqs[i],
            inflate_style=self.inflate_style,
            nonlocal_freq=self.nonlocal_freqs[i],
            nonlocal_cfg=self.nonlocal_cfg
            if i in self.nonlocal_stages else None,
            with_cp=with_cp)
        self.inplanes = planes * self.block.expansion
        layer_name = 'layer{}'.format(i + 1)
        self.add_module(layer_name, res_layer)
        self.res_layers.append(layer_name)

    # Channel count produced by the last built stage.
    self.feat_dim = self.block.expansion * 64 * 2**(
        len(self.stage_blocks) - 1)

    if self.dropout_ratio != 0:
        self.dropout = nn.Dropout(p=self.dropout_ratio)
    else:
        self.dropout = None
    self.avg_pool = nn.AdaptiveAvgPool3d(1)
    # Size the classifier from feat_dim instead of a hard-coded 2048, which
    # only matches four stages of blocks with expansion 4.
    # NOTE(review): assumes forward() feeds the pooled last-stage features
    # into fc -- confirm against forward().
    self.fc = nn.Linear(in_features=self.feat_dim, out_features=num_classes)

    if not self.pretrained:
        nn.init.normal_(self.fc.weight, 0, self.init_std)
        nn.init.constant_(self.fc.bias, 0)
def __init__(
        self,
        z_dim,
        maxpool,
        in_channels,
        out_channels,
        kernel_sizes,
        kernel_sizes_deconv,
        strides,
        strides_deconv,
        dilatations,
        dilatations_deconv,
        padding,
        padding_deconv,
        batchnorm,
        activation=torch.nn.ReLU,
        flow_type="nf",
        n_flows=2,
        n_res=3,
        n_embed=2000,
        dropout_val=0.5,
        gated=True,
        has_dense=True,
        resblocks=False,
):
    """Create the layers of a 3D convolutional autoencoder with a latent flow.

    Args:
        z_dim: size of the latent vector (output of ``dense1``).
        maxpool: kernel size given to ``MaxPool3d`` / ``MaxUnpool3d``.
        in_channels, out_channels, kernel_sizes, strides, dilatations,
            padding: per-layer encoder settings, consumed in lockstep.
        kernel_sizes_deconv, strides_deconv, dilatations_deconv,
            padding_deconv: per-layer decoder settings. Decoder channel
            counts are the encoder ones reversed and swapped.
        batchnorm: stored on the instance; BatchNorm layers are created
            regardless of its value.
        activation: activation *class*, instantiated wherever one is needed.
        flow_type: one of ``"nf"``, ``"hf"``, ``"iaf"``, ``"ccliniaf"``,
            ``"o-sylvester"``, ``"quantizer"``. Any other value leaves
            ``self.flow`` unset.
        n_flows: number of flows passed to the flow constructor.
        n_res: residual blocks added per layer when ``resblocks`` is true.
        n_embed: embedding count used by the ``"quantizer"`` flow.
        dropout_val: probability for both dropout layers.
        gated: build gated (transposed) convolutions instead of plain ones.
        has_dense, resblocks: stored flags; ``resblocks`` also enables the
            residual blocks on every layer except the first.
    """
    super(Autoencoder3DCNN, self).__init__()

    self.n_embed = n_embed
    self.out_channels = out_channels
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.n_res = n_res
    self.resblocks = resblocks
    self.has_dense = has_dense
    self.batchnorm = batchnorm
    self.a_dim = None

    layer_count = len(in_channels)
    self.indices = [torch.Tensor() for _ in range(layer_count)]

    self.activation = activation()
    self.activation_deconv = activation()
    self.GaussianSample = GaussianSample(z_dim, z_dim)

    # ---- Encoder ---------------------------------------------------------
    conv_layers, bns, resconv, activations = [], [], [], []
    encoder_specs = zip(in_channels, out_channels, kernel_sizes,
                        strides, dilatations, padding)
    for idx, (c_in, c_out, kernel, step, dilate, pad) in enumerate(encoder_specs):
        if gated:
            conv = GatedConv3d(input_channels=c_in,
                               output_channels=c_out,
                               kernel_size=kernel,
                               stride=step,
                               padding=pad,
                               dilation=dilate,
                               activation=nn.Tanh())
        else:
            conv = torch.nn.Conv3d(in_channels=c_in,
                                   out_channels=c_out,
                                   kernel_size=kernel,
                                   stride=step,
                                   padding=pad,
                                   dilation=dilate)
        conv_layers.append(conv)
        # Residual blocks are skipped for the first layer.
        if resblocks and idx != 0:
            resconv.extend(ResBlock(c_in, c_out, activation)
                           for _ in range(n_res))
        bns.append(nn.BatchNorm3d(num_features=c_out))
        activations.append(activation())

    # ---- Decoder ---------------------------------------------------------
    deconv_layers, bns_deconv, resdeconv, activations_deconv = [], [], [], []
    decoder_specs = zip(reversed(out_channels), reversed(in_channels),
                        kernel_sizes_deconv, strides_deconv,
                        dilatations_deconv, padding_deconv)
    for idx, (c_in, c_out, kernel, step, dilate, pad) in enumerate(decoder_specs):
        if gated:
            deconv = GatedConvTranspose3d(input_channels=c_in,
                                          output_channels=c_out,
                                          kernel_size=kernel,
                                          stride=step,
                                          padding=pad,
                                          dilation=dilate,
                                          activation=nn.Tanh())
        else:
            deconv = torch.nn.ConvTranspose3d(in_channels=c_in,
                                              out_channels=c_out,
                                              kernel_size=kernel,
                                              padding=pad,
                                              stride=step,
                                              dilation=dilate)
        deconv_layers.append(deconv)
        if resblocks and idx != 0:
            resdeconv.extend(ResBlockDeconv(c_in, c_out, activation)
                             for _ in range(n_res))
        bns_deconv.append(nn.BatchNorm3d(num_features=c_out))
        activations_deconv.append(activation())

    # The activation lists stay plain Python lists (not ModuleLists).
    self.activations = activations
    self.activations_deconv = activations_deconv

    # ---- Latent bottleneck and regularisation -------------------------------
    bottleneck_width = out_channels[-1]
    self.dense1 = torch.nn.Linear(in_features=bottleneck_width,
                                  out_features=z_dim)
    self.dense2 = torch.nn.Linear(in_features=z_dim,
                                  out_features=bottleneck_width)
    self.dense1_bn = nn.BatchNorm1d(num_features=z_dim)
    self.dense2_bn = nn.BatchNorm1d(num_features=bottleneck_width)
    self.dropout3d = nn.Dropout3d(dropout_val)
    self.dropout = nn.Dropout(dropout_val)
    self.maxpool = nn.MaxPool3d(maxpool, return_indices=True)
    self.maxunpool = nn.MaxUnpool3d(maxpool)

    # Wrap the collected layers so their parameters are registered.
    self.conv_layers = nn.ModuleList(conv_layers)
    self.deconv_layers = nn.ModuleList(deconv_layers)
    self.bns = nn.ModuleList(bns)
    self.bns_deconv = nn.ModuleList(bns_deconv)
    self.resconv = nn.ModuleList(resconv)
    self.resdeconv = nn.ModuleList(resdeconv)

    # ---- Latent-space flow -----------------------------------------------
    self.flow_type = flow_type
    self.n_flows = n_flows
    if flow_type == "nf":
        self.flow = NormalizingFlows(in_features=[z_dim], n_flows=n_flows)
    elif flow_type == "hf":
        self.flow = HouseholderFlow(in_features=[z_dim], auxiliary=False,
                                    n_flows=n_flows, h_last_dim=z_dim)
    elif flow_type == "iaf":
        self.flow = IAF(z_dim, n_flows=n_flows, num_hidden=n_flows,
                        h_size=z_dim, forget_bias=1., conv3d=False)
    elif flow_type == "ccliniaf":
        self.flow = ccLinIAF(in_features=[z_dim], auxiliary=False,
                             n_flows=n_flows, h_last_dim=z_dim)
    elif flow_type == "o-sylvester":
        self.flow = SylvesterFlows(in_features=[z_dim],
                                   flow_flavour='o-sylvester',
                                   n_flows=1, h_last_dim=None)
    elif flow_type == "quantizer":
        self.flow = Quantize(z_dim, self.n_embed)
def _construct_network(self, cfg):
    """
    Builds a single pathway ResNet model.

    Registers, in order: the stem ``s1``, stage ``s2``, one max-pool per
    pathway (``pathway{i}_pool``), stages ``s3``..``s5`` and ``head``.

    Args:
        cfg (CfgNode): model building configs, details are in the
            comments of the config file.
    """
    arch = cfg.MODEL.ARCH
    assert arch in _POOL1.keys()
    pool_size = _POOL1[arch]
    assert len(pool_size) == self.num_pathways
    assert cfg.RESNET.DEPTH in _MODEL_STAGE_DEPTH.keys()

    (d2, d3, d4, d5) = _MODEL_STAGE_DEPTH[cfg.RESNET.DEPTH]

    num_groups = cfg.RESNET.NUM_GROUPS
    width_per_group = cfg.RESNET.WIDTH_PER_GROUP
    dim_inner = num_groups * width_per_group

    temp_kernel = _TEMPORAL_KERNEL_BASIS[arch]

    def build_stage(index, in_mult, out_mult, depth):
        # Stage `index` (0-based) reads the index-th entry of every per-stage
        # config list and doubles the inner width with each later stage.
        return resnet_helper.ResStage(
            dim_in=[width_per_group * in_mult],
            dim_out=[width_per_group * out_mult],
            dim_inner=[dim_inner * 2 ** index],
            temp_kernel_sizes=temp_kernel[index + 1],
            stride=cfg.RESNET.SPATIAL_STRIDES[index],
            num_blocks=[depth],
            num_groups=[num_groups],
            num_block_temp_kernel=cfg.RESNET.NUM_BLOCK_TEMP_KERNEL[index],
            nonlocal_inds=cfg.NONLOCAL.LOCATION[index],
            nonlocal_group=cfg.NONLOCAL.GROUP[index],
            nonlocal_pool=cfg.NONLOCAL.POOL[index],
            instantiation=cfg.NONLOCAL.INSTANTIATION,
            trans_func_name=cfg.RESNET.TRANS_FUNC,
            stride_1x1=cfg.RESNET.STRIDE_1X1,
            inplace_relu=cfg.RESNET.INPLACE_RELU,
            dilation=cfg.RESNET.SPATIAL_DILATIONS[index],
            norm_module=self.norm_module,
        )

    self.s1 = stem_helper.VideoModelStem(
        dim_in=cfg.DATA.INPUT_CHANNEL_NUM,
        dim_out=[width_per_group],
        kernel=[temp_kernel[0][0] + [7, 7]],
        stride=[[1, 2, 2]],
        padding=[[temp_kernel[0][0][0] // 2, 3, 3]],
        norm_module=self.norm_module,
    )

    self.s2 = build_stage(0, 1, 4, d2)

    # One pooling layer per pathway, registered between s2 and s3.
    for pathway in range(self.num_pathways):
        window = pool_size[pathway]
        self.add_module(
            "pathway{}_pool".format(pathway),
            nn.MaxPool3d(kernel_size=window, stride=window, padding=[0, 0, 0]),
        )

    self.s3 = build_stage(1, 4, 8, d3)
    self.s4 = build_stage(2, 8, 16, d4)
    self.s5 = build_stage(3, 16, 32, d5)

    head_width = [width_per_group * 32]
    temporal_pool = cfg.DATA.NUM_FRAMES // pool_size[0][0]

    if self.enable_detection:
        self.head = head_helper.ResNetRoIHead(
            dim_in=head_width,
            num_classes=cfg.MODEL.NUM_CLASSES,
            pool_size=[[temporal_pool, 1, 1]],
            resolution=[[cfg.DETECTION.ROI_XFORM_RESOLUTION] * 2],
            scale_factor=[cfg.DETECTION.SPATIAL_SCALE_FACTOR],
            dropout_rate=cfg.MODEL.DROPOUT_RATE,
            act_func=cfg.MODEL.HEAD_ACT,
            aligned=cfg.DETECTION.ALIGNED,
        )
    else:
        if cfg.MULTIGRID.SHORT_CYCLE:
            # None for AdaptiveAvgPool3d((1, 1, 1))
            head_pool = [None, None]
        else:
            head_pool = [[
                temporal_pool,
                cfg.DATA.TRAIN_CROP_SIZE // 32 // pool_size[0][1],
                cfg.DATA.TRAIN_CROP_SIZE // 32 // pool_size[0][2],
            ]]
        self.head = head_helper.ResNetBasicHead(
            dim_in=head_width,
            num_classes=cfg.MODEL.NUM_CLASSES,
            pool_size=head_pool,
            dropout_rate=cfg.MODEL.DROPOUT_RATE,
            act_func=cfg.MODEL.HEAD_ACT,
        )
# Fragment of a flat script: each section creates a random input and a layer,
# then hands both to save_data_and_model under a case name.
# NOTE(review): save_data_and_model, and the `input`/`model` used by the first
# call, are defined outside this view -- presumably an exporter; confirm.
save_data_and_model("unsqueeze", input, model)

# 2D transposed convolution with non-square kernel, per-axis stride/padding
# and a non-zero output_padding on the last axis.
input = Variable(torch.randn(1, 2, 4, 5))
deconv_adjpad2d = nn.ConvTranspose2d(2, 3, (3, 2), stride=(1, 2), padding=(1, 2), output_padding=(0, 1))
save_data_and_model("deconv_adjpad_2d", input, deconv_adjpad2d)

# Plain 3D convolution: unit stride, no padding, no dilation, no bias.
input = Variable(torch.randn(1, 2, 3, 4, 5))
conv3d = nn.Conv3d(2, 3, (2, 3, 2), stride=(1, 1, 1), padding=(0, 0, 0), groups=1, dilation=(1, 1, 1), bias=False)
save_data_and_model("conv3d", input, conv3d)

# 3D convolution with per-axis stride, padding and dilation, plus a bias.
input = Variable(torch.randn(1, 2, 3, 4, 5))
conv3d = nn.Conv3d(2, 3, (2, 3, 3), stride=(1, 2, 3), padding=(0, 1, 2), groups=1, dilation=(1, 2, 3), bias=True)
save_data_and_model("conv3d_bias", input, conv3d)

# 3D max pooling with a different kernel, stride and padding on each axis.
input = torch.randn(1, 2, 3, 4, 6)
maxpool3d = nn.MaxPool3d((3, 2, 5), stride=(2, 1, 2), padding=(1, 0, 2))
save_data_and_model("max_pool3d", input, maxpool3d)

# 3D average pooling with a different kernel, stride and padding on each axis.
input = torch.randn(1, 2, 3, 5, 6)
avepool3d = nn.AvgPool3d((3, 4, 3), stride=(1, 2, 3), padding=(1, 2, 0))
save_data_and_model("ave_pool3d", input, avepool3d)

# 3D batch normalisation over 2 channels. The variable is still called
# `conv3d`, although it holds a BatchNorm3d layer.
input = Variable(torch.randn(1, 2, 3, 4, 5))
conv3d = nn.BatchNorm3d(2)
save_data_and_model("batch_norm_3d", input, conv3d)

# Module holding a softmax over the last dimension. Only the constructor is
# visible in this fragment.
class Softmax(nn.Module):
    def __init__(self):
        super(Softmax, self).__init__()
        self.softmax = nn.Softmax(dim=-1)
def __init__(self):
    """Create the two layers this module holds: a 3D max-pool and a 3D conv.

    ``pool`` uses a 3x3x3 window with stride 2 and padding 1 on every axis;
    ``conv`` is a 2-channel to 2-channel 3x3x3 convolution with stride 1 and
    padding 1.
    """
    super(PoolConv, self).__init__()

    pool_window = (3, 3, 3)
    pool_step = (2, 2, 2)
    pool_pad = (1, 1, 1)
    self.pool = nn.MaxPool3d(pool_window, stride=pool_step, padding=pool_pad)

    channels = 2
    self.conv = nn.Conv3d(channels, channels,
                          kernel_size=3, stride=1, padding=1)
def __init__(self,
             block,
             layers,
             sample_input_D,
             sample_input_H,
             sample_input_W,
             num_seg_classes,
             shortcut_type='B',
             no_cuda=False):
    """Build a 3D ResNet with dilated later stages and a linear classifier.

    Args:
        block: residual block class; ``block.expansion`` sizes the ``fc`` input.
        layers: sequence of four ints, the number of blocks per stage.
        sample_input_D, sample_input_H, sample_input_W: accepted for interface
            compatibility; not used in this constructor.
        num_seg_classes (int): output size of the final linear layer.
        shortcut_type (str): forwarded to ``_make_layer``.
        no_cuda (bool): stored on the instance as ``self.no_cuda``.
    """
    super(ResNet, self).__init__()
    # Both attributes must exist before the first _make_layer call.
    self.inplanes = 64
    self.no_cuda = no_cuda

    self.conv1 = nn.Conv3d(
        1,
        64,
        kernel_size=7,
        stride=(2, 2, 2),
        padding=(3, 3, 3),
        bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

    # Stages 3 and 4 keep stride 1 and use dilation 2 / 4 instead.
    self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
    self.layer2 = self._make_layer(
        block, 128, layers[1], shortcut_type, stride=2)
    self.layer3 = self._make_layer(
        block, 256, layers[2], shortcut_type, stride=1, dilation=2)
    self.layer4 = self._make_layer(
        block, 512, layers[3], shortcut_type, stride=1, dilation=4)

    # Classification head. A transposed-conv segmentation head (conv_seg)
    # used to live here as commented-out code.
    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_seg_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            # In-place initialiser; the non-underscore name is deprecated.
            nn.init.kaiming_normal_(m.weight, mode='fan_out')
        elif isinstance(m, nn.BatchNorm3d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
def __init__(self,
             block,
             layers,
             num_classes=1000,
             search=False,
             op_code='conv3d',
             conv_config=None,
             conv_index=None):
    """Build a ResNet whose stages receive consecutive slices of conv_config.

    Args:
        block: residual block class; ``block.expansion`` sizes the ``fc`` input.
        layers: sequence of four ints, the number of blocks per stage.
        num_classes (int): output size of the final linear layer.
        search (bool): stored and forwarded to ``_make_layer``.
        op_code (str): stored and forwarded to ``_make_layer``.
        conv_config (list): per-block configuration entries; stage ``k`` gets
            the ``layers[k]`` entries that follow those of the earlier stages.
            Defaults to 200 ``None`` entries.
        conv_index: accepted but not used in this constructor.
    """
    self.inplanes = 64
    self.search = search
    self.op_code = op_code
    if conv_config is None:
        conv_config = [None] * 200
    self.conv_config = conv_config
    super(ResNet, self).__init__()

    # Stem: spatial-only 7x7 convolution and 3x3 pooling (temporal size 1).
    self.conv1 = nn.Conv3d(3, 64,
                           kernel_size=(1, 7, 7),
                           stride=(1, 2, 2),
                           padding=(0, 3, 3),
                           bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                stride=(1, 2, 2),
                                padding=(0, 1, 1))

    # (planes, stride) for layer1..layer4; a running offset hands each stage
    # its own window of conv_config.
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    begin = 0
    for stage, (planes, stride) in enumerate(stage_specs):
        block_count = layers[stage]
        end = begin + block_count
        stage_module = self._make_layer(block, planes, block_count,
                                        stride=stride,
                                        search=search,
                                        op_code=op_code,
                                        conv_config=conv_config[begin:end])
        setattr(self, 'layer' + str(stage + 1), stage_module)
        begin = end

    self.avgpool = nn.AvgPool2d(7, stride=1)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    for module in self.modules():
        if isinstance(module, nn.BatchNorm3d):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)
        elif isinstance(module, nn.Conv3d):
            nn.init.kaiming_normal_(module.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
def __init__(self, dim, inplanes, planes, downsample, use_gn, lr_mult,
             use_out, out_bn, whiten_type, temperature, with_gc):
    """Create the projections and normalisations of an N-d non-local block.

    Args:
        dim (int): spatial dimensionality, 1, 2 or 3; selects the conv,
            max-pool and batch-norm classes.
        inplanes (int): channels of the block input (and output).
        planes (int): channels of the query/key projections.
        downsample (bool): create a 2x max-pool, stored as ``self.downsample``
            (``None`` otherwise). For ``dim == 3`` only the last two axes are
            pooled.
        use_gn: accepted but not used in this constructor.
        lr_mult: forwarded to ``self.reset_lr_mult``.
        use_out (bool): project values to ``planes`` and add an output conv
            back to ``inplanes``; otherwise values keep ``inplanes`` channels
            and ``conv_out`` is ``None``.
        out_bn (bool): create ``self.out_bn``, a batch norm over ``inplanes``
            channels matching ``dim``.
        whiten_type: container or string tested for ``'bn_affine'`` and
            ``'bn'`` to decide which key/query BatchNorm1d layers to create.
        temperature: stored on the instance.
        with_gc (bool): create ``self.conv_mask``, a 1-channel projection.
    """
    assert dim in [1, 2, 3], "dim {} is not supported yet".format(dim)
    # assert whiten_type in ['channel', 'spatial']

    if dim == 3:
        conv_nd, bn_nd = nn.Conv3d, nn.BatchNorm3d
        if downsample:
            max_pool = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        else:
            max_pool = None
    elif dim == 2:
        conv_nd, bn_nd = nn.Conv2d, nn.BatchNorm2d
        if downsample:
            max_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        else:
            max_pool = None
    else:
        conv_nd, bn_nd = nn.Conv1d, nn.BatchNorm1d
        if downsample:
            max_pool = nn.MaxPool1d(kernel_size=2, stride=2)
        else:
            max_pool = None

    super(_NonLocalNd_bn, self).__init__()

    self.conv_query = conv_nd(inplanes, planes, kernel_size=1)
    self.conv_key = conv_nd(inplanes, planes, kernel_size=1)
    if use_out:
        self.conv_value = conv_nd(inplanes, planes, kernel_size=1)
        self.conv_out = conv_nd(planes, inplanes, kernel_size=1, bias=False)
    else:
        self.conv_value = conv_nd(inplanes, inplanes, kernel_size=1,
                                  bias=False)
        self.conv_out = None

    # Use the norm class selected for `dim`; this was hard-coded to
    # BatchNorm2d, which only accepts 4-D inputs.
    # NOTE(review): assumes out_bn is applied to a tensor shaped like the
    # block input -- confirm in forward().
    if out_bn:
        self.out_bn = bn_nd(inplanes)
    else:
        self.out_bn = None

    if with_gc:
        self.conv_mask = conv_nd(inplanes, 1, kernel_size=1)

    if 'bn_affine' in whiten_type:
        self.key_bn_affine = nn.BatchNorm1d(planes)
        self.query_bn_affine = nn.BatchNorm1d(planes)
    if 'bn' in whiten_type:
        self.key_bn = nn.BatchNorm1d(planes, affine=False)
        self.query_bn = nn.BatchNorm1d(planes, affine=False)

    self.softmax = nn.Softmax(dim=2)
    self.downsample = max_pool
    # Learnable scalar, initialised to zero.
    self.gamma = nn.Parameter(torch.zeros(1))
    self.scale = math.sqrt(planes)
    self.whiten_type = whiten_type
    self.temperature = temperature
    self.with_gc = with_gc

    self.reset_parameters()
    self.reset_lr_mult(lr_mult)
def __init__(self, n_features=32):
    """Build four 3D convolutional blocks and two linear classifier heads.

    Args:
        n_features (int): channel width of the first block; later blocks use
            2x, 4x and 8x this width.

    Every block holds two (Conv3d 3x3x3, BatchNorm3d, ReLU) units. Blocks 1-3
    end with a (1, 3, 3) max-pool. Block 4 ends with an adaptive average pool
    to (1, 5, 5) followed by ``Flatten``.
    """
    super(DenchikModel, self).__init__()

    def conv_unit(c_in, c_out, padding, dilation):
        # One 3x3x3 convolution followed by batch norm and ReLU.
        return [
            nn.Conv3d(c_in, c_out, kernel_size=3, stride=1,
                      padding=padding, dilation=dilation),
            nn.BatchNorm3d(c_out),
            nn.ReLU(inplace=True),
        ]

    w1 = n_features
    w2 = n_features * 2
    w4 = n_features * 4
    w8 = n_features * 8

    self.block1 = nn.Sequential(*(
        conv_unit(3, w1, (2, 1, 1), 2)
        + conv_unit(w1, w1, (1, 0, 0), 1)
        + [nn.MaxPool3d(kernel_size=(1, 3, 3))]))

    self.block2 = nn.Sequential(*(
        conv_unit(w1, w2, (2, 1, 1), 2)
        + conv_unit(w2, w2, (0, 0, 0), 1)
        + [nn.MaxPool3d(kernel_size=(1, 3, 3))]))

    self.block3 = nn.Sequential(*(
        conv_unit(w2, w4, (1, 1, 1), 1)
        + conv_unit(w4, w4, (1, 0, 0), 1)
        + [nn.MaxPool3d(kernel_size=(1, 3, 3))]))

    self.block4 = nn.Sequential(*(
        conv_unit(w4, w8, (1, 1, 1), 1)
        + conv_unit(w8, w8, (0, 0, 0), 1)
        + [nn.AdaptiveAvgPool3d((1, 5, 5)), Flatten()]))

    # block4 pools to w8 x 1 x 5 x 5, so the flattened width follows
    # n_features instead of the hard-coded 256 (only right for 32).
    # NOTE(review): assumes Flatten collapses all non-batch dims -- confirm.
    flat_features = w8 * 5 * 5

    # Softmax over the class (last) dimension; omitting `dim` is deprecated.
    self.classifier1 = nn.Sequential(nn.Linear(flat_features, 4),
                                     nn.Softmax(dim=-1))
    self.classifier2 = nn.Sequential(nn.Linear(flat_features, 1),
                                     nn.Sigmoid())
def __init__(self,
             block,
             layers,
             sample_size,
             sample_duration,
             shortcut_type='B',
             num_classes=400,
             **kwargs):
    """Build a 3D ResNet backbone with a channel-reducing conv and dropout.

    Args:
        block: residual block class passed to ``_make_layer``.
        layers: sequence of four ints, the number of blocks per stage.
        sample_size (int): spatial input size; sizes the average pool.
        sample_duration (int): temporal input length; sizes the average pool.
        shortcut_type (str): forwarded to ``_make_layer``.
        num_classes (int): accepted but not used here (the ``fc`` layer is
            disabled).
        **kwargs: must contain the key ``'kwargs'`` holding an object with the
            attributes ``inputsize``, ``vdlist``, ``BBOX_dir`` and
            ``image_num``.

    Raises:
        KeyError: if ``kwargs`` has no ``'kwargs'`` entry.
    """
    super(ResNet, self).__init__()
    # Must exist before the first _make_layer call.
    self.inplanes = 64

    options = kwargs['kwargs']
    self.In_IM_size = options.inputsize
    self.vdlist = options.vdlist
    self.BOX_dir = options.BBOX_dir
    self.img_num = options.image_num
    self.ROI_size = int(math.ceil(self.In_IM_size / 16))
    self.last_size = int(math.ceil(self.In_IM_size / 32))
    self.time_scle = int(self.img_num / 8)

    self.conv1 = nn.Conv3d(
        3,
        64,
        kernel_size=7,
        stride=(1, 2, 2),
        padding=(3, 3, 3),
        bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1)

    self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type)
    self.layer2 = self._make_layer(
        block, 128, layers[1], shortcut_type, stride=2)
    self.layer3 = self._make_layer(
        block, 256, layers[2], shortcut_type, stride=2)
    self.layer4 = self._make_layer(
        block, 512, layers[3], shortcut_type, stride=1)

    # 1x1x1 convolution reducing 2048 channels to 512.
    # NOTE(review): 2048 presumably equals 512 * block.expansion for a
    # bottleneck block -- confirm before using a different block type.
    self.ConDown = nn.Conv3d(2048, 512, (1, 1, 1), bias=True)
    self.Drop = nn.Dropout()

    last_duration = int(math.ceil(sample_duration / 16))
    last_size = int(math.ceil(sample_size / 32))
    self.avgpool = nn.AvgPool3d(
        (last_duration, last_size, last_size), stride=1)
    # The linear classifier and the graph components (box loader, RoI layer,
    # GNN) that used to be created here are disabled.

    for m in self.modules():
        if isinstance(m, nn.Conv3d):
            # In-place initialiser; the non-underscore name is deprecated.
            nn.init.kaiming_normal_(m.weight, mode='fan_out')
        elif isinstance(m, nn.BatchNorm3d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)