def __init__(self, block, layers, num_classes=2, zero_init_residual=False):
    """Assemble the 3D ResNet: stem, four residual stages, pooling, classifier.

    Args:
        block: residual block class; its ``expansion`` attribute sizes the
            classifier input and it is forwarded to ``self._make_layer``.
        layers: indexable with four entries, one per stage, forwarded to
            ``self._make_layer``.
        num_classes: output size of the final fully connected layer.
        zero_init_residual: when True, the weight of the last BatchNorm in
            every ``Bottleneck`` (``bn3``) / ``BasicBlock`` (``bn2``) is set
            to zero after the regular initialisation.
    """
    super(ResNet, self).__init__()
    self.in_channels = 64

    # Stem: 7x7x7 stride-2 convolution over a 2-channel input, BN, ReLU,
    # then a stride-2 max pool.
    self.conv1 = nn.Conv3d(2, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool3d(kernel_size=3, stride=2, padding=1)

    # Residual stages; every stage after the first is built with stride 2.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Head: global average pooling followed by a linear classifier.
    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Kaiming-normal for convolutions, unit weight / zero bias for BN.
    for module in self.modules():
        if isinstance(module, nn.Conv3d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
            continue
        if isinstance(module, nn.BatchNorm3d):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)

    if not zero_init_residual:
        return

    # Zero the last BN of each residual branch so the branch starts at zero
    # and each block initially acts as an identity mapping
    # (https://arxiv.org/abs/1706.02677 reports a 0.2~0.3% gain).
    for module in self.modules():
        if isinstance(module, Bottleneck):
            nn.init.constant_(module.bn3.weight, 0)
        elif isinstance(module, BasicBlock):
            nn.init.constant_(module.bn2.weight, 0)
def __init__(self):
    """Create the stem, four block stages and the global pooling layer."""
    super(STsarnet, self).__init__()

    # Stem convolution: kernel 1x7x7, stride 1x2x2, padding 1x3x3.
    # NOTE(review): the previous comment described the kernel as 3x7x7 while
    # the code passes [1, 7, 7] — confirm which one is intended.
    stem_kernel = [1, 7, 7]
    self.conv1 = ST_Conv(1, 64, stem_kernel, stride=[1, 2, 2],
                         padding=[1, 3, 3])

    # conv2 is built without ``downsample``; kernel size argument is 3.
    self.conv2 = ST_Basic_Block(64, 64, 3)

    # The last three stages each double the second (width) argument and are
    # built with ``downsample=True``.
    self.conv3 = ST_Basic_Block(64, 128, 3, downsample=True)
    self.conv4 = ST_Basic_Block(128, 256, 3, downsample=True)
    self.conv5 = ST_Basic_Block(256, 512, 3, downsample=True)

    # Global average pooling of the final feature map.
    self.pool = nn.AdaptiveAvgPool3d(1)
def __init__(self, opt):
    """Build the discriminator network from the options object ``opt``.

    Reads ``opt.tch``, ``opt.input_dim``, ``opt.dis_norm``,
    ``opt.dis_spectral_norm`` and ``opt.num_domains``.
    """
    super(NetDis, self).__init__()
    tch, input_dim, norm, sn, c_dim = (
        opt.tch,
        opt.input_dim,
        opt.dis_norm,
        opt.dis_spectral_norm,
        opt.num_domains,
    )
    depth = 6

    # Shared trunk; ``_make_net`` also reports its output channel count.
    self.model, feat_dim = self._make_net(tch, input_dim, depth, norm, sn)

    # Two bias-free 1x1x1 heads on the trunk output: a single-channel one and
    # one with ``c_dim`` (``opt.num_domains``) channels.
    self.conv1 = nn.Conv3d(feat_dim, 1, kernel_size=1, stride=1, bias=False)
    self.conv2 = nn.Conv3d(feat_dim, c_dim, kernel_size=1, bias=False)
    self.pool = nn.AdaptiveAvgPool3d(1)
def forward(self, input, lateral):
    """Run the stem and residual stages, fusing one lateral tensor per stage.

    Args:
        input: tensor fed to ``self.conv1``.
        lateral: indexable with four tensors; ``lateral[i]`` is concatenated
            on the channel axis (dim 1) in front of the i-th residual stage.

    Returns:
        The globally average-pooled features reshaped to ``(-1, channels)``.
    """
    # Stem.
    x = self.maxpool(self.relu(self.bn1(self.conv1(input))))

    # Each residual stage consumes its input joined with a lateral feature.
    stages = (self.res2, self.res3, self.res4, self.res5)
    for idx, stage in enumerate(stages):
        x = stage(torch.cat([x, lateral[idx]], dim=1))

    # Global average pooling, then drop the singleton spatial/temporal dims.
    x = nn.functional.adaptive_avg_pool3d(x, 1)
    return x.view(-1, x.size(1))
def __init__(self, dropout_rate, expand_k, freeze_backbone, freeze_blocks,
             pretrained_backbone=False, pretrained_path=None):
    """Build the C3D backbone, attention branch and 2-way regressor.

    Args:
        dropout_rate: dropout probability placed before the final linear layer.
        expand_k: forwarded to ``Self_Guided_Attention_Branch_Module``.
        freeze_backbone: stored on the instance as ``self.freeze_backbone``.
        freeze_blocks: names of backbone blocks stored as
            ``self.freeze_blocks``; ``None`` selects every conv block from
            ``conv1a`` to ``conv5b``.
        pretrained_backbone: when true (and ``pretrained_path`` is given),
            pretrained weights are loaded into the backbone.
        pretrained_path: location passed to ``load_c3d_pretrained_model``.
    """
    super(C3D_SGA_STD, self).__init__()
    self.backbone = C3DBackbone()
    self.Regressor = nn.Sequential(nn.Dropout(dropout_rate),
                                   nn.Linear(512, 2))
    self.GAP = nn.AdaptiveAvgPool3d(1)
    self.freeze_backbone = freeze_backbone

    # Identity comparison with None: ``== None`` would dispatch to a custom
    # ``__eq__`` (e.g. element-wise on arrays) instead of testing for absence.
    if freeze_blocks is None:
        self.freeze_blocks = ['conv1a', 'conv2a', 'conv3a', 'conv3b',
                              'conv4a', 'conv4b', 'conv5a', 'conv5b']
    else:
        self.freeze_blocks = freeze_blocks

    self.Softmax = nn.Softmax(dim=-1)
    self.pretrained_backbone = pretrained_backbone
    self.Conv_Atten = Self_Guided_Attention_Branch_Module(
        512, expand_k, out_t_channels=2)

    # Only load weights when explicitly requested and a path was provided.
    if self.pretrained_backbone and pretrained_path is not None:
        load_c3d_pretrained_model(self.backbone, pretrained_path)
def __init__(self):
    """Build a reduced VGG-style 3D network ending in a sigmoid classifier."""
    super(Vgg16Bn3D, self).__init__()
    self.trainOnSups = False
    num_classes = 2

    # Shallow feature configuration used in place of the full VGG16 layout
    # [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
    #  512, 512, 512] that was previously built with utils.make_vgg_blocks.
    block_cfg = [64, 'M', 128, 'M', 256, 'M', 512]
    self.features = utils.make_vgg3D_blocks(block_cfg, batch_norm=True)

    # Fixed 7x7x7 output so the classifier input size is constant.
    self.avgpool = nn.AdaptiveAvgPool3d((7, 7, 7))

    flat_features = 512 * 7 * 7 * 7
    hidden = 2048
    self.classifier = nn.Sequential(
        nn.Linear(flat_features, hidden),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(hidden, hidden),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(hidden, num_classes),
        nn.Sigmoid(),
    )
def __init__(self, in_channels, out_channels, net_mode='2d'):
    """Create the pooling, 1x1 convolutions and activations of the SE block.

    Args:
        in_channels: input channel count of both 1x1 convolutions.
        out_channels: output channel count of both 1x1 convolutions.
        net_mode: ``'2d'`` selects 2D pooling/convolution, ``'3d'`` the 3D
            variants.

    Raises:
        ValueError: if ``net_mode`` is neither ``'2d'`` nor ``'3d'``.
            Previously this case fell through with ``conv = None`` and failed
            with an opaque "'NoneType' object is not callable".
    """
    super(SEBlock, self).__init__()
    if net_mode == '2d':
        self.gap = nn.AdaptiveAvgPool2d(1)
        conv = nn.Conv2d
    elif net_mode == '3d':
        self.gap = nn.AdaptiveAvgPool3d(1)
        conv = nn.Conv3d
    else:
        raise ValueError(
            "net_mode must be '2d' or '3d', got {!r}".format(net_mode))

    self.conv1 = conv(in_channels, out_channels, 1)
    self.conv2 = conv(in_channels, out_channels, 1)
    self.relu = nn.ReLU(inplace=True)
    self.sigmoid = nn.Sigmoid()
def forward(self, x):
    """Fuse long-, middle- and small-range temporal features of ``x``.

    ``x`` must be 5-D; its third axis length ``t`` is preserved by the
    spatial pooling. The pooled tensor is sampled along that axis with
    strides 4, 2 and 1, each sample goes through its own dependency module,
    ``tpp_*`` module and ``fusion_*`` module, and the three results (with
    singleton dims 2..4 squeezed) are summed.
    """
    # Unpacking also enforces that the input is 5-D.
    _, _, t, _, _ = x.size()

    # Average of adaptive average pooling and adaptive max pooling over the
    # two trailing axes.
    target = (t, 1, 1)
    avg_part = nn.functional.adaptive_avg_pool3d(x, target) / 2
    max_part = nn.functional.adaptive_max_pool3d(x, target) / 2
    pooled = self.channel_compress(avg_part + max_part)

    # One branch per temporal sampling stride.
    strides = (4, 2, 1)
    dependency = (self.long_range_depen, self.middle_range_depen,
                  self.small_range_depen)
    feats = [module(pooled[:, :, ::step, :, :])
             for module, step in zip(dependency, strides)]

    pyramid = (self.tpp_1, self.tpp_2, self.tpp_3)
    feats = [module(feat) for module, feat in zip(pyramid, feats)]

    fusion = (self.fusion_1, self.fusion_2, self.fusion_3)
    fused = [module(feat).squeeze(2).squeeze(2).squeeze(2)
             for module, feat in zip(fusion, feats)]

    return fused[0] + fused[1] + fused[2]
def __init__(self):
    """Create the temporal/spatial convolution stack and the linear head."""
    super(Conv3d, self).__init__()

    # NOTE(review): ``time_normalization`` is not a parameter of this
    # constructor, so it has to resolve to a module-level name — confirm it
    # is defined at module scope, otherwise this line raises NameError.
    self.time_normalization = time_normalization

    # First stage: convolution along the first spatial-temporal axis, then
    # across the remaining two axes, then average pooling along the first.
    self.conv_time = nn.Conv3d(in_channels=1, out_channels=32,
                               kernel_size=(6, 1, 1))
    self.conv_spat = nn.Conv3d(in_channels=32, out_channels=64,
                               kernel_size=(1, 2, 2))
    self.pool_time = nn.AvgPool3d(kernel_size=(3, 1, 1), stride=(10, 1, 1))

    # Second stage mirrors the first with wider channels.
    self.conv_time2 = nn.Conv3d(in_channels=64, out_channels=128,
                                kernel_size=(6, 1, 1))
    self.conv_spat2 = nn.Conv3d(in_channels=128, out_channels=256,
                                kernel_size=(1, 2, 2))
    self.pool_time2 = nn.AvgPool3d(kernel_size=(3, 1, 1), stride=(10, 1, 1))

    # Normalisation, global pooling and a two-layer classifier (2 outputs).
    self.batchnorm = nn.BatchNorm3d(256)
    self.adaptivepool = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.linear = nn.Linear(256, 512)
    self.batchnormlinear = nn.BatchNorm1d(512)
    self.linear2 = nn.Linear(512, 2)
def resize(x, oupu):
    """Convert a tensor to a requested shape, pooling adaptively if needed.

    The plan comes from ``resizing_args(list(x.shape), oupu)``. When it has
    three entries, ``x`` is viewed as ``args[0]``, adaptively average-pooled
    to ``args[1]`` and finally viewed as ``args[-1]``; otherwise only the
    final view is applied.

    Args:
        x: a tensor.
        oupu: the desired shape (a list; inference on multiple dimensions is
            allowed, as resolved by ``resizing_args``).

    Returns:
        A tensor of the desired shape.

    Raises:
        ValueError: if pooling is required but the intermediate shape is not
            3-, 4- or 5-dimensional. Previously this left ``pool`` unbound
            and failed with an UnboundLocalError.
    """
    args = resizing_args(list(x.shape), oupu)
    if len(args) == 3:
        # Pick the pooling layer matching the rank of the intermediate view.
        pool_by_rank = {
            3: nn.AdaptiveAvgPool1d,
            4: nn.AdaptiveAvgPool2d,
            5: nn.AdaptiveAvgPool3d,
        }
        rank = len(args[0])
        if rank not in pool_by_rank:
            raise ValueError(
                "resize cannot pool a {}-dimensional view; expected 3, 4 or "
                "5 dimensions".format(rank))
        pool = pool_by_rank[rank](args[1])
        x = pool(x.view(args[0]))
    return x.view(args[-1])
def __init__(self, cfg):
    """Build the stem, four residual stages and the projection head.

    Args:
        cfg: configuration object; ``cfg.MODEL.NUM_CLASSES`` sets the output
            size of the last projection.
    """
    super(MyModel, self).__init__()

    self.conv1 = conv_bn(3, 32, (1, 1, 1))

    # Residual stages, all built with stride (1, 2, 2).
    stage_stride = (1, 2, 2)
    self.res_stage2 = ResStage(dim_in=32, dim_out=32, stride=stage_stride,
                               num_blocks=5, hidden_dim=72)
    self.res_stage3 = ResStage(dim_in=32, dim_out=72, stride=stage_stride,
                               num_blocks=10, hidden_dim=162)
    self.res_stage4 = ResStage(dim_in=72, dim_out=136, stride=stage_stride,
                               num_blocks=25, hidden_dim=306)
    self.res_stage5 = ResStage(dim_in=136, dim_out=280, stride=stage_stride,
                               num_blocks=15, hidden_dim=630)

    self.conv5 = conv_1x1x1_bn(280, 630)
    self.pool5 = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.dropout = nn.Dropout(p=0.5)

    # Perform FC in a fully convolutional manner. The FC layers are
    # initialised with a different std than the convolutional layers.
    self.projection1 = nn.Linear(630, 2048, bias=True)
    self.projection2 = nn.Linear(2048, cfg.MODEL.NUM_CLASSES, bias=True)
    self.act = nn.Softmax(dim=4)

    # Previously a ResNetBasicHead was used here instead:
    # self.head = ResNetBasicHead(
    #     dim_in=[width_per_group * 32],
    #     num_classes=num_classes,
    #     pool_size=[None],
    #     dropout_rate=0.5,
    #     act_func='softmax',  # softmax for kinetics, sigmoid for ada
    # )

    init_helper.init_weights(self, 0.01, True)
def __init__(self, config, select_num=10, class_num=4, side=False,
             is_cat=False):
    """Build the selector, the pretrained ResNet head and the classifiers.

    Args:
        config: configuration object; ``config.MODEL.RESNETS.*`` parametrise
            the ResNet head.
        select_num: stored as ``self.select_num``.
        class_num: output size of the main classifier (``fc2``).
        side: when true, additional 21-way side layers are created.
        is_cat: selects the layer set with 4096-wide classifier input
            (true) or the one with 2048-wide input (false).
    """
    super(Predictor, self).__init__()
    self.selector = Selector()
    self.select_num = select_num
    self.sftmax = nn.Softmax(dim=0)
    self.side = side
    self.is_cat = is_cat

    # ResNet head (stage index 4, three blocks).
    stage = resnet.StageSpec(index=4, block_count=3, return_features=False)
    self.head = resnet.ResNetHead(
        block_module=config.MODEL.RESNETS.TRANS_FUNC,
        stages=(stage, ),
        num_groups=config.MODEL.RESNETS.NUM_GROUPS,
        width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP,
        stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1,
        dilation=config.MODEL.RESNETS.RES5_DILATION)

    # Load the checkpoint onto the CPU: without ``map_location`` the tensors
    # are restored onto the device they were saved from, which fails on
    # hosts lacking that device. ``load_state_dict`` copies the values into
    # the head's own parameters, so the final placement is unaffected.
    # TODO: how to avoid this? Maybe we can involve the weights into
    # model_final.pth
    self.head.load_state_dict(
        torch.load('/data6/SRIP19_SelfDriving/Outputs/layer4.pth',
                   map_location='cpu'))

    self.avgpool_glob = nn.AdaptiveAvgPool2d(output_size=14)

    if self.is_cat:
        # Concatenated-feature variant: 4096 -> 100 -> class_num.
        self.avg1 = nn.AdaptiveAvgPool3d(output_size=1)
        self.avg2 = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc1 = nn.Linear(4096, 100)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(100, class_num)
        self.drop = nn.Dropout(p=0.25)
        if self.side:
            self.fc_side1 = nn.Linear(4096, 100)
            self.relu_side1 = nn.ReLU(inplace=True)
            self.fc_side2 = nn.Linear(100, 21)
    else:
        # Single-feature variant: 2048 -> class_num.
        self.avg = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc2 = nn.Linear(2048, class_num)
        self.drop = nn.Dropout(p=0.25)
        if self.side:
            self.fc_side = nn.Linear(2048, 21)
def __init__(
    self,
    input_fan,
    num_classes,
    dropout_rate=0.0,
    act_func="softmax",
):
    """
    Build a simple classification head: global average pooling, optional
    dropout, one linear layer and an output activation.

    Args:
        input_fan (int): number of input features of the linear layer.
        num_classes (int): number of output features of the linear layer.
        dropout_rate (float): dropout rate. If equal to 0.0, no dropout
            layer is created.
        act_func (string): activation function to use. 'softmax': applies
            softmax on the output. 'sigmoid': applies sigmoid on the output.

    Raises:
        NotImplementedError: if ``act_func`` is neither 'softmax' nor
            'sigmoid'.
    """
    super(ResNetSimpleHead, self).__init__()
    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))

    # ``self.dropout`` only exists when dropout is actually requested.
    if dropout_rate > 0.0:
        self.dropout = nn.Dropout(dropout_rate)

    # A stray ``self.avg_pool`` expression used to sit here; the attribute
    # does not exist (the pooling layer is ``self.avgpool``), so it raised
    # AttributeError on every construction and has been removed.
    self.fc = nn.Linear(input_fan, num_classes, bias=True)

    # Softmax / sigmoid for evaluation and testing.
    if act_func == "softmax":
        self.act = nn.Softmax(dim=1)
    elif act_func == "sigmoid":
        self.act = nn.Sigmoid()
    else:
        raise NotImplementedError("{} is not supported as an activation"
                                  "function.".format(act_func))
def __init__(self, layer_sizes=(1, 1, 1, 1), block_type=SpatioTemporalResBlock,
             num_classes=4):
    """Build the R3D network: stem, four residual layers, pooling, classifier.

    Args:
        layer_sizes: four entries, one per residual layer, forwarded to
            ``SpatioTemporalResLayer``.
        block_type: block class forwarded to every residual layer.
        num_classes: output size of the final linear layer.
    """
    super(R3DNet, self).__init__()
    self.num_classes = num_classes

    # Stem: kernel 3x7x7, stride 1x2x2, padding 1x3x3, then BN and ReLU.
    self.conv1 = SpatioTemporalConv(3, 64, [3, 7, 7], stride=[1, 2, 2],
                                    padding=[1, 3, 3])
    self.bn1 = nn.BatchNorm3d(64)
    self.relu1 = nn.ReLU()

    # conv2 is built without ``downsample``; kernel size argument is 3.
    self.conv2 = SpatioTemporalResLayer(
        64, 64, 3, layer_sizes[0], block_type=block_type)

    # The remaining layers double the width argument and are built with
    # ``downsample=True``.
    self.conv3 = SpatioTemporalResLayer(
        64, 128, 3, layer_sizes[1], block_type=block_type, downsample=True)
    self.conv4 = SpatioTemporalResLayer(
        128, 256, 3, layer_sizes[2], block_type=block_type, downsample=True)
    self.conv5 = SpatioTemporalResLayer(
        256, 512, 3, layer_sizes[3], block_type=block_type, downsample=True)

    # Global average pooling followed by the classifier.
    self.pool = nn.AdaptiveAvgPool3d(1)
    self.linear = nn.Linear(512, self.num_classes)
def __init__(self, block, conv_makers, layers, stem, num_classes=400,
             zero_init_residual=False, standardization=False, norm='BN',
             dilation=False):
    """Generic resnet video generator.

    Args:
        block (nn.Module): resnet building block; its ``expansion``
            attribute sizes the classifier input.
        conv_makers (list(functions)): generator function for each layer.
        layers (List[int]): number of blocks per layer.
        stem (callable): called without arguments to build the stem.
        num_classes (int, optional): dimension of the final FC layer.
            Defaults to 400.
        zero_init_residual (bool, optional): zero-init the ``bn3`` weight of
            every ``Bottleneck``. Defaults to False.
        standardization (bool, optional): forwarded to ``_make_layer``.
        norm (str, optional): forwarded to ``_make_layer``. Defaults to 'BN'.
        dilation (bool, optional): when true, three ``DilationBlock``
            modules are created as well. Defaults to False.
    """
    super(VideoResNet, self).__init__()
    self.inplanes = 64
    self.stem = stem()

    # Four residual layers: (planes, stride) per layer; attribute names are
    # layer1 .. layer4.
    norm_kwargs = dict(standardization=standardization, norm=norm)
    layer_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for idx, (planes, stride) in enumerate(layer_specs):
        built = self._make_layer(block, conv_makers[idx], planes,
                                 layers[idx], stride=stride, **norm_kwargs)
        setattr(self, 'layer{}'.format(idx + 1), built)

    self.dilation = dilation
    if self.dilation:
        self.dilation1 = DilationBlock(in_ch=64, out_ch=64, num_layer=4,
                                       step=4)
        self.dilation2 = DilationBlock(in_ch=64, out_ch=64, num_layer=4,
                                       step=4)
        self.dilation3 = DilationBlock(in_ch=64, out_ch=64, num_layer=4,
                                       step=4)
        # self.dilation4 = DilationBlock(in_ch=256, out_ch=256, num_layer=4, step=4)

    self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
    # NOTE(review): the classifier takes 256 * expansion features although
    # layer4 is built with 512 planes — confirm against forward().
    self.fc = nn.Linear(256 * block.expansion, num_classes)

    # init weights
    self._initialize_weights()

    if not zero_init_residual:
        return
    for module in self.modules():
        if isinstance(module, Bottleneck):
            nn.init.constant_(module.bn3.weight, 0)
def single_extract(tc, val_loader, model):
    """Extract globally average-pooled features for every sample of a loader.

    Args:
        tc: unused (a former input transform; the call is disabled).
        val_loader: iterable yielding ``(input, target, index)`` batches;
            ``len(val_loader)`` is used for progress output.
        model: module put into eval mode and applied to each input batch.

    Returns:
        dict with two parallel lists: ``'data'`` holds one numpy feature
        vector per sample, ``'target'`` the matching target as numpy value.
    """
    model.eval()
    features = {'data': [], 'target': []}
    vectors, labels = features['data'], features['target']

    with torch.no_grad():
        for batch_idx, (clips, target, _index) in enumerate(val_loader):
            output = model(clips)

            # Collapse the three trailing axes and flatten to (batch, channels).
            pooled = nn.functional.adaptive_avg_pool3d(output, 1)
            pooled = pooled.view(output.size(0), output.size(1))

            for j, row in enumerate(pooled):
                vectors.append(row.cpu().numpy())
                labels.append(target[j].cpu().numpy())

            # Progress report every tenth batch.
            if batch_idx % 10 == 0:
                print("{}/{} finished".format(batch_idx, len(val_loader)))

    return features
def __init__(self, in_planes, out_planes, factor=3, norm='none',
             mode='average'):
    """Build the pooling block: a pooling layer and two 1x1x1 conv groups.

    Args:
        in_planes: input channel count.
        out_planes: output channel count.
        factor: multiplier giving the hidden width ``int(factor * in_planes)``.
        norm: forwarded to ``conv_1x1x1_bn``.
        mode: must be one of ``self.modes``; ``'average'`` uses global
            average pooling, any other accepted value uses ``ContextPool3d``.
    """
    super(PoolingBlock, self).__init__()
    assert mode in self.modes

    hidden_dim = int(factor * in_planes)

    if mode == 'average':
        pooling = nn.AdaptiveAvgPool3d((1, 1, 1))
    else:
        pooling = ContextPool3d(in_planes)

    # pool -> 1x1x1 conv (expand) -> HSwish -> 1x1x1 conv (project)
    modules = [pooling]
    modules.extend(conv_1x1x1_bn(in_planes, hidden_dim, norm=norm))
    modules.append(HSwish())
    modules.extend(conv_1x1x1_bn(hidden_dim, out_planes, norm=norm))

    self.conv = nn.Sequential(*modules)
def __init__(self, channel, reduction=4):
    """Create the pooled fully connected branch and the convolutional branch.

    Args:
        channel: channel count of the input and of both branch outputs.
        reduction: divisor giving the bottleneck width ``channel // reduction``
            of the fully connected branch.
    """
    super(SELayerCS, self).__init__()
    bottleneck = channel // reduction

    self.avg_pool = nn.AdaptiveAvgPool3d(1)

    # ``fc``: channel -> bottleneck -> channel, ending in a sigmoid.
    self.fc = nn.Sequential(
        nn.Linear(channel, bottleneck),
        SynchronizedBatchNorm1d(bottleneck),
        nn.ELU(inplace=True),
        nn.Linear(bottleneck, channel),
        SynchronizedBatchNorm1d(channel),
        nn.Sigmoid(),
    )

    # ``sc``: reduce to one channel, max-pool by 8 over the last two axes,
    # convolve, upsample back by 8, expand to ``channel`` and apply sigmoid.
    self.sc = nn.Sequential(
        nn.Conv3d(channel, 1, kernel_size=(1, 1, 1)),
        SynchronizedBatchNorm3d(1),
        nn.ELU(inplace=True),
        nn.MaxPool3d(kernel_size=(1, 8, 8), stride=(1, 8, 8)),
        conv3d_bn_elu(1, 1, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
        nn.Upsample(scale_factor=(1, 8, 8), mode='trilinear',
                    align_corners=False),
        nn.Conv3d(1, channel, kernel_size=(1, 1, 1)),
        SynchronizedBatchNorm3d(channel),
        nn.Sigmoid(),
    )
def convert(self, input_blob_size, **kwargs):
    """
    Converts into efficient version of squeeze-excite (SE) for CPU.
    It changes conv in original SE into linear layer (better supported by CPU).

    Args:
        input_blob_size: indexable whose first two entries are used as the
            (B, C) sizes of the reshape layers.
        **kwargs: accepted but not used.
    """

    def _linear_from_conv(conv):
        # Linear layer with the conv's channel counts; parameters are copied
        # from the conv with the weight squeezed to drop singleton dims.
        linear = nn.Linear(conv.in_channels, conv.out_channels,
                           bias=conv.bias is not None)
        params = deepcopy(conv.state_dict())
        params["weight"] = params["weight"].squeeze()
        linear.load_state_dict(params)
        return linear

    pool_cls = nn.AdaptiveAvgPool3d if self.is_3d else nn.AdaptiveAvgPool2d
    avg_pool = pool_cls(1)

    # Reshape tensor size to (B, C) for linear layer.
    batch_channels = (input_blob_size[0], input_blob_size[1])
    reshape0 = _Reshape(batch_channels)

    fc0 = _linear_from_conv(self.se.block[0])
    activation = deepcopy(self.se.block[1])
    fc1 = _linear_from_conv(self.se.block[2])
    sigmoid = deepcopy(self.se.block[3])

    # Output of linear layer has output shape of (B, C). Need to reshape to
    # proper shape before multiplying with input tensor.
    trailing = (1, 1, 1) if self.is_3d else (1, 1)
    reshape1 = _Reshape(batch_channels + trailing)

    se_layers = nn.Sequential(avg_pool, reshape0, fc0, activation, fc1,
                              sigmoid, reshape1)

    # Add final elementwise multiplication and replace self.se
    self.se = _SkipConnectMul(se_layers)
    self.convert_flag = True
def forward(self, x):
    """Pool ``x`` at every level in ``self.out_side`` and join the results.

    For each level ``n`` the three trailing axes are max-pooled with window
    ``ceil(size / n)`` and stride ``floor(size / n)``; the last two axes are
    then averaged to size 1 and the result is viewed as ``(B, C, -1, 1, 1)``.
    Levels are concatenated along dim 2.

    Returns:
        The concatenated tensor, or ``None`` when ``self.out_side`` is empty.
    """
    out = None
    for n in self.out_side:
        extents = x.size()[2:]
        # Receptive field size per axis.
        t_r, w_r, h_r = (math.ceil(s / n) for s in extents)
        # Stride per axis.
        s_t, s_w, s_h = (math.floor(s / n) for s in extents)

        y = nn.functional.max_pool3d(x, kernel_size=(t_r, w_r, h_r),
                                     stride=(s_t, s_w, s_h))
        y = nn.functional.adaptive_avg_pool3d(y, (y.size(2), 1, 1))

        level = y.view(y.size()[0], y.size()[1], -1, 1, 1)
        out = level if out is None else torch.cat((out, level), 2)
    return out
def __init__(self, in_channels, out_channels, loss_weight=0.5,
             loss_cls=dict(type='CrossEntropyLoss')):
    """Build the head: strided conv, global pooling, dropout and classifier.

    Args:
        in_channels: input channel count; the convolution doubles it.
        out_channels: output size of the linear classifier.
        loss_weight: stored as ``self.loss_weight``.
        loss_cls: loss configuration handed to ``build_loss``.
    """
    super().__init__()
    widened = in_channels * 2

    # Bias-free (1, 3, 3) convolution with stride (1, 2, 2) and 3D batch norm.
    self.conv = ConvModule(
        in_channels,
        widened,
        (1, 3, 3),
        stride=(1, 2, 2),
        padding=(0, 1, 1),
        bias=False,
        conv_cfg=dict(type='Conv3d'),
        norm_cfg=dict(type='BN3d', requires_grad=True),
    )
    self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
    self.loss_weight = loss_weight
    self.dropout = nn.Dropout(p=0.5)
    self.fc = nn.Linear(widened, out_channels)
    self.loss_cls = build_loss(loss_cls)
def __init__(self, block, layers, num_classes, mode='ip',
             target_transforms=None):
    """Assemble the 3D ResNet: stem, four residual groups, pooling, classifier.

    Args:
        block: residual block class; its ``expansion`` attribute sizes the
            classifier input and it is forwarded to ``self._make_layer``.
        layers: four entries holding the number of blocks of each group.
        num_classes: output size of the final fully connected layer.
        mode: model variant, ``'ip'`` or ``'ir'``.
        target_transforms: stored as ``self.target_transforms``.
    """
    super().__init__()
    assert mode in ['ip', 'ir']
    self.mode = mode  # which model variant is selected
    self.target_transforms = target_transforms
    self.in_channels = 64

    # Stem: 3x7x7 convolution with stride (1, 2, 2), BN, ReLU and a
    # (1, 3, 3) max pool with stride (1, 2, 2).
    self.conv1 = nn.Conv3d(in_channels=3, out_channels=64,
                           kernel_size=(3, 7, 7), stride=(1, 2, 2),
                           padding=(1, 3, 3), bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ReLU(inplace=True)
    self.max_pool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2),
                                 padding=(0, 1, 1))

    # Conv2_x onwards; ``layers`` records the number of blocks per group.
    self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.avg_pool = nn.AdaptiveAvgPool3d(1)
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Initialisation: Kaiming-normal for convolutions, unit weight and zero
    # bias for batch norm.
    for module in self.modules():
        if isinstance(module, nn.Conv3d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
            continue
        if isinstance(module, nn.BatchNorm3d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
def __init__(self, num_filters, conv_size, norm_module=nn.BatchNorm3d,
             freeze_bn=False):
    """Create the pooling, convolution, normalisation and sigmoid layers.

    Args:
        num_filters: channel count of the convolution input and output and
            of the normalisation layer.
        conv_size: kernel extent over the last two axes; padding is
            ``(conv_size - 1) // 2`` on those axes.
        norm_module: normalisation layer class, called with ``num_features``
            and ``track_running_stats``.
        freeze_bn: when true, running statistics are not tracked.
    """
    super(ExplicitRelation, self).__init__()

    # Pool the first non-channel axis down to 1; ``None`` keeps the other two.
    self.pooling = nn.AdaptiveAvgPool3d((1, None, None))

    half = (conv_size - 1) // 2
    self.conv = nn.Conv3d(
        num_filters,
        num_filters,
        kernel_size=(1, conv_size, conv_size),
        stride=(1, 1, 1),
        padding=(0, half, half),
        bias=False,
    )
    track_stats = not freeze_bn
    self.bn = norm_module(num_features=num_filters,
                          track_running_stats=track_stats)
    self.activate = nn.Sigmoid()
def __init__(self, latent_planes: int, dropout_prob: float, num_classes: int):
    """Build the classifier: global pooling, 3D dropout and a 1x1x1 unit.

    Args:
        latent_planes: input channel count of the classifier unit.
        dropout_prob: probability given to ``nn.Dropout3d``.
        num_classes: output channel count of the classifier unit.
    """
    super(I3DClassifier, self).__init__()
    self.latent_planes = latent_planes
    self.dropout_prob = dropout_prob
    self.num_classes = num_classes

    self.avg_pool = nn.AdaptiveAvgPool3d([1, 1, 1])
    self.dropout = nn.Dropout3d(dropout_prob)

    # 1x1x1 unit configured without activation, bias or batch norm.
    unit_options = mo.Unit3DOptions(
        in_channels=latent_planes,
        out_channels=self.num_classes,
        kernel_size=[1, 1, 1],
        stride=[1, 1, 1],
        activation='none',
        use_bias=False,
        use_bn=False,
        padding='VALID',
    )
    self.classifier = ib.Unit3D(unit_options)
def __init__(self, layer_sizes, block_type=SpatioTemporalResBlock):
    """Build the stem, five residual layers and the global pooling layer.

    Args:
        layer_sizes: indexable; entries 0..3 are forwarded to the residual
            layers (entry 3 is used for both conv5 and conv6).
        block_type: block class forwarded to every residual layer.
    """
    super(R2Plus1DNet, self).__init__()

    # Stem: kernel 3x3x3, stride 1x2x2, padding 1x1x1.
    # NOTE(review): the previous comment described the kernel as 3x7x7 while
    # the code passes [3, 3, 3] — confirm which one is intended.
    self.conv1 = SpatioTemporalConv(3, 64, [3, 3, 3], stride=[1, 2, 2],
                                    padding=[1, 1, 1])

    # conv2 is built without ``downsample``; kernel size argument is 3.
    self.conv2 = SpatioTemporalResLayer(
        64, 64, 3, layer_sizes[0], block_type=block_type)

    # Every later layer doubles the width argument and is built with
    # ``downsample=True``.
    self.conv3 = SpatioTemporalResLayer(
        64, 128, 3, layer_sizes[1], block_type=block_type, downsample=True)
    self.conv4 = SpatioTemporalResLayer(
        128, 256, 3, layer_sizes[2], block_type=block_type, downsample=True)
    self.conv5 = SpatioTemporalResLayer(
        256, 512, 3, layer_sizes[3], block_type=block_type, downsample=True)
    # NOTE(review): conv6 reuses layer_sizes[3] — confirm this is intended
    # rather than a dedicated fifth entry.
    self.conv6 = SpatioTemporalResLayer(
        512, 1024, 3, layer_sizes[3], block_type=block_type, downsample=True)
    # self.conv7 = SpatioTemporalResLayer(512, 1024, 3, layer_sizes[4], block_type=block_type, downsample=True)

    # Global average pooling of the output.
    self.pool = nn.AdaptiveAvgPool3d(1)
def __init__(self, scale=1.0, input_size=224, num_classes=2):
    """Create the stem, the stacked layers and the classification head.

    Args:
        scale: accepted but not used by this constructor.
        input_size: accepted but not used by this constructor.
        num_classes: output size of the final linear layer.
    """
    super(RECNN_Mask, self).__init__()

    # Stem: bias-free 3x3x3 stride-2 convolution on a 1-channel input, + BN.
    self.conv1 = nn.Conv3d(in_channels=1, out_channels=8, kernel_size=3,
                           stride=2, padding=1, bias=False)
    self.bn1 = nn.BatchNorm3d(8)

    self.layers = self._make_layers(in_planes=8)

    # Bias-free 1x1x1 expansion from 128 to 512 channels, + BN.
    self.conv2 = nn.Conv3d(in_channels=128, out_channels=512, kernel_size=1,
                           stride=1, padding=0, bias=False)
    self.bn2 = nn.BatchNorm3d(512)

    self.linear = nn.Linear(512, num_classes)
    self.avgpool = nn.AdaptiveAvgPool3d(1)
def __init__(self, output_stride):
    """Build the ASPP module with ELU activations.

    Args:
        output_stride: one of 16, 8, 4 or 2; selects the dilation rates of
            the four ASPP branches.

    Raises:
        ValueError: if ``output_stride`` is not supported. Previously this
            left ``dilations`` unbound and failed with an UnboundLocalError.
    """
    super(ASPP_ELU, self).__init__()
    inplanes = 512

    # Dilation rates per supported output stride.
    dilation_table = {
        16: [1, 6, 12, 18],
        8: [1, 12, 24, 36],
        4: [1, 24, 48, 72],
        2: [1, 48, 96, 144],
    }
    if output_stride not in dilation_table:
        raise ValueError(
            "output_stride must be one of 16, 8, 4 or 2, got {!r}".format(
                output_stride))
    dilations = dilation_table[output_stride]

    # One 1x1 branch and three dilated 3x3 branches, 64 channels each.
    self.aspp1 = _ASPPModule_ELU(inplanes, 64, 1, padding=0,
                                 dilation=dilations[0])
    self.aspp2 = _ASPPModule_ELU(inplanes, 64, 3, padding=dilations[1],
                                 dilation=dilations[1])
    self.aspp3 = _ASPPModule_ELU(inplanes, 64, 3, padding=dilations[2],
                                 dilation=dilations[2])
    self.aspp4 = _ASPPModule_ELU(inplanes, 64, 3, padding=dilations[3],
                                 dilation=dilations[3])

    # Pooled branch: adaptive average pooling to 16x16x16, 1x1x1 conv, BN, ELU.
    self.global_avg_pool = nn.Sequential(
        nn.AdaptiveAvgPool3d((16, 16, 16)),
        nn.Conv3d(inplanes, 64, 1, stride=1, bias=False),
        nn.BatchNorm3d(64),
        nn.ELU())

    # Projection of the five concatenated 64-channel branches (320 channels).
    self.conv1 = nn.Conv3d(320, 64, 1, bias=False)
    self.bn1 = nn.BatchNorm3d(64)
    self.relu = nn.ELU()
    self.dropout = nn.Dropout(0.5)
    self._init_weight()
def __init__(self, in_planes, kernel_size=3, temporal=None):
    """Create the pooling layers, two convolutions and the sigmoid.

    The original signature placed the required ``temporal`` parameter after
    the defaulted ``kernel_size``, which is a SyntaxError. Parameter names
    and order are kept; ``temporal`` now defaults to ``None`` only so the
    signature is valid, and it is still mandatory.

    Args:
        in_planes: channel count of both convolutions.
        kernel_size: 3 or 7; selects the padding (1 or 3).
        temporal: kernel extent of ``conv2`` along the first non-channel
            axis. Required.

    Raises:
        TypeError: if ``temporal`` is not supplied.
        AssertionError: if ``kernel_size`` is neither 3 nor 7.
    """
    super(SpatialTemporalAttention, self).__init__()
    if temporal is None:
        raise TypeError("__init__() missing required argument: 'temporal'")

    # NOTE(review): the original constructed both pooling layers without the
    # mandatory ``output_size`` argument, which raises TypeError. Global
    # pooling (output size 1) is assumed here — confirm against forward().
    self.avg_pool = nn.AdaptiveAvgPool3d(1)
    self.max_pool = nn.AdaptiveMaxPool3d(1)

    assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
    padding = 3 if kernel_size == 7 else 1

    # NOTE(review): conv1 always uses a (1, 7, 7) kernel regardless of
    # ``kernel_size``, and the integer padding applies to all three axes of
    # both convolutions — left unchanged, confirm this is intended.
    self.conv1 = nn.Conv3d(in_planes, in_planes, kernel_size=(1, 7, 7),
                           padding=padding, bias=False)
    self.conv2 = nn.Conv3d(in_planes, in_planes,
                           kernel_size=(temporal, 1, 1), padding=padding,
                           bias=False)
    self.sigmoid = nn.Sigmoid()
def __init__(self):
    """Create the residual-frame module, conv stages and regression head."""
    super(C3DVQANet, self).__init__()

    self.diff = ResidualFrame(eps=1.0)

    # First stage: a downsampling and an upsampling convolution.
    self.conv1_1 = DownsampleConv3D(1, 1)
    self.conv1_2 = UpsampleConv3D(1, 1)

    # Second stage: two spatial convolutions built with arguments (1, 16).
    self.conv2_1 = SpatialConv3D(1, 16)
    self.conv2_2 = SpatialConv3D(1, 16)

    # Third stage: spatial-temporal convolution built with arguments (32, 1).
    self.conv3 = SpatialTemporalConv3D(32, 1)

    # Head: global average pooling, then 1 -> 4 -> 1 with LeakyReLU.
    self.pool = nn.AdaptiveAvgPool3d(1)
    self.fc1 = nn.Linear(in_features=1, out_features=4)
    self.relu1 = nn.LeakyReLU(inplace=True)
    self.fc2 = nn.Linear(in_features=4, out_features=1)
    self.relu2 = nn.LeakyReLU(inplace=True)
def __init__(self, opt):
    """Build the convolutional trunk and the final linear layer.

    Args:
        opt: options object; ``opt.movedim`` sets the output size of
            ``self.line``.
    """
    super(moveNet, self).__init__()
    self.movedim = opt.movedim

    # Convolution stem: (2, 3, 3) conv with stride (1, 2, 2), ReLU and a
    # (2, 2, 2) max pool with stride 1.
    stem = [
        nn.Conv3d(3, 64, kernel_size=(2, 3, 3), stride=(1, 2, 2)),
        nn.ReLU(True),
        nn.MaxPool3d((2, 2, 2), stride=1),
    ]
    self.net = nn.Sequential(*stem)

    # Residual blocks; presumably ``build_resblock`` extends ``self.net`` —
    # verify against its definition.
    self.build_resblock(64, 64, 2)
    self.build_resblock(64, 80, 2, 2)
    self.build_resblock(80, 128, 2, 2)
    self.build_resblock(128, 200, 2, 2)

    # Wrap the trunk with global average pooling.
    self.net = nn.Sequential(
        self.net,
        nn.AdaptiveAvgPool3d(1)
        # nn.Linear(200, 1)
    )
    self.line = nn.Linear(200, self.movedim)