def __init__(self, cfg, in_channels):
    """Build the pooler and the two convolutional "fc" layers of the head.

    ``fc6`` is a convolution whose kernel size and stride both equal the
    pooler resolution; ``fc7`` is a 1x1 convolution. Both produce
    ``in_channels * 4`` channels, which is also exposed as ``out_channels``.
    When ``cfg.MODEL.ROI_BOX_HEAD.USE_GN`` is truthy the convolutions are
    created without a bias and ``gn6`` / ``gn7`` are created with
    ``group_norm``; otherwise those two attributes are not set.

    Arguments:
        cfg: configuration node; only ``cfg.MODEL.ROI_BOX_HEAD`` is read.
        in_channels (int): number of channels of the incoming features.
    """
    super(FPNDetNetFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler = Pooler(cfg.MODEL.ROI_BOX_HEAD)
    hidden_channels = in_channels * 4

    self.use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
    self.pooler = pooler

    # A bias is only created when no group norm layer is built for the conv.
    self.fc6 = nn.Conv2d(
        in_channels,
        hidden_channels,
        kernel_size=resolution,
        stride=resolution,
        padding=0,
        bias=not self.use_gn,
    )
    if self.use_gn:
        self.gn6 = group_norm(hidden_channels)

    self.fc7 = nn.Conv2d(
        hidden_channels,
        hidden_channels,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=not self.use_gn,
    )
    if self.use_gn:
        self.gn7 = group_norm(hidden_channels)

    self.out_channels = hidden_channels
def __init__(
    self,
    cfg,
    in_channels,
    num_backbone_stages,
    top_blocks,
):
    """Create the per-stage convolution pairs of the bottom-up path.

    For each of ``num_backbone_stages`` stages two layers are created and
    appended to ``panet_buttomup_conv1_modules`` (3x3, stride 2) and
    ``panet_buttomup_conv2_modules`` (3x3, stride 1). Both keep the channel
    count at ``in_channels``.

    Arguments:
        cfg: configuration node; ``cfg.MODEL.FPN.PANET.USE_GN`` selects
            between a plain convolution and conv + group norm + ReLU.
        in_channels (int): input and output channel count of every layer.
        num_backbone_stages (int): number of (conv1, conv2) pairs to build.
        top_blocks (nn.Module or None): stored unchanged as
            ``self.top_blocks``.
    """
    super(Bottom2UP, self).__init__()

    def _stage_conv(stride, with_gn):
        # Plain 3x3 conv, or conv + group norm + ReLU when GN is enabled.
        if not with_gn:
            return nn.Conv2d(in_channels, in_channels, 3, stride, 1)
        return nn.Sequential(
            nn.Conv2d(in_channels, in_channels, 3, stride, 1, bias=True),
            group_norm(in_channels),
            nn.ReLU(inplace=True),
        )

    self.panet_buttomup_conv1_modules = nn.ModuleList()
    self.panet_buttomup_conv2_modules = nn.ModuleList()
    for _ in range(num_backbone_stages):
        with_gn = cfg.MODEL.FPN.PANET.USE_GN
        # Stride-2 convolution (original author's note: down-sampling).
        self.panet_buttomup_conv1_modules.append(_stage_conv(2, with_gn))
        # Stride-1 convolution (original author's note: applied after the
        # element-wise addition).
        self.panet_buttomup_conv2_modules.append(_stage_conv(1, with_gn))

    self.top_blocks = top_blocks
def __init__(self, channels, group=1, kernel=3, dilation=(1, 4, 8, 12), shuffle=False, deform=None):
    """Create the layers of the dynamic-weight block.

    Arguments:
        channels (int): channel count of the block input; internal layers
            work on ``channels // 4`` channels.
        group (int): with ``kernel``, sets the output width of ``cata``
            (``group * kernel * kernel``).
        kernel (int): see ``group``; ``kernel * kernel`` is stored as
            ``self.K``.
        dilation (sequence of int): only the first entry is used, as both
            padding and dilation of ``cata`` and ``unfold1``.
        shuffle (bool): stored as ``self.shuffle``.
        deform: when equal to ``'deform'`` a ``DeformUnfold`` is used and
            ``cata`` gets 18 extra output channels; otherwise ``nn.Unfold``.
    """
    super(OneDynamicWeightsCat11, self).__init__()

    def _conv1x1_gn_relu(c_in, c_out, bias):
        # 1x1 convolution followed by group norm and ReLU.
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, 1, padding=0, bias=bias),
            group_norm(c_out),
            nn.ReLU(inplace=True),
        )

    reduced = channels // 4
    self.scale1 = _conv1x1_gn_relu(channels, reduced, False)

    rate = dilation[0]
    use_deform = deform == 'deform'
    # NOTE(review): 18 presumably equals 2 * 3 * 3 sampling offsets for the
    # deformable unfold - confirm against DeformUnfold.
    weight_channels = group * kernel * kernel + 18 if use_deform else group * kernel * kernel
    self.cata = nn.Conv2d(reduced, weight_channels, 3, padding=rate, dilation=rate, bias=False)
    unfold_cls = DeformUnfold if use_deform else nn.Unfold
    self.unfold1 = unfold_cls(kernel_size=(3, 3), padding=rate, dilation=rate)

    self.softmax = nn.Softmax(dim=-1)

    self.shuffle = shuffle
    self.deform = deform
    self.group = group
    self.K = kernel * kernel

    self.scale2 = _conv1x1_gn_relu(reduced * 2, reduced, True)
    self.scale3 = _conv1x1_gn_relu(reduced, channels, True)
def __init__(self, plane):
    """Create the projection, graph-weight and output layers.

    Arguments:
        plane (int): channel count of the block input and output; the
            internal projections use ``plane // 2`` channels.
    """
    super(GCNwithNonlocal, self).__init__()
    half_plane = plane // 2

    def _projection():
        # 1x1 convolution from the input width to the reduced width.
        return nn.Conv2d(plane, half_plane, kernel_size=1)

    self.node_k = _projection()
    self.node_v = _projection()
    self.node_q = _projection()

    self.conv_wg = nn.Conv1d(half_plane, half_plane, kernel_size=1, bias=False)
    self.bn_wg = group_norm(half_plane)
    self.softmax = nn.Softmax(dim=2)

    # Project back to the input width, then normalise.
    restore = nn.Conv2d(half_plane, plane, kernel_size=1)
    self.out = nn.Sequential(restore, group_norm(plane))
def __init__(self, channels):
    """Create the reduce / offset / deformable-conv / expand layers.

    Arguments:
        channels (int): channel count of the block input and output; the
            inner layers work on ``channels // 4`` channels.
    """
    super(deformMP, self).__init__()
    reduced = channels // 4

    def _conv1x1_gn_relu(c_in, c_out):
        # Bias-free 1x1 convolution followed by group norm and ReLU.
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, 1, padding=0, bias=False),
            group_norm(c_out),
            nn.ReLU(inplace=True),
        )

    self.scale1 = _conv1x1_gn_relu(channels, reduced)
    # NOTE(review): 18 output channels presumably are the 2 * 3 * 3 offsets
    # consumed by the deformable conv - confirm against the forward pass.
    self.off_conva = nn.Conv2d(reduced, 18, 3, padding=1, bias=False)
    self.kernel_conva = DeformConv(reduced, reduced, kernel_size=3, padding=1, bias=False)
    self.scale3 = _conv1x1_gn_relu(reduced, channels)
def __init__(self, cfg, in_channels):
    """Build the pooler, the stacked 3x3 convolutions and the final fc layer.

    Arguments:
        cfg: configuration node; reads ``cfg.MODEL.ROI_BOX_HEAD`` and
            ``cfg.MODEL.USE_WS`` (selects ``Conv2dWS`` over ``nn.Conv2d``).
        in_channels (int): channel count fed to the first stacked conv.
    """
    super(FPNXconv1fcFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
        cfg=cfg,
    )

    use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
    conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
    num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
    dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION
    use_ws = cfg.MODEL.USE_WS

    stack = []
    width = in_channels
    for _ in range(num_stacked_convs):
        conv_cls = Conv2dWS if use_ws else nn.Conv2d
        stack.append(
            conv_cls(
                width,
                conv_head_dim,
                kernel_size=3,
                stride=1,
                padding=dilation,
                dilation=dilation,
                # The conv carries a bias only when no group norm follows.
                bias=not use_gn,
            )
        )
        width = conv_head_dim
        if use_gn:
            stack.append(group_norm(width))
        stack.append(nn.ReLU(inplace=True))
    self.add_module("xconvs", nn.Sequential(*stack))

    # Gaussian init for every nn.Conv2d instance in the stack; biases
    # (present only without GN) are zeroed.
    for layer in self.xconvs.modules():
        if not isinstance(layer, nn.Conv2d):
            continue
        torch.nn.init.normal_(layer.weight, std=0.01)
        if not use_gn:
            torch.nn.init.constant_(layer.bias, 0)

    representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
    self.fc6 = make_fc(conv_head_dim * resolution**2, representation_size, use_gn=False)
    self.out_channels = representation_size
def conv3x3(in_channels, out_channels, module_name, postfix,
            stride=1, groups=1, kernel_size=3, padding=1):
    """Return named (conv, norm, ReLU) layers for a padded convolution.

    The result is a list of ``(name, module)`` pairs whose names are
    ``<module_name>_<postfix>/conv``, ``.../norm`` and ``.../relu``. The
    convolution has no bias; the norm layer is ``group_norm`` when the
    module-level ``_GN`` flag is truthy and ``FrozenBatchNorm2d`` otherwise.
    """
    conv = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=False,
    )
    if _GN:
        norm = group_norm(out_channels)
    else:
        norm = FrozenBatchNorm2d(out_channels)
    relu = nn.ReLU(inplace=True)

    return [
        (f'{module_name}_{postfix}/conv', conv),
        (f'{module_name}_{postfix}/norm', norm),
        (f'{module_name}_{postfix}/relu', relu),
    ]
def __init__(self, channels):
    """Create the reduce / depthwise / fuse / expand layers.

    Arguments:
        channels (int): channel count of the block input and output; the
            inner layers work on ``channels // 4`` channels.
    """
    super(ConvMP, self).__init__()
    reduced = channels // 4

    def _conv1x1_gn_relu(c_in, c_out):
        # Bias-free 1x1 convolution followed by group norm and ReLU.
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, 1, padding=0, bias=False),
            group_norm(c_out),
            nn.ReLU(inplace=True),
        )

    self.scale1 = _conv1x1_gn_relu(channels, reduced)
    # Depthwise 3x3 convolution: one group per channel.
    self.cata = nn.Conv2d(reduced, reduced, 3, padding=1, groups=reduced)
    # Takes twice the reduced width as input.
    self.scale2 = _conv1x1_gn_relu(reduced * 2, reduced)
    self.scale3 = _conv1x1_gn_relu(reduced, channels)
def DFConv3x3(in_channels, out_channels, module_name, postfix,
              stride=1, groups=1, kernel_size=3,
              with_modulated_dcn=None, deformable_groups=None):
    """Return named (DFConv2d, norm, ReLU) layers.

    The result is a list of ``(name, module)`` pairs whose names are
    ``<module_name>_<postfix>/conv``, ``.../norm`` and ``.../relu``. The
    ``DFConv2d`` layer is created without a bias; the norm layer is
    ``group_norm`` when the module-level ``_GN`` flag is truthy and
    ``FrozenBatchNorm2d`` otherwise.
    """
    conv = DFConv2d(
        in_channels,
        out_channels,
        with_modulated_dcn=with_modulated_dcn,
        kernel_size=kernel_size,
        stride=stride,
        groups=groups,
        deformable_groups=deformable_groups,
        bias=False,
    )
    if _GN:
        norm = group_norm(out_channels)
    else:
        norm = FrozenBatchNorm2d(out_channels)
    relu = nn.ReLU(inplace=True)

    return [
        (f'{module_name}_{postfix}/conv', conv),
        (f'{module_name}_{postfix}/norm', norm),
        (f'{module_name}_{postfix}/relu', relu),
    ]
def conv1x1(in_channels, out_channels, module_name, postfix,
            stride=1, groups=1, kernel_size=1, padding=0):
    """Return named (conv, norm, ReLU) layers for a 1x1 convolution.

    The result is a list of ``(name, module)`` pairs whose names are
    ``<module_name>_<postfix>/conv``, ``.../norm`` and ``.../relu``. The
    convolution has no bias; the norm layer is ``group_norm`` when the
    module-level ``_GN`` flag is truthy and ``FrozenBatchNorm2d`` otherwise.
    """
    # Names are built with f-strings, matching conv3x3 / DFConv3x3; the
    # resulting strings are the same as the former str.format calls.
    return [
        (f'{module_name}_{postfix}/conv',
         nn.Conv2d(in_channels,
                   out_channels,
                   kernel_size=kernel_size,
                   stride=stride,
                   padding=padding,
                   groups=groups,
                   bias=False)),
        (f'{module_name}_{postfix}/norm',
         group_norm(out_channels) if _GN else FrozenBatchNorm2d(out_channels)),
        (f'{module_name}_{postfix}/relu',
         nn.ReLU(inplace=True)),
    ]
def __init__(self, cfg):
    """Build the pooler, the stacked conv/Deconv layers and the fc layer.

    Arguments:
        cfg: configuration node; reads ``cfg.MODEL.ROI_BOX_HEAD``,
            ``cfg.MODEL.BACKBONE.OUT_CHANNELS`` and ``cfg.MODEL.DECONV``.

    NOTE(review): the indentation of this method was lost in the source this
    was reconstructed from. The nesting of the two statements marked below
    is a best guess and must be confirmed against the original file.
    """
    super(FPNXconv1fcFeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    self.pooler = pooler

    use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
    use_gw = cfg.MODEL.ROI_BOX_HEAD.USE_GW
    in_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
    conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
    num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
    dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

    # norm_type is forwarded to every Deconv layer and to make_fc below.
    if cfg.MODEL.DECONV.LAYERWISE_NORM:
        norm_type = cfg.MODEL.DECONV.BOX_NORM_TYPE
    else:
        norm_type = 'none'
        # NOTE(review): assumed to belong to the `else` branch (a single
        # box-level LayerNorm when layer-wise norm is off) - confirm.
        if cfg.MODEL.DECONV.BOX_NORM_TYPE == 'layernorm':
            self.box_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)

    xconvs = []
    for ix in range(num_stacked_convs):
        if cfg.MODEL.ROI_BOX_HEAD.USE_DECONV:
            xconvs.append(
                Deconv(in_channels, conv_head_dim, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=True, block=cfg.MODEL.DECONV.BLOCK, sampling_stride=cfg.MODEL.DECONV.STRIDE, sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type))
            in_channels = conv_head_dim
        else:
            xconvs.append(
                nn.Conv2d(in_channels, conv_head_dim, kernel_size=3, stride=1, padding=dilation, dilation=dilation, bias=False if (use_gn or use_gw) else True))
            in_channels = conv_head_dim
            # NOTE(review): assumed to apply to the plain-conv branch only
            # (the Deconv branch always keeps its bias) - confirm.
            if use_gn or use_gw:
                xconvs.append(group_norm(in_channels))
        xconvs.append(nn.ReLU(inplace=True))

    self.add_module("xconvs", nn.Sequential(*xconvs))
    # Gaussian weight init for every conv / Deconv layer; the bias is zeroed
    # only when neither use_gn nor use_gw is set.
    for modules in [ self.xconvs, ]:
        for l in modules.modules():
            if isinstance(l, nn.Conv2d) or isinstance(l, Deconv):
                torch.nn.init.normal_(l.weight, std=0.01)
                if not (use_gn or use_gw):
                    torch.nn.init.constant_(l.bias, 0)

    input_size = conv_head_dim * resolution**2
    representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM

    # block stays 0 unless the Deconv variant of the fc layer is requested.
    block = 0
    use_delinear = cfg.MODEL.ROI_BOX_HEAD.USE_DECONV
    if use_delinear:
        block = cfg.MODEL.DECONV.BLOCK_FC
    # check here (original author's marker)
    self.fc6 = make_fc(input_size, representation_size, use_gn=False, use_gw=False, use_delinear=use_delinear, block=block, sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)
def __init__(self, planes, ratio=4):
    """Create the global-reasoning and local-attention layers.

    Arguments:
        planes (int): channel count of the block input and output.
        ratio (int): reduction factor; the node width is
            ``planes // ratio`` and the state width is twice that.
    """
    super(GloReLocalModule, self).__init__()
    node_width = planes // ratio
    state_width = planes // ratio * 2

    self.phi = nn.Conv2d(planes, state_width, kernel_size=1, bias=False)
    self.bn_phi = group_norm(state_width)
    self.theta = nn.Conv2d(planes, node_width, kernel_size=1, bias=False)
    self.bn_theta = group_norm(node_width)

    # Interaction space.
    # Adjacency matrix: (-)A_g
    self.conv_adj = nn.Conv1d(node_width, node_width, kernel_size=1, bias=False)
    self.bn_adj = group_norm(node_width)

    # State update function: W_g
    self.conv_wg = nn.Conv1d(state_width, state_width, kernel_size=1, bias=False)
    self.bn_wg = group_norm(state_width)

    # Last fc: project the state width back to the input width.
    self.conv3 = nn.Conv2d(state_width, planes, kernel_size=1, bias=False)
    self.bn3 = group_norm(planes)

    # Two depthwise 3x3 stride-2 convolutions, each followed by group norm.
    local_layers = []
    for _ in range(2):
        local_layers.append(
            nn.Conv2d(planes, planes, 3, groups=planes, stride=2, padding=1, bias=False)
        )
        local_layers.append(group_norm(planes))
    self.local = nn.Sequential(*local_layers)

    self.gcn_local_attention = GCNwithNonlocal(planes)
    self.sigmoid_spatial = nn.Sigmoid()

    # 1x1 conv taking twice the input width, followed by group norm.
    self.final = nn.Sequential(
        nn.Conv2d(planes * 2, planes, kernel_size=1, bias=False),
        group_norm(planes),
    )
    self.relu = nn.ReLU(inplace=True)
def __init__(self, cfg, in_channels):
    """Build the pooler, the mask conv tower and the fully connected branch.

    Arguments:
        cfg: configuration node; reads ``cfg.MODEL.ROI_MASK_HEAD``
            (pooler settings, ``USE_GN``, ``CONV_LAYERS``, ``DILATION``).
            ``CONV_LAYERS`` must have at least three entries because
            ``layers[2]`` is used for the fc branch.
        in_channels (int): channel count fed to the first conv layers.
    """
    super(MaskRCNNPANETFeatureExtractor, self).__init__()
    self.cfg = cfg

    mask_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = mask_cfg.POOLER_RESOLUTION  # e.g. 14 (original note)
    self.pooler = AdaptivePooler(
        output_size=(resolution, resolution),
        scales=mask_cfg.POOLER_SCALES,  # e.g. (0.25, 0.125, 0.0625, 0.03125)
        sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,  # e.g. 2
    )

    use_gn = mask_cfg.USE_GN
    layers = mask_cfg.CONV_LAYERS  # e.g. (256, 256, 256, 256)
    dilation = mask_cfg.DILATION

    self.blocks = []

    # Four parallel first-layer convs; they are not recorded in self.blocks.
    for first_name in ("mask_fcn1_1", "mask_fcn1_2", "mask_fcn1_3", "mask_fcn1_4"):
        self.add_module(
            first_name,
            make_conv3x3(in_channels, layers[0], dilation=dilation, stride=1, use_gn=use_gn),
        )

    # Remaining tower layers mask_fcn2, mask_fcn3, ...; dilated convolution
    # is used here (original note).
    width = layers[0]
    for layer_idx, layer_features in enumerate(layers[1:], 2):
        layer_name = "mask_fcn{}".format(layer_idx)
        self.add_module(
            layer_name,
            make_conv3x3(width, layer_features, dilation=dilation, stride=1, use_gn=use_gn),
        )
        width = layer_features
        self.blocks.append(layer_name)

    # TODO: modules for telling foreground from background; their weights
    # need to be initialised (original note).
    fc_width = layers[2]
    half_width = int(layers[2] / 2)

    conv4 = nn.Conv2d(fc_width, fc_width, 3, 1, padding=dilation, dilation=dilation, bias=False)
    nn.init.kaiming_normal_(conv4.weight, mode="fan_out", nonlinearity="relu")
    self.mask_conv4_fc = nn.Sequential(conv4, group_norm(fc_width), nn.ReLU(inplace=True))

    conv5 = nn.Conv2d(fc_width, half_width, 3, 1, padding=dilation, dilation=dilation, bias=False)
    nn.init.kaiming_normal_(conv5.weight, mode="fan_out", nonlinearity="relu")
    self.mask_conv5_fc = nn.Sequential(conv5, group_norm(half_width), nn.ReLU(inplace=True))

    # Linear layer from the flattened halved feature map to a
    # (2 * resolution) x (2 * resolution) output.
    fc = nn.Linear(half_width * resolution ** 2, (2 * resolution) ** 2, bias=True)
    nn.init.kaiming_normal_(fc.weight, mode="fan_out", nonlinearity="relu")
    self.mask_fc = nn.Sequential(fc, nn.ReLU(inplace=True))

    self.out_channels = layer_features
def __init__(self, cfg, in_channels):
    """Build the pooler, stacked convs, per-level conv heads and fc layer.

    Arguments:
        cfg: configuration node; reads ``cfg.MODEL.ROI_BOX_HEAD``.
        in_channels (int): channel count fed to the first stacked conv.
    """
    super(FPNXconv1fc_panet_FeatureExtractor, self).__init__()

    resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
        panet=True,
    )

    use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
    conv_head_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_HEAD_DIM
    num_stacked_convs = cfg.MODEL.ROI_BOX_HEAD.NUM_STACKED_CONVS
    dilation = cfg.MODEL.ROI_BOX_HEAD.DILATION

    def _conv3x3(c_in):
        # 3x3 conv to conv_head_dim channels; it carries a bias only when no
        # group norm follows.
        return nn.Conv2d(
            c_in,
            conv_head_dim,
            kernel_size=3,
            stride=1,
            padding=dilation,
            dilation=dilation,
            bias=not use_gn,
        )

    xconvs = []
    for _ in range(num_stacked_convs):
        xconvs.append(_conv3x3(in_channels))
        in_channels = conv_head_dim
        if use_gn:
            xconvs.append(group_norm(in_channels))
        xconvs.append(nn.ReLU(inplace=True))
    self.add_module("xconvs", nn.Sequential(*xconvs))

    # Temporary: the number of levels is hard-coded.
    num_levels = 4
    self.conv1_head = nn.ModuleList()
    for _ in range(num_levels):
        # As before, the input width is whatever in_channels holds after the
        # loop above (conv_head_dim once at least one stacked conv exists).
        head_layers = [_conv3x3(in_channels)]
        if use_gn:
            # Normalise over the conv's output channels. The former
            # group_norm(in_channels) only matched when in_channels already
            # equalled conv_head_dim.
            head_layers.append(group_norm(conv_head_dim))
        head_layers.append(nn.ReLU(inplace=True))
        self.conv1_head.append(nn.Sequential(*head_layers))

    # HACK: use MSRA (Kaiming) init for all convs; xconvs first, then
    # conv1_head, so the order of random draws is unchanged.
    for container in (self.xconvs, self.conv1_head):
        for layer in container.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
                if not use_gn:
                    torch.nn.init.constant_(layer.bias, 0)

    input_size = conv_head_dim * resolution**2
    representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
    self.fc6 = make_fc(input_size, representation_size, use_gn=False)
    self.out_channels = representation_size