def __init__(self, ch_in, ch_out, stride=1, base_width=64, cardinality=1):
    super(Bottleneck, self).__init__()
    self.stride = stride
    mid_planes = int(
        math.floor(ch_out * (base_width / 64)) * cardinality)
    mid_planes = mid_planes // self.expansion

    self.conv1 = ConvNormLayer(
        ch_in,
        mid_planes,
        filter_size=1,
        stride=1,
        bias_on=False,
        norm_decay=None)
    self.conv2 = ConvNormLayer(
        mid_planes,
        mid_planes,
        filter_size=3,
        stride=stride,
        bias_on=False,
        norm_decay=None)
    self.conv3 = ConvNormLayer(
        mid_planes,
        ch_out,
        filter_size=1,
        stride=1,
        bias_on=False,
        norm_decay=None)

def __init__(self, ch_in, ch_out, norm_type='bn'):
    super(DeConv, self).__init__()
    self.deconv = nn.Sequential()
    conv1 = ConvNormLayer(
        ch_in=ch_in,
        ch_out=ch_out,
        stride=1,
        filter_size=1,
        norm_type=norm_type,
        initializer=XavierUniform())
    conv2 = nn.Conv2DTranspose(
        in_channels=ch_out,
        out_channels=ch_out,
        kernel_size=4,
        padding=1,
        stride=2,
        groups=ch_out,
        weight_attr=ParamAttr(initializer=XavierUniform()),
        bias_attr=False)
    bn = batch_norm(ch_out, norm_type=norm_type, norm_decay=0.)
    conv3 = ConvNormLayer(
        ch_in=ch_out,
        ch_out=ch_out,
        stride=1,
        filter_size=1,
        norm_type=norm_type,
        initializer=XavierUniform())

    self.deconv.add_sublayer('conv1', conv1)
    self.deconv.add_sublayer('relu6_1', nn.ReLU6())
    self.deconv.add_sublayer('conv2', conv2)
    self.deconv.add_sublayer('bn', bn)
    self.deconv.add_sublayer('relu6_2', nn.ReLU6())
    self.deconv.add_sublayer('conv3', conv3)
    self.deconv.add_sublayer('relu6_3', nn.ReLU6())

def __init__(self,
             in_channels=(512, 1024, 2048),
             out_channel=256,
             num_extra_levels=2,
             fpn_strides=[8, 16, 32, 64, 128],
             num_stacks=1,
             use_weighted_fusion=True,
             norm_type='bn',
             norm_groups=32,
             act='swish'):
    super(BiFPN, self).__init__()
    assert num_stacks > 0, "The number of stacks of BiFPN is at least 1."
    assert norm_type in ['bn', 'sync_bn', 'gn', None]
    assert act in ['swish', 'relu', None]
    assert num_extra_levels >= 0, \
        "The `num_extra_levels` must be non-negative (>= 0)."
    self.in_channels = in_channels
    self.out_channel = out_channel
    self.num_extra_levels = num_extra_levels
    self.num_stacks = num_stacks
    self.use_weighted_fusion = use_weighted_fusion
    self.norm_type = norm_type
    self.norm_groups = norm_groups
    self.act = act
    self.num_levels = len(self.in_channels) + self.num_extra_levels
    if len(fpn_strides) != self.num_levels:
        for i in range(self.num_extra_levels):
            fpn_strides += [fpn_strides[-1] * 2]
    self.fpn_strides = fpn_strides

    self.lateral_convs = nn.LayerList()
    for in_c in in_channels:
        self.lateral_convs.append(
            ConvNormLayer(in_c, self.out_channel, 1, 1))
    if self.num_extra_levels > 0:
        self.extra_convs = nn.LayerList()
        for i in range(self.num_extra_levels):
            if i == 0:
                self.extra_convs.append(
                    ConvNormLayer(self.in_channels[-1], self.out_channel,
                                  3, 2))
            else:
                self.extra_convs.append(nn.MaxPool2D(3, 2, 1))

    self.bifpn_cells = nn.LayerList()
    for i in range(self.num_stacks):
        self.bifpn_cells.append(
            BiFPNCell(
                self.out_channel,
                self.num_levels,
                use_weighted_fusion=self.use_weighted_fusion,
                norm_type=self.norm_type,
                norm_groups=self.norm_groups,
                act=self.act))

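# Added illustration (not from the original source): how the stride list in
# BiFPN.__init__ above gets padded when fewer strides than levels are given.
# Each extra level reuses twice the last stride; plain Python, no paddle needed.
fpn_strides = [8, 16, 32]
num_extra_levels = 2
for _ in range(num_extra_levels):
    fpn_strides += [fpn_strides[-1] * 2]
print(fpn_strides)  # [8, 16, 32, 64, 128]
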
def __init__(self, ch_in, ch_out, stride=1):
    super(BasicBlock, self).__init__()
    self.conv1 = ConvNormLayer(
        ch_in,
        ch_out,
        filter_size=3,
        stride=stride,
        bias_on=False,
        norm_decay=None)
    self.conv2 = ConvNormLayer(
        ch_out,
        ch_out,
        filter_size=3,
        stride=1,
        bias_on=False,
        norm_decay=None)

def __init__(self, ch_ins, ch_out, up_strides, dcn_v2=True):
    super(IDAUp, self).__init__()
    for i in range(1, len(ch_ins)):
        ch_in = ch_ins[i]
        up_s = int(up_strides[i])
        fan_in = ch_in * 3 * 3
        stdv = 1. / math.sqrt(fan_in)
        proj = nn.Sequential(
            ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=3,
                stride=1,
                use_dcn=dcn_v2,
                bias_on=dcn_v2,
                norm_decay=None,
                dcn_lr_scale=1.,
                dcn_regularizer=None,
                initializer=Uniform(-stdv, stdv)),
            nn.ReLU())
        node = nn.Sequential(
            ConvNormLayer(
                ch_out,
                ch_out,
                filter_size=3,
                stride=1,
                use_dcn=dcn_v2,
                bias_on=dcn_v2,
                norm_decay=None,
                dcn_lr_scale=1.,
                dcn_regularizer=None,
                initializer=Uniform(-stdv, stdv)),
            nn.ReLU())

        kernel_size = up_s * 2
        fan_in = ch_out * kernel_size * kernel_size
        stdv = 1. / math.sqrt(fan_in)
        up = nn.Conv2DTranspose(
            ch_out,
            ch_out,
            kernel_size=up_s * 2,
            stride=up_s,
            padding=up_s // 2,
            groups=ch_out,
            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
            bias_attr=False)
        fill_up_weights(up)
        setattr(self, 'proj_' + str(i), proj)
        setattr(self, 'up_' + str(i), up)
        setattr(self, 'node_' + str(i), node)

class TaskDecomposition(nn.Layer):
    """This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/models/dense_heads/tood_head.py
    """

    def __init__(
            self,
            feat_channels,
            stacked_convs,
            la_down_rate=8,
            norm_type='gn',
            norm_groups=32, ):
        super(TaskDecomposition, self).__init__()
        self.feat_channels = feat_channels
        self.stacked_convs = stacked_convs
        self.norm_type = norm_type
        self.norm_groups = norm_groups
        self.in_channels = self.feat_channels * self.stacked_convs
        self.la_conv1 = nn.Conv2D(self.in_channels,
                                  self.in_channels // la_down_rate, 1)
        self.la_conv2 = nn.Conv2D(self.in_channels // la_down_rate,
                                  self.stacked_convs, 1)
        self.reduction_conv = ConvNormLayer(
            self.in_channels,
            self.feat_channels,
            filter_size=1,
            stride=1,
            norm_type=self.norm_type,
            norm_groups=self.norm_groups)
        self._init_weights()

    def _init_weights(self):
        normal_(self.la_conv1.weight, std=0.001)
        normal_(self.la_conv2.weight, std=0.001)

    def forward(self, feat, avg_feat=None):
        b, _, h, w = feat.shape
        if avg_feat is None:
            avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))
        weight = F.relu(self.la_conv1(avg_feat))
        weight = F.sigmoid(self.la_conv2(weight))

        # here new_conv_weight = layer_attention_weight * conv_weight
        # in order to save memory and FLOPs.
        conv_weight = weight.reshape([b, 1, self.stacked_convs, 1]) * \
            self.reduction_conv.conv.weight.reshape(
                [1, self.feat_channels, self.stacked_convs, self.feat_channels])
        conv_weight = conv_weight.reshape(
            [b, self.feat_channels, self.in_channels])
        feat = feat.reshape([b, self.in_channels, h * w])
        feat = paddle.bmm(conv_weight, feat).reshape(
            [b, self.feat_channels, h, w])
        if self.norm_type is not None:
            feat = self.reduction_conv.norm(feat)
        feat = F.relu(feat)
        return feat

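# Added minimal usage sketch for the TaskDecomposition block above (not from
# the original source). It assumes paddle is installed and that the class and
# ppdet's ConvNormLayer dependency are importable (e.g. via
# ppdet.modeling.heads.tood_head). Input is the concatenation of the stacked
# inter-tower features; the output is reduced back to feat_channels.
import paddle

feat_channels, stacked_convs = 256, 6
task_decomp = TaskDecomposition(
    feat_channels, stacked_convs, la_down_rate=8, norm_type='gn',
    norm_groups=32)
# [N, feat_channels * stacked_convs, H, W] -> [N, feat_channels, H, W]
inter_feats = paddle.rand([2, feat_channels * stacked_convs, 32, 32])
out = task_decomp(inter_feats)  # shape: [2, 256, 32, 32]
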
def __init__(self, ch_in, ch_out, kernel_size, residual):
    super(Root, self).__init__()
    self.conv = ConvNormLayer(
        ch_in,
        ch_out,
        filter_size=1,
        stride=1,
        bias_on=False,
        norm_decay=None)
    self.residual = residual

def __init__(self,
             feat_in=256,
             feat_out=256,
             num_convs=4,
             norm_type='bn',
             use_dcn=False):
    super(FCOSFeat, self).__init__()
    self.num_convs = num_convs
    self.norm_type = norm_type
    self.cls_subnet_convs = []
    self.reg_subnet_convs = []
    for i in range(self.num_convs):
        in_c = feat_in if i == 0 else feat_out

        cls_conv_name = 'fcos_head_cls_tower_conv_{}'.format(i)
        cls_conv = self.add_sublayer(
            cls_conv_name,
            ConvNormLayer(
                ch_in=in_c,
                ch_out=feat_out,
                filter_size=3,
                stride=1,
                norm_type=norm_type,
                use_dcn=use_dcn,
                norm_name=cls_conv_name + '_norm',
                bias_on=True,
                lr_scale=2.,
                name=cls_conv_name))
        self.cls_subnet_convs.append(cls_conv)

        reg_conv_name = 'fcos_head_reg_tower_conv_{}'.format(i)
        reg_conv = self.add_sublayer(
            reg_conv_name,
            ConvNormLayer(
                ch_in=in_c,
                ch_out=feat_out,
                filter_size=3,
                stride=1,
                norm_type=norm_type,
                use_dcn=use_dcn,
                norm_name=reg_conv_name + '_norm',
                bias_on=True,
                lr_scale=2.,
                name=reg_conv_name))
        self.reg_subnet_convs.append(reg_conv)

def __init__(self,
             level,
             block,
             ch_in,
             ch_out,
             stride=1,
             level_root=False,
             root_dim=0,
             root_kernel_size=1,
             root_residual=False):
    super(Tree, self).__init__()
    if root_dim == 0:
        root_dim = 2 * ch_out
    if level_root:
        root_dim += ch_in

    if level == 1:
        self.tree1 = block(ch_in, ch_out, stride)
        self.tree2 = block(ch_out, ch_out, 1)
    else:
        self.tree1 = Tree(
            level - 1,
            block,
            ch_in,
            ch_out,
            stride,
            root_dim=0,
            root_kernel_size=root_kernel_size,
            root_residual=root_residual)
        self.tree2 = Tree(
            level - 1,
            block,
            ch_out,
            ch_out,
            1,
            root_dim=root_dim + ch_out,
            root_kernel_size=root_kernel_size,
            root_residual=root_residual)

    if level == 1:
        self.root = Root(root_dim, ch_out, root_kernel_size, root_residual)
    self.level_root = level_root
    self.root_dim = root_dim
    self.downsample = None
    self.project = None
    self.level = level
    if stride > 1:
        self.downsample = nn.MaxPool2D(stride, stride=stride)
    if ch_in != ch_out:
        self.project = ConvNormLayer(
            ch_in,
            ch_out,
            filter_size=1,
            stride=1,
            bias_on=False,
            norm_decay=None)

def __init__(self,
             num_convs=4,
             in_channels=256,
             out_channels=256,
             norm_type=None):
    super(MaskFeat, self).__init__()
    self.num_convs = num_convs
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.norm_type = norm_type
    fan_conv = out_channels * 3 * 3
    fan_deconv = out_channels * 2 * 2

    mask_conv = nn.Sequential()
    if norm_type == 'gn':
        for i in range(self.num_convs):
            conv_name = 'mask_inter_feat_{}'.format(i + 1)
            mask_conv.add_sublayer(
                conv_name,
                ConvNormLayer(
                    ch_in=in_channels if i == 0 else out_channels,
                    ch_out=out_channels,
                    filter_size=3,
                    stride=1,
                    norm_type=self.norm_type,
                    norm_name=conv_name + '_norm',
                    initializer=KaimingNormal(fan_in=fan_conv),
                    name=conv_name))
            mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
    else:
        for i in range(self.num_convs):
            conv_name = 'mask_inter_feat_{}'.format(i + 1)
            mask_conv.add_sublayer(
                conv_name,
                nn.Conv2D(
                    in_channels=in_channels if i == 0 else out_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    padding=1,
                    weight_attr=paddle.ParamAttr(
                        initializer=KaimingNormal(fan_in=fan_conv))))
            mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
    mask_conv.add_sublayer(
        'conv5_mask',
        nn.Conv2DTranspose(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=2,
            stride=2,
            weight_attr=paddle.ParamAttr(
                initializer=KaimingNormal(fan_in=fan_deconv))))
    mask_conv.add_sublayer('conv5_mask' + 'act', nn.ReLU())
    self.upsample = mask_conv

def __init__(self, ch_ins, ch_out, up_strides, dcn_v2=True):
    super(IDAUp, self).__init__()
    for i in range(1, len(ch_ins)):
        ch_in = ch_ins[i]
        up_s = int(up_strides[i])
        proj = nn.Sequential(
            ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=3,
                stride=1,
                use_dcn=dcn_v2,
                bias_on=dcn_v2,
                norm_decay=None,
                dcn_lr_scale=1.,
                dcn_regularizer=None),
            nn.ReLU())
        node = nn.Sequential(
            ConvNormLayer(
                ch_out,
                ch_out,
                filter_size=3,
                stride=1,
                use_dcn=dcn_v2,
                bias_on=dcn_v2,
                norm_decay=None,
                dcn_lr_scale=1.,
                dcn_regularizer=None),
            nn.ReLU())

        param_attr = paddle.ParamAttr(initializer=KaimingUniform())
        up = nn.Conv2DTranspose(
            ch_out,
            ch_out,
            kernel_size=up_s * 2,
            weight_attr=param_attr,
            stride=up_s,
            padding=up_s // 2,
            groups=ch_out,
            bias_attr=False)
        # TODO: uncomment fill_up_weights
        #fill_up_weights(up)
        setattr(self, 'proj_' + str(i), proj)
        setattr(self, 'up_' + str(i), up)
        setattr(self, 'node_' + str(i), node)

def __init__(self, depth=34, residual_root=False):
    super(DLA, self).__init__()
    levels, channels = DLA_cfg[depth]
    if depth == 34:
        block = BasicBlock
    if depth == 46 or depth == 60 or depth == 102:
        block = Bottleneck
    self.channels = channels
    self.base_layer = nn.Sequential(
        ConvNormLayer(
            3,
            channels[0],
            filter_size=7,
            stride=1,
            bias_on=False,
            norm_decay=None),
        nn.ReLU())
    self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
    self.level1 = self._make_conv_level(
        channels[0], channels[1], levels[1], stride=2)
    self.level2 = Tree(
        levels[2],
        block,
        channels[1],
        channels[2],
        2,
        level_root=False,
        root_residual=residual_root)
    self.level3 = Tree(
        levels[3],
        block,
        channels[2],
        channels[3],
        2,
        level_root=True,
        root_residual=residual_root)
    self.level4 = Tree(
        levels[4],
        block,
        channels[3],
        channels[4],
        2,
        level_root=True,
        root_residual=residual_root)
    self.level5 = Tree(
        levels[5],
        block,
        channels[4],
        channels[5],
        2,
        level_root=True,
        root_residual=residual_root)

def _make_conv_level(self, ch_in, ch_out, conv_num, stride=1):
    modules = []
    for i in range(conv_num):
        modules.extend([
            ConvNormLayer(
                ch_in,
                ch_out,
                filter_size=3,
                stride=stride if i == 0 else 1,
                bias_on=False,
                norm_decay=None),
            nn.ReLU()
        ])
        ch_in = ch_out
    return nn.Sequential(*modules)

def __init__(self,
             in_dim=256,
             num_convs=4,
             conv_dim=256,
             mlp_dim=1024,
             resolution=7,
             norm_type='gn',
             freeze_norm=False,
             stage_name=''):
    super(XConvNormHead, self).__init__()
    self.in_dim = in_dim
    self.num_convs = num_convs
    self.conv_dim = conv_dim
    self.mlp_dim = mlp_dim
    self.norm_type = norm_type
    self.freeze_norm = freeze_norm

    self.bbox_head_convs = []
    fan = conv_dim * 3 * 3
    initializer = KaimingNormal(fan_in=fan)
    for i in range(self.num_convs):
        in_c = in_dim if i == 0 else conv_dim
        head_conv_name = stage_name + 'bbox_head_conv{}'.format(i)
        head_conv = self.add_sublayer(
            head_conv_name,
            ConvNormLayer(
                ch_in=in_c,
                ch_out=conv_dim,
                filter_size=3,
                stride=1,
                norm_type=self.norm_type,
                norm_name=head_conv_name + '_norm',
                freeze_norm=self.freeze_norm,
                initializer=initializer,
                name=head_conv_name))
        self.bbox_head_convs.append(head_conv)

    fan = conv_dim * resolution * resolution
    self.fc6 = nn.Linear(
        conv_dim * resolution * resolution,
        mlp_dim,
        weight_attr=paddle.ParamAttr(
            initializer=XavierUniform(fan_out=fan)),
        bias_attr=paddle.ParamAttr(
            learning_rate=2., regularizer=L2Decay(0.)))

def __init__(self,
             feat_in=256,
             feat_out=96,
             num_fpn_stride=3,
             num_convs=2,
             norm_type='bn',
             share_cls_reg=False,
             act='hard_swish'):
    super(PicoFeat, self).__init__()
    self.num_convs = num_convs
    self.norm_type = norm_type
    self.share_cls_reg = share_cls_reg
    self.act = act
    self.cls_convs = []
    self.reg_convs = []
    for stage_idx in range(num_fpn_stride):
        cls_subnet_convs = []
        reg_subnet_convs = []
        for i in range(self.num_convs):
            in_c = feat_in if i == 0 else feat_out
            cls_conv_dw = self.add_sublayer(
                'cls_conv_dw{}.{}'.format(stage_idx, i),
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=5,
                    stride=1,
                    groups=feat_out,
                    norm_type=norm_type,
                    bias_on=False,
                    lr_scale=2.))
            cls_subnet_convs.append(cls_conv_dw)
            cls_conv_pw = self.add_sublayer(
                'cls_conv_pw{}.{}'.format(stage_idx, i),
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=1,
                    stride=1,
                    norm_type=norm_type,
                    bias_on=False,
                    lr_scale=2.))
            cls_subnet_convs.append(cls_conv_pw)

            if not self.share_cls_reg:
                reg_conv_dw = self.add_sublayer(
                    'reg_conv_dw{}.{}'.format(stage_idx, i),
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=feat_out,
                        filter_size=5,
                        stride=1,
                        groups=feat_out,
                        norm_type=norm_type,
                        bias_on=False,
                        lr_scale=2.))
                reg_subnet_convs.append(reg_conv_dw)
                reg_conv_pw = self.add_sublayer(
                    'reg_conv_pw{}.{}'.format(stage_idx, i),
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=feat_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        bias_on=False,
                        lr_scale=2.))
                reg_subnet_convs.append(reg_conv_pw)
        self.cls_convs.append(cls_subnet_convs)
        self.reg_convs.append(reg_subnet_convs)

def __init__(self, feat_channels):
    super(PicoSE, self).__init__()
    self.fc = nn.Conv2D(feat_channels, feat_channels, 1)
    self.conv = ConvNormLayer(feat_channels, feat_channels, 1, 1)
    self._init_weights()

def __init__(self,
             in_channels=256,
             mid_channels=128,
             out_channels=256,
             start_level=0,
             end_level=3,
             use_dcn_in_tower=False,
             norm_type='gn'):
    super(SOLOv2MaskHead, self).__init__()
    assert start_level >= 0 and end_level >= start_level
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.mid_channels = mid_channels
    self.use_dcn_in_tower = use_dcn_in_tower
    self.range_level = end_level - start_level + 1
    self.use_dcn = True if self.use_dcn_in_tower else False
    self.convs_all_levels = []
    self.norm_type = norm_type
    for i in range(start_level, end_level + 1):
        conv_feat_name = 'mask_feat_head.convs_all_levels.{}'.format(i)
        conv_pre_feat = nn.Sequential()
        if i == start_level:
            conv_pre_feat.add_sublayer(
                conv_feat_name + '.conv' + str(i),
                ConvNormLayer(
                    ch_in=self.in_channels,
                    ch_out=self.mid_channels,
                    filter_size=3,
                    stride=1,
                    use_dcn=self.use_dcn,
                    norm_type=self.norm_type))
            self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
            self.convs_all_levels.append(conv_pre_feat)
        else:
            for j in range(i):
                ch_in = 0
                if j == 0:
                    ch_in = self.in_channels + 2 if i == end_level else self.in_channels
                else:
                    ch_in = self.mid_channels
                conv_pre_feat.add_sublayer(
                    conv_feat_name + '.conv' + str(j),
                    ConvNormLayer(
                        ch_in=ch_in,
                        ch_out=self.mid_channels,
                        filter_size=3,
                        stride=1,
                        use_dcn=self.use_dcn,
                        norm_type=self.norm_type))
                conv_pre_feat.add_sublayer(
                    conv_feat_name + '.conv' + str(j) + 'act', nn.ReLU())
                conv_pre_feat.add_sublayer(
                    'upsample' + str(i) + str(j),
                    nn.Upsample(
                        scale_factor=2, mode='bilinear'))
            self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
            self.convs_all_levels.append(conv_pre_feat)

    conv_pred_name = 'mask_feat_head.conv_pred.0'
    self.conv_pred = self.add_sublayer(
        conv_pred_name,
        ConvNormLayer(
            ch_in=self.mid_channels,
            ch_out=self.out_channels,
            filter_size=1,
            stride=1,
            use_dcn=self.use_dcn,
            norm_type=self.norm_type))

def __init__(self,
             num_classes=80,
             in_channels=256,
             seg_feat_channels=256,
             stacked_convs=4,
             num_grids=[40, 36, 24, 16, 12],
             kernel_out_channels=256,
             dcn_v2_stages=[],
             segm_strides=[8, 8, 16, 32, 32],
             solov2_loss=None,
             score_threshold=0.1,
             mask_threshold=0.5,
             mask_nms=None,
             norm_type='gn',
             drop_block=False):
    super(SOLOv2Head, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.seg_num_grids = num_grids
    self.cate_out_channels = self.num_classes
    self.seg_feat_channels = seg_feat_channels
    self.stacked_convs = stacked_convs
    self.kernel_out_channels = kernel_out_channels
    self.dcn_v2_stages = dcn_v2_stages
    self.segm_strides = segm_strides
    self.solov2_loss = solov2_loss
    self.mask_nms = mask_nms
    self.score_threshold = score_threshold
    self.mask_threshold = mask_threshold
    self.norm_type = norm_type
    self.drop_block = drop_block

    self.kernel_pred_convs = []
    self.cate_pred_convs = []
    for i in range(self.stacked_convs):
        use_dcn = True if i in self.dcn_v2_stages else False
        ch_in = self.in_channels + 2 if i == 0 else self.seg_feat_channels
        kernel_conv = self.add_sublayer(
            'bbox_head.kernel_convs.' + str(i),
            ConvNormLayer(
                ch_in=ch_in,
                ch_out=self.seg_feat_channels,
                filter_size=3,
                stride=1,
                use_dcn=use_dcn,
                norm_type=self.norm_type))
        self.kernel_pred_convs.append(kernel_conv)
        ch_in = self.in_channels if i == 0 else self.seg_feat_channels
        cate_conv = self.add_sublayer(
            'bbox_head.cate_convs.' + str(i),
            ConvNormLayer(
                ch_in=ch_in,
                ch_out=self.seg_feat_channels,
                filter_size=3,
                stride=1,
                use_dcn=use_dcn,
                norm_type=self.norm_type))
        self.cate_pred_convs.append(cate_conv)

    self.solo_kernel = self.add_sublayer(
        'bbox_head.solo_kernel',
        nn.Conv2D(
            self.seg_feat_channels,
            self.kernel_out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(
                mean=0., std=0.01)),
            bias_attr=True))
    self.solo_cate = self.add_sublayer(
        'bbox_head.solo_cate',
        nn.Conv2D(
            self.seg_feat_channels,
            self.cate_out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(initializer=Normal(
                mean=0., std=0.01)),
            bias_attr=ParamAttr(initializer=Constant(
                value=float(-np.log((1 - 0.01) / 0.01))))))

    if self.drop_block and self.training:
        self.drop_block_fun = DropBlock(
            block_size=3, keep_prob=0.9, name='solo_cate.dropblock')

def __init__(self,
             feat_in=256,
             feat_out=96,
             num_fpn_stride=3,
             num_convs=2,
             norm_type='bn',
             share_cls_reg=False,
             act='hard_swish',
             use_se=False):
    super(PicoFeat, self).__init__()
    self.num_convs = num_convs
    self.norm_type = norm_type
    self.share_cls_reg = share_cls_reg
    self.act = act
    self.use_se = use_se
    self.cls_convs = []
    self.reg_convs = []
    if use_se:
        assert share_cls_reg == True, \
            'In the case of using se, share_cls_reg must be set to True'
        self.se = nn.LayerList()
    for stage_idx in range(num_fpn_stride):
        cls_subnet_convs = []
        reg_subnet_convs = []
        for i in range(self.num_convs):
            in_c = feat_in if i == 0 else feat_out
            cls_conv_dw = self.add_sublayer(
                'cls_conv_dw{}.{}'.format(stage_idx, i),
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=5,
                    stride=1,
                    groups=feat_out,
                    norm_type=norm_type,
                    bias_on=False,
                    lr_scale=2.))
            cls_subnet_convs.append(cls_conv_dw)
            cls_conv_pw = self.add_sublayer(
                'cls_conv_pw{}.{}'.format(stage_idx, i),
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=feat_out,
                    filter_size=1,
                    stride=1,
                    norm_type=norm_type,
                    bias_on=False,
                    lr_scale=2.))
            cls_subnet_convs.append(cls_conv_pw)

            if not self.share_cls_reg:
                reg_conv_dw = self.add_sublayer(
                    'reg_conv_dw{}.{}'.format(stage_idx, i),
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=feat_out,
                        filter_size=5,
                        stride=1,
                        groups=feat_out,
                        norm_type=norm_type,
                        bias_on=False,
                        lr_scale=2.))
                reg_subnet_convs.append(reg_conv_dw)
                reg_conv_pw = self.add_sublayer(
                    'reg_conv_pw{}.{}'.format(stage_idx, i),
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=feat_out,
                        filter_size=1,
                        stride=1,
                        norm_type=norm_type,
                        bias_on=False,
                        lr_scale=2.))
                reg_subnet_convs.append(reg_conv_pw)
        self.cls_convs.append(cls_subnet_convs)
        self.reg_convs.append(reg_subnet_convs)
        if use_se:
            self.se.append(PicoSE(feat_out))

def __init__(self,
             in_channels,
             out_channel,
             spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
             has_extra_convs=False,
             extra_stage=1,
             use_c5=True,
             norm_type=None,
             norm_decay=0.,
             freeze_norm=False,
             relu_before_extra_convs=True):
    super(FPN, self).__init__()
    self.out_channel = out_channel
    for s in range(extra_stage):
        spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
    self.spatial_scales = spatial_scales
    self.has_extra_convs = has_extra_convs
    self.extra_stage = extra_stage
    self.use_c5 = use_c5
    self.relu_before_extra_convs = relu_before_extra_convs
    self.norm_type = norm_type
    self.norm_decay = norm_decay
    self.freeze_norm = freeze_norm

    self.lateral_convs = []
    self.fpn_convs = []
    fan = out_channel * 3 * 3

    # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
    # 0 <= st_stage < ed_stage <= 3
    st_stage = 4 - len(in_channels)
    ed_stage = st_stage + len(in_channels) - 1
    for i in range(st_stage, ed_stage + 1):
        if i == 3:
            lateral_name = 'fpn_inner_res5_sum'
        else:
            lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
        in_c = in_channels[i - st_stage]
        if self.norm_type == 'gn':
            lateral = self.add_sublayer(
                lateral_name,
                ConvNormLayer(
                    ch_in=in_c,
                    ch_out=out_channel,
                    filter_size=1,
                    stride=1,
                    norm_type=self.norm_type,
                    norm_decay=self.norm_decay,
                    norm_name=lateral_name + '_norm',
                    freeze_norm=self.freeze_norm,
                    initializer=XavierUniform(fan_out=in_c),
                    name=lateral_name))
        else:
            lateral = self.add_sublayer(
                lateral_name,
                nn.Conv2D(
                    in_channels=in_c,
                    out_channels=out_channel,
                    kernel_size=1,
                    weight_attr=ParamAttr(
                        initializer=XavierUniform(fan_out=in_c))))
        self.lateral_convs.append(lateral)

        fpn_name = 'fpn_res{}_sum'.format(i + 2)
        if self.norm_type == 'gn':
            fpn_conv = self.add_sublayer(
                fpn_name,
                ConvNormLayer(
                    ch_in=out_channel,
                    ch_out=out_channel,
                    filter_size=3,
                    stride=1,
                    norm_type=self.norm_type,
                    norm_decay=self.norm_decay,
                    norm_name=fpn_name + '_norm',
                    freeze_norm=self.freeze_norm,
                    initializer=XavierUniform(fan_out=fan),
                    name=fpn_name))
        else:
            fpn_conv = self.add_sublayer(
                fpn_name,
                nn.Conv2D(
                    in_channels=out_channel,
                    out_channels=out_channel,
                    kernel_size=3,
                    padding=1,
                    weight_attr=ParamAttr(
                        initializer=XavierUniform(fan_out=fan))))
        self.fpn_convs.append(fpn_conv)

    # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
    if self.has_extra_convs:
        for i in range(self.extra_stage):
            lvl = ed_stage + 1 + i
            if i == 0 and self.use_c5:
                in_c = in_channels[-1]
            else:
                in_c = out_channel
            extra_fpn_name = 'fpn_{}'.format(lvl + 2)
            if self.norm_type == 'gn':
                extra_fpn_conv = self.add_sublayer(
                    extra_fpn_name,
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=out_channel,
                        filter_size=3,
                        stride=2,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        norm_name=extra_fpn_name + '_norm',
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=fan),
                        name=extra_fpn_name))
            else:
                extra_fpn_conv = self.add_sublayer(
                    extra_fpn_name,
                    nn.Conv2D(
                        in_channels=in_c,
                        out_channels=out_channel,
                        kernel_size=3,
                        stride=2,
                        padding=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=fan))))
            self.fpn_convs.append(extra_fpn_conv)

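# Added illustration (not from the original source) of the lateral naming
# scheme in FPN.__init__ above for a typical 4-level ResNet input (res2..res5);
# plain Python, no paddle required.
in_channels = [256, 512, 1024, 2048]
st_stage = 4 - len(in_channels)             # 0
ed_stage = st_stage + len(in_channels) - 1  # 3
for i in range(st_stage, ed_stage + 1):
    name = ('fpn_inner_res5_sum'
            if i == 3 else 'fpn_inner_res{}_sum_lateral'.format(i + 2))
    print(name)
# fpn_inner_res2_sum_lateral, fpn_inner_res3_sum_lateral,
# fpn_inner_res4_sum_lateral, fpn_inner_res5_sum
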
def __init__(self,
             num_classes=80,
             feat_channels=256,
             stacked_convs=6,
             fpn_strides=(8, 16, 32, 64, 128),
             grid_cell_scale=8,
             grid_cell_offset=0.5,
             norm_type='gn',
             norm_groups=32,
             static_assigner_epoch=4,
             use_align_head=True,
             loss_weight={
                 'class': 1.0,
                 'bbox': 1.0,
                 'iou': 2.0,
             },
             nms='MultiClassNMS',
             static_assigner='ATSSAssigner',
             assigner='TaskAlignedAssigner'):
    super(TOODHead, self).__init__()
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.stacked_convs = stacked_convs
    self.fpn_strides = fpn_strides
    self.grid_cell_scale = grid_cell_scale
    self.grid_cell_offset = grid_cell_offset
    self.static_assigner_epoch = static_assigner_epoch
    self.use_align_head = use_align_head
    self.nms = nms
    self.static_assigner = static_assigner
    self.assigner = assigner
    self.loss_weight = loss_weight
    self.giou_loss = GIoULoss()

    self.inter_convs = nn.LayerList()
    for i in range(self.stacked_convs):
        self.inter_convs.append(
            ConvNormLayer(
                self.feat_channels,
                self.feat_channels,
                filter_size=3,
                stride=1,
                norm_type=norm_type,
                norm_groups=norm_groups))

    self.cls_decomp = TaskDecomposition(
        self.feat_channels,
        self.stacked_convs,
        self.stacked_convs * 8,
        norm_type=norm_type,
        norm_groups=norm_groups)
    self.reg_decomp = TaskDecomposition(
        self.feat_channels,
        self.stacked_convs,
        self.stacked_convs * 8,
        norm_type=norm_type,
        norm_groups=norm_groups)

    self.tood_cls = nn.Conv2D(
        self.feat_channels, self.num_classes, 3, padding=1)
    self.tood_reg = nn.Conv2D(self.feat_channels, 4, 3, padding=1)

    if self.use_align_head:
        self.cls_prob_conv1 = nn.Conv2D(
            self.feat_channels * self.stacked_convs,
            self.feat_channels // 4, 1)
        self.cls_prob_conv2 = nn.Conv2D(
            self.feat_channels // 4, 1, 3, padding=1)
        self.reg_offset_conv1 = nn.Conv2D(
            self.feat_channels * self.stacked_convs,
            self.feat_channels // 4, 1)
        self.reg_offset_conv2 = nn.Conv2D(
            self.feat_channels // 4, 4 * 2, 3, padding=1)

    self.scales_regs = nn.LayerList([ScaleReg() for _ in self.fpn_strides])

    self._init_weights()
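
# Added arithmetic check (not from the original source) of the layer-attention
# bottleneck width implied by passing `self.stacked_convs * 8` as la_down_rate
# to TaskDecomposition above, using the default TOODHead arguments.
feat_channels, stacked_convs = 256, 6
la_down_rate = stacked_convs * 8             # 48
in_channels = feat_channels * stacked_convs  # 1536
print(in_channels // la_down_rate)           # 32 == feat_channels // 8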