def _cls_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class):
    """Build the classification tower and its prediction conv on ``conv_feat``.

    Four 3x3 conv + ReLU layers named ``cls_conv1`` .. ``cls_conv4`` are
    stacked, each using the ``self.cls_conv<i>_weight`` /
    ``self.cls_conv<i>_bias`` symbols. The result is cast to fp32 when
    ``self.p.fp16`` is set and then fed to the 3x3 ``cls_pred`` conv.

    :param conv_feat: input feature symbol
    :param conv_channel: number of filters of every tower conv
    :param num_base_anchor: multiplied with ``num_class - 1`` to obtain the
        number of prediction channels
    :param num_class: class count used for the prediction channel number
    :return: output symbol of the ``cls_pred`` conv
    """
    params = self.p

    feat = conv_feat
    for idx in range(1, 5):
        layer = "cls_conv%d" % idx
        feat = X.conv(
            data=feat,
            kernel=3,
            filter=conv_channel,
            weight=getattr(self, layer + "_weight"),
            bias=getattr(self, layer + "_bias"),
            no_bias=False,
            name=layer,
        )
        feat = X.relu(feat)

    if params.fp16:
        feat = X.to_fp32(feat, name="cls_conv4_fp32")

    return X.conv(
        data=feat,
        kernel=3,
        filter=num_base_anchor * (num_class - 1),
        weight=self.cls_pred_weight,
        bias=self.cls_pred_bias,
        no_bias=False,
        name="cls_pred",
    )
def se_v2_resnet_v1b_unit(input, name, filter, stride, dilate, proj, norm, **kwargs):
    """Bottleneck residual unit with an SE module applied after the 3x3 conv.

    Diff with v1: the SE module is moved to the 3x3 conv.

    :param input: input symbol
    :param name: prefix for every node name created here
    :param filter: output channel number; the two inner convs use ``filter // 4``
    :param stride: stride of the 3x3 conv and of the projection shortcut
    :param dilate: dilation of the 3x3 conv
    :param proj: when true, the shortcut is a strided 1x1 conv + norm instead
        of the identity
    :param norm: callable ``norm(data, name=...)`` used after every conv
    :param kwargs: accepted for signature compatibility; not used
    :return: output symbol of the final ReLU
    """
    mid_filter = filter // 4

    conv1 = conv(input, name=name + "_conv1", filter=mid_filter)
    bn1 = norm(conv1, name=name + "_bn1")
    relu1 = relu(bn1, name=name + "_relu1")

    # `dilate` used to be silently dropped here; forward it so that dilated
    # stages behave like the other bottleneck units.
    conv2 = conv(relu1, name=name + "_conv2", stride=stride, dilate=dilate, filter=mid_filter, kernel=3)
    bn2 = norm(conv2, name=name + "_bn2")
    relu2 = relu(bn2, name=name + "_relu2")
    relu2 = se(relu2, prefix=name + "_se2", f_down=filter // 16, f_up=mid_filter)

    conv3 = conv(relu2, name=name + "_conv3", filter=filter)
    bn3 = norm(conv3, name=name + "_bn3")

    if proj:
        shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride)
        shortcut = norm(shortcut, name=name + "_sc_bn")
    else:
        shortcut = input

    eltwise = add(bn3, shortcut, name=name + "_plus")
    return relu(eltwise, name=name + "_relu")
def _bbox_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class):
    """Build the regression tower and its prediction conv on ``conv_feat``.

    Four 3x3 conv + ReLU layers named ``bbox_conv1`` .. ``bbox_conv4`` are
    stacked, each using the ``self.bbox_conv<i>_weight`` /
    ``self.bbox_conv<i>_bias`` symbols. The result is cast to fp32 when
    ``self.p.fp16`` is set and then fed to the 3x3 ``bbox_pred`` conv.

    :param conv_feat: input feature symbol
    :param conv_channel: number of filters of every tower conv
    :param num_base_anchor: the prediction conv emits ``num_base_anchor * 4``
        channels
    :param num_class: not used by this method
    :return: output symbol of the ``bbox_pred`` conv
    """
    params = self.p

    feat = conv_feat
    for idx in range(1, 5):
        layer = "bbox_conv%d" % idx
        feat = X.conv(
            data=feat,
            kernel=3,
            filter=conv_channel,
            weight=getattr(self, layer + "_weight"),
            bias=getattr(self, layer + "_bias"),
            no_bias=False,
            name=layer,
        )
        feat = X.relu(feat)

    if params.fp16:
        feat = X.to_fp32(feat, name="bbox_conv4_fp32")

    return X.conv(
        data=feat,
        kernel=3,
        filter=num_base_anchor * 4,
        weight=self.bbox_pred_weight,
        bias=self.bbox_pred_bias,
        no_bias=False,
        name="bbox_pred",
    )
def dcn_resnet_unit(input, name, filter, stride, dilate, proj, norm, **kwargs):
    """Bottleneck residual unit whose 3x3 conv is a deformable convolution.

    :param input: input symbol
    :param name: prefix for every node name created here
    :param filter: output channel number; the two inner convs use ``filter // 4``
    :param stride: stride of the 3x3 (deformable) conv and of the projection
        shortcut
    :param dilate: dilation of the offset conv and of the deformable conv
    :param proj: when true, the shortcut is a strided 1x1 conv + norm instead
        of the identity
    :param norm: callable ``norm(data, name=...)`` used after every conv
    :param kwargs: accepted for signature compatibility; not used
    :return: output symbol of the final ReLU
    """
    mid_filter = filter // 4

    conv1 = conv(input, name=name + "_conv1", filter=mid_filter)
    bn1 = norm(conv1, name=name + "_bn1")
    relu1 = relu(bn1, name=name + "_relu1")

    # conv2 filter router
    # 72 = 2 (x/y) * 3 * 3 (kernel taps) * 4 (deformable groups)
    conv2_offset = conv(relu1, name=name + "_conv2_offset", filter=72, kernel=3, stride=stride, dilate=dilate)
    # A 3x3 kernel with dilation d needs padding d to keep the spatial size;
    # the previous fixed pad of (1, 1) was only right for dilate == 1.
    conv2 = mx.sym.contrib.DeformableConvolution(
        relu1,
        conv2_offset,
        kernel=(3, 3),
        stride=(stride, stride),
        dilate=(dilate, dilate),
        pad=(dilate, dilate),
        num_filter=mid_filter,
        num_deformable_group=4,
        no_bias=True,
        name=name + "_conv2",
    )
    bn2 = norm(conv2, name=name + "_bn2")
    relu2 = relu(bn2, name=name + "_relu2")

    conv3 = conv(relu2, name=name + "_conv3", filter=filter)
    bn3 = norm(conv3, name=name + "_bn3")

    if proj:
        shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride)
        shortcut = norm(shortcut, name=name + "_sc_bn")
    else:
        shortcut = input

    eltwise = add(bn3, shortcut, name=name + "_plus")
    return relu(eltwise, name=name + "_relu")
def trident_resnet_v1b_unit(input, name, id, filter, stride, dilate, proj, **kwargs):
    """Bottleneck unit (v1b layout) for one trident branch.

    Compared with v1, v1b moves stride=2 to the 3x3 conv instead of the 1x1
    conv and uses std in pre-processing. This is also known as the facebook
    re-implementation of ResNet (a.k.a. the torch ResNet).

    Conv weights and norm parameters are taken from ``X.shared_var`` when
    ``params.branch_conv_shared`` / ``params.branch_bn_shared`` are set, so
    that branches reuse the same variables.

    :param input: input symbol
    :param name: prefix for node and variable names
    :param id: branch identifier appended to node names, or ``None`` for no
        suffix
    :param filter: output channel number; inner convs use ``filter // 4``
    :param stride: stride of the 3x3 conv and of the projection shortcut
    :param dilate: dilation of the 3x3 conv
    :param proj: when true, use a conv + norm projection shortcut
    :param kwargs: must contain ``params`` exposing ``branch_bn_shared``,
        ``branch_conv_shared`` and ``normalizer``
    :return: output symbol of the final ReLU
    """
    params = kwargs["params"]
    share_bn = params.branch_bn_shared
    share_conv = params.branch_conv_shared
    norm = params.normalizer

    # ---------------- node-name suffixes ----------------
    if id is None:
        conv_postfix = ""
        bn_postfix = ""
        other_postfix = ""
    else:
        conv_postfix = ("_shared%s" if share_conv else "_branch%s") % id
        bn_postfix = ("_shared%s" if share_bn else "_branch%s") % id
        other_postfix = "_branch%s" % id

    # ---------------- per-layer keyword arguments ----------------
    def conv_params(layer):
        weight = X.shared_var(name + "_%s_weight" % layer) if share_conv else None
        return dict(weight=weight, name=name + "_%s" % layer + conv_postfix)

    def bn_params(layer):
        def shared(stat):
            return X.shared_var(name + "_%s_%s" % (layer, stat)) if share_bn else None

        args = dict(
            gamma=shared("gamma"),
            beta=shared("beta"),
            moving_mean=shared("moving_mean"),
            moving_var=shared("moving_var"),
            name=name + "_%s" % layer + bn_postfix,
        )
        # the "gn" normalizer is not given moving statistics
        if norm.__name__ == "gn":
            args.pop("moving_mean")
            args.pop("moving_var")
        return args

    # ---------------- graph ----------------
    mid_filter = filter // 4

    conv1 = conv(input, filter=mid_filter, **conv_params("conv1"))
    bn1 = norm(conv1, **bn_params("bn1"))
    # NOTE(review): both inner ReLUs get the same node name; kept as-is.
    relu1 = relu(bn1, name=name + other_postfix)

    conv2 = conv(relu1, filter=mid_filter, kernel=3, stride=stride, dilate=dilate, **conv_params("conv2"))
    bn2 = norm(conv2, **bn_params("bn2"))
    relu2 = relu(bn2, name=name + other_postfix)

    conv3 = conv(relu2, filter=filter, **conv_params("conv3"))
    bn3 = norm(conv3, **bn_params("bn3"))

    shortcut = input
    if proj:
        shortcut = conv(input, filter=filter, stride=stride, **conv_params("sc"))
        shortcut = norm(shortcut, **bn_params("sc_bn"))

    eltwise = add(bn3, shortcut, name=name + "_plus" + other_postfix)
    return relu(eltwise, name=name + "_relu" + other_postfix)
def _cls_head(self, conv_feat):
    """Build the two-layer fully connected classification head.

    ``conv_feat`` is flattened and passed through ``bbox_cls_fc1`` and
    ``bbox_cls_fc2`` (1024 units each); each fc is followed by
    ``self.add_norm`` and a ReLU.

    :param conv_feat: input feature symbol
    :return: output symbol of the second ReLU
    """
    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    feat = X.flatten(conv_feat, name="bbox_feat_flatten")
    for fc_name in ("bbox_cls_fc1", "bbox_cls_fc2"):
        feat = X.fc(feat, filter=1024, name=fc_name, init=xavier_init)
        feat = self.add_norm(feat)
        feat = X.relu(feat)
    return feat
def trident_resnet_v1_unit(input, name, id, filter, stride, dilate, proj, **kwargs):
    """Bottleneck unit (v1 layout) for one trident branch.

    The stride is applied on the first 1x1 conv; the 3x3 conv only receives
    ``dilate``. Conv weights and norm parameters are taken from
    ``X.shared_var`` when ``params.branch_conv_shared`` /
    ``params.branch_bn_shared`` are set.

    :param input: input symbol
    :param name: prefix for node and variable names
    :param id: branch identifier appended to node names, or ``None`` for no
        suffix
    :param filter: output channel number; inner convs use ``filter // 4``
    :param stride: stride of the first conv and of the projection shortcut
    :param dilate: dilation of the 3x3 conv
    :param proj: when true, use a conv + norm projection shortcut
    :param kwargs: must contain ``params`` exposing ``branch_bn_shared``,
        ``branch_conv_shared`` and ``normalizer``
    :return: output symbol of the final ReLU
    """
    params = kwargs["params"]
    share_bn = params.branch_bn_shared
    share_conv = params.branch_conv_shared
    norm = params.normalizer

    # ---------------- node-name suffixes ----------------
    if id is None:
        conv_postfix = ""
        bn_postfix = ""
        other_postfix = ""
    else:
        conv_postfix = ("_shared%s" if share_conv else "_branch%s") % id
        bn_postfix = ("_shared%s" if share_bn else "_branch%s") % id
        other_postfix = "_branch%s" % id

    # ---------------- per-layer keyword arguments ----------------
    def conv_params(layer):
        weight = X.shared_var(name + "_%s_weight" % layer) if share_conv else None
        return dict(weight=weight, name=name + "_%s" % layer + conv_postfix)

    def bn_params(layer):
        def shared(stat):
            return X.shared_var(name + "_%s_%s" % (layer, stat)) if share_bn else None

        return dict(
            gamma=shared("gamma"),
            beta=shared("beta"),
            moving_mean=shared("moving_mean"),
            moving_var=shared("moving_var"),
            name=name + "_%s" % layer + bn_postfix,
        )

    # ---------------- graph ----------------
    mid_filter = filter // 4

    conv1 = conv(input, filter=mid_filter, stride=stride, **conv_params("conv1"))
    bn1 = norm(conv1, **bn_params("bn1"))
    # NOTE(review): both inner ReLUs get the same node name; kept as-is.
    relu1 = relu(bn1, name=name + other_postfix)

    conv2 = conv(relu1, filter=mid_filter, kernel=3, dilate=dilate, **conv_params("conv2"))
    bn2 = norm(conv2, **bn_params("bn2"))
    relu2 = relu(bn2, name=name + other_postfix)

    conv3 = conv(relu2, filter=filter, **conv_params("conv3"))
    bn3 = norm(conv3, **bn_params("bn3"))

    shortcut = input
    if proj:
        shortcut = conv(input, filter=filter, stride=stride, **conv_params("sc"))
        shortcut = norm(shortcut, **bn_params("sc_bn"))

    eltwise = add(bn3, shortcut, name=name + "_plus" + other_postfix)
    return relu(eltwise, name=name + "_relu" + other_postfix)
def _refine_pts(self, cls_feat, reg_feat, dcn_offset, pts_init_out):
    """Build the classification and point-refinement outputs.

    Both branches apply a deformable conv driven by ``dcn_offset``, a ReLU and
    a 1x1 output conv. The refinement output is added to ``pts_init_out``
    with its gradient blocked.

    :param cls_feat: input symbol of the classification branch
    :param reg_feat: input symbol of the refinement branch
    :param dcn_offset: offset symbol fed to both deformable convs
    :param pts_init_out: symbol added (gradient-blocked) to the refinement
        output
    :return: tuple ``(pts_refine_out, cls_out)``
    """
    cfg = self.p
    mid_channel = cfg.head.point_conv_channel
    cls_channel = cfg.num_class - 1
    pts_channel = cfg.point_generate.num_points * 2

    def deform_conv(feat, weight, bias, node_name):
        # deformable conv shared by both branches; only data/params/name differ
        return mx.symbol.contrib.DeformableConvolution(
            data=feat,
            offset=dcn_offset,
            kernel=(self.dcn_kernel, self.dcn_kernel),
            pad=(self.dcn_pad, self.dcn_pad),
            stride=(1, 1),
            dilate=(1, 1),
            num_filter=mid_channel,
            weight=weight,
            bias=bias,
            no_bias=False,
            name=node_name,
        )

    # classification branch
    cls_hidden = X.relu(deform_conv(cls_feat, self.cls_conv_weight, self.cls_conv_bias, "cls_conv"))
    cls_out = X.conv(
        data=cls_hidden,
        kernel=1,
        filter=cls_channel,
        weight=self.cls_out_weight,
        bias=self.cls_out_bias,
        no_bias=False,
        name="cls_out",
    )

    # point refinement branch
    pts_hidden = X.relu(
        deform_conv(reg_feat, self.pts_refine_conv_weight, self.pts_refine_conv_bias, "pts_refine_conv")
    )
    pts_delta = X.conv(
        data=pts_hidden,
        kernel=1,
        filter=pts_channel,
        weight=self.pts_refine_out_weight,
        bias=self.pts_refine_out_bias,
        no_bias=False,
        name="pts_refine_out",
    )
    pts_refine_out = pts_delta + X.block_grad(pts_init_out)

    return pts_refine_out, cls_out
def _get_bbox_head_logit(self, conv_feat):
    """Return the two-fc bbox head feature, building it on first use.

    The feature is cached in ``self._head_feat``; later calls return the
    cached symbol and ignore ``conv_feat``.

    :param conv_feat: input feature symbol
    :return: output symbol of the ReLU after ``bbox_fc2``
    """
    if self._head_feat is None:
        xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

        feat = X.flatten(conv_feat, name="bbox_feat_flatten")
        for fc_name in ("bbox_fc1", "bbox_fc2"):
            feat = X.fc(feat, filter=1024, name=fc_name, init=xavier_init)
            feat = X.relu(feat)

        self._head_feat = feat

    return self._head_feat
def _get_bbox_head_logit(self, conv_feat):
    """Return the ResNet ``stage4`` bbox head feature, building it on first use.

    The head is ``Builder.resnet_stage`` (3 blocks, 2048 filters) followed by
    ``fixbn``, ReLU, a cast to fp32 and global pooling. The result is cached
    in ``self._head_feat``; later calls return the cached symbol and ignore
    ``conv_feat``.

    :param conv_feat: input feature symbol
    :return: output symbol of the ``pool1`` global pooling
    """
    if self._head_feat is None:
        from mxnext.backbone.resnet_v2 import Builder

        stage4 = Builder.resnet_stage(
            conv_feat,
            name="stage4",
            num_block=3,
            filter=2048,
            stride=1,
            dilate=1,
            norm_type=self.p.normalizer,
            norm_mom=0.9,
            ndev=8,
        )
        feat = X.fixbn(stage4, name='bn1')
        feat = X.relu(feat, name='relu1')
        feat = X.to_fp32(feat, name='c5_to_fp32')
        self._head_feat = X.pool(feat, global_pool=True, name='pool1')

    return self._head_feat
def _get_bbox_head_logit(self, conv_feat):
    """Return the bbox head feature built from convs, building it on first use.

    ``conv_feat`` is reshaped to ``(0, -1, 1, 1)`` and passed through two
    convs (``bbox_fc1`` and ``bbox_fc2``, 1024 filters each), each followed by
    ``self.add_norm`` and a ReLU. The result is cached in ``self._head_feat``;
    later calls return the cached symbol and ignore ``conv_feat``.

    :param conv_feat: input feature symbol
    :return: output symbol of the ReLU after ``bbox_fc2``
    """
    if self._head_feat is None:
        xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

        feat = X.reshape(conv_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")
        for fc_name in ("bbox_fc1", "bbox_fc2"):
            feat = X.conv(feat, filter=1024, name=fc_name, init=xavier_init)
            feat = self.add_norm(feat)
            feat = X.relu(feat)

        self._head_feat = feat

    return self._head_feat
def _get_mask_head_logit(self, conv_feat):
    """Return the mask head feature, building it on first use.

    The head is four 3x3 convs (``mask_fcn_conv1`` .. ``mask_fcn_conv4``,
    each followed by ``self.add_norm`` and a ReLU), then a chain of 2x2 /
    stride-2 deconvs with ReLU, then a cast to fp32. The result is cached in
    ``self._head_feat``; later calls return the cached symbol and ignore
    ``conv_feat``.

    :param conv_feat: input feature symbol
    :return: output symbol of the ``mask_up_to_fp32`` cast
    """
    if self._head_feat is not None:
        return self._head_feat

    up_stride = int(self.pMask.resolution // self.pMaskRoi.out_size)
    dim_reduced = self.pMask.dim_reduced

    msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)

    current = conv_feat
    for i in range(4):
        current = X.conv(
            current,
            name="mask_fcn_conv{}".format(i + 1),
            filter=dim_reduced,
            kernel=3,
            no_bias=False,
            init=msra_init
        )
        current = self.add_norm(current)
        current = X.relu(current)

    # Every 2x2 / stride-2 deconv doubles the spatial size, so reaching an
    # overall factor of `up_stride` takes floor(log2(up_stride)) of them.
    # The former `up_stride // 2` agrees with this only for up_stride <= 4.
    num_upsample = max(up_stride.bit_length() - 1, 0)

    mask_up = current
    for i in range(num_upsample):
        weight = X.var(
            name="mask_up{}_weight".format(i),
            init=msra_init,
            lr_mult=1,
            wd_mult=1)
        mask_up = mx.sym.Deconvolution(
            mask_up,
            kernel=(2, 2),
            stride=(2, 2),
            num_filter=dim_reduced,
            no_bias=False,
            weight=weight,
            name="mask_up{}".format(i)
        )
        mask_up = X.relu(
            mask_up,
            name="mask_up{}_relu".format(i))

    mask_up = X.to_fp32(mask_up, name='mask_up_to_fp32')
    self._head_feat = mask_up

    return self._head_feat
def get_output(self, conv_fpn_feat):
    """Return per-stride RPN classification logits and bbox deltas.

    For every stride in ``self.p.anchor_generate.stride`` a 3x3 conv + ReLU
    and two prediction convs are built on ``conv_fpn_feat['stride<s>']``.
    The conv weights and biases are the same variables for all strides.
    Results are cached on ``self``; once both dicts exist they are returned
    directly and ``conv_fpn_feat`` is ignored.

    :param conv_fpn_feat: mapping from ``'stride<s>'`` to feature symbol
    :return: tuple ``(cls_logit_dict, bbox_delta_dict)``, both keyed by stride
    """
    if self.cls_logit_dict is None or self.bbox_delta_dict is None:
        p = self.p
        num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
        conv_channel = p.head.conv_channel

        def shared_params(prefix):
            # weight first, then bias: one variable pair reused by every stride
            weight = X.var(prefix + '_weight', init=X.gauss(0.01))
            bias = X.var(prefix + '_bias', init=X.zero_init())
            return weight, bias

        # FPN RPN share weight
        rpn_conv_weight, rpn_conv_bias = shared_params('rpn_conv')
        rpn_conv_cls_weight, rpn_conv_cls_bias = shared_params('rpn_conv_cls')
        rpn_conv_bbox_weight, rpn_conv_bbox_bias = shared_params('rpn_conv_bbox')

        logits = {}
        deltas = {}
        for stride in p.anchor_generate.stride:
            hidden = X.conv(
                conv_fpn_feat['stride%s' % stride],
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3_%s" % stride,
                no_bias=False,
                weight=rpn_conv_weight,
                bias=rpn_conv_bias
            )
            hidden = X.relu(hidden, name='rpn_relu_%s' % stride)
            if p.fp16:
                hidden = X.to_fp32(hidden, name="rpn_relu_%s_fp32" % stride)

            logits[stride] = X.conv(
                hidden,
                filter=2 * num_base_anchor,
                name="rpn_cls_score_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_cls_weight,
                bias=rpn_conv_cls_bias
            )
            deltas[stride] = X.conv(
                hidden,
                filter=4 * num_base_anchor,
                name="rpn_bbox_pred_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_bbox_weight,
                bias=rpn_conv_bbox_bias
            )

        self.cls_logit_dict = logits
        self.bbox_delta_dict = deltas

    return self.cls_logit_dict, self.bbox_delta_dict
def _get_output(self, mask_pred_logits, conv_feat):
    """Build the IoU head and return its prediction logits.

    ``mask_pred_logits`` gets an extra axis 1, is pooled with a 2x2 / stride-2
    window and concatenated with ``conv_feat`` along axis 1. The result goes
    through four 3x3 convs with ReLU (the last one with stride 2), is
    flattened, and passes two 1024-unit fc + ReLU layers and a final fc with
    ``self.pBbox.num_class`` outputs.

    :param mask_pred_logits: mask prediction symbol
    :param conv_feat: feature symbol concatenated with the pooled mask
    :return: output symbol of the ``iou_head_pred`` fc
    """
    num_class = self.pBbox.num_class

    msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)
    normal_init = mx.init.Normal(0.01)
    kaiming_uniform = mx.init.Xavier(rnd_type='uniform', factor_type='in', magnitude=3)

    mask_pred_logits = mx.sym.expand_dims(mask_pred_logits, axis=1)
    pooled_mask = X.pool(
        mask_pred_logits,
        name='iou_head_maxpool_1',
        kernel=2,
        stride=2,
        pad=0,
    )
    feat = X.concat([conv_feat, pooled_mask], axis=1, name='iou_head_input')

    # iou_head_conv_0..2 keep the resolution, iou_head_conv_3 uses stride 2
    for idx, conv_stride in enumerate((1, 1, 1, 2)):
        feat = X.conv(
            feat,
            filter=256,
            kernel=3,
            stride=conv_stride,
            name='iou_head_conv_%d' % idx,
            no_bias=False,
            init=msra_init,
        )
        feat = X.relu(feat)

    feat = X.flatten(data=feat)
    for fc_name in ('iou_head_FC1', 'iou_head_FC2'):
        feat = X.relu(X.fc(feat, filter=1024, name=fc_name, init=kaiming_uniform))

    return X.fc(feat, filter=num_class, name='iou_head_pred', init=normal_init)
def PConvModule(x, out_channels=256, kernel_size=[3, 3, 3], dilation=[1, 1, 1], groups=[1, 1, 1], ibn=None,
                part_deform=False, PConv_idx=-1, start_level=1, norm=None, bilinear_upsample=None, feat_sizes=None):
    """Fuse every pyramid level with its two neighbours.

    Three ``sepc_conv`` variants are created, each with its own weight/bias
    variable pair:

    * index 0 (stride 1) is applied to the next level ``x[level + 1]``; its
      result is upsampled by 2 and cropped to the current level,
    * index 1 (stride 1) is applied to the current level,
    * index 2 (stride 2) is applied to the previous level ``x[level - 1]``.

    The available results are summed per level, optionally passed through
    ``ibn_func`` and finally through a ReLU.

    :param x: indexable sequence of feature symbols, one per level
    :param out_channels: output channels of every ``sepc_conv``
    :param kernel_size: kernel size per conv index
    :param dilation: dilation per conv index
    :param groups: group number per conv index
    :param ibn: when truthy, ``ibn_func`` is applied with ``norm``
    :param part_deform: forwarded to ``sepc_conv``; also enables
        zero-initialised offset variables
    :param PConv_idx: index of this module, used in all node names; must be
        > -1
    :param start_level: forwarded to ``sepc_conv``
    :param norm: normalizer callable, required when ``ibn`` is truthy
    :param bilinear_upsample: use ``BilinearResize2D`` instead of nearest
        ``UpSampling`` when truthy
    :param feat_sizes: forwarded to ``ibn_func``; must not be ``None``
    :return: list of fused feature symbols, one per level
    """
    assert PConv_idx > -1 and feat_sizes is not None

    name_pref = 'PConv{}_sepc'.format(PConv_idx)
    conv_ids = (0, 1, 2)

    # one (weight, bias) pair per conv, created in index order
    params = [
        (X.var(name=name_pref + '{}_weight'.format(k), init=X.gauss(std=0.01)),
         X.var(name=name_pref + '{}_bias'.format(k), init=X.zero_init()))
        for k in conv_ids
    ]

    if part_deform:
        # NOTE zero_init for offset's weight and bias
        offset_params = [
            (X.var(name=name_pref + '{}_offset_weight'.format(k), init=X.zero_init()),
             X.var(name=name_pref + '{}_offset_bias'.format(k), init=X.zero_init()))
            for k in conv_ids
        ]
    else:
        offset_params = [(None, None) for _ in conv_ids]

    norm_func = []
    if ibn:
        assert norm is not None
        norm_func = partial(norm, name=name_pref + '_ibn')

    def make_conv(k, conv_stride):
        weight, bias = params[k]
        weight_offset, bias_offset = offset_params[k]
        return partial(
            sepc_conv,
            name='PConv{}_sepc{}_'.format(PConv_idx, k),
            out_channels=out_channels,
            kernel_size=kernel_size[k],
            stride=conv_stride,
            padding=(kernel_size[k] + (dilation[k] - 1) * 2) // 2,
            dilation=dilation[k],
            groups=groups[k],
            deformable_groups=1,
            part_deform=part_deform,
            start_level=start_level,
            weight=weight,
            bias=bias,
            weight_offset=weight_offset,
            bias_offset=bias_offset,
        )

    from_upper = make_conv(0, 1)   # applied to x[level + 1]
    from_same = make_conv(1, 1)    # applied to x[level]
    from_lower = make_conv(2, 2)   # applied to x[level - 1]

    last_level = len(x) - 1
    next_x = []
    for level, feature in enumerate(x):
        fused = from_same(i=level, x=feature)

        if level > 0:
            fused = fused + from_lower(i=level, x=x[level - 1])

        if level < last_level:
            upper = from_upper(i=level, x=x[level + 1])
            up_name = 'PConv{}_upsampling_level{}'.format(PConv_idx, level)
            if bilinear_upsample:
                upper = mx.contrib.symbol.BilinearResize2D(
                    upper, scale_height=2, scale_width=2, name=up_name)
            else:
                upper = mx.sym.UpSampling(
                    upper, scale=2, sample_type='nearest', num_args=1, name=up_name)
            # crop the upsampled map to the current level before summing
            upper = mx.sym.slice_like(upper, fused)
            fused = fused + upper

        next_x.append(fused)

    if ibn:
        next_x = ibn_func(next_x, norm_func, feat_sizes)

    return [
        relu(item, name='PConv{}_level{}_relu'.format(PConv_idx, level))
        for level, item in enumerate(next_x)
    ]
def resnet_c4c5_factory(cls, depth, use_3x3_conv0, use_bn_preprocess, num_branch, branch_dilates, branch_ids,
                        branch_bn_shared, branch_conv_shared, branch_deform, norm_type="local", norm_mom=0.9,
                        ndev=None, fp16=False):
    """Return the C4 feature and the C5 feature after ``fixbn`` + ReLU.

    All arguments are forwarded positionally to ``cls.resnet_factory``, which
    must return exactly five stage outputs.

    :return: tuple ``(c4, c5)``
    """
    _, _, _, c4, c5 = cls.resnet_factory(
        depth, use_3x3_conv0, use_bn_preprocess, num_branch, branch_dilates, branch_ids,
        branch_bn_shared, branch_conv_shared, branch_deform, norm_type, norm_mom, ndev, fp16)

    c5 = X.relu(X.fixbn(c5, "bn1"))
    return c4, c5
def SEPCFPN(inputs, out_channels=256, pconv_deform=False, lcconv_deform=None, ibn=None, Pconv_num=4,
            start_level=1, norm=None, bilinear_upsample=None, feat_sizes=None):
    """Apply ``Pconv_num`` stacked ``PConvModule`` layers, optionally followed
    by separate classification / localisation convs.

    When ``lcconv_deform`` is ``None`` the output of the last ``PConvModule``
    is returned as-is. Otherwise every level is passed through a ``CConv``
    and an ``LConv`` ``sepc_conv`` (optionally normalised with ``ibn_func``),
    and the ReLU of both results is concatenated along dim 1.

    :param inputs: sequence of feature symbols, one per level
    :param out_channels: output channels of every conv
    :param pconv_deform: ``part_deform`` flag for the ``PConvModule`` layers
    :param lcconv_deform: ``None`` to skip the extra convs, else the
        ``part_deform`` flag for them (must be in ``[False, True]``)
    :param ibn: when truthy, ``norm`` is required and ``ibn_func`` is applied
    :param Pconv_num: number of stacked ``PConvModule`` layers
    :param start_level: forwarded to ``PConvModule`` and ``sepc_conv``
    :param norm: normalizer callable, required when ``ibn`` is truthy
    :param bilinear_upsample: forwarded to ``PConvModule``
    :param feat_sizes: forwarded to ``PConvModule`` and ``ibn_func``; must
        not be ``None``
    :return: list of feature symbols, one per level
    """
    assert feat_sizes is not None

    pconv_stack = [
        partial(
            PConvModule,
            out_channels=out_channels,
            ibn=ibn,
            part_deform=pconv_deform,
            PConv_idx=idx,
            start_level=start_level,
            norm=norm,
            bilinear_upsample=bilinear_upsample,
            feat_sizes=feat_sizes)
        for idx in range(Pconv_num)
    ]

    with_lcconv = lcconv_deform is not None
    if with_lcconv:
        assert lcconv_deform in [False, True]

        def var_pair(prefix, weight_init):
            # weight variable first, then its zero-initialised bias
            return (X.var(name=prefix + '_weight', init=weight_init),
                    X.var(name=prefix + '_bias', init=X.zero_init()))

        lconv_weight, lconv_bias = var_pair('LConv', X.gauss(std=0.01))
        cconv_weight, cconv_bias = var_pair('CConv', X.gauss(std=0.01))

        lconv_offset_weight = lconv_offset_bias = None
        cconv_offset_weight = cconv_offset_bias = None
        if lcconv_deform:
            lconv_offset_weight, lconv_offset_bias = var_pair('LConv_offset', X.zero_init())
            cconv_offset_weight, cconv_offset_bias = var_pair('CConv_offset', X.zero_init())

        common = dict(
            out_channels=out_channels, kernel_size=3, stride=1, padding=1, dilation=1, groups=1,
            deformable_groups=1, part_deform=lcconv_deform, start_level=start_level)
        lconv_func = partial(
            sepc_conv, name='LConv{}_', weight=lconv_weight, bias=lconv_bias,
            weight_offset=lconv_offset_weight, bias_offset=lconv_offset_bias, **common)
        cconv_func = partial(
            sepc_conv, name='CConv{}_', weight=cconv_weight, bias=cconv_bias,
            weight_offset=cconv_offset_weight, bias_offset=cconv_offset_bias, **common)

    if ibn:
        assert norm is not None
        lbn = partial(norm, name='lconv_ibn')
        cbn = partial(norm, name='cconv_ibn')

    x = inputs
    for pconv in pconv_stack:
        x = pconv(x)

    if not with_lcconv:
        return x

    cls_outs = [cconv_func(i=level, x=item) for level, item in enumerate(x)]
    loc_outs = [lconv_func(i=level, x=item) for level, item in enumerate(x)]
    if ibn:
        cls_outs = ibn_func(cls_outs, cbn, feat_sizes)
        loc_outs = ibn_func(loc_outs, lbn, feat_sizes)

    outs = []
    for cls_item, loc_item in zip(cls_outs, loc_outs):
        outs.append(mx.sym.Concat(relu(cls_item), relu(loc_item), num_args=2, dim=1))
    return outs
def _reg_head(self, conv_feat):
    """Build the convolutional regression head.

    Stacks ``self.p.num_block`` blocks (4 when that value is falsy), each
    made of a 3x3 conv with 256 filters named ``bbox_reg_block<i>``,
    ``self.add_norm`` and a ReLU.

    :param conv_feat: input feature symbol
    :return: output symbol of the last ReLU
    """
    depth = self.p.num_block or 4

    feat = conv_feat
    for idx in range(1, depth + 1):
        feat = X.conv(feat, kernel=3, filter=256, init=X.gauss(0.01), name="bbox_reg_block%s" % idx)
        feat = self.add_norm(feat)
        feat = X.relu(feat)
    return feat
def _convs_and_fcs(self, x, num_convs, num_fcs, name, conv_init, fc_init):
    '''
    Apply ``num_convs`` 3x3 conv + ReLU layers followed by ``num_fcs``
    1x1 conv + ReLU layers acting as fully connected layers.

    Args:
        x: [N, C, H, W] feature maps
        num_convs: int, number of 3x3 convs (``self.p.TSD.conv_out_channels`` filters)
        num_fcs: int, number of fc layers (``self.p.TSD.fc_out_channels`` filters)
        name: prefix for node names
        conv_init: mx initializer for the convs
        fc_init: mx initializer for the fc layers
    Returns:
        x: [N, C, H, W] or [N, C, 1, 1]; the input itself when both counts are 0
    '''
    if num_convs == 0 and num_fcs == 0:
        return x

    tsd_cfg = self.p.TSD
    out_channels = tsd_cfg.conv_out_channels
    out_fc_channels = tsd_cfg.fc_out_channels

    # an empty range makes this a no-op when num_convs <= 0
    for idx in range(num_convs):
        conv_out = X.conv(
            x, kernel=3, filter=out_channels, no_bias=False, name=name + '_conv%s' % idx, init=conv_init)
        x = X.relu(conv_out)

    if num_fcs > 0:
        # collapse the spatial dims so that the 1x1 convs behave like fc layers
        x = X.reshape(x, shape=(0, -1, 1, 1), name=name + '_conv_fc_flatten')
        for idx in range(num_fcs):
            fc_out = X.conv(
                x, kernel=1, filter=out_fc_channels, no_bias=False, name=name + '_fc%s' % idx, init=fc_init)
            x = X.relu(fc_out)

    return x
def _cls_subnet(self, conv_feat, stride):
    """Build the classification tower for one feature level.

    Three 3x3 convs named ``cls_conv1`` .. ``cls_conv3`` use the
    ``self.cls_conv<i>_weight`` / ``self.cls_conv<i>_bias`` symbols; each is
    followed by ``self.p.normalizer`` (named per ``stride``) and a ReLU. The
    result is cast to fp32 when ``self.p.fp16`` is set.

    :param conv_feat: input feature symbol
    :param stride: value inserted into the normalizer node names
    :return: output symbol of the tower
    """
    p = self.p
    norm = p.normalizer
    conv_channel = p.head.conv_channel

    # classification subnet
    feat = conv_feat
    for idx in (1, 2, 3):
        layer = "cls_conv%d" % idx
        feat = X.conv(
            data=feat,
            kernel=3,
            filter=conv_channel,
            weight=getattr(self, layer + "_weight"),
            bias=getattr(self, layer + "_bias"),
            no_bias=False,
            name=layer,
        )
        feat = norm(feat, name="{}_bn_s{}".format(layer, stride))
        feat = X.relu(feat)

    if p.fp16:
        feat = X.to_fp32(feat, name="cls_conv3_fp32")

    return feat
def _reg_subnet(self, conv_feat, stride):
    """Build the regression tower for one feature level.

    Three 3x3 convs named ``reg_conv1`` .. ``reg_conv3`` use the
    ``self.reg_conv<i>_weight`` / ``self.reg_conv<i>_bias`` symbols; each is
    followed by ``self.p.normalizer`` (named per ``stride``) and a ReLU. The
    result is cast to fp32 when ``self.p.fp16`` is set.

    :param conv_feat: input feature symbol
    :param stride: value inserted into the normalizer node names
    :return: output symbol of the tower
    """
    p = self.p
    norm = p.normalizer
    conv_channel = p.head.conv_channel

    # regression subnet
    feat = conv_feat
    for idx in (1, 2, 3):
        layer = "reg_conv%d" % idx
        feat = X.conv(
            data=feat,
            kernel=3,
            filter=conv_channel,
            weight=getattr(self, layer + "_weight"),
            bias=getattr(self, layer + "_bias"),
            no_bias=False,
            name=layer,
        )
        feat = norm(feat, name="{}_bn_s{}".format(layer, stride))
        feat = X.relu(feat)

    if p.fp16:
        feat = X.to_fp32(feat, name="reg_conv3_fp32")

    return feat
def _get_bbox_head_logit(self, conv_feat):
    """Build the two-layer bbox head for ``self.stage`` and return its feature.

    ``conv_feat`` is flattened, reshaped to ``(0, 0, 1, 1)`` and passed
    through two convs with 1024 filters that use ``self.fc1_weight`` and
    ``self.fc2_weight``, each followed by a ReLU. All node names carry
    ``self.stage`` as suffix.

    The head is rebuilt on every call: ``self._head_feat`` is overwritten but
    never used to short-circuit (presumably so the head can be applied to
    different inputs -- confirm with callers).

    :param conv_feat: input feature symbol
    :return: output symbol of the second ReLU
    """
    stage = self.stage

    flatten = X.flatten(conv_feat, name="bbox_feat_flatten_" + stage)
    reshape = X.reshape(flatten, (0, 0, 1, 1), name="bbox_feat_reshape_" + stage)
    fc1 = X.conv(reshape, filter=1024, weight=self.fc1_weight, name="bbox_fc1_" + stage)
    fc1_relu = X.relu(fc1, name="bbox_fc1_relu_" + stage)
    fc2 = X.conv(fc1_relu, filter=1024, weight=self.fc2_weight, name="bbox_fc2_" + stage)
    # was named "bbox_fc2_" + stage, which duplicated the fc2 conv's node name
    fc2_relu = X.relu(fc2, name="bbox_fc2_relu_" + stage)

    self._head_feat = fc2_relu

    return self._head_feat
def _init_pts(self, reg_feat):
    """Build the initial point-offset prediction.

    A 3x3 conv (``pts_init_conv``) with ReLU is followed by a 1x1 conv
    (``pts_init_out``) that emits ``num_points * 2`` channels.

    :param reg_feat: input feature symbol
    :return: output symbol of the ``pts_init_out`` conv
    """
    cfg = self.p
    mid_channel = cfg.head.point_conv_channel
    pts_channel = cfg.point_generate.num_points * 2

    hidden = X.conv(
        data=reg_feat,
        kernel=3,
        filter=mid_channel,
        weight=self.pts_init_conv_weight,
        bias=self.pts_init_conv_bias,
        no_bias=False,
        name="pts_init_conv",
    )
    hidden = X.relu(hidden)

    return X.conv(
        data=hidden,
        kernel=1,
        filter=pts_channel,
        weight=self.pts_init_out_weight,
        bias=self.pts_init_out_bias,
        no_bias=False,
        name="pts_init_out",
    )
def resnet_trident_unit(cls, data, name, filter, stride, dilate, proj, norm_type, norm_mom, ndev,
                        branch_ids, branch_bn_shared, branch_conv_shared, branch_deform=False):
    """
    Build one multi-branch residual unit (norm -> ReLU -> conv, three times)
    plus a shortcut, operating on a list of per-branch symbols.

    :param data: list of input symbols, one per branch
    :param name: prefix for node names
    :param filter: output channel number; inner convs use ``filter // 4``
    :param stride: stride of the 3x3 conv
    :param dilate: dilation (also used as padding) of the 3x3 conv
    :param proj: when true, the shortcut is a 1x1 conv on the first ReLU
        output instead of ``data``
    :param norm_type: forwarded to ``X.normalizer_factory``
    :param norm_mom: forwarded to ``X.normalizer_factory``
    :param ndev: forwarded to ``X.normalizer_factory``
    :param branch_ids: branch identifiers; defaults to ``range(len(data))``
    :param branch_bn_shared: ``share_weight`` flag for the norm layers
    :param branch_conv_shared: ``share_weight`` flag for the conv layers
    :param branch_deform: use a deformable 3x3 conv when true
    :return: list of output symbols, one per branch
    """
    if branch_ids is None:
        branch_ids = range(len(data))

    norm = X.normalizer_factory(type=norm_type, ndev=ndev, mom=norm_mom)

    bn_args = dict(normalizer=norm, branch_ids=branch_ids, share_weight=branch_bn_shared)
    conv_args = dict(branch_ids=branch_ids, share_weight=branch_conv_shared)
    mid_filter = filter // 4

    def bn_relu(feats, suffix):
        # shared/per-branch norm followed by a ReLU on every branch
        normed = cls.bn_shared(feats, name=name + suffix, **bn_args)
        return [X.relu(item) for item in normed]

    relu1 = bn_relu(data, "_bn1")
    conv1 = cls.conv_shared(relu1, name=name + "_conv1", num_filter=mid_filter, kernel=(1, 1), **conv_args)

    relu2 = bn_relu(conv1, "_bn2")
    if branch_deform:
        # NOTE(review): the offset conv uses fixed stride/pad/dilate of 1
        # while conv2 uses `stride`/`dilate`; kept exactly as in the original.
        conv2_offset = cls.conv_shared(
            relu2, name=name + "_conv2_offset", num_filter=72, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
            dilate=(1, 1), no_bias=False, **conv_args)
        conv2 = cls.deform_conv_shared(
            relu2, name=name + "_conv2", conv_offset=conv2_offset, num_filter=mid_filter, kernel=(3, 3),
            pad=dilate, stride=stride, dilate=dilate, num_deformable_group=4, **conv_args)
    else:
        conv2 = cls.conv_shared(
            relu2, name=name + "_conv2", num_filter=mid_filter, kernel=(3, 3), pad=dilate, stride=stride,
            dilate=dilate, **conv_args)

    relu3 = bn_relu(conv2, "_bn3")
    conv3 = cls.conv_shared(relu3, name=name + "_conv3", num_filter=filter, kernel=(1, 1), **conv_args)

    if proj:
        shortcut = cls.conv_shared(relu1, name=name + "_sc", num_filter=filter, kernel=(1, 1), **conv_args)
    else:
        shortcut = data

    outputs = []
    for branch_id, residual, skip in zip(branch_ids, conv3, shortcut):
        outputs.append(X.add(residual, skip, name=name + "_plus_branch{}".format(branch_id)))
    return outputs
def get_retinanet_neck(self, data):
    """Build the P3-P7 feature pyramid with a normalizer on every output.

    P5 comes from a lateral conv on c5. P4 and P3 add the upsampled (and
    cropped) upper level to a lateral conv on c4 / c3. Each of P3-P5 gets a
    3x3 output conv. P6 is a stride-2 3x3 conv on c5 and P7 a stride-2 3x3
    conv on ReLU(P6). All five outputs pass through ``self.p.normalizer``.

    :param data: sequence ``(c2, c3, c4, c5)``; c2 is not used
    :return: tuple ``(p3, p4, p5, p6, p7)`` of normalised symbols
    """
    norm = self.p.normalizer
    _, c3, c4, c5 = data

    import mxnet as mx
    xavier_init = mx.init.Xavier(factor_type="avg", rnd_type="uniform", magnitude=3)

    def pyramid_conv(feat, node_name, **conv_kwargs):
        # conv with explicitly named weight (xavier) and bias (zero) variables
        weight = X.var(name=node_name + "_weight", init=xavier_init)
        bias = X.var(name=node_name + "_bias", init=X.zero_init())
        return X.conv(data=feat, filter=256, no_bias=False, weight=weight, bias=bias, name=node_name,
                      **conv_kwargs)

    def top_down(upper, upper_name, lower_in, lower_name):
        # upsample `upper`, crop it to the lateral of `lower_in`, and sum both
        upsampled = mx.sym.UpSampling(
            upper, scale=2, sample_type="nearest", name=upper_name + "_upsampling", num_args=1)
        lateral = pyramid_conv(lower_in, lower_name + "_lateral")
        clipped = mx.sym.slice_like(upsampled, lateral, name=lower_name + "_clip")
        return mx.sym.add_n(clipped, lateral, name=lower_name + "_sum")

    # P5
    p5 = pyramid_conv(c5, "P5_lateral")
    p5_conv = pyramid_conv(p5, "P5_conv", kernel=3)

    # P4
    p4 = top_down(p5, "P5", c4, "P4")
    p4_conv = pyramid_conv(p4, "P4_conv", kernel=3)

    # P3
    p3 = top_down(p4, "P4", c3, "P3")
    p3_conv = pyramid_conv(p3, "P3_conv", kernel=3)

    # P6
    P6 = pyramid_conv(c5, "P6_conv", kernel=3, stride=2)

    # P7
    P6_relu = X.relu(data=P6, name="P6_relu")
    P7 = pyramid_conv(P6_relu, "P7_conv", kernel=3, stride=2)

    p3_conv = norm(p3_conv, name="P3_conv_bn")
    p4_conv = norm(p4_conv, name="P4_conv_bn")
    p5_conv = norm(p5_conv, name="P5_conv_bn")
    P6 = norm(P6, name="P6_conv_bn")
    P7 = norm(P7, name="P7_conv_bn")

    return p3_conv, p4_conv, p5_conv, P6, P7
def get_retinanet_neck(self, data):
    """Build the P3-P7 RetinaNet pyramid once and cache it on ``self.neck``.

    Args:
        data: four backbone feature symbols, unpacked as (c2, c3, c4, c5).
            c2 is not used.

    Returns:
        Dict with keys ``stride8``, ``stride16``, ``stride32``, ``stride64``
        and ``stride128`` holding the P3..P7 symbols. When ``self.neck`` is
        already set, it is returned unchanged and nothing is rebuilt.
    """
    if self.neck is not None:
        return self.neck

    _, c3, c4, c5 = data

    import mxnet as mx

    weight_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    def biased_conv(feat, prefix, **conv_args):
        # 256-filter conv with explicit weight/bias variables named after `prefix`.
        return X.conv(
            data=feat,
            filter=256,
            no_bias=False,
            weight=X.var(name=prefix + "_weight", init=weight_init),
            bias=X.var(name=prefix + "_bias", init=X.zero_init()),
            name=prefix,
            **conv_args
        )

    # P5: lateral conv on c5, then a 3x3 output conv.
    lateral5 = biased_conv(c5, "P5_lateral")
    out5 = biased_conv(lateral5, "P5_conv", kernel=3)

    # P4: 2x nearest upsampling of P5, trimmed to the c4 lateral and summed.
    up5 = mx.sym.UpSampling(
        lateral5, scale=2, sample_type="nearest", name="P5_upsampling", num_args=1
    )
    lateral4 = biased_conv(c4, "P4_lateral")
    up5_trimmed = mx.sym.slice_like(up5, lateral4, name="P4_clip")
    merged4 = mx.sym.add_n(up5_trimmed, lateral4, name="P4_sum")
    out4 = biased_conv(merged4, "P4_conv", kernel=3)

    # P3: same top-down merge, one level finer.
    up4 = mx.sym.UpSampling(
        merged4, scale=2, sample_type="nearest", name="P4_upsampling", num_args=1
    )
    lateral3 = biased_conv(c3, "P3_lateral")
    up4_trimmed = mx.sym.slice_like(up4, lateral3, name="P3_clip")
    merged3 = mx.sym.add_n(up4_trimmed, lateral3, name="P3_sum")
    out3 = biased_conv(merged3, "P3_conv", kernel=3)

    # P6: stride-2 3x3 conv applied directly to c5.
    out6 = biased_conv(c5, "P6_conv", kernel=3, stride=2)

    # P7: stride-2 3x3 conv on the ReLU of P6.
    relu6 = X.relu(data=out6, name="P6_relu")
    out7 = biased_conv(relu6, "P7_conv", kernel=3, stride=2)

    self.neck = {
        "stride8": out3,
        "stride16": out4,
        "stride32": out5,
        "stride64": out6,
        "stride128": out7,
    }
    return self.neck
def get_output(self, fpn_conv_feats, roi_feat, rois, is_train):
    """Build the plain and TSD classification / regression head outputs.

    Args:
        fpn_conv_feats: dict of FPN features, each
            [batch_image, in_channels, fh, fw]
        roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
        rois: [batch_image, image_roi, 4]
        is_train: boolean

    Returns:
        cls_logit: [batch_image * image_roi, num_class]
        bbox_delta: [batch_image * image_roi, num_class * 4]
        tsd_cls_logit: [batch_image * image_roi, num_class]
        tsd_bbox_delta: [batch_image * image_roi, num_class * 4]
        delta_c: [batch_image * image_roi, 2*roi_size*roi_size, 1, 1]
        delta_r: [batch_image * image_roi, 2, 1, 1]
    """
    conv_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    def conv_fc_stack(feat, num_convs, num_fcs, name):
        # Thin wrapper: every stack shares the same conv / fc initializers.
        # Output shape depends on _convs_and_fcs (not shown here) - TODO confirm.
        return self._convs_and_fcs(
            feat, num_convs, num_fcs, name=name,
            conv_init=conv_init, fc_init=X.gauss(0.01))

    # Flatten the RoI feature: [batch_roi, 256, 7, 7] -> [batch_roi, 256*7*7, 1, 1]
    flat_feat = X.reshape(roi_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")

    # Shared 1x1 layer feeding both offset branches: [batch_roi, 256, 1, 1]
    shared = X.conv(data=flat_feat, kernel=1, filter=256,
                    name="delta_shared_fc1", no_bias=False)
    shared = X.relu(shared, name="delta_shared_fc1_relu")

    # delta_c branch: [batch_roi, 2*roi_size*roi_size, 1, 1]
    delta_c = X.conv(shared, filter=256, name="delta_c_fc1", init=X.gauss(0.01))
    delta_c = X.relu(delta_c, name="delta_c_fc1_relu")
    delta_c = X.conv(delta_c, filter=2 * self.p.roi_size**2,
                     name="delta_c_fc2", init=X.gauss(0.01))

    # delta_r branch: [batch_roi, 2, 1, 1]
    delta_r = X.conv(shared, filter=256, name="delta_r_fc1", init=X.gauss(0.01))
    delta_r = X.relu(delta_r, name="delta_r_fc1_relu")
    delta_r = X.conv(delta_r, filter=2, name="delta_r_fc2", init=X.gauss(0.01))

    # A fixed 1000 RoIs per image is used whenever is_train is false.
    rois_per_image = self.p.image_roi if is_train else 1000
    images_per_batch = self.p.batch_image

    # Re-pool RoI features with the predicted offsets: [batch_roi, 256, 7, 7]
    tsd_cls_feat = self.delta_c_pool.get_roi_feature(
        fpn_conv_feats, rois, delta_c,
        image_rois=rois_per_image, batch_image=images_per_batch)
    tsd_loc_feat = self.delta_r_pool.get_roi_feature(
        fpn_conv_feats, rois, delta_r,
        image_rois=rois_per_image, batch_image=images_per_batch)

    # TSD towers: shared conv/fc stack, then task-specific fc stack.
    tsd_cls_x = conv_fc_stack(tsd_cls_feat, self.p.TSD.num_shared_convs,
                              self.p.TSD.num_shared_fcs, 'TSD_pc')
    tsd_reg_x = conv_fc_stack(tsd_loc_feat, self.p.TSD.num_shared_convs,
                              self.p.TSD.num_shared_fcs, 'TSD_pr')
    tsd_cls_x = conv_fc_stack(tsd_cls_x, 0, self.p.TSD.num_cls_fcs, 'TSD_cls')
    tsd_reg_x = conv_fc_stack(tsd_reg_x, 0, self.p.TSD.num_reg_fcs, 'TSD_reg')

    num_class = self.p.num_class
    if self.p.regress_target.class_agnostic:
        num_reg_class = 2
    else:
        num_reg_class = num_class

    tsd_cls_logit = X.fc(tsd_cls_x, filter=num_class,
                         name='tsd_cls_logit', init=X.gauss(0.01))
    tsd_bbox_delta = X.fc(tsd_reg_x, filter=4 * num_reg_class,
                          name='tsd_reg_delta', init=X.gauss(0.01))

    # Plain head on the original RoI feature, same layer counts as the TSD towers.
    plain_x = conv_fc_stack(roi_feat, self.p.TSD.num_shared_convs,
                            self.p.TSD.num_shared_fcs, 'shared_fc')
    plain_cls_x = conv_fc_stack(plain_x, 0, self.p.TSD.num_cls_fcs, 'cls')
    plain_reg_x = conv_fc_stack(plain_x, 0, self.p.TSD.num_reg_fcs, 'reg')

    cls_logit = X.fc(plain_cls_x, filter=num_class,
                     name='bbox_cls_logit', init=X.gauss(0.01))
    bbox_delta = X.fc(plain_reg_x, filter=4 * num_reg_class,
                      name='bbox_reg_delta', init=X.gauss(0.01))

    if self.p.fp16:
        # Cast every returned symbol back to fp32.
        cls_logit = X.to_fp32(cls_logit, name="cls_logits_fp32")
        bbox_delta = X.to_fp32(bbox_delta, name="bbox_delta_fp32")
        tsd_cls_logit = X.to_fp32(tsd_cls_logit, name="tsd_cls_logit_fp32")
        tsd_bbox_delta = X.to_fp32(tsd_bbox_delta, name="tsd_bbox_delta_fp32")
        delta_c = X.to_fp32(delta_c, name="delta_c_fp32")
        delta_r = X.to_fp32(delta_r, name="delta_r_fp32")

    return cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r
def get_retinanet_neck(data):
    """Build the P3-P7 RetinaNet feature pyramid from backbone features.

    Args:
        data: four backbone feature symbols, unpacked as (c2, c3, c4, c5).
            c2 is not used.

    Returns:
        Tuple (p3_conv, p4_conv, p5_conv, p6, p7) of pyramid symbols.
    """
    _, c3, c4, c5 = data

    import mxnet as mx

    weight_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    def biased_conv(feat, prefix, **conv_args):
        # 256-filter conv with explicit weight/bias variables named after `prefix`.
        return X.conv(
            data=feat,
            filter=256,
            no_bias=False,
            weight=X.var(name=prefix + "_weight", init=weight_init),
            bias=X.var(name=prefix + "_bias", init=X.zero_init()),
            name=prefix,
            **conv_args
        )

    # P5: lateral conv on c5, then a 3x3 output conv.
    p5 = biased_conv(c5, "P5_lateral")
    p5_conv = biased_conv(p5, "P5_conv", kernel=3)

    # P4: 2x nearest upsampling of P5, trimmed to the c4 lateral and summed.
    # slice_like / add_n are used instead of the legacy Crop / ElementWiseSum
    # operators; node names are kept, so parameter names are unaffected.
    p5_up = mx.sym.UpSampling(
        p5, scale=2, sample_type="nearest", name="P5_upsampling", num_args=1
    )
    p4_la = biased_conv(c4, "P4_lateral")
    p5_clip = mx.sym.slice_like(p5_up, p4_la, name="P4_clip")
    p4 = mx.sym.add_n(p5_clip, p4_la, name="P4_sum")
    p4_conv = biased_conv(p4, "P4_conv", kernel=3)

    # P3: same top-down merge, one level finer.
    p4_up = mx.sym.UpSampling(
        p4, scale=2, sample_type="nearest", name="P4_upsampling", num_args=1
    )
    p3_la = biased_conv(c3, "P3_lateral")
    p4_clip = mx.sym.slice_like(p4_up, p3_la, name="P3_clip")
    p3 = mx.sym.add_n(p4_clip, p3_la, name="P3_sum")
    p3_conv = biased_conv(p3, "P3_conv", kernel=3)

    # P6: stride-2 3x3 conv applied directly to c5.
    p6 = biased_conv(c5, "P6_conv", kernel=3, stride=2)

    # P7: stride-2 3x3 conv on the ReLU of P6.
    p6_relu = X.relu(data=p6, name="P6_relu")
    p7 = biased_conv(p6_relu, "P7_conv", kernel=3, stride=2)

    return p3_conv, p4_conv, p5_conv, p6, p7
def get_refine_output(self, roi_feature, cls_logit, bbox_delta, cls_sec_logit, bbox_sec_delta):
    """Build refinement heads from RoI features plus two sets of predictions.

    Each prediction set (primary and "sec") is reduced, tiled, concatenated
    onto ``roi_feature`` and fed through an fc layer whose weight and bias
    (``self.fc3_weight`` / ``self.fc3_bias``) are shared by both branches.

    Args:
        roi_feature: RoI feature symbol; concatenated with the tiled
            predictions along dim 1.
        cls_logit: primary classification logits.
        bbox_delta: primary box deltas.
        cls_sec_logit: secondary classification logits.
        bbox_sec_delta: secondary box deltas.

    Returns:
        Tuple (refine_cls_logit, refine_bbox_delta, refine_cls_sec_logit,
        refine_bbox_sec_delta).
    """
    cfg = self.p
    num_class = cfg.num_class
    repeat_time = cfg.repeat_time
    if cfg.regress_target.class_agnostic:
        num_reg_class = 2
    else:
        num_reg_class = num_class

    # Softmax scores with column 0 dropped (presumably background - confirm).
    probs1 = mx.sym.softmax(cls_logit)
    probs1 = mx.sym.slice_axis(probs1, axis=1, begin=1, end=num_class)
    probs2 = mx.sym.softmax(cls_sec_logit)
    probs2 = mx.sym.slice_axis(probs2, axis=1, begin=1, end=num_class)

    # Box deltas with the first four columns dropped.
    deltas1 = mx.sym.slice_axis(bbox_delta, axis=1, begin=4, end=num_reg_class * 4)
    deltas2 = mx.sym.slice_axis(bbox_sec_delta, axis=1, begin=4, end=num_reg_class * 4)

    # [deltas, scores] repeated `repeat_time` times along dim 1.
    packed1 = mx.sym.concat(*[deltas1, probs1], dim=1)
    packed1 = mx.sym.tile(packed1, reps=(1, repeat_time))
    packed2 = mx.sym.concat(*[deltas2, probs2], dim=1)
    packed2 = mx.sym.tile(packed2, reps=(1, repeat_time))

    fused1 = mx.sym.concat(*[roi_feature, packed1], dim=1)
    fused2 = mx.sym.concat(*[roi_feature, packed2], dim=1)

    # Both branches go through fc layers that share one weight / bias pair.
    hidden1 = X.relu(X.fc(fused1, filter=1024, weight=self.fc3_weight,
                          bias=self.fc3_bias, name='fc3_conv_refine1'))
    hidden2 = X.relu(X.fc(fused2, filter=1024, weight=self.fc3_weight,
                          bias=self.fc3_bias, name='fc3_conv_refine2'))

    refine_cls_logit = X.fc(hidden1, filter=num_class,
                            name='refine_bbox_cls_logit1', init=X.gauss(0.01))
    refine_cls_sec_logit = X.fc(hidden2, filter=num_class,
                                name='refine_bbox_cls_logit2', init=X.gauss(0.01))
    refine_bbox_delta = X.fc(hidden1, filter=4 * num_reg_class,
                             name='refine_bbox_reg_delta1', init=X.gauss(0.001))
    refine_bbox_sec_delta = X.fc(hidden2, filter=4 * num_reg_class,
                                 name='refine_bbox_reg_delta2', init=X.gauss(0.001))

    return refine_cls_logit, refine_bbox_delta, refine_cls_sec_logit, refine_bbox_sec_delta
def get_output(self, conv_fpn_feat):
    """Build (and cache) per-stride RPN classification logits and box deltas.

    The 3x3 conv, normalizer and prediction parameters are created once and
    reused for every stride in ``p.anchor_generate.stride``.

    Args:
        conv_fpn_feat: mapping from ``"stride<N>"`` to the feature symbol
            for that stride.

    Returns:
        Tuple (cls_logit_dict, bbox_delta_dict), both keyed by stride. When
        both dicts are already stored on ``self`` they are returned as-is.

    Raises:
        NotImplementedError: if ``p.normalizer.__name__`` is not one of
            "fix_bn", "sync_bn" or "gn".
    """
    if not (self.cls_logit_dict is None or self.bbox_delta_dict is None):
        return self.cls_logit_dict, self.bbox_delta_dict

    p = self.p
    num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
    conv_channel = p.head.conv_channel

    # Parameters shared by every pyramid level.
    conv_w = X.var('rpn_conv_weight', init=X.gauss(0.01))
    conv_b = X.var('rpn_conv_bias', init=X.zero_init())
    norm_gamma = X.var('rpn_conv_gamma')
    norm_beta = X.var('rpn_conv_beta')
    norm_mean = X.var('rpn_conv_moving_mean')
    norm_var = X.var('rpn_conv_moving_var')
    cls_w = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
    cls_b = X.var('rpn_conv_cls_bias', init=X.zero_init())
    bbox_w = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
    bbox_b = X.var('rpn_conv_bbox_bias', init=X.zero_init())

    logits_by_stride = {}
    deltas_by_stride = {}

    for stride in p.anchor_generate.stride:
        level_feat = conv_fpn_feat['stride%s' % stride]
        hidden = X.conv(
            level_feat,
            kernel=3,
            filter=conv_channel,
            name="rpn_conv_3x3_%s" % stride,
            no_bias=False,
            weight=conv_w,
            bias=conv_b
        )

        norm_kind = p.normalizer.__name__
        if norm_kind == "sync_bn":
            hidden = p.normalizer(
                hidden,
                gamma=norm_gamma,
                beta=norm_beta,
                moving_mean=norm_mean,
                moving_var=norm_var,
                name="rpn_conv_3x3_bn_%s" % stride
            )
        elif norm_kind == "gn":
            hidden = p.normalizer(
                hidden,
                gamma=norm_gamma,
                beta=norm_beta,
                name="rpn_conv_3x3_gn_%s" % stride
            )
        elif norm_kind != "fix_bn":
            # "fix_bn" leaves the conv output un-normalized; anything else is rejected.
            raise NotImplementedError("Unsupported normalizer {}".format(norm_kind))

        hidden = X.relu(hidden, name='rpn_relu_%s' % stride)
        if p.fp16:
            hidden = X.to_fp32(hidden, name="rpn_relu_%s_fp32" % stride)

        # 2 logits and 4 deltas per base anchor.
        logits_by_stride[stride] = X.conv(
            hidden,
            filter=2 * num_base_anchor,
            name="rpn_cls_score_stride%s" % stride,
            no_bias=False,
            weight=cls_w,
            bias=cls_b
        )
        deltas_by_stride[stride] = X.conv(
            hidden,
            filter=4 * num_base_anchor,
            name="rpn_bbox_pred_stride%s" % stride,
            no_bias=False,
            weight=bbox_w,
            bias=bbox_b
        )

    self.cls_logit_dict = logits_by_stride
    self.bbox_delta_dict = deltas_by_stride
    return self.cls_logit_dict, self.bbox_delta_dict