def get_nasfpn_neck(self, data):
    """Build the NAS-FPN neck and memoize it on ``self.neck``.

    Args:
        data: backbone outputs ``(c2, c3, c4, c5)``; c2 is not used here.

    Returns:
        dict mapping 'stride8'..'stride128' to the fused P3..P7 symbols
        produced by the last stacked fusion stage.
    """
    if self.neck is not None:
        return self.neck

    reduced_dim = self.p.dim_reduced
    normalizer = self.p.normalizer
    stages = self.p.num_stage
    s0_kernel = self.p.S0_kernel

    import mxnet as mx
    init = mx.init.Xavier(factor_type="avg", rnd_type="uniform", magnitude=3)

    _, c3, c4, c5 = data
    # Extend the pyramid downward from C5: C6 at stride 64, C7 at stride 128.
    c6 = X.pool(data=c5, name="C6", kernel=2, stride=2, pad=0)
    c7 = X.pool(data=c5, name="C7", kernel=4, stride=4, pad=0)

    # Stage 0: project C3..C7 into the pyramid levels S0_P3..S0_P7.
    levels = self.get_P0_features(
        [c3, c4, c5, c6, c7],
        ['S0_P3', 'S0_P4', 'S0_P5', 'S0_P6', 'S0_P7'],
        reduced_dim, init, normalizer, s0_kernel)

    # Apply the fusion cell `stages` times; each call rewrites all levels.
    for stage in range(1, stages + 1):
        levels = self.get_fused_P_feature(levels, stage, reduced_dim, init, normalizer)

    # P3..P7 correspond to strides 8..128 (stride = 2**level).
    self.neck = {
        'stride{}'.format(2 ** lvl): levels['S{}_P{}'.format(stages, lvl)]
        for lvl in (3, 4, 5, 6, 7)
    }
    return self.neck
def get_nasfpn_neck(self, data):
    """Build the NAS-FPN neck; return the fused (P3, P4, P5, P6, P7) symbols.

    Args:
        data: backbone outputs ``(c2, c3, c4, c5)``; c2 is not used here.
    """
    reduced_dim = self.dim_reduced
    normalizer = self.norm
    stages = self.num_stage

    import mxnet as mx
    init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    _, c3, c4, c5 = data
    # Extend the pyramid with max-pooled C6 (stride 64) and C7 (stride 128).
    c6 = X.pool(data=c5, name="C6", kernel=3, stride=2, pool_type="max")
    c7 = X.pool(data=c5, name="C7", kernel=5, stride=4, pool_type="max")

    # Stage 0: project C3..C7 into the pyramid levels S0_P3..S0_P7.
    levels = self.get_P0_features(
        [c3, c4, c5, c6, c7],
        ['S0_P3', 'S0_P4', 'S0_P5', 'S0_P6', 'S0_P7'],
        reduced_dim, init, normalizer)

    # Apply the fusion cell `stages` times; each call rewrites all levels.
    for stage in range(1, stages + 1):
        levels = self.get_fused_P_feature(levels, stage, reduced_dim, init, normalizer)

    return tuple(levels['S{}_P{}'.format(stages, lvl)] for lvl in (3, 4, 5, 6, 7))
def _get_bbox_head_logit(self, conv_feat):
    """C5-style bbox head: a resnet_v2 stage4 block, BN+ReLU, global pool.

    The result is memoized on ``self._head_feat`` so repeated calls reuse
    the same symbol.
    """
    if self._head_feat is not None:
        return self._head_feat

    from mxnext.backbone.resnet_v2 import Builder
    feat = Builder.resnet_stage(
        conv_feat,
        name="stage4",
        num_block=3,
        filter=2048,
        stride=1,
        dilate=1,
        norm_type=self.p.normalizer,
        norm_mom=0.9,
        ndev=8,
    )
    # resnet_v2 stages end pre-activation, so close with BN + ReLU.
    feat = X.fixbn(feat, name='bn1')
    feat = X.relu(feat, name='relu1')
    # Cast to fp32 before pooling (backbone may run in fp16).
    feat = X.to_fp32(feat, name='c5_to_fp32')
    self._head_feat = X.pool(feat, global_pool=True, name='pool1')
    return self._head_feat
def _get_output(self, mask_pred_logits, conv_feat):
    """MaskIoU-style head: predict per-class IoU logits for each RoI.

    Args:
        mask_pred_logits: predicted mask logits; a channel axis is added
            here before pooling.
        conv_feat: RoI conv features, concatenated with the pooled mask.

    Returns:
        Fully-connected logits of size ``self.pBbox.num_class``.
    """
    num_class = self.pBbox.num_class

    msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)
    normal_init = mx.init.Normal(0.01)
    kaiming_uniform = mx.init.Xavier(rnd_type='uniform', factor_type='in', magnitude=3)

    # Downsample the mask prediction 2x so it matches conv_feat's resolution
    # for channel-wise concatenation.  TODO confirm against the caller.
    mask_pred_logits = mx.sym.expand_dims(mask_pred_logits, axis=1)
    pooled_mask = X.pool(
        mask_pred_logits,
        name='iou_head_maxpool_1',
        kernel=2,
        stride=2,
        pad=0,
    )
    net = X.concat([conv_feat, pooled_mask], axis=1, name='iou_head_input')

    # Three stride-1 3x3 convs, then one stride-2 3x3 conv, all ReLU-activated.
    for idx in range(3):
        net = X.conv(
            net,
            filter=256,
            kernel=3,
            stride=1,
            name='iou_head_conv_%d' % idx,
            no_bias=False,
            init=msra_init,
        )
        net = X.relu(net)
    net = X.conv(
        net,
        filter=256,
        kernel=3,
        stride=2,
        name='iou_head_conv_3',
        no_bias=False,
        init=msra_init
    )
    net = X.relu(net)

    # Two hidden FC layers, then the per-class prediction layer.
    net = X.flatten(data=net)
    net = X.relu(X.fc(net, filter=1024, name='iou_head_FC1', init=kaiming_uniform))
    net = X.relu(X.fc(net, filter=1024, name='iou_head_FC2', init=kaiming_uniform))
    return X.fc(net, filter=num_class, name='iou_head_pred', init=normal_init)
def _get_bbox_head_logit(self, conv_feat):
    """ResNeXt C5 bbox head: a stage4 block followed by a global pool.

    The result is memoized on ``self._head_feat``.
    """
    if self._head_feat is not None:
        return self._head_feat

    from mxnext.backbone.resnext import Builder
    feat = Builder.resnext_stage(
        conv_feat,
        name="stage4",
        num_block=3,
        filter=2048,
        stride=1,
        dilate=1,
        num_group=self.p.num_group,
        norm_type=self.p.normalizer,
        norm_mom=0.9,
        ndev=8,
    )
    self._head_feat = X.pool(feat, global_pool=True, name='pool1')
    return self._head_feat
def get_fpg_neck(self, data):
    """Build the FPG (Feature Pyramid Grids) neck and memoize it on ``self.neck``.

    Args:
        data: backbone outputs ``(c2, c3, c4, c5)``.

    Returns:
        dict mapping 'stride4'..'stride64' to the final stage's P2..P6 symbols.
    """
    if self.neck is not None:
        return self.neck

    reduced_dim = self.p.dim_reduced
    normalizer = self.p.normalizer
    stages = self.p.num_stage
    s0_kernel = self.p.S0_kernel

    import mxnet as mx
    init = mx.init.Xavier(factor_type="avg", rnd_type="uniform", magnitude=3)

    c2, c3, c4, c5 = data
    # Extend the pyramid with C6 at stride 64.
    c6 = X.pool(data=c5, name='C6', kernel=3, stride=2, pad=1)  # NOTE

    # Stage 0: project C2..C6 into S0_P2..S0_P6 and seed the feature grid.
    grid0 = self.get_P0_features(
        [c2, c3, c4, c5, c6],
        ['S0_P2', 'S0_P3', 'S0_P4', 'S0_P5', 'S0_P6'],
        reduced_dim, init, normalizer, s0_kernel)
    self.feature_grids.append(grid0)

    # Each fusion stage updates self.feature_grids in place; the last entry
    # holds the newest stage's levels (indexed P2..P6).
    for stage in range(1, stages + 1):
        self.get_fused_P_feature(stage, reduced_dim, init, normalizer)

    last_row = self.feature_grids[-1]
    self.neck = dict(
        stride4=last_row[0],   # P2
        stride8=last_row[1],   # P3
        stride16=last_row[2],  # P4
        stride32=last_row[3],  # P5
        stride64=last_row[4],  # P6
    )
    return self.neck
def get_fused_P_feature(p_features, stage, dim_reduced, init, norm):
    """One NAS-FPN merging cell over the previous stage's P3-P7 levels.

    The cell is a fixed fusion topology of `merge_gp` and `merge_sum`
    nodes (helpers defined elsewhere in this project; presumably
    global-pooling-weighted merge and element-wise sum — confirm at their
    definitions), each finished with `reluconvbn`.  Node naming is
    P<level>_<node index>; '_0' nodes are the stage-1 inputs.  The exact
    statement order and name strings define the symbol graph — do not
    reorder or rename.

    Args:
        p_features: dict with keys 'S{stage-1}_P3' .. 'S{stage-1}_P7'.
        stage: 1-based fusion-stage index, used for prefixes and output keys.
        dim_reduced: channel count forwarded to reluconvbn.
        init: weight initializer forwarded to reluconvbn.
        norm: normalizer forwarded to reluconvbn.

    Returns:
        dict with keys 'S{stage}_P3' .. 'S{stage}_P7'.
    """
    prefix = "S{}_".format(stage)
    with mx.name.Prefix(prefix):
        # Inputs from the previous stage (comment shows feature stride).
        P3_0 = p_features['S{}_P3'.format(stage - 1)]  # s8
        P4_0 = p_features['S{}_P4'.format(stage - 1)]  # s16
        P5_0 = p_features['S{}_P5'.format(stage - 1)]  # s32
        P6_0 = p_features['S{}_P6'.format(stage - 1)]  # s64
        P7_0 = p_features['S{}_P7'.format(stage - 1)]  # s128
        # P4_1 = gp(P6_0, P4_0): upsample P6 4x to P4's size, then merge.
        # slice_like trims any rounding overshoot from the upsample.
        P6_0_to_P4 = mx.sym.UpSampling(P6_0, scale=4, sample_type='nearest', name="P6_0_to_P4", num_args=1)
        P6_0_to_P4 = mx.sym.slice_like(P6_0_to_P4, P4_0)
        P4_1 = merge_gp(P6_0_to_P4, P4_0, name="gp_P6_0_P4_0")
        P4_1 = reluconvbn(P4_1, dim_reduced, init, norm, name="P4_1", prefix=prefix)
        # P4_2 = sum(P4_0, P4_1)
        P4_2 = merge_sum(P4_0, P4_1, name="sum_P4_0_P4_1")
        P4_2 = reluconvbn(P4_2, dim_reduced, init, norm, name="P4_2", prefix=prefix)
        # P3_3 = sum(P4_2, P3_0) -- end node (feeds the P3 output).
        P4_2_to_P3 = mx.sym.UpSampling(P4_2, scale=2, sample_type='nearest', name="P4_2_to_P3", num_args=1)
        P4_2_to_P3 = mx.sym.slice_like(P4_2_to_P3, P3_0)
        P3_3 = merge_sum(P4_2_to_P3, P3_0, name="sum_P4_2_P3_0")
        P3_3 = reluconvbn(P3_3, dim_reduced, init, norm, name="P3_3", prefix=prefix)
        P3 = P3_3
        # P4_4 = sum(P4_2, P3_3) -- end node.
        # NOTE(review): the node name "sum_P4_4_P3_3" is misleading — the
        # operands are P4_2 and downsampled P3_3.  Kept as-is because the
        # name is part of the serialized graph.
        P3_3_to_P4 = X.pool(P3_3, name="P3_3_to_P4", kernel=2, stride=2, pad=0)
        P3_3_to_P4 = mx.sym.slice_like(P3_3_to_P4, P4_2)
        P4_4 = merge_sum(P4_2, P3_3_to_P4, name="sum_P4_4_P3_3")
        P4_4 = reluconvbn(P4_4, dim_reduced, init, norm, name="P4_4", prefix=prefix)
        P4 = P4_4
        # P5_5 = sum(gp(P4_4, P3_3), P5_0) -- end node.  Both operands are
        # pooled down to P5's stride before the gp merge.
        P4_4_to_P5 = X.pool(P4_4, kernel=2, stride=2, name="P4_4_to_P5", pad=0)
        P4_4_to_P5 = mx.sym.slice_like(P4_4_to_P5, P5_0)
        P3_3_to_P5 = X.pool(P3_3, kernel=4, stride=4, name="P3_3_to_P5", pad=0)
        P3_3_to_P5 = mx.sym.slice_like(P3_3_to_P5, P5_0)
        gp_P4_4_P3_3 = merge_gp(P4_4_to_P5, P3_3_to_P5, name="gp_P4_4_P3_3")
        P5_5 = merge_sum(gp_P4_4_P3_3, P5_0, name="sum_[gp_P4_4_P3_3]_P5_0")
        P5_5 = reluconvbn(P5_5, dim_reduced, init, norm, name="P5_5", prefix=prefix)
        P5 = P5_5
        # P7_6 = sum(gp(P5_5, P4_2), P7_0) -- end node.
        P4_2_to_P7 = X.pool(P4_2, name="P4_2_to_P7", kernel=8, stride=8, pad=0)
        P4_2_to_P7 = mx.sym.slice_like(P4_2_to_P7, P7_0)
        P5_5_to_P7 = X.pool(P5_5, name="P5_5_to_P7", kernel=4, stride=4, pad=0)
        P5_5_to_P7 = mx.sym.slice_like(P5_5_to_P7, P7_0)
        gp_P5_5_P4_2 = merge_gp(P5_5_to_P7, P4_2_to_P7, name="gp_P5_5_P4_2")
        P7_6 = merge_sum(gp_P5_5_P4_2, P7_0, name="sum_[gp_P5_5_P4_2]_P7_0")
        P7_6 = reluconvbn(P7_6, dim_reduced, init, norm, name="P7_6", prefix=prefix)
        P7 = P7_6
        # P6_7 = gp(P7_6, P5_5) -- end node (note the lowercase "p5_5" in
        # the pool name below; kept verbatim since names are graph-visible).
        P7_6_to_P6 = mx.sym.UpSampling(P7_6, scale=2, sample_type='nearest', name="P7_6_to_P6", num_args=1)
        P7_6_to_P6 = mx.sym.slice_like(P7_6_to_P6, P6_0)
        P5_5_to_P6 = X.pool(P5_5, name="p5_5_to_P6", kernel=2, stride=2, pad=0)
        P5_5_to_P6 = mx.sym.slice_like(P5_5_to_P6, P6_0)
        P6_7 = merge_gp(P7_6_to_P6, P5_5_to_P6, name="gp_P7_6_to_P6_P5_5_to_P6")
        P6_7 = reluconvbn(P6_7, dim_reduced, init, norm, name="P6_7", prefix=prefix)
        P6 = P6_7
        return {
            'S{}_P3'.format(stage): P3,
            'S{}_P4'.format(stage): P4,
            'S{}_P5'.format(stage): P5,
            'S{}_P6'.format(stage): P6,
            'S{}_P7'.format(stage): P7
        }
def get_fused_P_feature(p_features, stage, dim_reduced, init, norm):
    """One fusion stage over P3-P7: a top-down pass followed by a bottom-up pass.

    Top-down (P7 -> P3): each level sums with the 2x-upsampled level above.
    Bottom-up (P3 -> P7): each level sums with the 2x-pooled level below.
    Every fused node is finished with `X.reluconvbn`.  Unlike the sibling
    variants in this file there is no `slice_like` after upsampling, so this
    assumes the level sizes are exact powers of two of each other — TODO
    confirm against the input pipeline.  Statement order and name strings
    define the symbol graph — do not reorder or rename.

    Args:
        p_features: dict with keys 'S{stage-1}_P3' .. 'S{stage-1}_P7'.
        stage: 1-based fusion-stage index, used for prefixes and output keys.
        dim_reduced: channel count forwarded to X.reluconvbn.
        init: weight initializer forwarded to X.reluconvbn.
        norm: normalizer forwarded to X.reluconvbn.

    Returns:
        dict with keys 'S{stage}_P3' .. 'S{stage}_P7'.
    """
    prefix = "S{}_".format(stage)
    with mx.name.Prefix(prefix):
        # Inputs from the previous stage (comment shows feature stride).
        P3_0 = p_features['S{}_P3'.format(stage - 1)]  # s8
        P4_0 = p_features['S{}_P4'.format(stage - 1)]  # s16
        P5_0 = p_features['S{}_P5'.format(stage - 1)]  # s32
        P6_0 = p_features['S{}_P6'.format(stage - 1)]  # s64
        P7_0 = p_features['S{}_P7'.format(stage - 1)]  # s128
        # P7_1: the top level passes through unchanged into the top-down pass.
        P7_1 = P7_0
        # P6_1 = sum(P6_0, P7_1)
        P7_1_to_P6 = mx.sym.UpSampling(P7_1, scale=2, sample_type='nearest', name="P7_1_to_P6", num_args=1)
        P6_1 = X.merge_sum([P6_0, P7_1_to_P6], name="sum_P6_0_P7_1")
        P6_1 = X.reluconvbn(P6_1, dim_reduced, init, norm, name="P6_1", prefix=prefix)
        # P5_1 = sum(P5_0, P6_1)
        P6_1_to_P5 = mx.sym.UpSampling(P6_1, scale=2, sample_type='nearest', name="P6_1_to_P5", num_args=1)
        P5_1 = X.merge_sum([P5_0, P6_1_to_P5], name="sum_P5_0_P6_1")
        P5_1 = X.reluconvbn(P5_1, dim_reduced, init, norm, name="P5_1", prefix=prefix)
        # P4_1 = sum(P4_0, P5_1)
        P5_1_to_P4 = mx.sym.UpSampling(P5_1, scale=2, sample_type='nearest', name="P5_1_to_P4", num_args=1)
        P4_1 = X.merge_sum([P4_0, P5_1_to_P4], name="sum_P4_0_P5_1")
        P4_1 = X.reluconvbn(P4_1, dim_reduced, init, norm, name="P4_1", prefix=prefix)
        # P3_1 = sum(P3_0, P4_1) -- bottom of the top-down pass.
        P4_1_to_P3 = mx.sym.UpSampling(P4_1, scale=2, sample_type='nearest', name="P4_1_to_P3", num_args=1)
        P3_1 = X.merge_sum([P3_0, P4_1_to_P3], name="sum_P3_0_P4_1")
        P3_1 = X.reluconvbn(P3_1, dim_reduced, init, norm, name="P3_1", prefix=prefix)
        # Bottom-up pass starts here; P3 passes through unchanged.
        P3_2 = P3_1
        P3 = P3_2
        # P4_2 = sum(P3_2, P4_1)
        P3_2_to_P4 = X.pool(P3_2, name="P3_2_to_P4", kernel=2, stride=2, pad=0)
        P4_2 = X.merge_sum([P4_1, P3_2_to_P4], name="sum_P4_1_P3_2")
        P4_2 = X.reluconvbn(P4_2, dim_reduced, init, norm, name="P4_2", prefix=prefix)
        P4 = P4_2
        # P5_2 = sum(P4_2, P5_1)
        P4_2_to_P5 = X.pool(P4_2, name="P4_2_to_P5", kernel=2, stride=2, pad=0)
        P5_2 = X.merge_sum([P5_1, P4_2_to_P5], name="sum_P5_1_P4_2")
        P5_2 = X.reluconvbn(P5_2, dim_reduced, init, norm, name="P5_2", prefix=prefix)
        P5 = P5_2
        # P6_2 = sum(P5_2, P6_1)
        P5_2_to_P6 = X.pool(P5_2, name="P5_2_to_P6", kernel=2, stride=2, pad=0)
        P6_2 = X.merge_sum([P6_1, P5_2_to_P6], name="sum_P6_1_P5_2")
        P6_2 = X.reluconvbn(P6_2, dim_reduced, init, norm, name="P6_2", prefix=prefix)
        P6 = P6_2
        # P7_2 = sum(P6_2, P7_1) -- top of the bottom-up pass.
        P6_2_to_P7 = X.pool(P6_2, name="P6_2_to_P7", kernel=2, stride=2, pad=0)
        P7_2 = X.merge_sum([P7_1, P6_2_to_P7], name="sum_P7_1_P6_2")
        P7_2 = X.reluconvbn(P7_2, dim_reduced, init, norm, name="P7_2", prefix=prefix)
        P7 = P7_2
        return {
            'S{}_P3'.format(stage): P3,
            'S{}_P4'.format(stage): P4,
            'S{}_P5'.format(stage): P5,
            'S{}_P6'.format(stage): P6,
            'S{}_P7'.format(stage): P7
        }
def get_fused_P_feature(p_features, stage, dim_reduced, init, norm):
    """One fusion stage over P2-P6: a top-down pass followed by a bottom-up pass.

    Top-down (P6 -> P2): each level sums with the 2x-upsampled level above,
    trimmed to size with `slice_like`.  Bottom-up (P2 -> P6): each level sums
    with the 3x3/stride-2/pad-1 pooled level below.  Every fused node is
    finished with `X.reluconvbn`.  Statement order and name strings define
    the symbol graph — do not reorder or rename.

    Args:
        p_features: dict with keys 'S{stage-1}_P2' .. 'S{stage-1}_P6'.
        stage: 1-based fusion-stage index, used for prefixes and output keys.
        dim_reduced: channel count forwarded to X.reluconvbn.
        init: weight initializer forwarded to X.reluconvbn.
        norm: normalizer forwarded to X.reluconvbn.

    Returns:
        dict with keys 'S{stage}_P2' .. 'S{stage}_P6'.
    """
    prefix = "S{}_".format(stage)
    with mx.name.Prefix(prefix):
        # Inputs from the previous stage (comment shows feature stride).
        P2_0 = p_features['S{}_P2'.format(stage - 1)]  # s4
        P3_0 = p_features['S{}_P3'.format(stage - 1)]  # s8
        P4_0 = p_features['S{}_P4'.format(stage - 1)]  # s16
        P5_0 = p_features['S{}_P5'.format(stage - 1)]  # s32
        P6_0 = p_features['S{}_P6'.format(stage - 1)]  # s64
        # P6_1: the top level passes through unchanged into the top-down pass.
        P6_1 = P6_0
        # P5_1 = sum(P5_0, P6_1)
        P6_1_to_P5 = mx.sym.UpSampling(P6_1, scale=2, sample_type='nearest', name="P6_1_to_P5", num_args=1)
        P6_1_to_P5 = mx.sym.slice_like(P6_1_to_P5, P5_0)
        P5_1 = X.merge_sum([P5_0, P6_1_to_P5], name="sum_P5_0_P6_1")
        P5_1 = X.reluconvbn(P5_1, dim_reduced, init, norm, name="P5_1", prefix=prefix)
        # P4_1 = sum(P4_0, P5_1)
        P5_1_to_P4 = mx.sym.UpSampling(P5_1, scale=2, sample_type='nearest', name="P5_1_to_P4", num_args=1)
        P5_1_to_P4 = mx.sym.slice_like(P5_1_to_P4, P4_0)
        P4_1 = X.merge_sum([P4_0, P5_1_to_P4], name="sum_P4_0_P5_1")
        P4_1 = X.reluconvbn(P4_1, dim_reduced, init, norm, name="P4_1", prefix=prefix)
        # P3_1 = sum(P3_0, P4_1)
        P4_1_to_P3 = mx.sym.UpSampling(P4_1, scale=2, sample_type='nearest', name="P4_1_to_P3", num_args=1)
        P4_1_to_P3 = mx.sym.slice_like(P4_1_to_P3, P3_0)
        P3_1 = X.merge_sum([P3_0, P4_1_to_P3], name="sum_P3_0_P4_1")
        P3_1 = X.reluconvbn(P3_1, dim_reduced, init, norm, name="P3_1", prefix=prefix)
        # P2_1 = sum(P2_0, P3_1) -- bottom of the top-down pass.
        P3_1_to_P2 = mx.sym.UpSampling(P3_1, scale=2, sample_type='nearest', name="P3_1_to_P2", num_args=1)
        P3_1_to_P2 = mx.sym.slice_like(P3_1_to_P2, P2_0)
        P2_1 = X.merge_sum([P2_0, P3_1_to_P2], name="sum_P2_0_P3_1")
        P2_1 = X.reluconvbn(P2_1, dim_reduced, init, norm, name="P2_1", prefix=prefix)
        # Bottom-up pass starts here; P2 passes through unchanged.
        P2_2 = P2_1
        P2 = P2_2
        # P3_2 = sum(P3_1, pooled P2_2)
        P2_2_to_P3 = X.pool(P2_2, name="P2_2_to_P3", kernel=3, stride=2, pad=1)
        P3_2 = X.merge_sum([P3_1, P2_2_to_P3], name="sum_P3_1_P2_2")
        P3_2 = X.reluconvbn(P3_2, dim_reduced, init, norm, name="P3_2", prefix=prefix)
        P3 = P3_2
        # P4_2 = sum(P4_1, pooled P3_2)
        P3_2_to_P4 = X.pool(P3_2, name="P3_2_to_P4", kernel=3, stride=2, pad=1)
        P4_2 = X.merge_sum([P4_1, P3_2_to_P4], name="sum_P4_1_P3_2")
        P4_2 = X.reluconvbn(P4_2, dim_reduced, init, norm, name="P4_2", prefix=prefix)
        P4 = P4_2
        # P5_2 = sum(P5_1, pooled P4_2)
        P4_2_to_P5 = X.pool(P4_2, name="P4_2_to_P5", kernel=3, stride=2, pad=1)
        P5_2 = X.merge_sum([P5_1, P4_2_to_P5], name="sum_P5_1_P4_2")
        P5_2 = X.reluconvbn(P5_2, dim_reduced, init, norm, name="P5_2", prefix=prefix)
        P5 = P5_2
        # P6_2 = sum(P6_1, pooled P5_2) -- top of the bottom-up pass.
        P5_2_to_P6 = X.pool(P5_2, name="P5_2_to_P6", kernel=3, stride=2, pad=1)
        P6_2 = X.merge_sum([P6_1, P5_2_to_P6], name="sum_P6_1_P5_2")
        P6_2 = X.reluconvbn(P6_2, dim_reduced, init, norm, name="P6_2", prefix=prefix)
        P6 = P6_2
        return {
            'S{}_P2'.format(stage): P2,
            'S{}_P3'.format(stage): P3,
            'S{}_P4'.format(stage): P4,
            'S{}_P5'.format(stage): P5,
            'S{}_P6'.format(stage): P6,
        }
def fpn_conv_down(self, data):
    """Classic top-down FPN over C2-C5, producing P2-P6 (cached on self.fpn_feat).

    Each level gets a 1x1 lateral projection; going down the pyramid, the
    level above is 2x nearest-upsampled, cropped to the lateral's size, and
    element-wise summed in, then smoothed by a 3x3 conv.  P6 is a stride-2
    max subsampling of P5's smoothed output.

    Args:
        data: backbone outputs ``(c2, c3, c4, c5)``.

    Returns:
        dict mapping 'stride4'..'stride64' to P2..P6 feature symbols.
    """
    if self.fpn_feat:
        return self.fpn_feat

    c2, c3, c4, c5 = data
    if self.p.fp16:
        # Cast backbone outputs up to fp32 before the FPN arithmetic.
        c2 = X.to_fp32(c2, name="c2_to_fp32")
        c3 = X.to_fp32(c3, name="c3_to_fp32")
        c4 = X.to_fp32(c4, name="c4_to_fp32")
        c5 = X.to_fp32(c5, name="c5_to_fp32")

    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

    def lateral(feat, level):
        # 1x1 lateral projection to 256 channels.
        return X.conv(
            data=feat, filter=256, no_bias=False,
            weight=X.var(name="{}_lateral_weight".format(level), init=xavier_init),
            bias=X.var(name="{}_lateral_bias".format(level), init=X.zero_init()),
            name="{}_lateral".format(level))

    def smooth(feat, level):
        # 3x3 output conv applied after merging.
        return X.conv(
            data=feat, kernel=3, filter=256, no_bias=False,
            weight=X.var(name="{}_conv_weight".format(level), init=xavier_init),
            bias=X.var(name="{}_conv_bias".format(level), init=X.zero_init()),
            name="{}_conv".format(level))

    # Top level: P5 is a plain lateral followed by the smoothing conv.
    merged = lateral(c5, "P5")
    outputs = {"P5": smooth(merged, "P5")}

    # Walk down the pyramid, merging each lateral with the upsampled level above.
    for feat, level, above in ((c4, "P4", "P5"), (c3, "P3", "P4"), (c2, "P2", "P3")):
        upsampled = mx.sym.UpSampling(
            merged, scale=2, sample_type="nearest",
            name="{}_upsampling".format(above), num_args=1)
        lat = lateral(feat, level)
        # Crop the upsampled map to the lateral's spatial size before summing.
        clipped = mx.sym.Crop(*[upsampled, lat], name="{}_clip".format(level))
        merged = mx.sym.ElementWiseSum(*[clipped, lat], name="{}_sum".format(level))
        outputs[level] = smooth(merged, level)

    # P6: stride-2 max subsampling of P5's smoothed output.
    p6 = X.pool(outputs["P5"], name="P6_subsampling", kernel=1, stride=2, pad=0, pool_type='max')

    if self.p.fp16:
        # Cast everything back to fp16 for downstream consumers.
        p6 = X.to_fp16(p6, name="p6_to_fp16")
        outputs["P5"] = X.to_fp16(outputs["P5"], name="p5_conv_to_fp16")
        outputs["P4"] = X.to_fp16(outputs["P4"], name="p4_conv_to_fp16")
        outputs["P3"] = X.to_fp16(outputs["P3"], name="p3_conv_to_fp16")
        outputs["P2"] = X.to_fp16(outputs["P2"], name="p2_conv_to_fp16")

    self.fpn_feat = {
        "stride64": p6,
        "stride32": outputs["P5"],
        "stride16": outputs["P4"],
        "stride8": outputs["P3"],
        "stride4": outputs["P2"],
    }
    return self.fpn_feat