def bottleneck_gn_transformation(
    model, blob_in, dim_in, dim_out, stride, prefix, dim_inner,
    dilation=1, group=1
):
    """Add a bottleneck transformation with GroupNorm to the model."""
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> GN -> ReLU
    cur = model.ConvGN(
        blob_in, prefix + '_branch2a', dim_in, dim_inner,
        kernel=1, group_gn=get_group_gn(dim_inner), stride=str1x1, pad=0,
    )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> GN -> ReLU
    cur = model.ConvGN(
        cur, prefix + '_branch2b', dim_inner, dim_inner,
        kernel=3, group_gn=get_group_gn(dim_inner), stride=str3x3,
        pad=1 * dilation, dilation=dilation, group=group,
    )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> GN (no ReLU)
    cur = model.ConvGN(
        cur, prefix + '_branch2c', dim_inner, dim_out,
        kernel=1, group_gn=get_group_gn(dim_out), stride=1, pad=0,
    )
    return cur
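# get_group_gn() is used throughout this section but is not defined here.
# A minimal sketch, assuming the Detectron-style GroupNorm config keys
# cfg.GROUP_NORM.DIM_PER_GP and cfg.GROUP_NORM.NUM_GROUPS; the actual helper
# in this repo may differ.
def get_group_gn(dim):
    """Get the number of groups used by GroupNorm, given the channel dim."""
    dim_per_gp = cfg.GROUP_NORM.DIM_PER_GP
    num_groups = cfg.GROUP_NORM.NUM_GROUPS
    assert dim_per_gp == -1 or num_groups == -1, \
        'GroupNorm: can only specify G or C/G.'
    if dim_per_gp > 0:
        assert dim % dim_per_gp == 0
        group_gn = dim // dim_per_gp
    else:
        assert dim % num_groups == 0
        group_gn = num_groups
    return group_gn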
def add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale):
    """Add a X conv + 1fc head, with GroupNorm"""
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    roi_feat = model.RoIFeatureTransform(
        blob_in, 'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    current = roi_feat
    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
        current = model.ConvGN(
            current, 'head_conv' + str(i + 1), dim_in, hidden_dim, 3,
            group_gn=get_group_gn(hidden_dim), stride=1, pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = hidden_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim
def basic_gn_stem(model, data, **kwargs):
    """Add a basic ResNet stem (using GN)"""
    dim = 64
    p = model.ConvGN(
        data, 'conv1', 3, dim, 7, group_gn=get_group_gn(dim), pad=3, stride=2
    )
    p = model.Relu(p, p)
    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
    return p, dim
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm"""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.ConvGN(
            current, '_mask_fcn' + str(i + 1), dim_in, dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner,
        kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner
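# const_fill() and gauss_fill() are used by several heads in this section but
# are not defined here. A minimal sketch consistent with Detectron's
# utils/c2.py initializer helpers; treat the exact home of these helpers as
# an assumption.
def const_fill(value):
    """ConstantFill initializer tuple for Caffe2 parameters."""
    return ('ConstantFill', {'value': value})


def gauss_fill(std):
    """GaussianFill initializer tuple for Caffe2 parameters."""
    return ('GaussianFill', {'std': std})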
def basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
    if dim_in == dim_out:
        return blob_in

    # output name is prefix + '_branch1_gn'
    return model.ConvGN(
        blob_in, prefix + '_branch1', dim_in, dim_out,
        kernel=1, group_gn=get_group_gn(dim_out),
        stride=stride, pad=0, group=1,
    )
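# How the GN shortcut and the GN bottleneck transformation above are
# typically combined into a single residual block. This mirrors the
# Detectron-style add_residual_block() pattern; the wrapper name and exact
# signature below are assumptions for illustration only.
def add_gn_residual_block(
    model, prefix, blob_in, dim_in, dim_out, dim_inner, dilation, stride
):
    # transformation branch: conv 1x1 -> 3x3 -> 1x1, each followed by GN
    tr = bottleneck_gn_transformation(
        model, blob_in, dim_in, dim_out, stride, prefix, dim_inner,
        dilation=dilation
    )
    # shortcut branch: identity, or 1x1 conv + GN when dims/stride change
    sc = basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride)
    # residual sum followed by ReLU
    s = model.net.Sum([tr, sc], prefix + '_sum')
    return model.Relu(s, s)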
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral, fpn_bottom + '_lateral',
            dim_in=dim_lateral, dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1, pad=0, stride=1,
            weight_init=(
                const_fill(0.0)
                if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})),
            bias_init=const_fill(0.0)
        )
    else:
        lat = model.Conv(
            fpn_lateral, fpn_bottom + '_lateral',
            dim_in=dim_lateral, dim_out=dim_top,
            kernel=1, pad=0, stride=1,
            weight_init=(
                const_fill(0.0)
                if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})
            ),
            bias_init=const_fill(0.0)
        )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)
def add_pose_fpn_func(model, fpn_orig54, lateral_pose_numbers=1): """add pose to fpn return [fpn5, fpn4, fpn3_pose, fpn2_pose] """ fpn_dim = cfg.FPN.DIM xavier_fill = ('XavierFill', {}) fpn5, fpn4 = fpn_orig54 blobs_out_fpn = [fpn5, fpn4, 'fpn3_pose', 'fpn2_pose'] # 0, 1, 2, 3 fpn_level_info = fpn_level_info_ResNet50_conv5() lateral_input_blobs = fpn_level_info.blobs # # stop gradient # for i in range(len(lateral_input_blobs)): # model.StopGradient(s, s) fpn_dim_lateral = fpn_level_info.dims middle_blobs_lateral = [ 'fpn_inner_before_pose_{}'.format(s) for s in fpn_level_info.blobs ] blobs_lateral = [] # prepare pose predict blobs hg_pred_NCHW_8 = model.net.NHWC2NCHW('pose_pred_8', 'hg_pred_NCHW_8') model.Relu(hg_pred_NCHW_8, hg_pred_NCHW_8) hg_pred_NCHW_4 = model.net.NHWC2NCHW('pose_pred_4', 'hg_pred_NCHW_4') model.Relu(hg_pred_NCHW_4, hg_pred_NCHW_4) hg_pred_NCHW = [hg_pred_NCHW_8, hg_pred_NCHW_4] for i in range(len(lateral_input_blobs)): if cfg.FPN.USE_GN: # use GroupNorm c = model.ConvGN( lateral_input_blobs[i], middle_blobs_lateral[i], # note: this is a prefix dim_in=fpn_dim_lateral[i], dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=1, pad=0, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0)) middle_blobs_lateral[i] = c # rename it else: model.Conv(lateral_input_blobs[i], middle_blobs_lateral[i], dim_in=fpn_dim_lateral[i], dim_out=fpn_dim, kernel=1, pad=0, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0)) # add pose blob to lateral for j in range(lateral_pose_numbers): concat_name = 'fpn_inner_pose{}_concat_{}'.format( j, lateral_input_blobs[i]) pose_conv_name = 'fpn_inner_pose{}_conv_{}'.format( j, lateral_input_blobs[i]) if j == 0: hg_pred_NCHW_concat, _ = model.net.Concat( [middle_blobs_lateral[i], hg_pred_NCHW[i]], [concat_name, concat_name + '_info'], axis=1) else: hg_pred_NCHW_concat, _ = model.net.Concat( [blob_in, hg_pred_NCHW[i]], [concat_name, concat_name + '_info'], axis=1) blob_in = model.Conv(hg_pred_NCHW_concat, pose_conv_name, fpn_dim + 16, fpn_dim, kernel=3, stride=1, pad=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) model.Relu(blob_in, blob_in) blobs_lateral.append(blob_in) #####add top-down # Top-down 2x upsampling td = model.net.UpsampleNearest(fpn4, 'fpn4' + '_topdown_pose', scale=2) # Sum lateral and top-down model.net.Sum([blobs_lateral[0], td], 'fpn3_sum_pose') model.Conv('fpn3_sum_pose', blobs_out_fpn[2], fpn_dim, fpn_dim, kernel=3, stride=1, pad=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) td = model.net.UpsampleNearest(blobs_out_fpn[2], 'fpn3' + '_topdown_pose', scale=2) # Sum lateral and top-down model.net.Sum([blobs_lateral[1], td], 'fpn2_sum_pose') model.Conv('fpn2_sum_pose', blobs_out_fpn[3], fpn_dim, fpn_dim, kernel=3, stride=1, pad=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) return blobs_out_fpn
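# fpn_level_info_ResNet50_conv5() is called above but not defined in this
# section. A sketch consistent with Detectron's FPN level-info helpers
# (backbone blob names, channel dims and spatial scales for a
# ResNet-50-conv5 backbone); treat the exact values as an assumption about
# what the repo provides.
import collections

FpnLevelInfo = collections.namedtuple(
    'FpnLevelInfo', ['blobs', 'dims', 'spatial_scales'])


def fpn_level_info_ResNet50_conv5():
    return FpnLevelInfo(
        blobs=('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum'),
        dims=(2048, 1024, 512, 256),
        spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    )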
def DenseASPP(model, feature, num_features=2048, is_bn=True):
    """
    * output_scale can only be set to 8 or 16
    """
    dropout0 = 0.1
    d_feature0 = 512
    d_feature1 = 256
    dim_in = num_features
    blob_out = "input"

    aspp3 = _DenseAsppBlock(
        model, feature, 'aspp3', input_num=dim_in,
        num1=d_feature0, num2=d_feature1, dilation_rate=3, drop_out=dropout0)
    feature, _ = model.net.Concat(
        (aspp3, feature),
        [blob_out + "_aspp3", "_" + blob_out + "_aspp3"], axis=1)

    aspp6 = _DenseAsppBlock(
        model, feature, 'aspp6', input_num=dim_in + d_feature1 * 1,
        num1=d_feature0, num2=d_feature1, dilation_rate=6, drop_out=dropout0)
    feature, _ = model.net.Concat(
        (aspp6, feature),
        [blob_out + "_aspp6", "_" + blob_out + "_aspp6"], axis=1)

    aspp12 = _DenseAsppBlock(
        model, feature, 'aspp12', input_num=dim_in + d_feature1 * 2,
        num1=d_feature0, num2=d_feature1, dilation_rate=12, drop_out=dropout0)
    feature, _ = model.net.Concat(
        (aspp12, feature),
        [blob_out + "_aspp12", "_" + blob_out + "_aspp12"], axis=1)

    aspp18 = _DenseAsppBlock(
        model, feature, 'aspp18', input_num=dim_in + d_feature1 * 3,
        num1=d_feature0, num2=d_feature1, dilation_rate=18, drop_out=dropout0)
    feature, _ = model.net.Concat(
        (aspp18, feature),
        [blob_out + "_aspp18", "_" + blob_out + "_aspp18"], axis=1)

    aspp24 = _DenseAsppBlock(
        model, feature, 'aspp24', input_num=dim_in + d_feature1 * 4,
        num1=d_feature0, num2=d_feature1, dilation_rate=24, drop_out=dropout0)

    x, _ = model.net.Concat(
        (aspp3, aspp6, aspp12, aspp18, aspp24),
        [blob_out, "_" + blob_out + "_all"], axis=1)

    x = model.ConvGN(
        x, 'dense_aspp_reduce', 5 * d_feature1, 256,
        kernel=1, group_gn=get_group_gn(256),
        stride=1, pad=0, dilation=1, group=1)
    return x
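# _DenseAsppBlock() is referenced above but not defined in this section.
# A plausible sketch following the DenseASPP design (1x1 channel-reduction
# conv followed by a 3x3 dilated conv), built only from the ConvGN/Relu
# helpers already used in this file. The real block also applies dropout
# with rate `drop_out`; that step is omitted here, and all blob names and
# the exact layer ordering are assumptions.
def _DenseAsppBlock(model, blob_in, prefix, input_num, num1, num2,
                    dilation_rate, drop_out):
    # 1x1 conv -> GN -> ReLU: reduce channels to num1
    cur = model.ConvGN(
        blob_in, prefix + '_reduce', input_num, num1,
        kernel=1, group_gn=get_group_gn(num1), stride=1, pad=0)
    cur = model.Relu(cur, cur)
    # 3x3 dilated conv -> GN -> ReLU: produce num2 output channels
    cur = model.ConvGN(
        cur, prefix + '_dilated', num1, num2,
        kernel=3, group_gn=get_group_gn(num2), stride=1,
        pad=dilation_rate, dilation=dilation_rate)
    cur = model.Relu(cur, cur)
    return cur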
def add_topdown_lateral_module( model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral ): """Add a top-down lateral module.""" # Lateral 1x1 conv if cfg.FPN.USE_GN: # use GroupNorm lat = model.ConvGN( fpn_lateral, fpn_bottom + '_lateral', dim_in=dim_lateral, dim_out=dim_top, group_gn=get_group_gn(dim_top), kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0) ) else: ''' tmp1 = model.Conv(#########reduce channel /2 fpn_lataral_old, fpn_bottom + '_mid1', dim_in = dim_lateral * 2, dim_out = dim_lateral, kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) tmp2 = model.net.UpsampleNearest(tmp1, fpn_bottom + '_mid2', scale=2)##upsample tmp3 = model.net.Sum([tmp2, fpn_lateral], fpn_bottom + '_mid3')##### add lat = model.Conv( tmp3, fpn_bottom + '_lateral', dim_in=dim_lateral, dim_out=dim_top, kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) ''' lat = model.Conv( fpn_lateral, fpn_bottom + '_lateral', dim_in=dim_lateral, dim_out=dim_top, kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) # Top-down 2x upsampling td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2) # Sum lateral and top-down model.net.Sum([lat, td], fpn_bottom)
def add_downtop_lateral_module( model, fpn_down, fpn_lateral, fpn_top, dim_top, dim_lateral ): if cfg.FPN.USE_GN: # use GroupNorm lat = model.ConvGN( fpn_lateral, fpn_top + '_lateral', dim_in=dim_lateral, dim_out=dim_top, group_gn=get_group_gn(dim_top), kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0) ) else: lat = model.Conv( fpn_lateral, fpn_top + '_lateral', dim_in=dim_lateral, dim_out=dim_top, kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) lat = model.Relu(lat, lat + '_relu') if cfg.FPN.USE_GN: # use GroupNorm dt = model.ConvGN( fpn_down, fpn_top + '_down', dim_in=dim_top, dim_out=dim_top, group_gn=get_group_gn(dim_top), kernel=3, pad=1, stride=2, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0) ) else: dt = model.Conv( fpn_down, fpn_top + '_down', dim_in=dim_top, dim_out=dim_top, kernel=3, pad=1, stride=2, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) dt = model.Relu(dt, dt + '_relu') model.net.Sum([lat, dt], fpn_top)
def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral): """Add a top-down lateral module.""" # Lateral 1x1 conv if cfg.FPN.USE_GN: # use GroupNorm lat = model.ConvGN( fpn_lateral, fpn_bottom + '_lateral', dim_in=dim_lateral, dim_out=dim_top, group_gn=get_group_gn(dim_top), kernel=1, pad=0, stride=1, weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0)) else: lat = model.Conv( fpn_lateral, fpn_bottom + '_lateral', dim_in=dim_lateral, dim_out=dim_top, kernel=1, pad=0, stride=1, weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0)) # ============================================================================= # add pose to lateral if 'res3' in fpn_lateral or 'res2' in fpn_lateral or 'res4' in fpn_lateral: if 'res4' in fpn_lateral: hg_pred_NCHW = model.net.NHWC2NCHW('pose_pred_16', 'hg_pred_NCHW_16') if 'res3' in fpn_lateral: hg_pred_NCHW = model.net.NHWC2NCHW('pose_pred_8', 'hg_pred_NCHW_8') if 'res2' in fpn_lateral: hg_pred_NCHW = model.net.NHWC2NCHW('pose_pred_4', 'hg_pred_NCHW_4') if 'ATR' in cfg.TRAIN.DATASETS[0]: heatmap_dim = 26 else: heatmap_dim = 26 model.Relu(hg_pred_NCHW, hg_pred_NCHW) hg_pred_NCHW_concat, _ = model.net.Concat([lat, hg_pred_NCHW], [ fpn_bottom + '_lateral_pose_concat', fpn_bottom + '_lateral_info' ], axis=1) lat = model.Conv(hg_pred_NCHW_concat, fpn_bottom + '_lateral_pose_concat_conv', dim_top + heatmap_dim, dim_top, kernel=3, stride=1, pad=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) model.Relu(lat, lat) # ============================================================================= # Top-down 2x upsampling td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2) # Sum lateral and top-down model.net.Sum([lat, td], fpn_bottom)
def add_roi_cascade_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale, stage_num): """Add cascade X conv + 1fc head, with GroupNorm""" hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION if model.train: if stage_num == 1: roi_feat = model.RoIFeatureTransform( blob_in, 'roi_feat_1st', blob_rois='rois_1st', method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=roi_size, sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale) current = roi_feat for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): current = model.ConvGN(current, 'head_conv' + str(i + 1) + '_1st', dim_in, hidden_dim, 3, group_gn=get_group_gn(hidden_dim), stride=1, pad=1, weight_init=('MSRAFill', {}), bias_init=('ConstantFill', { 'value': 0. })) current = model.Relu(current, current) dim_in = hidden_dim fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM model.FC(current, 'fc6_1st', dim_in * roi_size * roi_size, fc_dim) model.Relu('fc6_1st', 'fc6_1st') return ['fc6_1st'], fc_dim elif stage_num == 2: roi_feat = model.RoIFeatureTransform( blob_in, 'roi_feat_2nd', blob_rois='rois_2nd', method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=roi_size, sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale) current = roi_feat for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): current = model.ConvGN(current, 'head_conv' + str(i + 1) + '_2nd', dim_in, hidden_dim, 3, group_gn=get_group_gn(hidden_dim), stride=1, pad=1, weight_init=('MSRAFill', {}), bias_init=('ConstantFill', { 'value': 0. })) current = model.Relu(current, current) dim_in = hidden_dim fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM model.FC(current, 'fc6_2nd', dim_in * roi_size * roi_size, fc_dim) model.Relu('fc6_2nd', 'fc6_2nd') if not model.train: current_1st_2nd = roi_feat for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): current_1st_2nd = model.ConvGNShared( current_1st_2nd, 'head_conv' + str(i + 1) + '_1st_2nd', dim_in, hidden_dim, 3, group_gn=get_group_gn(hidden_dim), stride=1, pad=1, conv_weight_init='head_conv' + str(i + 1) + '_1st_w', conv_bias_init=('ConstantFill', { 'value': 0. }), gn_bias_init='head_conv' + str(i + 1) + '_1st_b', gn_scale_innit='head_conv' + str(i + 1) + '_1st_s') current_1st_2nd = model.Relu(current_1st_2nd, current_1st_2nd) dim_in = hidden_dim fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM model.FCShared(current_1st_2nd, 'fc6_1st_2nd', dim_in * roi_size * roi_size, fc_dim) model.Relu('fc6_1st_2nd', 'fc6_1st_2nd') return ['fc6_2nd', 'fc6_1st_2nd'], fc_dim # return ['fc6_2nd'], fc_dim elif stage_num == 3: roi_feat = model.RoIFeatureTransform( blob_in, 'roi_feat_3rd', blob_rois='rois_3rd', method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=roi_size, sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale) current = roi_feat for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): current = model.ConvGN(current, 'head_conv' + str(i + 1) + '_3rd', dim_in, hidden_dim, 3, group_gn=get_group_gn(hidden_dim), stride=1, pad=1, weight_init=('MSRAFill', {}), bias_init=('ConstantFill', { 'value': 0. 
})) current = model.Relu(current, current) dim_in = hidden_dim fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM model.FC(current, 'fc6_3rd', dim_in * roi_size * roi_size, fc_dim) model.Relu('fc6_3rd', 'fc6_3rd') if not model.train: current_1st_3rd = roi_feat for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): current_1st_3rd = model.ConvGNShared( current_1st_3rd, 'head_conv' + str(i + 1) + '_1st_3rd', dim_in, hidden_dim, 3, group_gn=get_group_gn(hidden_dim), stride=1, pad=1, conv_weight_init='head_conv' + str(i + 1) + '_1st_w', conv_bias_init=('ConstantFill', { 'value': 0. }), gn_bias_init='head_conv' + str(i + 1) + '_1st_b', gn_scale_innit='head_conv' + str(i + 1) + '_1st_s') current_1st_3rd = model.Relu(current_1st_3rd, current_1st_3rd) dim_in = hidden_dim fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM model.FCShared(current_1st_3rd, 'fc6_1st_3rd', dim_in * roi_size * roi_size, fc_dim) model.Relu('fc6_1st_3rd', 'fc6_1st_3rd') current_2nd_3rd = roi_feat for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): current_2nd_3rd = model.ConvGNShared( current_2nd_3rd, 'head_conv' + str(i + 1) + '_1st_3rd', dim_in, hidden_dim, 3, group_gn=get_group_gn(hidden_dim), stride=1, pad=1, conv_weight_init='head_conv' + str(i + 1) + '_1st_w', conv_bias_init=('ConstantFill', { 'value': 0. }), gn_bias_init='head_conv' + str(i + 1) + '_2nd_b', gn_scale_innit='head_conv' + str(i + 1) + '_2nd_s') current_2nd_3rd = model.Relu(current_2nd_3rd, current_2nd_3rd) dim_in = hidden_dim fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM model.FCShared(current_2nd_3rd, 'fc6_2nd_3rd', dim_in * roi_size * roi_size, fc_dim) model.Relu('fc6_2nd_3rd', 'fc6_2nd_3rd') return ['fc6_3rd', 'fc6_1st_3rd', 'fc6_2nd_3rd'], fc_dim
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively
    # constructing lower/finer resolution FPN levels. Then we build up,
    # constructing levels that are even higher/coarser than the starting
    # level.
    logger = logging.getLogger(__name__)
    logger.info('Creating Feature Map Processing Layers...')
    logger.info('Implements FPN->Inception->BPA->Post-hoc')
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels
    # for, starting from the coarsest backbone stage (usually the
    # "conv5"-like level). E.g., if the backbone level info defines 4 stages:
    # "conv5", "conv4", ... "conv2" and min_level=2, then we end up with
    # 4 - (2 - 2) = 4 backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    ## Added for Inception shuffle
    lateral_input_blobs_dim_shrunk = [
        'fpn_lateral_dim_shrunk_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    # First Post hoc is OPTIONAL
    # lateral_input_blobs_dim_shrunk_posthoc = [
    #     'fpn_lateral_dim_shrunk_posthoc_{}'.format(s)
    #     for s in fpn_level_info.blobs[:num_backbone_stages]
    # ]
    ## Added for Inception shuffle
    lateral_input_blobs_dim_shrunk_expanded = [
        'fpn_lateral_dim_shrunk_expanded_{}'.format(s)
        for s in fpn_level_info.blobs[:(num_backbone_stages - 1)]
    ]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    ## Inception lateral - using topdown
    # STEP 1:
    # Shrinking channel dimension of Lateral Blobs
    # Adding lateral connections and upscaling
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            lateral_input_blobs_dim_shrunk[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=int(fpn_dim / 2),
            group_gn=get_group_gn(int(fpn_dim / 2)),
            kernel=1, pad=0, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        lateral_input_blobs_dim_shrunk[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            lateral_input_blobs_dim_shrunk[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=int(fpn_dim / 2),
            kernel=1, pad=0, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            lateral_input_blobs_dim_shrunk[i],      # top-down blob
            lateral_input_blobs[i + 1],             # lateral blob
            lateral_input_blobs_dim_shrunk[i + 1],  # next output blob
            int(fpn_dim / 2),                       # output dimension
            fpn_dim_lateral[i + 1]                  # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs : OPTIONAL
    # for i in range(num_backbone_stages):
    #     if cfg.FPN.USE_GN:
    #         # use GroupNorm
    #         model.ConvGN(
    #             lateral_input_blobs_dim_shrunk[i],
    #             lateral_input_blobs_dim_shrunk_posthoc[i],
    #             dim_in=int(fpn_dim / 2), dim_out=int(fpn_dim / 2),
    #             group_gn=get_group_gn(int(fpn_dim / 2)),
    #             kernel=3, pad=1, stride=1,
    #             weight_init=xavier_fill, bias_init=const_fill(0.0)
    #         )
    #     else:
    #         model.Conv(
    #             lateral_input_blobs_dim_shrunk[i],
    #             lateral_input_blobs_dim_shrunk_posthoc[i],
    #             dim_in=int(fpn_dim / 2), dim_out=int(fpn_dim / 2),
    #             kernel=3, pad=1, stride=1,
    #             weight_init=xavier_fill, bias_init=const_fill(0.0)
    #         )

    # STEP 2:
    # Expanding lateral blobs to the same H/W size as the second
    # bottom-most layer
    for i in range(num_backbone_stages - 2):
        # Format (input, output, scale)
        model.net.UpsampleNearest(
            lateral_input_blobs_dim_shrunk[i],
            lateral_input_blobs_dim_shrunk_expanded[i],
            scale=2**(num_backbone_stages - 2 - i)
        )
    lateral_input_blobs_dim_shrunk_expanded[num_backbone_stages - 2] = \
        lateral_input_blobs_dim_shrunk[num_backbone_stages - 2]

    # STEP 3:
    # Concatenate all expanded layers
    lateral_concat, _ = model.net.Concat(
        lateral_input_blobs_dim_shrunk_expanded,
        ['lateral_concat_blob', 'lateral_concat_blob_dim'],
        order='NCHW'
    )
    # Bottleneck layer to reduce computations
    # model.Conv(
    #     lateral_concat,
    #     'lateral_concat_bottled',
    #     dim_in=(fpn_dim / 2) * (num_backbone_stages - 1),
    #     dim_out=int(fpn_dim * num_backbone_stages / 2),
    #     kernel=1, pad=0, stride=1,
    #     weight_init=xavier_fill, bias_init=const_fill(0.0)
    # )

    # STEP 4:
    # Inception Layer
    add_fpn_inception_module(
        model,
        lateral_concat,                                # input blob
        'inception_out',                               # output blob
        int(fpn_dim / 2) * (num_backbone_stages - 1),  # input dimension
        int(fpn_dim / 2),                              # output dimension
        0  # Id number (if multiple blocks are used: use 0, 1, 2, ...)
    )
    model.net.UpsampleNearest('inception_out', 'inception_out_2X', scale=2)
    output_blobs[num_backbone_stages - 1], _ = model.net.Concat(
        ['inception_out_2X',
         lateral_input_blobs_dim_shrunk[num_backbone_stages - 1]],
        ['fpn_bot_' + str(num_backbone_stages - 1),
         'fpn_bot_dim_' + str(num_backbone_stages - 1)],
        order='NCHW'
    )

    # STEP 5:
    # Recursively build up starting from the coarsest level
    for j in range(num_backbone_stages - 1):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            pyr_lvl = model.ConvGN(
                output_blobs[num_backbone_stages - 1 - j],
                output_blobs[num_backbone_stages - 2 - j],
                dim_in=fpn_dim,
                dim_out=int(fpn_dim / 2),
                group_gn=get_group_gn(fpn_dim),
                kernel=3, pad=1, stride=2,
                weight_init=('XavierFill', {}),
                bias_init=const_fill(0.0)
            )
        else:
            pyr_lvl = model.Conv(
                output_blobs[num_backbone_stages - 1 - j],
                output_blobs[num_backbone_stages - 2 - j],
                dim_in=fpn_dim,
                dim_out=int(fpn_dim / 2),
                kernel=3, pad=1, stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        pyr_lvl_relu = model.Relu(
            pyr_lvl, 'pyr_lvl_relu' + str(num_backbone_stages - 2 - j))
        output_blobs[num_backbone_stages - 2 - j], _ = model.net.Concat(
            [pyr_lvl_relu,
             lateral_input_blobs_dim_shrunk[num_backbone_stages - 2 - j]],
            ['fpn_bot_' + str(num_backbone_stages - 2 - j),
             'fpn_bot_dim' + str(num_backbone_stages - 2 - j)],
            order='NCHW'
        )

    # Post-hoc scale-specific 3x3 convs: because PANet did so!
    # And looks logical too
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim, dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3, pad=1, stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim, dim_out=fpn_dim,
                kernel=3, pad=1, stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in, dim_out=fpn_dim,
                kernel=3, pad=1, stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales
def add_loop_topdown_module(model, attention_topdown, loop_bu, output_blobs, fpn_dim, dim, num_backbone_stages, tab): att_td = [ '{}_loop_c'.format(s) + str(tab) for s in attention_topdown[:num_backbone_stages] ] lp_bu = [ '{}_loop_c'.format(s) + str(tab) for s in loop_bu[:num_backbone_stages] ] for i in range(num_backbone_stages): td = model.ConvGN( attention_topdown[i], att_td[i], dim_in=fpn_dim, dim_out=dim, group_gn=get_group_gn(dim), kernel=3, pad=1, stride=1, weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0)) att_td[i] = model.net.Relu(td, td) bu = model.ConvGN( loop_bu[i], lp_bu[i], dim_in=fpn_dim, dim_out=dim, group_gn=get_group_gn(dim), kernel=3, pad=1, stride=1, weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0)) lp_bu[i] = model.net.Relu(bu, bu) model.net.Mul([att_td[0], lp_bu[0]], output_blobs[0]) for index in range(num_backbone_stages - 1): sum_blobs = [] mul_blob = [] pn_top_up = model.net.UpsampleNearest(output_blobs[index], output_blobs[index] + '_topdown', scale=2) sum_blobs.append(pn_top_up) if index == 0: mul = model.net.Mul([att_td[1], lp_bu[1]], lp_bu[1] + '_mul') model.net.Sum([mul, pn_top_up], output_blobs[1]) else: for i in range(index): re_scale = 2**(1 + index - i) feature_c = model.Conv( output_blobs[i], output_blobs[i] + '_To_' + output_blobs[index + 1] + '_c', dim_in=dim, dim_out=dim, kernel=3, pad=1, stride=1, weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0)) feature_up = model.net.UpsampleNearest(feature_c, feature_c + '_up', scale=re_scale) if index == 1: sum_blobs.append(feature_up) elif index == 2: mul_blob.append(feature_up) if index == 2: sum_blobs.append( model.net.Mul(mul_blob, lp_bu[index + 1] + '_fuse-mul')) sum_blobs.append( model.net.Mul([att_td[index + 1], lp_bu[index + 1]], lp_bu[index + 1] + '_mul')) model.net.Sum(sum_blobs, output_blobs[index + 1])
def add_fpn(model, fpn_level_info): """Add FPN connections based on the model described in the FPN paper.""" # FPN levels are built starting from the highest/coarest level of the # backbone (usually "conv5"). First we build down, recursively constructing # lower/finer resolution FPN levels. Then we build up, constructing levels # that are even higher/coarser than the starting level. fpn_dim = cfg.FPN.DIM min_level, max_level = get_min_max_levels() # Count the number of backbone stages that we will generate FPN levels for # starting from the coarest backbone stage (usually the "conv5"-like level) # E.g., if the backbone level info defines stages 4 stages: "conv5", # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4 # backbone stages to add FPN to. num_backbone_stages = ( len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL) ) lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages] output_blobs = [ 'fpn_inner_{}'.format(s) for s in fpn_level_info.blobs[:num_backbone_stages] ] fpn_dim_lateral = fpn_level_info.dims xavier_fill = ('XavierFill', {}) # For the coarsest backbone level: 1x1 conv only seeds recursion if cfg.FPN.USE_GN: # use GroupNorm c = model.ConvGN( lateral_input_blobs[0], output_blobs[0], # note: this is a prefix dim_in=fpn_dim_lateral[0], dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=1, pad=0, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0) ) output_blobs[0] = c # rename it else: model.Conv( lateral_input_blobs[0], output_blobs[0], dim_in=fpn_dim_lateral[0], dim_out=fpn_dim, kernel=1, pad=0, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0) ) # # Step 1: recursively build down starting from the coarsest backbone level # # For other levels add top-down and lateral connections for i in range(num_backbone_stages - 1): add_topdown_lateral_module( model, output_blobs[i], # top-down blob lateral_input_blobs[i + 1], # lateral blob output_blobs[i + 1], # next output blob fpn_dim, # output dimension fpn_dim_lateral[i + 1] # lateral input dimension ) # Post-hoc scale-specific 3x3 convs blobs_fpn = [] spatial_scales = [] for i in range(num_backbone_stages): if cfg.FPN.USE_GN: # use GroupNorm fpn_blob = model.ConvGN( output_blobs[i], 'fpn_{}'.format(fpn_level_info.blobs[i]), dim_in=fpn_dim, dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=3, pad=1, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0) ) else: fpn_blob = model.Conv( output_blobs[i], 'fpn_{}'.format(fpn_level_info.blobs[i]), dim_in=fpn_dim, dim_out=fpn_dim, kernel=3, pad=1, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0) ) blobs_fpn += [fpn_blob] spatial_scales += [fpn_level_info.spatial_scales[i]] # # Step 2: build up starting from the coarsest backbone level # # Check if we need the P6 feature map if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1: # Original FPN P6 level implementation from our CVPR'17 FPN paper P6_blob_in = blobs_fpn[0] P6_name = P6_blob_in + '_subsampled_2x' # Use max pooling to simulate stride 2 subsampling P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2) blobs_fpn.insert(0, P6_blob) spatial_scales.insert(0, spatial_scales[0] * 0.5) # Coarser FPN levels introduced for RetinaNet if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL: fpn_blob = fpn_level_info.blobs[0] dim_in = fpn_level_info.dims[0] for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1): fpn_blob_in = fpn_blob if i > HIGHEST_BACKBONE_LVL + 1: fpn_blob_in = model.Relu(fpn_blob, 
fpn_blob + '_relu') fpn_blob = model.Conv( fpn_blob_in, 'fpn_' + str(i), dim_in=dim_in, dim_out=fpn_dim, kernel=3, pad=1, stride=2, weight_init=xavier_fill, bias_init=const_fill(0.0) ) dim_in = fpn_dim blobs_fpn.insert(0, fpn_blob) spatial_scales.insert(0, spatial_scales[0] * 0.5) return blobs_fpn, fpn_dim, spatial_scales
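# get_min_max_levels(), LOWEST_BACKBONE_LVL and HIGHEST_BACKBONE_LVL are used
# by the add_fpn() variants in this section but defined elsewhere. A sketch
# consistent with Detectron's FPN.py, where P2 is the finest and P5 the
# coarsest backbone level; the exact constants and config keys in this repo
# may differ.
LOWEST_BACKBONE_LVL = 2    # e.g., the "conv2"-like level
HIGHEST_BACKBONE_LVL = 5   # e.g., the "conv5"-like level


def get_min_max_levels():
    """The min and max FPN levels required for supporting RPN and/or RoI
    transform operations on multiple FPN levels.
    """
    min_level = LOWEST_BACKBONE_LVL
    max_level = HIGHEST_BACKBONE_LVL
    if cfg.FPN.MULTILEVEL_RPN and not cfg.FPN.MULTILEVEL_ROIS:
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
    if not cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
        max_level = cfg.FPN.ROI_MAX_LEVEL
        min_level = cfg.FPN.ROI_MIN_LEVEL
    if cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
        max_level = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
        min_level = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
    return min_level, max_level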
def add_fpn(model, fpn_level_info): """Add FPN connections based on the model described in the FPN paper.""" # FPN levels are built starting from the highest/coarest level of the # backbone (usually "conv5"). First we build down, recursively constructing # lower/finer resolution FPN levels. Then we build up, constructing levels # that are even higher/coarser than the starting level. logger = logging.getLogger(__name__) logger.info('Implementing Inception FPN Add_conf...') logger.info('Running on 2 GPUs') logger.info('FPN->Inception->BPA') fpn_dim = cfg.FPN.DIM min_level, max_level = get_min_max_levels() # Count the number of backbone stages that we will generate FPN levels for # starting from the coarest backbone stage (usually the "conv5"-like level) # E.g., if the backbone level info defines stages 4 stages: "conv5", # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4 # backbone stages to add FPN to. num_backbone_stages = (len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)) lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages] ## Added for Inception shuffle lateral_input_blobs_dim_shrunk = [ 'fpn_lateral_dim_shrunk_{}'.format(s) for s in fpn_level_info.blobs[:num_backbone_stages] ] output_blobs = [ 'fpn_inner_{}'.format(s) for s in fpn_level_info.blobs[:num_backbone_stages] ] output_blobs_inception = [ 'fpn_inception_{}'.format(s) for s in fpn_level_info.blobs[:num_backbone_stages] ] output_blobs_inception_tower = [ 'fpn_inception_tower_{}'.format(s) for s in fpn_level_info.blobs[:num_backbone_stages] ] fpn_dim_lateral = fpn_level_info.dims xavier_fill = ('XavierFill', {}) ## Inception lateral - using topdown # STEP 1: # Shrinking channel dimension of Lateral Blobs # Adding lateral connections and upscaling if cfg.FPN.USE_GN: # use GroupNorm c = model.ConvGN(lateral_input_blobs[0], lateral_input_blobs_dim_shrunk[0], dim_in=fpn_dim_lateral[0], dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=1, pad=0, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0)) lateral_input_blobs_dim_shrunk[0] = c # rename it else: model.Conv(lateral_input_blobs[0], lateral_input_blobs_dim_shrunk[0], dim_in=fpn_dim_lateral[0], dim_out=fpn_dim, kernel=1, pad=0, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0)) lateral_input_blobs_dim_shrunk[0] = model.Relu( lateral_input_blobs_dim_shrunk[0], lateral_input_blobs_dim_shrunk[0]) for i in range(num_backbone_stages - 1): add_topdown_lateral_module( model, lateral_input_blobs_dim_shrunk[i], # top-down blob lateral_input_blobs[i + 1], # lateral blob lateral_input_blobs_dim_shrunk[i + 1], # next output blob fpn_dim, # output dimension fpn_dim_lateral[i + 1] # lateral input dimension ) inception_input_reduced = model.Conv( lateral_input_blobs_dim_shrunk[num_backbone_stages - 1], 'inception_input_reduced', dim_in=fpn_dim, dim_out=int(fpn_dim / 2), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) # STEP 4: # Inception Layer add_fpn_inception_module( model, 'inception_input_reduced', # input blob 'inception_out', # output blob int(fpn_dim / 2), # input dimension int(fpn_dim / 2), # output dimension 0 # Id number(if multiple blocks are used: use 0,1,2 ...) 
) inception_output_expanded = model.Conv('inception_out', 'inception_out_expanded', dim_in=int(fpn_dim / 2), dim_out=fpn_dim, kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) bottom_seed = model.net.Sum([ 'inception_out_expanded', lateral_input_blobs_dim_shrunk[num_backbone_stages - 1] ], 'bottom_seed_bpa') output_blobs[num_backbone_stages - 1] = bottom_seed # Upwards Pyramid collapse # output_blobs[num_backbone_stages - 1], _ = model.net.Concat( # [lateral_input_blobs_dim_shrunk[num_backbone_stages - 1], 'inception_out'], # [output_blobs[num_backbone_stages - 1],'fpn_output_dim_' + str(num_backbone_stages - 1)], # order = 'NCHW' # ) for i in range(num_backbone_stages - 1): add_bottomup_lateral_module( model, output_blobs[num_backbone_stages - 1 - i], # bottom blob lateral_input_blobs_dim_shrunk[num_backbone_stages - 2 - i], # lateral blob output_blobs[num_backbone_stages - 2 - i], # next output blob fpn_dim, # output dimension ) # for j in range(num_backbone_stages - 1): # if cfg.FPN.USE_GN: # # use GroupNorm # pyr_lvl = model.ConvGN( # output_blobs[num_backbone_stages - 1 - j], # output_blobs_inception[num_backbone_stages - 2 - j], # dim_in=fpn_dim, # dim_out=int(fpn_dim / 2), # group_gn=get_group_gn(fpn_dim), # kernel=3, # pad=1, # stride=2, # weight_init=('XavierFill', {}), # bias_init=const_fill(0.0) # ) # else: # pyr_lvl = model.Conv( # output_blobs[num_backbone_stages - 1 - j], # output_blobs_inception[num_backbone_stages - 2 - j], # dim_in=fpn_dim, # dim_out=int(fpn_dim / 2), # kernel=3, # pad=1, # stride=2, # weight_init=xavier_fill, # bias_init=const_fill(0.0) # ) # pyr_lvl_relu = model.Relu(pyr_lvl, 'pyr_lvl_relu' + str(num_backbone_stages - 2 - j)) # output_blobs[num_backbone_stages - 2 - j], _ = model.net.Concat( # [pyr_lvl_relu, lateral_input_blobs_dim_shrunk[num_backbone_stages - 2 - j]], # ['fpn_bot_' + str(num_backbone_stages - 2 - j),'fpn_bot_dim' + str(num_backbone_stages - 2 - j)], # order = 'NCHW' # ) blobs_fpn = [] spatial_scales = [] for i in range(num_backbone_stages): if cfg.FPN.USE_GN: # use GroupNorm fpn_blob = model.ConvGN(output_blobs[i], 'fpn_{}'.format(fpn_level_info.blobs[i]), dim_in=fpn_dim, dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=3, pad=1, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0)) else: fpn_blob = model.Conv(output_blobs[i], 'fpn_{}'.format(fpn_level_info.blobs[i]), dim_in=fpn_dim, dim_out=fpn_dim, kernel=3, pad=1, stride=1, weight_init=xavier_fill, bias_init=const_fill(0.0)) fpn_blob = model.Relu(fpn_blob, fpn_blob + '_relu') blobs_fpn += [fpn_blob] spatial_scales += [fpn_level_info.spatial_scales[i]] # Check if we need the P6 feature map if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1: # Original FPN P6 level implementation from our CVPR'17 FPN paper P6_blob_in = blobs_fpn[0] P6_name = P6_blob_in + '_subsampled_2x' # Use max pooling to simulate stride 2 subsampling P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2) blobs_fpn.insert(0, P6_blob) spatial_scales.insert(0, spatial_scales[0] * 0.5) # Coarser FPN levels introduced for RetinaNet if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL: fpn_blob = fpn_level_info.blobs[0] dim_in = fpn_level_info.dims[0] for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1): fpn_blob_in = fpn_blob if i > HIGHEST_BACKBONE_LVL + 1: fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu') fpn_blob = model.Conv(fpn_blob_in, 'fpn_' + str(i), dim_in=dim_in, dim_out=fpn_dim, kernel=3, pad=1, 
stride=2, weight_init=xavier_fill, bias_init=const_fill(0.0)) dim_in = fpn_dim blobs_fpn.insert(0, fpn_blob) spatial_scales.insert(0, spatial_scales[0] * 0.5) return blobs_fpn, fpn_dim, spatial_scales
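# add_bottomup_lateral_module() is called by the BPA-style add_fpn() above
# but is not defined in this section. A PANet-style sketch under that
# assumption: downsample the finer output level with a stride-2 3x3 conv,
# then sum it with the lateral blob to produce the next (coarser) output
# level. The function name, signature and blob names are assumptions.
def add_bottomup_lateral_module(model, fpn_bottom, fpn_lateral, fpn_output,
                                dim):
    # Bottom-up 2x subsampling via a stride-2 3x3 conv (+ ReLU)
    bu = model.Conv(
        fpn_bottom, fpn_output + '_bottomup',
        dim_in=dim, dim_out=dim, kernel=3, pad=1, stride=2,
        weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    bu = model.Relu(bu, bu)
    # Sum the bottom-up and lateral paths
    model.net.Sum([bu, fpn_lateral], fpn_output)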
def add_fpn_inception_module(model, inception_input, inception_output, dim_in, dim_out, num=0): ##### 1x1 path or l_path ##### if cfg.FPN.USE_GN: # use GroupNorm l_path = model.ConvGN(inception_input, 'l_path_blob_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), group_gn=get_group_gn(int(dim_out / 4)), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: l_path = model.Conv(inception_input, 'l_path_blob_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) l_path_op = model.Relu(l_path, 'l_path_blob_op_' + str(num) + '_relu') ##### 3x3 path or m_path ##### # 1x1 if cfg.FPN.USE_GN: # use GroupNorm m_path_1 = model.ConvGN(inception_input, 'm_path_blob_1_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), group_gn=get_group_gn(int(dim_out / 4)), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: m_path_1 = model.Conv(inception_input, 'm_path_blob_1_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) m_path_1 = model.Relu(m_path_1, 'm_path_blob_1_' + str(num) + '_relu') # 3x3 if cfg.FPN.USE_GN: # use GroupNorm m_path_2 = model.ConvGN(m_path_1, 'm_path_blob_2_' + str(num), dim_in=int(dim_out / 4), dim_out=int(dim_out / 4), group_gn=get_group_gn(int(dim_out / 4)), kernel=3, pad=1, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: m_path_2 = model.Conv(m_path_1, 'm_path_blob_2_' + str(num), dim_in=int(dim_out / 4), dim_out=int(dim_out / 4), kernel=3, pad=1, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) m_path_op = model.Relu(m_path_2, 'm_path_blob_op_' + str(num) + '_relu') ##### 5x5 = 2x(3x3) path or s_path ##### # 1x1 - part1 if cfg.FPN.USE_GN: # use GroupNorm s_path_1 = model.ConvGN(inception_input, 's_path_blob_1_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), group_gn=get_group_gn(int(dim_out / 4)), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: s_path_1 = model.Conv(inception_input, 's_path_blob_1_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) s_path_1 = model.Relu(s_path_1, 's_path_blob_1_' + str(num) + '_relu') # 3x3 - part2 if cfg.FPN.USE_GN: # use GroupNorm s_path_2 = model.ConvGN(s_path_1, 's_path_blob_2_' + str(num), dim_in=int(dim_out / 4), dim_out=int(dim_out / 4), group_gn=get_group_gn(dim_out / 4), kernel=3, pad=1, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: s_path_2 = model.Conv(s_path_1, 's_path_blob_2_' + str(num), dim_in=int(dim_out / 4), dim_out=int(dim_out / 4), kernel=3, pad=1, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) s_path_2 = model.Relu(s_path_2, 's_path_blob_2_' + str(num) + '_relu') # 3x3 - part3 if cfg.FPN.USE_GN: # use GroupNorm s_path_3 = model.ConvGN(s_path_2, 's_path_blob_3_' + str(num), dim_in=int(dim_out / 4), dim_out=int(dim_out / 4), group_gn=get_group_gn(dim_out / 4), kernel=3, pad=1, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: s_path_3 = model.Conv(s_path_2, 's_path_blob_3_' + str(num), dim_in=int(dim_out / 4), dim_out=int(dim_out / 4), kernel=3, pad=1, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) s_path_op = model.Relu(s_path_3, 's_path_blob_op_' + str(num) + '_relu') ##### MaxPool path or 
xs_path ##### # MaxPool - part1 xs_path_1 = model.MaxPool(inception_input, 'xs_path_blob_1_' + str(num), kernel=3, pad=1, stride=1) # 1x1 - part2 if cfg.FPN.USE_GN: # use GroupNorm xs_path_2 = model.ConvGN(xs_path_1, 'xs_path_blob_2_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), group_gn=get_group_gn(int(dim_out / 4)), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) else: xs_path_2 = model.Conv(xs_path_1, 'xs_path_blob_2_' + str(num), dim_in=dim_in, dim_out=int(dim_out / 4), kernel=1, pad=0, stride=1, weight_init=('XavierFill', {}), bias_init=const_fill(0.0)) xs_path_op = model.Relu(xs_path_2, 'xs_path_blob_2_' + str(num) + '_relu') ##### Concat ##### model.net.Concat( [l_path_op, m_path_op, s_path_op, xs_path_op], [inception_output, 'inception_output_fpn_' + str(num) + '_dim'], order='NCHW')
def mask_rcnn_fcn_head_v1upXconvs_gn( model, blob_in, dim_in, spatial_scale, num_convs ): """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm""" current = model.RoIFeatureTransform( blob_in, blob_out='_mask_roi_feat', blob_rois='mask_rois', method=cfg.MRCNN.ROI_XFORM_METHOD, resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION, sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO, spatial_scale=spatial_scale ) dilation = cfg.MRCNN.DILATION dim_inner = cfg.MRCNN.DIM_REDUCED split_i = 0 # to keep track of i for i in range(num_convs - 1): # default-> range(num_convs) # branches out from one layer before the last layer current = model.ConvGN( current, '_[mask]_fcn' + str(i + 1), dim_in, dim_inner, group_gn=get_group_gn(dim_inner), kernel=3, dilation=dilation, pad=1 * dilation, stride=1, weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), bias_init=('ConstantFill', {'value': 0.}) ) current = model.Relu(current, current) dim_in = dim_inner split_i = i + 1 # Splitting into branches # Branch 1 - FCN convfcn1 = model.ConvGN( current, '_[mask]_fcn' + str(split_i + 1), dim_inner, dim_inner, group_gn=get_group_gn(dim_inner), kernel=3, pad=1 * dilation, stride=1, weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), bias_init=const_fill(0.0) ) convfcn1_r = model.Relu(convfcn1, convfcn1) # upsample layer model.ConvTranspose( convfcn1_r, 'conv5_mask_fcn', dim_inner, dim_inner, kernel=2, pad=0, stride=2, weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), bias_init=const_fill(0.0) ) blob_mask_fcn = model.Relu('conv5_mask_fcn', 'conv5_mask_fcn') # Branch 2 - fc + FCN convfc1 = model.ConvGN( current, '_[mask]_fc' + str(split_i + 1), dim_inner, dim_inner, group_gn=get_group_gn(dim_inner), kernel=3, pad=1 * dilation, stride=1, weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), bias_init=const_fill(0.0) ) convfc1_r = model.Relu(convfc1, convfc1) # Conv layer to reduce no. of channels to reduce computation convfc2 = model.ConvGN( convfc1_r, '_[mask]_fc' + str(split_i + 2), dim_inner, int(dim_inner / 2), group_gn=get_group_gn(int(dim_inner / 2)), kernel=3, pad=1 * dilation, stride=1, weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), bias_init=const_fill(0.0) ) convfc2_r = model.Relu(convfc2, convfc2) # fc layer convfc3 = model.FC( convfc2_r, '_[mask]_fc' + str(split_i + 3), int(dim_inner/2) * cfg.MRCNN.ROI_XFORM_RESOLUTION**2, # 128*14*14 4 * cfg.MRCNN.ROI_XFORM_RESOLUTION**2, # 4*14*14 = 28*28 weight_init=gauss_fill(0.001), bias_init=const_fill(0.0) ) # Intentional error to stop code and read values in log #model.net.Reshape(3,a) # Reshape fc layer to add to FCN layer of the other branch # Note that this shape is different from the final FCN layer of the other branch model.net.Reshape( ['_[mask]_fc' + str(split_i + 3)], # [Input] ['_[mask]_fc_reshaped', '_[mask]_fc_old_shaped' + str(split_i + 3)], # [Output, old_shape] shape=(-1,1,cfg.MRCNN.ROI_XFORM_RESOLUTION*2,cfg.MRCNN.ROI_XFORM_RESOLUTION*2) # shape = (n,c,h,w) ) # Reshape with 1x1 conv to match shape of the final FCN layer of the other branch # This next step is not recommended, change it when you get a better idea in order to save memory. # TODO: Freeze this layer convfc_mask = model.Conv( '_[mask]_fc_reshaped', '_[mask]_fc_bg_fg', 1, dim_inner, kernel=1, pad=0, stride=1, weight_init=const_fill(1.0), bias_init=const_fill(0.0) ) blob_mask_fc = model.Relu('_[mask]_fc_bg_fg', '_[mask]_fc_bg_fg') # Adding the 2 branches blob_mask = model.net.Sum([blob_mask_fcn, blob_mask_fc],'fc_fusion_mask') return blob_mask, dim_inner
def add_dt_lateral_module( model, fpn_down, fpn_lateral, fpn_top, fpn_dim ): if cfg.FPN.USE_GN: # use GroupNorm lat1 = model.ConvGN( fpn_lateral, fpn_top + '_lateral', dim_in=fpn_dim, dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0) ) else: lat1 = model.Conv( fpn_lateral, fpn_top + '_lateral', dim_in=fpn_dim, dim_out=fpn_dim, kernel=1, pad=0, stride=1, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) lat1 = model.Relu(lat1, lat1 + '_relu') # down-top /2 downsampling if cfg.FPN.USE_GN: # use GroupNorm dt = model.ConvGN( fpn_down, fpn_top + '_down', dim_in=fpn_dim, dim_out=fpn_dim, group_gn=get_group_gn(fpn_dim), kernel=3, pad=1, stride=2, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})), bias_init=const_fill(0.0) ) else: dt = model.Conv( fpn_down, fpn_top + '_down', dim_in=fpn_dim, dim_out=fpn_dim, kernel=3, pad=1, stride=2, weight_init=( const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ), bias_init=const_fill(0.0) ) dt = model.Relu(dt, dt + '_relu') # Sum lateral and top-down model.net.Sum([lat1, dt], fpn_top)
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively
    # constructing lower/finer resolution FPN levels. Then we build up,
    # constructing levels that are even higher/coarser than the starting
    # level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )
    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1, pad=0, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0))
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1, pad=0, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0))

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim, dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3, pad=1, stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0))
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim, dim_out=fpn_dim,
                kernel=3, pad=1, stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0))
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in, dim_out=fpn_dim,
                kernel=3, pad=1, stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0))
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales