def bottleneck_gn_transformation(
    model, blob_in, dim_in, dim_out, stride, prefix, dim_inner,
    dilation=1, group=1
):
    """Add a bottleneck transformation with GroupNorm to the model."""
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> GN -> ReLU
    cur = model.ConvGN(
        blob_in,
        prefix + '_branch2a',
        dim_in,
        dim_inner,
        kernel=1,
        group_gn=get_group_gn(dim_inner),
        stride=str1x1,
        pad=0,
    )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> GN -> ReLU
    cur = model.ConvGN(
        cur,
        prefix + '_branch2b',
        dim_inner,
        dim_inner,
        kernel=3,
        group_gn=get_group_gn(dim_inner),
        stride=str3x3,
        pad=1 * dilation,
        dilation=dilation,
        group=group,
    )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> GN (no ReLU)
    cur = model.ConvGN(
        cur,
        prefix + '_branch2c',
        dim_inner,
        dim_out,
        kernel=1,
        group_gn=get_group_gn(dim_out),
        stride=1,
        pad=0,
    )
    return cur

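# For reference: get_group_gn (defined alongside the ResNet helpers in
# Detectron, not shown in this file) is assumed to map a channel count to a
# GroupNorm group count, using either a global group count or a fixed
# channels-per-group setting. A minimal sketch under that assumption, with
# the cfg.GROUP_NORM options passed as plain arguments:
def _get_group_gn_sketch(dim, dim_per_gp=-1, num_groups=32):
    """Sketch of get_group_gn: number of GN groups for a dim-channel layer."""
    assert dim_per_gp == -1 or num_groups == -1, \
        'GroupNorm: can only specify G or C/G'
    if dim_per_gp > 0:
        assert dim % dim_per_gp == 0  # C must be divisible by C/G
        return dim // dim_per_gp
    assert dim % num_groups == 0  # C must be divisible by G
    return num_groups
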
def add_roi_Xconv1fc_gn_head(model, blob_in, dim_in, spatial_scale):
    """Add an X conv + 1fc head, with GroupNorm"""
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    roi_feat = model.RoIFeatureTransform(
        blob_in, 'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    current = roi_feat
    for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
        current = model.ConvGN(
            current, 'head_conv' + str(i + 1), dim_in, hidden_dim, 3,
            group_gn=get_group_gn(hidden_dim),
            stride=1, pad=1,
            weight_init=('MSRAFill', {}),
            bias_init=('ConstantFill', {'value': 0.}))
        current = model.Relu(current, current)
        dim_in = hidden_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim)
    model.Relu('fc6', 'fc6')
    return 'fc6', fc_dim

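# Sizing note for the head above: with typical GN-head settings
# (CONV_HEAD_DIM = 256, ROI_XFORM_RESOLUTION = 7, MLP_HEAD_DIM = 1024 --
# assumed values for illustration, not read from this file), fc6 consumes
#   hidden_dim * roi_size * roi_size = 256 * 7 * 7 = 12544
# features per RoI and projects them down to 1024.
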
def basic_gn_stem(model, data, **kwargs):
    """Add a basic ResNet stem (using GN)"""
    dim = 64
    p = model.ConvGN(
        data, 'conv1', 3, dim, 7, group_gn=get_group_gn(dim), pad=3, stride=2
    )
    p = model.Relu(p, p)
    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
    return p, dim

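# Stem sizing sketch: conv1 (7x7, stride 2, pad 3) followed by pool1 (3x3,
# stride 2, pad 1) reduces spatial resolution by 4x, matching the 1/4 spatial
# scale FPN assumes for the "conv2" stage. Illustrative only, using the
# standard conv output formula out = floor((in + 2*pad - kernel) / stride) + 1:
def _stem_out_size_sketch(in_size):
    """Spatial size after the GN stem for a square input (illustrative)."""
    conv1 = (in_size + 2 * 3 - 7) // 2 + 1  # 7x7 conv, stride 2, pad 3
    pool1 = (conv1 + 2 * 1 - 3) // 2 + 1    # 3x3 max pool, stride 2, pad 1
    return pool1

# e.g. _stem_out_size_sketch(224) == 56, i.e. a 4x reduction
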
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm"""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.ConvGN(
            current,
            '_mask_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner

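# Upsampling note: the final ConvTranspose (kernel 2, stride 2, pad 0) exactly
# doubles the RoI feature resolution, via out = (in - 1)*stride - 2*pad + kernel:
#   in = 14  ->  out = (14 - 1) * 2 - 0 + 2 = 28
# so a 14x14 RoI feature map becomes a 28x28 mask prediction grid (14 and 28
# are the typical Mask R-CNN values, assumed here for illustration).
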
def basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
    if dim_in == dim_out:
        return blob_in

    # output name is prefix + '_branch1_gn'
    return model.ConvGN(
        blob_in,
        prefix + '_branch1',
        dim_in,
        dim_out,
        kernel=1,
        group_gn=get_group_gn(dim_out),
        stride=stride,
        pad=0,
        group=1,
    )

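# How the pieces combine: a full GN residual block sums the bottleneck branch
# with the shortcut branch, then applies a final ReLU. A minimal sketch of
# the wiring (the surrounding model builder normally does this; the function
# name and argument layout here are illustrative, not the exact Detectron
# driver):
def _add_gn_residual_block_sketch(model, prefix, blob_in, dim_in, dim_out,
                                  dim_inner, stride):
    tr = bottleneck_gn_transformation(
        model, blob_in, dim_in, dim_out, stride, prefix, dim_inner)
    sc = basic_gn_shortcut(model, prefix, blob_in, dim_in, dim_out, stride)
    s = model.net.Sum([tr, sc], prefix + '_sum')  # residual addition
    return model.Relu(s, s)
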
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(
                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                else ('XavierFill', {})),
            bias_init=const_fill(0.0)
        )
    else:
        lat = model.Conv(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(
                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                else ('XavierFill', {})
            ),
            bias_init=const_fill(0.0)
        )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)

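# Wiring example, with illustrative ResNet-50 blob names: for the conv4 level,
# fpn_bottom is 'fpn_inner_res4_5_sum' and this module creates
#   'fpn_inner_res4_5_sum_lateral'  -- 1x1 (optionally GN) conv on the
#                                      backbone blob 'res4_5_sum'
#   'fpn_inner_res4_5_sum_topdown'  -- 2x nearest-neighbor upsampling of the
#                                      coarser FPN level above
# and sums them element-wise into 'fpn_inner_res4_5_sum'.
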
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively
    # constructing lower/finer resolution FPN levels. Then we build up,
    # constructing levels that are even higher/coarser than the starting
    # level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels
    # for, starting from the coarsest backbone stage (usually the
    # "conv5"-like level). E.g., if the backbone level info defines 4 stages:
    # "conv5", "conv4", ... "conv2" and min_level=2, then we end up with
    # 4 - (2 - 2) = 4 backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone
    # level
    #

    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales

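# The fpn_level_info argument is assumed to follow the usual Detectron
# convention: a namedtuple listing backbone blobs from coarsest to finest,
# with their channel counts and spatial scales. An illustrative sketch with
# the standard ResNet-50-conv5 values (assumed, not read from this file):
import collections

_FpnLevelInfoSketch = collections.namedtuple(
    '_FpnLevelInfoSketch', ['blobs', 'dims', 'spatial_scales'])

_example_fpn_level_info = _FpnLevelInfoSketch(
    blobs=('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum'),
    dims=(2048, 1024, 512, 256),
    spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.),
)
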
def add_roi_cascade_Xconv1fc_gn_head(
    model, blob_in, dim_in, spatial_scale, stage_num
):
    """Add a cascade X conv + 1fc head, with GroupNorm"""
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    if stage_num == 1:
        roi_feat = model.RoIFeatureTransform(
            blob_in, 'roi_feat_1st',
            blob_rois='rois_1st',
            method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
            resolution=roi_size,
            sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
            spatial_scale=spatial_scale
        )
        current = roi_feat
        for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
            current = model.ConvGN(
                current, 'head_conv' + str(i + 1) + '_1st', dim_in,
                hidden_dim, 3,
                group_gn=get_group_gn(hidden_dim),
                stride=1, pad=1,
                weight_init=('MSRAFill', {}),
                bias_init=('ConstantFill', {'value': 0.}))
            current = model.Relu(current, current)
            dim_in = hidden_dim

        fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
        model.FC(current, 'fc6_1st', dim_in * roi_size * roi_size, fc_dim)
        model.Relu('fc6_1st', 'fc6_1st')
        return ['fc6_1st'], fc_dim
    elif stage_num == 2:
        roi_feat = model.RoIFeatureTransform(
            blob_in, 'roi_feat_2nd',
            blob_rois='rois_2nd',
            method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
            resolution=roi_size,
            sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
            spatial_scale=spatial_scale
        )
        current = roi_feat
        for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
            current = model.ConvGN(
                current, 'head_conv' + str(i + 1) + '_2nd', dim_in,
                hidden_dim, 3,
                group_gn=get_group_gn(hidden_dim),
                stride=1, pad=1,
                weight_init=('MSRAFill', {}),
                bias_init=('ConstantFill', {'value': 0.}))
            current = model.Relu(current, current)
            dim_in = hidden_dim

        fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
        model.FC(current, 'fc6_2nd', dim_in * roi_size * roi_size, fc_dim)
        model.Relu('fc6_2nd', 'fc6_2nd')

        if not model.train:
            # At test time, replay the 1st-stage head (shared weights) on the
            # 2nd-stage RoIs for ensembling.
            current_1st_2nd = roi_feat
            for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
                current_1st_2nd = model.ConvGNShared(
                    current_1st_2nd,
                    'head_conv' + str(i + 1) + '_1st_2nd', dim_in,
                    hidden_dim, 3,
                    group_gn=get_group_gn(hidden_dim),
                    stride=1, pad=1,
                    conv_weight_init='head_conv' + str(i + 1) + '_1st_w',
                    conv_bias_init=('ConstantFill', {'value': 0.}),
                    gn_bias_init='head_conv' + str(i + 1) + '_1st_b',
                    gn_scale_innit='head_conv' + str(i + 1) + '_1st_s')
                current_1st_2nd = model.Relu(current_1st_2nd, current_1st_2nd)
                dim_in = hidden_dim

            fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
            model.FCShared(
                current_1st_2nd, 'fc6_1st_2nd',
                dim_in * roi_size * roi_size, fc_dim)
            model.Relu('fc6_1st_2nd', 'fc6_1st_2nd')
        return ['fc6_2nd', 'fc6_1st_2nd'], fc_dim
        # return ['fc6_2nd'], fc_dim
    elif stage_num == 3:
        roi_feat = model.RoIFeatureTransform(
            blob_in, 'roi_feat_3rd',
            blob_rois='rois_3rd',
            method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
            resolution=roi_size,
            sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
            spatial_scale=spatial_scale
        )
        current = roi_feat
        for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
            current = model.ConvGN(
                current, 'head_conv' + str(i + 1) + '_3rd', dim_in,
                hidden_dim, 3,
                group_gn=get_group_gn(hidden_dim),
                stride=1, pad=1,
                weight_init=('MSRAFill', {}),
                bias_init=('ConstantFill', {'value': 0.}))
            current = model.Relu(current, current)
            dim_in = hidden_dim

        fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
        model.FC(current, 'fc6_3rd', dim_in * roi_size * roi_size, fc_dim)
        model.Relu('fc6_3rd', 'fc6_3rd')

        if not model.train:
            # At test time, replay the 1st- and 2nd-stage heads (shared
            # weights) on the 3rd-stage RoIs for ensembling.
            current_1st_3rd = roi_feat
            for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
                current_1st_3rd = model.ConvGNShared(
                    current_1st_3rd,
                    'head_conv' + str(i + 1) + '_1st_3rd', dim_in,
                    hidden_dim, 3,
                    group_gn=get_group_gn(hidden_dim),
                    stride=1, pad=1,
                    conv_weight_init='head_conv' + str(i + 1) + '_1st_w',
                    conv_bias_init=('ConstantFill', {'value': 0.}),
                    gn_bias_init='head_conv' + str(i + 1) + '_1st_b',
                    gn_scale_innit='head_conv' + str(i + 1) + '_1st_s')
                current_1st_3rd = model.Relu(current_1st_3rd, current_1st_3rd)
                dim_in = hidden_dim

            fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
            model.FCShared(
                current_1st_3rd, 'fc6_1st_3rd',
                dim_in * roi_size * roi_size, fc_dim)
            model.Relu('fc6_1st_3rd', 'fc6_1st_3rd')

            current_2nd_3rd = roi_feat
            for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
                current_2nd_3rd = model.ConvGNShared(
                    current_2nd_3rd,
                    'head_conv' + str(i + 1) + '_2nd_3rd', dim_in,
                    hidden_dim, 3,
                    group_gn=get_group_gn(hidden_dim),
                    stride=1, pad=1,
                    conv_weight_init='head_conv' + str(i + 1) + '_2nd_w',
                    conv_bias_init=('ConstantFill', {'value': 0.}),
                    gn_bias_init='head_conv' + str(i + 1) + '_2nd_b',
                    gn_scale_innit='head_conv' + str(i + 1) + '_2nd_s'
                )
                current_2nd_3rd = model.Relu(current_2nd_3rd, current_2nd_3rd)
                dim_in = hidden_dim

            fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
            model.FCShared(
                current_2nd_3rd, 'fc6_2nd_3rd',
                dim_in * roi_size * roi_size, fc_dim)
            model.Relu('fc6_2nd_3rd', 'fc6_2nd_3rd')
        return ['fc6_3rd', 'fc6_1st_3rd', 'fc6_2nd_3rd'], fc_dim

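# Usage sketch (illustrative, following from the code above): a cascade
# driver calls this head once per stage; at test time stages 2 and 3
# additionally build weight-shared re-scoring heads for ensembling, so the
# returned blob lists grow with the stage number:
#
#   blobs1, dim = add_roi_cascade_Xconv1fc_gn_head(model, fpn, 256, sc, 1)
#   # -> ['fc6_1st']
#   blobs2, dim = add_roi_cascade_Xconv1fc_gn_head(model, fpn, 256, sc, 2)
#   # -> ['fc6_2nd', 'fc6_1st_2nd']   (second blob exists only when testing)
#   blobs3, dim = add_roi_cascade_Xconv1fc_gn_head(model, fpn, 256, sc, 3)
#   # -> ['fc6_3rd', 'fc6_1st_3rd', 'fc6_2nd_3rd']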