def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1, pad=0, stride=1,
            weight_init=(
                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                else ('XavierFill', {})
            ),
            bias_init=const_fill(0.0)
        )
    else:
        lat = model.Conv(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1, pad=0, stride=1,
            weight_init=(
                const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                else ('XavierFill', {})
            ),
            bias_init=const_fill(0.0)
        )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)
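# --- Illustration (not part of the original model code) ---
# A minimal NumPy sketch of what add_topdown_lateral_module computes:
# UpsampleNearest doubles the spatial size of the coarser top-down feature,
# and Sum adds it elementwise to the 1x1-conv lateral of the finer backbone
# level. The shapes and names below are illustrative assumptions only.
import numpy as np

def topdown_lateral_sketch(fpn_top, lateral):
    # fpn_top: (C, H, W) coarser level; lateral: (C, 2H, 2W) lateral conv output
    td = fpn_top.repeat(2, axis=1).repeat(2, axis=2)  # nearest-neighbor 2x upsample
    return lateral + td                               # elementwise sum

# e.g. topdown_lateral_sketch(np.zeros((256, 8, 8)), np.zeros((256, 16, 16)))
# returns a (256, 16, 16) array, i.e. the finer level's resolution.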
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # RPN hidden representation
    model.Conv(
        blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    model.Conv(
        'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Proposal bbox regression deltas
    model.Conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )
    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Add op that generates training labels for in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(
            blob_in, 'conv_dim_reduce', dim_in, dim_reduce, kernel=1, pad=0,
            stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Classification conv
    model.Conv(
        blob_in, 'conv_cls', dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Bounding-box regression conv
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in, 'conv_bbox_pred', dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE
    )
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE
    )
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(
        blob_in, 'cls_score', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(
        blob_in, 'bbox_pred', dim, model.num_classes * 4,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )
    if cfg.MODEL.ATTR and model.train:
        in_dim = dim
        if cfg.MODEL.CLS_EMBED:
            # first slice the fc7 feature
            model.net.SelectFG([blob_in, 'fg_idx'], 'fc7_fg')
            model.create_param(
                param_name='class_embedding',
                initializer=initializers.Initializer("GaussianFill", std=0.01),
                shape=[model.num_classes, 256]
            )
            # op that just takes the class index and returns the
            # corresponding row
            model.net.Embed(['class_embedding', 'labels_int32_fg'], 'embed_fg')
            # then do concatenation
            model.net.Concat(
                ['fc7_fg', 'embed_fg'], ['concat_attr', 'concat_split'], axis=1
            )
            in_dim += 256
        else:
            model.net.SelectFG([blob_in, 'fg_idx'], 'concat_attr')
        model.FC(
            'concat_attr', 'fc_attr', in_dim, 512,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
        model.Relu('fc_attr', 'fc_attr')
        model.FC(
            'fc_attr', 'attr_score', 512, model.num_attributes,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
    """v0up design: conv5, deconv 2x2 (no weight sharing with the box head)."""
    blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
        model, blob_in, dim_in, spatial_scale
    )
    dim_reduced = cfg.MRCNN.DIM_REDUCED
    model.ConvTranspose(
        blob_conv5, 'conv5_mask', dim_conv5, dim_reduced, kernel=2, pad=0,
        stride=2, weight_init=('GaussianFill', {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_reduced
def add_refine_net_mask_outputs(model, blob_in, dim_in):
    """Add Refine Net output.
    blob_in: 'refine_mask_net_feat'
    blob_out: 'refined_mask_logits' or 'refined_mask_probs'
    """
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    # Use GaussianFill for class-agnostic mask prediction; fills based on
    # fan-in can be too large in this case and cause divergence
    fill = (
        cfg.MRCNN.CONV_INIT if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'
    )
    blob_out = model.Conv(
        blob_in, 'refined_mask_logits', dim_in, num_cls, kernel=1, pad=0,
        stride=1, weight_init=(fill, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'refined_mask_probs')
    return blob_out
def fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale, num_convs=4):
    dilation = cfg.FCN.DILATION
    dim_inner = cfg.FCN.DIM_REDUCED
    current = blob_in
    for i in range(num_convs):
        current = model.Conv(
            current, '_[mask]_fcn' + str(i + 1), dim_in, dim_inner, kernel=3,
            pad=1 * dilation, stride=1,
            weight_init=(cfg.FCN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner, kernel=2, pad=0, stride=2,
        weight_init=(cfg.FCN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_inner
def mask_rcnn_hourglass_head(model, blob_in, dim_in, spatial_scale):
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    prefix = 'mask_head_hg'
    n = cfg.MRCNN.NUM_HG_MODULES
    current, dim_inner = Hourglass.add_hourglass_head(
        model, current, 'mask_head_hg_out', dim_in, prefix, n
    )
    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner, kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_inner
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and
    computation are shared with the conv5 box head. Computation can only be
    shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.
    """
    # Since box and mask head are shared, these must match
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if model.train:  # share computation with bbox head at training time
        dim_conv5 = 2048
        blob_conv5 = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced']
        )
    else:  # re-compute at test time
        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
            model, blob_in, dim_in, spatial_scale
        )

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    blob_mask = model.ConvTranspose(
        blob_conv5, 'conv5_mask', dim_conv5, dim_reduced, kernel=2, pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std only for gauss
        bias_init=const_fill(0.0)
    )
    model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_reduced
def add_semantic_segms_outputs(model, blob_in, dim):
    """Add semantic segmentation specific outputs: either logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES
    # Predict mask using Conv
    # Use GaussianFill for class-agnostic mask prediction; fills based on
    # fan-in can be too large in this case and cause divergence
    fill = (
        cfg.MRCNN.CONV_INIT if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'
    )
    blob_out = model.Conv(
        blob_in, 'semantic_segms_fcn_logits', dim, num_cls, kernel=1, pad=0,
        stride=1, weight_init=(fill, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
    return blob_out
def add_boundary_rcnn_outputs(model, blob_in, dim):
    """Add boundary prediction specific outputs: either boundary logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.BOUNDARY.CLS_SPECIFIC_MASK else 1
    if cfg.BOUNDARY.USE_FC_OUTPUT:
        # Predict boundaries with a fully connected layer (ignore 'fcn' in the
        # blob name)
        blob_out = model.FC(
            blob_in, 'boundary_fcn_logits', dim,
            num_cls * cfg.BOUNDARY.RESOLUTION**2,
            weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
        )
    else:
        # Predict boundary using Conv
        # Use GaussianFill for class-agnostic boundary prediction; fills based
        # on fan-in can be too large in this case and cause divergence
        fill = (
            cfg.BOUNDARY.CONV_INIT
            if cfg.BOUNDARY.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in, 'boundary_fcn_logits', dim, num_cls, kernel=1, pad=0,
            stride=1, weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        if cfg.BOUNDARY.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'boundary_fcn_logits', 'boundary_fcn_logits_up', num_cls,
                num_cls, cfg.BOUNDARY.UPSAMPLE_RATIO
            )
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'boundary_fcn_probs')
    return blob_out
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the blob
        # name)
        blob_out = model.FC(
            blob_in, 'mask_fcn_logits', dim, num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
        )
    else:
        # Predict mask using Conv
        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        fill = (
            cfg.MRCNN.CONV_INIT if cfg.MRCNN.CLS_SPECIFIC_MASK
            else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in, 'mask_fcn_logits', dim, num_cls, kernel=1, pad=0,
            stride=1, weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO
            )
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
    return blob_out
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(
        blob_in, 'cls_score', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(
        blob_in, 'bbox_pred', dim, model.num_classes * 4,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )
def add_refine_net_head_isolate(model, blob_in, dim_in, prefix):
    """Abstracts away different choices of the refinement head. Note that the
    refine head is free of indicator type.
    """
    # note that prefix must be 'mask' or 'keypoint'
    assert prefix in {'mask', 'keypoint'}, 'prefix must be mask/keypoint'
    blob_out = 'refine_' + prefix + '_net_feat'
    if cfg.REFINENET.HEAD == 'HOURGLASS':
        n = cfg.REFINENET.NUM_HG_MODULES
        current, dim_inner = Hourglass.add_hourglass_head(
            model, blob_in, 'refined_hg_out', dim_in, prefix, n
        )
        # upsample layer
        model.ConvTranspose(
            current, blob_out, dim_inner, dim_inner, kernel=2, pad=0, stride=2,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        return blob_out, dim_inner
    elif cfg.REFINENET.HEAD == 'MRCNN_FCN':
        # Use a head similar to the Mask R-CNN head, but with different blob
        # names. Note that this head uses a lot of GPU memory (~7GB for
        # batch 512).
        num_convs = cfg.REFINENET.MRCNN_FCN.NUM_CONVS
        use_deconv = cfg.REFINENET.MRCNN_FCN.USE_DECONV
        blob_out, dim_out = add_fcn_head(
            model, blob_in, blob_out, dim_in, prefix, num_convs, use_deconv
        )
        return blob_out, dim_out
    elif cfg.REFINENET.HEAD == 'RESNET_FCN':
        # Use a ResNet-like structure as the head; this is more memory
        # efficient (~1GB for batch 512).
        n_downsampling = cfg.REFINENET.RESNET_FCN.NUM_DOWNSAMPLING_LAYERS
        num_res_blocks = cfg.REFINENET.RESNET_FCN.NUM_RES_BLOCKS
        use_deconv = cfg.REFINENET.RESNET_FCN.USE_DECONV
        blob_out, dim_out = add_resnet_head(
            model, blob_in, blob_out, dim_in, prefix, n_downsampling,
            num_res_blocks, use_deconv
        )
        return blob_out, dim_out
    elif cfg.REFINENET.HEAD == 'KRCNN':
        # Use a Keypoint R-CNN like head
        blob_out, dim_out = add_krcnn_head(
            model, blob_in, blob_out, dim_in, prefix
        )
        return blob_out, dim_out
    else:
        raise NotImplementedError('{} not supported'.format(cfg.REFINENET.HEAD))
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(
        blob_in, 'cls_score', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(
        blob_in, 'bbox_pred', dim, model.num_classes * 4,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    lat = model.Conv(
        fpn_lateral,
        fpn_bottom + '_lateral',
        dim_in=dim_lateral,
        dim_out=dim_top,
        kernel=1, pad=0, stride=1,
        weight_init=(
            const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
            else ('XavierFill', {})
        ),
        bias_init=const_fill(0.0)
    )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)
def add_prn_outputs(model, blob_in, dim):
    """Add RoI classification output ops."""
    blob_out = model.FC(
        blob_in, 'prn_logits', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add sigmoid when testing; during training the sigmoid is
        # combined with the label cross entropy loss for numerical stability
        blob_out = model.net.Sigmoid('prn_logits', 'prn_probs', engine='CUDNN')
    return blob_out
def add_mlp_outputs(model, blob_in, dim):
    """Add classification ops."""
    model.FC(
        blob_in, 'logits', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability
        model.Softmax('logits', 'cls_prob', engine='CUDNN')
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm"""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    for i in range(num_convs):
        current = model.ConvGN(
            current, '_mask_fcn' + str(i + 1), dim_in, dim_inner,
            group_gn=get_group_gn(dim_inner), kernel=3, pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner, kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_inner
def boundary_rcnn_fcn_head_v1upXconvs(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2."""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[boundary]_roi_feat',
        blob_rois='boundary_rois',
        method=cfg.BOUNDARY.ROI_XFORM_METHOD,
        resolution=cfg.BOUNDARY.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.BOUNDARY.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    dilation = cfg.BOUNDARY.DILATION
    dim_inner = cfg.BOUNDARY.DIM_REDUCED
    for i in range(num_convs):
        current = model.Conv(
            current, '_[boundary]_fcn' + str(i + 1), dim_in, dim_inner,
            kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.BOUNDARY.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    # upsample layer
    model.ConvTranspose(
        current, 'conv5_boundary', dim_inner, dim_inner, kernel=2, pad=0,
        stride=2, weight_init=(cfg.BOUNDARY.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_boundary = model.Relu('conv5_boundary', 'conv5_boundary')
    return blob_boundary, dim_inner
def mask_rcnn_fcn_head_v1upXconvs(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2."""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    for i in range(num_convs):
        current = model.Conv(
            current, '_[mask]_fcn' + str(i + 1), dim_in, dim_inner, kernel=3,
            pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner, kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_inner
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and
    computation are shared with the conv5 box head. Computation can only be
    shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.
    """
    # Since box and mask head are shared, these must match
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if model.train:  # share computation with bbox head at training time
        dim_conv5 = 2048
        blob_conv5 = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced']
        )
    else:  # re-compute at test time
        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
            model, blob_in, dim_in, spatial_scale
        )

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    blob_mask = model.ConvTranspose(
        blob_conv5, 'conv5_mask', dim_conv5, dim_reduced, kernel=2, pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std only for gauss
        bias_init=const_fill(0.0)
    )
    model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_reduced
def add_pan_bottom_up_path_lateral(model, pan_level_info):
    """Add PAN connections based on the model described in the PAN paper."""
    # PAN levels are built starting from the finest level of the FPN.
    # First we recursively construct the coarser PAN levels.
    # In detail:
    #   N2 = P2,
    #   N3 = Conv(Conv(N2, 3x3, s=2) + P3, 3x3, s=1)
    #   N4 = Conv(Conv(N3, 3x3, s=2) + P4, 3x3, s=1)
    #   N5 = Conv(Conv(N4, 3x3, s=2) + P5, 3x3, s=1)
    # It seems there is no higher level than N5 (i.e. P5) in PAN
    pan_dim = cfg.PAN.DIM
    xavier_fill = ('XavierFill', {})
    num_backbone_stages = (
        len(pan_level_info.blobs)  # - (min_level - LOWEST_BACKBONE_LVL)
    )
    fpn_input_blobs = pan_level_info.blobs
    pan_blobs = [
        'pan_{}'.format(s) for s in pan_level_info.blobs
    ]
    spatial_scales = [
        sp for sp in pan_level_info.spatial_scales
    ]
    pan_dim_lateral = pan_level_info.dims
    # For the finest FPN level: N2 = P2 only seeds recursion
    pan_blobs[0] = pan_level_info.blobs[0]
    # For other levels add bottom-up path
    for i in range(num_backbone_stages - 1):
        # Bottom-up 3x3 subsample conv
        subsample = model.Conv(
            pan_blobs[i],
            pan_blobs[i] + '_sub',
            dim_in=pan_dim,
            dim_out=pan_dim_lateral[i],
            kernel=3, pad=1, stride=2,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        model.Relu(subsample, subsample)
        # Sum lateral and bottom-up subsampled conv
        model.net.Sum([subsample, fpn_input_blobs[i + 1]], pan_blobs[i] + '_sum')
        # Post-hoc scale-specific 3x3 convs
        pan_blob = model.Conv(
            pan_blobs[i] + '_sum',
            pan_blobs[i + 1],
            dim_in=pan_dim_lateral[i],
            dim_out=pan_dim,
            kernel=3, pad=1, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        model.Relu(pan_blob, pan_blob)
    return pan_blobs, pan_dim, spatial_scales
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3, pad=1,
                stride=1, weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in, num_anchors,
                kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3, pad=1,
                stride=1, weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in, num_anchors,
                kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [UnscopeGPUName(rpn_cls_probs_fpn._name),
                 UnscopeGPUName(rpn_bbox_pred_fpn._name), 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
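# --- Illustration (not part of the original model code) ---
# A small sketch of the per-level anchor geometry used above: each FPN level
# lvl gets a single anchor size that doubles per level, starting from
# cfg.FPN.RPN_ANCHOR_START_SIZE at the finest level, and its feature stride is
# 2**lvl. The literal default values below are example assumptions, not the
# actual config.
def fpn_rpn_anchor_geometry(k_min=2, k_max=6, start_size=32):
    # returns {level: (stride, anchor_size)}
    return {
        lvl: (2.0 ** lvl, start_size * 2.0 ** (lvl - k_min))
        for lvl in range(k_min, k_max + 1)
    }

# e.g. fpn_rpn_anchor_geometry() ->
# {2: (4.0, 32.0), 3: (8.0, 64.0), 4: (16.0, 128.0), 5: (32.0, 256.0),
#  6: (64.0, 512.0)}; aspect ratios are shared across all levels.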
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like
    # level). E.g., if the backbone level info defines 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with
    # 4 - (2 - 2) = 4 backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )
    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1, pad=0, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1, pad=0, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3, pad=1, stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3, pad=1, stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3, pad=1, stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)
    return blobs_fpn, fpn_dim, spatial_scales
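# --- Illustration (not part of the original model code) ---
# A small sketch of the spatial-scale bookkeeping in add_fpn: the output lists
# are ordered coarsest-first, and every extra level prepended on top (e.g. P6)
# halves the previous coarsest scale. The backbone scales below are typical
# ResNet values and are assumptions for the example only.
def fpn_spatial_scales(backbone_scales=(1 / 32., 1 / 16., 1 / 8., 1 / 4.),
                       num_extra_levels=1):
    scales = list(backbone_scales)        # coarsest (conv5-like) level first
    for _ in range(num_extra_levels):     # e.g. P6 via stride-2 subsampling
        scales.insert(0, scales[0] * 0.5)
    return scales

# e.g. fpn_spatial_scales() -> [1/64, 1/32, 1/16, 1/8, 1/4]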
def add_semantic_segms_head(model, blob_in, dim_in):
    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    num_convs = cfg.SEMANTIC_NET.NUM_CONVS
    use_deconv = cfg.SEMANTIC_NET.USE_DECONV
    current = blob_in
    for i in range(num_convs - 1):
        current = model.Conv(
            current, 'semantic_segms_fcn' + str(i + 1), dim_in, dim_inner,
            kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    if use_deconv:
        current = model.Conv(
            current, 'semantic_segms_fcn' + str(num_convs), dim_in, dim_inner,
            kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        # upsample layer
        current = model.ConvTranspose(
            current, 'semantic_segms_feature', dim_inner, dim_inner, kernel=2,
            pad=0, stride=2,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        current = model.Conv(
            current, 'semantic_segms_feature', dim_in, dim_inner, kernel=3,
            pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
    blob_mask = model.Relu(current, current)
    return blob_mask, dim_inner
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # NxKxHxW
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)
    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling
        blob_in = model.ConvTranspose(
            blob_in, 'kps_deconv', dim, cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    if upsample_heatmap:
        blob_name = 'kps_score_lowres'
    else:
        blob_name = 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling
        blob_out = model.ConvTranspose(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # Use Conv to predict heatmaps; does no upsampling
        blob_out = model.Conv(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS, kernel=1, pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE
        )
    return blob_out
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # NxKxHxW
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)
    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling
        blob_in = model.ConvTranspose(
            blob_in, 'kps_deconv', dim, cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    if upsample_heatmap:
        blob_name = 'kps_score_lowres'
    else:
        blob_name = 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling
        blob_out = model.ConvTranspose(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # Use Conv to predict heatmaps; does no upsampling
        blob_out = model.Conv(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS, kernel=1, pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE
        )
    return blob_out
def add_fcn_head(model, blob_in, blob_out, dim_in, prefix, num_convs,
                 use_deconv):
    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    current = blob_in
    for i in range(num_convs - 1):
        current = model.Conv(
            current, prefix + '_[refined_mask]_fcn' + str(i + 1), dim_in,
            dim_inner, kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    if use_deconv:
        current = model.Conv(
            current, prefix + '_[refined_mask]_fcn' + str(num_convs), dim_in,
            dim_inner, kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
        model.ConvTranspose(
            current, blob_out, dim_in, dim_inner, kernel=2, pad=0, stride=2,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        model.Conv(
            current, blob_out, dim_in, dim_inner, kernel=3, pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
    blob_out = model.Relu(blob_out, blob_out)
    dim_out = dim_inner
    return blob_out, dim_out
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(
            blob_in, 'conv_dim_reduce', dim_in, dim_reduce, kernel=1, pad=0,
            stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Classification conv
    model.Conv(
        blob_in, 'conv_cls', dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Bounding-box regression conv
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in, 'conv_bbox_pred', dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE
    )
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE
    )
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like
    # level). E.g., if the backbone level info defines 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with
    # 4 - (2 - 2) = 4 backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )
    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    model.Conv(
        lateral_input_blobs[0],
        output_blobs[0],
        dim_in=fpn_dim_lateral[0],
        dim_out=fpn_dim,
        kernel=1, pad=0, stride=1,
        weight_init=xavier_fill,
        bias_init=const_fill(0.0)
    )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        fpn_blob = model.Conv(
            output_blobs[i],
            'fpn_{}'.format(fpn_level_info.blobs[i]),
            dim_in=fpn_dim,
            dim_out=fpn_dim,
            kernel=3, pad=1, stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3, pad=1, stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)
    return blobs_fpn, fpn_dim, spatial_scales
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # RPN hidden representation
    model.Conv(
        blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    model.Conv(
        'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Proposal bbox regression deltas
    model.Conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors, kernel=1, pad=0,
        stride=1, weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )
    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Add op that generates training labels for in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
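# --- Illustration (not part of the original model code) ---
# A small sketch of the output-channel arithmetic in the single-scale RPN
# head: generate_anchors produces one anchor per (size, aspect_ratio) pair, so
# the classification conv has that many output channels and the box conv has
# 4x as many. The example values below are assumptions, not cfg.RPN defaults.
def rpn_output_channels(sizes=(32, 64, 128, 256, 512),
                        aspect_ratios=(0.5, 1, 2)):
    num_anchors = len(sizes) * len(aspect_ratios)
    return {
        'rpn_cls_logits': num_anchors,     # one objectness score per anchor
        'rpn_bbox_pred': 4 * num_anchors,  # (dx, dy, dw, dh) per anchor
    }

# e.g. rpn_output_channels() -> {'rpn_cls_logits': 15, 'rpn_bbox_pred': 60}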
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    dim_out = 1

    if cfg.MRCNN.BBOX2MASK.BBOX2MASK_ON:
        # Use weight transfer function iff BBOX2MASK_ON is True
        # Decide the input to the weight transfer function
        # - Case 1) From a pre-trained embedding vector (e.g. GloVe)
        # - Case 2) From the detection weights in the box head
        if cfg.MRCNN.BBOX2MASK.USE_PRETRAINED_EMBED:
            # Case 1) From a pre-trained embedding vector (e.g. GloVe)
            class_embed = cfg.MRCNN.BBOX2MASK.PRETRAINED_EMBED_NAME
            class_embed_dim = cfg.MRCNN.BBOX2MASK.PRETRAINED_EMBED_DIM
            # This parameter is meant to be initialized from a pretrained model
            # instead of learned from scratch. Hence, the default init is HUGE
            # to cause NaN loss so that the error will not pass silently.
            model.AddParameter(model.param_init_net.GaussianFill(
                [], class_embed, shape=[dim_out, class_embed_dim], std=1e12))
            # Pretrained embedding should be fixed during training (it doesn't
            # make sense to update them)
            model.StopGradient(class_embed, class_embed + '_no_grad')
            class_embed = class_embed + '_no_grad'
        else:
            # Case 2) From the detection weights in the box head
            # - Subcase a) using cls+box
            # - Subcase b) using cls
            # - Subcase c) using box
            # where 'cls' is RoI classification weights 'cls_score_w'
            # and 'box' is bounding box regression weights 'bbox_pred_w'
            if (cfg.MRCNN.BBOX2MASK.INCLUDE_CLS_SCORE and
                    cfg.MRCNN.BBOX2MASK.INCLUDE_BBOX_PRED):
                # Subcase a) using cls+box
                concat_cls_score_bbox_pred(model)
                class_embed = 'cls_score_bbox_pred'
                class_embed_dim = 1024 + 4096
            elif cfg.MRCNN.BBOX2MASK.INCLUDE_CLS_SCORE:
                # Subcase b) using cls
                class_embed = 'cls_score_w'
                class_embed_dim = 1024
            elif cfg.MRCNN.BBOX2MASK.INCLUDE_BBOX_PRED:
                # Subcase c) using box; 'bbox_pred_w' needs to be flattened
                model.net.Reshape(
                    'bbox_pred_w',
                    ['bbox_pred_w_flat', '_bbox_pred_w_oldshape'],
                    shape=(model.num_classes, -1))
                class_embed = 'bbox_pred_w_flat'
                class_embed_dim = 4096
            else:
                raise ValueError(
                    'At least one of cfg.MRCNN.BBOX2MASK.INCLUDE_CLS_SCORE and '
                    'cfg.MRCNN.BBOX2MASK.INCLUDE_BBOX_PRED needs to be True')
            # Stop the mask gradient to the detection weights if specified
            if cfg.MRCNN.BBOX2MASK.STOP_DET_W_GRAD:
                model.StopGradient(class_embed, class_embed + '_no_grad')
                class_embed = class_embed + '_no_grad'

        # Use weight transfer function to predict mask weights
        mask_w = bbox2mask_weight_transfer(
            model, class_embed, dim_in=class_embed_dim, dim_h=dim, dim_out=dim)
        # Mask prediction with predicted mask weights (no bias term)
        fcn_branch = model.net.Conv(
            [blob_in, mask_w], 'mask_fcn_logits', kernel=1, pad=0, stride=1)
    else:
        # Predict mask using Conv
        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        # If using class-agnostic mask, scale down init to avoid NaN loss
        init_filler = (
            cfg.MRCNN.CONV_INIT if cfg.MRCNN.CLS_SPECIFIC_MASK
            else 'GaussianFill')
        fcn_branch = model.Conv(
            blob_in, 'mask_fcn_logits', dim, dim_out, kernel=1, pad=0,
            stride=1,
            weight_init=(init_filler, {'std': 0.001}),
            bias_init=const_fill(0.0))

    # Add a complementary MLP branch if specified
    if cfg.MRCNN.JOINT_FCN_MLP_HEAD:
        # Use class-agnostic MLP branch, and class-aware FCN branch
        mlp_branch = cls_agnostic_mlp_branch(
            model, blob_in, dim_in=dim * cfg.MRCNN.RESOLUTION**2,
            num_cls=dim_out)
        blob_out = model.net.Add([mlp_branch, fcn_branch], 'mask_logits')
    elif not cfg.MRCNN.USE_FC_OUTPUT:
        blob_out = fcn_branch

    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
    return blob_out
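# --- Illustration (not part of the original model code) ---
# A minimal NumPy sketch of why the weight-transfer branch above can use a
# plain 1x1 Conv with externally supplied weights: a 1x1 convolution whose
# kernel is the predicted mask_w is just a per-pixel matrix multiply over
# channels, i.e. logits[n, :, y, x] = mask_w @ feat[n, :, y, x]. The names and
# shapes below are illustrative assumptions only.
import numpy as np

def conv1x1_with_predicted_weights(feat, mask_w):
    # feat: (N, C, H, W) mask features; mask_w: (K, C) predicted class weights
    return np.einsum('kc,nchw->nkhw', mask_w, feat)

# e.g. conv1x1_with_predicted_weights(np.zeros((2, 256, 14, 14)),
#                                     np.zeros((81, 256))).shape == (2, 81, 14, 14)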
def add_rfcn_heads(self, blob_in, dim_in, rois, spatial_scale,
                   num_bbox_reg_classes, lvl):
    # Classification and bounding-box regression convs; created at level 0 and
    # shared (via ConvShared) by all other levels
    if lvl == 0:
        conv_cls = self.Conv(
            blob_in,
            'conv_cls_{}'.format(lvl),
            dim_in,
            self.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        conv_bbox_pred = self.Conv(
            blob_in,
            'conv_bbox_pred_{}'.format(lvl),
            dim_in,
            4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
    else:
        conv_cls = self.ConvShared(
            blob_in,
            'conv_cls_{}'.format(lvl),
            dim_in,
            self.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight='conv_cls_0_w',
            bias='conv_cls_0_b'
        )
        conv_bbox_pred = self.ConvShared(
            blob_in,
            'conv_bbox_pred_{}'.format(lvl),
            dim_in,
            4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight='conv_bbox_pred_0_w',
            bias='conv_bbox_pred_0_b'
        )
    # Classification PS RoI pooling
    self.net.PSRoIPool(
        [conv_cls, rois],
        ['psroipooled_cls' + str(lvl), '_mapping_channel_cls' + str(lvl)],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=self.num_classes,
        spatial_scale=spatial_scale
    )
    self.AveragePool(
        'psroipooled_cls' + str(lvl), 'cls_score_4d' + str(lvl),
        kernel=cfg.RFCN.PS_GRID_SIZE
    )
    cls_blob_out, _ = self.net.Reshape(
        'cls_score_4d' + str(lvl),
        ['cls_score' + str(lvl), '_cls_scores_shape' + str(lvl)],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not self.train:
        self.Softmax('cls_score' + str(lvl), 'cls_prob' + str(lvl),
                     engine='CUDNN')
    # Bbox regression PS RoI pooling
    self.net.PSRoIPool(
        [conv_bbox_pred, rois],
        ['psroipooled_bbox' + str(lvl), '_mapping_channel_bbox' + str(lvl)],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    bbox_blob_out = self.AveragePool(
        'psroipooled_bbox' + str(lvl), 'bbox_pred' + str(lvl),
        kernel=cfg.RFCN.PS_GRID_SIZE
    )
    return cls_blob_out, bbox_blob_out
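# --- Illustration (not part of the original model code) ---
# A small sketch of the position-sensitive score-map arithmetic behind
# PSRoIPool: the classification conv above has num_classes * grid**2 output
# channels, and each (class, grid cell) pair reads from its own channel. The
# channel ordering assumed below is illustrative, not the exact Caffe2
# PSRoIPool layout.
def ps_channel_index(cls_idx, gy, gx, grid=7):
    # channel used for class cls_idx at grid cell (gy, gx)
    return cls_idx * grid * grid + gy * grid + gx

# e.g. with 81 classes and a 7x7 grid the conv has 81 * 49 = 3969 channels,
# and averaging the 49 pooled cells per class yields the 81-way 'cls_score'.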
def add_resnet_head(model, blob_in, blob_out, dim_in, prefix, n_downsampling,
                    num_res_blocks, use_deconv):
    dilation = cfg.REFINENET.RESNET_FCN.DILATION
    dim_inner = cfg.REFINENET.RESNET_FCN.DIM_REDUCED
    current = blob_in
    # Downsampling
    for i in range(n_downsampling):
        if i > 0:
            dim_inner *= 2
        current = model.Conv(
            current, prefix + '_[refined]_resnet_down' + str(i + 1), dim_in,
            dim_inner, kernel=3, pad=1 * dilation, stride=2,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    # residual blocks
    for i in range(num_res_blocks):
        current = add_residual_block(
            model, prefix + '_[refined]_resnet_res' + str(i + 1), current,
            dim_in=dim_in, dim_out=dim_inner, dim_inner=dim_inner,
            dilation=dilation, inplace_sum=True
        )
        dim_in = dim_inner
    # Upsampling
    for i in range(n_downsampling):
        if i < n_downsampling - 1:
            dim_inner = int(dim_inner / 2)
        current = model.ConvTranspose(
            current, prefix + '_[refined]_resnet_up' + str(n_downsampling - i),
            dim_in=dim_in, dim_out=dim_inner, kernel=2, pad=0, out_pad=0,
            stride=2,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        current = brew.spatial_bn(
            model, current, current + '_bn', dim_inner,
            is_test=not model.train
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
    if use_deconv:
        current = model.Conv(
            current, prefix + '_[refined]_resnet_conv' + str(1), dim_in,
            dim_inner, kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner
        model.ConvTranspose(
            current, blob_out, dim_in, dim_inner, kernel=2, pad=0, stride=2,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        model.Conv(
            current, blob_out, dim_in, dim_inner, kernel=3, pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
    blob_out = model.Relu(blob_out, blob_out)
    dim_out = dim_inner
    return blob_out, dim_out
def adaptive_pooling_mask_head_v1upXconvs(
    model, blobs_pan, dim_pan, spatial_scales_pan, num_convs
):
    """Fuse all PAN lateral levels using adaptive pooling."""
    # Fusion method is indicated in cfg.PAN.FUSION_METHOD
    assert cfg.MODEL.MASK_ON, "MODEL.MASK_ON = False, cannot use PAN mask head"
    assert cfg.PAN.MASK_ON, "PAN.MASK_ON = False, cannot use PAN mask head"
    pan_level_info = PAN_LEVEL_INFO().val()
    # If BottomUp_ON, adaptive pooling is applied on the PAN levels;
    # otherwise it is applied on the FPN levels
    if cfg.PAN.BottomUp_ON:
        prefix = 'pan_'
    else:
        prefix = ''
    blobs_pan = [prefix + s for s in pan_level_info.blobs]
    # For the finest FPN level: N2 = P2 only seeds recursion
    blobs_pan[0] = pan_level_info.blobs[0]
    dim_pan = pan_level_info.dims[0]
    spatial_scales_pan = pan_level_info.spatial_scales
    fusion_method = cfg.PAN.FUSION_METHOD
    assert fusion_method in {'Sum', 'Max', 'Mean'}, \
        'Unknown fusion method: {}'.format(fusion_method)
    # In the mask branch, we fix the fusion place between the first and second
    # conv layers
    # adaptive_pooling_place = cfg.PAN.AdaptivePooling_Place

    # v1upXconvs design: X * (conv 3x3), convT 2x2
    mask_roi_feat = model.RoIFeatureTransform(
        blobs_pan,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scales_pan
    )
    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    # independent fcn1 for all levels
    mask_fcn1_list = []
    for i in range(len(mask_roi_feat)):
        mask_fcn1_name = '_[mask]_fcn1' + str(mask_roi_feat[i])
        model.Conv(
            mask_roi_feat[i], mask_fcn1_name, dim_pan, dim_inner, kernel=3,
            pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        mask_fcn1_list += [mask_fcn1_name]
    # fuse
    pan_adaptive_pooling_mask_fcn1 = model.net.__getattr__(fusion_method)(
        mask_fcn1_list, "pan_adaptive_pooling_mask_fcn1"
    )
    model.Relu(pan_adaptive_pooling_mask_fcn1, pan_adaptive_pooling_mask_fcn1)
    current = pan_adaptive_pooling_mask_fcn1
    for i in range(1, num_convs):
        current = model.Conv(
            current, '_[mask]_fcn' + str(i + 1), dim_inner, dim_inner,
            kernel=3, pad=1 * dilation, stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
    # upsample layer
    model.ConvTranspose(
        current, 'conv5_mask', dim_inner, dim_inner, kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_inner
def bottleneck_transformation(
    model, blob_in, dim_in, dim_out, stride, prefix, dim_inner, dilation=1,
    group=1
):
    """Add a bottleneck transformation to the model."""
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> BN -> ReLU
    cur = model.ConvAffine(
        blob_in, prefix + '_branch2a', dim_in, dim_inner, kernel=1,
        stride=str1x1, pad=0, inplace=True
    )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> BN -> ReLU
    cur = model.ConvAffine(
        cur, prefix + '_branch2b', dim_inner, dim_inner, kernel=3,
        stride=str3x3, pad=1 * dilation, dilation=dilation,
        group=group,  # mobilenet: group=dim_inner, else group=group
        inplace=True
    )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> BN (no ReLU)
    # NB: for now this AffineChannel op cannot be in-place due to a bug in C2
    # gradient computation for graphs like this
    cur = model.ConvAffine(
        cur, prefix + '_branch2c', dim_inner, dim_out, kernel=1, stride=1,
        pad=0, inplace=False
    )

    # Squeeze-and-excitation gate: global average pool -> 1x1 reduce (16x) ->
    # ReLU -> 1x1 restore -> sigmoid -> channel-wise rescale
    SE_pool = model.AveragePool(
        cur, prefix + '_branch2c_se_pool', global_pooling=1
    )
    SE_conv = model.Conv(
        SE_pool, prefix + '_branch2c_se_con1', dim_out, int(dim_out / 16),
        kernel=1, stride=1, pad=0, weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    SE_conv = model.Relu(SE_conv, SE_conv)
    SE_conv = model.Conv(
        SE_conv, prefix + '_branch2c_se_con2', int(dim_out / 16), dim_out,
        kernel=1, stride=1, pad=0, weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    SE_sig = model.net.Sigmoid(SE_conv, SE_conv)
    # SE = model.net.Scale([SE_sig])
    cur = model.net.Mul([cur, SE_sig], prefix + '_branch2c_se', broadcast=1)
    # cur = model.net.Add([cur, SE], cur, broadcast=1, axis=1,2)
    return cur
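# --- Illustration (not part of the original model code) ---
# A minimal NumPy sketch of the squeeze-and-excitation gate added to the
# bottleneck above: global average pool -> 1x1 conv (reduce by 16x) -> ReLU ->
# 1x1 conv (restore) -> sigmoid -> channel-wise rescale of the residual
# branch. Weight shapes below are illustrative assumptions.
import numpy as np

def se_gate_sketch(x, w1, w2):
    # x: (C, H, W); w1: (C//16, C); w2: (C, C//16)
    s = x.mean(axis=(1, 2))                # squeeze: global average pool
    s = np.maximum(w1 @ s, 0.0)            # excitation: reduce + ReLU
    s = 1.0 / (1.0 + np.exp(-(w2 @ s)))    # excitation: restore + sigmoid
    return x * s[:, None, None]            # broadcast channel-wise rescale

# e.g. se_gate_sketch(np.ones((64, 7, 7)), np.zeros((4, 64)), np.zeros((64, 4)))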
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3, pad=1,
                stride=1, weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in, num_anchors,
                kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out, kernel=3, pad=1,
                stride=1, weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in, num_anchors,
                kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )