def add_wsl_context_outputs(model, blobs_in, dim, prefix=''):
    """Add WSL context-aware RoI classification outputs.

    Fix: sibling code calls this function with a ``prefix`` keyword argument,
    which the old signature rejected (TypeError). ``prefix`` now defaults to
    '' so existing callers and blob names are unchanged.

    Args:
        model: DetectionModelHelper-style model builder.
        blobs_in: list of three RoI feature blobs [cls, frame, context].
        dim: input feature dimension of each blob.
        prefix: optional string prepended to every created blob name.
    """
    # Classification stream (background class excluded: num_classes - 1).
    model.FC(
        blobs_in[0], prefix + 'fc8c', dim, model.num_classes - 1,
        weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    # Detection stream on the "frame" feature; its parameters are reused below.
    model.FC(
        blobs_in[1], prefix + 'fc8d_frame', dim, model.num_classes - 1,
        weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    # Context stream shares the frame FC weights/bias explicitly.
    model.net.FC(
        [blobs_in[2], prefix + 'fc8d_frame_w', prefix + 'fc8d_frame_b'],
        prefix + 'fc8d_context')
    # Contrast the frame score against its context.
    model.net.Sub([prefix + 'fc8d_frame', prefix + 'fc8d_context'],
                  prefix + 'fc8d')
    model.Softmax(prefix + 'fc8c', prefix + 'alpha_cls', axis=1)
    # Softmax over RoIs (not classes): transpose, softmax, transpose back.
    model.Transpose(prefix + 'fc8d', prefix + 'fc8d_t', axes=(1, 0))
    model.Softmax(prefix + 'fc8d_t', prefix + 'alpha_det_t', axis=1)
    model.Transpose(prefix + 'alpha_det_t', prefix + 'alpha_det', axes=(1, 0))
    model.net.Mul([prefix + 'alpha_cls', prefix + 'alpha_det'],
                  prefix + 'rois_pred')
    if not model.train:  # == if test
        # Prepend a background column so downstream consumers see
        # num_classes columns in cls_prob.
        model.net.Split(
            prefix + 'rois_pred', [prefix + 'rois_bg_pred', prefix + 'notuse'],
            split=[1, model.num_classes - 2], axis=1)
        model.net.Concat(
            [prefix + 'rois_bg_pred', prefix + 'rois_pred'],
            [prefix + 'cls_prob', prefix + 'cls_prob_concat_dims'], axis=1)
def add_wsl_outputs(model, blob_in, dim, prefix=''):
    """Add WSL (weakly supervised) RoI classification output ops.

    Delegates to the context head when cfg.WSL.CONTEXT is set; otherwise
    builds the two-stream (classification / detection) WSDDN-style head.
    """
    if cfg.WSL.CONTEXT:
        return add_wsl_context_outputs(model, blob_in, dim, prefix=prefix)
    fc8c = prefix + 'fc8c'
    fc8d = prefix + 'fc8d'
    # Classification stream (background excluded).
    model.FC(blob_in, fc8c, dim, model.num_classes - 1,
             weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    # Detection stream.
    model.FC(blob_in, fc8d, dim, model.num_classes - 1,
             weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    model.Softmax(fc8c, prefix + 'alpha_cls', axis=1)
    # Per-class softmax over RoIs via transpose / softmax / transpose.
    model.Transpose(fc8d, prefix + 'fc8d_t', axes=(1, 0))
    model.Softmax(prefix + 'fc8d_t', prefix + 'alpha_det_t', axis=1)
    model.Transpose(prefix + 'alpha_det_t', prefix + 'alpha_det', axes=(1, 0))
    model.net.Mul([prefix + 'alpha_cls', prefix + 'alpha_det'],
                  prefix + 'rois_pred')
    if not model.train:  # == if test
        # Re-attach a background column for test-time consumers.
        model.net.Split(prefix + 'rois_pred',
                        [prefix + 'rois_bg_pred', prefix + 'notuse'],
                        split=[1, model.num_classes - 2], axis=1)
        model.net.Concat(
            [prefix + 'rois_bg_pred', prefix + 'rois_pred'],
            [prefix + 'cls_prob', prefix + 'cls_prob_concat_dims'], axis=1)
def _add_instance_level_classifier(model, blob_in, dim_in):
    """Add a domain-adaptation instance-level domain classifier.

    A gradient-reversal (scaler) layer feeds a small FC network that predicts
    the domain of each RoI; at training time a sigmoid cross-entropy loss is
    attached.

    Fix: removed the nested ``negateGrad`` / ``grad_negateGrad`` functions —
    they were defined but never referenced (the GradientScalerLayer performs
    the gradient reversal).

    Returns:
        Loss gradients dict when training, else None.
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    # Reverse (and scale) gradients flowing back into the backbone.
    model.GradientScalerLayer([blob_in], ['dc_grl'],
                              -1.0 * cfg.TRAIN.DA_INS_GRL_WEIGHT)
    model.FC('dc_grl', 'dc_ip1', dim_in, 1024,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('dc_ip1', 'dc_relu_1')
    model.Dropout('dc_relu_1', 'dc_drop_1', ratio=0.5, is_test=False)
    model.FC('dc_drop_1', 'dc_ip2', 1024, 1024,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('dc_ip2', 'dc_relu_2')
    model.Dropout('dc_relu_2', 'dc_drop_2', ratio=0.5, is_test=False)
    # Single-logit domain prediction per RoI.
    dc_ip3 = model.FC('dc_drop_2', 'dc_ip3', 1024, 1,
                      weight_init=gauss_fill(0.05), bias_init=const_fill(0.0))
    loss_gradient = None
    if model.train:
        dc_loss = model.net.SigmoidCrossEntropyLoss(
            [dc_ip3, 'dc_label'], 'loss_dc', scale=model.GetLossScale())
        loss_gradient = blob_utils.get_loss_gradients(model, [dc_loss])
        model.AddLosses('loss_dc')
    return loss_gradient
def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom,
                               dim_top, dim_lateral):
    """Add an FPN top-down lateral module.

    1x1 lateral conv (GroupNorm variant when cfg.FPN.USE_GN), 2x nearest
    upsampling of the top feature, then element-wise sum into ``fpn_bottom``.
    """
    lateral_init = (const_fill(0.0)
                    if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}))
    if cfg.FPN.USE_GN:
        # GroupNorm on the lateral connection.
        lat = model.ConvGN(
            fpn_lateral, fpn_bottom + '_lateral',
            dim_in=dim_lateral, dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1, pad=0, stride=1,
            weight_init=lateral_init, bias_init=const_fill(0.0))
    else:
        lat = model.Conv(
            fpn_lateral, fpn_bottom + '_lateral',
            dim_in=dim_lateral, dim_out=dim_top,
            kernel=1, pad=0, stride=1,
            weight_init=lateral_init, bias_init=const_fill(0.0))
    # 2x nearest-neighbor upsampling of the coarser top-down feature.
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Fuse lateral and top-down paths.
    model.net.Sum([lat, td], fpn_bottom)
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add Fast R-CNN classification and box-regression output layers."""
    # Classification scores over all classes.
    model.FC(blob_in, 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:
        # Softmax only at test time; during training it is folded into
        # the cross-entropy loss.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Box deltas: class-agnostic regression uses 2 classes (bg/fg).
    num_box_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                           else model.num_classes)
    model.FC(blob_in, 'bbox_pred', dim, num_box_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
def add_bottomup_lateral_module(model, fpn_bottom, fpn_lateral, fpn_top,
                                fpn_dim):
    """Add a bottom-up lateral module (PANet-style path augmentation).

    Bug fix: the GroupNorm branch called ``get_group_gn(dim_top)`` but no
    ``dim_top`` exists in this scope — a NameError whenever cfg.FPN.USE_GN
    is enabled. The channel count of this module is ``fpn_dim``.
    """
    lat = model.Relu(fpn_lateral, fpn_lateral + '_relu')
    lateral_init = (const_fill(0.0)
                    if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}))
    # Bottom-up 2x downsampling (stride-2 3x3 conv).
    if cfg.FPN.USE_GN:
        bu = model.ConvGN(
            fpn_bottom, fpn_top + '_subsampled',
            dim_in=fpn_dim, dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=3, pad=1, stride=2,
            weight_init=lateral_init, bias_init=const_fill(0.0))
    else:
        bu = model.Conv(
            fpn_bottom, fpn_top + '_subsampled',
            dim_in=fpn_dim, dim_out=fpn_dim,
            kernel=3, pad=1, stride=2,
            weight_init=lateral_init, bias_init=const_fill(0.0))
    # Fuse lateral and bottom-up paths.
    model.net.Sum([lat, bu], fpn_top)
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops.

    Variant supporting cfg.MODEL.WEIGHTED_LOSS (test-time probability is the
    mean of softmax and sigmoid) and cascade bookkeeping.
    """
    model.FC(blob_in, 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the loss.
        if not cfg.MODEL.WEIGHTED_LOSS:
            model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
        else:
            # Average softmax and sigmoid probabilities.
            model.Softmax('cls_score', 'cls_prob1', engine='CUDNN')
            model.net.Sigmoid('cls_score', 'cls_prob2', engine='CUDNN')
            model.net.Mean(['cls_prob1', 'cls_prob2'], 'cls_prob')
    # Per-class box deltas (no class-agnostic option in this variant).
    model.FC(blob_in, 'bbox_pred', dim, model.num_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
    if cfg.MODEL.CASCADE_ON:
        # Record this stage's FC weights/biases (last two of each list).
        if '1' not in model.stage_params:
            model.stage_params['1'] = []
        for param_idx in range(-2, 0):
            model.stage_params['1'].append(model.weights[param_idx])
            model.stage_params['1'].append(model.biases[param_idx])
def add_fast_rcnn_outputs_test(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops
    (variant with an inline fc6/fc7 MLP before the classifier).

    Cleanup: removed the unused local ``roi_resize`` and stale
    commented-out code.
    """
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    # Two-layer MLP on the flattened RoI feature.
    model.FC(blob_in, 'fc6', hidden_dim * roi_size * roi_size, dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', dim, dim)
    model.Relu('fc7', 'fc7')
    model.FC('fc7', 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the loss.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Box regression reads directly from the raw RoI feature.
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    model.FC(blob_in, 'bbox_pred', hidden_dim * roi_size * roi_size,
             num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
def add_fast_rcnn_multilabel_outputs(model, blob_in, dim):
    """Add RoI outputs for a 2-class detector plus a 15-way action head."""
    # Binary (e.g. person/background) classification scores.
    model.FC(blob_in, 'cls_score', dim, 2,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    # Action classification logits (15 classes).
    model.FC(blob_in, 'action_cls_logits', dim, 15,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the loss.
        model.Softmax('action_cls_logits', 'action_prob', engine='CUDNN')
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Box deltas for the 2 detector classes.
    model.FC(blob_in, 'bbox_pred', dim, 2 * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    # Per-class scores.
    model.FC(blob_in, 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the
        # cross-entropy loss for numerical stability.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Box deltas: 2 classes when regression is class-agnostic.
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    model.FC(blob_in, 'bbox_pred', dim, num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
def add_cascade_rcnn_outputs(model, blob_in, dim, stage):
    """Add per-stage Cascade R-CNN classification and regression outputs.

    Returns the (cls_prob, bbox_pred) blob names for this stage.
    """
    suffix = "_{}".format(stage)
    cls_score = "cls_score" + suffix
    cls_prob = "cls_prob" + suffix
    bbox_pred = "bbox_pred" + suffix
    model.FC(blob_in, cls_score, dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the loss.
        model.Softmax(cls_score, cls_prob, engine="CUDNN")
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    model.FC(blob_in, bbox_pred, dim, num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
    # Record this stage's FC weights/biases (last two of each list).
    stage_key = str(stage)
    if stage_key not in model.stage_params:
        model.stage_params[stage_key] = []
    for param_idx in range(-2, 0):
        model.stage_params[stage_key].append(model.weights[param_idx])
        model.stage_params[stage_key].append(model.biases[param_idx])
    return cls_prob, bbox_pred
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops
    (variant with cascade stage-parameter bookkeeping)."""
    model.FC(blob_in, 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the loss.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    model.FC(blob_in, 'bbox_pred', dim, num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
    if cfg.MODEL.CASCADE_ON:
        # Record stage-1 FC weights/biases (last two of each list).
        if '1' not in model.stage_params:
            model.stage_params['1'] = []
        for param_idx in range(-2, 0):
            model.stage_params['1'].append(model.weights[param_idx])
            model.stage_params['1'].append(model.biases[param_idx])
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and box regression outputs, optionally with a
    predicted per-coordinate box standard deviation (cfg.PRED_STD)."""
    model.FC(blob_in, 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Softmax only at test time; training folds it into the loss.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    model.FC(blob_in, 'bbox_pred', dim, num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
    if cfg.PRED_STD:
        # Predicted std of box deltas; Abs keeps the values non-negative.
        model.FC(blob_in, 'bbox_pred_std', dim, num_bbox_reg_classes * 4,
                 weight_init=gauss_fill(0.0001), bias_init=const_fill(1.0))
        model.net.Abs('bbox_pred_std', 'bbox_pred_std_abs')
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add two parallel Fast R-CNN heads: a new-class head fed by
    'fc7_newC' and the original head fed by 'fc7_oldC'.

    Note: ``blob_in`` is accepted for interface compatibility but the heads
    read from the fixed 'fc7_newC' / 'fc7_oldC' blobs.
    """
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    # New-class ("toothbrush") head.
    model.FC('fc7_newC', 'cls_score_toothbrush', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.FC('fc7_newC', 'bbox_pred_toothbrush', dim,
             num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
    # Original head.
    model.FC('fc7_oldC', 'cls_score', dim, model.num_classes,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.FC('fc7_oldC', 'bbox_pred', dim, num_bbox_reg_classes * 4,
             weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single-scale model (i.e., no FPN)."""
    anchors = generate_anchors(stride=1. / spatial_scale,
                               sizes=cfg.RPN.SIZES,
                               aspect_ratios=cfg.RPN.ASPECT_RATIOS)
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # 3x3 hidden representation shared by both RPN branches.
    model.Conv(blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1,
               stride=1, weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    model.Relu('conv_rpn', 'conv_rpn')
    # Objectness logits, one per anchor.
    model.Conv('conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1,
               pad=0, stride=1, weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    # Box deltas, four per anchor.
    model.Conv('conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors,
               kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed at inference (RPN-only and Faster R-CNN) and
        # during Faster R-CNN training; RPN-only training skips them.
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
                                ['rpn_rois', 'rpn_roi_probs'],
                                anchors=anchors,
                                spatial_scale=spatial_scale)
    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Generate training labels for the in-network proposals.
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois (and da_rois) to rpn_rois for inference.
            model.net.Alias('rpn_rois', 'rois')
            model.net.Alias('rpn_rois', 'da_rois')
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint outputs: NxKxHxW keypoint heatmaps."""
    upsample_heatmaps = cfg.KRCNN.UP_SCALE > 1
    if cfg.KRCNN.USE_DECONV:
        # Optional deconv before the heatmap predictor (2x upsampling).
        blob_in = model.ConvTranspose(
            blob_in, 'kps_deconv', dim, cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1), stride=2,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM
    # Predict to a low-res blob when further bilinear upsampling follows.
    blob_name = 'kps_score_lowres' if upsample_heatmaps else 'kps_score'
    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Transposed conv predictor gives an extra 2x upsampling.
        blob_out = model.ConvTranspose(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1), stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0))
    else:
        blob_out = model.Conv(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1, pad=0, stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0))
    if upsample_heatmaps:
        # Bilinear upsampling to the final heatmap resolution.
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE)
    return blob_out
def add_wsl_outputs(model, blob_in, dim, prefix=''):
    """Add WSL RoI classification outputs, with optional context head,
    CMIL, and OICR/PCL refinement branches.

    Cleanup: removed the no-op ``dim = dim`` statement.
    """
    if cfg.WSL.CONTEXT:
        fc8c, fc8d = add_wsl_context_outputs(model, blob_in, dim,
                                             prefix=prefix)
    else:
        # Classification stream (background excluded).
        fc8c = model.FC(
            blob_in, prefix + 'fc8c', dim, model.num_classes - 1,
            weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
        # Detection stream.
        fc8d = model.FC(
            blob_in, prefix + 'fc8d', dim, model.num_classes - 1,
            weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    if cfg.WSL.CMIL and model.train:
        fc8c, fc8d = add_wsl_cmil(model, [fc8c, fc8d], dim, prefix=prefix)
    model.Softmax(fc8c, prefix + 'alpha_cls', axis=1)
    # Per-class softmax over RoIs via transpose / softmax / transpose.
    model.Transpose(fc8d, prefix + 'fc8d_t', axes=(1, 0))
    model.Softmax(prefix + 'fc8d_t', prefix + 'alpha_det_t', axis=1)
    model.Transpose(prefix + 'alpha_det_t', prefix + 'alpha_det', axes=(1, 0))
    model.net.Mul([prefix + 'alpha_cls', prefix + 'alpha_det'],
                  prefix + 'rois_pred')
    if not model.train:  # == if test
        # Re-attach a background column for test-time consumers.
        model.net.Split(prefix + 'rois_pred',
                        [prefix + 'rois_bg_pred', prefix + 'notuse'],
                        split=[1, model.num_classes - 2], axis=1)
        model.net.Concat(
            [prefix + 'rois_bg_pred', prefix + 'rois_pred'],
            [prefix + 'cls_prob', prefix + 'cls_prob_concat_dims'], axis=1)
    if cfg.WSL.CONTEXT:
        # The refinement heads below expect the single RoI feature blob.
        blob_in = blob_in[0]
    if cfg.WSL.CMIL:
        add_wsl_cmil_outputs(model, blob_in, dim, prefix=prefix)
    elif cfg.WSL.OICR:
        add_wsl_oicr_outputs(model, blob_in, dim, prefix=prefix)
    elif cfg.WSL.PCL:
        add_wsl_pcl_outputs(model, blob_in, dim, prefix=prefix)
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN position-sensitive classification and regression outputs."""
    ps_grid = cfg.RFCN.PS_GRID_SIZE
    if dim_reduce is not None:
        # Optional 1x1 channel reduction before the PS score maps.
        blob_in = model.Conv(blob_in, 'conv_dim_reduce', dim_in, dim_reduce,
                             kernel=1, pad=0, stride=1,
                             weight_init=gauss_fill(0.01),
                             bias_init=const_fill(0.0))
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Position-sensitive classification score maps.
    model.Conv(blob_in, 'conv_cls', dim_in,
               model.num_classes * ps_grid**2,
               kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
                            else model.num_classes)
    # Position-sensitive box-regression maps.
    model.Conv(blob_in, 'conv_bbox_pred', dim_in,
               4 * num_bbox_reg_classes * ps_grid**2,
               kernel=1, pad=0, stride=1, weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    # Classification PS RoI pooling + vote (average over the grid).
    model.net.PSRoIPool(['conv_cls', 'rois'],
                        ['psroipooled_cls', '_mapping_channel_cls'],
                        group_size=ps_grid,
                        output_dim=model.num_classes,
                        spatial_scale=spatial_scale)
    model.AveragePool('psroipooled_cls', 'cls_score_4d', kernel=ps_grid)
    model.net.Reshape('cls_score_4d', ['cls_score', '_cls_scores_shape'],
                      shape=(-1, cfg.MODEL.NUM_CLASSES))
    if not model.train:
        # Softmax only at test time; training folds it into the loss.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox-regression PS RoI pooling + vote.
    model.net.PSRoIPool(['conv_bbox_pred', 'rois'],
                        ['psroipooled_bbox', '_mapping_channel_bbox'],
                        group_size=ps_grid,
                        output_dim=4 * num_bbox_reg_classes,
                        spatial_scale=spatial_scale)
    model.AveragePool('psroipooled_bbox', 'bbox_pred', kernel=ps_grid)
def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom,
                               dim_top, dim_lateral, dim_bottom):
    """Add a top-down lateral module with optional channel-matching
    conv + BN + ReLU on each path.

    Bug fix: both the lateral and the top-down 1x1 convs wrote to the same
    blob name (fpn_bottom + '_lateral'), and their SpatialBN outputs also
    collided at fpn_bottom + '_lateral_bn'; whenever both paths needed a
    conv, the top-down path silently clobbered the lateral one. The
    top-down path now uses distinct '_topdown_conv' blob names.
    """
    assert dim_top == dim_bottom or dim_lateral == dim_bottom
    lateral_init = (const_fill(0.0)
                    if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}))
    # Lateral path: pass through, or 1x1 conv + BN + ReLU to match channels.
    if dim_lateral == dim_bottom:
        lat = fpn_lateral
    else:
        lat = model.Conv(
            fpn_lateral, fpn_bottom + '_lateral',
            dim_in=dim_lateral, dim_out=dim_bottom,
            kernel=1, pad=0, stride=1,
            weight_init=lateral_init, bias_init=const_fill(0.0))
        lat = model.SpatialBN(lat, lat + '_bn', dim_bottom,
                              is_test=not model.train)
        lat = model.Relu(lat, lat)
    # Top-down path: pass through, or 1x1 conv + BN + ReLU to match channels.
    if dim_top == dim_bottom:
        td = fpn_top
    else:
        td = model.Conv(
            fpn_top, fpn_bottom + '_topdown_conv',
            dim_in=dim_top, dim_out=dim_bottom,
            kernel=1, pad=0, stride=1,
            weight_init=lateral_init, bias_init=const_fill(0.0))
        td = model.SpatialBN(td, fpn_bottom + '_topdown_conv_bn', dim_bottom,
                             is_test=not model.train)
        td = model.Relu(td, td)
    # 2x upsampling, skipped when res5 keeps the same stride as the level
    # below (cfg.RESNETS.RES5_STRIDE != 2).
    if 'res5_2_sum' not in str(fpn_top) or cfg.RESNETS.RES5_STRIDE == 2:
        td = model.net.UpsampleNearest(td, fpn_bottom + '_topdown', scale=2)
    # Fuse lateral and top-down paths.
    model.net.Sum([lat, td], fpn_bottom)
def add_seg_outputs(model, blob_in, dim):
    """Add segmentation outputs; WSL-style variant that predicts
    num_classes - 1 foreground channels and re-attaches a background
    channel at test time. Delegates to the DeepLab head when configured.
    """
    if 'deeplab' in cfg.MRCNN.ROI_MASK_HEAD:
        return add_deeplab_outputs(model, blob_in, dim)
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    if cfg.MRCNN.USE_FC_OUTPUT:
        # Fully connected mask predictor (blob keeps the 'fcn' name).
        blob_out = model.FC(blob_in, 'mask_fcn_logits', dim,
                            num_cls * cfg.MRCNN.RESOLUTION**2,
                            weight_init=gauss_fill(0.001),
                            bias_init=const_fill(0.0))
    else:
        # 1x1 conv predictor. GaussianFill for the class-agnostic case:
        # fan-in based fills can be too large and cause divergence.
        fill = (cfg.MRCNN.CONV_INIT
                if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
        blob_out = model.Conv(blob_in, 'mask_fcn_logits', dim, num_cls - 1,
                              kernel=1, pad=0, stride=1,
                              weight_init=(fill, {'std': 0.001}),
                              bias_init=const_fill(0.0))
        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO)
    if not model.train:  # == if test
        # Re-attach a background channel, then convert logits to probs.
        model.net.Split(blob_out, ['mask_fcn_logits_bg', 'mask_notuse'],
                        split=[1, model.num_classes - 2], axis=1)
        model.net.Concat(
            ['mask_fcn_logits_bg', blob_out],
            ['mask_fcn_logits_', 'mask_fcn_logits_concat_dims'], axis=1)
        blob_out = model.net.Sigmoid('mask_fcn_logits_', 'mask_fcn_probs')
    return blob_out
def _add_instance_level_classifier(model, blob_in, dim_in, spatial_scale):
    """Add a domain-adaptation instance-level classifier over RoI features.

    RoI-pools features, runs them through fc6/fc7 (weights shared with the
    main head), reverses gradients, and attaches a small domain classifier
    with a sigmoid cross-entropy loss at training time.

    Returns:
        (loss_gradient_or_None, da_fc7_blob, 4096)
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    # RoI feature extraction for the domain-adaptation RoIs.
    model.RoIFeatureTransform(
        blob_in, 'da_pool5', blob_rois='da_rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD, resolution=7,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)
    # fc6/fc7 share parameters with the detection head.
    model.FCShared('da_pool5', 'da_fc6', dim_in * 7 * 7, 4096,
                   weight='fc6_w', bias='fc6_b')
    model.Relu('da_fc6', 'da_fc6')
    model.FCShared('da_fc6', 'da_fc7', 4096, 4096,
                   weight='fc7_w', bias='fc7_b')
    da_blobs = model.Relu('da_fc7', 'da_fc7')
    # Gradient reversal before the domain classifier.
    model.GradientScalerLayer([da_blobs], ['dc_grl'],
                              -1.0 * cfg.TRAIN.DA_INS_GRL_WEIGHT)
    model.FC('dc_grl', 'dc_ip1', 4096, 1024,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('dc_ip1', 'dc_relu_1')
    model.Dropout('dc_relu_1', 'dc_drop_1', ratio=0.5, is_test=False)
    model.FC('dc_drop_1', 'dc_ip2', 1024, 1024,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('dc_ip2', 'dc_relu_2')
    model.Dropout('dc_relu_2', 'dc_drop_2', ratio=0.5, is_test=False)
    # Single-logit domain prediction per RoI.
    dc_ip3 = model.FC('dc_drop_2', 'dc_ip3', 1024, 1,
                      weight_init=gauss_fill(0.05), bias_init=const_fill(0.0))
    if cfg.TRAIN.PADA:
        # PADA re-weights instances by class-transferability.
        dc_ip3 = model.PADAbyGradientWeightingLayerD(
            dc_ip3, 'pada_dc_ip3', 'pada_roi_weights')
    loss_gradient = None
    if model.train:
        dc_loss = model.net.SigmoidCrossEntropyLoss(
            [dc_ip3, 'dc_label'], 'loss_dc', scale=model.GetLossScale())
        loss_gradient = blob_utils.get_loss_gradients(model, [dc_loss])
        model.AddLosses('loss_dc')
    return loss_gradient, da_blobs, 4096
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs.

    Fix: the docstring was placed after the early-return dispatch, making it
    a dead string expression rather than the function's docstring; it now
    leads the function.
    """
    # Dispatch to specialized cascade / embedding heads first.
    if cfg.MRCNN.DP_CASCADE_MASK_ON:
        return add_dp_cascaded_mask_outputs(model, blob_in, dim)
    if cfg.MRCNN.BBOX_CASCADE_MASK_ON:
        if cfg.MRCNN.USE_CLS_EMBS:
            return add_mask_emb_outputs(model, blob_in, dim)
        return add_cascaded_mask_outputs(model, blob_in, dim)
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    if cfg.MRCNN.USE_CLS_EMBS:
        return add_mask_emb_outputs(model, blob_in, dim)
    if cfg.MRCNN.USE_FC_OUTPUT:
        # Fully connected mask predictor (blob keeps the 'fcn' name).
        blob_out = model.FC(blob_in, 'mask_fcn_logits', dim,
                            num_cls * cfg.MRCNN.RESOLUTION**2,
                            weight_init=gauss_fill(0.001),
                            bias_init=const_fill(0.0))
    else:
        # 1x1 conv predictor. GaussianFill for the class-agnostic case:
        # fan-in based fills can be too large and cause divergence.
        fill = (cfg.MRCNN.CONV_INIT
                if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
        blob_out = model.Conv(blob_in, 'mask_fcn_logits', dim, num_cls,
                              kernel=1, pad=0, stride=1,
                              weight_init=(fill, {'std': 0.001}),
                              bias_init=const_fill(0.0))
        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO)
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
    return blob_out
def add_roi_Xconv2fc_head(model, blob_in, dim_in, spatial_scale):
    """Add an X-conv + 2-FC box head; returns ('fc7', MLP_HEAD_DIM)."""
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    current = model.RoIFeatureTransform(
        blob_in, 'roi_feat', blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)
    # Stack of 3x3 convs, each followed by ReLU.
    for conv_idx in range(cfg.FAST_RCNN.NUM_STACKED_CONVS):
        current = model.Conv(
            current, 'head_conv' + str(conv_idx + 1), dim_in, hidden_dim, 3,
            stride=1, pad=1,
            weight_init=('GaussianFill', {'std': 0.01}),
            bias_init=('ConstantFill', {'value': 0.}),
            no_bias=0)
        current = model.Relu(current, current)
        dim_in = hidden_dim
    # Two FC layers on the flattened conv feature.
    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(current, 'fc6', dim_in * roi_size * roi_size, fc_dim,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', fc_dim, fc_dim,
             weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    model.Relu('fc7', 'fc7')
    return 'fc7', fc_dim
def add_apm_outputs2(model, blob_in, dim):
    """Add APM outputs on top of the BPM strip features.

    Concatenates per-strip FC and conv features, predicts per-strip class
    scores with a shared FC, and combines the two streams WSDDN-style.
    """
    bpm_heads.add_bpm_outputs(model, blob_in, dim)
    strip_num = cfg.REID.BPM_STRIP_NUM
    strip_prefixes = ['feature_' + str(i) for i in range(strip_num)]
    dim_inner = cfg.REID.BPM_DIM
    im_per_batch = cfg.TRAIN.IMS_PER_BATCH if model.train else 1
    # Stack the per-strip FC outputs along a new strip axis.
    model.net.Concat([p + '_fc' for p in strip_prefixes],
                     ['fc8c', 'fc8c_split_info'], add_axis=1, axis=1)
    # Stack the per-strip conv features the same way.
    model.net.Concat([p + '_conv' for p in strip_prefixes],
                     ['fc7', 'fc7_split_info'], add_axis=1, axis=1)
    # Flatten (image, strip) so one FC is shared across strips.
    model.net.Reshape('fc7', ['fc7_', 'fc7_shape'],
                      shape=[im_per_batch * strip_num, dim_inner])
    model.FC('fc7_', 'fc8d_', dim_inner, model.num_classes - 1,
             weight_init=('XavierFill', {}), bias_init=const_fill(0.0))
    model.net.Reshape('fc8d_', ['fc8d', 'fc8d__shape'],
                      shape=[im_per_batch, strip_num, model.num_classes - 1])
    model.Softmax('fc8c', 'alpha_cls', axis=2)
    # Softmax over strips: transpose, softmax, transpose back.
    model.Transpose('fc8d', 'fc8d_t', axes=(0, 2, 1))
    model.Softmax('fc8d_t', 'alpha_det_t', axis=2)
    model.Transpose('alpha_det_t', 'alpha_det', axes=(0, 2, 1))
    model.net.Mul(['alpha_cls', 'alpha_det'], 'rois_pred')
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """ResNet "conv5" mask head sharing weights/computation with the conv5
    box head. Sharing only works at training time (inference is cascaded).

    v0upshare design: conv5, convT 2x2.
    """
    # Shared head requires identical RoI resolutions for box and mask.
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    if model.train:
        # Training: reuse the box head's conv5 feature, restricted to the
        # RoIs that carry mask labels.
        dim_conv5 = 2048
        blob_conv5 = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced'])
    else:
        # Inference: recompute conv5 on the detected boxes.
        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
            model, blob_in, dim_in, spatial_scale)
    dim_reduced = cfg.MRCNN.DIM_REDUCED
    blob_mask = model.ConvTranspose(
        blob_conv5, 'conv5_mask', dim_conv5, dim_reduced,
        kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std: gauss only
        bias_init=const_fill(0.0))
    model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_reduced
def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
    """v0up design: conv5, deconv 2x2 (no weight sharing with the box head)."""
    blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
        model, blob_in, dim_in, spatial_scale)
    dim_reduced = cfg.MRCNN.DIM_REDUCED
    # 2x2 transposed conv for 2x upsampling of the conv5 feature.
    model.ConvTranspose(
        blob_conv5, 'conv5_mask', dim_conv5, dim_reduced,
        kernel=2, pad=0, stride=2,
        weight_init=('GaussianFill', {'std': 0.001}),
        bias_init=const_fill(0.0))
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')
    return blob_mask, dim_reduced
def add_body_uv_outputs(model, blob_in, dim):
    """Add DensePose body UV outputs: heatmaps of dense mask, patch index,
    and patch-specific UV coordinates. Dense masks map to labels in
    [0, ..., S] for S semantically meaningful body parts.
    """
    head_names = ['AnnIndex', 'Index_UV', 'U', 'V']
    # ConvTranspose on the feature representation; 2x upsampling.
    for name in head_names:
        if name == 'AnnIndex':
            dim_out = cfg.BODY_UV_RCNN.NUM_SEMANTIC_PARTS + 1
        else:
            dim_out = cfg.BODY_UV_RCNN.NUM_PATCHES + 1
        model.ConvTranspose(
            blob_in, name + '_lowres', dim, dim_out,
            cfg.BODY_UV_RCNN.DECONV_KERNEL,
            pad=int(cfg.BODY_UV_RCNN.DECONV_KERNEL / 2 - 1), stride=2,
            weight_init=(cfg.BODY_UV_RCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0))
    # Bilinear upsampling to the final heatmap resolution.
    # NOTE(review): the interpolation channel count is NUM_PATCHES + 1 for
    # every head, including AnnIndex whose lowres blob was created with
    # NUM_SEMANTIC_PARTS + 1 channels — confirm this mismatch is intended.
    blob_outputs = []
    for name in head_names:
        out_name = name + '_estimated' if name in ['U', 'V'] else name
        blob_outputs.append(
            model.BilinearInterpolation(
                name + '_lowres', out_name,
                cfg.BODY_UV_RCNN.NUM_PATCHES + 1,
                cfg.BODY_UV_RCNN.NUM_PATCHES + 1,
                cfg.BODY_UV_RCNN.UP_SCALE))
    return blob_outputs
def add_track_head(model, blob_in, dim_in, spatial_scale):
    """Add a Mask R-CNN tracking head over the 'track_rois'.

    Returns (feature_blob, feature_dim).
    """
    head_dim = cfg.TRCNN.MLP_HEAD_DIM
    roi_size = cfg.TRCNN.ROI_XFORM_RESOLUTION
    roi_feat = model.RoIFeatureTransform(
        blob_in, 'track_roi_feat', blob_rois='track_rois',
        method=cfg.TRCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.TRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)
    if cfg.TRCNN.MLP_HEAD_ON:
        # Bottleneck: FC + ReLU down to head_dim.
        model.FC(roi_feat, "track_fc", dim_in * roi_size * roi_size,
                 head_dim, weight_init=gauss_fill(0.01),
                 bias_init=const_fill(0.0))
        track_fc = model.Relu("track_fc", "track_fc")
        return track_fc, head_dim
    # No bottleneck: flatten the RoI feature vector.
    model.Flatten(roi_feat, "track_fc")
    track_fc = model.Relu("track_fc", "track_fc")
    # NOTE(review): this branch computes 'track_fc' but returns ``roi_feat``
    # with the flattened dimension — confirm the caller expects the raw
    # (unflattened) blob here rather than track_fc.
    return roi_feat, dim_in * roi_size * roi_size
def add_roi_body_uv_head_v1convX(model, blob_in, dim_in, spatial_scale):
    """Add a DensePose body UV head. v1convX design: X * (conv + ReLU)."""
    hidden_dim = cfg.BODY_UV_RCNN.CONV_HEAD_DIM
    kernel_size = cfg.BODY_UV_RCNN.CONV_HEAD_KERNEL
    pad_size = kernel_size // 2
    current = model.RoIFeatureTransform(
        blob_in, '_[body_uv]_roi_feat', blob_rois='body_uv_rois',
        method=cfg.BODY_UV_RCNN.ROI_XFORM_METHOD,
        resolution=cfg.BODY_UV_RCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.BODY_UV_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)
    # Stack of convs, each followed by ReLU.
    for conv_idx in range(cfg.BODY_UV_RCNN.NUM_STACKED_CONVS):
        current = model.Conv(
            current, 'body_conv_fcn' + str(conv_idx + 1), dim_in, hidden_dim,
            kernel_size, stride=1, pad=pad_size,
            weight_init=(cfg.BODY_UV_RCNN.CONV_INIT, {'std': 0.01}),
            bias_init=const_fill(0.0))
        current = model.Relu(current, current)
        dim_in = hidden_dim
    return current, hidden_dim
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    if cfg.MRCNN.USE_FC_OUTPUT:
        # Fully connected mask predictor (blob keeps the 'fcn' name).
        dim_fc = int(
            dim * (cfg.MRCNN.RESOLUTION / cfg.MRCNN.UPSAMPLE_RATIO)**2)
        blob_out = model.FC(
            blob_in, 'mask_fcn_logits', dim_fc,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001), bias_init=const_fill(0.0))
    else:
        # 1x1 conv predictor. GaussianFill for the class-agnostic case:
        # fan-in based fills can be too large and cause divergence.
        fill = (cfg.MRCNN.CONV_INIT
                if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
        blob_out = model.Conv(
            blob_in, 'mask_fcn_logits', dim, num_cls,
            kernel=1, pad=0, stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0))
        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO)
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
    return blob_out
def add_attention_backbone_fpn_topdown_module(model, lateral_input_blobs,
                                              output_blobs, fpn_dim_lateral,
                                              fpn_dim, num_backbone_stages):
    """Build top-down FPN connections with multiplicative attention.

    Walks from the coarsest output level downward. At each step the next
    lateral input gets a 3x3 lateral conv; for levels below the first, the
    lateral feature is element-wise multiplied (MulAll) with 3x3-conv'd,
    nearest-neighbor-upsampled projections of every previously built output
    level before being summed with the 2x upsampled top-down feature.

    Blobs are written in place into output_blobs[1:]; nothing is returned.
    """
    for index in range(num_backbone_stages - 1):
        # Factors to be element-wise multiplied for this level's attention.
        mul_blobs = []
        # 2x nearest-neighbor top-down path from the level just above.
        fpn_top_up = model.net.UpsampleNearest(
            output_blobs[index], output_blobs[index] + '_topdown', scale=2)
        # 3x3 lateral conv on the next (finer) backbone feature.
        feature_lat = model.Conv(
            lateral_input_blobs[index + 1],
            lateral_input_blobs[index + 1] + '_lat',
            dim_in=fpn_dim_lateral[index + 1],
            dim_out=fpn_dim,
            kernel=3,
            pad=1,
            stride=1,
            weight_init=(const_fill(0.0)
                         if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})),
            bias_init=const_fill(0.0))
        mul_blobs.append(feature_lat)
        if index == 0:
            # First step below the top: plain lateral + top-down sum.
            model.net.Sum([feature_lat, fpn_top_up], output_blobs[index + 1])
        else:
            # Project and upsample every already-built output level so all
            # factors share the spatial size of this level.
            for i in range(index):
                # Coarser levels need a larger upsampling factor.
                re_scale = 2**(1 + index - i)
                feature_c = model.Conv(
                    output_blobs[i],
                    output_blobs[i] + '_To_' + output_blobs[index + 1] + '_c',
                    dim_in=fpn_dim,
                    dim_out=fpn_dim,
                    kernel=3,
                    pad=1,
                    stride=1,
                    weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
                                 else ('XavierFill', {})),
                    bias_init=const_fill(0.0))
                feature_up = model.net.UpsampleNearest(
                    feature_c, feature_c + '_up', scale=re_scale)
                mul_blobs.append(feature_up)
            # Element-wise product of lateral + all upsampled projections.
            feature_mul = model.net.MulAll(
                mul_blobs, output_blobs[index + 1] + "_mulall")
            model.net.Sum([feature_mul, fpn_top_up], output_blobs[index + 1])
def _add_image_level_classifier(model, blob_in, dim_in, spatial_scale_in):
    """Add an image-level domain classifier head (domain-adaptive training).

    A gradient-reversal (scaler) layer followed by two 1x1 convs produces a
    per-location domain logit; at training time a sigmoid cross-entropy loss
    against 'da_label_wide' is added.

    Returns the loss gradients dict during training, None at inference.

    NOTE(review): removed the unused nested helpers negateGrad /
    grad_negateGrad — they were dead code; gradient reversal is implemented
    by model.GradientScalerLayer below.
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    # Gradient reversal: identity forward pass, gradients scaled by
    # -DA_IMG_GRL_WEIGHT on the backward pass into the backbone.
    model.GradientScalerLayer([blob_in], ['da_grl'],
                              -1.0 * cfg.TRAIN.DA_IMG_GRL_WEIGHT)
    # Two 1x1 convs: 512-d hidden layer, then a 1-channel domain logit map.
    model.Conv(
        'da_grl',
        'da_conv_1',
        dim_in,
        512,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0))
    model.Relu('da_conv_1', 'da_conv_1')
    model.Conv(
        'da_conv_1',
        'da_conv_2',
        512,
        1,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0))
    if model.train:
        # Crop the wide label map to the spatial size of the logits.
        model.net.SpatialNarrowAs(['da_label_wide', 'da_conv_2'], 'da_label')
        loss_da = model.net.SigmoidCrossEntropyLoss(
            ['da_conv_2', 'da_label'], 'loss_da', scale=model.GetLossScale())
        loss_gradient = blob_utils.get_loss_gradients(model, [loss_da])
        model.AddLosses('loss_da')
        return loss_gradient
    return None
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    lateral_name = fpn_bottom + '_lateral'
    # Optionally start the lateral path at zero so training begins from the
    # pure top-down signal.
    lateral_weight_init = (
        const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {})
    )

    # Lateral 1x1 conv (with GroupNorm when enabled).
    if cfg.FPN.USE_GN:
        lat = model.ConvGN(
            fpn_lateral,
            lateral_name,
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=lateral_weight_init,
            bias_init=const_fill(0.0))
    else:
        lat = model.Conv(
            fpn_lateral,
            lateral_name,
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=lateral_weight_init,
            bias_init=const_fill(0.0))

    # Top-down 2x nearest-neighbor upsampling.
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)

    # Fuse: element-wise sum of lateral and top-down features.
    model.net.Sum([lat, td], fpn_bottom)
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm"""
    # Pool backbone features into a fixed-size per-RoI feature map.
    x = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)

    dil = cfg.MRCNN.DILATION
    inner_dim = cfg.MRCNN.DIM_REDUCED

    # num_convs x (3x3 conv + GroupNorm + ReLU).
    for k in range(num_convs):
        x = model.ConvGN(
            x,
            '_mask_fcn' + str(k + 1),
            dim_in,
            inner_dim,
            group_gn=get_group_gn(inner_dim),
            kernel=3,
            pad=1 * dil,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.}))
        x = model.Relu(x, x)
        dim_in = inner_dim

    # 2x upsampling via a 2x2 transposed conv, followed by ReLU.
    model.ConvTranspose(
        x,
        'conv5_mask',
        inner_dim,
        inner_dim,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))
    mask_blob = model.Relu('conv5_mask', 'conv5_mask')

    return mask_blob, inner_dim
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale): """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and computation are shared with the conv5 box head. Computation can only be shared during training, since inference is cascaded. v0upshare design: conv5, convT 2x2. """ # Since box and mask head are shared, these must match assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION if model.train: # share computation with bbox head at training time dim_conv5 = 2048 blob_conv5 = model.net.SampleAs( ['res5_2_sum', 'roi_has_mask_int32'], ['_[mask]_res5_2_sum_sliced'] ) else: # re-compute at test time blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks( model, blob_in, dim_in, spatial_scale ) dim_reduced = cfg.MRCNN.DIM_REDUCED blob_mask = model.ConvTranspose( blob_conv5, 'conv5_mask', dim_conv5, dim_reduced, kernel=2, pad=0, stride=2, weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), # std only for gauss bias_init=const_fill(0.0) ) model.Relu('conv5_mask', 'conv5_mask') return blob_mask, dim_reduced
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN specific outputs: position-sensitive score maps that are
    pooled (PSRoIPool) into per-RoI classification scores ('cls_score' /
    'cls_prob') and bounding-box regression deltas ('bbox_pred').
    """
    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(
            blob_in,
            'conv_dim_reduce',
            dim_in,
            dim_reduce,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce

    # Classification conv: one score map per (class, grid cell).
    model.Conv(
        blob_in,
        'conv_cls',
        dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Bounding-box regression conv: 4 deltas per (class, grid cell);
    # class-agnostic regression uses 2 "classes" (bg/fg).
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in,
        'conv_bbox_pred',
        dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'], ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    # Vote over the grid by averaging, then flatten to (num_rois, num_cls).
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE
    )
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        # Inference only: convert scores to probabilities.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE
    )
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    One RPN head (3x3 hidden conv, 1x1 cls logits, 1x1 bbox deltas) is
    applied to every FPN level; the convs created at the finest level
    (k_min) are shared — by weight/bias blob name — with all coarser levels.
    """
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases (reuse the k_min level's parameter
            # blobs by name via ConvShared).
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            # Anchor size doubles per level; one aspect-ratio set per anchor.
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # Output is NxKxHxW (K = number of keypoints).
    upsample_heatmap = cfg.KRCNN.UP_SCALE > 1

    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling.
        blob_in = model.ConvTranspose(
            blob_in,
            'kps_deconv',
            dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0))
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    # When upsampling afterwards, predict into an intermediate blob first.
    heatmap_name = 'kps_score_lowres' if upsample_heatmap else 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Predict heatmaps with ConvTranspose; results in 2x upsampling.
        blob_out = model.ConvTranspose(
            blob_in,
            heatmap_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0))
    else:
        # Predict heatmaps with a 1x1 conv; no upsampling.
        blob_out = model.Conv(
            blob_in,
            heatmap_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0))

    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling.
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE)

    return blob_out
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper.

    Returns (blobs_fpn, fpn_dim, spatial_scales), ordered from the
    coarsest to the finest level.
    """
    # FPN levels are built starting from the highest/coarest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines stages 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4
    # backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        # Coarser levels are prepended: the list stays coarsest-first.
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                # ReLU between the extra stride-2 convs (not before the first).
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales