def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach the RoI classification and box regression output layers.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``train``,
            ``num_classes``, ``weights``, ``biases`` and ``stage_params``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    # Classification scores: ``model.num_classes`` outputs per RoI.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: 4 outputs per regression class.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )

    if not cfg.MODEL.CASCADE_ON:
        return

    # Register the last two weight/bias entries under stage '1'
    # (presumably the parameters of the two FC layers added above).
    if '1' not in model.stage_params:
        model.stage_params['1'] = []
    for param_idx in (-2, -1):
        model.stage_params['1'].append(model.weights[param_idx])
        model.stage_params['1'].append(model.biases[param_idx])
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach the RoI classification and box regression output layers.

    At test time the class probabilities are either a plain softmax or,
    when ``cfg.MODEL.WEIGHTED_LOSS`` is set, the mean of a softmax and a
    sigmoid over the same scores.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``net``, ``train``,
            ``num_classes``, ``weights``, ``biases`` and ``stage_params``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )

    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        if cfg.MODEL.WEIGHTED_LOSS:
            model.Softmax('cls_score', 'cls_prob1', engine='CUDNN')
            model.net.Sigmoid('cls_score', 'cls_prob2', engine='CUDNN')
            model.net.Mean(['cls_prob1', 'cls_prob2'], 'cls_prob')
        else:
            model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression is always class-specific here: 4 outputs per class.
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        model.num_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )

    if not cfg.MODEL.CASCADE_ON:
        return

    # Register the last two weight/bias entries under stage '1'
    # (presumably the parameters of the two FC layers added above).
    if '1' not in model.stage_params:
        model.stage_params['1'] = []
    for param_idx in (-2, -1):
        model.stage_params['1'].append(model.weights[param_idx])
        model.stage_params['1'].append(model.biases[param_idx])
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach the RoI classification and box regression output layers.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``train`` and
            ``num_classes``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    # Classification scores: ``model.num_classes`` outputs per RoI.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: 4 outputs per regression class.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def add_fast_rcnn_outputs_test(model, blob_in, dim):
    """Add a two-layer MLP plus classification and box regression outputs.

    The classification branch runs ``blob_in -> fc6 -> fc7 -> cls_score``.
    The box regression layer reads ``blob_in`` directly, not ``fc7``.

    Args:
        model: Model helper exposing ``FC``, ``Relu``, ``Softmax``, ``train``
            and ``num_classes``.
        blob_in: Input feature blob; its flattened size is taken to be
            ``CONV_HEAD_DIM * ROI_XFORM_RESOLUTION ** 2`` (from ``cfg``).
        dim: Width of the ``fc6`` / ``fc7`` hidden layers.
    """
    hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    flat_dim = hidden_dim * roi_size * roi_size

    # Box classification branch.
    model.FC(blob_in, 'fc6', flat_dim, dim)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', dim, dim)
    model.Relu('fc7', 'fc7')
    model.FC(
        'fc7',
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression layer: 4 outputs per regression class.
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.FC(
        blob_in,
        'bbox_pred',
        flat_dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def add_cascade_rcnn_outputs(model, blob_in, dim, stage):
    """Attach per-stage RoI classification and box regression outputs.

    All blob names carry the suffix ``"_<stage>"``.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``train``,
            ``num_classes``, ``weights``, ``biases`` and ``stage_params``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
        stage: Stage identifier used for the blob suffix and, as a string,
            as the key into ``model.stage_params``.

    Returns:
        Tuple ``(cls_prob_name, bbox_pred_name)`` of blob names. The
        probability blob itself is only created when ``model.train`` is
        falsy.
    """
    suffix = "_{}".format(stage)
    score_blob = "cls_score" + suffix
    prob_blob = "cls_prob" + suffix
    bbox_blob = "bbox_pred" + suffix

    model.FC(
        blob_in,
        score_blob,
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax(score_blob, prob_blob, engine="CUDNN")

    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.FC(
        blob_in,
        bbox_blob,
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )

    # Register the last two weight/bias entries under this stage
    # (presumably the parameters of the two FC layers added above).
    stage_key = str(stage)
    if stage_key not in model.stage_params:
        model.stage_params[stage_key] = []
    for param_idx in (-2, -1):
        model.stage_params[stage_key].append(model.weights[param_idx])
        model.stage_params[stage_key].append(model.biases[param_idx])

    return prob_blob, bbox_blob
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach RoI classification, box regression and optional std outputs.

    When ``cfg.PRED_STD`` is set, an extra ``bbox_pred_std`` layer with the
    same output width as ``bbox_pred`` is added, followed by an ``Abs`` op.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``net``, ``train``
            and ``num_classes``.
        blob_in: Feature blob fed to every fully connected layer.
        dim: Input dimension handed to every fully connected layer.
    """
    # Classification scores.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: 4 outputs per regression class.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )

    if not cfg.PRED_STD:
        return

    # Per-coordinate std prediction; bias starts at 1.0 and the absolute
    # value is exposed as a separate blob.
    model.FC(
        blob_in,
        'bbox_pred_std',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.0001),
        bias_init=const_fill(1.0),
    )
    model.net.Abs('bbox_pred_std', 'bbox_pred_std_abs')
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN).

    Args:
        model: Model helper exposing ``Conv``, ``Relu``, ``net``,
            ``GenerateProposals``, ``GenerateProposalLabels`` and ``train``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``; also used as the width of the
            RPN hidden layer.
        spatial_scale: Scale of the feature map relative to the input image;
            its reciprocal is used as the anchor stride.
    """
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS,
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in

    # Shared 3x3 hidden representation.
    model.Conv(
        blob_in,
        'conv_rpn',
        dim_in,
        dim_out,
        kernel=3,
        pad=1,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    model.Relu('conv_rpn', 'conv_rpn')

    # 1x1 objectness logits: one per anchor.
    model.Conv(
        'conv_rpn',
        'rpn_cls_logits',
        dim_in,
        num_anchors,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    # 1x1 box regression deltas: four per anchor.
    model.Conv(
        'conv_rpn',
        'rpn_bbox_pred',
        dim_in,
        4 * num_anchors,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )

    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed for inference (RPN-only and Faster R-CNN) and
        # for Faster R-CNN training; RPN-only training does not need them.
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale,
        )

    if not cfg.MODEL.FASTER_RCNN:
        return

    if model.train:
        # Generate training labels for the in-network RPN proposals.
        model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
    else:
        # At inference both 'rois' and 'da_rois' alias the RPN proposals.
        model.net.Alias('rpn_rois', 'rois')
        model.net.Alias('rpn_rois', 'da_rois')
def add_fast_rcnn_multilabel_outputs(model, blob_in, dim):
    """Attach a 2-way class head, a 15-way action head and a box regressor.

    Args:
        model: Model helper exposing ``FC``, ``Softmax`` and ``train``.
        blob_in: Feature blob fed to all three fully connected layers.
        dim: Input dimension handed to all three fully connected layers.
    """
    # Two-way classification scores.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        2,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    # Fifteen-way action logits.
    model.FC(
        blob_in,
        'action_cls_logits',
        dim,
        15,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )

    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax('action_cls_logits', 'action_prob', engine='CUDNN')
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: 2 regression classes x 4 outputs.
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        2 * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def _add_instance_level_classifier(model, blob_in, dim_in):
    """Build the instance-level domain classifier behind a gradient scaler.

    The input passes through ``GradientScalerLayer`` with factor
    ``-cfg.TRAIN.DA_INS_GRL_WEIGHT``, then two 1024-wide FC/ReLU/Dropout
    stages and a final single-output FC layer (``dc_ip3``).

    Args:
        model: Model helper exposing ``GradientScalerLayer``, ``FC``,
            ``Relu``, ``Dropout``, ``net``, ``GetLossScale``, ``AddLosses``
            and ``train``.
        blob_in: Input feature blob.
        dim_in: Input dimension of the first FC layer.

    Returns:
        The loss gradients from ``blob_utils.get_loss_gradients`` when
        ``model.train`` is truthy, otherwise ``None``.
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    model.GradientScalerLayer(
        [blob_in], ['dc_grl'], -1.0*cfg.TRAIN.DA_INS_GRL_WEIGHT
    )

    model.FC(
        'dc_grl', 'dc_ip1', dim_in, 1024,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('dc_ip1', 'dc_relu_1')
    # Dropout is built with is_test=False regardless of model.train.
    model.Dropout('dc_relu_1', 'dc_drop_1', ratio=0.5, is_test=False)

    model.FC(
        'dc_drop_1', 'dc_ip2', 1024, 1024,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('dc_ip2', 'dc_relu_2')
    model.Dropout('dc_relu_2', 'dc_drop_2', ratio=0.5, is_test=False)

    dc_ip3 = model.FC(
        'dc_drop_2', 'dc_ip3', 1024, 1,
        weight_init=gauss_fill(0.05), bias_init=const_fill(0.0)
    )

    loss_gradient = None
    if model.train:
        dc_loss = model.net.SigmoidCrossEntropyLoss(
            [dc_ip3, 'dc_label'],
            'loss_dc',
            scale=model.GetLossScale()
        )
        loss_gradient = blob_utils.get_loss_gradients(model, [dc_loss])
        model.AddLosses('loss_dc')
    return loss_gradient
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach two pairs of classification / box regression output layers.

    One pair reads the blob ``'fc7_newC'`` and writes
    ``'cls_score_toothbrush'`` / ``'bbox_pred_toothbrush'``; the other reads
    ``'fc7_oldC'`` and writes ``'cls_score'`` / ``'bbox_pred'``.

    Args:
        model: Model helper exposing ``FC`` and ``num_classes``.
        blob_in: Unused; the input blobs are addressed by fixed name.
        dim: Input dimension handed to every fully connected layer.
    """
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes

    # Head on top of 'fc7_newC'.
    model.FC(
        'fc7_newC',
        'cls_score_toothbrush',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    model.FC(
        'fc7_newC',
        'bbox_pred_toothbrush',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )

    # Head on top of 'fc7_oldC'.
    model.FC(
        'fc7_oldC',
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    model.FC(
        'fc7_oldC',
        'bbox_pred',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach the RoI classification and box regression output layers.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``train`` and
            ``num_classes``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    # Classification scores.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:
        # The softmax op is only added when testing.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: 4 outputs per regression class.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        regression_classes = 2
    else:
        regression_classes = model.num_classes
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        regression_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN style position-sensitive classification and box outputs.

    Args:
        model: Model helper exposing ``Conv``, ``Relu``, ``AveragePool``,
            ``Softmax``, ``net``, ``train`` and ``num_classes``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``.
        dim_reduce: When not ``None``, a 1x1 conv + ReLU first reduces the
            input to this many channels.
        spatial_scale: Forwarded to both ``PSRoIPool`` ops.
    """
    grid_size = cfg.RFCN.PS_GRID_SIZE

    if dim_reduce is not None:
        # Optional 1x1 channel reduction.
        blob_in = model.Conv(
            blob_in,
            'conv_dim_reduce',
            dim_in,
            dim_reduce,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce

    # Position-sensitive classification score maps.
    model.Conv(
        blob_in,
        'conv_cls',
        dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )

    # Position-sensitive box regression maps.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.Conv(
        blob_in,
        'conv_bbox_pred',
        dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )

    # Classification: PS RoI pooling, averaging over the grid, then a
    # reshape to (-1, NUM_CLASSES).
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=grid_size,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale,
    )
    model.AveragePool('psroipooled_cls', 'cls_score_4d', kernel=grid_size)
    model.net.Reshape(
        'cls_score_4d',
        ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES),
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: PS RoI pooling followed by averaging over the grid.
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=grid_size,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale,
    )
    model.AveragePool('psroipooled_bbox', 'bbox_pred', kernel=grid_size)
def _add_instance_level_classifier(model, blob_in, dim_in, spatial_scale):
    """Build the instance-level domain classifier on pooled ``da_rois``.

    RoI features are pooled at resolution 7 and passed through two FC
    layers that reuse the ``fc6`` / ``fc7`` weights. The resulting features
    then go through a gradient scaler and a three-layer classifier.

    Args:
        model: Model helper exposing ``RoIFeatureTransform``, ``FCShared``,
            ``FC``, ``Relu``, ``Dropout``, ``GradientScalerLayer``,
            ``PADAbyGradientWeightingLayerD``, ``net``, ``GetLossScale``,
            ``AddLosses`` and ``train``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``.
        spatial_scale: Forwarded to ``RoIFeatureTransform``.

    Returns:
        Tuple ``(loss_gradient, da_blobs, 4096)``: the loss gradients (or
        ``None`` when not training), the ``da_fc7`` feature blob and its
        width.
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    # Pool RoI features and run them through the shared fc6/fc7 weights.
    model.RoIFeatureTransform(
        blob_in,
        'da_pool5',
        blob_rois='da_rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=7,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale,
    )
    model.FCShared(
        'da_pool5', 'da_fc6', dim_in * 7 * 7, 4096,
        weight='fc6_w', bias='fc6_b',
    )
    model.Relu('da_fc6', 'da_fc6')
    model.FCShared(
        'da_fc6', 'da_fc7', 4096, 4096,
        weight='fc7_w', bias='fc7_b',
    )
    da_blobs = model.Relu('da_fc7', 'da_fc7')

    # Scale gradients by -DA_INS_GRL_WEIGHT before the classifier.
    model.GradientScalerLayer(
        [da_blobs], ['dc_grl'], -1.0*cfg.TRAIN.DA_INS_GRL_WEIGHT
    )

    model.FC(
        'dc_grl', 'dc_ip1', 4096, 1024,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0),
    )
    model.Relu('dc_ip1', 'dc_relu_1')
    # Dropout is built with is_test=False regardless of model.train.
    model.Dropout('dc_relu_1', 'dc_drop_1', ratio=0.5, is_test=False)

    model.FC(
        'dc_drop_1', 'dc_ip2', 1024, 1024,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0),
    )
    model.Relu('dc_ip2', 'dc_relu_2')
    model.Dropout('dc_relu_2', 'dc_drop_2', ratio=0.5, is_test=False)

    logits = model.FC(
        'dc_drop_2', 'dc_ip3', 1024, 1,
        weight_init=gauss_fill(0.05), bias_init=const_fill(0.0),
    )
    if cfg.TRAIN.PADA:
        logits = model.PADAbyGradientWeightingLayerD(
            logits, 'pada_dc_ip3', 'pada_roi_weights'
        )

    if not model.train:
        return None, da_blobs, 4096

    dc_loss = model.net.SigmoidCrossEntropyLoss(
        [logits, 'dc_label'],
        'loss_dc',
        scale=model.GetLossScale(),
    )
    loss_gradient = blob_utils.get_loss_gradients(model, [dc_loss])
    model.AddLosses('loss_dc')
    return loss_gradient, da_blobs, 4096
def add_roi_Xconv2fc_head(model, blob_in, dim_in, spatial_scale):
    """Add a head made of X stacked 3x3 convs followed by two FC layers.

    Args:
        model: Model helper exposing ``RoIFeatureTransform``, ``Conv``,
            ``Relu`` and ``FC``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``.
        spatial_scale: Forwarded to ``RoIFeatureTransform``.

    Returns:
        Tuple ``('fc7', fc_dim)``: the output blob name and its width
        (``cfg.FAST_RCNN.MLP_HEAD_DIM``).
    """
    conv_dim = cfg.FAST_RCNN.CONV_HEAD_DIM
    resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    features = model.RoIFeatureTransform(
        blob_in,
        'roi_feat',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale,
    )

    # Stacked 3x3 conv + ReLU layers named head_conv1, head_conv2, ...
    for conv_number in range(1, cfg.FAST_RCNN.NUM_STACKED_CONVS + 1):
        features = model.Conv(
            features,
            'head_conv' + str(conv_number),
            dim_in,
            conv_dim,
            3,
            stride=1,
            pad=1,
            weight_init=('GaussianFill', {'std': 0.01}),
            bias_init=('ConstantFill', {'value': 0.}),
            no_bias=0,
        )
        features = model.Relu(features, features)
        # Every conv after the first consumes conv_dim channels.
        dim_in = conv_dim

    fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    model.FC(
        features,
        'fc6',
        dim_in * resolution * resolution,
        fc_dim,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    model.Relu('fc6', 'fc6')
    model.FC(
        'fc6',
        'fc7',
        fc_dim,
        fc_dim,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    model.Relu('fc7', 'fc7')
    return 'fc7', fc_dim
def add_track_head(model, blob_in, dim_in, spatial_scale):
    """Add a track head on top of RoI features pooled from ``track_rois``.

    Args:
        model: Model helper exposing ``RoIFeatureTransform``, ``FC``,
            ``Flatten`` and ``Relu``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``.
        spatial_scale: Forwarded to ``RoIFeatureTransform``.

    Returns:
        Tuple ``(blob, dim)``. With ``cfg.TRCNN.MLP_HEAD_ON`` this is the
        ReLU'd ``track_fc`` blob and ``cfg.TRCNN.MLP_HEAD_DIM``. Otherwise
        it is the pooled RoI feature blob and
        ``dim_in * roi_size * roi_size``.
    """
    head_dim = cfg.TRCNN.MLP_HEAD_DIM
    roi_size = cfg.TRCNN.ROI_XFORM_RESOLUTION

    roi_feat = model.RoIFeatureTransform(
        blob_in,
        'track_roi_feat',
        blob_rois='track_rois',
        method=cfg.TRCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.TRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale,
    )

    if cfg.TRCNN.MLP_HEAD_ON:
        # Bottleneck: project the pooled features down to head_dim.
        model.FC(
            roi_feat,
            "track_fc",
            dim_in * roi_size * roi_size,
            head_dim,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )
        bottleneck = model.Relu("track_fc", "track_fc")
        return bottleneck, head_dim

    # No bottleneck: flatten the pooled features into 'track_fc'.
    model.Flatten(roi_feat, "track_fc")
    model.Relu("track_fc", "track_fc")
    # NOTE(review): the flattened 'track_fc' blob is built but the pooled
    # 'roi_feat' blob is what gets returned -- confirm this is intended.
    return roi_feat, dim_in * roi_size * roi_size
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps.

    Args:
        model: Model helper exposing ``ConvTranspose``, ``Conv``, ``Relu``
            and ``BilinearInterpolation``.
        blob_in: Input feature blob.
        dim: Channel count of ``blob_in``.

    Returns:
        The heatmap blob: the bilinearly upsampled ``'kps_score'`` when
        ``cfg.KRCNN.UP_SCALE > 1``, otherwise the direct conv output.
    """
    num_keypoints = cfg.KRCNN.NUM_KEYPOINTS
    needs_upsampling = (cfg.KRCNN.UP_SCALE > 1)

    if cfg.KRCNN.USE_DECONV:
        # Optional stride-2 deconvolution of the feature representation.
        blob_in = model.ConvTranspose(
            blob_in,
            'kps_deconv',
            dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    # The low-res name is used only when a later upsampling step produces
    # the final 'kps_score' blob.
    score_blob = 'kps_score_lowres' if needs_upsampling else 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling.
        heatmaps = model.ConvTranspose(
            blob_in,
            score_blob,
            dim,
            num_keypoints,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0),
        )
    else:
        # Predict heatmaps with a 1x1 conv.
        heatmaps = model.Conv(
            blob_in,
            score_blob,
            dim,
            num_keypoints,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0),
        )

    if needs_upsampling:
        # Increase heatmap output size via bilinear upsampling.
        heatmaps = model.BilinearInterpolation(
            heatmaps,
            'kps_score',
            cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.UP_SCALE,
        )
    return heatmaps
def _add_image_level_classifier(model, blob_in, dim_in, spatial_scale_in):
    """Build the image-level domain classifier behind a gradient scaler.

    The input passes through ``GradientScalerLayer`` with factor
    ``-cfg.TRAIN.DA_IMG_GRL_WEIGHT`` and then two 1x1 convs
    (``dim_in -> 512 -> 1``) with a ReLU in between.

    Args:
        model: Model helper exposing ``GradientScalerLayer``, ``Conv``,
            ``Relu``, ``net``, ``GetLossScale``, ``AddLosses`` and ``train``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``.
        spatial_scale_in: Unused by this function.

    Returns:
        The loss gradients from ``blob_utils.get_loss_gradients`` when
        ``model.train`` is truthy, otherwise ``None``.
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    model.GradientScalerLayer(
        [blob_in], ['da_grl'], -1.0 * cfg.TRAIN.DA_IMG_GRL_WEIGHT
    )
    model.Conv(
        'da_grl', 'da_conv_1', dim_in, 512,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )
    model.Relu('da_conv_1', 'da_conv_1')
    model.Conv(
        'da_conv_1', 'da_conv_2', 512, 1,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )

    if not model.train:
        return None

    # Narrow the wide label blob to the spatial size of the logits.
    model.net.SpatialNarrowAs(['da_label_wide', 'da_conv_2'], 'da_label')
    loss_da = model.net.SigmoidCrossEntropyLoss(
        ['da_conv_2', 'da_label'],
        'loss_da',
        scale=model.GetLossScale()
    )
    loss_gradient = blob_utils.get_loss_gradients(model, [loss_da])
    model.AddLosses('loss_da')
    return loss_gradient
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach classification and box regression layers with stopped gradients.

    Both output blobs are passed through ``StopGradient`` in place. No
    softmax op is added by this function.

    Args:
        model: Model helper exposing ``FC``, ``StopGradient`` and
            ``num_classes``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    # Classification scores.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    model.StopGradient('cls_score', 'cls_score')

    # Box regression: 4 outputs per regression class.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
    model.StopGradient('bbox_pred', 'bbox_pred')
def add_seg_outputs(model, blob_in, dim):
    """Add segmentation mask outputs: mask logits (train) or probs (test).

    Delegates to ``add_deeplab_outputs`` when ``cfg.MRCNN.ROI_MASK_HEAD``
    contains ``'deeplab'``. On the conv path the logits have
    ``num_cls - 1`` channels. At test time the first channel is split off
    and prepended as a stand-in background channel before the sigmoid.

    Args:
        model: Model helper exposing ``FC``, ``Conv``,
            ``BilinearInterpolation``, ``net``, ``train`` and
            ``num_classes``.
        blob_in: Input feature blob.
        dim: Channel count (or flattened size for the FC path) of
            ``blob_in``.

    Returns:
        The output blob: logits when training, ``'mask_fcn_probs'`` when
        testing, or whatever ``add_deeplab_outputs`` returns.
    """
    if 'deeplab' in cfg.MRCNN.ROI_MASK_HEAD:
        return add_deeplab_outputs(model, blob_in, dim)

    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1

    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the
        # blob name).
        blob_out = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict mask using Conv.
        # Use GaussianFill for class-agnostic mask prediction; fills based
        # on fan-in can be too large in this case and cause divergence.
        fill = (
            cfg.MRCNN.CONV_INIT
            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        # The conv emits one channel fewer than num_cls.
        num_mask_channels = num_cls - 1
        blob_out = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_mask_channels,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            # The upsampling layer must be declared with the channel count
            # the conv actually produced (it was num_cls before, which does
            # not match the num_cls - 1 channels of 'mask_fcn_logits').
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up',
                num_mask_channels, num_mask_channels,
                cfg.MRCNN.UPSAMPLE_RATIO
            )

    if not model.train:  # == if test
        # Add background predictions: reuse the first logit channel as the
        # leading channel so the result has one more channel than blob_out.
        model.net.Split(
            blob_out,
            ['mask_fcn_logits_bg', 'mask_notuse'],
            split=[1, model.num_classes - 2],
            axis=1
        )
        model.net.Concat(
            ['mask_fcn_logits_bg', blob_out],
            ['mask_fcn_logits_', 'mask_fcn_logits_concat_dims'],
            axis=1
        )
        blob_out = model.net.Sigmoid('mask_fcn_logits_', 'mask_fcn_probs')

    return blob_out
def add_mask_match_heads(model):
    """Build the mask-matching discriminator network and its softmax loss.

    Matched-real, matched-fake and unmatched-fake mask stacks are
    concatenated along axis 0, run through four conv/ReLU/pool stages and
    classified by a two-way FC layer.

    Args:
        model: Model helper exposing ``Conv``, ``Relu``, ``MaxPool``,
            ``AveragePool``, ``FC`` and ``net``.

    Returns:
        Tuple ``(dlogits, loss_dnet)``: the logits blob and the loss blob.
    """
    # Assemble the discriminator inputs.
    model.net.Concat(
        ['instances_data', 'person_mask'],
        ['matched_fake_masks', 'matched_fake_mask_shape'],
        axis=1,
    )
    model.net.Concat(
        ['instances_fake_data', 'person_mask'],
        ['unmatched_fake_masks', 'unmatched_fake_mask_shape'],
        axis=1,
    )
    features, _ = model.net.Concat(
        ['matched_real_masks', 'matched_fake_masks', 'unmatched_fake_masks'],
        ['dnet_inputs', 'dnet_inputs_shape'],
        axis=0,
    )

    in_channels = 5
    out_channels = 64
    num_stages = 4
    for stage in range(1, num_stages + 1):
        features = model.Conv(
            features,
            'dnet_conv_fcn' + str(stage),
            in_channels,
            out_channels,
            3,
            stride=1,
            pad=1,
            weight_init=(cfg.BODY_UV_RCNN.CONV_INIT, {'std': 0.01}),
            bias_init=('ConstantFill', {'value': 0.}),
        )
        features = model.Relu(features, features)
        if stage < num_stages:
            features = model.MaxPool(
                features, 'dnet_pool' + str(stage), kernel=2, stride=2
            )
        else:
            # The last stage uses a 7x7 average pool.
            features = model.AveragePool(
                features, 'dnet_pool' + str(stage), kernel=7
            )
        # Channel width doubles from one stage to the next.
        in_channels = out_channels
        out_channels *= 2

    # NOTE(review): the FC input dim is the post-loop doubled width (1024),
    # while the last conv above emits 512 channels -- confirm this matches
    # the flattened size of 'dnet_pool4'.
    dlogits = model.FC(
        features,
        'dnet_logits',
        out_channels,
        2,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )

    # Labels follow the same order as the concatenated inputs.
    model.net.Concat(
        ['dnet_matched_labels', 'dnet_unmatched_labels',
         'dnet_unmatched_labels'],
        ['dnet_labels', 'dnet_labels_shape'],
        axis=0,
    )
    _, loss_dnet = model.net.SoftmaxWithLoss(
        ['dnet_logits', 'dnet_labels'],
        ['dnet_prob', 'loss_dnet'],
        scale=cfg.BODY_UV_RCNN.INDEX_WEIGHTS / cfg.NUM_GPUS,
    )
    return dlogits, loss_dnet
def add_cluster_rcnn_outputs(model, blob_in, dim):
    """Add cluster RoI classification and box regression output ops.

    Args:
        model: Model helper exposing ``FC``, ``Softmax`` and ``train``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    # Two-way cluster classification scores.
    model.FC(
        blob_in,
        'cluster_cls_score',
        dim,
        2,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax('cluster_cls_score', 'cluster_cls_prob', engine='CUDNN')

    # Box regression: fixed at 2 regression classes x 4 outputs.
    regression_classes = 2
    model.FC(
        blob_in,
        'cluster_bbox_pred',
        dim,
        regression_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def add_bpm_attr_outputs(model, blob_in, dim):
    """Add the attribute FC output, then the regular BPM outputs.

    The input is globally average-pooled into ``'attr_pool'`` and projected
    by an FC layer to ``cfg.REID.PSE_VIEW`` outputs (``'attr_fc'``).
    ``add_bpm_outputs`` is then invoked on the original input.

    Args:
        model: Model helper exposing ``AveragePool`` and ``FC``.
        blob_in: Input feature blob.
        dim: Channel count of ``blob_in``.
    """
    pooled = model.AveragePool(blob_in, 'attr_pool', global_pooling=True)
    model.FC(
        pooled,
        'attr_fc',
        dim,
        cfg.REID.PSE_VIEW,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )
    add_bpm_outputs(model, blob_in, dim)
def _add_instance_level_classifier(model, blob_in, dim_in):
    """Build the instance-level domain classifier behind ``NegateGradient``.

    The negated-gradient blob is additionally passed through
    ``StopGradient``, and the loss is created with a scale of
    ``0.0 * model.GetLossScale()``.

    Args:
        model: Model helper exposing ``net``, ``StopGradient``, ``FC``,
            ``Relu``, ``Dropout``, ``GetLossScale``, ``AddLosses`` and
            ``train``.
        blob_in: Input feature blob.
        dim_in: Input dimension of the first FC layer.

    Returns:
        Tuple ``(loss_gradients, dc_ip3)``; ``loss_gradients`` is ``None``
        when ``model.train`` is falsy.
    """
    from detectron.utils.c2 import const_fill
    from detectron.utils.c2 import gauss_fill

    grl_blob = model.net.NegateGradient(blob_in, 'dc_grl')
    model.StopGradient('dc_grl', 'dc_grl')

    hidden_1 = model.FC(
        grl_blob, 'dc_ip1', dim_in, 1024,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0),
    )
    hidden_1 = model.Relu(hidden_1, 'dc_relu_1')
    hidden_1 = model.Dropout(hidden_1, 'da_drop_1', ratio=0.5, is_test=0)

    hidden_2 = model.FC(
        hidden_1, 'dc_ip2', 1024, 1024,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0),
    )
    hidden_2 = model.Relu(hidden_2, 'dc_relu_2')
    hidden_2 = model.Dropout(hidden_2, 'da_drop_2', ratio=0.5, is_test=0)

    dc_ip3 = model.FC(
        hidden_2, 'dc_ip3', 1024, 1,
        weight_init=gauss_fill(0.05), bias_init=const_fill(0.0),
    )

    if not model.train:
        return None, dc_ip3

    loss_blob = model.net.SigmoidCrossEntropyLoss(
        [dc_ip3, 'dc_label'],
        'loss_dc',
        scale=0.0 * model.GetLossScale(),
    )
    loss_gradients = blob_utils.get_loss_gradients(model, [loss_blob])
    model.AddLosses('loss_dc')
    return loss_gradients, dc_ip3
def add_mlp_outputs(model, blob_in, dim):
    """Attach the classification logits layer (and softmax at test time).

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``train`` and
            ``num_classes``.
        blob_in: Feature blob fed to the fully connected layer.
        dim: Input dimension of the fully connected layer.
    """
    model.FC(
        blob_in,
        'logits',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )
    if model.train:
        # During training the softmax is combined with the label cross
        # entropy loss for numerical stability, so nothing more is added.
        return
    model.Softmax('logits', 'cls_prob', engine='CUDNN')
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs.

    Dispatches to a specialised builder when one of the cascade / class
    embedding options is enabled in ``cfg.MRCNN``. Otherwise it builds the
    standard FC or 1x1-conv mask predictor.

    Args:
        model: Model helper exposing ``FC``, ``Conv``,
            ``BilinearInterpolation``, ``net`` and ``train``.
        blob_in: Input feature blob.
        dim: Channel count (or flattened size for the FC path) of
            ``blob_in``.

    Returns:
        The output blob: mask logits when training, ``'mask_fcn_probs'``
        when testing, or the result of the specialised builder.
    """
    if cfg.MRCNN.DP_CASCADE_MASK_ON:
        return add_dp_cascaded_mask_outputs(model, blob_in, dim)
    if cfg.MRCNN.BBOX_CASCADE_MASK_ON:
        if cfg.MRCNN.USE_CLS_EMBS:
            return add_mask_emb_outputs(model, blob_in, dim)
        return add_cascaded_mask_outputs(model, blob_in, dim)

    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1

    if cfg.MRCNN.USE_CLS_EMBS:
        return add_mask_emb_outputs(model, blob_in, dim)

    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the
        # blob name).
        blob_out = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict mask using Conv.
        # Use GaussianFill for class-agnostic mask prediction; fills based
        # on fan-in can be too large in this case and cause divergence.
        fill = (
            cfg.MRCNN.CONV_INIT
            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO
            )

    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')

    return blob_out
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Attach RoI classification / box regression layers with PADA hooks.

    When training with ``cfg.TRAIN.PADA`` a gradient-weighting layer is
    added on ``'cls_score'``. For class-specific box regression another one
    is applied to the features feeding the box regressor.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``,
            ``PADAbyGradientWeightingLayer``, ``train`` and ``num_classes``.
        blob_in: Feature blob fed to both fully connected layers.
        dim: Input dimension handed to both fully connected layers.
    """
    # Classification scores.
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0),
    )

    pada_training = model.train and cfg.TRAIN.PADA
    if pada_training:
        model.PADAbyGradientWeightingLayer(
            'cls_score', 'pada_cls_score', 'source_labels_int32'
        )

    if not model.train:
        # Inference only: during training the softmax is combined with the
        # label cross entropy loss for numerical stability.
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Box regression: 4 outputs per regression class.
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes

    if pada_training and not cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        # Original author's note: the class-specific bbox predictors are
        # independent of each other. Here the weighting layer is applied to
        # the input features that feed the regressor.
        blob_in = model.PADAbyGradientWeightingLayer(
            blob_in, 'pada_weighted_feats', 'source_labels_int32'
        )

    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0),
    )
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs.

    Args:
        model: Model helper exposing ``FC``, ``Conv``,
            ``BilinearInterpolation``, ``net`` and ``train``.
        blob_in: Input feature blob.
        dim: Channel count of ``blob_in``.

    Returns:
        The output blob: mask logits when training, ``'mask_fcn_probs'``
        when testing.
    """
    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        num_cls = cfg.MODEL.NUM_CLASSES
    else:
        num_cls = 1

    if cfg.MRCNN.USE_FC_OUTPUT:
        # Fully connected predictor (ignore 'fcn' in the blob name). Its
        # input size is dim times the squared pre-upsampling resolution.
        dim_fc = int(dim * (cfg.MRCNN.RESOLUTION / cfg.MRCNN.UPSAMPLE_RATIO)**2)
        mask_blob = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim_fc,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0),
        )
    else:
        # 1x1 conv predictor. Class-agnostic prediction uses GaussianFill
        # because fan-in based fills can be too large and cause divergence.
        if cfg.MRCNN.CLS_SPECIFIC_MASK:
            fill = cfg.MRCNN.CONV_INIT
        else:
            fill = 'GaussianFill'
        mask_blob = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0),
        )

        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            mask_blob = model.BilinearInterpolation(
                'mask_fcn_logits',
                'mask_fcn_logits_up',
                num_cls,
                num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO,
            )

    if model.train:
        return mask_blob
    # Test time: convert logits to probabilities.
    return model.net.Sigmoid(mask_blob, 'mask_fcn_probs')
def add_wsl_oicr_outputs(model, blob_in, dim, prefix=''):
    """Add three parallel classification heads and average them at test time.

    Heads are named ``<prefix>cls_score1`` .. ``<prefix>cls_score3``. At
    test time each gets a softmax (``<prefix>cls_prob<k>``) and their mean
    is written to ``<prefix>cls_prob``.

    Args:
        model: Model helper exposing ``FC``, ``Softmax``, ``net``, ``train``
            and ``num_classes``.
        blob_in: Feature blob fed to every fully connected layer.
        dim: Input dimension handed to every fully connected layer.
        prefix: String prepended to every blob name.
    """
    num_heads = 3
    head_ids = range(1, num_heads + 1)

    for head in head_ids:
        model.FC(
            blob_in,
            prefix + 'cls_score' + str(head),
            dim,
            model.num_classes,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )

    if model.train:
        # During training the softmax is combined with the label cross
        # entropy loss for numerical stability, so nothing more is added.
        return

    head_probs = [
        model.Softmax(
            prefix + 'cls_score' + str(head),
            prefix + 'cls_prob' + str(head),
            axis=1,
        )
        for head in head_ids
    ]
    model.net.Mean(head_probs, prefix + 'cls_prob')
def mask_rcnn_fcn_head_v1upXconvs_gn(model, blob_in, dim_in, spatial_scale,
                                     num_convs):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm.

    After ``num_convs - 1`` shared conv layers the head splits in two:

    * Branch 1 (FCN): conv 3x3 -> ReLU -> stride-2 ConvTranspose -> ReLU.
    * Branch 2 (fc): conv 3x3 -> conv 3x3 (half width) -> FC -> reshape to
      a single-channel map of side ``2 * ROI_XFORM_RESOLUTION`` -> 1x1 conv
      to ``dim_inner`` channels -> ReLU.

    The two branches are summed into ``'fc_fusion_mask'``.

    Args:
        model: Model helper exposing ``RoIFeatureTransform``, ``ConvGN``,
            ``Conv``, ``ConvTranspose``, ``FC``, ``Relu`` and ``net``.
        blob_in: Input feature blob.
        dim_in: Channel count of ``blob_in``.
        spatial_scale: Forwarded to ``RoIFeatureTransform``.
        num_convs: Total conv depth; ``num_convs - 1`` layers are shared
            before the split.

    Returns:
        Tuple ``(blob_mask, dim_inner)``: the fused blob and its channel
        count (``cfg.MRCNN.DIM_REDUCED``).
    """
    resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    half_dim = int(dim_inner / 2)

    # Shared trunk: the branches split one layer before the last.
    for i in range(num_convs - 1):
        current = model.ConvGN(
            current,
            '_[mask]_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3,
            dilation=dilation,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner

    # Number of shared layers actually built (0 when the loop did not run).
    split_i = max(num_convs - 1, 0)

    # Branch 1 - FCN. The input width is dim_in, which still equals the
    # RoI feature width when no shared layer was built.
    convfcn1 = model.ConvGN(
        current,
        '_[mask]_fcn' + str(split_i + 1),
        dim_in,
        dim_inner,
        group_gn=get_group_gn(dim_inner),
        kernel=3,
        pad=1,
        stride=1,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    convfcn1_r = model.Relu(convfcn1, convfcn1)

    # Upsample layer (2x).
    model.ConvTranspose(
        convfcn1_r,
        'conv5_mask_fcn',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask_fcn = model.Relu('conv5_mask_fcn', 'conv5_mask_fcn')

    # Branch 2 - fc + FCN.
    convfc1 = model.ConvGN(
        current,
        '_[mask]_fc' + str(split_i + 1),
        dim_in,
        dim_inner,
        group_gn=get_group_gn(dim_inner),
        kernel=3,
        pad=1,
        stride=1,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    convfc1_r = model.Relu(convfc1, convfc1)

    # Conv layer that halves the number of channels to reduce computation.
    convfc2 = model.ConvGN(
        convfc1_r,
        '_[mask]_fc' + str(split_i + 2),
        dim_inner,
        half_dim,
        group_gn=get_group_gn(half_dim),
        kernel=3,
        pad=1,
        stride=1,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    convfc2_r = model.Relu(convfc2, convfc2)

    # FC layer: half_dim * R^2 inputs -> 4 * R^2 outputs, i.e. one value
    # per pixel of a (2R x 2R) map.
    fc_name = '_[mask]_fc' + str(split_i + 3)
    model.FC(
        convfc2_r,
        fc_name,
        half_dim * resolution**2,
        4 * resolution**2,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )

    # Reshape the FC output to (n, 1, 2R, 2R) so it can be fused with the
    # FCN branch. Note this has a single channel, unlike the FCN branch.
    model.net.Reshape(
        [fc_name],
        ['_[mask]_fc_reshaped', '_[mask]_fc_old_shaped' + str(split_i + 3)],
        shape=(-1, 1, resolution * 2, resolution * 2)
    )

    # Expand the single channel to dim_inner channels with a 1x1 conv so
    # the shape matches the FCN branch.
    # TODO: Freeze this layer; this step costs memory and should be
    # replaced once a better approach is found.
    model.Conv(
        '_[mask]_fc_reshaped',
        '_[mask]_fc_bg_fg',
        1,
        dim_inner,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=const_fill(1.0),
        bias_init=const_fill(0.0)
    )
    blob_mask_fc = model.Relu('_[mask]_fc_bg_fg', '_[mask]_fc_bg_fg')

    # Fuse the two branches.
    blob_mask = model.net.Sum([blob_mask_fcn, blob_mask_fc], 'fc_fusion_mask')
    return blob_mask, dim_inner
def mask_rcnn_fcn_head_v1upXconvs(model, blob_in, dim_in, spatial_scale,
                                  num_convs):
    """v1upXconvs design: X * (conv 3x3), convT 2x2.

    Extended with an fc fusion tail similar to PANet
    (https://arxiv.org/pdf/1803.01534.pdf). The ops are added in this order:

    * ``RoIFeatureTransform`` over the ``mask_rois`` blob;
    * a trunk of ``num_convs - 1`` 3x3 ``Conv`` + ReLU layers (dilated by
      ``cfg.MRCNN.DILATION``);
    * branch 1 (FCN): one undilated 3x3 ``Conv`` + ReLU followed by a
      kernel-2, stride-2 ``ConvTranspose`` + ReLU (``conv5_mask_fcn``);
    * branch 2 (fc): a dilated 3x3 ``Conv`` + ReLU, an undilated 3x3 ``Conv``
      + ReLU that halves the channel count, an ``FC`` layer, a ``Reshape`` to
      a single-channel map of side ``2 * cfg.MRCNN.ROI_XFORM_RESOLUTION`` and
      a 1x1 ``Conv`` + ReLU that expands it to ``cfg.MRCNN.DIM_REDUCED``
      channels;
    * an element-wise ``Sum`` of both branches into ``fc_fusion_mask``.

    Args:
        model: model helper the ops are added to.
        blob_in: input blob(s) forwarded to ``RoIFeatureTransform``.
        dim_in: channel count of the RoI features entering the first conv.
        spatial_scale: forwarded unchanged to ``RoIFeatureTransform``.
        num_convs: total number of 3x3 convs on the FCN path (trunk convs
            plus the branch-1 conv).

    Returns:
        Tuple ``(blob_mask, dim_inner)`` where ``blob_mask`` is the output of
        the ``Sum`` op and ``dim_inner`` is ``cfg.MRCNN.DIM_REDUCED``.
    """
    # TODO: Modify config file to include option to implement fc_fusion
    # TODO: Add fc_fusion layers in a if condition.
    resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=resolution,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED
    dim_half = dim_inner // 2
    split_i = 0  # number of trunk convs actually created

    logger = logging.getLogger(__name__)
    logger.info('Implementing FC Fusion + Dilated Convolutions in Mask Branch')

    # Trunk: the branches split from the penultimate layer, hence
    # num_convs - 1 iterations.
    for i in range(num_convs - 1):
        current = model.Conv(
            current,
            '_[mask]_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            kernel=3,
            dilation=dilation,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.}))
        current = model.Relu(current, current)
        dim_in = dim_inner
        split_i = i + 1

    # From here on `dim_in` is the channel count of `current`: it equals
    # dim_inner once at least one trunk conv ran, and is still the caller's
    # value when num_convs <= 1 (empty trunk). Both branches therefore take
    # `dim_in` as their input width instead of assuming dim_inner.

    # FC fusion: the first branch is a plain FCN, the second one goes through
    # an fc layer before being merged back.

    # Branch 1 - FCN
    # TODO: no dilation in branches
    convfcn1 = model.Conv(
        current,
        '_[mask]_fcn' + str(split_i + 1),
        dim_in,
        dim_inner,
        kernel=3,
        pad=1,
        stride=1,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))
    convfcn1_r = model.Relu(convfcn1, convfcn1)

    # Upsample layer
    model.ConvTranspose(
        convfcn1_r,
        'conv5_mask_fcn',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))
    blob_mask_fcn = model.Relu('conv5_mask_fcn', 'conv5_mask_fcn')

    # Branch 2 - fc + FCN
    convfc1 = model.Conv(
        current,
        '_[mask]_fc' + str(split_i + 1),
        dim_in,
        dim_inner,
        kernel=3,
        pad=1 * dilation,
        dilation=dilation,
        stride=1,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))
    convfc1_r = model.Relu(convfc1, convfc1)

    # Conv layer that halves the channel count to reduce the cost of the
    # fc layer that follows.
    convfc2 = model.Conv(
        convfc1_r,
        '_[mask]_fc' + str(split_i + 2),
        dim_inner,
        dim_half,
        kernel=3,
        pad=1,
        stride=1,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))
    convfc2_r = model.Relu(convfc2, convfc2)

    # fc layer
    fc_name = '_[mask]_fc' + str(split_i + 3)
    model.FC(
        convfc2_r,
        fc_name,
        dim_half * resolution**2,  # e.g. 128*14*14
        4 * resolution**2,  # e.g. 4*14*14 = 28*28
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0))

    # Reshape the fc output into a single-channel map so it can be added to
    # the FCN branch. Note that the channel count still differs from the
    # final FCN layer of the other branch at this point.
    model.net.Reshape(
        [fc_name],  # [input]
        ['_[mask]_fc_reshaped',
         '_[mask]_fc_old_shaped' + str(split_i + 3)],  # [output, old_shape]
        shape=(-1, 1, resolution * 2, resolution * 2)  # (n, c, h, w)
    )

    # Expand to dim_inner channels with a 1x1 conv so the shape matches the
    # final FCN layer of the other branch.
    # This step is not recommended; replace it with something cheaper in
    # memory when a better idea comes up.
    # TODO: Freeze this layer
    model.Conv(
        '_[mask]_fc_reshaped',
        '_[mask]_fc_bg_fg',
        1,
        dim_inner,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=const_fill(1.0),
        bias_init=const_fill(0.0))
    blob_mask_fc = model.Relu('_[mask]_fc_bg_fg', '_[mask]_fc_bg_fg')

    # Adding the 2 branches
    blob_mask = model.net.Sum([blob_mask_fcn, blob_mask_fc], 'fc_fusion_mask')

    return blob_mask, dim_inner
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    For every pyramid level from ``cfg.FPN.RPN_MIN_LEVEL`` to
    ``cfg.FPN.RPN_MAX_LEVEL`` this adds a 3x3 hidden conv + ReLU, a 1x1
    classification-logits conv and a 1x1 bbox-delta conv. The finest level
    creates the parameters; every other level reuses them via ``ConvShared``.
    When proposals are needed (inference, or Faster R-CNN training) a
    Sigmoid and a ``GenerateProposals`` op are added per level as well.

    ``blobs_in`` and ``spatial_scales`` are indexed from the coarsest level
    to the finest one.
    """
    anchors_per_location = len(cfg.FPN.RPN_ASPECT_RATIOS)
    hidden_dim = dim_in
    coarsest = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    finest = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == coarsest - finest + 1
    finest_tag = str(finest)

    def level_conv(src, prefix, level, channels_out, kernel, pad):
        """Conv that owns its weights on the finest level, shares elsewhere."""
        blob_name = prefix + str(level)
        if level == finest:
            # Randomly initialized weights and zeroed biases; these are the
            # parameters every other level shares.
            return model.Conv(
                src,
                blob_name,
                dim_in,
                channels_out,
                kernel=kernel,
                pad=pad,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0))
        owner = prefix + finest_tag
        return model.ConvShared(
            src,
            blob_name,
            dim_in,
            channels_out,
            kernel=kernel,
            pad=pad,
            stride=1,
            weight=owner + '_w',
            bias=owner + '_b')

    for lvl in range(finest, coarsest + 1):
        # Inputs are stored in reversed (coarsest-first) order.
        feature_blob = blobs_in[coarsest - lvl]
        scale = spatial_scales[coarsest - lvl]
        tag = str(lvl)

        # RPN hidden representation
        hidden = level_conv(feature_blob, 'conv_rpn_fpn', lvl, hidden_dim, 3, 1)
        model.Relu(hidden, hidden)
        # Proposal classification scores
        cls_logits = level_conv(
            hidden, 'rpn_cls_logits_fpn', lvl, anchors_per_location, 1, 0)
        # Proposal bbox regression deltas
        bbox_deltas = level_conv(
            hidden, 'rpn_bbox_pred_fpn', lvl, 4 * anchors_per_location, 1, 0)

        # Proposals are needed during inference (RPN only and Faster R-CNN)
        # and during Faster R-CNN training; RPN-only training skips them.
        if model.train and not cfg.MODEL.FASTER_RCNN:
            continue

        anchors = generate_anchors(
            stride=2.**lvl,
            sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - finest), ),
            aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS)
        cls_probs = model.net.Sigmoid(cls_logits, 'rpn_cls_probs_fpn' + tag)
        model.GenerateProposals(
            [cls_probs, bbox_deltas, 'im_info'],
            ['rpn_rois_fpn' + tag, 'rpn_roi_probs_fpn' + tag],
            anchors=anchors,
            spatial_scale=scale)
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN classification and bounding-box regression outputs.

    Optionally reduces the input to ``dim_reduce`` channels with a 1x1 conv
    + ReLU (skipped when ``dim_reduce`` is None), then adds 1x1 score convs,
    ``PSRoIPool`` ops over the ``rois`` blob and average pooling to produce
    ``cls_score`` (plus ``cls_prob`` when not training) and ``bbox_pred``.
    """
    features = blob_in
    feature_dim = dim_in
    if dim_reduce is not None:
        # Optional dim reduction
        features = model.Conv(
            features,
            'conv_dim_reduce',
            feature_dim,
            dim_reduce,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0))
        features = model.Relu(features, features)
        feature_dim = dim_reduce

    grid_size = cfg.RFCN.PS_GRID_SIZE
    grid_cells = grid_size**2

    # Classification conv
    model.Conv(
        features,
        'conv_cls',
        feature_dim,
        model.num_classes * grid_cells,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0))

    # Bounding-box regression conv
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2
    else:
        num_bbox_reg_classes = model.num_classes
    bbox_dim = 4 * num_bbox_reg_classes
    model.Conv(
        features,
        'conv_bbox_pred',
        feature_dim,
        bbox_dim * grid_cells,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0))

    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=grid_size,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale)
    model.AveragePool('psroipooled_cls', 'cls_score_4d', kernel=grid_size)
    model.net.Reshape(
        'cls_score_4d',
        ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES))
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=grid_size,
        output_dim=bbox_dim,
        spatial_scale=spatial_scale)
    model.AveragePool('psroipooled_bbox', 'bbox_pred', kernel=grid_size)
def add_track_outputs(model, blob_in, dim):
    """Add tracking head outputs that score pairs of RoIs.

    ``blob_in`` is split along axis 0 into ``track_fc_one`` and
    ``track_fc_two`` using the sizes in ``track_n_rois``. The first part is
    expanded with ``Repeat`` and the second with ``Tile`` so the two line up
    pair-wise, and the pairs are scored according to ``cfg.TRCNN.OUTPUT``:

    * ``'Cosine'``: cosine similarity of each pair, expanded to
      ``track_similarity``.
    * ``'MatchNet'``: the pair features are concatenated and passed through
      three FC layers (``track_score`` has 2 outputs). At test time a softmax
      is applied, column 1 is sliced out and transposed into
      ``track_similarity``.

    During training, ``track_ids_int32`` is additionally split and fed to
    ``GenerateTrackingLabels`` to produce ``track_int32``.

    Args:
        model: model helper the ops are added to.
        blob_in: per-RoI feature blob to split into the two groups.
        dim: feature width of ``blob_in``; the MatchNet input is ``2 * dim``.

    Returns:
        The output blob: ``track_similarity`` for ``'Cosine'`` and for
        ``'MatchNet'`` at test time, ``track_score`` for ``'MatchNet'`` during
        training.

    Raises:
        ValueError: if ``cfg.TRCNN.OUTPUT`` is neither ``'Cosine'`` nor
            ``'MatchNet'``.
    """
    model.EnsureCPUOutput("track_n_rois", "track_n_rois_cpu")

    if model.train:
        model.Split(
            ["track_ids_int32", "track_n_rois_cpu"],
            ["track_ids_one_int32", "track_ids_two_int32"],
            axis=0)
        model.GenerateTrackingLabels(
            ["track_ids_one_int32", "track_ids_two_int32"], "track_int32")

    model.Split(
        [blob_in, "track_n_rois_cpu"],
        ["track_fc_one", "track_fc_two"],
        axis=0)

    repeat_outputs = ["track_fc_one_repeat"]
    if model.train:
        repeat_outputs.append("track_fc_one_repeat_lengths")
    model.Repeat(
        ["track_fc_one", "track_n_rois_two"],
        repeat_outputs)  # (n_pairs, mlp_dim)
    model.Tile(
        ["track_fc_two", "track_n_rois_one"],
        "track_fc_two_tile",
        axis=0)  # (n_pairs, mlp_dim)

    if cfg.TRCNN.OUTPUT == 'Cosine':
        # Cosine tracking head architecture
        model.CosineSimilarity(
            ["track_fc_one_repeat", "track_fc_two_tile"],
            "track_cos_similarity")  # (n_pairs,)
        blob_out = model.ExpandDims(
            "track_cos_similarity", "track_similarity",
            dims=[0])  # (1, n_pairs)
    elif cfg.TRCNN.OUTPUT == 'MatchNet':
        # MatchNet tracking head architecture
        hidden_dim = cfg.TRCNN.MLP_HIDDEN_DIM
        model.Concat(
            ["track_fc_one_repeat", "track_fc_two_tile"], "track_pairs")
        model.FC(
            "track_pairs",
            "track_pairs_fc1",
            2 * dim,
            hidden_dim,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )
        model.Relu("track_pairs_fc1", "track_pairs_fc1")
        model.FC(
            "track_pairs_fc1",
            "track_pairs_fc2",
            hidden_dim,
            hidden_dim,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )
        model.Relu("track_pairs_fc2", "track_pairs_fc2")
        blob_out = model.FC(
            "track_pairs_fc2",
            "track_score",
            hidden_dim,
            2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0),
        )
        if not model.train:  # == if test
            # Only add softmax when testing; during training the softmax is
            # combined with the label cross entropy loss for numerical
            # stability
            model.Softmax("track_score", "track_prob", axis=1, engine='CUDNN')
            model.Slice(
                "track_prob", "track_similarity_",
                starts=[0, 1], ends=[-1, -1])
            blob_out = model.Transpose("track_similarity_", "track_similarity")
    else:
        # Without this branch an unknown setting would only surface as an
        # UnboundLocalError on the return statement below.
        raise ValueError(
            "Unsupported cfg.TRCNN.OUTPUT {!r}; expected 'Cosine' or "
            "'MatchNet'".format(cfg.TRCNN.OUTPUT))

    return blob_out
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps.

    Depending on ``cfg.KRCNN`` this optionally adds a ConvTranspose + ReLU on
    the input features, predicts ``cfg.KRCNN.NUM_KEYPOINTS`` heatmaps with
    either a ConvTranspose or a 1x1 Conv, and optionally enlarges them with
    ``BilinearInterpolation``. Returns the final heatmap blob.
    """
    # NxKxHxW
    use_bilinear = cfg.KRCNN.UP_SCALE > 1
    features = blob_in
    feature_dim = dim

    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling
        features = model.ConvTranspose(
            features,
            'kps_deconv',
            feature_dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0))
        model.Relu('kps_deconv', 'kps_deconv')
        feature_dim = cfg.KRCNN.DECONV_DIM

    # The raw prediction only carries the final name when no bilinear
    # upsampling follows.
    score_name = 'kps_score_lowres' if use_bilinear else 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling
        predict = model.ConvTranspose
        kernel = cfg.KRCNN.DECONV_KERNEL
        pad = int(cfg.KRCNN.DECONV_KERNEL / 2 - 1)
        stride = 2
    else:
        # Use Conv to predict heatmaps; does no upsampling
        predict = model.Conv
        kernel, pad, stride = 1, 0, 1

    heatmaps = predict(
        features,
        score_name,
        feature_dim,
        cfg.KRCNN.NUM_KEYPOINTS,
        kernel=kernel,
        pad=pad,
        stride=stride,
        weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0))

    if not use_bilinear:
        return heatmaps

    # Increase heatmap output size via bilinear upsampling
    return model.BilinearInterpolation(
        heatmaps,
        'kps_score',
        cfg.KRCNN.NUM_KEYPOINTS,
        cfg.KRCNN.NUM_KEYPOINTS,
        cfg.KRCNN.UP_SCALE)
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    Each pyramid level in ``[cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL]``
    gets a 3x3 hidden conv + ReLU and two 1x1 convs (classification logits
    and bbox deltas). Parameters are created on the finest level and shared
    by all other levels. Sigmoid + ``GenerateProposals`` ops are appended per
    level unless the model is being trained as RPN only.

    ``blobs_in`` and ``spatial_scales`` are expected in reversed order
    (coarsest level first).
    """
    n_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_hidden = dim_in
    max_level = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    min_level = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == max_level - min_level + 1
    min_tag = str(min_level)

    def fresh_params(prefix):
        # Randomly initialized weights and zeroed biases (finest level only).
        return {
            'weight_init': gauss_fill(0.01),
            'bias_init': const_fill(0.0),
        }

    def shared_params(prefix):
        # Point at the weight/bias blobs created by the finest level.
        return {
            'weight': prefix + min_tag + '_w',
            'bias': prefix + min_tag + '_b',
        }

    for level in range(min_level, max_level + 1):
        reversed_idx = max_level - level  # inputs are coarsest-first
        level_blob = blobs_in[reversed_idx]
        level_scale = spatial_scales[reversed_idx]
        suffix = str(level)

        if level == min_level:
            conv_op, extra = model.Conv, fresh_params
        else:
            conv_op, extra = model.ConvShared, shared_params

        # RPN hidden representation
        rpn_hidden = conv_op(
            level_blob,
            'conv_rpn_fpn' + suffix,
            dim_in,
            dim_hidden,
            kernel=3,
            pad=1,
            stride=1,
            **extra('conv_rpn_fpn'))
        model.Relu(rpn_hidden, rpn_hidden)
        # Proposal classification scores
        rpn_logits = conv_op(
            rpn_hidden,
            'rpn_cls_logits_fpn' + suffix,
            dim_in,
            n_anchors,
            kernel=1,
            pad=0,
            stride=1,
            **extra('rpn_cls_logits_fpn'))
        # Proposal bbox regression deltas
        rpn_deltas = conv_op(
            rpn_hidden,
            'rpn_bbox_pred_fpn' + suffix,
            dim_in,
            4 * n_anchors,
            kernel=1,
            pad=0,
            stride=1,
            **extra('rpn_bbox_pred_fpn'))

        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        proposals_needed = not model.train or cfg.MODEL.FASTER_RCNN
        if proposals_needed:
            level_anchors = generate_anchors(
                stride=2.**level,
                sizes=(
                    cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(level - min_level),
                ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS)
            rpn_probs = model.net.Sigmoid(
                rpn_logits, 'rpn_cls_probs_fpn' + suffix)
            model.GenerateProposals(
                [rpn_probs, rpn_deltas, 'im_info'],
                ['rpn_rois_fpn' + suffix, 'rpn_roi_probs_fpn' + suffix],
                anchors=level_anchors,
                spatial_scale=level_scale)