Example #1
def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom,
                               dim_top, dim_lateral):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        # use GroupNorm
        lat = model.ConvGN(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            group_gn=get_group_gn(dim_top),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else
                         ('XavierFill', {})),
            bias_init=const_fill(0.0))
    else:
        lat = model.Conv(
            fpn_lateral,
            fpn_bottom + '_lateral',
            dim_in=dim_lateral,
            dim_out=dim_top,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else
                         ('XavierFill', {})),
            bias_init=const_fill(0.0))
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)
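A quick way to see what this module computes: the coarser top-down feature is upsampled 2x with nearest-neighbor interpolation and summed elementwise with the 1x1-projected lateral feature. Below is a minimal numpy sketch of that merge (shapes and channel counts are illustrative assumptions, not Detectron's blobs):

import numpy as np

def upsample_nearest_2x(x):
    # (C, H, W) -> (C, 2H, 2W): repeat every value in a 2x2 block
    return x.repeat(2, axis=1).repeat(2, axis=2)

top = np.random.rand(256, 4, 4)      # coarser FPN level (already projected to 256-d)
lateral = np.random.rand(256, 8, 8)  # finer backbone level after its 1x1 lateral conv
merged = upsample_nearest_2x(top) + lateral  # what UpsampleNearest + Sum produce
assert merged.shape == (256, 8, 8)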
Example #2
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(stride=1. / spatial_scale,
                               sizes=cfg.RPN.SIZES,
                               aspect_ratios=cfg.RPN.ASPECT_RATIOS)
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # RPN hidden representation
    model.Conv(blob_in,
               'conv_rpn',
               dim_in,
               dim_out,
               kernel=3,
               pad=1,
               stride=1,
               weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    model.Conv('conv_rpn',
               'rpn_cls_logits',
               dim_in,
               num_anchors,
               kernel=1,
               pad=0,
               stride=1,
               weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    # Proposal bbox regression deltas
    model.Conv('conv_rpn',
               'rpn_bbox_pred',
               dim_in,
               4 * num_anchors,
               kernel=1,
               pad=0,
               stride=1,
               weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))

    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
                                ['rpn_rois', 'rpn_roi_probs'],
                                anchors=anchors,
                                spatial_scale=spatial_scale)

    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Add op that generates training labels for in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
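The output channel counts above follow directly from the anchor grid: generate_anchors produces one anchor per (size, aspect ratio) pair, so rpn_cls_logits needs one channel per anchor and rpn_bbox_pred four. A sanity check with assumed config values (the actual cfg.RPN.SIZES and cfg.RPN.ASPECT_RATIOS depend on the experiment):

sizes = (32, 64, 128, 256, 512)   # assumed stand-in for cfg.RPN.SIZES
aspect_ratios = (0.5, 1.0, 2.0)   # assumed stand-in for cfg.RPN.ASPECT_RATIOS
num_anchors = len(sizes) * len(aspect_ratios)
print(num_anchors)       # 15 -> channels of 'rpn_cls_logits'
print(4 * num_anchors)   # 60 -> channels of 'rpn_bbox_pred'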
Example #3
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(blob_in,
                             'conv_dim_reduce',
                             dim_in,
                             dim_reduce,
                             kernel=1,
                             pad=0,
                             stride=1,
                             weight_init=gauss_fill(0.01),
                             bias_init=const_fill(0.0))
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Classification conv
    model.Conv(blob_in,
               'conv_cls',
               dim_in,
               model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
               kernel=1,
               pad=0,
               stride=1,
               weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    # Bounding-box regression conv
    num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else
                            model.num_classes)
    model.Conv(blob_in,
               'conv_bbox_pred',
               dim_in,
               4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
               kernel=1,
               pad=0,
               stride=1,
               weight_init=gauss_fill(0.01),
               bias_init=const_fill(0.0))
    # Classification PS RoI pooling
    model.net.PSRoIPool(['conv_cls', 'rois'],
                        ['psroipooled_cls', '_mapping_channel_cls'],
                        group_size=cfg.RFCN.PS_GRID_SIZE,
                        output_dim=model.num_classes,
                        spatial_scale=spatial_scale)
    model.AveragePool('psroipooled_cls',
                      'cls_score_4d',
                      kernel=cfg.RFCN.PS_GRID_SIZE)
    model.net.Reshape('cls_score_4d', ['cls_score', '_cls_scores_shape'],
                      shape=(-1, cfg.MODEL.NUM_CLASSES))
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(['conv_bbox_pred', 'rois'],
                        ['psroipooled_bbox', '_mapping_channel_bbox'],
                        group_size=cfg.RFCN.PS_GRID_SIZE,
                        output_dim=4 * num_bbox_reg_classes,
                        spatial_scale=spatial_scale)
    model.AveragePool('psroipooled_bbox',
                      'bbox_pred',
                      kernel=cfg.RFCN.PS_GRID_SIZE)
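The R-FCN channel bookkeeping is easy to lose track of: conv_cls emits k*k position-sensitive score maps per class, PSRoIPool reassembles them into a k x k grid with one channel per class, and the k x k average pool collapses each RoI to a single score per class. A worked check with assumed values (k and the class count vary by config):

k = 7              # assumed cfg.RFCN.PS_GRID_SIZE
num_classes = 81   # assumed model.num_classes (e.g., COCO + background)
print(num_classes * k**2)   # 3969 channels in 'conv_cls'
# PSRoIPool: (num_rois, num_classes, k, k)
# AveragePool(kernel=k): (num_rois, num_classes, 1, 1)
# Reshape(shape=(-1, num_classes)): (num_rois, num_classes) == 'cls_score'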
Example #4
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(blob_in,
             'cls_score',
             dim,
             model.num_classes,
             weight_init=gauss_fill(0.01),
             bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is combined
        # with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(blob_in,
             'bbox_pred',
             dim,
             model.num_classes * 4,
             weight_init=gauss_fill(0.001),
             bias_init=const_fill(0.0))

    if cfg.MODEL.ATTR and model.train:
        in_dim = dim
        if cfg.MODEL.CLS_EMBED:
            # first slice the fc7 feature
            model.net.SelectFG([blob_in, 'fg_idx'], 'fc7_fg')
            model.create_param(param_name='class_embedding',
                               initializer=initializers.Initializer(
                                   "GaussianFill", std=0.01),
                               shape=[model.num_classes, 256])
            # op that just takes the class index and returns the corresponding row
            model.net.Embed(['class_embedding', 'labels_int32_fg'], 'embed_fg')
            # then do concatenation
            model.net.Concat(['fc7_fg', 'embed_fg'],
                             ['concat_attr', 'concat_split'],
                             axis=1)
            in_dim += 256
        else:
            model.net.SelectFG([blob_in, 'fg_idx'], 'concat_attr')

        model.FC('concat_attr',
                 'fc_attr',
                 in_dim,
                 512,
                 weight_init=gauss_fill(0.01),
                 bias_init=const_fill(0.0))
        model.Relu('fc_attr', 'fc_attr')
        model.FC('fc_attr',
                 'attr_score',
                 512,
                 model.num_attributes,
                 weight_init=gauss_fill(0.01),
                 bias_init=const_fill(0.0))
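The attribute branch grows the input dimension by 256 when class embeddings are enabled: Embed is a row lookup into the (num_classes, 256) table, and the result is concatenated to the per-RoI features. A minimal numpy sketch with assumed shapes (4 foreground RoIs, arbitrary labels):

import numpy as np

num_classes, dim = 81, 2048
class_embedding = 0.01 * np.random.randn(num_classes, 256)  # GaussianFill(std=0.01)
fc7_fg = np.random.randn(4, dim)             # foreground RoI features
labels_int32_fg = np.array([5, 17, 5, 60])   # their class labels
embed_fg = class_embedding[labels_int32_fg]  # Embed: one row per label
concat_attr = np.concatenate([fc7_fg, embed_fg], axis=1)
assert concat_attr.shape == (4, dim + 256)   # in_dim = dim + 256 above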
Example #5
def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
    """v0up design: conv5, deconv 2x2 (no weight sharing with the box head)."""
    blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
        model,
        blob_in,
        dim_in,
        spatial_scale
    )

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    model.ConvTranspose(
        blob_conv5,
        'conv5_mask',
        dim_conv5,
        dim_reduced,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=('GaussianFill', {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_reduced
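The 2x2 ConvTranspose with stride 2 and no padding doubles the spatial resolution exactly, which is the whole point of the "up" in v0up. Checking with the standard transposed-convolution size formula (the 14 -> 28 case assumes a typical 14x14 conv5 RoI feature):

def convT_out(size, kernel=2, stride=2, pad=0):
    return (size - 1) * stride - 2 * pad + kernel

assert convT_out(14) == 28  # e.g., 14x14 mask features -> 28x28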
Example #6
def add_refine_net_mask_outputs(model, blob_in, dim_in):
    """ add Refine Net output
    blob_in: 'refine_mask_net_feat'
    blob_out: 'refined_mask_logits' or 'refined_mask_probs'
    """
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1

    # Use GaussianFill for class-agnostic mask prediction; fills based on
    # fan-in can be too large in this case and cause divergence
    fill = (cfg.MRCNN.CONV_INIT
            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
    blob_out = model.Conv(blob_in,
                          'refined_mask_logits',
                          dim_in,
                          num_cls,
                          kernel=1,
                          pad=0,
                          stride=1,
                          weight_init=(fill, {
                              'std': 0.001
                          }),
                          bias_init=const_fill(0.0))

    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'refined_mask_probs')

    return blob_out
Example #7
def fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale, num_convs=4):

    dilation = cfg.FCN.DILATION
    dim_inner = cfg.FCN.DIM_REDUCED

    for i in range(num_convs):
        current = model.Conv(
            blob_in,
            '_[mask]_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.FCN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.FCN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner
Example #8
def mask_rcnn_hourglass_head(model, blob_in, dim_in, spatial_scale):
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)

    prefix = 'mask_head_hg'
    n = cfg.MRCNN.NUM_HG_MODULES
    current, dim_inner = Hourglass.add_hourglass_head(model, current,
                                                      'mask_head_hg_out',
                                                      dim_in, prefix, n)

    # upsample layer
    model.ConvTranspose(current,
                        'conv5_mask',
                        dim_inner,
                        dim_inner,
                        kernel=2,
                        pad=0,
                        stride=2,
                        weight_init=(cfg.MRCNN.CONV_INIT, {
                            'std': 0.001
                        }),
                        bias_init=const_fill(0.0))
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner
Example #9
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and
    computation are shared with the conv5 box head. Computation can only be
    shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.
    """
    # Since box and mask head are shared, these must match
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if model.train:  # share computation with bbox head at training time
        dim_conv5 = 2048
        blob_conv5 = model.net.SampleAs(['res5_2_sum', 'roi_has_mask_int32'],
                                        ['_[mask]_res5_2_sum_sliced'])
    else:  # re-compute at test time
        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
            model, blob_in, dim_in, spatial_scale)

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    blob_mask = model.ConvTranspose(
        blob_conv5,
        'conv5_mask',
        dim_conv5,
        dim_reduced,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {
            'std': 0.001
        }),  # std only for gauss
        bias_init=const_fill(0.0))
    model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_reduced
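SampleAs is what makes the sharing work: at training time the mask head keeps only the rows of the box head's res5 output whose RoI actually carries a mask label. A numpy sketch of the selection with assumed, deliberately small shapes (the real blob has 2048 channels and many more RoIs):

import numpy as np

res5_2_sum = np.random.randn(8, 2048, 7, 7)              # box-head features, 8 RoIs
roi_has_mask_int32 = np.array([1, 0, 1, 1, 0, 1, 0, 0])  # mask indicator per RoI
sliced = res5_2_sum[roi_has_mask_int32 == 1]             # '_[mask]_res5_2_sum_sliced'
assert sliced.shape == (4, 2048, 7, 7)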
Example #10
def add_semantic_segms_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES

    # Predict mask using Conv

    # Use GaussianFill for class-agnostic mask prediction; fills based on
    # fan-in can be too large in this case and cause divergence
    fill = (cfg.MRCNN.CONV_INIT
            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
    blob_out = model.Conv(blob_in,
                          'semantic_segms_fcn_logits',
                          dim,
                          num_cls,
                          kernel=1,
                          pad=0,
                          stride=1,
                          weight_init=(fill, {
                              'std': 0.001
                          }),
                          bias_init=const_fill(0.0))

    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')

    return blob_out
Example #11
def add_boundary_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either boundary logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.BOUNDARY.CLS_SPECIFIC_MASK else 1

    if cfg.BOUNDARY.USE_FC_OUTPUT:
        # Predict boundaries with a fully connected layer (ignore 'fcn' in
        # the blob name)
        blob_out = model.FC(
            blob_in,
            'boundary_fcn_logits',
            dim,
            num_cls * cfg.BOUNDARY.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict boundary using Conv

        # Use GaussianFill for class-agnostic boundary prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        fill = (
            cfg.BOUNDARY.CONV_INIT
            if cfg.BOUNDARY.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in,
            'boundary_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

        if cfg.BOUNDARY.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'boundary_fcn_logits', 'boundary_fcn_logits_up', num_cls, num_cls,
                cfg.BOUNDARY.UPSAMPLE_RATIO
            )

    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'boundary_fcn_probs')

    return blob_out
Example #12
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1

    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the blob
        # name)
        blob_out = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict mask using Conv

        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        fill = (
            cfg.MRCNN.CONV_INIT
            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO
            )

    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')

    return blob_out
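The two output variants differ only in where the spatial layout lives: the FC variant flattens an M x M map per class into one long vector, while the 1x1 conv variant keeps one channel per class at the feature map's resolution. Output size arithmetic with assumed values (the class count and cfg.MRCNN.RESOLUTION are config-dependent):

num_cls, M = 81, 28    # assumed NUM_CLASSES and RESOLUTION
print(num_cls * M**2)  # 63504 FC outputs per RoI ('mask_fcn_logits')
print(num_cls)         # 81 channels from the 1x1 conv variant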
Example #13
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(blob_in,
             'cls_score',
             dim,
             model.num_classes,
             weight_init=gauss_fill(0.01),
             bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is combined
        # with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(blob_in,
             'bbox_pred',
             dim,
             model.num_classes * 4,
             weight_init=gauss_fill(0.001),
             bias_init=const_fill(0.0))
Example #14
def add_refine_net_head_isolate(model, blob_in, dim_in, prefix):
    """
    Function that abstracts away different choices of fcn model.
    Note that the refine head is free of indicator type.
    """
    # note that prefix must be 'mask' or 'keypoint'
    assert prefix in {'mask', 'keypoint'}, \
        "prefix must be 'mask' or 'keypoint'"
    blob_out = 'refine_' + prefix + '_net_feat'
    if cfg.REFINENET.HEAD == 'HOURGLASS':
        n = cfg.REFINENET.NUM_HG_MODULES
        current, dim_inner = Hourglass.add_hourglass_head(
            model, blob_in, 'refined_hg_out', dim_in, prefix, n)
        # upsample layer
        model.ConvTranspose(current,
                            blob_out,
                            dim_inner,
                            dim_inner,
                            kernel=2,
                            pad=0,
                            stride=2,
                            weight_init=(cfg.MRCNN.CONV_INIT, {
                                'std': 0.001
                            }),
                            bias_init=const_fill(0.0))
        return blob_out, dim_inner
    elif cfg.REFINENET.HEAD == 'MRCNN_FCN':
        # Use a head similar to the mask head, but with different blob names.
        # Note that this head uses a lot of GPU memory (~7GB for batch 512).
        num_convs = cfg.REFINENET.MRCNN_FCN.NUM_CONVS
        use_deconv = cfg.REFINENET.MRCNN_FCN.USE_DECONV
        blob_out, dim_out = add_fcn_head(model, blob_in, blob_out, dim_in,
                                         prefix, num_convs, use_deconv)
        return blob_out, dim_out
    elif cfg.REFINENET.HEAD == 'RESNET_FCN':
        # Use resnet-like structures as the head; this should be memory
        # efficient (~1GB for batch 512).
        n_downsampling = cfg.REFINENET.RESNET_FCN.NUM_DOWNSAMPLING_LAYERS
        num_res_blocks = cfg.REFINENET.RESNET_FCN.NUM_RES_BLOCKS
        use_deconv = cfg.REFINENET.RESNET_FCN.USE_DECONV
        blob_out, dim_out = add_resnet_head(model, blob_in, blob_out, dim_in,
                                            prefix, n_downsampling,
                                            num_res_blocks, use_deconv)
        return blob_out, dim_out
    elif cfg.REFINENET.HEAD == 'KRCNN':
        # Use keypoint rcnn like head
        blob_out, dim_out = add_krcnn_head(model, blob_in, blob_out, dim_in,
                                           prefix)
        return blob_out, dim_out
    else:
        raise NotImplementedError('{} not supported'.format(
            cfg.REFINENET.HEAD))
Example #15
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is combined
        # with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        model.num_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )
Example #16
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add a top-down lateral module."""
    # Lateral 1x1 conv
    lat = model.Conv(
        fpn_lateral,
        fpn_bottom + '_lateral',
        dim_in=dim_lateral,
        dim_out=dim_top,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=(
            const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL
            else ('XavierFill', {})
        ),
        bias_init=const_fill(0.0)
    )
    # Top-down 2x upsampling
    td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
    model.net.Sum([lat, td], fpn_bottom)
Example #17
def add_prn_outputs(model, blob_in, dim):
    """Add RoI classification output ops."""
    blob_out = model.FC(blob_in,
                        'prn_logits',
                        dim,
                        model.num_classes,
                        weight_init=gauss_fill(0.01),
                        bias_init=const_fill(0.0))
    if not model.train:  # == if test
        # Only add sigmoid when testing; during training the sigmoid is
        # combined with the label cross entropy loss for numerical stability
        blob_out = model.net.Sigmoid('prn_logits', 'prn_probs', engine='CUDNN')

    return blob_out
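The comment about numerical stability refers to the usual fused sigmoid cross-entropy trick: computing sigmoid and then log can underflow for large-magnitude logits, whereas the fused form max(x, 0) - x*y + log(1 + exp(-|x|)) stays finite. A small numpy illustration (not Detectron's loss op, just the identity it relies on):

import numpy as np

def bce_with_logits(x, y):
    # numerically stable -[y*log(sigmoid(x)) + (1-y)*log(1-sigmoid(x))]
    return np.maximum(x, 0) - x * y + np.log1p(np.exp(-np.abs(x)))

print(bce_with_logits(np.array([100.0]), np.array([1.0])))  # ~0.0, no overflow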
Example #18
def add_mlp_outputs(model, blob_in, dim):
    """Add  classification  ops."""

    model.FC(
        blob_in,
        'logits',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is combined
        # with the label cross entropy loss for numerical stability
        model.Softmax('logits', 'cls_prob', engine='CUDNN')
Example #19
def mask_rcnn_fcn_head_v1upXconvs_gn(model, blob_in, dim_in, spatial_scale,
                                     num_convs):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm"""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale)

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.ConvGN(current,
                               '_mask_fcn' + str(i + 1),
                               dim_in,
                               dim_inner,
                               group_gn=get_group_gn(dim_inner),
                               kernel=3,
                               pad=1 * dilation,
                               stride=1,
                               weight_init=(cfg.MRCNN.CONV_INIT, {
                                   'std': 0.001
                               }),
                               bias_init=('ConstantFill', {
                                   'value': 0.
                               }))
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(current,
                        'conv5_mask',
                        dim_inner,
                        dim_inner,
                        kernel=2,
                        pad=0,
                        stride=2,
                        weight_init=(cfg.MRCNN.CONV_INIT, {
                            'std': 0.001
                        }),
                        bias_init=const_fill(0.0))
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner
Example #20
def boundary_rcnn_fcn_head_v1upXconvs(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2."""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[boundary]_roi_feat',
        blob_rois='boundary_rois',
        method=cfg.BOUNDARY.ROI_XFORM_METHOD,
        resolution=cfg.BOUNDARY.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.BOUNDARY.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.BOUNDARY.DILATION
    dim_inner = cfg.BOUNDARY.DIM_REDUCED

    for i in range(num_convs):
        current = model.Conv(
            current,
            '_[boundary]_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.BOUNDARY.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_boundary',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.BOUNDARY.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_boundary = model.Relu('conv5_boundary', 'conv5_boundary')

    return blob_boundary, dim_inner
Example #21
def mask_rcnn_fcn_head_v1upXconvs(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2."""
    current = model.RoIFeatureTransform(
        blob_in,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    for i in range(num_convs):
        current = model.Conv(
            current,
            '_[mask]_fcn' + str(i + 1),
            dim_in,
            dim_inner,
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)
        dim_in = dim_inner

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner
Example #22
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Use a ResNet "conv5" / "stage5" head for mask prediction. Weights and
    computation are shared with the conv5 box head. Computation can only be
    shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.
    """
    # Since box and mask head are shared, these must match
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if model.train:  # share computation with bbox head at training time
        dim_conv5 = 2048
        blob_conv5 = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced']
        )
    else:  # re-compute at test time
        blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(
            model,
            blob_in,
            dim_in,
            spatial_scale
        )

    dim_reduced = cfg.MRCNN.DIM_REDUCED

    blob_mask = model.ConvTranspose(
        blob_conv5,
        'conv5_mask',
        dim_conv5,
        dim_reduced,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),  # std only for gauss
        bias_init=const_fill(0.0)
    )
    model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_reduced
Example #23
def add_pan_bottom_up_path_lateral(model, pan_level_info):
    """Add PAN connections based on the model described in the PAN paper."""
    # PAN levels are built starting from the finest level of the FPN.
    # First we recursively construct the higher (coarser-resolution) levels.
    # In detail:
    # N2 = P2,
    # N3 = Conv(Conv(N2, 3x3, s=2) + P3, 3x3, s=1)
    # N4 = Conv(Conv(N3, 3x3, s=2) + P4, 3x3, s=1)
    # N5 = Conv(Conv(N4, 3x3, s=2) + P5, 3x3, s=1)
    # There appears to be no level higher than N5 (i.e., P5) in PAN
    pan_dim = cfg.PAN.DIM
    xavier_fill = ('XavierFill', {})
    num_backbone_stages = len(pan_level_info.blobs)

    fpn_input_blobs = pan_level_info.blobs
    pan_blobs = [
        'pan_{}'.format(s)
        for s in pan_level_info.blobs
    ]
    spatial_scales = list(pan_level_info.spatial_scales)
    pan_dim_lateral = pan_level_info.dims

    # For the finest FPN level: N2 = P2 only seeds recursion
    pan_blobs[0] = pan_level_info.blobs[0]

    # For other levels add bottom-up path
    for i in range(num_backbone_stages - 1):
        # Bottom-up 3x3 subsampling conv
        subsample = model.Conv(
            pan_blobs[i],
            pan_blobs[i] + '_sub',
            dim_in=pan_dim,
            dim_out=pan_dim_lateral[i],
            kernel=3,
            pad=1,
            stride=2,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        model.Relu(subsample, subsample)
        # Sum lateral and bottom-up subsampled conv
        model.net.Sum([subsample, fpn_input_blobs[i + 1]], pan_blobs[i] + '_sum')

        # Post-hoc scale-specific 3x3 convs
        pan_blob = model.Conv(
            pan_blobs[i] + '_sum',
            pan_blobs[i + 1],
            dim_in=pan_dim_lateral[i],
            dim_out=pan_dim,
            kernel=3,
            pad=1,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        model.Relu(pan_blob, pan_blob)

    return pan_blobs, pan_dim, spatial_scales
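Each iteration of the loop halves the resolution of N_i with the stride-2 conv so that it aligns with the next FPN input P_(i+1) before the Sum. A quick check of that scale bookkeeping under assumed FPN spatial scales (finest first, the order used above):

fpn_scales = [1. / 4, 1. / 8, 1. / 16, 1. / 32]  # assumed P2..P5 scales
for i in range(len(fpn_scales) - 1):
    # stride-2 subsampling of N_i lands exactly on P_(i+1)'s scale
    assert fpn_scales[i] * 0.5 == fpn_scales[i + 1]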
Example #24
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [UnscopeGPUName(rpn_cls_probs_fpn._name),
                 UnscopeGPUName(rpn_bbox_pred_fpn._name), 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
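Unlike the single-scale RPN, each FPN level gets exactly one anchor size, doubling with the level, while the stride is the level's own 2**lvl. A worked table with assumed config values (the start size and level range follow common FPN settings, not necessarily this repo's defaults):

k_min, k_max = 2, 6    # assumed RPN_MIN_LEVEL / RPN_MAX_LEVEL
start_size = 32        # assumed RPN_ANCHOR_START_SIZE
for lvl in range(k_min, k_max + 1):
    stride = 2. ** lvl
    size = start_size * 2. ** (lvl - k_min)
    print(lvl, stride, size)  # (2, 4, 32) ... (6, 64, 512)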
Example #25
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4
    # backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    if cfg.FPN.USE_GN:
        # use GroupNorm
        c = model.ConvGN(
            lateral_input_blobs[0],
            output_blobs[0],  # note: this is a prefix
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        output_blobs[0] = c  # rename it
    else:
        model.Conv(
            lateral_input_blobs[0],
            output_blobs[0],
            dim_in=fpn_dim_lateral[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #

    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        if cfg.FPN.USE_GN:
            # use GroupNorm
            fpn_blob = model.ConvGN(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            fpn_blob = model.Conv(
                output_blobs[i],
                'fpn_{}'.format(fpn_level_info.blobs[i]),
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales
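Two bits of arithmetic above are worth making concrete: the number of backbone stages that receive FPN levels, and the halving of the spatial scale whenever a coarser level (P6 or the RetinaNet extras) is prepended. A sketch with assumed ResNet-style inputs (blob names and scales are illustrative):

blobs = ['res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum']  # coarsest first
spatial_scales = [1. / 32, 1. / 16, 1. / 8, 1. / 4]
min_level, LOWEST_BACKBONE_LVL = 2, 2
num_backbone_stages = len(blobs) - (min_level - LOWEST_BACKBONE_LVL)
assert num_backbone_stages == 4            # 4 - (2 - 2) = 4, as in the comment
assert spatial_scales[0] * 0.5 == 1. / 64  # P6 from stride-2 max pool over P5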
Example #26
def add_semantic_segms_head(model, blob_in, dim_in):
    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    num_convs = cfg.SEMANTIC_NET.NUM_CONVS
    use_deconv = cfg.SEMANTIC_NET.USE_DECONV

    current = blob_in
    for i in range(num_convs - 1):
        current = model.Conv(current,
                             'semantic_segms_fcn' + str(i + 1),
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=1,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))
        current = model.Relu(current, current)
        dim_in = dim_inner

    if use_deconv:
        current = model.Conv(current,
                             'semantic_segms_fcn' + str(num_convs),
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=1,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))
        # upsample layer
        current = model.ConvTranspose(current,
                                      'semantic_segms_feature',
                                      dim_inner,
                                      dim_inner,
                                      kernel=2,
                                      pad=0,
                                      stride=2,
                                      weight_init=(cfg.MRCNN.CONV_INIT, {
                                          'std': 0.001
                                      }),
                                      bias_init=const_fill(0.0))
    else:
        current = model.Conv(current,
                             'semantic_segms_feature',
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=1,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))

    blob_mask = model.Relu(current, current)
    return blob_mask, dim_inner
Example #27
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # NxKxHxW
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)

    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling
        blob_in = model.ConvTranspose(
            blob_in,
            'kps_deconv',
            dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    if upsample_heatmap:
        blob_name = 'kps_score_lowres'
    else:
        blob_name = 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling
        blob_out = model.ConvTranspose(
            blob_in,
            blob_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # Use Conv to predict heatmaps; does no upsampling
        blob_out = model.Conv(
            blob_in,
            blob_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE
        )

    return blob_out
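The deconv padding choice pad = kernel/2 - 1 with stride 2 is what makes the upsampling an exact 2x for any even kernel size. Verifying with the transposed-convolution size formula (14 is just an assumed input resolution):

def convT_out(size, kernel, stride=2):
    pad = kernel // 2 - 1
    return (size - 1) * stride - 2 * pad + kernel

for k in (2, 4, 6):
    assert convT_out(14, k) == 28  # (14-1)*2 - (k-2) + k = 28 regardless of k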
Example #28
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # NxKxHxW
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)

    if cfg.KRCNN.USE_DECONV:
        # Apply ConvTranspose to the feature representation; results in 2x
        # upsampling
        blob_in = model.ConvTranspose(blob_in,
                                      'kps_deconv',
                                      dim,
                                      cfg.KRCNN.DECONV_DIM,
                                      kernel=cfg.KRCNN.DECONV_KERNEL,
                                      pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
                                      stride=2,
                                      weight_init=gauss_fill(0.01),
                                      bias_init=const_fill(0.0))
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    if upsample_heatmap:
        blob_name = 'kps_score_lowres'
    else:
        blob_name = 'kps_score'

    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Use ConvTranspose to predict heatmaps; results in 2x upsampling
        blob_out = model.ConvTranspose(blob_in,
                                       blob_name,
                                       dim,
                                       cfg.KRCNN.NUM_KEYPOINTS,
                                       kernel=cfg.KRCNN.DECONV_KERNEL,
                                       pad=int(cfg.KRCNN.DECONV_KERNEL / 2 -
                                               1),
                                       stride=2,
                                       weight_init=(cfg.KRCNN.CONV_INIT, {
                                           'std': 0.001
                                       }),
                                       bias_init=const_fill(0.0))
    else:
        # Use Conv to predict heatmaps; does no upsampling
        blob_out = model.Conv(blob_in,
                              blob_name,
                              dim,
                              cfg.KRCNN.NUM_KEYPOINTS,
                              kernel=1,
                              pad=0,
                              stride=1,
                              weight_init=(cfg.KRCNN.CONV_INIT, {
                                  'std': 0.001
                              }),
                              bias_init=const_fill(0.0))

    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(blob_out, 'kps_score',
                                               cfg.KRCNN.NUM_KEYPOINTS,
                                               cfg.KRCNN.NUM_KEYPOINTS,
                                               cfg.KRCNN.UP_SCALE)

    return blob_out
Example #29
def add_fcn_head(model, blob_in, blob_out, dim_in, prefix, num_convs,
                 use_deconv):

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    current = blob_in
    for i in range(num_convs - 1):
        current = model.Conv(current,
                             prefix + '_[refined_mask]_fcn' + str(i + 1),
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=1,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))
        current = model.Relu(current, current)
        dim_in = dim_inner

    if use_deconv:
        current = model.Conv(current,
                             prefix + '_[refined_mask]_fcn' + str(num_convs),
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=1,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))
        current = model.Relu(current, current)
        dim_in = dim_inner

        model.ConvTranspose(current,
                            blob_out,
                            dim_in,
                            dim_inner,
                            kernel=2,
                            pad=0,
                            stride=2,
                            weight_init=(cfg.MRCNN.CONV_INIT, {
                                'std': 0.001
                            }),
                            bias_init=const_fill(0.0))
    else:
        model.Conv(current,
                   blob_out,
                   dim_in,
                   dim_inner,
                   kernel=3,
                   pad=1 * dilation,
                   stride=1,
                   weight_init=(cfg.MRCNN.CONV_INIT, {
                       'std': 0.001
                   }),
                   bias_init=('ConstantFill', {
                       'value': 0.
                   }))

    blob_out = model.Relu(blob_out, blob_out)
    dim_out = dim_inner

    return blob_out, dim_out
Example #30
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(
            blob_in,
            'conv_dim_reduce',
            dim_in,
            dim_reduce,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Classification conv
    model.Conv(
        blob_in,
        'conv_cls',
        dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Bounding-box regression conv
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in,
        'conv_bbox_pred',
        dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'], ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE
    )
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE
    )
Example #31
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper."""
    # FPN levels are built starting from the highest/coarsest level of the
    # backbone (usually "conv5"). First we build down, recursively constructing
    # lower/finer resolution FPN levels. Then we build up, constructing levels
    # that are even higher/coarser than the starting level.
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    # Count the number of backbone stages that we will generate FPN levels for
    # starting from the coarsest backbone stage (usually the "conv5"-like level)
    # E.g., if the backbone level info defines 4 stages: "conv5",
    # "conv4", ... "conv2" and min_level=2, then we end up with 4 - (2 - 2) = 4
    # backbone stages to add FPN to.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = [
        'fpn_inner_{}'.format(s)
        for s in fpn_level_info.blobs[:num_backbone_stages]
    ]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # For the coarsest backbone level: 1x1 conv only seeds recursion
    model.Conv(
        lateral_input_blobs[0],
        output_blobs[0],
        dim_in=fpn_dim_lateral[0],
        dim_out=fpn_dim,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=xavier_fill,
        bias_init=const_fill(0.0)
    )

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #

    # For other levels add top-down and lateral connections
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        fpn_blob = model.Conv(
            output_blobs[i],
            'fpn_{}'.format(fpn_level_info.blobs[i]),
            dim_in=fpn_dim,
            dim_out=fpn_dim,
            kernel=3,
            pad=1,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        blobs_fpn += [fpn_blob]
        spatial_scales += [fpn_level_info.spatial_scales[i]]

    #
    # Step 2: build up starting from the coarsest backbone level
    #

    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride 2 subsampling
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(
                fpn_blob_in,
                'fpn_' + str(i),
                dim_in=dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales
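To make the top-down recursion concrete, here is a rough NumPy sketch of one iteration of the loop above: the coarser FPN map is upsampled 2x with nearest-neighbor interpolation and summed elementwise with the lateral map from the next finer backbone stage. Shapes are hypothetical and the random arrays stand in for the learned 1x1 lateral conv output:

import numpy as np

def upsample_nearest_2x(x):
    # (C, H, W) -> (C, 2H, 2W), mirroring UpsampleNearest(scale=2)
    return x.repeat(2, axis=1).repeat(2, axis=2)

fpn_dim = 256
top = np.random.rand(fpn_dim, 7, 7)         # coarser level, e.g. fpn_inner_conv5
lateral = np.random.rand(fpn_dim, 14, 14)   # finer level, after the 1x1 lateral conv
fused = upsample_nearest_2x(top) + lateral  # the elementwise Sum
assert fused.shape == lateral.shape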
Example #33
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # RPN hidden representation
    model.Conv(
        blob_in,
        'conv_rpn',
        dim_in,
        dim_out,
        kernel=3,
        pad=1,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')
    # Proposal classification scores
    model.Conv(
        'conv_rpn',
        'rpn_cls_logits',
        dim_in,
        num_anchors,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Proposal bbox regression deltas
    model.Conv(
        'conv_rpn',
        'rpn_bbox_pred',
        dim_in,
        4 * num_anchors,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )

    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  OR
        #  2) training for Faster R-CNN
        # Otherwise (== training for RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )

    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Add op that generates training labels for in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
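A small sanity check on the channel arithmetic above (config values hypothetical): Detectron-style generate_anchors produces one anchor per (size, aspect ratio) pair, so rpn_cls_logits needs num_anchors output channels and rpn_bbox_pred needs 4 * num_anchors:

sizes = (32, 64, 128, 256, 512)     # stand-in for cfg.RPN.SIZES
aspect_ratios = (0.5, 1.0, 2.0)     # stand-in for cfg.RPN.ASPECT_RATIOS
num_anchors = len(sizes) * len(aspect_ratios)  # == anchors.shape[0] == 15
print(num_anchors, 4 * num_anchors)            # cls and bbox output channels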
Example #34
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    dim_out = 1

    if cfg.MRCNN.BBOX2MASK.BBOX2MASK_ON:
        # Use the weight transfer function iff BBOX2MASK_ON is True
        # Decide the input to the weight transfer function:
        #   - Case 1) From a pre-trained embedding vector (e.g. GloVe)
        #   - Case 2) From the detection weights in the box head
        if cfg.MRCNN.BBOX2MASK.USE_PRETRAINED_EMBED:
            # Case 1) From a pre-trained embedding vector (e.g. GloVe)
            class_embed = cfg.MRCNN.BBOX2MASK.PRETRAINED_EMBED_NAME
            class_embed_dim = cfg.MRCNN.BBOX2MASK.PRETRAINED_EMBED_DIM
            # This parameter is meant to be initialized from a pretrained model
            # instead of learned from scratch. Hence, the default init is HUGE
            # to cause NaN loss so that the error will not pass silently.
            model.AddParameter(model.param_init_net.GaussianFill(
                [], class_embed, shape=[dim_out, class_embed_dim], std=1e12))
            # Pretrained embedding should be fixed during training (it doesn't
            # make sense to update them)
            model.StopGradient(class_embed, class_embed + '_no_grad')
            class_embed = class_embed + '_no_grad'
        else:
            # Case 2) From the detection weights in the box head
            #   - Subcase a) using cls+box
            #   - Subcase b) using cls
            #   - Subcase c) using box
            # where 'cls' is RoI classification weights 'cls_score_w'
            # and 'box' is bounding box regression weights 'bbox_pred_w'
            if (cfg.MRCNN.BBOX2MASK.INCLUDE_CLS_SCORE and
                    cfg.MRCNN.BBOX2MASK.INCLUDE_BBOX_PRED):
                # Subcase a) using cls+box
                concat_cls_score_bbox_pred(model)
                class_embed = 'cls_score_bbox_pred'
                class_embed_dim = 1024 + 4096
            elif cfg.MRCNN.BBOX2MASK.INCLUDE_CLS_SCORE:
                # Subcase b) using cls
                class_embed = 'cls_score_w'
                class_embed_dim = 1024
            elif cfg.MRCNN.BBOX2MASK.INCLUDE_BBOX_PRED:
                # Subcase c) using box; 'bbox_pred_w' needs to be flattened
                model.net.Reshape(
                    'bbox_pred_w', ['bbox_pred_w_flat', '_bbox_pred_w_oldshape'],
                    shape=(model.num_classes, -1))
                class_embed = 'bbox_pred_w_flat'
                class_embed_dim = 4096
            else:
                raise ValueError(
                    'At least one of cfg.MRCNN.BBOX2MASK.INCLUDE_CLS_SCORE and '
                    'cfg.MRCNN.BBOX2MASK.INCLUDE_BBOX_PRED needs to be True')
            # Stop the mask gradient to the detection weights if specified
            if cfg.MRCNN.BBOX2MASK.STOP_DET_W_GRAD:
                model.StopGradient(class_embed, class_embed + '_no_grad')
                class_embed = class_embed + '_no_grad'

        # Use the weight transfer function to predict mask weights
        mask_w = bbox2mask_weight_transfer(
            model, class_embed, dim_in=class_embed_dim, dim_h=dim, dim_out=dim)
        # Mask prediction with predicted mask weights (no bias term)
        fcn_branch = model.net.Conv(
            [blob_in, mask_w], 'mask_fcn_logits', kernel=1, pad=0, stride=1)
    else:
        # Predict mask using a plain Conv
        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        init_filler = (
            cfg.MRCNN.CONV_INIT if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
        fcn_branch = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            dim_out,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(init_filler, {'std': 0.001}),
            bias_init=const_fill(0.0))

    # Add a complementary MLP branch if specified
    if cfg.MRCNN.JOINT_FCN_MLP_HEAD:
        # Use class-agnostic MLP branch, and class-aware FCN branch
        mlp_branch = cls_agnostic_mlp_branch(
            model, blob_in, dim_in=dim * cfg.MRCNN.RESOLUTION**2, num_cls=dim_out)
        blob_out = model.net.Add([mlp_branch, fcn_branch], 'mask_logits')
    elif not cfg.MRCNN.USE_FC_OUTPUT:
        # Otherwise the FCN branch is the output (the USE_FC_OUTPUT case is
        # expected to be handled by an FC output head not shown here)
        blob_out = fcn_branch

    if not model.train:  # == if test
        blob_out = model.Softmax(blob_out, 'mask_fcn_probs')

    return blob_out
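The weight-transfer path above predicts the parameters of the mask head instead of learning them directly. A minimal NumPy sketch of the idea (the single matrix T stands in for the learned MLP inside bbox2mask_weight_transfer; all dims hypothetical): each class embedding is mapped to the filters of a class-specific 1x1 convolution, which is then applied to the RoI features with no bias:

import numpy as np

num_cls, embed_dim, feat_dim, H, W = 81, 300, 256, 14, 14
class_embed = np.random.rand(num_cls, embed_dim)  # e.g. GloVe or cls_score_w
T = np.random.rand(embed_dim, feat_dim)           # stand-in for the transfer MLP
mask_w = class_embed @ T                          # predicted 1x1 filters
roi_feat = np.random.rand(feat_dim, H, W)
# A 1x1 conv is a per-pixel matrix multiply with the predicted weights
mask_logits = np.einsum('cf,fhw->chw', mask_w, roi_feat)
assert mask_logits.shape == (num_cls, H, W)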
Example #35
def add_rfcn_heads(blob_in, rois, spatial_scale, num_bbox_reg_classes, lvl):
    # Note: in the original source this helper is defined inside a
    # model-building method; `self` (the model) and `dim_in` are captured
    # from the enclosing scope.
    # Classification and bbox-regression convs: level 0 creates the
    # parameters, higher levels reuse them via ConvShared
    if lvl == 0:
        conv_cls = self.Conv(
            blob_in,
            'conv_cls_{}'.format(lvl),
            dim_in,
            self.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        conv_bbox_pred = self.Conv(
            blob_in,
            'conv_bbox_pred_{}'.format(lvl),
            dim_in,
            4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
    else:
        conv_cls = self.ConvShared(
            blob_in,
            'conv_cls_{}'.format(lvl),
            dim_in,
            self.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1,
            pad=0,
            stride=1,
            weight='conv_cls_0_w',
            bias='conv_cls_0_b'
        )
        conv_bbox_pred = self.ConvShared(
            blob_in,
            'conv_bbox_pred_{}'.format(lvl),
            dim_in,
            4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1,
            pad=0,
            stride=1,
            weight='conv_bbox_pred_0_w',
            bias='conv_bbox_pred_0_b'
        )

    # Classification PS RoI pooling
    self.net.PSRoIPool(
        [conv_cls, rois],
        ['psroipooled_cls' + str(lvl), '_mapping_channel_cls' + str(lvl)],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=self.num_classes,
        spatial_scale=spatial_scale
    )
    self.AveragePool(
        'psroipooled_cls' + str(lvl), 'cls_score_4d' + str(lvl),
        kernel=cfg.RFCN.PS_GRID_SIZE
    )
    cls_blob_out, _ = self.net.Reshape(
        'cls_score_4d' + str(lvl),
        ['cls_score' + str(lvl), '_cls_scores_shape' + str(lvl)],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not self.train:
        self.Softmax('cls_score' + str(lvl), 'cls_prob' + str(lvl), engine='CUDNN')
    # Bbox regression PS RoI pooling
    self.net.PSRoIPool(
        [conv_bbox_pred, rois],
        ['psroipooled_bbox' + str(lvl), '_mapping_channel_bbox' + str(lvl)],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    bbox_blob_out = self.AveragePool(
        'psroipooled_bbox' + str(lvl), 'bbox_pred' + str(lvl),
        kernel=cfg.RFCN.PS_GRID_SIZE
    )
    return cls_blob_out, bbox_blob_out
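Why ConvShared is safe here: a convolution's parameters are independent of the input's spatial size, so the filter bank created at level 0 applies unchanged to every pyramid level. A tiny NumPy illustration with hypothetical shapes:

import numpy as np

w = np.random.rand(16, 256)                  # (out_ch, in_ch) 1x1 filter bank
for hw in [(50, 64), (25, 32), (13, 16)]:    # per-level spatial sizes
    x = np.random.rand(256, *hw)
    y = np.einsum('oi,ihw->ohw', w, x)       # same weights, any H x W
    assert y.shape == (16, *hw)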
Example #36
def add_resnet_head(model, blob_in, blob_out, dim_in, prefix, n_downsampling,
                    num_res_blocks, use_deconv):
    dilation = cfg.REFINENET.RESNET_FCN.DILATION
    dim_inner = cfg.REFINENET.RESNET_FCN.DIM_REDUCED

    current = blob_in

    # Downsampling
    for i in range(n_downsampling):
        if i > 0:
            dim_inner *= 2
        current = model.Conv(current,
                             prefix + '_[refined]_resnet_down' + str(i + 1),
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=2,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))
        current = model.Relu(current, current)
        dim_in = dim_inner

    # Residual blocks
    for i in range(num_res_blocks):
        current = add_residual_block(model,
                                     prefix + '_[refined]_resnet_res' +
                                     str(i + 1),
                                     current,
                                     dim_in=dim_in,
                                     dim_out=dim_inner,
                                     dim_inner=dim_inner,
                                     dilation=dilation,
                                     inplace_sum=True)
        dim_in = dim_inner

    # Upsampling
    for i in range(n_downsampling):
        if i < n_downsampling - 1:
            dim_inner = int(dim_inner / 2)
        current = model.ConvTranspose(current,
                                      prefix + '_[refined]_resnet_up' +
                                      str(n_downsampling - i),
                                      dim_in=dim_in,
                                      dim_out=dim_inner,
                                      kernel=2,
                                      pad=0,
                                      out_pad=0,
                                      stride=2,
                                      weight_init=(cfg.MRCNN.CONV_INIT, {
                                          'std': 0.001
                                      }),
                                      bias_init=const_fill(0.0))
        current = brew.spatial_bn(model,
                                  current,
                                  current + '_bn',
                                  dim_inner,
                                  is_test=not model.train)
        current = model.Relu(current, current)

        dim_in = dim_inner

    if use_deconv:
        current = model.Conv(current,
                             prefix + '_[refined]_resnet_conv' + str(1),
                             dim_in,
                             dim_inner,
                             kernel=3,
                             pad=1 * dilation,
                             stride=1,
                             weight_init=(cfg.MRCNN.CONV_INIT, {
                                 'std': 0.001
                             }),
                             bias_init=('ConstantFill', {
                                 'value': 0.
                             }))
        current = model.Relu(current, current)
        dim_in = dim_inner

        model.ConvTranspose(current,
                            blob_out,
                            dim_in,
                            dim_inner,
                            kernel=2,
                            pad=0,
                            stride=2,
                            weight_init=(cfg.MRCNN.CONV_INIT, {
                                'std': 0.001
                            }),
                            bias_init=const_fill(0.0))
    else:
        model.Conv(current,
                   blob_out,
                   dim_in,
                   dim_inner,
                   kernel=3,
                   pad=1 * dilation,
                   stride=1,
                   weight_init=(cfg.MRCNN.CONV_INIT, {
                       'std': 0.001
                   }),
                   bias_init=('ConstantFill', {
                       'value': 0.
                   }))

    blob_out = model.Relu(blob_out, blob_out)
    dim_out = dim_inner

    return blob_out, dim_out
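A quick trace of the spatial/width bookkeeping in this head (sizes hypothetical): each stride-2 conv halves H and W, with dim_inner doubling from the second downsampling step onward; the ConvTranspose loop then doubles H and W back, halving dim_inner on all but the last step, so the output returns to the input resolution:

h = w = 56
dim_inner, n_downsampling = 256, 3
for i in range(n_downsampling):          # downsampling ladder
    if i > 0:
        dim_inner *= 2
    h, w = h // 2, w // 2
print(dim_inner, h, w)                   # 1024 7 7
for i in range(n_downsampling):          # upsampling ladder
    if i < n_downsampling - 1:
        dim_inner //= 2
    h, w = h * 2, w * 2
print(dim_inner, h, w)                   # 256 56 56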
Example #37
def adaptive_pooling_mask_head_v1upXconvs(model, blobs_pan, dim_pan, spatial_scales_pan, num_convs):
    """Fuse all PAN extra lateral level using a adaptive pooling"""
    # Fusion method is indicated in cfg.PAN.FUSION_METHOD
    assert cfg.MODEL.MASK_ON, "MODEL.MASK_ON = False, can not use PAN mask head"
    assert cfg.PAN.MASK_ON, "PAN.MASK_ON = False, can not use PAN mask head"

    pan_level_info = PAN_LEVEL_INFO().val()
    # If BottomUp_ON, adaptive pooling on pan level
    # otherwise adaptive pooling on fpn level
    if cfg.PAN.BottomUp_ON:
        prefix = 'pan_'
    else:
        prefix = ''
    blobs_pan = [prefix + s for s in pan_level_info.blobs]
    # For the finest FPN level: N2 = P2 only seeds recursion
    blobs_pan[0] = pan_level_info.blobs[0]
    dim_pan = pan_level_info.dims[0]
    spatial_scales_pan = pan_level_info.spatial_scales
    fusion_method = cfg.PAN.FUSION_METHOD
    assert fusion_method in {'Sum', 'Max', 'Mean'}, \
        'Unknown fusion method: {}'.format(fusion_method)
    # In the mask branch, the fusion point is fixed between the first and
    # second conv layers
    # adaptive_pooling_place = cfg.PAN.AdaptivePooling_Place

    """v1upXconvs design: X * (conv 3x3), convT 2x2."""
    mask_roi_feat = model.RoIFeatureTransform(
        blobs_pan,
        blob_out='_[mask]_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scales_pan
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    # independent fcn1 for all levels
    mask_fcn1_list = []
    for i in range(len(mask_roi_feat)):
        mask_fcn1_name = '_[mask]_fcn1' + str(mask_roi_feat[i])
        model.Conv(
            mask_roi_feat[i],
            mask_fcn1_name,
            dim_pan,
            dim_inner,
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        mask_fcn1_list += [mask_fcn1_name]
    # fuse
    pan_adaptive_pooling_mask_fcn1 = getattr(model.net, fusion_method)(
        mask_fcn1_list, 'pan_adaptive_pooling_mask_fcn1'
    )
    model.Relu(pan_adaptive_pooling_mask_fcn1, pan_adaptive_pooling_mask_fcn1)

    current = pan_adaptive_pooling_mask_fcn1
    for i in range(1, num_convs):
        current = model.Conv(
            current,
            '_[mask]_fcn' + str(i + 1),
            dim_inner,
            dim_inner,
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        current = model.Relu(current, current)

    # upsample layer
    model.ConvTranspose(
        current,
        'conv5_mask',
        dim_inner,
        dim_inner,
        kernel=2,
        pad=0,
        stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    blob_mask = model.Relu('conv5_mask', 'conv5_mask')

    return blob_mask, dim_inner
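The fusion step above is purely elementwise, so a short NumPy sketch (shapes hypothetical) captures all three cfg.PAN.FUSION_METHOD options: the per-level fcn1 outputs are stacked and reduced by Sum, Max, or Mean:

import numpy as np

levels = [np.random.rand(4, 256, 14, 14) for _ in range(4)]  # per-level fcn1 outputs
fusion_method = 'Max'                    # one of {'Sum', 'Max', 'Mean'}
reduce_fn = {'Sum': np.sum, 'Max': np.max, 'Mean': np.mean}[fusion_method]
fused = reduce_fn(np.stack(levels), axis=0)
assert fused.shape == levels[0].shape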
Example #38
def bottleneck_transformation(
    model,
    blob_in,
    dim_in,
    dim_out,
    stride,
    prefix,
    dim_inner,
    dilation=1,
    group=1
):
    """Add a bottleneck transformation to the model."""
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> BN -> ReLU
    cur = model.ConvAffine(
        blob_in,
        prefix + '_branch2a',
        dim_in,
        dim_inner,
        kernel=1,
        stride=str1x1,
        pad=0,
        inplace=True
    )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> BN -> ReLU
    cur = model.ConvAffine(
        cur,
        prefix + '_branch2b',
        dim_inner,
        dim_inner,
        kernel=3,
        stride=str3x3,
        pad=1 * dilation,
        dilation=dilation,
        group=group,  # for a MobileNet-style depthwise conv, set group=dim_inner
        inplace=True
    )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> BN (no ReLU)
    # NB: for now this AffineChannel op cannot be in-place due to a bug in C2
    # gradient computation for graphs like this
    cur = model.ConvAffine(
        cur,
        prefix + '_branch2c',
        dim_inner,
        dim_out,
        kernel=1,
        stride=1,
        pad=0,
        inplace=False
    )

    # Squeeze-and-excitation (SE) branch: global pool -> 1x1 reduce (/16) ->
    # ReLU -> 1x1 expand -> sigmoid, then channelwise rescaling of the
    # residual output
    se_pool = model.AveragePool(
        cur, prefix + '_branch2c_se_pool', global_pooling=1)
    se_conv = model.Conv(
        se_pool, prefix + '_branch2c_se_con1', dim_out, int(dim_out / 16),
        kernel=1, stride=1, pad=0,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    se_conv = model.Relu(se_conv, se_conv)
    se_conv = model.Conv(
        se_conv, prefix + '_branch2c_se_con2', int(dim_out / 16), dim_out,
        kernel=1, stride=1, pad=0,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
    se_sig = model.net.Sigmoid(se_conv, se_conv)
    cur = model.net.Mul([cur, se_sig], prefix + '_branch2c_se', broadcast=1)

    return cur
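The SE branch grafted onto this bottleneck is compact enough to express directly. A NumPy sketch with random stand-in weights (reduction ratio 16, as in the convs above): global average pooling squeezes each channel to a scalar, two 1x1 convs with a ReLU form the excitation MLP, and the sigmoid output gates the residual branch channelwise:

import numpy as np

C, H, W = 64, 28, 28
cur = np.random.rand(C, H, W)
w1 = np.random.rand(C // 16, C)          # _branch2c_se_con1 (1x1 reduce)
w2 = np.random.rand(C, C // 16)          # _branch2c_se_con2 (1x1 expand)
s = cur.mean(axis=(1, 2))                # squeeze: global average pool
s = np.maximum(w1 @ s, 0)                # reduce + ReLU
s = 1.0 / (1.0 + np.exp(-(w2 @ s)))      # expand + sigmoid gates
out = cur * s[:, None, None]             # Mul(..., broadcast=1)
assert out.shape == cur.shape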
Example #39
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels
            # RPN hidden representation
            conv_rpn_fpn = model.Conv(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  OR
            #  2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )