Example 1
    def _cls_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class):
        p = self.p

        # classification subnet
        cls_conv1 = X.conv(
            data=conv_feat,
            kernel=3,
            filter=conv_channel,
            weight=self.cls_conv1_weight,
            bias=self.cls_conv1_bias,
            no_bias=False,
            name="cls_conv1"
        )
        cls_conv1_relu = X.relu(cls_conv1)
        cls_conv2 = X.conv(
            data=cls_conv1_relu,
            kernel=3,
            filter=conv_channel,
            weight=self.cls_conv2_weight,
            bias=self.cls_conv2_bias,
            no_bias=False,
            name="cls_conv2"
        )
        cls_conv2_relu = X.relu(cls_conv2)
        cls_conv3 = X.conv(
            data=cls_conv2_relu,
            kernel=3,
            filter=conv_channel,
            weight=self.cls_conv3_weight,
            bias=self.cls_conv3_bias,
            no_bias=False,
            name="cls_conv3"
        )
        cls_conv3_relu = X.relu(cls_conv3)
        cls_conv4 = X.conv(
            data=cls_conv3_relu,
            kernel=3,
            filter=conv_channel,
            weight=self.cls_conv4_weight,
            bias=self.cls_conv4_bias,
            no_bias=False,
            name="cls_conv4"
        )
        cls_conv4_relu = X.relu(cls_conv4)

        if p.fp16:
            cls_conv4_relu = X.to_fp32(cls_conv4_relu, name="cls_conv4_fp32")

        output_channel = num_base_anchor * (num_class - 1)
        output = X.conv(
            data=cls_conv4_relu,
            kernel=3,
            filter=output_channel,
            weight=self.cls_pred_weight,
            bias=self.cls_pred_bias,
            no_bias=False,
            name="cls_pred"
        )

        return output
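The self.cls_conv*_weight/bias symbols above are declared elsewhere in the head. A minimal sketch of such declarations, assuming the X.var/X.gauss/X.zero_init helpers used elsewhere in this file (see Example 13), plus the RetinaNet-style prior-probability bias for the final classifier; the prior_prob value and the mx.init.Constant usage are assumptions, not the author's code:

import math
import mxnet as mx

# hypothetical declarations: created once, so every FPN level reuses the same weights
cls_conv1_weight = X.var("cls_conv1_weight", init=X.gauss(0.01))
cls_conv1_bias = X.var("cls_conv1_bias", init=X.zero_init())
# ... cls_conv2 through cls_conv4 follow the same pattern ...

# RetinaNet-style focal-loss prior: initialize the classifier bias so that
# sigmoid(bias) = prior_prob, which stabilizes early training
prior_prob = 0.01  # assumed value
cls_pred_weight = X.var("cls_pred_weight", init=X.gauss(0.01))
cls_pred_bias = X.var("cls_pred_bias", init=mx.init.Constant(-math.log((1 - prior_prob) / prior_prob)))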
Example 2
def se_v2_resnet_v1b_unit(input, name, filter, stride, dilate, proj, norm,
                          **kwargs):
    """
    Differs from v1: the SE module is moved to the 3x3 conv.
    """
    conv1 = conv(input, name=name + "_conv1", filter=filter // 4)
    bn1 = norm(conv1, name=name + "_bn1")
    relu1 = relu(bn1, name=name + "_relu1")

    conv2 = conv(relu1,
                 name=name + "_conv2",
                 stride=stride,
                 filter=filter // 4,
                 kernel=3)
    bn2 = norm(conv2, name=name + "_bn2")
    relu2 = relu(bn2, name=name + "_relu2")
    relu2 = se(relu2,
               prefix=name + "_se2",
               f_down=filter // 16,
               f_up=filter // 4)

    conv3 = conv(relu2, name=name + "_conv3", filter=filter)
    bn3 = norm(conv3, name=name + "_bn3")

    if proj:
        shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride)
        shortcut = norm(shortcut, name=name + "_sc_bn")
    else:
        shortcut = input

    eltwise = add(bn3, shortcut, name=name + "_plus")

    return relu(eltwise, name=name + "_relu")
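The se helper is defined elsewhere in the repo; below is a minimal squeeze-and-excitation sketch in the same style, assuming the module-level conv/relu wrappers used above. The body is an illustration of the technique, not the repository's implementation:

import mxnet as mx

def se(x, prefix, f_down, f_up):
    # squeeze: global average pool to [N, C, 1, 1]
    pool = mx.sym.Pooling(x, kernel=(1, 1), global_pool=True, pool_type="avg",
                          name=prefix + "_pool")
    # excite: 1x1 bottleneck f_up -> f_down -> f_up, gated by a sigmoid
    down = relu(conv(pool, name=prefix + "_down", filter=f_down), name=prefix + "_down_relu")
    gate = mx.sym.Activation(conv(down, name=prefix + "_up", filter=f_up),
                             act_type="sigmoid", name=prefix + "_sigmoid")
    # recalibrate: scale each channel of x by its gate
    return mx.sym.broadcast_mul(x, gate, name=prefix + "_mul")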
Example 3
    def _bbox_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class):
        p = self.p

        # regression subnet
        bbox_conv1 = X.conv(
            data=conv_feat,
            kernel=3,
            filter=conv_channel,
            weight=self.bbox_conv1_weight,
            bias=self.bbox_conv1_bias,
            no_bias=False,
            name="bbox_conv1"
        )
        bbox_conv1_relu = X.relu(bbox_conv1)
        bbox_conv2 = X.conv(
            data=bbox_conv1_relu,
            kernel=3,
            filter=conv_channel,
            weight=self.bbox_conv2_weight,
            bias=self.bbox_conv2_bias,
            no_bias=False,
            name="bbox_conv2"
        )
        bbox_conv2_relu = X.relu(bbox_conv2)
        bbox_conv3 = X.conv(
            data=bbox_conv2_relu,
            kernel=3,
            filter=conv_channel,
            weight=self.bbox_conv3_weight,
            bias=self.bbox_conv3_bias,
            no_bias=False,
            name="bbox_conv3"
        )
        bbox_conv3_relu = X.relu(bbox_conv3)
        bbox_conv4 = X.conv(
            data=bbox_conv3_relu,
            kernel=3,
            filter=conv_channel,
            weight=self.bbox_conv4_weight,
            bias=self.bbox_conv4_bias,
            no_bias=False,
            name="bbox_conv4"
        )
        bbox_conv4_relu = X.relu(bbox_conv4)

        if p.fp16:
            bbox_conv4_relu = X.to_fp32(bbox_conv4_relu, name="bbox_conv4_fp32")

        output_channel = num_base_anchor * 4
        output = X.conv(
            data=bbox_conv4_relu,
            kernel=3,
            filter=output_channel,
            weight=self.bbox_pred_weight,
            bias=self.bbox_pred_bias,
            no_bias=False,
            name="bbox_pred"
        )

        return output
Example 4
def dcn_resnet_unit(input, name, filter, stride, dilate, proj, norm, **kwargs):
    conv1 = conv(input, name=name + "_conv1", filter=filter // 4)
    bn1 = norm(conv1, name=name + "_bn1")
    relu1 = relu(bn1, name=name + "_relu1")

    # offset branch for the deformable conv2: 72 channels = 2 (x, y) * 3*3 kernel * 4 deformable groups
    conv2_offset = conv(relu1, name=name + "_conv2_offset", filter=72, kernel=3, stride=stride, dilate=dilate)
    conv2 = mx.sym.contrib.DeformableConvolution(relu1, conv2_offset, kernel=(3, 3),
        stride=(stride, stride), dilate=(dilate, dilate), pad=(1, 1), num_filter=filter // 4,
        num_deformable_group=4, no_bias=True, name=name + "_conv2")
    bn2 = norm(conv2, name=name + "_bn2")
    relu2 = relu(bn2, name=name + "_relu2")

    conv3 = conv(relu2, name=name + "_conv3", filter=filter)
    bn3 = norm(conv3, name=name + "_bn3")

    if proj:
        shortcut = conv(input, name=name + "_sc", filter=filter, stride=stride)
        shortcut = norm(shortcut, name=name + "_sc_bn")
    else:
        shortcut = input

    eltwise = add(bn3, shortcut, name=name + "_plus")

    return relu(eltwise, name=name + "_relu")
Example 5
def trident_resnet_v1b_unit(input, name, id, filter, stride, dilate, proj, **kwargs):
    """
    Compared with v1, v1b moves stride=2 to the 3x3 conv instead of the 1x1 conv and uses std in pre-processing.
    This is also known as the Facebook re-implementation of ResNet (a.k.a. the Torch ResNet).
    """
    p = kwargs["params"]
    share_bn = p.branch_bn_shared
    share_conv = p.branch_conv_shared
    norm = p.normalizer

    ######################### prepare names #########################
    if id is not None:
        conv_postfix = ("_shared%s" if share_conv else "_branch%s") % id
        bn_postfix = ("_shared%s" if share_bn else "_branch%s") % id
        other_postfix = "_branch%s" % id
    else:
        conv_postfix = ""
        bn_postfix = ""
        other_postfix = ""

    ######################### prepare parameters #########################
    conv_params = lambda x: dict(
        weight=X.shared_var(name + "_%s_weight" % x) if share_conv else None,
        name=name + "_%s" % x + conv_postfix
    )

    def bn_params(x):
        ret = dict(
            gamma=X.shared_var(name + "_%s_gamma" % x) if share_bn else None,
            beta=X.shared_var(name + "_%s_beta" % x) if share_bn else None,
            moving_mean=X.shared_var(name + "_%s_moving_mean" % x) if share_bn else None,
            moving_var=X.shared_var(name + "_%s_moving_var" % x) if share_bn else None,
            name=name + "_%s" % x + bn_postfix
        )
        if norm.__name__ == "gn":
            del ret["moving_mean"], ret["moving_var"]
        return ret

    ######################### construct graph #########################
    conv1 = conv(input, filter=filter // 4, **conv_params("conv1"))
    bn1 = norm(conv1, **bn_params("bn1"))
    relu1 = relu(bn1, name=name + other_postfix)

    conv2 = conv(relu1, filter=filter // 4, kernel=3, stride=stride, dilate=dilate, **conv_params("conv2"))
    bn2 = norm(conv2, **bn_params("bn2"))
    relu2 = relu(bn2, name=name + other_postfix)

    conv3 = conv(relu2, filter=filter, **conv_params("conv3"))
    bn3 = norm(conv3, **bn_params("bn3"))

    if proj:
        shortcut = conv(input, filter=filter, stride=stride, **conv_params("sc"))
        shortcut = norm(shortcut, **bn_params("sc_bn"))
    else:
        shortcut = input

    eltwise = add(bn3, shortcut, name=name + "_plus" + other_postfix)

    return relu(eltwise, name=name + "_relu" + other_postfix)
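A hypothetical call site for the unit above (branch_feats and p are assumptions): three branches are built over the same unit name, so convolutions are tied through X.shared_var while each branch keeps its own dilation.

# a minimal sketch: weight sharing across trident branches with per-branch dilation
dilates = (1, 2, 3)
branches = [
    trident_resnet_v1b_unit(feat, name="stage3_unit1", id=i, filter=1024,
                            stride=1, dilate=d, proj=False, params=p)
    for i, (feat, d) in enumerate(zip(branch_feats, dilates))
]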
Example 6
    def _cls_head(self, conv_feat):
        xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten")
        fc1 = X.fc(flatten, filter=1024, name="bbox_cls_fc1", init=xavier_init)
        fc1 = self.add_norm(fc1)
        fc1 = X.relu(fc1)
        fc2 = X.fc(fc1, filter=1024, name="bbox_cls_fc2", init=xavier_init)
        fc2 = self.add_norm(fc2)
        fc2 = X.relu(fc2)

        return fc2
Example 7
def trident_resnet_v1_unit(input, name, id, filter, stride, dilate, proj, **kwargs):
    p = kwargs["params"]
    share_bn = p.branch_bn_shared
    share_conv = p.branch_conv_shared
    norm = p.normalizer

    ######################### prepare names #########################
    if id is not None:
        conv_postfix = ("_shared%s" if share_conv else "_branch%s") % id
        bn_postfix = ("_shared%s" if share_bn else "_branch%s") % id
        other_postfix = "_branch%s" % id
    else:
        conv_postfix = ""
        bn_postfix = ""
        other_postfix = ""

    ######################### prepare parameters #########################
    conv_params = lambda x: dict(
        weight=X.shared_var(name + "_%s_weight" % x) if share_conv else None,
        name=name + "_%s" % x + conv_postfix
    )

    bn_params = lambda x: dict(
        gamma=X.shared_var(name + "_%s_gamma" % x) if share_bn else None,
        beta=X.shared_var(name + "_%s_beta" % x) if share_bn else None,
        moving_mean=X.shared_var(name + "_%s_moving_mean" % x) if share_bn else None,
        moving_var=X.shared_var(name + "_%s_moving_var" % x) if share_bn else None,
        name=name + "_%s" % x + bn_postfix
    )

    ######################### construct graph #########################
    conv1 = conv(input, filter=filter // 4, stride=stride, **conv_params("conv1"))
    bn1 = norm(conv1, **bn_params("bn1"))
    relu1 = relu(bn1, name=name + other_postfix)

    conv2 = conv(relu1, filter=filter // 4, kernel=3, dilate=dilate, **conv_params("conv2"))
    bn2 = norm(conv2, **bn_params("bn2"))
    relu2 = relu(bn2, name=name + other_postfix)

    conv3 = conv(relu2, filter=filter, **conv_params("conv3"))
    bn3 = norm(conv3, **bn_params("bn3"))

    if proj:
        shortcut = conv(input, filter=filter, stride=stride, **conv_params("sc"))
        shortcut = norm(shortcut, **bn_params("sc_bn"))
    else:
        shortcut = input

    eltwise = add(bn3, shortcut, name=name + "_plus" + other_postfix)

    return relu(eltwise, name=name + "_relu" + other_postfix)
Example 8
    def _refine_pts(self, cls_feat, reg_feat, dcn_offset, pts_init_out):
        p = self.p
        point_conv_channel = p.head.point_conv_channel
        num_class = p.num_class
        output_channel = num_class - 1
        pts_output_channel = p.point_generate.num_points * 2

        cls_conv = mx.symbol.contrib.DeformableConvolution(
            data=cls_feat,
            offset=dcn_offset,
            kernel=(self.dcn_kernel, self.dcn_kernel),
            pad=(self.dcn_pad, self.dcn_pad),
            stride=(1, 1),
            dilate=(1, 1),
            num_filter=point_conv_channel,
            weight=self.cls_conv_weight,
            bias=self.cls_conv_bias,
            no_bias=False,
            name="cls_conv")
        cls_conv_relu = X.relu(cls_conv)
        cls_out = X.conv(data=cls_conv_relu,
                         kernel=1,
                         filter=output_channel,
                         weight=self.cls_out_weight,
                         bias=self.cls_out_bias,
                         no_bias=False,
                         name="cls_out")

        pts_refine_conv = mx.symbol.contrib.DeformableConvolution(
            data=reg_feat,
            offset=dcn_offset,
            kernel=(self.dcn_kernel, self.dcn_kernel),
            pad=(self.dcn_pad, self.dcn_pad),
            stride=(1, 1),
            dilate=(1, 1),
            num_filter=point_conv_channel,
            weight=self.pts_refine_conv_weight,
            bias=self.pts_refine_conv_bias,
            no_bias=False,
            name="pts_refine_conv")
        pts_refine_conv_relu = X.relu(pts_refine_conv)
        pts_refine_out = X.conv(data=pts_refine_conv_relu,
                                kernel=1,
                                filter=pts_output_channel,
                                weight=self.pts_refine_out_weight,
                                bias=self.pts_refine_out_bias,
                                no_bias=False,
                                name="pts_refine_out")
        pts_refine_out = pts_refine_out + X.block_grad(pts_init_out)
        return pts_refine_out, cls_out
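self.dcn_kernel and self.dcn_pad are set outside this snippet; in RepPoints they are conventionally derived from the point count, as in this assumed sketch (p is the same params object used above):

import numpy as np

# assumed derivation: a k x k deformable kernel samples k*k points,
# so num_points must be a perfect square (9 points -> 3x3 kernel, pad 1)
dcn_kernel = int(np.sqrt(p.point_generate.num_points))
dcn_pad = (dcn_kernel - 1) // 2
assert dcn_kernel * dcn_kernel == p.point_generate.num_points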
Example 9
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten")
        fc1 = X.fc(flatten, filter=1024, name="bbox_fc1", init=xavier_init)
        fc1 = X.relu(fc1)
        fc2 = X.fc(fc1, filter=1024, name="bbox_fc2", init=xavier_init)
        fc2 = X.relu(fc2)

        self._head_feat = fc2

        return self._head_feat
Example 10
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        from mxnext.backbone.resnet_v2 import Builder

        unit = Builder.resnet_stage(
            conv_feat,
            name="stage4",
            num_block=3,
            filter=2048,
            stride=1,
            dilate=1,
            norm_type=self.p.normalizer,
            norm_mom=0.9,
            ndev=8
        )
        bn1 = X.fixbn(unit, name='bn1')
        relu1 = X.relu(bn1, name='relu1')
        relu1 = X.to_fp32(relu1, name='c5_to_fp32')
        pool1 = X.pool(relu1, global_pool=True, name='pool1')

        self._head_feat = pool1

        return self._head_feat
Example 11
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)

        flatten = X.reshape(conv_feat, shape=(0, -1, 1, 1), name="bbox_feat_reshape")
        fc1 = X.conv(flatten, filter=1024, name="bbox_fc1", init=xavier_init)
        fc1 = self.add_norm(fc1)
        fc1 = X.relu(fc1)
        fc2 = X.conv(fc1, filter=1024, name="bbox_fc2", init=xavier_init)
        fc2 = self.add_norm(fc2)
        fc2 = X.relu(fc2)

        self._head_feat = fc2

        return self._head_feat
Example 12
    def _get_mask_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        up_stride = int(self.pMask.resolution // self.pMaskRoi.out_size)
        dim_reduced = self.pMask.dim_reduced

        msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)

        current = conv_feat
        for i in range(4):
            current = X.conv(
                current,
                name="mask_fcn_conv{}".format(i + 1),
                filter=dim_reduced,
                kernel=3,
                no_bias=False,
                init=msra_init
            )
            current = self.add_norm(current)
            current = X.relu(current)

        mask_up = current
        for i in range(up_stride // 2):
            weight = X.var(
                name="mask_up{}_weight".format(i),
                init=msra_init,
                lr_mult=1,
                wd_mult=1)
            mask_up = mx.sym.Deconvolution(
                mask_up,
                kernel=(2, 2),
                stride=(2, 2),
                num_filter=dim_reduced,
                no_bias=False,
                weight=weight,
                name="mask_up{}".format(i)
                )
            mask_up = X.relu(
                mask_up,
                name="mask_up{}_relu".format(i))

        mask_up = X.to_fp32(mask_up, name='mask_up_to_fp32')
        self._head_feat = mask_up

        return self._head_feat
Example 13
    def get_output(self, conv_fpn_feat):
        if self.cls_logit_dict is not None and self.bbox_delta_dict is not None:
            return self.cls_logit_dict, self.bbox_delta_dict

        p = self.p
        num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
        conv_channel = p.head.conv_channel

        # all FPN levels share one set of RPN weights
        rpn_conv_weight = X.var('rpn_conv_weight', init=X.gauss(0.01))
        rpn_conv_bias = X.var('rpn_conv_bias', init=X.zero_init())
        rpn_conv_cls_weight = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
        rpn_conv_cls_bias = X.var('rpn_conv_cls_bias', init=X.zero_init())
        rpn_conv_bbox_weight = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
        rpn_conv_bbox_bias = X.var('rpn_conv_bbox_bias', init=X.zero_init())

        cls_logit_dict = {}
        bbox_delta_dict = {}

        for stride in p.anchor_generate.stride:
            rpn_conv = X.conv(
                conv_fpn_feat['stride%s' % stride],
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3_%s" % stride,
                no_bias=False,
                weight=rpn_conv_weight,
                bias=rpn_conv_bias
            )
            rpn_relu = X.relu(rpn_conv, name='rpn_relu_%s' % stride)
            if p.fp16:
                rpn_relu = X.to_fp32(rpn_relu, name="rpn_relu_%s_fp32" % stride)

            cls_logit = X.conv(
                rpn_relu,
                filter=2 * num_base_anchor,
                name="rpn_cls_score_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_cls_weight,
                bias=rpn_conv_cls_bias
            )

            bbox_delta = X.conv(
                rpn_relu,
                filter=4 * num_base_anchor,
                name="rpn_bbox_pred_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_bbox_weight,
                bias=rpn_conv_bbox_bias
            )

            cls_logit_dict[stride]  = cls_logit
            bbox_delta_dict[stride] = bbox_delta

        self.cls_logit_dict = cls_logit_dict
        self.bbox_delta_dict = bbox_delta_dict

        return self.cls_logit_dict, self.bbox_delta_dict
Example 14
    def _get_output(self, mask_pred_logits, conv_feat):
        num_class = self.pBbox.num_class

        msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)
        normal_init = mx.init.Normal(0.01)
        kaiming_uniform = mx.init.Xavier(rnd_type='uniform', factor_type='in', magnitude=3)

        mask_pred_logits = mx.sym.expand_dims(mask_pred_logits, axis=1)

        iou_head_maxpool_1 = X.pool(
            mask_pred_logits,
            name='iou_head_maxpool_1',
            kernel=2,
            stride=2,
            pad=0,
        )
        iou_head_input = X.concat([conv_feat, iou_head_maxpool_1], axis=1, name='iou_head_input')
        hi = iou_head_input
        for ii in range(3):
            hi = X.conv(
                hi,
                filter=256,
                kernel=3,
                stride=1,
                name='iou_head_conv_%d' % ii,
                no_bias=False,
                init=msra_init,
            )
            hi = X.relu(hi)
        hi = X.conv(
            hi,
            filter=256,
            kernel=3,
            stride=2,
            name='iou_head_conv_3',
            no_bias=False,
            init=msra_init
        )
        hi = X.relu(hi)
        hi = X.flatten(data=hi)
        fc1 = X.relu(X.fc(hi, filter=1024, name='iou_head_FC1', init=kaiming_uniform))
        fc2 = X.relu(X.fc(fc1, filter=1024, name='iou_head_FC2', init=kaiming_uniform))
        iou_pred_logits = X.fc(fc2, filter=num_class, name='iou_head_pred', init=normal_init)
        return iou_pred_logits
Example 15
def PConvModule(x, out_channels=256, kernel_size=[3, 3, 3], dilation=[1, 1, 1], groups=[1, 1, 1], ibn=None,
                part_deform=False, PConv_idx=-1, start_level=1, norm=None, bilinear_upsample=None, feat_sizes=None):
    assert PConv_idx > -1 and feat_sizes is not None
    name_pref = 'PConv{}_sepc'.format(PConv_idx)
    sepc0_weight, sepc0_bias = X.var(name=name_pref+'0_weight', init=X.gauss(std=0.01)), X.var(name=name_pref+'0_bias', init=X.zero_init())
    sepc1_weight, sepc1_bias = X.var(name=name_pref+'1_weight', init=X.gauss(std=0.01)), X.var(name=name_pref+'1_bias', init=X.zero_init())
    sepc2_weight, sepc2_bias = X.var(name=name_pref+'2_weight', init=X.gauss(std=0.01)), X.var(name=name_pref+'2_bias', init=X.zero_init())
    sepc0_offset_weight, sepc0_offset_bias = None, None
    sepc1_offset_weight, sepc1_offset_bias = None, None
    sepc2_offset_weight, sepc2_offset_bias = None, None
    if part_deform:
        # NOTE zero_init for offset's weight and bias
        sepc0_offset_weight, sepc0_offset_bias = X.var(name=name_pref+'0_offset_weight', init=X.zero_init()), X.var(name=name_pref+'0_offset_bias', init=X.zero_init())
        sepc1_offset_weight, sepc1_offset_bias = X.var(name=name_pref+'1_offset_weight', init=X.zero_init()), X.var(name=name_pref+'1_offset_bias', init=X.zero_init())
        sepc2_offset_weight, sepc2_offset_bias = X.var(name=name_pref+'2_offset_weight', init=X.zero_init()), X.var(name=name_pref+'2_offset_bias', init=X.zero_init())
    norm_func = None
    if ibn:
        assert norm is not None
        norm_func = partial(norm, name=name_pref + '_ibn')

    sepc_conv0_func = partial(
                sepc_conv, name='PConv{}_sepc0_'.format(PConv_idx), out_channels=out_channels,
                kernel_size=kernel_size[0], stride=1, padding=(kernel_size[0]+(dilation[0]-1)*2)//2,
                dilation=dilation[0], groups=groups[0], deformable_groups=1, part_deform=part_deform, start_level=start_level,
                weight=sepc0_weight, bias=sepc0_bias, weight_offset=sepc0_offset_weight, bias_offset=sepc0_offset_bias)
    sepc_conv1_func = partial(
                sepc_conv, name='PConv{}_sepc1_'.format(PConv_idx), out_channels=out_channels,
                kernel_size=kernel_size[1], stride=1, padding=(kernel_size[1]+(dilation[1]-1)*2)//2,
                dilation=dilation[1], groups=groups[1], deformable_groups=1, part_deform=part_deform, start_level=start_level,
                weight=sepc1_weight, bias=sepc1_bias, weight_offset=sepc1_offset_weight, bias_offset=sepc1_offset_bias)
    sepc_conv2_func = partial(
                sepc_conv, name='PConv{}_sepc2_'.format(PConv_idx), out_channels=out_channels,
                kernel_size=kernel_size[2], stride=2, padding=(kernel_size[2]+(dilation[2]-1)*2)//2,
                dilation=dilation[2], groups=groups[2], deformable_groups=1, part_deform=part_deform, start_level=start_level,
                weight=sepc2_weight, bias=sepc2_bias, weight_offset=sepc2_offset_weight, bias_offset=sepc2_offset_bias)
    next_x = []
    for level, feature in enumerate(x):
        temp_fea = sepc_conv1_func(i=level, x=feature)
        if level > 0:
            tmp = sepc_conv2_func(i=level, x=x[level - 1])
            temp_fea = temp_fea + tmp
        if level < len(x) - 1:
            tmp_x = sepc_conv0_func(i=level, x=x[level + 1])
            if bilinear_upsample:
                tmp_x = mx.contrib.symbol.BilinearResize2D(tmp_x, scale_height=2, scale_width=2,
                    name='PConv{}_upsampling_level{}'.format(PConv_idx, level))
            else:
                tmp_x = mx.sym.UpSampling(tmp_x, scale=2, sample_type='nearest', num_args=1,
                    name='PConv{}_upsampling_level{}'.format(PConv_idx, level))
            tmp_x = mx.sym.slice_like(tmp_x, temp_fea)
            temp_fea = temp_fea + tmp_x
        next_x.append(temp_fea)
    if ibn:
        next_x = ibn_func(next_x, norm_func, feat_sizes)
    next_x = [relu(item, name='PConv{}_level{}_relu'.format(PConv_idx, level)) for level, item in enumerate(next_x)]
    return next_x
Example 16
    def resnet_c4c5_factory(cls, depth, use_3x3_conv0, use_bn_preprocess,
                            num_branch, branch_dilates, branch_ids, branch_bn_shared, branch_conv_shared, branch_deform,
                            norm_type="local", norm_mom=0.9, ndev=None, fp16=False):
        c1, c2, c3, c4, c5 = cls.resnet_factory(depth, use_3x3_conv0, use_bn_preprocess,
                                                num_branch, branch_dilates, branch_ids, branch_bn_shared, branch_conv_shared, branch_deform,
                                                norm_type, norm_mom, ndev, fp16)
        c5 = X.fixbn(c5, "bn1")
        c5 = X.relu(c5)

        return c4, c5
Example 17
def SEPCFPN(inputs, out_channels=256, pconv_deform=False, lcconv_deform=None, ibn=None, Pconv_num=4,
            start_level=1, norm=None, bilinear_upsample=None, feat_sizes=None):
    assert feat_sizes is not None
    Pconvs_list = []
    for i in range(Pconv_num):
        Pconvs_list.append(partial(
            PConvModule, out_channels=out_channels, ibn=ibn, part_deform=pconv_deform, 
            PConv_idx=i, start_level=start_level, norm=norm, bilinear_upsample=bilinear_upsample, feat_sizes=feat_sizes))
    
    if lcconv_deform is not None:
        assert lcconv_deform in [False, True]
        lconv_weight, lconv_bias = X.var(name='LConv_weight', init=X.gauss(std=0.01)), X.var(name='LConv_bias', init=X.zero_init())
        cconv_weight, cconv_bias = X.var(name='CConv_weight', init=X.gauss(std=0.01)), X.var(name='CConv_bias', init=X.zero_init())
        lconv_offset_weight, lconv_offset_bias = None, None
        cconv_offset_weight, cconv_offset_bias = None, None
        if lcconv_deform:
            lconv_offset_weight, lconv_offset_bias = X.var(name='LConv_offset_weight', init=X.zero_init()), X.var(name='LConv_offset_bias', init=X.zero_init())
            cconv_offset_weight, cconv_offset_bias = X.var(name='CConv_offset_weight', init=X.zero_init()), X.var(name='CConv_offset_bias', init=X.zero_init())
        lconv_func = partial(sepc_conv, name='LConv{}_', out_channels=out_channels, kernel_size=3, stride=1, padding=1,
                dilation=1, groups=1, deformable_groups=1, part_deform=lcconv_deform, start_level=start_level,
                weight=lconv_weight, bias=lconv_bias, weight_offset=lconv_offset_weight, bias_offset=lconv_offset_bias)
        cconv_func = partial(sepc_conv, name='CConv{}_', out_channels=out_channels, kernel_size=3, stride=1, padding=1,
                dilation=1, groups=1, deformable_groups=1, part_deform=lcconv_deform, start_level=start_level,
                weight=cconv_weight, bias=cconv_bias, weight_offset=cconv_offset_weight, bias_offset=cconv_offset_bias)

        if ibn:
            assert norm is not None
            lbn = partial(norm, name='lconv_ibn')
            cbn = partial(norm, name='cconv_ibn')

    x = inputs
    for pconv in Pconvs_list:
        x = pconv(x)
    if lcconv_deform is None:
        return x
    cls_outs = [cconv_func(i=level, x=item) for level, item in enumerate(x)]
    loc_outs = [lconv_func(i=level, x=item) for level, item in enumerate(x)]
    if ibn:
        cls_outs = ibn_func(cls_outs, cbn, feat_sizes)
        loc_outs = ibn_func(loc_outs, lbn, feat_sizes)
    outs = [mx.sym.Concat(*[relu(s), relu(l)], num_args=2, dim=1) for s, l in zip(cls_outs, loc_outs)]
    return outs
Example 18
    def _reg_head(self, conv_feat):
        num_block = self.p.num_block or 4

        for i in range(num_block):
            conv_feat = X.conv(conv_feat,
                               kernel=3,
                               filter=256,
                               init=X.gauss(0.01),
                               name="bbox_reg_block%s" % (i + 1))
            conv_feat = self.add_norm(conv_feat)
            conv_feat = X.relu(conv_feat)

        return conv_feat
Example 19
    def _convs_and_fcs(self, x, num_convs, num_fcs, name, conv_init, fc_init):
        '''
        Args:
            x: [N, C, H, W] feature maps
            num_convs: int, number of 3x3 convs
            num_fcs: int, number of fc layers (implemented as 1x1 convs)
            name: str, prefix for the layer names
            conv_init: mx initializer for the convs
            fc_init: mx initializer for the fcs
        Returns:
            x: [N, C, H, W] after the convs, or [N, C', 1, 1] after the fcs
        '''
        if num_convs == 0 and num_fcs == 0:
            return x

        out_channels = self.p.TSD.conv_out_channels
        out_fc_channels = self.p.TSD.fc_out_channels

        if num_convs > 0:
            for i in range(num_convs):
                x = X.relu(
                    X.conv(x,
                           kernel=3,
                           filter=out_channels,
                           no_bias=False,
                           name=name + '_conv%s' % i,
                           init=conv_init))

        if num_fcs > 0:
            x = X.reshape(x,
                          shape=(0, -1, 1, 1),
                          name=name + '_conv_fc_flatten')
            for i in range(num_fcs):
                x = X.relu(
                    X.conv(x,
                           kernel=1,
                           filter=out_fc_channels,
                           no_bias=False,
                           name=name + '_fc%s' % i,
                           init=fc_init))
        return x
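A 1x1 X.conv on a [N, C, 1, 1] tensor is mathematically identical to a fully connected layer, which is why the reshape-then-conv pattern above can stand in for X.fc. A quick NDArray check of that equivalence (shapes are arbitrary):

import mxnet as mx

x = mx.nd.random.normal(shape=(2, 8))
w = mx.nd.random.normal(shape=(4, 8))

fc_out = mx.nd.FullyConnected(x, weight=w, no_bias=True, num_hidden=4)
conv_out = mx.nd.Convolution(x.reshape((2, 8, 1, 1)), weight=w.reshape((4, 8, 1, 1)),
                             no_bias=True, kernel=(1, 1), num_filter=4)

# the two outputs agree up to floating-point error
assert float(mx.nd.abs(fc_out - conv_out.reshape((2, 4))).max()) < 1e-5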
Example 20
    def _cls_subnet(self, conv_feat, stride):
        p = self.p
        norm = p.normalizer
        conv_channel = p.head.conv_channel

        # classification subnet
        cls_conv1 = X.conv(data=conv_feat,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv1_weight,
                           bias=self.cls_conv1_bias,
                           no_bias=False,
                           name="cls_conv1")
        cls_conv1 = norm(cls_conv1, name="cls_conv1_bn_s{}".format(stride))
        cls_conv1_relu = X.relu(cls_conv1)
        cls_conv2 = X.conv(data=cls_conv1_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv2_weight,
                           bias=self.cls_conv2_bias,
                           no_bias=False,
                           name="cls_conv2")
        cls_conv2 = norm(cls_conv2, name="cls_conv2_bn_s{}".format(stride))
        cls_conv2_relu = X.relu(cls_conv2)
        cls_conv3 = X.conv(data=cls_conv2_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv3_weight,
                           bias=self.cls_conv3_bias,
                           no_bias=False,
                           name="cls_conv3")
        cls_conv3 = norm(cls_conv3, name="cls_conv3_bn_s{}".format(stride))
        cls_conv3_relu = X.relu(cls_conv3)

        if p.fp16:
            cls_conv3_relu = X.to_fp32(cls_conv3_relu, name="cls_conv3_fp32")

        return cls_conv3_relu
Example 21
    def _reg_subnet(self, conv_feat, stride):
        p = self.p
        norm = p.normalizer
        conv_channel = p.head.conv_channel

        # regression subnet
        reg_conv1 = X.conv(data=conv_feat,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.reg_conv1_weight,
                           bias=self.reg_conv1_bias,
                           no_bias=False,
                           name="reg_conv1")
        reg_conv1 = norm(reg_conv1, name="reg_conv1_bn_s{}".format(stride))
        reg_conv1_relu = X.relu(reg_conv1)
        reg_conv2 = X.conv(data=reg_conv1_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.reg_conv2_weight,
                           bias=self.reg_conv2_bias,
                           no_bias=False,
                           name="reg_conv2")
        reg_conv2 = norm(reg_conv2, name="reg_conv2_bn_s{}".format(stride))
        reg_conv2_relu = X.relu(reg_conv2)
        reg_conv3 = X.conv(data=reg_conv2_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.reg_conv3_weight,
                           bias=self.reg_conv3_bias,
                           no_bias=False,
                           name="reg_conv3")
        reg_conv3 = norm(reg_conv3, name="reg_conv3_bn_s{}".format(stride))
        reg_conv3_relu = X.relu(reg_conv3)

        if p.fp16:
            reg_conv3_relu = X.to_fp32(reg_conv3_relu, name="reg_conv3_fp32")

        return reg_conv3_relu
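In these two subnets the convolution weights (self.cls_conv*_weight, self.reg_conv*_weight) are shared across FPN levels while each level keeps its own normalization statistics, hence the _bn_s{stride} name suffix. A hypothetical call site, with conv_fpn_feat and the stride list modeled on Example 13:

# one shared subnet applied per FPN level; only the BN layers differ per stride
for stride in p.anchor_generate.stride:  # e.g. (8, 16, 32, 64, 128)
    feat = conv_fpn_feat["stride%s" % stride]
    cls_feat = self._cls_subnet(feat, stride)
    reg_feat = self._reg_subnet(feat, stride)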
Example 22
    def _get_bbox_head_logit(self, conv_feat):
        #if self._head_feat is not None:
        #    return self._head_feat

        stage = self.stage

        flatten = X.flatten(conv_feat, name="bbox_feat_flatten_" + stage)
        reshape = X.reshape(flatten, (0, 0, 1, 1),
                            name="bbox_feat_reshape_" + stage)
        fc1 = X.conv(reshape,
                     filter=1024,
                     weight=self.fc1_weight,
                     name="bbox_fc1_" + stage)
        fc1_relu = X.relu(fc1, name="bbox_fc1_relu_" + stage)
        fc2 = X.conv(fc1_relu,
                     filter=1024,
                     weight=self.fc2_weight,
                     name="bbox_fc2_" + stage)
        fc2_relu = X.relu(fc2, name="bbox_fc2_" + stage)

        self._head_feat = fc2_relu

        return self._head_feat
Example 23
    def _init_pts(self, reg_feat):
        p = self.p
        point_conv_channel = p.head.point_conv_channel
        pts_output_channel = p.point_generate.num_points * 2

        pts_init_conv = X.conv(data=reg_feat,
                               kernel=3,
                               filter=point_conv_channel,
                               weight=self.pts_init_conv_weight,
                               bias=self.pts_init_conv_bias,
                               no_bias=False,
                               name="pts_init_conv")
        pts_init_conv_relu = X.relu(pts_init_conv)
        pts_init_out = X.conv(data=pts_init_conv_relu,
                              kernel=1,
                              filter=pts_output_channel,
                              weight=self.pts_init_out_weight,
                              bias=self.pts_init_out_bias,
                              no_bias=False,
                              name="pts_init_out")

        return pts_init_out
Example 24
    def resnet_trident_unit(cls,
                            data,
                            name,
                            filter,
                            stride,
                            dilate,
                            proj,
                            norm_type,
                            norm_mom,
                            ndev,
                            branch_ids,
                            branch_bn_shared,
                            branch_conv_shared,
                            branch_deform=False):
        """
        A ResNet unit consists of 2 or 3 convolutions and a shortcut.
        :param data:
        :param name:
        :param filter:
        :param stride:
        :param dilate:
        :param proj:
        :param norm_type:
        :param norm_mom:
        :param ndev:
        :param branch_ids:
        :param branch_bn_shared:
        :param branch_conv_shared:
        :param branch_deform:
        :return:
        """
        if branch_ids is None:
            branch_ids = range(len(data))

        norm = X.normalizer_factory(type=norm_type, ndev=ndev, mom=norm_mom)

        bn1 = cls.bn_shared(data,
                            name=name + "_bn1",
                            normalizer=norm,
                            branch_ids=branch_ids,
                            share_weight=branch_bn_shared)
        relu1 = [X.relu(bn) for bn in bn1]
        conv1 = cls.conv_shared(relu1,
                                name=name + "_conv1",
                                num_filter=filter // 4,
                                kernel=(1, 1),
                                branch_ids=branch_ids,
                                share_weight=branch_conv_shared)

        bn2 = cls.bn_shared(conv1,
                            name=name + "_bn2",
                            normalizer=norm,
                            branch_ids=branch_ids,
                            share_weight=branch_bn_shared)
        relu2 = [X.relu(bn) for bn in bn2]
        if not branch_deform:
            conv2 = cls.conv_shared(relu2,
                                    name=name + "_conv2",
                                    num_filter=filter // 4,
                                    kernel=(3, 3),
                                    pad=dilate,
                                    stride=stride,
                                    dilate=dilate,
                                    branch_ids=branch_ids,
                                    share_weight=branch_conv_shared)
        else:
            conv2_offset = cls.conv_shared(relu2,
                                           name=name + "_conv2_offset",
                                           num_filter=72,
                                           kernel=(3, 3),
                                           pad=(1, 1),
                                           stride=(1, 1),
                                           dilate=(1, 1),
                                           no_bias=False,
                                           branch_ids=branch_ids,
                                           share_weight=branch_conv_shared)
            conv2 = cls.deform_conv_shared(relu2,
                                           name=name + "_conv2",
                                           conv_offset=conv2_offset,
                                           num_filter=filter // 4,
                                           kernel=(3, 3),
                                           pad=dilate,
                                           stride=stride,
                                           dilate=dilate,
                                           num_deformable_group=4,
                                           branch_ids=branch_ids,
                                           share_weight=branch_conv_shared)

        bn3 = cls.bn_shared(conv2,
                            name=name + "_bn3",
                            normalizer=norm,
                            branch_ids=branch_ids,
                            share_weight=branch_bn_shared)
        relu3 = [X.relu(bn) for bn in bn3]
        conv3 = cls.conv_shared(relu3,
                                name=name + "_conv3",
                                num_filter=filter,
                                kernel=(1, 1),
                                branch_ids=branch_ids,
                                share_weight=branch_conv_shared)

        if proj:
            shortcut = cls.conv_shared(relu1,
                                       name=name + "_sc",
                                       num_filter=filter,
                                       kernel=(1, 1),
                                       branch_ids=branch_ids,
                                       share_weight=branch_conv_shared)
        else:
            shortcut = data

        return [X.add(conv3_i, shortcut_i, name=name + "_plus_branch{}".format(i)) \
                for i, conv3_i, shortcut_i in zip(branch_ids, conv3, shortcut)]
Example 25
    def get_retinanet_neck(self, data):
        norm = self.p.normalizer
        c2, c3, c4, c5 = data

        import mxnet as mx
        xavier_init = mx.init.Xavier(factor_type="avg",
                                     rnd_type="uniform",
                                     magnitude=3)
        # P5
        p5 = X.conv(data=c5,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P5_lateral_weight", init=xavier_init),
                    bias=X.var(name="P5_lateral_bias", init=X.zero_init()),
                    name="P5_lateral")
        p5_conv = X.conv(data=p5,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P5_conv_weight", init=xavier_init),
                         bias=X.var(name="P5_conv_bias", init=X.zero_init()),
                         name="P5_conv")

        # P4
        p5_up = mx.sym.UpSampling(p5,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P5_upsampling",
                                  num_args=1)
        p4_la = X.conv(data=c4,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P4_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P4_lateral_bias", init=X.zero_init()),
                       name="P4_lateral")
        p5_clip = mx.sym.slice_like(p5_up, p4_la, name="P4_clip")
        p4 = mx.sym.add_n(p5_clip, p4_la, name="P4_sum")

        p4_conv = X.conv(data=p4,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P4_conv_weight", init=xavier_init),
                         bias=X.var(name="P4_conv_bias", init=X.zero_init()),
                         name="P4_conv")

        # P3
        p4_up = mx.sym.UpSampling(p4,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P4_upsampling",
                                  num_args=1)
        p3_la = X.conv(data=c3,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P3_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P3_lateral_bias", init=X.zero_init()),
                       name="P3_lateral")
        p4_clip = mx.sym.slice_like(p4_up, p3_la, name="P3_clip")
        p3 = mx.sym.add_n(p4_clip, p3_la, name="P3_sum")

        p3_conv = X.conv(data=p3,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P3_conv_weight", init=xavier_init),
                         bias=X.var(name="P3_conv_bias", init=X.zero_init()),
                         name="P3_conv")

        # P6
        P6 = X.conv(data=c5,
                    kernel=3,
                    stride=2,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P6_conv_weight", init=xavier_init),
                    bias=X.var(name="P6_conv_bias", init=X.zero_init()),
                    name="P6_conv")

        # P7
        P6_relu = X.relu(data=P6, name="P6_relu")
        P7 = X.conv(data=P6_relu,
                    kernel=3,
                    stride=2,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P7_conv_weight", init=xavier_init),
                    bias=X.var(name="P7_conv_bias", init=X.zero_init()),
                    name="P7_conv")

        p3_conv = norm(p3_conv, name="P3_conv_bn")
        p4_conv = norm(p4_conv, name="P4_conv_bn")
        p5_conv = norm(p5_conv, name="P5_conv_bn")
        P6 = norm(P6, name="P6_conv_bn")
        P7 = norm(P7, name="P7_conv_bn")

        return p3_conv, p4_conv, p5_conv, P6, P7
Example 26
    def get_retinanet_neck(self, data):
        if self.neck is not None:
            return self.neck

        c2, c3, c4, c5 = data

        import mxnet as mx
        xavier_init = mx.init.Xavier(factor_type="in",
                                     rnd_type="uniform",
                                     magnitude=3)
        # P5
        p5 = X.conv(data=c5,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P5_lateral_weight", init=xavier_init),
                    bias=X.var(name="P5_lateral_bias", init=X.zero_init()),
                    name="P5_lateral")
        p5_conv = X.conv(data=p5,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P5_conv_weight", init=xavier_init),
                         bias=X.var(name="P5_conv_bias", init=X.zero_init()),
                         name="P5_conv")

        # P4
        p5_up = mx.sym.UpSampling(p5,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P5_upsampling",
                                  num_args=1)
        p4_la = X.conv(data=c4,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P4_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P4_lateral_bias", init=X.zero_init()),
                       name="P4_lateral")
        p5_clip = mx.sym.slice_like(p5_up, p4_la, name="P4_clip")
        p4 = mx.sym.add_n(p5_clip, p4_la, name="P4_sum")

        p4_conv = X.conv(data=p4,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P4_conv_weight", init=xavier_init),
                         bias=X.var(name="P4_conv_bias", init=X.zero_init()),
                         name="P4_conv")

        # P3
        p4_up = mx.sym.UpSampling(p4,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P4_upsampling",
                                  num_args=1)
        p3_la = X.conv(data=c3,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P3_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P3_lateral_bias", init=X.zero_init()),
                       name="P3_lateral")
        p4_clip = mx.sym.slice_like(p4_up, p3_la, name="P3_clip")
        p3 = mx.sym.add_n(p4_clip, p3_la, name="P3_sum")

        p3_conv = X.conv(data=p3,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P3_conv_weight", init=xavier_init),
                         bias=X.var(name="P3_conv_bias", init=X.zero_init()),
                         name="P3_conv")

        # P6
        p6 = X.conv(data=c5,
                    kernel=3,
                    stride=2,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P6_conv_weight", init=xavier_init),
                    bias=X.var(name="P6_conv_bias", init=X.zero_init()),
                    name="P6_conv")

        # P7
        p6_relu = X.relu(data=p6, name="P6_relu")
        p7 = X.conv(data=p6_relu,
                    kernel=3,
                    stride=2,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P7_conv_weight", init=xavier_init),
                    bias=X.var(name="P7_conv_bias", init=X.zero_init()),
                    name="P7_conv")

        self.neck = dict(stride8=p3_conv,
                         stride16=p4_conv,
                         stride32=p5_conv,
                         stride64=p6,
                         stride128=p7)

        return self.neck
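The stride-keyed dict returned here is exactly what the FPN RPN head in Example 13 indexes via conv_fpn_feat['stride%s' % stride]. A sketch of the hand-off (the neck and rpn_head object names are assumptions):

# hypothetical wiring between this neck and the head of Example 13
neck_feats = neck.get_retinanet_neck([c2, c3, c4, c5])
cls_logit_dict, bbox_delta_dict = rpn_head.get_output(neck_feats)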
Example 27
    def get_output(self, fpn_conv_feats, roi_feat, rois, is_train):
        '''
        Args:
            fpn_conv_feats: dict of FPN features, each [batch_image, in_channels, fh, fw]
            roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
            rois: [batch_image, image_roi, 4]
            is_train: boolean
        Returns:
            cls_logit: [batch_image * image_roi, num_class]
            bbox_delta: [batch_image * image_roi, num_class * 4]
            tsd_cls_logit: [batch_image * image_roi, num_class]
            tsd_bbox_delta: [batch_image * image_roi, num_class * 4]
            delta_c: [batch_image * image_roi, 2*roi_size*roi_size, 1, 1]
            delta_r: [batch_image * image_roi, 2, 1, 1]
        '''
        xavier_init = mx.init.Xavier(factor_type="in",
                                     rnd_type="uniform",
                                     magnitude=3)
        # roi_feat: [batch_roi, 256, 7, 7]

        flatten = X.reshape(
            roi_feat, shape=(0, -1, 1, 1),
            name="bbox_feat_reshape")  # [batch_roi, 256*7*7, 1, 1]

        x1 = flatten
        x2 = X.relu(X.conv(data=x1,
                           kernel=1,
                           filter=256,
                           name="delta_shared_fc1",
                           no_bias=False),
                    name="delta_shared_fc1_relu")  # [batch_roi, 256, 1, 1]

        delta_c = X.relu(X.conv(x2,
                                filter=256,
                                name="delta_c_fc1",
                                init=X.gauss(0.01)),
                         name="delta_c_fc1_relu")  # [batch_roi, 256, 1, 1]
        delta_c = X.conv(delta_c,
                         filter=2 * self.p.roi_size**2,
                         name="delta_c_fc2",
                         init=X.gauss(0.01))  # [batch_roi, 2*7*7, 1, 1]

        delta_r = X.relu(X.conv(x2,
                                filter=256,
                                name="delta_r_fc1",
                                init=X.gauss(0.01)),
                         name="delta_r_fc1_relu")  # [batch_roi, 256, 1, 1]
        delta_r = X.conv(delta_r,
                         filter=2,
                         name="delta_r_fc2",
                         init=X.gauss(0.01))  # [batch_roi, 2, 1, 1]

        image_roi = self.p.image_roi if is_train else 1000
        batch_image = self.p.batch_image

        TSD_cls_feats = self.delta_c_pool.get_roi_feature(
            fpn_conv_feats,
            rois,
            delta_c,
            image_rois=image_roi,
            batch_image=batch_image)  # [batch_roi, 256, 7, 7]
        TSD_loc_feats = self.delta_r_pool.get_roi_feature(
            fpn_conv_feats,
            rois,
            delta_r,
            image_rois=image_roi,
            batch_image=batch_image)  # [batch_roi, 256, 7, 7]

        TSD_x_cls = self._convs_and_fcs(
            TSD_cls_feats,
            self.p.TSD.num_shared_convs,
            self.p.TSD.num_shared_fcs,
            name='TSD_pc',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_out_channels, 1, 1]
        TSD_x_reg = self._convs_and_fcs(
            TSD_loc_feats,
            self.p.TSD.num_shared_convs,
            self.p.TSD.num_shared_fcs,
            name='TSD_pr',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_out_channels, 1, 1]

        TSD_x_cls = self._convs_and_fcs(
            TSD_x_cls,
            0,
            self.p.TSD.num_cls_fcs,
            name='TSD_cls',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_out_channels, 1, 1]
        TSD_x_reg = self._convs_and_fcs(
            TSD_x_reg,
            0,
            self.p.TSD.num_reg_fcs,
            name='TSD_reg',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_out_channels, 1, 1]

        num_class = self.p.num_class
        num_reg_class = 2 if self.p.regress_target.class_agnostic else num_class

        tsd_cls_logit = X.fc(TSD_x_cls,
                             filter=num_class,
                             name='tsd_cls_logit',
                             init=X.gauss(0.01))
        tsd_bbox_delta = X.fc(TSD_x_reg,
                              filter=4 * num_reg_class,
                              name='tsd_reg_delta',
                              init=X.gauss(0.01))

        x = self._convs_and_fcs(roi_feat,
                                self.p.TSD.num_shared_convs,
                                self.p.TSD.num_shared_fcs,
                                name='shared_fc',
                                conv_init=xavier_init,
                                fc_init=X.gauss(0.01))
        x_cls = x
        x_reg = x
        x_cls = self._convs_and_fcs(x_cls,
                                    0,
                                    self.p.TSD.num_cls_fcs,
                                    name='cls',
                                    conv_init=xavier_init,
                                    fc_init=X.gauss(0.01))
        x_reg = self._convs_and_fcs(x_reg,
                                    0,
                                    self.p.TSD.num_reg_fcs,
                                    name='reg',
                                    conv_init=xavier_init,
                                    fc_init=X.gauss(0.01))
        cls_logit = X.fc(x_cls,
                         filter=num_class,
                         name='bbox_cls_logit',
                         init=X.gauss(0.01))
        bbox_delta = X.fc(x_reg,
                          filter=4 * num_reg_class,
                          name='bbox_reg_delta',
                          init=X.gauss(0.01))

        if self.p.fp16:
            cls_logit = X.to_fp32(cls_logit, name="cls_logits_fp32")
            bbox_delta = X.to_fp32(bbox_delta, name="bbox_delta_fp32")
            tsd_cls_logit = X.to_fp32(tsd_cls_logit, name="tsd_cls_logit_fp32")
            tsd_bbox_delta = X.to_fp32(tsd_bbox_delta,
                                       name="tsd_bbox_delta_fp32")
            delta_c = X.to_fp32(delta_c, name="delta_c_fp32")
            delta_r = X.to_fp32(delta_r, name="delta_r_fp32")

        return cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r
Example 28
    def get_retinanet_neck(data):
        c2, c3, c4, c5 = data

        import mxnet as mx
        xavier_init = mx.init.Xavier(factor_type="in",
                                     rnd_type="uniform",
                                     magnitude=3)
        # P5
        p5 = X.conv(data=c5,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P5_lateral_weight", init=xavier_init),
                    bias=X.var(name="P5_lateral_bias", init=X.zero_init()),
                    name="P5_lateral")
        p5_conv = X.conv(data=p5,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P5_conv_weight", init=xavier_init),
                         bias=X.var(name="P5_conv_bias", init=X.zero_init()),
                         name="P5_conv")

        # P4
        p5_up = mx.sym.UpSampling(p5,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P5_upsampling",
                                  num_args=1)
        p4_la = X.conv(data=c4,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P4_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P4_lateral_bias", init=X.zero_init()),
                       name="P4_lateral")
        p5_clip = mx.sym.Crop(*[p5_up, p4_la], name="P4_clip")
        p4 = mx.sym.ElementWiseSum(*[p5_clip, p4_la], name="P4_sum")

        p4_conv = X.conv(data=p4,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P4_conv_weight", init=xavier_init),
                         bias=X.var(name="P4_conv_bias", init=X.zero_init()),
                         name="P4_conv")

        # P3
        p4_up = mx.sym.UpSampling(p4,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P4_upsampling",
                                  num_args=1)
        p3_la = X.conv(data=c3,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P3_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P3_lateral_bias", init=X.zero_init()),
                       name="P3_lateral")
        p4_clip = mx.sym.Crop(*[p4_up, p3_la], name="P3_clip")
        p3 = mx.sym.ElementWiseSum(*[p4_clip, p3_la], name="P3_sum")

        p3_conv = X.conv(data=p3,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P3_conv_weight", init=xavier_init),
                         bias=X.var(name="P3_conv_bias", init=X.zero_init()),
                         name="P3_conv")

        # P6
        p6 = X.conv(data=c5,
                    kernel=3,
                    stride=2,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P6_conv_weight", init=xavier_init),
                    bias=X.var(name="P6_conv_bias", init=X.zero_init()),
                    name="P6_conv")

        # P7
        p6_relu = X.relu(data=p6, name="P6_relu")
        p7 = X.conv(data=p6_relu,
                    kernel=3,
                    stride=2,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P7_conv_weight", init=xavier_init),
                    bias=X.var(name="P7_conv_bias", init=X.zero_init()),
                    name="P7_conv")

        return p3_conv, p4_conv, p5_conv, p6, p7
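
A minimal usage sketch for the neck above. The backbone outputs are stubbed
with plain variables here; in the full repository they would come from a
ResNet, and the function would be reached through its class. This also
assumes the X helper module used throughout these examples is importable:

import mxnet as mx

# stub backbone outputs; real c2-c5 would come from a ResNet
c2, c3, c4, c5 = [mx.sym.var("c%d" % i) for i in range(2, 6)]
p3, p4, p5, p6, p7 = get_retinanet_neck([c2, c3, c4, c5])
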
Example No. 29
    def get_refine_output(self, roi_feature, cls_logit, bbox_delta,
                          cls_sec_logit, bbox_sec_delta):
        p = self.p
        num_class = p.num_class
        repeat_time = p.repeat_time
        class_agnostic = p.regress_target.class_agnostic
        num_reg_class = 2 if class_agnostic else num_class

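        # drop the background score (channel 0) and the corresponding
        # class-0 box delta before building the prediction feature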
        cls_logit = mx.sym.slice_axis(mx.sym.softmax(cls_logit),
                                      axis=1,
                                      begin=1,
                                      end=num_class)
        cls_sec_logit = mx.sym.slice_axis(mx.sym.softmax(cls_sec_logit),
                                          axis=1,
                                          begin=1,
                                          end=num_class)
        bbox_delta = mx.sym.slice_axis(bbox_delta,
                                       axis=1,
                                       begin=4,
                                       end=num_reg_class * 4)
        bbox_sec_delta = mx.sym.slice_axis(bbox_sec_delta,
                                           axis=1,
                                           begin=4,
                                           end=num_reg_class * 4)

        pred_feat1 = mx.sym.tile(mx.sym.concat(*[bbox_delta, cls_logit],
                                               dim=1),
                                 reps=(1, repeat_time))
        pred_feat2 = mx.sym.tile(mx.sym.concat(*[bbox_sec_delta, cls_sec_logit],
                                               dim=1),
                                 reps=(1, repeat_time))

        refine_feat1 = mx.sym.concat(*[roi_feature, pred_feat1], dim=1)
        refine_feat2 = mx.sym.concat(*[roi_feature, pred_feat2], dim=1)

        head_feat1 = X.fc(refine_feat1,
                          filter=1024,
                          weight=self.fc3_weight,
                          bias=self.fc3_bias,
                          name='fc3_conv_refine1')
        head_feat1 = X.relu(head_feat1)
        head_feat2 = X.fc(refine_feat2,
                          filter=1024,
                          weight=self.fc3_weight,
                          bias=self.fc3_bias,
                          name='fc3_conv_refine2')
        head_feat2 = X.relu(head_feat2)
        refine_cls_logit = X.fc(head_feat1,
                                filter=num_class,
                                name='refine_bbox_cls_logit1',
                                init=X.gauss(0.01))

        refine_cls_sec_logit = X.fc(head_feat2,
                                    filter=num_class,
                                    name='refine_bbox_cls_logit2',
                                    init=X.gauss(0.01))

        refine_bbox_delta = X.fc(head_feat1,
                                 filter=4 * num_reg_class,
                                 name='refine_bbox_reg_delta1',
                                 init=X.gauss(0.001))

        refine_bbox_sec_delta = X.fc(head_feat2,
                                     filter=4 * num_reg_class,
                                     name='refine_bbox_reg_delta2',
                                     init=X.gauss(0.001))
        return refine_cls_logit, refine_bbox_delta, refine_cls_sec_logit, refine_bbox_sec_delta
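
A quick sanity check of the feature widths implied by the slicing and tiling
above. This is pure arithmetic; the numbers are illustrative, not taken from
the original configuration:

num_class, repeat_time = 81, 2              # e.g. 80 classes + background
class_agnostic = True
num_reg_class = 2 if class_agnostic else num_class

cls_width = num_class - 1                   # background score dropped
reg_width = 4 * (num_reg_class - 1)         # class-0 delta dropped
pred_width = repeat_time * (cls_width + reg_width)
print(pred_width)  # 168, so fc3 sees roi_feature_dim + 168 input channels
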
Example No. 30
    def get_output(self, conv_fpn_feat):
        if self.cls_logit_dict is not None and self.bbox_delta_dict is not None:
            return self.cls_logit_dict, self.bbox_delta_dict

        p = self.p
        num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
        conv_channel = p.head.conv_channel

        # RPN parameters are shared across all FPN strides
        rpn_conv_weight = X.var('rpn_conv_weight', init=X.gauss(0.01))
        rpn_conv_bias = X.var('rpn_conv_bias', init=X.zero_init())
        rpn_conv_gamma = X.var('rpn_conv_gamma')
        rpn_conv_beta = X.var('rpn_conv_beta')
        rpn_conv_mmean = X.var('rpn_conv_moving_mean')
        rpn_conv_mvar = X.var('rpn_conv_moving_var')
        rpn_conv_cls_weight = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
        rpn_conv_cls_bias = X.var('rpn_conv_cls_bias', init=X.zero_init())
        rpn_conv_bbox_weight = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
        rpn_conv_bbox_bias = X.var('rpn_conv_bbox_bias', init=X.zero_init())

        cls_logit_dict = {}
        bbox_delta_dict = {}

        for stride in p.anchor_generate.stride:
            rpn_conv = X.conv(
                conv_fpn_feat['stride%s' % stride],
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3_%s" % stride,
                no_bias=False,
                weight=rpn_conv_weight,
                bias=rpn_conv_bias
            )

            if p.normalizer.__name__ == "fix_bn":
                pass  # frozen BN: no normalization layer is inserted
            elif p.normalizer.__name__ == "sync_bn":
                rpn_conv = p.normalizer(
                    rpn_conv,
                    gamma=rpn_conv_gamma,
                    beta=rpn_conv_beta,
                    moving_mean=rpn_conv_mmean,
                    moving_var=rpn_conv_mvar,
                    name="rpn_conv_3x3_bn_%s" % stride
                )
            elif p.normalizer.__name__ == "gn":
                rpn_conv = p.normalizer(
                    rpn_conv,
                    gamma=rpn_conv_gamma,
                    beta=rpn_conv_beta,
                    name="rpn_conv_3x3_gn_%s" % stride
                )
            else:
                raise NotImplementedError("Unsupported normalizer {}".format(p.normalizer.__name__))

            rpn_relu = X.relu(rpn_conv, name='rpn_relu_%s' % stride)
            if p.fp16:
                rpn_relu = X.to_fp32(rpn_relu, name="rpn_relu_%s_fp32" % stride)
            cls_logit = X.conv(
                rpn_relu,
                filter=2 * num_base_anchor,
                name="rpn_cls_score_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_cls_weight,
                bias=rpn_conv_cls_bias
            )

            bbox_delta = X.conv(
                rpn_relu,
                filter=4 * num_base_anchor,
                name="rpn_bbox_pred_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_bbox_weight,
                bias=rpn_conv_bbox_bias
            )

            cls_logit_dict[stride] = cls_logit
            bbox_delta_dict[stride] = bbox_delta

        self.cls_logit_dict = cls_logit_dict
        self.bbox_delta_dict = bbox_delta_dict

        return self.cls_logit_dict, self.bbox_delta_dict
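
A minimal sketch of how this head would be driven. The FPN feature dict is
stubbed with plain variables (in the full repository it comes from the neck),
head stands for an instance of the class this method belongs to, and the
stride list is illustrative; it must match p.anchor_generate.stride:

import mxnet as mx

strides = (4, 8, 16, 32, 64)
conv_fpn_feat = {"stride%s" % s: mx.sym.var("fpn_stride%s" % s)
                 for s in strides}
cls_logit_dict, bbox_delta_dict = head.get_output(conv_fpn_feat)
# a second call returns the cached dicts without rebuilding the graph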