def get_symbol_train(num_classes=20):
    """
    Build the SSD (single-shot multi-box detection) training symbol on a
    VGG-16 style backbone.

    The fc6/fc7 layers of VGG-16 are expressed as convolutions (conv6/conv7),
    which makes the network slightly smaller than the original VGG-16.
    Extra SSD feature layers, the multibox heads and the training losses are
    attached on top.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background

    Returns:
    ----------
    mx.Symbol
        group of ``[cls_prob, loc_loss, cls_label]``
    """
    sym = mx.symbol

    def conv_relu(bottom, tag, channels):
        # 3x3 convolution with 1-pixel padding followed by ReLU;
        # layers are named "conv<tag>" / "relu<tag>"
        conv = sym.Convolution(data=bottom,
                               kernel=(3, 3),
                               pad=(1, 1),
                               num_filter=channels,
                               name="conv" + tag)
        return sym.Activation(data=conv, act_type="relu", name="relu" + tag)

    def halve(bottom, name):
        # 2x2 max pooling with stride 2
        return sym.Pooling(data=bottom,
                           pool_type="max",
                           kernel=(2, 2),
                           stride=(2, 2),
                           name=name)

    data = sym.Variable(name="data")
    label = sym.Variable(name="label")

    # group 1
    net = conv_relu(data, "1_1", 64)
    net = conv_relu(net, "1_2", 64)
    net = halve(net, "pool1")
    # group 2
    net = conv_relu(net, "2_1", 128)
    net = conv_relu(net, "2_2", 128)
    net = halve(net, "pool2")
    # group 3 (pool3 uses the "full" pooling convention)
    net = conv_relu(net, "3_1", 256)
    net = conv_relu(net, "3_2", 256)
    net = conv_relu(net, "3_3", 256)
    net = sym.Pooling(data=net,
                      pool_type="max",
                      kernel=(2, 2),
                      stride=(2, 2),
                      pooling_convention="full",
                      name="pool3")
    # group 4 -- relu4_3 is the first detection source
    net = conv_relu(net, "4_1", 512)
    net = conv_relu(net, "4_2", 512)
    relu4_3 = conv_relu(net, "4_3", 512)
    net = halve(relu4_3, "pool4")
    # group 5 (pool5 is 3x3 with stride 1 and padding 1)
    net = conv_relu(net, "5_1", 512)
    net = conv_relu(net, "5_2", 512)
    net = conv_relu(net, "5_3", 512)
    net = sym.Pooling(data=net,
                      pool_type="max",
                      kernel=(3, 3),
                      stride=(1, 1),
                      pad=(1, 1),
                      name="pool5")
    # group 6: dilated 3x3 convolution standing in for fc6
    net = sym.Convolution(data=net,
                          kernel=(3, 3),
                          pad=(6, 6),
                          dilate=(6, 6),
                          num_filter=1024,
                          name="conv6")
    net = sym.Activation(data=net, act_type="relu", name="relu6")
    # group 7: 1x1 convolution standing in for fc7
    net = sym.Convolution(data=net,
                          kernel=(1, 1),
                          pad=(0, 0),
                          num_filter=1024,
                          name="conv7")
    relu7 = sym.Activation(data=net, act_type="relu", name="relu7")

    ### ssd extra layers ###
    # every extra stage is a 1x1 convolution followed by a 3x3 stride-2 one;
    # only the activation of the second convolution is used for detection
    extra_sources = []
    top = relu7
    for stage, (squeeze, expand) in zip(("8", "9", "10"),
                                        ((256, 512), (128, 256), (128, 256))):
        _, top = conv_act_layer(top, stage + "_1", squeeze, kernel=(1, 1),
                                pad=(0, 0), stride=(1, 1), act_type="relu",
                                use_batchnorm=False)
        _, top = conv_act_layer(top, stage + "_2", expand, kernel=(3, 3),
                                pad=(1, 1), stride=(2, 2), act_type="relu",
                                use_batchnorm=False)
        extra_sources.append(top)

    # global Pooling
    pool10 = sym.Pooling(data=top, pool_type="avg", global_pool=True,
                         kernel=(1, 1), name='pool10')

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7] + extra_sources + [pool10]
    wide_ratios = [1, 2, .5, 3, 1. / 3]
    sizes = [[.1], [.2, .276], [.38, .461], [.56, .644], [.74, .825],
             [.92, 1.01]]
    ratios = [[1, 2, .5]] + [list(wide_ratios) for _ in range(5)]
    normalizations = [20, -1, -1, -1, -1, -1]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=True, interm_layer=0)

    targets = sym.MultiBoxTarget(
        anchor_boxes, label, cls_preds,
        overlap_threshold=.5, ignore_label=-1, negative_mining_ratio=3,
        minimum_negative_samples=0, negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target")
    loc_target, loc_target_mask, cls_target = \
        targets[0], targets[1], targets[2]

    # classification loss
    cls_prob = sym.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=3., multi_output=True, normalization='valid',
        name="cls_prob")

    # localization loss: smooth L1 on the masked regression residual
    loc_residual = loc_preds - loc_target
    loc_loss_ = sym.smooth_l1(name="loc_loss_",
                              data=loc_target_mask * loc_residual,
                              scalar=1.0)
    loc_loss = sym.MakeLoss(loc_loss_, grad_scale=1., normalization='valid',
                            name="loc_loss")

    # monitoring training status (zero gradient scale)
    cls_label = sym.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")

    # group output
    return sym.Group([cls_prob, loc_loss, cls_label])
def get_symbol_train(num_classes=20):
    """
    Assemble the SSD (single-shot multi-box detection) training network on a
    VGG-16 style backbone.

    fc6/fc7 of VGG-16 are replaced by convolutions, so the network is
    slightly smaller than the original VGG-16. The returned symbol carries
    the training losses.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background

    Returns:
    ----------
    mx.Symbol
        group of ``[cls_prob, loc_loss, cls_label]``
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # VGG groups 1-5 as (number of conv+relu pairs, filters, pooling arguments)
    stride2 = {"kernel": (2, 2), "stride": (2, 2)}
    vgg_groups = [
        (2, 64, stride2),
        (2, 128, stride2),
        (3, 256, dict(stride2, pooling_convention="full")),
        (3, 512, stride2),
        (3, 512, {"kernel": (3, 3), "stride": (1, 1), "pad": (1, 1)}),
    ]

    body = data
    relu4_3 = None
    for group, (depth, width, pool_args) in enumerate(vgg_groups, 1):
        for layer in range(1, depth + 1):
            suffix = "%d_%d" % (group, layer)
            body = mx.symbol.Convolution(data=body,
                                         kernel=(3, 3),
                                         pad=(1, 1),
                                         num_filter=width,
                                         name="conv" + suffix)
            body = mx.symbol.Activation(data=body,
                                        act_type="relu",
                                        name="relu" + suffix)
        if group == 4:
            # last activation of group 4 is the first detection source
            relu4_3 = body
        body = mx.symbol.Pooling(data=body,
                                 pool_type="max",
                                 name="pool%d" % group,
                                 **pool_args)

    # group 6: dilated convolution in place of fc6
    conv6 = mx.symbol.Convolution(data=body,
                                  kernel=(3, 3),
                                  pad=(6, 6),
                                  dilate=(6, 6),
                                  num_filter=1024,
                                  name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # group 7: 1x1 convolution in place of fc7
    conv7 = mx.symbol.Convolution(data=relu6,
                                  kernel=(1, 1),
                                  pad=(0, 0),
                                  num_filter=1024,
                                  name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    ### ssd extra layers ###
    squeeze = dict(kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                   act_type="relu", use_batchnorm=False)
    downsample = dict(kernel=(3, 3), pad=(1, 1), stride=(2, 2),
                      act_type="relu", use_batchnorm=False)
    _, relu8_1 = conv_act_layer(relu7, "8_1", 256, **squeeze)
    _, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, **downsample)
    _, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, **squeeze)
    _, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, **downsample)
    _, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, **squeeze)
    _, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, **downsample)

    # global Pooling
    pool10 = mx.symbol.Pooling(data=relu10_2,
                               pool_type="avg",
                               global_pool=True,
                               kernel=(1, 1),
                               name='pool10')

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10]
    sizes = [
        [.1],
        [.2, .276],
        [.38, .461],
        [.56, .644],
        [.74, .825],
        [.92, 1.01],
    ]
    ratios = [
        [1, 2, .5],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
    ]
    normalizations = [20, -1, -1, -1, -1, -1]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers,
        num_classes,
        sizes=sizes,
        ratios=ratios,
        normalization=normalizations,
        num_channels=num_channels,
        clip=True,
        interm_layer=0)

    matched = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes,
        label,
        cls_preds,
        overlap_threshold=.5,
        ignore_label=-1,
        negative_mining_ratio=3,
        minimum_negative_samples=0,
        negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = matched[0]
    loc_target_mask = matched[1]
    cls_target = matched[2]

    # classification loss
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds,
                                       label=cls_target,
                                       ignore_label=-1,
                                       use_ignore=True,
                                       grad_scale=3.,
                                       multi_output=True,
                                       normalization='valid',
                                       name="cls_prob")
    # localization loss: smooth L1 over the masked regression residual
    masked_residual = loc_target_mask * (loc_preds - loc_target)
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_",
                                    data=masked_residual,
                                    scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_,
                                  grad_scale=1.,
                                  normalization='valid',
                                  name="loc_loss")

    # monitoring training status (zero gradient scale)
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label])
# Example #3
def get_symbol_train(num_classes=20,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400):
    """
    Build a multibox detection training symbol with a down-sampling /
    deconvolution feature path.

    Four ``conv_act_layer`` stages are stacked on the last feature map
    returned by ``get_feature_layer``, five ``deconv_layer`` stages combine
    them back with earlier maps, and ``residual_predict`` is applied to the
    six resulting maps before they are handed to ``multibox_layer``.
    NOTE(review): the exact behaviour of these helpers is defined outside
    this block -- confirm there.

    Parameters:
    ----------
    num_classes: int
        number of classes, forwarded to ``multibox_layer``
    nms_thresh : float
        forwarded to MultiBoxDetection as ``nms_threshold``
    force_suppress : boolean
        forwarded to MultiBoxDetection as ``force_suppress``
    nms_topk : int
        forwarded to MultiBoxDetection as ``nms_topk``

    Returns:
    ----------
    mx.Symbol
        group of ``[cls_prob, loc_loss, cls_label, det]``
    """
    label = mx.symbol.Variable(name="label")

    # four feature maps are returned; only the 2nd and the 4th are used here
    _, feature_net2, _, feature_net4 = get_feature_layer()

    # down-sampling path
    _, down1 = conv_act_layer(feature_net4, "8_1", 512, stride=(2, 2))
    _, down2 = conv_act_layer(down1, "9_1", 512, stride=(2, 2))
    _, down3 = conv_act_layer(down2, "10_1", 512, stride=(2, 2))
    _, down4 = conv_act_layer(
        down3, "11_1", 512, stride=(1, 1), pad=(0, 0), kernel=(3, 3))

    # deconvolution path, each step paired with an earlier feature map
    up1 = deconv_layer(
        down4, down3, deconv_kernel=(3, 3), deconv_pad=(0, 0))
    up2 = deconv_layer(up1, down2)
    up3 = deconv_layer(
        up2, down1, deconv_kernel=(2, 2), deconv_pad=(0, 0))
    up4 = deconv_layer(
        up3, feature_net4, deconv_kernel=(2, 2), deconv_pad=(0, 0))
    up5 = deconv_layer(
        up4, feature_net2, deconv_kernel=(2, 2), deconv_pad=(0, 0))

    # prediction heads are created from down4 through up5, then listed in
    # the opposite order (last-created head first) for the multibox layer
    heads = [residual_predict(feat)
             for feat in (down4, up1, up2, up3, up4, up5)]
    from_layers = heads[::-1]

    sizes = [
        [.1, .141],
        [.2, .272],
        [.37, .447],
        [.54, .619],
        [.71, .79],
        [.88, .961],
    ]
    ratios = [
        [1, 2, .5],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5],
        [1, 2, .5],
    ]
    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        clip=False, interm_layer=0)

    matched = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds,
        overlap_threshold=.5, ignore_label=-1, negative_mining_ratio=3,
        minimum_negative_samples=0, negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target")
    loc_target, loc_target_mask, cls_target = \
        matched[0], matched[1], matched[2]

    # classification loss
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=1., multi_output=True, normalization='valid',
        name="cls_prob")
    # localization loss: smooth L1 over the masked regression residual
    masked_residual = loc_target_mask * (loc_preds - loc_target)
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=masked_residual, scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(
        loc_loss_, grad_scale=1., normalization='valid', name="loc_loss")

    # monitoring training status (both outputs use a zero gradient scale)
    cls_label = mx.symbol.MakeLoss(
        data=cls_target, grad_scale=0, name="cls_label")
    detections = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection", nms_threshold=nms_thresh,
        force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    detections = mx.symbol.MakeLoss(
        data=detections, grad_scale=0, name="det_out")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, detections])
def resnetsub_concat(units,
                     num_stages,
                     filter_list,
                     num_classes,
                     image_shape,
                     bottle_neck=True,
                     bn_mom=0.9,
                     workspace=256,
                     memonger=False):
    """Return a two-branch ResNet symbol whose branch features are concatenated.

    The input is batch-normalised and split in two along axis 1. Each half
    feeds its own ResNet-style branch; the flattened outputs of both branches
    are concatenated and classified by a final FullyConnected + SoftmaxOutput.

    Parameters
    ----------
    units : list
        Number of units in each stage
    num_stages : int
        Number of stage; must equal ``len(units)``
    filter_list : list
        Channel size of each stage. Entry 0 is used by the stem convolution
        and entry ``i + 1`` by stage ``i``, so ``num_stages + 1`` entries are
        read.
    num_classes : int
        Ouput size of symbol
    image_shape : tuple
        Unpacked as ``(nchannel, height, width)``; only ``height`` is used,
        to choose between the small-input stem (``height <= 32``) and the
        large-input stem.
    bottle_neck : bool
        Forwarded to ``residual_unit``
    bn_mom : float
        Momentum of the BatchNorm layers created in this function
    workspace : int
        Workspace used in convolution operator
    memonger : bool
        Forwarded to ``residual_unit``

    Returns
    -------
    mx.Symbol
        SoftmaxOutput named 'softmax'

    Raises
    ------
    AssertionError
        If ``len(units) != num_stages``
    """
    num_unit = len(units)
    assert (num_unit == num_stages)
    data = mx.sym.Variable(name='data')
    data = mx.sym.identity(data=data, name='id')
    data = mx.sym.BatchNorm(data=data,
                            fix_gamma=True,
                            eps=2e-5,
                            momentum=bn_mom,
                            name='bn_data')
    (nchannel, height, width) = image_shape

    # split the input in two along axis 1; data[0] / data[1] feed the branches
    data = mx.sym.split(data=data, axis=1, num_outputs=2, name='split')
    # sub network 1
    if height <= 32:  # such as cifar10
        body = mx.sym.Convolution(data=data[0],
                                  num_filter=filter_list[0],
                                  kernel=(3, 3),
                                  stride=(1, 1),
                                  pad=(1, 1),
                                  no_bias=True,
                                  name="conv0",
                                  workspace=workspace)
        # Bug fix: the small-input stem is only the convolution. Without this
        # binding the stage loop below hit an unbound ``body_sub1``.
        body_sub1 = body
    else:  # often expected to be 224 such as imagenet
        body = mx.sym.Convolution(data=data[0],
                                  num_filter=filter_list[0],
                                  kernel=(7, 7),
                                  stride=(2, 2),
                                  pad=(3, 3),
                                  no_bias=True,
                                  name="conv0",
                                  workspace=workspace)
        body = mx.sym.BatchNorm(data=body,
                                fix_gamma=False,
                                eps=2e-5,
                                momentum=bn_mom,
                                name='bn0')
        body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
        body_sub1 = mx.symbol.Pooling(data=body,
                                      kernel=(3, 3),
                                      stride=(2, 2),
                                      pad=(1, 1),
                                      pool_type='max')

    for i in range(num_stages):
        # first unit of a stage: stride 1 for stage 0, stride 2 afterwards
        body_sub1 = residual_unit(body_sub1,
                                  filter_list[i + 1],
                                  (1 if i == 0 else 2, 1 if i == 0 else 2),
                                  False,
                                  name='stage%d_unit%d' % (i + 1, 1),
                                  bottle_neck=bottle_neck,
                                  workspace=workspace,
                                  memonger=memonger)
        # remaining units of the stage keep stride 1
        for j in range(units[i] - 1):
            body_sub1 = residual_unit(body_sub1,
                                      filter_list[i + 1], (1, 1),
                                      True,
                                      name='stage%d_unit%d' % (i + 1, j + 2),
                                      bottle_neck=bottle_neck,
                                      workspace=workspace,
                                      memonger=memonger)
    bn1 = mx.sym.BatchNorm(data=body_sub1,
                           fix_gamma=False,
                           eps=2e-5,
                           momentum=bn_mom,
                           name='bn1')
    relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
    # Although kernel is not used here when global_pool=True, we should put one
    pool1 = mx.symbol.Pooling(data=relu1,
                              global_pool=True,
                              kernel=(7, 7),
                              pool_type='avg',
                              name='pool1')
    flat1 = mx.symbol.Flatten(data=pool1)
    # NOTE(review): fc1 (and fc2 below) are built but are not part of the
    # returned symbol; they are only referenced by the commented-out
    # alternative return at the end of this function.
    fc1 = mx.symbol.FullyConnected(data=flat1,
                                   num_hidden=num_classes,
                                   name='fc1')

    # sub network 2
    prefix = 'sub_'  # differentiate sub-networks
    if height <= 32:  # such as cifar10
        body2 = mx.sym.Convolution(data=data[1],
                                   num_filter=filter_list[0],
                                   kernel=(3, 3),
                                   stride=(1, 1),
                                   pad=(1, 1),
                                   no_bias=True,
                                   name=prefix + "conv0",
                                   workspace=workspace)
        # Bug fix: same as sub network 1 -- bind the stem output so the stage
        # loop below has a defined input on the small-input path.
        body_sub2 = body2
    else:  # often expected to be 224 such as imagenet
        body2 = mx.sym.Convolution(data=data[1],
                                   num_filter=filter_list[0],
                                   kernel=(7, 7),
                                   stride=(2, 2),
                                   pad=(3, 3),
                                   no_bias=True,
                                   name=prefix + "conv0",
                                   workspace=workspace)
        body2 = mx.sym.BatchNorm(data=body2,
                                 fix_gamma=False,
                                 eps=2e-5,
                                 momentum=bn_mom,
                                 name=prefix + 'bn0')
        body2 = mx.sym.Activation(data=body2,
                                  act_type='relu',
                                  name=prefix + 'relu0')
        body_sub2 = mx.symbol.Pooling(data=body2,
                                      kernel=(3, 3),
                                      stride=(2, 2),
                                      pad=(1, 1),
                                      pool_type='max')

    for i in range(num_stages):
        body_sub2 = residual_unit(body_sub2,
                                  filter_list[i + 1],
                                  (1 if i == 0 else 2, 1 if i == 0 else 2),
                                  False,
                                  name=prefix + 'stage%d_unit%d' % (i + 1, 1),
                                  bottle_neck=bottle_neck,
                                  workspace=workspace,
                                  memonger=memonger)
        for j in range(units[i] - 1):
            body_sub2 = residual_unit(body_sub2,
                                      filter_list[i + 1], (1, 1),
                                      True,
                                      name=prefix + 'stage%d_unit%d' %
                                      (i + 1, j + 2),
                                      bottle_neck=bottle_neck,
                                      workspace=workspace,
                                      memonger=memonger)

    # padding test (truncated in concat training)
    # NOTE(review): conv_act_layer is used here as if it returns one symbol --
    # confirm against its definition.
    conv_1x1 = conv_act_layer(body_sub2,
                              'multi_feat_pad_1_conv_1x1',
                              256,
                              kernel=(1, 1),
                              pad=(0, 0),
                              stride=(1, 1),
                              act_type='relu')
    conv_3x3 = conv_act_layer(conv_1x1,
                              'multi_feat_pad_2_conv_3x3',
                              512,
                              kernel=(3, 3),
                              pad=(1, 1),
                              stride=(2, 2),
                              act_type='relu')
    # zero padding with a hard-coded width (4/4 and 5/5 on the last two axes)
    pad2 = mx.symbol.Pad(data=conv_3x3,
                         mode='constant',
                         constant_value=0,
                         pad_width=(0, 0, 0, 0, 4, 4, 5, 5),
                         name='pad')

    bn2 = mx.sym.BatchNorm(data=pad2,
                           fix_gamma=False,
                           eps=2e-5,
                           momentum=bn_mom,
                           name=prefix + 'bn1')
    relu2 = mx.sym.Activation(data=bn2, act_type='relu', name=prefix + 'relu1')
    pool2 = mx.symbol.Pooling(data=relu2,
                              global_pool=True,
                              kernel=(7, 7),
                              pool_type='avg',
                              name=prefix + 'pool1')
    flat2 = mx.symbol.Flatten(data=pool2)

    fc2 = mx.symbol.FullyConnected(data=flat2,
                                   num_hidden=num_classes,
                                   name='fc2')

    #ret = [mx.symbol.SoftmaxOutput(data=fc1, name='softmax'), mx.symbol.SoftmaxOutput(data=fc2, name='softmax')]
    #return ret

    # NOTE(review): the branch features are concatenated along dim=0, not
    # along the feature axis -- confirm this is the intended layout.
    flat = mx.symbol.Concat(flat1, flat2, dim=0, name='concat')
    fc = mx.symbol.FullyConnected(data=flat, num_hidden=num_classes, name='fc')
    return mx.symbol.SoftmaxOutput(data=fc, name='softmax')
# Example #5
def get_symbol_train(num_classes=20,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400):
    """
    Assemble the SSD training symbol on a reduced VGG-16 backbone.

    The fc6/fc7 layers of VGG-16 are expressed as convolutions (conv6/conv7),
    five pairs of extra SSD layers (8_x .. 12_x) are appended, and multibox
    heads are attached to seven feature maps. Anchor steps are expressed
    relative to 512 (see ``steps`` below).

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
        group of [cls_prob, loc_loss, cls_label, det_out]
    """

    def conv_relu(bottom, tag, filters, **conv_args):
        # Convolution named "conv<tag>" followed by a ReLU named "relu<tag>".
        conv = mx.symbol.Convolution(data=bottom,
                                     num_filter=filters,
                                     name="conv" + tag,
                                     **conv_args)
        return mx.symbol.Activation(data=conv,
                                    act_type="relu",
                                    name="relu" + tag)

    def vgg_stage(bottom, stage, depth, filters):
        # `depth` stacked 3x3/pad-1 conv+relu pairs: conv<stage>_1 .. conv<stage>_<depth>.
        for idx in range(1, depth + 1):
            bottom = conv_relu(bottom, "%d_%d" % (stage, idx), filters,
                               kernel=(3, 3), pad=(1, 1))
        return bottom

    def max_pool(bottom, name, **pool_args):
        return mx.symbol.Pooling(data=bottom,
                                 pool_type="max",
                                 name=name,
                                 **pool_args)

    net = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # groups 1-3
    net = vgg_stage(net, 1, 2, 64)
    net = max_pool(net, "pool1", kernel=(2, 2), stride=(2, 2))
    net = vgg_stage(net, 2, 2, 128)
    net = max_pool(net, "pool2", kernel=(2, 2), stride=(2, 2))
    net = vgg_stage(net, 3, 3, 256)
    net = max_pool(net, "pool3", kernel=(2, 2), stride=(2, 2),
                   pooling_convention="full")

    # group 4 -- its last activation is the first detection source
    relu4_3 = vgg_stage(net, 4, 3, 512)
    net = max_pool(relu4_3, "pool4", kernel=(2, 2), stride=(2, 2))

    # group 5 -- pool5 is 3x3 with stride 1 and pad 1
    net = vgg_stage(net, 5, 3, 512)
    net = max_pool(net, "pool5", kernel=(3, 3), stride=(1, 1), pad=(1, 1))

    # groups 6 and 7 (the drop6/drop7 dropout layers are disabled)
    net = conv_relu(net, "6", 1024, kernel=(3, 3), pad=(6, 6), dilate=(6, 6))
    relu7 = conv_relu(net, "7", 1024, kernel=(1, 1), pad=(0, 0))

    ### ssd extra layers ###
    # (layer tag, num_filter, kernel, pad, stride); each "_1" layer feeds the
    # following "_2" layer, whose activation becomes a detection source.
    extra_specs = [
        ("8_1", 256, (1, 1), (0, 0), (1, 1)),
        ("8_2", 512, (3, 3), (1, 1), (2, 2)),
        ("9_1", 128, (1, 1), (0, 0), (1, 1)),
        ("9_2", 256, (3, 3), (1, 1), (2, 2)),
        ("10_1", 128, (1, 1), (0, 0), (1, 1)),
        ("10_2", 256, (3, 3), (0, 0), (1, 1)),
        ("11_1", 128, (1, 1), (0, 0), (1, 1)),
        ("11_2", 256, (3, 3), (0, 0), (1, 1)),
        ("12_1", 128, (1, 1), (0, 0), (1, 1)),
        ("12_2", 256, (4, 4), (1, 1), (1, 1)),
    ]
    from_layers = [relu4_3, relu7]
    top = relu7
    for tag, filters, kernel, pad, stride in extra_specs:
        _, top = conv_act_layer(top, tag, filters, kernel=kernel, pad=pad,
                                stride=stride, act_type="relu",
                                use_batchnorm=False)
        if tag.endswith("_2"):
            from_layers.append(top)

    # specific parameters for VGG16 network (one entry per detection source)
    sizes = [
        [.07, .1025],
        [.15, .2121],
        [.3, .3674],
        [.45, .5196],
        [.6, .6708],
        [.75, .8216],
        [.9, .9721],
    ]
    ratios = [
        [1, 2, .5],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5],
        [1, 2, .5],
    ]
    normalizations = [20, -1, -1, -1, -1, -1, -1]
    steps = [pixels / 512.0 for pixels in (8, 16, 32, 64, 128, 256, 512)]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers,
        num_classes,
        sizes=sizes,
        ratios=ratios,
        normalization=normalizations,
        num_channels=num_channels,
        clip=False,
        interm_layer=0,
        steps=steps)

    # training targets derived from anchors, ground-truth labels and class scores
    targets = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes,
        label,
        cls_preds,
        overlap_threshold=.5,
        ignore_label=-1,
        negative_mining_ratio=3,
        minimum_negative_samples=0,
        negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = targets[0]
    loc_target_mask = targets[1]
    cls_target = targets[2]

    # classification loss
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds,
                                       label=cls_target,
                                       ignore_label=-1,
                                       use_ignore=True,
                                       grad_scale=1.,
                                       multi_output=True,
                                       normalization='valid',
                                       name="cls_prob")

    # localisation loss: smooth L1 over the masked offset residual
    loc_residual = loc_preds - loc_target
    masked_residual = loc_target_mask * loc_residual
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_",
                                    data=masked_residual,
                                    scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_,
                                  grad_scale=1.,
                                  normalization='valid',
                                  name="loc_loss")

    # monitoring training status (both outputs use grad_scale=0)
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        cls_prob,
        loc_preds,
        anchor_boxes,
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
def get_symbol_train(num_classes=20,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400):
    """
    Single-shot multi-box detection training network whose VGG groups 1-4
    are replaced by a strided convolution followed by channel-wise (``_dw``)
    + 1x1 (``_sep``) convolution pairs; groups 5-7 and the SSD extra layers
    keep the VGG-style layout.
    This is a training network with losses

    NOTE(review): ``mx.symbol.ChannelwiseConvolution`` is not a stock MXNet
    operator as far as this file shows; presumably it is provided by the
    MXNet build this code targets -- confirm before running elsewhere.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
        group of [cls_prob, loc_loss, cls_label, det_out]
    """

    def bn_relu(conv, tag):
        # BatchNorm "conv<tag>_bn" followed by ReLU "relu<tag>".
        bn = mx.symbol.BatchNorm(name='conv%s_bn' % tag,
                                 data=conv,
                                 use_global_stats=False,
                                 fix_gamma=False,
                                 eps=0.000100)
        return mx.symbol.Activation(name='relu%s' % tag,
                                    data=bn,
                                    act_type='relu')

    def separable_block(bottom, tag, in_filters, out_filters, stride):
        # 3x3 channel-wise conv (one group per channel) + BN + ReLU, then a
        # 1x1 conv to `out_filters` channels + BN + ReLU.
        dw = mx.symbol.ChannelwiseConvolution(name='conv%s_dw' % tag,
                                              data=bottom,
                                              num_filter=in_filters,
                                              pad=(1, 1),
                                              kernel=(3, 3),
                                              stride=stride,
                                              no_bias=True,
                                              num_group=in_filters)
        bottom = bn_relu(dw, tag + '_dw')
        sep = mx.symbol.Convolution(name='conv%s_sep' % tag,
                                    data=bottom,
                                    num_filter=out_filters,
                                    pad=(0, 0),
                                    kernel=(1, 1),
                                    stride=(1, 1),
                                    no_bias=True)
        return bn_relu(sep, tag + '_sep')

    def conv_relu(bottom, tag, filters, **conv_args):
        # Biased convolution "conv<tag>" followed by ReLU "relu<tag>".
        conv = mx.symbol.Convolution(data=bottom,
                                     num_filter=filters,
                                     name="conv" + tag,
                                     **conv_args)
        return mx.symbol.Activation(data=conv,
                                    act_type="relu",
                                    name="relu" + tag)

    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # group 1: a single strided 3x3 convolution replaces the VGG conv1_x/pool1
    conv1 = mx.symbol.Convolution(name='conv1',
                                  data=data,
                                  num_filter=32,
                                  pad=(1, 1),
                                  kernel=(3, 3),
                                  stride=(2, 2),
                                  no_bias=True)
    body = bn_relu(conv1, '1')

    # groups 2-4: (tag, channel-wise filters, 1x1 output filters, stride of
    # the channel-wise conv). The stride-2 blocks take over the role of the
    # VGG pooling layers.
    separable_specs = [
        ('2_1', 32, 64, (1, 1)),
        ('2_2', 64, 128, (2, 2)),
        ('3_1', 128, 128, (1, 1)),
        ('3_2', 128, 256, (2, 2)),
        ('4_1', 256, 256, (1, 1)),
        ('4_2', 256, 512, (2, 2)),
    ]
    block_out = {}
    for tag, in_filters, out_filters, stride in separable_specs:
        body = separable_block(body, tag, in_filters, out_filters, stride)
        block_out[tag] = body
    # First detection source: 256 channels, preceded by three stride-2 convs.
    relu4_1_sep = block_out['4_1']

    # group 5
    for idx in (1, 2, 3):
        body = conv_relu(body, "5_%d" % idx, 512, kernel=(3, 3), pad=(1, 1))
    body = mx.symbol.Pooling(data=body,
                             pool_type="max",
                             kernel=(3, 3),
                             stride=(1, 1),
                             pad=(1, 1),
                             name="pool5")
    # groups 6 and 7 (the drop6/drop7 dropout layers are disabled)
    body = conv_relu(body, "6", 1024, kernel=(3, 3), pad=(6, 6),
                     dilate=(6, 6))
    relu7 = conv_relu(body, "7", 1024, kernel=(1, 1), pad=(0, 0))

    ### ssd extra layers ###
    # (layer tag, num_filter, pad, stride); all use 3x3 kernels and each
    # activation becomes a detection source.
    extra_specs = [
        ("8_2", 512, (1, 1), (2, 2)),
        ("9_2", 256, (1, 1), (2, 2)),
        ("10_2", 256, (0, 0), (1, 1)),
        ("11_2", 256, (0, 0), (1, 1)),
    ]
    from_layers = [relu4_1_sep, relu7]
    top = relu7
    for tag, filters, pad, stride in extra_specs:
        _, top = conv_act_layer(top, tag, filters, kernel=(3, 3), pad=pad,
                                stride=stride, act_type="relu",
                                use_batchnorm=False)
        from_layers.append(top)

    # multibox parameters, one entry per detection source
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619], [.71, .79],
             [.88, .961]]
    ratios = [
        [1, 2, .5],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5, 3, 1. / 3],
        [1, 2, .5],
        [1, 2, .5],
    ]
    normalizations = [20, -1, -1, -1, -1, -1]
    steps = [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
    # Channel count of every source with normalization enabled. Only
    # relu4_1_sep is normalized and conv4_1_sep emits 256 channels; the
    # previous value of 512 was left over from the VGG relu4_3 source.
    # NOTE(review): multibox_layer is defined elsewhere -- this assumes it
    # sizes the per-channel scale from num_channels as the MXNet SSD example
    # does; confirm against the local implementation.
    num_channels = [256]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers,
        num_classes,
        sizes=sizes,
        ratios=ratios,
        normalization=normalizations,
        num_channels=num_channels,
        clip=False,
        interm_layer=0,
        steps=steps)

    # training targets derived from anchors, ground-truth labels and class scores
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds],
        overlap_threshold=.5,
        ignore_label=-1,
        negative_mining_ratio=3,
        minimum_negative_samples=0,
        negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds,
                                       label=cls_target,
                                       ignore_label=-1,
                                       use_ignore=True,
                                       grad_scale=1.,
                                       multi_output=True,
                                       normalization='valid',
                                       name="cls_prob")
    # smooth L1 over the masked offset residual
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_,
                                  grad_scale=1.,
                                  normalization='valid',
                                  name="loc_loss")

    # monitoring training status (both outputs use grad_scale=0)
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        *[cls_prob, loc_preds, anchor_boxes],
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out