def get_symbol_train(num_classes=20):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet.

    This is a modified version, with fc6/fc7 layers replaced by conv layers,
    and the network is slightly smaller than the original VGG 16 network.
    This is a training network with losses.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background

    Returns:
    ----------
    mx.Symbol
        grouped symbol: [cls_prob, loc_loss, cls_label]
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")
    # group 1
    conv1_1 = mx.symbol.Convolution(
        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    conv1_2 = mx.symbol.Convolution(
        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
    pool1 = mx.symbol.Pooling(
        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
    # group 2
    conv2_1 = mx.symbol.Convolution(
        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    conv2_2 = mx.symbol.Convolution(
        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
    pool2 = mx.symbol.Pooling(
        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
    # group 3
    conv3_1 = mx.symbol.Convolution(
        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mx.symbol.Convolution(
        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    conv3_3 = mx.symbol.Convolution(
        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
    # "full" pooling convention keeps odd feature maps one pixel larger,
    # matching the original Caffe SSD layer sizes
    pool3 = mx.symbol.Pooling(
        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2),
        pooling_convention="full", name="pool3")
    # group 4
    conv4_1 = mx.symbol.Convolution(
        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mx.symbol.Convolution(
        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    conv4_3 = mx.symbol.Convolution(
        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
    pool4 = mx.symbol.Pooling(
        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
    # group 5
    conv5_1 = mx.symbol.Convolution(
        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mx.symbol.Convolution(
        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    conv5_3 = mx.symbol.Convolution(
        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
    # pool5 keeps resolution (3x3 / stride 1) so conv6 can use large dilation
    pool5 = mx.symbol.Pooling(
        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
        pad=(1, 1), name="pool5")
    # group 6: dilated conv replaces fc6
    conv6 = mx.symbol.Convolution(
        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
        num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
    # group 7: 1x1 conv replaces fc7
    conv7 = mx.symbol.Convolution(
        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")

    ### ssd extra layers ###
    conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1, 1),
        pad=(0, 0), stride=(1, 1), act_type="relu", use_batchnorm=False)
    conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3, 3),
        pad=(1, 1), stride=(2, 2), act_type="relu", use_batchnorm=False)
    conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1, 1),
        pad=(0, 0), stride=(1, 1), act_type="relu", use_batchnorm=False)
    conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3, 3),
        pad=(1, 1), stride=(2, 2), act_type="relu", use_batchnorm=False)
    conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1, 1),
        pad=(0, 0), stride=(1, 1), act_type="relu", use_batchnorm=False)
    conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3, 3),
        pad=(1, 1), stride=(2, 2), act_type="relu", use_batchnorm=False)
    # global Pooling (kernel is ignored when global_pool=True but required)
    pool10 = mx.symbol.Pooling(data=relu10_2, pool_type="avg",
                               global_pool=True, kernel=(1, 1), name='pool10')

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10]
    sizes = [[.1], [.2, .276], [.38, .461], [.56, .644], [.74, .825],
             [.92, 1.01]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3]]
    # L2-normalize only conv4_3 (scale 20, following the SSD paper)
    normalizations = [20, -1, -1, -1, -1, -1]
    # channel count of the normalized layer(s); presumably consumed by
    # multibox_layer for the normalization branch -- helper is out of view
    num_channels = [512]
    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=True, interm_layer=0)

    # FIX: MultiBoxTarget lives under mx.contrib.symbol (MXNet >= 0.12),
    # consistent with the other get_symbol_train variants in this file;
    # mx.symbol.MultiBoxTarget raises AttributeError on modern MXNet.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=3., multi_output=True, normalization='valid',
        name="cls_prob")
    # localization loss only on matched (masked) anchors
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")
    # monitoring training status: zero-gradient pass-through of cls_target
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label])
    return out
def get_symbol_train(num_classes=20):
    """
    Build the SSD training symbol on a reduced VGG-16 backbone.

    fc6/fc7 of the original VGG-16 are replaced by a dilated 3x3 and a 1x1
    convolution; SSD extra feature layers, multibox target generation and
    the training losses are appended.

    Parameters
    ----------
    num_classes : int
        number of object classes, background excluded

    Returns
    -------
    mx.Symbol
        grouped symbol: [cls_prob, loc_loss, cls_label]
    """
    def _conv3_relu(x, suffix, num_filter):
        # one 3x3 "same" convolution followed by ReLU, named convN_M/reluN_M
        c = mx.symbol.Convolution(data=x, kernel=(3, 3), pad=(1, 1),
                                  num_filter=num_filter, name="conv" + suffix)
        return mx.symbol.Activation(data=c, act_type="relu",
                                    name="relu" + suffix)

    def _maxpool2(x, suffix, **kwargs):
        # standard 2x2 / stride-2 max pooling named poolN
        return mx.symbol.Pooling(data=x, pool_type="max", kernel=(2, 2),
                                 stride=(2, 2), name="pool" + suffix, **kwargs)

    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # VGG-16 groups 1-5
    x = _conv3_relu(data, "1_1", 64)
    x = _conv3_relu(x, "1_2", 64)
    x = _maxpool2(x, "1")
    x = _conv3_relu(x, "2_1", 128)
    x = _conv3_relu(x, "2_2", 128)
    x = _maxpool2(x, "2")
    x = _conv3_relu(x, "3_1", 256)
    x = _conv3_relu(x, "3_2", 256)
    x = _conv3_relu(x, "3_3", 256)
    # "full" convention keeps odd feature maps one pixel larger (Caffe SSD)
    x = _maxpool2(x, "3", pooling_convention="full")
    x = _conv3_relu(x, "4_1", 512)
    x = _conv3_relu(x, "4_2", 512)
    relu4_3 = _conv3_relu(x, "4_3", 512)  # kept: first prediction source
    x = _maxpool2(relu4_3, "4")
    x = _conv3_relu(x, "5_1", 512)
    x = _conv3_relu(x, "5_2", 512)
    x = _conv3_relu(x, "5_3", 512)
    # pool5 keeps resolution (3x3 / stride 1) so conv6 can dilate widely
    x = mx.symbol.Pooling(data=x, pool_type="max", kernel=(3, 3),
                          stride=(1, 1), pad=(1, 1), name="pool5")

    # group 6/7: dilated conv replaces fc6, 1x1 conv replaces fc7
    conv6 = mx.symbol.Convolution(data=x, kernel=(3, 3), pad=(6, 6),
                                  dilate=(6, 6), num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    conv7 = mx.symbol.Convolution(data=relu6, kernel=(1, 1), pad=(0, 0),
                                  num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    # SSD extra feature layers (1x1 reduce followed by 3x3 stride-2 expand)
    _, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1, 1), pad=(0, 0),
                                stride=(1, 1), act_type="relu",
                                use_batchnorm=False)
    _, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3, 3),
                                pad=(1, 1), stride=(2, 2), act_type="relu",
                                use_batchnorm=False)
    _, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1, 1),
                                pad=(0, 0), stride=(1, 1), act_type="relu",
                                use_batchnorm=False)
    _, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3, 3),
                                pad=(1, 1), stride=(2, 2), act_type="relu",
                                use_batchnorm=False)
    _, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1, 1),
                                 pad=(0, 0), stride=(1, 1), act_type="relu",
                                 use_batchnorm=False)
    _, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3, 3),
                                 pad=(1, 1), stride=(2, 2), act_type="relu",
                                 use_batchnorm=False)
    # global average pool; kernel is ignored when global_pool=True but required
    pool10 = mx.symbol.Pooling(data=relu10_2, pool_type="avg",
                               global_pool=True, kernel=(1, 1), name='pool10')

    # VGG16-specific multibox configuration
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10]
    sizes = [[.1], [.2, .276], [.38, .461], [.56, .644], [.74, .825],
             [.92, 1.01]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3]]
    # L2-normalize only conv4_3 (scale 20); -1 disables normalization
    normalizations = [20, -1, -1, -1, -1, -1]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=True, interm_layer=0)

    # anchor matching + hard negative mining
    tmp = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds, overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target, loc_target_mask, cls_target = tmp[0], tmp[1], tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=3., multi_output=True, normalization='valid',
        name="cls_prob")
    # smooth-L1 on matched anchors only (mask zeroes unmatched ones)
    masked_diff = loc_target_mask * (loc_preds - loc_target)
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", data=masked_diff,
                                    scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")
    # zero-gradient pass-through so cls_target can be monitored in training
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    return mx.symbol.Group([cls_prob, loc_loss, cls_label])
def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
                     nms_topk=400):
    """
    Build an SSD-style training symbol with a top-down deconvolutional
    feature pyramid and residual prediction heads (DSSD-like).

    Parameters
    ----------
    num_classes : int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold for the monitoring detection output
    force_suppress : bool
        whether NMS suppresses overlapping boxes across different classes
    nms_topk : int
        apply NMS to the top K detections

    Returns
    -------
    mx.Symbol
        grouped symbol: [cls_prob, loc_loss, cls_label, det]
    """
    label = mx.symbol.Variable(name="label")

    # backbone feature maps (helper defined elsewhere in this module)
    _feat1, feat2, _feat3, feat4 = get_feature_layer()

    # bottom-up extra layers stacked on the deepest backbone feature
    _, down1 = conv_act_layer(feat4, "8_1", 512, stride=(2, 2))
    _, down2 = conv_act_layer(down1, "9_1", 512, stride=(2, 2))
    _, down3 = conv_act_layer(down2, "10_1", 512, stride=(2, 2))
    _, down4 = conv_act_layer(down3, "11_1", 512, stride=(1, 1), pad=(0, 0),
                              kernel=(3, 3))

    # top-down deconvolution path, fusing a skip connection at each step
    up1 = deconv_layer(down4, down3, deconv_kernel=(3, 3), deconv_pad=(0, 0))
    up2 = deconv_layer(up1, down2)
    up3 = deconv_layer(up2, down1, deconv_kernel=(2, 2), deconv_pad=(0, 0))
    up4 = deconv_layer(up3, feat4, deconv_kernel=(2, 2), deconv_pad=(0, 0))
    up5 = deconv_layer(up4, feat2, deconv_kernel=(2, 2), deconv_pad=(0, 0))

    # residual prediction module on every pyramid level (coarse -> fine order
    # of construction is preserved; prediction sources are finest-first)
    heads = [residual_predict(level)
             for level in (down4, up1, up2, up3, up4, up5)]
    from_layers = heads[::-1]

    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619], [.71, .79],
             [.88, .961]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5], [1, 2, .5]]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios, clip=False,
        interm_layer=0)

    # anchor matching + hard negative mining
    tmp = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds, overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target, loc_target_mask, cls_target = tmp[0], tmp[1], tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=1., multi_output=True, normalization='valid',
        name="cls_prob")
    # smooth-L1 over masked (matched-anchor) localization residuals
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")
    # zero-gradient outputs used only for monitoring / evaluation
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
def resnetsub_concat(units, num_stages, filter_list, num_classes, image_shape,
                     bottle_neck=True, bn_mom=0.9, workspace=256,
                     memonger=False):
    """
    Two parallel ResNet sub-networks on a channel-split input, whose flattened
    pooled features are concatenated (dim=0) and fed to a shared classifier.

    Parameters
    ----------
    units : list
        Number of residual units in each stage.
    num_stages : int
        Number of stages.
    filter_list : list
        Channel size of each stage.
    num_classes : int
        Output size of the final classifier.
    image_shape : tuple
        (channels, height, width); height selects the cifar- vs imagenet-style
        stem (<= 32 uses the 3x3 stem).
    bottle_neck : bool
        Use bottleneck residual units.
    bn_mom : float
        BatchNorm momentum.
    workspace : int
        Workspace used by convolution operators.
    memonger : bool
        Enable memory-saving mode in residual units.
    """
    assert len(units) == num_stages
    data = mx.sym.Variable(name='data')
    data = mx.sym.identity(data=data, name='id')
    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5,
                            momentum=bn_mom, name='bn_data')
    _nchannel, height, _width = image_shape
    # split the input channel-wise: one half per sub-network
    halves = mx.sym.split(data=data, axis=1, num_outputs=2, name='split')

    def _backbone(x, prefix):
        # shared stem + residual stages; prefix='' for sub-network 1,
        # 'sub_' for sub-network 2 (keeps the two parameter sets separate)
        if height <= 32:  # such as cifar10
            k, s, p = (3, 3), (1, 1), (1, 1)
        else:  # often expected to be 224 such as imagenet
            k, s, p = (7, 7), (2, 2), (3, 3)
        y = mx.sym.Convolution(data=x, num_filter=filter_list[0], kernel=k,
                               stride=s, pad=p, no_bias=True,
                               name=prefix + "conv0", workspace=workspace)
        y = mx.sym.BatchNorm(data=y, fix_gamma=False, eps=2e-5,
                             momentum=bn_mom, name=prefix + 'bn0')
        y = mx.sym.Activation(data=y, act_type='relu', name=prefix + 'relu0')
        y = mx.symbol.Pooling(data=y, kernel=(3, 3), stride=(2, 2),
                              pad=(1, 1), pool_type='max')
        for i in range(num_stages):
            # first unit of each stage (except stage 1) downsamples
            first_stride = (1, 1) if i == 0 else (2, 2)
            y = residual_unit(y, filter_list[i + 1], first_stride, False,
                              name=prefix + 'stage%d_unit%d' % (i + 1, 1),
                              bottle_neck=bottle_neck, workspace=workspace,
                              memonger=memonger)
            for j in range(units[i] - 1):
                y = residual_unit(y, filter_list[i + 1], (1, 1), True,
                                  name=prefix + 'stage%d_unit%d' % (i + 1,
                                                                    j + 2),
                                  bottle_neck=bottle_neck,
                                  workspace=workspace, memonger=memonger)
        return y

    # sub network 1
    body1 = _backbone(halves[0], '')
    bn1 = mx.sym.BatchNorm(data=body1, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name='bn1')
    relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
    # kernel is ignored when global_pool=True but must still be supplied
    pool1 = mx.symbol.Pooling(data=relu1, global_pool=True, kernel=(7, 7),
                              pool_type='avg', name='pool1')
    flat1 = mx.symbol.Flatten(data=pool1)
    # per-branch classifier; not part of the returned graph, kept as in
    # the original (only the shared 'fc' below reaches the output)
    fc1 = mx.symbol.FullyConnected(data=flat1, num_hidden=num_classes,
                                   name='fc1')

    # sub network 2
    body2 = _backbone(halves[1], 'sub_')
    # padding test (truncated in concat training)
    # NOTE(review): conv_act_layer is used here as returning a single symbol;
    # other variants in this repo return a (conv, relu) pair -- confirm the
    # helper's signature in this module.
    conv_1x1 = conv_act_layer(body2, 'multi_feat_pad_1_conv_1x1', 256,
                              kernel=(1, 1), pad=(0, 0), stride=(1, 1),
                              act_type='relu')
    conv_3x3 = conv_act_layer(conv_1x1, 'multi_feat_pad_2_conv_3x3', 512,
                              kernel=(3, 3), pad=(1, 1), stride=(2, 2),
                              act_type='relu')
    pad2 = mx.symbol.Pad(data=conv_3x3, mode='constant', constant_value=0,
                         pad_width=(0, 0, 0, 0, 4, 4, 5, 5), name='pad')
    bn2 = mx.sym.BatchNorm(data=pad2, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name='sub_bn1')
    relu2 = mx.sym.Activation(data=bn2, act_type='relu', name='sub_relu1')
    pool2 = mx.symbol.Pooling(data=relu2, global_pool=True, kernel=(7, 7),
                              pool_type='avg', name='sub_pool1')
    flat2 = mx.symbol.Flatten(data=pool2)
    fc2 = mx.symbol.FullyConnected(data=flat2, num_hidden=num_classes,
                                   name='fc2')

    # concatenate the two feature vectors along dim 0, then shared classifier
    flat = mx.symbol.Concat(flat1, flat2, dim=0, name='concat')
    fc = mx.symbol.FullyConnected(data=flat, num_hidden=num_classes, name='fc')
    return mx.symbol.SoftmaxOutput(data=fc, name='softmax')
def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
                     nms_topk=400):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet (512-input
    variant with extra layers up to 12_2 and explicit anchor steps).

    fc6/fc7 are replaced by a dilated 3x3 and a 1x1 convolution; this is a
    training network with losses and a zero-gradient detection output.

    Parameters
    ----------
    num_classes : int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol
        grouped symbol: [cls_prob, loc_loss, cls_label, det]
    """
    def _vgg(x, suffix, nf):
        # 3x3 "same" convolution + ReLU, named convN_M / reluN_M
        c = mx.symbol.Convolution(data=x, kernel=(3, 3), pad=(1, 1),
                                  num_filter=nf, name="conv" + suffix)
        return mx.symbol.Activation(data=c, act_type="relu",
                                    name="relu" + suffix)

    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # VGG-16 groups 1-5
    x = _vgg(data, "1_1", 64)
    x = _vgg(x, "1_2", 64)
    x = mx.symbol.Pooling(data=x, pool_type="max", kernel=(2, 2),
                          stride=(2, 2), name="pool1")
    x = _vgg(x, "2_1", 128)
    x = _vgg(x, "2_2", 128)
    x = mx.symbol.Pooling(data=x, pool_type="max", kernel=(2, 2),
                          stride=(2, 2), name="pool2")
    x = _vgg(x, "3_1", 256)
    x = _vgg(x, "3_2", 256)
    x = _vgg(x, "3_3", 256)
    # "full" convention keeps odd feature maps one pixel larger (Caffe SSD)
    x = mx.symbol.Pooling(data=x, pool_type="max", kernel=(2, 2),
                          stride=(2, 2), pooling_convention="full",
                          name="pool3")
    x = _vgg(x, "4_1", 512)
    x = _vgg(x, "4_2", 512)
    relu4_3 = _vgg(x, "4_3", 512)  # first prediction source
    x = mx.symbol.Pooling(data=relu4_3, pool_type="max", kernel=(2, 2),
                          stride=(2, 2), name="pool4")
    x = _vgg(x, "5_1", 512)
    x = _vgg(x, "5_2", 512)
    x = _vgg(x, "5_3", 512)
    # pool5 keeps resolution (3x3 / stride 1) so conv6 can dilate widely
    x = mx.symbol.Pooling(data=x, pool_type="max", kernel=(3, 3),
                          stride=(1, 1), pad=(1, 1), name="pool5")

    # group 6/7: dilated conv replaces fc6, 1x1 conv replaces fc7
    conv6 = mx.symbol.Convolution(data=x, kernel=(3, 3), pad=(6, 6),
                                  dilate=(6, 6), num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    conv7 = mx.symbol.Convolution(data=relu6, kernel=(1, 1), pad=(0, 0),
                                  num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    # SSD extra layers 8_1 .. 12_2: (suffix, filters, kernel, pad, stride)
    extra_cfg = [
        ("8_1", 256, (1, 1), (0, 0), (1, 1)),
        ("8_2", 512, (3, 3), (1, 1), (2, 2)),
        ("9_1", 128, (1, 1), (0, 0), (1, 1)),
        ("9_2", 256, (3, 3), (1, 1), (2, 2)),
        ("10_1", 128, (1, 1), (0, 0), (1, 1)),
        ("10_2", 256, (3, 3), (0, 0), (1, 1)),
        ("11_1", 128, (1, 1), (0, 0), (1, 1)),
        ("11_2", 256, (3, 3), (0, 0), (1, 1)),
        ("12_1", 128, (1, 1), (0, 0), (1, 1)),
        ("12_2", 256, (4, 4), (1, 1), (1, 1)),
    ]
    feat = relu7
    taps = []
    for suffix, nf, k, p, s in extra_cfg:
        _, feat = conv_act_layer(feat, suffix, nf, kernel=k, pad=p, stride=s,
                                 act_type="relu", use_batchnorm=False)
        taps.append(feat)

    # prediction sources: conv4_3, fc7 and every "expand" extra layer
    # (8_2, 9_2, 10_2, 11_2, 12_2 == odd indices of the config above)
    from_layers = [relu4_3, relu7] + taps[1::2]
    sizes = [[.07, .1025], [.15, .2121], [.3, .3674], [.45, .5196],
             [.6, .6708], [.75, .8216], [.9, .9721]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3], [1, 2, .5],
              [1, 2, .5]]
    # L2-normalize only conv4_3 (scale 20); -1 disables normalization
    normalizations = [20, -1, -1, -1, -1, -1, -1]
    # anchor steps expressed as fractions of a 512-pixel input
    steps = [x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=False, interm_layer=0, steps=steps)

    # anchor matching + hard negative mining
    tmp = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds, overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target, loc_target_mask, cls_target = tmp[0], tmp[1], tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=1., multi_output=True, normalization='valid',
        name="cls_prob")
    # smooth-L1 over masked (matched-anchor) localization residuals
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")
    # zero-gradient outputs used only for monitoring / evaluation
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
def _conv_dw_sep(data, suffix, dw_filters, sep_filters, dw_stride):
    """Depthwise-separable unit: 3x3 depthwise conv + 1x1 pointwise conv,
    each followed by BatchNorm and ReLU.

    Layer names reproduce the original flat graph exactly
    ('conv{suffix}_dw', 'conv{suffix}_dw_bn', 'relu{suffix}_dw',
    'conv{suffix}_sep', ...) so pretrained checkpoints still load.

    Parameters:
    ----------
    data : mx.Symbol
        input symbol
    suffix : str
        name suffix, e.g. "2_1"
    dw_filters : int
        channel count of the depthwise conv (== num_group, one filter
        per input channel)
    sep_filters : int
        output channels of the 1x1 pointwise conv
    dw_stride : tuple of int
        stride of the depthwise conv; (2, 2) halves spatial resolution

    Returns:
    ----------
    mx.Symbol
        output of the pointwise ReLU ('relu{suffix}_sep')
    """
    # NOTE(review): ChannelwiseConvolution is a non-standard op (custom
    # MXNet fork); num_group == num_filter makes it a depthwise conv.
    dw = mx.symbol.ChannelwiseConvolution(
        name='conv%s_dw' % suffix, data=data, num_filter=dw_filters,
        pad=(1, 1), kernel=(3, 3), stride=dw_stride, no_bias=True,
        num_group=dw_filters)
    dw_bn = mx.symbol.BatchNorm(
        name='conv%s_dw_bn' % suffix, data=dw, use_global_stats=False,
        fix_gamma=False, eps=0.000100)
    dw_relu = mx.symbol.Activation(
        name='relu%s_dw' % suffix, data=dw_bn, act_type='relu')
    sep = mx.symbol.Convolution(
        name='conv%s_sep' % suffix, data=dw_relu, num_filter=sep_filters,
        pad=(0, 0), kernel=(1, 1), stride=(1, 1), no_bias=True)
    sep_bn = mx.symbol.BatchNorm(
        name='conv%s_sep_bn' % suffix, data=sep, use_global_stats=False,
        fix_gamma=False, eps=0.000100)
    sep_relu = mx.symbol.Activation(
        name='relu%s_sep' % suffix, data=sep_bn, act_type='relu')
    return sep_relu


# NOTE(review): this redefines get_symbol_train and shadows the VGG16
# version declared earlier in this file — intentional? Consider renaming
# one of them at the module level.
def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
    """Single-shot multi-box detection with a depthwise-separable
    (MobileNet-style) front end.

    Groups 1-4 of the reference VGG16-SSD backbone are replaced by
    depthwise-separable convolutions; groups 5-7 (fc6/fc7 as dilated /
    1x1 convs) and the SSD extra layers follow the 300x300 SSD design.
    This is a training network with target and loss layers attached.

    Parameters:
    ----------
    num_classes : int
        number of object classes not including background
    nms_thresh : float
        NMS overlap threshold used by the monitoring detection output
    force_suppress : bool
        if True, NMS suppresses overlapping boxes regardless of class
    nms_topk : int
        keep at most this many detections after NMS

    Returns:
    ----------
    mx.Symbol
        group of [cls_prob, loc_loss, cls_label, det]
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # stem: plain 3x3 stride-2 conv + BN + ReLU
    conv1 = mx.symbol.Convolution(name='conv1', data=data, num_filter=32,
                                  pad=(1, 1), kernel=(3, 3), stride=(2, 2),
                                  no_bias=True)
    conv1_bn = mx.symbol.BatchNorm(name='conv1_bn', data=conv1,
                                   use_global_stats=False, fix_gamma=False,
                                   eps=0.000100)
    relu1 = mx.symbol.Activation(name='relu1', data=conv1_bn, act_type='relu')

    # depthwise-separable groups 2-4; stride-(2,2) units halve resolution
    relu2_1_sep = _conv_dw_sep(relu1, '2_1', 32, 64, (1, 1))
    relu2_2_sep = _conv_dw_sep(relu2_1_sep, '2_2', 64, 128, (2, 2))
    relu3_1_sep = _conv_dw_sep(relu2_2_sep, '3_1', 128, 128, (1, 1))
    relu3_2_sep = _conv_dw_sep(relu3_1_sep, '3_2', 128, 256, (2, 2))
    relu4_1_sep = _conv_dw_sep(relu3_2_sep, '4_1', 256, 256, (1, 1))  # feature map 1
    relu4_2_sep = _conv_dw_sep(relu4_1_sep, '4_2', 256, 512, (2, 2))

    # group 5: VGG-style 3x3 convs; pool5 is 3x3 stride-1 (keeps resolution)
    conv5_1 = mx.symbol.Convolution(data=relu4_2_sep, kernel=(3, 3),
                                    pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mx.symbol.Convolution(data=relu5_1, kernel=(3, 3),
                                    pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    conv5_3 = mx.symbol.Convolution(data=relu5_2, kernel=(3, 3),
                                    pad=(1, 1), num_filter=512, name="conv5_3")
    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
    pool5 = mx.symbol.Pooling(data=relu5_3, pool_type="max", kernel=(3, 3),
                              stride=(1, 1), pad=(1, 1), name="pool5")

    # group 6: fc6 replaced by a dilated 3x3 conv (atrous trick)
    conv6 = mx.symbol.Convolution(data=pool5, kernel=(3, 3), pad=(6, 6),
                                  dilate=(6, 6), num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # group 7: fc7 replaced by a 1x1 conv
    conv7 = mx.symbol.Convolution(data=relu6, kernel=(1, 1), pad=(0, 0),
                                  num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    ### ssd extra layers ###
    conv8_2, relu8_2 = conv_act_layer(relu7, "8_2", 512, kernel=(3, 3),
        pad=(1, 1), stride=(2, 2), act_type="relu", use_batchnorm=False)
    conv9_2, relu9_2 = conv_act_layer(relu8_2, "9_2", 256, kernel=(3, 3),
        pad=(1, 1), stride=(2, 2), act_type="relu", use_batchnorm=False)
    conv10_2, relu10_2 = conv_act_layer(relu9_2, "10_2", 256, kernel=(3, 3),
        pad=(0, 0), stride=(1, 1), act_type="relu", use_batchnorm=False)
    conv11_2, relu11_2 = conv_act_layer(relu10_2, "11_2", 256, kernel=(3, 3),
        pad=(0, 0), stride=(1, 1), act_type="relu", use_batchnorm=False)

    # multi-scale feature maps and per-layer anchor parameters (SSD-300)
    from_layers = [relu4_1_sep, relu7, relu8_2, relu9_2, relu10_2, relu11_2]
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619],
             [.71, .79], [.88, .961]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5], [1, 2, .5]]
    # only the first feature map is L2-normalized (scale init 20)
    normalizations = [20, -1, -1, -1, -1, -1]
    steps = [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
    # NOTE(review): relu4_1_sep has 256 channels (conv4_1_sep
    # num_filter=256); [512] looks carried over from the VGG version —
    # verify against multibox_layer's normalization handling.
    num_channels = [512]
    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)

    # match anchors to ground truth: returns [loc_target, loc_mask, cls_target]
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    # classification loss (softmax over classes, ignoring unmatched anchors)
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    # localization loss (smooth L1 on masked offsets)
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring-only outputs (grad_scale=0: no gradient contribution)
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out