def get_symbol_train(network,
                     num_classes,
                     num_layers,
                     from_layers,
                     num_filters,
                     strides,
                     pads,
                     sizes,
                     ratios,
                     normalizations=-1,
                     steps=None,
                     min_filter=128,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     minimum_negative_samples=0,
                     **kwargs):
    """Build the SSD training symbol on ResNet pyramid features with focal loss.

    Several parameters (``network``, ``from_layers``, ``strides``, ``pads``,
    ``min_filter`` and ``kwargs``) are accepted for signature compatibility
    with the other SSD builders but are not referenced by this function.

    Parameters
    ----------
    network : str
        base network symbol name (not referenced here)
    num_classes : int
        number of object classes not including background
    num_layers : int
        forwarded to ``get_resnet_conv``; presumably the ResNet depth --
        confirm against that helper
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7 (not referenced here)
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
        Passed to ``multibox_layer`` as ``num_channels``.
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers (not referenced here)
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
        (not referenced here)
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave empty, it will calculate
        according to layer dimensions. ``None`` (the default) is treated as an
        empty list.
    min_filter : int
        minimum number of filters used in 1x1 convolution (not referenced here)
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    minimum_negative_samples : int
        always have some negative examples, no matter how many positive there are.
        this is useful when training on images with no ground-truth.

    Returns
    -------
    mx.Symbol
        Group of ``[cls_prob, loc_loss, cls_label, det]``.

    """
    # A literal ``[]`` default would be one list object shared by every call.
    if steps is None:
        steps = []

    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')

    conv_feat = get_resnet_conv(data, num_layers)
    _, conv_fpn_feat = get_resnet_conv_down(conv_feat)
    conv_fpn_feat.reverse()  # [P3, P4, P5, P6, P7]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        conv_fpn_feat, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)
    # now cls_preds are in shape of  batchsize x num_class x num_anchors

    targets = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds,
        overlap_threshold=.5,
        ignore_label=-1,
        negative_mining_ratio=1000,
        minimum_negative_samples=minimum_negative_samples,
        negative_mining_thresh=.4,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = targets[0]
    loc_target_mask = targets[1]
    cls_target = targets[2]

    # Focal loss: a custom "focal_loss" operator (registered elsewhere) takes
    # the raw scores, their channel-wise softmax and the class targets.
    cls_prob_ = mx.symbol.SoftmaxActivation(cls_preds, mode="channel")
    cls_prob = mx.sym.Custom(cls_preds,
                             cls_prob_,
                             cls_target,
                             op_type="focal_loss",
                             name="cls_prob",
                             gamma=2.0,
                             alpha=0.25,
                             normalize=True)

    # Smooth-L1 localization loss, masked by loc_target_mask.
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # monitoring training status (grad_scale=0: outputs only, no gradient)
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        threshold=0.05,
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
Exemple #2
0
def get_symbol_train(network,
                     num_classes,
                     from_layers,
                     num_filters,
                     strides,
                     pads,
                     sizes,
                     ratios,
                     normalizations=-1,
                     steps=None,
                     min_filter=128,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     **kwargs):
    """Build network symbol for training SSD

    Parameters
    ----------
    network : str
        base network symbol name; the module of that name is imported and its
        ``get_symbol`` is called
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave empty, it will calculate
        according to layer dimensions. ``None`` (the default) is treated as an
        empty list.
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    **kwargs
        forwarded unchanged to the base network's ``get_symbol``

    Returns
    -------
    mx.Symbol
        Group of ``[cls_prob, loc_loss, cls_label, det]``.

    """
    # A literal ``[]`` default would be one list object shared by every call.
    if steps is None:
        steps = []

    label = mx.sym.Variable('label')
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
                                 min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)

    targets = mx.symbol.contrib.MultiBoxTarget(
        anchor_boxes, label, cls_preds,
        overlap_threshold=.5,
        ignore_label=-1,
        negative_mining_ratio=3,
        minimum_negative_samples=0,
        negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = targets[0]
    loc_target_mask = targets[1]
    cls_target = targets[2]

    # Classification loss: softmax over the class axis, ignoring label -1.
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target,
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True,
        normalization='valid', name="cls_prob")
    # Localization loss: smooth-L1 on the masked offset residuals.
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # monitoring training status (grad_scale=0: outputs only, no gradient)
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.symbol.contrib.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
def get_symbol(network,
               num_classes,
               from_layers,
               num_filters,
               sizes,
               ratios,
               strides,
               pads,
               normalizations=-1,
               steps=None,
               min_filter=128,
               nms_thresh=0.5,
               force_suppress=False,
               nms_topk=400,
               **kwargs):
    """Build network for testing SSD

    Parameters
    ----------
    network : str
        base network symbol name; the module of that name is imported and its
        ``get_symbol`` is called
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave empty, it will calculate
        according to layer dimensions. ``None`` (the default) is treated as an
        empty list.
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    **kwargs
        forwarded unchanged to the base network's ``get_symbol``

    Returns
    -------
    mx.Symbol
        The ``MultiBoxDetection`` output symbol named "detection".

    """
    # A literal ``[]`` default would be one list object shared by every call.
    if steps is None:
        steps = []

    body = import_module(network).get_symbol(num_classes=num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
                                 min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)

    # Inference only: plain channel-wise softmax, no loss layers.
    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel',
                                           name='cls_prob')
    return mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
Exemple #4
0
def get_symbol_train(network, num_classes, alpha_bb8, alpha_loc, use_dilated, use_focalloss,
                     from_layers, num_filters, strides, pads,
                     sizes, ratios, normalizations=-1, steps=None, min_filter=128,
                     nms_thresh=0.5, force_suppress=False, nms_topk=400,
                     minimum_negative_samples=0, **kwargs):
    """Build network symbol for training SSD with an extra "bb8" regression head

    Training targets come from a custom ``training_targets`` operator
    (registered elsewhere). The classification loss is always
    ``SoftmaxOutput``: ``use_focalloss`` is accepted but has no effect, and
    ``nms_thresh``, ``force_suppress``, ``nms_topk`` and
    ``minimum_negative_samples`` are not referenced because the detection
    output and the ``MultiBoxTarget`` path are disabled in this variant.

    Parameters
    ----------
    network : str
        base network symbol name; the module of that name is imported and its
        ``get_symbol`` is called
    num_classes : int
        number of object classes not including background
    alpha_bb8 : float
        gradient scale (``grad_scale``) applied to the bb8 smooth-L1 loss
    alpha_loc : float
        gradient scale (``grad_scale``) applied to the localization smooth-L1 loss
    use_dilated : bool
        forwarded to the base network's ``get_symbol``
    use_focalloss : bool
        ignored; focal loss is disabled in this function
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave empty, it will calculate
        according to layer dimensions. ``None`` (the default) is treated as an
        empty list.
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold (not referenced here)
    force_suppress : boolean
        whether suppress different class objects (not referenced here)
    nms_topk : int
        apply NMS to top K detections (not referenced here)
    minimum_negative_samples : int
        always have some negative examples, no matter how many positive there are.
        this is useful when training on images with no ground-truth.
        (not referenced here)
    **kwargs
        forwarded unchanged to the base network's ``get_symbol``

    Returns
    -------
    mx.Symbol
        Group of ``[cls_prob, loc_loss, cls_label, bb8_loss, loc_pred, bb8_pred,
        anchors, loc_label, loc_pred_masked, loc_mae, bb8_label,
        bb8_pred_masked, bb8_mae]``.

    """
    # A literal ``[]`` default would be one list object shared by every call.
    if steps is None:
        steps = []

    label = mx.sym.Variable('label')

    body = import_module(network).get_symbol(num_classes=num_classes,
                                             use_dilated=use_dilated, **kwargs)

    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
                                 min_filter=min_filter)

    # This variant's multibox_layer is unpacked into four outputs, the last
    # being the bb8 predictions.
    loc_preds, cls_preds, anchor_boxes, bb8_preds = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)
    # now cls_preds are in shape of  batchsize x num_class x num_anchors

    # Targets come from the custom "training_targets" operator rather than
    # mx.contrib.symbol.MultiBoxTarget.
    loc_target, loc_target_mask, cls_target, bb8_target, bb8_target_mask = mx.sym.Custom(
        op_type="training_targets",
        name="training_targets",
        anchors=anchor_boxes,
        cls_preds=cls_preds,
        labels=label)

    # Classification loss: softmax over the class axis, ignoring label -1.
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target,
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True,
        normalization='valid', name="cls_prob")
    # Masked smooth-L1 losses for the box offsets and the bb8 head, each
    # weighted through grad_scale.
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=alpha_loc,
                                  normalization='valid', name="loc_loss")
    bb8_loss_ = mx.symbol.smooth_l1(
        name="bb8_loss_",
        data=bb8_target_mask * (bb8_preds - bb8_target),
        scalar=1.0)
    bb8_loss = mx.symbol.MakeLoss(bb8_loss_, grad_scale=alpha_bb8,
                                  normalization='valid', name="bb8_loss")

    # monitoring training status (grad_scale=0: outputs only, no gradient)
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
    anchors = mx.symbol.MakeLoss(data=anchor_boxes, grad_scale=0, name='anchors')
    loc_mae = mx.symbol.MakeLoss(
        data=mx.sym.abs(loc_target_mask * (loc_preds - loc_target)),
        grad_scale=0, name='loc_mae')
    loc_label = mx.symbol.MakeLoss(data=loc_target_mask * loc_target,
                                   grad_scale=0., name='loc_label')
    loc_pred_masked = mx.symbol.MakeLoss(data=loc_target_mask * loc_preds,
                                         grad_scale=0, name='loc_pred_masked')
    bb8_label = mx.symbol.MakeLoss(data=bb8_target_mask * bb8_target,
                                   grad_scale=0, name='bb8_label')
    bb8_pred = mx.symbol.MakeLoss(data=bb8_preds, grad_scale=0, name='bb8_pred')
    bb8_pred_masked = mx.symbol.MakeLoss(data=bb8_target_mask * bb8_preds,
                                         grad_scale=0, name='bb8_pred_masked')
    bb8_mae = mx.symbol.MakeLoss(
        data=mx.sym.abs(bb8_target_mask * (bb8_preds - bb8_target)),
        grad_scale=0, name='bb8_mae')

    # The raw location predictions are exported; no MultiBoxDetection output
    # is built in this variant.
    loc_pred = mx.symbol.MakeLoss(data=loc_preds, grad_scale=0, name='loc_pred')

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, bb8_loss, loc_pred, bb8_pred,
                            anchors, loc_label, loc_pred_masked, loc_mae,
                            bb8_label, bb8_pred_masked, bb8_mae])
Exemple #5
0
def get_symbol_train(num_classes=20):
    """
    SSD training symbol built on a VGG-16 style backbone.

    The backbone is a modified VGG 16 in which the fc6/fc7 layers are replaced
    by convolutions (conv6/conv7), making it slightly smaller than the
    original network. Extra SSD layers and the training losses are appended.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background

    Returns:
    ----------
    mx.Symbol
        Group of [cls_prob, loc_loss, cls_label]
    """

    def conv_relu(bottom, tag, filters, kernel=(3, 3), pad=(1, 1), **conv_extra):
        # Convolution "conv<tag>" followed by ReLU "relu<tag>"; returns the ReLU.
        conv = mx.symbol.Convolution(data=bottom,
                                     kernel=kernel,
                                     pad=pad,
                                     num_filter=filters,
                                     name="conv" + tag,
                                     **conv_extra)
        return mx.symbol.Activation(data=conv, act_type="relu", name="relu" + tag)

    def max_pool(bottom, tag, kernel=(2, 2), stride=(2, 2), **pool_extra):
        # Max pooling layer named "pool<tag>".
        return mx.symbol.Pooling(data=bottom,
                                 pool_type="max",
                                 kernel=kernel,
                                 stride=stride,
                                 name="pool" + tag,
                                 **pool_extra)

    image = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # group 1
    feat = conv_relu(image, "1_1", 64)
    feat = conv_relu(feat, "1_2", 64)
    feat = max_pool(feat, "1")
    # group 2
    feat = conv_relu(feat, "2_1", 128)
    feat = conv_relu(feat, "2_2", 128)
    feat = max_pool(feat, "2")
    # group 3 (pool3 uses the "full" pooling convention)
    feat = conv_relu(feat, "3_1", 256)
    feat = conv_relu(feat, "3_2", 256)
    feat = conv_relu(feat, "3_3", 256)
    feat = max_pool(feat, "3", pooling_convention="full")
    # group 4 (relu4_3 is the first detection source)
    feat = conv_relu(feat, "4_1", 512)
    feat = conv_relu(feat, "4_2", 512)
    relu4_3 = conv_relu(feat, "4_3", 512)
    feat = max_pool(relu4_3, "4")
    # group 5 (pool5 is 3x3 with stride 1 and pad 1)
    feat = conv_relu(feat, "5_1", 512)
    feat = conv_relu(feat, "5_2", 512)
    feat = conv_relu(feat, "5_3", 512)
    feat = max_pool(feat, "5", kernel=(3, 3), stride=(1, 1), pad=(1, 1))
    # group 6: dilated 3x3 convolution (no dropout is applied)
    feat = conv_relu(feat, "6", 1024, pad=(6, 6), dilate=(6, 6))
    # group 7: 1x1 convolution (no dropout is applied)
    relu7 = conv_relu(feat, "7", 1024, kernel=(1, 1), pad=(0, 0))

    ### ssd extra layers ###
    # Each pair is a 1x1 convolution followed by a stride-2 3x3 convolution.
    _, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1, 1), pad=(0, 0),
                                stride=(1, 1), act_type="relu", use_batchnorm=False)
    _, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3, 3), pad=(1, 1),
                                stride=(2, 2), act_type="relu", use_batchnorm=False)
    _, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1, 1), pad=(0, 0),
                                stride=(1, 1), act_type="relu", use_batchnorm=False)
    _, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3, 3), pad=(1, 1),
                                stride=(2, 2), act_type="relu", use_batchnorm=False)
    _, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1, 1), pad=(0, 0),
                                 stride=(1, 1), act_type="relu", use_batchnorm=False)
    _, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3, 3), pad=(1, 1),
                                 stride=(2, 2), act_type="relu", use_batchnorm=False)
    # global Pooling
    pool10 = mx.symbol.Pooling(data=relu10_2,
                               pool_type="avg",
                               global_pool=True,
                               kernel=(1, 1),
                               name='pool10')

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10]
    sizes = [
        [.1],
        [.2, .276],
        [.38, .461],
        [.56, .644],
        [.74, .825],
        [.92, 1.01],
    ]
    # The first source layer uses three aspect ratios, the other five use five.
    ratios = [[1, 2, .5]] + [[1, 2, .5, 3, 1. / 3] for _ in range(5)]
    normalizations = [20, -1, -1, -1, -1, -1]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, clip=True, interm_layer=0)

    targets = mx.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds,
        overlap_threshold=.5,
        ignore_label=-1,
        negative_mining_ratio=3,
        minimum_negative_samples=0,
        negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = targets[0]
    loc_target_mask = targets[1]
    cls_target = targets[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target,
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True,
        normalization='valid', name="cls_prob")
    masked_residual = loc_target_mask * (loc_preds - loc_target)
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", data=masked_residual, scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # monitoring training status
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label])
Exemple #6
0
def get_symbol_train(network,
                     num_classes,
                     from_layers,
                     num_filters,
                     strides,
                     pads,
                     sizes,
                     ratios,
                     normalizations=-1,
                     steps=[],
                     min_filter=128,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     **kwargs):
    """Assemble the SSD symbol used for training.

    The base network is loaded by name, multi-scale feature layers are
    derived from it, and the multibox heads, target assignment, losses and
    a gradient-free detection output are attached on top.

    Parameters
    ----------
    network : str
        Name of the module providing the base network; it is imported and
        its ``get_symbol(num_classes, **kwargs)`` is called.
    num_classes : int
        Number of object classes, background excluded.
    from_layers : list of str
        Names of the layers to extract features from; an empty string ``''``
        requests an extra layer to be appended, e.g.
        ``['relu4_3', 'fc7', '', '', '', '']`` extracts from ``relu4_3`` and
        ``fc7`` and stacks four extra layers after ``fc7``.
    num_filters : list of int
        Filter counts of the extra layers. ``-1`` is allowed for extracted
        layers, unless normalization and scale are applied to that layer, in
        which case the real count is required, e.g.
        ``[512, -1, 512, 256, 256, 256]``.
    strides : list of int
        Strides of the appended 3x3 convolutions (``-1`` for extracted
        layers).
    pads : list of int
        Paddings of the appended 3x3 convolutions (``-1`` for extracted
        layers).
    sizes : list or list of list
        Either one ``[min_size, max_size]`` shared by every layer or one
        list per layer.
    ratios : list or list of list
        Either one ``[ratio1, ratio2, ...]`` shared by every layer or one
        list per layer.
    normalizations : int or list of int
        A single value applied to all layers or one value per layer;
        ``-1`` means no normalization and scale.
    steps : list
        Steps for each MultiBoxPrior layer; when left empty they are derived
        from the layer dimensions.
    min_filter : int
        Minimum number of filters used in the 1x1 convolutions.
    nms_thresh : float
        Non-maximum suppression threshold.
    force_suppress : boolean
        Whether objects of different classes may suppress each other.
    nms_topk : int
        NMS is applied to the top K detections.
    **kwargs
        Forwarded unchanged to the base network's ``get_symbol``.

    Returns
    -------
    mx.Symbol
        Group of ``[cls_prob, loc_loss, cls_label, det_out]``.

    """
    # Symbolic input named 'label'; it is fed to MultiBoxTarget below.
    label = mx.sym.Variable('label')

    # Base network followed by the extracted / appended feature layers.
    backbone = import_module(network).get_symbol(num_classes, **kwargs)
    feature_layers = multi_layer_feature(
        backbone, from_layers, num_filters, strides, pads,
        min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        feature_layers, num_classes,
        sizes=sizes, ratios=ratios, normalization=normalizations,
        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)

    # Training targets derived from anchors, labels and class predictions.
    targets = mx.symbol.contrib.MultiBoxTarget(
        anchor_boxes, label, cls_preds,
        overlap_threshold=0.5,
        ignore_label=-1,
        negative_mining_ratio=3,
        minimum_negative_samples=0,
        negative_mining_thresh=0.5,
        variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target, loc_target_mask, cls_target = targets[0], targets[1], targets[2]

    # Classification loss; entries labelled -1 are ignored.
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target,
        ignore_label=-1, use_ignore=True, grad_scale=1.0, multi_output=True,
        normalization='valid', name="cls_prob")

    # Localization loss: smooth L1 over the masked prediction error.
    masked_loc_error = loc_target_mask * (loc_preds - loc_target)
    smooth_l1_loss = mx.symbol.smooth_l1(
        name="loc_loss_", data=masked_loc_error, scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(
        smooth_l1_loss, grad_scale=1.0, normalization='valid',
        name="loc_loss")

    # Outputs exposed for monitoring only: their gradient scale is zero.
    cls_label = mx.symbol.MakeLoss(
        data=cls_target, grad_scale=0, name="cls_label")
    detections = mx.symbol.contrib.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
    det_out = mx.symbol.MakeLoss(data=detections, grad_scale=0, name="det_out")

    # Bundle every output into a single symbol.
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det_out])
Exemple #7
0
def get_symbol(network,
               num_classes,
               from_layers,
               num_filters,
               sizes,
               ratios,
               strides,
               pads,
               normalizations=-1,
               steps=[],
               min_filter=128,
               nms_thresh=0.5,
               force_suppress=False,
               nms_topk=400,
               **kwargs):
    """Assemble the SSD symbol used for testing (inference).

    The base network is loaded by name, multi-scale feature layers are
    derived from it, and the multibox heads are followed by a softmax and
    the MultiBoxDetection operator.

    Parameters
    ----------
    network : str
        Name of the module providing the base network; it is imported and
        its ``get_symbol(num_classes, **kwargs)`` is called.
    num_classes : int
        Number of object classes, background excluded.
    from_layers : list of str
        Names of the layers to extract features from; an empty string ``''``
        requests an extra layer to be appended, e.g.
        ``['relu4_3', 'fc7', '', '', '', '']`` extracts from ``relu4_3`` and
        ``fc7`` and stacks four extra layers after ``fc7``.
    num_filters : list of int
        Filter counts of the extra layers. ``-1`` is allowed for extracted
        layers, unless normalization and scale are applied to that layer, in
        which case the real count is required, e.g.
        ``[512, -1, 512, 256, 256, 256]``.
    sizes : list or list of list
        Either one ``[min_size, max_size]`` shared by every layer or one
        list per layer.
    ratios : list or list of list
        Either one ``[ratio1, ratio2, ...]`` shared by every layer or one
        list per layer.
    strides : list of int
        Strides of the appended 3x3 convolutions (``-1`` for extracted
        layers).
    pads : list of int
        Paddings of the appended 3x3 convolutions (``-1`` for extracted
        layers).
    normalizations : int or list of int
        A single value applied to all layers or one value per layer;
        ``-1`` means no normalization and scale.
    steps : list
        Steps for each MultiBoxPrior layer; when left empty they are derived
        from the layer dimensions.
    min_filter : int
        Minimum number of filters used in the 1x1 convolutions.
    nms_thresh : float
        Non-maximum suppression threshold.
    force_suppress : boolean
        Whether objects of different classes may suppress each other.
    nms_topk : int
        NMS is applied to the top K detections.
    **kwargs
        Forwarded unchanged to the base network's ``get_symbol``.

    Returns
    -------
    mx.Symbol
        Output of the ``MultiBoxDetection`` operator named ``"detection"``.

    """
    # Base network followed by the extracted / appended feature layers.
    backbone = import_module(network).get_symbol(num_classes, **kwargs)
    feature_layers = multi_layer_feature(
        backbone, from_layers, num_filters, strides, pads,
        min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        feature_layers, num_classes,
        sizes=sizes, ratios=ratios, normalization=normalizations,
        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)

    # Plain softmax over axis 1 (no loss layer is attached in this symbol).
    cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob')

    return mx.symbol.contrib.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes,
        name="detection",
        nms_threshold=nms_thresh,
        force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2),
        nms_topk=nms_topk)
Exemple #8
0
def get_symbol_train(network,
                     num_classes,
                     from_layers,
                     num_filters,
                     sizes,
                     ratios,
                     batch_size,
                     gpus,
                     normalizations=-1,
                     steps=[],
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     **kwargs):
    """Build the two-branch (``arm_*`` / ``odm_*``) training symbol.

    The ``arm`` branch is trained against labels rewritten by the
    ``modify_label`` custom operator. Its anchors are then refined by the
    ``refine_anchor_generator`` custom operator, and the ``odm`` branch is
    trained against the original labels using those refined anchors.

    Parameters
    ----------
    network : str
        Name of the module providing the base network; it is imported and
        its ``get_symbol(num_classes, **kwargs)`` is called.
    num_classes : int
        Number of object classes not including background.
    from_layers : list of str
        Feature layer specification, passed to ``multi_layer_feature``.
    num_filters : list of int
        Passed to ``multibox_layer`` both positionally and as
        ``num_channels``.
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific
        layers.
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific
        layers.
    batch_size : int
        Presumably the total mini-batch size over all devices (confirm
        against the caller). Only ``batch_size // gpus`` is used.
    gpus : int
        Presumably the number of devices the batch is spread over (confirm
        against the caller). Must be non-zero.
    normalizations : int or list of int
        Use this value for all layers or [...] for specific layers,
        -1 indicates no normalization and scale.
    steps : list
        Steps for each MultiBoxPrior layer; leave empty to have them
        calculated from the layer dimensions.
    nms_thresh : float
        Non-maximum suppression threshold.
    force_suppress : boolean
        Whether to suppress objects of different classes.
    nms_topk : int
        Apply NMS to the top K detections.
    **kwargs
        Forwarded unchanged to the base network's ``get_symbol``.

    Returns
    -------
    mx.Symbol
        Group of ``[arm_cls_prob, arm_loc_loss, arm_cls_label,
        odm_cls_prob, odm_loc_loss, odm_cls_label, odm_det_out]``.

    """
    label = mx.sym.Variable('label')
    # Number of axis-0 slices the odm tensors are split into. Floor division
    # keeps this an int on Python 3, where ``/`` would yield a float and break
    # both ``range(times)`` and ``num_outputs=times`` below.
    times = batch_size // gpus
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers)
    arm_loc_preds, arm_cls_preds, arm_anchor_boxes, odm_loc_preds, odm_cls_preds = \
        multibox_layer(layers, num_filters, num_classes,
                       sizes=sizes, ratios=ratios, normalization=normalizations,
                       num_channels=num_filters, clip=False, interm_layer=0,
                       steps=steps)

    # arm branch: targets are computed against the rewritten labels.
    label_arm = mx.symbol.Custom(label=label, op_type='modify_label')

    arm_tmp = mx.contrib.symbol.MultiBoxTarget(
        *[arm_anchor_boxes, label_arm, arm_cls_preds],
        overlap_threshold=.5, ignore_label=-1, negative_mining_ratio=3,
        minimum_negative_samples=0, negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2), name="arm_multibox_target")
    arm_loc_target = arm_tmp[0]
    arm_loc_target_mask = arm_tmp[1]
    arm_cls_target = arm_tmp[2]

    # odm branch: anchors come from the refinement operator, and targets are
    # assigned slice by slice against the original labels.
    odm_anchor_boxes = mx.symbol.Custom(arm_anchor_boxes=arm_anchor_boxes,
                                        arm_loc_preds=arm_loc_preds,
                                        arm_loc_mask=arm_loc_target_mask,
                                        op_type='refine_anchor_generator')
    odm_anchor_boxes_bs = mx.sym.split(data=odm_anchor_boxes, axis=0, num_outputs=times)
    odm_loc_target = []
    odm_loc_target_mask = []
    odm_cls_target = []
    label_bs = mx.sym.split(data=label, axis=0, num_outputs=times)
    odm_cls_preds_bs = mx.sym.split(data=odm_cls_preds, axis=0, num_outputs=times)
    for i in range(times):
        odm_tmp = mx.contrib.symbol.MultiBoxTarget(
            *[odm_anchor_boxes_bs[i], label_bs[i], odm_cls_preds_bs[i]],
            overlap_threshold=.5, ignore_label=-1, negative_mining_ratio=3,
            minimum_negative_samples=0, negative_mining_thresh=.5,
            variances=(0.1, 0.1, 0.2, 0.2),
            name="odm_multibox_target_{}".format(i))
        odm_loc_target.append(odm_tmp[0])
        odm_loc_target_mask.append(odm_tmp[1])
        odm_cls_target.append(odm_tmp[2])

    # Stitch the per-slice targets back together along axis 0.
    odm_loc_target = mx.symbol.concat(*odm_loc_target, num_args=len(odm_loc_target), dim=0)
    odm_loc_target_mask = mx.symbol.concat(*odm_loc_target_mask,
                                           num_args=len(odm_loc_target_mask), dim=0)
    odm_cls_target = mx.symbol.concat(*odm_cls_target, num_args=len(odm_cls_target), dim=0)

    # The custom op returns replacement class targets and location mask for
    # the odm branch, computed with access to the arm class predictions.
    group = mx.symbol.Custom(arm_cls_preds=arm_cls_preds,
                             odm_cls_target=odm_cls_target,
                             odm_loc_target_mask=odm_loc_target_mask,
                             op_type='negative_filtering')
    odm_cls_target = group[0]
    odm_loc_target_mask = group[1]

    # arm losses, plus a zero-gradient copy of the class targets for monitoring.
    arm_cls_prob = mx.symbol.SoftmaxOutput(
        data=arm_cls_preds, label=arm_cls_target, ignore_label=-1,
        use_ignore=True, grad_scale=1.0, multi_output=True,
        normalization='valid', name="arm_cls_prob")
    arm_loc_loss_ = mx.symbol.smooth_l1(
        name="arm_loc_loss_",
        data=arm_loc_target_mask * (arm_loc_preds - arm_loc_target),
        scalar=1.0)
    arm_loc_loss = mx.symbol.MakeLoss(arm_loc_loss_, grad_scale=1.0,
                                      normalization='valid', name="arm_loc_loss")
    arm_cls_label = mx.symbol.MakeLoss(data=arm_cls_target, grad_scale=0,
                                       name="arm_cls_label")

    # odm losses, plus a zero-gradient copy of the class targets for monitoring.
    odm_cls_prob = mx.symbol.SoftmaxOutput(
        data=odm_cls_preds, label=odm_cls_target, ignore_label=-1,
        use_ignore=True, grad_scale=1.0, multi_output=True,
        normalization='valid', name="odm_cls_prob")
    odm_loc_loss_ = mx.symbol.smooth_l1(
        name="odm_loc_loss_",
        data=odm_loc_target_mask * (odm_loc_preds - odm_loc_target),
        scalar=1.0)
    odm_loc_loss = mx.symbol.MakeLoss(odm_loc_loss_, grad_scale=1.0,
                                      normalization='valid', name="odm_loc_loss")
    odm_cls_label = mx.symbol.MakeLoss(data=odm_cls_target, grad_scale=0,
                                       name="odm_cls_label")

    # Detections are produced per slice and exposed with zero gradient scale.
    odm_det = []
    odm_loc_preds_bs = mx.sym.split(data=odm_loc_preds, axis=0, num_outputs=times)
    odm_cls_prob_bs = mx.sym.split(data=odm_cls_prob, axis=0, num_outputs=times)
    for i in range(times):
        odm_det_tmp = mx.contrib.symbol.MultiBoxDetection(
            *[odm_cls_prob_bs[i], odm_loc_preds_bs[i], odm_anchor_boxes_bs[i]],
            name="odm_detection_{}".format(i), nms_threshold=nms_thresh,
            force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2),
            nms_topk=nms_topk)
        odm_det.append(odm_det_tmp)
    odm_det = mx.symbol.concat(*odm_det, num_args=len(odm_det), dim=0)
    odm_det = mx.symbol.MakeLoss(data=odm_det, grad_scale=0, name="odm_det_out")

    # group output
    out = mx.symbol.Group([arm_cls_prob, arm_loc_loss, arm_cls_label,
                           odm_cls_prob, odm_loc_loss, odm_cls_label, odm_det])
    return out