def add_multibox_for_extra(extra_layers, num_classes, num_filters, sizes,
                           ratios, normalizations=-1, steps=[], nms_thresh=0.5,
                           force_suppress=False, nms_topk=400, rolling_idx=0,
                           mbox_shared_weights=None):
    """Attach multibox heads plus an NMS detection output to extra layers.

    Builds loc/cls predictors and anchors over ``extra_layers`` (a rolling
    iteration's feature maps), applies a channel-wise softmax to the class
    scores, and returns a ``MultiBoxDetection`` symbol. Symbol names gain a
    ``_<rolling_idx>`` suffix when ``rolling_idx`` is non-zero so multiple
    rolling iterations can coexist in one graph.

    Parameters
    ----------
    extra_layers : list of mx.Symbol
        feature maps to attach prediction heads to
    num_classes : int
        number of object classes, not including background
    num_filters : list of int
        channel counts matching ``extra_layers``
    sizes, ratios : list
        anchor size/ratio specs; ``len(sizes) > len(extra_layers)`` selects
        the branched (weight-sharing) head, equal lengths the plain head
    normalizations : int or list of int
        per-layer normalization scales, -1 to disable
    steps : list
        explicit MultiBoxPrior steps, empty to auto-compute
    nms_thresh : float
        non-maximum suppression IoU threshold
    force_suppress : bool
        suppress across classes during NMS
    nms_topk : int
        keep top-K detections into NMS
    rolling_idx : int
        rolling iteration index used to suffix symbol names
    mbox_shared_weights : optional
        pre-built weights shared across branched multibox heads

    Returns
    -------
    mx.Symbol

    Raises
    ------
    ValueError
        if ``len(sizes)`` is smaller than ``len(extra_layers)``
    """
    # non-zero rolling iterations get uniquely suffixed symbol names
    suffix = '_%d' % rolling_idx if rolling_idx else ''
    if len(sizes) > len(extra_layers):
        # more size specs than layers: branched head with shared weights
        loc_preds, cls_preds, anchor_boxes = branched_multibox_layer(
            extra_layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, steps=steps, branch_num=4,
            shared_weights=mbox_shared_weights)
    elif len(sizes) == len(extra_layers):
        loc_preds, cls_preds, anchor_boxes = multibox_layer(
            extra_layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, steps=steps)
    else:
        raise ValueError("Wrong number of sizes")
    cls_prob = mx.symbol.SoftmaxActivation(
        data=cls_preds, mode='channel', name='cls_prob' + suffix)
    return mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name='detection' + suffix,
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
def get_symbol_train(network, num_classes, from_layers, num_filters, strides,
                     pads, sizes, ratios, normalizations=-1, steps=[],
                     min_filter=128, nms_thresh=0.5, force_suppress=False,
                     nms_topk=400, minimum_negative_samples=0, **kwargs):
    """Build network symbol for training SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra
        layers on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted
        features, however, if normalization and scale is applied, the number
        of filter for that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for
        extracted feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific
        layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific
        layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific
        layers, -1 indicate no normalizations and scales
    steps : list
        specify steps for each MultiBoxPrior layer, leave empty, it will
        calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    minimum_negative_samples : int
        always have some negative examples, no matter how many positive
        there are. this is useful when training on images with no
        ground-truth

    Returns
    -------
    mx.Symbol
    """
    label = mx.sym.Variable('label')
    # backbone + extra SSD feature pyramid
    body = import_module(network).get_symbol(num_classes=num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers, num_filters, strides,
                                 pads, min_filter=min_filter)
    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)
    # match anchors to ground truth with hard negative mining (3:1)
    target_group = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds, overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3,
        minimum_negative_samples=minimum_negative_samples,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = target_group[0]
    loc_target_mask = target_group[1]
    cls_target = target_group[2]
    # classification loss (softmax over classes, ignore unmatched anchors)
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=1., multi_output=True, normalization='valid',
        name="cls_prob")
    # localization loss: smooth-L1 on masked offsets
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")
    # monitoring training status (zero-gradient passthrough outputs)
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
               strides, pads, normalizations=-1, steps=[], min_filter=128,
               nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
    """Build network for testing SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra
        layers on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted
        features, however, if normalization and scale is applied, the number
        of filter for that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for
        extracted feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific
        layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific
        layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific
        layers, -1 indicate no normalizations and scales
    steps : list
        specify steps for each MultiBoxPrior layer, leave empty, it will
        calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol
    """
    # backbone + extra SSD feature pyramid
    body = import_module(network).get_symbol(num_classes=num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers, num_filters, strides,
                                 pads, min_filter=min_filter)
    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters,
        clip=False, interm_layer=0, steps=steps)
    # inference path: plain softmax (no labels), then NMS
    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel',
                                           name='cls_prob')
    return mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
def get_symbol_rolling_test(rolling_time, network, num_classes, from_layers,
                            num_filters, sizes, ratios, strides, pads,
                            normalizations=-1, steps=[], min_filter=128,
                            nms_thresh=0.5, force_suppress=False,
                            nms_topk=400, rolling_rate=0.075, **kwargs):
    """Build network for testing rolling SSD

    Builds the base SSD detection head, then applies ``rolling_time``
    rolling-structure iterations, each producing an additional detection
    output; all outputs are grouped into one symbol.

    Parameters
    ----------
    rolling_time : int
        number of rolling iterations appended after the base detection
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra
        layers on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted
        features, however, if normalization and scale is applied, the number
        of filter for that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for
        extracted feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific
        layers; a length of ``(len(from_layers) - 1) * rolling_time + 1``
        selects the branched (weight-sharing) multibox head
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific
        layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific
        layers, -1 indicate no normalizations and scales
    steps : list
        specify steps for each MultiBoxPrior layer, leave empty, it will
        calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    rolling_rate : float
        channel reduction rate passed to create_rolling_struct
        # NOTE(review): previously this name was referenced but never
        # defined (NameError for rolling_time >= 1); 0.075 is assumed from
        # the rolling-SSD convention — TODO confirm against callers
    kwargs : dict
        extra arguments for the base network; must contain "data_shape"

    Returns
    -------
    mx.Symbol

    Raises
    ------
    ValueError
        if len(sizes) matches neither the branched nor the plain layout
    """
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(
        body, from_layers, num_filters, strides, pads, min_filter=min_filter)

    mbox_shared_weights = None
    # BUGFIX: this condition used to be evaluated twice verbatim; evaluate
    # it once and branch on the result.
    if len(sizes) == (len(from_layers) - 1) * rolling_time + 1:
        mbox_shared_weights = _get_multibox_shared_weights(len(layers), 4)
        loc_preds, cls_preds, anchor_boxes = branched_multibox_layer(
            layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, branch_num=4,
            shared_weights=mbox_shared_weights)
    elif len(sizes) == len(from_layers):
        loc_preds, cls_preds, anchor_boxes = multibox_layer(
            layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0)
    else:
        # BUGFIX: previously fell through with loc_preds/cls_preds unbound,
        # raising UnboundLocalError below; fail fast like the sibling helpers
        raise ValueError("Wrong number of sizes")

    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel',
                                           name='cls_prob')
    out = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    outputs = [out]

    # each rolling iteration refines the previous iteration's feature maps
    # and contributes its own suffixed detection output
    last_rolling_layers = layers
    shared_weights = _get_shared_weights(len(layers), strides)
    for roll_idx in range(1, rolling_time + 1):
        roll_layers = create_rolling_struct(
            last_rolling_layers, kwargs["data_shape"],
            num_filters=num_filters, strides=strides, pads=pads,
            rolling_rate=rolling_rate, roll_idx=roll_idx, conv2=False,
            normalize=True, shared_weights=shared_weights)
        out = add_multibox_for_extra(
            roll_layers, num_classes=num_classes, num_filters=num_filters,
            sizes=sizes, ratios=ratios, normalizations=normalizations,
            steps=steps, nms_thresh=nms_thresh,
            force_suppress=force_suppress, nms_topk=nms_topk,
            rolling_idx=roll_idx, mbox_shared_weights=mbox_shared_weights)
        outputs.append(out)
        last_rolling_layers = roll_layers
    return mx.sym.Group(outputs)
def add_multibox_and_loss_for_extra(extra_layers, label, num_classes,
                                    num_filters, sizes, ratios,
                                    normalizations=-1, steps=[],
                                    nms_thresh=0.5, force_suppress=False,
                                    nms_topk=400, rolling_idx=0,
                                    mbox_shared_weights=None):
    """Attach multibox heads, training losses and detection to extra layers.

    Training-time counterpart of ``add_multibox_for_extra``: builds loc/cls
    predictors and anchors over ``extra_layers``, matches them to ``label``
    with MultiBoxTarget, adds softmax and smooth-L1 losses plus
    zero-gradient monitoring outputs, and groups everything into one
    symbol. Symbol names gain a ``_<rolling_idx>`` suffix when
    ``rolling_idx`` is non-zero.

    Parameters
    ----------
    extra_layers : list of mx.Symbol
        feature maps to attach prediction heads to
    label : mx.Symbol
        ground-truth label symbol
    num_classes : int
        number of object classes, not including background
    num_filters : list of int
        channel counts matching ``extra_layers``
    sizes, ratios : list
        anchor specs; equal lengths select the plain head,
        ``len(sizes) > len(extra_layers)`` the branched head
    normalizations : int or list of int
        per-layer normalization scales, -1 to disable
    steps : list
        explicit MultiBoxPrior steps, empty to auto-compute
    nms_thresh : float
        non-maximum suppression IoU threshold
    force_suppress : bool
        suppress across classes during NMS
    nms_topk : int
        keep top-K detections into NMS
    rolling_idx : int
        rolling iteration index used to suffix symbol names
    mbox_shared_weights : optional
        pre-built weights shared across branched multibox heads

    Returns
    -------
    mx.Symbol
        group of [cls_prob, loc_loss, cls_label, det]

    Raises
    ------
    ValueError
        if ``len(sizes)`` is smaller than ``len(extra_layers)``
    """
    # non-zero rolling iterations get uniquely suffixed symbol names
    suffix = '_%d' % rolling_idx if rolling_idx else ''
    if len(sizes) == len(extra_layers):
        loc_preds, cls_preds, anchor_boxes = multibox_layer(
            extra_layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, steps=steps)
    elif len(sizes) > len(extra_layers):
        # more size specs than layers: branched head with shared weights
        loc_preds, cls_preds, anchor_boxes = branched_multibox_layer(
            extra_layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, steps=steps, branch_num=4,
            shared_weights=mbox_shared_weights)
    else:
        raise ValueError("wrong number of sizes")
    # match anchors to ground truth with hard negative mining (3:1)
    target_group = mx.contrib.symbol.MultiBoxTarget(
        anchor_boxes, label, cls_preds, overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3,
        minimum_negative_samples=0, negative_mining_thresh=.5,
        variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target" + suffix)
    loc_target = target_group[0]
    loc_target_mask = target_group[1]
    cls_target = target_group[2]
    # classification loss (softmax over classes, ignore unmatched anchors)
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=1., multi_output=True, normalization='valid',
        name="cls_prob" + suffix)
    # localization loss: smooth-L1 on masked offsets
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_" + suffix,
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid',
                                  name="loc_loss" + suffix)
    # monitoring training status (zero-gradient passthrough outputs)
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label" + suffix)
    det = mx.contrib.symbol.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection" + suffix,
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0,
                             name="det_out" + suffix)
    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])