def sampled_softmax(num_classes, num_samples, in_dim, inputs, weight, bias,
                    sampled_values, remove_accidental_hits=True):
    """ Sampled softmax via importance sampling.
        This under-estimates the full softmax and is only used for training.
    """
    # inputs = (n, in_dim)
    sample, prob_sample, prob_target = sampled_values

    # (num_samples, )
    # the sampled ids are fed in through the 'sample' variable
    # (this re-binds `sample` from the `sampled_values` tuple above)
    sample = S.var('sample', shape=(num_samples,), dtype='float32')
    # (n, )
    label = S.var('label')
    label = S.reshape(label, shape=(-1,), name="label_reshape")
    # (num_samples+n, )
    sample_label = S.concat(sample, label, dim=0)
    # lookup weights and biases
    # (num_samples+n, dim)
    sample_target_w = S.sparse.Embedding(data=sample_label, weight=weight,
                                         input_dim=num_classes, output_dim=in_dim,
                                         sparse_grad=True)
    # (num_samples+n, 1)
    sample_target_b = S.sparse.Embedding(data=sample_label, weight=bias,
                                         input_dim=num_classes, output_dim=1,
                                         sparse_grad=True)
    # (num_samples, dim)
    sample_w = S.slice(sample_target_w, begin=(0, 0), end=(num_samples, None))
    target_w = S.slice(sample_target_w, begin=(num_samples, 0), end=(None, None))
    sample_b = S.slice(sample_target_b, begin=(0, 0), end=(num_samples, None))
    target_b = S.slice(sample_target_b, begin=(num_samples, 0), end=(None, None))

    # target
    # (n, 1)
    true_pred = S.sum(target_w * inputs, axis=1, keepdims=True) + target_b
    # samples
    # (n, num_samples)
    sample_b = S.reshape(sample_b, (-1,))
    sample_pred = S.FullyConnected(inputs, weight=sample_w, bias=sample_b,
                                   num_hidden=num_samples)

    # remove accidental hits: mask out sampled classes that equal the true label
    if remove_accidental_hits:
        label_v = S.reshape(label, (-1, 1))
        sample_v = S.reshape(sample, (1, -1))
        neg = S.broadcast_equal(label_v, sample_v) * -1e37
        sample_pred = sample_pred + neg

    # subtract log(Q(y)) to correct for the sampling proposal distribution
    prob_sample = S.reshape(prob_sample, shape=(1, num_samples))
    p_target = true_pred - S.log(prob_target)
    p_sample = S.broadcast_sub(sample_pred, S.log(prob_sample))

    # return logits and new_labels
    # (n, 1+num_samples)
    logits = S.concat(p_target, p_sample, dim=1)
    new_targets = S.zeros_like(label)
    return logits, new_targets
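
# --- Usage sketch (illustrative, not part of the original code; all names
# below are assumptions). It shows how the returned logits/new_targets pair
# feeds a plain softmax loss over (1 + num_samples) classes, with the true
# class always at index 0.
def _sampled_softmax_usage(num_classes=10000, num_samples=256, in_dim=128):
    inputs = S.var('data')                            # (n, in_dim)
    weight = S.var('cls_weight', stype='row_sparse')  # (num_classes, in_dim)
    bias = S.var('cls_bias', stype='row_sparse')      # (num_classes, 1)
    # `sampled_values` would come from a candidate sampler such as
    # mx.nd.contrib.rand_zipfian (sampled ids plus expected counts under the
    # log-uniform proposal). The first element is re-declared inside
    # sampled_softmax as the 'sample' variable, so a placeholder suffices here.
    sampled_values = (None,
                      S.var('prob_sample', shape=(num_samples,)),
                      S.var('prob_target'))
    logits, new_targets = sampled_softmax(num_classes, num_samples, in_dim,
                                          inputs, weight, bias, sampled_values)
    return S.SoftmaxOutput(data=logits, label=new_targets)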
def atrous_spatial_pyramid_pooling(feat, rate, aspp_with_separable_conv, oc_context=False):
    """ ASPP head (DeepLabv3+ style): a 1x1 conv, three 3x3 atrous convs at
        rates 6/12/18 (scaled by `rate`), and an image-level branch, all
        concatenated and fused by a final 1x1 conv.
        `use_global_stats`, `fix_gamma`, `bn_mom`, `eps` and `args` are
        BatchNorm settings defined at module scope.
    """
    conv_1x1 = Conv(feat, num_filter=256, kernel=(1, 1), name="aspp_1x1")
    conv_1x1 = BN(conv_1x1, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                  momentum=bn_mom, name="aspp_1x1_bn", eps=eps, **args)
    conv_1x1 = Relu(conv_1x1, act_type='relu', name='aspp_1x1_relu')

    if aspp_with_separable_conv:
        conv_3x3_d6 = Sepconv(data=feat, in_channel=2048, num_filter=256, stride=1,
                              dilate=6 * rate, name="aspp_3x3_d6")
        conv_3x3_d6 = BN(conv_3x3_d6, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                         momentum=bn_mom, name="aspp_3x3_d6_bn", eps=eps, **args)
        conv_3x3_d6 = Relu(conv_3x3_d6, act_type='relu', name='aspp_3x3_d6_relu')
        conv_3x3_d12 = Sepconv(data=feat, in_channel=2048, num_filter=256, stride=1,
                               dilate=12 * rate, name="aspp_3x3_d12")
        conv_3x3_d12 = BN(conv_3x3_d12, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d12_bn", eps=eps, **args)
        conv_3x3_d12 = Relu(conv_3x3_d12, act_type='relu', name='aspp_3x3_d12_relu')
        conv_3x3_d18 = Sepconv(data=feat, in_channel=2048, num_filter=256, stride=1,
                               dilate=18 * rate, name="aspp_3x3_d18")
        conv_3x3_d18 = BN(conv_3x3_d18, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d18_bn", eps=eps, **args)
        conv_3x3_d18 = Relu(conv_3x3_d18, act_type='relu', name='aspp_3x3_d18_relu')
    else:
        conv_3x3_d6 = Conv(feat, num_filter=256, kernel=(3, 3),
                           dilate=(6 * rate, 6 * rate), pad=(6 * rate, 6 * rate),
                           name="aspp_3x3_d6")
        conv_3x3_d6 = BN(conv_3x3_d6, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                         momentum=bn_mom, name="aspp_3x3_d6_bn", eps=eps)
        conv_3x3_d6 = Relu(conv_3x3_d6, act_type='relu', name='aspp_3x3_d6_relu')
        conv_3x3_d12 = Conv(feat, num_filter=256, kernel=(3, 3),
                            dilate=(12 * rate, 12 * rate), pad=(12 * rate, 12 * rate),
                            name="aspp_3x3_d12")
        conv_3x3_d12 = BN(conv_3x3_d12, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d12_bn", eps=eps)
        conv_3x3_d12 = Relu(conv_3x3_d12, act_type='relu', name='aspp_3x3_d12_relu')
        conv_3x3_d18 = Conv(feat, num_filter=256, kernel=(3, 3),
                            dilate=(18 * rate, 18 * rate), pad=(18 * rate, 18 * rate),
                            name="aspp_3x3_d18")
        conv_3x3_d18 = BN(conv_3x3_d18, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d18_bn", eps=eps)
        conv_3x3_d18 = Relu(conv_3x3_d18, act_type='relu', name='aspp_3x3_d18_relu')

    # image-level branch: global average pooling (or an OC context block)
    if oc_context:
        gap = oc_context_block(feat, 128, 256, 256, resample_rate=2)
    else:
        gap = Pool(feat, kernel=(1, 1), global_pool=True, pool_type="avg", name="aspp_gap")
    gap = Conv(gap, num_filter=256, kernel=(1, 1), name="aspp_gap_1x1")
    gap = BN(gap, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
             momentum=bn_mom, name="aspp_gap_1x1_bn", eps=eps, **args)
    if not oc_context:
        gap = Relu(gap, act_type='relu', name='aspp_gap_1x1_relu')
    gap = broadcast_like(gap, conv_1x1, name="aspp_gap_broadcast")

    aspp = concat(conv_1x1, conv_3x3_d6, conv_3x3_d12, conv_3x3_d18, gap,
                  dim=1, name="aspp_concat")
    aspp_1x1 = Conv(aspp, num_filter=256, kernel=(1, 1), name="aspp_concat_1x1")
    aspp_1x1 = BN(aspp_1x1, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                  momentum=bn_mom, name="aspp_concat_1x1_bn", eps=eps, **args)
    aspp_1x1._set_attr(mirror_stage='True')
    aspp_1x1 = Relu(aspp_1x1, act_type='relu', name='aspp_concat_1x1_relu')
    return aspp_1x1
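
# --- Usage sketch (illustrative; the backbone symbol below is an assumption).
# ASPP consumes a backbone feature map (the in_channel=2048 above suggests a
# ResNet stride-16 stage); `rate` scales the atrous rates, e.g. rate=2 when
# the output stride is halved to 8.
#
#   feat = resnet_c5(Variable('data'))  # hypothetical backbone, (N, 2048, H/16, W/16)
#   head = atrous_spatial_pyramid_pooling(feat, rate=1, aspp_with_separable_conv=True)
#   logits = Conv(head, num_filter=num_classes, kernel=(1, 1), name='seg_logits')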
def add_loss(self, splits: sym.Variable):
    """Add loss functions.

    Below, we splice the network output accordingly to compute losses for
    the following:

        1. Bounding box attributes
        2. Class probabilities
        3. IOUs as "confidence scores"

    The ugly splice functions are replacements for reshaping: instead, we
    split along a dimension into multiple chunks, and then restack the
    arrays in a consistent way.

    Due to a quirk in MXNet, we create a placeholder label_score. However,
    we actually use pred_box and label_box to compute IOUs (the true
    labels), which are then compared with pred_score.
    """
    num_splits = int(NUM_OUT_CHANNELS / ANCHORS_PER_GRID)
    splits = list(sym.split(splits, num_outputs=num_splits))

    # Compute loss for bounding box
    pred_box = sym.concat(*splits[:NUM_BBOX_ATTRS])
    loss_box = mx.sym.Custom(
        data=pred_box,
        label=self.label_box,
        op_type='LinearRegressionOutputWithMask')

    # Compute loss for class probabilities
    cidx = NUM_BBOX_ATTRS + NUM_CLASSES
    pred_class = reformat(sym.concat(*splits[NUM_BBOX_ATTRS:cidx]), pkg=sym)
    label_class = reformat(self.label_class, pkg=sym)
    loss_class = sym.SoftmaxOutput(data=pred_class, label=label_class)

    # Compute loss for confidence scores - see docstring above for explanation
    pred_score = splits[cidx]
    loss_iou = mx.symbol.Custom(
        data=pred_score,
        label=sym.concat(self.label_score, pred_box, self.label_box),
        op_type='IOURegressionOutputWithMask')
    return mx.sym.Group([loss_box, loss_class, loss_iou])
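
# Worked example of the channel layout (illustrative numbers): the splicing
# above assumes NUM_OUT_CHANNELS = ANCHORS_PER_GRID * (NUM_BBOX_ATTRS +
# NUM_CLASSES + 1). With ANCHORS_PER_GRID = 9, NUM_BBOX_ATTRS = 4 and
# NUM_CLASSES = 3, NUM_OUT_CHANNELS = 9 * 8 = 72 and num_splits = 72 / 9 = 8:
# splits[0:4] hold box attributes, splits[4:7] class scores, and splits[7]
# the per-anchor confidence score.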
def __call__(self, inputs, states):
    # inputs: (batch_size, decoder_num_hidden)
    # for dot attention, decoder_num_hidden must equal encoder_num_hidden
    if len(states) > 1:
        states = [symbol.concat(*states, dim=1)]
    # source: (batch_size, seq_len, encoder_num_hidden)
    source = states[0]
    # (batch_size, decoder_num_hidden, 1)
    inputs = symbol.expand_dims(inputs, axis=2)
    # dot-product scores: (batch_size, seq_len, 1)
    scores = symbol.batch_dot(source, inputs)
    # (batch_size, encoder_num_hidden)
    return _attention_pooling(source, scores), states
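
# --- Sketch (assumption): `_attention_pooling` is defined elsewhere in the
# repo; a typical implementation normalizes the scores over the sequence axis
# and returns the probability-weighted sum of the source vectors, e.g.:
#
#   def _attention_pooling(source, scores):
#       # source: (batch_size, seq_len, encoder_num_hidden)
#       # scores: (batch_size, seq_len, 1)
#       probs = symbol.softmax(scores, axis=1)
#       # (batch_size, 1, encoder_num_hidden) -> (batch_size, encoder_num_hidden)
#       context = symbol.batch_dot(probs, source, transpose_a=True)
#       return symbol.reshape(context, shape=(0, -1))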
def convert_ssd_model(net, input_shape=(1, 3, 512, 512), to_bgr=False, merge_bn=True):
    """ Convert an SSD-like model to Caffe.
    :param net: mxnet.gluon.nn.HybridBlock
        Gluon net to convert.
    :param input_shape: tuple
        Shape of inputs.
    :param to_bgr: bool
        Convert input_type from RGB to BGR.
    :param merge_bn: bool
        Merge BatchNorm and Scale layers into Convolution layers.
    :return: (text_net, binary_weights)
        text_net: caffe_pb2.NetParameter
            Structure of net.
        binary_weights: caffe_pb2.NetParameter
            Weights of net.
    """
    """ Create symbols """
    in_ = symbol.Variable("data", shape=input_shape)
    __, scores_sym, __ = net(in_)

    """ Add symbols for box_predictors and cls_predictors """
    # box_predictors
    box_pred_name = net.box_predictors[0].predictor.name
    box_transpose = _find_symbol_by_bottomname(scores_sym, f"{box_pred_name}_fwd")
    box_flatten = _find_symbol_by_bottomname(scores_sym, box_transpose.name)
    box_concat = _find_symbol_by_bottomname(scores_sym, box_flatten.name)
    # cls_predictors
    cls_pred_name = net.class_predictors[0].predictor.name
    cls_transpose = _find_symbol_by_bottomname(scores_sym, f"{cls_pred_name}_fwd")
    cls_flatten = _find_symbol_by_bottomname(scores_sym, cls_transpose.name)
    cls_concat = _find_symbol_by_bottomname(scores_sym, cls_flatten.name)
    cls_reshape = _find_symbol_by_bottomname(scores_sym, cls_concat.name)
    cls_softmax = symbol.softmax(cls_reshape, axis=2)
    cls_flatten = symbol.flatten(cls_softmax)

    """ Collect attributes needed by PriorBox and DetectionOutput layers """
    priorbox_attrs, detection_out_attrs = _extract_ssd_attrs(net)

    """ Create fake symbols for PriorBox layers """
    priorboxes = []
    for i, box_pred in enumerate(net.box_predictors):
        pred_sym = _find_symbol_by_name(scores_sym, f"{box_pred.predictor.name}_fwd")
        # (ugly) Get the Convolution symbol of the predictor
        for c in pred_sym.get_children():
            if c.get_children() is not None:
                conv = c
                break
        # Create a new fake symbol for PriorBox
        priorbox = FakeSymbol(conv, name=f"{conv.name}_priorbox", _op="PriorBox",
                              **priorbox_attrs[i])
        priorboxes.append(priorbox)
    # Concat outputs of the PriorBox symbols
    pbox_concat = symbol.concat(*priorboxes, dim=2)

    """ Create a fake symbol for the DetectionOutput layer """
    detection_out = FakeSymbol(box_concat, cls_flatten, pbox_concat, _in_num=3,
                               name="detection_out", _op="DetectionOutput",
                               **detection_out_attrs)

    return convert_model(net, detection_out, input_shape=input_shape,
                         to_bgr=to_bgr, merge_bn=merge_bn)
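
# --- Usage sketch (illustrative; assumes a GluonCV-style SSD exposing the
# `box_predictors`/`class_predictors` attributes this function relies on):
#
#   import gluoncv
#   net = gluoncv.model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=True)
#   text_net, binary_weights = convert_ssd_model(net, input_shape=(1, 3, 512, 512),
#                                                to_bgr=True, merge_bn=True)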