Example #1
def sampled_softmax(num_classes, num_samples, in_dim, inputs, weight, bias,
                    sampled_values, remove_accidental_hits=True):
        """ Sampled softmax via importance sampling.
            This under-estimates the full softmax and is only used for training.
        """
        # inputs = (n, in_dim)
        sample, prob_sample, prob_target = sampled_values

        # (num_samples, )
        sample = S.var('sample', shape=(num_samples,), dtype='float32')
        # (n, )
        label = S.var('label')
        label = S.reshape(label, shape=(-1,), name="label_reshape")
        # (num_samples+n, )
        sample_label = S.concat(sample, label, dim=0)
        # lookup weights and biases
        # (num_samples+n, dim)
        sample_target_w = S.sparse.Embedding(data=sample_label, weight=weight,
                                             input_dim=num_classes, output_dim=in_dim,
                                             sparse_grad=True)
        # (num_samples+n, 1)
        sample_target_b = S.sparse.Embedding(data=sample_label, weight=bias,
                                             input_dim=num_classes, output_dim=1,
                                             sparse_grad=True)
        # (num_samples, dim)
        sample_w = S.slice(sample_target_w, begin=(0, 0), end=(num_samples, None))
        target_w = S.slice(sample_target_w, begin=(num_samples, 0), end=(None, None))
        sample_b = S.slice(sample_target_b, begin=(0, 0), end=(num_samples, None))
        target_b = S.slice(sample_target_b, begin=(num_samples, 0), end=(None, None))

        # target
        # (n, 1)
        true_pred = S.sum(target_w * inputs, axis=1, keepdims=True) + target_b
        # samples
        # flatten bias to (num_samples,) for FullyConnected
        sample_b = S.reshape(sample_b, (-1,))
        # (n, num_samples)
        sample_pred = S.FullyConnected(inputs, weight=sample_w, bias=sample_b,
                                       num_hidden=num_samples)

        # remove accidental hits
        if remove_accidental_hits:
            label_v = S.reshape(label, (-1, 1))
            sample_v = S.reshape(sample, (1, -1))
            neg = S.broadcast_equal(label_v, sample_v) * -1e37
            sample_pred = sample_pred + neg

        prob_sample = S.reshape(prob_sample, shape=(1, num_samples))
        p_target = true_pred - S.log(prob_target)
        p_sample = S.broadcast_sub(sample_pred, S.log(prob_sample))

        # return logits and new_labels
        # (n, 1+num_samples)
        logits = S.concat(p_target, p_sample, dim=1)
        new_targets = S.zeros_like(label)
        return logits, new_targets
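
A minimal usage sketch (shapes and variable names here are illustrative; a real caller would take `sampled_values` from a candidate sampler, e.g. a log-uniform sampler). Note that the function re-declares `sample` and `label` as placeholders internally, so only `prob_sample` and `prob_target` from the tuple enter the graph:

import mxnet.symbol as S

num_classes, num_samples, in_dim = 10000, 128, 512
inputs = S.var('inputs')                 # (n, in_dim)
weight = S.var('weight')                 # (num_classes, in_dim)
bias = S.var('bias')                     # (num_classes, 1)
# Placeholders standing in for a sampler's outputs.
sampled_values = (S.var('sample'),       # (num_samples,)
                  S.var('prob_sample'),  # (num_samples,)
                  S.var('prob_target'))  # (n, 1)

logits, new_targets = sampled_softmax(num_classes, num_samples, in_dim,
                                      inputs, weight, bias, sampled_values)
loss = S.SoftmaxOutput(data=logits, label=new_targets)
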
Example #2
def atrous_spatial_pyramid_pooling(feat, rate, aspp_with_separable_conv, oc_context=False):
    # Relies on module-level helpers (Conv, BN, Relu, Sepconv, Pool, concat,
    # broadcast_like, oc_context_block) and globals (use_global_stats,
    # fix_gamma, bn_mom, eps, args) defined elsewhere in the source repo;
    # see the sketch after this function for plausible definitions.
    conv_1x1 = Conv(feat, num_filter=256, kernel=(1, 1), name="aspp_1x1")
    conv_1x1 = BN(conv_1x1, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                  momentum=bn_mom, name="aspp_1x1_bn", eps=eps, **args)
    conv_1x1 = Relu(conv_1x1, act_type='relu', name='aspp_1x1_relu')

    if aspp_with_separable_conv:
        conv_3x3_d6 = Sepconv(data=feat, in_channel=2048, num_filter=256, stride=1,
                              dilate=6 * rate, name="aspp_3x3_d6")
        conv_3x3_d6 = BN(conv_3x3_d6, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                         momentum=bn_mom, name="aspp_3x3_d6_bn", eps=eps, **args)
        conv_3x3_d6 = Relu(conv_3x3_d6, act_type='relu', name='aspp_3x3_d6_relu')
        conv_3x3_d12 = Sepconv(data=feat, in_channel=2048, num_filter=256, stride=1,
                               dilate=12 * rate, name="aspp_3x3_d12")
        conv_3x3_d12 = BN(conv_3x3_d12, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d12_bn", eps=eps, **args)
        conv_3x3_d12 = Relu(conv_3x3_d12, act_type='relu', name='aspp_3x3_d12_relu')
        conv_3x3_d18 = Sepconv(data=feat, in_channel=2048, num_filter=256, stride=1,
                               dilate=18 * rate, name="aspp_3x3_d18")
        conv_3x3_d18 = BN(conv_3x3_d18, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d18_bn", eps=eps, **args)
        conv_3x3_d18 = Relu(conv_3x3_d18, act_type='relu', name='aspp_3x3_d18_relu')
    else:
        conv_3x3_d6 = Conv(feat, num_filter=256, kernel=(3, 3), dilate=(6 * rate, 6 * rate),
                           pad=(6 * rate, 6 * rate), name="aspp_3x3_d6")
        conv_3x3_d6 = BN(conv_3x3_d6, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                         momentum=bn_mom, name="aspp_3x3_d6_bn", eps=eps)
        conv_3x3_d6 = Relu(conv_3x3_d6, act_type='relu', name='aspp_3x3_d6_relu')
        conv_3x3_d12 = Conv(feat, num_filter=256, kernel=(3, 3), dilate=(12 * rate, 12 * rate),
                            pad=(12 * rate, 12 * rate), name="aspp_3x3_d12")
        conv_3x3_d12 = BN(conv_3x3_d12, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d12_bn", eps=eps)
        conv_3x3_d12 = Relu(conv_3x3_d12, act_type='relu', name='aspp_3x3_d12_relu')
        conv_3x3_d18 = Conv(feat, num_filter=256, kernel=(3, 3), dilate=(18 * rate, 18 * rate),
                            pad=(18 * rate, 18 * rate), name="aspp_3x3_d18")
        conv_3x3_d18 = BN(conv_3x3_d18, use_global_stats=use_global_stats, fix_gamma=fix_gamma,
                          momentum=bn_mom, name="aspp_3x3_d18_bn", eps=eps)
        conv_3x3_d18 = Relu(conv_3x3_d18, act_type='relu', name='aspp_3x3_d18_relu')

    if oc_context:
        gap = oc_context_block(feat, 128, 256, 256, resample_rate=2)
    else:
        gap = Pool(feat, kernel=(1, 1), global_pool=True, pool_type="avg", name="aspp_gap")
    gap = Conv(gap, num_filter=256, kernel=(1, 1), name="aspp_gap_1x1")
    gap = BN(gap, use_global_stats=use_global_stats, fix_gamma=fix_gamma, momentum=bn_mom,
             name="aspp_gap_1x1_bn", eps=eps, **args)
    if not oc_context:
        gap = Relu(gap, act_type='relu', name='aspp_gap_1x1_relu')
        gap = broadcast_like(gap, conv_1x1, name="aspp_gap_broadcast")
    aspp = concat(conv_1x1, conv_3x3_d6, conv_3x3_d12, conv_3x3_d18, gap, dim=1, name="aspp_concat")
    aspp_1x1 = Conv(aspp, num_filter=256, kernel=(1, 1), name="aspp_concat_1x1")
    aspp_1x1 = BN(aspp_1x1, use_global_stats=use_global_stats, fix_gamma=fix_gamma, momentum=bn_mom,
                  name="aspp_concat_1x1_bn", eps=eps, **args)
    aspp_1x1._set_attr(mirror_stage='True')
    aspp_1x1 = Relu(aspp_1x1, act_type='relu', name='aspp_concat_1x1_relu')
    return aspp_1x1
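
The helpers and globals used above are repo-specific. A minimal sketch of plausible definitions over `mxnet.symbol`, under assumed defaults (everything here is an assumption; `Sepconv` and `oc_context_block` have no one-line MXNet equivalent and are omitted):

import mxnet as mx

# Hypothetical aliases for the wrappers the snippet assumes; the first
# positional argument of each MXNet op is `data`, matching the call sites.
Conv = mx.sym.Convolution    # Conv(feat, num_filter=..., kernel=..., name=...)
BN = mx.sym.BatchNorm        # BN(x, use_global_stats=..., momentum=..., eps=...)
Relu = mx.sym.Activation     # Relu(x, act_type='relu', name=...)
Pool = mx.sym.Pooling        # Pool(x, kernel=..., global_pool=True, pool_type='avg')
concat = mx.sym.concat
broadcast_like = mx.sym.broadcast_like

# Assumed global hyper-parameters read by the function above.
use_global_stats, fix_gamma = False, False
bn_mom, eps, args = 0.9, 1e-5, {}
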
Example #3
    def add_loss(self, splits: sym.Variable):
        """Add loss functions.

        Below, we split the network output to compute losses for the
        following:

            1. Bounding box attributes
            2. Class probabilities
            3. IOUs as "confidence scores"

        The ugly split-and-concat operations below stand in for reshaping:
        split along a dimension into multiple chunks, then restack the
        chunks in a consistent order (see the toy sketch after this method).

        Due to a quirk in MXNet, we create a placeholder label_score. However,
        we actually use pred_box and label_box to compute IOU (true labels),
        which are then compared with pred_score.
        """
        num_splits = NUM_OUT_CHANNELS // ANCHORS_PER_GRID
        splits = list(sym.split(splits, num_outputs=num_splits))

        # Compute loss for bounding box
        pred_box = sym.concat(*splits[:NUM_BBOX_ATTRS])
        loss_box = mx.sym.Custom(
            data=pred_box,
            label=self.label_box,
            op_type='LinearRegressionOutputWithMask')

        # Compute loss for class probabilities
        cidx = NUM_BBOX_ATTRS + NUM_CLASSES
        pred_class = reformat(sym.concat(*splits[NUM_BBOX_ATTRS:cidx]), pkg=sym)
        label_class = reformat(self.label_class, pkg=sym)
        loss_class = sym.SoftmaxOutput(data=pred_class, label=label_class)

        # Compute loss for confidence scores - see doc above for explanation
        pred_score = splits[cidx]
        loss_iou = mx.sym.Custom(
            data=pred_score,
            label=sym.concat(self.label_score, pred_box, self.label_box),
            op_type='IOURegressionOutputWithMask')

        return mx.sym.Group([loss_box, loss_class, loss_iou])
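
A toy NDArray sketch of the split-and-restack substitute for reshaping described in the docstring (all sizes invented for illustration):

import mxnet as mx

num_splits, anchors = 3, 2
# Toy activation map: batch=1, channels=num_splits*anchors, 2x2 grid.
x = mx.nd.arange(num_splits * anchors * 4).reshape((1, num_splits * anchors, 2, 2))

# Split along the channel axis (MXNet's default axis=1), as add_loss does...
chunks = mx.nd.split(x, num_outputs=num_splits)
# ...then restack a subset of the chunks along channels, with no reshape.
first_two = mx.nd.concat(*chunks[:2], dim=1)
print(first_two.shape)  # (1, 4, 2, 2)
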
Example #4
    def __call__(self, inputs, states):
        # inputs: (batch_size, decoder_num_hidden)
        # for dot attention, decoder_num_hidden must equal encoder_num_hidden
        if len(states) > 1:
            states = [symbol.concat(*states, dim=1)]

        # source: (batch_size, seq_len, encoder_num_hidden)
        source = states[0]
        # (batch_size, decoder_num_hidden, 1)
        inputs = symbol.expand_dims(inputs, axis=2)
        # (batch_size, seq_len, 1)
        scores = symbol.batch_dot(source, inputs)
        # (batch_size, encoder_num_hidden)
        return _attention_pooling(source, scores), states
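
`_attention_pooling` is referenced but not shown. A plausible sketch, assuming the scores are softmax-normalized over the sequence axis and used as weights on the source vectors:

from mxnet import symbol

def _attention_pooling(source, scores):
    """Hypothetical helper: pool `source` with normalized `scores`.

    source: (batch_size, seq_len, encoder_num_hidden)
    scores: (batch_size, seq_len, 1)
    """
    # Normalize attention scores over the sequence axis.
    probs = symbol.softmax(scores, axis=1)
    # (batch_size, 1, seq_len) x (batch_size, seq_len, encoder_num_hidden)
    # -> (batch_size, 1, encoder_num_hidden)
    context = symbol.batch_dot(probs, source, transpose_a=True)
    # (batch_size, encoder_num_hidden); 0 keeps the batch dimension as-is.
    return symbol.reshape(context, shape=(0, -1))
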
Example #5
def convert_ssd_model(net,
                      input_shape=(1, 3, 512, 512),
                      to_bgr=False,
                      merge_bn=True):
    """
    Convert SSD-like model to Caffe.
    :param net: mxnet.gluon.nn.HybridBlock
        Gluon net to convert.
    :param input_shape: tuple
        Shape of inputs.
    :param to_bgr: bool
        Convert input_type from RGB to BGR.
    :param merge_bn: bool
        Merge BatchNorm and Scale layers to Convolution layers.
    :return: (text_net, binary_weights)
        text_net: caffe_pb2.NetParameter
            Structure of net.
        binary_weights: caffe_pb2.NetParameter
            Weights of net.
    """
    """ Create symbols """
    in_ = symbol.Variable("data", shape=input_shape)
    __, scores_sym, __ = net(in_)
    """ Add symbols about box_predictors and cls_predictors """
    # box_predictors
    box_pred_name = net.box_predictors[0].predictor.name
    box_transpose = _find_symbol_by_bottomname(scores_sym,
                                               f"{box_pred_name}_fwd")
    box_flatten = _find_symbol_by_bottomname(scores_sym, box_transpose.name)
    box_concat = _find_symbol_by_bottomname(scores_sym, box_flatten.name)
    # cls_predictors
    cls_pred_name = net.class_predictors[0].predictor.name
    cls_transpose = _find_symbol_by_bottomname(scores_sym,
                                               f"{cls_pred_name}_fwd")
    cls_flatten = _find_symbol_by_bottomname(scores_sym, cls_transpose.name)
    cls_concat = _find_symbol_by_bottomname(scores_sym, cls_flatten.name)
    cls_reshape = _find_symbol_by_bottomname(scores_sym, cls_concat.name)
    cls_softmax = symbol.softmax(cls_reshape, axis=2)
    cls_flatten = symbol.flatten(cls_softmax)
    """ Collect attributes needed by Priorbox and DetectionOutput layers """
    priorbox_attrs, detection_out_attrs = _extract_ssd_attrs(net)
    """ Create fake symbol for Priorbox layers """
    priorboxes = []
    for i, box_pred in enumerate(net.box_predictors):
        pred_sym = _find_symbol_by_name(scores_sym,
                                        f"{box_pred.predictor.name}_fwd")
        # (ugly) Get Convolution symbol of predictor
        for c in pred_sym.get_children():
            if c.get_children() is not None:
                conv = c
                break
        # Create a new fake symbol for Priorbox
        priorbox = FakeSymbol(conv,
                              name=f"{conv.name}_priorbox",
                              _op="PriorBox",
                              **priorbox_attrs[i])
        priorboxes.append(priorbox)
    # Concat outputs of Priorbox symbol
    pbox_concat = symbol.concat(*priorboxes, dim=2)
    """ Create fake symbol for DetectionOutput layer """
    detection_out = FakeSymbol(box_concat,
                               cls_flatten,
                               pbox_concat,
                               _in_num=3,
                               name="detection_out",
                               _op="DetectionOutput",
                               **detection_out_attrs)

    return convert_model(net,
                         detection_out,
                         input_shape=input_shape,
                         to_bgr=to_bgr,
                         merge_bn=merge_bn)
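
A hedged usage sketch, assuming a GluonCV-style SSD that exposes `box_predictors`/`class_predictors` as the converter expects (model name and output paths are illustrative):

from gluoncv import model_zoo

net = model_zoo.get_model('ssd_512_resnet50_v1_voc', pretrained=True)
text_net, binary_weights = convert_ssd_model(net, input_shape=(1, 3, 512, 512))

# Both returns are protobuf messages: text format for the prototxt,
# serialized binary for the weights.
with open('ssd_deploy.prototxt', 'w') as f:
    f.write(str(text_net))
with open('ssd_deploy.caffemodel', 'wb') as f:
    f.write(binary_weights.SerializeToString())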