def quantize(self, op, **kwargs):
        precs, buffers = kwargs['precs'], kwargs['buffers']
        features = kwargs['features']
        X = op.get_children()[0]
        name, X_name = op.attr('name'), X.attr('name')
        attrs = op.list_attr()

        # `a_min`, `a_max` and the precision should be aligned with CVM-Runtime
        iscale = buffers[X_name].get()
        buffers[name] = SBuffer(iscale)
        a_min = int(get_attr(attrs, "a_min") * iscale)
        a_max = int(get_attr(attrs, "a_max") * iscale)
        precs[name][OUT_KEY] = get_bit_exp(max(abs(a_min), a_max))
        return mx.sym.clip(X, a_min=a_min, a_max=a_max, name=name)
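For reference, the bound arithmetic above can be checked standalone. The sketch below uses a hypothetical input scale and a plausible stand-in for get_bit_exp (the bit width of a signed integer); the real helper lives in MRT's utility module.

import math

iscale = 24.5                  # hypothetical input scale
f_min, f_max = -6.0, 6.0       # float-domain clip bounds
a_min = int(f_min * iscale)    # -147: integer-domain lower bound
a_max = int(f_max * iscale)    # 147: integer-domain upper bound
# plausible stand-in for get_bit_exp: bit width of a signed integer
out_prec = math.ceil(math.log2(max(abs(a_min), a_max) + 1)) + 1
print(a_min, a_max, out_prec)  # -147 147 9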
Example #2
def mergefunc(node, params, graph):
    name, op_name = node.attr('name'), node.attr('op_name')
    childs, attr = sutils.sym_iter(
        node.get_children()), node.list_attr()
    if op_name in attribute_deps:
        # rescale the listed attributes into the integer domain using the
        # output scale of the referenced input
        attr_deps = attribute_deps[op_name]
        for attr_name, v in attr_deps.items():
            val = sutils.get_attr(attr, attr_name, 0)
            attr[attr_name] = int(val * mrt_oscales[name_idx[v]])
        node = sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
    return node
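To see what mergefunc does, here is a toy run with made-up tables (attribute_deps, mrt_oscales and name_idx are hypothetical stand-ins; in MRT they come from the calibration results): a float-domain scalar attribute is rescaled into the integer domain of the input it is compared against.

# hypothetical tables; the real ones are produced by MRT calibration
attribute_deps = {'_greater_scalar': {'scalar': 'data'}}
mrt_oscales = [48.0]               # output scale per recorded symbol
name_idx = {'data': 0}             # symbol name -> index into mrt_oscales

attr = {'scalar': 0.5}             # float-domain threshold on `data`
deps = attribute_deps['_greater_scalar']
for attr_name, v in deps.items():
    attr[attr_name] = int(attr[attr_name] * mrt_oscales[name_idx[v]])
print(attr)                        # {'scalar': 24}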
    def _separate_pad(op, **kwargs):
        name, op_name = op.attr('name'), op.attr('op_name')
        attr, childs = op.list_attr(), sutils.sym_iter(op.get_children())

        if op_name not in [Convolution.op_name]:
            return op

        # move the convolution's implicit `pad` attribute into an
        # explicit mx.sym.pad operator
        if 'layout' in attr:
            assert attr['layout'] == 'NCHW', \
                "unsupported layout: %s" % attr['layout']
        PH, PW = sutils.get_attr(attr, 'pad', (0, 0))
        if 'pad' in attr:
            del attr['pad']
        if PH == 0 and PW == 0:
            return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)

        childs[0] = mx.sym.pad(childs[0],
                               pad_width=(0, 0, 0, 0, PH, PH, PW, PW),
                               mode='constant',
                               constant_value=0,
                               name=N.n('pad'))
        op = sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
        return op
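The rewrite above can be reproduced by hand with plain mxnet symbols (a sketch; both graphs compute the same result, since a convolution's implicit zero padding equals an explicit constant pad):

import mxnet as mx

x, w = mx.sym.var('x'), mx.sym.var('w')
# original form: convolution with implicit zero padding
conv = mx.sym.Convolution(x, w, kernel=(3, 3), pad=(1, 1),
                          num_filter=8, no_bias=True)
# separated form: explicit pad, then a pad-free convolution
padded = mx.sym.pad(x, pad_width=(0, 0, 0, 0, 1, 1, 1, 1),
                    mode='constant', constant_value=0)
conv_sep = mx.sym.Convolution(padded, w, kernel=(3, 3),
                              num_filter=8, no_bias=True)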
    def quantize(self, op, **kwargs):
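        # Integer softmax via a lookup table:
        #   1. requantize the input X with scale xs
        #   2. shift X so its entries fall in [0, alpha], alpha = int(lambd * xs)
        #   3. map each entry i to a precomputed table of round(exp(i / xs))
        #   4. divide by the summed table entries, with rounding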
        params, graph = kwargs['params'], kwargs['graph']
        buffers, precs = kwargs['buffers'], kwargs['precs']
        features, cfg_dict = kwargs['features'], kwargs['cfg_dict']
        name, op_name = op.attr('name'), op.attr('op_name')
        childs, attr = sym_iter(op.get_children()), op.list_attr()
        cns = [c.attr('name') for c in childs] if childs else []

        oprec = kwargs['op_input_precs'][op_name]
        th = features[cns[0]].get()
        xs = scale_exp(th, oprec)
        # the input is expected to use the uniform symmetric quantizer
        quant_type = cfg_dict[cns[0]]['quant_type']
        assert quant_type == USQuantizer.name
        quant = get_quantizer(quant_type)
        X, xprec, xs = quant.quantize(childs[0],
                                      oprec,
                                      oscale=xs,
                                      oname=name,
                                      **kwargs)
        axis = get_attr(attr, 'axis', -1)
        lambd = kwargs['softmax_lambd']
        # entries more than lambd below the maximum contribute at most
        # exp(-lambd) and are clipped away; alpha is that cutoff in the
        # quantized domain
        alpha = int(lambd * xs)
        var = nd_const(alpha, graph, params)
        max_axis = mx.sym.max(X, axis=axis, keepdims=True)
        # shift X by (max - alpha) so the surviving entries fall in [0, alpha]
        offset = mx.sym.broadcast_sub(max_axis,
                                      var,
                                      name=N.n('softmax_offset'))
        offset = realize(offset, 0, xprec)
        norm = mx.sym.broadcast_sub(X, offset, name=N.n('softmax_normalize'))
        norm = mx.sym.relu(norm, name=N.n('softmax_filter'))
        norm = realize(norm, 0, xprec)

        # precompute table[i] = exp(i / xs) for i in [0, alpha]
        data = sutils.nd_arange(0, alpha + 1)
        table = nd.exp(data / xs)

        # entries are bounded by exp(lambd), which fixes the table precision
        tprec = get_bit_exp(math.exp(lambd))
        table = nd.clip(table, a_min=0, a_max=get_range_exp(tprec))
        W_name = N.n('cvm_lut_weight')
        params[W_name] = weight = table.round().reshape(alpha + 1, 1)
        wattr = {'precision': str(tprec)}
        W = graph[W_name] = mx.sym.var(W_name, shape=weight.shape, attr=wattr)
        lut = mx.sym.Custom(norm,
                            W,
                            in_dim=alpha + 1,
                            name=N.n('softmax_lut'),
                            op_type='cvm_lut')
        sum_lut = mx.sym.sum(lut,
                             axis=axis,
                             keepdims=True,
                             name=N.n("softmax_sum"))

        # reserve tprec bits for the table values so lut * oscale still
        # fits into 32-bit accumulation
        oprec = min(15, 31 - tprec)
        assert oprec > 8, "operator softmax(%s) lambda(%d) is too large" \
                % (name, lambd)
        oscale = get_range_exp(oprec)
        var_scale = nd_const(oscale, graph, params)
        prob = mx.sym.broadcast_mul(lut,
                                    var_scale,
                                    name=N.n("softmax_output_scale"))
        # add sum_lut / 2 before dividing so the quotient rounds to nearest
        # instead of flooring
        half_lut = realize(sum_lut, 1, 31)
        prob = mx.sym.broadcast_add(prob, half_lut, name=N.n("softmax_round"))
        op = mx.sym.broadcast_div(prob, sum_lut, name=N.n("softmax_prob"))
        # casting through int32 truncates the (non-negative) quotient,
        # simulating integer division
        op = op.astype('int32').astype('float32')
        op = realize(op, 0, oprec, name=name)
        precs[name][OUT_KEY] = oprec
        buffers[name] = SBuffer(oscale)

        logger = logging.getLogger('log.mrt.realize')
        logger.debug("operator %-20s name=%-40s oscale=%s, children=%s",
                     op_name, name, buffers[name].serialize(), cns)
        return op
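A standalone numeric check of the table construction (hypothetical values: lambd = 10 and an 8-bit input; scale_exp, get_bit_exp and get_range_exp are replaced by plausible power-of-two formulas, an assumption rather than MRT's exact code):

import math
import numpy as np

lambd, th, iprec = 10.0, 20.0, 8
xs = (2 ** (iprec - 1) - 1) / th          # assumed scale_exp: 127 / 20 = 6.35
alpha = int(lambd * xs)                   # clip bound and table size - 1 -> 63
table = np.exp(np.arange(alpha + 1) / xs) # table[i] = exp(i / xs)
tprec = math.ceil(math.log2(math.exp(lambd))) + 1   # assumed get_bit_exp -> 16
table = np.clip(np.round(table), 0, 2 ** (tprec - 1) - 1)
oprec = min(15, 31 - tprec)               # 15 output bits; the assertion holds
print(alpha, tprec, oprec, int(table[-1]))  # 63 16 15 (last entry ~ exp(alpha / xs))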