def quantize(self, op, **kwargs):
        """Quantize a two-input operator whose inputs both use ``USQuantizer``.

        Reads ``features``, ``buffers``, ``cfg_dict``, ``op_input_precs`` and
        ``precs`` from ``kwargs``.  The first input is re-quantized to
        precision 32 with output scale 1 when its buffer is not equal to 1;
        the second input is quantized to the operator's input precision.  The
        output inherits the feature of the second input, gets the second
        input's scale as its buffer, and its output precision is derived from
        ``feature * scale``.

        Returns:
            The rebuilt operator symbol fed with the quantized inputs.

        Raises:
            NotImplementedError: if either input is configured with a
                quantizer other than ``USQuantizer``.
        """
        features, buffers = kwargs['features'], kwargs['buffers']
        cfg_dict = kwargs['cfg_dict']
        name, op_name = op.attr('name'), op.attr('op_name')
        attr, childs = op.list_attr(), sym_iter(op.get_children())
        cns = [c.attr('name') for c in childs]

        xquant_type = cfg_dict[cns[0]]['quant_type']
        wquant_type = cfg_dict[cns[1]]['quant_type']
        xquant, wquant = \
            get_quantizer(xquant_type), get_quantizer(wquant_type)
        oprec = kwargs['op_input_precs'][op_name]

        if xquant_type == wquant_type == USQuantizer.name:
            # NOTE(review): `xs` is the buffer entry itself, not `xs.get()`;
            # whether `!= 1` behaves as intended depends on the buffer type's
            # comparison semantics -- confirm.
            X, xs = childs[0], buffers[cns[0]]
            if xs != 1:
                X, _, _ = xquant.quantize(X,
                                          32,
                                          oscale=1,
                                          oname=name,
                                          **kwargs)
            W, _, ws = wquant.quantize(childs[1], oprec, oname=name, **kwargs)
            features[name] = features[cns[1]]
            buffers[name] = SBuffer(ws)
            kwargs['precs'][name][OUT_KEY] = get_bit_exp(features[name].get() *
                                                         ws)
            op = get_mxnet_op(op_name)(X, W, **attr, name=name)
        else:
            # Exceptions do not apply %-formatting to their arguments, so the
            # message has to be formatted explicitly.
            raise NotImplementedError(
                "Quantization type not implementated,"
                " op: %20s, Xquant: %20s, Wquant: %20s" %
                (op_name, xquant_type, wquant_type))

        return op
    def quantize(self, op, **kwargs):
        """Quantize a single-input operator configured with ``USQuantizer``.

        The input is quantized to the operator's input precision and its
        scale is stored as the output buffer.  The output precision is the
        input precision plus the bit count of ``k``, where ``k`` is the ratio
        of the number of input elements to the number of output elements
        (taken from ``infer_shapes``).

        Returns:
            The rebuilt operator symbol fed with the quantized input.
        """
        shape_table = kwargs['infer_shapes']
        scale_table = kwargs['buffers']
        quant_cfg = kwargs['cfg_dict']
        name = op.attr('name')
        op_name = op.attr('op_name')
        inputs = sym_iter(op.get_children())
        attr = op.list_attr()
        input_names = [] if not inputs else [c.attr('name') for c in inputs]
        # Output shape must be looked up before `op` is rebound below.
        out_shape = shape_table[name][get_entry_id(op)]

        quant_type = quant_cfg[input_names[0]]['quant_type']
        assert quant_type == USQuantizer.name, (quant_type, name, op_name)
        quantizer = get_quantizer(quant_type)
        in_prec = kwargs['op_input_precs'][op_name]
        X, xprec, xs = quantizer.quantize(inputs[0],
                                          in_prec,
                                          oname=name,
                                          **kwargs)
        scale_table[name] = SBuffer(xs)
        op = get_mxnet_op(op_name)(X, **attr, name=name)

        # k = (#input elements) / (#output elements).
        in_shape = shape_table[input_names[0]][get_entry_id(inputs[0])]
        in_size = nd.prod(nd_array(in_shape)).asscalar()
        out_size = nd.prod(nd_array(out_shape)).asscalar()
        reduce_cnt = int(in_size / out_size)
        kwargs['precs'][name][OUT_KEY] = get_bit_cnt_exp(reduce_cnt) + xprec

        log = logging.getLogger('log.mrt.realize')
        log.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                  op_name, name, scale_table[name].serialize(), input_names)
        return op
Exemple #3
0
 def mergefunc(node, params, graph):
     """Rescale scale-dependent attributes of ``node``.

     For an operator listed in ``attribute_deps`` every dependent attribute
     is multiplied by the matching entry of ``mrt_oscales`` (selected through
     ``name_idx``), truncated to ``int``, and the node is rebuilt with the
     updated attributes.  Any other node is returned unchanged.
     ``attribute_deps``, ``mrt_oscales`` and ``name_idx`` come from the
     enclosing scope.
     """
     name = node.attr('name')
     op_name = node.attr('op_name')
     childs = sutils.sym_iter(node.get_children())
     attr = node.list_attr()

     if op_name not in attribute_deps:
         return node

     for attr_name, dep in attribute_deps[op_name].items():
         raw = sutils.get_attr(attr, attr_name, 0)
         attr[attr_name] = int(raw * mrt_oscales[name_idx[dep]])
     return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
def _quantize_scale(op, **kwargs):
    """Quantize all inputs of a merge-type operator to one shared scale.

    The shared output scale is derived from the largest input feature value
    and the operator's input precision, stored in ``buffers[name]``, and
    every input is quantized to that scale with ``USQuantizer``.

    Supported operators: ``Concat``, ``BroadcastAdd``, ``ElemwiseAdd``,
    ``ElemwiseSub``, ``SliceLike`` (rebuilt as-is) and ``AddN`` (lowered to a
    chain of ``elemwise_add`` whose last node carries the original name).

    Returns:
        The rebuilt symbol.

    Raises:
        NotImplementedError: if ``op_name`` is not one of the operators above.
    """
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    assert all([features[cn].name == FT_TYPE_EXP for cn in cns])
    absmax = max([features[cn].get() for cn in cns])
    oprec = kwargs['op_input_precs'][op_name]
    oscale = scale_exp(absmax, oprec)
    buffers[name] = SBuffer(oscale)
    nodes, cprecs = [], []

    assert all([cfg_dict[cn]['quant_type'] == \
        USQuantizer.name for cn in cns])
    quant = get_quantizer(USQuantizer.name)

    for c in childs:
        c, cprec, _ = quant.quantize(c,
                                     oprec,
                                     oscale=oscale,
                                     oname=name,
                                     **kwargs)
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [
            Concat.op_name, BroadcastAdd.op_name, ElemwiseAdd.op_name,
            ElemwiseSub.op_name, SliceLike.op_name
    ]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        # Concat does not accumulate; the other operators here get one extra
        # bit on top of the widest input.
        infer_prec = max(cprecs) if op_name == Concat.op_name \
            else max(cprecs)+1
    elif op_name == AddN.op_name:
        # Record the number of addends BEFORE the reduction below collapses
        # `nodes` to a single element; otherwise the growth term would
        # always be computed for a count of 1.
        num_terms = len(nodes)
        while len(nodes) > 1:
            tname = N.n('elemwise_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.elemwise_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(num_terms)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        # Exceptions do not %-format their arguments; format explicitly.
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s", op_name,
                 name, buffers[name].serialize(), cns)
    return op
    def _separate_pad(op, **kwargs):
        """Move a Convolution's ``pad`` attribute into an explicit pad node.

        Non-Convolution operators are returned untouched.  For a Convolution
        the ``pad`` attribute is removed; when the padding is non-zero the
        first input is wrapped in a constant-zero ``mx.sym.pad`` over the two
        trailing axes before the Convolution is rebuilt under its original
        name.  The layout, if given, must be ``NCHW``.
        """
        name = op.attr('name')
        op_name = op.attr('op_name')
        attr = op.list_attr()
        childs = sutils.sym_iter(op.get_children())

        if op_name != Convolution.op_name:
            return op

        assert attr.get('layout', 'NCHW') == 'NCHW'
        PH, PW = sutils.get_attr(attr, 'pad', (0, 0))
        attr.pop('pad', None)

        if not (PH == 0 and PW == 0):
            # pad_width holds (before, after) pairs for each of the 4 axes.
            childs[0] = mx.sym.pad(childs[0],
                                   pad_width=(0, 0, 0, 0, PH, PH, PW, PW),
                                   mode='constant',
                                   constant_value=0,
                                   name=N.n('pad'))
        return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
    def quantize(self, op, **kwargs):
        """Quantize a two-input operator whose inputs both use ``USQuantizer``.

        Both inputs are quantized independently to the operator's input
        precision.  The output scale is the product of the two input scales
        and the output precision is the sum of the two input precisions,
        except when the second input quantizes to precision 1 with scale 1
        (treated as a zero operand), in which case scale and precision are
        both set to 1.

        Returns:
            The rebuilt operator symbol fed with the quantized inputs.

        Raises:
            NotImplementedError: if either input is configured with a
                quantizer other than ``USQuantizer``.
        """
        precs, buffers = kwargs['precs'], kwargs['buffers']
        name, op_name = op.attr('name'), op.attr('op_name')
        cfg_dict = kwargs['cfg_dict']
        childs, attr = sym_iter(op.get_children()), op.list_attr()
        cns = [c.attr('name') for c in childs] if childs else []

        oprec = kwargs['op_input_precs'][op_name]
        xquant_type, bquant_type = \
            cfg_dict[cns[0]]['quant_type'], cfg_dict[cns[1]]['quant_type']
        xquant, bquant = \
            get_quantizer(xquant_type), get_quantizer(bquant_type)
        if xquant_type == bquant_type == USQuantizer.name:
            X, xprec, xs = xquant.quantize(childs[0],
                                           oprec,
                                           oname=name,
                                           **kwargs)
            B, bprec, bs = bquant.quantize(childs[1],
                                           oprec,
                                           oname=name,
                                           **kwargs)

            op = get_mxnet_op(op_name)(X, B, **attr, name=name)

            if bprec == 1 and bs == 1:
                # special case: childs[1] is 0
                buffers[name] = SBuffer(1)
                precs[name][OUT_KEY] = 1
            else:
                buffers[name] = SBuffer(xs * bs)
                infer_prec = xprec + bprec
                precs[name][OUT_KEY] = infer_prec
        else:
            # Exceptions do not apply %-formatting to their arguments, so the
            # message has to be formatted explicitly.
            raise NotImplementedError(
                "Quantization type not implementated,"
                " op: %20s, Xquant: %20s, Wquant: %20s" %
                (op_name, xquant_type, bquant_type))

        logger = logging.getLogger('log.mrt.realize')
        logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                     op_name, name, buffers[name].serialize(), cns)
        return op
    def restore(op, **kwargs):
        """Rebuild ``op`` on de-scaled inputs.

        Each input that has an entry in ``buffers`` with a scale other than 1
        is divided by that scale; the operator is then rebuilt on those
        inputs.  The output precision is derived from the operator's own
        feature value and its buffer is reset to scale 1.

        Returns:
            The rebuilt operator symbol.
        """
        features = kwargs['features']
        precs = kwargs['precs']
        buffers = kwargs['buffers']
        name, op_name = op.attr('name'), op.attr('op_name')
        childs = sutils.sym_iter(op.get_children())
        attr = op.list_attr()

        def _descale(sym):
            # Inputs without a recorded buffer are treated as scale 1.
            cname = sym.attr('name')
            scale = buffers[cname].get() if cname in buffers else 1
            return sym if scale == 1 else sym / scale

        new_childs = [_descale(c) for c in (childs if childs is not None else [])]

        out = sutils.get_mxnet_op(op_name)(*new_childs, **attr, name=name)
        ft = features[name]
        assert ft.name == FT_TYPE_EXP
        precs[name][OUT_KEY] = get_bit_exp(ft.get())
        buffers[name] = get_buffer_exp(1)
        return out
    def _separate_bias(op, **kwargs):
        """Split the bias of a Convolution / FullyConnected into an add node.

        Operators of any other type, or with fewer than three inputs, are
        returned unchanged.  Otherwise the operator is rebuilt without bias
        (``no_bias=True``) under a fresh name, the bias is expanded so it can
        broadcast against the output, and a ``broadcast_add`` carrying the
        original name combines the two.
        """
        name = op.attr('name')
        op_name = op.attr('op_name')
        attr = op.list_attr()
        childs = sutils.sym_iter(op.get_children())

        too_few_inputs = childs and len(childs) < 3
        handled_ops = [Convolution.op_name, FullyConnected.op_name]
        if too_few_inputs or op_name not in handled_ops:
            return op

        attr['no_bias'] = True
        biasless = sutils.get_mxnet_op(op_name)(childs[0],
                                                childs[1],
                                                **attr,
                                                name=N.n(name))
        bias = childs[2]
        bias_name = bias.attr('name')
        if op_name == Convolution.op_name:
            if 'layout' in attr:
                assert attr['layout'] == 'NCHW'
            # Expand axis 0 once and the last axis twice; only the final
            # expansion is named after the bias.
            B = bias
            for axis, label in ((0, 'expand_dims'), (-1, 'expand_dims'),
                                (-1, bias_name)):
                B = mx.sym.expand_dims(B, axis=axis, name=N.n(label))
        else:
            B = mx.sym.expand_dims(bias, axis=0, name=N.n(bias_name))
        return mx.sym.broadcast_add(biasless, B, name=name)
def _quantize_xw(op, **kwargs):
    """Quantize a two-input (data, weight) operator.

    Each input is quantized with the quantizer named in its ``cfg_dict``
    entry.  The output buffer is built from the product of the two input
    scales.  The output precision is the sum of both input precisions plus
    the bit count of ``k``, the product of the weight shape without its
    first axis.

    Returns:
        The rebuilt operator symbol fed with the quantized inputs.
    """
    features = kwargs['features']
    buffers = kwargs['buffers']
    precs = kwargs['precs']
    cfg_dict = kwargs['cfg_dict']
    params = kwargs['params']
    name = op.attr('name')
    op_name = op.attr('op_name')
    childs = sym_iter(op.get_children())
    attr = op.list_attr()
    cns = [] if not childs else [c.attr('name') for c in childs]

    data, weight = childs
    data_quant = get_quantizer(cfg_dict[cns[0]]['quant_type'])
    weight_quant = get_quantizer(cfg_dict[cns[1]]['quant_type'])

    in_prec = kwargs['op_input_precs'][op_name]
    data_q, data_prec, data_scale = data_quant.quantize(
        data, in_prec, oname=name, **kwargs)
    weight_q, weight_prec, weight_scale = weight_quant.quantize(
        weight, in_prec, oname=name, **kwargs)
    buffers[name] = get_buffer_exp(data_scale * weight_scale)
    op = get_mxnet_op(op_name)(data_q, weight_q, **attr, name=name)

    weight_shape = params[cns[1]].shape
    k = int(nd.prod(nd_array(weight_shape[1:])).asscalar())
    precs[name][OUT_KEY] = get_bit_cnt_exp(k) + data_prec + weight_prec
    return op
    def quantize(self, op, **kwargs):
        """Quantize a two-input convolution according to its inputs' quantizers.

        The branch taken depends on the ``quant_type`` configured for the
        data input ``X`` and the weight input ``W``:

        * ``US`` / ``US``: delegated to ``_quantize_xw``.
        * ``US`` / ``UA``: the result is the convolution of the quantized
          inputs plus a weight-zero-point term built from a convolution with
          an all-ones kernel.
        * ``UA`` / ``US``: the quantized convolution plus a convolution of an
          all-ones input with the quantized weight.
        * ``UA`` / ``UA``: sum of four terms (quantized convolution, weight
          zero-point term, data zero-point term, constant term).
        * ``GroupConvQuant`` / ``GroupConvQuant``: work in progress; ends in
          a failing assertion.

        ``buffers[name]`` and ``precs[name][OUT_KEY]`` are updated for the
        output.

        Returns:
            The quantized replacement symbol.

        Raises:
            NotImplementedError: for any other quantizer combination.
        """
        features, buffers = kwargs['features'], kwargs['buffers']
        precs, graph = kwargs['precs'], kwargs['graph']
        cfg_dict, params = kwargs['cfg_dict'], kwargs['params']
        name, op_name = op.attr('name'), op.attr('op_name')
        childs, attr = sym_iter(op.get_children()), op.list_attr()
        cns = [c.attr('name') for c in childs] if childs else []

        # assert len(childs) == 2 and 'pad' not in attr
        assert len(childs) == 2
        xquant_type = cfg_dict[cns[0]]['quant_type']
        wquant_type = cfg_dict[cns[1]]['quant_type']
        X, W = childs
        xquant, wquant = \
            get_quantizer(xquant_type), get_quantizer(wquant_type)
        oprec = kwargs['op_input_precs'][op_name]

        if xquant_type == wquant_type == USQuantizer.name:
            op = _quantize_xw(op, **kwargs)
        elif xquant_type == USQuantizer.name and \
            wquant_type == UAQuantizer.name:
            Xq, xprec, xscale = xquant.quantize(X, oprec, oname=name, **kwargs)
            Wq, wprec, wscale, wzpoint = wquant.quantize(W,
                                                         oprec,
                                                         oname=name,
                                                         **kwargs)
            buffers[name] = get_buffer_exp(xscale * wscale)

            Ye1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
            wshp = params[cns[1]].shape
            # np.prod replaces np.product, which NumPy 2.0 removed.
            pd = int(np.prod(wshp[1:]))
            infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec

            # Zero-point term: wzint * conv(Xq, ones).
            W1 = nd_full_const(1, wshp, graph, params)
            Ye2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
            wzint = round(wzpoint * wscale)
            Wz = nd_const(wzint, graph, params)
            Ye2 = mx.sym.broadcast_mul(Wz, Ye2, name=N.n('broadcast_mul'))
            infer_prec2 = get_bit_cnt_exp(pd) + xprec + get_bit_exp(wzint)

            op = mx.sym.elemwise_add(Ye1, Ye2, name=name)
            precs[name][OUT_KEY] = max(infer_prec1, infer_prec2) + 1
        elif xquant_type == UAQuantizer.name and \
            wquant_type == USQuantizer.name:
            Xq, xprec, xscale, Xzp = xquant.quantize(X,
                                                     oprec,
                                                     oname=name,
                                                     **kwargs)
            Wq, wprec, wscale = wquant.quantize(W, oprec, oname=name, **kwargs)
            buffers[name] = get_buffer_exp(xscale * wscale)

            Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
            wshp = params[cns[1]].shape
            pd = np.prod(wshp[1:])
            infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 1

            xshp = params[cns[0]].shape
            # NOTE(review): the other branches build all-ones constants with
            # `nd_full_const`; confirm `nd_full` is the intended helper here.
            X1 = nd_full(1, xshp, graph, params)
            # NOTE(review): unlike the UA/UA branch, Y2 is not multiplied by
            # the data zero point `Xzp` although `infer_prec2` accounts for
            # it -- confirm.
            Y2 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
            xzp = params[Xzp.attr('name')].asscalar()
            infer_prec2 = get_bit_cnt_exp(abs(xzp) * pd) + wprec

            op = mx.sym.elemwise_add(Y1, Y2, name=N.n('elemwise_add'))
            infer_prec = max(infer_prec1, infer_prec2) + 1
            precs[name][OUT_KEY] = infer_prec
        elif xquant_type == wquant_type == UAQuantizer.name:
            Xq, xprec, xscale, Xzp = xquant.quantize(X,
                                                     oprec,
                                                     oname=name,
                                                     **kwargs)
            Wq, wprec, wscale, Wzp = wquant.quantize(W,
                                                     oprec,
                                                     oname=name,
                                                     **kwargs)
            buffers[name] = get_buffer_exp(xscale * wscale)

            nodes, infer_precs = [], []

            # Term 1: conv(Xq, Wq).
            Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
            nodes.append(Y1)
            wshp = params[cns[1]].shape
            pd = np.prod(wshp[1:])
            infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 2
            infer_precs.append(infer_prec1)

            # Term 2: Wzp * conv(Xq, ones).
            W1 = nd_full_const(1, wshp, graph, params)
            Y2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
            Y2 = mx.sym.broadcast_mul(Wzp, Y2, name=N.n('broadcast_mul'))
            nodes.append(Y2)
            wzp = params[Wzp.attr('name')].asscalar()
            infer_prec2 = get_bit_cnt_exp(abs(wzp) * pd) + xprec + 1
            infer_precs.append(infer_prec2)

            # Term 3: Xzp * conv(ones, Wq).  The convolution must receive the
            # operator attributes and a name like every other Convolution in
            # this method, not `graph` / `params` as positional inputs.
            xshp = params[cns[0]].shape
            X1 = nd_full_const(1, xshp, graph, params)
            Y3 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
            Y3 = mx.sym.broadcast_mul(Xzp, Y3, name=N.n('broadcast_mul'))
            nodes.append(Y3)
            xzp = params[Xzp.attr('name')].asscalar()
            infer_prec3 = get_bit_cnt_exp(abs(xzp) * pd) + wprec + 1
            infer_precs.append(infer_prec3)

            # Term 4: constant built from both zero points.
            val = pd * abs(xzp) * abs(wzp)
            Y4 = nd_const(val, graph, params)
            nodes.append(Y4)
            infer_prec4 = get_bit_cnt_exp(val)
            infer_precs.append(infer_prec4)

            # Fold the four terms pairwise into a single symbol.
            while len(nodes) > 1:
                a, b = nodes.pop(), nodes.pop()
                node = mx.sym.broadcast_add(a, b, name=N.n('broadcast_add'))
                nodes.append(node)
            op = nodes[0]
            infer_prec = max(infer_precs) + 2
            precs[name][OUT_KEY] = infer_prec
        elif xquant_type == GroupConvQuant.name and \
            wquant_type == GroupConvQuant.name:
            num_groups_x = cfg_dict[cns[0]]['gn_info']['num_groups']
            num_groups_w = cfg_dict[cns[1]]['gn_info']['num_groups']
            assert num_groups_x == num_groups_w, \
                "num_groups of x and weight should be equal, " + \
                "num_groups of x: {}, num_groups of weight: {}".format(
                    num_groups_x, num_groups_w)
            Xq, xprec_list, xscale_list = xquant.quantize(
                X, oprec, oname=name, num_groups=num_groups_x, **kwargs)
            Wq, wprec_list, wscale_list = wquant.quantize(
                W, oprec, oname=name, num_groups=num_groups_w, **kwargs)
            op = get_mxnet_op(op_name)(Xq, Wq, **attr, name=name)
            IPG = kwargs['infer_shapes'][cns[1]][get_entry_id(X)][1]
            kprec = get_bit_cnt_exp(IPG)
            infer_prec_list = [
                kprec + wprec_list[i] + xprec_list[i] \
                for i in range(len(wprec_list))
            ]
            oscale_list = [
                xscale_list[i] * wscale_list[i] \
                for i in range(len(wscale_list))
            ]
            # Work in progress: per-group precisions/scales are computed but
            # not yet stored, so this branch deliberately fails here.
            assert False, "implementing..."
        else:
            raise NotImplementedError(
                "Quantization type not implementated," + \
                " op: {}, Xquant: {}, Wquant: {}".format(
                op_name, xquant_type, wquant_type))

        logger = logging.getLogger('log.mrt.realize')
        logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                     op_name, name, buffers[name].serialize(), cns)
        return op
    def slice_channel(self, op, **kwargs):
        """Split a two-input convolution into per-channel-slice convolutions.

        The data and weight configuration entries are switched to layer-wise
        granularity, and a layer-wise ``US_QUANT_TYPE`` entry is registered
        in ``cfg_dict`` for every generated node.

        * ``num_group == 1``: data and weight are sliced along the channel
          axis in chunks of ``step``; each pair is convolved and the partial
          results are summed with ``add_n`` under the original name.
        * otherwise (requires ``step == 1``): every output channel is built
          from single-channel convolutions over its group's input channels,
          summed with ``add_n`` when there are several, and all output
          channels are concatenated along axis 1 under the original name.

        Returns:
            The replacement symbol.
        """
        name, op_name = op.attr('name'), op.attr('op_name')
        attr, childs = op.list_attr(), sym_iter(op.get_children())
        cns = [c.attr('name') for c in childs]
        cfg_dict = kwargs['cfg_dict']
        infer_shapes = kwargs['infer_shapes']

        gn_info = cfg_dict[name]['gn_info']
        ichannel = gn_info['ichannel']
        step = gn_info['step']
        assert ichannel == 1

        assert len(childs) == 2
        data, weight = childs
        xshp = infer_shapes[cns[0]][get_entry_id(data)]
        wshp = infer_shapes[cns[1]][get_entry_id(weight)]
        # Not used below; the lookup is kept so that a missing shape entry
        # for the output still fails at this point.
        oshp = infer_shapes[name][get_entry_id(op)]
        assert len(xshp) == len(wshp) == 4 and xshp[1] % step == 0

        # The input entries are updated in place and then shared by every
        # slice derived from the corresponding input.
        data_cfg, weight_cfg = cfg_dict[cns[0]], cfg_dict[cns[1]]
        data_cfg['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
        weight_cfg['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
        out_cfg = {
            'gn_info': {
                'gn_type': LAYER_WISE_TYPE
            },
            'quant_type': US_QUANT_TYPE,
            'opt_info': cfg_dict[name]['opt_info'],
        }
        # NOTE(review): `eval` on an attribute string; acceptable only while
        # the symbol attributes come from a trusted model file.
        num_group = eval(attr['num_group'])
        C, IC, OC = xshp[1], wshp[1], wshp[0]
        assert num_group * IC == C and OC >= num_group and OC % num_group == 0

        if num_group == 1:
            xs = sym_slice(data, ichannel, step, **kwargs)
            ws = sym_slice(weight, ichannel, step, **kwargs)
            partials = []
            for j, start in enumerate(range(0, C, step)):
                suffix = '_{}-{}'.format(start, start + step)
                x_part, w_part = xs[j], ws[j]
                cfg_dict[x_part.attr('name')] = data_cfg
                cfg_dict[w_part.attr('name')] = weight_cfg
                part_name = N.n(name + suffix)
                part = get_mxnet_op(op_name)(x_part,
                                             w_part,
                                             **attr,
                                             name=part_name)
                cfg_dict[part_name] = out_cfg
                partials.append(part)
            assert len(partials) > 1
            # An alternative lowering (transpose/reshape the weight, run one
            # group-wise convolution, reshape and sum) was sketched here in
            # the original source but is not used.
            return mx.sym.add_n(*partials, name=name)

        assert step == 1
        xs = sym_slice(data, ichannel, step, **kwargs)
        ws = kernel_slice_2d(weight, **kwargs)
        OPG = OC // num_group
        nattr = attr.copy()
        nattr['num_group'] = '1'
        nattr['num_filter'] = '1'
        channels = []
        for o in range(OC):
            # First input channel of the group that output channel `o`
            # belongs to.
            base = (o // OPG) * IC
            terms = []
            for i in range(IC):
                suffix = '_{}-{}'.format(o, i)
                x_k, w_oi = xs[base + i], ws[o][i]
                cfg_dict[x_k.attr('name')] = data_cfg
                cfg_dict[w_oi.attr('name')] = weight_cfg
                term_name = N.n(name + suffix)
                term = mx.sym.Convolution(x_k, w_oi, **nattr, name=term_name)
                cfg_dict[term_name] = out_cfg
                terms.append(term)
            if len(terms) > 1:
                sum_name = N.n(name + '_add_n_' + str(o))
                channel = mx.sym.add_n(*terms, name=sum_name)
                cfg_dict[sum_name] = out_cfg
            else:
                channel = terms[0]
            channels.append(channel)
        assert len(channels) > 1
        return mx.sym.concat(*channels, dim=1, name=name)
def _quantize_scale_zp(op, **kwargs):
    """Quantize a merge-type operator whose inputs may carry zero points.

    Every input's quantizer proposes a scale for the operator's input
    precision; the smallest one is used as the shared output scale and is
    stored in ``buffers[name]``.  Inputs are then quantized to that scale.
    For a ``UAQuantizer`` input an additional constant node holding the
    scaled zero point is inserted before the quantized input.

    Supported operators: ``Concat`` (rebuilt over the collected nodes),
    ``BroadcastAdd`` and ``AddN`` (lowered to a chain of binary adds whose
    last node carries the original name).

    Returns:
        The rebuilt symbol.

    Raises:
        NotImplementedError: if an input uses a quantizer other than
            ``USQuantizer`` / ``UAQuantizer`` or ``op_name`` is not one of
            the operators above.
    """
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    graph, params = kwargs['graph'], kwargs['params']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    oprec = kwargs['op_input_precs'][op_name]
    # Resolve each input's quantizer once; it is needed for both passes.
    cquants = [
        get_quantizer(cfg_dict[c.attr('name')]['quant_type']) for c in childs
    ]
    oscale = min(
        cquant.get_scale(oprec, features[c.attr('name')])
        for c, cquant in zip(childs, cquants))
    buffers[name] = SBuffer(oscale)
    nodes, cprecs = [], []

    for c, cquant in zip(childs, cquants):
        if cquant.name == USQuantizer.name:
            c, cprec, _ = cquant.quantize(c,
                                          oprec,
                                          oscale=oscale,
                                          oname=name,
                                          **kwargs)
        elif cquant.name == UAQuantizer.name:
            c, cprec, cscale, czpoint = cquant.quantize(c,
                                                        oprec,
                                                        oscale=oscale,
                                                        oname=name,
                                                        **kwargs)
            # The zero point becomes its own constant operand.
            # NOTE(review): for Concat this constant is concatenated as an
            # extra input rather than added -- confirm that is intended.
            czint = round(czpoint * cscale)
            Cz = nd_const(czint, graph, params)
            nodes.append(Cz)
            cprecs.append(get_bit_exp(czint))
        else:
            # Without this guard an unsupported quantizer would silently
            # reuse the previous input's precision (or hit a NameError on
            # the first input) and pass the input through unquantized.
            raise NotImplementedError(
                "Quantization type not implemented, "
                "op: %s, name: %s, quant_type: %s" %
                (op_name, name, cquant.name))
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [Concat.op_name]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        infer_prec = max(cprecs)
    elif op_name in [BroadcastAdd.op_name, AddN.op_name]:
        if op_name == BroadcastAdd.op_name:
            add_op, tmp_prefix = mx.sym.broadcast_add, 'broadcast_add'
        else:
            add_op, tmp_prefix = mx.sym.elemwise_add, 'elemwise_add'
        # Record the number of summed terms BEFORE the reduction collapses
        # `nodes` to a single element; otherwise the growth term would
        # always be computed for a count of 1.
        num_terms = len(nodes)
        while len(nodes) > 1:
            tname = N.n(tmp_prefix) if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = add_op(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(num_terms)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        # NotImplementedError (not NotADirectoryError) is the matching
        # exception, and the message must be formatted explicitly.
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s", op_name,
                 name, buffers[name].serialize(), cns)
    return op