def _quantize_operator(self, X, oprec, oscale=None, **kwargs):
        """ Symmetric Quantization of symbol expansion (int value)
        """
        logger = kwargs.get("logger", logging.getLogger("log.mrt.realize"))
        params, features = kwargs["params"], kwargs["features"]
        precs, buffers = kwargs["precs"], kwargs["buffers"]
        graph, shift_bits = kwargs["graph"], kwargs["shift_bits"]
        xn, xopn = X.attr("name"), X.attr("op_name")
        xqn = N.n(xn)

        oprec = precs[xn].get(kwargs['oname'], oprec)
        iscale, iprec = buffers[xn].get(), precs[xn][OUT_KEY]
        ft = features[xn]
        absmax = ft.get()
        if absmax == 0:
            return X, 1, 1 if oscale is None else oscale
        exactly = oscale is not None
        oscale = self.get_scale(oprec, ft) if oscale is None else oscale

        sb = iprec - oprec
        if sb > shift_bits:
            iprec -= sb
            X = tutils.realize(X, sb, iprec)
            iscale = iscale / (2**sb)

        if exactly or iprec > oprec:
            rescale = oscale / iscale
            bits = MAX_BIT - iprec
            frac, exp = sim.cvm_float(rescale, bits)
            sim_scale = frac * (2**exp)
            scale_err = abs((sim_scale - rescale) / rescale)
            if scale_err > 0.001:
                logger.warn(
                    "Operator  %-20s name=%-40s quantize with sb=%s" +
                    " scale=%s, error=%s", xopn, xn, sb, iscale, scale_err)
            oscale = iscale * frac * (2**exp)
            if frac > 1:
                var = sutils.nd_const(frac, graph, params)
                X = mx.sym.broadcast_mul(X,
                                         var,
                                         name=N.n("mrt_quantize_scale"))
            oprec = self.get_prec(oscale * absmax)
            X = tutils.realize(X, -exp, oprec)
            logger.debug(
                "Operator  %-20s name=%-40s requantize" +
                " with scale=%-16.8f<%d, %d>" +
                " iprec=%s, iscale=%-10.5f, oprec=%s, oscale=%-10.5f", xopn,
                xn, rescale, frac, exp, iprec, iscale, oprec, oscale)
        else:
            oprec, oscale = iprec, iscale
            logger.debug(
                "Operator  %-20s name=%-40s clip with iprec=%s, oprec=%s",
                xopn, xn, iprec, oprec)

        return X, oprec, oscale
    def _quantize_parameter(self, W, oprec, oscale=None, **kwargs):
        """ Symmetric Quantization of weight (real value)
        """
        logger = logging.getLogger("log.mrt.realize")
        params, features = kwargs["params"], kwargs["features"]
        precs = kwargs['precs']
        wn = W.attr("name")
        wqn = N.n(wn)

        oprec = precs[wn].get(kwargs['oname'], oprec)
        ft = features[wn]
        absmax = ft.get()

        if absmax == 0:
            oprec, oscale = 1, 1 if oscale is None else oscale
            params[wqn] = sutils.nd_zeros(params[wn].shape)
        else:
            oscale = self.get_scale(oprec, ft) if oscale is None else oscale
            params[wqn], oprec = self.int_realize(params[wn] * oscale,
                                                  oprec,
                                                  logger=logger)
        attr = {"precision": str(oprec)}
        # TODO: CVM precision update
        # attr = {"precision": "int"+str(oprec)}
        W = mx.sym.var(wqn, shape=params[wqn].shape, attr=attr)
        return W, oprec, oscale
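For reference, a minimal numpy sketch of the symmetric scheme that get_scale and int_realize are assumed to implement above (helper names here are illustrative, not the MRT API): map [-absmax, absmax] onto the signed range of the target precision, round, and clip.

import numpy as np

def symmetric_quantize(weight, prec=8):
    # sketch only: scale [-absmax, absmax] onto signed `prec`-bit integers
    absmax = np.abs(weight).max()
    if absmax == 0:
        return np.zeros_like(weight, dtype=np.int32), 1.0
    lim = 2 ** (prec - 1) - 1
    scale = lim / absmax
    q = np.clip(np.round(weight * scale), -lim, lim).astype(np.int32)
    return q, scale

w = np.array([-0.7, 0.0, 0.31, 0.69])
wq, wscale = symmetric_quantize(w, prec=8)
print(wq, wq / wscale)   # dequantized values approximate w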
def verify_batch_dot(ashp, bshp, transpose_a, transpose_b):
    A_np = np.random.uniform(size=ashp)
    B_np = np.random.uniform(size=bshp)
    A = nd.array(A_np)
    B = nd.array(B_np)

    # org op
    y = nd.batch_dot(A, B, transpose_a, transpose_b)

    # rewrite op
    andims, bndims = len(ashp), len(bshp)
    assert andims == 3 and bndims == 3, \
        "batch_dot currently only supports 3D*3D arrays, " \
        "got shapes: %s, %s" % (ashp, bshp)

    if transpose_a:
        ashp = ashp[:-2] + (ashp[-1], ashp[-2])
        axes = tuple(range(andims - 2)) + (andims - 1, andims - 2)
        A = nd.transpose(A, axes=axes, name=N.n("transpose_a"))

    if transpose_b:
        bshp = bshp[:-2] + (bshp[-1], bshp[-2])
        bndims = len(bshp)
        axes = tuple(range(bndims - 2)) + (bndims - 1, bndims - 2)
        B = nd.transpose(B, axes=axes, name=N.n("transpose_b"))

    assert ashp[-1] == bshp[1]
    C, MATRIX_MAXIMUM_SIZE = ashp[-1], 4096
    if ashp[-1] <= MATRIX_MAXIMUM_SIZE:
        op = nd.batch_dot(A, B, name=N.n("batch_dot"))
    else:
        C, nodes, step, start = \
            ashp[-1], [], MATRIX_MAXIMUM_SIZE, 0
        while start < C:
            stop = min(start + step, C)

            begin, end = (0, 0, start), (ashp[0], ashp[1], stop)
            Ak = nd.slice(A, begin=begin, end=end, name=N.n("slice_a"))

            begin, end = (0, start, 0), (bshp[0], stop, bshp[2])
            Bk = nd.slice(B, begin=begin, end=end, name=N.n("slice_b"))

            tmp = nd.batch_dot(Ak, Bk, name=N.n("batch_dot"))
            nodes.append(tmp)
            start += step

        while len(nodes) > 1:
            A, B = nodes.pop(0), nodes.pop(0)
            tmp = nd.elemwise_add(A, B, name=N.n("elemwise_add"))
            nodes.append(tmp)

        op = nodes[0]

    z = op
    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp - yp)
    print(zn, yn, rn)
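A typical invocation of the check above (shapes chosen arbitrarily; they only need to satisfy the 3D*3D contract):

verify_batch_dot((2, 4, 8), (2, 8, 5), transpose_a=False, transpose_b=False)
verify_batch_dot((2, 8, 4), (2, 5, 8), transpose_a=True, transpose_b=True)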
    def _quantize_operator(self, X, oprec, oscale=None, **kwargs):
        logger = kwargs.get("logger", logging.getLogger("log.mrt.realize"))
        params, features = kwargs["params"], kwargs["features"]
        precs, buffers = kwargs["precs"], kwargs["buffers"]
        graph, shift_bits = kwargs["graph"], kwargs["shift_bits"]
        xn, xopn = X.attr("name"), X.attr("op_name")
        xqn = N.n(xn)

        oprec = precs[xn].get(kwargs['oname'], oprec)
        iscale, iprec = buffers[xn].get(), precs[xn][OUT_KEY]
        minv, maxv = features[xn].get()
        oscale = (2**(oprec) - 1) / (maxv - minv) if oscale is None else oscale
        zpoint = round(minv * iscale)

        sb = iprec - oprec
        if sb > shift_bits:
            iprec -= sb
            X = tutils.realize(X, sb, iprec)
            iscale = iscale / (2**sb)

        rescale = oscale / iscale
        bits = MAX_BIT - iprec
        frac, exp = sim.cvm_float(rescale, bits)
        sim_scale = frac * (2**exp)
        scale_err = abs((sim_scale - rescale) / rescale)
        if scale_err > 0.001:
            logger.warn(
                "Operator  %-20s name=%-40s quantize with sb=%s" +
                " scale=%s, error=%s", xopn, xn, sb, iscale, scale_err)
        oscale = iscale * frac * (2**exp)
        if frac > 1:
            var = sutils.nd_const(frac, graph, params)
            X = mx.sym.broadcast_mul(X, var, name=N.n("mrt_quantize_scale"))
        Zp = sutils.nd_const(zpoint, graph, params)
        X = mx.sym.broadcast_sub(X, Zp, name=N.n('minus_zp'))
        oprec = self.get_prec(oscale * (maxv - minv))
        X = tutils.realize(X, -exp, oprec)
        logger.debug(
            "Operator  %-20s name=%-40s requantize" +
            " with scale=%-16.8f<%d, %d>" +
            " iprec=%s, iscale=%-10.5f, oprec=%s, oscale=%-10.5f", xopn, xn,
            rescale, frac, exp, iprec, iscale, oprec, oscale)

        return X, oprec, oscale, zpoint
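The requantization step in both operator quantizers above rests on approximating rescale = oscale / iscale as frac * 2**exp and realizing the division as a rounded right shift. A self-contained numpy sketch of that idea (approx_scale is a hypothetical stand-in for sim.cvm_float, not its actual implementation):

import numpy as np

def approx_scale(rescale, bits=24):
    # find an integer frac and exponent exp with frac * 2**exp ~= rescale
    frac, exp = rescale, 0
    while abs(frac) >= 2 ** bits:
        frac, exp = frac / 2.0, exp + 1
    while 0 < abs(frac) < 2 ** (bits - 1):
        frac, exp = frac * 2.0, exp - 1
    return int(round(frac)), exp

def rounded_right_shift(x, sb):
    # emulate a realize-style rounded shift; sb <= 0 degenerates to a multiply
    if sb <= 0:
        return x * (1 << -sb)
    return (x + (1 << (sb - 1))) // (1 << sb)

x_int = np.array([-120, -3, 0, 7, 115])          # integers at iscale
iscale, oscale = 10.0, 2.5
frac, exp = approx_scale(oscale / iscale)
y_int = rounded_right_shift(x_int * frac, -exp)  # integers at (approximately) oscale
print(frac, exp, y_int)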
Example #5
def _quantize_table(op, **kwargs):
    params, graph = kwargs['params'], kwargs['graph']
    features, precs, buffers = \
        kwargs['features'], kwargs['precs'], kwargs['buffers']
    cfg_dict = kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    childs = sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    xquant_type = cfg_dict[cns[0]]['quant_type']
    xquant = get_quantizer(xquant_type)

    iprec = kwargs['op_input_precs'][op_name]
    xs = scale_exp(features[cns[0]].get(), iprec)
    X, xprec, xs = xquant.quantize(childs[0],
                                   iprec,
                                   oscale=xs,
                                   oname=name,
                                   **kwargs)
    alpha = get_range_exp(xprec)
    var = nd_const(alpha, graph, params)
    X = mx.sym.broadcast_add(X, var, name=N.n(op_name + '_offset'))

    out = sutils.get_nd_op(op_name)(sutils.nd_arange(-alpha, alpha + 1) / xs)
    oprec = precs[name].get(OUT_KEY, 16)
    oscale = scale_exp(out.abs().max().asscalar(), oprec)
    buffers[name] = SBuffer(oscale)

    W_name = N.n("cvm_lut_weight")
    params[W_name] = weight = (out * oscale).round().reshape(2 * alpha + 1, 1)
    wattr = {'precision': str(oprec)}
    W = graph[W_name] = mx.sym.var(W_name, shape=weight.shape, attr=wattr)
    op = mx.sym.Custom(X,
                       W,
                       in_dim=2 * alpha + 1,
                       name=name,
                       op_type='cvm_lut')
    precs[name][OUT_KEY] = oprec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s", op_name,
                 name, buffers[name].serialize(), cns)
    return op
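The cvm_lut pattern above can be sketched in plain numpy (the quantized input, scales, and helper below are illustrative, not the MRT API): the nonlinearity is precomputed for every representable integer input and the quantized output is a table read.

import numpy as np

def lut_quantize(fn, x_int, xscale, oprec=16):
    # precompute fn over the whole integer input range [-alpha, alpha]
    alpha = int(np.abs(x_int).max())
    table = fn(np.arange(-alpha, alpha + 1) / xscale)
    oscale = (2 ** (oprec - 1) - 1) / np.abs(table).max()
    table_int = np.round(table * oscale).astype(np.int64)
    # shift the input into [0, 2*alpha] and use it as a table index
    return table_int[x_int + alpha], oscale

x_int = np.array([-5, -1, 0, 3, 5])            # already-quantized input, xscale = 2.0
y_int, oscale = lut_quantize(np.exp, x_int, xscale=2.0)
print(y_int / oscale, np.exp(x_int / 2.0))     # the two rows should roughly agree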
Example #6
def _quantize_scale(op, **kwargs):
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    assert all([features[cn].name == FT_TYPE_EXP for cn in cns])
    absmax = max([features[cn].get() for cn in cns])
    oprec = kwargs['op_input_precs'][op_name]
    oscale = scale_exp(absmax, oprec)
    buffers[name] = SBuffer(oscale)
    nodes, cprecs = [], []

    assert all([cfg_dict[cn]['quant_type'] == \
        USQuantizer.name for cn in cns])
    quant = get_quantizer(USQuantizer.name)

    for c in childs:
        c, cprec, _ = quant.quantize(c,
                                     oprec,
                                     oscale=oscale,
                                     oname=name,
                                     **kwargs)
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [
            Concat.op_name, BroadcastAdd.op_name, ElemwiseAdd.op_name,
            ElemwiseSub.op_name, SliceLike.op_name
    ]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        infer_prec = max(cprecs) if op_name == Concat.op_name \
            else max(cprecs)+1
    elif op_name == AddN.op_name:
        while len(nodes) > 1:
            tname = N.n('elemwise_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.elemwise_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(len(nodes))
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s", op_name,
                 name, buffers[name].serialize(), cns)
    return op
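The shared oscale above exists because integer addition and concatenation are only meaningful when every operand sits on the same integer grid. A small numpy illustration of that constraint (names are illustrative):

import numpy as np

def quantize_to(x, scale, prec=8):
    lim = 2 ** (prec - 1) - 1
    return np.clip(np.round(x * scale), -lim, lim)

a = np.array([0.2, -1.5, 0.7])
b = np.array([0.05, 0.6, -0.3])
absmax = max(np.abs(a).max(), np.abs(b).max())   # shared threshold across inputs
oscale = (2 ** 7 - 1) / absmax                   # one scale for every child
y_int = quantize_to(a, oscale) + quantize_to(b, oscale)
print(y_int / oscale, a + b)                     # agree up to rounding error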
    def _realize_ch(self, X, sbs, precs, name=None):
        name = name if name else N.n('realize_ch')
        attrs = {
            "sbs": ','.join([str(sb) for sb in sbs]),
            "precs": ','.join([str(prec) for prec in precs]),
            "op_type": "cvm_right_shift_channel",
        }
        if all([sb > 0 for sb in sbs]):
            sym = mx.sym.Custom(X, name=name, **attrs)
        else:
            raise NotImplementedError(
                "realize_ch has not been implemented for sbs: {}".format(sbs))
        return sym
Example #8
    def _separate_bias(op, **kwargs):
        name, op_name = op.attr('name'), op.attr('op_name')
        attr, childs = op.list_attr(), sutils.sym_iter(op.get_children())

        if childs and len(childs) < 3 or op_name not in \
            [Convolution.op_name, FullyConnected.op_name]:
            return op

        attr['no_bias'] = True
        op = sutils.get_mxnet_op(op_name)(childs[0],
                                          childs[1],
                                          **attr,
                                          name=N.n(name))
        bn = childs[2].attr('name')
        if op_name == Convolution.op_name:
            if 'layout' in attr:
                assert attr['layout'] == 'NCHW'
            B = mx.sym.expand_dims(childs[2], axis=0, name=N.n('expand_dims'))
            B = mx.sym.expand_dims(B, axis=-1, name=N.n('expand_dims'))
            B = mx.sym.expand_dims(B, axis=-1, name=N.n(bn))
        else:
            B = mx.sym.expand_dims(childs[2], axis=0, name=N.n(bn))
        op = mx.sym.broadcast_add(op, B, name=name)
        return op
Example #9
def sym_slice(X, ichannel, step, **kwargs):
    name = X.attr('name')
    shp = kwargs['infer_shapes'][name][get_entry_id(X)]
    ndims = len(shp)
    nodes = []
    rchannel = ndims - ichannel - 1
    for i in range(0, shp[ichannel], step):
        suffix = '_' + str(i) + '-' + str(i + step)
        Xi = mx.sym.slice(
            X,
            begin=(None, ) * ichannel + (i, ) + (None, ) * rchannel,
            end=(None, ) * ichannel + (i + step, ) + (None, ) * rchannel,
            name=N.n(name + suffix))
        nodes.append(Xi)
    return nodes
Example #10
def kernel_slice_2d(W, **kwargs):
    name = W.attr('name')
    shp = kwargs['infer_shapes'][name][get_entry_id(W)]
    OC, IC = shp[:2]
    nodes = []
    for o in range(OC):
        Wo = mx.sym.slice(W,
                          begin=(o, None, None, None),
                          end=(o + 1, None, None, None))
        nnodes = []
        for i in range(IC):
            suffix = '_' + str(o) + '-' + str(i)
            Woi = mx.sym.slice(Wo,
                               begin=(None, i, None, None),
                               end=(None, i + 1, None, None),
                               name=N.n(name + suffix))
            nnodes.append(Woi)
        nodes.append(nnodes[:])
    return nodes
Example #11
    def _quantize_parameter(self, W, oprec, oscale=None, **kwargs):
        logger = logging.getLogger("log.mrt.realize")
        params, features = kwargs["params"], kwargs["features"]
        precs = kwargs['precs']
        graph = kwargs['graph']
        wn = W.attr("name")
        wqn = N.n(wn)

        oprec = precs[wn].get(kwargs['oname'], oprec)
        minv, maxv = features[wn].get()
        oscale = (2**(oprec) - 1) / (maxv - minv) if oscale is None else oscale
        zpoint = minv
        params[wqn], oprec = self.int_realize(nd.relu(
            (params[wn] - zpoint) * oscale),
                                              oprec,
                                              logger=logger)
        attr = {"precision": str(oprec)}
        # TODO: CVM precision update
        # attr = {"precision": "uint"+str(oprec)}
        W = mx.sym.var(wqn, shape=params[wqn].shape, attr=attr)
        return W, oprec, oscale, zpoint
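A minimal numpy sketch of the unsigned, zero-point scheme this parameter quantizer is assumed to follow (the real int_realize helper is not reproduced): scale (maxv - minv) onto [0, 2**prec - 1] and remember minv as the zero point.

import numpy as np

def asymmetric_quantize(w, prec=8):
    minv, maxv = w.min(), w.max()
    scale = (2 ** prec - 1) / (maxv - minv)
    q = np.clip(np.round((w - minv) * scale), 0, 2 ** prec - 1).astype(np.int64)
    return q, scale, minv            # dequantize with q / scale + minv

w = np.array([-0.4, 0.0, 0.9, 1.3])
q, scale, zpoint = asymmetric_quantize(w)
print(q, q / scale + zpoint)         # second row approximates w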
Example #12
    def _separate_pad(op, **kwargs):
        name, op_name = op.attr('name'), op.attr('op_name')
        attr, childs = op.list_attr(), sutils.sym_iter(op.get_children())

        if op_name not in [Convolution.op_name]:
            return op

        if 'layout' in attr:
            assert attr['layout'] == 'NCHW'
        PH, PW = sutils.get_attr(attr, 'pad', (0, 0))
        if 'pad' in attr:
            del attr['pad']
        if PH == 0 and PW == 0:
            return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)

        childs[0] = mx.sym.pad(childs[0],
                               pad_width=(0, 0, 0, 0, PH, PH, PW, PW),
                               mode='constant',
                               constant_value=0,
                               name=N.n('pad'))
        op = sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
        return op
Example #13
    def _quant(op, **kwargs):
        op = apply_pass("quantize",
            infer_shapes=kwargs['infer_shapes'],
            features=kwargs['features'],
            cfg_dict=kwargs['cfg_dict'],
        )(op, **kwargs) if op.attr('name') not in restore_names \
            else restore(op, **kwargs)

        if is_var(op, kwargs['params']):
            return op

        name = op.attr('name')
        features, buffers = kwargs['features'], kwargs['buffers']
        precs, cfg_dict = kwargs['precs'], kwargs['cfg_dict']
        infer_shapes = kwargs['infer_shapes']
        ft = features[name]
        absmax = ft.get_threshold()
        name, op_name = op.attr('name'), op.attr('op_name')
        buf = buffers[name]
        assert buf.name == BUF_TYPE_EXP
        scale = buf.get()
        tight_prec = get_bit_exp(absmax * scale)
        if precs[name][OUT_KEY] > tight_prec:
            op = mx.sym.Custom(op,
                               precision=tight_prec,
                               name=N.n('clip'),
                               op_type='cvm_clip')
            clip_name = op.attr('name')
            infer_shapes[clip_name] = infer_shapes[name]
            features[clip_name] = ft
            precs[clip_name] = {OUT_KEY: tight_prec}
            if name in precs and name in precs[name]:
                oprec = precs[name][name]
                del precs[name][name]
                precs[clip_name][clip_name] = oprec
            buffers[clip_name] = buf
            cfg_dict[clip_name] = cfg_dict[name]

        return op
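The cvm_clip above only fires when the stored output precision is looser than what the calibrated threshold actually needs. A hedged illustration of that bound (get_bit_exp is assumed to count the signed bits needed for a magnitude):

import math

def bits_needed(absint):
    # signed bits needed to hold values in [-absint, absint]
    return int(math.ceil(math.log2(absint + 1))) + 1 if absint > 0 else 1

absmax, scale = 2.3, 40.0                      # calibrated threshold and its scale
tight_prec = bits_needed(int(absmax * scale))  # 92 fits in 8 signed bits
print(tight_prec)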
Example #14
    def _quantize_parameter(self, W, oprec, num_groups=None, **kwargs):
        """ Groupwise Convolution Quantizer
            weight (real value)
        """
        params, features = kwargs['params'], kwargs['features']
        logger = logging.getLogger("log.mrt.realize")
        precs = kwargs['precs']

        wn = W.attr('name')
        data = params[wn]
        shp = data.shape
        step = shp[0] // num_groups
        prm_slices = [
            params[wn].slice(begin=(i, None, None, None),
                             end=(i + step, None, None, None))
            for i in range(0, shp[0], step)
        ]

        oprec = precs[wn].get(kwargs['oname'], oprec)
        ft = features[wn]
        absmax_list = ft.get()
        wprec_list, wscale_list, prm_list = [], [], []
        for i, absmax in enumerate(absmax_list):
            if absmax == 0:
                wprec, wscale = 1, 1
                prm = sutils.nd_zeros((step, ) + shp[1:])
            else:
                tmp_ft = AFeature(absmax)
                wscale = self.get_scale(oprec, tmp_ft)
                prm, wprec = self.int_realize(prm_slices[i] * wscale,
                                              oprec,
                                              logger=logger)
            wprec_list.append(wprec)
            wscale_list.append(wscale)
            prm_list.append(prm)
        params[wqn] = prm = nd.concat(*prm_list, dim=0)
        W = mx.sym.var(wqn, shape=prm.shape)
        return W, wprec_list, wscale_list
Example #15
    def _quantize_operator(self, X, oprec, num_groups=None, **kwargs):
        """ Groupwise Convolution Quantizer
            symbol expansion (int version)
        """
        logger = kwargs.get('logger', logging.getLogger('log.mrt.realize'))
        params, features = kwargs['params'], kwargs['features']
        precs, buffers = kwargs['precs'], kwargs['buffers']
        graph, shift_bits = kwargs['graph'], kwargs['shift_bits']
        xn, xopn = X.attr('name'), X.attr('op_name')

        oprec = precs[xn].get(kwargs['oname'], oprec)
        iscale, iprec = buffers[xn].get(), precs[xn][OUT_KEY]
        ft = features[xn]
        absmax_list = ft.get()

        oscale_list = []
        for absmax in absmax_list:
            if absmax == 0:
                oscale_list.append(None)
            else:
                tmp_ft = AFeature(absmax)
                oscale = self.get_scale(oprec, tmp_ft)
                oscale_list.append(oscale)

        sb = iprec - oprec
        if sb > shift_bits:
            iprec -= sb
            X = tutils.realize(X, sb, iprec)
            iscale = iscale / (2**sb)

        xprec_list, xscale_list, sb_list, var_list = [], [], [], []
        if iprec > oprec:
            for i, absmax in enumerate(absmax_list):
                if absmax == 0:
                    xprec_list.append(1)
                    xscale_list.append(1)
                    sb_list.append(1)
                    var_list.append(sutils.nd_const(1, graph, params))
                else:
                    rescale = oscale_list[i] / iscale
                    bits = MAX_BIT - iprec
                    frac, exp = sim.cvm_float(rescale, bits)
                    sim_scale = frac * (2**exp)
                    scale_err = abs((sim_scale - rescale) / rescale)
                    if scale_err > 0.001:
                        logger.warn(
                            "Operator  %-20s name=%-40s quantize with sb=%s" +
                            " scale=%s, error=%s", xopn, xn, sb, iscale,
                            scale_err)
                    xscale = iscale * frac * (2**exp)
                    if frac > 1:
                        var = sutils.nd_const(frac, graph, params)
                        # X = mx.sym.broadcast_mul(
                        # X, var, name=N.n("mrt_quantize_scale"))
                    else:
                        var = sutils.nd_const(1, graph, params)
                    xprec = self.get_prec(xscale * absmax)
                    # X = tutils.realize(X, -exp, xprec)
                    logger.debug(
                        "Operator  %-20s name=%-40s slice %s requantize" +
                        " with scale=%-16.8f<%d, %d>" +
                        " iprec=%s, iscale=%-10.5f, xprec=%s, xscale=%-10.5f",
                        xopn, xn, i, rescale, frac, exp, iprec, iscale, xprec,
                        xscale)
                    xprec_list.append(xprec)
                    xscale_list.append(xscale)
                    sb_list.append(-exp)
                    var_list.append(var)
            # broadcast_mul list of frac
            xshp = kwargs['infer_shapes'][xn][sutils.get_entry_id(X)]
            frac = mx.sym.concat(*var_list, name=N.n('concat_mul_frac'))
            frac = mx.sym.reshape(frac,
                                  shape=(1, xshp[1], 1, 1),
                                  name=N.n('reshape_mul_frac'))
            X = mx.sym.broadcast_mul(X, frac, name=N.n('mrt_quantize_scale'))
            # realize
            X = self._realize_ch(X, sb_list, xprec_list)
        else:
            xprec_list = [
                1 if absmax == 0 else iprec for absmax in absmax_list
            ]
            xscale_list = [
                1 if absmax == 0 else iscale for absmax in absmax_list
            ]
            logger.debug(
                "Operator  %-20s name=%-40s clip with iprec=%s, oprec=%s",
                xopn, xn, iprec, oprec)
        return X, xprec_list, xscale_list
Example #16
def _quantize_scale_zp(op, **kwargs):
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    graph, params = kwargs['graph'], kwargs['params']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    oprec = kwargs['op_input_precs'][op_name]
    oscales = []
    for c in childs:
        cquant_type = cfg_dict[c.attr('name')]['quant_type']
        cquant = get_quantizer(cquant_type)
        ft = features[c.attr('name')]
        oscale = cquant.get_scale(oprec, ft)
        oscales.append(oscale)
    oscale = min(oscales)
    buffers[name] = SBuffer(oscale)
    nodes, cprecs = [], []

    for c in childs:
        cquant_type = cfg_dict[c.attr('name')]['quant_type']
        cquant = get_quantizer(cquant_type)
        if cquant.name == USQuantizer.name:
            c, cprec, _ = cquant.quantize(c,
                                          oprec,
                                          oscale=oscale,
                                          oname=name,
                                          **kwargs)
        elif cquant.name == UAQuantizer.name:
            c, cprec, cscale, czpoint = cquant.quantize(c,
                                                        oprec,
                                                        oscale=oscale,
                                                        oname=name,
                                                        **kwargs)
            czint = round(czpoint * cscale)
            Cz = nd_const(czint, graph, params)
            nodes.append(Cz)
            cprecs.append(get_bit_exp(czint))
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [Concat.op_name]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        infer_prec = max(cprecs)
    elif op_name in [BroadcastAdd.op_name]:
        while len(nodes) > 1:
            tname = N.n('broadcast_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.broadcast_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(len(nodes))
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    elif op_name in [AddN.op_name]:
        while len(nodes) > 1:
            tname = N.n('elemwise_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.elemwise_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(len(nodes))
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s", op_name,
                 name, buffers[name].serialize(), cns)
    return op
Example #17
    def quantize(self, op, **kwargs):
        params, graph = kwargs['params'], kwargs['graph']
        buffers, precs = kwargs['buffers'], kwargs['precs']
        features, cfg_dict = kwargs['features'], kwargs['cfg_dict']
        name, op_name = op.attr('name'), op.attr('op_name')
        childs, attr = sym_iter(op.get_children()), op.list_attr()
        cns = [c.attr('name') for c in childs] if childs else []

        oprec = kwargs['op_input_precs'][op_name]
        th = features[cns[0]].get()
        xs = scale_exp(th, oprec)
        quant_type = cfg_dict[cns[0]]['quant_type']
        assert quant_type == USQuantizer.name
        quant = get_quantizer(quant_type)
        X, xprec, xs = quant.quantize(childs[0],
                                      oprec,
                                      oscale=xs,
                                      oname=name,
                                      **kwargs)
        axis = get_attr(attr, 'axis', -1)
        lambd = kwargs['softmax_lambd']
        alpha = int(lambd * xs)
        var = nd_const(alpha, graph, params)
        max_axis = mx.sym.max(X, axis=axis, keepdims=True)
        offset = mx.sym.broadcast_sub(max_axis,
                                      var,
                                      name=N.n('softmax_offset'))
        offset = realize(offset, 0, xprec)
        norm = mx.sym.broadcast_sub(X, offset, name=N.n('softmax_normalize'))
        norm = mx.sym.relu(norm, name=N.n('Softmax_filter'))
        norm = realize(norm, 0, xprec)

        data = sutils.nd_arange(0, alpha + 1)
        table = nd.exp(data / xs)

        tprec = get_bit_exp(math.exp(lambd))
        table = nd.clip(table, a_min=0, a_max=get_range_exp(tprec))
        W_name = N.n('cvm_lut_weight')
        params[W_name] = weight = table.round().reshape(alpha + 1, 1)
        wattr = {'precision': str(tprec)}
        W = graph[W_name] = mx.sym.var(W_name, shape=weight.shape, attr=wattr)
        # lut = mx.sym.Custom(norm, W, in_dim=alpha+1,
        #                     name=name, op_type='cvm_lut')
        lut = mx.sym.Custom(norm,
                            W,
                            in_dim=alpha + 1,
                            name=N.n('softmax_lut'),
                            op_type='cvm_lut')
        sum_lut = mx.sym.sum(lut,
                             axis=axis,
                             keepdims=True,
                             name=N.n("softmax_sum"))

        oprec = min(15, 31 - tprec)
        assert oprec > 8, "operator softmax(%s) lambda(%d) is too large" \
                % (name, lambd)
        oscale = get_range_exp(oprec)
        var_scale = nd_const(oscale, graph, params)
        prob = mx.sym.broadcast_mul(lut,
                                    var_scale,
                                    name=N.n("softmax_output_scale"))
        half_lut = realize(sum_lut, 1, 31)
        prob = mx.sym.broadcast_add(prob, half_lut, name=N.n("softmax_round"))
        op = mx.sym.broadcast_div(prob, sum_lut, name=N.n("softmax_prob"))
        op = op.astype('int32').astype('float32')
        # op = mx.sym.floor(op) # simulate integer division
        # op = realize(op, 0, oprec)
        op = realize(op, 0, oprec, name=name)
        # oname = op.attr('name')
        precs[name][OUT_KEY] = oprec
        # precs[oname] = {OUT_KEY: oprec}
        # scales[oname] = scales[name] = oscale
        buffers[name] = SBuffer(oscale)

        logger = logging.getLogger('log.mrt.realize')
        logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                     op_name, name, buffers[name].serialize(), cns)
        return op
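End to end, the integer softmax above can be sketched with numpy (the lambda clipping and LUT layout follow the code; all names are illustrative):

import numpy as np

def int_softmax(x_int, xscale, lambd=10, oprec=15):
    alpha = int(lambd * xscale)
    # shift so the largest entry maps to alpha, drop anything below zero
    norm = np.clip(x_int - (x_int.max() - alpha), 0, alpha)
    table = np.round(np.exp(np.arange(0, alpha + 1) / xscale))   # exp LUT
    lut = table[norm]
    oscale = 2 ** oprec - 1
    # integer division with rounding approximates lut / sum(lut)
    return np.floor((lut * oscale + lut.sum() // 2) / lut.sum()), oscale

x = np.array([1.0, 2.0, 3.0])
xscale = 16.0
probs_int, oscale = int_softmax(np.round(x * xscale).astype(np.int64), xscale)
print(probs_int / oscale, np.exp(x) / np.exp(x).sum())   # rows roughly agree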
Example #18
    def quantize(self, op, **kwargs):
        features, buffers = kwargs['features'], kwargs['buffers']
        precs, graph = kwargs['precs'], kwargs['graph']
        cfg_dict, params = kwargs['cfg_dict'], kwargs['params']
        name, op_name = op.attr('name'), op.attr('op_name')
        childs, attr = sym_iter(op.get_children()), op.list_attr()
        cns = [c.attr('name') for c in childs] if childs else []

        # assert len(childs) == 2 and 'pad' not in attr
        assert len(childs) == 2
        xquant_type = cfg_dict[cns[0]]['quant_type']
        wquant_type = cfg_dict[cns[1]]['quant_type']
        X, W = childs
        xquant, wquant = \
            get_quantizer(xquant_type), get_quantizer(wquant_type)
        oprec = kwargs['op_input_precs'][op_name]

        if xquant_type == wquant_type == USQuantizer.name:
            op = _quantize_xw(op, **kwargs)
        elif xquant_type == USQuantizer.name and \
            wquant_type == UAQuantizer.name:
            Xq, xprec, xscale = xquant.quantize(X, oprec, oname=name, **kwargs)
            Wq, wprec, wscale, wzpoint = wquant.quantize(W,
                                                         oprec,
                                                         oname=name,
                                                         **kwargs)
            buffers[name] = get_buffer_exp(xscale * wscale)

            Ye1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
            wshp = params[cns[1]].shape
            pd = int(np.product(wshp[1:]))
            infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec

            W1 = nd_full_const(1, wshp, graph, params)
            Ye2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
            wzint = round(wzpoint * wscale)
            Wz = nd_const(wzint, graph, params)
            Ye2 = mx.sym.broadcast_mul(Wz, Ye2, name=N.n('broadcast_mul'))
            infer_prec2 = get_bit_cnt_exp(pd) + xprec + get_bit_exp(wzint)

            op = mx.sym.elemwise_add(Ye1, Ye2, name=name)
            precs[name][OUT_KEY] = max(infer_prec1, infer_prec2) + 1
            buffers[name] = get_buffer_exp(xscale * wscale)
        elif xquant_type == UAQuantizer.name and \
            wquant_type == USQuantizer.name:
            Xq, xprec, xscale, Xzp = xquant.quantize(X,
                                                     oprec,
                                                     oname=name,
                                                     **kwargs)
            Wq, wprec, wscale = wquant.quantize(W, oprec, oname=name, **kwargs)
            buffers[name] = get_buffer_exp(xscale * wscale)

            Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
            wshp = params[cns[1]].shape
            pd = np.product(wshp[1:])
            infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 1

            xshp = params[cns[0]].shape
            X1 = nd_full(1, xshp, graph, params)
            Y2 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
            Y2 = mx.sym.broadcast_mul(Xzp, Y2, name=N.n('broadcast_mul'))
            xzp = params[Xzp.attr('name')].asscalar()
            infer_prec2 = get_bit_cnt_exp(abs(xzp) * pd) + wprec

            op = mx.sym.elemwise_add(Y1, Y2, name=N.n('elemwise_add'))
            infer_prec = max(infer_prec1, infer_prec2) + 1
            precs[name][OUT_KEY] = infer_prec
        elif xquant_type == wquant_type == UAQuantizer.name:
            Xq, xprec, xscale, Xzp = xquant.quantize(X,
                                                     oprec,
                                                     oname=name,
                                                     **kwargs)
            Wq, wprec, wscale, Wzp = wquant.quantize(W,
                                                     oprec,
                                                     oname=name,
                                                     **kwargs)
            buffers[name] = get_buffer_exp(xscale * wscale)

            nodes, infer_precs = [], []

            Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
            nodes.append(Y1)
            wshp = params[cns[1]].shape
            pd = np.product(wshp[1:])
            infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 2
            infer_precs.append(infer_prec1)

            W1 = nd_full_const(1, wshp, graph, params)
            Y2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
            Y2 = mx.sym.broadcast_mul(Wzp, Y2, name=N.n('broadcast_mul'))
            nodes.append(Y2)
            wzp = params[Wzp.attr('name')].asscalar()
            infer_prec2 = get_bit_cnt_exp(abs(wzp) * pd) + xprec + 1
            infer_precs.append(infer_prec2)

            xshp = params[cns[0]].shape
            X1 = nd_full_const(1, xshp, graph, params)
            Y3 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
            Y3 = mx.sym.broadcast_mul(Xzp, Y3, name=N.n('broadcast_mul'))
            nodes.append(Y3)
            xzp = params[Xzp.attr('name')].asscalar()
            infer_prec3 = get_bit_cnt_exp(abs(xzp) * pd) + wprec + 1
            infer_precs.append(infer_prec3)

            val = pd * abs(xzp) * abs(wzp)
            Y4 = nd_const(val, graph, params)
            nodes.append(Y4)
            infer_prec4 = get_bit_cnt_exp(val)
            infer_precs.append(infer_prec4)

            while len(nodes) > 1:
                a, b = nodes.pop(), nodes.pop()
                node = mx.sym.broadcast_add(a, b, name=N.n('broadcast_add'))
                nodes.append(node)
            op = nodes[0]
            infer_prec = max(infer_precs) + 2
            precs[name][OUT_KEY] = infer_prec
        elif xquant_type == GroupConvQuant.name and \
            wquant_type == GroupConvQuant.name:
            num_groups_x = cfg_dict[cns[0]]['gn_info']['num_groups']
            num_groups_w = cfg_dict[cns[1]]['gn_info']['num_groups']
            assert num_groups_x == num_groups_w, \
                "num_groups of x and weight should be equal, " + \
                "num_groups of x: {}, num_groups of weight: {}".format(
                    num_groups_x, num_groups_w)
            Xq, xprec_list, xscale_list = xquant.quantize(
                X, oprec, oname=name, num_groups=num_groups_x, **kwargs)
            Wq, wprec_list, wscale_list = wquant.quantize(
                W, oprec, oname=name, num_groups=num_groups_w, **kwargs)
            op = get_mxnet_op(op_name)(Xq, Wq, **attr, name=name)
            IPG = kwargs['infer_shapes'][cns[1]][get_entry_id(W)][1]
            kprec = get_bit_cnt_exp(IPG)
            infer_prec_list = [
                kprec + wprec_list[i] + xprec_list[i] \
                for i in range(len(wprec_list))
            ]
            oscale_list = [
                xscale_list[i] * wscale_list[i] \
                for i in range(len(wscale_list))
            ]
            assert False, "implementing..."
        else:
            raise NotImplementedError(
                "Quantization type not implemented, "
                "op: {}, Xquant: {}, Wquant: {}".format(
                    op_name, xquant_type, wquant_type))

        logger = logging.getLogger('log.mrt.realize')
        logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                     op_name, name, buffers[name].serialize(), cns)
        return op
Example #19
    def slice_channel(self, op, **kwargs):
        name, op_name = op.attr('name'), op.attr('op_name')
        attr, childs = op.list_attr(), sym_iter(op.get_children())
        cns = [c.attr('name') for c in childs]
        cfg_dict = kwargs['cfg_dict']
        infer_shapes = kwargs['infer_shapes']

        gn_info = cfg_dict[name]['gn_info']
        ichannel, step = gn_info['ichannel'], gn_info['step']
        assert ichannel == 1

        assert len(childs) == 2
        X, W = childs
        xshp = infer_shapes[cns[0]][get_entry_id(childs[0])]
        wshp = infer_shapes[cns[1]][get_entry_id(childs[1])]
        oshp = infer_shapes[name][get_entry_id(op)]
        assert len(xshp) == len(wshp) == 4 and xshp[1] % step == 0

        xi_cfg_info, wi_cfg_info = cfg_dict[cns[0]], cfg_dict[cns[1]]
        xi_cfg_info['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
        wi_cfg_info['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
        yi_cfg_info = {
            'gn_info': {
                'gn_type': LAYER_WISE_TYPE
            },
            'quant_type': US_QUANT_TYPE,
            'opt_info': cfg_dict[name]['opt_info'],
        }
        num_group = eval(attr['num_group'])
        C, IC, OC = xshp[1], wshp[1], wshp[0]
        assert num_group * IC == C and OC >= num_group and OC % num_group == 0
        if num_group == 1:
            xs = sym_slice(X, ichannel, step, **kwargs)
            ws = sym_slice(W, ichannel, step, **kwargs)
            nodes = []
            j = 0
            for i in range(0, C, step):
                suffix = '_' + str(i) + '-' + str(i + step)
                xni = xs[j].attr('name')
                cfg_dict[xni] = xi_cfg_info
                wni = ws[j].attr('name')
                cfg_dict[wni] = wi_cfg_info
                yni = N.n(name + suffix)
                Yi = get_mxnet_op(op_name)(xs[j], ws[j], **attr, name=yni)
                cfg_dict[yni] = yi_cfg_info
                nodes.append(Yi)
                j += 1
            assert len(nodes) > 1
            op = mx.sym.add_n(*nodes, name=name)

            # # transpose and reshape weight
            # Wt = mx.sym.transpose(W, axes=(1,0,2,3), name=N.n('transpose'))
            # rshp = (OC*IC,1,) + wshp[2:]
            # wrn = N.n('reshape')
            # cfg_dict[wrn] = wi_cfg_info
            # Wr = mx.sym.reshape(Wt, shape=rshp, name=wrn)
            # # groupwise convolution
            # nattr = attr.copy()
            # nattr['num_group'] = IC
            # nattr['num_filter'] = IC * OC
            # conv_name = N.n('groupwise_convolution')
            # cfg_dict[conv_name] = yi_cfg_info
            # print(nattr, name)
            # op = mx.sym.Convolution(X, Wr, **nattr, name=conv_name)
            # # reshape output
            # rname = N.n('reshape')
            # cfg_dict[rname] = yi_cfg_info
            # rshp = (-1, IC, OC,) + oshp[2:]
            # op = mx.sym.reshape(op, shape=rshp, name=rname)
            # # sum
            # sum_name = N.n('sum')
            # cfg_dict[sum_name] = yi_cfg_info
            # op = mx.sym.sum(op, axis=1, keepdims=False, name=sum_name)
        else:
            assert step == 1
            xs = sym_slice(X, ichannel, step, **kwargs)
            ws = kernel_slice_2d(W, **kwargs)
            OPG = OC // num_group
            nattr = attr.copy()
            nattr['num_group'] = '1'
            nattr['num_filter'] = '1'
            nodes = []
            for o in range(OC):
                nnodes = []
                j = int(o / OPG) * IC
                for i in range(IC):
                    suffix = '_' + str(o) + '-' + str(i)
                    k = i + j
                    xk, woi = xs[k], ws[o][i]
                    xnk, wnoi = xk.attr('name'), woi.attr('name')
                    cfg_dict[xnk] = xi_cfg_info
                    cfg_dict[wnoi] = wi_cfg_info
                    ynoi = N.n(name + suffix)
                    yoi = mx.sym.Convolution(xk, woi, **nattr, name=ynoi)
                    cfg_dict[ynoi] = yi_cfg_info
                    nnodes.append(yoi)
                if len(nnodes) > 1:
                    zni = N.n(name + '_add_n_' + str(o))
                    zi = mx.sym.add_n(*nnodes, name=zni)
                    cfg_dict[zni] = yi_cfg_info
                else:
                    zi = nnodes[0]
                nodes.append(zi)
            assert len(nodes) > 1
            op = mx.sym.concat(*nodes, dim=1, name=name)
        return op