def _quantize_operator(self, X, oprec, oscale=None, **kwargs):
    """ Symmetric Quantization of symbol expansion (int value)

        Rescale the integer symbol `X` from the scale/precision recorded
        for it in `buffers`/`precs` to the requested output precision,
        or to (an approximation of) `oscale` when one is supplied.

        Parameters
        ----------
        X : symbol to requantize; its `name` attribute indexes `precs`,
            `buffers` and `features`.
        oprec : default output precision; overridden by
            `precs[name][kwargs['oname']]` when that entry exists.
        oscale : optional target scale; when given, the requantize branch
            is always taken.
        kwargs : must provide `params`, `features`, `precs`, `buffers`,
            `graph`, `shift_bits` and `oname`; `logger` is optional.

        Returns
        -------
        (X, oprec, oscale) : rewritten symbol, its precision and scale.
    """
    logger = kwargs.get("logger", logging.getLogger("log.mrt.realize"))
    params, features = kwargs["params"], kwargs["features"]
    precs, buffers = kwargs["precs"], kwargs["buffers"]
    graph, shift_bits = kwargs["graph"], kwargs["shift_bits"]
    xn, xopn = X.attr("name"), X.attr("op_name")
    # NOTE(review): the generated name is never used; the call is kept in
    # case N.n() advances a naming counter -- confirm before removing.
    xqn = N.n(xn)

    oprec = precs[xn].get(kwargs['oname'], oprec)
    iscale, iprec = buffers[xn].get(), precs[xn][OUT_KEY]
    ft = features[xn]
    absmax = ft.get()
    if absmax == 0:
        # All-zero data: nothing to rescale.
        return X, 1, 1 if oscale is None else oscale

    # An explicitly requested scale forces the requantize branch below.
    exactly = oscale is not None
    oscale = self.get_scale(oprec, ft) if oscale is None else oscale

    sb = iprec - oprec
    if sb > shift_bits:
        # Too many excess bits: shift them away first.
        iprec -= sb
        X = tutils.realize(X, sb, iprec)
        iscale = iscale / (2**sb)

    if exactly or iprec > oprec:
        rescale = oscale / iscale
        bits = MAX_BIT - iprec
        # Approximate the rescale factor as frac * 2**exp.
        frac, exp = sim.cvm_float(rescale, bits)
        sim_scale = frac * (2**exp)
        scale_err = abs((sim_scale - rescale) / rescale)
        if scale_err > 0.001:
            # Logger.warn() is deprecated (removed in Python 3.13).
            logger.warning(
                "Operator  %-20s name=%-40s quantize with sb=%s" +
                " scale=%s, error=%s",
                xopn, xn, sb, iscale, scale_err)
        oscale = iscale * frac * (2**exp)
        if frac > 1:
            var = sutils.nd_const(frac, graph, params)
            X = mx.sym.broadcast_mul(X, var,
                                     name=N.n("mrt_quantize_scale"))
        oprec = self.get_prec(oscale * absmax)
        X = tutils.realize(X, -exp, oprec)
        logger.debug(
            "Operator  %-20s name=%-40s requantize" +
            " with scale=%-16.8f<%d, %d>" +
            " iprec=%s, iscale=%-10.5f, oprec=%s, oscale=%-10.5f",
            xopn, xn, rescale, frac, exp, iprec, iscale, oprec, oscale)
    else:
        # Input already fits: keep its precision and scale.
        oprec, oscale = iprec, iscale
        logger.debug(
            "Operator  %-20s name=%-40s clip with iprec=%s, oprec=%s",
            xopn, xn, iprec, oprec)
    return X, oprec, oscale
def _quantize_parameter(self, W, oprec, oscale=None, **kwargs):
    """ Symmetric Quantization of weight (real value)

        Scale the parameter named by `W`, realize it as integers under a
        freshly generated name in `params`, and return a variable symbol
        for it together with the resulting precision and scale.
    """
    log = logging.getLogger("log.mrt.realize")
    params = kwargs["params"]
    features = kwargs["features"]
    precs = kwargs['precs']

    src_name = W.attr("name")
    dst_name = N.n(src_name)
    oprec = precs[src_name].get(kwargs['oname'], oprec)
    feature = features[src_name]

    if feature.get() == 0:
        # All-zero weight: store zeros with the minimal precision.
        oprec = 1
        if oscale is None:
            oscale = 1
        params[dst_name] = sutils.nd_zeros(params[src_name].shape)
    else:
        if oscale is None:
            oscale = self.get_scale(oprec, feature)
        scaled = params[src_name] * oscale
        params[dst_name], oprec = self.int_realize(
            scaled, oprec, logger=log)

    # TODO: CVM precision update
    # attr = {"precision": "int"+str(oprec)}
    var_attr = {"precision": str(oprec)}
    quantized = mx.sym.var(
        dst_name, shape=params[dst_name].shape, attr=var_attr)
    return quantized, oprec, oscale
def verify_batch_dot(ashp, bshp, transpose_a, transpose_b):
    """ Compare a sliced rewrite of `batch_dot` against `nd.batch_dot`.

        Random inputs of shapes `ashp`/`bshp` are multiplied once with the
        reference operator and once with the rewrite (explicit transposes,
        and slicing along the reduction axis when it exceeds 4096). The
        norms returned by `get_norm` and the norm of their difference are
        printed.

        Parameters
        ----------
        ashp, bshp : 3-element shape tuples of the two operands.
        transpose_a, transpose_b : whether to transpose the last two axes
            of the respective operand before multiplying.

        Raises
        ------
        AssertionError : if either shape is not 3-D, the reduction axes
            disagree, or the rewritten result shape differs.
    """
    # Validate ranks up front so bad shapes fail before any array is
    # built. The message reports the shapes; it previously referenced
    # names that do not exist in this function.
    andims, bndims = len(ashp), len(bshp)
    assert andims == 3 and bndims == 3, \
        "batch_dot currently only support 3D*3D array." + \
        " ashp: (%s), bshp: (%s)" % (ashp, bshp)

    A_np = np.random.uniform(size=ashp)
    B_np = np.random.uniform(size=bshp)
    A = nd.array(A_np)
    B = nd.array(B_np)

    # org op
    y = nd.batch_dot(A, B, transpose_a, transpose_b)

    # rewrite op
    if transpose_a:
        ashp = ashp[:-2] + (ashp[-1], ashp[-2])
        axes = tuple(range(andims - 2)) + (andims - 1, andims - 2)
        A = nd.transpose(A, axes=axes, name=N.n("transpose_a"))
    if transpose_b:
        bshp = bshp[:-2] + (bshp[-1], bshp[-2])
        axes = tuple(range(bndims - 2)) + (bndims - 1, bndims - 2)
        B = nd.transpose(B, axes=axes, name=N.n("transpose_b"))

    assert ashp[-1] == bshp[1]
    C, MATRIX_MAXIMUM_SIZE = ashp[-1], 4096
    if C <= MATRIX_MAXIMUM_SIZE:
        op = nd.batch_dot(A, B, name=N.n("batch_dot"))
    else:
        # Split the reduction axis into chunks and sum the partial results.
        nodes, step, start = [], MATRIX_MAXIMUM_SIZE, 0
        while start < C:
            stop = min(start + step, C)
            begin, end = (0, 0, start), (ashp[0], ashp[1], stop)
            Ak = nd.slice(A, begin=begin, end=end, name=N.n("slice_a"))
            begin, end = (0, start, 0), (bshp[0], stop, bshp[2])
            Bk = nd.slice(B, begin=begin, end=end, name=N.n("slice_b"))
            tmp = nd.batch_dot(Ak, Bk, name=N.n("batch_dot"))
            nodes.append(tmp)
            start += step
        while len(nodes) > 1:
            lhs, rhs = nodes.pop(0), nodes.pop(0)
            tmp = nd.elemwise_add(lhs, rhs, name=N.n("elemwise_add"))
            nodes.append(tmp)
        op = nodes[0]
    z = op

    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp - yp)
    print(zn, yn, rn)
def _quantize_operator(self, X, oprec, oscale=None, **kwargs):
    """ Affine (zero-point) quantization of symbol expansion (int value)

        Parameters
        ----------
        X : symbol to requantize; its `name` attribute indexes `precs`,
            `buffers` and `features`.
        oprec : default output precision; overridden by
            `precs[name][kwargs['oname']]` when that entry exists.
        oscale : optional target scale; derived from the feature's
            (min, max) range when omitted.
        kwargs : must provide `params`, `features`, `precs`, `buffers`,
            `graph`, `shift_bits` and `oname`; `logger` is optional.

        Returns
        -------
        (X, oprec, oscale, zpoint) : rewritten symbol, its precision and
            scale, and the zero point `round(minv * iscale)`.
    """
    logger = kwargs.get("logger", logging.getLogger("log.mrt.realize"))
    params, features = kwargs["params"], kwargs["features"]
    precs, buffers = kwargs["precs"], kwargs["buffers"]
    graph, shift_bits = kwargs["graph"], kwargs["shift_bits"]
    xn, xopn = X.attr("name"), X.attr("op_name")
    # NOTE(review): the generated name is never used; the call is kept in
    # case N.n() advances a naming counter -- confirm before removing.
    xqn = N.n(xn)

    oprec = precs[xn].get(kwargs['oname'], oprec)
    iscale, iprec = buffers[xn].get(), precs[xn][OUT_KEY]
    # The feature is looked up by this symbol's own name; the previous
    # code indexed with an undefined `wn` and raised NameError.
    minv, maxv = features[xn].get()
    oscale = (2**(oprec) - 1) / (maxv - minv) if oscale is None else oscale
    zpoint = round(minv * iscale)
    # NOTE(review): the zero point is subtracted here as a raw number and
    # again below as a graph constant; one of the two is presumably
    # redundant -- confirm the intended formula before relying on this.
    X = mx.sym.broadcast_sub(X, zpoint, name=N.n('minus_zp'))

    sb = iprec - oprec
    if sb > shift_bits:
        iprec -= sb
        X = tutils.realize(X, sb, iprec)
        iscale = iscale / (2**sb)

    rescale = oscale / iscale
    bits = MAX_BIT - iprec
    # Approximate the rescale factor as frac * 2**exp.
    frac, exp = sim.cvm_float(rescale, bits)
    sim_scale = frac * (2**exp)
    scale_err = abs((sim_scale - rescale) / rescale)
    if scale_err > 0.001:
        # Logger.warn() is deprecated (removed in Python 3.13).
        logger.warning(
            "Operator  %-20s name=%-40s quantize with sb=%s" +
            " scale=%s, error=%s",
            xopn, xn, sb, iscale, scale_err)
    oscale = iscale * frac * (2**exp)
    if frac > 1:
        var = sutils.nd_const(frac, graph, params)
        X = mx.sym.broadcast_mul(X, var, name=N.n("mrt_quantize_scale"))
    # NOTE(review): placed outside the `frac > 1` branch because the
    # original indentation was lost; verify against the upstream file.
    Zp = sutils.nd_const(zpoint, graph, params)
    X = mx.sym.broadcast_sub(X, Zp, name=N.n('minus_zp'))
    oprec = self.get_prec(oscale * (maxv - minv))
    X = tutils.realize(X, -exp, oprec)
    logger.debug(
        "Operator  %-20s name=%-40s requantize" +
        " with scale=%-16.8f<%d, %d>" +
        " iprec=%s, iscale=%-10.5f, oprec=%s, oscale=%-10.5f",
        xopn, xn, rescale, frac, exp, iprec, iscale, oprec, oscale)
    return X, oprec, oscale, zpoint
def _quantize_table(op, **kwargs):
    """ Replace a unary operator with an integer lookup table.

        The first input is quantized, shifted by `alpha` to a
        non-negative index, and fed to a `cvm_lut` custom operator whose
        table holds the operator evaluated over `[-alpha, alpha]`, scaled
        and rounded. Precision and scale of the result are recorded in
        `precs` and `buffers` under the operator's name.
    """
    params, graph = kwargs['params'], kwargs['graph']
    features = kwargs['features']
    precs = kwargs['precs']
    buffers = kwargs['buffers']
    cfg_dict = kwargs['cfg_dict']

    name = op.attr('name')
    op_name = op.attr('op_name')
    childs = sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []
    first = cns[0]

    # Quantize the input with the quantizer configured for it.
    quantizer = get_quantizer(cfg_dict[first]['quant_type'])
    iprec = kwargs['op_input_precs'][op_name]
    in_scale = scale_exp(features[first].get(), iprec)
    X, xprec, in_scale = quantizer.quantize(
        childs[0], iprec, oscale=in_scale, oname=name, **kwargs)

    # Shift the input by alpha so it can be used as a table index.
    alpha = get_range_exp(xprec)
    table_size = 2 * alpha + 1
    shift = nd_const(alpha, graph, params)
    X = mx.sym.broadcast_add(X, shift, name=N.n(op_name + '_offset'))

    # Evaluate the float operator on every representable input value.
    grid = sutils.nd_arange(-alpha, alpha + 1) / in_scale
    out = sutils.get_nd_op(op_name)(grid)

    oprec = precs[name].get(OUT_KEY, 16)
    oscale = scale_exp(out.abs().max().asscalar(), oprec)
    buffers[name] = SBuffer(oscale)

    W_name = N.n("cvm_lut_weight")
    weight = (out * oscale).round().reshape(table_size, 1)
    params[W_name] = weight
    W = mx.sym.var(W_name, shape=weight.shape,
                   attr={'precision': str(oprec)})
    graph[W_name] = W
    op = mx.sym.Custom(X, W, in_dim=table_size,
                       name=name, op_type='cvm_lut')
    precs[name][OUT_KEY] = oprec

    logging.getLogger('log.mrt.realize').debug(
        "operator  %-20s name=%-40s oscale=%s, iscale=%s",
        op_name, name, buffers[name].serialize(), cns)
    return op
def _quantize_scale(op, **kwargs):
    """ Quantize a merge operator whose inputs must share one scale.

        Every input is requantized with the symmetric quantizer to a
        common scale derived from the largest input feature, then the
        operator is rebuilt on the quantized inputs. The output precision
        and scale are recorded in `precs` and `buffers`.

        Raises
        ------
        NotImplementedError : if `op_name` is not one of the supported
            merge operators.
    """
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    assert all(features[cn].name == FT_TYPE_EXP for cn in cns)
    absmax = max(features[cn].get() for cn in cns)
    oprec = kwargs['op_input_precs'][op_name]
    oscale = scale_exp(absmax, oprec)
    buffers[name] = SBuffer(oscale)

    assert all(cfg_dict[cn]['quant_type'] == USQuantizer.name
               for cn in cns)
    quant = get_quantizer(USQuantizer.name)
    nodes, cprecs = [], []
    for c in childs:
        c, cprec, _ = quant.quantize(
            c, oprec, oscale=oscale, oname=name, **kwargs)
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [
            Concat.op_name, BroadcastAdd.op_name, ElemwiseAdd.op_name,
            ElemwiseSub.op_name, SliceLike.op_name]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        infer_prec = max(cprecs) if op_name == Concat.op_name \
            else max(cprecs) + 1
    elif op_name == AddN.op_name:
        # Take the operand count BEFORE the pairwise reduction; measuring
        # it afterwards always yields 1 and under-estimates the bits the
        # sum needs.
        num_terms = len(nodes)
        while len(nodes) > 1:
            # Only the final addition carries the operator's own name.
            tname = N.n('elemwise_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.elemwise_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(num_terms)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        # The message is formatted here; passing the values as extra
        # exception arguments left the placeholders unfilled.
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                 op_name, name, buffers[name].serialize(), cns)
    return op
def _realize_ch(self, X, sbs, precs, name=None): name = name if name else N.n('realize_ch') attrs = { "sbs": ','.join([str(sb) for sb in sbs]), "precs": ','.join([str(prec) for prec in precs]), "op_type": "cvm_right_shift_channel", } if all([sb > 0 for sb in sbs]): sym = mx.sym.Custom(X, name=name, **attrs) else: raise NotImplementedError( "realize_ch has not be implemented for sbs: {}".format(sbs)) return sym
def _separate_bias(op, **kwargs):
    """ Split the bias of a Convolution/FullyConnected into a separate
        `broadcast_add`.

        Operators of any other type, or with fewer than three inputs, are
        returned unchanged. Otherwise the operator is rebuilt with
        `no_bias` set and the bias, expanded to a broadcastable rank, is
        added on top under the original operator name.
    """
    name, op_name = op.attr('name'), op.attr('op_name')
    attr = op.list_attr()
    childs = sutils.sym_iter(op.get_children())

    lacks_bias = childs and len(childs) < 3
    if lacks_bias or op_name not in \
            (Convolution.op_name, FullyConnected.op_name):
        return op

    attr['no_bias'] = True
    biasless = sutils.get_mxnet_op(op_name)(
        childs[0], childs[1], **attr, name=N.n(name))
    bias = childs[2]
    bias_name = bias.attr('name')

    if op_name == Convolution.op_name:
        if 'layout' in attr:
            assert attr['layout'] == 'NCHW'
        # Three successive expand_dims; the last one carries the name
        # derived from the bias.
        expansions = ((0, 'expand_dims'), (-1, 'expand_dims'),
                      (-1, bias_name))
    else:
        expansions = ((0, bias_name),)

    for axis, base in expansions:
        bias = mx.sym.expand_dims(bias, axis=axis, name=N.n(base))

    return mx.sym.broadcast_add(biasless, bias, name=name)
def sym_slice(X, ichannel, step, **kwargs):
    """ Cut symbol `X` into consecutive slices of width `step` along axis
        `ichannel`.

        The shape of `X` is read from `kwargs['infer_shapes']`. Returns
        the list of slice symbols in order of increasing offset.
    """
    name = X.attr('name')
    shape = kwargs['infer_shapes'][name][get_entry_id(X)]

    # `None` leaves an axis unsliced; only `ichannel` gets real bounds.
    lead = (None, ) * ichannel
    trail = (None, ) * (len(shape) - ichannel - 1)

    pieces = []
    for lo in range(0, shape[ichannel], step):
        hi = lo + step
        piece = mx.sym.slice(
            X,
            begin=lead + (lo, ) + trail,
            end=lead + (hi, ) + trail,
            name=N.n("{}_{}-{}".format(name, lo, hi)))
        pieces.append(piece)
    return pieces
def kernel_slice_2d(W, **kwargs):
    """ Slice a 4-D kernel symbol into a grid of unit slices.

        The shape of `W` is read from `kwargs['infer_shapes']`. Returns a
        list with one entry per index `o` of axis 0, each a list with one
        slice per index `i` of axis 1.
    """
    name = W.attr('name')
    shape = kwargs['infer_shapes'][name][get_entry_id(W)]
    num_out, num_in = shape[:2]

    grid = []
    for o in range(num_out):
        # Axis-0 slice, then one axis-1 slice per input index.
        row = mx.sym.slice(W,
                           begin=(o, None, None, None),
                           end=(o + 1, None, None, None))
        grid.append([
            mx.sym.slice(row,
                         begin=(None, i, None, None),
                         end=(None, i + 1, None, None),
                         name=N.n("{}_{}-{}".format(name, o, i)))
            for i in range(num_in)
        ])
    return grid
def _quantize_parameter(self, W, oprec, oscale=None, **kwargs):
    """ Affine (zero-point) quantization of weight (real value)

        The parameter is shifted by its minimum, scaled, passed through
        `nd.relu` and realized as integers under a freshly generated name
        in `params`.

        Parameters
        ----------
        W : variable symbol whose `name` indexes `params`, `precs` and
            `features`.
        oprec : default output precision; overridden by
            `precs[name][kwargs['oname']]` when that entry exists.
        oscale : optional scale; derived from the feature's (min, max)
            range when omitted.
        kwargs : must provide `params`, `features`, `precs` and `oname`.

        Returns
        -------
        (W, oprec, oscale, zpoint) : quantized variable symbol, its
            precision and scale, and the zero point (the feature minimum).
    """
    logger = logging.getLogger("log.mrt.realize")
    params, features = kwargs["params"], kwargs["features"]
    precs = kwargs['precs']
    wn = W.attr("name")
    wqn = N.n(wn)

    oprec = precs[wn].get(kwargs['oname'], oprec)
    minv, maxv = features[wn].get()
    if oscale is None:
        value_range = maxv - minv
        # A constant tensor has zero range; every shifted value is then 0
        # and any finite scale is valid, so use 1 instead of dividing by
        # zero.
        oscale = (2**(oprec) - 1) / value_range if value_range != 0 else 1
    zpoint = minv
    params[wqn], oprec = self.int_realize(
        nd.relu((params[wn] - zpoint) * oscale), oprec, logger=logger)

    attr = {"precision": str(oprec)}
    # TODO: CVM precision update
    # attr = {"precision": "uint"+str(oprec)}
    W = mx.sym.var(wqn, shape=params[wqn].shape, attr=attr)
    return W, oprec, oscale, zpoint
def _separate_pad(op, **kwargs):
    """ Move the `pad` attribute of a Convolution into an explicit
        `mx.sym.pad` on its first input.

        Non-Convolution operators are returned unchanged. The Convolution
        is always rebuilt without its `pad` attribute; the pad node is
        only inserted when either padding value is non-zero.
    """
    name, op_name = op.attr('name'), op.attr('op_name')
    attr = op.list_attr()
    childs = sutils.sym_iter(op.get_children())

    if op_name != Convolution.op_name:
        return op
    if 'layout' in attr:
        assert attr['layout'] == 'NCHW'

    PH, PW = sutils.get_attr(attr, 'pad', (0, 0))
    attr.pop('pad', None)

    if PH != 0 or PW != 0:
        # The first two axes get no padding; the last two get PH and PW.
        childs[0] = mx.sym.pad(childs[0],
                               pad_width=(0, 0, 0, 0, PH, PH, PW, PW),
                               mode='constant', constant_value=0,
                               name=N.n('pad'))
    return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
def _quant(op, **kwargs):
    """ Quantize (or restore) one operator and clip it when its recorded
        precision exceeds the precision its threshold actually needs.

        `restore_names`, `restore`, `infer_shapes` and `cfg_dict` are not
        bound here and are resolved from the surrounding scope.
    """
    if op.attr('name') in restore_names:
        op = restore(op, **kwargs)
    else:
        quantize_pass = apply_pass(
            "quantize",
            infer_shapes=kwargs['infer_shapes'],
            features=kwargs['features'],
            cfg_dict=kwargs['cfg_dict'],
        )
        op = quantize_pass(op, **kwargs)

    if is_var(op, kwargs['params']):
        return op

    name = op.attr('name')
    features = kwargs['features']
    buffers = kwargs['buffers']
    precs = kwargs['precs']

    ft = features[name]
    absmax = ft.get_threshold()
    buf = buffers[name]
    assert buf.name == BUF_TYPE_EXP
    tight_prec = get_bit_exp(absmax * buf.get())

    if precs[name][OUT_KEY] <= tight_prec:
        return op

    # Insert a clip and register it under its own name everywhere the
    # original operator was registered.
    op = mx.sym.Custom(op, precision=tight_prec,
                       name=N.n('clip'), op_type='cvm_clip')
    clip_name = op.attr('name')
    infer_shapes[clip_name] = infer_shapes[name]
    features[clip_name] = ft
    precs[clip_name] = {OUT_KEY: tight_prec}
    if name in precs and name in precs[name]:
        # Move the self-keyed precision entry over to the clip node.
        precs[clip_name][clip_name] = precs[name].pop(name)
    buffers[clip_name] = buf
    cfg_dict[clip_name] = cfg_dict[name]
    return op
def _quantize_parameter(self, W, oprec, num_groups=None, **kwargs):
    """ Groupwise Convolution Quantizer weight (real value)

        The parameter is split along axis 0 into `num_groups` equal
        slices; each slice is quantized with its own scale, taken from the
        matching entry of the feature's value list.

        Parameters
        ----------
        W : variable symbol whose `name` indexes `params`, `precs` and
            `features`.
        oprec : default output precision; overridden by
            `precs[name][kwargs['oname']]` when that entry exists.
        num_groups : number of slices along axis 0. Despite the `None`
            default it must be an integer.
        kwargs : must provide `params`, `features`, `precs` and `oname`.

        Returns
        -------
        (W, wprec_list, wscale_list) : quantized variable symbol and the
            per-group precisions and scales.
    """
    params, features = kwargs['params'], kwargs['features']
    logger = logging.getLogger("log.mrt.realize")
    precs = kwargs['precs']
    wn = W.attr('name')
    data = params[wn]
    shp = data.shape
    step = shp[0] // num_groups
    prm_slices = [
        data.slice(begin=(i, None, None, None),
                   end=(i + step, None, None, None))
        for i in range(0, shp[0], step)
    ]

    oprec = precs[wn].get(kwargs['oname'], oprec)
    absmax_list = features[wn].get()
    wprec_list, wscale_list, prm_list = [], [], []
    for i, absmax in enumerate(absmax_list):
        if absmax == 0:
            # All-zero group: store zeros with the minimal precision.
            wprec, wscale = 1, 1
            prm = sutils.nd_zeros((step, ) + shp[1:])
        else:
            wscale = self.get_scale(oprec, AFeature(absmax))
            prm, wprec = self.int_realize(
                prm_slices[i] * wscale, oprec, logger=logger)
        wprec_list.append(wprec)
        wscale_list.append(wscale)
        prm_list.append(prm)

    # Register the quantized tensor under the new name, as the other
    # parameter quantizers do; it was previously computed and dropped,
    # leaving the returned variable without a parameter.
    wqn = N.n(wn)
    params[wqn] = nd.concat(*prm_list, dim=0)
    W = mx.sym.var(wqn, shape=params[wqn].shape)
    return W, wprec_list, wscale_list
def _quantize_operator(self, X, oprec, num_groups=None, **kwargs):
    """ Groupwise Convolution Quantizer symbol expansion (int version)

        Each entry of the feature's value list gets its own target scale.
        When the input precision exceeds the output precision, the
        per-group multipliers are concatenated into one tensor, multiplied
        in, and the per-group shifts applied through `_realize_ch`;
        otherwise `X` passes through unchanged.

        Parameters
        ----------
        X : symbol to requantize; its `name` attribute indexes `precs`,
            `buffers`, `features` and `infer_shapes`.
        oprec : default output precision; overridden by
            `precs[name][kwargs['oname']]` when that entry exists.
        num_groups : accepted for interface parity; not read here.
        kwargs : must provide `params`, `features`, `precs`, `buffers`,
            `graph`, `shift_bits`, `oname` and `infer_shapes`; `logger`
            is optional.

        Returns
        -------
        (X, xprec_list, xscale_list) : rewritten symbol and the per-group
            precisions and scales.
    """
    logger = kwargs.get('logger', logging.getLogger('log.mrt.realize'))
    params, features = kwargs['params'], kwargs['features']
    precs, buffers = kwargs['precs'], kwargs['buffers']
    graph, shift_bits = kwargs['graph'], kwargs['shift_bits']
    xn, xopn = X.attr('name'), X.attr('op_name')

    oprec = precs[xn].get(kwargs['oname'], oprec)
    iscale, iprec = buffers[xn].get(), precs[xn][OUT_KEY]
    absmax_list = features[xn].get()

    # Target scale per group; None marks an all-zero group.
    oscale_list = [
        None if absmax == 0 else self.get_scale(oprec, AFeature(absmax))
        for absmax in absmax_list
    ]

    sb = iprec - oprec
    if sb > shift_bits:
        iprec -= sb
        X = tutils.realize(X, sb, iprec)
        iscale = iscale / (2**sb)

    xprec_list, xscale_list, sb_list, var_list = [], [], [], []
    if iprec > oprec:
        for i, absmax in enumerate(absmax_list):
            if absmax == 0:
                xprec_list.append(1)
                xscale_list.append(1)
                sb_list.append(1)
                var_list.append(sutils.nd_const(1, graph, params))
                continue

            rescale = oscale_list[i] / iscale
            bits = MAX_BIT - iprec
            # Approximate the rescale factor as frac * 2**exp.
            frac, exp = sim.cvm_float(rescale, bits)
            sim_scale = frac * (2**exp)
            scale_err = abs((sim_scale - rescale) / rescale)
            if scale_err > 0.001:
                # Logger.warn() is deprecated (removed in Python 3.13).
                logger.warning(
                    "Operator  %-20s name=%-40s quantize with sb=%s" +
                    " scale=%s, error=%s",
                    xopn, xn, sb, iscale, scale_err)
            xscale = iscale * frac * (2**exp)
            # The multiplier is only collected here; it is applied to all
            # groups at once after the loop.
            var = sutils.nd_const(frac if frac > 1 else 1, graph, params)
            xprec = self.get_prec(xscale * absmax)
            logger.debug(
                "Operator  %-20s name=%-40s slice %s requantize" +
                " with scale=%-16.8f<%d, %d>" +
                " iprec=%s, iscale=%-10.5f, xprec=%s, xscale=%-10.5f",
                xopn, xn, i, rescale, frac, exp, iprec, iscale,
                xprec, xscale)
            xprec_list.append(xprec)
            xscale_list.append(xscale)
            sb_list.append(-exp)
            var_list.append(var)

        # broadcast_mul list of frac
        xshp = kwargs['infer_shapes'][xn][sutils.get_entry_id(X)]
        fracs = mx.sym.concat(*var_list, name=N.n('concat_mul_frac'))
        fracs = mx.sym.reshape(fracs, shape=(1, xshp[1], 1, 1),
                               name=N.n('reshape_mul_frac'))
        X = mx.sym.broadcast_mul(X, fracs, name=N.n('mrt_quantize_scale'))
        # realize
        X = self._realize_ch(X, sb_list, xprec_list)
    else:
        # X is left untouched, so non-zero groups keep the input precision
        # and scale; all-zero groups use (1, 1) as in the branch above.
        # The conditional was previously inverted.
        xprec_list = [
            1 if absmax == 0 else iprec for absmax in absmax_list
        ]
        xscale_list = [
            1 if absmax == 0 else iscale for absmax in absmax_list
        ]
        logger.debug(
            "Operator  %-20s name=%-40s clip with iprec=%s, oprec=%s",
            xopn, xn, iprec, oprec)
    return X, xprec_list, xscale_list
def _quantize_scale_zp(op, **kwargs):
    """ Quantize a merge operator whose inputs may carry zero points.

        Every input is requantized to the smallest scale its own
        quantizer proposes. Inputs using the affine quantizer additionally
        contribute a constant node holding their scaled zero point. The
        operator is then rebuilt over all collected nodes, and the output
        precision and scale are recorded in `precs` and `buffers`.

        Raises
        ------
        NotImplementedError : if an input uses an unsupported quantizer
            or `op_name` is not a supported merge operator.
    """
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    graph, params = kwargs['graph'], kwargs['params']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []
    oprec = kwargs['op_input_precs'][op_name]

    # Use the smallest scale proposed by any input's quantizer.
    oscales = []
    for c in childs:
        cquant = get_quantizer(cfg_dict[c.attr('name')]['quant_type'])
        oscales.append(cquant.get_scale(oprec, features[c.attr('name')]))
    oscale = min(oscales)
    buffers[name] = SBuffer(oscale)

    nodes, cprecs = [], []
    for c in childs:
        cquant_type = cfg_dict[c.attr('name')]['quant_type']
        cquant = get_quantizer(cquant_type)
        if cquant.name == USQuantizer.name:
            c, cprec, _ = cquant.quantize(
                c, oprec, oscale=oscale, oname=name, **kwargs)
        elif cquant.name == UAQuantizer.name:
            c, cprec, cscale, czpoint = cquant.quantize(
                c, oprec, oscale=oscale, oname=name, **kwargs)
            # The scaled zero point becomes an extra constant operand.
            czint = round(czpoint * cscale)
            Cz = nd_const(czint, graph, params)
            nodes.append(Cz)
            cprecs.append(get_bit_exp(czint))
        else:
            # Fail loudly rather than reuse a stale precision.
            raise NotImplementedError(
                "quantizer %s is not supported by op_name: %s, name: %s"
                % (cquant_type, op_name, name))
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [Concat.op_name]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        infer_prec = max(cprecs)
    elif op_name in [BroadcastAdd.op_name, AddN.op_name]:
        if op_name == BroadcastAdd.op_name:
            add_op, add_name = mx.sym.broadcast_add, 'broadcast_add'
        else:
            add_op, add_name = mx.sym.elemwise_add, 'elemwise_add'
        # Take the operand count BEFORE the pairwise reduction; measuring
        # it afterwards always yields 1 and under-estimates the bits the
        # sum needs.
        num_terms = len(nodes)
        while len(nodes) > 1:
            # Only the final addition carries the operator's own name.
            tname = N.n(add_name) if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            nodes.append(add_op(a, b, name=tname))
        kprec = get_bit_cnt_exp(num_terms)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        # Previously raised NotADirectoryError with unformatted arguments.
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                 op_name, name, buffers[name].serialize(), cns)
    return op
def quantize(self, op, **kwargs):
    """ Build the integer graph for a softmax operator.

        The first input is quantized with the symmetric quantizer, offset
        by its per-axis maximum, passed through a `cvm_lut` table of
        `exp` values, and normalized by the table sum with a rounding
        term. The output precision and scale are recorded in `precs` and
        `buffers` under the operator's name.

        kwargs must provide `params`, `graph`, `buffers`, `precs`,
        `features`, `cfg_dict`, `op_input_precs` and `softmax_lambd`.
    """
    params, graph = kwargs['params'], kwargs['graph']
    buffers, precs = kwargs['buffers'], kwargs['precs']
    features, cfg_dict = kwargs['features'], kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    childs, attr = sym_iter(op.get_children()), op.list_attr()
    cns = [c.attr('name') for c in childs] if childs else []

    # Quantize the input; only the symmetric quantizer is accepted.
    oprec = kwargs['op_input_precs'][op_name]
    th = features[cns[0]].get()
    xs = scale_exp(th, oprec)
    quant_type = cfg_dict[cns[0]]['quant_type']
    assert quant_type == USQuantizer.name
    quant = get_quantizer(quant_type)
    X, xprec, xs = quant.quantize(childs[0], oprec,
                                  oscale=xs, oname=name, **kwargs)
    axis = get_attr(attr, 'axis', -1)
    lambd = kwargs['softmax_lambd']
    # alpha is `lambd` expressed in the input's integer scale; it is also
    # the largest index of the table built below.
    alpha = int(lambd * xs)
    var = nd_const(alpha, graph, params)
    # offset = max(X) - alpha; after subtraction and relu, values lie in
    # [0, alpha] and everything more than alpha below the max becomes 0.
    max_axis = mx.sym.max(X, axis=axis, keepdims=True)
    offset = mx.sym.broadcast_sub(max_axis, var, name=N.n('softmax_offset'))
    offset = realize(offset, 0, xprec)
    norm = mx.sym.broadcast_sub(X, offset, name=N.n('softmax_normalize'))
    norm = mx.sym.relu(norm, name=N.n('Softmax_filter'))
    norm = realize(norm, 0, xprec)

    # Table of exp(i / xs) for i in [0, alpha], clipped to `tprec` bits.
    data = sutils.nd_arange(0, alpha + 1)
    table = nd.exp(data / xs)
    tprec = get_bit_exp(math.exp(lambd))
    table = nd.clip(table, a_min=0, a_max=get_range_exp(tprec))
    W_name = N.n('cvm_lut_weight')
    params[W_name] = weight = table.round().reshape(alpha + 1, 1)
    wattr = {'precision': str(tprec)}
    W = graph[W_name] = mx.sym.var(W_name, shape=weight.shape, attr=wattr)
    # lut = mx.sym.Custom(norm, W, in_dim=alpha+1,
    #                     name=name, op_type='cvm_lut')
    lut = mx.sym.Custom(norm, W, in_dim=alpha + 1,
                        name=N.n('softmax_lut'), op_type='cvm_lut')
    sum_lut = mx.sym.sum(lut, axis=axis, keepdims=True,
                         name=N.n("softmax_sum"))

    # Output precision is capped at 15 and by the bits the table leaves.
    oprec = min(15, 31 - tprec)
    assert oprec > 8, "operator softmax(%s) lambda(%d) is too large" \
        % (name, lambd)
    oscale = get_range_exp(oprec)
    var_scale = nd_const(oscale, graph, params)
    prob = mx.sym.broadcast_mul(lut, var_scale,
                                name=N.n("softmax_output_scale"))
    # NOTE(review): presumably sum_lut / 2, added so the division below
    # rounds to nearest -- confirm against `realize`.
    half_lut = realize(sum_lut, 1, 31)
    prob = mx.sym.broadcast_add(prob, half_lut, name=N.n("softmax_round"))
    op = mx.sym.broadcast_div(prob, sum_lut, name=N.n("softmax_prob"))
    # Cast to int32 and back to float32 to drop the fractional part.
    op = op.astype('int32').astype('float32')
    # op = mx.sym.floor(op) # simulate integer division
    # op = realize(op, 0, oprec)
    op = realize(op, 0, oprec, name=name)
    # oname = op.attr('name')
    precs[name][OUT_KEY] = oprec
    # precs[oname] = {OUT_KEY: oprec}
    # scales[oname] = scales[name] = oscale
    buffers[name] = SBuffer(oscale)

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                 op_name, name, buffers[name].serialize(), cns)
    return op
def quantize(self, op, **kwargs):
    """ Quantize a two-input Convolution according to the quantizer types
        configured for its data (X) and weight (W).

        Supported combinations are symmetric/symmetric (delegated to
        `_quantize_xw`), symmetric/affine, affine/symmetric and
        affine/affine. The groupwise combination is still under
        construction and asserts. Affine inputs add correction
        convolutions against an all-ones tensor. The output precision and
        scale are recorded in `precs` and `buffers`.

        kwargs must provide `features`, `buffers`, `precs`, `graph`,
        `cfg_dict`, `params` and `op_input_precs` (plus `infer_shapes`
        for the groupwise branch).

        Raises
        ------
        NotImplementedError : for any other quantizer combination.
    """
    features, buffers = kwargs['features'], kwargs['buffers']
    precs, graph = kwargs['precs'], kwargs['graph']
    cfg_dict, params = kwargs['cfg_dict'], kwargs['params']
    name, op_name = op.attr('name'), op.attr('op_name')
    childs, attr = sym_iter(op.get_children()), op.list_attr()
    cns = [c.attr('name') for c in childs] if childs else []

    # assert len(childs) == 2 and 'pad' not in attr
    assert len(childs) == 2
    xquant_type = cfg_dict[cns[0]]['quant_type']
    wquant_type = cfg_dict[cns[1]]['quant_type']
    X, W = childs
    xquant, wquant = \
        get_quantizer(xquant_type), get_quantizer(wquant_type)
    oprec = kwargs['op_input_precs'][op_name]

    if xquant_type == wquant_type == USQuantizer.name:
        op = _quantize_xw(op, **kwargs)
    elif xquant_type == USQuantizer.name and \
            wquant_type == UAQuantizer.name:
        Xq, xprec, xscale = xquant.quantize(
            X, oprec, oname=name, **kwargs)
        Wq, wprec, wscale, wzpoint = wquant.quantize(
            W, oprec, oname=name, **kwargs)
        buffers[name] = get_buffer_exp(xscale * wscale)

        Ye1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
        wshp = params[cns[1]].shape
        # np.prod: the np.product alias was removed in NumPy 2.0.
        pd = int(np.prod(wshp[1:]))
        infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec

        # Correction term: weight zero point times conv(Xq, ones).
        W1 = nd_full_const(1, wshp, graph, params)
        Ye2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
        wzint = round(wzpoint * wscale)
        Wz = nd_const(wzint, graph, params)
        Ye2 = mx.sym.broadcast_mul(Wz, Ye2, name=N.n('broadcast_mul'))
        infer_prec2 = get_bit_cnt_exp(pd) + xprec + get_bit_exp(wzint)

        op = mx.sym.elemwise_add(Ye1, Ye2, name=name)
        precs[name][OUT_KEY] = max(infer_prec1, infer_prec2) + 1
    elif xquant_type == UAQuantizer.name and \
            wquant_type == USQuantizer.name:
        Xq, xprec, xscale, Xzp = xquant.quantize(
            X, oprec, oname=name, **kwargs)
        Wq, wprec, wscale = wquant.quantize(
            W, oprec, oname=name, **kwargs)
        buffers[name] = get_buffer_exp(xscale * wscale)

        Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
        wshp = params[cns[1]].shape
        pd = int(np.prod(wshp[1:]))
        infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 1

        # NOTE(review): X is looked up in `params` for its shape and Xzp
        # is treated as a symbol; confirm both hold for activations.
        xshp = params[cns[0]].shape
        # nd_full_const, matching the identical calls in the sibling
        # branches (was `nd_full`).
        X1 = nd_full_const(1, xshp, graph, params)
        Y2 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
        xzp = params[Xzp.attr('name')].asscalar()
        infer_prec2 = get_bit_cnt_exp(abs(xzp) * pd) + wprec
        # NOTE(review): unlike the affine/affine branch, Y2 is not scaled
        # by the zero point and the result does not carry `name`.
        op = mx.sym.elemwise_add(Y1, Y2, name=N.n('elemwise_add'))
        infer_prec = max(infer_prec1, infer_prec2) + 1
        precs[name][OUT_KEY] = infer_prec
    elif xquant_type == wquant_type == UAQuantizer.name:
        Xq, xprec, xscale, Xzp = xquant.quantize(
            X, oprec, oname=name, **kwargs)
        Wq, wprec, wscale, Wzp = wquant.quantize(
            W, oprec, oname=name, **kwargs)
        buffers[name] = get_buffer_exp(xscale * wscale)

        nodes, infer_precs = [], []

        Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
        nodes.append(Y1)
        wshp = params[cns[1]].shape
        pd = int(np.prod(wshp[1:]))
        infer_precs.append(get_bit_cnt_exp(pd) + xprec + wprec + 2)

        W1 = nd_full_const(1, wshp, graph, params)
        Y2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
        Y2 = mx.sym.broadcast_mul(Wzp, Y2, name=N.n('broadcast_mul'))
        nodes.append(Y2)
        wzp = params[Wzp.attr('name')].asscalar()
        infer_precs.append(get_bit_cnt_exp(abs(wzp) * pd) + xprec + 1)

        xshp = params[cns[0]].shape
        X1 = nd_full_const(1, xshp, graph, params)
        # Use the operator's attributes like every other Convolution
        # here; `graph`/`params` were being passed as positional inputs.
        Y3 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
        Y3 = mx.sym.broadcast_mul(Xzp, Y3, name=N.n('broadcast_mul'))
        nodes.append(Y3)
        xzp = params[Xzp.attr('name')].asscalar()
        infer_precs.append(get_bit_cnt_exp(abs(xzp) * pd) + wprec + 1)

        val = pd * abs(xzp) * abs(wzp)
        Y4 = nd_const(val, graph, params)
        nodes.append(Y4)
        infer_precs.append(get_bit_cnt_exp(val))

        # NOTE(review): the final sum is not given the operator's `name`.
        while len(nodes) > 1:
            a, b = nodes.pop(), nodes.pop()
            node = mx.sym.broadcast_add(a, b, name=N.n('broadcast_add'))
            nodes.append(node)
        op = nodes[0]
        infer_prec = max(infer_precs) + 2
        precs[name][OUT_KEY] = infer_prec
    elif xquant_type == GroupConvQuant.name and \
            wquant_type == GroupConvQuant.name:
        num_groups_x = cfg_dict[cns[0]]['gn_info']['num_groups']
        num_groups_w = cfg_dict[cns[1]]['gn_info']['num_groups']
        assert num_groups_x == num_groups_w, \
            "num_groups of x and weight should be equal, " + \
            "num_groups of x: {}, num_groups of weight: {}".format(
                num_groups_x, num_groups_w)
        Xq, xprec_list, xscale_list = xquant.quantize(
            X, oprec, oname=name, num_groups=num_groups_x, **kwargs)
        Wq, wprec_list, wscale_list = wquant.quantize(
            W, oprec, oname=name, num_groups=num_groups_w, **kwargs)
        op = get_mxnet_op(op_name)(Xq, Wq, **attr, name=name)
        # The weight's shape is indexed with the weight's own entry id
        # (X's was used before).
        IPG = kwargs['infer_shapes'][cns[1]][get_entry_id(W)][1]
        kprec = get_bit_cnt_exp(IPG)
        infer_prec_list = [
            kprec + wprec + xprec
            for wprec, xprec in zip(wprec_list, xprec_list)
        ]
        oscale_list = [
            xscale * wscale
            for xscale, wscale in zip(xscale_list, wscale_list)
        ]
        # Unfinished branch: precision/scale are not recorded yet.
        assert False, "implementing..."
    else:
        raise NotImplementedError(
            "Quantization type not implemented," +
            " op: {}, Xquant: {}, Wquant: {}".format(
                op_name, xquant_type, wquant_type))

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
                 op_name, name, buffers[name].serialize(), cns)
    return op
def slice_channel(self, op, **kwargs):
    """ Expand a two-input convolution into per-channel-slice
        convolutions.

        With a single group, data and weight are both sliced along the
        channel axis in steps of `step`, convolved slice by slice, and the
        partial results summed with `add_n`. With several groups (`step`
        must be 1), one single-filter convolution is built per
        (output channel, input channel) pair, the pairs of each output
        channel are summed, and the outputs concatenated along axis 1.
        Every generated node gets a layer-wise entry in `cfg_dict`.

        kwargs must provide `cfg_dict` and `infer_shapes`.

        Returns
        -------
        The rewritten symbol, carrying the original operator name.
    """
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs]
    cfg_dict = kwargs['cfg_dict']
    infer_shapes = kwargs['infer_shapes']

    gn_info = cfg_dict[name]['gn_info']
    ichannel, step = gn_info['ichannel'], gn_info['step']
    assert ichannel == 1
    assert len(childs) == 2
    X, W = childs
    xshp = infer_shapes[cns[0]][get_entry_id(childs[0])]
    wshp = infer_shapes[cns[1]][get_entry_id(childs[1])]
    assert len(xshp) == len(wshp) == 4 and xshp[1] % step == 0

    # The existing input entries are switched to layer-wise in place and
    # shared by every slice derived from them.
    xi_cfg_info, wi_cfg_info = cfg_dict[cns[0]], cfg_dict[cns[1]]
    xi_cfg_info['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
    wi_cfg_info['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
    yi_cfg_info = {
        'gn_info': {'gn_type': LAYER_WISE_TYPE},
        'quant_type': US_QUANT_TYPE,
        'opt_info': cfg_dict[name]['opt_info'],
    }

    # Parse the attribute with int() instead of eval(), and fall back to
    # 1 when the attribute is absent.
    num_group = int(attr.get('num_group', 1))
    C, IC, OC = xshp[1], wshp[1], wshp[0]
    assert num_group * IC == C and \
        OC >= num_group and OC % num_group == 0

    if num_group == 1:
        xs = sym_slice(X, ichannel, step, **kwargs)
        ws = sym_slice(W, ichannel, step, **kwargs)
        nodes = []
        for i, xi, wi in zip(range(0, C, step), xs, ws):
            suffix = '_' + str(i) + '-' + str(i + step)
            cfg_dict[xi.attr('name')] = xi_cfg_info
            cfg_dict[wi.attr('name')] = wi_cfg_info
            yni = N.n(name + suffix)
            Yi = get_mxnet_op(op_name)(xi, wi, **attr, name=yni)
            cfg_dict[yni] = yi_cfg_info
            nodes.append(Yi)
        assert len(nodes) > 1
        op = mx.sym.add_n(*nodes, name=name)
    else:
        assert step == 1
        xs = sym_slice(X, ichannel, step, **kwargs)
        ws = kernel_slice_2d(W, **kwargs)
        OPG = OC // num_group
        # Each generated convolution is an ungrouped single-filter one.
        nattr = attr.copy()
        nattr['num_group'] = '1'
        nattr['num_filter'] = '1'
        nodes = []
        for o in range(OC):
            nnodes = []
            # First data channel of the group output channel `o` is in.
            j = (o // OPG) * IC
            for i in range(IC):
                suffix = '_' + str(o) + '-' + str(i)
                xk, woi = xs[i + j], ws[o][i]
                cfg_dict[xk.attr('name')] = xi_cfg_info
                cfg_dict[woi.attr('name')] = wi_cfg_info
                ynoi = N.n(name + suffix)
                yoi = mx.sym.Convolution(xk, woi, **nattr, name=ynoi)
                cfg_dict[ynoi] = yi_cfg_info
                nnodes.append(yoi)
            if len(nnodes) > 1:
                zni = N.n(name + '_add_n_' + str(o))
                zi = mx.sym.add_n(*nnodes, name=zni)
                cfg_dict[zni] = yi_cfg_info
            else:
                zi = nnodes[0]
            nodes.append(zi)
        assert len(nodes) > 1
        op = mx.sym.concat(*nodes, dim=1, name=name)
    return op