def quantize(self, op, **kwargs):
    """Quantize a two-input (data, weight) operator.

    Only the case where both inputs are configured with ``USQuantizer``
    is supported.  The weight is quantized to the operator's input
    precision, and the output inherits the weight's feature and scale.

    Args:
        op: the symbol to quantize; its first child is treated as the data
            input and its second child as the weight.
        **kwargs: pass context; this method reads ``features``,
            ``buffers``, ``cfg_dict``, ``op_input_precs`` and ``precs`` and
            forwards everything to the quantizers.

    Returns:
        The rebuilt operator symbol, carrying the original name.

    Raises:
        NotImplementedError: if either input is not configured with
            ``USQuantizer``.
    """
    features, buffers = kwargs['features'], kwargs['buffers']
    cfg_dict = kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs]

    xquant_type = cfg_dict[cns[0]]['quant_type']
    wquant_type = cfg_dict[cns[1]]['quant_type']
    xquant, wquant = \
        get_quantizer(xquant_type), get_quantizer(wquant_type)
    oprec = kwargs['op_input_precs'][op_name]

    if not (xquant_type == wquant_type == USQuantizer.name):
        # Exception constructors do not apply %-formatting to their
        # arguments, so the message has to be interpolated explicitly.
        raise NotImplementedError(
            "Quantization type not implementated,"
            " op: %20s, Xquant: %20s, Wquant: %20s"
            % (op_name, xquant_type, wquant_type))

    # NOTE(review): `xs` is the buffer entry itself, not `xs.get()`;
    # whether `xs != 1` really compares the scale depends on how the
    # buffer type implements equality -- confirm.
    X, xs = childs[0], buffers[cns[0]]
    if xs != 1:
        # Bring the data input back to scale 1 (precision 32).
        X, _, _ = xquant.quantize(X, 32, oscale=1, oname=name, **kwargs)
    W, _, ws = wquant.quantize(childs[1], oprec, oname=name, **kwargs)

    # The output reuses the weight's feature and scale.
    features[name] = features[cns[1]]
    buffers[name] = SBuffer(ws)
    kwargs['precs'][name][OUT_KEY] = get_bit_exp(features[name].get() * ws)
    op = get_mxnet_op(op_name)(X, W, **attr, name=name)
    return op
def quantize(self, op, **kwargs):
    """Quantize a single-input operator whose output has fewer elements.

    The input must be configured with ``USQuantizer`` (asserted).  The
    output scale equals the quantized input's scale.  The output precision
    is the input precision plus the bit count of the ratio between input
    and output element counts.

    Args:
        op: the symbol to quantize.
        **kwargs: pass context; reads ``infer_shapes``, ``buffers``,
            ``cfg_dict``, ``op_input_precs`` and ``precs``.

    Returns:
        The rebuilt operator symbol applied to the quantized input.
    """
    shapes = kwargs['infer_shapes']
    buffers = kwargs['buffers']
    cfg_dict = kwargs['cfg_dict']
    name = op.attr('name')
    op_name = op.attr('op_name')
    childs = sym_iter(op.get_children())
    attr = op.list_attr()
    cns = [] if not childs else [c.attr('name') for c in childs]

    # Output shape must be read before `op` is rebound below.
    out_shape = shapes[name][get_entry_id(op)]

    quant_type = cfg_dict[cns[0]]['quant_type']
    assert quant_type == USQuantizer.name, (quant_type, name, op_name)
    quantizer = get_quantizer(quant_type)
    in_prec = kwargs['op_input_precs'][op_name]

    data, data_prec, data_scale = quantizer.quantize(
        childs[0], in_prec, oname=name, **kwargs)
    buffers[name] = SBuffer(data_scale)
    op = get_mxnet_op(op_name)(data, **attr, name=name)

    # Number of input elements folded into each output element.
    in_shape = shapes[cns[0]][get_entry_id(childs[0])]
    in_size = nd.prod(nd_array(in_shape)).asscalar()
    out_size = nd.prod(nd_array(out_shape)).asscalar()
    fold_cnt = int(in_size / out_size)
    kwargs['precs'][name][OUT_KEY] = get_bit_cnt_exp(fold_cnt) + data_prec

    logging.getLogger('log.mrt.realize').debug(
        "operator  %-20s name=%-40s oscale=%s, iscale=%s",
        op_name, name, buffers[name].serialize(), cns)
    return op
def mergefunc(node, params, graph):
    """Rescale scale-dependent attributes of a node and rebuild it.

    Relies on ``attribute_deps``, ``mrt_oscales`` and ``name_idx`` from
    the enclosing scope.  For an operator listed in ``attribute_deps``,
    every dependent attribute is multiplied by the output scale selected
    through ``name_idx`` and truncated to ``int``.  The operator is then
    recreated with the updated attributes.  Other nodes are returned
    untouched.

    ``params`` and ``graph`` are accepted for the callback signature but
    are not used here.
    """
    name = node.attr('name')
    op_name = node.attr('op_name')
    childs = sutils.sym_iter(node.get_children())
    attr = node.list_attr()

    if op_name not in attribute_deps:
        return node

    for attr_name, dep in attribute_deps[op_name].items():
        # A missing attribute defaults to 0.
        base = sutils.get_attr(attr, attr_name, 0)
        attr[attr_name] = int(base * mrt_oscales[name_idx[dep]])
    return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
def _quantize_scale(op, **kwargs):
    """Quantize a multi-input operator so all inputs share one scale.

    Every child must carry an ``FT_TYPE_EXP`` feature and be configured
    with ``USQuantizer`` (both asserted).  The common output scale is
    derived from the largest child feature value and the operator's input
    precision.  Each child is requantized to that scale before the
    operator is rebuilt.

    Args:
        op: the symbol to quantize.
        **kwargs: pass context; reads ``features``, ``precs``,
            ``buffers``, ``cfg_dict`` and ``op_input_precs``.

    Returns:
        The rebuilt symbol.  ``AddN`` is lowered to a chain of
        ``elemwise_add`` nodes whose last node carries the original name.

    Raises:
        NotImplementedError: if the operator is not one of the supported
            merge operators.
    """
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    assert all([features[cn].name == FT_TYPE_EXP for cn in cns])
    absmax = max([features[cn].get() for cn in cns])
    oprec = kwargs['op_input_precs'][op_name]
    oscale = scale_exp(absmax, oprec)
    buffers[name] = SBuffer(oscale)

    nodes, cprecs = [], []
    assert all([cfg_dict[cn]['quant_type'] == \
        USQuantizer.name for cn in cns])
    quant = get_quantizer(USQuantizer.name)
    for c in childs:
        c, cprec, _ = quant.quantize(
            c, oprec, oscale=oscale, oname=name, **kwargs)
        cprecs.append(cprec)
        nodes.append(c)

    if op_name in [
        Concat.op_name, BroadcastAdd.op_name,
        ElemwiseAdd.op_name, ElemwiseSub.op_name, SliceLike.op_name
    ]:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        # Concat only rearranges values; the others get one extra bit.
        infer_prec = max(cprecs) if op_name == Concat.op_name \
            else max(cprecs)+1
    elif op_name == AddN.op_name:
        # Record the addend count before folding: after the loop
        # `len(nodes)` is always 1, which would drop the headroom.
        num_addends = len(nodes)
        while len(nodes) > 1:
            # Only the final add inherits the operator's own name.
            tname = N.n('elemwise_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.elemwise_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(num_addends)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        # Interpolate explicitly; exceptions do not format their args.
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
           op_name, name, buffers[name].serialize(), cns)
    return op
def _separate_pad(op, **kwargs):
    """Move a Convolution's ``pad`` attribute into an explicit pad node.

    Symbols other than ``Convolution`` are returned unchanged.  For a
    Convolution the ``pad`` attribute is removed.  If either padding value
    is non-zero, the first input is wrapped in a constant zero
    ``mx.sym.pad`` over the last two axes.  The Convolution is always
    rebuilt with the original name.  A ``layout`` attribute, when present,
    must be ``'NCHW'`` (asserted).
    """
    name = op.attr('name')
    op_name = op.attr('op_name')
    attr = op.list_attr()
    childs = sutils.sym_iter(op.get_children())

    if op_name != Convolution.op_name:
        return op

    if 'layout' in attr:
        assert attr['layout'] == 'NCHW'

    pad_h, pad_w = sutils.get_attr(attr, 'pad', (0, 0))
    attr.pop('pad', None)

    if pad_h != 0 or pad_w != 0:
        # pad_width lists (before, after) per axis: N, C, H, W.
        childs[0] = mx.sym.pad(
            childs[0],
            pad_width=(0, 0, 0, 0, pad_h, pad_h, pad_w, pad_w),
            mode='constant', constant_value=0, name=N.n('pad'))
    return sutils.get_mxnet_op(op_name)(*childs, **attr, name=name)
def quantize(self, op, **kwargs):
    """Quantize a two-input operator whose scales multiply.

    Only the case where both inputs are configured with ``USQuantizer``
    is supported.  Both inputs are quantized to the operator's input
    precision.  The output scale is the product of the input scales and
    the output precision is the sum of the input precisions.

    Args:
        op: the symbol to quantize.
        **kwargs: pass context; reads ``precs``, ``buffers``,
            ``cfg_dict`` and ``op_input_precs``.

    Returns:
        The rebuilt operator symbol, carrying the original name.

    Raises:
        NotImplementedError: if either input is not configured with
            ``USQuantizer``.
    """
    precs, buffers = kwargs['precs'], kwargs['buffers']
    name, op_name = op.attr('name'), op.attr('op_name')
    cfg_dict = kwargs['cfg_dict']
    childs, attr = sym_iter(op.get_children()), op.list_attr()
    cns = [c.attr('name') for c in childs] if childs else []
    oprec = kwargs['op_input_precs'][op_name]

    xquant_type, bquant_type = \
        cfg_dict[cns[0]]['quant_type'], cfg_dict[cns[1]]['quant_type']
    xquant, bquant = \
        get_quantizer(xquant_type), get_quantizer(bquant_type)

    if not (xquant_type == bquant_type == USQuantizer.name):
        # Exception constructors do not apply %-formatting to their
        # arguments, so the message has to be interpolated explicitly.
        raise NotImplementedError(
            "Quantization type not implementated,"
            " op: %20s, Xquant: %20s, Wquant: %20s"
            % (op_name, xquant_type, bquant_type))

    X, xprec, xs = xquant.quantize(
        childs[0], oprec, oname=name, **kwargs)
    B, bprec, bs = bquant.quantize(
        childs[1], oprec, oname=name, **kwargs)
    op = get_mxnet_op(op_name)(X, B, **attr, name=name)

    if bprec == 1 and bs == 1:
        # special case: childs[1] is 0
        buffers[name] = SBuffer(1)
        precs[name][OUT_KEY] = 1
    else:
        buffers[name] = SBuffer(xs * bs)
        precs[name][OUT_KEY] = xprec + bprec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
           op_name, name, buffers[name].serialize(), cns)
    return op
def restore(op, **kwargs):
    """Rebuild an operator on de-scaled inputs and reset its scale to 1.

    Each child that has an entry in ``buffers`` with a value other than 1
    is divided by that value.  Children without an entry are used as-is.
    The output precision is taken from the operator's own
    ``FT_TYPE_EXP`` feature (asserted), and its buffer is reset to scale 1.

    Args:
        op: the symbol to restore.
        **kwargs: pass context; reads ``features``, ``precs`` and
            ``buffers``.

    Returns:
        The rebuilt operator symbol, carrying the original name.
    """
    features, precs, buffers = \
        kwargs['features'], kwargs['precs'], kwargs['buffers']
    name = op.attr('name')
    op_name = op.attr('op_name')
    childs = sutils.sym_iter(op.get_children())
    attr = op.list_attr()

    restored = []
    for child in ([] if childs is None else childs):
        cname = child.attr('name')
        scale = buffers[cname].get() if cname in buffers else 1
        if scale == 1:
            restored.append(child)
        else:
            restored.append(child / scale)

    out = sutils.get_mxnet_op(op_name)(*restored, **attr, name=name)

    feature = features[name]
    assert feature.name == FT_TYPE_EXP
    precs[name][OUT_KEY] = get_bit_exp(feature.get())
    buffers[name] = get_buffer_exp(1)
    return out
def _separate_bias(op, **kwargs):
    """Split the bias of a Convolution / FullyConnected into an add node.

    Symbols that are neither ``Convolution`` nor ``FullyConnected``, or
    that have a non-empty child list shorter than three, are returned
    unchanged.  Otherwise the operator is rebuilt without bias under a
    fresh name.  The bias is expanded so it broadcasts against the output
    and added with ``broadcast_add``, which takes the original name.
    """
    name = op.attr('name')
    op_name = op.attr('op_name')
    attr = op.list_attr()
    childs = sutils.sym_iter(op.get_children())

    if op_name not in (Convolution.op_name, FullyConnected.op_name):
        return op
    if childs and len(childs) < 3:
        # No bias input to separate.
        return op

    attr['no_bias'] = True
    biasless = sutils.get_mxnet_op(op_name)(
        childs[0], childs[1], **attr, name=N.n(name))
    bias_name = childs[2].attr('name')

    if op_name != Convolution.op_name:
        # FullyConnected: a single leading axis is added.
        B = mx.sym.expand_dims(childs[2], axis=0, name=N.n(bias_name))
    else:
        if 'layout' in attr:
            assert attr['layout'] == 'NCHW'
        # Convolution: one leading and two trailing axes are added.
        B = mx.sym.expand_dims(childs[2], axis=0, name=N.n('expand_dims'))
        B = mx.sym.expand_dims(B, axis=-1, name=N.n('expand_dims'))
        B = mx.sym.expand_dims(B, axis=-1, name=N.n(bias_name))

    return mx.sym.broadcast_add(biasless, B, name=name)
def _quantize_xw(op, **kwargs):
    """Quantize a two-input (data, weight) operator.

    Each input is quantized with the quantizer named in its ``cfg_dict``
    entry, at the operator's input precision.  The output scale is the
    product of the two input scales.  The output precision is the sum of
    both input precisions and the bit count of the number of elements in
    one weight slice along axis 0.

    Args:
        op: the symbol to quantize; it must have exactly two children.
        **kwargs: pass context; reads ``features``, ``buffers``,
            ``precs``, ``cfg_dict``, ``params`` and ``op_input_precs``.

    Returns:
        The rebuilt operator symbol, carrying the original name.
    """
    features, buffers = kwargs['features'], kwargs['buffers']
    precs = kwargs['precs']
    cfg_dict, params = kwargs['cfg_dict'], kwargs['params']
    name = op.attr('name')
    op_name = op.attr('op_name')
    childs = sym_iter(op.get_children())
    attr = op.list_attr()
    cns = [] if not childs else [c.attr('name') for c in childs]

    data, weight = childs
    data_quant = get_quantizer(cfg_dict[cns[0]]['quant_type'])
    weight_quant = get_quantizer(cfg_dict[cns[1]]['quant_type'])
    in_prec = kwargs['op_input_precs'][op_name]

    data_q, data_prec, data_scale = data_quant.quantize(
        data, in_prec, oname=name, **kwargs)
    weight_q, weight_prec, weight_scale = weight_quant.quantize(
        weight, in_prec, oname=name, **kwargs)
    buffers[name] = get_buffer_exp(data_scale * weight_scale)
    op = get_mxnet_op(op_name)(data_q, weight_q, **attr, name=name)

    # Element count of the weight with its leading axis dropped.
    weight_shape = params[cns[1]].shape
    fold_cnt = int(nd.prod(nd_array(weight_shape[1:])).asscalar())
    fold_prec = get_bit_cnt_exp(fold_cnt)
    precs[name][OUT_KEY] = fold_prec + data_prec + weight_prec
    return op
def quantize(self, op, **kwargs):
    """Quantize a two-input convolution for each quantizer combination.

    The branch is chosen from the ``quant_type`` configured for the data
    (first child) and weight (second child):

    * US / US -- delegated to ``_quantize_xw``.
    * US / UA, UA / US, UA / UA -- the convolution is expanded into a sum
      of convolutions and constant terms built from the values returned
      by the ``UAQuantizer`` (presumably zero points -- confirm).
    * GroupConvQuant / GroupConvQuant -- unfinished; stops on an
      ``assert False``.

    Args:
        op: the symbol to quantize; it must have exactly two children.
        **kwargs: pass context; reads ``features``, ``buffers``,
            ``precs``, ``graph``, ``cfg_dict``, ``params``,
            ``op_input_precs`` and, for the group branch,
            ``infer_shapes``.

    Returns:
        The quantized symbol.

    Raises:
        NotImplementedError: for any other quantizer combination.
    """
    features, buffers = kwargs['features'], kwargs['buffers']
    precs, graph = kwargs['precs'], kwargs['graph']
    cfg_dict, params = kwargs['cfg_dict'], kwargs['params']
    name, op_name = op.attr('name'), op.attr('op_name')
    childs, attr = sym_iter(op.get_children()), op.list_attr()
    cns = [c.attr('name') for c in childs] if childs else []
    # assert len(childs) == 2 and 'pad' not in attr
    assert len(childs) == 2
    xquant_type = cfg_dict[cns[0]]['quant_type']
    wquant_type = cfg_dict[cns[1]]['quant_type']
    X, W = childs
    xquant, wquant = \
        get_quantizer(xquant_type), get_quantizer(wquant_type)
    oprec = kwargs['op_input_precs'][op_name]

    if xquant_type == wquant_type == USQuantizer.name:
        op = _quantize_xw(op, **kwargs)
    elif xquant_type == USQuantizer.name and \
        wquant_type == UAQuantizer.name:
        Xq, xprec, xscale = xquant.quantize(
            X, oprec, oname=name, **kwargs)
        Wq, wprec, wscale, wzpoint = wquant.quantize(
            W, oprec, oname=name, **kwargs)
        buffers[name] = get_buffer_exp(xscale * wscale)

        # First term: conv(Xq, Wq).
        Ye1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
        wshp = params[cns[1]].shape
        pd = int(np.prod(wshp[1:]))
        infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec

        # Second term: wzint * conv(Xq, ones).
        W1 = nd_full_const(1, wshp, graph, params)
        Ye2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
        wzint = round(wzpoint * wscale)
        Wz = nd_const(wzint, graph, params)
        Ye2 = mx.sym.broadcast_mul(Wz, Ye2, name=N.n('broadcast_mul'))
        infer_prec2 = get_bit_cnt_exp(pd) + xprec + get_bit_exp(wzint)

        op = mx.sym.elemwise_add(Ye1, Ye2, name=name)
        precs[name][OUT_KEY] = max(infer_prec1, infer_prec2) + 1
        buffers[name] = get_buffer_exp(xscale * wscale)
    elif xquant_type == UAQuantizer.name and \
        wquant_type == USQuantizer.name:
        Xq, xprec, xscale, Xzp = xquant.quantize(
            X, oprec, oname=name, **kwargs)
        Wq, wprec, wscale = wquant.quantize(
            W, oprec, oname=name, **kwargs)
        buffers[name] = get_buffer_exp(xscale * wscale)

        Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
        wshp = params[cns[1]].shape
        pd = np.prod(wshp[1:])
        infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 1

        # NOTE(review): unlike the UA/UA branch, this branch uses
        # `nd_full` rather than `nd_full_const`, does not multiply Y2 by
        # Xzp, and does not give the result the operator's own name.
        # Left untouched; confirm the intent.
        xshp = params[cns[0]].shape
        X1 = nd_full(1, xshp, graph, params)
        Y2 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
        xzp = params[Xzp.attr('name')].asscalar()
        infer_prec2 = get_bit_cnt_exp(abs(xzp) * pd) + wprec

        op = mx.sym.elemwise_add(Y1, Y2, name=N.n('elemwise_add'))
        infer_prec = max(infer_prec1, infer_prec2) + 1
        precs[name][OUT_KEY] = infer_prec
    elif xquant_type == wquant_type == UAQuantizer.name:
        Xq, xprec, xscale, Xzp = xquant.quantize(
            X, oprec, oname=name, **kwargs)
        Wq, wprec, wscale, Wzp = wquant.quantize(
            W, oprec, oname=name, **kwargs)
        buffers[name] = get_buffer_exp(xscale * wscale)

        nodes, infer_precs = [], []

        # Term 1: conv(Xq, Wq).
        Y1 = mx.sym.Convolution(Xq, Wq, **attr, name=N.n('Convolution'))
        nodes.append(Y1)
        wshp = params[cns[1]].shape
        pd = np.prod(wshp[1:])
        infer_prec1 = get_bit_cnt_exp(pd) + xprec + wprec + 2
        infer_precs.append(infer_prec1)

        # Term 2: Wzp * conv(Xq, ones).
        W1 = nd_full_const(1, wshp, graph, params)
        Y2 = mx.sym.Convolution(Xq, W1, **attr, name=N.n('Convolution'))
        Y2 = mx.sym.broadcast_mul(Wzp, Y2, name=N.n('broadcast_mul'))
        nodes.append(Y2)
        wzp = params[Wzp.attr('name')].asscalar()
        infer_prec2 = get_bit_cnt_exp(abs(wzp) * pd) + xprec + 1
        infer_precs.append(infer_prec2)

        # Term 3: Xzp * conv(ones, Wq).  The convolution takes the
        # operator attributes and a fresh name, like the terms above;
        # `graph` and `params` are not symbol inputs.
        xshp = params[cns[0]].shape
        X1 = nd_full_const(1, xshp, graph, params)
        Y3 = mx.sym.Convolution(X1, Wq, **attr, name=N.n('Convolution'))
        Y3 = mx.sym.broadcast_mul(Xzp, Y3, name=N.n('broadcast_mul'))
        nodes.append(Y3)
        xzp = params[Xzp.attr('name')].asscalar()
        infer_prec3 = get_bit_cnt_exp(abs(xzp) * pd) + wprec + 1
        infer_precs.append(infer_prec3)

        # Term 4: constant pd * |xzp| * |wzp|.
        val = pd * abs(xzp) * abs(wzp)
        Y4 = nd_const(val, graph, params)
        nodes.append(Y4)
        infer_prec4 = get_bit_cnt_exp(val)
        infer_precs.append(infer_prec4)

        # Fold all terms into a single sum.
        while len(nodes) > 1:
            a, b = nodes.pop(), nodes.pop()
            node = mx.sym.broadcast_add(a, b, name=N.n('broadcast_add'))
            nodes.append(node)
        op = nodes[0]
        infer_prec = max(infer_precs) + 2
        precs[name][OUT_KEY] = infer_prec
    elif xquant_type == GroupConvQuant.name and \
        wquant_type == GroupConvQuant.name:
        num_groups_x = cfg_dict[cns[0]]['gn_info']['num_groups']
        num_groups_w = cfg_dict[cns[1]]['gn_info']['num_groups']
        assert num_groups_x == num_groups_w, \
            "num_groups of x and weight should be equal, " + \
            "num_groups of x: {}, num_groups of weight: {}".format(
                num_groups_x, num_groups_w)
        Xq, xprec_list, xscale_list = xquant.quantize(
            X, oprec, oname=name, num_groups=num_groups_x, **kwargs)
        Wq, wprec_list, wscale_list = wquant.quantize(
            W, oprec, oname=name, num_groups=num_groups_w, **kwargs)
        op = get_mxnet_op(op_name)(Xq, Wq, **attr, name=name)
        IPG = kwargs['infer_shapes'][cns[1]][get_entry_id(X)][1]
        kprec = get_bit_cnt_exp(IPG)
        infer_prec_list = [
            kprec + wprec_list[i] + xprec_list[i] \
            for i in range(len(wprec_list))
        ]
        oscale_list = [
            xscale_list[i] * wscale_list[i] \
            for i in range(len(wscale_list))
        ]
        # Branch is unfinished: no buffer or precision is recorded yet.
        assert False, "implementing..."
    else:
        raise NotImplementedError(
            "Quantization type not implementated," + \
            " op: {}, Xquant: {}, Wquant: {}".format(
                op_name, xquant_type, wquant_type))

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
           op_name, name, buffers[name].serialize(), cns)
    return op
def slice_channel(self, op, **kwargs):
    """Split a two-input (data, weight) operator into per-slice operators.

    The slicing parameters come from ``cfg_dict[name]['gn_info']``
    (``ichannel`` must be 1).  Both inputs must have rank-4 inferred
    shapes.

    * ``num_group == 1``: data and weight are sliced with ``sym_slice``;
      one operator is built per slice pair and the results are summed
      with ``add_n`` under the original name.
    * otherwise (``step`` must be 1): the data is sliced with
      ``sym_slice`` and the weight with ``kernel_slice_2d``; a single-group
      single-filter Convolution is built per (output index, input index)
      pair, summed per output index and concatenated along axis 1 under
      the original name.

    Side effects: the ``gn_info`` of both input cfg entries is overwritten
    with the layer-wise type, and cfg entries are registered in
    ``cfg_dict`` for every generated slice and intermediate node.  All
    slices of one input share the same cfg dict object.

    Args:
        op: the symbol to split; it must have exactly two children.
        **kwargs: pass context; reads ``cfg_dict`` and ``infer_shapes``
            and is forwarded to the slicing helpers.

    Returns:
        The symbol that replaces ``op``, carrying the original name.
    """
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs]
    cfg_dict = kwargs['cfg_dict']
    infer_shapes = kwargs['infer_shapes']

    gn_info = cfg_dict[name]['gn_info']
    ichannel, step = gn_info['ichannel'], gn_info['step']
    assert ichannel == 1
    assert len(childs) == 2
    X, W = childs
    xshp = infer_shapes[cns[0]][get_entry_id(childs[0])]
    wshp = infer_shapes[cns[1]][get_entry_id(childs[1])]
    # Only referenced by the disabled alternative implementation below.
    oshp = infer_shapes[name][get_entry_id(op)]
    assert len(xshp) == len(wshp) == 4 and xshp[1] % step == 0

    # The input cfg entries are mutated in place and then shared by all
    # of that input's slices.
    xi_cfg_info, wi_cfg_info = cfg_dict[cns[0]], cfg_dict[cns[1]]
    xi_cfg_info['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
    wi_cfg_info['gn_info'] = {'gn_type': LAYER_WISE_TYPE}
    # Cfg entry shared by every generated output / intermediate node.
    yi_cfg_info = {
        'gn_info': {'gn_type': LAYER_WISE_TYPE},
        'quant_type': US_QUANT_TYPE,
        'opt_info': cfg_dict[name]['opt_info'],
    }
    # NOTE(review): `eval` executes arbitrary code from the attribute
    # string; if model attributes can come from untrusted files, parse
    # this value with a safe integer conversion instead.
    num_group = eval(attr['num_group'])
    C, IC, OC = xshp[1], wshp[1], wshp[0]
    assert num_group * IC == C and OC >= num_group and OC % num_group == 0
    if num_group == 1:
        xs = sym_slice(X, ichannel, step, **kwargs)
        ws = sym_slice(W, ichannel, step, **kwargs)
        nodes = []
        # `j` indexes the slice lists while `i` walks channel offsets.
        j = 0
        for i in range(0, C, step):
            suffix = '_' + str(i) + '-' + str(i + step)
            xni = xs[j].attr('name')
            cfg_dict[xni] = xi_cfg_info
            wni = ws[j].attr('name')
            cfg_dict[wni] = wi_cfg_info
            yni = N.n(name + suffix)
            Yi = get_mxnet_op(op_name)(xs[j], ws[j], **attr, name=yni)
            cfg_dict[yni] = yi_cfg_info
            nodes.append(Yi)
            j += 1
        assert len(nodes) > 1
        op = mx.sym.add_n(*nodes, name=name)
        # Disabled alternative implementation, kept for reference:
        # # transpose and reshape weight
        # Wt = mx.sym.transpose(W, axes=(1,0,2,3), name=N.n('transpose'))
        # rshp = (OC*IC,1,) + wshp[2:]
        # wrn = N.n('reshape')
        # cfg_dict[wrn] = wi_cfg_info
        # Wr = mx.sym.reshape(Wt, shape=rshp, name=wrn)
        # # groupwise convolution
        # nattr = attr.copy()
        # nattr['num_group'] = IC
        # nattr['num_filter'] = IC * OC
        # conv_name = N.n('groupwise_convolution')
        # cfg_dict[conv_name] = yi_cfg_info
        # print(nattr, name)
        # op = mx.sym.Convolution(X, Wr, **nattr, name=conv_name)
        # # reshape output
        # rname = N.n('reshape')
        # cfg_dict[rname] = yi_cfg_info
        # rshp = (-1, IC, OC,) + oshp[2:]
        # op = mx.sym.reshape(op, shape=rshp, name=rname)
        # # sum
        # sum_name = N.n('sum')
        # cfg_dict[sum_name] = yi_cfg_info
        # op = mx.sym.sum(op, axis=1, keepdims=False, name=sum_name)
    else:
        assert step == 1
        xs = sym_slice(X, ichannel, step, **kwargs)
        ws = kernel_slice_2d(W, **kwargs)
        # Number of output indices handled per group.
        OPG = OC // num_group
        # Each generated convolution is single-group, single-filter.
        nattr = attr.copy()
        nattr['num_group'] = '1'
        nattr['num_filter'] = '1'
        nodes = []
        for o in range(OC):
            nnodes = []
            # Offset of the first data slice belonging to o's group.
            j = int(o / OPG) * IC
            for i in range(IC):
                suffix = '_' + str(o) + '-' + str(i)
                k = i + j
                xk, woi = xs[k], ws[o][i]
                xnk, wnoi = xk.attr('name'), woi.attr('name')
                cfg_dict[xnk] = xi_cfg_info
                cfg_dict[wnoi] = wi_cfg_info
                ynoi = N.n(name + suffix)
                yoi = mx.sym.Convolution(xk, woi, **nattr, name=ynoi)
                cfg_dict[ynoi] = yi_cfg_info
                nnodes.append(yoi)
            if len(nnodes) > 1:
                # Sum the partial results for output index o.
                zni = N.n(name + '_add_n_' + str(o))
                zi = mx.sym.add_n(*nnodes, name=zni)
                cfg_dict[zni] = yi_cfg_info
            else:
                zi = nnodes[0]
            nodes.append(zi)
        assert len(nodes) > 1
        op = mx.sym.concat(*nodes, dim=1, name=name)
    return op
def _quantize_scale_zp(op, **kwargs):
    """Quantize a multi-input operator to one scale, US or UA per input.

    Each child's quantizer proposes a scale from the operator's input
    precision and the child's feature; the smallest proposal becomes the
    common output scale.  Every child is then requantized to that scale.
    For a ``UAQuantizer`` child, an additional constant node holding
    ``round(zero_point * scale)`` is inserted before the child.

    Args:
        op: the symbol to quantize.
        **kwargs: pass context; reads ``features``, ``precs``,
            ``buffers``, ``cfg_dict``, ``graph``, ``params`` and
            ``op_input_precs``.

    Returns:
        The rebuilt symbol.  ``BroadcastAdd`` and ``AddN`` are lowered to
        a chain of binary adds whose last node carries the original name.

    Raises:
        NotImplementedError: if the operator is not ``Concat``,
            ``BroadcastAdd`` or ``AddN``.
    """
    features, precs = kwargs['features'], kwargs['precs']
    buffers, cfg_dict = kwargs['buffers'], kwargs['cfg_dict']
    graph, params = kwargs['graph'], kwargs['params']
    name, op_name = op.attr('name'), op.attr('op_name')
    attr, childs = op.list_attr(), sym_iter(op.get_children())
    cns = [c.attr('name') for c in childs] if childs else []

    oprec = kwargs['op_input_precs'][op_name]
    oscales = []
    for c in childs:
        cquant_type = cfg_dict[c.attr('name')]['quant_type']
        cquant = get_quantizer(cquant_type)
        ft = features[c.attr('name')]
        oscale = cquant.get_scale(oprec, ft)
        oscales.append(oscale)
    oscale = min(oscales)
    buffers[name] = SBuffer(oscale)

    nodes, cprecs = [], []
    for c in childs:
        cquant_type = cfg_dict[c.attr('name')]['quant_type']
        cquant = get_quantizer(cquant_type)
        if cquant.name == USQuantizer.name:
            c, cprec, _ = cquant.quantize(
                c, oprec, oscale=oscale, oname=name, **kwargs)
        elif cquant.name == UAQuantizer.name:
            c, cprec, cscale, czpoint = cquant.quantize(
                c, oprec, oscale=oscale, oname=name, **kwargs)
            # The constant term goes in front of the quantized child.
            czint = round(czpoint * cscale)
            Cz = nd_const(czint, graph, params)
            nodes.append(Cz)
            cprecs.append(get_bit_exp(czint))
        # NOTE(review): a child with any other quantizer is appended
        # unquantized with a stale (or unbound) `cprec` -- confirm that
        # this cannot happen.
        cprecs.append(cprec)
        nodes.append(c)

    # Number of terms to be summed, recorded before the folds below
    # shrink `nodes` down to a single element.
    num_terms = len(nodes)
    if op_name == Concat.op_name:
        op = get_mxnet_op(op_name)(*nodes, **attr, name=name)
        infer_prec = max(cprecs)
    elif op_name == BroadcastAdd.op_name:
        while len(nodes) > 1:
            # Only the final add inherits the operator's own name.
            tname = N.n('broadcast_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.broadcast_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(num_terms)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    elif op_name == AddN.op_name:
        while len(nodes) > 1:
            tname = N.n('elemwise_add') if len(nodes) > 2 else name
            a, b = nodes.pop(0), nodes.pop(0)
            tmp = mx.sym.elemwise_add(a, b, name=tname)
            nodes.append(tmp)
        kprec = get_bit_cnt_exp(num_terms)
        infer_prec = max(cprecs) + kprec
        op = nodes[0]
    else:
        # NotImplementedError matches `_quantize_scale`; the message is
        # interpolated explicitly because exceptions do not format args.
        raise NotImplementedError(
            "symbol merge function of op_name: %s has not been "
            "implemented, name: %s" % (op_name, name))
    precs[name][OUT_KEY] = infer_prec

    logger = logging.getLogger('log.mrt.realize')
    logger.debug("operator  %-20s name=%-40s oscale=%s, iscale=%s",
           op_name, name, buffers[name].serialize(), cns)
    return op