def test_sym_nnvm(batch_size, iter_num):
    """Dump the fully-quantized ("mrt.all.quantize") model for NNVM/CVM.

    Loads the merged quantized symbol/params/ext files, grabs one VOC batch
    and runs a standard dump via ``_mrt.std_dump``.

    NOTE(review): ``batch_size`` and ``iter_num`` are accepted but never
    used — data is always loaded with batch size 1.
    """
    logger = logging.getLogger("log.test.nnvm")
    logger.info("=== Log Test NNVM ===")
    sym_file, param_file, ext_file = load_fname("mrt.all.quantize", True)
    sym, params = mx.sym.load(sym_file), nd.load(param_file)
    inputs_ext, _ = sim.load_ext(ext_file)
    # Single-image VOC iterator at 512x512; only the first batch is used.
    val_data = dataset.load_voc(1, 512)
    val_data_iter = iter(val_data)
    data, _ = next(val_data_iter)
    if False:
        # Dead debug branch: dump selected operator outputs instead of the
        # full standard dump. Kept for manual toggling during debugging.
        data = sim.load_real_data(data, 'data', inputs_ext)
        inputs_ext['data']['data'] = data
        spass.sym_dump_ops(sym, params, inputs_ext,
                           datadir="/data/wlt", ctx=mx.gpu(1), cleanDir=True,
                           ops=[
                               "broadcast_div0",
                           ])
    else:
        _mrt.std_dump(sym, params, inputs_ext, data, "ssd_ryt", max_num=100)
def cvm_quantize(data, label):
    """Quantize one batch, run the int model ``net3`` and report the
    running top-1/top-5 accuracy as a formatted string."""
    quant_batch = sim.load_real_data(data, 'data', inputs_ext)
    outputs = net3.forward(quant_batch.as_in_context(ctx))
    qacc_top1.update(label, outputs)
    qacc_top5.update(label, outputs)
    top1 = qacc_top1.get()[1]
    top5 = qacc_top5.get()[1]
    return "top1={:6.2%} top5={:6.2%}".format(top1, top5)
def net(data):
    """Quantize the input, run ``net2`` on the quantized context, then move
    each output back to ``ctx`` and divide by its output scale."""
    quant_in = sim.load_real_data(data, 'data', net2_inputs_ext)
    raw_outs = net2(quant_in.as_in_context(qctx))
    rescaled = []
    for idx, out in enumerate(raw_outs):
        rescaled.append(out.as_in_context(ctx) / oscales[idx])
    return rescaled
def cvm_quantize(data, label):
    """Quantize one batch, shard it across the ``ctx`` device list, run the
    int model ``net2`` per shard, and report running top-1/top-5 accuracy."""
    quant_batch = sim.load_real_data(data, 'data', inputs_ext)
    shards = gluon.utils.split_and_load(
        quant_batch, ctx_list=ctx, batch_axis=0, even_split=False)
    preds = nd.concatenate([net2.forward(shard) for shard in shards])
    qacc_top1.update(label, preds)
    qacc_top5.update(label, preds)
    top1 = qacc_top1.get()[1]
    top5 = qacc_top5.get()[1]
    return "top1={:6.2%} top5={:6.2%}".format(top1, top5)
def net(data):
    """Run the fp32 base graph, quantize each intermediate feature per
    ``top_names``, feed the quantized top graph ``net3`` and rescale its
    outputs; prints a slice of every output for eyeballing."""
    feats = base_graph(data.as_in_context(base_ctx))
    feats = [f.as_in_context(ctx) for f in feats]
    quant_feats = []
    for idx, name in enumerate(top_names):
        quant_feats.append(sim.load_real_data(feats[idx], name, net3_inputs_ext))
    raw_outs = net3(*quant_feats)
    rescaled = []
    for idx, out in enumerate(raw_outs):
        rescaled.append(out / net3_scales[idx])
    print([o[0][0][:] for o in rescaled])
    return rescaled
def model_func(data, label):
    """Forward one batch through ``net`` (quantizing first when
    ``inputs_qext`` is set), sharded over the ``ctx`` device list, and
    report running top-1/top-5 accuracy."""
    if inputs_qext:
        data = sim.load_real_data(data, 'data', inputs_qext)
    shards = gluon.utils.split_and_load(
        data, ctx_list=ctx, batch_axis=0, even_split=False)
    outputs = []
    for shard in shards:
        outputs.append(net.forward(shard))
    merged = nd.concatenate(outputs)
    acc_top1.update(label, merged)
    acc_top5.update(label, merged)
    top1 = acc_top1.get()[1]
    top5 = acc_top5.get()[1]
    return "top1={:6.2%} top5={:6.2%}".format(top1, top5)
def std_dump(sym, params, inputs_ext, data, model_name,
             is_mxnet=True, batch=False, data_dtype="int8",
             max_num=20, dump_ops=None):
    """Dump a model's per-layer outputs plus a CVM build under
    ``/data/std_out/<model_name>``.

    Parameters:
        sym, params: model symbol and parameter dict.
        inputs_ext: input-extension dict; NOTE: mutated in place (shape is
            forced to batch 1 when ``batch`` is False, and the real data is
            stored under ``inputs_ext['data']['data']`` in the mxnet path).
        data: input batch; reduced to a single sample unless ``batch``.
        model_name: directory name of the dump under /data/std_out.
        is_mxnet: True -> dump layer outputs via spass and convert to nnvm;
            False -> assume nnvm symbol, run through the TVM runtime and
            save data/result .npy files.
        dump_ops: optional list of operator names to dump.

    Returns whatever ``spass.cvm_build`` returns for the final build.
    """
    # Fix: the original used a mutable default (dump_ops=[]), which is
    # shared across calls; normalize a None sentinel instead.
    dump_ops = [] if dump_ops is None else dump_ops
    if not batch:
        # Collapse every input to batch size 1.
        for k, v in inputs_ext.items():
            v['shape'] = (1, *v['shape'][1:])
        data = data[0].reshape(inputs_ext['data']['shape'])
    datadir = "/data/std_out/" + model_name
    os.makedirs(datadir, exist_ok=True)
    if is_mxnet:
        data = sim.load_real_data(data, 'data', inputs_ext)
        inputs_ext['data']['data'] = data
        spass.sym_dump_layer_outputs(sym, params, inputs_ext, datadir,
                                     data_dtype=data_dtype,
                                     max_num=max_num,
                                     dump_ops=dump_ops, ctx=mx.gpu(0))
        sym, params = spass.mxnet_to_nnvm(sym, params, inputs_ext)
    else:
        # Already an nnvm graph: run it on CPU via the TVM runtime and
        # persist input plus every output as .npy for comparison.
        tvm_graph, tvm_params, lib = spass.cvm_build(
            sym, params, inputs_ext, "/dev/null", "/dev/null",
            runtime="tvm", target="llvm", dtype="int32")
        model = graph_runtime.create(tvm_graph, lib, tvm.cpu())
        model.set_input(**params)
        model.set_input("data", data)
        model.run()
        np.save(datadir + "/data.npy", data.asnumpy().astype('int8'))
        for i in range(len(sym.list_output_names())):
            out = model.get_output(i).asnumpy()
            np.save("%s/result_%d.npy" % (datadir, i), out)
    return spass.cvm_build(sym, params, inputs_ext,
                           datadir + "/symbol", datadir + "/params")
def net(data):
    """Quantize the input, run ``net2`` in float64 on ``qctx``, then move
    each output to ``ctx`` and divide by its output scale."""
    quant_in = sim.load_real_data(data, 'data', qbase_inputs_ext)
    raw_outs = net2(quant_in.astype("float64").as_in_context(qctx))
    rescaled = []
    for idx, out in enumerate(raw_outs):
        rescaled.append(out.as_in_context(ctx) / oscales2[idx])
    return rescaled
def net(data):
    """Quantize the input, run the merged int model ``net4`` and divide
    each output by its scale."""
    quant_in = sim.load_real_data(data, 'data', net4_inputs_ext)
    raw_outs = net4(quant_in.as_in_context(ctx))
    rescaled = []
    for idx, out in enumerate(raw_outs):
        rescaled.append(out / net4_scales[idx])
    return rescaled
def cvm_quantize(data):
    """Quantize the batch per ``inputs_ext`` and forward it through the
    int model ``net2``."""
    quant_batch = sim.load_real_data(data, 'data', inputs_ext)
    device_batch = quant_batch.as_in_context(ctx)
    return net2.forward(device_batch)
def quantize(data, label):
    """Run the quantized graph on one batch and return its validation
    accuracy via ``dataset.validate``."""
    quant_batch = sim.load_real_data(data, 'data', mrt.get_inputs_ext())
    predictions = qgraph(quant_batch.as_in_context(ctx))
    return dataset.validate(qmetric, predictions, label)
def validate_model(sym_path, prm_path, ctx, num_channel=3,
                   input_size=224, batch_size=16, iter_num=10,
                   ds_name='imagenet', from_scratch=0, lambd=None,
                   dump_model=False, input_shape=None):
    """Calibrate + quantize a model with MRT and compare the quantized
    model's accuracy against the original.

    Parameters:
        sym_path, prm_path: paths to the MXNet symbol / params files.
        ctx: device (or device list) used for validation.
        from_scratch: 0 = reuse saved calibration AND quantization,
            1 = recalibrate, reuse quantization, 2 = redo both (see
            ``flag`` below).
        lambd: optional calibration regularizer passed to mrt.calibrate.
        dump_model: when True, export the quantized model to CVM format
            under /data/ryt together with a sample input, then exit(0).
        input_shape: overrides (batch_size, num_channel, input_size,
            input_size) when given.
    """
    from gluon_zoo import save_model
    # flag[0]: run calibration; flag[1]: run quantization.
    flag = [False]*from_scratch + [True]*(2-from_scratch)
    model_name, _ = path.splitext(path.basename(sym_path))
    model_dir = path.dirname(sym_path)
    input_shape = input_shape if input_shape else \
        (batch_size, num_channel, input_size, input_size)
    logger = logging.getLogger("log.validate.%s"%model_name)

    # Download/export the model first if the files are missing.
    if not path.exists(sym_path) or not path.exists(prm_path):
        save_model(model_name)
    model = Model.load(sym_path, prm_path)
    model.prepare(input_shape)
    # model = init(model, input_shape)
    print(tpass.collect_op_names(model.symbol, model.params))
    data_iter_func = ds.data_iter(ds_name, batch_size, input_size=input_size)
    data, _ = data_iter_func()

    # prepare
    mrt = model.get_mrt()
    # mrt = MRT(model)

    # calibrate (or reload a previously saved threshold dict)
    mrt.set_data(data)
    prefix = path.join(model_dir, model_name+'.mrt.dict')
    _, _, dump_ext = utils.extend_fname(prefix, True)
    if flag[0]:
        th_dict = mrt.calibrate(lambd=lambd)
        sim.save_ext(dump_ext, th_dict)
    else:
        (th_dict,) = sim.load_ext(dump_ext)
        mrt.set_th_dict(th_dict)

    mrt.set_input_prec(8)
    mrt.set_output_prec(8)
    # quantize (or reload a previously saved quantized model)
    if flag[1]:
        mrt.quantize()
        mrt.save(model_name+".mrt.quantize", datadir=model_dir)
    else:
        mrt = MRT.load(model_name+".mrt.quantize", datadir=model_dir)

    # dump model
    if dump_model:
        datadir = "/data/ryt"
        model_name = model_name + "_tfm"
        # NOTE(review): the sample is reshaped with dump_shape (batch 1)
        # while to_cvm receives the full input_shape — confirm intended.
        dump_shape = (1, num_channel, input_size, input_size)
        mrt.current_model.to_cvm(
            model_name, datadir=datadir, input_shape=input_shape)
        data = data[0].reshape(dump_shape)
        data = sim.load_real_data(
            data.astype("float64"), 'data', mrt.get_inputs_ext())
        np.save(datadir+"/"+model_name+"/data.npy",
                data.astype('int8').asnumpy())
        sys.exit(0)

    # validate: original model vs quantized model side by side
    org_model = load_model(Model.load(sym_path, prm_path), ctx)
    cvm_quantize = load_model(
        mrt.current_model, ctx, inputs_qext=mrt.get_inputs_ext())
    utils.multi_validate(org_model, data_iter_func, cvm_quantize,
                         iter_num=iter_num,
                         logger=logging.getLogger('mrt.validate'))
    logger.info("test %s finished.", model_name)
# resnet.save_graph(mx.gpu()) # zoo.save_model('resnet50_v1') # zoo.save_model('resnet18_v1') # zoo.save_model('resnet50_v1d_0.86') # zoo.save_model('resnet18_v1b_0.89') # zoo.save_model("resnet50_v2") # exit() # save_data() if False: dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True) sym, params = mx.sym.load(dump_sym), nd.load(dump_params) (inputs_ext, ) = sim.load_ext(dump_ext) data_iter = utils.load_dataset(1) while (1000): data = data_iter.next().data[0] inputs_ext['data']['data'] = sim.load_real_data( data, 'data', inputs_ext) spass.sym_dump_ops(sym, params, inputs_ext, datadir="/data/wlt", ctx=mx.gpu(2)) exit() test_sym_pass(batch_size=16, iter_num=10) # test_sym_pass(batch_size=160, iter_num=1000, quantize=False) # test_sym_nnvm(batch_size=1) # test_performance(16, 10)
def cvm_quantize(data, label):
    """Quantize one batch, run the int model ``net2`` and report the
    running accuracy as a formatted string."""
    quant_batch = sim.load_real_data(data, 'data', inputs_ext)
    preds = net2.forward(quant_batch.as_in_context(ctx))
    qacc.update(label, preds)
    accuracy = qacc.get()[1]
    return "accuracy={:6.2%}".format(accuracy)
def net(data):
    """Quantize the input, run the quantized base graph ``net2``, rescale
    its outputs back to float and feed them into the fp32 top graph."""
    quant_in = sim.load_real_data(data, 'data', net2_inputs_ext)
    rescaled = []
    for idx, out in enumerate(net2(quant_in.as_in_context(ctx))):
        rescaled.append(out / base_oscales[idx])
    return top_graph(*rescaled)
# Persist the quantized trec symbol, then (optionally, via dead branches)
# dump std outputs / operator outputs before evaluating accuracy.
open(quant_sym, "w").write(qsym.tojson())
if False:
    # Dead debug branch: standard dump of the quantized trec model plus
    # an embedding-op dump-file export.
    inputs_ext['data']['shape'] = (38, 1)
    data = data[:, 0].reshape(38, 1)
    _mrt.std_dump(qsym, qparams, inputs_ext, data, "trec",
                  batch=True, data_dtype="int32", max_num=1000,
                  dump_ops=["sentimentnet0_embedding0_fwd"])
    opg.dump_file("take", [
        "/data/std_out/trec/sentimentnet0_embedding0_fwd_0.mrt.dump.in.npy",
        "/data/std_out/trec/sentimentnet0_embedding0_fwd_1.mrt.dump.in.npy"
    ], ["/data/std_out/trec/sentimentnet0_embedding0_fwd_0.mrt.dump.out.npy"],
        "/data/std_out/trec/sentimentnet0_embedding0_fwd.attr")
    exit()
if False:
    # Dead debug branch: dump operator outputs batch after batch.
    # NOTE(review): the loop never breaks, so exit() is unreachable.
    while True:
        data, _ = next(data_iter)
        data = sim.load_real_data(data, 'data', inputs_ext)
        inputs_ext['data']['data'] = data
        spass.sym_dump_ops(qsym, qparams, inputs_ext, ctx=mx.gpu(3))
    exit()
utils.multi_eval_accuracy(trec, data_iter_func, quantize, iter_num=1000)
def test_mrt_quant(batch_size=1, iter_num=10):
    """Quantize the trec sentiment model with MRT and compare accuracy of
    the original (``trec``) vs quantized (``quantize``) graphs.

    Fix: the dead std_dump branch referenced an undefined name
    ``qinputs_ext``; the variable used everywhere else in this function is
    ``inputs_qext``.
    """
    ctx = mx.gpu(3)
    input_shape = (38, batch_size)  # trec inputs are (seq_len=38, batch)
    inputs = [mx.sym.var('data')]
    utils.log_init()
    data_iter = ds.load_trec(batch_size)
    def data_iter_func():
        return next(data_iter)
    data, label = data_iter_func()
    sym_path, prm_path = load_fname()
    model_name, _ = path.splitext(path.basename(sym_path))
    model_dir = path.dirname(sym_path)
    model = Model.load(sym_path, prm_path)
    model = init(model, input_shape)
    net1 = model.to_graph(ctx=ctx)
    def trec(data):
        # Reference fp32 forward pass.
        res = net1(data.as_in_context(ctx))
        return res
    qsym, qparams, inputs_qext = None, None, None
    if True:
        # MRT calibrate + quantize path (the live branch).
        mrt = MRT(model)
        mrt.set_data(data)
        mrt.calibrate(ctx=ctx)
        mrt.set_input_prec(16)
        # mrt.set_fixed('data')
        mrt.set_output_prec(8)
        mrt.quantize()
        mrt.save(model_name + ".mrt.quantize", datadir=model_dir)
        # mrt.compile("trec_tfm", datadir="/data/ryt")
        # data = sim.load_real_data(data, 'data', inputs_qext)
        # np.save("/data/ryt/trec_tfm/data.npy",
        #     sim.load_real_data(data, 'data', inputs_qext).asnumpy().astype('int32'))
        # exit()
    else:
        # Legacy pure-int8 path. NOTE(review): dead branch; it references
        # ``sym``/``params`` that are not defined in this scope.
        inputs_qext['data']['data'] = data
        th_dict = calib.sym_calibrate(sym, params, inputs_qext, ctx=ctx)
        qsym, qparams, _ = calib.pure_int8_quantize(sym, params,
                                                    inputs_qext, th_dict)
    net2 = mrt.current_model.to_graph(ctx=ctx)
    # net2 = gluon.nn.SymbolBlock(qsym, inputs)
    # utils.load_parameters(net2, qparams, ctx=ctx)
    inputs_qext = mrt.get_inputs_ext()
    def quantize(data):
        # Quantized forward pass.
        data = sim.load_real_data(data, 'data', inputs_qext)
        res = net2(data.as_in_context(ctx))
        return res
    if False:
        # Dead debug branch: standard dump of the quantized model.
        inputs_qext['data']['shape'] = (38, 1)
        data = data[:, 0].reshape(38, 1)
        # Bug fix: was ``qinputs_ext`` (undefined name).
        _mrt.std_dump(qsym, qparams, inputs_qext, data, "trec",
                      batch=True, data_dtype="int32", max_num=1000,
                      dump_ops=["sentimentnet0_embedding0_fwd"])
        opg.dump_file("take", [
            "/data/std_out/trec/sentimentnet0_embedding0_fwd_0.mrt.dump.in.npy",
            "/data/std_out/trec/sentimentnet0_embedding0_fwd_1.mrt.dump.in.npy"
        ], [
            "/data/std_out/trec/sentimentnet0_embedding0_fwd_0.mrt.dump.out.npy"
        ], "/data/std_out/trec/sentimentnet0_embedding0_fwd.attr")
    if True:
        # Live branch: dump operator outputs batch after batch.
        # NOTE(review): the loop never breaks, so exit() and the final
        # multi_eval_accuracy call are unreachable as written.
        while True:
            data, _ = next(data_iter)
            inputs_qext = mrt.get_inputs_ext()
            data = sim.load_real_data(data, 'data', inputs_qext)
            inputs_qext['data']['data'] = data
            spass.sym_dump_ops(mrt.current_model.symbol,
                               mrt.current_model.params,
                               inputs_qext, ctx=mx.gpu(3))
        exit()
    utils.multi_eval_accuracy(trec, data_iter_func,
                              quantize, iter_num=iter_num)
def quantize(data):
    """Quantize the batch per ``inputs_ext`` and run the int graph
    ``net2`` on ``ctx``."""
    quant_batch = sim.load_real_data(data, 'data', inputs_ext)
    device_batch = quant_batch.as_in_context(ctx)
    return net2(device_batch)
def test_sym_pass(batch_size=10, iter_num=10):
    """Split darknet53-yolov3-voc into base/top graphs, quantize the parts
    (most branches are dead toggles kept for manual experiments), and
    validate the live quantized pipeline against the fp32 model.
    """
    logger = logging.getLogger("log.test.sym.pass")
    base_ctx = mx.gpu(1)
    ctx = mx.gpu(2)
    input_size = 416
    h, w = input_size, input_size
    inputs_ext = { 'data': { 'shape': (batch_size, 3, h, w), } }
    val_data = dataset.load_voc(batch_size, input_size)
    val_data_iter = iter(val_data)
    def data_iter_func():
        data, label = next(val_data_iter)
        return data, label
    sym_file, param_file = load_fname("_darknet53_voc")
    sym, params = mx.sym.load(sym_file), nd.load(param_file)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
    if False:
        # Dead branch: recalibrate thresholds over 16 batches and save.
        th_dict = {}
        for i in range(16):
            data, _ = data_iter_func()
            for k, v in inputs_ext.items():
                v['data'] = data
            th_dict = calib.sym_calibrate(sym, params, inputs_ext,
                                          old_ths=th_dict, ctx=ctx)
        _, _, dump_ext = load_fname("_darknet53_voc", "dict", True)
        sim.save_ext(dump_ext, th_dict)
    # Reload the saved threshold dict.
    _, _, dump_ext = load_fname("_darknet53_voc", "dict", True)
    (th_dict, ) = sim.load_ext(dump_ext)

    # fp32 reference graph.
    inputs = [mx.sym.var(name) for name in inputs_ext]
    net1 = mx.gluon.nn.SymbolBlock(sym, inputs)
    utils.load_parameters(net1, params, ctx=ctx)
    metric = dataset.load_voc_metric()
    metric.reset()
    def yolov3(data, label):
        def net(data):
            out = net1(data.as_in_context(ctx))
            print([o[0][0][:] for o in out])
            return out
        acc = validate_data(net, data, label, metric)
        return "{:6.2%}".format(acc)

    # Split at the three yolo-output conv layers into base (backbone) and
    # top (detection head) graphs, and persist both.
    keys = [
        'yolov30_yolooutputv30_conv0_fwd',
        'yolov30_yolooutputv31_conv0_fwd',
        'yolov30_yolooutputv32_conv0_fwd',
    ]
    base, base_params, base_inputs_ext, top, top_params, top_inputs_ext \
        = split_model(sym, params, inputs_ext, keys, logger)
    dump_sym, dump_params = load_fname("_darknet53_voc", "base")
    open(dump_sym, "w").write(base.tojson())
    dump_sym, dump_params, dump_ext = load_fname("_darknet53_voc", "top", True)
    open(dump_sym, "w").write(top.tojson())
    nd.save(dump_params, top_params)
    sim.save_ext(dump_ext, top_inputs_ext)
    base_inputs = [mx.sym.var(n) for n in base_inputs_ext]
    base_graph = mx.gluon.nn.SymbolBlock(base, base_inputs)
    utils.load_parameters(base_graph, base_params, ctx=base_ctx)
    top_inputs = [mx.sym.var(n) for n in top_inputs_ext]
    top_graph = mx.gluon.nn.SymbolBlock(top, top_inputs)
    utils.load_parameters(top_graph, top_params, ctx=ctx)

    # quantize base graph
    if False:
        # Dead branch: simulate + realize quantization of the base graph.
        qbase, qbase_params, qbase_prec, base_oscales = calib.sym_simulate(
            base, base_params, base_inputs_ext, th_dict)
        qbase, qbase_params = calib.sym_realize(qbase, qbase_params,
                                                base_inputs_ext, qbase_prec)
        dump_sym, dump_params, dump_ext = load_fname("_darknet53_voc",
                                                     "base.quantize", True)
        open(dump_sym, "w").write(qbase.tojson())
        sim.save_ext(dump_ext, base_inputs_ext, base_oscales)
        nd.save(dump_params, qbase_params)
    if False:
        # Dead branch: evaluate quantized-base + fp32-top pipeline.
        qb_sym, qb_params, qb_ext = load_fname("_darknet53_voc",
                                               "base.quantize", True)
        net2_inputs_ext, base_oscales = sim.load_ext(qb_ext)
        net2_inputs = [mx.sym.var(n) for n in net2_inputs_ext]
        net2 = utils.load_model(qb_sym, qb_params, net2_inputs, ctx=ctx)
        base_metric = dataset.load_voc_metric()
        base_metric.reset()
        def base_quantize(data, label):
            def net(data):
                data = sim.load_real_data(data, 'data', net2_inputs_ext)
                tmp = list(net2(data.as_in_context(ctx)))
                tmp = [t / base_oscales[i] for i, t in enumerate(tmp)]
                return top_graph(*tmp)
            acc = validate_data(net, data, label, base_metric)
            return "{:6.2%}".format(acc)

    # quantize top graph
    if False:
        # Dead branch: mixed-precision quantization of the top graph with
        # per-output threshold/type annotations (score/bbox/ids).
        in_bit, out_bit = 8, 30
        outputs_ext = {
            'yolov30_yolooutputv30_expand_dims0': {
                'threshold': 1, 'type': 'score' },
            'yolov30_yolooutputv31_expand_dims0': {
                'threshold': 1, 'type': 'score' },
            'yolov30_yolooutputv32_expand_dims0': {
                'threshold': 1, 'type': 'score' },
            'yolov30_yolooutputv30_tile0': {
                'threshold': 416, 'type': 'bbox' },
            'yolov30_yolooutputv31_tile0': {
                'threshold': 416, 'type': 'bbox' },
            'yolov30_yolooutputv32_tile0': {
                'threshold': 416, 'type': 'bbox' },
            'yolov30_yolooutputv30_broadcast_add1': {
                'fixed': True, 'type': 'ids' },
            'yolov30_yolooutputv31_broadcast_add1': {
                'fixed': True, 'type': 'ids' },
            'yolov30_yolooutputv32_broadcast_add1': {
                'fixed': True, 'type': 'ids' },
        }
        qsym, qparams, type_ext = anno.mixed_precision(
            top, top_params, top_inputs_ext, th_dict,
            in_bit=in_bit, out_bit=out_bit,
            out_ext=outputs_ext, runtime="cvm")
        out_scales = [type_ext['ids'], type_ext['score'], type_ext['bbox']]
        dump_sym, dump_params, dump_ext = load_fname("_darknet53_voc",
                                                     "top.quantize", True)
        open(dump_sym, "w").write(qsym.tojson())
        sim.save_ext(dump_ext, top_inputs_ext, out_scales)
        nd.save(dump_params, qparams)
    if True:
        # Live branch: evaluate fp32-base + quantized-top pipeline.
        sym_file, param_file, ext_file = load_fname("_darknet53_voc",
                                                    "top.quantize", True)
        net3_inputs_ext, net3_scales = sim.load_ext(ext_file)
        # Recover the names of the base graph's output symbols so each
        # intermediate can be quantized with its own extension entry.
        top_sym = base_graph(mx.sym.Group(base_inputs))
        top_names = [c.attr('name') for c in top_sym]
        net3_inputs = [mx.sym.var(n) for n in net3_inputs_ext]
        net3 = utils.load_model(sym_file, param_file, net3_inputs, ctx=ctx)
        top_qmetric = dataset.load_voc_metric()
        top_qmetric.reset()
        def top_quantize(data, label):
            def net(data):
                tmp = base_graph(data.as_in_context(base_ctx))
                tmp = [t.as_in_context(ctx) for t in tmp]
                tmp = [ sim.load_real_data(tmp[i], n, net3_inputs_ext)
                        for i, n in enumerate(top_names) ]
                out = net3(*tmp)
                out = [(t / net3_scales[i]) for i, t in enumerate(out)]
                print([o[0][0][:] for o in out])
                return out
            acc = validate_data(net, data, label, top_qmetric)
            return "{:6.2%}".format(acc)

    # merge quantize model
    if False:
        # Dead branch: merge quantized base and top into one model.
        qb_sym, qb_params, qb_ext = load_fname("_darknet53_voc",
                                               "base.quantize", True)
        qbase, qbase_params = mx.sym.load(qb_sym), nd.load(qb_params)
        qbase_inputs_ext, _ = sim.load_ext(qb_ext)
        qt_sym, qt_params, qt_ext = load_fname("_darknet53_voc",
                                               "top.quantize", True)
        qtop, qtop_params = mx.sym.load(qt_sym), nd.load(qt_params)
        _, out_scales = sim.load_ext(qt_ext)
        maps = dict(zip([c.attr('name') for c in qbase],
                        [c.attr('name') for c in base]))
        qsym, qparams = merge_model(qbase, qbase_params,
                                    qbase_inputs_ext, qtop, qtop_params, maps)
        sym_file, param_file, ext_file = load_fname("_darknet53_voc",
                                                    "all.quantize", True)
        open(sym_file, "w").write(qsym.tojson())
        nd.save(param_file, qparams)
        sim.save_ext(ext_file, qbase_inputs_ext, out_scales)
    if False:
        # Dead branch: evaluate the fully merged quantized model.
        sym_file, param_file, ext_file = load_fname("_darknet53_voc",
                                                    "all.quantize", True)
        net4_inputs_ext, net4_scales = sim.load_ext(ext_file)
        net4_inputs = [mx.sym.var(n) for n in net4_inputs_ext]
        net4 = utils.load_model(sym_file, param_file, net4_inputs, ctx=ctx)
        all_qmetric = dataset.load_voc_metric()
        all_qmetric.reset()
        def all_quantize(data, label):
            def net(data):
                data = sim.load_real_data(data, 'data', net4_inputs_ext)
                out = net4(data.as_in_context(ctx))
                out = [(t / net4_scales[i]) for i, t in enumerate(out)]
                return out
            acc = validate_data(net, data, label, all_qmetric)
            return "{:6.2%}".format(acc)
    if False:
        # Dead branch: export 50 quantized data/label batches as .npy.
        sym_file, param_file, ext_file = load_fname("_darknet53_voc",
                                                    "all.quantize", True)
        net4_inputs_ext, net4_scales = sim.load_ext(ext_file)
        datadir = "/data/voc/data/"
        for i in range(50):
            countdir = datadir + "/" + str(i)
            os.makedirs(countdir, exist_ok=True)
            data, label = data_iter_func()
            data = sim.load_real_data(data, 'data', net4_inputs_ext)
            np.save(countdir + "/data.npy", data.asnumpy().astype('int8'))
            np.save(countdir + "/label.npy", label.asnumpy())
        # data = sim.load_real_data(data, 'data', net4_inputs_ext)
        # np.save("/tmp/yolo/data", data.asnumpy().astype('int8'))
        # out = net4(data.as_in_context(ctx))
        # for i, o in enumerate(out):
        #     np.save("/tmp/yolo/result"+str(i), o.asnumpy().astype('int32'))
        exit()
    utils.multi_validate(yolov3, data_iter_func,
                         top_quantize,
                         # base_quantize, # top_quantize, all_quantize,
                         iter_num=iter_num, logger=logger)