def get_onnx_model(model_name, batch_size, qconfig, original=False, dataset=None):
    """Load the ViT-B/32 ONNX model into Relay and optionally quantize it.

    The tuning log and the ONNX file are fetched with ``download_file`` into
    the current working directory before the model is loaded.

    Parameters
    ----------
    model_name : str
        Must be ``"vit32"``; any other value fails the assertion below.
    batch_size : int
        Leading dimension of the ``(batch_size, 3, 224, 224)`` shape bound to
        the ``"data"`` input.
    qconfig
        Quantization configuration, entered as a context manager around
        ``qtz.quantize``.
    original : bool, optional
        When true, return right after ``prerequisite_optimize`` without
        quantizing.
    dataset : optional
        When not ``None`` it is forwarded to ``qtz.quantize`` as ``dataset=``
        and quantization runs under ``tvm.target.cuda()`` with the tuning log
        applied through ``apply_history_best``.

    Returns
    -------
    tuple
        ``(qfunc, params, logfile)`` where ``logfile`` is the tuning-log file
        name passed to ``download_file``.
    """
    assert model_name == "vit32", "Only support vit32 model!"
    base = "https://github.com/TheGreatCold/tvm-vit/raw/d2aa1e60eef42e2fdedbd1e13aa85ac5faf0a7fc"
    logfile = "gtx1660_vit_B32_224.log"
    onnx_path = "vit_B32_224.onnx"

    download_file(base, logfile)
    download_file(base, onnx_path)

    # Use a context manager so the file handle is closed as soon as the model
    # is parsed instead of being left for the garbage collector.
    with open(onnx_path, "rb") as model_file:
        onnx_graph = onnx.load(model_file)

    data_shape = (batch_size, 3, 224, 224)
    mod, params = relay.frontend.from_onnx(onnx_graph, {"data": data_shape})

    with tvm.transform.PassContext(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(mod, params=params)
    logging.debug("original")
    logging.debug(qfunc.astext(show_meta_data=False))
    if original:
        return qfunc, params, logfile

    with qconfig:
        logging.debug("current quantize config")
        logging.debug(qtz.current_qconfig())

        if dataset is not None:
            # Dataset-driven quantization is run with the CUDA target and the
            # downloaded tuning log active.
            with tvm.target.cuda():
                with tvm.autotvm.apply_history_best(logfile):
                    qfunc = qtz.quantize(qfunc, params, dataset=dataset)
        else:
            qfunc = qtz.quantize(qfunc, params)

        logging.debug("after quantize")
        logging.debug(qfunc.astext(show_meta_data=False))
    return qfunc, params, logfile
def get_model(model_name, batch_size, qconfig, target=None, original=False, simulated=False):
    """Import a pretrained Gluon vision model into Relay and optionally quantize it.

    Parameters
    ----------
    model_name : str
        Name passed to ``gluon.model_zoo.vision.get_model``. The spatial input
        size is 299 for ``'inceptionv3'`` and 224 for everything else.
    batch_size : int
        Leading dimension of the ``"data"`` input shape.
    qconfig
        Quantization configuration, entered as a context manager around
        ``qtz.quantize``.
    target, simulated
        Accepted but not used by this function.
    original : bool, optional
        When true, the optimized but unquantized function is returned.

    Returns
    -------
    The Relay function after ``prerequisite_optimize`` and, unless
    ``original`` is true, after ``qtz.quantize``.
    """
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    if model_name == 'inceptionv3':
        side = 299
    else:
        side = 224
    shape_dict = {"data": (batch_size, 3, side, side)}

    mod, params = relay.frontend.from_mxnet(pretrained_net, shape_dict)
    main_func = mod['main']
    with tvm.transform.PassContext(opt_level=3):
        result = relay.quantize.prerequisite_optimize(main_func, params=params)

    logging.debug('original')
    logging.debug(result.astext(show_meta_data=False))

    if not original:
        with qconfig:
            logging.debug('current quantize config')
            logging.debug(qtz.current_qconfig())
            result = qtz.quantize(result)
            logging.debug('after quantize')
            logging.debug(result.astext(show_meta_data=False))

    return result
def quantize_relay_module(mod, params, qconfig=None):
    """
    Quantize the ``main`` function of a relay module under the given qconfig.

    Parameters:
    ------
    mod : tvm.relay.module
        The original module. Its ``'main'`` entry is replaced in place with
        the quantized function.

    params
        Forwarded to ``qtz.quantize`` as ``params=``.

    qconfig : tvm.relay.quantize.quantize.QConfig
        The quantization configuration. When falsy (e.g. ``None``), a default
        one is created with ``qtz.qconfig()``.

    Returns:
    ------
    mod
        The same module object that was passed in, with ``mod['main']``
        quantized.
    """
    # Fall back to the default configuration when the caller supplied none.
    active_config = qconfig or qtz.qconfig()

    with active_config:
        logging.debug('current quantize config')
        logging.debug(qtz.current_qconfig())

        quantized_main = qtz.quantize(mod['main'], params=params)
        mod['main'] = quantized_main

        logging.debug('after quantize')
        logging.debug(mod['main'].astext(show_meta_data=False))

    return mod
def get_model(model_name, batch_size, qconfig, target=None, original=False, simulated=False):
    """Import a pretrained Gluon vision model into Relay and optionally quantize it.

    Parameters
    ----------
    model_name : str
        Name passed to ``gluon.model_zoo.vision.get_model``. The spatial input
        size is 299 for ``'inceptionv3'`` and 224 otherwise.
    batch_size : int
        Leading dimension of the ``"data"`` input shape.
    qconfig
        Quantization configuration, entered as a context manager around
        ``qtz.quantize``.
    target, simulated
        Accepted but not used by this function.
    original : bool, optional
        When true, return the optimized but unquantized function.

    Returns
    -------
    qfunc
        Only the optimized function, when ``original`` is true.
    (qfunc, params, input_shape)
        The quantized function, the imported parameters and the input shape
        tuple, when ``original`` is false.

    NOTE: the two return paths have different shapes. This is kept as-is so
    existing callers of either path keep working.
    """
    gluon_model = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    img_size = 299 if model_name == 'inceptionv3' else 224
    input_shape = (batch_size, 3, img_size, img_size)
    mod, params = relay.frontend.from_mxnet(gluon_model, {"data": input_shape})

    qfunc = mod['main']
    with relay.build_config(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(qfunc, params=params)
    logging.debug('original')
    logging.debug(qfunc.astext(show_meta_data=False))
    if original:
        return qfunc

    with qconfig:
        logging.debug('current quantize config')
        logging.debug(qtz.current_qconfig())
        qfunc = qtz.quantize(qfunc, params=params)
        logging.debug('after quantize')
        logging.debug(qfunc.astext(show_meta_data=False))
    return qfunc, params, input_shape
def test_quantize_pass():
    """Compare ``qtz.quantize`` on a single conv2d with a hand-built int8 graph.

    A float conv2d graph is quantized by the pass, an equivalent quantized
    graph is assembled manually from explicit scale/round/clip/cast ops, and
    both are evaluated on the first dataset sample; the outputs must agree
    within ``rtol=1e-3``.
    """

    def quantize_weight(arr):
        """Quantize a weight array to an int8 relay constant using a power-of-two scale."""
        values = arr.asnumpy()
        maximum = np.amax(np.abs(values))
        scale = 2**math.ceil(math.log(maximum, 2))
        # Clip BEFORE narrowing to int8: when the largest magnitude is an exact
        # power of two the rounded value reaches 128, which would wrap to -128
        # in the cast and then be clipped to -127 instead of saturating at 127.
        out = np.clip(np.around(values / scale * 128), -127, 127).astype('int8')
        return relay.const(out, 'int8')

    n, c, h, w = 1, 3, 224, 224

    def make_graph(data):
        """Build the float reference: one 3x3 conv2d with a free weight variable."""
        weight = relay.var("conv_weight")
        out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1), channels=c)
        out = relay.Function(relay.ir_pass.free_vars(out), out)
        return out

    def make_qgraph(data, weight):
        """Build the expected quantized graph by hand around an int8 conv2d."""
        # Quantize the input: scale, round, saturate, then narrow to int8.
        out = data * relay.const(32.0)
        out = relay.round(out)
        out = relay.clip(out, a_min=-127, a_max=127)
        out = out.astype('int8')

        out = relay.nn.conv2d(out, weight, kernel_size=(3, 3), padding=(1, 1),
                              channels=c, out_dtype='int32')

        # Dequantize the int32 accumulator back to float32.
        out = out.astype('float32')
        out = relay.multiply(out, relay.const(0.00024414062))
        out = relay.Function(relay.ir_pass.free_vars(out), out)
        return out

    np.random.seed(42)

    data = relay.var("data", relay.TensorType((n, c, h, w), "float32"))
    graph = make_graph(data)
    dataset, params = make_dataset(graph, 10)

    with qtz.qconfig(skip_k_conv=0, global_scale=4.0,
                     round_for_shift=False, store_lowbit_output=False):
        qgraph0 = qtz.quantize(graph, params)
        qgraph0 = relay.ir_pass.infer_type(qgraph0)

    conv_weight = quantize_weight(params['conv_weight'])
    qgraph1 = make_qgraph(data, conv_weight)
    qgraph1 = relay.ir_pass.infer_type(qgraph1)

    # Use a dedicated name for the executor rather than rebinding ``graph``.
    executor = relay.create_executor('graph')
    sample = dataset[0]['data']
    res0 = executor.evaluate(qgraph0)(sample)
    res1 = executor.evaluate(qgraph1)(sample)
    tvm.testing.assert_allclose(res0.asnumpy(), res1.asnumpy(), rtol=1e-3)
def get_model(model_name, batch_size, qconfig, original=False):
    """Import a pretrained Gluon vision model into Relay and optionally quantize it.

    Parameters
    ----------
    model_name : str
        Name passed to ``gluon.model_zoo.vision.get_model``. The spatial input
        size is 299 for ``"inceptionv3"`` and 224 otherwise.
    batch_size : int
        Leading dimension of the ``"data"`` input shape.
    qconfig
        Quantization configuration, entered as a context manager around
        ``qtz.quantize``.
    original : bool, optional
        When true, the imported module is returned without quantization.

    Returns
    -------
    tuple
        ``(mod, params)`` when ``original`` is true, otherwise
        ``(qfunc, params)`` with ``qfunc`` the result of ``qtz.quantize``.
    """
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    if model_name == "inceptionv3":
        side = 299
    else:
        side = 224
    shape_dict = {"data": (batch_size, 3, side, side)}
    mod, params = relay.frontend.from_mxnet(pretrained_net, shape_dict)

    logging.debug("original")
    logging.debug(mod.astext(show_meta_data=False))

    result = mod
    if not original:
        with qconfig:
            logging.debug("current quantize config")
            logging.debug(qtz.current_qconfig())
            result = qtz.quantize(mod, params)
            logging.debug("after quantize")
            logging.debug(result.astext(show_meta_data=False))

    return result, params
dtype_dict = {} # convert nnvm to relay print("convert nnvm symbols into relay function...") from nnvm.to_relay import to_relay func, params = to_relay(sym, shape, 'float32', params=params) # optimization print("optimize relay graph...") with tvm.relay.build_config(opt_level=2): func = tvm.relay.optimize(func, target, params) # quantize print("apply quantization...") from tvm.relay import quantize with quantize.qconfig(): func = quantize.quantize(func, params) # Relay build print("Compiling the model...") print(func.astext(show_meta_data=False)) with tvm.relay.build_config(opt_level=3): graph, lib, params = tvm.relay.build(func, target=target, params=params) # Save the model tmp = util.tempdir() lib_fname = tmp.relpath('model.tar') lib.export_library(lib_fname) # NNVM # with nnvm.compiler.build_config(opt_level=2): # graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype_dict, params)