def load_mxnet_resnet(
        symbol_file="/home/wlt/tvm-cvm/data/resnet-152-symbol.json",
        params_file="/home/wlt/tvm-cvm/data/resnet-152-0000.params"):
    """Load an MXNet ResNet checkpoint, quantize it and print the result.

    The checkpoint is read from ``symbol_file`` / ``params_file``, its
    parameters are split into argument and auxiliary dictionaries, and the
    symbol is passed through ``qm.quantize_model`` with naive calibration
    and ``uint8`` as the quantized dtype. Whatever ``qm.quantize_model``
    returns is printed; the function itself returns ``None``.

    Parameters
    ----------
    symbol_file : str, optional
        Path to the symbol JSON file handed to ``mx.symbol.load``.
        Defaults to the location originally hard-coded in this function.
    params_file : str, optional
        Path to the parameter file handed to ``mx.nd.load``. Every key in
        that file must look like ``"<type>:<name>"``; entries whose type is
        ``arg`` or ``aux`` are kept, any other type is ignored.
        Defaults to the location originally hard-coded in this function.

    Raises
    ------
    ValueError
        If a parameter key contains no ``':'`` separator (the two-way
        unpacking of ``split`` fails).
    """
    resnet_symbol = mx.symbol.load(symbol_file)
    resnet_params = mx.nd.load(params_file)

    # Separate the flat "<type>:<name>" mapping into the two dictionaries
    # that the quantization call takes.
    arg_params = {}
    aux_params = {}
    for k, v in resnet_params.items():
        tp, name = k.split(':', 1)
        if tp == 'arg':
            arg_params[name] = v
        elif tp == 'aux':
            aux_params[name] = v

    input_shape = (3, 224, 224)
    # Only referenced by the disabled prediction block below.
    output_shape = (
        1,
        1000,
    )

    # NOTE(review): make_dataset is defined elsewhere; presumably it builds
    # an iterator of 100 samples of shape `input_shape` -- confirm.
    data_iter = make_dataset(100, input_shape)
    ctx = mx.gpu()

    # Disabled float-model prediction, kept for reference:
    # print ("=== Predict Model ===")
    # mod = mx.mod.Module(resnet_symbol, context=ctx)
    # mod.bind(for_training=False, data_shapes=[('data', input_shape)],
    #          label_shapes=[('softmax_label', output_shape)])
    # mod.set_params(arg_params, aux_params)
    # res = mod.predict(data_iter, num_batch=10).asnumpy()
    # print (res.shape)
    # res = res[0]
    # print (res.argmax(), res[res.argmax()], res.max())

    print("=== Quantize Model ===")
    data_iter.reset()
    # print (data_iter.provide_label)

    # Symbols listed here are passed as `excluded_sym_names`, i.e. they are
    # excluded from quantization.
    excluded_sym_names = ['flatten0', 'fc1', 'pooling0']
    resnet_symbol = resnet_symbol.get_backend_symbol('MKLDNN')
    qsym = qm.quantize_model(resnet_symbol, arg_params, aux_params,
                             calib_data=data_iter,
                             num_calib_examples=50,
                             calib_mode='naive',
                             quantized_dtype='uint8',
                             excluded_sym_names=excluded_sym_names,
                             label_names=["softmax_label"],
                             calib_quantize_op=True,
                             logger=logger,
                             ctx=ctx)
    print(qsym)
def test_quantized_fc_bias_overflow(data_min, data_max, weight_min, weight_max):
    """Compare a float32 FullyConnected layer with its int8-quantized form.

    A single ``FullyConnected`` layer (32 inputs, 64 hidden units, with
    bias) is run once in float32 and once after ``quantization.quantize_model``
    with ``quantized_dtype='int8'`` and naive calibration on the same input.
    The two outputs must agree under ``assert_almost_equal_with_err`` with
    ``rtol=1e-2``, ``atol=1e-2`` and ``etol=0.01``.

    Parameters
    ----------
    data_min, data_max
        Bounds of the uniform distribution the input data is drawn from.
    weight_min, weight_max
        Bounds of the uniform distribution the weights are drawn from.

    The bias is always drawn uniformly from ``[-1, +1]``.
    """
    in_features = 32
    hidden_units = 64
    data_shape = (1, in_features)

    # Float32 reference graph.
    data_var = mx.symbol.Variable('data', shape=data_shape, dtype='float32')
    weight_var = mx.symbol.Variable('weight', dtype='float32')
    bias_var = mx.symbol.Variable('bias', dtype='float32')
    fc_sym = mx.symbol.FullyConnected(data=data_var,
                                      weight=weight_var,
                                      bias=bias_var,
                                      name='fc',
                                      num_hidden=hidden_units)

    # Random tensors; drawn in the order data, weight, bias.
    data_nd = mx.random.uniform(data_min, data_max,
                                shape=data_shape, ctx=mx.cpu())
    weight_nd = mx.random.uniform(weight_min, weight_max,
                                  shape=[hidden_units, in_features],
                                  ctx=mx.cpu())
    bias_nd = mx.random.uniform(-1, +1, shape=[hidden_units], ctx=mx.cpu())
    arg_params = {'weight': weight_nd, 'bias': bias_nd}

    # Run the float32 layer to obtain the reference output.
    ref_exec = fc_sym._bind(mx.cpu(), arg_params, args_grad=None)
    ref_exec.forward(data=data_nd)
    ref_exec.outputs[0].wait_to_read()

    # Apply the subgraph pass, then quantize with the input itself as the
    # only calibration batch.
    fused_sym = fc_sym.optimize_for(QUANTIZE_SG_PASS_NAME,
                                    dedup_subgraph=True,
                                    skip_infer=True)
    calib_loader = mx.gluon.data.DataLoader(data_nd, batch_size=1)
    quantized = quantization.quantize_model(sym=fused_sym,
                                            arg_params=arg_params,
                                            aux_params={},
                                            ctx=mx.cpu(),
                                            excluded_sym_names=None,
                                            excluded_op_names=None,
                                            quantized_dtype='int8',
                                            calib_mode='naive',
                                            calib_data=calib_loader,
                                            num_calib_batches=1,
                                            quantize_mode='full')
    qsym, qarg_params, qaux_params = quantized

    # The quantized executor is bound with the input as an argument, so
    # forward() is called without data.
    qarg_params['data'] = data_nd
    qsym = qsym.optimize_for(QUANTIZE_SG_PASS_NAME,
                             dedup_subgraph=True,
                             skip_infer=True)
    quant_exec = qsym._bind(mx.cpu(), qarg_params, args_grad=None)
    quant_exec.forward()
    quant_exec.outputs[0].wait_to_read()

    expected = ref_exec.outputs[0].asnumpy()
    actual = quant_exec.outputs[0].asnumpy()
    assert_almost_equal_with_err(expected, actual,
                                 rtol=1e-2, atol=1e-2, etol=0.01)
exclude_first_conv = True excluded_sym_names = [ "yolov30_yolooutputv30_conv0_fwd", "yolov30_yolooutputv31_conv0_fwd", "yolov30_yolooutputv32_conv0_fwd" ] calib_layer = lambda name: name.endswith('_output') and (name.find( 'conv') != -1 or name.find('fc') != -1) if exclude_first_conv: excluded_sym_names += ['darknetv30_conv0_fwd'] if calib_mode == 'none': qsym, qarg_params, aux_params = quantize_model( sym=sym, arg_params=arg_params, aux_params=aux_params, ctx=ctx, excluded_sym_names=excluded_sym_names, calib_mode=calib_mode, quantized_dtype=quantized_dtype, logger=logger) sym_name = '{}-{}-{}-symbol.json'.format(prefix, 'quantized', quantized_dtype) save_symbol(sym_name, qsym, logger) param_name = '{}-{}-{}-{}.params'.format(prefix, 'quantized', quantized_dtype, "0000") else: data = get_yolo_dataiter(batch_size, num_calib_batches) cqsym, qarg_params, aux_params = quantize_model( sym=sym, arg_params=arg_params, aux_params=aux_params,
'checkpoint-0000.params', logger) calib_layer = lambda name: (name.find('fullyconnected') != -1 or \ name.find('FullyConnected') != -1 or \ name.find('fully_connected') != -1 or \ name.find('concat0_output') != -1) sym = sym.get_backend_symbol('MKLDNN') excluded_sym_names = ['concat0'] cqsym, qarg_params, aux_params = quantize_model( sym=sym, arg_params=arg_params, aux_params=aux_params, data_names=['csr_data', 'dns_data'], label_names=[ 'softmax_label', ], ctx=ctx, excluded_sym_names=excluded_sym_names, calib_mode=calib_mode, calib_data=data, num_calib_examples=num_calib_batches * batch_size, calib_layer=calib_layer, quantized_dtype=args.quantized_dtype, logger=logger) if calib_mode == 'entropy': suffix = '-quantized-%dbatches-entropy' % num_calib_batches elif calib_mode == 'naive': suffix = '-quantized-%dbatches-naive' % num_calib_batches else: raise ValueError( 'unknow calibration mode %s received, only supports `none`, `naive`, and `entropy`' % calib_mode)