def check_quantize_whole_model(out_type):
    """Quantize a two-convolution network as a whole and run the result.

    A ``conv0 -> conv1`` symbol (1x1 kernels, 16 filters each) is built on a
    ``(4, 4, 10, 10)`` input, partitioned with the ``MKLDNN_QUANTIZE`` backend
    pass and quantized with naive calibration and an empty exclusion list.
    The quantized symbol is partitioned once more and handed to
    ``check_qsym_forward``.

    ``out_type`` is forwarded to ``quantize_model`` as ``quantized_dtype``.
    """
    def is_output_layer(name):
        # Calibration filter: only layers whose name ends in '_output'.
        return name.endswith('_output')

    input_shape = (4, 4, 10, 10)

    net = mx.sym.Variable('data')
    net = mx.sym.Convolution(net, kernel=(1, 1), num_filter=16, name='conv0')
    net = mx.sym.Convolution(net, kernel=(1, 1), num_filter=16, name='conv1')
    partitioned = net.get_backend_symbol('MKLDNN_QUANTIZE')

    # The fp32 module is only used to obtain initialised parameters.
    fp32_mod = Module(symbol=net, label_names=None)
    fp32_mod.bind(for_training=False, data_shapes=[('data', input_shape)])
    fp32_mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = fp32_mod.get_params()

    calib_iter = DummyIter(
        mx.io.NDArrayIter(data=mx.nd.random.uniform(shape=input_shape)))

    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=partitioned,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=[],
        quantized_dtype=out_type,
        calib_mode='naive',
        calib_data=calib_iter,
        calib_layer=is_output_layer,
        label_names=None,
        num_calib_examples=1)

    check_qsym_forward(qsym.get_backend_symbol('MKLDNN_QUANTIZE'),
                       qarg_params, qaux_params, input_shape)
def check_quantize_model(qdtype):
    """Quantize the symbol from ``get_fp32_sym()`` and validate the outcome.

    The model is quantized twice, first with ``calib_mode='none'`` and then
    with ``calib_mode='naive'`` on random calibration data. After each run
    the returned parameters are compared with the expected ones; after the
    calibrated run the symbol attributes are inspected as well.

    ``qdtype`` is forwarded to ``quantize_model`` as ``quantized_dtype`` and
    is the value every ``out_type`` attribute is expected to carry.
    """
    def check_params(params, qparams, qsym=None):
        # Without a symbol the params must pass through unchanged; with one
        # they must equal what _quantize_params produces for that symbol.
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(qsym, params)
        assert len(expected) == len(qparams)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        # Every 'requantize_' entry must carry both calibration bounds.
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' not in node_name:
                continue
            assert 'min_calib_range' in node_attrs
            assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        # Every '_quantize' entry must declare the requested output type.
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' not in node_name:
                continue
            assert 'out_type' in node_attrs
            assert node_attrs['out_type'] == qdtype

    def quantize(**calib_kwargs):
        return mx.contrib.quant.quantize_model(
            sym=sym, arg_params=arg_params, aux_params=aux_params,
            ctx=mx.current_context(), quantized_dtype=qdtype,
            **calib_kwargs)

    sym = get_fp32_sym()
    mod = Module(symbol=sym)
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)
    mod.bind(data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params()
    arg_params, aux_params = mod.get_params()

    # Pass 1: no calibration.
    qsym, qarg_params, qaux_params = quantize(calib_mode='none')
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)

    # Pass 2: naive calibration on random data.
    calib_iter = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=data_shape)))
    qsym, qarg_params, qaux_params = quantize(
        calib_mode='naive', calib_data=calib_iter, num_calib_examples=20)
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_calibrated(qsym)
    check_qsym_qdtype(qsym, qdtype)
def get_movielens_iter(filename, batch_size, dummy_iter):
    """Parse a '::'-separated ratings file into a training data iterator.

    Not particularly fast. Lines that do not split into exactly four fields
    are ignored; the first three fields of the remaining lines become the
    ``user``, ``item`` and ``score`` columns. When ``dummy_iter`` is truthy,
    reading stops once more than ``batch_size * 10`` samples were collected
    and the iterator is wrapped in ``DummyIter``.

    Returns a single ``mx.io.PrefetchingIter`` around a shuffled
    ``mx.io.NDArrayIter`` with data ``user``/``item`` and label ``score``.
    """
    print("Preparing data iterators for " + filename + " ... ")

    users, items, scores = [], [], []
    with open(filename, 'r') as handle:
        for raw_line in handle:
            fields = raw_line.strip().split('::')
            if len(fields) != 4:
                continue
            users.append(fields[0])
            items.append(fields[1])
            scores.append(fields[2])
            # len(users) is the number of samples accepted so far.
            if dummy_iter and len(users) > batch_size * 10:
                break

    # Convert the raw string columns to NDArrays.
    user_nd = mx.nd.array(users, dtype='int32')
    item_nd = mx.nd.array(items)
    score_nd = mx.nd.array(scores)

    train_iter = mx.io.NDArrayIter(
        data={'user': user_nd, 'item': item_nd},
        label={'score': score_nd},
        batch_size=batch_size,
        shuffle=True)
    if dummy_iter:
        train_iter = DummyIter(train_iter)
    return mx.io.PrefetchingIter(train_iter)
def check_quantize(sym, data_shape, out_type, name='conv',
                   check_calibration=True, gluon_forward=False):
    """Quantize ``sym`` through the subgraph passes configured for ``name``.

    A 10-unit fully connected layer named ``fc_softmax`` is appended to
    ``sym``; unless ``gluon_forward`` is set, a ``SoftmaxOutput`` follows it.
    The fp32 network is run once on random data to obtain reference outputs,
    then the partitioned symbol is quantized with naive calibration and
    post-processed with the second configured pass.

    With ``gluon_forward`` the quantized symbol is passed to
    ``check_qsym_gluon_forward``. Otherwise it is run through
    ``check_qsym_dummy_forward`` and ``check_qsym_forward``, and the quantized
    outputs must match the fp32 reference within ``atol=1``.

    ``out_type`` is forwarded as ``quantized_dtype``; ``check_calibration``
    toggles the ``check_qsym_calibrated`` step.
    """
    def is_output_layer(layer_name):
        return layer_name.endswith('_output')

    passes = config[name]
    sg_pass_name = passes[SG_PASS_NAME]
    post_sg_pass_name = passes[POST_SG_PASS_NAME]
    # Evaluated once; keeps the original equality-with-True semantics.
    use_gluon = gluon_forward == True

    fc = mx.sym.FullyConnected(data=sym, num_hidden=10, flatten=True,
                               name='fc_softmax')
    if use_gluon:
        net = fc
        partitioned = net.get_backend_symbol(sg_pass_name)
        mod = Module(symbol=net, label_names=[])
        mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    else:
        net = mx.sym.SoftmaxOutput(data=fc, name='softmax')
        partitioned = net.get_backend_symbol(sg_pass_name)
        label_shape = (data_shape[0], 10)
        mod = Module(symbol=net)
        mod.bind(for_training=False,
                 data_shapes=[('data', data_shape)],
                 label_shapes=[('softmax_label', label_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    # fp32 reference run.
    inputs = [mx.random.uniform(-1, 1, shape=in_shape, ctx=mx.current_context())
              for _, in_shape in mod.data_shapes]
    batch = mx.io.DataBatch(inputs, [])
    mod.forward(batch, is_train=False)
    for out in mod.get_outputs():
        out.wait_to_read()
    ref_out = mod.get_outputs()

    # NOTE(review): both exclusions are assumed to sit under this guard;
    # confirm against the intended test setup.
    if mx.current_context() == mx.cpu() and use_gluon:
        excluded_sym_names = ['sg_mkldnn_fully_connected_0', 'fc_softmax']
    else:
        excluded_sym_names = []

    calib_iter = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=data_shape)))
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=partitioned,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype=out_type,
        calib_mode='naive',
        calib_data=calib_iter,
        calib_layer=is_output_layer,
        num_calib_examples=5)
    qsym = qsym.get_backend_symbol(post_sg_pass_name)

    if check_calibration:
        check_qsym_calibrated(qsym, out_type, name=name)

    if use_gluon:
        check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape)
        return

    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params,
                                       batch, data_shape, label_shape)
    for idx, reference in enumerate(ref_out):
        assert_almost_equal(reference.asnumpy(),
                            quantized_out[idx].asnumpy(), atol = 1)
def check_quantize(sym, data_shape, check_conv=True):
    """Quantize ``sym`` via the MKLDNN subgraph passes and compare with fp32.

    ``sym`` is extended with a 10-unit fully connected layer ``fc`` and a
    ``SoftmaxOutput``. The fp32 network is run once on random data to obtain
    reference outputs. The ``MKLDNN``-partitioned symbol is then quantized to
    ``uint8`` with naive calibration (excluding ``fc`` when the current
    context is the CPU) and post-processed with ``MKLDNN_POST_QUANTIZE``.

    The quantized outputs must match the reference within ``atol=1``.
    ``check_conv`` toggles the ``check_qsym_calibrated`` step.
    """
    def is_output_layer(layer_name):
        return layer_name.endswith('_output')

    fc = mx.sym.FullyConnected(data=sym, num_hidden=10, flatten=True, name='fc')
    net = mx.sym.SoftmaxOutput(data=fc, name='softmax')
    partitioned = net.get_backend_symbol("MKLDNN")
    label_shape = (data_shape[0], 10)

    mod = Module(symbol=net)
    mod.bind(for_training=False,
             data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    # fp32 reference run.
    inputs = [mx.random.uniform(-1, 1, shape=in_shape, ctx=mx.current_context())
              for _, in_shape in mod.data_shapes]
    batch = mx.io.DataBatch(inputs, [])
    mod.forward(batch, is_train=False)
    for out in mod.get_outputs():
        out.wait_to_read()
    ref_out = mod.get_outputs()

    excluded_sym_names = ['fc'] if mx.current_context() == mx.cpu() else []

    calib_iter = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=data_shape)))
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=partitioned,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype='uint8',
        calib_mode='naive',
        calib_data=calib_iter,
        calib_layer=is_output_layer,
        calib_quantize_op=True,
        num_calib_examples=5)
    qsym = qsym.get_backend_symbol("MKLDNN_POST_QUANTIZE")

    if check_conv:
        check_qsym_calibrated(qsym)

    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params,
                                       batch, data_shape, label_shape)
    for idx, reference in enumerate(ref_out):
        assert_almost_equal(reference.asnumpy(),
                            quantized_out[idx].asnumpy(), atol=1)
    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
def check_quantize_model(qdtype):
    """Quantize two fp32 networks and run forward passes on the results.

    The symbols returned by ``get_fp32_residual()`` and
    ``get_fp32_sym_with_multiple_outputs()`` are each quantized with two
    different exclusion lists (with and without ``pool0``). For every
    combination the model is quantized without calibration and then with
    naive calibration; parameters, calibration attributes and ``out_type``
    attributes are checked and a forward pass is executed.

    The function returns early, printing a message, for configurations that
    are reported as unsupported: native CPU, MKLDNN with ``int8``, and GPU
    with ``uint8``.

    ``qdtype`` is forwarded to ``quantize_model`` as ``quantized_dtype``.
    """
    if is_test_for_native_cpu():
        print(
            'skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet'
        )
        return
    elif qdtype == 'int8' and is_test_for_mkldnn():
        print(
            'skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet'
        )
        return
    elif qdtype == 'uint8' and is_test_for_gpu():
        print(
            'skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet'
        )
        return

    def check_params(params, qparams, qsym=None):
        # Without a symbol the params must pass through unchanged; with one
        # they must equal what _quantize_params produces for that symbol.
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(
                qsym, params, th_dict={})
        assert len(expected) == len(qparams)
        for k, v in expected.items():
            assert k in qparams
            assert same(v.asnumpy(), qparams[k].asnumpy())

    def check_qsym_calibrated(qsym):
        # Every 'requantize_' entry must carry both calibration bounds.
        for k, v in qsym.attr_dict().items():
            if 'requantize_' in k:
                assert 'min_calib_range' in v
                assert 'max_calib_range' in v

    def check_qsym_qdtype(qsym, qdtype):
        # Every '_quantize' entry must declare the requested output type.
        for k, v in qsym.attr_dict().items():
            if '_quantize' in k:
                assert 'out_type' in v
                assert v['out_type'] == qdtype

    def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape, label_shape):
        # Bind the quantized symbol and push one random batch through it.
        mod = mx.mod.Module(symbol=qsym, context=mx.current_context())
        mod.bind(for_training=False,
                 data_shapes=[('data', data_shape)],
                 label_shapes=[('softmax_label', label_shape)])
        mod.set_params(qarg_params, qaux_params)
        data = [mx.random.uniform(-1.0, 1.0, shape=shape)
                for _, shape in mod.data_shapes]
        batch = mx.io.DataBatch(data, [])
        mod.forward(batch, is_train=False)
        for output in mod.get_outputs():
            output.wait_to_read()

    sym = get_fp32_residual()
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    label_shape = (batch_size, 10)

    length = batch_size  # specify num of outputs from split op
    msym = get_fp32_sym_with_multiple_outputs(length)
    msym_label_shape = (length, 10)
    msym_data_shape = (length, 4, 4, 10, 10)

    for s, dshape, lshape in zip((sym, msym),
                                 (data_shape, msym_data_shape),
                                 (label_shape, msym_label_shape)):
        mod = Module(symbol=s)
        mod.bind(data_shapes=[('data', dshape)],
                 label_shapes=[('softmax_label', lshape)])
        mod.init_params()
        arg_params, aux_params = mod.get_params()

        excluded_names = []
        if mx.current_context() == mx.cpu():
            excluded_names += ['fc']
        # NOTE(review): 'concat' is assumed to be excluded on every context,
        # not only on CPU; confirm against the intended test setup.
        excluded_names += ['concat']

        optional_names = ['pool0']
        for skip_optional_names in [False, True]:
            # The original also assigned a misspelled, never-read
            # `exclude_sym_names = []` here; that dead local is removed.
            if skip_optional_names:
                excluded_sym_names = excluded_names
            else:
                excluded_sym_names = excluded_names + optional_names

            # Pass 1: no calibration.
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                excluded_sym_names=excluded_sym_names,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='none')
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_forward(qsym, qarg_params, qaux_params, dshape, lshape)

            # Pass 2: naive calibration on random data.
            calib_data = mx.nd.random.uniform(shape=dshape)
            calib_data = NDArrayIter(data=calib_data, batch_size=batch_size)
            calib_data = DummyIter(calib_data)
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                excluded_sym_names=excluded_sym_names,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='naive',
                calib_data=calib_data,
                num_calib_examples=20)
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_calibrated(qsym)
            check_qsym_qdtype(qsym, qdtype)
            check_qsym_forward(qsym, qarg_params, qaux_params, dshape, lshape)
def check_quantize_model(qdtype):
    """Quantize a single 1x1 convolution and run forward passes on it.

    A ``conv0`` layer with 16 filters on a ``(4, 4, 10, 10)`` input is
    quantized first without calibration and then with naive calibration on
    random data. After each run the returned parameters are verified and a
    forward pass is executed; after the calibrated run the calibration and
    ``out_type`` attributes are inspected as well.

    The function returns early, printing a message, for configurations that
    are reported as unsupported: native CPU, MKLDNN with ``int8``, and GPU
    with ``uint8``.

    ``qdtype`` is forwarded to ``quantize_model`` as ``quantized_dtype``.
    """
    if is_test_for_native_cpu():
        print(
            'skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet'
        )
        return
    if qdtype == 'int8' and is_test_for_mkldnn():
        print(
            'skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet'
        )
        return
    if qdtype == 'uint8' and is_test_for_gpu():
        print(
            'skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet'
        )
        return

    def check_params(params, qparams, qsym=None):
        # Without a symbol the params must pass through unchanged; with one
        # they must equal what _quantize_params produces for that symbol.
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(
                qsym, params, th_dict={})
        assert len(expected) == len(qparams)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        # Every 'requantize_' entry must carry both calibration bounds.
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' not in node_name:
                continue
            assert 'min_calib_range' in node_attrs
            assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        # Every '_quantize' entry must declare the requested output type.
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' not in node_name:
                continue
            assert 'out_type' in node_attrs
            assert node_attrs['out_type'] == qdtype

    def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape):
        # Bind the quantized symbol and push one random batch through it.
        qmod = mx.mod.Module(symbol=qsym, label_names=None,
                             context=mx.current_context())
        qmod.bind(for_training=False, data_shapes=[('data', data_shape)])
        qmod.set_params(qarg_params, qaux_params)
        inputs = [mx.random.uniform(-1.0, 1.0, shape=in_shape)
                  for _, in_shape in qmod.data_shapes]
        qmod.forward(mx.io.DataBatch(inputs, []), is_train=False)
        for out in qmod.get_outputs():
            out.wait_to_read()

    def quantize(**calib_kwargs):
        return mx.contrib.quant.quantize_model(
            sym=sym, arg_params=arg_params, aux_params=aux_params,
            excluded_sym_names=excluded_sym_names,
            ctx=mx.current_context(), quantized_dtype=qdtype,
            **calib_kwargs)

    batch_size = 4
    dshape = (batch_size, 4, 10, 10)
    sym = mx.sym.Convolution(mx.sym.Variable('data'), kernel=(1, 1),
                             num_filter=16, name='conv0')

    mod = Module(symbol=sym, label_names=None)
    mod.bind(data_shapes=[('data', dshape)])
    mod.init_params()
    arg_params, aux_params = mod.get_params()
    excluded_sym_names = []

    # Pass 1: no calibration.
    qsym, qarg_params, qaux_params = quantize(calib_mode='none')
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_forward(qsym, qarg_params, qaux_params, dshape)

    # Pass 2: naive calibration on random data.
    calib_iter = DummyIter(
        NDArrayIter(data=mx.nd.random.uniform(shape=dshape),
                    batch_size=batch_size))
    qsym, qarg_params, qaux_params = quantize(
        calib_mode='naive', calib_data=calib_iter, num_calib_examples=20)
    check_params(arg_params, qarg_params, qsym)
    check_params(aux_params, qaux_params)
    check_qsym_calibrated(qsym)
    check_qsym_qdtype(qsym, qdtype)
    check_qsym_forward(qsym, qarg_params, qaux_params, dshape)
def check_quantize_model(qdtype):
    """Quantize two fp32 networks and validate parameters and attributes.

    The symbols returned by ``get_fp32_sym()`` and
    ``get_fp32_sym_with_multiple_outputs()`` are each quantized first without
    calibration and then with naive calibration on random data. After each
    run the returned parameters are verified; after the calibrated run the
    calibration and ``out_type`` attributes are inspected as well.

    The function returns early, printing a message, for configurations that
    are reported as unsupported: native CPU, MKLDNN with ``int8``, and GPU
    with ``uint8``.

    ``qdtype`` is forwarded to ``quantize_model`` as ``quantized_dtype``.
    """
    if is_test_for_native_cpu():
        print(
            'skipped testing quantize_model for native cpu since it is not supported yet'
        )
        return
    if qdtype == 'int8' and is_test_for_mkldnn():
        print(
            'skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet'
        )
        return
    if qdtype == 'uint8' and is_test_for_gpu():
        print(
            'skipped testing quantize_model for gpu uint8 since it is not supported yet'
        )
        return

    def check_params(params, qparams, qsym=None):
        # Without a symbol the params must pass through unchanged; with one
        # they must equal what _quantize_params produces for that symbol.
        if qsym is None:
            expected = params
        else:
            expected = mx.contrib.quant._quantize_params(
                qsym, params, th_dict={})
        assert len(expected) == len(qparams)
        for key, value in expected.items():
            assert key in qparams
            assert same(value.asnumpy(), qparams[key].asnumpy())

    def check_qsym_calibrated(qsym):
        # Every 'requantize_' entry must carry both calibration bounds.
        for node_name, node_attrs in qsym.attr_dict().items():
            if 'requantize_' not in node_name:
                continue
            assert 'min_calib_range' in node_attrs
            assert 'max_calib_range' in node_attrs

    def check_qsym_qdtype(qsym, qdtype):
        # Every '_quantize' entry must declare the requested output type.
        for node_name, node_attrs in qsym.attr_dict().items():
            if '_quantize' not in node_name:
                continue
            assert 'out_type' in node_attrs
            assert node_attrs['out_type'] == qdtype

    sym = get_fp32_sym()
    batch_size = 4
    label_shape = (batch_size, 10)
    data_shape = (batch_size, 4, 10, 10)

    length = batch_size  # specify num of outputs from split op
    msym = get_fp32_sym_with_multiple_outputs(length)
    msym_label_shape = (length, 10)
    msym_data_shape = (length, 4, 4, 10, 10)

    cases = [
        (sym, data_shape, label_shape),
        (msym, msym_data_shape, msym_label_shape),
    ]
    for net, dshape, lshape in cases:
        mod = Module(symbol=net)
        mod.bind(data_shapes=[('data', dshape)],
                 label_shapes=[('softmax_label', lshape)])
        mod.init_params()
        arg_params, aux_params = mod.get_params()

        # Pass 1: no calibration.
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
            sym=net,
            arg_params=arg_params,
            aux_params=aux_params,
            ctx=mx.current_context(),
            quantized_dtype=qdtype,
            calib_mode='none')
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)

        # Pass 2: naive calibration on random data.
        calib_iter = DummyIter(
            NDArrayIter(data=mx.nd.random.uniform(shape=dshape),
                        batch_size=batch_size))
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
            sym=net,
            arg_params=arg_params,
            aux_params=aux_params,
            ctx=mx.current_context(),
            quantized_dtype=qdtype,
            calib_mode='naive',
            calib_data=calib_iter,
            num_calib_examples=20)
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)
        check_qsym_calibrated(qsym)
        check_qsym_qdtype(qsym, qdtype)