def _check_result_attr(oup, dtype, dtype_str, is_unsigned=True):
    """Assert that ``oup.dtype`` carries the quantization attributes of ``dtype``.

    Args:
        oup: object whose ``dtype`` attribute is inspected.
        dtype: reference dtype supplying the expected scale (and zero point).
        dtype_str: key into the module-level ``_metadata_dict``; the entry's
            ``name`` attribute is the expected ``"mgb_dtype"`` name.
        is_unsigned: when true, the zero point is compared as well.
    """
    expected_meta = _metadata_dict[dtype_str]

    # The result dtype has to be tagged as a quantized dtype at all.
    assert "mgb_dtype" in oup.dtype.metadata
    assert is_quantize(oup.dtype)

    actual_name = oup.dtype.metadata["mgb_dtype"]["name"]
    np.testing.assert_equal(actual_name, expected_meta.name)

    actual_scale = get_scale(oup.dtype)
    expected_scale = get_scale(dtype)
    np.testing.assert_allclose(actual_scale, expected_scale)

    if not is_unsigned:
        return
    # The zero point is only compared when the caller asks for it.
    np.testing.assert_equal(get_zero_point(oup.dtype), get_zero_point(dtype))
def run(
    N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True,
):
    """Compare ``F.quantized.batch_conv_bias_activation`` with a float reference.

    Random input, weight and bias arrays are quantized, run through the
    quantized batch convolution, and compared with ``F.conv2d`` evaluated on
    the dequantized (float32) tensors.

    ``inp_dtype``, ``w_dtype``, ``b_dtype``, ``out_dtype`` and ``outp_scale``
    are not parameters; they are read from the enclosing scope.

    Args:
        N, IC, OC, IH, IW: sizes used to build the input ``(N, IC, IH, IW)``,
            the weight ``(N, OC, IC, KH, KW)`` and the bias ``(1, OC, 1, 1)``.
        KH, KW: kernel height and width.
        PH, PW: padding passed as ``padding=(PH, PW)``.
        SH, SW: stride passed as ``stride=(SH, SW)``.
        has_bias: when false, a zero bias is fed to the quantized op and no
            bias is given to the float reference.
    """
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(N, OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))

    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def run_batch_conv_bias(inp, w, b):
        """Run the quantized batch convolution and return a float32 result."""
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        result = F.quantized.batch_conv_bias_activation(
            inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype,
        )
        return result.astype("float32")

    # The reference must use the same stride and padding as the quantized op;
    # otherwise any non-default stride/padding compares mismatched outputs.
    # NOTE(review): only the first weight slice and the first output entry are
    # used for the reference -- confirm this is intended when N > 1.
    expected = F.conv2d(
        inp_fp32,
        w_fp32[0],
        b_fp32 if has_bias else None,
        stride=(SH, SW),
        padding=(PH, PW),
    )[0]
    # Round-trip through the output dtype so the reference is quantized too.
    expected = expected.astype(out_dtype).astype("float32")
    expected = F.flatten(expected)

    result = run_batch_conv_bias(inp_int8, w_int8, b_int32)
    result = F.flatten(result)

    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
def test_as_type():
    """Cast a tensor through a chain of quantized dtypes and check each result.

    The chain is float32 -> qint8(0.1) -> qint8(0.2) -> quint8(0.3, 127)
    -> quint8(0.3, 128); after every cast the scale (and, for quint8, the
    zero point) reported for the result dtype is verified.
    """
    current = TensorWrapper([1, 2, 3], dtype=np.float32)

    # Signed 8-bit steps: only the scale changes.
    for scale in (0.1, 0.2):
        current = current.astype(qint8(scale))
        np.testing.assert_almost_equal(get_scale(current.dtype), scale)

    # Unsigned 8-bit steps: same scale, different zero points.
    for zero_point in (127, 128):
        current = current.astype(quint8(0.3, zero_point))
        np.testing.assert_almost_equal(get_scale(current.dtype), 0.3)
        np.testing.assert_equal(get_zero_point(current.dtype), zero_point)
def test_dtype_qint8():
    """Check that ``qint8(0.01)`` builds a numpy dtype carrying its scale."""
    dt = qint8(0.01)

    assert isinstance(dt, np.dtype)
    assert "mgb_dtype" in dt.metadata
    np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01)

    # Assert the predicate directly instead of comparing it with ``True``.
    assert is_quantize(dt)
    np.testing.assert_allclose(get_scale(dt), 0.01)
def test_as_type(is_varnode):
    """Cast a tensor through several quantized dtypes and check each result.

    Args:
        is_varnode: when true a ``Network`` is created and handed to
            ``make_tensor``; otherwise ``None`` is passed instead.
    """
    network = Network() if is_varnode else None

    def _cast_and_check(source, target, scale, zero_point=None):
        """Cast ``source`` to ``target`` and verify the resulting dtype."""
        casted = source.astype(target)
        np.testing.assert_almost_equal(get_scale(casted.dtype), scale)
        if zero_point is not None:
            np.testing.assert_equal(get_zero_point(casted.dtype), zero_point)
        return casted

    x = make_tensor(np.array([1, 2, 3], dtype=np.float32), network)

    y = _cast_and_check(x, qint8(0.1), 0.1)
    z = _cast_and_check(y, qint8(0.2), 0.2)
    a = _cast_and_check(z, quint8(0.3, 127), 0.3, 127)
    _cast_and_check(a, quint8(0.3, 128), 0.3, 128)
def get_qat_net(inp_dtype, net, num_inp=1, shape=(1, 16, 32, 32)):
    """Convert ``net`` with ``quantize_qat`` and build matching inputs.

    Args:
        inp_dtype: quantized dtype the random data is cast to; its scale is
            stored in each input's ``qparams``.
        net: module passed to ``quantize_qat``.
        num_inp: number of input tensors to create.
        shape: shape of every input tensor.

    Returns:
        A tuple ``(qat_net, inps)`` where ``inps`` is a list of ``num_inp``
        tensors.
    """
    qat_net = quantize_qat(net)

    def _make_input():
        """Create one input tensor with qint8 ``qparams`` attached."""
        # Values from np.random.random scaled by 16, then cast to inp_dtype.
        quantized = (mge.tensor(np.random.random(shape)) * 16).astype(inp_dtype)
        tensor_in = mge.tensor(dtype.convert_from_qint8(quantized.numpy()))
        tensor_in.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
        tensor_in.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]
        return tensor_in

    return qat_net, [_make_input() for _ in range(num_inp)]
def get_qat_inputs_quint8(inp_dtype, num_inp=1, shape=(1, 16, 384, 512)):
    """Build ``num_inp`` input tensors annotated with quint8 ``qparams``.

    Args:
        inp_dtype: quantized dtype the random data is cast to; its scale and
            zero point are stored in each input's ``qparams``.
        num_inp: number of input tensors to create.
        shape: shape of every input tensor.

    Returns:
        A list of ``num_inp`` tensors.
    """

    def _make_input():
        """Create one input tensor with quint8 ``qparams`` attached."""
        # Values from np.random.random scaled by 16, then cast to inp_dtype.
        quantized = (mge.tensor(np.random.random(shape)) * 16).astype(inp_dtype)
        tensor_in = mge.tensor(dtype.convert_from_quint8(quantized.numpy()))
        qparams = tensor_in.qparams
        qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
        qparams.zero_point = mge.tensor(dtype.get_zero_point(inp_dtype))
        qparams.dtype_meta = dtype._builtin_quant_dtypes["quint8"]
        return tensor_in

    return [_make_input() for _ in range(num_inp)]
def test_dtype_quint8():
    """Check ``quint8`` argument validation and the metadata of a valid dtype."""
    # Each of these constructor calls is expected to raise ValueError; the
    # results are intentionally not bound to a name.
    with pytest.raises(ValueError):
        quint8(0.05, 0.233)  # zero point given as a float
    with pytest.raises(ValueError):
        quint8(0.02, 777)  # presumably above the allowed zero-point range
    with pytest.raises(ValueError):
        quint8(0.02, -1)  # negative zero point

    dt = quint8(0.01, 135)
    assert isinstance(dt, np.dtype)
    assert "mgb_dtype" in dt.metadata
    np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01)
    np.testing.assert_equal(dt.metadata["mgb_dtype"]["zero_point"], 135)

    assert is_quantize(dt)
    np.testing.assert_allclose(get_scale(dt), 0.01)
    np.testing.assert_equal(get_zero_point(dt), 135)
def test_qat_conv_qint8():
    """Trace a QAT-converted Conv2d module and check its converted result.

    The input is random data cast to ``qint8`` with scale ``16.0 / 128``; the
    expected output scale is read from the traced graph's first output.
    ``max_error`` is taken from module scope.
    """

    class QConvOpr(M.Module):
        """Module wrapping a single ``Conv2d`` with a random float32 bias."""

        def __init__(self):
            super().__init__()
            self.normal_conv = M.Conv2d(
                3, 30, 3, stride=(2, 3), padding=(3, 1), dilation=(2, 2),
            )
            bias_shape = self.normal_conv.bias.shape
            random_bias = np.random.random(bias_shape).astype(np.float32)
            self.normal_conv.bias = mge.Parameter(random_bias)

        def forward(self, x):
            return self.normal_conv(x)

    qat_net = quantize_qat(QConvOpr())

    inp_dtype = dtype.qint8(16.0 / 128)
    quantized = (mge.tensor(np.random.random((1, 3, 224, 224))) * 16).astype(
        inp_dtype
    )

    # Float input annotated with the quantization parameters of inp_dtype.
    inp = mge.tensor(dtype.convert_from_qint8(quantized.numpy()))
    inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
    inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]

    traced_module, tm_result = get_traced_module(qat_net, inp)
    print(traced_module.flatten().graph)

    inp = inp.astype(inp_dtype)
    out_qparams = traced_module.graph.outputs[0].qparams
    _test_convert_result(
        inp,
        traced_module,
        tm_result,
        scale=out_qparams.scale.numpy(),
        require_quantize=True,
        max_err=max_error,
    )
def test_qat_convrelu():
    """Trace a QAT-converted ``ConvRelu2dOpr`` and check its converted result.

    The input is random data cast to ``qint8`` with scale ``16.0 / 128``; the
    expected output scale is read from the traced graph's first output.
    ``max_error`` is taken from module scope.
    """
    qat_net = quantize_qat(ConvRelu2dOpr())

    inp_dtype = dtype.qint8(16.0 / 128)
    quantized = (mge.tensor(np.random.random((1, 3, 224, 224))) * 16).astype(
        inp_dtype
    )

    # Float input annotated with the quantization parameters of inp_dtype.
    inp = mge.tensor(dtype.convert_from_qint8(quantized.numpy()))
    inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
    inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]

    traced_module, tm_result = get_traced_module(qat_net, inp)

    inp = inp.astype(inp_dtype)
    out_qparams = traced_module.graph.outputs[0].qparams
    _test_convert_result(
        inp,
        traced_module,
        tm_result,
        scale=out_qparams.scale.numpy(),
        require_quantize=True,
        max_err=max_error,
    )
def run(
    N,
    IC,
    OC,
    IH,
    IW,
    KH,
    KW,
    PH,
    PW,
    SH,
    SW,
    has_bias=True,
    nonlinear_mode="identity",
):
    """Compare ``F.quantized.conv_bias_activation`` with a float reference.

    Random input, weight and bias arrays are quantized, run through the
    quantized convolution (in the "NCHW4" layout when ``is_cuda_available()``
    is true, otherwise "NCHW"), and compared with ``F.conv2d`` evaluated on
    the dequantized (float32) tensors.

    ``inp_dtype``, ``w_dtype``, ``b_dtype``, ``out_dtype`` and ``outp_scale``
    are not parameters; they are read from the enclosing scope.

    Args:
        N, IC, OC, IH, IW: sizes used to build the input ``(N, IC, IH, IW)``,
            the weight ``(OC, IC, KH, KW)`` and the bias ``(1, OC, 1, 1)``.
        KH, KW: kernel height and width.
        PH, PW: padding passed as ``padding=(PH, PW)``.
        SH, SW: stride passed as ``stride=(SH, SW)``.
        has_bias: when false, a zero bias is fed to the quantized op and no
            bias is given to the float reference.
        nonlinear_mode: forwarded to the quantized op; ``"relu"`` also applies
            ``F.relu`` to the float reference.
    """
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))

    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
    wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
    bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

    inp_int8 = tensor(inpv, dtype=inp_dtype)
    w_int8 = Parameter(wv, dtype=w_dtype)
    b_int32 = Parameter(bv, dtype=b_dtype)

    inp_fp32 = inp_int8.astype("float32")
    w_fp32 = w_int8.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def convert_to_nchw4(var):
        """Reshape dim 1 into ``(dim1 // 4, 4)`` and move the 4 to the end."""
        var = F.reshape(
            var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])
        )
        var = F.transpose(var, (0, 1, 3, 4, 2))
        return var

    def run_conv2d(inp, w, b):
        """Float reference convolution, with optional ReLU."""
        out = F.conv2d(
            inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW),
        )
        if nonlinear_mode == "relu":
            return F.relu(out)
        return out

    # ``layout`` is used instead of ``format`` to avoid shadowing the builtin.
    def run_conv_bias(inp, w, b, layout="NCHW"):
        """Quantized convolution, converting operands for "NCHW4" first."""
        b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
        if layout == "NCHW4":
            inp = convert_to_nchw4(inp)
            w = convert_to_nchw4(w)
            b = convert_to_nchw4(b)
        return F.quantized.conv_bias_activation(
            inp,
            w,
            b,
            stride=(SH, SW),
            padding=(PH, PW),
            dtype=out_dtype,
            nonlinear_mode=nonlinear_mode,
        )

    layout = "NCHW4" if is_cuda_available() else "NCHW"

    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    # Round-trip through the output dtype so the reference is quantized too.
    expected = expected.astype(out_dtype).astype("float32")

    result = run_conv_bias(inp_int8, w_int8, b_int32, layout=layout).astype(
        "float32"
    )
    if layout == "NCHW4":
        # Undo the layout change so the flattened order matches the reference.
        result = F.transpose(result, (0, 1, 4, 2, 3))

    expected = F.flatten(expected)
    result = F.flatten(result)
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
def run(
    N,
    IC,
    OC,
    IH,
    IW,
    KH,
    KW,
    PH,
    PW,
    SH,
    SW,
    has_bias=True,
    nonlinear_mode="identity",
):
    """Compare the 4-bit ``F.quantized.conv_bias_activation`` with a float reference.

    The input is converted with ``convert_to_quint4``, the weight with
    ``convert_to_qint4`` and the bias with ``convert_to_qint32``. The
    quantized result is compared with ``F.conv2d`` on the float32 tensors.

    ``inp_dtype``, ``w_dtype``, ``b_dtype``, ``out_dtype`` and ``outp_scale``
    are not parameters; they are read from the enclosing scope.

    Args:
        N, IC, OC, IH, IW: sizes used to build the input ``(N, IC, IH, IW)``,
            the weight ``(OC, IC, KH, KW)`` and the bias ``(1, OC, 1, 1)``.
        KH, KW: kernel height and width.
        PH, PW: padding passed as ``padding=(PH, PW)``.
        SH, SW: stride passed as ``stride=(SH, SW)``.
        has_bias: when false, a zero bias is fed to the quantized op and no
            bias is given to the float reference.
        nonlinear_mode: forwarded to the quantized op; ``"relu"`` also applies
            ``F.relu`` to the float reference.
    """
    stride = (SH, SW)
    padding = (PH, PW)

    # Random draws stay in the order input, weight, bias.
    inp_v = np.random.normal(size=(N, IC, IH, IW))
    w_v = np.random.normal(size=(OC, IC, KH, KW))
    b_v = np.random.normal(size=(1, OC, 1, 1))

    inp_scale = dtype.get_scale(inp_dtype)
    w_scale = dtype.get_scale(w_dtype)
    b_scale = dtype.get_scale(b_dtype)

    inp_uint4 = mge.Tensor(
        dtype.convert_to_quint4(inp_v * inp_scale, inp_dtype), dtype=inp_dtype
    )
    w_int4 = mge.Parameter(
        dtype.convert_to_qint4(w_v * w_scale, w_dtype), dtype=w_dtype
    )
    b_int32 = mge.Parameter(
        dtype.convert_to_qint32(b_v * b_scale, b_dtype), dtype=b_dtype
    )

    inp_fp32 = inp_uint4.astype("float32")
    w_fp32 = w_int4.astype("float32")
    b_fp32 = b_int32.astype("float32")

    def run_conv2d(inp, w, b):
        """Float reference convolution, with optional ReLU."""
        bias = b if has_bias else None
        conv_out = F.conv2d(inp, w, bias, stride=stride, padding=padding)
        return F.relu(conv_out) if nonlinear_mode == "relu" else conv_out

    def run_conv_bias(inp, w, b):
        """Quantized convolution; a zero bias is used when ``has_bias`` is false."""
        if not has_bias:
            b = mge.Parameter(np.zeros_like(b.numpy()))
        return F.quantized.conv_bias_activation(
            inp,
            w,
            b,
            stride=stride,
            padding=padding,
            dtype=out_dtype,
            nonlinear_mode=nonlinear_mode,
        )

    # Round-trip the reference through the output dtype before comparing.
    expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
    expected = expected.astype(out_dtype).astype("float32")
    result = run_conv_bias(inp_uint4, w_int4, b_int32).astype("float32")

    expected = F.flatten(expected)
    result = F.flatten(result)
    np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale)
from test.utils import ConvOpr, dump_mge_model

import megengine as mge
import numpy as np
from megengine.core.tensor import dtype
from megengine.quantization.quantize import quantize_qat
from megengine.traced_module import trace_module

if __name__ == "__main__":
    # Float model: trace it, save the traced module and dump the model.
    float_net = ConvOpr("normal")
    float_traced = trace_module(float_net, mge.tensor(float_net.data))
    mge.save(float_traced, "float_model.tm")
    dump_mge_model(float_net, float_net.data, "float_model")

    # QAT model: convert the same network, then trace it with an input that
    # carries qint8 quantization parameters.
    qat_net = quantize_qat(float_net)
    inp_dtype = dtype.qint8(16.0 / 128)
    quantized = (mge.tensor(np.random.random((1, 3, 224, 224))) * 16).astype(
        inp_dtype
    )
    qat_input = mge.tensor(dtype.convert_from_qint8(quantized.numpy()))
    qat_input.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
    qat_input.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]

    qat_traced = trace_module(qat_net, qat_input)
    mge.save(qat_traced, "qat_model.tm")