def test_load_quantized(): from megengine.core.tensor import dtype data_shape = (2, 28) data = tensor(np.random.random(data_shape), dtype="float32") data = data.astype(dtype.qint8(0.1)) mlp = MLP() quantize_qat(mlp) quantize(mlp) mlp.dense0.weight = Parameter( mlp.dense0.weight.astype(dtype.qint8(0.001)).numpy()) mlp.dense1.weight = Parameter( mlp.dense1.weight.astype(dtype.qint8(0.0002)).numpy()) mlp.eval() pred0 = mlp(data) with BytesIO() as fout: mge.save(mlp.state_dict(), fout) fout.seek(0) checkpoint = mge.load(fout) # change mlp weight. mlp.dense0.weight = Parameter( mlp.dense0.weight.astype(dtype.qint8(0.00001)).numpy()) mlp.dense1.weight = Parameter( mlp.dense1.weight.astype(dtype.qint8(0.2)).numpy()) mlp.load_state_dict(checkpoint) pred1 = mlp(data) np.testing.assert_allclose(pred0.astype("float32").numpy(), pred1.astype("float32").numpy(), atol=5e-6)
def test_elemwise_multitype(): op = builtin.ElemwiseMultiType(mode="qadd", dtype=dtype.qint32(2.0)) @trace(symbolic=True, capture_as_const=True) def fwd(x, y): return apply(op, x, y)[0] x = Tensor(np.random.random(10) * 10, dtype=dtype.qint8(2.0)) y = Tensor(np.random.random(10) * 10, dtype=dtype.qint8(2.0)) result = fwd(x, y) check_pygraph_dump(fwd, [x, y], [result])
def test_as_type(): x = TensorWrapper([1, 2, 3], dtype=np.float32) y = x.astype(qint8(0.1)) np.testing.assert_almost_equal(get_scale(y.dtype), 0.1) z = y.astype(qint8(0.2)) np.testing.assert_almost_equal(get_scale(z.dtype), 0.2) a = z.astype(quint8(0.3, 127)) np.testing.assert_almost_equal(get_scale(a.dtype), 0.3) np.testing.assert_equal(get_zero_point(a.dtype), 127) b = a.astype(quint8(0.3, 128)) np.testing.assert_almost_equal(get_scale(b.dtype), 0.3) np.testing.assert_equal(get_zero_point(b.dtype), 128)
def test_convbias(): @trace(symbolic=True, capture_as_const=True) def fwd(inp, weight, bias): return F.quantized.conv_bias_activation( inp, weight, bias, dtype=dtype.qint8(scale=1.0), nonlinear_mode="relu" ) inp = Tensor(np.random.random((1, 3, 64, 64)), dtype=dtype.qint8(scale=1.0)) weight = Tensor(np.random.random((32, 3, 3, 3)), dtype=dtype.qint8(scale=1.0)) bias = Tensor(np.random.random((1, 32, 1, 1)), dtype=dtype.qint32(scale=1.0)) result = fwd(inp, weight, bias) check_pygraph_dump(fwd, [inp, weight, bias], [result])
def test_qadd(): inp_scale = 0.5 outp_scale = 0.2 x = np.arange(6).reshape(2, 3).astype("float32") y = np.arange(6).reshape(2, 3).astype("float32") x = tensor(x, dtype=dtype.qint8(inp_scale)) y = tensor(y, dtype=dtype.qint8(inp_scale)) result_mge = F.elemwise._elemwise_multi_type( x, y, mode="qadd", dtype=dtype.qint8(outp_scale) ) result_mge = result_mge.astype("float32").numpy() result_expect = x.astype("float32").numpy() + y.astype("float32").numpy() np.testing.assert_almost_equal(result_mge, result_expect, decimal=6)
def fwd(inp, weight, bias): return F.quantized.batch_conv_bias_activation( inp, weight, bias, dtype=dtype.qint8(scale=1.0), nonlinear_mode="relu")
def test_elemwise(kind): x1 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32")) x1_scale = np.float32(np.random.rand() + 1) x1 = fake_quant(x1, x1_scale) x1.q_dict["scale"] = x1_scale x1_int8 = quant(x1, x1_scale) x2 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32")) x2_scale = np.float32(np.random.rand() + 1) x2 = fake_quant(x2, x2_scale) x2.q_dict["scale"] = x2_scale x2_int8 = quant(x2, x2_scale) output_scale = np.float32(np.random.rand() + 1) output_dtype = dtype.qint8(output_scale) quantized_kind = "Q" + kind if kind in ("ABS", "SIN"): desired_out = fake_quant(_elwise(x1, mode=kind), output_scale) actual_out = (_elemwise_multi_type( x1_int8, mode=quantized_kind, dtype=output_dtype).numpy() * output_scale) else: desired_out = fake_quant(_elwise(x1, x2, mode=kind), output_scale) actual_out = (_elemwise_multi_type( x1_int8, x2_int8, mode=quantized_kind, dtype=output_dtype).numpy() * output_scale) np.testing.assert_allclose(actual_out, desired_out.numpy())
def test_elemwise(kind): x1 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32")) x1_scale = np.float32(np.random.rand() + 1) x1 = fake_quant(x1, x1_scale) x1.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x1_scale)) x1_int8 = quant(x1, x1_scale) x2 = mge.tensor(np.random.normal(size=(3, 3)).astype("float32")) x2_scale = np.float32(np.random.rand() + 1) x2 = fake_quant(x2, x2_scale) x2.qparams.update(create_qparams(QuantMode.SYMMERTIC, "qint8", x2_scale)) x2_int8 = quant(x2, x2_scale) output_scale = np.float32(np.random.rand() + 1) output_dtype = dtype.qint8(output_scale) quantized_kind = "q" + kind if kind in ("abs", "sin"): desired_out = fake_quant(_elwise(x1, mode=kind), output_scale) actual_out = (_elemwise_multi_type( x1_int8, mode=quantized_kind, dtype=output_dtype).numpy() * output_scale) else: desired_out = fake_quant(_elwise(x1, x2, mode=kind), output_scale) actual_out = (_elemwise_multi_type( x1_int8, x2_int8, mode=quantized_kind, dtype=output_dtype).numpy() * output_scale) np.testing.assert_allclose(actual_out, desired_out.numpy())
def test_dtype_qint8(): dt = qint8(0.01) assert isinstance(dt, np.dtype) assert "mgb_dtype" in dt.metadata np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01) assert is_quantize(dt) == True np.testing.assert_allclose(get_scale(dt), 0.01)
def test_as_type(is_varnode): if is_varnode: network = Network() else: network = None x_np = np.array([1, 2, 3], dtype=np.float32) x = make_tensor(x_np, network) y = x.astype(qint8(0.1)) np.testing.assert_almost_equal(get_scale(y.dtype), 0.1) z = y.astype(qint8(0.2)) np.testing.assert_almost_equal(get_scale(z.dtype), 0.2) a = z.astype(quint8(0.3, 127)) np.testing.assert_almost_equal(get_scale(a.dtype), 0.3) np.testing.assert_equal(get_zero_point(a.dtype), 127) b = a.astype(quint8(0.3, 128)) np.testing.assert_almost_equal(get_scale(b.dtype), 0.3) np.testing.assert_equal(get_zero_point(b.dtype), 128)
def test_linear(): net = LinearOpr() inp_dtype = dtype.qint8(16.0 / 128.0) qat_net, inps = get_qat_net(inp_dtype, net, shape=(10, 100)) traced_module, tm_result = get_traced_module(qat_net, inps[0]) inp = inps[0].astype(inp_dtype) _test_convert_result(inp, traced_module, tm_result, max_err, require_quantize=False)
def test_add(): class ElemwiseOpr(M.Module): def __init__(self, ): super().__init__() self.data = np.ones((2, 3, 224, 224)).astype(np.float32) self.data1 = np.random.random((1, 3, 1, 1)).astype(np.float32) self.add1 = M.Elemwise("add") self.add2 = M.Elemwise("add") self.add3 = M.Elemwise("add") scale = mge.tensor((16.0 / 128.0)) self.quant_stub = QuantStub() self.quant_stub.act_fake_quant = FakeQuantize( _builtin_quant_dtypes["qint8"]) self.quant_stub.act_fake_quant.set_qparams( create_qparams( dtype_meta=_builtin_quant_dtypes["qint8"], scale=scale, zero_point=None, )) self.quant_stub1 = QuantStub() self.quant_stub1.act_fake_quant = FakeQuantize( _builtin_quant_dtypes["qint8"]) self.quant_stub1.act_fake_quant.set_qparams( create_qparams( dtype_meta=_builtin_quant_dtypes["qint8"], scale=scale, zero_point=None, )) def forward(self, a): n = self.quant_stub(mge.tensor(np.float32(10))) data1 = self.quant_stub1(mge.tensor(self.data1)) x = self.add1(a, n) y = self.add2(a, data1) z = self.add3(x, y) return z net = ElemwiseOpr() inp_dtype = dtype.qint8(16.0 / 128.0) qat_net, inps = get_qat_net(inp_dtype, net, shape=(1, 3, 1, 1)) traced_module, tm_result = get_traced_module(qat_net, inps[0]) print(traced_module.flatten().graph) out_dtype = traced_module.graph.outputs[0].qparams scale = out_dtype.scale.numpy() inp = inps[0].astype(inp_dtype) _test_convert_result( inp, traced_module, tm_result, scale=scale, require_quantize=True, max_err=max_error, )
def test_det_model(): net = mge.load("models_fire_det.fix_batch.fuse_scale_cpu.pkl") inp_dtype = dtype.qint8(16.0 / 128.0) qat_net, inps = get_qat_net(inp_dtype, net, shape=(1, 3, 512, 512)) traced_module, tm_result = get_traced_module(qat_net, inps[0]) inp = inps[0].astype(inp_dtype) _test_convert_result(inp, traced_module, tm_result, max_err, require_quantize=False)
def test_linear(): net = LinearOpr() inp_dtype = dtype.qint8(16.0 / 128.0) qat_net, inps = get_qat_net(inp_dtype, net, shape=(10, 100)) traced_module, tm_result = get_traced_module(qat_net, inps[0]) print(traced_module.flatten().graph) out_dtype = traced_module.graph.outputs[0].qparams scale = out_dtype.scale.numpy() inp = inps[0].astype(inp_dtype) _test_convert_result( inp, traced_module, tm_result, scale=scale, require_quantize=True, max_err=max_error, )
def test_dtype_int8_ffi_handle(): device = "xpux" shape = (3, 3, 3) data = np.random.random(shape).astype(np.float32) * 5 - 1 def identity(x): return x dtype = quint8(0.01, 127) inp = convert_to_quint8(data, dtype) oup = _get_compiled_result(inp, dtype, shape, device, calc_func=identity) _check_result_attr(oup, dtype, "quint8") np.testing.assert_allclose(convert_from_quint8(oup), convert_from_quint8(inp)) dtype = qint8(0.01) inp = convert_to_qint8(data, dtype) oup = _get_compiled_result(inp, dtype, shape, device, calc_func=identity) _check_result_attr(oup, dtype, "qint8", is_unsigned=False) np.testing.assert_allclose(convert_from_qint8(oup), convert_from_qint8(inp))
def test_qat_conv_qint8(): class QConvOpr(M.Module): def __init__(self): super().__init__() self.normal_conv = M.Conv2d( 3, 30, 3, stride=(2, 3), padding=(3, 1), dilation=(2, 2), ) self.normal_conv.bias = mge.Parameter( np.random.random(self.normal_conv.bias.shape).astype( np.float32)) def forward(self, x): x = self.normal_conv(x) return x net = QConvOpr() qat_net = quantize_qat(net) inp_dtype = dtype.qint8(16.0 / 128) data = mge.tensor(np.random.random((1, 3, 224, 224))) * 16 data = data.astype(inp_dtype) inp = mge.tensor(dtype.convert_from_qint8(data.numpy())) inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype)) inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"] traced_module, tm_result = get_traced_module(qat_net, inp) print(traced_module.flatten().graph) inp = inp.astype(inp_dtype) out_dtype = traced_module.graph.outputs[0].qparams scale = out_dtype.scale.numpy() _test_convert_result( inp, traced_module, tm_result, scale=scale, require_quantize=True, max_err=max_error, )
def test_qat_convrelu(): net = ConvRelu2dOpr() qat_net = quantize_qat(net) inp_dtype = dtype.qint8(16.0 / 128) data = mge.tensor(np.random.random((1, 3, 224, 224))) * 16 data = data.astype(inp_dtype) inp = mge.tensor(dtype.convert_from_qint8(data.numpy())) inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype)) inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"] traced_module, tm_result = get_traced_module(qat_net, inp) inp = inp.astype(inp_dtype) out_dtype = traced_module.graph.outputs[0].qparams scale = out_dtype.scale.numpy() _test_convert_result( inp, traced_module, tm_result, scale=scale, require_quantize=True, max_err=max_error, )
def quant(x, scale): inp_dtype = dtype.qint8(scale) return x.astype(inp_dtype)
def quant(x, scale): x_dtype = dtype.qint8(scale) return x.astype(x_dtype)
dt = qint4(0.01) assert isinstance(dt, np.dtype) assert "mgb_dtype" in dt.metadata np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01) assert is_quantize(dt) np.testing.assert_allclose(get_scale(dt), 0.01) @pytest.mark.parametrize( "dtype, dtype_name", [ (quint4(0.01, 5), "quint4"), (qint4(0.01), "qint4"), (quint8(0.01, 135), "quint8"), (qint8(0.01), "qint8"), ], ) def test_dtype_qint_mgb_ffi_handle(dtype, dtype_name): def identity(x): return x convert_to_dtype = eval("convert_to_%s" % dtype_name) convert_from_dtype = eval("convert_from_%s" % dtype_name) device = "xpux" shape = (3, 3, 3) data = np.random.random(shape).astype(np.float32) * 5 - 1 inp = convert_to_dtype(data, dtype) oup = _get_compiled_result(inp, dtype, shape, device, calc_func=identity) _check_result_attr(oup, dtype, dtype_name, dtype_name.startswith("qu"))
def fwd(data): return data.astype(dtype.qint8(0.8))
def test_func( N, IC, IH, IW, OC, KH, KW, SH, SW, PH, PW, DH, DW, groups=1, has_bias=True, conv_mode: str = "cross_correlation", compute_mode: str = "default", ): inp_scale = np.float32(rng.uniform(low=0.04, high=0.06)) weight_scale = np.float32(rng.uniform(low=0.04, high=0.06)) bias_scale = inp_scale * weight_scale out_scale = np.float32(rng.uniform(low=0.04, high=0.06)) inp_dtype = dtype.qint8(inp_scale) weight_dtype = dtype.qint8(weight_scale) bias_dtype = dtype.qint32(bias_scale) out_dtype = dtype.qint8(out_scale) inp_fp32 = rng.uniform(low=-1, high=1, size=(N, IC, IH, IW)).astype(np.float32) weight_fp32 = rng.uniform(low=-1, high=1, size=(IC, OC, KH, KW)).astype(np.float32) bias_fp32 = rng.uniform(low=-1, high=1, size=(1, OC, 1, 1)).astype(np.float32) inp_int8 = dtype.convert_to_qint8(inp_fp32, inp_dtype) weight_int8 = dtype.convert_to_qint8(weight_fp32, weight_dtype) bias_int32 = dtype.convert_to_qint32(bias_fp32, bias_dtype) inp_int8 = mge.tensor(inp_int8, dtype=inp_dtype) weight_int8 = mge.Parameter(weight_int8, dtype=weight_dtype) bias_int32 = mge.Parameter(bias_int32, dtype=bias_dtype) inp_fp32 = inp_int8.astype("float32") weight_fp32 = weight_int8.astype("float32") bias_fp32 = bias_int32.astype("float32") expected = F.conv_transpose2d( inp_fp32, weight_fp32, bias_fp32 if has_bias else None, stride=(SH, SW), padding=(PH, PW), dilation=(DH, DW), groups=groups, conv_mode=conv_mode, compute_mode=compute_mode, ) expected = dtype.convert_to_qint8(expected.numpy(), out_dtype) expected = dtype.convert_from_qint8(expected) conv_transpose2d = ConvTranspose2d( in_channels=IC, out_channels=OC, kernel_size=(KH, KW), stride=(SH, SW), padding=(PH, PW), dilation=(DH, DW), groups=groups, bias=has_bias, conv_mode=conv_mode, compute_mode=compute_mode, dtype=out_dtype, ) conv_transpose2d.weight = mge.Parameter(weight_int8) if has_bias: conv_transpose2d.bias = mge.Parameter(bias_int32) result = conv_transpose2d.forward(inp_int8).numpy() result = dtype.convert_from_qint8(result) np.testing.assert_allclose(result, expected, atol=out_scale)
def test_conv_bias(): inp_scale = 1.5 w_scale = 2.5 outp_scale = 1.5 inp_dtype = dtype.qint8(inp_scale) w_dtype = dtype.qint8(w_scale) b_dtype = dtype.qint32(inp_scale * w_scale) out_dtype = dtype.qint8(outp_scale) def run( N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True, nonlinear_mode="identity", ): inp_v = np.random.normal(size=(N, IC, IH, IW)) w_v = np.random.normal(size=(OC, IC, KH, KW)) b_v = np.random.normal(size=(1, OC, 1, 1)) inp_scale = dtype.get_scale(inp_dtype) w_scale = dtype.get_scale(w_dtype) b_scale = dtype.get_scale(b_dtype) inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype) wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype) bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype) inp_int8 = tensor(inpv, dtype=inp_dtype) w_int8 = Parameter(wv, dtype=w_dtype) b_int32 = Parameter(bv, dtype=b_dtype) inp_fp32 = inp_int8.astype("float32") w_fp32 = w_int8.astype("float32") b_fp32 = b_int32.astype("float32") def convert_to_nchw4(var): var = F.reshape(var, (var.shape[0], var.shape[1] // 4, 4, var.shape[2], var.shape[3])) var = F.transpose(var, (0, 1, 3, 4, 2)) return var def run_conv2d(inp, w, b): O = F.conv2d( inp, w, b if has_bias else None, stride=(SH, SW), padding=(PH, PW), ) if nonlinear_mode == "relu": return F.relu(O) else: return O def run_conv_bias(inp, w, b, format="NCHW"): b = b if has_bias else Parameter(np.zeros_like(b.numpy())) if format == "NCHW4": inp = convert_to_nchw4(inp) w = convert_to_nchw4(w) b = convert_to_nchw4(b) return F.quantized.conv_bias_activation( inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype, nonlinear_mode=nonlinear_mode, ) format = "NCHW4" if is_cuda_available() else "NCHW" expected = run_conv2d(inp_fp32, w_fp32, b_fp32) expected = expected.astype(out_dtype).astype("float32") result = run_conv_bias(inp_int8, w_int8, b_int32, format=format).astype("float32") if format == "NCHW4": result = F.transpose(result, (0, 1, 4, 2, 3)) expected = F.flatten(expected) result = F.flatten(result) np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale) run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1, False) run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1, False) run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False) run(1, 4, 4, 24, 33, 1, 1, 2, 3, 1, 1) run(10, 12, 24, 46, 46, 1, 1, 2, 1, 3, 1) run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2) run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu") run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu")
def test_batch_conv_bias(): inp_scale = 1.5 w_scale = 2.5 outp_scale = 1.5 inp_dtype = dtype.qint8(inp_scale) w_dtype = dtype.qint8(w_scale) b_dtype = dtype.qint32(inp_scale * w_scale) out_dtype = dtype.qint8(outp_scale) def run( N, IC, OC, IH, IW, KH, KW, PH, PW, SH, SW, has_bias=True, ): inp_v = np.random.normal(size=(N, IC, IH, IW)) w_v = np.random.normal(size=(N, OC, IC, KH, KW)) b_v = np.random.normal(size=(1, OC, 1, 1)) inp_scale = dtype.get_scale(inp_dtype) w_scale = dtype.get_scale(w_dtype) b_scale = dtype.get_scale(b_dtype) inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype) wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype) bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype) inp_int8 = tensor(inpv, dtype=inp_dtype) w_int8 = Parameter(wv, dtype=w_dtype) b_int32 = Parameter(bv, dtype=b_dtype) inp_fp32 = inp_int8.astype("float32") w_fp32 = w_int8.astype("float32") b_fp32 = b_int32.astype("float32") def run_batch_conv_bias(inp, w, b): b = b if has_bias else Parameter(np.zeros_like(b.numpy())) result = F.quantized.batch_conv_bias_activation( inp, w, b, stride=(SH, SW), padding=(PH, PW), dtype=out_dtype, ) return result.astype("float32") expected = F.conv2d(inp_fp32, w_fp32[0], b_fp32 if has_bias else None)[0] expected = expected.astype(out_dtype).astype("float32") expected = F.flatten(expected) result = run_batch_conv_bias(inp_int8, w_int8, b_int32) result = F.flatten(result) np.testing.assert_allclose(result.numpy(), expected.numpy(), atol=outp_scale) run(1, 4, 4, 5, 5, 3, 3, 0, 0, 1, 1, True)
from test.utils import ConvOpr, dump_mge_model import megengine as mge import numpy as np from megengine.core.tensor import dtype from megengine.quantization.quantize import quantize_qat from megengine.traced_module import trace_module if __name__ == "__main__": net = ConvOpr("normal") traced_module = trace_module(net, mge.tensor(net.data)) mge.save(traced_module, "float_model.tm") dump_mge_model(net, net.data, "float_model") qat_net = quantize_qat(net) inp_dtype = dtype.qint8(16.0 / 128) data = mge.tensor(np.random.random((1, 3, 224, 224))) * 16 data = data.astype(inp_dtype) inp = mge.tensor(dtype.convert_from_qint8(data.numpy())) inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype)) inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"] qat_module = trace_module(qat_net, inp) mge.save(qat_module, "qat_model.tm")