Example #1
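# Checks that a quantized output tensor carries the expected metadata: dtype
# name, scale, and (for unsigned dtypes) zero point. Assumes numpy as np plus
# is_quantize / get_scale / get_zero_point / _metadata_dict imported from
# megengine.core.tensor.dtype.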
def _check_result_attr(oup, dtype, dtype_str, is_unsigned=True):
    metadata = _metadata_dict[dtype_str]
    assert "mgb_dtype" in oup.dtype.metadata
    assert is_quantize(oup.dtype)
    np.testing.assert_equal(oup.dtype.metadata["mgb_dtype"]["name"], metadata.name)
    np.testing.assert_allclose(get_scale(oup.dtype), get_scale(dtype))
    if is_unsigned:
        np.testing.assert_equal(get_zero_point(oup.dtype), get_zero_point(dtype))
Example #2
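    # Exercises F.quantized.batch_conv_bias_activation on int8 inputs/weights
    # and an int32 bias, comparing against a float32 F.conv2d reference that is
    # fake-quantized to the same output dtype. inp_dtype, w_dtype, b_dtype,
    # out_dtype, and outp_scale come from the enclosing test scope.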
    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(N, OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = tensor(inpv, dtype=inp_dtype)
        w_int8 = Parameter(wv, dtype=w_dtype)
        b_int32 = Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

        def run_batch_conv_bias(inp, w, b):
            b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
            result = F.quantized.batch_conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
            )
            return result.astype("float32")

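        # Float reference: plain conv2d with the first sample's weights; the
        # flatten-and-compare below implicitly assumes N == 1.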
        expected = F.conv2d(inp_fp32, w_fp32[0],
                            b_fp32 if has_bias else None)[0]
        expected = expected.astype(out_dtype).astype("float32")
        expected = F.flatten(expected)

        result = run_batch_conv_bias(inp_int8, w_int8, b_int32)
        result = F.flatten(result)

        np.testing.assert_allclose(result.numpy(),
                                   expected.numpy(),
                                   atol=outp_scale)
Example #3
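# astype() with a quantized dtype re-quantizes the tensor: each cast below
# should be reflected in the scale (and, for quint8, the zero point) of the
# resulting dtype.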
def test_as_type():
    x = TensorWrapper([1, 2, 3], dtype=np.float32)
    y = x.astype(qint8(0.1))
    np.testing.assert_almost_equal(get_scale(y.dtype), 0.1)
    z = y.astype(qint8(0.2))
    np.testing.assert_almost_equal(get_scale(z.dtype), 0.2)
    a = z.astype(quint8(0.3, 127))
    np.testing.assert_almost_equal(get_scale(a.dtype), 0.3)
    np.testing.assert_equal(get_zero_point(a.dtype), 127)
    b = a.astype(quint8(0.3, 128))
    np.testing.assert_almost_equal(get_scale(b.dtype), 0.3)
    np.testing.assert_equal(get_zero_point(b.dtype), 128)
Example #4
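# qint8(scale) builds a numpy dtype whose "mgb_dtype" metadata records the
# quantization scale.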
def test_dtype_qint8():
    dt = qint8(0.01)
    assert isinstance(dt, np.dtype)
    assert "mgb_dtype" in dt.metadata
    np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01)

    assert is_quantize(dt)
    np.testing.assert_allclose(get_scale(dt), 0.01)
Example #5
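# Same astype() checks as the TensorWrapper variant above, parametrized over
# eager tensors and Network varnodes via make_tensor.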
def test_as_type(is_varnode):
    if is_varnode:
        network = Network()
    else:
        network = None

    x_np = np.array([1, 2, 3], dtype=np.float32)
    x = make_tensor(x_np, network)
    y = x.astype(qint8(0.1))
    np.testing.assert_almost_equal(get_scale(y.dtype), 0.1)
    z = y.astype(qint8(0.2))
    np.testing.assert_almost_equal(get_scale(z.dtype), 0.2)
    a = z.astype(quint8(0.3, 127))
    np.testing.assert_almost_equal(get_scale(a.dtype), 0.3)
    np.testing.assert_equal(get_zero_point(a.dtype), 127)
    b = a.astype(quint8(0.3, 128))
    np.testing.assert_almost_equal(get_scale(b.dtype), 0.3)
    np.testing.assert_equal(get_zero_point(b.dtype), 128)
Example #6
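# Converts a float module to QAT form and fabricates matching qint8 inputs:
# the data is quantized, converted back to float, and its qparams (scale and
# dtype metadata) are filled in by hand.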
def get_qat_net(inp_dtype, net, num_inp=1, shape=(1, 16, 32, 32)):
    qat_net = quantize_qat(net)
    inps = []
    for _ in range(num_inp):
        data1 = mge.tensor(np.random.random(shape)) * 16
        data1 = data1.astype(inp_dtype)
        inp1 = mge.tensor(dtype.convert_from_qint8(data1.numpy()))
        inp1.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
        inp1.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]
        inps.append(inp1)
    return qat_net, inps
Example #7
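# quint8 variant of the QAT input helper: the qparams additionally carry the
# dtype's zero point.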
def get_qat_inputs_quint8(inp_dtype, num_inp=1, shape=(1, 16, 384, 512)):
    inps = []
    for _ in range(num_inp):
        data1 = mge.tensor(np.random.random(shape)) * 16
        data1 = data1.astype(inp_dtype)
        inp1 = mge.tensor(dtype.convert_from_quint8(data1.numpy()))
        inp1.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
        inp1.qparams.zero_point = mge.tensor(dtype.get_zero_point(inp_dtype))
        inp1.qparams.dtype_meta = dtype._builtin_quant_dtypes["quint8"]
        inps.append(inp1)
    return inps
Example #8
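# quint8 requires an integer zero point in [0, 255]; the first three
# constructions below should therefore raise ValueError.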
def test_dtype_quint8():
    with pytest.raises(ValueError):
        quint8(0.05, 0.233)  # zero point must be an integer
    with pytest.raises(ValueError):
        quint8(0.02, 777)  # zero point above the uint8 range
    with pytest.raises(ValueError):
        quint8(0.02, -1)  # zero point below the uint8 range
    dt = quint8(0.01, 135)
    assert isinstance(dt, np.dtype)
    assert "mgb_dtype" in dt.metadata
    np.testing.assert_allclose(dt.metadata["mgb_dtype"]["scale"], 0.01)
    np.testing.assert_equal(dt.metadata["mgb_dtype"]["zero_point"], 135)

    assert is_quantize(dt)
    np.testing.assert_allclose(get_scale(dt), 0.01)
    np.testing.assert_equal(get_zero_point(dt), 135)
Example #9
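# Traces a QAT conv module (with stride, padding, and dilation set) and checks
# that the converted result matches the traced-module output. max_error and
# _test_convert_result are assumed to be defined by the surrounding test module.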
def test_qat_conv_qint8():
    class QConvOpr(M.Module):
        def __init__(self):
            super().__init__()
            self.normal_conv = M.Conv2d(
                3,
                30,
                3,
                stride=(2, 3),
                padding=(3, 1),
                dilation=(2, 2),
            )
            self.normal_conv.bias = mge.Parameter(
                np.random.random(self.normal_conv.bias.shape).astype(
                    np.float32))

        def forward(self, x):
            x = self.normal_conv(x)
            return x

    net = QConvOpr()
    qat_net = quantize_qat(net)

    inp_dtype = dtype.qint8(16.0 / 128)
    data = mge.tensor(np.random.random((1, 3, 224, 224))) * 16
    data = data.astype(inp_dtype)
    inp = mge.tensor(dtype.convert_from_qint8(data.numpy()))
    inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
    inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]

    traced_module, tm_result = get_traced_module(qat_net, inp)
    print(traced_module.flatten().graph)
    inp = inp.astype(inp_dtype)
    out_dtype = traced_module.graph.outputs[0].qparams
    scale = out_dtype.scale.numpy()
    _test_convert_result(
        inp,
        traced_module,
        tm_result,
        scale=scale,
        require_quantize=True,
        max_err=max_error,
    )
Example #10
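# Same conversion check as above, for a fused conv + ReLU module
# (ConvRelu2dOpr).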
def test_qat_convrelu():
    net = ConvRelu2dOpr()
    qat_net = quantize_qat(net)
    inp_dtype = dtype.qint8(16.0 / 128)
    data = mge.tensor(np.random.random((1, 3, 224, 224))) * 16
    data = data.astype(inp_dtype)
    inp = mge.tensor(dtype.convert_from_qint8(data.numpy()))
    inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
    inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]

    traced_module, tm_result = get_traced_module(qat_net, inp)
    inp = inp.astype(inp_dtype)
    out_dtype = traced_module.graph.outputs[0].qparams
    scale = out_dtype.scale.numpy()
    _test_convert_result(
        inp,
        traced_module,
        tm_result,
        scale=scale,
        require_quantize=True,
        max_err=max_error,
    )
Example #11
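    # Exercises F.quantized.conv_bias_activation against a float32 conv2d
    # reference. On CUDA the quantized op expects NCHW4 layout, so inputs are
    # repacked before the call and the result is transposed back afterwards.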
    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="identity",
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        inpv = dtype.convert_to_qint8(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint8(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_int8 = tensor(inpv, dtype=inp_dtype)
        w_int8 = Parameter(wv, dtype=w_dtype)
        b_int32 = Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_int8.astype("float32")
        w_fp32 = w_int8.astype("float32")
        b_fp32 = b_int32.astype("float32")

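        # Repack NCHW -> NCHW4: split the channel axis into groups of four and
        # move the group-of-four axis to the innermost position.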
        def convert_to_nchw4(var):
            var = F.reshape(var, (var.shape[0], var.shape[1] // 4, 4,
                                  var.shape[2], var.shape[3]))
            var = F.transpose(var, (0, 1, 3, 4, 2))
            return var

        def run_conv2d(inp, w, b):
            O = F.conv2d(
                inp,
                w,
                b if has_bias else None,
                stride=(SH, SW),
                padding=(PH, PW),
            )
            if nonlinear_mode == "relu":
                return F.relu(O)
            else:
                return O

        def run_conv_bias(inp, w, b, format="NCHW"):
            b = b if has_bias else Parameter(np.zeros_like(b.numpy()))
            if format == "NCHW4":
                inp = convert_to_nchw4(inp)
                w = convert_to_nchw4(w)
                b = convert_to_nchw4(b)
            return F.quantized.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        format = "NCHW4" if is_cuda_available() else "NCHW"

        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_int8, w_int8, b_int32,
                               format=format).astype("float32")
        if format == "NCHW4":
            result = F.transpose(result, (0, 1, 4, 2, 3))
        expected = F.flatten(expected)
        result = F.flatten(result)
        np.testing.assert_allclose(result.numpy(),
                                   expected.numpy(),
                                   atol=outp_scale)
Example #12
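    # 4-bit variant: quint4 activations and qint4 weights with an int32 bias,
    # compared against a float32 reference fake-quantized to out_dtype.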
    def run(
        N,
        IC,
        OC,
        IH,
        IW,
        KH,
        KW,
        PH,
        PW,
        SH,
        SW,
        has_bias=True,
        nonlinear_mode="identity",
    ):
        inp_v = np.random.normal(size=(N, IC, IH, IW))
        w_v = np.random.normal(size=(OC, IC, KH, KW))
        b_v = np.random.normal(size=(1, OC, 1, 1))
        inp_scale = dtype.get_scale(inp_dtype)
        w_scale = dtype.get_scale(w_dtype)
        b_scale = dtype.get_scale(b_dtype)

        inpv = dtype.convert_to_quint4(inp_v * inp_scale, inp_dtype)
        wv = dtype.convert_to_qint4(w_v * w_scale, w_dtype)
        bv = dtype.convert_to_qint32(b_v * b_scale, b_dtype)

        inp_uint4 = mge.Tensor(inpv, dtype=inp_dtype)
        w_int4 = mge.Parameter(wv, dtype=w_dtype)
        b_int32 = mge.Parameter(bv, dtype=b_dtype)

        inp_fp32 = inp_uint4.astype("float32")
        w_fp32 = w_int4.astype("float32")
        b_fp32 = b_int32.astype("float32")

        def run_conv2d(inp, w, b):
            O = F.conv2d(
                inp,
                w,
                b if has_bias else None,
                stride=(SH, SW),
                padding=(PH, PW),
            )
            if nonlinear_mode == "relu":
                return F.relu(O)
            else:
                return O

        def run_conv_bias(inp, w, b):
            b = b if has_bias else mge.Parameter(np.zeros_like(b.numpy()))
            return F.quantized.conv_bias_activation(
                inp,
                w,
                b,
                stride=(SH, SW),
                padding=(PH, PW),
                dtype=out_dtype,
                nonlinear_mode=nonlinear_mode,
            )

        expected = run_conv2d(inp_fp32, w_fp32, b_fp32)
        expected = expected.astype(out_dtype).astype("float32")
        result = run_conv_bias(inp_uint4, w_int4, b_int32).astype("float32")
        expected = F.flatten(expected)
        result = F.flatten(result)
        np.testing.assert_allclose(result.numpy(),
                                   expected.numpy(),
                                   atol=outp_scale)
Example #13
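# Script that dumps a float traced module and its QAT counterpart, with input
# qparams populated by hand, for use in conversion tests.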
from test.utils import ConvOpr, dump_mge_model

import megengine as mge
import numpy as np
from megengine.core.tensor import dtype
from megengine.quantization.quantize import quantize_qat
from megengine.traced_module import trace_module

if __name__ == "__main__":
    net = ConvOpr("normal")
    traced_module = trace_module(net, mge.tensor(net.data))
    mge.save(traced_module, "float_model.tm")
    dump_mge_model(net, net.data, "float_model")

    qat_net = quantize_qat(net)
    inp_dtype = dtype.qint8(16.0 / 128)
    data = mge.tensor(np.random.random((1, 3, 224, 224))) * 16
    data = data.astype(inp_dtype)
    inp = mge.tensor(dtype.convert_from_qint8(data.numpy()))
    inp.qparams.scale = mge.tensor(dtype.get_scale(inp_dtype))
    inp.qparams.dtype_meta = dtype._builtin_quant_dtypes["qint8"]

    qat_module = trace_module(qat_net, inp)
    mge.save(qat_module, "qat_model.tm")