Example #1
def quantize(data, shift_bits, target_bits=relay.const(7, dtype='int32')):
    """Quantize output of layer, to be consistent with source code @yx

    Question: should the shift_bits participating to network control flow?
            At mxnet quantization with truman's code, the bits number of max_v
            is converted to normal interger using function `asscalar()`. However,
            I cannot find the related function in relay.
            I am confused with the control flow logic in model network, whether
            the condition `shift_bits == -1` should join in model network or just
            left it in python code flow. By Longtao.Wang

    Parameters
    ----------
    shift_bits: tvm.relay.Expr
        The shift_bits parameter is never used according to @yx's source code,
        which always be constant Expr(-1).
    """
    max_v = relay.max(relay.abs(data))
    min_v = relay.min(data)

    ln_max_v = relay.log(relay.cast(max_v, 'float32'))
    ln_2 = relay.log(relay.const(2.))
    total_bits = relay.ceil(relay.divide(ln_max_v, ln_2)) # ceil( ln(max_v) / ln(2) )
    shift_bits = relay.subtract(total_bits.astype('int32'), target_bits)
    shift_bits = relay.maximum(shift_bits, relay.const(0))

    denominator = relay.left_shift(relay.const(1),
            relay.cast(shift_bits, 'int32'))
    out = relay.divide(data, denominator)
    # Per @yx's code, use a divide instead of a right shift so that negative
    # values round correctly.
    # out = relay.right_shift(data, shift_bits)

    out = relay.cast(relay.clip(out, a_min=-128, a_max=127), 'int8')
    return out, max_v, min_v, shift_bits
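
For intuition, here is a minimal NumPy trace of the shift computation above (the numbers are illustrative, not from the original):

import numpy as np

# Illustrative int32 feature map whose max absolute value is 300.
data = np.array([-300, 20, 117], dtype=np.int32)
target_bits = 7

total_bits = int(np.ceil(np.log2(np.abs(data).max())))  # ceil(log2(300)) = 9
shift_bits = max(total_bits - target_bits, 0)           # 9 - 7 = 2
out = np.clip(data // (1 << shift_bits), -128, 127).astype(np.int8)
print(out)  # [-75   5  29]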
Example #2
def test_tokenize_inf():
    x = relay.var("x", shape=(3, 4), dtype="float32")
    y = relay.clip(x, -np.inf, np.inf)

    f = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(f)

    mod = relay.transform.AnnotateSpans()(mod)
Example #3
def test_clip_type():
    ib = relay.ir_builder.IRBuilder()
    a = ib.param("a", relay.TensorType((10, 4), "float32"))
    with ib.function(a) as func:
        ib.ret(relay.clip(a, 1., 4.))
    ib.ret(func)
    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.TensorType((10, 4), "float32")
Example #4
def test_clip():
    a = relay.var('a', relay.TensorType((10, 4), 'float32'))
    y = relay.clip(a, 1.0, 4.0)
    yy = run_infer_type(y)
    assert (yy.checked_type == relay.TensorType((10, 4), 'float32'))
    data = np.random.rand(10, 4).astype('float32')
    intrp = create_executor()
    op_res = intrp.evaluate(y, {a: relay.const(data)})
    ref_res = np.clip(data, 1.0, 4.0)
    np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
Example #5
def test_clip():
    a = relay.var("a", relay.TensorType((10, 4), "float32"))
    y = relay.clip(a, 1., 4.)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((10, 4), "float32")

    data = np.random.rand(10, 4).astype('float32')
    intrp = create_executor()
    op_res = intrp.evaluate(y, { a: relay.const(data) })
    ref_res = np.clip(data, 1., 4.)
    np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
Example #6
    def make_qgraph(data, weight):
        out = data * relay.const(32.0)
        out = relay.round(out)
        out = relay.clip(out, a_min=-127, a_max=127)
        out = out.astype('int8')

        # `c` (the channel count) comes from the enclosing scope.
        out = relay.nn.conv2d(out, weight, kernel_size=(3, 3),
                              padding=(1, 1), channels=c, out_dtype='int32')
        out = out.astype('float32')
        out = relay.multiply(out, relay.const(0.00024414062))
        out = relay.Function(relay.ir_pass.free_vars(out), out)
        return out
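
A note on the constants (an inference, not stated in the snippet): the input scale 32.0 is 2**5, and the output multiplier 0.00024414062 equals 1/4096 = 2**-12 up to float truncation, which would undo an input scale of 32 combined with a weight scale of 128. A quick check:

# Hedged check: the dequantization constant is 1 / (32 * 128) = 2**-12,
# up to the truncated literal in the snippet.
assert abs(0.00024414062 - 1 / (32 * 128)) < 1e-10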
Example #7
    def test_clip(self):
        data = relay.var("data", relay.TensorType((-1, 6, 4, 4), "float32"))

        net = relay.clip(data, 0.0, 7.0)
        net = relay.Function([data], net)
        mod = tvm.IRModule.from_expr(net)
        mod = relay.transform.InferType()(mod)

        xgraph = xf_relay.from_relay(mod, {})
        layers = xgraph.get_layers()

        assert layers[0].type[0] == "Input"
        assert layers[1].type[0] == "Clip"
        assert layers[1].attrs["a_min"] == 0.0
        assert layers[1].attrs["a_max"] == 7.0
Example #8
def verify_clip(dshape, a_min, a_max, dtype="float32"):
    x = relay.var("x", relay.ty.TensorType(dshape, dtype))
    y = relay.clip(x, a_min, a_max)
    func = relay.Function([x], y)
    x_data = np.random.uniform(size=dshape).astype(dtype)
    verify_results(func, [x_data], "test_clip", rtol=1e-5, atol=1e-5)
Example #9
def _get_model(shape, dtype, a_min, a_max):
    a = relay.var("a", shape=shape, dtype=dtype)
    relu = relay.clip(a, a_min=a_min, a_max=a_max)
    return relu
Example #10
def _get_model(shape, dtype, a_min, a_max):
    assert a_min >= np.iinfo(dtype).min and a_max <= np.iinfo(dtype).max
    a = relay.var("a", shape=shape, dtype=dtype)
    relu = relay.clip(a, a_min=a_min, a_max=a_max)
    return relu
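
In both helpers above, clip acts as a bounded ReLU (hence the variable name relu); for instance, clip(x, 0, 6) is exactly ReLU6. A minimal sketch, assuming the interpreter API used in the earlier tests:

import numpy as np
from tvm import relay

x = relay.var("x", shape=(4,), dtype="float32")
relu6 = relay.Function([x], relay.clip(x, a_min=0.0, a_max=6.0))

data = np.array([-1.0, 0.5, 3.0, 10.0], dtype=np.float32)
res = relay.create_executor().evaluate(relu6)(data)
np.testing.assert_allclose(res.asnumpy(), np.clip(data, 0.0, 6.0))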
Example #11
def test_clip(x_shape=(1, 8, 3, 3)):
    x = relay.var('x', shape=x_shape, dtype='float32')
    out = relay.clip(x, a_min=-0.2, a_max=0.4)
    f = relay.Function([x], out)
    return f, {'x': x_shape}
Example #12
def get_graph(x_shape=(1, 8, 3, 3)):
    # `dtype` comes from the enclosing scope.
    x = relay.var("x", shape=x_shape, dtype=dtype)
    out = relay.clip(x, a_min=-0.2, a_max=0.4)
    f = tvm.IRModule.from_expr(out)
    return f, {"x": x_shape}, []
Example #13
def hardswish(x, out_dtype="float16"):
    return x * (
        relay.clip(x + relay.const(3, dtype=out_dtype), a_min=0, a_max=6)
        / relay.const(6, dtype=out_dtype)
    )
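
This is the standard hard-swish activation, x * clip(x + 3, 0, 6) / 6. A plain NumPy reference (illustrative, not from the original) makes the curve concrete:

import numpy as np

def hardswish_ref(x):
    # NumPy reference: x * clip(x + 3, 0, 6) / 6.
    return x * np.clip(x + 3.0, 0.0, 6.0) / 6.0

x = np.array([-4.0, -1.0, 0.0, 2.0, 5.0], dtype=np.float32)
print(hardswish_ref(x))  # [-0.  -0.3333  0.  1.6667  5.] (approximately)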
Example #14
def test_clip_type():
    a = relay.var("a", relay.TensorType((10, 4), "float32"))
    y = relay.clip(a, 1., 4.)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((10, 4), "float32")
Example #15
def create_qnn_conv2d(qnn_conv2d_params, ifm_expr):
    """Create a relay.Expr of relay.qnn.conv2D given the parameters"""
    v_params = list()
    params = {
        "kernel_size": [
            qnn_conv2d_params.kernel.get_dim_size("H"),
            qnn_conv2d_params.kernel.get_dim_size("W"),
        ],
        "strides": [qnn_conv2d_params.strides[0], qnn_conv2d_params.strides[1]],
        "dilation": [qnn_conv2d_params.dilation[0], qnn_conv2d_params.dilation[1]],
        "padding": [0, 0, 0, 0],
        "data_layout": qnn_conv2d_params.ifm.layout,
    }
    dilated_kernel_h = (qnn_conv2d_params.dilation[0] *
                        (qnn_conv2d_params.kernel.get_dim_size("H") - 1) + 1)
    dilated_kernel_w = (qnn_conv2d_params.dilation[1] *
                        (qnn_conv2d_params.kernel.get_dim_size("W") - 1) + 1)
    if qnn_conv2d_params.pad == "SAME":
        pad_top, pad_bottom = get_pad_value(
            qnn_conv2d_params.ifm.get_dim_size("H"), dilated_kernel_h,
            qnn_conv2d_params.strides[0])
        pad_left, pad_right = get_pad_value(
            qnn_conv2d_params.ifm.get_dim_size("W"), dilated_kernel_w,
            qnn_conv2d_params.strides[1])
        do_pad = not (pad_top == 0 and pad_bottom == 0 and pad_left == 0
                      and pad_right == 0)
        if do_pad:
            params["padding"] = [pad_top, pad_left, pad_bottom, pad_right]
    qnn_conv2d_params.pad = params["padding"]
    params["input_zero_point"] = qnn_conv2d_params.ifm.zp
    params["kernel_zero_point"] = qnn_conv2d_params.kernel.zp
    params["out_dtype"] = "int32"
    params["input_scale"] = qnn_conv2d_params.ifm.sc
    params["kernel_scale"] = qnn_conv2d_params.kernel.sc
    params["channels"] = int(qnn_conv2d_params.kernel.get_dim_size("O"))
    params["kernel_layout"] = qnn_conv2d_params.kernel.layout
    k_shape = qnn_conv2d_params.kernel.shape
    k_dtype = qnn_conv2d_params.kernel.dtype
    w = tvm.nd.array(
        np.random.randint(np.iinfo(k_dtype).min,
                          high=np.iinfo(k_dtype).max,
                          size=k_shape,
                          dtype=k_dtype))
    weight_expr = relay.const(w, k_dtype)
    v_params.append(w)
    qnn_conv2d_expr = qnn.op.conv2d(ifm_expr, weight_expr, **params)
    b = tvm.nd.array(
        np.random.randint(0,
                          high=10,
                          size=(qnn_conv2d_params.kernel.get_dim_size("O")),
                          dtype="int32"))
    v_params.append(b)
    bias_expr = relay.const(b, "int32")
    bias = relay.nn.bias_add(qnn_conv2d_expr,
                             bias_expr,
                             axis=qnn_conv2d_params.ifm.get_dim_index("C"))
    bias_scale = relay.const(
        qnn_conv2d_params.ifm.sc.data.asnumpy() *
        qnn_conv2d_params.kernel.sc.data.asnumpy(),
        "float32",
    )
    req_expr = relay.qnn.op.requantize(
        bias,
        bias_scale,  # input scale
        relay.const(0, "int32"),  # input zero point
        qnn_conv2d_params.ofm.sc,  # output scale
        qnn_conv2d_params.ofm.zp,  # output zero point
        out_dtype=qnn_conv2d_params.ofm.dtype,
    )
    if qnn_conv2d_params.activation != "NONE":
        assert qnn_conv2d_params.activation == "CLIP"
        clip_expr = relay.clip(req_expr, qnn_conv2d_params.clip_min,
                               qnn_conv2d_params.clip_max)
        return clip_expr, v_params

    return req_expr, v_params
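
For context, here is a minimal self-contained sketch of the pattern this helper constructs (qnn.conv2d -> bias_add -> requantize -> clip). All shapes, scales, and zero points below are illustrative assumptions, not values from the original:

import numpy as np
from tvm import relay

ifm = relay.var("ifm", shape=(1, 8, 8, 3), dtype="uint8")
kernel = relay.const(np.ones((1, 1, 3, 4), dtype="uint8"))  # HWIO layout

conv = relay.qnn.op.conv2d(
    ifm, kernel,
    input_zero_point=relay.const(0, "int32"),
    kernel_zero_point=relay.const(0, "int32"),
    input_scale=relay.const(0.5, "float32"),
    kernel_scale=relay.const(0.25, "float32"),
    kernel_size=(1, 1), channels=4,
    data_layout="NHWC", kernel_layout="HWIO", out_dtype="int32",
)
bias = relay.nn.bias_add(conv, relay.const(np.zeros(4, dtype="int32")), axis=3)

# As in the helper above, the requantize input scale is the product of the
# input and kernel scales.
out = relay.qnn.op.requantize(
    bias,
    relay.const(0.5 * 0.25, "float32"), relay.const(0, "int32"),  # input scale / zero point
    relay.const(1.0, "float32"), relay.const(0, "int32"),         # output scale / zero point
    out_dtype="uint8",
)
out = relay.clip(out, a_min=0.0, a_max=255.0)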
Example #16
def test_clip():
    shape = (10, 10)
    x = relay.var('x', shape=shape)
    y = relay.clip(x, a_min=0.0, a_max=1.0)
    func = relay.Function([x], y)
    _construct_model(func)
Example #17
def get_graph(x_shape=(1, 8, 3, 3)):
    x = relay.var("x", shape=x_shape, dtype="float32")
    out = relay.clip(x, a_min=-0.2, a_max=0.4)
    f = relay.Function([x], out)
    return f, {"x": x_shape}, []
Example #18
def get_conv2d_nchw_bias_hardswish(d_shape, w_shape, padding, out_dtype="float16"):
    conv2d_out = get_conv2d_nchw_bias(d_shape, w_shape, padding, out_dtype=out_dtype)
    return conv2d_out * (
        relay.clip(conv2d_out + relay.const(3, dtype=out_dtype), a_min=0, a_max=6)
        / relay.const(6, dtype=out_dtype)
    )