def quantize(data, shift_bits, target_bits=relay.const(7, dtype='int32')):
    """Quantize the output of a layer, consistent with @yx's source code.

    Open question: should `shift_bits` participate in the network's control
    flow? In the MXNet quantization code (Truman's version) the bit width of
    `max_v` is converted to a plain integer via `asscalar()`, but I cannot
    find an equivalent function in Relay. I am unsure whether the condition
    `shift_bits == -1` should be part of the model graph or handled purely in
    the Python control flow. -- Longtao.Wang

    Parameters
    ----------
    shift_bits: tvm.relay.Expr
        Never used according to @yx's source code; it is always the constant
        Expr(-1).
    """
    max_v = relay.max(relay.abs(data))
    min_v = relay.min(data)

    ln_max_v = relay.log(relay.cast(max_v, 'float32'))
    ln_2 = relay.log(relay.const(2.))
    # ceil(ln(max_v) / ln(2)): number of bits needed to represent max_v.
    total_bits = relay.ceil(relay.divide(ln_max_v, ln_2))

    shift_bits = relay.subtract(total_bits.astype('int32'), target_bits)
    shift_bits = relay.maximum(shift_bits, relay.const(0))

    denominator = relay.left_shift(relay.const(1),
                                   relay.cast(shift_bits, 'int32'))
    out = relay.divide(data, denominator)
    # According to @yx's code, use a divide instead of a shift op so that
    # negative numbers round as expected.
    # out = relay.right_shift(data, shift_bits)

    out = relay.cast(relay.clip(out, a_min=-128, a_max=127), 'int8')
    return out, max_v, min_v, shift_bits
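# For reference only: a minimal NumPy sketch of the same arithmetic (an
# assumed equivalent, not taken from the original source). With
# target_bits = 7, a tensor whose absolute maximum is 1000.0 needs
# ceil(ln(1000) / ln(2)) = 10 bits, so shift_bits = 10 - 7 = 3 and the data
# is divided by 2**3 = 8 before being clipped into int8 range.
import numpy as np

def quantize_numpy(data, target_bits=7):
    max_v = np.max(np.abs(data))
    total_bits = int(np.ceil(np.log(max_v) / np.log(2)))
    shift_bits = max(total_bits - target_bits, 0)
    out = data / (1 << shift_bits)
    return np.clip(out, -128, 127).astype('int8'), shift_bits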
def test_tokenize_inf():
    x = relay.var("x", shape=(3, 4), dtype="float32")
    y = relay.clip(x, -np.inf, np.inf)
    f = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(f)
    mod = relay.transform.AnnotateSpans()(mod)
def test_clip_type():
    ib = relay.ir_builder.IRBuilder()
    a = ib.param("a", relay.TensorType((10, 4), "float32"))
    with ib.function(a) as func:
        ib.ret(relay.clip(a, 1., 4.))
    ib.ret(func)
    func = relay.ir_pass.infer_type(ib.env, func.to_func())
    ftype = func.checked_type
    assert ftype.ret_type == relay.TensorType((10, 4), "float32")
def test_clip():
    a = relay.var('a', relay.TensorType((10, 4), 'float32'))
    y = relay.clip(a, 1.0, 4.0)
    yy = run_infer_type(y)
    assert yy.checked_type == relay.TensorType((10, 4), 'float32')

    data = np.random.rand(10, 4).astype('float32')
    intrp = create_executor()
    op_res = intrp.evaluate(y, {a: relay.const(data)})
    ref_res = np.clip(data, 1.0, 4.0)
    np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
def test_clip():
    a = relay.var("a", relay.TensorType((10, 4), "float32"))
    y = relay.clip(a, 1., 4.)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((10, 4), "float32")

    data = np.random.rand(10, 4).astype('float32')
    intrp = create_executor()
    op_res = intrp.evaluate(y, {a: relay.const(data)})
    ref_res = np.clip(data, 1., 4.)
    np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
def make_qgraph(data, weight):
    out = data * relay.const(32.0)
    out = relay.round(out)
    out = relay.clip(out, a_min=-127, a_max=127)
    out = out.astype('int8')

    out = relay.nn.conv2d(out, weight, kernel_size=(3, 3),
                          padding=(1, 1), channels=c, out_dtype='int32')
    out = out.astype('float32')
    out = relay.multiply(out, relay.const(0.00024414062))
    out = relay.Function(relay.ir_pass.free_vars(out), out)
    return out
def test_clip(self):
    data = relay.var("data", relay.TensorType((-1, 6, 4, 4), "float32"))
    net = relay.clip(data, 0.0, 7.0)
    net = relay.Function([data], net)
    mod = tvm.IRModule.from_expr(net)
    mod = relay.transform.InferType()(mod)

    xgraph = xf_relay.from_relay(mod, {})
    layers = xgraph.get_layers()

    assert layers[0].type[0] == "Input"
    assert layers[1].type[0] == "Clip"
    assert layers[1].attrs["a_min"] == 0.0
    assert layers[1].attrs["a_max"] == 7.0
def verify_clip(dshape, a_min, a_max, dtype="float32"):
    x = relay.var("x", relay.ty.TensorType(dshape, dtype))
    y = relay.clip(x, a_min, a_max)
    func = relay.Function([x], y)
    x_data = np.random.uniform(size=dshape).astype(dtype)
    verify_results(func, [x_data], "test_clip", rtol=1e-5, atol=1e-5)
def _get_model(shape, dtype, a_min, a_max):
    a = relay.var("a", shape=shape, dtype=dtype)
    relu = relay.clip(a, a_min=a_min, a_max=a_max)
    return relu
def _get_model(shape, dtype, a_min, a_max):
    assert a_min >= np.iinfo(dtype).min and a_max <= np.iinfo(dtype).max
    a = relay.var("a", shape=shape, dtype=dtype)
    relu = relay.clip(a, a_min=a_min, a_max=a_max)
    return relu
def test_clip(x_shape=(1, 8, 3, 3)):
    x = relay.var('x', shape=x_shape, dtype='float32')
    out = relay.clip(x, a_min=-0.2, a_max=0.4)
    f = relay.Function([x], out)
    return f, {'x': x_shape}
def get_graph(x_shape=(1, 8, 3, 3)):
    x = relay.var("x", shape=x_shape, dtype=dtype)
    out = relay.clip(x, a_min=-0.2, a_max=0.4)
    f = tvm.IRModule.from_expr(out)
    return f, {"x": x_shape}, []
def hardswish(x, out_dtype="float16"):
    return x * (
        relay.clip(x + relay.const(3, dtype=out_dtype), a_min=0, a_max=6)
        / relay.const(6, dtype=out_dtype)
    )
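# For clarity, a NumPy reference (an assumed equivalent, not part of the
# original code) of the hard-swish formula used above and again in
# get_conv2d_nchw_bias_hardswish below: hswish(x) = x * clip(x + 3, 0, 6) / 6.
import numpy as np

def hardswish_ref(x):
    return x * np.clip(x + 3.0, 0.0, 6.0) / 6.0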
def test_clip_type():
    a = relay.var("a", relay.TensorType((10, 4), "float32"))
    y = relay.clip(a, 1., 4.)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((10, 4), "float32")
def create_qnn_conv2d(qnn_conv2d_params, ifm_expr):
    """Create a relay.Expr of relay.qnn.conv2d given the parameters."""
    v_params = list()
    params = {
        "kernel_size": [
            qnn_conv2d_params.kernel.get_dim_size("H"),
            qnn_conv2d_params.kernel.get_dim_size("W"),
        ],
        "strides": [qnn_conv2d_params.strides[0], qnn_conv2d_params.strides[1]],
        "dilation": [qnn_conv2d_params.dilation[0], qnn_conv2d_params.dilation[1]],
        "padding": [0, 0, 0, 0],
        "data_layout": qnn_conv2d_params.ifm.layout,
    }
    dilated_kernel_h = (
        qnn_conv2d_params.dilation[0] * (qnn_conv2d_params.kernel.get_dim_size("H") - 1) + 1
    )
    dilated_kernel_w = (
        qnn_conv2d_params.dilation[1] * (qnn_conv2d_params.kernel.get_dim_size("W") - 1) + 1
    )
    if qnn_conv2d_params.pad == "SAME":
        pad_top, pad_bottom = get_pad_value(
            qnn_conv2d_params.ifm.get_dim_size("H"), dilated_kernel_h, qnn_conv2d_params.strides[0]
        )
        pad_left, pad_right = get_pad_value(
            qnn_conv2d_params.ifm.get_dim_size("W"), dilated_kernel_w, qnn_conv2d_params.strides[1]
        )
        do_pad = not (pad_top == 0 and pad_bottom == 0 and pad_left == 0 and pad_right == 0)
        if do_pad:
            params["padding"] = [pad_top, pad_left, pad_bottom, pad_right]
    qnn_conv2d_params.pad = params["padding"]
    params["input_zero_point"] = qnn_conv2d_params.ifm.zp
    params["kernel_zero_point"] = qnn_conv2d_params.kernel.zp
    params["out_dtype"] = "int32"
    params["input_scale"] = qnn_conv2d_params.ifm.sc
    params["kernel_scale"] = qnn_conv2d_params.kernel.sc
    params["channels"] = int(qnn_conv2d_params.kernel.get_dim_size("O"))
    params["kernel_layout"] = qnn_conv2d_params.kernel.layout

    k_shape = qnn_conv2d_params.kernel.shape
    k_dtype = qnn_conv2d_params.kernel.dtype
    w = tvm.nd.array(
        np.random.randint(
            np.iinfo(k_dtype).min, high=np.iinfo(k_dtype).max, size=k_shape, dtype=k_dtype
        )
    )
    weight_expr = relay.const(w, k_dtype)
    v_params.append(w)
    qnn_conv2d_expr = qnn.op.conv2d(ifm_expr, weight_expr, **params)

    b = tvm.nd.array(
        np.random.randint(
            0, high=10, size=(qnn_conv2d_params.kernel.get_dim_size("O")), dtype="int32"
        )
    )
    v_params.append(b)
    bias_expr = relay.const(b, "int32")
    bias = relay.nn.bias_add(
        qnn_conv2d_expr, bias_expr, axis=qnn_conv2d_params.ifm.get_dim_index("C")
    )
    bias_scale = relay.const(
        qnn_conv2d_params.ifm.sc.data.asnumpy() * qnn_conv2d_params.kernel.sc.data.asnumpy(),
        "float32",
    )
    req_expr = relay.qnn.op.requantize(
        bias,
        bias_scale,  # input scale
        relay.const(0, "int32"),  # input zero point
        qnn_conv2d_params.ofm.sc,  # output scale
        qnn_conv2d_params.ofm.zp,  # output zero point
        out_dtype=qnn_conv2d_params.ofm.dtype,
    )
    if qnn_conv2d_params.activation != "NONE":
        assert qnn_conv2d_params.activation == "CLIP"
        clip_expr = relay.clip(
            req_expr, qnn_conv2d_params.clip_min, qnn_conv2d_params.clip_max
        )
        return clip_expr, v_params
    return req_expr, v_params
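# The SAME-padding helper used above is not shown here. Below is a minimal
# sketch of what get_pad_value is assumed to compute (TensorFlow-style SAME
# padding, splitting any odd remainder onto the trailing side); the exact
# helper in the original code may differ.
import math

def get_pad_value(data, kernel, stride):
    # Output size for SAME padding, then the total padding needed to reach it.
    out = int(math.ceil(float(data) / float(stride)))
    pad = max(0, (out - 1) * stride + kernel - data)
    pad_before = pad // 2
    pad_after = pad - pad_before
    return pad_before, pad_after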
def test_clip():
    shape = (10, 10)
    x = relay.var('x', shape=shape)
    y = relay.clip(x, a_min=0.0, a_max=1.0)
    func = relay.Function([x], y)
    _construct_model(func)
def get_graph(x_shape=(1, 8, 3, 3)):
    x = relay.var("x", shape=x_shape, dtype="float32")
    out = relay.clip(x, a_min=-0.2, a_max=0.4)
    f = relay.Function([x], out)
    return f, {"x": x_shape}, []
def get_conv2d_nchw_bias_hardswish(d_shape, w_shape, padding, out_dtype="float16"):
    conv2d_out = get_conv2d_nchw_bias(d_shape, w_shape, padding, out_dtype=out_dtype)
    return conv2d_out * (
        relay.clip(conv2d_out + relay.const(3, dtype=out_dtype), a_min=0, a_max=6)
        / relay.const(6, dtype=out_dtype)
    )