def expected(): x = relay.var("x", shape=[8, 8, 8, 8]) w = relay.var("w", shape=[8, 8, 3, 3]) x_fp16 = relay.cast(x, "float16") w_fp16 = relay.cast(w, "float16") c = relay.nn.conv2d(x_fp16, w_fp16, padding=(1, 1), out_dtype="float16") c_float32 = relay.cast(c, "float32") c_float16 = relay.cast(c_float32, "float16") r = relay.nn.relu(c_float16) r_float32 = relay.cast(r, "float32") return relay.Function([x, w], r_float32)
def test_let_statement_simple():
    """A 'simple' let statement example.

    Noticeable is the mutation of the bound variable types.
    """
    var1 = relay.var("var1", shape=[1, 20])
    var2 = relay.var("var2", shape=[1, 20])
    data = relay.var("data", shape=[1, 20])
    weight = relay.var("weight", shape=[20, 20])
    r1 = var1 + var1
    r2 = var2 + var2
    let2 = relay.Let(var2, relay.nn.dense(r1, weight, units=20), r2)
    let1 = relay.Let(var1, relay.nn.dense(data, weight, units=20), let2)

    mod = tvm.IRModule.from_expr(let1)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=[1, 20]).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=[20, 20]).astype("float32"),
    }
    output_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=0.01)

    # Construct expected structure
    var1 = relay.var("var1", shape=[1, 20], dtype="float16")
    var2 = relay.var("var2", shape=[1, 20], dtype="float16")
    data = relay.cast(relay.var("data", shape=[1, 20]), "float16")
    weight = relay.cast(relay.var("weight", shape=[20, 20]), "float16")
    r1 = var1 + var1
    r2 = var2 + var2
    let2 = relay.Let(
        var2,
        relay.cast(relay.nn.dense(r1, weight, units=20, out_dtype="float32"), "float16"),
        r2,
    )
    let1 = relay.Let(
        var1,
        relay.cast(relay.nn.dense(data, weight, units=20, out_dtype="float32"), "float16"),
        let2,
    )
    expected_mod = tvm.IRModule.from_expr(let1)
    expected_mod = InferType()(expected_mod)

    assert tvm.ir.structural_equal(expected_mod, output_mod)
def qnn_conv2d_transpose_legalize(attrs, inputs, types):
    """Convert kernel and data to int16, subtract the zero-point offsets upfront,
    and call into relay.nn.conv2d_transpose."""
    # Collect the input exprs.
    data, kernel, input_zero_point, kernel_zero_point, _, _ = inputs

    shift_data = relay.subtract(
        relay.cast(data, dtype="int16"), relay.cast(input_zero_point, "int16")
    )
    shift_kernel = relay.subtract(
        relay.cast(kernel, dtype="int16"), relay.cast(kernel_zero_point, "int16")
    )
    return relay.nn.conv2d_transpose(shift_data, shift_kernel, **attrs)
def _get_model(shape, typef, sizes, strides, pads, layout, dtype):
    """Return a model and any parameters it may have."""
    req = relay.var("a", shape=shape, dtype=dtype)
    if typef == relay.nn.avg_pool2d:
        req = relay.cast(req, "int32")
    req = typef(req, pool_size=sizes, strides=strides, padding=pads, ceil_mode=True, layout=layout)
    if typef == relay.nn.avg_pool2d:
        req = relay.cast(req, dtype)
    return req
def _shift(data, zero_point, out_dtype):
    """Shifts (adds/subtracts) the qnn tensor by 128."""
    if out_dtype == "uint8":
        shift = 128
    elif out_dtype == "int8":
        shift = -128
    else:
        raise ValueError("Unsupported out dtype.")
    data_modified = relay.cast(data, "int32")
    data_modified = relay.add(data_modified, relay.const(shift, "int32"))
    data_modified = relay.cast(data_modified, out_dtype)
    zero_point_val = get_scalar_from_constant(zero_point)
    zero_point_modified = relay.const(zero_point_val + shift, "int32")
    return (data_modified, zero_point_modified)
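# A small NumPy sketch, not part of the snippet above, illustrating why this
# shift is value-preserving: adding the same offset to both the quantized
# tensor and its zero point leaves (data - zero_point), the only quantity the
# quantized encoding carries, unchanged.
import numpy as np

data_uint8 = np.array([0, 1, 127, 128, 255], dtype=np.uint8)
zero_point = 10
shift = -128  # converting uint8 -> int8, matching the branch above

data_int8 = (data_uint8.astype(np.int32) + shift).astype(np.int8)
zero_point_shifted = zero_point + shift

assert np.array_equal(
    data_uint8.astype(np.int32) - zero_point,
    data_int8.astype(np.int32) - zero_point_shifted,
)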
def _get_pooling_model(
    shape, dtype, typef, sizes, strides, dilation, padding, ceil_mode, count_include_pad, var_names
):
    """Return a model and any parameters it may have."""
    if len(padding) == 2:
        padding = (padding[0], padding[1], padding[0], padding[1])
    out = relay.var(next(var_names), shape=shape, dtype=dtype)

    if typef == "nn.max_pool2d":
        out = relay.nn.max_pool2d(
            out,
            pool_size=sizes,
            strides=strides,
            dilation=dilation,
            padding=padding,
            ceil_mode=ceil_mode,
            layout="NHWC",
        )
    elif typef == "nn.avg_pool2d":
        if dtype == "uint8":
            out = relay.cast(out, "int32")
        out = relay.nn.avg_pool2d(
            out,
            pool_size=sizes,
            strides=strides,
            dilation=dilation,
            padding=padding,
            ceil_mode=ceil_mode,
            count_include_pad=count_include_pad,
            layout="NHWC",
        )
        if dtype == "uint8":
            out = relay.cast(out, "uint8")
    elif typef == "nn.l2_pool2d":
        out = relay.power(out, relay.const(2.0))
        out = relay.nn.avg_pool2d(
            out,
            pool_size=sizes,
            strides=strides,
            padding=padding,
            ceil_mode=ceil_mode,
            count_include_pad=count_include_pad,
            layout="NHWC",
        )
        out = relay.sqrt(out)
    else:
        raise ValueError("Function not supported")

    return out
def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) bias = relay.var("bias", shape=(64, )) weight = relay.var("weight", shape=(64, 64, 3, 3)) y = relay.layout_transform(x, "NCHW", "NCHW16c") w = relay.layout_transform(weight, "OIHW", "OIHW16i") y = relay.nn.conv2d(y, w, channels=64, kernel_size=(3, 3), padding=(1, 1), kernel_layout="OIHW16i", data_layout="NCHW16c") b = relay.expand_dims(bias, axis=1, num_newaxis=2) b = relay.expand_dims(b, axis=0, num_newaxis=1) b = relay.layout_transform(b, "NCHW", "NCHW16c") y = relay.add(y, b) y = relay.nn.relu(y) y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout="NCHW16c") y = relay.cast(y, 'int32') y = relay.layout_transform(y, "NCHW16c", "NCHW") y = relay.nn.batch_flatten(y) y = relay.Function(analysis.free_vars(y), y) return y
def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) weight1 = relay.var("weight1", shape=(64, 3, 3, 64)) weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype='int8') x = relay.layout_transform(x, 'NCHW', 'NHWC') weight1 = relay.layout_transform(weight1, 'OHWI', 'HWIO') out = relay.nn.conv2d(x, weight1, channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout='NHWC', kernel_layout='HWIO') out = relay.cast(out, 'int8') out = relay.layout_transform(out, 'NHWC', 'NCHW') weight2 = relay.layout_transform(weight2, 'OHWI', 'OIHW') out = relay.qnn.op.conv2d(out, weight2, relay.const(1, 'int32'), relay.const(1, 'int32'), relay.const(1, 'float32'), relay.const(1, 'float32'), channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout='NCHW', kernel_layout='OIHW') out = relay.Function(analysis.free_vars(out), out) return out
def before(N, CI, H, W, CO, KH, KW, layout):
    if layout == "NCHW":
        data_shape = (N, CI, H, W)
        weight_shape = (CO, CI, KH, KW)
        kernel_layout = "OIHW"
    else:
        data_shape = (N, H, W, CI)
        weight_shape = (KH, KW, CI, CO)
        kernel_layout = "HWIO"
    bias_shape = (CO,)

    data = relay.var("data", shape=data_shape, dtype="float32")
    offset = relay.var("offset")
    weight = relay.var("weight", shape=weight_shape, dtype="float32")
    bias = relay.var("bias", shape=bias_shape, dtype="float32")
    y = relay.nn.deformable_conv2d(
        data,
        offset,
        weight,
        kernel_size=(KH, KW),
        channels=CO,
        data_layout=layout,
        kernel_layout=kernel_layout,
    )
    y = relay.nn.bias_add(y, bias, axis=-1 if layout == "NHWC" else 1)
    y = relay.nn.relu(y)
    y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout=layout)
    y = relay.cast(y, "int32")
    y = relay.nn.batch_flatten(y)
    y = relay.Function(analysis.free_vars(y), y)
    return y
def _get_global_pooling_model(shape, dtype, typef, var_names):
    """Return a model and any parameters it may have."""
    out = relay.var(next(var_names), shape=shape, dtype=dtype)

    if typef == "nn.global_max_pool2d":
        out = relay.nn.global_max_pool2d(out, layout="NHWC")
    elif typef == "nn.global_avg_pool2d":
        if dtype == "uint8":
            out = relay.cast(out, "int32")
        out = relay.nn.global_avg_pool2d(out, layout="NHWC")
        if dtype == "uint8":
            out = relay.cast(out, "uint8")
    else:
        raise ValueError("Function not supported")

    return out
def test_convert_follow_node_with_integer_arguments():
    """Tests the conversion of a follow op with integer arguments + constant float args.

    The follow op should convert the floating point argument into fp16, as constants/vars
    will always be converted if safe to do so.
    """
    data = relay.var("data", shape=[1, 10], dtype="float32")

    # We use an addition to make sure the input indices are not a var
    # (which are always casted if safe)
    indices = relay.var("indices", shape=[1, 1], dtype="int32") + relay.const(0, dtype="int32")
    take = relay.take(data, indices, axis=0)
    mod = tvm.IRModule.from_expr(take)

    mod_params = {
        "data": np.random.uniform(-1, 1, size=[1, 10]).astype("float32"),
        "indices": np.array([[0]]).astype("int32"),
    }
    output_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=0.01)

    # Create expected module
    data = relay.cast(relay.var("data", shape=[1, 10]), "float16")
    take = relay.take(data, indices, axis=0)
    expected_mod = tvm.IRModule.from_expr(take)
    expected_mod = InferType()(expected_mod)

    assert tvm.ir.structural_equal(expected_mod, output_mod)
def before(): x = relay.var("x", shape=(1, 64, 56, 56)) weight1 = relay.var("weight1", shape=(64, 3, 3, 64)) weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype='int8') out = relay.nn.conv2d(x, weight1, channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout='NCHW', kernel_layout='OHWI') out = relay.cast(out, 'int8') out = relay.qnn.op.conv2d(out, weight2, relay.const(1, 'int32'), relay.const(1, 'int32'), relay.const(1, 'float32'), relay.const(1, 'float32'), channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout='NCHW', kernel_layout='OHWI') out = relay.Function(analysis.free_vars(out), out) return out
def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) weight1 = relay.var("weight1", shape=(64, 3, 3, 64)) weight2 = relay.var("weight2", shape=(64, 3, 3, 64), dtype="int8") weight3 = relay.var("weight3", shape=(64, 3, 3, 64)) x = relay.layout_transform(x, "NCHW", "NHWC") weight1 = relay.layout_transform(weight1, "OHWI", "HWIO") out = relay.nn.conv2d( x, weight1, channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", kernel_layout="HWIO", ) out = relay.cast(out, "int8") out = relay.layout_transform(out, "NHWC", "NCHW") weight2 = relay.layout_transform(weight2, "OHWI", "OIHW") out = relay.qnn.op.conv2d( out, weight2, relay.const(1, "int32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "float32"), channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW", kernel_layout="OIHW", ) out = relay.cast(out, "float32") out = relay.layout_transform(out, "NCHW", "NHWC") weight3 = relay.layout_transform(weight3, "OHWI", "HWIO") out = relay.nn.conv2d_transpose( out, weight3, channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", kernel_layout="HWIO", ) out = relay.layout_transform(out, "NHWC", "NCHW") out = relay.Function(analysis.free_vars(out), out) return out
def test_dnnl_not_compatible(run_module, target="llvm", dtype="float32"):
    xshape = (1, 32, 14, 14)
    x_data = np.random.uniform(-1, 1, xshape).astype(dtype)

    x = relay.var("x", shape=xshape, dtype=dtype)
    y = relay.add(x, x)
    z = relay.cast(relay.cast(y, "int32"), "float32")
    out = relay.nn.relu(z)
    f = relay.Function([x], out)
    mod = tvm.IRModule()
    mod["main"] = f
    mod = partition_for_dnnl(mod)

    for mode in ["graph", "vm"]:
        with tvm.transform.PassContext(opt_level=3):
            func = relay.create_executor(
                mode, mod=mod, device=tvm.cpu(0), target=target
            ).evaluate()
            if run_module:
                results = func(x_data)
def __uint64_to_2xuint32_vector(self, ctr):
    """Convert a uint64 vector to a corresponding uint32 vector.

    A uint64 vector of size n is converted to a uint32 vector of size 2n.
    Each uint64 is split into the pair (32 high bits, 32 low bits). Output
    values keep the input order, i.e., both values coming from one uint64
    remain consecutive in the output vector.
    """
    hi = relay.right_shift(ctr, RELAY_UINT64_32)
    lo = relay.bitwise_and(ctr, RELAY_UINT64_CLEAR_HIGH)

    hi_32 = relay.cast(hi, "uint32")
    lo_32 = relay.cast(lo, "uint32")

    vector_hi_32 = relay.reshape(hi_32, (self.n, 1))
    vector_lo_32 = relay.reshape(lo_32, (self.n, 1))

    tensor = relay.concatenate([vector_hi_32, vector_lo_32], 1)
    return relay.reshape(tensor, (2 * self.n))
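# A hedged NumPy sketch of the same split, assuming RELAY_UINT64_32 is the
# relay constant 32 and RELAY_UINT64_CLEAR_HIGH is the 0xFFFFFFFF low-bit
# mask: each uint64 becomes the consecutive pair (high 32 bits, low 32 bits).
import numpy as np

ctr = np.array([0x0123456789ABCDEF, 0xFFFFFFFF00000001], dtype=np.uint64)
hi = (ctr >> np.uint64(32)).astype(np.uint32)
lo = (ctr & np.uint64(0xFFFFFFFF)).astype(np.uint32)
out = np.stack([hi, lo], axis=1).reshape(2 * ctr.size)
# out == [0x01234567, 0x89ABCDEF, 0xFFFFFFFF, 0x00000001]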
def expected(): x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8") weight1 = relay.var("weight1", shape=(3, 3, 64, 64), dtype="int8") weight2 = relay.var("weight2", shape=(3, 3, 64, 64), dtype="int8") weight1 = relay.layout_transform(weight1, "HWIO", "OIHW") weight2 = relay.layout_transform(weight2, "HWIO", "OIHW") y = relay.layout_transform(x, "NHWC", "NCHW") y = relay.qnn.op.conv2d( y, weight1, relay.const(1, "int32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "float32"), channels=64, kernel_size=(3, 3), padding=(1, 1), ) y1 = relay.qnn.op.conv2d( y, weight2, relay.const(1, "int32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "float32"), channels=64, kernel_size=(3, 3), padding=(1, 1), ) y = relay.cast(y, "int8") y1 = relay.cast(y, "int8") ret = relay.qnn.op.add( y, y1, relay.const(1, "float32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "int32"), ) ret = relay.layout_transform(ret, "NCHW", "NHWC") y = relay.Function(analysis.free_vars(ret), ret) return y
def before(): x = relay.var("x", shape=(1, 56, 56, 64), dtype="int8") weight1 = relay.var("weight1", shape=(3, 3, 64, 64), dtype="int8") weight2 = relay.var("weight2", shape=(3, 3, 64, 64), dtype="int8") y = relay.qnn.op.conv2d( x, weight1, relay.const(1, "int32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "float32"), channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", kernel_layout="HWIO", ) y1 = relay.qnn.op.conv2d( y, weight2, relay.const(1, "int32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "float32"), channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", kernel_layout="HWIO", ) y = relay.cast(y, "int8") y1 = relay.cast(y, "int8") ret = relay.qnn.op.add( y, y1, relay.const(1, "float32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "int32"), relay.const(1, "float32"), relay.const(1, "int32"), ) y = relay.Function(analysis.free_vars(ret), ret) return y
def approx_exp(x):
    x = relay.minimum(relay.maximum(x, C(-88.0)), C(88.0))
    x = C(127.0) + x * C(1.44269504)
    xf = relay.floor(x)
    i = relay.cast(xf, "int32")
    x = x - xf
    Y = C(0.99992522) + x * (C(0.69583354) + x * (C(0.22606716) + x * C(0.078024523)))
    exponent = relay.left_shift(i, relay.expr.const(23, "int32"))
    exponent = relay.reinterpret(exponent, "float32")
    return exponent * Y
def safe_exp(w):
    slope = relay.const(np.exp(1, dtype=np.float32))
    lin_bool = w > slope
    lin_region = relay.cast(lin_bool, "float32")
    lin_out = slope * w
    exp_out = relay.exp(relay.where(lin_bool, relay.zeros_like(w), w))
    out = lin_region * lin_out + (relay.const(1.0) - lin_region) * exp_out
    return out
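# A hedged NumPy sketch of what safe_exp computes: plain exp(w) where w is at
# most e, and the linear extrapolation e * w above that threshold; the
# relay.where call feeds zeros to exp in the linear region so the exponential
# never overflows there.
import numpy as np

def safe_exp_np(w):
    w = np.asarray(w, dtype=np.float32)
    slope = np.float32(np.e)
    lin_region = (w > slope).astype(np.float32)
    exp_out = np.exp(np.where(w > slope, 0.0, w))  # zeroed where the result is discarded
    return lin_region * (slope * w) + (1.0 - lin_region) * exp_out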
def make_model(
    pool_op,
    shape=(1, 28, 28, 12),
    pool_size=(3, 3),
    strides=(2, 2),
    padding="VALID",
    dtype="int8",
    scale=1,
    zero_point=-33,
    relu_type="RELU",
    layout="NHWC",
    input_op=None,
):
    """Return a model and any parameters it may have.

    All parameters are defaulted to known good values.
    """
    if input_op:
        op = input_op
    else:
        op = relay.var("input", shape=shape, dtype=dtype)
    pad_ = (0, 0, 0, 0)
    if padding == "SAME":
        dilation = (1, 1)
        pad_ = get_same_padding((shape[1], shape[2]), pool_size, dilation, strides)
        op = relay.nn.pad(
            op,
            pad_width=[(0, 0), (pad_[0], pad_[2]), (pad_[1], pad_[3]), (0, 0)],
            pad_value=zero_point,
            pad_mode="constant",
        )
    if pool_op.__name__ == relay.nn.avg_pool2d.__name__:
        op = relay.cast(op, "int32")
    op = pool_op(
        op, pool_size=pool_size, strides=strides, padding=pad_, ceil_mode=True, layout=layout
    )
    if pool_op.__name__ == relay.nn.avg_pool2d.__name__:
        op = relay.cast(op, dtype)
    op = make_qnn_relu(op, relu_type, scale, zero_point, dtype)
    return op
def test_green_gray_propagates_simple():
    """Conv is a green listed operation, while addition is gray.

    As Conv outputs fp16, the add should be done in fp16.
    """
    data_shape = (1, 3, 32, 32)
    weight_shape = (5, 3, 3, 3)
    data = relay.var("data", shape=data_shape, dtype="float32")
    weight = relay.var("weight", shape=weight_shape, dtype="float32")
    conv = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1), out_dtype="float32")
    conv = conv + conv
    mod = tvm.IRModule.from_expr(conv)
    mod = tvm.relay.transform.InferType()(mod)

    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    fp16_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=1e-3)

    conv_expr = relay.cast(
        relay.nn.conv2d(
            relay.cast(data, "float16"),
            relay.cast(weight, "float16"),
            strides=(1, 1),
            padding=(1, 1),
            out_dtype="float32",
        ),
        "float16",
    )
    expected_mod = tvm.IRModule.from_expr(conv_expr + conv_expr)
    expected_mod = tvm.relay.transform.InferType()(expected_mod)

    assert not tvm.ir.structural_equal(fp16_mod, mod)
    assert tvm.ir.structural_equal(fp16_mod, expected_mod)
def test_convert_single_conv():
    """Conv is a green listed operation, meaning it will always use an fp16 workload.

    By default it accumulates to fp32 and outputs fp16.
    """
    data_shape = (1, 3, 32, 32)
    weight_shape = (5, 3, 3, 3)
    data = relay.var("data", shape=data_shape, dtype="float32")
    weight = relay.var("weight", shape=weight_shape, dtype="float32")
    conv = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1), out_dtype="float32")
    mod = tvm.IRModule.from_expr(conv)
    mod = tvm.relay.transform.InferType()(mod)

    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    fp16_mod = verify_mixed_precision_output_close(
        mod, mod_params, atol=0.01, rtol=1e-3, keep_orig_output_dtype=True
    )

    expected_mod = tvm.IRModule.from_expr(
        relay.cast(
            relay.nn.conv2d(
                relay.cast(data, "float16"),
                relay.cast(weight, "float16"),
                strides=(1, 1),
                padding=(1, 1),
                out_dtype="float16",
            ),
            "float32",
        )
    )
    expected_mod = tvm.relay.transform.InferType()(expected_mod)

    assert not tvm.ir.structural_equal(fp16_mod, mod)
    assert tvm.ir.structural_equal(fp16_mod, expected_mod)
def before(data, conv_weight, bias1, bias2):
    x = relay.nn.conv2d(
        data, conv_weight, channels=16, kernel_size=(3, 3), padding=(1, 1), out_dtype="int8"
    )
    x1 = relay.cast(x, dtype="int32")
    y1 = relay.add(x1, bias1)
    y2 = relay.add(x1, bias2)
    y = relay.add(y1, y2)
    return relay.Function([data, conv_weight, bias1, bias2], y)
def vnni_legalize(inputs, arg_types, op, attrs, need_expand=False):
    """Legalizes s8, s8 -> s32 GEMM op for VNNI."""
    if check_vnni_applicable(arg_types[0], arg_types[1]) and arg_types[0].dtype == "int8":
        x, y = inputs
        x = relay.cast(x, "int32")
        x = relay.add(x, relay.const(128, "int32"))
        x = relay.cast(x, "uint8")

        adjust_shift = relay.const(128, "int32") * relay.sum(relay.cast(y, "int32"), axis=[-1])

        if need_expand:
            adjust_shift = relay.expand_dims(adjust_shift, axis=1)

        out = op(x, y, **attrs)

        return relay.subtract(out, adjust_shift)

    return None
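# A hedged NumPy check of the identity this legalization relies on: shifting
# the signed LHS into unsigned range by adding 128 and then subtracting
# 128 * sum(y) along the reduction axis reproduces the original s8 x s8
# product (int32 arrays stand in for the int8/uint8 tensors here).
import numpy as np

x = np.random.randint(-128, 128, size=(4, 16)).astype(np.int32)  # data
y = np.random.randint(-128, 128, size=(8, 16)).astype(np.int32)  # weight

reference = x @ y.T
legalized = (x + 128) @ y.T - 128 * y.sum(axis=-1)
assert np.array_equal(reference, legalized)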
def expected(): p0 = relay.var("p0", shape=(16, channel_size)) softmax = relay.nn.softmax(p0) out = relay.cast(softmax, "float16") x = relay.var("x", shape=(16, channel_size)) f0 = relay.Function([p0], out) f0 = f0.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) y = relay.Call(f0, [x]) return relay.Function([x], y)
def test_cast():
    x = relay.var("x", relay.TensorType((8, 9, 4), "float32"))
    y = x.astype("int32")
    yy = run_infer_type(y)
    assert "dtype=" in yy.astext()
    assert yy.checked_type == relay.TensorType((8, 9, 4), "int32")

    x = relay.var("x", relay.TensorType((8, 9, 4), "float32"))
    y = relay.cast(x, "int32")
    yy = run_infer_type(y)
    assert "dtype=" in yy.astext()
    assert yy.checked_type == relay.TensorType((8, 9, 4), "int32")
def approx_exp(x):
    # An approximation derived from Opus,
    # https://github.com/xiph/opus/blob/c1c247/celt/mathops.h#L147-L165
    x = relay.minimum(relay.maximum(x, C(-88.0)), C(88.0))
    x = C(127.0) + x * C(1.44269504)
    xf = relay.floor(x)
    i = relay.cast(xf, "int32")
    x = x - xf
    Y = C(0.99992522) + x * (C(0.69583354) + x * (C(0.22606716) + x * C(0.078024523)))
    exponent = relay.left_shift(i, relay.expr.const(23, "int32"))
    exponent = relay.reinterpret(exponent, "float32")
    return exponent * Y
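# A hedged NumPy transcription of the same approximation, to make the bit
# trick concrete: after scaling by log2(e) and adding the float32 exponent
# bias of 127, the integer part shifted into the exponent field yields
# 2**(i - 127), and the cubic polynomial approximates 2**frac for the
# fractional remainder.
import numpy as np

def approx_exp_np(x):
    x = np.clip(np.asarray(x, dtype=np.float32), -88.0, 88.0)
    x = np.float32(127.0) + x * np.float32(1.44269504)
    xf = np.floor(x)
    i = xf.astype(np.int32)
    frac = (x - xf).astype(np.float32)
    poly = 0.99992522 + frac * (0.69583354 + frac * (0.22606716 + frac * 0.078024523))
    exponent = (i << 23).view(np.float32)  # the relay.reinterpret step
    return exponent * poly.astype(np.float32)

vals = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=np.float32)
print(approx_exp_np(vals))  # should agree with np.exp(vals) to a few decimal places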
def test_cast(): x = relay.var("x", relay.TensorType((8, 9, 4), "float32")) y = x.astype("int32") yy = relay.ir_pass.infer_type(y) assert "dtype=" in yy.astext() assert yy.checked_type == relay.TensorType((8, 9, 4), "int32") x = relay.var("x", relay.TensorType((8, 9, 4), "float32")) y = relay.cast(x, "int32") yy = relay.ir_pass.infer_type(y) assert "dtype=" in yy.astext() assert yy.checked_type == relay.TensorType((8, 9, 4), "int32")
def test_simplify_cast():
    dtype = "int32"
    data = relay.var("data", shape=(3, 4, 5), dtype=dtype)
    expr1 = relay.cast(data, dtype)
    dtype_like = relay.var("dtype_like", shape=(2, 2, 2), dtype=dtype)
    expr2 = relay.cast_like(data, dtype_like)

    expected = run_infer_type(data)
    actual1 = run_opt_pass(expr1, relay.transform.SimplifyExpr())
    assert tvm.ir.structural_equal(actual1, expected)
    actual2 = run_opt_pass(expr2, relay.transform.SimplifyExpr())
    assert tvm.ir.structural_equal(actual2, expected)
def test_simplify_consecutive_cast():
    x = relay.var("x", shape=(3, 4, 5), dtype="int8")
    y = relay.var("y", shape=(3, 4), dtype="int64")
    z = relay.var("z", shape=(3,), dtype="float32")

    expr1 = relay.cast(x, "int16")
    expr2 = relay.cast(expr1, "int32")
    expr3 = relay.cast_like(expr2, y)
    expr4 = relay.cast_like(expr3, z)

    actual1 = run_opt_pass(expr2, relay.transform.SimplifyExpr())
    expected = run_infer_type(relay.cast(x, "int32"))
    assert tvm.ir.structural_equal(actual1, expected)

    actual2 = run_opt_pass(expr3, relay.transform.SimplifyExpr())
    expected = run_infer_type(relay.cast(x, "int64"))
    assert tvm.ir.structural_equal(actual2, expected)

    actual3 = run_opt_pass(expr4, relay.transform.SimplifyExpr())
    expected = run_infer_type(relay.cast(x, "float32"))
    assert tvm.ir.structural_equal(actual3, expected)

    # cannot simplify the narrow cast
    x = relay.var("x", shape=(3, 4, 5), dtype="float32")
    y = relay.var("y", shape=(3, 4), dtype="float32")
    expr1 = relay.cast(x, "int32")
    expr2 = relay.cast_like(expr1, y)
    actual = run_opt_pass(expr2, relay.transform.SimplifyExpr())
    expected = run_infer_type(expr2)
    assert tvm.ir.structural_equal(actual, expected)
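# A hedged NumPy illustration of the rule this test exercises: consecutive
# widening casts can be collapsed into a single cast, but a narrowing
# intermediate cast (here float32 -> int32 -> float32) changes values and so
# must be kept.
import numpy as np

a = np.array([-3, 7, 100], dtype=np.int8)
assert np.array_equal(a.astype(np.int16).astype(np.int32), a.astype(np.int32))

b = np.array([1.5, -2.7], dtype=np.float32)
print(b.astype(np.int32).astype(np.float32))  # [1., -2.] != b, so not folded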
def before(): x = relay.var("x", shape=(1, 64, 56, 56)) bias = relay.var("bias") weight = relay.var("weight") y = relay.nn.conv2d(x, weight, channels=64, kernel_size=(3, 3), padding=(1, 1)) y = relay.nn.bias_add(y, bias) # a useless tuple, which will be eliminated y = relay.Tuple([y])[0] y = relay.nn.relu(y) y = relay.nn.max_pool2d(y, pool_size=(2, 2)) y = relay.cast(y, 'int32') y = relay.nn.batch_flatten(y) y = relay.Function(free_vars(y), y) return y
def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) bias = relay.var("bias", shape=(64,)) weight = relay.var("weight", shape=(64, 64, 3, 3)) y = relay.layout_transform(x, "NCHW", "NCHW16c") w = relay.layout_transform(weight, "OIHW", "OIHW16i") y = relay.nn.conv2d(y, w, channels=64, kernel_size=(3, 3), padding=(1, 1), kernel_layout="OIHW16i", data_layout="NCHW16c") b = relay.expand_dims(bias, axis=1, num_newaxis=2) b = relay.layout_transform(b, "CHW", "CHW16c") y = relay.add(y, b) y = relay.nn.relu(y) y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout="NCHW16c") y = relay.cast(y, 'int32') y = relay.layout_transform(y, "NCHW16c", "NCHW") y = relay.nn.batch_flatten(y) y = relay.Function(free_vars(y), y) return y