def convnet():
    """Alternating layout of simple convnet (from image super-resolution)."""
    bias1 = relay.var('bias1', shape=(64,))
    bias2 = relay.var('bias2', shape=(64,))
    bias3 = relay.var('bias3', shape=(64,))
    bias4 = relay.var('bias4', shape=(64,))
    weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
    weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
    weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
    weight4 = relay.var('weight4', shape=(64, 64, 3, 3))
    data = relay.var("x", shape=(1, 1, 224, 224))
    n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5])
    n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
    n02 = relay.add(n00, n01)
    n03 = relay.nn.relu(n02)
    n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
    n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
    n06 = relay.add(n04, n05)
    n07 = relay.nn.relu(n06)
    n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
    n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
    n10 = relay.add(n08, n09)
    n11 = relay.nn.relu(n10)
    n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
    n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
    n14 = relay.add(n12, n13)
    n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
    n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
    net = relay.reshape(n16, newshape=[1, 1, 672, 672])
    args = relay.ir_pass.free_vars(net)
    return relay.Function(args, net)
def before(x, conv_weight, out_bias, out_scale, channels):
    args = [x, conv_weight, out_bias, out_scale]
    out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
    out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
    y = relay.nn.conv2d(x, conv_weight, channels=channels,
                        kernel_size=(3, 3), padding=(1, 1))
    y = relay.add(y, out_bias)
    y = relay.nn.relu(y)
    y = relay.multiply(y, out_scale)
    return relay.Function(args, y)
def before(x, conv_weight, in_bias, in_scale, channels):
    args = [x, conv_weight, in_bias, in_scale]
    in_scale = relay.expand_dims(in_scale, axis=1, num_newaxis=2)
    in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2)
    x = relay.multiply(x, in_scale)
    x = relay.nn.relu(x)
    x = relay.add(x, in_bias)
    y = relay.nn.conv2d(x, conv_weight, channels=channels,
                        kernel_size=(3, 3), padding=(1, 1))
    return relay.Function(args, y)
def simple_bn(x, gamma, beta, moving_mean, moving_var,
              axis=1, epsilon=1e-5, shape=None):
    # expect = (x - moving_mean) / sqrt(moving_var + eps) * gamma + beta
    scale = rly.multiply(
        rly.const(1, 'float32') / rly.sqrt(moving_var + rly.const(epsilon, 'float32')),
        gamma)
    shift = rly.add(
        rly.multiply(rly.negative(moving_mean), scale), beta)
    num_newaxis = len(shape) - (axis + 1)
    if num_newaxis:
        scale = rly.expand_dims(scale, axis=1, num_newaxis=num_newaxis)
        shift = rly.expand_dims(shift, axis=1, num_newaxis=num_newaxis)
    return x * scale + shift
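# Hedged usage sketch (not from the original source): simple_bn expands batch norm
# into an explicit scale/shift expression, so it can serve as the reference graph
# when checking an inference-simplification pass. The variable names and the
# (1, 64, 56, 56) shape below are illustrative assumptions.
def simple_bn_reference():
    x = rly.var("x", shape=(1, 64, 56, 56))
    gamma = rly.var("gamma", shape=(64,))
    beta = rly.var("beta", shape=(64,))
    moving_mean = rly.var("moving_mean", shape=(64,))
    moving_var = rly.var("moving_var", shape=(64,))
    # Expected expression once nn.batch_norm has been folded into scale/shift.
    body = simple_bn(x, gamma, beta, moving_mean, moving_var,
                     shape=(1, 64, 56, 56))
    return rly.Function([x, gamma, beta, moving_mean, moving_var], body)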
def fail2(x, conv_weight, out_bias, out_scale, channels):
    args = [x, conv_weight, out_bias, out_scale]
    out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
    out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
    y1 = relay.nn.conv2d(x, conv_weight, channels=channels,
                         kernel_size=(3, 3), padding=(1, 1))
    y2 = relay.nn.relu(y1)
    # fold will fail because y1 is also referred to by y2
    y1 = relay.multiply(y1, out_scale)
    y = relay.add(y1, y2)
    return relay.Function(args, y)
def expected(x, conv_weight, in_bias, in_scale, channels):
    # use a fixed order of args so the alpha-equal check can pass
    args = [x, conv_weight, in_bias]
    in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2)
    squeezed_scale = relay.squeeze(in_scale, axis=[1, 2])
    x = relay.nn.relu(x)
    in_bias = relay.divide(
        in_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2))
    x = relay.add(x, in_bias)
    conv_weight = relay.multiply(
        conv_weight, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2))
    y = relay.nn.conv2d(x, conv_weight, channels=channels,
                        kernel_size=(3, 3), padding=(1, 1))
    return relay.Function(args, y)
def test_expand_dims_infer_type():
    n, t, d = tvm.var("n"), tvm.var("t"), 100
    x = relay.var("x", shape=(n, t, d))
    y = relay.expand_dims(x, axis=2)
    assert "axis=2" in y.astext()
    checked = relay.ir_pass.infer_type(y)
    assert checked.checked_type == relay.TensorType((n, t, 1, 100))
def verify_expand_dims(dshape, dtype, oshape, axis, num_newaxis):
    x = relay.Var("x", relay.TensorType(dshape, dtype))
    func = relay.Function([x], relay.expand_dims(x, axis, num_newaxis))
    for target, ctx in ctx_list():
        data = np.random.uniform(size=dshape).astype(dtype)
        ref_res = data.reshape(oshape)
        intrp = relay.create_executor("graph", ctx=ctx, target=target)
        op_res = intrp.evaluate(func)(data)
        np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
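# Example invocations (shapes chosen for illustration): each call checks that
# expand_dims at `axis` with `num_newaxis` new axes matches a plain numpy
# reshape to `oshape` on every enabled target.
def test_expand_dims():
    verify_expand_dims((3, 10), "float32", (3, 10, 1, 1), 2, 2)
    verify_expand_dims((3, 10), "float32", (1, 3, 10), -3, 1)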
def test_call_attrs():
    x = relay.var("x")
    # non-default args
    z = relay.nn.softmax(x, axis=2)
    assert "axis=2" in z.astext()
    # default args
    z = relay.nn.softmax(x)
    assert "softmax(%x)" in z.astext()
    # non-default args
    z = relay.expand_dims(x, axis=2, num_newaxis=2)
    assert "num_newaxis=2" in z.astext()
def fail1(x, conv_weight, out_bias, out_scale, channels):
    args = [x, conv_weight, out_bias, out_scale]
    out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
    out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
    y1 = relay.nn.conv2d(x, conv_weight, channels=channels,
                         kernel_size=(3, 3), padding=(1, 1))
    y1 = relay.nn.relu(y1)
    y2 = relay.nn.conv2d(x, conv_weight, channels=channels,
                         kernel_size=(3, 3), padding=(1, 1),
                         out_layout="CNHW")
    # fold will fail because the scale axes of the two paths
    # differ from each other.
    y2 = relay.nn.relu(y2)
    y = relay.add(y1, y2)
    y = relay.multiply(y, out_scale)
    return relay.Function(args, y)
def expected(x, conv_weight, out_scale, channels):
    # use a fixed order of args so the alpha-equal check can pass
    args = [x, conv_weight]
    squeezed_scale = relay.squeeze(out_scale, axis=[1, 2])
    conv_weight = relay.multiply(
        conv_weight, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
    y = relay.nn.conv2d(x, conv_weight, channels=channels,
                        kernel_size=(3, 3), padding=(1, 1))
    return relay.Function(args, y)
def expected(x, conv_weight, out_bias, out_scale, channels):
    # use a fixed order of args so the alpha-equal check can pass
    args = [x, conv_weight, out_bias, out_scale]
    out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
    out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
    squeezed_scale = relay.squeeze(out_scale, axis=[1, 2])

    def fold_conv_weight():
        return relay.multiply(
            conv_weight,
            relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))

    y1 = relay.nn.conv2d(x, fold_conv_weight(), channels=channels,
                         kernel_size=(3, 3), padding=(1, 1))
    y1 = relay.nn.relu(y1)
    y2 = relay.nn.conv2d(x, fold_conv_weight(), channels=channels,
                         kernel_size=(3, 3), padding=(1, 1))
    y2 = relay.nn.relu(y2)
    y = relay.add(y1, y2)
    return relay.Function(args, y)
def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) bias = relay.var("bias", shape=(64,)) scale = relay.var("scale", shape=(64, 1, 1)) weight = relay.var("weight") x = relay.layout_transform(x, "NCHW", "NCHW16c") bias = relay.expand_dims(bias, 1, 2) bias = relay.layout_transform(bias, "CHW", "CHW16c") scale = relay.layout_transform(scale, "CHW", "CHW16c") y = relay.nn.conv2d(x, weight, channels=64, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW16c") y = relay.add(y, bias) # test broadcasting to lhs y = relay.multiply(scale, y) # test broadcasting to rhs y = relay.layout_transform(y, "NCHW16c", "NCHW") y = relay.Function(free_vars(y), y) return y
def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) bias = relay.var("bias", shape=(64,)) weight = relay.var("weight", shape=(64, 64, 3, 3)) y = relay.layout_transform(x, "NCHW", "NCHW16c") w = relay.layout_transform(weight, "OIHW", "OIHW16i") y = relay.nn.conv2d(y, w, channels=64, kernel_size=(3, 3), padding=(1, 1), kernel_layout="OIHW16i", data_layout="NCHW16c") b = relay.expand_dims(bias, axis=1, num_newaxis=2) b = relay.layout_transform(b, "CHW", "CHW16c") y = relay.add(y, b) y = relay.nn.relu(y) y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout="NCHW16c") y = relay.cast(y, 'int32') y = relay.layout_transform(y, "NCHW16c", "NCHW") y = relay.nn.batch_flatten(y) y = relay.Function(free_vars(y), y) return y
def get_graph(x_shape=(1, 3), axis=1, num_newaxis=1):
    x = relay.var("x", shape=x_shape, dtype="float32")
    out = relay.expand_dims(x, axis, num_newaxis)
    f = relay.Function([x], out)
    return f, {"x": x_shape}, []
def fold_conv_weight():
    return relay.multiply(
        conv_weight,
        relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
def _conv2d_legalize(attrs, inputs, arg_types):
    """Legalizes Conv2D op.

    Parameters
    ----------
    attrs : tvm.attrs.Attrs
        Attributes of current convolution
    inputs : list of tvm.relay.Expr
        The args of the Relay expr to be legalized
    arg_types : list of types
        List of input and output types

    Returns
    -------
    result : tvm.relay.Expr
        The legalized expr
    """
    # Dilation not supported yet. Return None if dilation is not (1, 1)
    dilation = attrs.get_int_tuple("dilation")
    if not (dilation[0] == 1 and dilation[1] == 1):
        return None

    # No legalization for depthwise convolutions yet.
    groups = attrs.get_int("groups")
    if groups != 1:
        return None

    # Collect the input tensors.
    data_tensor, kernel_tensor = arg_types[0], arg_types[1]
    data_dtype = data_tensor.dtype
    kernel_dtype = kernel_tensor.dtype

    # Collect the output tensor.
    output_tensor = arg_types[2]

    # Collect the input exprs.
    data, kernel = inputs

    # Get the conv attrs
    new_attrs = {k: attrs[k] for k in attrs.keys()}

    is_int8_inputs = False
    # If both the inputs are int8, we can add 128 to make the input dtype uint8, and then
    # adjust the output. This helps in picking up Intel VNNI instructions.
    # Original --> C = A (conv) B
    # A and B are int8
    #   C = (A + 128 - 128) (conv) B
    #   C = (A' conv B) - 128 (conv) B
    # where A' = A + 128
    # and 128 (conv) B is basically a reduce on the CRS axis of the weights.
    if data_tensor.dtype == 'int8' and kernel_tensor.dtype == 'int8':
        is_int8_inputs = True
        padding = attrs.get_int_tuple("padding")

        if attrs['data_layout'] == 'NHWC' and attrs['kernel_layout'] == 'HWIO':
            adjust_shift = relay.sum(relay.cast(kernel, dtype='int32'), axis=(0, 1, 2))
            pad_width = ((0, 0), (padding[0], padding[0]), (padding[1], padding[1]), (0, 0))
        elif attrs['data_layout'] == 'NCHW' and attrs['kernel_layout'] == 'OIHW':
            pad_width = ((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1]))
            adjust_shift = relay.sum(relay.cast(kernel, dtype='int32'), axis=(1, 2, 3))
            adjust_shift = relay.expand_dims(adjust_shift, axis=1, num_newaxis=2)
        else:
            return None

        data = relay.cast(data, 'int32')
        data = relay.add(data, relay.const(128, 'int32'))
        data = relay.cast(data, 'uint8')

        # Do external padding as the pad value has to be 128.
        if not (padding[0] == 0 and padding[1] == 0):
            data = relay.nn.pad(data, pad_width=pad_width, pad_value=128)
        new_attrs['padding'] = (0, 0)

        # The data type is now shifted to uint8
        data_dtype = 'uint8'

        # Multiply by 128 to adjust the shift.
        adjust_shift = relay.multiply(adjust_shift, relay.const(128, 'int32'))

    # Legalize if the datatypes are suitable for fast Int8 instructions. Int8 instructions
    # require the input channel to be a multiple of 4 and the output channels to be a
    # multiple of 16. For input channels, we pad both the inputs and weights input channels.
    # For output channels, we pad the weight and stride_slice the output.
    if _is_int8_hw_support(data_dtype, kernel_dtype):
        # Flags to remember if the expr is modified
        ic_modified = False
        oc_modified = False

        # Find the value of input and output channel.
        in_channel = -1
        out_channel = -1
        if attrs['data_layout'] == 'NHWC' and attrs['kernel_layout'] == 'HWIO':
            in_channel = data_tensor.shape[3].value
            out_channel = kernel_tensor.shape[3].value
        elif attrs['data_layout'] == 'NCHW' and attrs['kernel_layout'] == 'OIHW':
            in_channel = data_tensor.shape[1].value
            out_channel = kernel_tensor.shape[0].value
        else:
            return None

        if in_channel % 4 != 0:
            new_in_channel = ((in_channel + 4) // 4) * 4
            diff = new_in_channel - in_channel
            if attrs['data_layout'] == 'NHWC' and attrs['kernel_layout'] == 'HWIO':
                data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
                kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, diff), (0, 0)))
                ic_modified = True
            elif attrs['data_layout'] == 'NCHW' and attrs['kernel_layout'] == 'OIHW':
                pad_width = ((0, 0), (0, diff), (0, 0), (0, 0))
                data = relay.nn.pad(data, pad_width=pad_width)
                kernel = relay.nn.pad(kernel, pad_width=pad_width)
                ic_modified = True
            else:
                return None

        new_out_channel = out_channel
        if out_channel % 16 != 0:
            new_out_channel = ((out_channel + 16) // 16) * 16
            diff = new_out_channel - out_channel
            if attrs['data_layout'] == 'NHWC' and attrs['kernel_layout'] == 'HWIO':
                kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
                oc_modified = True
            elif attrs['data_layout'] == 'NCHW' and attrs['kernel_layout'] == 'OIHW':
                kernel = relay.nn.pad(kernel, pad_width=((0, diff), (0, 0), (0, 0), (0, 0)))
                oc_modified = True
            else:
                return None

        if oc_modified:
            new_attrs['channels'] = new_out_channel
            out = tvm.relay.nn.conv2d(data, kernel, **new_attrs)
            original_out_shape = [x.value for x in output_tensor.shape]
            out = relay.strided_slice(out, begin=(0, 0, 0, 0), end=original_out_shape)
        else:
            out = relay.nn.conv2d(data, kernel, **new_attrs)

        if is_int8_inputs:
            out = relay.subtract(out, adjust_shift)

        return out
    return None
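# Hedged sketch (not part of the legalization code above): the uint8-shift rewrite
# relies on the identity  A @ B == (A + 128) @ B - 128 * sum(B over the reduced axis).
# A matmul stands in for the convolution here, since both reduce over the same kind
# of axis; the shapes and function name are illustrative assumptions.
def _check_shift_identity():
    import numpy as np
    a = np.random.randint(-128, 128, size=(4, 16)).astype("int32")  # int8-range data
    b = np.random.randint(-128, 128, size=(16, 8)).astype("int32")  # int8-range weights
    lhs = a @ b                                          # original int8 result
    rhs = (a + 128) @ b - 128 * b.sum(axis=0)            # shifted data plus adjustment term
    assert np.array_equal(lhs, rhs)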
def fold_conv_weight():
    return relay.multiply(
        conv_weight,
        relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
def fold_conv_weight():
    squeezed_scale = relay.squeeze(out_scale, axis=[1, 2])
    return relay.multiply(
        conv_weight,
        relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
def verify_expand_dims(dshape, axis, num_newaxis, dtype="float32"):
    x = relay.var("x", relay.ty.TensorType(dshape, dtype))
    y = relay.expand_dims(x, axis, num_newaxis)
    func = relay.Function([x], y)
    x_data = np.random.uniform(size=dshape).astype(dtype)
    verify_results(func, [x_data], "test_expand_dims", rtol=1e-5, atol=1e-5)
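# Illustrative calls (shapes are examples, not from the original source): exercise
# both a trailing axis and a middle axis when round-tripping expand_dims through
# the export path checked above.
verify_expand_dims((3, 10), 2, 1)
verify_expand_dims((1, 6), 1, 2, dtype="float32")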
def conv2d_alter_int8_common(
    data,
    data_tensor,
    kernel,
    kernel_tensor,
    output_tensor,
    attrs,
    data_dtype: str,
    in_channel_vector_length: int,
    out_channel_vector_length: int,
):
    """
    Convert TE inputs/outputs so that they are suitable for fast Int8 instructions.

    Int8 instructions require input channels and output channels to be a multiple of the
    vector length. For input channels, we pad both the inputs and weights channels. For
    output channels, we pad the weight and stride_slice the output.

    Arguments
    ---------
    data: Expr
        Data Expr
    data_tensor: Tensor
        Data tensor
    kernel: Expr
        Kernel Expr
    kernel_tensor: Tensor
        Kernel tensor
    output_tensor: Tensor
        Output tensor
    attrs: Conv2dAttrs
        Attributes of the computation
    data_dtype: "int8" or "uint8"
        Desired dtype of data. Data will be converted to this dtype before the main computation.
    in_channel_vector_length: int
        Length of vector units on target hardware. Input channels are padded to this length.
    out_channel_vector_length: int
        Output size of vector instruction. Output channels are padded to this length.

    Returns
    -------
    out : Tensor
        Conv2d computation with inputs in the correct order for tensorization.
    """
    # Dilation not supported yet. Return None if dilation is not (1, 1)
    dilation = attrs.get_int_tuple("dilation")
    if not (dilation[0] == 1 and dilation[1] == 1):
        return None

    # No legalization for depthwise convolutions yet.
    groups = attrs.get_int("groups")
    if groups != 1:
        return None

    # Get the conv attrs
    new_attrs = {k: attrs[k] for k in attrs.keys()}

    padding = attrs.get_int_tuple("padding")
    kh, kw = attrs.get_int_tuple("kernel_size")
    pt, pl, pb, pr = get_pad_tuple(padding, (kh, kw))

    if data_tensor.dtype != data_dtype:
        # How to convert data to int8
        #   Original --> C = A (conv) B
        #   A and B are int8
        #   C = (A + 128 - 128) (conv) B
        #   C = (A' conv B) - 128 (conv) B
        #   where A' = A + 128
        #   and 128 (conv) B is basically a reduce on the CRS axis of the weights.
        #
        # How to convert data to uint8
        #   C = (A - 128 + 128) (conv) B
        #   C = (A' conv B) + 128 (conv) B
        #   where A' = A - 128
        if data_dtype == "int8":
            # shift data to int8
            before_shift = relay.add
            after_shift = relay.subtract
        else:
            # shift data to uint8
            before_shift = relay.subtract
            after_shift = relay.add

        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            adjust_shift = relay.sum(relay.cast(kernel, dtype="int32"), axis=(0, 1, 2))
            pad_width = ((0, 0), (pt, pb), (pl, pr), (0, 0))
        elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
            pad_width = ((0, 0), (0, 0), (pt, pb), (pl, pr))
            adjust_shift = relay.sum(relay.cast(kernel, dtype="int32"), axis=(1, 2, 3))
            adjust_shift = relay.expand_dims(adjust_shift, axis=1, num_newaxis=2)
        else:
            return None

        data = relay.cast(data, "int32")
        data = before_shift(data, relay.const(128, "int32"))
        data = relay.cast(data, data_dtype)

        # Do external padding as the pad value has to be 128.
        if any(padding):
            data = relay.nn.pad(data, pad_width=pad_width, pad_value=128)
        new_attrs["padding"] = (0, 0)

        # Multiply by 128 to adjust the shift.
        adjust_shift = relay.multiply(adjust_shift, relay.const(128, "int32"))

    # Flags to remember if the expr is modified
    ic_modified = False
    oc_modified = False

    # Find the value of input and output channel.
    in_channel = -1
    out_channel = -1
    if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
        in_channel = data_tensor.shape[3].value
        out_channel = kernel_tensor.shape[3].value
    elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
        in_channel = data_tensor.shape[1].value
        out_channel = kernel_tensor.shape[0].value
    else:
        return None

    if in_channel % in_channel_vector_length != 0:
        new_in_channel = (
            (in_channel + in_channel_vector_length) // in_channel_vector_length
        ) * in_channel_vector_length
        diff = new_in_channel - in_channel
        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
            kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, diff), (0, 0)))
            ic_modified = True
        elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
            pad_width = ((0, 0), (0, diff), (0, 0), (0, 0))
            data = relay.nn.pad(data, pad_width=pad_width)
            kernel = relay.nn.pad(kernel, pad_width=pad_width)
            ic_modified = True
        else:
            return None

    new_out_channel = out_channel
    if out_channel % out_channel_vector_length != 0:
        new_out_channel = (
            (out_channel + out_channel_vector_length) // out_channel_vector_length
        ) * out_channel_vector_length
        diff = new_out_channel - out_channel
        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
            oc_modified = True
        elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
            kernel = relay.nn.pad(kernel, pad_width=((0, diff), (0, 0), (0, 0), (0, 0)))
            oc_modified = True
        else:
            return None

    if oc_modified:
        new_attrs["channels"] = new_out_channel
        out = relay.nn.conv2d(data, kernel, **new_attrs)
        original_out_shape = [x.value for x in output_tensor.shape]
        out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
    else:
        out = relay.nn.conv2d(data, kernel, **new_attrs)

    if data_tensor.dtype != data_dtype:
        out = after_shift(out, adjust_shift)

    return out
def fold_conv_weight():
    squeezed_scale = relay.squeeze(out_scale, axis=[1, 2])
    return relay.multiply(
        conv_weight,
        relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
def manual_tir_common(do_tune=False):
    M, N, K = 1024, 1024, 1024  # pylint: disable=invalid-name
    data_shape = (M, K)
    weight_shape = (N, K)

    data_dtype = "uint8"
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    weight = relay.var("weight", shape=weight_shape, dtype="int8")
    bias = relay.var("bias", shape=(weight_shape[0],), dtype="int32")

    # dense is tuned by the TIR schedule above, bmm is scheduled by TE (topi/x86/batch_matmul.py)
    dense = relay.nn.dense(data, weight, out_dtype="int32")
    bias_add = relay.nn.bias_add(dense, bias) + relay.const(1, dtype="int32")
    out = relay.nn.batch_matmul(
        relay.cast(relay.expand_dims(bias_add, 0), "uint8"),
        relay.cast(relay.expand_dims(bias_add, 0), "int8"),
        out_dtype="int32",
    )

    relay_mod = tvm.IRModule.from_expr(out)

    target = "llvm -mcpu=cascadelake -num-cores 4"
    dev = tvm.device(target, 0)

    data = np.random.uniform(1, 10, size=(M, K)).astype("uint8")
    weight_np = np.random.uniform(1, 10, size=weight_shape).astype("int8")
    bias_np = np.random.uniform(1, 10, size=(weight_shape[0],)).astype("int32")

    ref = (
        relay.create_executor("vm", mod=relay_mod, device=dev, target=target)
        .evaluate()(*[data, weight_np, bias_np])
        .numpy()
    )

    params = {"weight": weight_np, "bias": bias_np}

    if do_tune:
        extracted_tasks = ms.extract_task_from_relay(relay_mod, target, params)

        # Filter out tasks that we don't intend to schedule / tune with TIR.
        tune_tasks = list(
            filter(
                lambda task: "dense" in task.task_name,
                extracted_tasks,
            )
        )
        config = ms.TuneConfig(
            strategy="replay_trace",
            num_trials_per_iter=64,
            max_trials_per_task=20000,
            max_trials_global=20000,
        )

        with tempfile.TemporaryDirectory() as work_dir:
            # postprocs=lambda: [] is important to prevent default post processors from
            # tampering with the manual schedule.
            database = ms.tune_extracted_tasks(
                tune_tasks,
                config,
                work_dir=work_dir,
                postprocs=lambda: [],
            )
    else:

        def schedule_fn(task, sch):
            if "dense" not in task.task_name:
                return False

            block = sch.get_block("compute")

            # Looks up the schedule_rule annotation.
            # See the comment in test_tune_relay_manual_tir_vnni().
            schedule_rule = sch.get(block).annotations["schedule_rule"]

            assert "dense_vnni" in schedule_rule

            schedule_dense(block, M, False, sch)

            return True

        database = apply_fixed_schedules(relay_mod, target, params, schedule_fn)

    with ms.ApplyHistoryBest(database):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_meta_schedule": True},
        ):
            # pylint: disable=W0105
            """
            The log should say
            Warning: Cannot find workload: tvmgen_default_fused_expand_dims
            Warning: Cannot find workload: tvmgen_default_fused_cast
            Warning: Cannot find workload: tvmgen_default_fused_cast_1
            Warning: Cannot find workload: tvmgen_default_fused_nn_batch_matmul

            This means batch matmul and others are scheduled by TE, and dense (the one not
            warned) is found in the meta schedule tuning database during ApplyHistoryBest.
            """
            # pylint: enable=W0105
            lib = relay.build(relay_mod, target=target, params=params)

    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

    runtime.set_input("data", data)
    runtime.run()

    out = runtime.get_output(0).numpy()

    np.testing.assert_equal(out, ref)
def expected(N, CI, H, W, CO, KH, KW, OH, OW, src_layout, dst_layout):
    layout_map = {"src": {}, "dst": {}}
    if src_layout == "NCHW":
        nchw = layout_map["src"]
        nhwc = layout_map["dst"]
    else:
        nchw = layout_map["dst"]
        nhwc = layout_map["src"]

    nchw["data_layout"] = "NCHW"
    nchw["data_shape"] = (N, CI, H, W)
    nchw["offset_shape"] = (N, KH * KW * 2, OH, OW)
    nchw["weight_shape"] = (CO, CI, KH, KW)
    nchw["kernel_layout"] = "OIHW"

    nhwc["data_layout"] = "NHWC"
    nhwc["data_shape"] = (N, H, W, CI)
    nhwc["offset_shape"] = (N, OH, OW, KH * KW * 2)
    nhwc["weight_shape"] = (KH, KW, CI, CO)
    nhwc["kernel_layout"] = "HWIO"

    bias_shape = (CO,)

    data = relay.var("data", shape=layout_map["src"]["data_shape"], dtype="float32")
    offset = relay.var("offset", shape=layout_map["src"]["offset_shape"], dtype="float32")
    weight = relay.var("weight", shape=layout_map["src"]["weight_shape"], dtype="float32")
    bias = relay.var("bias", shape=bias_shape, dtype="float32")

    data = relay.layout_transform(
        data, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
    )
    offset = relay.layout_transform(
        offset, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
    )
    weight = relay.layout_transform(
        weight, layout_map["src"]["kernel_layout"], layout_map["dst"]["kernel_layout"]
    )
    y = relay.nn.deformable_conv2d(
        data,
        offset,
        weight,
        kernel_size=(KH, KW),
        channels=CO,
        data_layout=layout_map["dst"]["data_layout"],
        kernel_layout=layout_map["dst"]["kernel_layout"],
    )
    if layout_map["src"]["data_layout"] == "NHWC":
        bias = relay.expand_dims(bias, axis=0, num_newaxis=3)
    else:
        bias = relay.expand_dims(bias, axis=1, num_newaxis=2)
        bias = relay.expand_dims(bias, axis=0)
    bias = relay.layout_transform(
        bias, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
    )
    y = relay.add(y, bias)
    y = relay.nn.relu(y)
    y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout=layout_map["dst"]["data_layout"])
    y = relay.cast(y, "int32")
    y = relay.layout_transform(
        y, layout_map["dst"]["data_layout"], layout_map["src"]["data_layout"]
    )
    y = relay.nn.batch_flatten(y)
    y = relay.Function(analysis.free_vars(y), y)
    return y
def test_expand_dims(x_shape=(1, 3), axis=1, num_newaxis=1):
    x = relay.var('x', shape=x_shape, dtype='float32')
    out = relay.expand_dims(x, axis, num_newaxis)
    f = relay.Function([x], out)
    return f, {'x': x_shape}