def gen_intermediate_tuple(x):
    y1 = relay.add(x, relay.const(1, "float32"))
    y2 = relay.add(x, relay.const(1, "float32"))
    y3 = relay.add(x, relay.const(1, "float32"))
    concat = relay.concatenate((y1, y2, y3), axis=1)
    out = relay.add(concat, relay.const(1, "float32"))
    return out

def expected():
    x = relay.var("x", shape=(1, 16))
    y = relay.nn.relu(x)
    y = relay.add(y, relay.const(1.0, "float32"))
    y = relay.add(y, y)
    f = relay.Function([x], y)
    return f

def before():
    c = relay.const(c_data)
    x = relay.var("x")
    y = relay.Tuple([x, c])
    z = relay.add(y[1], c)
    z = relay.add(z, y[0])
    return relay.Function([x], z)
def convnet():
    """Alternating layout of simple convnet (from image super-resolution)."""
    bias1 = relay.var('bias1', shape=(64,))
    bias2 = relay.var('bias2', shape=(64,))
    bias3 = relay.var('bias3', shape=(64,))
    # The final conv feeds a 3x pixel shuffle, so it must produce 3 * 3 = 9 channels.
    bias4 = relay.var('bias4', shape=(9,))
    weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
    weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
    weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
    weight4 = relay.var('weight4', shape=(9, 64, 3, 3))
    data = relay.var("x", shape=(1, 1, 224, 224))
    n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5])
    n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
    n02 = relay.add(n00, n01)
    n03 = relay.nn.relu(n02)
    n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
    n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
    n06 = relay.add(n04, n05)
    n07 = relay.nn.relu(n06)
    n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
    n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
    n10 = relay.add(n08, n09)
    n11 = relay.nn.relu(n10)
    n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
    n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
    n14 = relay.add(n12, n13)
    n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
    n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
    net = relay.reshape(n16, newshape=[1, 1, 672, 672])
    args = relay.ir_pass.free_vars(net)
    return relay.Function(args, net)
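# Shape trace for the pixel-shuffle tail above (an explanatory addition): n14 has shape
# (1, 9, 224, 224); reshape -> (1, 1, 3, 3, 224, 224); transpose with axes
# [0, 1, 4, 2, 5, 3] -> (1, 1, 224, 3, 224, 3); final reshape -> (1, 1, 672, 672),
# i.e. a 3x upscale, which is why the last conv must emit 3 * 3 = 9 channels.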
def before(x):
    concat = gen_consecutive_tuple(x)
    pooled = relay.nn.max_pool2d(concat, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
    out = relay.add(pooled, relay.const(1, "float32"))
    out2 = relay.add(out, relay.const(1, "float32"))
    out_tup = relay.Tuple((out, out2))
    return relay.Function(relay.ir_pass.free_vars(out_tup), out_tup)

def before():
    sb = relay.ScopeBuilder()
    x = relay.var("x")
    t1 = sb.let("t1", relay.const(c_data))
    t2 = sb.let("t2", relay.add(t1, t1))
    t3 = sb.let("t3", relay.add(t2, x))
    sb.ret(t3)
    return relay.Function([x], sb.get())

def before():
    c = relay.const(c_data)
    x = relay.var("x")
    y = relay.add(c, c)
    y = relay.multiply(y, relay.const(2, "float32"))
    y = relay.add(x, y)
    z = relay.add(y, c)
    return relay.Function([x], z)

def before():
    x = relay.var("x", shape=(1, 16))
    y1 = relay.nn.relu(x)
    y2 = relay.nn.relu(x)
    y1 = relay.add(y1, relay.const(1.0, "float32"))
    y2 = relay.add(y2, relay.const(1.0, "float32"))
    y = relay.add(y1, y2)
    f = relay.Function([x], y)
    return f

def test_func():
    x = relay.var("x", shape=(3, 2))
    y = relay.var("y")
    one = relay.const(10e10, dtype="float32")
    z = relay.add(x, one)
    z = relay.add(z, z)
    f = relay.Function([x, y], z)
    show(z.astext())
    show(f.astext())

def before(x):
    inj = relay.squeeze(x)
    y1 = relay.add(inj, relay.const(1, "float32"))
    tmp = relay.squeeze(inj)
    tmp = relay.add(tmp, relay.const(1, "float32"))
    y2 = relay.add(tmp, relay.const(1, "float32"))
    y3 = relay.add(inj, relay.const(1, "float32"))
    concat = relay.concatenate((y1, y2, y3), axis=1)
    out_inj = relay.squeeze(concat)
    out = relay.add(out_inj, relay.const(1, "float32"))
    return relay.Function(relay.ir_pass.free_vars(out), out)

def test_env():
    x = relay.var("x", "float32")
    y = relay.var("y", "float32")
    z = relay.add(x, y)
    z = relay.add(z, z)
    f = relay.Function([x, y], z)
    env = relay.Module()
    env["myf"] = f
    text = env.astext()
    assert "def @myf" in text
    assert "%1 = add(%0, %0) # ty=float32" in text
    show(env.astext(annotate=lambda x: str(x.checked_type.dtype)))
    show(text)

def test_bind_params():
    x = relay.var("x")
    y = relay.var("y")
    z = relay.add(x, y)
    f = relay.Function([x, y], z)
    fbinded = relay.bind(f, {x: relay.const(1, "float32")})
    fexpected = relay.Function(
        [y],
        relay.add(relay.const(1, "float32"), y))
    assert relay.ir_pass.alpha_equal(fbinded, fexpected)

    zbinded = relay.bind(z, {y: x})
    zexpected = relay.add(x, x)
    assert relay.ir_pass.alpha_equal(zbinded, zexpected)

def test_env():
    x = relay.var("x", "float32")
    y = relay.var("y", "float32")
    z = relay.add(x, y)
    z = relay.add(z, z)
    f = relay.Function([x, y], z)
    env = relay.Module()
    env["myf"] = f
    text = env.astext()
    assert "def @myf" in text
    assert "def @myf" in str(env)
    assert "add(%0, %0) /* ty=float32 */" in text
    assert "add(%0, %0) /* ty=float32 */" in str(env)
    show(env.astext(annotate=lambda x: str(x.checked_type.dtype)
                    if type(x) == relay.Call else ""))
    show(text)

def expected():
    sb = relay.ScopeBuilder()
    x = relay.var("x")
    c_folded = (c_data + c_data)
    t3 = sb.let("t3", relay.add(relay.const(c_folded), x))
    sb.ret(t3)
    return relay.Function([x], sb.get())

def before(x, conv_weight, out_bias, out_scale, channels):
    args = [x, conv_weight, out_bias]
    y0 = relay.nn.conv2d(x, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
    y0 = relay.multiply(y0, out_scale)
    y0 = relay.nn.relu(y0)
    y1 = relay.nn.conv2d(y0, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
    y1 = relay.multiply(y1, out_scale)
    y1 = relay.nn.relu(y1)
    y2 = relay.nn.conv2d(y0, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
    y2 = relay.multiply(y2, out_scale)
    y2 = relay.nn.relu(y2)
    y = relay.add(y1, y2)
    return relay.Function(args, y)

def test_binds():
    x = relay.var("x")
    y = relay.add(x, x)
    intrp = create_executor("debug")
    xx = np.ones((10, 20))
    res = intrp.evaluate(y, binds={x: xx}).asnumpy()
    tvm.testing.assert_allclose(xx + xx, res)

def expected(x, conv_weight, out_bias, out_scale, channels):
    # use a fixed order of args so alpha equal check can pass
    args = [x, conv_weight, out_bias]

    def fold_conv_weight():
        squeezed_scale = relay.squeeze(out_scale, axis=[1, 2])
        return relay.multiply(
            conv_weight,
            relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))

    y0 = relay.nn.conv2d(x, fold_conv_weight(),
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
    y0 = relay.nn.relu(y0)
    y1 = relay.nn.conv2d(y0, fold_conv_weight(),
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
    y1 = relay.nn.relu(y1)
    y2 = relay.nn.conv2d(y0, fold_conv_weight(),
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
    y2 = relay.nn.relu(y2)
    y = relay.add(y1, y2)
    return relay.Function(args, y)

def test_plan_memory():
    # it is sufficient to cycle through two memories.
    x = relay.var("x", shape=(10,))
    y = relay.var("x", shape=(1,))
    y2 = relay.exp(y)
    z = relay.add(x, y2)
    z = relay.exp(z)
    z = relay.exp(z)
    z = relay.exp(z)
    z = relay.exp(z)
    z = relay.exp(z)
    func = relay.Function([x, y], z)
    func = relay.ir_pass.infer_type(func)
    func = relay.ir_pass.fuse_ops(func, opt_level=0)
    func = relay.ir_pass.infer_type(func)
    smap = relay.backend._backend.GraphPlanMemory(func)
    storage_ids = set()
    device_types = set()
    for k, v in smap.items():
        assert len(v) == 2
        for x in v[0]:
            storage_ids.add(x.value)
        for x in v[1]:
            device_types.add(x.value)
    # Current rule requires vars have unique storage id
    # because we don't do inplace, we will need another
    # two alternating temporary space.
    assert len(storage_ids) == 4
    assert len(device_types) == 1
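# Worked count for the assertion above (an explanatory addition): the two parameters
# get one storage id each (vars are never reused because nothing is done in place),
# while the chain exp -> add -> exp -> ... can ping-pong between just two temporary
# buffers, giving 2 (vars) + 2 (alternating temporaries) = 4 storage ids on a single
# device type.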
def expected():
    conv2d_1 = relay.nn.conv2d(
        data1, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    device_copy1 = relay.device_copy(conv2d_1, dev2, dev1)
    conv2d_2 = relay.nn.conv2d(
        data2, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    device_copy2 = relay.device_copy(conv2d_2, dev2, dev1)
    add = relay.add(device_copy1, device_copy2)
    device_copy3 = relay.device_copy(add, dev1, dev2)
    conv2d_3 = relay.nn.conv2d(
        device_copy3, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    func = relay.Function([data1, weight, data2], conv2d_3)
    return func

def before(dshape):
    x = relay.var("x", shape=dshape)
    pooled = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
    upsampled = relay.nn.upsampling(pooled, scale=2, layout="NCHW")
    concat = relay.concatenate((upsampled, x), axis=1)
    out = relay.add(concat, relay.const(1, "float32"))
    return relay.Function(relay.ir_pass.free_vars(out), out)

def annotated():
    conv2d_1 = relay.nn.conv2d(
        data1, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    _conv2d_1 = relay.annotation.on_device(conv2d_1, dev2)
    conv2d_2 = relay.nn.conv2d(
        data2, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    _conv2d_2 = relay.annotation.on_device(conv2d_2, dev2)
    add = relay.add(conv2d_1, conv2d_2)
    _add = relay.annotation.on_device(add, dev1)
    conv2d_3 = relay.nn.conv2d(
        add, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    _conv2d_3 = relay.annotation.on_device(conv2d_3, dev2)

    func = relay.Function(
        [data1, data2, weight],
        relay.Tuple(tvm.convert([_conv2d_1, _conv2d_2, _conv2d_3, _add, conv2d_3])))
    func = relay.ir_pass.infer_type(func)
    func = relay.ir_pass.rewrite_annotated_ops(func, tvm.context(3).device_type)
    func = relay.ir_pass.infer_type(func)
    return relay.Function(relay.ir_pass.free_vars(func.body[4]), func.body[4])

def before():
    x = relay.var("x", shape=(10, 20))
    y = relay.add(x, relay.const(1, "float32"))
    z = relay.squeeze(y)
    u = relay.transpose(y, axes=[0, 1])
    w = relay.left_shift(z, u)
    return relay.Function([x], w)

def test_depthwise_conv2d():
    batch_size = 1
    dshape = (batch_size, 64, 56, 56)
    weight_conv = relay.var("weight_depthwiseconv", shape=(64, 1, 3, 3))
    data1 = relay.var("data1", shape=dshape)
    data2 = relay.var("data2", shape=dshape)
    depthwise_conv2d_1 = relay.nn.conv2d(
        data1, weight_conv, kernel_size=(3, 3), padding=(1, 1), groups=64)
    depthwise_conv2d_2 = relay.nn.conv2d(
        data2, weight_conv, kernel_size=(3, 3), padding=(1, 1), groups=64)
    add = relay.add(depthwise_conv2d_1, depthwise_conv2d_2)
    func = relay.Function(
        [data1, data2, weight_conv],
        relay.Tuple(tvm.convert([depthwise_conv2d_1, depthwise_conv2d_2, add])))
    func = relay.ir_pass.infer_type(func)
    compute_count = relay.ir_pass.get_total_mac_number(func)
    assert compute_count == 2 * np.prod(dshape) * 3 * 3
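# Worked count for the assertion above (an explanatory addition): with groups equal to
# the channel count and padding (1, 1), each depthwise conv2d produces one output
# element per input element and spends kh * kw = 3 * 3 multiply-accumulates on it,
# i.e. np.prod(dshape) * 3 * 3 MACs; two such convolutions give the factor of 2.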
def expected():
    add = relay.add(a, b)
    mul = relay.multiply(c, d)
    copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
    sub = relay.subtract(add, copy_mul_sub)
    func = relay.Function([a, b, c, d], sub)
    return func

def before():
    x = relay.var("x", shape=(1, 64, 56, 56))
    weight = relay.var("weight")
    y = relay.nn.conv2d(x, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
    y = relay.add(y, relay.const(1, "float32"))
    y = relay.Function(free_vars(y), y)
    return y

def expected():
    x = relay.var("p", shape=(10, 20))
    y = relay.add(x, relay.const(1, "float32"))
    z = relay.exp(y)
    f1 = relay.Function([x], z)
    x = relay.var("x", shape=(10, 20))
    y = relay.Call(f1, [x])
    return relay.Function([x], y)

def annotated():
    add = relay.add(x, y)
    sub = relay.subtract(add, z)
    func = relay.Function([x, y, z], sub)
    func = relay.ir_pass.infer_type(func)
    func = relay.ir_pass.rewrite_annotated_ops(func, ctx1.device_type)
    return func

def get_func():
    add = relay.add(x, y)
    sqrt = relay.sqrt(add)
    log = relay.log(add)
    subtract = relay.subtract(sqrt, log)
    exp = relay.exp(subtract)
    func = relay.Function([x, y], exp)
    return func

def expected():
    add = relay.add(x, y)
    sqrt = relay.sqrt(add)
    log = relay.log(add)
    subtract = relay.subtract(sqrt, log)
    copy_sub_exp = relay.device_copy(subtract, dev_ctx, cpu_ctx)
    exp = relay.exp(copy_sub_exp)
    func = relay.Function([x, y], exp)
    return func

def expected():
    x = relay.var("p", shape=(10, 20))
    y = relay.add(x, relay.const(1, "float32"))
    z = relay.squeeze(y)
    u = relay.transpose(y, axes=[0, 1])
    w = relay.left_shift(z, u)
    f1 = relay.Function([x], w)
    x = relay.var("x", shape=(10, 20))
    y = relay.Call(f1, [x])
    return relay.Function([x], y)

def before():
    x = relay.var("x", shape=(10, 20))
    y = relay.add(x, relay.const(1, "float32"))
    z = relay.exp(y)
    w = relay.squeeze(z)
    return relay.Function([x], w)

def test_tuple_getitem():
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function(
        [],
        relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0))

def expected():
    add = relay.add(x, y)
    copy_add_sub = relay.device_copy(add, ctx2, ctx1)
    sub = relay.subtract(copy_add_sub, z)
    func = relay.Function([x, y, z], sub)
    return func

def expected():
    add = relay.add(x, y)
    copy_add_sub = relay.device_copy(add, dev1, dev2)
    sub = relay.subtract(copy_add_sub, z)
    return sub

def before():
    a = relay.var('a', shape=(10, 10))
    b = relay.var('b', shape=(10, 10))
    add_node = relay.add(a, b)
    r = relay.nn.relu(add_node)
    return relay.Function([a, b], r)

def legalize_relu(attrs, inputs, types):
    data = inputs[0]
    add = relay.add(tvm.relay.const(0, "float32"), data)
    return relay.nn.relu(add)
def annotated():
    in_1 = relay.var("in_1", shape=(10, 10), dtype="float32")
    in_2 = relay.var("in_2", shape=(10, 10), dtype="float32")
    in_3 = relay.var("in_3", shape=(10, 10), dtype="float32")
    in_4 = relay.var("in_4", shape=(10, 10), dtype="float32")
    in_5 = relay.var("in_5", shape=(10, 10), dtype="float32")
    in_6 = relay.var("in_6", shape=(10, 10), dtype="float32")
    in_7 = relay.var("in_7", shape=(10, 10), dtype="float32")
    in_8 = relay.var("in_8", shape=(10, 10), dtype="float32")
    in_9 = relay.var("in_9", shape=(10, 10), dtype="float32")
    in_10 = relay.var("in_10", shape=(10, 10), dtype="float32")

    begin0 = compiler_begin(in_1, "test")
    begin1 = compiler_begin(in_2, "test")
    begin2 = compiler_begin(in_3, "test")
    begin3 = compiler_begin(in_4, "test")
    node0 = relay.add(begin0, begin1)
    node1 = relay.add(begin2, begin3)
    end0 = compiler_end(node0, "test")
    end1 = compiler_end(node1, "test")
    begin4 = compiler_begin(end0, "test")
    begin5 = compiler_begin(end1, "test")
    node2 = relay.add(begin4, begin5)
    end2 = compiler_end(node2, "test")

    dbegin0 = compiler_begin(in_5, "default")
    dbegin1 = compiler_begin(in_6, "default")
    node3 = relay.subtract(dbegin0, dbegin1)
    dbegin2 = compiler_begin(in_7, "default")
    dend1 = compiler_end(node3, "default")
    dbegin3 = compiler_begin(dend1, "default")
    node4 = relay.subtract(dbegin2, dbegin3)
    dend2 = compiler_end(node4, "default")

    begin6 = compiler_begin(end2, "test")
    begin7 = compiler_begin(dend2, "test")
    node5 = relay.add(begin6, begin7)
    end3 = compiler_end(node5, "test")
    end4 = compiler_end(node5, "test")

    dbegin4 = compiler_begin(in_8, "default")
    dbegin5 = compiler_begin(end3, "default")
    node6 = relay.subtract(dbegin4, dbegin5)
    begin8 = compiler_begin(in_9, "test")
    begin9 = compiler_begin(end4, "test")
    node7 = relay.add(begin8, begin9)
    end5 = compiler_end(node7, "test")
    dend3 = compiler_end(node6, "default")

    begin10 = compiler_begin(dend3, "test")
    begin11 = compiler_begin(end5, "test")
    node8 = relay.add(begin10, begin11)
    end6 = compiler_end(node8, "test")
    begin12 = compiler_begin(in_10, "test")
    begin13 = compiler_begin(end6, "test")
    node9 = relay.add(begin12, begin13)
    end7 = compiler_end(node9, "test")

    f = relay.Function(
        [in_1, in_2, in_3, in_4, in_5, in_6, in_7, in_8, in_9, in_10],
        end7)
    mod = tvm.IRModule.from_expr(f)
    return mod

def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT - with and without
    merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]
    map_inputs = OrderedDict(x_data + w_data)

    output_list = generate_ref_data(mod, map_inputs)

    compiled_test_mods = compile_models(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_many_sub_graphs():
    target = "llvm"
    dtype = "float32"
    dshape = (1, 8, 8, 3)
    layout = "NCHW"
    target_ops = [relay.nn.conv2d]

    data = relay.var("data", shape=dshape, dtype=dtype)
    t0 = relay.transpose(data, (0, 3, 1, 2))
    w0 = relay.var("w0_weight")
    conv0 = relay.nn.conv2d(t0, w0, channels=16, kernel_size=(3, 3), padding=(1, 1))
    t1 = relay.transpose(conv0, (0, 2, 3, 1))
    w1 = relay.var("w1_weight")
    t2 = relay.transpose(t1, (0, 3, 1, 2))
    conv1 = relay.nn.conv2d(t2, w1, channels=32, kernel_size=(1, 1))
    t3 = relay.transpose(conv1, (0, 2, 3, 1))
    w2 = relay.var("w2_weight")
    t4 = relay.transpose(t3, (0, 3, 1, 2))
    conv2 = relay.nn.conv2d(t4, w2, channels=32, kernel_size=(3, 3), padding=(1, 1))
    t5 = relay.transpose(conv2, (0, 2, 3, 1))
    out = relay.add(t3, t5)
    net = relay.Function(relay.analysis.free_vars(out), out)
    net, params = relay.testing.create_workload(net)

    tasks = autotvm.task.extract_from_program(net["main"],
                                              target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d,))
    wkl_list = [
        create_workload((1, 3, 8, 8), (16, 3, 3, 3), (1, 1), (1, 1), (1, 1),
                        layout, layout, dtype, dtype),
        create_workload((1, 16, 8, 8), (32, 16, 1, 1), (1, 1), (0, 0), (1, 1),
                        layout, layout, dtype, dtype),
        create_workload((1, 32, 8, 8), (32, 32, 3, 3), (1, 1), (1, 1), (1, 1),
                        layout, layout, dtype, dtype),
    ]
    costs = [0.04, 0.012, 0.03, 0.02, 0.02, 0.045]
    config_list = []
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [3, 1]],
                      ["tile_oc", "sp", [4, 4]],
                      ["tile_ow", "sp", [4, 2]],
                      ["unroll_kw", "ot", True]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [2, 8]],
                      ["tile_oc", "sp", [1, 32]],
                      ["tile_oh", "ot", 1],
                      ["tile_ow", "sp", [4, 2]]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [8, 4]],
                      ["tile_oc", "sp", [4, 8]],
                      ["tile_ow", "sp", [2, 4]],
                      ["unroll_kw", "ot", False]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [1, 3]],
                      ["tile_oc", "sp", [2, 8]],
                      ["tile_ow", "sp", [4, 2]],
                      ["unroll_kw", "ot", True]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [4, 4]],
                      ["tile_oc", "sp", [2, 16]],
                      ["tile_oh", "ot", 1],
                      ["tile_ow", "sp", [4, 2]]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [16, 2]],
                      ["tile_oc", "sp", [8, 4]],
                      ["tile_ow", "sp", [2, 4]],
                      ["unroll_kw", "ot", False]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))

    records = []
    wkl_list = wkl_list + wkl_list
    tasks = tasks + tasks
    for wkl, cost, config, task in zip(wkl_list, costs, config_list, tasks):
        task.workload = wkl
        ms_input = MeasureInput(target=target, task=task, config=config)
        ms_output = MeasureResult(costs=(cost,), error_no=0, all_cost=-1, timestamp=-1)
        records.append((ms_input, ms_output))

    ltf_records = []
    ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
    ltf_wkl = ('layout_transform',) + autotvm.task.args_to_workload(ltf_arg)
    ltf_task = copy.deepcopy(tasks[0])
    ltf_task.workload = ltf_wkl
    ms_input = MeasureInput(target=target, task=ltf_task, config=None)
    ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
    ltf_records.append((ms_input, ms_output))

    executor = DPTuner(net, {"data": dshape}, records, target_ops, target)
    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
    executor.run()
    out = [record[0].config for record in executor.get_optimal_records()]
    expected_out = [records[3][0].config, records[1][0].config, records[2][0].config]
    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
                                % (str(expected_out), str(out))

    executor = PBQPTuner(net, {"data": dshape}, records, target_ops, target)
    executor.benchmark_layout_transform(layout_records=ltf_records, infer_layout=True)
    executor.run()
    out = [record[0].config for record in executor.get_optimal_records()]
    expected_out = [records[3][0].config, records[1][0].config, records[2][0].config]
    assert expected_out == out, "Output mismatch: expecting %s but got %s" \
                                % (str(expected_out), str(out))
def add():
    """Add together two constants in Relay."""
    return relay.Function([], relay.add(relay.const(37), relay.const(5)))
def _conv2d_legalize(attrs, inputs, arg_types):
    """Legalizes Conv2D op.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of current convolution
    inputs : list of tvm.relay.Expr
        The args of the Relay expr to be legalized
    types : list of types
        List of input and output types

    Returns
    -------
    result : tvm.relay.Expr
        The legalized expr
    """
    # Dilation not supported yet. Return None if dilation is not (1, 1)
    dilation = attrs.get_int_tuple("dilation")
    if not (dilation[0] == 1 and dilation[1] == 1):
        return None

    # No legalization for depthwise convolutions yet.
    groups = attrs.get_int("groups")
    if groups != 1:
        return None

    # Collect the input tensors.
    data_tensor, kernel_tensor = arg_types[0], arg_types[1]
    data_dtype = data_tensor.dtype
    kernel_dtype = kernel_tensor.dtype

    # Collect the output tensor.
    output_tensor = arg_types[2]

    # Collect the input exprs.
    data, kernel = inputs

    # Get the conv attrs
    new_attrs = {k: attrs[k] for k in attrs.keys()}

    is_int8_inputs = False
    # If both the inputs are int8, we can add 128 to make the input dtype uint8, and then adjust
    # the output. This will help picking up Intel VNNI instructions.
    # Original --> C = A (conv) B
    # A and B are int8
    #   C = (A + 128 - 128) (conv) B
    #   C = (A' conv B) - 128 (conv) B
    # where A' = A + 128
    # and 128 (conv) B is basically a reduce on CRS axis for weights.
    if data_tensor.dtype == "int8" and kernel_tensor.dtype == "int8":
        is_int8_inputs = True
        padding = attrs.get_int_tuple("padding")
        kh, kw = attrs.get_int_tuple("kernel_size")
        pt, pl, pb, pr = get_pad_tuple(padding, (kh, kw))

        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            adjust_shift = relay.sum(relay.cast(kernel, dtype="int32"), axis=(0, 1, 2))
            pad_width = ((0, 0), (pt, pb), (pl, pr), (0, 0))
        elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
            pad_width = ((0, 0), (0, 0), (pt, pb), (pl, pr))
            adjust_shift = relay.sum(relay.cast(kernel, dtype="int32"), axis=(1, 2, 3))
            adjust_shift = relay.expand_dims(adjust_shift, axis=1, num_newaxis=2)
        else:
            return None

        data = relay.cast(data, "int32")
        data = relay.add(data, relay.const(128, "int32"))
        data = relay.cast(data, "uint8")

        # Do external padding as pad value has to be 128.
        if any(padding):
            data = relay.nn.pad(data, pad_width=pad_width, pad_value=128)
            new_attrs["padding"] = (0, 0)

        # The data type is now shifted to uint8
        data_dtype = "uint8"

        # Multiply 128 to adjust shift.
        adjust_shift = relay.multiply(adjust_shift, relay.const(128, "int32"))

    # Legalize if the datatypes are suitable for fast Int8 instructions. Int8 instructions require
    # input channel to be a multiple of 4 and output channels to be a multiple of 16. For input
    # channels, we pad both the inputs and weights input channels. For output channels, we pad the
    # weight and stride_slice the output.
    if is_int8_hw_support(data_dtype, kernel_dtype):
        # Flags to remember if the expr is modified
        ic_modified = False
        oc_modified = False

        # Find the value of input and output channel.
        in_channel = -1
        out_channel = -1
        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            in_channel = data_tensor.shape[3].value
            out_channel = kernel_tensor.shape[3].value
        elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
            in_channel = data_tensor.shape[1].value
            out_channel = kernel_tensor.shape[0].value
        else:
            return None

        if in_channel % 4 != 0:
            new_in_channel = ((in_channel + 4) // 4) * 4
            diff = new_in_channel - in_channel
            if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
                data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
                kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, diff), (0, 0)))
                ic_modified = True
            elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
                pad_width = ((0, 0), (0, diff), (0, 0), (0, 0))
                data = relay.nn.pad(data, pad_width=pad_width)
                kernel = relay.nn.pad(kernel, pad_width=pad_width)
                ic_modified = True
            else:
                return None

        new_out_channel = out_channel
        if out_channel % 16 != 0:
            new_out_channel = ((out_channel + 16) // 16) * 16
            diff = new_out_channel - out_channel
            if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
                kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
                oc_modified = True
            elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
                kernel = relay.nn.pad(kernel, pad_width=((0, diff), (0, 0), (0, 0), (0, 0)))
                oc_modified = True
            else:
                return None

        if oc_modified:
            new_attrs["channels"] = new_out_channel
            out = tvm.relay.nn.conv2d(data, kernel, **new_attrs)
            original_out_shape = [x.value for x in output_tensor.shape]
            out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
        else:
            out = relay.nn.conv2d(data, kernel, **new_attrs)

        if is_int8_inputs:
            out = relay.subtract(out, adjust_shift)

        return out
    return None
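# A minimal NumPy sketch (not part of the legalization above; names and shapes are
# illustrative) of the identity the int8 -> uint8 shift relies on: with no implicit
# padding, conv(A, B) == conv(A + 128, B) - 128 * sum(B), which is why the kernel
# reduction `adjust_shift` (scaled by 128) is subtracted from the shifted output.
import numpy as np

def _conv2d_valid(x, w):
    # Naive single-channel "valid" convolution (cross-correlation, as in nn.conv2d).
    kh, kw = w.shape
    oh, ow = x.shape[0] - kh + 1, x.shape[1] - kw + 1
    out = np.zeros((oh, ow), dtype=np.int64)
    for i in range(oh):
        for j in range(ow):
            out[i, j] = np.sum(x[i:i + kh, j:j + kw] * w)
    return out

_rng = np.random.default_rng(0)
_a = _rng.integers(-128, 128, size=(6, 6)).astype(np.int64)  # int8-range data
_b = _rng.integers(-128, 128, size=(3, 3)).astype(np.int64)  # int8-range kernel
assert np.array_equal(_conv2d_valid(_a, _b),
                      _conv2d_valid(_a + 128, _b) - 128 * _b.sum())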
def add_var():
    """Add together two variables."""
    x = relay.var('x', shape=())
    y = relay.var('y', shape=())
    return relay.Function([x, y], relay.add(x, y))

def test_free_expr():
    x = relay.var("x", "float32")
    y = relay.add(x, x)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.scalar_type("float32")
    assert x.vid.same_as(yy.args[0].vid)

def _execute(self):
    self.node_dict = {}
    # self.node_dict['1'] = relay.const(np.zeros((1, 128)), dtype='int32')
    gelu_a = relay.var('gelu_a', shape=())
    gelu_b = relay.var('gelu_b', shape=())
    gelu_c = relay.var('gelu_c', shape=())
    gelu_d = relay.var('gelu_d', shape=())
    gelu_e = relay.var('gelu_e', shape=())

    self.node_dict['1'] = relay.var('input.1', shape=(1, 128), dtype='int32')
    self.node_dict['2'] = relay.var('input.2', shape=(1, 128), dtype='int32')
    for gnode in self.graph:
        name = gnode['name']
        op_type = gnode['op_type']
        attrs = gnode['attrs']
        del attrs['A_shape']
        del attrs['O_shape']
        inputs = gnode['inputs']
        if op_type == 'Const':
            arr = np.zeros(attrs['shape'], dtype=np.int32)
            y = relay.const(arr, dtype='int32')
        elif op_type == 'expand_dims':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.expand_dims(x, attrs['axis'], attrs['num_newaxis'])
        elif op_type == 'reshape':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.reshape(x, attrs['newshape'])
        elif op_type == 'take':
            data = get_input(self.node_dict, self.params, inputs[0])
            indices = get_input(self.node_dict, self.params, inputs[1])
            y = relay.take(data, indices, axis=attrs['axis'][0], mode=attrs['mode'])
        elif op_type == 'one_hot':
            x = get_input(self.node_dict, self.params, inputs[0])
            cc1 = get_input(self.node_dict, self.params, inputs[1])
            cc2 = get_input(self.node_dict, self.params, inputs[2])
            y = relay.one_hot(x, cc1, cc2, **attrs)
        elif op_type == 'strided_slice':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.strided_slice(x, **attrs)
        elif op_type == 'mean':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.mean(x, axis=attrs['axis'], exclude=attrs['exclude'],
                           keepdims=attrs['keepdims'])
        elif op_type == 'nn.dense':
            x = get_input(self.node_dict, self.params, inputs[0])
            weight = get_input(self.node_dict, self.params, inputs[1])
            y = relay.nn.dense(x, weight, units=attrs['units'][0])
        elif op_type == 'add':
            x1 = get_input(self.node_dict, self.params, inputs[0])
            x2 = get_input(self.node_dict, self.params, inputs[1])
            y = relay.add(x1, x2)
        elif op_type == 'subtract':
            x1 = get_input(self.node_dict, self.params, inputs[0])
            x2 = get_input(self.node_dict, self.params, inputs[1])
            y = relay.subtract(x1, x2)
        elif op_type == 'multiply':
            x1 = get_input(self.node_dict, self.params, inputs[0])
            x2 = get_input(self.node_dict, self.params, inputs[1])
            y = relay.multiply(x1, x2)
        elif op_type == 'power':
            x1 = get_input(self.node_dict, self.params, inputs[0])
            x2 = get_input(self.node_dict, self.params, inputs[1])
            y = relay.power(x1, x2)
        elif op_type == 'transpose':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.transpose(x, **attrs)
        elif op_type == 'tanh':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.tanh(x)
        elif op_type == 'squeeze':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.squeeze(x, **attrs)
        elif op_type == 'nn.batch_matmul':
            x1 = get_input(self.node_dict, self.params, inputs[0])
            x2 = get_input(self.node_dict, self.params, inputs[1])
            y = relay.nn.batch_matmul(x1, x2)
        elif op_type == 'nn.softmax':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = relay.nn.softmax(x, **attrs)
        elif op_type == 'gelu':
            x = get_input(self.node_dict, self.params, inputs[0])
            y = x * gelu_a * (gelu_b + relay.tanh(
                gelu_c * (x + gelu_d * relay.power(x, gelu_e))))
        else:
            import pdb; pdb.set_trace()
            print('not supported op %s ' % op_type)
        self.node_dict[name] = y

    output_name = self.output_node_ids[0]
    output = self.node_dict[output_name]
    inputs = relay.analysis.free_vars(output)
    # inputs = [self.node_dict['1'], self.node_dict['2']]
    func = relay.Function(inputs, output)
    mod = tvm.IRModule()
    mod['main'] = func
    with relay.build_config(opt_level=0):
        graph, lib, params = relay.build(mod, 'llvm', params={})
    self.m = graph_runtime.create(graph, lib, tvm.cpu())
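# Note on the 'gelu' branch above (an explanatory addition): with the usual tanh
# approximation the placeholder scalars would be gelu_a = 0.5, gelu_b = 1.0,
# gelu_c = sqrt(2 / pi), gelu_d = 0.044715, gelu_e = 3.0, giving
# GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3))).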
def visit_var(self, var):
    return relay.add(var, var)

def _create_data(target, dshape, dtype, layout):
    data = relay.var("data", shape=dshape, dtype=dtype)
    w0 = relay.var("w0_weight")
    conv0 = relay.nn.conv2d(data, w0, channels=16, kernel_size=(3, 3), padding=(1, 1))
    w1 = relay.var("w1_weight")
    conv1 = relay.nn.conv2d(conv0, w1, channels=32, kernel_size=(1, 1))
    w2 = relay.var("w2_weight")
    conv2 = relay.nn.conv2d(conv1, w2, channels=32, kernel_size=(3, 3), padding=(1, 1))
    out = relay.add(conv1, conv2)
    net = relay.Function(relay.analysis.free_vars(out), out)
    mod, params = relay.testing.create_workload(net)
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d,))
    wkl_list = [
        create_workload((1, 3, 8, 8), (16, 3, 3, 3), (1, 1), (1, 1), (1, 1),
                        layout, layout, dtype, dtype),
        create_workload((1, 16, 8, 8), (32, 16, 1, 1), (1, 1), (0, 0), (1, 1),
                        layout, layout, dtype, dtype),
        create_workload((1, 32, 8, 8), (32, 32, 3, 3), (1, 1), (1, 1), (1, 1),
                        layout, layout, dtype, dtype),
    ]
    costs = [0.04, 0.012, 0.03]
    config_list = []
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [3, 1]],
                      ["tile_oc", "sp", [4, 4]],
                      ["tile_ow", "sp", [4, 2]],
                      ["unroll_kw", "ot", True]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [2, 8]],
                      ["tile_oc", "sp", [1, 32]],
                      ["tile_oh", "ot", 1],
                      ["tile_ow", "sp", [4, 2]]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))
    cfg_dict = {"i": -1,
                "c": None,
                "e": [["tile_ic", "sp", [8, 4]],
                      ["tile_oc", "sp", [4, 8]],
                      ["tile_ow", "sp", [2, 4]],
                      ["unroll_kw", "ot", False]],
                "t": ""}
    config_list.append(ConfigEntity.from_json_dict(cfg_dict))

    records = []
    for wkl, cost, config, task in zip(wkl_list, costs, config_list, tasks):
        task.workload = wkl
        ms_input = MeasureInput(target=target, task=task, config=config)
        ms_output = MeasureResult(costs=(cost,), error_no=0, all_cost=-1, timestamp=-1)
        records.append((ms_input, ms_output))

    ltf_records = []
    ltf_arg = [tvm.placeholder((1, 64, 16, 16, 8), dtype=dtype), "NCHW8c", "NCHW512c"]
    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
    ltf_wkl = ('layout_transform',) + autotvm.task.args_to_workload(ltf_arg)
    ltf_task = copy.deepcopy(tasks[0])
    ltf_task.workload = ltf_wkl
    ms_input = MeasureInput(target=target, task=ltf_task, config=None)
    ms_output = MeasureResult(costs=(1.91224744e-05,), error_no=0, all_cost=-1, timestamp=-1)
    ltf_records.append((ms_input, ms_output))

    ltf_keys = []
    ltf_arg = [tvm.placeholder((1, 4, 8, 8, 4), dtype=dtype), "NCHW4c", "NCHW8c"]
    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
    ltf_wkl = ('layout_transform',) + autotvm.task.args_to_workload(ltf_arg)
    ltf_keys.append(ltf_wkl)
    ltf_arg = [tvm.placeholder((1, 1, 8, 8, 32), dtype=dtype), "NCHW32c", "NCHW4c"]
    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
    ltf_wkl = ('layout_transform',) + autotvm.task.args_to_workload(ltf_arg)
    ltf_keys.append(ltf_wkl)
    ltf_arg = [tvm.placeholder((1, 4, 8, 8, 8), dtype=dtype), "NCHW8c", "NCHW32c"]
    ltf_arg = autotvm.task.topi_integration.serialize_args(ltf_arg)
    ltf_wkl = ('layout_transform',) + autotvm.task.args_to_workload(ltf_arg)
    ltf_keys.append(ltf_wkl)

    return net, records, ltf_records, ltf_keys, tasks

def test_byoc_microtvm(workspace_dir, board, west_cmd, microtvm_debug, use_fvp):
    """This is a simple test case to check BYOC capabilities of microTVM."""
    model = test_utils.ZEPHYR_BOARDS[board]
    build_config = {"debug": microtvm_debug}
    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # C compiler
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)
    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)
    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)
    mod = tvm.relay.transform.PartitionGraph()(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))
    map_inputs = {"w{}".format(i): w_data[i] for i in range(8)}
    map_inputs["x"] = x_data

    check_result(
        temp_dir=workspace_dir,
        relay_mod=mod,
        map_inputs=map_inputs,
        out_shape=(30, 10),
        result=np.concatenate(
            (
                ((x_data + w_data[0]) - w_data[1]) * w_data[2],
                ((x_data + w_data[3]) - w_data[4]) * w_data[5],
                x_data + w_data[6] - w_data[7],
            ),
            axis=0,
        ),
        model=model,
        zephyr_board=board,
        west_cmd=west_cmd,
        build_config=build_config,
        use_fvp=use_fvp,
    )

def test_multi_node_subgraph(check_result):
    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # subgraph0
    x0 = relay.var("x0", shape=(10, 10))
    w00 = relay.var("w00", shape=(10, 10))
    w01 = relay.var("w01", shape=(10, 10))
    w02 = relay.var("w02", shape=(10, 10))
    z00 = relay.add(x0, w00)
    p00 = relay.subtract(z00, w01)
    q00 = relay.multiply(p00, w02)
    subgraph0 = relay.Function([x0, w00, w01, w02], q00)
    subgraph0 = set_external_func_attr(subgraph0, "ccompiler", "ccompiler_0")
    call0 = relay.Call(subgraph0, [x, w0, w1, w2])

    # subgraph1
    x1 = relay.var("x1", shape=(10, 10))
    w10 = relay.var("w10", shape=(10, 10))
    w11 = relay.var("w11", shape=(10, 10))
    w12 = relay.var("w12", shape=(10, 10))
    z10 = relay.add(x1, w10)
    p10 = relay.subtract(z10, w11)
    q10 = relay.multiply(p10, w12)
    subgraph1 = relay.Function([x1, w10, w11, w12], q10)
    subgraph1 = set_external_func_attr(subgraph1, "ccompiler", "ccompiler_1")
    call1 = relay.Call(subgraph1, [x, w3, w4, w5])

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((call0, call1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    mod["main"] = f
    mod = relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))
    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)])

    check_result(
        mod,
        map_inputs,
        (30, 10),
        np.concatenate(
            (
                ((x_data + w_data[0]) - w_data[1]) * w_data[2],
                ((x_data + w_data[3]) - w_data[4]) * w_data[5],
                x_data + w_data[6] - w_data[7],
            ),
            axis=0,
        ),
    )
    VizParser,
)
from tvm.contrib.relay_viz.terminal import (
    TermGraph,
    TermPlotter,
    TermVizParser,
)

######################################################################
# Define a Relay IR Module with multiple GlobalVar
# ------------------------------------------------
# Let's build an example Relay IR Module containing multiple ``GlobalVar``.
# We define an ``add`` function and call it in the main function.
data = relay.var("data")
bias = relay.var("bias")
add_op = relay.add(data, bias)
add_func = relay.Function([data, bias], add_op)
add_gvar = relay.GlobalVar("AddFunc")

input0 = relay.var("input0")
input1 = relay.var("input1")
input2 = relay.var("input2")
add_01 = relay.Call(add_gvar, [input0, input1])
add_012 = relay.Call(add_gvar, [input2, add_01])
main_func = relay.Function([input0, input1, input2], add_012)
main_gvar = relay.GlobalVar("main")

mod = tvm.IRModule({main_gvar: main_func, add_gvar: add_func})

######################################################################
# Render the graph with Relay Visualizer on the terminal
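# ------------------------------------------------------
# A minimal rendering sketch (an addition to this excerpt, not the original tutorial
# text): it assumes ``relay_viz`` is imported as below and uses the terminal plotter
# and parser already imported above; ``render()`` prints an ASCII graph per GlobalVar.
from tvm.contrib import relay_viz

viz = relay_viz.RelayVisualizer(mod, plotter=TermPlotter(), parser=TermVizParser())
viz.render()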
def test_add_const():
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)
    check_eval(func, [], 2)

def get_func(shape):
    x = relay.var("x", shape=shape)
    y = relay.add(x, x)
    z = relay.add(y, x)
    f = relay.ir_pass.infer_type(relay.Function([x], z))
    return f

def get_ref_log():
    ref_log = relay.Function([x], relay.log(relay.add(x, x)))
    return ref_log

def expected():
    add = relay.add(x, y)
    sub = relay.subtract(add, z)
    func = relay.Function([x, y, z], sub)
    return func

def get_ref_sub():
    ref_sub = relay.Function([x, y],
                             relay.subtract(relay.add(x, x), relay.add(y, y)))
    return ref_sub

def test_add_const():
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)

def get_ref_abs():
    shape = (5, 10)
    tp = relay.TensorType(shape, "float32")
    a = relay.var("a", tp)
    ref_abs = relay.Function([a], relay.abs(relay.add(a, a)))
    return ref_abs

def test_op_add():
    add = relay.add(relay.const(1), relay.const(2))
    add_val = run_as_python(add)
    assert_tensor_value(add_val, 3)

def visit_constant(self, const):
    return relay.add(const, const)

def before(dshape):
    x = relay.var("x", shape=dshape)
    y = relay.add(x, relay.const(1, "float32"))
    y = relay.annotation.stop_fusion(y)
    z = relay.exp(y)
    return relay.Function([x], z)
def residual_unit(
    data,
    num_filter,
    stride,
    dim_match,
    name,
    bottle_neck=True,
    data_layout="NCHW",
    kernel_layout="IOHW",
):
    """Return ResNet Unit symbol for building ResNet

    Parameters
    ----------
    data : str
        Input data

    num_filter : int
        Number of output channels

    bnf : int
        Bottle neck channels factor with regard to num_filter

    stride : tuple
        Stride used in convolution

    dim_match : bool
        True means channel number between input and output is the same,
        otherwise means differ

    name : str
        Base name of the operators
    """
    bn_axis = data_layout.index("C")
    if bottle_neck:
        conv1 = layers.conv2d(
            data=data,
            channels=int(num_filter * 0.25),
            kernel_size=(1, 1),
            strides=stride,
            padding=(0, 0),
            name=name + "_conv1",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
        bn1 = layers.batch_norm_infer(data=conv1, epsilon=2e-5, axis=bn_axis, name=name + "_bn1")
        act1 = relay.nn.relu(data=bn1)
        conv2 = layers.conv2d(
            data=act1,
            channels=int(num_filter * 0.25),
            kernel_size=(3, 3),
            strides=(1, 1),
            padding=(1, 1),
            name=name + "_conv2",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
        bn2 = layers.batch_norm_infer(data=conv2, epsilon=2e-5, axis=bn_axis, name=name + "_bn2")
        act2 = relay.nn.relu(data=bn2)
        conv3 = layers.conv2d(
            data=act2,
            channels=num_filter,
            kernel_size=(1, 1),
            strides=(1, 1),
            padding=(0, 0),
            name=name + "_conv3",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
        bn3 = layers.batch_norm_infer(data=conv3, epsilon=2e-5, axis=bn_axis, name=name + "_bn3")

        if dim_match:
            shortcut = data
        else:
            shortcut = layers.conv2d(
                data=data,
                channels=num_filter,
                kernel_size=(1, 1),
                strides=stride,
                name=name + "_sc",
                data_layout=data_layout,
                kernel_layout=kernel_layout,
            )
        add = relay.add(bn3, shortcut)
        return relay.nn.relu(data=add)

    conv1 = layers.conv2d(
        data=data,
        channels=num_filter,
        kernel_size=(3, 3),
        strides=stride,
        padding=(1, 1),
        name=name + "_conv1",
        data_layout=data_layout,
        kernel_layout=kernel_layout,
    )
    bn1 = layers.batch_norm_infer(data=conv1, epsilon=2e-5, axis=bn_axis, name=name + "_bn1")
    act1 = relay.nn.relu(data=bn1)
    conv2 = layers.conv2d(
        data=act1,
        channels=num_filter,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding=(1, 1),
        name=name + "_conv2",
        data_layout=data_layout,
        kernel_layout=kernel_layout,
    )
    bn2 = layers.batch_norm_infer(data=conv2, epsilon=2e-5, axis=bn_axis, name=name + "_bn2")
    if dim_match:
        shortcut = data
    else:
        shortcut = layers.conv2d(
            data=data,
            channels=num_filter,
            kernel_size=(1, 1),
            strides=stride,
            name=name + "_sc",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
    add = relay.add(bn2, shortcut)
    return relay.nn.relu(data=add)
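# A minimal usage sketch (an illustrative addition, assuming this residual_unit and the
# `layers` helpers from tvm.relay.testing are importable as in relay/testing/resnet.py):
# build a single non-bottleneck unit and wrap it in a Function over its free variables.
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 64, 56, 56))
unit = residual_unit(
    data,
    num_filter=64,
    stride=(1, 1),
    dim_match=True,
    name="stage1_unit1",
    bottle_neck=False,
    data_layout="NCHW",
    kernel_layout="OIHW",
)
unit_func = relay.Function(relay.analysis.free_vars(unit), unit)
print(unit_func)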