def test_different_targets():
    """Annotating targets A then B in sequence must equal annotating both at once."""

    @tvm.ir.register_op_attr("nn.relu", "target.different.A")
    def relu(expr):  # pylint: disable=unused-variable
        return True

    # Fixed: this checker is registered for "add", so name it accordingly
    # instead of shadowing `relu` above.
    @tvm.ir.register_op_attr("add", "target.different.B")
    def add(expr):  # pylint: disable=unused-variable
        return True

    def before():
        x = relay.var("x", shape=(10, 5))
        a_1 = relay.nn.relu(x)
        b_1 = relay.add(a_1, a_1)
        mod = tvm.IRModule.from_expr(b_1)
        return mod

    for annotate_non_call_ops in [True, False]:
        mod = before()
        # Two single-target runs chained together...
        mod1 = transform.AnnotateTarget("different.A", annotate_non_call_ops)(mod)
        mod1 = transform.AnnotateTarget("different.B", annotate_non_call_ops)(mod1)
        # ...must produce the same module as one combined run.
        mod2 = transform.AnnotateTarget(["different.A", "different.B"], annotate_non_call_ops)(mod)
        assert tvm.ir.structural_equal(mod1, mod2)
def test_multiple_runs():
    """Running AnnotateTarget twice (A then B) must match a single combined run."""

    @tvm.ir.register_op_attr("nn.relu", "target.A")
    def relu(expr):  # pylint: disable=unused-variable
        return True

    @tvm.ir.register_op_attr("add", "target.B")
    def add(expr):  # pylint: disable=unused-variable
        return True

    def before():
        # relu feeding an (abs, relu) diamond joined by add.
        x = relay.var("x", shape=(10, 5))
        first = relay.nn.relu(x)
        left = relay.abs(first)
        right = relay.nn.relu(first)
        joined = relay.add(left, right)
        return tvm.IRModule.from_expr(relay.Function([x], joined))

    for annotate_non_call_ops in [True, False]:
        chained = transform.AnnotateTarget("A", annotate_non_call_ops)(before())
        chained = transform.AnnotateTarget("B", annotate_non_call_ops)(chained)
        combined = transform.AnnotateTarget(["A", "B"], annotate_non_call_ops)(before())
        assert tvm.ir.structural_equal(combined, chained)
def partition_for_clml(mod, params=None):
    """Partition the graph greedily offloading supported operators to CLML Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    # Infer types and fold constants, then fuse CLML patterns, annotate the
    # call nodes only (annotate_non_call_ops=False), merge regions, partition.
    pipeline = [
        transform.InferType(),
        transform.FoldConstant(),
        transform.MergeComposite(clml_pattern_table()),
        transform.AnnotateTarget("clml", False),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ]
    return tvm.transform.Sequential(pipeline)(mod)
def partition_for_cublas(
    mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None
) -> tvm.IRModule:
    """Partition the graph to offload for cuBLAS.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to partition.
    params : Optional[Dict[str, tvm.runtime.NDArray]]
        Constant input parameters.

    Returns
    -------
    tvm.IRModule
        The partitioned module.
    """
    # NOTE(review): `params` is accepted but not bound into the module here —
    # presumably intentional for cuBLAS; confirm against the other partition
    # helpers which call bind_params_by_name.
    pipeline = [
        transform.InferType(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("cublas"),
        transform.PartitionGraph(),
        transform.InferType(),
    ]
    return tvm.transform.Sequential(pipeline)(mod)
def annotate(func, compiler): """ An annotator for Core ML. """ # Bind free variables to the constant values. bind_dict = {} for arg in func.params: name = arg.name_hint if name in params: bind_dict[arg] = relay.const(params[name]) func = relay.bind(func, bind_dict) # Annotate the entire graph for Core ML mod = tvm.IRModule() mod["main"] = func seq = tvm.transform.Sequential([ transform.SimplifyInference(), transform.FoldConstant(), transform.FoldScaleAxis(), transform.AnnotateTarget(compiler), transform.MergeCompilerRegions(), transform.PartitionGraph(), ]) with relay.build_config(opt_level=3): mod = seq(mod) return mod
def partition_for_ethosn78(mod, params=None, **opts):
    """Partition the graph greedily offloading supported operators to Arm Ethos-N NPU.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.

    Raises
    ------
    ValueError
        If the "variant" option is missing or is not "ethos-n78".
    """
    # An empty/missing opts dict yields "" from .get(), which also fails the check.
    if not opts or opts.get("variant", "").lower() != "ethos-n78":
        raise ValueError("When targeting Ethos(TM)-N78, -variant=Ethos-N78 should be set.")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    pipeline = [
        transform.InferType(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("ethos-n"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ]
    return tvm.transform.Sequential(pipeline)(mod)
def partition_for_dnnl(mod, params=None):
    """Partition the graph greedily offloading supported operators to DNNL.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    mod : Module
        Annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    passes = [
        transform.CanonicalizeOps(),
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu`
        transform.SimplifyExpr(),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ]
    with tvm.transform.PassContext(opt_level=3):
        mod = tvm.transform.Sequential(passes)(mod)
    return mod
def partition_for_cmsisnn(mod, params=None, **opts):
    """Partition the graph greedily offloading supported operators on Cortex-M using CMSIS-NN

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    cmsisnn_pipeline = [
        transform.InferType(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("cmsisnn"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ]
    return tvm.transform.Sequential(cmsisnn_pipeline)(mod)
def offload(mod):
    """Offload ops based on the registered ops"""
    # Annotate everything supported by the "verilator" backend, then partition.
    annotated_mod = transform.AnnotateTarget(["verilator"])(mod)
    return transform.PartitionGraph()(annotated_mod)
def test_annotate():
    """Annotated module must be structurally equal to the hand-built expected module."""
    # NOTE(review): dtype, ishape, w1shape, annotated() and expected() are
    # free names resolved from the enclosing module scope (not visible here).
    mod = annotated(dtype, ishape, w1shape)
    mod = transform.AnnotateTarget("dnnl")(mod)
    mod = relay.transform.InferType()(mod)
    ref_mod = expected(dtype, ishape, w1shape)
    ref_mod = relay.transform.InferType()(ref_mod)
    tvm.ir.assert_structural_equal(mod, ref_mod)
def test_constant_tuples():
    """Partitioned qnn.concatenate must keep its tuple and constant arguments
    intact (Tuple/Constant nodes, not flattened or lifted)."""

    # Fixed: name the attribute checker after the op it is registered for
    # ("qnn.concatenate") instead of the misleading `add`.
    @reg.register("qnn.concatenate", "target.const_tuples")
    def concatenate(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        a = relay.var('a', shape=(10, 10), dtype="uint8")
        b = relay.var('b', shape=(10, 10), dtype="uint8")
        a1 = relay.abs(a)

        zeroi = relay.const(1, "int32")
        zerof = relay.const(0, "float32")
        con = relay.qnn.op.concatenate(
            (a1, b),
            input_scales=(zerof, zerof),
            input_zero_points=(zeroi, zeroi),
            output_scale=zerof,
            output_zero_point=zeroi,
            axis=1,
        )

        f = relay.Function([a, b], con)
        return tvm.IRModule.from_expr(f)

    seq = tvm.transform.Sequential([
        transform.AnnotateTarget("const_tuples"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    partitioned = seq(create_graph())
    concat = partitioned["const_tuples_0"].body
    # Scale/zero-point tuples and scalar constants survive partitioning as-is.
    assert type(concat.args[1]) == relay.Tuple
    assert type(concat.args[2]) == relay.Tuple
    assert type(concat.args[3]) == relay.Constant
    assert type(concat.args[4]) == relay.Constant
def partition_for_cmsisnn(mod, params=None, mod_name="default", **opts):
    """Partition the graph greedily offloading supported operators on Cortex-M using CMSIS-NN

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    mod_name: str, optional
        The module name

    Returns
    -------
    ret : Module
        annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    # Partition, then run the CMSIS-NN specific constant-handling passes.
    pipeline = [
        transform.InferType(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("cmsis-nn"),
        transform.PartitionGraph(mod_name=mod_name),
        GenerateCMSISNNConstants(),
        ScalarToTensorConstants(),
        ExtractConstantsFromPartitionedFunction(),
        transform.InferType(),
    ]
    return tvm.transform.Sequential(pipeline)(mod)
def partition_for_cutlass(mod):
    """Partition the input module into CUTLASS-supported subgraphs."""
    # Build the GEMM patterns (construction order preserved).
    gemm_plain = ("cutlass.dense", make_gemm_pattern(False, None))
    gemm_bias = ("cutlass.dense_bias", make_gemm_pattern(True, None))
    gemm_bias_relu = ("cutlass.dense_bias_relu", make_gemm_pattern(True, "relu"))
    gemm_bias_gelu_fp16 = ("cutlass.dense_bias_gelu_fp16", make_gemm_pattern(True, "gelu"))
    gemm_bias_gelu_fp32 = (
        "cutlass.dense_bias_gelu_fp32",
        make_gemm_pattern(True, "gelu", out_dtype="float32"),
    )

    # Most specific patterns first so MergeComposite prefers them.
    cutlass_patterns = [
        gemm_bias_gelu_fp16,
        gemm_bias_gelu_fp32,
        gemm_bias_relu,
        gemm_bias,
        gemm_plain,
        ("cutlass.batch_matmul", make_batch_matmul_pattern()),
        # TODO(masahi): Add more conv2d patterns
        ("cutlass.conv2d", make_conv2d_pattern()),
    ]

    mod = transform.MergeComposite(cutlass_patterns)(mod)
    mod = transform.AnnotateTarget(["cutlass"])(mod)
    return transform.PartitionGraph()(mod)
def partition_for_arm_compute_lib(mod, params=None, disabled_ops=None, **opts):
    """Partition the graph greedily offloading supported operators to Arm Compute Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    disabled_ops : Optional[list]
        Ops do not want to offload to ACL. Defaults to ["concatenate"].

    Returns
    -------
    ret : annotated and partitioned module.
    """
    # Fixed: replace the mutable default argument `disabled_ops=["concatenate"]`
    # with a None sentinel; a shared list default would leak state across calls
    # if ever mutated. Default behavior is unchanged.
    if disabled_ops is None:
        disabled_ops = ["concatenate"]

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential([
        transform.InferType(),
        transform.MergeComposite(arm_compute_lib_pattern_table(disabled_ops)),
        # annotate_non_call_ops=False: only call nodes get annotated.
        transform.AnnotateTarget("arm_compute_lib", False),
        transform.PartitionGraph(),
    ])
    return seq(mod)
def test_multiple_ends():
    """One supported op feeding two unsupported consumers must get a single
    region with two compiler_end annotations."""

    def before():
        x = relay.var("x", shape=(10, 10))
        relu_out = relay.nn.relu(x)
        branch_a = relay.abs(relu_out)
        branch_b = relay.abs(relu_out)
        joined = relay.add(branch_a, branch_b)
        return tvm.IRModule.from_expr(relay.Function([x], joined))

    def after():
        x = relay.var("x", shape=(10, 10))
        begin = relay.annotation.compiler_begin(x, "test")
        relu_out = relay.nn.relu(begin)
        # The same relu result is ended twice — once per consumer.
        end_a = relay.annotation.compiler_end(relu_out, "test")
        end_b = relay.annotation.compiler_end(relu_out, "test")
        branch_a = relay.abs(end_a)
        branch_b = relay.abs(end_b)
        joined = relay.add(branch_a, branch_b)
        return tvm.IRModule.from_expr(relay.Function([x], joined))

    result = transform.AnnotateTarget("test")(before())
    expected = transform.InferType()(after())
    assert tvm.ir.structural_equal(expected, result)
def partition_for_arm_compute_lib(mod, params=None):
    """Partition the graph greedily offloading supported operators to Arm Compute Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    acl_pipeline = [
        transform.MergeComposite(arm_compute_lib_pattern_table()),
        transform.AnnotateTarget("arm_compute_lib"),
        transform.PartitionGraph(),
    ]
    return tvm.transform.Sequential(acl_pipeline)(mod)
def test_annotate():
    """Annotate+partition for coremlcompiler must match the pre-built expected graph."""
    graph = _create_graph()
    graph = transform.AnnotateTarget("coremlcompiler")(graph)
    partitioned = transform.PartitionGraph()(graph)
    reference = _create_graph_annotated()
    assert tvm.ir.structural_equal(partitioned, reference, map_free_vars=True)
def test_extern_dnnl_mobilenet():
    """End-to-end: partition mobilenet for DNNL and compare against the
    unpartitioned reference executed on CPU."""
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = 'float32'
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype='float32')
    # Bake constant params into the graph before annotation/partitioning.
    mod["main"] = bind_params_by_name(mod["main"], params)
    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)
    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

    # Reference result from the original (unpartitioned) workload.
    # NOTE(review): `ctx=` and `.asnumpy()` are legacy TVM API spellings —
    # presumably matching the TVM version this file targets; confirm.
    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype='float32')
    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    ref_res = ref_ex.evaluate()(i_data, **params)
    check_result(mod, {"data": i_data}, (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params)
def test_duplicate_outputs():
    """A partitioned subgraph whose output feeds multiple consumers must be
    called once and its result reused, not duplicated per consumer."""
    target = "test_duplicate_outputs"

    @tvm.ir.register_op_attr("abs", "target." + target)
    def abs(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        # abs feeds three consumers (relu, tanh, log) gathered in a tuple.
        data = relay.var("data", shape=(10, 10))
        x = relay.abs(data)
        out_1 = relay.nn.relu(x)
        out_2 = relay.tanh(x)
        out_3 = relay.log(x)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data], out)
        return func

    def expected():
        mod = tvm.IRModule()

        # function 0: the offloaded abs subgraph.
        f0_i0 = relay.var(target + "_0_i0", shape=(10, 10))
        f0_o0 = relay.abs(f0_i0)
        func0 = relay.Function([f0_i0], f0_o0)

        func0 = func0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Compiler", target)
        func0 = func0.with_attr("global_symbol", target + "_0")
        gv0 = relay.GlobalVar(target + "_0")
        mod[gv0] = func0
        mod = transform.InferType()(mod)

        # body: a single call to the offloaded function, result shared by all
        # three consumers.
        data = relay.var("data", shape=(10, 10))
        function_out = gv0(data)
        out_1 = relay.nn.relu(function_out)
        out_2 = relay.tanh(function_out)
        out_3 = relay.log(function_out)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data], out)
        mod["main"] = func
        mod = transform.InferType()(mod)
        return mod

    mod = tvm.IRModule()
    mod["main"] = create_graph()

    seq = tvm.transform.Sequential(
        [
            transform.AnnotateTarget(target),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    ref_mod = expected()
    partitioned = seq(mod)
    assert tvm.ir.structural_equal(partitioned, ref_mod, map_free_vars=True)
def get_pass_order(use_patterns):
    """Get the pass ordering based on using predicates or patterns.

    Parameters
    ----------
    use_patterns: Bool
        True if pass needs to work with op patterns

    Returns
    ----------
    ret : Sequential
        Pass object
    """
    # Prefix shared by both orderings.
    common_passes = [
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
    ]
    if use_patterns:
        # Pattern flow: fuse composites first, inline them after partitioning.
        tail_passes = [
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("tensorrt"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.InlineComposites("tensorrt"),
            transform.InferType(),
        ]
    else:
        # Predicate flow: annotate ops directly.
        tail_passes = [
            transform.AnnotateTarget("tensorrt"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.InferType(),
        ]
    return tvm.transform.Sequential(common_passes + tail_passes)
def test_ends_with_tuple():
    """Annotation of a graph whose output is a tuple (optionally via
    TupleGetItem), with and without annotating non-call ops."""
    trgt = "clip"

    @tvm.ir.register_op_attr("clip", "target." + trgt)
    def relu(expr):  # pylint: disable=unused-variable
        return True

    def get_model(get_item):
        """Return a model"""
        a = relay.var("a", shape=(1, 16, 16, 4), dtype="uint8")
        z = relay.op.clip(a, 0, 255)
        b = relay.op.clip(z, 0, 15)
        c = relay.op.clip(z, 16, 31)
        t = relay.Tuple((c, b))
        tgi = relay.TupleGetItem(t, 1) if get_item else t
        # NOTE(review): `foo` is unused and from_expr(tgi) is returned instead;
        # from_expr wraps tgi in a function over its free vars, which appears
        # equivalent — confirm `foo` was not meant to be returned.
        foo = relay.Function([a], tgi)
        return tvm.IRModule.from_expr(tgi)

    def get_expected(annotate_non_call_ops, get_item):
        # Hand-built annotated graph mirroring get_model().
        a_ = relay.var("a", shape=(1, 16, 16, 4), dtype="uint8")
        a = relay.annotation.compiler_begin(a_, trgt)
        z = relay.op.clip(a, 0, 255)
        z1 = relay.annotation.compiler_end(z, trgt)
        z1 = relay.annotation.compiler_begin(z1, trgt)
        b = relay.op.clip(z1, 0, 15)
        b = relay.annotation.compiler_end(b, trgt)
        b = relay.annotation.compiler_begin(b, trgt) if annotate_non_call_ops else b
        z2 = relay.annotation.compiler_end(z, trgt)
        z2 = relay.annotation.compiler_begin(z2, trgt)
        c = relay.op.clip(z2, 16, 31)
        c = relay.annotation.compiler_end(c, trgt)
        c = relay.annotation.compiler_begin(c, trgt) if annotate_non_call_ops else c
        # Tuple/TupleGetItem annotations appear only when non-call ops are annotated.
        t = relay.Tuple((c, b))
        t = relay.annotation.compiler_end(t, trgt) if annotate_non_call_ops else t
        if get_item:
            t = relay.annotation.compiler_begin(t, trgt) if annotate_non_call_ops else t
            tgi = relay.TupleGetItem(t, 1)
            tgi = relay.annotation.compiler_end(tgi, trgt) if annotate_non_call_ops else tgi
        else:
            tgi = t
        foo = relay.Function([a_], tgi)
        return tvm.IRModule.from_expr(foo)

    # Exercise all four combinations.
    for get_item in [True, False]:
        for annotate_non_call_ops in [False, True]:
            mod = get_model(get_item)
            mod = transform.AnnotateTarget("clip", annotate_non_call_ops)(mod)
            expected = transform.InferType()(get_expected(annotate_non_call_ops, get_item))
            assert tvm.ir.structural_equal(expected, mod)
def _construct_model(func, m1, m2):
    """Annotate/partition `func` for Core ML and run the coreml codegen on
    every partitioned function."""
    mod = tvm.IRModule()
    mod["main"] = func
    mod = transform.AnnotateTarget("coremlcompiler")(mod)
    mod = transform.PartitionGraph()(mod)

    fcompile = tvm._ffi.get_global_func("relay.ext.coremlcompiler")

    # Compile each function that was offloaded to the coreml compiler.
    for _, subfunc in mod.functions.items():
        attrs = subfunc.attrs
        if attrs and "Compiler" in attrs and attrs["Compiler"] == "coremlcompiler":
            fcompile(subfunc)
def partition_for_tensorrt(
    mod: tvm.IRModule,
    params: Optional[Dict[str, tvm.nd.NDArray]] = None,
    # CAUTION: Can't use default Target("tensorrt") here since the target kind is only available
    # if is_tensorrt_compiler_enabled() == True.
    target: Optional[tvm.target.Target] = None,
) -> tvm.IRModule:
    """Partition all functions in mod to greedily offload supported operators to TensorRT.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to partition.
    target : tvm.target.Target
        A target of kind "tensorrt" describing additional partitioning and compilation options.
    params : Optional[Dict[str, tvm.nd.NDArray]]
        Constant input parameters.

    Returns
    -------
    partitioned_mod : tvm.IRModule
        The partitioned module.
    """
    assert is_tensorrt_compiler_enabled(), "Can only partition for TensorRT if it is enabled"

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    if target is None:
        # Use a default target. The get_tensorrt_target() function will similarly create an
        # equivalent default target when compilation continues after partitioning.
        target = tvm.target.Target("tensorrt")

    pipeline = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])

    # Run the pipeline under the tensorrt target so passes can query it.
    with target:
        partitioned = pipeline(mod)
    return prune_tensorrt_subgraphs(partitioned)
def test_partial_constant():
    """Test the subgraph with (const, var, const, var) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (10, 10)

    in_1 = relay.var("in_1", shape=ishape, dtype=dtype)
    in_2 = relay.var("in_2", shape=ishape, dtype=dtype)
    in_3 = relay.var("in_3", shape=ishape, dtype=dtype)
    in_4 = relay.var("in_4", shape=ishape, dtype=dtype)

    # NOTE(review): in_4 is a function parameter (and is fed data below) but is
    # never used in the arithmetic — add3/add4 both reuse in_3. Presumably one
    # of them was meant to use in_4; confirm against the original intent.
    add1 = relay.add(in_1, in_2)
    add2 = relay.add(add1, in_3)
    add3 = relay.add(add2, in_3)
    add4 = relay.add(add3, in_3)
    func = relay.Function([in_1, in_2, in_3, in_4], add4)

    ref_mod = tvm.IRModule.from_expr(func)
    ref_mod = relay.transform.InferType()(ref_mod)

    data1 = np.random.uniform(0, 1, ishape).astype(dtype)
    data3 = np.random.uniform(0, 1, ishape).astype(dtype)

    params = {
        "in_1": tvm.nd.array(data1, device=tvm.cpu(0)),
        "in_3": tvm.nd.array(data3, device=tvm.cpu(0)),
    }
    # Bind in_1/in_3 to constants so the subgraph sees (const, var, const, var).
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    opt_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        mod = opt_pass(ref_mod)

    data2 = np.random.uniform(0, 1, ishape).astype(dtype)
    data4 = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod, ref_mod, {
        "in_2": data2,
        "in_4": data4
    }, (10, 10), tol=1e-5)
def test_tuple():
    target = "test_tuple"

    @tvm.ir.register_op_attr("nn.relu", "target." + target)
    def relu(expr):  # pylint: disable=unused-variable
        return True

    @tvm.ir.register_op_attr("concatenate", "target." + target)
    def concatenate(expr):  # pylint: disable=unused-variable
        return True

    # NOTE(review): the string below is a bare expression statement, not the
    # function docstring (a docstring must be the first statement in the body).
    """Test that TupleNode is included in annotation when surrounded by supported nodes."""

    def before():
        x = relay.var("x", shape=(10, 5))
        y = relay.var("y", shape=(10, 5))
        a_1 = relay.nn.relu(x)
        a_2 = relay.nn.relu(y)
        out = relay.concatenate((a_1, a_2), axis=1)
        f = relay.Function([x, y], out)
        mod = tvm.IRModule.from_expr(f)
        return mod

    def after(annotate_non_call_ops):
        x = relay.var("x", shape=(10, 5))
        y = relay.var("y", shape=(10, 5))
        cb_1 = relay.annotation.compiler_begin(x, target)
        cb_2 = relay.annotation.compiler_begin(y, target)
        a_1 = relay.nn.relu(cb_1)
        a_2 = relay.nn.relu(cb_2)
        ce_1 = relay.annotation.compiler_end(a_1, target)
        ce_2 = relay.annotation.compiler_end(a_2, target)
        if annotate_non_call_ops:
            # The tuple itself gets begin/end annotations.
            cb_3 = relay.annotation.compiler_begin(ce_1, target)
            cb_4 = relay.annotation.compiler_begin(ce_2, target)
            tup = relay.Tuple([cb_3, cb_4])
            ce_3 = relay.annotation.compiler_end(tup, target)
        else:
            # The tuple stays unannotated between the two regions.
            ce_3 = relay.Tuple([ce_1, ce_2])

        cb_3 = relay.annotation.compiler_begin(ce_3, target)
        out = relay.op._make.concatenate(cb_3, 1)
        ce_4 = relay.annotation.compiler_end(out, target)
        f = relay.Function([x, y], ce_4)
        mod = tvm.IRModule.from_expr(f)
        return mod

    for annotate_non_call_ops in [False, True]:
        result = transform.AnnotateTarget(target, annotate_non_call_ops)(before())
        expected = transform.InferType()(after(annotate_non_call_ops))
        assert tvm.ir.structural_equal(expected, result)
def test_constant():
    """Test the subgraph with (var, const, ...) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)

    data = relay.var("data", shape=ishape, dtype=dtype)
    weight = relay.var("weight", shape=wshape, dtype=dtype)
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    # conv2d -> batch_norm -> relu.
    layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), padding=(1, 1))
    bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar)
    out = bn_output[0]
    out = relay.nn.relu(out)

    func = relay.Function(relay.analysis.free_vars(out), out)
    ref_mod, params = tvm.relay.testing.create_workload(func)
    # Bind generated params so the weights become constants in the graph.
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    # Fold batch_norm into the conv before pattern matching.
    remove_bn_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
    ])

    dnnl_patterns = get_pattern_table("dnnl")
    composite_partition = tvm.transform.Sequential([
        transform.MergeComposite(dnnl_patterns),
        transform.AnnotateTarget("dnnl"),
        transform.PartitionGraph(),
    ])

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        ref_mod = remove_bn_pass(ref_mod)
        mod = composite_partition(ref_mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod, ref_mod, {"data": i_data}, (1, 32, 14, 14), tol=1e-5)
def test_mobilenet_dnnl():
    """Run the annotate/merge/partition pipeline on mobilenet for DNNL."""
    # NOTE(review): the DNNL-availability guard is commented out, so the
    # pipeline runs unconditionally — confirm whether this was intentional.
    # if not tvm.get_global_func("relay.ext.dnnl", True):
    #     print("skip because DNNL codegen is not available")
    #     return
    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")

    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)

    # NOTE(review): i_data is generated but never used and no result is
    # checked — presumably a check_result call was intended here; confirm.
    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
def test_free_vars_zeros():
    """Test that free variables compile correctly on their own"""
    target = "test_free_vars_zeros"

    # A zero-argument function producing zeros; annotation should leave the
    # module unchanged (compared module is built identically).
    def make_module():
        zeros_fn = relay.Function([], relay.zeros(shape=(0), dtype="float32"))
        return tvm.IRModule.from_expr(zeros_fn)

    result = transform.AnnotateTarget(target)(make_module())
    expected = transform.InferType()(make_module())
    assert tvm.ir.structural_equal(expected, result)
def test_ref_create_read_write():
    """Annotation around RefCreate/RefRead/RefWrite, with nn.relu offloaded."""
    target = "relu"

    @tvm.ir.register_op_attr("nn.relu", "target." + target)
    def annotate(expr):
        return True

    def before():
        ref = relay.expr.RefCreate(relay.const(1.0))
        r = relay.expr.RefWrite(ref, relay.nn.relu(relay.expr.RefRead(ref)))
        return tvm.IRModule.from_expr(r)

    def after(annotate_non_call_ops):
        # Hand-built expected graph: ref ops land on the "default" target and
        # only get begin/end markers when non-call ops are annotated.
        co = relay.const(1.0)
        if annotate_non_call_ops:
            co = relay.annotation.compiler_begin(co, "default")

        ref = relay.expr.RefCreate(co)
        ref1 = ref
        if annotate_non_call_ops:
            ref = relay.annotation.compiler_end(ref, "default")
            ref = relay.annotation.compiler_begin(ref, "default")
            ref1 = relay.annotation.compiler_end(ref1, "default")
            ref1 = relay.annotation.compiler_begin(ref1, "default")

        read = relay.expr.RefRead(ref1)
        if annotate_non_call_ops:
            read = relay.annotation.compiler_end(read, "default")

        # The relu itself is always annotated for the registered target.
        beg = relay.annotation.compiler_begin(read, target)
        relu = relay.nn.relu(beg)
        end = relay.annotation.compiler_end(relu, target)
        if annotate_non_call_ops:
            end = relay.annotation.compiler_begin(end, "default")

        r = relay.expr.RefWrite(ref, end)
        if annotate_non_call_ops:
            r = relay.annotation.compiler_end(r, "default")
        return tvm.IRModule.from_expr(r)

    # Fixed: the loop previously iterated [True, False, True], running the
    # True case twice for no benefit.
    for annotate_non_call_ops in [True, False]:
        result = transform.AnnotateTarget(target, annotate_non_call_ops)(before())
        expected = transform.InferType()(after(annotate_non_call_ops))
        assert tvm.ir.structural_equal(expected, result)
def test_type_propagation():
    """AnnotateTarget must run type inference so op checkers can read dtypes."""
    target = "test_type_propagation"

    @tvm.ir.register_op_attr("nn.relu", "target." + target)
    def relu(attrs, args):  # pylint: disable=unused-variable
        # Raises if checked_type was not populated before the checker runs.
        return args[0].checked_type.dtype == "float32"

    def before():
        inp = relay.var("x", shape=(10, 10))
        stacked = relay.nn.relu(relay.nn.relu(inp))
        return tvm.IRModule.from_expr(relay.Function([inp], stacked))

    # If the type isn't propagated, the relu checker fails to get the dtype.
    assert transform.AnnotateTarget(target)(before())