def test_fold_quantize():
    """FoldConstant leaves a qnn.quantize of a constant alone unless fold_qnn=True."""
    ttype = relay.TensorType([1, 2, 3], "int8")

    def make_unfolded():
        fp_const = relay.const(
            tvm.nd.array(np.array([1.0, 2.0, 3.0], dtype="float32")), dtype="float32"
        )
        quantized = relay.qnn.op.quantize(
            fp_const, output_scale=relay.const(0.5), output_zero_point=relay.const(0)
        )
        arg = relay.var("x", ttype)
        return relay.Function([arg], relay.op.subtract(arg, quantized))

    def make_folded():
        i8_const = relay.const(tvm.nd.array(np.array([2, 4, 6], dtype="int8")), dtype="int8")
        arg = relay.var("x", ttype)
        return relay.Function([arg], relay.op.subtract(arg, i8_const))

    # Nothing changed after applying FoldConstant
    tvm.ir.assert_structural_equal(
        run_opt_pass(make_unfolded(), transform.FoldConstant()),
        run_opt_pass(make_unfolded(), transform.InferType()),
    )
    # Fold QNN constants
    tvm.ir.assert_structural_equal(
        run_opt_pass(make_unfolded(), transform.FoldConstant(fold_qnn=True)),
        run_opt_pass(make_folded(), transform.InferType()),
    )
def test_fold_requantize():
    """qnn.requantize of a constant is folded only when fold_qnn=True."""

    def make_unfolded():
        src = relay.const(tvm.nd.array(np.array([1, 2, 3], dtype="int8")), dtype="int8")
        requant = relay.qnn.op.requantize(
            src,
            input_scale=relay.const(2.0, dtype="float32"),
            input_zero_point=relay.const(1, dtype="int32"),
            output_scale=relay.const(1.0, dtype="float32"),
            output_zero_point=relay.const(1, dtype="int32"),
        )
        arg = relay.var("x", relay.TensorType([3], "int8"))
        return relay.Function([arg], relay.op.add(requant, arg))

    def make_folded():
        folded = relay.const(tvm.nd.array(np.array([1, 3, 5], dtype="int8")), dtype="int8")
        arg = relay.var("x", relay.TensorType([3], "int8"))
        return relay.Function([arg], relay.op.add(folded, arg))

    # Nothing changed after applying FoldConstant
    tvm.ir.assert_structural_equal(
        run_opt_pass(make_unfolded(), transform.FoldConstant()),
        run_opt_pass(make_unfolded(), transform.InferType()),
    )
    # Fold QNN constants
    tvm.ir.assert_structural_equal(
        run_opt_pass(make_unfolded(), transform.FoldConstant(fold_qnn=True)),
        run_opt_pass(make_folded(), transform.InferType()),
    )
def partition_for_dnnl(mod, params=None):
    """Partition the graph greedily offloading supported operators to DNNL.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    mod : Module
        Annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    passes = [
        transform.CanonicalizeOps(),
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu`
        transform.SimplifyExpr(),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ]
    with tvm.transform.PassContext(opt_level=3):
        mod = tvm.transform.Sequential(passes)(mod)
    return mod
def quantize_model(model, params, input_dtype, input_shape, qeval='power2'):
    """Quantize a Relay model, calibrating activation scales on a dataset.

    Parameters
    ----------
    model : module-like object with a ``'main'`` function.
    params : dict of str -> NDArray
        Constant parameters bound into the graph before optimization.
    input_dtype, input_shape :
        Input tensor signature forwarded to ``calibrate_on_dataset``.
    qeval : str
        'power2' rounds positive activation scales up to the next power of
        two (non-positive scales become 1.0); 'max' keeps raw maxima.

    Returns
    -------
    The realized (quantized) module with constants folded.

    Raises
    ------
    ValueError if `qeval` is not one of 'power2' / 'max'.
    """
    import math  # local import: only needed for the power-of-two rounding below

    skip_conv_layers = [0]  # keep the first conv layer in float
    with relay.quantize.qconfig(store_lowbit_output=False,
                                skip_conv_layers=skip_conv_layers):
        from tvm.relay.quantize.quantize import _bind_params
        graph = _bind_params(model['main'], params)
        mod = relay.Module.from_expr(graph)
        optimize = _transform.Sequential([_transform.SimplifyInference(),
                                          _transform.FoldConstant(),
                                          _transform.FoldScaleAxis(),
                                          _transform.CanonicalizeOps(),
                                          _transform.FoldConstant()])
        with relay.build_config(opt_level=4):
            mod = optimize(mod)
        mod = relay.quantize.annotate()(mod)

        # find scale: reuse cached per-layer statistics when available
        cache_file = '%s_%s_scales.pkl' % (VIDEO_FILE, MODEL_NAME)
        if os.path.exists(cache_file):
            print("Using cached layer statistics...")
            with open(cache_file, 'rb') as f:
                scales = pickle.load(f)
        else:
            print("Compute layer statistics...")
            scales = calibrate_on_dataset(mod['main'], params, input_dtype, input_shape)
            with open(cache_file, 'wb') as f:
                pickle.dump(scales, f)

        if qeval == 'power2':
            # BUGFIX: the original used np.math.ceil / np.math.log; the
            # `np.math` alias is deprecated and removed in modern NumPy.
            # Use the stdlib math module (log2 is also exact for powers of 2).
            scales = list(
                map(lambda scale: 2.0 ** math.ceil(math.log2(scale))
                    if scale > 0 else 1.0, scales))
            weight_scales = 'power2'
        elif qeval == 'max':
            weight_scales = 'max'
        else:
            # BUGFIX: corrected "quantiziation" typo in the error message.
            raise ValueError("Invalid quantization eval: " + qeval)

        mod['main'] = relay.quantize.calibrate(mod['main'],
                                               weight_scales=weight_scales,
                                               scales=scales)
        mod = relay.quantize.realize()(mod)
        mod = relay.transform.FoldConstant()(mod)
    return mod
def partition_for_vitis_ai(mod, params=None, dpu=None, **opts):
    """Partition the Relay expression for offloading operators to Vitis AI DPU

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    dpu : str
        The DPU identifier (e.g. DPUCZDX8G-zcu104, DPUCADF8H)

    Returns
    -------
    ret : Module
    """
    if dpu is None:
        raise ValueError("Please pass Vitis AI DPU identifier to the partitioning function")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    # NHWC inside the offloaded partition; the residual main graph stays NCHW.
    partition_layouts = {
        "nn.conv2d": ["NHWC", "default"],
        "nn.upsampling": ["NHWC"],
        "image.resize2d": ["NHWC"],
    }
    main_layouts = {
        "nn.conv2d": ["NCHW", "default"],
        "nn.upsampling": ["NCHW"],
        "image.resize2d": ["NCHW"],
    }

    pipeline = tvm.transform.Sequential([
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(partition_layouts),
        transform.FoldConstant(),
        transform.InferType(),
        VitisAIAnnotationPass("vitis_ai", dpu, params),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(main_layouts),
        transform.FoldConstant(),
    ])
    with tvm.transform.PassContext(opt_level=3):
        return pipeline(mod)
def const_folding_test():
    """FoldConstant on a QNN conv2d with a constant uint8 kernel must not
    leave intermediate `reshape` ops behind in the folded function."""
    data_shape = (2, 4, 2, 4)
    data_dtype = 'uint8'
    kernel_shape = (3, 4, 2, 2)
    kernel_dtype = 'uint8'
    # BUGFIX: np.random.random_integers is deprecated (and removed in
    # NumPy >= 1.25). np.random.randint has an *exclusive* upper bound, so
    # high=256 preserves the original inclusive [0, 255] range.
    golden_weight = np.random.randint(low=0, high=256,
                                      size=kernel_shape).astype(kernel_dtype)
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    kernel = relay.const(golden_weight)
    qnn_func = get_qnn_func(data, kernel,
                            input_zero_point=8,
                            kernel_zero_point=3,
                            kernel_size=(2, 2),
                            padding=(0, 0),
                            strides=(1, 1),
                            dilation=(1, 1),
                            data_layout="NCHW",
                            kernel_layout="OIHW",
                            out_dtype="int32")
    folded_mod = transform.FoldConstant()(qnn_func)
    folded_func = folded_mod["main"]
    assert "reshape" not in folded_func.astext()
def annotate(func, compiler):
    """ An annotator for Core ML. """
    # Bind free variables to the constant values (from the enclosing `params`).
    bind_dict = {
        arg: relay.const(params[arg.name_hint])
        for arg in func.params
        if arg.name_hint in params
    }
    func = relay.bind(func, bind_dict)

    # Annotate the entire graph for Core ML
    mod = tvm.IRModule()
    mod["main"] = func

    pipeline = tvm.transform.Sequential([
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        transform.AnnotateTarget(compiler),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])
    with relay.build_config(opt_level=3):
        mod = pipeline(mod)
    return mod
def partition_for_clml(mod, params=None):
    """Partition the graph greedily offloading supported operators to CLML Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    pipeline = tvm.transform.Sequential([
        transform.InferType(),
        transform.FoldConstant(),
        transform.MergeComposite(clml_pattern_table()),
        transform.AnnotateTarget("clml", False),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])
    return pipeline(mod)
def test_const_folding():
    """Folding a grouped QNN conv2d (IOHW kernel) must not leave reshape ops."""
    ishape = (2, 4, 2, 4)
    idtype = "uint8"
    kshape = (4, 3, 2, 2)
    kdtype = "uint8"
    weight_np = np.random.randint(low=0, high=255, size=kshape).astype(kdtype)
    inp = relay.var("data", shape=ishape, dtype=idtype)
    weight = relay.const(weight_np)
    func = get_qnn_func(
        inp,
        weight,
        input_zero_point=8,
        kernel_zero_point=3,
        kernel_size=(2, 2),
        input_scale=1.0,
        kernel_scale=1.0,
        padding=(0, 0),
        strides=(1, 1),
        dilation=(1, 1),
        data_layout="NCHW",
        kernel_layout="IOHW",
        out_dtype="int32",
        channels=kshape[1],
        groups=1,
    )
    folded_mod = transform.FoldConstant()(func)
    assert "reshape" not in folded_mod["main"].astext()
def test_fold_const():
    """FoldConstant pre-evaluates constant subexpressions; the surrounding
    target/lowering context (with a deliberately failing lower pass) must
    never be exercised."""
    const_np = np.array([1, 2, 3]).astype("float32")
    ttype = relay.TensorType([1, 2, 3], "float32")

    def make_input():
        c = relay.const(const_np)
        x = relay.var("x", ttype)
        acc = relay.add(c, c)
        acc = relay.multiply(acc, relay.const(2, "float32"))
        acc = relay.add(x, acc)
        return relay.Function([x], relay.add(acc, c))

    def make_expected():
        x = relay.var("x", ttype)
        prefolded = (const_np + const_np) * 2
        out = relay.add(relay.add(x, relay.const(prefolded)), relay.const(const_np))
        return relay.Function([x], out)

    def fail(x):
        raise RuntimeError()

    # the fold constant should work on any context.
    with tvm.target.build_config(add_lower_pass=[(0, fail)]):
        with tvm.target.create("cuda"):
            zz = run_opt_pass(make_input(), transform.FoldConstant())
            zexpected = run_opt_pass(make_expected(), transform.InferType())
    assert relay.analysis.alpha_equal(zz, zexpected)
def test_fold_const_with_on_device():
    """Make sure on_device annotations don't get in the way of constant folding"""
    const_np = np.array([1, 2, 3]).astype("float32")
    ttype = relay.TensorType([1, 2, 3], "float32")

    def make_input():
        c = relay.const(const_np)
        x = relay.var("x", ttype)
        acc = relay.add(c, c)
        acc = relay.multiply(acc, relay.const(2, "float32"))
        acc = relay.add(x, acc)
        acc = relay.add(acc, c)
        return annot_func(relay.Function([x], acc))

    def make_expected():
        x = relay.var("x", ttype)
        prefolded = (const_np + const_np) * 2
        out = relay.add(relay.add(x, relay.const(prefolded)), relay.const(const_np))
        return annot_func(relay.Function([x], out))

    actual = run_opt_pass(make_input(), transform.FoldConstant())
    reference = run_opt_pass(make_expected(), transform.InferType())
    tvm.ir.assert_structural_equal(actual, reference)
def test_fold_let_with_on_device():
    """Make sure on_device annotations don't get in the way of constant
    folding, and inlined constants bring their annotations with them."""
    const_np = np.array(1).astype("float32")
    ttype = relay.TensorType([1], "float32")

    def make_input():
        builder = relay.ScopeBuilder()
        x = relay.var("x", ttype)
        c = builder.let("t1", annot_expr(relay.const(const_np)))
        doubled = builder.let("t2", annot_expr(relay.add(c, c)))
        result = builder.let("t3", annot_expr(relay.add(doubled, x)))
        builder.ret(result)
        return annot_func(relay.Function([x], builder.get()))

    def make_expected():
        builder = relay.ScopeBuilder()
        x = relay.var("x", ttype)
        folded = const_np + const_np
        result = builder.let(
            "t3", annot_expr(relay.add(annot_expr(relay.const(folded)), x))
        )
        builder.ret(result)
        return annot_func(relay.Function([x], builder.get()))

    actual = run_opt_pass(make_input(), transform.FoldConstant())
    reference = run_opt_pass(make_expected(), transform.InferType())
    tvm.ir.assert_structural_equal(actual, reference)
def test_fold_const():
    """FoldConstant must not invoke the lowering pipeline: a module pass that
    always raises is registered as a lower pass and must never fire."""
    const_np = np.array([1, 2, 3]).astype("float32")
    ttype = relay.TensorType([1, 2, 3], "float32")

    def make_input():
        c = relay.const(const_np)
        x = relay.var("x", ttype)
        acc = relay.add(c, c)
        acc = relay.multiply(acc, relay.const(2, "float32"))
        acc = relay.add(x, acc)
        return relay.Function([x], relay.add(acc, c))

    def make_expected():
        x = relay.var("x", ttype)
        prefolded = (const_np + const_np) * 2
        out = relay.add(relay.add(x, relay.const(prefolded)), relay.const(const_np))
        return relay.Function([x], out)

    def FailPass():
        def _transform(m, *args):
            raise RuntimeError()
        return tvm.transform.module_pass(_transform, opt_level=0)

    # the fold constant should work on any context.
    with tvm.target.build_config(add_lower_pass=[(0, FailPass())]):
        with tvm.target.create("cuda"):
            zz = run_opt_pass(make_input(), transform.FoldConstant())
            zexpected = run_opt_pass(make_expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)
def test_fold_const():
    """Constant folding works regardless of the active target (here: CUDA)."""
    const_np = np.array([1, 2, 3]).astype("float32")
    ttype = relay.TensorType([1, 2, 3], "float32")

    def make_input():
        c = relay.const(const_np)
        x = relay.var("x", ttype)
        acc = relay.add(c, c)
        acc = relay.multiply(acc, relay.const(2, "float32"))
        acc = relay.add(x, acc)
        return relay.Function([x], relay.add(acc, c))

    def make_expected():
        x = relay.var("x", ttype)
        prefolded = (const_np + const_np) * 2
        out = relay.add(relay.add(x, relay.const(prefolded)), relay.const(const_np))
        return relay.Function([x], out)

    # the fold constant should work on any context.
    with tvm.target.create("cuda"):
        zz = run_opt_pass(make_input(), transform.FoldConstant())
        zexpected = run_opt_pass(make_expected(), transform.InferType())
    assert tvm.ir.structural_equal(zz, zexpected)
def partition():
    """Build a batch_norm module, white-list annotate it for `test_compiler`,
    and run the partitioning pipeline over it."""
    data = relay.var("data", relay.TensorType((1, 16, 224, 224), "float32"))
    gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32"))
    beta = relay.var("bn_beta", relay.TensorType((16,), "float32"))
    moving_mean = relay.var("bn_mean", relay.TensorType((16,), "float32"))
    moving_var = relay.var("bn_var", relay.TensorType((16,), "float32"))
    bn = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var)
    func = relay.Function([data, gamma, beta, moving_mean, moving_var], bn.astuple())

    mod = tvm.IRModule()
    mod["main"] = func
    mod = WhiteListAnnotator(["nn.batch_norm", "nn.conv2d"], "test_compiler")(mod)

    pipeline = tvm.transform.Sequential([
        transform.InferType(),
        transform.PartitionGraph(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.AlterOpLayout(),
        transform.Inline(),
    ])
    with relay.build_config(opt_level=3):
        mod = pipeline(mod)
    return mod
def test_const_folding():
    """With the custom conv2d legalization hook installed, folding a
    constant-kernel QNN conv2d must not leave reshape ops behind."""
    with TempOpAttr("qnn.conv2d", "FTVMQnnLegalize", legalize_qnn_conv2d):
        ishape = (2, 4, 2, 4)
        idtype = 'uint8'
        kshape = (3, 4, 2, 2)
        kdtype = 'uint8'
        weight_np = np.random.randint(low=0, high=255, size=kshape).astype(kdtype)
        inp = relay.var("data", shape=ishape, dtype=idtype)
        func = get_qnn_func(
            inp,
            relay.const(weight_np),
            input_zero_point=8,
            kernel_zero_point=3,
            kernel_size=(2, 2),
            input_scale=1.0,
            kernel_scale=1.0,
            padding=(0, 0),
            strides=(1, 1),
            dilation=(1, 1),
            data_layout="NCHW",
            kernel_layout="OIHW",
            out_dtype="int32",
            groups=1,
        )
        folded_mod = transform.FoldConstant()(func)
        assert "reshape" not in folded_mod["main"].astext()
def get_pass_order(use_patterns):
    """
    Get the pass ordering based on using predicates or patterns.

    Parameters
    ----------
    use_patterns: Bool
        True if pass needs to work with op patterns

    Returns
    ----------
    ret : Sequential
        Pass object
    """
    # Passes shared by both orderings.
    head = [
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
    ]
    if use_patterns:
        # Pattern-based flow: merge composites first, inline them afterwards.
        tail = [
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("tensorrt"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.InlineComposites("tensorrt"),
            transform.InferType(),
        ]
    else:
        # Predicate-based flow: annotate directly.
        tail = [
            transform.AnnotateTarget("tensorrt"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.InferType(),
        ]
    return tvm.transform.Sequential(head + tail)
def test_partial_constant():
    """Test the subgraph with (const, var, const, var) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (10, 10)

    in_1 = relay.var("in_1", shape=ishape, dtype=dtype)
    in_2 = relay.var("in_2", shape=ishape, dtype=dtype)
    in_3 = relay.var("in_3", shape=ishape, dtype=dtype)
    in_4 = relay.var("in_4", shape=ishape, dtype=dtype)

    add1 = relay.add(in_1, in_2)
    add2 = relay.add(add1, in_3)
    # BUGFIX: the chain previously re-used in_3 for both remaining adds, so
    # in_4 never appeared in the graph even though the docstring promises a
    # (const, var, const, var) pattern and check_result feeds data for in_4.
    add3 = relay.add(add2, in_4)
    add4 = relay.add(add3, in_3)
    func = relay.Function([in_1, in_2, in_3, in_4], add4)

    ref_mod = tvm.IRModule.from_expr(func)
    ref_mod = relay.transform.InferType()(ref_mod)

    # in_1 / in_3 are bound as constants; in_2 / in_4 stay runtime inputs.
    data1 = np.random.uniform(0, 1, ishape).astype(dtype)
    data3 = np.random.uniform(0, 1, ishape).astype(dtype)
    params = {
        "in_1": tvm.nd.array(data1, device=tvm.cpu(0)),
        "in_3": tvm.nd.array(data3, device=tvm.cpu(0)),
    }
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    opt_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])
    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        mod = opt_pass(ref_mod)

    data2 = np.random.uniform(0, 1, ishape).astype(dtype)
    data4 = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod, ref_mod, {"in_2": data2, "in_4": data4}, (10, 10), tol=1e-5)
def partition_for_tensorrt(
    mod: tvm.IRModule,
    params: Optional[Dict[str, tvm.nd.NDArray]] = None,
    # CAUTION: Can't use default Target("tensorrt") here since the target kind is only available
    # if is_tensorrt_compiler_enabled() == True.
    target: Optional[tvm.target.Target] = None,
) -> tvm.IRModule:
    """Partition all functions in mod to greedily offload supported operators to TensorRT.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to partition.
    target : tvm.target.Target
        A target of kind "tensorrt" describing additional partitioning and compilation options.
    params : Optional[Dict[str, tvm.nd.NDArray]]
        Constant input parameters.

    Returns
    -------
    partitioned_mod : tvm.IRModule
        The partitioned module.
    """
    assert is_tensorrt_compiler_enabled(), "Can only partition for TensorRT if it is enabled"

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    if target is None:
        # Use a default target. The get_tensorrt_target() function will similarly create an
        # equivalent default target when compilation continues after partitioning.
        target = tvm.target.Target("tensorrt")

    conv_layouts = {
        "nn.conv1d": ["NCW", "default"],
        "nn.conv2d": ["NCHW", "default"],
        "nn.conv3d": ["NCDHW", "default"],
        "nn.conv2d_transpose": ["NCHW", "default"],
    }
    pipeline = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(conv_layouts),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
    with target:
        mod = pipeline(mod)
    return prune_tensorrt_subgraphs(mod)
def test_fold_qnn_conv2d_qnn_mul():
    """An all-constant qnn.conv2d -> qnn.mul chain folds only with fold_qnn=True."""

    def make_unfolded():
        dtype = "uint8"
        ones = np.ones((1, 1, 2, 2), dtype=dtype)
        conv = relay.qnn.op.conv2d(
            relay.const(ones, dtype=dtype),
            relay.const(ones, dtype=dtype),
            input_zero_point=relay.const(0, "int32"),
            kernel_zero_point=relay.const(0, "int32"),
            input_scale=relay.const(1.0, "float32"),
            kernel_scale=relay.const(1.0, "float32"),
            kernel_size=(2, 2),
            channels=1,
        )
        product = relay.qnn.op.mul(
            conv,
            relay.const(np.array([10], dtype="int32"), dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
        )
        return relay.Function([], product)

    def make_folded():
        # 2x2 ones conv over 2x2 ones = 4, times 10 = 40.
        folded = relay.const(np.array([[[[40]]]], dtype="int32"), dtype="int32")
        return relay.Function([], folded)

    # Nothing changed after applying FoldConstant
    tvm.ir.assert_structural_equal(
        run_opt_pass(make_unfolded(), transform.FoldConstant()),
        run_opt_pass(make_unfolded(), transform.InferType()),
    )
    # Fold QNN constants
    tvm.ir.assert_structural_equal(
        run_opt_pass(make_unfolded(), transform.FoldConstant(fold_qnn=True)),
        run_opt_pass(make_folded(), transform.InferType()),
    )
def preprocess_module(mod):
    """
    Pre-process a module containing functions ready for ACL codegen. For now we enforce OHWI
    kernel layout and fold the transforms away.

    Parameters
    ----------
    mod : Module
        The module to run passes on.

    Returns
    -------
    preprocessed_mod : The processed module.
    """

    def convert_layout_conv2d(conv2d_function):
        # Wrap a conv2d builder into an FTVMConvertOpLayout callback that
        # rebuilds the op with the desired data/kernel layouts.
        def convert_conv(attrs, inputs, tinfos, desired_layouts):
            new_attrs = dict(attrs)
            data_info = tinfos[0]
            weight_info = tinfos[1]
            desired_data_layout, desired_kernel_layout = map(str, desired_layouts)
            new_attrs["data_layout"] = desired_data_layout
            new_attrs["kernel_layout"] = desired_kernel_layout
            if is_depthwise_conv2d(
                data_info.shape,
                attrs["data_layout"],
                weight_info.shape,
                attrs["kernel_layout"],
                attrs["groups"],
            ):
                # Depthwise conv2d needs a rotated kernel layout: move the last
                # axis of the desired layout to the front (e.g. "OHWI" -> "IHWO").
                dkl = desired_kernel_layout
                new_attrs["kernel_layout"] = dkl[3] + dkl[1:3] + dkl[0]
            return conv2d_function(*inputs, **new_attrs)

        return convert_conv

    # Temporarily override the layout-conversion hook for both the fp32 and
    # the quantized conv2d ops while ConvertLayout runs.
    with OpAttrContext(
        "nn.conv2d", "FTVMConvertOpLayout", convert_layout_conv2d(tvm.relay.nn.conv2d)
    ), OpAttrContext(
        "qnn.conv2d", "FTVMConvertOpLayout", convert_layout_conv2d(tvm.relay.qnn.op.conv2d)
    ):
        seq = tvm.transform.Sequential(
            [
                transform.ConvertLayout(
                    {"nn.conv2d": ["NHWC", "OHWI"], "qnn.conv2d": ["NHWC", "OHWI"]}
                ),
                # Fold the layout_transform ops on constant weights away.
                transform.FoldConstant(),
            ]
        )
        preprocessed_mod = seq(mod)
    return preprocessed_mod
def test_fold_full():
    """relay.full with a constant fill value is deliberately left unfolded."""
    full_shape = (8, 9, 10)

    def build():
        dtype = 'float32'
        return relay.full(relay.const(1.0, dtype), full_shape, dtype=dtype)

    zz = run_opt_pass(build(), transform.FoldConstant())
    # expect no changes
    zexpected = run_opt_pass(build(), transform.InferType())
    assert relay.analysis.graph_equal(zz, zexpected)
def partition_for_bnns(mod, params=None):
    """Partition the graph greedily offloading supported operators to BNNS.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    passes = [
        transform.InferType(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        transform.DynamicToStatic(),
        transform.AlterOpLayout(),
        # TODO(apeskov): WA. AlterOpLayout call lead to constants shape transformation
        #   Some expand_dims op may appears after constants. It breaks BNNS fusing.
        #   So we have to call FoldConstant right before bnns composite passes.
        transform.FoldConstant(),
        transform.MergeComposite(get_pattern_table("bnns")),
        transform.AnnotateTarget("bnns"),
        # If you no need in per layer performance statistic you can
        # uncomment next line
        # transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ]
    return tvm.transform.Sequential(passes)(mod)
def test_constant():
    """Test the subgraph with (var, const, ...) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)

    data = relay.var("data", shape=ishape, dtype=dtype)
    weight = relay.var("weight", shape=wshape, dtype=dtype)
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    conv = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), padding=(1, 1))
    bn = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean, bn_mvar)
    out = relay.nn.relu(bn[0])
    func = relay.Function(relay.analysis.free_vars(out), out)

    ref_mod, params = tvm.relay.testing.create_workload(func)
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    # First fold batch_norm into the conv weights, then partition for DNNL.
    remove_bn_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
    ])
    composite_partition = tvm.transform.Sequential([
        transform.MergeComposite(get_pattern_table("dnnl")),
        transform.AnnotateTarget("dnnl"),
        transform.PartitionGraph(),
    ])

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        ref_mod = remove_bn_pass(ref_mod)
        mod = composite_partition(ref_mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod, ref_mod, {"data": i_data}, (1, 32, 14, 14), tol=1e-5)
def legalize_qnn_for_dnnl(mod):
    """Transform qnn primitives to DNNL compatible form. Eliminate source zero
    point and apply strict sequence of post ops."""
    mod["main"] = rewrite(LegalizeQnnOpForDnnl(), mod["main"])

    cleanup = tvm.transform.Sequential([
        transform.InferType(),
        # transform.SimplifyInference(),  # TODO: this pass decompose nn.layer_norm
        # transform.FoldScaleAxis(),  # TODO: fail inside TVM in case of grouped convolutions.
        transform.FoldConstant(),
    ])
    with tvm.transform.PassContext(opt_level=3):
        mod = cleanup(mod)
    return mod
def test_fold_full():
    """relay.full of a constant scalar is intentionally not folded."""
    full_shape = (8, 9, 10)

    def build():
        dtype = "float32"
        return relay.full(relay.const(1.0, dtype), full_shape, dtype=dtype)

    actual = run_opt_pass(build(), transform.FoldConstant())
    # expect no changes
    reference = run_opt_pass(build(), transform.InferType())
    assert tvm.ir.structural_equal(actual, reference)
def test_fold_if():
    """relay.If over a constant condition folds to the evaluated branch."""
    x_data = np.array([[1, 2, 3]]).astype("float32")

    def check(cond_value, expected_data):
        # Build `if cond: x + y else: x - y` with everything constant.
        def build():
            cond = relay.const(np.array(cond_value).astype("bool"))
            lhs = relay.const(x_data)
            rhs = relay.const(x_data)
            return relay.Function([], relay.If(cond, lhs + rhs, lhs - rhs))

        def reference():
            return relay.Function([], relay.const(expected_data))

        folded = run_opt_pass(build(), transform.FoldConstant())
        expected_ir = run_opt_pass(reference(), transform.InferType())
        tvm.ir.assert_structural_equal(folded, expected_ir)

    # True branch: x + y
    check(1, x_data + x_data)
    # False branch: x - y
    check(0, x_data - x_data)
def simplify_model(mod):
    """
    Simplify execution graph

    At least merge BatchNorm into convolution. For this purpose decompose BN primitive
    into simple operation which can be calculated as const expr and after that merged
    into nearest conv/dense primitive.
    """
    passes = [
        transform.InferType(),
        transform.FoldConstant(),
        transform.SimplifyInference(),
        transform.FoldScaleAxis(),
    ]
    return tvm.transform.Sequential(passes)(mod)
def test_concatenate_const():
    """Concatenating two constants folds into a single constant."""

    def build_input():
        arr = tvm.nd.array(np.array([1.0, 2.0, 3.0]))
        c = relay.const(arr)
        return relay.Function([], relay.op.concatenate([c, c], axis=0))

    def build_expected():
        arr = tvm.nd.array(np.array([1.0, 2.0, 3.0, 1.0, 2.0, 3.0]))
        return relay.Function([], relay.const(arr))

    actual = run_opt_pass(build_input(), transform.FoldConstant())
    reference = run_opt_pass(build_expected(), transform.InferType())
    assert tvm.ir.structural_equal(actual, reference)
def test_fold_concat():
    """relay.concatenate over constant operands is folded to one constant."""
    base = np.array([[1, 2, 3]]).astype("float32")

    def build_input():
        lhs = relay.const(base)
        rhs = relay.const(base)
        return relay.Function([], relay.concatenate((lhs, rhs), axis=0))

    def build_expected():
        return relay.Function([], relay.const(np.concatenate((base, base), axis=0)))

    actual = run_opt_pass(build_input(), transform.FoldConstant())
    reference = run_opt_pass(build_expected(), transform.InferType())
    assert tvm.ir.structural_equal(actual, reference)