def run_collage(
    input_mod, targets, cost_estimator, expected_mod, tvm_max_depth=8, byoc_max_depth=8
):
    """Partition ``input_mod`` with CollagePartition and compare to ``expected_mod``.

    Both modules are type-inferred first. The partitioner runs inside a
    PassContext carrying the two collage depth limits. On a structural
    mismatch the input, actual and expected modules are printed in full and
    a structural-equality assertion is raised.
    """
    expected_mod = InferType()(expected_mod)
    pass_ctxt = tvm.transform.PassContext(
        config={
            "relay.collage.tvm_max_depth": tvm_max_depth,
            "relay.collage.byoc_max_depth": byoc_max_depth,
        }
    )
    with pass_ctxt:
        config = make_compilation_config(pass_ctxt, targets)
        actual_mod = InferType()(input_mod)
        # Capture indexes only to help debug failing tests
        actual_mod = CapturePostDfsIndexInSpans()(actual_mod)
        actual_mod = CollagePartition(config, cost_estimator)(actual_mod)

        matches = tvm.ir.structural_equal(actual_mod, expected_mod, map_free_vars=True)
        if not matches:
            # Print everything in full so we can see what's going on when things fail.
            dumps = (
                ("Input module:", input_mod),
                ("Actual module:", actual_mod),
                ("Expected module:", expected_mod),
            )
            for heading, module in dumps:
                print(heading)
                print(module)
            # Assert again so as to see the actual disagreeing sub-expressions.
            tvm.ir.assert_structural_equal(actual_mod, expected_mod, map_free_vars=True)
def transform_module(self, mod, _):
    """Invoke the pass on ``mod`` and return the rewritten module.

    Imports ``core.rly``, runs type inference, performs context analysis and
    then rewrites every global function of the module with
    ``ManifestAllocPass``. With more than one target the analysis uses a
    fallback device (taken from the ``relay.fallback_device_type`` pass
    config when present, otherwise ``cpu(0)``); otherwise it uses the device
    of the first target.
    """
    # TODO(@jroesch): Is there a way to do one shot initialization?
    # can we have def pass_init?
    mod.import_from_std("core.rly")
    mod = InferType()(mod)

    assert isinstance(self.targets, (dict, container.Map))
    if len(self.targets) <= 1:
        # Homogeneous case: analyse against the first target's device.
        if isinstance(self.targets, dict):
            dev = list(self.targets.keys())[0]
        else:
            dev, _ = self.targets.items()[0]
        ca = context_analysis(mod, nd.context(dev.value))
    else:
        # Heterogeneous case: pick the fallback device.
        pass_ctx = PassContext.current()
        fallback_ctx = (
            nd.context(pass_ctx.config["relay.fallback_device_type"])
            if "relay.fallback_device_type" in pass_ctx.config
            else cpu(0)
        )
        ca = context_analysis(mod, TVMContext(fallback_ctx.device_type, 0))

    # The following code can be used for debugging the module after
    # annotation.
    # print(mod.astext(show_meta_data=False, annotate=mk_analysis_annotator(ca)))

    for gv, func in mod.functions.items():
        rewriter = ManifestAllocPass(self.target_host, ca)
        mod.update_func(gv, rewriter.visit(func))
    return mod
def __init__(self, mod=None):
    """Initialise the wrapper around ``mod`` with empty build settings.

    Parameters
    ----------
    mod : optional
        Module whose ``"main"`` function supplies the input parameters and
        the output type.
        NOTE(review): the default ``None`` is passed straight to
        ``InferType`` -- presumably callers always supply a module; confirm.
    """
    self.target_host = None
    self.build_func = None
    self.params = None
    self.target = None
    self.name = None
    self.dev = None
    self.idx = None
    self.mod = mod
    # Run type inference once and read both the parameters and the return
    # type from the same inferred "main" (previously inferred twice).
    main_func = InferType()(mod)["main"]
    self.input_params = main_func.params
    self.output_type = main_func.checked_type.ret_type
    self.input_bindings = PipelineConfig.BindingList(self, "input")
    self.output_bindings = PipelineConfig.BindingList(self, "output")
def test_batch_matmul_simple():
    """Check that batch_matmul is converted to accumulate in fp16.

    Batch matmul is a special case: heterogeneous accumulation dtypes do not
    work on all platforms at the moment, so the expected module uses a
    float16 ``out_dtype``.
    """
    data_shape = [1, 1, 20]
    weight_shape = [1, 20, 20]

    matmul = relay.nn.batch_matmul(
        relay.var("data", shape=data_shape),
        relay.var("weight", shape=weight_shape),
    )
    mod = tvm.IRModule.from_expr(matmul)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    output_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=0.01)

    # Build the module we expect the pass to produce.
    fp16_data = relay.cast(relay.var("data", shape=data_shape), "float16")
    fp16_weight = relay.cast(relay.var("weight", shape=weight_shape), "float16")
    expected = relay.nn.batch_matmul(fp16_data, fp16_weight, out_dtype="float16")
    expected_mod = InferType()(tvm.IRModule.from_expr(expected))
    assert tvm.ir.structural_equal(expected_mod, output_mod)
def test_where_simple():
    """Check mixed-precision conversion of a dense op feeding ``relay.where``."""
    data_shape = [1, 20]
    weight_shape = [20, 20]

    data = relay.var("data", shape=data_shape)
    weight = relay.var("weight", shape=weight_shape)
    dense = relay.nn.dense(data, weight, units=20)
    selected = relay.where(data, dense, dense)
    mod = tvm.IRModule.from_expr(selected)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    output_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=0.01)

    # Build the module we expect the pass to produce.
    fp16_data = relay.cast(relay.var("data", shape=data_shape), "float16")
    fp16_weight = relay.cast(relay.var("weight", shape=weight_shape), "float16")
    fp16_dense = relay.nn.dense(fp16_data, fp16_weight, units=20, out_dtype="float16")
    expected = relay.where(fp16_data, fp16_dense, fp16_dense)
    expected_mod = InferType()(tvm.IRModule.from_expr(expected))
    assert tvm.ir.structural_equal(expected_mod, output_mod)
def test_convert_follow_node_with_integer_arguments():
    """Check conversion of a follow op taking integer and float arguments.

    The follow op should convert the floating point argument into fp16, as
    constants/vars are always converted when it is safe to do so.
    """
    data_shape = [1, 10]
    fp32_data = relay.var("data", shape=data_shape, dtype="float32")

    # We use an addition to make sure the input indices are not a var
    # (which are always casted if safe)
    index_var = relay.var("indices", shape=[1, 1], dtype="int32")
    indices = index_var + relay.const(0, dtype="int32")
    mod = tvm.IRModule.from_expr(relay.take(fp32_data, indices, axis=0))
    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "indices": np.array([[0]]).astype("int32"),
    }
    output_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=0.01)

    # Build the module we expect the pass to produce; indices are reused as-is.
    fp16_data = relay.cast(relay.var("data", shape=data_shape), "float16")
    expected = relay.take(fp16_data, indices, axis=0)
    expected_mod = InferType()(tvm.IRModule.from_expr(expected))
    assert tvm.ir.structural_equal(expected_mod, output_mod)
def verify_mixed_precision_output_close(
    mod: tvm.runtime.Module,
    mod_params: Dict[str, Any],
    mixed_precision_dtype="float16",
    rtol: float = 1e-3,
    atol: float = 0,
    keep_orig_output_dtype=False,
) -> tvm.runtime.Module:
    """Run ``mod`` before and after ToMixedPrecision and compare the outputs.

    The module is type-inferred and executed, converted with
    ``ToMixedPrecision(mixed_precision_dtype)`` and executed again; the two
    sets of results must agree within ``rtol``/``atol``. When
    ``keep_orig_output_dtype`` is set, the conversion runs under a
    PassContext enabling ``relay.ToMixedPrecision.keep_orig_output_dtype``
    and the output dtypes are additionally required to match.

    Returns the converted module.
    """
    mod = InferType()(mod)
    result_fp32 = run_module(mod, mod_params)

    if keep_orig_output_dtype:
        keep_dtype_config = {"relay.ToMixedPrecision.keep_orig_output_dtype": True}
        with tvm.transform.PassContext(config=keep_dtype_config):
            fp16_mod = ToMixedPrecision(mixed_precision_dtype)(mod)
            result_fp16 = run_module(fp16_mod, mod_params)
    else:
        fp16_mod = ToMixedPrecision(mixed_precision_dtype)(mod)
        result_fp16 = run_module(fp16_mod, mod_params)

    # Ensure the results are close
    for reference, converted in zip(result_fp32, result_fp16):
        np.testing.assert_allclose(reference, converted, rtol=rtol, atol=atol)

    if keep_orig_output_dtype:
        converted_dtype = np.array(result_fp16).dtype
        reference_dtype = np.array(result_fp32).dtype
        assert converted_dtype == reference_dtype, "output type and original type mismatch"

    return fp16_mod
def test_green_red_not_use_extraneous_cast():
    """Check that no redundant cast pair appears between conv2d and softmax.

    Conv. is a green listed operation while softmax is red. Conv. also by
    default accumulates to fp32 but outputs fp16. Because softmax wants to
    operate on FP32 we could naively end up with

        conv (FP32) -> cast (FP16) -> cast (FP32) -> softmax (FP32)

    To avoid this, the pass caches the output nodes of each cast together
    with the reverse of the cast back to the original node. Casting
    `conv (FP32)` to FP16 yields `conv (FP32) -> cast (FP16)`; casting the
    same node again returns that cached result rather than a new cast node,
    and casting the `cast (FP16)` node back to FP32 simply returns
    `conv (FP32)`. This test makes sure this behavior occurs.
    """
    data_shape = (1, 3, 32, 32)
    weight_shape = (5, 3, 3, 3)
    data = relay.var("data", shape=data_shape, dtype="float32")
    weight = relay.var("weight", shape=weight_shape, dtype="float32")

    fp32_conv = relay.nn.conv2d(
        data, weight, strides=(1, 1), padding=(1, 1), out_dtype="float32"
    )
    mod = tvm.IRModule.from_expr(relay.nn.softmax(fp32_conv))
    mod_params = {
        "data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    fp16_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=1e-3)

    # Construct expected structure
    fp16_conv = relay.nn.conv2d(
        relay.cast(data, "float16"),
        relay.cast(weight, "float16"),
        strides=(1, 1),
        padding=(1, 1),
        out_dtype="float16",
    )
    expected = relay.nn.softmax(relay.cast(fp16_conv, "float32"))
    expected_mod = InferType()(tvm.IRModule.from_expr(expected))
    assert tvm.ir.structural_equal(expected_mod, fp16_mod)
def test_let_statement_simple():
    """Check conversion of two nested let bindings.

    A 'simple' let statement example; noticeable is the mutation of the
    bound variable types (float32 in the input, float16 in the expected
    module).
    """
    act_shape = [1, 20]
    weight_shape = [20, 20]

    var1 = relay.var("var1", shape=act_shape)
    var2 = relay.var("var2", shape=act_shape)
    data = relay.var("data", shape=act_shape)
    weight = relay.var("weight", shape=weight_shape)

    inner_let = relay.Let(var2, relay.nn.dense(var1 + var1, weight, units=20), var2 + var2)
    outer_let = relay.Let(var1, relay.nn.dense(data, weight, units=20), inner_let)

    mod = tvm.IRModule.from_expr(outer_let)
    mod_params = {
        "data": np.random.uniform(-1, 1, size=act_shape).astype("float32"),
        "weight": np.random.uniform(-1, 1, size=weight_shape).astype("float32"),
    }
    output_mod = verify_mixed_precision_output_close(mod, mod_params, atol=0.01, rtol=0.01)

    # Construct expected structure
    var1 = relay.var("var1", shape=act_shape, dtype="float16")
    var2 = relay.var("var2", shape=act_shape, dtype="float16")
    data = relay.cast(relay.var("data", shape=act_shape), "float16")
    weight = relay.cast(relay.var("weight", shape=weight_shape), "float16")

    inner_dense = relay.nn.dense(var1 + var1, weight, units=20, out_dtype="float32")
    inner_let = relay.Let(var2, relay.cast(inner_dense, "float16"), var2 + var2)
    outer_dense = relay.nn.dense(data, weight, units=20, out_dtype="float32")
    outer_let = relay.Let(var1, relay.cast(outer_dense, "float16"), inner_let)

    expected_mod = InferType()(tvm.IRModule.from_expr(outer_let))
    assert tvm.ir.structural_equal(expected_mod, output_mod)
def test_conv2d_bwd():
    """Verify the conv2d backward function obtained via FirstOrderGradient.

    Builds an fp32 NCHW conv2d, differentiates it, extracts either the data
    gradient or the weight gradient (selected by ``verify_dgrad``) and hands
    the resulting function to ``verify_conv2d``.
    """
    in_channels = 16
    out_channels = 8
    dshape = (16, in_channels, 32, 32)
    wshape = (out_channels, in_channels, 3, 3)

    conv = get_conv2d_nchw(
        dshape,
        wshape,
        (0, 0),
        strides=(1, 1),
        out_dtype="float32",
        data_dtype="float32",
        weight_dtype="float32",
    )
    fwd_mod = InferType()(tvm.IRModule.from_expr(conv))

    # Note: large difference in tvm and cutlass Wgrad results if use fp16.
    # Cutlass wgrad uses fp32 accumulation even if the output is fp16.
    use_fp16 = False
    verify_dgrad = False  # False to verify wgrad
    tol = 1e-5 if verify_dgrad else 1e-4  # Wgrad slightly less accurate

    if use_fp16:
        fwd_mod = ToMixedPrecision("float16")(fwd_mod)

    fwd_bwd_func = FirstOrderGradient()(fwd_mod)["main"]

    # The gradient tuple sits at index 1 of the body; pick dgrad (0) or wgrad (1).
    grad_index = 0 if verify_dgrad else 1
    gradients = relay.TupleGetItem(fwd_bwd_func.body, 1)
    bwd_func = relay.Function(
        fwd_bwd_func.params,
        relay.TupleGetItem(gradients, grad_index),
    )

    tolerance = 1e-2 if use_fp16 else tol
    verify_conv2d(
        bwd_func,
        bwd_func,
        dshape,
        wshape,
        sm=80,
        atol=tolerance,
        rtol=tolerance,
        use_cudnn_ref=False,
        data_dtype="float32",
        weight_dtype="float32",
        use_vm=True,
    )
def verify_mixed_precision_output_close(
    mod: tvm.runtime.Module,
    mod_params: Dict[str, Any],
    mixed_precision_dtype="float16",
    rtol: float = 1e-3,
    atol: float = 0,
) -> tvm.runtime.Module:
    """Run ``mod`` before and after ToMixedPrecision and compare the outputs.

    The module is type-inferred and executed, converted with
    ``ToMixedPrecision(mixed_precision_dtype)`` and executed again; each pair
    of results must agree within ``rtol``/``atol``.

    Returns the converted module.
    """
    typed_mod = InferType()(mod)
    reference_results = run_module(typed_mod, mod_params)

    fp16_mod = ToMixedPrecision(mixed_precision_dtype)(typed_mod)
    converted_results = run_module(fp16_mod, mod_params)

    # Ensure the results are close
    for reference, converted in zip(reference_results, converted_results):
        np.testing.assert_allclose(reference, converted, rtol=rtol, atol=atol)

    return fp16_mod
def test_unused_function():
    """Check a module where one global function is only used in a dead branch.

    ``times_3`` is referenced solely from the else-branch of an ``If`` whose
    condition is the constant ``True``; the expected result is therefore the
    input multiplied by 2.
    """
    mod = tvm.IRModule()
    tensor_ty = relay.TensorType((2, 2), dtype="float32")

    times_2 = relay.GlobalVar("times_2")
    # define unused function
    times_3 = relay.GlobalVar("times_3")

    x1 = relay.var("x1", tensor_ty, dtype="float32")
    x2 = relay.var("x2", tensor_ty, dtype="float32")
    doubled = relay.multiply(x1, relay.const(2.0))
    tripled = relay.multiply(x2, relay.const(3.0))
    mod[times_2] = relay.Function([x1], doubled)
    mod[times_3] = relay.Function([x2], tripled)
    mod = InferType()(mod)

    x3 = relay.var("x3", tensor_ty, dtype="float32")
    # put unused function in else branch
    branch = relay.If(relay.const(True), times_2(x3), times_3(x3))
    mod["main"] = relay.Function([x3], branch)

    x_data = np.random.rand(2, 2).astype("float32")
    check_result([x_data], x_data * 2, mod=mod)
def _partition_call_operator(self, inputs, attr):
    """Convert a partitioned call op into a call to a Relay global function.

    The TensorFlow library function named by ``attr["f"]`` is looked up in
    the main graph's function library. If no Relay global function named
    ``func_<name>`` exists in the main module yet, the function definition is
    converted to a graph, translated to Relay and registered in the module.

    Parameters
    ----------
    inputs : List[tvm.relay.Expr]
        List of input symbols.

    attr : Dict[tvm.Attrs]
        Dict of operator attributes; the ``"f"`` entry names the function.

    Returns
    -------
    op : tvm.relay.Expr
        Converted relay expression (a call to the global function).

    Raises
    ------
    ImportError
        If TensorFlow cannot be imported.
    Exception
        If the function is not found in the library, if its nodes are
        assigned to more than one device, or if a function parameter is
        neither an input nor a converted parameter.
    """
    try:
        from tensorflow.python.framework import function_def_to_graph
    except ImportError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise ImportError(
            "Unable to import tensorflow which is required {}".format(e)
        ) from e

    main_graph_proto = self._main_graph_proto
    outer_graph_def = main_graph_proto._graph

    node_func_name = attr.get("f").name
    func = next(
        (f for f in outer_graph_def.library.function if f.signature.name == node_func_name),
        None,
    )
    # Explicit None check, matching _convert_function, instead of relying on
    # the truthiness of the function definition object.
    if func is None:
        raise Exception("Function not found - {}".format(node_func_name))

    devices = {node.device for node in func.node_def}
    if len(devices) > 1:
        raise Exception(
            "Found inconsistent Device assignment in the "
            "Stateful Partitioned SubGraph. Rejecting "
            "the subgraph "
        )

    # Convert function definition to graph
    func_input_shapes = func.attr["_input_shapes"].list.shape
    subgraph, _ = function_def_to_graph.function_def_to_graph_def(func, func_input_shapes)

    # Computing subgraph's input shape dictionary
    subgraph_shape_dict, input_expr_dict = {}, {}
    for f_arg, input_expr in zip(func.signature.input_arg, inputs):
        input_expr_dict[f_arg.name] = input_expr
        subgraph_shape_dict[f_arg.name] = _infer_shape(input_expr, main_graph_proto._mod)

    func_name = "func_{}".format(func.signature.name)
    try:
        # Reuse the global function when it is already in the module.
        global_func = main_graph_proto._mod[func_name]
        sub_func = global_func
        sub_params = main_graph_proto._params
    except ValueError:
        # Construct relay nodes from the subgraph
        g1 = SubGraphProto(main_graph_proto)
        sub_func, sub_params = g1.from_tensorflow(subgraph, shape=subgraph_shape_dict)
        main_graph_proto._params.update(sub_params)
        func_expr = _function.Function(sub_func.params, sub_func.body)
        global_func = tvm.relay.GlobalVar(func_name)
        main_graph_proto._mod[global_func] = func_expr
        main_graph_proto._mod = InferType()(main_graph_proto._mod)

    param_exprs = []
    for param_expr in sub_func.params:
        # sub_params is subset of sub_func.params
        param_name = param_expr.vid.name_hint
        if param_name in input_expr_dict:
            param_exprs.append(input_expr_dict[param_name])
        elif param_name in sub_params:
            param_exprs.append(param_expr)
        else:
            raise Exception("Input parameter {} not found".format(param_name))

    sb = tvm.relay.scope_builder.ScopeBuilder()
    loop_ret = global_func(*param_exprs)
    sb.ret(loop_ret)
    return sb.get()
def _convert_function(
    module, graph, inputs, attr, node_func_name, prelude, gdef_lib, in_shapes=None
):
    """Convert the given tf function node into a Relay function call.

    Parameters
    ----------
    module : IRModule where converted function is stored

    graph : <class 'tensorflow.core.framework.graph_pb2.GraphDef'>
        Top level tf graphdef; its function library is searched for
        ``node_func_name``.

    inputs : List[tvm.relay.Expr]
        List of input symbols. Parameters for the function.

    attr : Dict[tvm.Attrs]
        Dict of operator attributes (not read by this function).

    node_func_name : str
        Name of tf2 node to be converted.

    prelude
        Used for input type inference; its ``mod`` is replaced with the
        updated module after a new function is registered.

    gdef_lib
        Mapping from function name to the subgraph to convert.

    in_shapes : optional
        Not read by this function.

    Returns
    -------
    op : tvm.relay.Expr
        <class 'tvm.relay.expr.Call'>

    Examples
    --------
    A tf function "x+1" is implemented as a subgraph in the library section
    of the graph. This subgraph is converted to a relay function such as

        fn (%x: float32) {
          add(%x, 1f) /* Identity */
        }

    The subgraph has a function name such as __inference_add_95, and the tf
    function call operator is returned as a relay expression such as:

        free_var %x: float32;
        @func___inference_add_95(%x)
    """
    func = None
    for candidate in graph.library.function:
        if candidate.signature.name == node_func_name:
            func = candidate
            break
    if func is None:
        raise Exception("Function not found - {}".format(node_func_name))

    devices = {node.device for node in func.node_def}
    if len(devices) > 1:
        raise Exception(
            "node_def in function {} contains > 1 types of devices {}".format(
                node_func_name, devices
            )
        )

    subgraph = gdef_lib[node_func_name]
    # preserve library functions in subgraphs to make them available to nested functions
    for library_fn in graph.library.function:
        subgraph.library.function.add().CopyFrom(library_fn)

    # Computing subgraph's input shape and type dictionaries
    input_expr_dict = {}
    input_types = {}
    for f_arg, input_ in zip(func.signature.input_arg, inputs):
        arg_name = f_arg.name
        input_expr_dict[arg_name] = input_
        input_types[arg_name] = _infer_type_with_prelude(input_, prelude)

    func_name = "func_{}".format(func.signature.name)
    try:
        # Reuse the global function when the module already holds it.
        global_func = module.mod[func_name]
        sub_func = global_func
        sub_params = module.params
    except ValueError:
        # Construct relay nodes from the subgraph
        sub_proto = GraphProto(module)
        output_sig = [func.ret[out_arg.name] for out_arg in func.signature.output_arg]
        # TODO: unify prelude and main IRModules
        sub_func, sub_params = sub_proto.from_tensorflow(
            subgraph, outputs=output_sig, input_types=input_types, gdef_lib=gdef_lib
        )
        module.params.update(sub_params)
        global_func = tvm.relay.GlobalVar(func_name)
        module.mod[global_func] = _function.Function(sub_func.params, sub_func.body)
        module.mod = InferType()(module.mod)
        prelude.mod = module.mod

    param_exprs = []
    for param_expr in sub_func.params:
        # sub_params is subset of sub_func.params
        param_name = param_expr.vid.name_hint
        if param_name in input_expr_dict:
            param_exprs.append(input_expr_dict[param_name])
        elif param_name in sub_params:
            param_exprs.append(param_expr)
        else:
            raise Exception("Input parameter {} not found".format(param_name))

    sb = tvm.relay.scope_builder.ScopeBuilder()
    sb.ret(global_func(*param_exprs))
    return sb.get()