# NOTE: these tests are collected from different TVM test files; the imports
# below follow each file's upstream usage.
import numpy as np
import pytest

import tvm
from tvm import relay
from tvm.relay import transform
from tvm.relay.backend import compile_engine
from tvm.relay.backend.contrib.uma.api import UMAPartitioner
from tvm.relay.build_module import bind_params_by_name
from tvm.relay.op.contrib.register import get_pattern_table
from tvm.relay.testing import mlp, resnet


def test_partition_table():
    partitioner = UMAPartitioner("test_partition")
    assert get_pattern_table("test_partition") is None

    partitioner.register()

    assert get_pattern_table("test_partition") is not None
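# For context: a pattern-table entry pairs a name with a dataflow pattern (and
# an optional predicate). Below is a minimal sketch of registering a hand-built
# conv2d + relu pattern on a UMA partitioner; the "my_backend" target name and
# the pattern itself are illustrative, not part of the tests in this file.
from tvm.relay.dataflow_pattern import is_op, wildcard


def _conv2d_relu_pattern():
    # Match nn.conv2d followed immediately by nn.relu, with arbitrary inputs.
    conv = is_op("nn.conv2d")(wildcard(), wildcard())
    return is_op("nn.relu")(conv)


def example_register_custom_pattern():
    partitioner = UMAPartitioner("my_backend")
    partitioner.add_pattern("my_backend.conv2d_relu", _conv2d_relu_pattern())
    partitioner.register()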
def run_qnn_mergecomposite(script_module, input_name, ishape):
    input_shapes = [(input_name, ishape)]
    mod, params = relay.frontend.from_pytorch(script_module, input_shapes)
    pattern_table = get_pattern_table("test_table")
    with tvm.transform.PassContext(opt_level=3):
        pass_list = [
            tvm.relay.transform.SimplifyInference(),
            tvm.relay.transform.MergeComposite(pattern_table),
        ]
        composite_partition = tvm.transform.Sequential(pass_list)
        partitioned = composite_partition(mod)
    return partitioned
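# Usage sketch for the helper above, assuming torch is installed and a
# "test_table" pattern table has been registered elsewhere (e.g. via
# tvm.relay.op.contrib.register.register_pattern_table). The model, the
# "input0" name, and the shape are illustrative.
def example_run_qnn_mergecomposite():
    import torch

    class ConvRelu(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.conv = torch.nn.Conv2d(3, 8, 3, padding=1)

        def forward(self, x):
            return torch.relu(self.conv(x))

    ishape = (1, 3, 32, 32)
    script_module = torch.jit.trace(ConvRelu().eval(), torch.randn(ishape))
    return run_qnn_mergecomposite(script_module, "input0", ishape)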
def test_constant():
    """Test the subgraph with (var, const, ...) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)

    data = relay.var("data", shape=ishape, dtype=dtype)
    weight = relay.var("weight", shape=wshape, dtype=dtype)
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), padding=(1, 1))
    bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar)
    out = bn_output[0]
    out = relay.nn.relu(out)

    func = relay.Function(relay.analysis.free_vars(out), out)
    ref_mod, params = tvm.relay.testing.create_workload(func)
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    remove_bn_pass = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
        ]
    )

    dnnl_patterns = get_pattern_table("dnnl")
    composite_partition = tvm.transform.Sequential(
        [
            transform.MergeComposite(dnnl_patterns),
            transform.AnnotateTarget("dnnl"),
            transform.PartitionGraph(),
        ]
    )

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        ref_mod = remove_bn_pass(ref_mod)
        mod = composite_partition(ref_mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    # check_result is a comparison helper defined elsewhere in the original test file.
    check_result(mod, ref_mod, {"data": i_data}, (1, 32, 14, 14), tol=1e-5)
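# ``check_result`` above is defined elsewhere in the original test file. A
# hypothetical minimal version, for illustration only: run both the partitioned
# and the reference module on the same inputs and compare outputs element-wise.
def _check_result_sketch(mod, ref_mod, map_inputs, out_shape, tol=1e-5):
    def run(m):
        # Build and run on the graph executor; external (DNNL) functions are
        # compiled as part of relay.build when the codegen is available.
        ex = relay.create_executor("graph", mod=m, ctx=tvm.cpu(0))
        return ex.evaluate()(**map_inputs).asnumpy()

    out = run(mod)
    assert out.shape == out_shape
    np.testing.assert_allclose(out, run(ref_mod), rtol=tol, atol=tol)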
# The parametrization below is illustrative; upstream supplies the
# workload/backend/merge combinations via pytest.
@pytest.mark.parametrize("workload", ["resnet", "mlp"])
@pytest.mark.parametrize("backend", ["dnnl"])
@pytest.mark.parametrize("merge", [False, True])
def test_existing_pattern_tables(workload, backend, merge):
    """Check that the UMA partitioner creates the same partitions as the
    default BYOC partitioning flow."""
    partitioner = UMAPartitioner(backend, merge)
    pattern_table = get_pattern_table(backend)
    for entry in pattern_table:
        partitioner.add_pattern(*entry)

    if workload == "resnet":
        net = resnet.get_net(1, 10)
    elif workload == "mlp":
        net = mlp.get_net(1, 10)
    else:
        assert False, f"don't know how to find workload for {workload}"

    mod = tvm.ir.IRModule()
    mod["main"] = net

    partitioner.register()
    partitioned_mod = partitioner.partition(mod)

    def partition_default(mod):
        """Partition using the default BYOC flow."""
        sequence = [
            relay.transform.MergeComposite(pattern_table),
            relay.transform.AnnotateTarget(backend),
        ]
        if merge:
            sequence.append(relay.transform.MergeCompilerRegions())
        sequence.append(relay.transform.PartitionGraph())
        sequential = tvm.transform.Sequential(sequence)
        return sequential(mod)

    default_partitioned_mod = partition_default(mod)

    assert len(partitioned_mod.functions) == len(default_partitioned_mod.functions)
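# Quick inspection sketch (assumed parameter values): build a UMA-partitioned
# module and print its function names, which makes concrete what the
# function-count assertion above is comparing.
def example_inspect_partitions():
    backend, merge = "dnnl", True  # assumed values
    partitioner = UMAPartitioner(backend, merge)
    for entry in get_pattern_table(backend):
        partitioner.add_pattern(*entry)

    mod = tvm.ir.IRModule()
    mod["main"] = mlp.get_net(1, 10)

    partitioner.register()
    partitioned = partitioner.partition(mod)
    # IRModule.functions maps GlobalVar -> function; each extracted partition
    # shows up here alongside "main".
    for gvar in partitioned.functions:
        print(gvar.name_hint)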
def test_dnnl_fuse():
    dnnl_patterns = get_pattern_table("dnnl")
    conv2d_bias_relu_pat, conv2d_relu_pat = dnnl_patterns

    def get_blocks(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False):
        weight = relay.var(prefix + "weight")
        bn_gamma = relay.var(prefix + "bn_gamma")
        bn_beta = relay.var(prefix + "bn_beta")
        bn_mmean = relay.var(prefix + "bn_mean")
        bn_mvar = relay.var(prefix + "bn_var")
        layer = relay.nn.conv2d(
            data=data, weight=weight, kernel_size=(3, 3), channels=out_channel, padding=(1, 1)
        )
        if include_bn:
            bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar)
            layer = bn_output[0]
        if include_sigmoid:
            # dummy layer to prevent pattern detection
            layer = relay.sigmoid(layer)
        layer = relay.nn.relu(layer)
        return layer

    def get_net(include_bn=True, include_sigmoid=False):
        data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
        block1 = get_blocks("block1_", data, 3, 8, include_bn, include_sigmoid)
        # The second block is always conv + relu, to make it more interesting
        block2 = get_blocks("block2_", block1, 8, 8, False, include_sigmoid)
        return relay.Function(relay.analysis.free_vars(block2), block2)

    def get_partitioned_mod(mod, params, pattern_table):
        # This is required for constant folding
        mod["main"] = bind_params_by_name(mod["main"], params)
        remove_bn_pass = tvm.transform.Sequential(
            [
                transform.InferType(),
                transform.SimplifyInference(),
                transform.FoldConstant(),
                transform.FoldScaleAxis(),
            ]
        )
        composite_partition = tvm.transform.Sequential(
            [
                remove_bn_pass,
                transform.MergeComposite(pattern_table),
                transform.AnnotateTarget("dnnl"),
                transform.PartitionGraph(),
            ]
        )
        with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
            return composite_partition(mod)

    def test_detect_pattern(pattern_table, include_bn, include_sigmoid, num_expected_partition):
        net = get_net(include_bn, include_sigmoid)
        mod, params = tvm.relay.testing.create_workload(net)
        mod = get_partitioned_mod(mod, params, pattern_table)
        assert len(mod.functions) - 1 == num_expected_partition  # -1 for main

    def test_partition():
        # conv + bn + relu, conv + relu -> fused conv_bias_relu, conv, and relu
        test_detect_pattern([conv2d_bias_relu_pat], True, False, 3)
        # conv + bn + relu, conv + relu -> conv, bias, relu, and fused conv_relu
        test_detect_pattern([conv2d_relu_pat], True, False, 4)
        # conv + bn + relu, conv + relu -> fused conv_bias_relu and fused conv_relu
        test_detect_pattern([conv2d_bias_relu_pat, conv2d_relu_pat], True, False, 2)
        # conv + relu, conv + relu -> two fused conv_relu
        test_detect_pattern([conv2d_relu_pat], False, False, 2)
        # conv + relu, conv + relu -> no fusion, 4 partitions each with a single op
        test_detect_pattern([conv2d_bias_relu_pat], False, False, 4)
        # conv + bn + sigmoid + relu, conv + sigmoid + relu -> no fusion
        test_detect_pattern([conv2d_bias_relu_pat, conv2d_relu_pat], True, True, 5)

    def test_partition_mobilenet():
        mod, params = relay.testing.mobilenet.get_workload()
        mod = get_partitioned_mod(mod, params, dnnl_patterns)
        # 27 fused conv + bn + relu and one dense
        assert len(mod.functions) - 1 == 28  # -1 for main

    def test_exec(mod, params, ref_mod, ref_params, out_shape):
        ishape = (1, 3, 224, 224)
        i_data = np.random.randn(*ishape).astype(np.float32)
        ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
        ref_res = ref_ex.evaluate()(i_data, **ref_params)
        compile_engine.get().clear()

        mod = get_partitioned_mod(mod, params, dnnl_patterns)
        # check_result here follows the (different) signature used in the
        # original test_dnnl.py file.
        check_result(mod, {"data": i_data}, out_shape, ref_res.asnumpy(), tol=1e-5, params=params)
    test_partition()
    test_partition_mobilenet()

    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    net = get_net()
    mod, params = tvm.relay.testing.create_workload(net)
    ref_mod, ref_params = tvm.relay.testing.create_workload(net)
    test_exec(mod, params, ref_mod, ref_params, (1, 8, 224, 224))

    mod, params = relay.testing.mobilenet.get_workload()
    ref_mod, ref_params = relay.testing.mobilenet.get_workload()
    test_exec(mod, params, ref_mod, ref_params, (1, 1000))
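
if __name__ == "__main__":
    # Direct-run entry point (a sketch; upstream drives these tests through
    # pytest). test_existing_pattern_tables takes explicit arguments here,
    # with assumed values.
    test_partition_table()
    test_constant()
    test_existing_pattern_tables("mlp", "dnnl", True)
    test_dnnl_fuse()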