Example #1
0
def check_result(mod,
                 ref_mod,
                 map_inputs,
                 out_shape,
                 tol=1e-5,
                 target="llvm",
                 ctx=tvm.cpu(),
                 params=None):
    """Check that `mod` reproduces the output of `ref_mod`.

    `ref_mod` is built with ``relay.build`` and executed on the graph runtime
    to obtain the reference value.  `mod` is then executed both through the
    Relay VM and through the graph runtime, and each result is compared with
    the reference using ``tvm.testing.assert_allclose``.  Only output index 0
    is compared.

    Parameters
    ----------
    mod : module under test, passed to ``relay.vm.compile`` / ``relay.build``.
    ref_mod : module producing the reference output.
    map_inputs : dict mapping input names to input values.
    out_shape : shape used to allocate the graph-runtime output buffer.
    tol : value used as both ``rtol`` and ``atol`` in the comparison.
    target : build target forwarded to the compilers.
    ctx : device context used for execution and output allocation.
    params : optional parameters forwarded to the compilers.

    On Windows (``sys.platform == "win32"``) the check is skipped: a message
    is printed and the function returns without running anything.
    """
    if sys.platform == "win32":
        print("Skip test on Windows for now")
        return

    def run_on_graph_runtime(module):
        """Build `module` and return output 0 of a graph-runtime execution."""
        compile_engine.get().clear()
        with tvm.transform.PassContext(opt_level=3):
            # Named graph_json (not json) so the stdlib module name is not shadowed.
            graph_json, lib, built_params = relay.build(module,
                                                        target=target,
                                                        params=params)
        rt_mod = tvm.contrib.graph_runtime.create(graph_json, lib, ctx)

        for name, data in map_inputs.items():
            rt_mod.set_input(name, data)
        rt_mod.set_input(**built_params)
        rt_mod.run()
        out = tvm.nd.empty(out_shape, ctx=ctx)
        return rt_mod.get_output(0, out)

    # Run the reference result
    ref_result = run_on_graph_runtime(ref_mod).asnumpy()

    def check_vm_result():
        """Run `mod` on the Relay VM and compare against the reference."""
        compile_engine.get().clear()
        with tvm.transform.PassContext(opt_level=3):
            exe = relay.vm.compile(mod, target=target, params=params)
        # Pass the executable through save()/load_exec() before running it.
        code, lib = exe.save()
        exe = runtime.vm.Executable.load_exec(code, lib)
        vm = runtime.vm.VirtualMachine(exe)
        vm.init(ctx)
        out = vm.run(**map_inputs)
        tvm.testing.assert_allclose(out.asnumpy(),
                                    ref_result,
                                    rtol=tol,
                                    atol=tol)

    def check_graph_runtime_result():
        """Run `mod` on the graph runtime and compare against the reference."""
        out = run_on_graph_runtime(mod)
        tvm.testing.assert_allclose(out.asnumpy(),
                                    ref_result,
                                    rtol=tol,
                                    atol=tol)

    check_vm_result()
    check_graph_runtime_result()
Example #2
0
 def check_vm_result():
     """Compile `mod` for the Relay VM, run it and compare with `ref_result`.

     Uses `mod`, `target`, `params`, `ctx`, `map_inputs`, `ref_result` and
     `tol` from the enclosing scope.
     """
     compile_engine.get().clear()
     with relay.build_config(opt_level=3):
         compiled = relay.vm.compile(mod, target=target, params=params)

     # Pass the executable through save()/load_exec() before running it.
     serialized_code, lib = compiled.save()
     reloaded = runtime.vm.Executable.load_exec(serialized_code, lib)

     machine = runtime.vm.VirtualMachine(reloaded, ctx)
     vm_output = machine.run(**map_inputs)
     tvm.testing.assert_allclose(
         vm_output.asnumpy(), ref_result, rtol=tol, atol=tol)
    def test_exec(mod, params, ref_mod, ref_params, out_shape):
        """Evaluate `ref_mod` on random data and check partitioned `mod` against it.

        The reference value comes from a "graph" executor on ``tvm.cpu(0)``;
        `mod` is partitioned with ``get_partitoned_mod`` using `dnnl_patterns`
        before being handed to ``check_result``.
        """
        input_data = np.random.randn(1, 3, 224, 224).astype(np.float32)

        reference_executor = relay.create_executor(
            "graph", mod=ref_mod, ctx=tvm.cpu(0))
        expected = reference_executor.evaluate()(input_data, **ref_params)
        compile_engine.get().clear()

        partitioned = get_partitoned_mod(mod, params, dnnl_patterns)

        check_result(
            partitioned,
            {"data": input_data},
            out_shape,
            expected.asnumpy(),
            tol=1e-5,
            params=params,
        )
 def check_vm_result():
     """Run `mod` on the Relay VM and compare every output with `result`.

     Uses `mod`, `target`, `params`, `ctx`, `map_inputs`, `result` and `tol`
     from the enclosing scope.  A single VM output or a single reference
     value is wrapped in a list so both sides can be compared pairwise.
     """
     compile_engine.get().clear()
     with tvm.transform.PassContext(opt_level=3):
         compiled = relay.vm.compile(mod, target=target, params=params)

     serialized_code, lib = compiled.save()
     lib = update_lib(lib)
     reloaded = runtime.vm.Executable.load_exec(serialized_code, lib)

     machine = runtime.vm.VirtualMachine(reloaded, ctx)
     vm_outputs = machine.run(**map_inputs)
     if not isinstance(vm_outputs, runtime.container.ADT):
         vm_outputs = [vm_outputs]

     expected = result if isinstance(result, list) else [result]

     for actual, reference in zip(vm_outputs, expected):
         tvm.testing.assert_allclose(
             actual.asnumpy(), reference, rtol=tol, atol=tol)
Example #5
0
    def check_graph_runtime_result():
        """Build `mod`, run it on the graph runtime and compare with `ref_result`.

        Uses `mod`, `target`, `params`, `ctx`, `map_inputs`, `out_shape`,
        `ref_result` and `tol` from the enclosing scope.  Only output index 0
        is compared.
        """
        compile_engine.get().clear()
        with relay.build_config(opt_level=3):
            graph_json, lib, built_params = relay.build(
                mod, target=target, params=params)

        graph_module = tvm.contrib.graph_runtime.create(graph_json, lib, ctx)
        for input_name, input_value in map_inputs.items():
            graph_module.set_input(input_name, input_value)
        graph_module.set_input(**built_params)
        graph_module.run()

        # Fetch output 0 into a freshly allocated buffer of the expected shape.
        buffer = tvm.nd.empty(out_shape, ctx=ctx)
        actual = graph_module.get_output(0, buffer)
        tvm.testing.assert_allclose(
            actual.asnumpy(), ref_result, rtol=tol, atol=tol)
Example #6
0
def tune_and_evaluate(mod, params, input_shape, dtype, measure_top_n, target,
                      tuning_opt, graph_log_file, best_results_file):
    """Tune a model with the ranking model and evaluate the performance.

    Parameters
    ----------
    mod : module whose ``"main"`` function is tuned and which is then built.
    params : parameters forwarded to task extraction and to the build.
    input_shape : indexable pair; element 0 is used as the input name and
        element 1 as the shape of the random input tensor.
    dtype : dtype of the random input tensor.
    measure_top_n : forwarded to ``tune_kernels``.
    target : build target; ``str(target)`` also selects the device context.
    tuning_opt : dict expanded as keyword arguments into ``tune_kernels``;
        its ``'log_filename'`` entry names the kernel tuning log.
    graph_log_file : graph tuning log path, or None to skip graph tuning.
    best_results_file : path the ``tune_kernels`` result is written to as JSON.
    """

    sys.stderr.write("Extract conv2d tasks...\n")
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params)

    # Run tuning tasks.  Graph tuning happens only when a graph log path was
    # given and that log does not exist yet.
    needs_graph_tuning = (graph_log_file is not None
                          and not os.path.exists(graph_log_file))
    best_results = tune_kernels(tasks, needs_graph_tuning, measure_top_n,
                                **tuning_opt)
    if needs_graph_tuning:
        tune_graph(mod["main"], input_shape[1], target,
                   tuning_opt['log_filename'], graph_log_file)
    with open(best_results_file, 'w') as of:
        json.dump(best_results, of)

    dispatch_ctx = tvm.autotvm.task.DispatchContext.current

    # The dispatch context is global state: restore it even when loading the
    # logs or building fails, so an exception cannot leave an override active.
    try:
        if graph_log_file is not None and os.path.exists(graph_log_file):
            sys.stderr.write("Compile model with graph tuning...\n")
            tvm.autotvm.task.DispatchContext.current = autotvm.apply_graph_best(
                graph_log_file)
        elif os.path.exists(tuning_opt['log_filename']):
            sys.stderr.write("Compile model without graph tuning...\n")
            tvm.autotvm.task.DispatchContext.current = autotvm.apply_history_best(
                tuning_opt['log_filename'])
        else:
            sys.stderr.write("Compile model with fallback + tophub...\n")

        compile_engine.get().clear()
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod,
                                                          target=target,
                                                          params=params)
    finally:
        tvm.autotvm.task.DispatchContext.current = dispatch_ctx

    # Load parameters.
    ctx = tvm.context(str(target), 0)
    module = runtime.create(graph, lib, ctx)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape[1])).astype(dtype))
    module.set_input(input_shape[0], data_tvm)
    module.set_input(**params)

    # Evaluate performance.
    sys.stderr.write("Evaluate inference time cost...\n")
    ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    sys.stderr.write("Median inference time: %.2f ms\n" % np.median(prof_res))
Example #7
0
 def __init__(self, mod, tgt) -> None:
     """Store the module and target and set up empty bookkeeping state."""
     super().__init__()
     self.mod, self.tgt = mod, tgt
     # Engine object returned by compile_engine.get().
     self.engine = compile_engine.get()
     # Starts as a list holding one empty list.
     # NOTE(review): presumably a stack of binding scopes; confirm against users.
     self.bindings = [[]]
     # Starts empty; key/value meaning is not visible from this method.
     self.gv_map = {}
Example #8
0
def generate_ref_data(mod, input_data, params=None, target="llvm"):
    """Generate reference data through executing the relay module.

    The module is built at ``opt_level=3``, exported to ``mod.so`` inside a
    temporary directory, loaded back, and executed on ``tvm.cpu()`` with
    `input_data` supplied as keyword inputs.

    Returns a list with one ``.numpy()`` array per output of the executor.
    """
    compile_engine.get().clear()
    with tvm.transform.PassContext(opt_level=3):
        built = relay.build(mod, target=target, params=params)

    # Export the build to a shared library and load it back before running.
    workdir = utils.tempdir()
    library_path = workdir.relpath("mod.so")
    built.export_library(library_path)
    loaded = tvm.runtime.load_module(library_path)

    executor = graph_executor.GraphModule(loaded["default"](tvm.cpu()))
    executor.set_input(**input_data)
    executor.run()

    return [
        executor.get_output(index).numpy()
        for index in range(executor.get_num_outputs())
    ]
Example #9
0
 def __init__(self, mod, target) -> None:
     """Store the module and target and reset the counters and variable map."""
     super().__init__()
     self.mod, self.tgt = mod, target
     # Engine object returned by compile_engine.get().
     self.engine = compile_engine.get()
     # Both counters start at zero.
     # NOTE(review): presumably used to number functions/variables; confirm.
     self.fun_no = self.var_no = 0
     # Starts empty; key/value meaning is not visible from this method.
     self.var_map = {}
def test_extern_dnnl_mobilenet():
    """Check a DNNL-partitioned MobileNet against the unpartitioned model.

    Returns early with a printed message when
    ``tvm.get_global_func("relay.ext.dnnl", True)`` yields a falsy value.
    """
    dnnl_codegen = tvm.get_global_func("relay.ext.dnnl", True)
    if not dnnl_codegen:
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    input_shape = (1, 3, 224, 224)
    ref_mod, params = relay.testing.mobilenet.get_workload(
        batch_size=1, dtype="float32")

    # Annotate for "dnnl", merge compiler regions, then partition the graph.
    annotated = transform.AnnotateTarget(["dnnl"])(ref_mod)
    merged = transform.MergeCompilerRegions()(annotated)
    partitioned = transform.PartitionGraph()(merged)

    input_data = np.random.uniform(0, 1, input_shape).astype(dtype)

    # Reference value from the original module on a "graph" executor.
    reference_executor = relay.create_executor(
        "graph", mod=ref_mod, ctx=tvm.cpu(0))
    expected = reference_executor.evaluate()(input_data, **params)
    compile_engine.get().clear()

    check_result(
        partitioned,
        {"data": input_data},
        (1, 1000),
        expected.asnumpy(),
        tol=1e-5,
        params=params,
    )
    def check_graph_runtime_result():
        """Build `mod`, run it on the graph runtime and compare each output.

        Uses `mod`, `target`, `params`, `ctx`, `map_inputs`, `out_shape`,
        `result` and `tol` from the enclosing scope.  A single shape or a
        single reference value is wrapped in a list; output ``i`` is compared
        with reference ``i`` for every entry of the shape list.
        """
        compile_engine.get().clear()
        with tvm.transform.PassContext(opt_level=3):
            graph_json, lib, built_params = relay.build(
                mod, target=target, params=params)
        lib = update_lib(lib)

        graph_module = tvm.contrib.graph_runtime.create(graph_json, lib, ctx)
        for input_name, input_value in map_inputs.items():
            graph_module.set_input(input_name, input_value)
        graph_module.set_input(**built_params)
        graph_module.run()

        expected_shapes = out_shape if isinstance(out_shape, list) else [out_shape]
        expected_values = result if isinstance(result, list) else [result]

        for position, expected_shape in enumerate(expected_shapes):
            buffer = tvm.nd.empty(expected_shape, ctx=ctx)
            actual = graph_module.get_output(position, buffer)
            tvm.testing.assert_allclose(
                actual.asnumpy(), expected_values[position], rtol=tol, atol=tol)
Example #12
0
def tune_and_evaluate(mod, params, inputs, dtype, measure_top_n, target,
                      tuning_opt, ref_log_filename):
    """Tune a model with the ranking model and evaluate the performance.

    Up to three builds of `mod` are produced: one under the tuning log
    ``tuning_opt["log_filename"]``, a baseline under the dispatch context that
    was current on entry, and, when `ref_log_filename` names an existing
    file, a reference build under that log.  Each build is passed to
    ``evaluate`` with the same randomly generated inputs.

    Parameters
    ----------
    mod : module whose ``"main"`` function is tuned and which is then built.
    params : parameters forwarded to task extraction and to every build.
    inputs : dict mapping input names to the shapes of the random tensors.
    dtype : dtype of the random input tensors; also forwarded to ``evaluate``.
    measure_top_n : forwarded to ``tune_kernels``.
    target : build target; ``str(target)`` also selects the device context.
    tuning_opt : dict expanded as keyword arguments into ``tune_kernels``;
        its ``"log_filename"`` entry names the tuning log.  Tuning is skipped
        when that file already exists.
    ref_log_filename : path of a reference tuning log; None or a missing
        file skips the reference build.
    """

    sys.stderr.write("Extract conv2d tasks...\n")
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params)

    tic = time.time()
    dispatch_ctx = tvm.autotvm.task.DispatchContext.current

    def build_with_log(log_filename=None):
        """Build `mod`, optionally under the best records of `log_filename`.

        The dispatch context is global state, so the one saved on entry is
        put back even when the build raises.
        """
        if log_filename is not None:
            tvm.autotvm.task.DispatchContext.current = (
                autotvm.apply_history_best(log_filename))
        try:
            compile_engine.get().clear()
            with relay.build_config(opt_level=3):
                return relay.build_module.build(mod,
                                                target=target,
                                                params=params)
        finally:
            tvm.autotvm.task.DispatchContext.current = dispatch_ctx

    if not os.path.exists(tuning_opt["log_filename"]):
        tune_kernels(tasks, measure_top_n, **tuning_opt)
        sys.stderr.write("Tuning time: %.2f mins\n" %
                         ((time.time() - tic) / 60))

    sys.stderr.write("Compile new tuned model...\n")
    lib = build_with_log(tuning_opt["log_filename"])

    sys.stderr.write("Compile baseline model...\n")
    base_lib = build_with_log()

    ref_lib = None
    # Guard against None: os.path.exists(None) raises TypeError.
    if ref_log_filename is not None and os.path.exists(ref_log_filename):
        sys.stderr.write("Compile reference model...\n")
        ref_lib = build_with_log(ref_log_filename)

    ctx = tvm.context(str(target), 0)
    data_tvm = {
        name: tvm.nd.array((np.random.uniform(size=shape)).astype(dtype))
        for name, shape in inputs.items()
    }

    # Evaluate baseline module
    sys.stderr.write("Baseline\n")
    evaluate(base_lib, ctx, data_tvm, dtype)

    # Evaluate reference module
    if ref_lib:
        sys.stderr.write("Reference\n")
        evaluate(ref_lib, ctx, data_tvm, dtype)

    # Evaluate module
    evaluate(lib, ctx, data_tvm, dtype)