def check_verify():
        if not tvm.runtime.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        try:
            mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n,)).astype(A.dtype)
        mod.set_input(x=a)

        # verify the dump root was created
        directory = mod._dump_path
        assert os.path.exists(directory)

        # verify the graph dump is there
        GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
        assert len(os.listdir(directory)) == 1

        # verify the file name is correct
        graph_dump_path = os.path.join(directory, GRAPH_DUMP_FILE_NAME)
        assert os.path.exists(graph_dump_path)

        # verify the graph contains some expected keys
        with open(graph_dump_path) as graph_f:
            dumped_graph = json.load(graph_f)

        assert isinstance(dumped_graph, dict)
        for k in ("nodes", "arg_nodes", "node_row_ptr", "heads", "attrs"):
            assert k in dumped_graph, f"key {k} not in dumped graph {graph!r}"

        mod.run()
        # verify the tensors are dumped
        assert len(os.listdir(directory)) > 1

        CHROME_TRACE_FILE_NAME = '_tvmdbg_execution_trace.json'
        assert os.path.exists(os.path.join(directory, CHROME_TRACE_FILE_NAME))

        with open(os.path.join(directory, CHROME_TRACE_FILE_NAME)) as f:
            trace = json.load(f)
        assert trace["displayTimeUnit"] == "ns"
        events = trace["traceEvents"]
        assert len(events) == 4
        assert all(event["ph"] in ('B', 'E') for event in events)
        assert all(event["pid"] == 1 for event in events)
        assert all(event["tid"] == 1 for event in events)
        assert all(event["name"] == 'x' for event in events[:2])
        assert all(event["name"] == 'add' for event in events[2:])
        assert events[0]["ts"] == 0
        assert events[0]["ph"] == 'B'

        # verify the output is correct
        out = mod.get_output(0, tvm.nd.empty((n,)))
        np.testing.assert_equal(out.asnumpy(), a + 1)

        mod.exit()
        # verify the dump root is deleted after cleanup
        assert not os.path.exists(directory)
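
The helper above closes over s, A, B, n, and graph from an enclosing test. A minimal sketch of that setup, assuming TVM's tensor expression API (the graph-runtime JSON string, which wires one input 'x' into an 'add' node, is elided):

import tvm
from tvm import te

# Hypothetical enclosing setup: an element-wise add kernel over n elements.
n = 4
A = te.placeholder((n,), name="A")
B = te.compute(A.shape, lambda i: A[i] + 1.0, name="B")
s = te.create_schedule(B.op)
# `graph` would be the graph-runtime JSON string wiring input 'x' to node 'add'.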
Example #2
def run(args):
    onnx_model = onnx.load_model(os.path.join(args.test_dir, 'model.onnx'))
    symbol, params = nnvm.frontend.from_onnx(onnx_model)
    input_names = symbol.list_input_names()
    output_names = symbol.list_output_names()

    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = load_test_data(test_data_dir, input_names, output_names)
    inputs = dict(inputs)

    # assert len(input_names) == len(inputs) + len(params)
    # assert len(output_names) == len(outputs)

    graph, lib, params = compile(
        symbol, args.target, input_names, inputs, params,
        args.opt_level, args.autotvm_log)

    if args.dump_nnvm:
        print(graph.ir())
        print(graph.json())

    ctx = tvm.gpu()

    # Prepare inputs.
    tvm_inputs = {}
    for name, value in inputs.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)
    for name, value in params.items():
        tvm_inputs[name] = tvm.nd.array(value, ctx=ctx)

    graph_module = None
    if args.debug:
        try:
            graph_module = debug_runtime.create(graph, lib, ctx)
        except Exception:
            print('debug_runtime is disabled. '
                  'Set USE_GRAPH_RUNTIME_DEBUG=ON and rebuild TVM')
    if graph_module is None:
        graph_module = graph_runtime.create(graph, lib, ctx)

    graph_module.set_input(**tvm_inputs)

    graph_module.run()

    for i, (name, expected) in enumerate(outputs):
        tvm_output = tvm.nd.empty(expected.shape, expected.dtype, ctx=ctx)
        actual = graph_module.get_output(i, tvm_output).asnumpy()
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    if args.iterations > 1:
        num_iterations = args.iterations - 1
        start = time.time()
        for t in range(num_iterations):
            graph_module.run()
            cupy.cuda.device.Device().synchronize()
        elapsed = time.time() - start
        print('Elapsed: %.3f msec' % (elapsed * 1000 / num_iterations))
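
A hedged sketch of the args namespace this script expects, inferred only from the attributes read above (load_test_data and compile are helpers defined elsewhere in the same script):

import argparse

# Hypothetical CLI wiring; flag names mirror the attribute reads in run().
parser = argparse.ArgumentParser()
parser.add_argument('test_dir')
parser.add_argument('--target', default='cuda')
parser.add_argument('--opt_level', type=int, default=3)
parser.add_argument('--autotvm_log', default=None)
parser.add_argument('--dump_nnvm', action='store_true')
parser.add_argument('--debug', action='store_true')
parser.add_argument('--iterations', type=int, default=1)
run(parser.parse_args())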
Example #3
    def check_verify():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        try:
            mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n, )).astype(A.dtype)
        mod.set_input(x=a)

        # verify the dump root was created
        directory = mod._dump_path
        assert os.path.exists(directory)

        # verify the graph dump is there
        GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
        assert len(os.listdir(directory)) == 1

        # verify the file name is correct
        assert os.path.exists(os.path.join(directory, GRAPH_DUMP_FILE_NAME))

        mod.run()
        # verify the tensors are dumped
        assert len(os.listdir(directory)) > 1

        # verify the output is correct
        out = mod.get_output(0, tvm.nd.empty((n,)))
        np.testing.assert_equal(out.asnumpy(), a + 1)

        mod.exit()
        # verify the dump root is deleted after cleanup
        assert not os.path.exists(directory)
Example #5
def run():
    passes = [(1, tensorizer.rewrite)]
    config = {
        'tir.add_lower_pass': passes
    } if target.startswith('nvptx') else {}
    with tvm.transform.PassContext(opt_level=3, trace=tracer, config=config):
        graph, lib, params = tvm.relay.build(module, target=target)
        #from tvm.contrib import graph_runtime as runtime
        from tvm.contrib.debugger import debug_runtime as runtime
        func = runtime.create(graph, lib, tvm.gpu())

        x_ = (np.random.randn(n, c, h, w) * 128).astype('float32')
        func.set_input('x', x_)
        timer = func.module.time_evaluator('run',
                                           ctx=tvm.gpu(),
                                           number=1,
                                           repeat=10)
        #timed = []
        #for i in range(10):
        #    func.run()
        #    for node, time in zip(func.debug_datum._nodes_list, func.debug_datum._time_list):
        #        if 'conv2d' in node['name']:
        #            timed.append(time[0])
        timed = timer()
        while np.var(timed.results) > 1e-5:
            timed = timer()
        return timed.mean
Example #7
def profile(symbol_file, num_inference_images):
    debug = False
    import tvm
    from tvm.contrib import graph_runtime
    from tvm.contrib.debugger import debug_runtime

    base = os.getcwd() + '/compiled_models/tvm_' + symbol_file.split(
        '/')[-1].replace('.json', '')

    path_lib = base + '_deploy_lib.tar'
    path_graph = base + '_deploy_graph.json'
    path_params = base + '_deploy_params.params'

    graph = open(path_graph).read()
    lib = tvm.runtime.load_module(path_lib)
    params = bytearray(open(path_params, 'rb').read())

    if debug:
        rt_mod = debug_runtime.create(graph, lib, ctx=tvm.cpu(0))
        rt_mod.load_params(params)
        rt_mod.run()
        return

    rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
    rt_mod.load_params(params)

    # warm up
    for _ in range(50):
        rt_mod.run()

    time_tvm = list()
    for _ in range(num_inference_images):
        time0 = time.time()
        rt_mod.run()
        time1 = time.time()
        time_tvm.append(time1 - time0)

    avg = lambda x: round(1000 * sum(x) / len(x), 6)
    std = lambda x: round(statistics.stdev(x), 6)

    total_tvm = avg(time_tvm)
    sec_tvm = total_tvm / 1000
    std_tvm = std(time_tvm)
    min_tvm = round(min(time_tvm), 6)
    min_tvm_ms = round(min(time_tvm) * 1000, 6)
    deviation_from_min_tvm = round(sec_tvm / min_tvm * 100 - 100, 6)
    deviation_from_std_tvm = round(std_tvm / sec_tvm * 100, 6)

    net_name = symbol_file.split('/')[-1].replace('.json', '')
    print("Perf", "Tvm", net_name, total_tvm, min_tvm_ms, std_tvm, sep='\t')
Example #8
def profile(num_inference_images, prefix):
    debug = True
    # np.random.seed(0)

    static_net = mx.gluon.SymbolBlock.imports('{}.json'.format(prefix),
                                              ['data0', 'data1', 'data2'],
                                              '{}.params'.format(prefix))
    static_net.hybridize(static_alloc=True, static_shape=True)
    mx_ctx = mx.cpu()

    # Prepare input data
    dtype = "float32"
    batch = 1
    seq_length = 128
    inputs = np.random.randint(0, 2000, size=(batch, seq_length)).astype(dtype)
    token_types = np.random.uniform(size=(batch, seq_length)).astype(dtype)
    valid_length = np.asarray([seq_length] * batch).astype(dtype)
    
    # Convert to MXNet NDArray and run the MXNet model
    inputs_nd = mx.nd.array(inputs, ctx=mx_ctx)
    token_types_nd = mx.nd.array(token_types, ctx=mx_ctx)
    valid_length_nd = mx.nd.array(valid_length, ctx=mx_ctx)
    mx_out = static_net(inputs_nd, token_types_nd, valid_length_nd.astype('float32'))
    mx_out.wait_to_read()
    print(mx_out)

    import tvm
    if debug:
        from tvm.contrib.debugger import debug_runtime as grt
    else:
        from tvm.contrib import graph_runtime as grt

    
    base = os.getcwd() + '/compiled/' + prefix.split("/")[-1]

    path_lib = base + '_deploy_lib.tar'
    path_graph = base + '_deploy_graph.json'
    path_params = base + '_deploy_params.params'

    graph = open(path_graph).read()
    lib = tvm.runtime.load_module(path_lib)
    params = bytearray(open(path_params, 'rb').read())

    rt_mod = grt.create(graph, lib, ctx=tvm.cpu(0))
    rt_mod.load_params(params)
    rt_mod.set_input(data0=inputs, data1=token_types, data2=valid_length)

    if debug:
        rt_mod.run()
    else:
        ftimer = rt_mod.module.time_evaluator("run", ctx=tvm.cpu(0), number=1, repeat=10)
        res = ftimer().results

        print(np.mean(res) * 1000, np.var(res))
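
A hedged invocation sketch: prefix is the path stem of the exported '{prefix}.json' / '{prefix}.params' pair, and the compiled TVM artifacts are expected under ./compiled/:

profile(num_inference_images=1, prefix='bert_base')  # hypothetical prefix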
Example #9
    def check_verify():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        try:
            mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n,)).astype(A.dtype)
        mod.set_input(x=a)

        # verify the dump root was created
        directory = mod._dump_path
        assert os.path.exists(directory)

        # verify the graph dump is there
        GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
        assert len(os.listdir(directory)) == 1

        # verify the file name is correct
        assert os.path.exists(os.path.join(directory, GRAPH_DUMP_FILE_NAME))

        mod.run()
        # verify the tensors are dumped
        assert len(os.listdir(directory)) > 1

        CHROME_TRACE_FILE_NAME = '_tvmdbg_execution_trace.json'
        assert os.path.exists(os.path.join(directory, CHROME_TRACE_FILE_NAME))

        with open(os.path.join(directory, CHROME_TRACE_FILE_NAME)) as f:
            trace = json.load(f)
        assert trace["displayTimeUnit"] == "ns"
        events = trace["traceEvents"]
        assert len(events) == 4
        assert all(event["ph"] in ('B', 'E') for event in events)
        assert all(event["pid"] == 1 for event in events)
        assert all(event["tid"] == 1 for event in events)
        assert all(event["name"] == 'x' for event in events[:2])
        assert all(event["name"] == 'add' for event in events[2:])
        assert events[0]["ts"] == 0
        assert events[0]["ph"] == 'B'

        # verify the output is correct
        out = mod.get_output(0, tvm.nd.empty((n,)))
        np.testing.assert_equal(out.asnumpy(), a + 1)

        # test individual run
        mod.run_individual(20, 2, 1)

        mod.exit()
        # verify the dump root is deleted after cleanup
        assert not os.path.exists(directory)
Example #10
def create_graph_module(args, graph, lib, ctx):
    graph_module = None
    if args.debug:
        try:
            graph_module = debug_runtime.create(graph, lib, ctx)
        except Exception:
            print('debug_runtime is disabled. '
                  'Set USE_GRAPH_RUNTIME_DEBUG=ON and rebuild TVM')
    if graph_module is None:
        graph_module = graph_runtime.create(graph, lib, ctx)

    return graph_module
def relay_micro_build(func,
                      dev_config,
                      target,
                      params=None,
                      lib_headers=None,
                      lib_include_paths=None):
    """Create a graph runtime module with a micro device context from a Relay function.

    Parameters
    ----------
    func : relay.Function
        function to compile

    dev_config : TODO
        TODO

    target : TODO
        TODO

    params : dict
        input parameters that do not change during inference

    lib_headers : TODO
        TODO

    lib_include_paths : TODO
        TODO

    Return
    ------
    mod : tvm.module.Module
        graph runtime module for the target device

    """
    with tvm.target.build_config(opt_level=3, disable_vectorize=True):
        graph, c_mod, params = relay.build(func, target=target, params=params)
    micro_mod = micro.create_micro_mod(c_mod,
                                       dev_config,
                                       lib_headers=lib_headers,
                                       lib_include_paths=lib_include_paths)
    ctx = tvm.micro_dev(0)
    if DEBUG_MODE:
        dump_root = f'{get_repo_root()}/debug/micro'
        mod = debug_runtime.create(graph, micro_mod, ctx, dump_root=dump_root)
    else:
        mod = graph_runtime.create(graph, micro_mod, ctx)
    mod.set_input(**params)
    return mod
Example #12
def tvm_lstm(fuse, opt_level=0, rebuild=True, profile=False):
    name = 'lstm'
    if rebuild:
        graph, lib, params = tvm_compile(name, fuse, opt_level)
    else:
        graph, lib, params = tvm_load(get_tvm_model_name(
            name, fuse, opt_level))
    ######################################################################
    # Execute the portable graph on TVM
    # ---------------------------------
    # Now, we would like to reproduce the same forward computation using TVM.
    from tvm.contrib import graph_runtime
    from tvm.contrib.debugger import debug_runtime
    if is_gpu:
        ctx = tvm.gpu()  #tvm.cuda()
    else:
        ctx = tvm.cpu(0)
    dtype = 'float32'
    if profile:
        m = debug_runtime.create(graph, lib, ctx)
    else:
        m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    x = mx.nd.ones(shape=data_shape)
    m.set_input('data', tvm.nd.array(x.asnumpy().astype(dtype)))
    if rebuild:
        m.set_input(**params)
    else:
        m.load_params(params)
    # execute
    ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=100)
    prof_res = np.array(
        ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
    # m.run()
    # # get outputs
    # tvm_output = m.get_output(0)
    # print(tvm_output)
    if profile:
        m.run()
    if fuse:
        name = "tvm lstm"
    else:
        name = "tvm lstm_cell"
    print("%-20s %-19s (%s)" %
          ("%s opt=%d" % (name, opt_level), "%.2f ms" % np.mean(prof_res),
           "%.2f ms" % np.std(prof_res)))
def run_model_tvm(graph, lib, params, run_settings, model_name, tuning_records=None):
    """
    Run TVM model. Apply tuning records if they exist.
    """
    profile = run_settings['profile']
    device = run_settings['device']
    repeat = run_settings['repeat']

    session = rpc.LocalSession()
    ctx = session.cpu() if device == "cpu" else session.gpu()
    is_tuned = bool(tuning_records)

    lib_name = "mod.so"
    temp = util.tempdir()
    lib_path = temp.relpath(lib_name)
    lib.export_library(lib_path)
    session.upload(lib_path)
    lib = session.load_module(lib_name)

    if profile:
        module = debug_runtime.create(graph, lib, ctx, dump_root=f"results/prof_{model_name}_tuned={is_tuned}")
    else:
        module = runtime.create(graph, lib, ctx)

    saved_params = relay.save_param_dict(params)
    module.load_params(saved_params)

    shape_dict, dtype_dict = get_input_info(graph, params)
    inputs_dict = make_inputs_dict(shape_dict, dtype_dict)
    module.set_input(**inputs_dict)

    if profile:
        module.run()

    timer = module.module.time_evaluator("run", ctx, 1, repeat=repeat)
    prof_result = timer()
    times = prof_result.results
    header, stats = extract_profile_data(times)

    filename = f'results/stat_table_{model_name}_tuned={is_tuned}'
    with open(filename, 'w') as f:
        print("%s\n%s\n" % (header, stats), file=f)
    print("%s\n%s\n" % (header, stats))
    def check_verify():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        try:
            mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n, )).astype(A.dtype)
        mod.set_input(x=a)
        # verify the dump root was created
        path = mod.ui_obj.curses_obj._dump_root + mod.ui_obj.curses_obj.dump_folder()
        directory = os.path.dirname(path)
        assert os.path.exists(directory)
        # verify the graph dump is there
        assert len(os.listdir(directory)) > 0
        # verify the dump root is deleted after cleanup
        mod.ui_obj.curses_obj.exit()
        assert not os.path.exists(directory)
    def check_remote():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        server = rpc.Server("localhost")
        remote = rpc.connect(server.host, server.port)
        temp = util.tempdir()
        ctx = remote.cpu(0)
        path_dso = temp.relpath("dev_lib.so")
        mlib.export_library(path_dso)
        remote.upload(path_dso)
        mlib = remote.load_module("dev_lib.so")
        try:
            mod = graph_runtime.create(graph, mlib, remote.cpu(0))
        except ValueError:
            print("Skip because debug graph_runtime not enabled")
            return
        a = np.random.uniform(size=(n,)).astype(A.dtype)
        mod.run(x=tvm.nd.array(a, ctx))
        out = tvm.nd.empty((n,), ctx=ctx)
        out = mod.get_output(0, out)
        np.testing.assert_equal(out.asnumpy(), a + 1)
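
The remote test above additionally assumes the RPC helpers imported at the top of its test file; a hedged sketch of those imports:

from tvm import rpc
from tvm.contrib import util  # provides the util.tempdir() used above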
    def run_single_model(self, model_name, model):
        inputs = self.input_data[model_name]
        print(self.input_names)
        shape_dict = {self.input_names[model_name]: inputs.shape}
        print(self.input_names[model_name], inputs.shape)

        print(shape_dict)
        print(type(model))
        if self.file_types[model_name] == 'onnx':
            syms, params = relay.frontend.from_onnx(model, shape_dict)
        elif self.file_types[model_name] == 'keras':
            syms, params = relay.frontend.from_keras(model, shape_dict)
        elif self.file_types[model_name] == 'pytorch':
            shapes_list = [(k, v) for k, v in shape_dict.items()]
            syms, params = relay.frontend.from_pytorch(model, shapes_list)
        target = self.host.target
        target_host = self.host.target_host

        if self.sparse_cnn:
            print('converting sparsity')
            syms, params = ddo.simplify_fc_transpose.convert(
                syms["main"], params)
            syms, params = ddo.csr_conv2d.convert(syms,
                                                  params,
                                                  sparsity_threshold=0.0)
        with relay.build_config(
                opt_level=self.opt_level,
                # disabled_pass=[
                #     "FoldConstant",
                # ],
                # required_pass=[
                #     "SimplifyInference",
                #     "OpFusion",
                #     # "FoldConstant",
                #     "FoldScaleAxis",
                #     "AlterOpLayout",
                #     "CanonicalizeOps",
                #     "CanonicalizeCast",
                #     "EliminateCommonSubexpr",
                #     "CombineParallelConv2D",
                #     "CombineParallelDense",
                #     "CombineParallelBatchMatmul",
                #     "FastMath"
                # ]
        ):
            graph, lib, params = relay.build_module.build(
                syms, target, params=params, target_host=target_host)

        # After `relay.build`, you will get three return values: graph,
        # library and the new parameter, since we do some optimization that will
        # change the parameters but keep the result of model as the same.

        # Save the library at local temporary directory.
        tmp = utils.tempdir()
        tarname = str(model_name) + '_' + str(self.host.name) + '.tar'
        lib_fname = tmp.relpath(tarname)
        lib.export_library(lib_fname)

        # obtain an RPC session from remote device.
        remote = self.host.session

        # upload the library to remote device and load it
        remote.upload(lib_fname)
        rlib = remote.load_module(tarname)

        # create the remote runtime module
        ctx = self.host.ctx
        if not self.layer_debug:
            module = runtime.create(graph, rlib, ctx)
        else:
            module = debug_runtime.create(graph,
                                          rlib,
                                          ctx,
                                          dump_root=self.layer_output_dir)

        # set parameter (upload params to the remote device.
        # This may take a while)
        module.set_input(**params)
        # set input data
        module.set_input(key=self.input_names[model_name], value=inputs)

        # run
        print('running bruh')
        module.run()
        # get output
        out = module.get_output(0)
        self.outputs[model_name] = out.asnumpy()

        # get median inference time
        # sample mean is skewed in this setting by potentially unbounded
        # high outliers, thus we get the median
        print('running avg bruh')
        start = time.time()
        f = module.module.time_evaluator('run',
                                         ctx,
                                         number=10,
                                         repeat=self.runs)
        results = f().results
        median = np.median(results) * 1000
        mean = np.mean(results) * 1000
        self.inf_time[model_name] = median
        self.med_time[model_name] = median
        self.mean_time[model_name] = mean
        self.std_time[model_name] = np.std(results) * 1000
        self.exper_time[model_name] = (time.time() - start) * 1000
        print('ran a model bruh')
Example #18
def main():
    resnetv1 = onnx.load('models/resnet18v1.onnx')
    input_blob = resnetv1.graph.input[0]
    input_shape = tuple(
        map(lambda x: getattr(x, 'dim_value'),
            input_blob.type.tensor_type.shape.dim))
    shape_dict = {input_blob.name: input_shape}
    mod_resnetv1, params_resnetv1 = relay.frontend.from_onnx(
        resnetv1, shape_dict)

    # resnetv2 = onnx.load('models/resnet18v2.onnx')
    # input_blob = resnetv2.graph.input[0]
    # input_shape = tuple(map(lambda x: getattr(x, 'dim_value'), input_blob.type.tensor_type.shape.dim))
    # shape_dict = {input_blob.name: input_shape}
    # mod_resnetv2, params_resnetv2 = relay.frontend.from_onnx(resnetv2, shape_dict)

    mod_q_resnetv1 = quantize(mod_resnetv1, params_resnetv1)
    # mod_q_resnetv2 = quantize(mod_resnetv2, params_resnetv2)

    # mod_resnetv1['main'] = bind_params(mod_resnetv1['main'], params_resnetv1)

    # f = open('graphs/resnetv1_q.log.new', 'w+')
    # f.write(str(mod_q_resnetv1))
    # f.close()

    # f = open('graphs/resnetv2_q.log', 'w+')
    # f.write(str(mod_q_resnetv2))
    # f.close()

    # run_inference(mod_resnetv1)
    # run_inference(mod_q_resnetv1)
    # run_inference(mod_q_resnetv2)

    with autotvm.apply_history_best(log_file):
        #print("Compile...")
        #with relay.build_config(opt_level=3):
        #graph, lib, params = relay.build_module.build(
        #mod_q_resnetv1, target=target, params=params_resnetv1)

        #export library
        #tmp = tempdir()
        #filename = "net.tar"
        #lib.export_library(tmp.relpath(filename))

        # load parameters
        #ctx = tvm.context(str(target), 0)
        #module = runtime.create(graph, lib, ctx)
        #module.set_input(**params)

        #val_data = get_val_data()
        #top1_correct = 0
        #top5_correct = 0
        #total = 0
        #import time
        #start = time.process_time()
        #for i, batch in enumerate(val_data):
        #    data, categories = batch['data'], batch['label']
        #    module.set_input('data', data)
        #    module.run()
        #    prediction = module.get_output(0).asnumpy()
        #    top1_correct += (prediction.argmax(1) == categories).sum()
        #    top5_correct += sum(map(lambda x: x[0] in x[1], zip(categories, prediction.argsort()[:, -5:])))
        #    total += len(data)
        #    print(prediction)
        #    print('Top1 Acc: {}, {}/{}'.format(float(top1_correct) / total, top1_correct, total))
        #    print('Top5 Acc: {}, {}/{}'.format(float(top5_correct) / total, top5_correct, total))
        #end = time.process_time()
        #print('Time: {}'.format(end - start))
        #print('Top1 Acc: {}, {}/{}'.format(float(top1_correct) / total, top1_correct, total))
        #print('Top5 Acc: {}, {}/{}'.format(float(top5_correct) / total, top5_correct, total))

        # evaluate
        #print("Evaluate inference time cost...")
        #ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
        #prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        #print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
        #      (np.mean(prof_res), np.std(prof_res)))

        graph, mod, params = relay.build_module.build(mod_q_resnetv1['main'],
                                                      target=target,
                                                      params=params_resnetv1)

        val_data = get_val_data()
        for i, batch in enumerate(val_data):
            if i > 0:
                break
            data, categories = batch['data'], batch['label']
            m = debug_runtime.create(graph, mod, ctx, dump_root='tvmdbg')
            m.set_input('data', tvm.nd.array(data.astype('float32')))
            m.run()
            tvm_out = m.get_output(0, tvm.nd.empty(tuple([1, 1000]),
                                                   'float32')).asnumpy()
# --------------------------------------------------------------------
test_image = 'dog.jpg'
print("Loading the test image...")
img_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + \
          test_image + '?raw=true'
download(img_url, test_image)

data = nnvm.testing.darknet.load_image(test_image, net.w, net.h)

######################################################################
# Execute on TVM Runtime
# ----------------------
# The process is no different from other examples.
#from tvm.contrib import graph_runtime
from tvm.contrib.debugger import debug_runtime as graph_runtime
m = graph_runtime.create(graph, lib, ctx)

# set inputs
m.set_input('data', tvm.nd.array(data.astype(dtype)))
m.set_input(**params)
# execute
print("Running the test image...")

m.run()
# get outputs
out_shape = (net.outputs, )
tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()

# do the detection and bring up the bounding boxes
thresh = 0.24
hier_thresh = 0.5
Example #20

def tracer(module, info, is_before):
    global timing
    if bool(is_before):
        timing = time.time()
    else:
        print('Executes: ', info.name, (time.time() - timing) * 1000)

result = 1e9
target = -1
from tensorizer import tune
tune.cpu_idx = 0
while True:
    with tvm.transform.PassContext(opt_level=3, trace=tracer, config={'tir.add_lower_pass': [(1, tensorizer.rewrite)]}):
        graph, lib, params = tvm.relay.build(module, target='llvm -mcpu=cascadelake')
        #from tvm.contrib import graph_runtime as runtime
        from tvm.contrib.debugger import debug_runtime as runtime
        func = runtime.create(graph, lib, tvm.cpu())

        x_ = (np.random.randn(n, c, h, w) * 128).astype('int8')
        func.set_input('x', x_)

        timer = func.module.time_evaluator('run', ctx=tvm.cpu(0), number=3, repeat=10)
        timed = timer()

        if timed.mean < result:
            result = timed.mean
            target = tune.cpu_idx

    relay.backend.compile_engine.get().clear()
    tune.cpu_idx += 1
    if tune.cpu_idx - target > 8:
        break
deploy_graph_path = os.path.join(saved_dir, "deploy_graph.json")
loaded_graph = open(deploy_graph_path).read()

lib_save_path = os.path.join(saved_dir, "deploy_lib.tar")
loaded_lib = tvm.runtime.load_module(lib_save_path)

deploy_param_file_path = os.path.join(saved_dir, "deploy_param.params")
loaded_params = bytearray(open(deploy_param_file_path, "rb").read())

# target = tvm.target.cuda()
target = params_dict['target']
ctx = tvm.context(str(target), 0)

print("testing and evaluating TVM performance")
dtype = params_dict['dtype']
module = graph_runtime.create(loaded_graph, loaded_lib, ctx)
print(dtype)

# set inputs
inference_input_shapes = params_dict['inference_input_shapes']
inference_input_names = params_dict['inference_input_names']
for i in range(len(inference_input_shapes)):
    input_shape = tuple(inference_input_shapes[i])
    input_name = inference_input_names[i]
    print("input name:" + input_name)
    print("input shape: " + str(input_shape))
    temp_data = np.random.uniform(size=input_shape).astype(dtype)

    data_tvm = tvm.nd.array(temp_data)
    module.set_input(input_name, data_tvm)
mod["main"] = func

#
print("print non-tuning tvm op: ")
scale = 1
data = np.random.uniform(-scale, scale, size=dshape).astype(dtype)
weight = np.random.uniform(-scale, scale, size=kshape).astype(dtype)
data = tvm.nd.array(data, ctx)
weight = tvm.nd.array(weight, ctx)

with tvm.transform.PassContext(opt_level=3):
    print("Compiling...")
    graph, lib, params = tvm.relay.build(mod, target=target)

from tvm.contrib.debugger import debug_runtime as graph_runtime
module = graph_runtime.create(graph, lib, ctx)
module.set_input("x", data)
module.set_input("w", weight)
module.set_input(**params)
print("testing non-tuning result of the op......")
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
      (np.mean(prof_res), np.std(prof_res)))

print("\n")
print("now testing tuned op......")
with autotvm.apply_history_best("batch_matmul_cuda.log"):
    with tvm.transform.PassContext(opt_level=3):
        print("Compiling...")
        graph, lib, params = tvm.relay.build(mod, target=target)
Example #23
    def check_verify():
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")

        def myadd(*args):
            to_return = mlib["myadd"](*args)
            time.sleep(0.25)
            return to_return

        mlib_proxy = tvm.support.FrontendTestModule()
        mlib_proxy["myadd"] = myadd
        try:
            mod = debug_runtime.create(graph, mlib_proxy, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n, )).astype(A.dtype)
        mod.set_input(x=a)

        # verify dumproot created
        directory = mod._dump_path
        assert os.path.exists(directory)

        # verify graph is there
        GRAPH_DUMP_FILE_NAME = "_tvmdbg_graph_dump.json"
        assert len(os.listdir(directory)) == 1

        # verify the file name is proper
        graph_dump_path = os.path.join(directory, GRAPH_DUMP_FILE_NAME)
        assert os.path.exists(graph_dump_path)

        # verify the graph contains some expected keys
        with open(graph_dump_path) as graph_f:
            dumped_graph = json.load(graph_f)

        assert isinstance(dumped_graph, dict)
        for k in ("nodes", "arg_nodes", "node_row_ptr", "heads", "attrs"):
            assert k in dumped_graph, f"key {k} not in dumped graph {graph!r}"

        mod.run()
        # Verify the tensors are dumped
        assert len(os.listdir(directory)) > 1

        debug_lines = mod.debug_datum.get_debug_result().split("\n")

        def split_debug_line(i):
            to_return = re.split(r"  [ ]*", debug_lines[i])
            assert to_return[-1] == ""
            to_return = to_return[:-1]  # strip empty trailing part
            return to_return

        assert split_debug_line(0) == [
            "Node Name",
            "Ops",
            "Time(us)",
            "Time(%)",
            "Shape",
            "Inputs",
            "Outputs",
        ]
        myadd_lines = split_debug_line(2)
        assert myadd_lines[0] == "add"
        assert myadd_lines[1] == "myadd"
        runtime_sec = float(myadd_lines[2]) / 1e6  # printed in us

        # Ensure runtime is at least the sleep time and less than a unit prefix order of magnitude.
        # Here we just care that the prefix is correct.
        assert runtime_sec > 0.25 and runtime_sec < 0.25 * 1000

        total_lines = split_debug_line(3)
        assert total_lines[0] == "Total_time"
        assert total_lines[2] == myadd_lines[2]

        CHROME_TRACE_FILE_NAME = "_tvmdbg_execution_trace.json"
        assert os.path.exists(os.path.join(directory, CHROME_TRACE_FILE_NAME))

        with open(os.path.join(directory, CHROME_TRACE_FILE_NAME)) as f:
            trace = json.load(f)
        assert trace["displayTimeUnit"] == "ns"
        events = trace["traceEvents"]
        assert len(events) == 4
        assert all(event["ph"] in ("B", "E") for event in events)
        assert all(event["pid"] == 1 for event in events)
        assert all(event["tid"] == 1 for event in events)
        assert all(event["name"] == "x" for event in events[:2])
        assert all(event["name"] == "add" for event in events[2:])
        assert events[0]["ts"] == 0
        assert events[0]["ph"] == "B"

        # verify the output is correct
        out = mod.get_output(0, tvm.nd.empty((n, )))
        np.testing.assert_equal(out.asnumpy(), a + 1)

        mod.exit()
        # verify dump root delete after cleanup
        assert not os.path.exists(directory)
Example #24
def run_module(
    module_file,
    hostname,
    port=9090,
    rpc_key=None,
    device=None,
    inputs_file=None,
    fill_mode="random",
    repeat=1,
    profile=False,
):
    """Run a compiled graph runtime module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    module_file : str
        The path to the module file (a .tar file).
    hostname : str
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it
        will be assumed that remote points to a tracker.
    device : str, optional
        The device (e.g. "cpu" or "gpu") to be targeted by the RPC
        session (local or remote).
    inputs_file : str, optional
        Path to an .npz file containing the inputs.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random".
        Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    profile : bool
        Whether to profile the run with the debug runtime.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
    times : list of float
        execution times generated by the time evaluator
    """

    with tempfile.TemporaryDirectory() as tmp_dir:
        logger.debug("extracting module file %s", module_file)
        t = tarfile.open(module_file)
        t.extractall(tmp_dir)
        graph = open(os.path.join(tmp_dir, "mod.json")).read()
        params = bytearray(
            open(os.path.join(tmp_dir, "mod.params"), "rb").read())

        if hostname:
            # Remote RPC
            if rpc_key:
                logger.debug("running on remote RPC tracker with key %s",
                             rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("running on remote RPC with no key")
                session = rpc.connect(hostname, port)
        else:
            # Local
            logger.debug("running a local session")
            session = rpc.LocalSession()

        session.upload(os.path.join(tmp_dir, "mod.so"))
        lib = session.load_module("mod.so")

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("device is %s", device)
        if device == "gpu":
            ctx = session.gpu()
        elif device == "cl":
            ctx = session.cl()
        else:
            assert device == "cpu"
            ctx = session.cpu()

        if profile:
            logger.debug("creating runtime with profiling enabled")
            module = debug_runtime.create(graph, lib, ctx, dump_root="./prof")
        else:
            logger.debug("creating runtime with profiling disabled")
            module = runtime.create(graph, lib, ctx)

        logger.debug("load params into the runtime module")
        module.load_params(params)

        shape_dict, dtype_dict = get_input_info(graph, params)
        inputs_dict = make_inputs_dict(inputs_file, shape_dict, dtype_dict,
                                       fill_mode)

        logger.debug("setting inputs to the module")
        module.set_input(**inputs_dict)

        # Run must be called explicitly if profiling
        if profile:
            logger.debug("running the module with profiling enabled")
            module.run()

        # create the module time evaluator (returns a function)
        timer = module.module.time_evaluator("run", ctx, 1, repeat=repeat)
        # call the evaluator function to invoke the module and save execution times
        prof_result = timer()
        # collect a list of execution times from the profiling results
        times = prof_result.results

        logger.debug("collecting the output tensors")
        num_outputs = module.get_num_outputs()
        outputs = {}
        for i in range(num_outputs):
            output_name = "output_{}".format(i)
            outputs[output_name] = module.get_output(i).asnumpy()

        return outputs, times
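
A hedged usage sketch: run a compiled module tarball locally on CPU (passing a falsy hostname selects rpc.LocalSession above); 'mod.tar' is a placeholder path:

outputs, times = run_module('mod.tar', hostname=None, device='cpu', repeat=3)
print(sum(times) / len(times))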
Example #25
                    params=params, target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog, target=target,
                        params=params, target_host=env.target_host)

        # Export library
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # If detailed runtime info is needed build with debug runtime
        if opt.debug_profile:
            m = debug_runtime.create(graph, lib, ctx)
        else:
            m = graph_runtime.create(graph, lib, ctx)

        # Set the network parameters and synthetic input
        image = tvm.nd.array(
            (np.random.uniform(size=(1, 3, 224, 224))).astype('float32'))
        m.set_input(**params)
        m.set_input('data', image)

        # Perform inference
        timer = m.module.time_evaluator("run", ctx, number=4, repeat=opt.measurements)
        tcost = timer()
        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #26
def run_tvm(data, symbol_file, num_inference_images, sym, devs, label_name):
    debug = False
    import tvm
    from tvm.contrib import graph_runtime
    from tvm.contrib.debugger import debug_runtime

    base = './compiled/' + symbol_file.split('/')[-1].replace('.json', '')

    path_lib = base + '_deploy_lib.tar'
    path_graph = base + '_deploy_graph.json'
    path_params = base + '_deploy_params.params'

    graph = open(path_graph).read()
    lib = tvm.runtime.load_module(path_lib)
    params = bytearray(open(path_params, 'rb').read())

    if debug:
        rt_mod = debug_runtime.create(graph, lib, ctx=tvm.cpu(0))
        mod = mx.mod.Module(symbol=sym, context=devs)
        mod.bind(for_training=False, data_shapes=data.provide_data)
    else:
        rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
        mod = mx.mod.Module(symbol=sym,
                            context=devs,
                            label_names=[
                                label_name,
                            ])
        mod.bind(for_training=False,
                 data_shapes=data.provide_data,
                 label_shapes=data.provide_label)

    rt_mod.load_params(params)
    mod.set_params(arg_params, aux_params)

    counter = 0
    top_1_raw = 0
    top_5_raw = 0
    top_1_raw_mxnet = 0
    top_5_raw_mxnet = 0
    if debug:
        data = advance_data_iter(data, 0)
    for batch in data:
        # Get the original label.
        correct_label = int(batch.label[0].asnumpy()[0])

        rt_mod.set_input('data', batch.data[0].asnumpy())
        rt_mod.run()
        if debug:
            np.set_printoptions(suppress=False)
            for i in rt_mod.debug_datum.get_output_tensors().keys():
                print(i, rt_mod.debug_get_output(i))
            return
        tvm_res = rt_mod.get_output(0).asnumpy()

        mod.forward(batch, is_train=False)
        mxnet_res = mod.get_outputs()[0].asnumpy()

        if debug:
            print("######## MxNet ###########")
            print(mxnet_res[0][0])
            print("######## TVM ###########")
            print(tvm_res[0][0])
            print("############################")
            print("############################")
            print("############################")
            print("############################")
            print("############################")
            print("############################")
            print("############################")
            print("############################")
            print("############################")
            print("######## MxNet ###########")
            print(mxnet_res)
            print("######## TVM ###########")
            print(tvm_res)
            #print("######## Diff ###########")
            # it = np.nditer(mxnet_res, flags=['multi_index'])
            # while not it.finished:
            #     print("%d <%s>" % (it[0], it.multi_index), end='\n')
            #     it.iternext()
            try:
                np.testing.assert_allclose(mxnet_res.astype('int32'),
                                           tvm_res.astype('int32'),
                                           atol=0,
                                           verbose=True)
            except AssertionError:
                np.testing.assert_allclose(mxnet_res.astype('int32'),
                                           tvm_res.astype('int32'),
                                           atol=1,
                                           verbose=True)
        else:
            tvm_pred = np.squeeze(tvm_res).argsort()[-5:][::-1]
            mxnet_pred = np.squeeze(mxnet_res).argsort()[-5:][::-1]

            if correct_label == tvm_pred[0]:
                top_1_raw += 1
                top_5_raw += 1
            elif correct_label in tvm_pred:
                top_5_raw += 1

            if correct_label == mxnet_pred[0]:
                top_1_raw_mxnet += 1
                top_5_raw_mxnet += 1
            elif correct_label in mxnet_pred:
                top_5_raw_mxnet += 1

        counter += 1
        if counter == num_inference_images:
            break

    model_name = symbol_file.split('/')[-1].replace('.json', '')
    top_1 = float(top_1_raw_mxnet) / float(counter)
    top_5 = float(top_5_raw_mxnet) / float(counter)
    print("Mxnet", model_name, top_1, top_5, sep='\t')

    top_1 = float(top_1_raw) / float(counter)
    top_5 = float(top_5_raw) / float(counter)
    print("Tvm", model_name, top_1, top_5, sep='\t')