Example #1
def test_compile_injective_with_tuple():
    x = relay.var("x", shape=(2, 3))
    y = relay.var("y", shape=(2, 3))
    x_transpose = relay.transpose(x)
    output = relay.Tuple([x_transpose, y])
    func = relay.Function([x, y], output)
    relay.build(func, 'llvm')
Example #2
def test_tuple_intermediate():
    def before(x):
        inj = relay.squeeze(x)
        y1 = relay.add(inj, relay.const(1, "float32"))
        tmp = relay.squeeze(inj)
        tmp = relay.add(tmp, relay.const(1, "float32"))
        y2 = relay.add(tmp, relay.const(1, "float32"))
        y3 = relay.add(inj, relay.const(1, "float32"))
        concat = relay.concatenate((y1, y2, y3), axis=1)
        out_inj = relay.squeeze(concat)
        out = relay.add(out_inj, relay.const(1, "float32"))
        return relay.Function(relay.ir_pass.free_vars(out), out)

    def expected(p0):
        f0 = before(p0)
        x = relay.var("x", shape=dshape)
        y = relay.Call(f0, [x])
        return relay.Function([x], y)

    dshape = (1, 16, 64, 64)
    x = relay.var("x", shape=dshape)
    z = before(x)
    z = relay.ir_pass.infer_type(z)
    zz = relay.ir_pass.fuse_ops(z, opt_level=0)
    assert not relay.ir_pass.free_vars(zz)
    zz = relay.ir_pass.fuse_ops(z, opt_level=2)
    relay.build(zz, 'llvm')
    zz = relay.ir_pass.infer_type(zz)
    assert not relay.ir_pass.free_vars(zz)
    after = relay.ir_pass.infer_type(expected(x))
    assert relay.ir_pass.alpha_equal(zz, after)
Example #3
def test_tuple_consecutive():
    def gen_intermediate_tuple(x):
        y1 = relay.add(x, relay.const(1, "float32"))
        y2 = relay.add(x, relay.const(1, "float32"))
        y3 = relay.add(x, relay.const(1, "float32"))
        concat = relay.concatenate((y1, y2, y3), axis=1)
        out = relay.add(concat, relay.const(1, "float32"))
        return out

    def gen_consecutive_tuple(x):
        y1 = gen_intermediate_tuple(x)
        y2 = gen_intermediate_tuple(x)
        y3 = gen_intermediate_tuple(x)
        concat = relay.concatenate((y1, y2, y3), axis=1)
        return concat

    def before(x):
        concat = gen_consecutive_tuple(x)
        pooled = relay.nn.max_pool2d(concat, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        out2 = relay.add(out, relay.const(1, "float32"))
        out_tup = relay.Tuple((out, out2))
        return relay.Function(relay.ir_pass.free_vars(out_tup), out_tup)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        concat = gen_consecutive_tuple(p0)
        f0 = relay.Function([p0], concat)

        p01 = relay.var("p01", shape=(1, dshape[1]*9, dshape[2], dshape[3]))
        pooled = relay.nn.max_pool2d(p01, pool_size=(2, 2), strides=(2, 2), padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        f1 = relay.Function([p01], out)

        p02 = relay.var("p02", shape=(1, dshape[1]*9, dshape[2]//2, dshape[3]//2))
        out = relay.add(p02, relay.const(1, "float32"))
        f2 = relay.Function([p02], out)

        x = relay.var("x", shape=dshape)
        y = relay.Call(f0, [x])
        z = relay.Call(f1, [y])
        z2 = relay.Call(f2, [z])

        return relay.Function([x], relay.Tuple((z, z2)))

    dshape = (1, 16, 64, 64)
    x = relay.var("x", shape=dshape)
    z = before(x)
    z = relay.ir_pass.infer_type(z)
    zz = relay.ir_pass.fuse_ops(z, opt_level=0)
    assert not relay.ir_pass.free_vars(zz)
    zz = relay.ir_pass.fuse_ops(z, opt_level=2)
    relay.build(zz, 'llvm')
    zz = relay.ir_pass.infer_type(zz)
    assert not relay.ir_pass.free_vars(zz)
    after = relay.ir_pass.infer_type(expected(dshape))
    assert relay.ir_pass.alpha_equal(zz, after)
Example #4
def test_gru_like():
    def unit(rnn_dim):
        X = relay.var("X", shape=(1, rnn_dim))
        W = relay.var("y", shape=(3 * rnn_dim, rnn_dim))
        matmul = relay.nn.dense(X, W)
        splitted = relay.split(matmul, indices_or_sections=3, axis=1)
        out = relay.sigmoid(splitted[0]) + relay.tanh(splitted[1]) * relay.exp(splitted[2])
        return relay.Function([X, W], out)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def unit_numpy(X, W):
        prod = np.dot(X, W.transpose())
        splits = np.split(prod, indices_or_sections=3, axis=1)
        return sigmoid(splits[0]) + np.tanh(splits[1]) * np.exp(splits[2])

    dtype = "float32"
    rnn_dim = 1000
    x = np.random.rand(1, rnn_dim).astype(dtype)
    y = np.random.rand(3*rnn_dim, rnn_dim).astype(dtype) * 0.01 - 0.005
    out_shape = (1, rnn_dim)
    z = unit(rnn_dim)

    for target, ctx in ctx_list():
        with relay.build_config(opt_level=2):
            graph, lib, params = relay.build(z, target)
            m = graph_runtime.create(graph, lib, ctx)
            m.set_input("X", tvm.nd.array(x.astype(dtype)))
            m.set_input("y", tvm.nd.array(y.astype(dtype)))
            m.set_input(**params)
            m.run()
            out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
            ref = unit_numpy(x, y)
            tvm.testing.assert_allclose(out, ref, rtol=1e-5, atol=1e-5)
Example #5
def test_compile_placeholder_bypass():
    engine = relay.backend.compile_engine.get()
    x = relay.var("x", shape=(2, 3))
    y = relay.var("y", shape=(2, 3))
    z = relay.var("z", shape=(2, 3))
    result = relay.Tuple([x, relay.op.concatenate([y, z], axis=0)])
    func = relay.Function(relay.ir_pass.free_vars(result), result)
    with relay.build_config(opt_level=0):
        graph, lib, params = relay.build(func, 'llvm')
Example #6
 def get_tvm_output(xs, target, ctx, dtype='float32'):
     shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
     func, params = relay.frontend.from_keras(keras_model, shape_dict)
     with relay.build_module.build_config(opt_level=2):
         graph, lib, params = relay.build(func, target, params=params)
     m = graph_runtime.create(graph, lib, ctx)
     for name, x in zip(keras_model.input_names, xs):
         m.set_input(name, tvm.nd.array(x.astype(dtype)))
     m.set_input(**params)
     m.run()
     return [m.get_output(i).asnumpy() for i in range(m.get_num_outputs())]
Example #7
def get_tvm_output(func, x, params, target, ctx,
                   out_shape=(1, 1000), input_name='image', dtype='float32'):
    with relay.build_module.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    m.run()
    # get outputs
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    return out.asnumpy()
Example #8
def test_with_params():
    x = relay.var('x', shape=(10, 5))
    y = relay.var('y', shape=(1, 5))
    z = relay.add(x, y)
    z = relay.exp(z)
    func = relay.Function([x, y], z)
    x_data = np.random.rand(10, 5).astype('float32')
    y_data = np.random.rand(1, 5).astype('float32')
    params = {"y": y_data}
    graph, lib, params = relay.build(func, "llvm", params=params)
    mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
    mod.set_input(**params)
    mod.set_input(x=x_data)
    mod.run()
    res = mod.get_output(0).asnumpy()
    ref_res = np.exp(y_data + x_data)
    tvm.testing.assert_allclose(res, ref_res)
Example #9
def run_tvm_graph(tflite_model_buf, input_data, input_node, num_output=1, target='llvm',
                  out_names=None):
    """ Generic function to compile on relay and execute on tvm """
    try:
        import tflite.Model
    except ImportError:
        raise ImportError("The tflite package must be installed")

    # get TFLite model from buffer
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)

    input_data = convert_to_list(input_data)
    input_node = convert_to_list(input_node)

    shape_dict = {}
    dtype_dict = {}
    for i, e in enumerate(input_node):
        shape_dict[e] = input_data[i].shape
        dtype_dict[e] = input_data[i].dtype.name

    func, params = relay.frontend.from_tflite(tflite_model,
                                              shape_dict=shape_dict,
                                              dtype_dict=dtype_dict)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    ctx = tvm.context(target, 0)
    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    for i, e in enumerate(input_node):
        m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))

    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    assert out_names is None or num_output == len(out_names), "out_names: {} num_output: {}".format(
        out_names, num_output)
    tvm_output_list = []
    for i in range(0, num_output):
        tvm_output = m.get_output(i)
        tvm_output_list.append(tvm_output.asnumpy())
    return tvm_output_list
Example #10
 def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'):
     shape_dict = {"data": x.shape}
     if gluon_impl:
         new_sym, params = relay.frontend.from_mxnet(symbol, shape_dict)
     else:
         new_sym, params = relay.frontend.from_mxnet(symbol,
                                                     shape_dict,
                                                     arg_params=args,
                                                     aux_params=auxs)
     with relay.build_config(opt_level=3):
         graph, lib, params = relay.build(new_sym, target, params=params)
     m = graph_runtime.create(graph, lib, ctx)
     # set inputs
     m.set_input("data", tvm.nd.array(x.astype(dtype)))
     m.set_input(**params)
     m.run()
     # get outputs
     out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
     return out.asnumpy()
Example #11
def get_tvm_output(graph_def, input_data, target, ctx, output_shape=None, output_dtype='float32'):
    """ Generic function to execute and get tvm output"""
    target = 'llvm'
    if isinstance(input_data, list):
        input_names = {}
        shape_dict = {}
        dtype_dict = {}
        for i, _ in enumerate(input_data):
            input_names[i] = graph_def.graph.input[i].name
            shape_dict[input_names[i]] = input_data[i].shape
            dtype_dict[input_names[i]] = input_data[i].dtype
    else:
        input_names = graph_def.graph.input[0].name
        shape_dict = {input_names: input_data.shape}
        dtype_dict = {input_names: input_data.dtype}

    sym, params = relay.frontend.from_onnx(graph_def, shape_dict)
    with relay.build_config(opt_level=1):
        graph, lib, params = relay.build(sym, target, params=params)

    ctx = tvm.cpu(0)
    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_names):
            m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))

    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, _ in enumerate(output_shape):
            tvm_output = m.get_output(i)
            tvm_output_list.append(tvm_output.asnumpy())
        return tvm_output_list
    else:
        tvm_output = m.get_output(0)
        return tvm_output.asnumpy()
Example #12
    def get_tvm_output(net, data, params, target, ctx, dtype='float32'):
        with relay.build_config(opt_level=1):
            graph, lib, params = relay.build(net, target, params=params)

        m = graph_runtime.create(graph, lib, ctx)
        # set inputs
        m.set_input("data", data)
        m.set_input(**params)
        m.run()
        out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

        if measure:
            print("Evaluate graph runtime inference time cost...")
            ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
            # Measure in millisecond.
            prof_res = np.array(ftimer().results) * 1000
            print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
                  (np.mean(prof_res), np.std(prof_res)))

        return out.asnumpy()
Example #13
 def test_runtime(target, device, func, fallback_device=None,
                  expected_index=None):
     params = {"x": x_data, "y": y_data}
     config = {"opt_level": 1}
     if fallback_device:
         config["fallback_device"] = fallback_device
     with relay.build_config(**config):
         graph, lib, params = relay.build(
             func,
             target,
             params=params)
         contexts = [tvm.cpu(0), tvm.context(device)]
         graph_json = json.loads(graph)
         if "device_index" in graph_json["attrs"]:
             device_index = graph_json["attrs"]["device_index"][1]
             assert device_index == expected_index
         mod = graph_runtime.create(graph, lib, contexts)
         mod.set_input(**params)
         mod.run()
         res = mod.get_output(0).asnumpy()
         tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example #14
    def verify_graph_runtime(remote, target, shape, dtype):
        x = relay.var('x')
        y = relay.const(1)
        z = relay.add(x, y)
        func = relay.Function([x], z)

        x_in = np.ones(shape).astype(dtype)
        params = {'x': x_in}
        graph, lib, params = relay.build(func, target=target, params=params)

        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        lib.save(path_dso)
        remote.upload(path_dso)
        lib = remote.load_module("dev_lib.o")
        ctx = remote.cpu(0)
        mod = graph_runtime.create(graph, lib, ctx)
        mod.load_params(relay.save_param_dict(params))
        mod.run()
        out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
        tvm.testing.assert_allclose(x_in + 1, out.asnumpy())
Example #15
def run_tvm_graph(coreml_model, target, ctx, input_data, input_name, output_shape, output_dtype='float32'):
    """ Generic function to compile on relay and execute on tvm """
    if isinstance(input_data, list):
        shape_dict = {}
        dtype_dict = {}
        for i, e in enumerate(input_name):
            shape_dict[e] = input_data[i].shape
            dtype_dict[e] = input_data[i].dtype
    else:
        shape_dict = {input_name: input_data.shape}
        dtype_dict = {input_name: input_data.dtype}

    func, params = relay.frontend.from_coreml(coreml_model, shape_dict)
    with relay.build_module.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_name):
            m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype)))

    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, s in enumerate(output_shape):
            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
            tvm_output_list.append(tvm_output.asnumpy())
        return tvm_output_list
    else:
        tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
        return tvm_output.asnumpy()
Example #16
def test_tuple_consecutive():
    def gen_intermediate_tuple(x):
        y1 = relay.add(x, relay.const(1, "float32"))
        y2 = relay.add(x, relay.const(1, "float32"))
        y3 = relay.add(x, relay.const(1, "float32"))
        concat = relay.concatenate((y1, y2, y3), axis=1)
        out = relay.add(concat, relay.const(1, "float32"))
        return out

    def gen_consecutive_tuple(x):
        y1 = gen_intermediate_tuple(x)
        y2 = gen_intermediate_tuple(x)
        y3 = gen_intermediate_tuple(x)
        concat = relay.concatenate((y1, y2, y3), axis=1)
        return concat

    def before(x):
        concat = gen_consecutive_tuple(x)
        pooled = relay.nn.max_pool2d(concat,
                                     pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        out2 = relay.add(out, relay.const(1, "float32"))
        out_tup = relay.Tuple((out, out2))
        return relay.Function(relay.analysis.free_vars(out_tup), out_tup)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        concat = gen_consecutive_tuple(p0)
        f0 = relay.Function([p0], concat)
        f0 = f0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p01 = relay.var("p01", shape=(1, dshape[1] * 9, dshape[2], dshape[3]))
        pooled = relay.nn.max_pool2d(p01,
                                     pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding=(0, 0))
        out = relay.add(pooled, relay.const(1, "float32"))
        f1 = relay.Function([p01], out)
        f1 = f1.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p02 = relay.var("p02",
                        shape=(1, dshape[1] * 9, dshape[2] // 2,
                               dshape[3] // 2))
        out = relay.add(p02, relay.const(1, "float32"))
        f2 = relay.Function([p02], out)
        f2 = f2.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        x = relay.var("x", shape=dshape)
        y = relay.Call(f0, [x])
        z = relay.Call(f1, [y])
        z2 = relay.Call(f2, [z])

        return relay.Function([x], relay.Tuple((z, z2)))

    dshape = (1, 16, 64, 64)
    x = relay.var("x", shape=dshape)
    orig = before(x)
    fuse0(tvm.IRModule.from_expr(orig))
    m = fuse2(tvm.IRModule.from_expr(orig))
    relay.build(m, "llvm")
    after = run_opt_pass(expected(dshape), transform.InferType())
    assert tvm.ir.structural_equal(m["main"], after)
Example #17
def test_meta_schedule_te2primfunc_argument_order():
    @derived_object
    class TestDummyDatabase(PyDatabase):
        def __init__(self):
            super().__init__()
            self.records = []
            self.workload_reg = []

        def has_workload(self, mod: IRModule) -> Workload:
            for workload in self.workload_reg:
                if tvm.ir.structural_equal(workload.mod, mod):
                    return True
            # The database has already put in all correct workloads
            raise ValueError(
                "The workload searched for is not in given database!"
                + " Incorrect TIR was generated from TE subgraph."
            )

        def commit_tuning_record(self, record: TuningRecord) -> None:
            self.records.append(record)

        def commit_workload(self, mod: IRModule) -> Workload:
            for workload in self.workload_reg:
                if tvm.ir.structural_equal(workload.mod, mod):
                    return workload
            workload = Workload(mod)
            self.workload_reg.append(workload)
            return workload

        def get_top_k(self, workload: Workload, top_k: int) -> List[TuningRecord]:
            return list(
                filter(
                    lambda x: x.workload == workload,
                    sorted(self.records, key=lambda x: sum(x.run_secs) / len(x.run_secs)),
                )
            )[: int(top_k)]

        def __len__(self) -> int:
            return len(self.records)

        def print_results(self) -> None:
            print("\n".join([str(r) for r in self.records]))

    data_shape = (1, 3, 16, 16)
    weight_shape = (8, 3, 5, 5)
    data = relay.var("data", relay.TensorType(data_shape, "float32"))
    weight = relay.var("weight", relay.TensorType(weight_shape, "float32"))
    y = relay.nn.conv2d(
        data,
        weight,
        padding=(2, 2),
        kernel_size=(5, 5),
        kernel_layout="OIHW",
        out_dtype="float32",
    )
    f = relay.Function([data, weight], y)
    mod = tvm.IRModule.from_expr(f)
    mod = relay.transform.InferType()(mod)

    data_sample = np.random.rand(*data_shape).astype("float32")
    weight_sample = np.random.rand(*weight_shape).astype("float32")
    params = {mod["main"].params[1].name_hint: weight_sample}

    input_name = "data"
    dev = tvm.cpu()
    target = Target("llvm --num-cores=16")
    data = tvm.nd.array(data_sample, dev)

    database = TestDummyDatabase()
    database.commit_workload(tvmgen_default_fused_layout_transform)
    database.commit_workload(tvmgen_default_fused_layout_transform_1)
    database.commit_workload(tvmgen_default_fused_nn_contrib_conv2d_NCHWc)

    with ApplyHistoryBest(database):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_meta_schedule": True},
        ):
            rt_mod1 = relay.build(mod, target=target, params=params)

    # Compile without meta-scheduler for correctness check
    with tvm.transform.PassContext(opt_level=0):
        rt_mod2 = relay.build(mod, target=target, params=params)

    def get_output(data, lib):
        module = graph_executor.GraphModule(lib["default"](dev))
        module.set_input(input_name, data)
        module.run()
        return module.get_output(0).numpy()

    # Check correctness
    actual_output = get_output(data, rt_mod1)
    expected_output = get_output(data, rt_mod2)
    assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
Example #18
def compile(graph: Graph, batch_size, target, target_host):
    relay_module, params = graph2relay(graph, batch_size)
    with relay.build_config(opt_level=3):
        graph_json, tvm_module, params = relay.build(relay_module, target=target, target_host=target_host, params=params)
    return graph_json, tvm_module, params
Example #19
# Output numerical difference < 10e-4 %.
#
# DGL version: https://github.com/dmlc/dgl/blob/master/examples/mxnet/gcn/gcn.py
from tvm.contrib import graph_runtime
import time

# Set up weights. You can modify this part and use your own trained weights.
params['in_weight'] = np.ones((input_dim, hidden_dim), dtype='float32')
params['out_weight'] = np.ones((hidden_dim, num_classes), dtype='float32')
for i in range(num_hidden):
    params["%s_weight" % (str(i))] = np.ones((hidden_dim, hidden_dim),
                                             dtype='float32')

# Generate graph and library
with relay.build_config(opt_level=0):  # Currently only opt_level=0 is supported
    graph, lib, params = relay.build(func, target, params=params)
    lib.save("lib.o")

# Generate module for llvm
ctx = tvm.context(target, 0)
m = graph_runtime.create(graph, lib, ctx)
m.set_input(**params)

print("finished compiling, testing inference time cost")
totaltime = 0
for i in range(30):
    st = time.time()
    # One forward pass on the entire network
    m.run()
    end = time.time()
    # Retrieve output Tensor as numpy array
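The listing is truncated here; a hedged sketch of how the timing loop typically finishes, using only variables already defined above (the averaging step is an assumption):

    # Assumed continuation: fetch the output tensor and accumulate elapsed time.
    logits_tvm = m.get_output(0).asnumpy()
    totaltime += (end - st)

print("Average inference time over 30 runs: %.6f s" % (totaltime / 30))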
Example #20
def build_run_compare(tvm_mod,
                      params1,
                      input_shape,
                      dtype="float32",
                      target="llvm",
                      gpu_preprocess=None):

    if "TVM_TRACKER_HOST" in os.environ and "TVM_TRACKER_PORT" in os.environ:
        rpc_tracker_host = os.environ["TVM_TRACKER_HOST"]
        rpc_tracker_port = os.environ["TVM_TRACKER_PORT"]
        run_on_host = 0
        target_host = "llvm -mtriple=arm64-linux-android"
        rpc_tracker_port = int(rpc_tracker_port)
    else:
        run_on_host = 1
        target_host = "llvm"

    if gpu_preprocess:
        tvm_mod_nchwc = gpu_preprocess(tvm_mod)
    else:
        tvm_mod_nchwc = tvm_mod

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(tvm_mod_nchwc,
                                         target_host=target_host,
                                         target=target,
                                         params=params1)
    if run_on_host:
        ctx = tvm.opencl()
        m = graph_runtime.create(graph, lib, ctx)
    else:
        from tvm import rpc
        from tvm.contrib import utils, ndk

        rpc_key = "android"
        tracker = rpc.connect_tracker(rpc_tracker_host, rpc_tracker_port)
        remote = tracker.request(rpc_key, priority=0, session_timeout=600)
        temp = utils.tempdir()
        dso_binary = "dev_lib_cl.so"
        dso_binary_path = temp.relpath(dso_binary)
        ctx = remote.cl(0)
        lib.export_library(dso_binary_path, ndk.create_shared)
        remote.upload(dso_binary_path)
        rlib = remote.load_module(dso_binary)
        m = graph_runtime.create(graph, rlib, ctx)
    m.set_input(**params)
    inputs = []
    if isinstance(input_shape, dict):
        for key in input_shape:
            inputs.append(
                np.random.normal(size=input_shape[key]).astype(dtype))
            m.set_input(key, inputs[-1])
    else:
        inputs.append(np.random.normal(size=input_shape).astype(dtype))
        m.set_input("data", inputs[-1])
    m.run()

    ref_outputs = get_cpu_reference(tvm_mod, params1, input_shape, inputs)
    for i, ref_output in enumerate(ref_outputs):
        tvm_output = m.get_output(i)
        output = tvm_output.asnumpy()
        # for index, x in np.ndenumerate(ref_output):
        #     if abs(output[index] - x) > 0.01:
        #         print(index, output[index], x)

        np.testing.assert_allclose(output, ref_output, rtol=1e-1, atol=1e-1)
Example #21
])
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)

tvm_target = get_tvm_target(device, get_device_type(), get_device_arch(),
                            get_device_attributes())

tvm_targets = tvm.target.Target(tvm_target)
cpu_target = "llvm"
target_host = cpu_target

cpudevice = tvm.runtime.cpu()

with tvm.transform.PassContext(opt_level=3):
    graph_mod = relay.build(mod,
                            tvm_targets,
                            params=params,
                            target_host=target_host)

lib = graph_mod.get_lib()
params = graph_mod.get_params()

# Create a runtime executor module
module = graph_executor.GraphModule(graph_mod["default"](cpudevice))

# Feed input data
module.set_input(input_tensor, tvm.nd.array(image_data))

# Feed related params
module.set_input(**params)

ftimer = module.module.time_evaluator("run", cpudevice, number=1, repeat=10)
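The listing stops after creating the time evaluator; a hedged sketch of how its results are usually reported, mirroring Example #12 above (assumes numpy is imported as np):

# Convert the measured results to milliseconds and report mean and std dev.
prof_res = np.array(ftimer().results) * 1000
print("Mean inference time (std dev): %.2f ms (%.2f ms)"
      % (np.mean(prof_res), np.std(prof_res)))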
Example #22
                env.BATCH,
                env.BLOCK_OUT,
                env.WGT_WIDTH,
                start_name=pack_dict[model][0],
                stop_name=pack_dict[model][1],
                device_annot=(env.TARGET == "intelfocl"),
            )
    else:
        relay_prog = mod["main"]

    # Compile Relay program with AlterOpLayout disabled
    if target.device_name != "vta":
        with tvm.transform.PassContext(opt_level=3,
                                       disabled_pass={"AlterOpLayout"}):
            graph, lib, params = relay.build(relay_prog,
                                             target=target,
                                             params=params,
                                             target_host=env.target_host)
    else:
        if env.TARGET == "intelfocl":
            # multiple targets to run both on cpu and vta
            target = {"cpu": env.target_vta_cpu, "ext_dev": target}
        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            graph, lib, params = relay.build(relay_prog,
                                             target=target,
                                             params=params,
                                             target_host=env.target_host)

    # Measure Relay build time
    build_time = time.time() - build_start
    print(model + " inference graph built in {0:.2f}s!".format(build_time))
Example #23
def get_ref_rt_mod(mod, params, target="cuda"):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(target, 0)
    rt_mod = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
    return rt_mod, dev
Example #24
if local_demo:
    target_host = None
    target = 'llvm'
elif test_target == 'opencl':
    target_host = target
    target = 'opencl'
elif test_target == 'vulkan':
    target_host = target
    target = 'vulkan'

input_name = 'input_1'
shape_dict = {input_name: x.shape}
func, params = relay.frontend.from_keras(keras_mobilenet_v2, shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(func, target=target,
                                     target_host=target_host, params=params)

# After `relay.build`, you will get three return values: the graph,
# the library, and the new parameters. The build applies optimizations
# that may change the parameters while keeping the model's results the same.

# Save the library at local temporary directory.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.so')
fcompile = ndk.create_shared if not local_demo else None
lib.export_library(lib_fname, fcompile)

######################################################################
# Deploy the Model Remotely by RPC
# ---------------------------------------------
# With RPC, you can deploy the model remotely from your host machine
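The tutorial comment is cut off here; a hedged sketch of the RPC deployment step it introduces, following the pattern of Examples #14 and #20 (the tracker environment variables and the device key are assumptions):

import os
from tvm import rpc
from tvm.contrib import graph_runtime

# Request a session to the device through an RPC tracker
# (tracker address and key are illustrative assumptions).
tracker = rpc.connect_tracker(os.environ["TVM_TRACKER_HOST"],
                              int(os.environ["TVM_TRACKER_PORT"]))
remote = tracker.request("android")

# Upload the compiled library, load it on the device, and run one inference.
remote.upload(lib_fname)
rlib = remote.load_module("net.so")
ctx = remote.cpu(0)
module = graph_runtime.create(graph, rlib, ctx)
module.set_input(**params)
module.run()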
Example #25
def manual_tir_common(do_tune=False):
    M, N, K = 1024, 1024, 1024  # pylint: disable=invalid-name
    data_shape = (M, K)
    weight_shape = (N, K)

    data_dtype = "uint8"
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    weight = relay.var("weight", shape=weight_shape, dtype="int8")
    bias = relay.var("bias", shape=(weight_shape[0],), dtype="int32")

    # dense is tuned by the TIR schedule above, bmm is scheduled by TE (topi/x86/batch_matmul.py)
    dense = relay.nn.dense(data, weight, out_dtype="int32")
    bias_add = relay.nn.bias_add(dense, bias) + relay.const(1, dtype="int32")
    out = relay.nn.batch_matmul(
        relay.cast(relay.expand_dims(bias_add, 0), "uint8"),
        relay.cast(relay.expand_dims(bias_add, 0), "int8"),
        out_dtype="int32",
    )

    relay_mod = tvm.IRModule.from_expr(out)

    target = "llvm -mcpu=cascadelake -num-cores 4"
    dev = tvm.device(target, 0)

    data = np.random.uniform(1, 10, size=(M, K)).astype("uint8")
    weight_np = np.random.uniform(1, 10, size=weight_shape).astype("int8")
    bias_np = np.random.uniform(1, 10, size=(weight_shape[0],)).astype("int32")

    ref = (
        relay.create_executor("vm", mod=relay_mod, device=dev, target=target)
        .evaluate()(*[data, weight_np, bias_np])
        .numpy()
    )

    params = {"weight": weight_np, "bias": bias_np}

    if do_tune:
        extracted_tasks = extract_task_from_relay(relay_mod, target, params)

        # Filter out tasks that we don't intend to schedule / tune with TIR.
        tune_tasks = list(
            filter(
                lambda task: "dense" in task.task_name,
                extracted_tasks,
            )
        )
        config = TuneConfig(
            strategy="replay_trace",
            num_trials_per_iter=64,
            max_trials_per_task=20000,
            max_trials_global=20000,
        )

        with tempfile.TemporaryDirectory() as work_dir:
            # postprocs=lambda: [] is important to prevent default post processors from
            # tampering with the manual schedule.
            database = tune_extracted_tasks(
                tune_tasks,
                config,
                work_dir=work_dir,
                postprocs=lambda: [],
            )
    else:

        def schedule_fn(task, sch):
            if "dense" not in task.task_name:
                return False

            block = sch.get_block("compute")

            # Looks up schedule_rule annotation.
            # See the comment in test_tune_relay_manual_tir_vnni().
            schedule_rule = sch.get(block).annotations["schedule_rule"]

            assert "dense_vnni" in schedule_rule

            schedule_dense(block, M, False, sch)

            return True

        database = apply_fixed_schedules(relay_mod, target, params, schedule_fn)

    with ApplyHistoryBest(database):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_meta_schedule": True},
        ):
            # pylint: disable=W0105
            """
            The log should say
            Warning: Cannot find workload: tvmgen_default_fused_expand_dims
            Warning: Cannot find workload: tvmgen_default_fused_cast
            Warning: Cannot find workload: tvmgen_default_fused_cast_1
            Warning: Cannot find workload: tvmgen_default_fused_nn_batch_matmul

            This means batch matmul and others are scheduled by TE, and dense (the one not warned)
            is found in the meta schedule tuning database during ApplyHistoryBest
            """
            # pylint: enable=W0105
            lib = relay.build(relay_mod, target=target, params=params)

    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

    runtime.set_input("data", data)
    runtime.run()

    out = runtime.get_output(0).numpy()

    np.testing.assert_equal(out, ref)
Example #26
    def compile_model(self):
        if device == 'vta':
            self.remote = rpc.connect(self.pynq_addr, 9091)
            vta.reconfig_runtime(self.remote)
            vta.program_fpga(self.remote, bitstream=None)
        else:
            self.remote = rpc.LocalSession()

        self.ctx = self.remote.ext_dev(
            0) if device == 'vta' else self.remote.cpu(0)

        # Load pre-configured AutoTVM schedules
        with autotvm.tophub.context(target):

            # Populate the shape and data type dictionary for ResNet input
            dtype_dict = {'data': 'float32'}
            shape_dict = {'data': (env.BATCH, 3, 224, 224)}

            gluon_model = vision.resnet18_v1(
                pretrained=True, ctx=ctx
            ).features if args.nonsplit else splitnet.resnet18_v1_split(
                self.id + 1)

            # Measure build start time
            build_start = time.time()

            # Start front end compilation
            mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

            # Update shape and type dictionary
            shape_dict.update({k: v.shape for k, v in params.items()})
            dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

            # Perform quantization in Relay
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
                relay_prog = relay.quantize.quantize(mod['main'],
                                                     params=params)

            # Perform graph packing and constant folding for VTA target
            if target.device_name == 'vta':
                assert env.BLOCK_IN == env.BLOCK_OUT
                relay_prog = graph_pack(relay_prog,
                                        env.BATCH,
                                        env.BLOCK_OUT,
                                        env.WGT_WIDTH,
                                        start_name=start_pack,
                                        stop_name=stop_pack)

            # Compile Relay program with AlterOpLayout disabled
            with relay.build_config(opt_level=3,
                                    disabled_pass={'AlterOpLayout'}):
                if target.device_name != 'vta':
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)
                else:
                    with vta.build_config():
                        graph, lib, params = relay.build(
                            relay_prog,
                            target=target,
                            params=params,
                            target_host=env.target_host)

            self.params = params

            # Measure Relay build time
            build_time = time.time() - build_start
            print(f'inference graph for thread {self.id} built in '
                  f'{build_time:.4f}s!')

            # Send the inference library over to the remote RPC server
            temp = util.tempdir()
            lib.save(temp.relpath('graphlib.o'))
            self.remote.upload(temp.relpath('graphlib.o'))
            lib = self.remote.load_module('graphlib.o')

            # Graph runtime
            self.m = graph_runtime.create(graph, lib, self.ctx)
Example #27
def compile_tvm_graph_runtime(model, model_name, layout, compute_layout,
                              batch_size, seq_length, dtype, instance_type):
    key = (model_name, layout, compute_layout, batch_size, seq_length, dtype, instance_type)
    if key in _TVM_RT_CACHE:
        return _TVM_RT_CACHE[key]
    flags = get_ec2_tvm_flags()[instance_type]
    tvm = try_import_tvm()
    from tvm import relay
    from tvm.contrib import graph_runtime
    token_ids_shape = (batch_size, seq_length) if layout == 'NT' else (seq_length, batch_size)
    valid_length_shape = (batch_size,)
    if 'bart' in model_name:
        shape_dict = {
            'data0': token_ids_shape,
            'data1': valid_length_shape,
            'data2': token_ids_shape,
            'data3': valid_length_shape,
        }
        dtype_dict = {
            'data0': 'int32',
            'data1': 'int32',
            'data2': 'int32',
            'data3': 'int32',
        }
    elif 'roberta' in model_name or 'xlmr' in model_name:
        shape_dict = {
            'data0': token_ids_shape,
            'data1': valid_length_shape,
        }
        dtype_dict = {
            'data0': 'int32',
            'data1': 'int32',
        }
    else:
        shape_dict = {
            'data0': token_ids_shape,
            'data1': token_ids_shape,
            'data2': valid_length_shape,
        }
        dtype_dict = {
            'data0': 'int32',
            'data1': 'int32',
            'data2': 'int32'
        }
    sym = model._cached_graph[1]
    params = {}
    for k, v in model.collect_params().items():
        params[v._var_name] = tvm.nd.array(v.data().asnumpy())
    mod, params = relay.frontend.from_mxnet(sym, shape=shape_dict, dtype=dtype_dict, arg_params=params)
    target = flags['target']
    use_gpu = flags['use_gpu']
    opt_level = flags['opt_level']
    required_pass = flags['required_pass']
    with tvm.transform.PassContext(opt_level=opt_level, required_pass=required_pass):
        lib = relay.build(mod, target, params=params)
    if use_gpu:
        ctx = tvm.gpu()
    else:
        ctx = tvm.cpu()
    rt = graph_runtime.GraphModule(lib["default"](ctx))
    _TVM_RT_CACHE[key] = rt
    return rt
Example #28
def verify_model(model_name,
                 input_data=[],
                 custom_convert_map={},
                 ctx_list=ctx_list()):
    """Assert that the output of a compiled model matches with that of its
    baseline."""
    if isinstance(model_name, str):
        baseline_model, baseline_input = load_model(model_name)
    elif isinstance(input_data, list):
        baseline_model = model_name
        baseline_input = input_data
    elif isinstance(input_data, torch.Tensor) or len(input_data.shape) == 0:
        baseline_model = model_name
        baseline_input = [input_data]
    else:
        assert False, "Unexpected input format"

    if torch.cuda.is_available():
        baseline_model = baseline_model.cuda()
        baseline_input = [inp.cuda() for inp in baseline_input]

    with torch.no_grad():
        baseline_outputs = baseline_model(*baseline_input)

    if isinstance(baseline_outputs, tuple):
        baseline_outputs = tuple(out.cpu().numpy() for out in baseline_outputs)
    else:
        baseline_outputs = (baseline_outputs.float().cpu().numpy(), )

    trace = torch.jit.trace(baseline_model, baseline_input).float().eval()

    if torch.cuda.is_available():
        trace = trace.cuda()
    else:
        trace = trace.cpu()

    input_names = get_graph_input_names(trace)
    input_shapes = dict(zip(input_names,
                            [inp.shape for inp in baseline_input]))
    mod, params = relay.frontend.from_pytorch(trace, input_shapes,
                                              custom_convert_map)
    compiled_input = dict(
        zip(input_names, [inp.cpu().numpy() for inp in baseline_input]))

    with relay.build_config(opt_level=3):
        for target, ctx in ctx_list:
            relay_graph, relay_lib, relay_params = relay.build(mod,
                                                               target=target,
                                                               params=params)
            relay_model = graph_runtime.create(relay_graph, relay_lib, ctx)
            relay_model.set_input(**relay_params)
            for name, inp in compiled_input.items():
                relay_model.set_input(name, inp)
            relay_model.run()

            for i, baseline_output in enumerate(baseline_outputs):
                compiled_output = relay_model.get_output(i).asnumpy()

                assert_shapes_match(baseline_output, compiled_output)
                tvm.testing.assert_allclose(baseline_output,
                                            compiled_output,
                                            rtol=1e-3,
                                            atol=1e-3)

    del model_name
    del baseline_model
    torch.cuda.empty_cache()
Example #29
def run_unpropagatable_graph(dev, tgt):
    R""" The network is as following:
            a     b  c     d
             \   /    \   /
              add      mul
                \      /
                subtract
    """
    
    a = relay.var("a", shape=(10, 10))
    b = relay.var("b", shape=(10, 10))
    c = relay.var("c", shape=(10, 10))
    d = relay.var("d", shape=(10, 10))
    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')
    c_data = np.random.rand(10, 10).astype('float32')
    d_data = np.random.rand(10, 10).astype('float32')
    tmp_add = a_data + b_data
    tmp_mul = np.multiply(c_data, d_data)
    ref_res = np.subtract(tmp_add, tmp_mul)
    
    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", dev: tgt}
    cpu_ctx = fallback_device
    dev_ctx = tvm.context(dev)
    
    def annotated():    
        add = relay.add(a, b)
        _add = relay.annotation.on_device(add, dev_ctx)
        mul = relay.multiply(c, d)
        _mul = relay.annotation.on_device(mul, cpu_ctx)
        sub = relay.subtract(add, mul)
        _sub = relay.annotation.on_device(sub, dev_ctx)
        func = relay.Function([a, b, c, d],
                              relay.Tuple(tvm.convert([_add, _mul,
                                                       _sub, sub])))
        func = relay.ir_pass.infer_type(func)
        func = relay.ir_pass.rewrite_annotated_ops(func,
                                                   dev_ctx.device_type)
        func = relay.ir_pass.infer_type(func)
        return relay.Function(relay.ir_pass.free_vars(func.body[3]),
                              func.body[3])
        
    def expected():    
        add = relay.add(a, b)
        mul = relay.multiply(c, d)
        copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
        sub = relay.subtract(add, copy_mul_sub)
        func = relay.Function([a, b, c, d], sub)
        return func
    
    annotated_func = annotated()
    expected_func = expected()
    expected_index = [2, 2, 2, 1, 1, 1, 2, 2]
    check_annotated_graph(annotated_func, expected_func)
    params = {"a": a_data, "b": b_data, "c": c_data, "d": d_data}
    config = {"opt_level": 0}
    config["fallback_device"] = fallback_device
    with relay.build_config(**config):
        graph, lib, params = relay.build(annotated_func, target, params=params)
        contexts = [tvm.cpu(0), tvm.context(dev)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example #30
# If we run the example on our x86 server for demonstration, we can simply
# set it as :code:`llvm`. If running it on the Raspberry Pi, we need to
# specify its instruction set. Set :code:`local_demo` to False if you want
# to run this tutorial with a real device.

local_demo = True

if local_demo:
    target = tvm.target.create('llvm')
else:
    target = tvm.target.arm_cpu('rasp3b')
    # The above line is a simple form of
    # target = tvm.target.create('llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon')

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(func, target, params=params)

# After `relay.build`, you will get three return values: the graph,
# the library, and the new parameters. The build applies optimizations
# that may change the parameters while keeping the model's results the same.

# Save the library at local temporary directory.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.tar')
lib.export_library(lib_fname)
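# The graph JSON and the parameters returned by `relay.build` can be
# serialized alongside the library; a hedged sketch using
# `relay.save_param_dict` as in Example #14 (file names are placeholders).
with open(tmp.relpath('net.json'), 'w') as f_graph:
    f_graph.write(graph)
with open(tmp.relpath('net.params'), 'wb') as f_params:
    f_params.write(relay.save_param_dict(params))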

######################################################################
# Deploy the Model Remotely by RPC
# --------------------------------
# With RPC, you can deploy the model remotely from your host machine
# to the remote device.
Example #31
def compile_model(
    mod,
    params,
    target,
    dump_code=None,
    target_host=None,
    tuning_records=None,
    alter_layout=None,
    disabled_pass=None,
):
    """Compile a model from a supported framework into a TVM module.

    This function takes a union of the arguments of both frontends.load_model
    and compiler.compile_relay. The resulting TVM module can be executed using
    the graph executor.

    Parameters
    ----------
    mod: IRModule
        The relay module to be compiled.
    params: dict
        A dictionary containing the module's parameters.
    target : str
        The target for which to compile. Can be a plain string or
        a path.
    dump_code : list, optional
        Dump the generated code for the specified source types, on
        the requested target.
    target_host : str, optional
        The target of the host machine if host-side code
        needs to be generated.
    tuning_records: str, optional
        Path to the file produced by the tuning to be used during
        compilation.
    alter_layout: str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.
    disabled_pass: str, optional
        Comma-separated list of passes which need to be disabled
        during compilation.


    Returns
    -------
    graph : str
        A JSON-serialized TVM execution graph.
    lib : tvm.module.Module
        A TVM module containing the compiled functions.
    params : dict
        The parameters (weights) for the TVM module.
    dumps : dict
        Dictionary containing the dumps specified.

    """
    dump_code = [x.strip()
                 for x in dump_code.split(",")] if dump_code else None
    config = {}

    if alter_layout:
        mod = common.convert_graph_layout(mod, alter_layout)

    tvm_target, extra_targets = common.target_from_cli(target)
    target_host = tvm_target if not target_host else target_host
    tvm_target, target_host = Target.check_and_update_host_consist(
        tvm_target, target_host)

    for codegen_from_cli in extra_targets:
        codegen = composite_target.get_codegen_by_target(
            codegen_from_cli["name"])
        partition_function = codegen["pass_pipeline"]
        mod = partition_function(mod, params, **codegen_from_cli["opts"])
        if codegen["config_key"] is not None:
            config[codegen["config_key"]] = codegen_from_cli["opts"]

    if tuning_records and os.path.exists(tuning_records):
        logger.debug("tuning records file provided: %s", tuning_records)

        use_autoscheduler = True
        try:
            auto_scheduler.load_records(tuning_records)
        except tvm._ffi.base.TVMError:
            use_autoscheduler = False

        if use_autoscheduler:
            with auto_scheduler.ApplyHistoryBest(tuning_records):
                config["relay.backend.use_auto_scheduler"] = True
                with tvm.transform.PassContext(opt_level=3,
                                               config=config,
                                               disabled_pass=disabled_pass):
                    logger.debug("building relay graph with autoscheduler")
                    graph_module = relay.build(mod,
                                               target=target,
                                               params=params)
        else:
            with autotvm.apply_history_best(tuning_records):
                with tvm.transform.PassContext(opt_level=3,
                                               config=config,
                                               disabled_pass=disabled_pass):
                    logger.debug("building relay graph with tuning records")
                    graph_module = relay.build(mod, tvm_target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3,
                                       config=config,
                                       disabled_pass=disabled_pass):
            logger.debug("building relay graph (no tuning records provided)")
            graph_module = relay.build(mod, tvm_target, params=params)

    # Generate output dump files with sources
    dump_code = dump_code or []
    dumps = {}
    for source_type in dump_code:
        lib = graph_module.get_lib()
        # TODO lib.get_source call has inconsistent behavior for unsupported
        #      formats (@leandron).
        source = str(mod) if source_type == "relay" else lib.get_source(
            source_type)
        dumps[source_type] = source

    # TODO we need to update this return to use the updated graph module APIs
    #      as these getter functions will be deprecated in the next release (@leandron)
    return graph_module.get_json(), graph_module.get_lib(
    ), graph_module.get_params(), dumps
Example #32
                env.BATCH,
                env.BLOCK_OUT,
                env.WGT_WIDTH,
                start_name=pack_dict[model][0],
                stop_name=pack_dict[model][1],
                device_annot=(env.TARGET == "intelfocl"),
            )
    else:
        relay_prog = mod["main"]

    # Compile Relay program with AlterOpLayout disabled
    if target.device_name != "vta":
        with tvm.transform.PassContext(opt_level=3,
                                       disabled_pass={"AlterOpLayout"}):
            graph, lib, params = relay.build(relay_prog,
                                             target=tvm.target.Target(
                                                 target, host=env.target_host),
                                             params=params)
    else:
        if env.TARGET == "intelfocl":
            # multiple targets to run both on cpu and vta
            target = {"cpu": env.target_vta_cpu, "ext_dev": target}
        with vta.build_config(
                opt_level=3,
                disabled_pass={"AlterOpLayout", "tir.CommonSubexprElimTIR"}):
            graph, lib, params = relay.build(relay_prog,
                                             target=tvm.target.Target(
                                                 target, host=env.target_host),
                                             params=params)

    # Measure Relay build time
    build_time = time.time() - build_start
Example #33
def test_meta_schedule_relay_lowering():
    data_shape = (1, 3, 16, 16)
    weight_shape = (8, 3, 5, 5)
    data = relay.var("data", relay.TensorType(data_shape, "float32"))
    weight = relay.var("weight", relay.TensorType(weight_shape, "float32"))
    y = relay.nn.conv2d(
        data,
        weight,
        padding=(2, 2),
        kernel_size=(5, 5),
        kernel_layout="OIHW",
        out_dtype="float32",
    )
    f = relay.Function([data, weight], y)
    mod = tvm.IRModule.from_expr(f)
    mod = relay.transform.InferType()(mod)

    data_sample = np.random.rand(*data_shape).astype("float32")
    weight_sample = np.random.rand(*weight_shape).astype("float32")
    params = {mod["main"].params[1].name_hint: weight_sample}

    input_name = "data"
    dev = tvm.cpu()
    target = Target("llvm --num-cores=16")
    data = tvm.nd.array(data_sample, dev)

    with tempfile.TemporaryDirectory() as work_dir:
        database = JSONDatabase(
            osp.join(work_dir, "workload.json"), osp.join(work_dir, "records.json")
        )

        database.commit_tuning_record(
            TuningRecord(
                Trace([], {}),
                [0.0],
                database.commit_workload(tvmgen_default_fused_nn_contrib_conv2d_NCHWc),
                target=target,
                args_info=[],
            )
        )

        with ApplyHistoryBest(database):
            with tvm.transform.PassContext(
                opt_level=3,
                config={"relay.backend.use_meta_schedule": True},
            ):
                rt_mod1 = relay.build(mod, target=target, params=params)

        # Compile without meta-scheduler for correctness check
        with tvm.transform.PassContext(opt_level=0):
            rt_mod2 = relay.build(mod, target=target, params=params)

        def get_output(data, lib):
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input(input_name, data)
            module.run()
            return module.get_output(0).numpy()

        # Check correctness
        actual_output = get_output(data, rt_mod1)
        expected_output = get_output(data, rt_mod2)
        assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
Example #34
def test_compile_return_empty_tuple():
    x = relay.var("x", shape=[16], dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], relay.Tuple([])))
    graph, lib, _ = relay.build(mod, "llvm")
    mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
    mod.run()
Example #35
for i in range(num_layers + 1):
    params["layers.%d.weight" % (i)] = model_params["layers.%d.weight" % (i)]
    params["layers.%d.bias" % (i)] = model_params["layers.%d.bias" % (i)]

# Set the TVM build target
target = "llvm"  # Currently only support `llvm` as target

func = relay.Function(relay.analysis.free_vars(output), output)
func = relay.build_module.bind_params_by_name(func, params)
mod = tvm.IRModule()
mod["main"] = func
# Build with Relay
with tvm.transform.PassContext(
        opt_level=0):  # Currently only opt_level=0 is supported
    lib = relay.build(mod, target, params=params)

# Generate graph runtime
dev = tvm.device(target, 0)
m = graph_runtime.GraphModule(lib["default"](dev))

######################################################################
# Run the TVM model, test for accuracy and verify with DGL
# --------------------------------------------------------
m.run()
logits_tvm = m.get_output(0).asnumpy()
print("Print the first five outputs from TVM execution\n", logits_tvm[:5])

labels = data.labels
test_mask = data.test_mask
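The listing ends before the accuracy check announced in the comment above; a hedged sketch of what that verification might look like (the metric and array handling are assumptions):

# Assumed continuation: compare TVM predictions with ground truth on the test split.
test_mask = np.asarray(test_mask)
pred = logits_tvm.argmax(axis=1)
acc = np.mean(pred[test_mask] == np.asarray(labels)[test_mask])
print("Test accuracy of TVM results: {:.2%}".format(acc))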
Example #36
def test_tvm_integration(model_name, batch_size, seq_length, layout, ctx):
    tvm = try_import_tvm()
    from tvm import relay
    from tvm.contrib import graph_runtime
    tvm_recommended_flags = get_ec2_tvm_flags()
    if ctx.device_type == 'gpu':
        flags = tvm_recommended_flags['g4']
    elif ctx.device_type == 'cpu':
        flags = tvm_recommended_flags['c4']
        if model_name != 'google_albert_base_v2':
            # Skip all other tests
            return
    else:
        raise NotImplementedError
    with tempfile.TemporaryDirectory() as root, ctx:
        model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(
            model_name, root=root)
        cfg.defrost()
        cfg.MODEL.layout = layout
        cfg.freeze()
        model = model_cls.from_cfg(cfg)
        model.load_parameters(backbone_param_path)
        model.hybridize()
        if layout == 'NT':
            token_ids = mx.np.random.randint(0,
                                             cfg.MODEL.vocab_size,
                                             (batch_size, seq_length),
                                             dtype=np.int32)
            token_types = mx.np.random.randint(0,
                                               2, (batch_size, seq_length),
                                               dtype=np.int32)
            valid_length = mx.np.random.randint(seq_length // 2,
                                                seq_length, (batch_size, ),
                                                dtype=np.int32)
        else:
            token_ids = mx.np.random.randint(0,
                                             cfg.MODEL.vocab_size,
                                             (seq_length, batch_size),
                                             dtype=np.int32)
            token_types = mx.np.random.randint(0,
                                               2, (seq_length, batch_size),
                                               dtype=np.int32)
            valid_length = mx.np.random.randint(seq_length // 2,
                                                seq_length, (batch_size, ),
                                                dtype=np.int32)
        if 'bart' in model_name:
            mx_out = model(token_ids, valid_length, token_ids, valid_length)
            shape_dict = {
                'data0': token_ids.shape,
                'data1': valid_length.shape,
                'data2': token_ids.shape,
                'data3': valid_length.shape,
            }
            dtype_dict = {
                'data0': token_ids.dtype.name,
                'data1': valid_length.dtype.name,
                'data2': token_ids.dtype.name,
                'data3': valid_length.dtype.name,
            }
        elif 'roberta' in model_name or 'xlmr' in model_name:
            mx_out = model(token_ids, valid_length)
            shape_dict = {
                'data0': token_ids.shape,
                'data1': valid_length.shape,
            }
            dtype_dict = {
                'data0': token_ids.dtype.name,
                'data1': valid_length.dtype.name,
            }
        else:
            mx_out = model(token_ids, token_types, valid_length)
            shape_dict = {
                'data0': token_ids.shape,
                'data1': token_types.shape,
                'data2': valid_length.shape
            }
            dtype_dict = {
                'data0': token_ids.dtype.name,
                'data1': token_types.dtype.name,
                'data2': valid_length.dtype.name
            }
        sym = model._cached_graph[1]
        params = {}
        for k, v in model.collect_params().items():
            params[v._var_name] = tvm.nd.array(v.data().asnumpy())
        mod, params = relay.frontend.from_mxnet(sym,
                                                shape=shape_dict,
                                                dtype=dtype_dict,
                                                arg_params=params)
        target = flags['target']
        use_gpu = flags['use_gpu']
        opt_level = flags['opt_level']
        required_pass = flags['required_pass']
        with tvm.transform.PassContext(opt_level=opt_level,
                                       required_pass=required_pass):
            lib = relay.build(mod, target, params=params)
        if use_gpu:
            ctx = tvm.gpu()
        else:
            ctx = tvm.cpu()
        rt = graph_runtime.GraphModule(lib["default"](ctx))
        if 'bart' in model_name:
            rt.set_input(data0=token_ids,
                         data1=valid_length,
                         data2=token_ids,
                         data3=valid_length)
        elif 'roberta' in model_name or 'xlmr' in model_name:  # match the two-input case above
            rt.set_input(data0=token_ids, data1=valid_length)
        else:
            rt.set_input(data0=token_ids,
                         data1=token_types,
                         data2=valid_length)
        rt.run()
        for i in range(rt.get_num_outputs()):
            out = rt.get_output(i)
            if rt.get_num_outputs() == 1:
                mx_out_gt = mx_out.asnumpy()
            else:
                mx_out_gt = mx_out[i].asnumpy()
            npt.assert_allclose(out.asnumpy(), mx_out_gt, rtol=1e-3, atol=1e-1)
Ejemplo n.º 37
0
                mod["main"],
                env.BATCH,
                env.BLOCK_OUT,
                env.WGT_WIDTH,
                start_name=pack_dict[MODEL_NAME][0],
                stop_name=pack_dict[MODEL_NAME][1],
                start_name_idx=pack_dict[MODEL_NAME][2],
                stop_name_idx=pack_dict[MODEL_NAME][3])
    else:
        mod = mod["main"]

    # Compile Relay program with AlterOpLayout disabled
    with vta.build_config(disabled_pass={"AlterOpLayout"}):
        graph, lib, params = relay.build(
            mod,
            target=target,
            params=params,
            target_host=env.target_host)

    # Measure Relay build time
    build_time = time.time() - build_start
    print(MODEL_NAME + " inference graph built in {0:.2f}s!".format(build_time))

    # Send the inference library over to the remote RPC server
    temp = util.tempdir()
    lib.save(temp.relpath("graphlib.o"))
    remote.upload(temp.relpath("graphlib.o"))
    lib = remote.load_module("graphlib.o")

    # Graph runtime
    m = graph_runtime.create(graph, lib, ctx)
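    # A hedged sketch of how this runtime is typically exercised in the other VTA
    # examples in this document: feed a random input and time the run. The input
    # name "data" and the (1, 3, 224, 224) shape are assumptions, and `np` is
    # assumed to be imported.
    image = tvm.nd.array(np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
    m.set_input("data", image)
    timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
    prof_res = np.array(timer().results) * 1000  # convert to milliseconds
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))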
Ejemplo n.º 38
0
    relay.transform.ConvertLayout(desired_layouts)
])
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)

# Build the module against ARM CPU
tvm_target = get_tvm_target(device, get_device_type(), get_device_arch(),
                            get_device_attributes())

cpu_target = "llvm"
tvm_targets = tvm.target.Target(tvm_target, host=cpu_target)

cpudevice = tvm.runtime.cpu()

with tvm.transform.PassContext(opt_level=3):
    graph_mod = relay.build(mod, tvm_targets, params=params)

lib = graph_mod.get_lib()
params = graph_mod.get_params()

# Create a runtime executor module
module = graph_executor.GraphModule(graph_mod["default"](cpudevice))

# Feed input data
module.set_input(input_tensor, tvm.nd.array(image_data))

# Feed related params
module.set_input(**params)

ftimer = module.module.time_evaluator("run", cpudevice, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
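# Report the mean/std over the repeated runs, following the same pattern used by
# the other benchmarks in this document (a sketch; `prof_res` is in milliseconds).
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
      (np.mean(prof_res), np.std(prof_res)))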
Ejemplo n.º 39
0
def tune_network(network, target):
    # Extract tasks
    mod, params = get_network(network)
    target = tvm.target.Target(target)
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=60)
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=100,
            num_measures_per_round=2,
            early_stopping=1,
            runner=measure_ctx.runner,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)

        # Sample a schedule when missing
        with auto_scheduler.ApplyHistoryBestOrSample(None, num_measure=2):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib2 = relay.build(mod, target=target, params=params)

        # Compile without auto-scheduler and any other optimization for correctness check
        with tvm.transform.PassContext(opt_level=0):
            ref_lib = relay.build(mod, target=target, params=params)

        # Check the correctness
        def get_output(data, lib):
            dev = tvm.gpu()
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input("data", data)
            module.run()
            return module.get_output(0).asnumpy()

        np.random.seed(0)
        if network == "mlp":
            data = np.random.uniform(size=(1, 32))
        elif network == "winograd-test":
            data = np.random.uniform(size=(1, 23, 40, 32))
        else:
            raise ValueError("Unknown network: " + network)

        actual_output1 = get_output(data, lib)
        actual_output2 = get_output(data, lib2)
        expected_output = get_output(data, ref_lib)

        tvm.testing.assert_allclose(actual_output1, expected_output, rtol=1e-4, atol=1e-4)
        tvm.testing.assert_allclose(actual_output2, expected_output, rtol=1e-4, atol=1e-4)
Ejemplo n.º 40
0
def build(target):
    mod, params = relay.frontend.from_mxnet(block, {"data": dshape})
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target, params=params)
    return lib
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', required=True, help='Test data path')
    parser.add_argument('-t', '--target', required=True, help='Target device for inference')
    parser.add_argument('-m', '--max', type=int, default=100, help='Retrieve the maximum number of images')
    args = parser.parse_args()
    
    argv_test_data_path = args.path
    argv_target = args.target
    argv_max = args.max
    
    print(argv_test_data_path)
    print(argv_target)
    print(argv_max)
    
    # download pre-trained model from mxnet model_zoo
    block = vision.get_model('MobileNet1.0', pretrained=True)
    
    # ImageNet labels
    # Synset mapping ImageNet class indices to human-readable labels
    synset_path = "./imagenet1000_clsid_to_human.txt"

    with open(synset_path) as f:
        # text_labels = [' '.join(l.split()[1:]) for l in f]
        text_labels = eval(f.read())
        
    get_test_data(argv_test_data_path, argv_max)
    
    
    print('Relay: get model from mxnet...')
    img_ = transform_image_np(img_list[0])
    print('img', img_.shape, 'type: ', type(img_))

    shape_dict = {'data': img_.shape}
    print('Block: {0}, Dict_shape: {1}'.format(type(block), type(shape_dict)))

    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    print('Mod: {0}, Params: {1}'.format(type(mod), type(params)))
    func = mod['main']
    func = relay.Function(func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs)


    print("Relay: build the graph")
    # target = 'llvm'
    if argv_target == 'llvm':
        target = tvm.target.create('llvm')
        ctx = tvm.cpu(0)
    elif argv_target == 'cuda':
        target = tvm.target.create('cuda')
        ctx = tvm.gpu(0)
    else:
        target = argv_target
        ctx = tvm.cpu(0)  # fall back to CPU so `ctx` is always defined below
        
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)  
    print("Graph: {0}, lib: {1}, params: {2}".format(type(graph), type(lib), type(params)))


    print('Tvm: run the graph')
    dtype = 'float32'
    m = graph_runtime.create(graph, lib, ctx)
    
    print('Input the img')
    start_time_tvm = time.time()
    prob_avg = 0
    count = 0
    
    for img_ in img_list:
        count += 1
        m.set_input('data', tvm.nd.array(transform_image_np(img_).astype(dtype)))
        m.set_input(**params)

        m.run()
    
        tvm_output = m.get_output(0)
        tvm_output = tvm_output.asnumpy()[0]

        idx = np.argsort(tvm_output)[-3:][::-1]
        #     print('With prob = %.5f, it contains %s' % (tvm_output[idx[0]], text_labels[idx[0]]))
    
        prob_avg += tvm_output[idx[0]]

    print('Average top-1 probability = %0.5f' % float(prob_avg / count))
    print('Cost of time: %.5f sec' % (time.time() - start_time_tvm))
Ejemplo n.º 42
0
sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (model_dir, inference_symbol_folder))
_, arg_params, aux_params = load_checkpoint("%s/%s" % (model_dir, model_name), 0)

import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
    "-f", "--frontend",
    help="Frontend for compilation, nnvm or relay",
    type=str,
    default="nnvm")
args = parser.parse_args()
if args.frontend == "relay":
    net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params,
                                            aux_params=aux_params)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, target, params=params)
elif args.frontend == "nnvm":
    net, params = from_mxnet(sym, arg_params, aux_params)
    with compiler.build_config(opt_level=3):
        graph, lib, params = compiler.build(
            net, target, {"data": dshape}, params=params)
else:
    parser.print_help()
    parser.exit()

######################################################################
# Create TVM runtime and do inference

# Preprocess image
image = cv2.imread(test_image_path)
img_data = cv2.resize(image, (dshape[2], dshape[3]))
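# A possible continuation of the preprocessing, shown only as a sketch: convert
# the resized image to NCHW float32 with mean subtraction. The exact mean values
# and channel order are assumptions, not part of the snippet above.
img_data = img_data[:, :, (2, 1, 0)].astype(np.float32)       # BGR -> RGB
img_data -= np.array([123.0, 117.0, 104.0])                    # per-channel mean
img_data = np.expand_dims(img_data.transpose((2, 0, 1)), 0)    # HWC -> NCHW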
Ejemplo n.º 43
0
def tune_and_evaluate(tuning_opt):

    # Register VTA tuning tasks
    register_vta_tuning_tasks()

    # Perform task extraction on Relay program
    print("Extract tasks...")
    relay_prog, params = compile_network(env, target, network, start_pack,
                                         stop_pack)
    mod = tvm.IRModule.from_expr(relay_prog)
    tasks = autotvm.task.extract_from_program(
        mod,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ),
        target=target,
        target_host=env.target_host,
    )

    # filter out non-packed conv2d task
    tasks = list(filter(lambda t: len(t.args[0][1]) > 4, tasks))

    # We should have extracted 10 convolution tasks
    assert len(tasks) == 10
    print("Extracted {} conv2d tasks:".format(len(tasks)))
    for tsk in tasks:
        inp = tsk.args[0][1]
        wgt = tsk.args[1][1]
        batch = inp[0] * inp[4]
        in_filter = inp[1] * inp[5]
        out_filter = wgt[0] * wgt[4]
        height, width = inp[2], inp[3]
        hkernel, wkernel = wgt[2], wgt[3]
        hstride, wstride = tsk.args[2][0], tsk.args[2][1]
        hpad, wpad = tsk.args[3][0], tsk.args[3][1]
        print("({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})".format(
            batch,
            height,
            width,
            in_filter,
            out_filter,
            hkernel,
            wkernel,
            hpad,
            wpad,
            hstride,
            wstride,
        ))

    # We do not run the tuning on our web page server since it takes too long.
    # Comment out the following line to run it yourself.
    return

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # evaluate with tuning history
    if env.TARGET != "sim":
        # Get remote from fleet node
        remote = autotvm.measure.request_remote(env.TARGET,
                                                tracker_host,
                                                tracker_port,
                                                timeout=10000)
        # Reconfigure the JIT runtime and FPGA.
        vta.reconfig_runtime(remote)
        vta.program_fpga(remote, bitstream=None)
    else:
        # In simulation mode, host the RPC server locally.
        remote = rpc.LocalSession()

    # compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[log_file]):
        # Compile network
        print("Compile...")
        if target.device_name != "vta":
            with tvm.transform.PassContext(opt_level=3,
                                           disabled_pass={"AlterOpLayout"}):
                lib = relay.build(relay_prog,
                                  target=target,
                                  params=params,
                                  target_host=env.target_host)
        else:
            with vta.build_config(opt_level=3,
                                  disabled_pass={"AlterOpLayout"}):
                lib = relay.build(relay_prog,
                                  target=target,
                                  params=params,
                                  target_host=env.target_host)

        # Export library
        print("Upload...")
        temp = utils.tempdir()
        lib.export_library(temp.relpath("graphlib.tar"))
        remote.upload(temp.relpath("graphlib.tar"))
        lib = remote.load_module("graphlib.tar")

        # Generate the graph runtime
        ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
        m = graph_runtime.GraphModule(lib["default"](ctx))

        # upload parameters to device
        image = tvm.nd.array(
            (np.random.uniform(size=(1, 3, 224, 224))).astype("float32"))
        m.set_input("data", image)

        # evaluate
        print("Evaluate inference time cost...")
        timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
        tcost = timer()
        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Ejemplo n.º 44
0
def test_compile_tuple_dup():
    x = relay.var("data", shape=(16, 16))
    log = relay.log(x)
    output = relay.Tuple([log, log])
    f = relay.Function([x], output)
    relay.build(f, 'llvm')
Ejemplo n.º 45
0
        tasks, task_weights = auto_scheduler.extract_tasks(
            mod["main"], params, target=target_host, target_host=target_host)
        for idx, task in enumerate(tasks):
            print("========== Task %d  (workload key: %s) ==========" %
                  (idx, task.workload_key))
            print(task.compute_dag)

        run_tuning(tasks, task_weights, log_file)

    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler":
                                     True}):
            lib = relay.build(mod,
                              target=target,
                              target_host=target_host,
                              params=params)

    print("Upload")
    tmp = tempdir()
    filename = "net.tar"
    lib.export_library(tmp.relpath(filename))
    remote = auto_scheduler.utils.request_remote("m1", "127.0.0.1", 9190)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    print("run")
    input_shape = [1, 128]
    dtype = "int64"
    ctx = remote.device(str(target), 0)
    module = runtime.graph_executor.GraphModule(rlib["default"](ctx))
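    # A minimal sketch of the "run" step, mirroring the benchmarking pattern used
    # in the next example. The input name "data" and the random token ids are
    # assumptions; `np` is assumed to be imported.
    data_tvm = tvm.nd.array(np.random.randint(0, 100, size=input_shape).astype(dtype))
    module.set_input("data", data_tvm)
    ftimer = module.module.time_evaluator("run", ctx, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to milliseconds
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))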
Ejemplo n.º 46
0
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        runner=auto_scheduler.RPCRunner(
            device_key,
            host="0.0.0.0",
            port=9191,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler":
                                     True}):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    if use_ndk:
        from tvm.contrib import ndk

        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key,
                                                 "0.0.0.0",
                                                 9191,
                                                 timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph runtime
    dev = remote.cpu()
    module = graph_runtime.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run",
                                          dev,
                                          repeat=3,
                                          min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
Ejemplo n.º 47
0
input_image = input_image.transpose([3, 2, 0, 1])
# Compile the model on Relay
# ---------------------------
# The process should be familiar by now.
input_tensor = "data"
input_shape = input_image.shape
shape_dict = {input_tensor:input_shape}
print("shape: ",shape_dict)

target = 'llvm'
# Parse mxnet model and convert into Relay computation graph
mod, params = relay.frontend.from_mxnet(model, shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod,
                                     target,
                                     params=params)

######################################################################
# Execute on TVM
# -------------------
# The process is no different from the other examples
from tvm.contrib import graph_runtime

ctx = tvm.cpu(0)
m = graph_runtime.create(graph, lib, ctx)
dtype = 'float32'
# run a single inference
m.set_input("data", tvm.nd.array(input_image.astype(dtype)))
m.set_input(**params)
# set start time
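# A hedged completion of the timing step hinted at by the comment above: record
# the start time, run one inference, and report the elapsed time and top-1 class.
# Assumes `time` is imported and that the output is a (1, num_classes) softmax.
start = time.time()
m.run()
tvm_output = m.get_output(0).asnumpy()
print("Inference took %.4f s, top-1 class index: %d" %
      (time.time() - start, int(tvm_output[0].argmax())))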
Ejemplo n.º 48
0
def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
                   shape=None, dtype=None, in_range=None, values=None,
                   exclude_targets=None, only_targets=None,
                   additional_params=None,
                   numerical_grads=None, numerical_grads_params=None,
                   atol=1e-5, rtol=1e-5, quiet=False):
    """Compute the function and/or its gradients on a random input and raise
    an exception if the result doesn't match the reference implementation.

    Parameters
    ----------
    symbol : nnvm.Symbol
        A symbol representing the output.

    forward : Callable[..., List[numpy.ndarray]], optional
        A reference implementation to compare with.

    backward : Callable[..., List[numpy.ndarray] or Dict[str, numpy.ndarray]], optional
        A reference implementation of gradients. Should also accept head_grads besides
        the normal inputs; head_grads is a list of gradients of some scalar wrt the
        outputs, or just a single gradient if there is only one output.
        Should return either a dict mapping input variable names to the respective
        gradients or a list of gradients wrt variables from grad_input_vars in
        exactly the same order (in alphabetical order by default).

    grad_input_vars : List[nnvm.Symbol or str], optional
        A list of variables with respect to which the gradients will be computed.
        None (default) means that all input variables will be used in an alphabetical order.

    shape : Dict[nnvm.Symbol or str, Tuple[int]] or Tuple[int], optional
        A dict mapping input variable names to shapes, or just a single shape.
        By default shapes will be inferred from variables' attributes (see the Examples).
        Note that this parameter takes precedence over variables' attributes.

    dtype : Dict[nnvm.Symbol or str, str] or str, optional
        A dict mapping input variable names to dtypes, or just a single dtype.
        By default dtypes will be inferred from variables' attributes (see the Examples).
        If dtypes cannot be inferred for some variables then float32 will be used as a fallback.
        Note that this parameter takes precedence over variables' attributes.

    in_range : Dict[nnvm.Symbol or str, (float, float)] or (float, float), optional
        A dict mapping input variable names to ranges or just a single range
        (the same for all variables). Input values will be generated from
        uniform distributions on these ranges. `head_grads` can also be
        assigned a range this way.

    values : Dict[nnvm.Symbol or str, numpy.ndarray], optional
        A dict explicitly providing values for some variables instead of random generation.

    exclude_targets : Set[str], optional
        Skip compiling and running anything for these targets.

    only_targets : Set[str], optional
        Test only for those targets from `ctx_list()` that are also in this set.

    additional_params : dict, optional
        A dict of additional parameters which will be passed to forward and backward.

    numerical_grads : bool or 'if_possible', optional
        Whether to additionally check against numerically computed gradients. If 'if_possible' or
        None is passed (which is the default) then it will try to create a gradient computation
        graph and then check gradients numerically only if this graph can be created (i.e. if there
        are some operations with unimplemented gradients, it will just issue a warning).
        Checking against numerical gradients is done via the `check_numerical_grads` function.

    numerical_grads_params : dict, optional
        Additional parameters for `check_numerical_grads`.

    atol : float, optional
        Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.

    rtol : float, optional
        Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.

    quiet : bool, optional
        Don't dump additional information to stdout on failure.

    Examples
    --------
    .. code-block:: python

        x = sym.Variable("x", shape=(1, 2))
        y = sym.Variable("y", shape=(1, 2))

        # check the function and its gradients both numerically and using a reference function
        check_function(x + 2*y,
                       lambda x, y: x + 2*y,
                       lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads})

        # just check gradients numerically
        check_function(x + 2*y, numerical_grads=True)

        # just check the forward computation
        check_function(x + 2*y, lambda x, y: x + 2*y, numerical_grads=False)

        # specifying dtype
        check_function(x + 2*y, lambda x, y: x + 2*y, dtype='float64')

        # dtypes can also be specified during variable creation with dtype codes
        x = sym.Variable("x", dtype=0)
        check_function(x + 1, shape=(2, 2), numerical_grads=True)
    """
    # validate and preprocess the input params
    if numerical_grads is None and forward is None and backward is None:
        raise ValueError("No reference function was passed to check_function. If you only want to "
                         "check gradients numerically, pass numerical_grads=True explicitly.")

    if numerical_grads is None:
        numerical_grads = 'if_possible'

    if numerical_grads not in [False, True, 'if_possible']:
        raise ValueError("numerical_grads must be a bool or 'if_possible', not {}"
                         .format(numerical_grads))

    if additional_params is None:
        additional_params = {}

    input_vars = symbol.list_input_variables()
    input_dict = {x.attr('name'): x for x in input_vars}

    if grad_input_vars is None:
        grad_input_vars = sorted(input_vars, key=lambda x: x.attr('name'))
    else:
        grad_input_vars = [input_dict[x] if isinstance(x, str) else x for x in grad_input_vars]

    in_range = _dict_var_to_dict_str(in_range)
    values = _dict_var_to_dict_str(values)

    out_len = len(symbol.list_output_names())

    # Infer the output shapes and dtypes, and preprocess the shape and dtype params
    forward_graph, shape, dtype, out_shapes, out_dtypes = \
        infer_shapes_dtypes(nnvm.graph.create(symbol), shape=shape, dtype=dtype,
                            fallback_dtype='float32')

    if not all(out_shapes) or not all(out_dtypes):
        if not quiet:
            print(forward_graph.ir(join_node_attrs=['shape', 'dtype']))
        raise ValueError("Could not infer shapes or dtypes for outputs.\n"
                         "out_shapes = {}\nout_dtypes = {}".format(out_shapes, out_dtypes))

    backward_graph = None

    # If we want gradients, we have to recreate the graph, but now with gradient computations
    # Note that here we need out_shapes for defining the shape of head grads, so we have to
    # create the graph twice
    if backward is not None or numerical_grads:
        try:
            head_grads_symbols = [nnvm.symbol.Variable("head_grads_" + str(i),
                                                       shape=out_shapes[i],
                                                       dtype=DTYPE_TO_TCODE[out_dtypes[i]])
                                  for i in range(out_len)]
            grad_symbols = graph_util.gradients([symbol], grad_input_vars,
                                                grad_ys=head_grads_symbols)
            # Sometimes grads do not depend on head_grads, so head_grads does not appear
            # in the variable list; adding it manually prevents this, making things a bit easier
            backward_graph = \
                nnvm.graph.create(nnvm.symbol.Group([symbol] + grad_symbols + head_grads_symbols))

            backward_graph, shape, dtype, out_shapes, out_dtypes = \
                infer_shapes_dtypes(backward_graph, shape=shape, dtype=dtype,
                                    fallback_dtype='float32')
        except nnvm._base.NNVMError as err:
            if backward is None and numerical_grads == "if_possible":
                logging.warning("Won't check gradients because: %s", str(err).split('\n', 1)[0])
                numerical_grads = False
                backward_graph = None
            else:
                raise

    main_graph = backward_graph if backward_graph is not None else forward_graph

    # Generate random data for inputs (including head_grads)

    np_inputs = {}

    for x in main_graph.symbol.list_input_variables():
        x_name = x.attr('name')
        x_shape = shape[x_name]
        x_dtype = dtype[x_name]

        if values is not None and x_name in values:
            np_inputs[x_name] = values[x_name].astype(x_dtype)
            continue

        low = -1.0
        high = 1.0
        if in_range is not None:
            if isinstance(in_range, dict):
                if x_name in in_range:
                    low = in_range[x_name][0]
                    high = in_range[x_name][1]
            else:
                low = in_range[0]
                high = in_range[1]

        np_inputs[x_name] = np.random.uniform(size=x_shape, low=low, high=high).astype(x_dtype)

    np_inputs_without_head_grads = {k: np_inputs[k] for k in np_inputs
                                    if not k.startswith('head_grads_')}

    nothing_was_done = True

    # Compute and compare the results
    for target, ctx in ctx_list():
        if exclude_targets is not None:
            if target in exclude_targets or str(target) in exclude_targets:
                logging.info("Skipping target = %s, ctx = %s", target, ctx)
                continue
        if only_targets is not None:
            if target not in only_targets and str(target) not in only_targets:
                logging.info("Skipping target = %s, ctx = %s", target, ctx)
                continue

        logging.info("Checking computation on target = %s, ctx = %s", target, ctx)

        debug_stage = None

        try:
            nnvm_res = None

            debug_stage = "compiling"
            main_function = graph_to_function(main_graph, target, ctx)

            # nnvm_res contains the output and gradients (if they are needed)
            debug_stage = "running"
            nnvm_res = main_function(**np_inputs)

            try:
                logging.debug("checking to_relay conversion")
                inputs = np_inputs_without_head_grads.copy()
                func, inputs = to_relay(main_graph, shape, dtype, params=inputs)
                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build(func, target=target)
                m = graph_runtime.create(graph, lib, ctx)
                m.set_input(**inputs)
                m.set_input(**params)
                m.run()
                for i in range(out_len):
                    relay_out = m.get_output(i).asnumpy()
                    tvm.testing.assert_allclose(nnvm_res[i], relay_out, atol=atol, rtol=rtol)
            except NotImplementedError as err:
                # the NNVM operator is not supported yet
                logging.warning(err)

            if backward_graph is not None:
                grad_var_names = [x.attr('name') for x in grad_input_vars]
                nnvm_grads = {x: v for x, v in zip(grad_var_names, nnvm_res[out_len:])}

            if forward is not None:
                nothing_was_done = False
                debug_stage = "checking forward computation"
                logging.debug(debug_stage)

                params = {}
                params.update(np_inputs_without_head_grads)
                params.update(additional_params)
                numpy_res = forward(**params)

                if isinstance(numpy_res, tuple):
                    numpy_res = list(numpy_res)

                if not isinstance(numpy_res, list):
                    numpy_res = [numpy_res]

                if len(numpy_res) != out_len:
                    raise ValueError("Forward function returned {} values, but "
                                     "the nnvm graph returns {} values"
                                     .format(len(numpy_res), out_len))

                for i in range(out_len):
                    tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)

            if backward is not None:
                nothing_was_done = False
                debug_stage = "checking gradients"
                logging.debug(debug_stage)

                np_head_grads = [np_inputs["head_grads_" + str(i)] for i in range(out_len)]

                if out_len == 1:
                    np_head_grads = np_head_grads[0]

                params = {'head_grads': np_head_grads}
                params.update(np_inputs_without_head_grads)
                params.update(additional_params)
                numpy_grads = backward(**params)

                if not isinstance(numpy_grads, dict):
                    if isinstance(numpy_grads, tuple):
                        numpy_grads = list(numpy_grads)
                    if not isinstance(numpy_grads, list):
                        numpy_grads = [numpy_grads]
                    numpy_grads = {x: v for x, v in zip(grad_var_names, numpy_grads)}
                    if len(numpy_grads) != len(grad_var_names):
                        raise ValueError("The backward function returns a list of gradients which "
                                         "does not contain gradients for these variables: {}"
                                         .format(set(grad_var_names) - set(numpy_grads)))

                for x_name in numpy_grads:
                    tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
                                                atol=atol, rtol=rtol)

            if numerical_grads:
                nothing_was_done = False
                debug_stage = "checking gradients numerically"
                logging.debug(debug_stage)

                forward_function = graph_to_function(forward_graph, target, ctx)

                # Since the result may be non-scalar, we have to put another operation on top,
                # so we just multiply by the randomly generated head_grads and then sum everything.
                # This way we can reuse the gradient values which have already been computed.
                def scalar_function(**kwargs):
                    res = forward_function(**kwargs)
                    return np.sum([np.dot(np_inputs['head_grads_' + str(i)].ravel(), res[i].ravel())
                                   for i in range(out_len)])

                if numerical_grads_params is None:
                    numerical_grads_params = {}

                check_numerical_grads(
                    scalar_function,
                    input_values=np_inputs_without_head_grads,
                    grad_values=nnvm_grads,
                    **numerical_grads_params)

        except:
            if not quiet:
                print("\ncheck_function failed while {}, here is the main graph"
                      .format(debug_stage))
                print(main_graph.ir(join_node_attrs=['shape', 'dtype']))
                if nnvm_res is not None:
                    print("Generated inputs:")
                    print(np_inputs)
                    print()
            raise

    if nothing_was_done:
        logging.warning("Nothing was done in check_function. Check ctx_list().")
Ejemplo n.º 49
0
def run_unpropagatable_graph(dev, tgt):
    R""" The network is as following:
            a     b  c     d
             \   /    \   /
              add      mul
                \      /
                subtract
    """

    a = relay.var("a", shape=(10, 10))
    b = relay.var("b", shape=(10, 10))
    c = relay.var("c", shape=(10, 10))
    d = relay.var("d", shape=(10, 10))
    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')
    c_data = np.random.rand(10, 10).astype('float32')
    d_data = np.random.rand(10, 10).astype('float32')
    tmp_add = a_data + b_data
    tmp_mul = np.multiply(c_data, d_data)
    ref_res = np.subtract(tmp_add, tmp_mul)

    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", dev: tgt}
    cpu_ctx = fallback_device
    dev_ctx = tvm.context(dev)

    def annotated():
        add = relay.add(a, b)
        _add = relay.annotation.on_device(add, dev_ctx)
        mul = relay.multiply(c, d)
        _mul = relay.annotation.on_device(mul, cpu_ctx)
        sub = relay.subtract(_add, _mul)
        _sub = relay.annotation.on_device(sub, dev_ctx)
        func = relay.Function([a, b, c, d], _sub)
        func = run_opt_pass(func,
                            transform.RewriteAnnotatedOps(dev_ctx.device_type))
        return func

    def expected():
        add = relay.add(a, b)
        mul = relay.multiply(c, d)
        copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
        sub = relay.subtract(add, copy_mul_sub)
        func = relay.Function([a, b, c, d], sub)
        return func

    annotated_func = annotated()
    expected_func = expected()
    expected_index = [2, 2, 2, 1, 1, 1, 2, 2]
    check_annotated_graph(annotated_func, expected_func)
    params = {"a": a_data, "b": b_data, "c": c_data, "d": d_data}
    with tvm.transform.PassContext(
            opt_level=0,
            config={"relay.fallback_device_type":
                    fallback_device.device_type}):
        graph, lib, params = relay.build(annotated_func, target, params=params)
        contexts = [tvm.cpu(0), tvm.context(dev)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Ejemplo n.º 50
0
def test_inception_like():
    def conv(data):
        y = relay.nn.conv2d(data,
                            relay.var("w"),
                            kernel_size=(3, 3),
                            padding=(1, 1),
                            channels=16)
        return relay.nn.relu(data=y)

    def inception_like(data):
        c0 = conv(data)
        c1 = conv(data)
        return relay.concatenate((c0, c1), axis=1)

    def before(dshape):
        x = relay.var("x", shape=dshape)
        in1 = inception_like(x)
        in2 = inception_like(in1)
        return relay.Function(relay.analysis.free_vars(in2), in2)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        c = conv(p0)
        f0 = relay.Function(relay.analysis.free_vars(c), c)
        f0 = f0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p01 = relay.var("p01", shape=dshape)
        c = conv(p01)
        f1 = relay.Function(relay.analysis.free_vars(c), c)
        f1 = f1.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p02 = relay.var("p02", shape=dshape)
        p12 = relay.var("p12", shape=dshape)
        concat1 = relay.concatenate((p02, p12), axis=1)
        f_concat1 = relay.Function([p02, p12], concat1)
        f_concat1 = f_concat1.with_attr("Primitive",
                                        tvm.tir.IntImm("int32", 1))

        dshape2 = (dshape[0], dshape[1] * 2, dshape[2], dshape[3])

        p03 = relay.var("p03", shape=dshape2)
        c = conv(p03)
        f2 = relay.Function(relay.analysis.free_vars(c), c)
        f2 = f2.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p04 = relay.var("p04", shape=dshape2)
        c = conv(p04)
        f3 = relay.Function(relay.analysis.free_vars(c), c)
        f3 = f3.with_attr("Primitive", tvm.tir.IntImm("int32", 1))

        p05 = relay.var("p05", shape=dshape)
        p15 = relay.var("p15", shape=dshape)
        concat2 = relay.concatenate((p05, p15), axis=1)
        f_concat2 = relay.Function([p05, p15], concat2)
        f_concat2 = f_concat2.with_attr("Primitive",
                                        tvm.tir.IntImm("int32", 1))

        x = relay.var("x", shape=dshape)
        c1 = relay.Call(f0, [x, relay.var("w1")])
        c2 = relay.Call(f1, [x, relay.var("w2")])
        concat = relay.Call(f_concat1, [c1, c2])
        c3 = relay.Call(f2, [concat, relay.var("w3")])
        c4 = relay.Call(f3, [concat, relay.var("w4")])
        out = relay.Call(f_concat2, [c3, c4])

        return relay.Function(relay.analysis.free_vars(out), out)

    dshape = (1, 16, 64, 64)
    orig = before(dshape)
    fuse0(tvm.IRModule.from_expr(orig))
    m = fuse2(tvm.IRModule.from_expr(orig))
    relay.build(m, "llvm")
    after = run_opt_pass(expected(dshape), transform.InferType())
    assert tvm.ir.structural_equal(m["main"], after)
Ejemplo n.º 51
0
def test_inception_like():
    def conv(data):
        y = relay.nn.conv2d(data, relay.var("w"),
                            kernel_size=(3, 3),
                            padding=(1, 1),
                            channels=16)
        return relay.nn.relu(data=y)

    def inception_like(data):
        c0 = conv(data)
        c1 = conv(data)
        return relay.concatenate((c0, c1), axis=1)

    def before(dshape):
        x = relay.var("x", shape=dshape)
        in1 = inception_like(x)
        in2 = inception_like(in1)
        return relay.Function(relay.ir_pass.free_vars(in2), in2)

    def expected(dshape):
        p0 = relay.var("p0", shape=dshape)
        c = conv(p0)
        f0 = relay.Function(relay.ir_pass.free_vars(c), c)

        p01 = relay.var("p01", shape=dshape)
        c = conv(p01)
        f1 = relay.Function(relay.ir_pass.free_vars(c), c)

        p02 = relay.var("p02", shape=dshape)
        p12 = relay.var("p12", shape=dshape)
        concat1 = relay.concatenate((p02, p12), axis=1)
        f_concat1 = relay.Function([p02, p12], concat1)

        dshape2 = (dshape[0], dshape[1]*2, dshape[2], dshape[3])

        p03 = relay.var("p03", shape=dshape2)
        c = conv(p03)
        f2 = relay.Function(relay.ir_pass.free_vars(c), c)

        p04 = relay.var("p04", shape=dshape2)
        c = conv(p04)
        f3 = relay.Function(relay.ir_pass.free_vars(c), c)

        p05 = relay.var("p05", shape=dshape)
        p15 = relay.var("p15", shape=dshape)
        concat2 = relay.concatenate((p05, p15), axis=1)
        f_concat2 = relay.Function([p05, p15], concat2)

        x = relay.var("x", shape=dshape)
        c1 = relay.Call(f0, [x, relay.var("w1")])
        c2 = relay.Call(f1, [x, relay.var("w2")])
        concat = relay.Call(f_concat1, [c1, c2])
        c3 = relay.Call(f2, [concat, relay.var("w3")])
        c4 = relay.Call(f3, [concat, relay.var("w4")])
        out = relay.Call(f_concat2, [c3, c4])

        return relay.Function(relay.ir_pass.free_vars(out), out)

    dshape = (1, 16, 64, 64)
    z = before(dshape)
    z = relay.ir_pass.infer_type(z)
    zz = relay.ir_pass.fuse_ops(z, opt_level=0)
    assert not relay.ir_pass.free_vars(zz)
    zz = relay.ir_pass.fuse_ops(z, opt_level=2)
    relay.build(zz, 'llvm')
    zz = relay.ir_pass.infer_type(zz)
    assert not relay.ir_pass.free_vars(zz)
    after = relay.ir_pass.infer_type(expected(dshape))
    assert relay.ir_pass.alpha_equal(zz, after)
Ejemplo n.º 52
0
n, h, w, c = 1, 130, 130, 128
o, kc, kh, kw = 128, c, 3, 3

img = relay.var('x', relay.ty.TensorType((n, h, w, c), 'int8'))
knl = relay.var('w', relay.ty.TensorType((kh, kw, o // 16, c // 4, 16, 4), 'int8'))

conv2d_vnni = relay.op.nn.conv2d_vnni(img, knl, strides=1, padding=0)
func = relay.Function([img, knl], conv2d_vnni)

ops = n * (h - kh + 1) * (w - kw + 1) * o * kc * kh * kw / 64

import vnni
import numpy as np
module = tvm.IRModule.from_expr(func)
with tvm.build_config(add_lower_pass= [(1, vnni.vnni_transformation)]):
    graph, module, params = relay.build(func, target='llvm -mcpu=cascadelake')
    x_ = tvm.nd.array((np.random.randn(n, h, w, c) * 255).astype('int8'), ctx=tvm.cpu())
    w_ = tvm.nd.array((np.random.randn(kw, kh, o // 16, c // 4, 16, 4) * 255).astype('int8'),
                     ctx=tvm.cpu())
    y_ = tvm.nd.array((np.random.randn(n, h - kh + 1, w - kw + 1, o) * 255).astype('int32'),
                      ctx=tvm.cpu())

    module = module.time_evaluator(module.entry_name, tvm.cpu(), number=5)
    span = module(x_, w_, y_).mean
    print('Exec Time: ', span)
    print('%.2f GVNNI/s' % (ops / span / 1e9))

    #module = tvm.contrib.graph_runtime.create(graph, module, tvm.cpu())
    #module.set_input('x', x)
    #module.set_input('w', w)
    #module.run()