def test_gru_like():
    def unit(rnn_dim):
        X = relay.var("X", shape=(1, rnn_dim))
        W = relay.var("y", shape=(3 * rnn_dim, rnn_dim))
        matmul = relay.nn.dense(X, W)
        splitted = relay.split(matmul, indices_or_sections=3, axis=1)
        out = relay.sigmoid(splitted[0]) + relay.tanh(splitted[1]) * relay.exp(splitted[2])
        return relay.Function([X, W], out)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def unit_numpy(X, W):
        prod = np.dot(X, W.transpose())
        splits = np.split(prod, indices_or_sections=3, axis=1)
        return sigmoid(splits[0]) + np.tanh(splits[1]) * np.exp(splits[2])

    dtype = "float32"
    rnn_dim = 1000
    x = np.random.rand(1, rnn_dim).astype(dtype)
    y = np.random.rand(3*rnn_dim, rnn_dim).astype(dtype) * 0.01 - 0.005
    out_shape = (1, rnn_dim)
    z = unit(rnn_dim)

    for target, ctx in ctx_list():
        with relay.build_config(opt_level=2):
            graph, lib, params = relay.build(z, target)
            m = graph_runtime.create(graph, lib, ctx)
            m.set_input("X", tvm.nd.array(x.astype(dtype)))
            m.set_input("y", tvm.nd.array(y.astype(dtype)))
            m.set_input(**params)
            m.run()
            out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
            ref = unit_numpy(x, y)
            tvm.testing.assert_allclose(out, ref, rtol=1e-5, atol=1e-5)
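
The test iterates over ctx_list(), a TVM testing helper not shown here. A minimal sketch, assuming (as in TVM ~0.6, where it lived in tvm.relay.testing.config) that it pairs each enabled target string with its device context; the target list is an illustrative subset:

import tvm

def ctx_list():
    # Illustrative subset of targets; the real helper reads them from an
    # environment variable and skips targets the local build cannot execute.
    targets = ["llvm", "cuda"]
    return [(tgt, tvm.context(tgt, 0)) for tgt in targets
            if tvm.context(tgt, 0).exist]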
Example #2
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    net, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_program(net, target=target,
                                              params=params, ops=(relay.op.nn.conv2d,))

    # run tuning tasks
    print("Tuning...")
    tune_kernels(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                net, target=target, params=params)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
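
get_network() is defined elsewhere in the tutorial this snippet comes from. A hedged sketch of what it plausibly returns, built on relay.testing workloads; the supported model names are assumptions:

import numpy as np
from tvm import relay

def get_network(name, batch_size):
    # returns (net, params, input_shape, output_shape) for a named workload
    input_shape = (batch_size, 3, 224, 224)
    output_shape = (batch_size, 1000)
    if name == "resnet-18":
        net, params = relay.testing.resnet.get_workload(
            num_layers=18, batch_size=batch_size)
    elif name == "mobilenet":
        net, params = relay.testing.mobilenet.get_workload(
            batch_size=batch_size)
    else:
        raise ValueError("Unsupported network: " + name)
    return net, params, input_shape, output_shape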
Example #3
def test_alter_layout_conv2d():
    """Additional layout transformations should occour on the graph.
    """

    def convnet():
        """Alternating layout of simple convnet (from image super-resolution).
        """
        bias1 = relay.var('bias1', shape=(64,))
        bias2 = relay.var('bias2', shape=(64,))
        bias3 = relay.var('bias3', shape=(64,))
        bias4 = relay.var('bias4', shape=(64,))
        weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
        weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
        weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
        weight4 = relay.var('weight4', shape=(64, 64, 3, 3))
        data = relay.var("x", shape=(1, 1, 224, 224))
        n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5])
        n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
        n02 = relay.add(n00, n01)
        n03 = relay.nn.relu(n02)
        n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
        n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
        n06 = relay.add(n04, n05)
        n07 = relay.nn.relu(n06)
        n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
        n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
        n10 = relay.add(n08, n09)
        n11 = relay.nn.relu(n10)
        n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
        n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
        n14 = relay.add(n12, n13)
        n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
        n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
        net = relay.reshape(n16, newshape=[1, 1, 672, 672])
        args = relay.ir_pass.free_vars(net)
        return relay.Function(args, net)

    # orig net
    N = convnet()
    N = infer_type(N)

    # trigger a test
    # for each known alter_conv2d
    targets=['cuda',
             'opencl -device=mali',
             'opencl -device=intel_graphics',
             'llvm -device=arm_cpu',
             'llvm -device=core-avx-ii']

    for tgt in targets:
        with tvm.target.create(tgt) as target:
            with relay.build_config(opt_level=-1, add_pass='******'):
                with autotvm.tophub.context(target):
                    O = relay.optimize(N, target, params=None)
                    O = relay.ir_pass.infer_type(O)

                    # graph should differ
                    assert not relay.ir_pass.alpha_equal(N, O)
def test_compile_placeholder_bypass():
    engine = relay.backend.compile_engine.get()
    x = relay.var("x", shape=(2, 3))
    y = relay.var("y", shape=(2, 3))
    z = relay.var("z", shape=(2, 3))
    result = relay.Tuple([x, relay.op.concatenate([y, z], axis=0)])
    func = relay.Function(relay.ir_pass.free_vars(result), result)
    with relay.build_config(opt_level=0):
        graph, lib, params = relay.build(func, 'llvm')
Example #5
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    net, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(net, target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d,))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                net, target=target, params=params)

        # export library
        tmp = tempdir()
        if use_android:
            from tvm.contrib import ndk
            filename = "net.so"
            lib.export_library(tmp.relpath(filename), ndk.create_shared)
        else:
            filename = "net.tar"
            lib.export_library(tmp.relpath(filename))

        # upload module to device
        print("Upload...")
        remote = autotvm.measure.request_remote(device_key, '0.0.0.0', 9190,
                                                timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # upload parameters to device
        ctx = remote.context(str(target), 0)
        module = runtime.create(graph, rlib, ctx)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
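
tune_tasks() is the standard helper from TVM's autotvm tutorials; a condensed sketch (the tuner choice and trial counts are illustrative):

from tvm import autotvm
from tvm.autotvm.tuner import XGBTuner, RandomTuner

def tune_tasks(tasks, measure_option, tuner="xgb", n_trial=1000,
               early_stopping=None, log_filename="tuning.log"):
    tmp_log_file = log_filename + ".tmp"
    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        # the tutorials default to an XGBoost rank-model tuner
        tuner_obj = (XGBTuner(tsk, loss_type="rank") if tuner == "xgb"
                     else RandomTuner(tsk))
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)])
    # keep only the best record per workload
    autotvm.record.pick_best(tmp_log_file, log_filename)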
Example #6
def run_tvm_graph(tflite_model_buf, input_data, input_node, num_output=1, target='llvm',
                  out_names=None):
    """ Generic function to compile on relay and execute on tvm """
    try:
        import tflite.Model
    except ImportError:
        raise ImportError("The tflite package must be installed")

    # get TFLite model from buffer
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)

    input_data = convert_to_list(input_data)
    input_node = convert_to_list(input_node)

    shape_dict = {}
    dtype_dict = {}
    for i, e in enumerate(input_node):
        shape_dict[e] = input_data[i].shape
        dtype_dict[e] = input_data[i].dtype.name

    func, params = relay.frontend.from_tflite(tflite_model,
                                              shape_dict=shape_dict,
                                              dtype_dict=dtype_dict)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    ctx = tvm.context(target, 0)
    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    for i, e in enumerate(input_node):
        m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))

    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    assert out_names is None or num_output == len(out_names), "out_names: {} num_output: {}".format(
        out_names, num_output)
    tvm_output_list = []
    for i in range(0, num_output):
        tvm_output = m.get_output(i)
        tvm_output_list.append(tvm_output.asnumpy())
    return tvm_output_list
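
convert_to_list() above is a tiny helper from the same test file; presumably just:

def convert_to_list(x):
    # wrap a single input so single- and multi-input models share one code path
    if not isinstance(x, list):
        x = [x]
    return x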
Example #7
def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'):
    # gluon_impl and out_shape come from the enclosing test's scope
    shape_dict = {"data": x.shape}
    if gluon_impl:
        new_sym, params = relay.frontend.from_mxnet(symbol, shape_dict)
    else:
        new_sym, params = relay.frontend.from_mxnet(symbol,
                                                    shape_dict,
                                                    arg_params=args,
                                                    aux_params=auxs)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(new_sym, target, params=params)
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    m.set_input(**params)
    m.run()
    # get outputs
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    return out.asnumpy()
Example #8
def get_tvm_output(graph_def, input_data, target, ctx, output_shape=None, output_dtype='float32'):
    """ Generic function to execute and get tvm output"""
    target = 'llvm'
    if isinstance(input_data, list):
        input_names = {}
        shape_dict = {}
        dtype_dict = {}
        for i, _ in enumerate(input_data):
            input_names[i] = graph_def.graph.input[i].name
            shape_dict[input_names[i]] = input_data[i].shape
            dtype_dict[input_names[i]] = input_data[i].dtype
    else:
        input_names = graph_def.graph.input[0].name
        shape_dict = {input_names: input_data.shape}
        dtype_dict = {input_names: input_data.dtype}

    sym, params = relay.frontend.from_onnx(graph_def, shape_dict)
    with relay.build_config(opt_level=1):
        graph, lib, params = relay.build(sym, target, params=params)

    ctx = tvm.cpu(0)
    from tvm.contrib import graph_runtime
    m = graph_runtime.create(graph, lib, ctx)
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_names):
            m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype)))

    m.set_input(**params)
    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, _ in enumerate(output_shape):
            tvm_output = m.get_output(i)
            tvm_output_list.append(tvm_output.asnumpy())
        return tvm_output_list
    else:
        tvm_output = m.get_output(0)
        return tvm_output.asnumpy()
Example #9
    def get_tvm_output(net, data, params, target, ctx, dtype='float32'):
        with relay.build_config(opt_level=1):
            graph, lib, params = relay.build(net, target, params=params)

        m = graph_runtime.create(graph, lib, ctx)
        # set inputs
        m.set_input("data", data)
        m.set_input(**params)
        m.run()
        out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

        if measure:
            print("Evaluate graph runtime inference time cost...")
            ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
            # Measure in millisecond.
            prof_res = np.array(ftimer().results) * 1000
            print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
                  (np.mean(prof_res), np.std(prof_res)))

        return out.asnumpy()
Example #10
def test_runtime(target, device, func, fallback_device=None,
                 expected_index=None):
    # x_data, y_data, and ref_res come from the enclosing test's scope
    params = {"x": x_data, "y": y_data}
    config = {"opt_level": 1}
    if fallback_device:
        config["fallback_device"] = fallback_device
    with relay.build_config(**config):
        graph, lib, params = relay.build(
            func,
            target,
            params=params)
        contexts = [tvm.cpu(0), tvm.context(device)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example #11
def build_module(opts):
    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model
    block = get_model('mobilenet0.25', pretrained=True)
    shape_dict = {'data': dshape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    func = mod["main"]
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func,
                                         'llvm --system-lib',
                                         params=params)

    build_dir = os.path.abspath(opts.out_dir)
    if not os.path.isdir(build_dir):
        os.makedirs(build_dir)

    lib.save(os.path.join(build_dir, 'model.o'))
    with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
def main():
    dshape = (1, 28, 28)
    net, params = relay.testing.mlp.get_workload(batch_size=dshape[0],
                                                 dtype='float32')

    dshape = (1, 3, 224, 224)
    net, params = relay.testing.resnet.get_workload(layers=18,
                                                    batch_size=dshape[0],
                                                    image_shape=dshape[1:])

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net,
                                         'llvm --system-lib',
                                         params=params)

    build_dir = osp.abspath(sys.argv[1])
    if not osp.isdir(build_dir):
        os.makedirs(build_dir, exist_ok=True)

    lib.save(osp.join(build_dir, 'model.o'))
    with open(osp.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(osp.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
Example #13
    def run_test_conv2d_cuda(dtype, out_dtype, scale, dshape, kshape,
                             padding=(1, 1),
                             groups=1,
                             dilation=(1, 1),
                             **attrs):

        x = relay.var("x", shape=dshape, dtype=dtype)
        w = relay.var("w", shape=kshape, dtype=dtype)
        y = relay.nn.conv2d(x, w,
                            padding=padding,
                            dilation=dilation,
                            groups=groups,
                            **attrs)
        func = relay.Function([x, w], y)
        mod = relay.Module()
        mod['main'] = func
        mod = relay.transform.InferType()(mod)

        data = np.random.uniform(-scale, scale, size=dshape).astype(dtype)
        kernel = np.random.uniform(-scale, scale, size=kshape).astype(dtype)
        ref_res = topi.testing.conv2d_nchw_python(
            data.astype(out_dtype), kernel.astype(out_dtype), 1, padding,
            groups=groups)

        with WinogradFallback(), relay.build_config(opt_level=3):
            for target, ctx in ctx_list():
                if target != 'cuda':
                    continue
                params = {'w': tvm.nd.array(kernel)}
                graph, lib, params = relay.build_module.build(mod, target=target, params=params)
                module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
                module.set_input('x', tvm.nd.array(data))
                module.set_input(**params)
                module.run()
                op_res1 = module.get_output(0)
                tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-3, atol=1e-3)
Example #15
def test_cpu():
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        compiled_graph_lib = relay.build_module.build(mod,
                                                      "llvm",
                                                      params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    # raw api
    dev = tvm.cpu()
    gmod = compiled_graph_lib["default"](dev)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph executor wrapper
    gmod = graph_executor.GraphModule(compiled_graph_lib["default"](dev))
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
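
test_cpu() leans on two helpers defined alongside it in TVM's test suite; hedged sketches of both:

def input_shape(mod):
    # shape of the first argument of main(), as plain Python ints
    return [int(x) for x in mod["main"].checked_type.arg_types[0].shape]

def verify(data):
    # reference result: build the same synthetic workload on llvm and run it
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        lib = relay.build_module.build(mod, "llvm", params=params)
    module = graph_executor.GraphModule(lib["default"](tvm.cpu()))
    module.set_input("data", data)
    module.run()
    return module.get_output(0).numpy()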
def test_model():
    """Test a program which uses the graph runtime."""
    if not tvm.runtime.enabled("micro_dev"):
        print("micro_dev is not enabled")
        return

    model_path = "super_resolution.onnx"
    # now you have super_resolution.onnx on disk
    onnx_model = onnx.load(model_path)

    from PIL import Image

    img_path = "cat.png"
    # NOTE: `x`, the preprocessed input array built from img_path, is never
    # constructed in this snippet; it must be prepared before this point.
    input_name = "conv2d_input"
    shape_dict = {input_name: x.shape}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

    with micro.Session(DEV_RISCV):
        ctx = tvm.micro_dev(0)

        disable_vectorize = tvm.target.build_config(disable_vectorize=True)
        disable_fusion = relay.build_config(disabled_pass={'FuseOps'})
        # disable vectorization and operator fusion for the micro target
        with disable_vectorize, disable_fusion:
            graph, c_mod, params = relay.build(mod,
                                               target=TARGET,
                                               params=params)
        micro_mod = micro.create_micro_mod(c_mod, DEV_RISCV)
        mod = graph_runtime.create(graph, micro_mod, ctx)
        mod.set_input(**params)
        mod.set_input(input_name, tvm.nd.array(x))

        tvm_output = mod.get_output(0).asnumpy()
        print("result is: " + str(tvm_output))
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--out-dir', default='.')
    opts = parser.parse_args()

    dshape = (1, 3, 224, 224)
    net, params = relay.testing.resnet.get_workload(layers=18,
                                                    batch_size=dshape[0],
                                                    image_shape=dshape[1:])

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net,
                                         'llvm --system-lib',
                                         params=params)

    build_dir = osp.abspath(opts.out_dir)
    if not osp.isdir(build_dir):
        os.makedirs(build_dir, exist_ok=True)

    lib.save(osp.join(build_dir, 'model.bc'))
    with open(osp.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(osp.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
Example #18
def tune_and_evaluate(tuning_opt, number, tune=True):
    op, params, data_shape = get_workload(batch_size, image_shape, out_channel,
                                          kernel_size, strides, padding)
    tasks = autotvm.task.extract_from_program(op,
                                              target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d, ))
    log_file = tuning_opt["log_filename"]
    if tune:
        print("Tuning...")
        tune_kernels(tasks, **tuning_opt)

    if not os.path.exists(log_file):
        raise RuntimeError("the log file {} doesn't exist".format(log_file))
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(op,
                                                          target=target,
                                                          params=params)

        ctx = tvm.device(str(target), 0)
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input("data", data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run",
                                              ctx,
                                              number=1,
                                              repeat=number)
        prof_res = np.array(ftimer().results) * 1e3
        print("Time cost is: ", np.mean(prof_res))
Example #19
def run_inf(mod, img_path, b_size=1):
	with autotvm.apply_history_best(log_file):
		print("Compile...")
		with relay.build_config(opt_level=3):
			graph, lib, params = relay.build_module.build(mod, target=tvm.target.cuda())

		ctx = tvm.context(str(tvm.target.cuda()), 0)
		module = runtime.create(graph, lib, ctx)
		normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
			std=[0.229, 0.224, 0.225])
		val_loader = torch.utils.data.DataLoader(
			datasets.ImageFolder(img_path, transforms.Compose([
				transforms.Resize(256),
				transforms.CenterCrop(224),
				transforms.ToTensor(),
				normalize,
			])),
			batch_size=b_size, shuffle=False, num_workers=1, pin_memory=True)
		total = 0
		top1 = 0
		start = time.time()
		total_time = 0
		for i, (batch, target) in enumerate(val_loader):
			data = batch.cpu().numpy()
			total = i + 1  # count of samples processed so far
			module.set_input('input0', data)
			module.set_input(**params)
			module.run()
			prediction = module.get_output(0)
			if np.argmax(prediction.asnumpy()[0]) == target.cpu().numpy()[0]:
				top1 = top1 + 1
				print(top1)
			#if i > 9:  # only run inference on a few samples in this tutorial
			#	break
		end = time.time()
		ftimer = module.module.time_evaluator('run', ctx, 1, 1000)
		prof_res = np.array(ftimer().results) * 1000

		print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
			(np.mean(prof_res), np.std(prof_res)))
		print('total time for {} images: {} (sec)'.format(total, end - start))
		print('total: {} top1: {} accuracy: {}'.format(total, top1, top1 / float(total)))
def tune_and_evaluate(tuning_opt):
    print("Extract tasks...")
    global net, params, input_shape
    tasks = autotvm.task.extract_from_program(
        net,
        target=target,
        params=params,
        ops=(relay.op.nn.conv2d, relay.op.nn.dense,
             relay.op.nn.bitserial_conv2d, relay.op.nn.bitserial_dense))

    # Run tuning tasks.
    print("Tuning...")
    tune_kernels(tasks, **tuning_opt)

    # compile kernels with history best records.
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=2):
            graph, lib, params = relay.build_module.build(net,
                                                          target=target,
                                                          params=params)

        # Upload parameters to device.
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype('float32'))
        module = runtime.create(graph, lib, ctx)
        module.set_input('input_1', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=10, repeat=1)
        prof_res = np.array(ftimer().results) * 1000  # Convert to milliseconds
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
def test_tflite_output_multiplier_greater_than_one():
    # uint8 input
    data_shape = (2, 1, 2, 4)
    data_dtype = 'uint8'
    kernel_shape = (3, 1, 2, 2)
    kernel_dtype = 'uint8'
    ref_func, qnn_func = get_funcs(data_shape=data_shape,
                                   data_dtype=data_dtype,
                                   kernel_shape=kernel_shape,
                                   kernel_dtype=kernel_dtype,
                                   input_zero_point=128,
                                   kernel_zero_point=128,
                                   kernel_size=(2, 2),
                                   padding=(0, 0),
                                   strides=(2, 2),
                                   dilation=(1, 1),
                                   data_layout="NCHW",
                                   kernel_layout="OIHW",
                                   out_dtype="int32")
    golden_data = 128 + np.array((1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3,
                                  4)).reshape(data_shape).astype('uint8')
    golden_weight = 128 + np.array(
        (1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1)).reshape(kernel_shape)
    golden_weight = golden_weight.astype('uint8')

    with relay.build_config(opt_level=2):
        params = {'kernel': golden_weight}
        graph, lib, params = relay.build(qnn_func, "llvm", params=params)
        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
        mod.set_input("data", golden_data)
        mod.set_input(**params)
        mod.run()
        qnn_output = mod.get_output(0).asnumpy()
    golden_output = np.array(
        (17, 17, 0, 0, 2, 2, 16, 36, 2, 2, 0, 0)).reshape(2, 3, 1, 2)
    np.testing.assert_equal(qnn_output, golden_output)
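
get_funcs() is a fixture from TVM's QNN conv2d tests; its signature is inferred from the call sites in this and the following examples. A hedged sketch that returns the quantized function plus an int32 reference built from the same geometry (zero points and scales are passed as plain scalars here, as the older QNN API accepted; newer releases expect relay constants):

def get_funcs(data_shape, data_dtype, kernel_shape, kernel_dtype,
              input_zero_point, kernel_zero_point, kernel_size, padding,
              strides, dilation, data_layout, kernel_layout, out_dtype,
              input_scale=1.0, kernel_scale=1.0):
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    kernel = relay.var("kernel", shape=kernel_shape, dtype=kernel_dtype)
    qnn = relay.qnn.op.conv2d(
        data, kernel,
        input_zero_point=input_zero_point,
        kernel_zero_point=kernel_zero_point,
        input_scale=input_scale, kernel_scale=kernel_scale,
        kernel_size=kernel_size, padding=padding, strides=strides,
        dilation=dilation, data_layout=data_layout,
        kernel_layout=kernel_layout, out_dtype=out_dtype)
    qnn_func = tvm.IRModule.from_expr(relay.Function([data, kernel], qnn))
    # int32 reference: subtract the zero points, then run a plain conv2d
    ref = relay.nn.conv2d(
        relay.cast(data, "int32") - relay.const(input_zero_point, "int32"),
        relay.cast(kernel, "int32") - relay.const(kernel_zero_point, "int32"),
        kernel_size=kernel_size, padding=padding, strides=strides,
        dilation=dilation, data_layout=data_layout,
        kernel_layout=kernel_layout, out_dtype=out_dtype)
    ref_func = tvm.IRModule.from_expr(relay.Function([data, kernel], ref))
    return ref_func, qnn_func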
Example #22
def benchmark(network, target):
    net, params, input_shape, output_shape = get_network(network, batch_size=1)

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, target=target, params=params)

    # create runtime
    ctx = tvm.context(str(target), 0)
    module = runtime.create(graph, lib, ctx)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input('data', data_tvm)
    module.set_input(**params)

    # evaluate
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          number=1,
                                          repeat=args.repeat)
    prof_res = np.array(
        ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
    print(
        "%-20s %-19s (%s)" %
        (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
    def verify_multi_c_mod_export():
        from shutil import which
        if which("gcc") is None:
            print("Skip test because gcc is not available.")
            return

        for device in ["llvm"]:
            if not tvm.module.enabled(device):
                print("skip because %s is not enabled..." % device)
                return

        resnet18_mod, resnet18_params = relay.testing.resnet.get_workload(
            num_layers=18)
        with relay.build_config(opt_level=3):
            _, resnet18_cpu_lib, _ = relay.build_module.build(
                resnet18_mod, "llvm", params=resnet18_params)

        A = tvm.placeholder((1024, ), name='A')
        B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
        s = tvm.create_schedule(B.op)
        f = tvm.build(s, [A, B], "c", name="myadd")
        engine_module = generate_engine_module()
        from tvm.contrib import util
        temp = util.tempdir()
        file_name = "deploy_lib.so"
        path_lib = temp.relpath(file_name)
        resnet18_cpu_lib.import_module(f)
        resnet18_cpu_lib.import_module(engine_module)
        kwargs = {
            "options":
            ["-O2", "-std=c++11", "-I" + header_file_dir_path.relpath("")]
        }
        resnet18_cpu_lib.export_library(path_lib, fcompile=False, **kwargs)
        loaded_lib = tvm.module.load(path_lib)
        assert loaded_lib.type_key == "library"
        assert loaded_lib.imported_modules[0].type_key == "library"
        assert loaded_lib.imported_modules[1].type_key == "library"
    def partition():
        data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
        weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32"))
        bn_gamma = relay.var("bn_gamma", relay.TensorType((16, ), "float32"))
        bn_beta = relay.var("bn_beta", relay.TensorType((16, ), "float32"))
        bn_mmean = relay.var("bn_mean", relay.TensorType((16, ), "float32"))
        bn_mvar = relay.var("bn_var", relay.TensorType((16, ), "float32"))

        conv = relay.nn.conv2d(
            data=data,
            weight=weight,
            kernel_size=(3, 3),
            channels=16,
            padding=(1, 1))
        bn_output = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean,
                                        bn_mvar)

        func = relay.Function([data, weight, bn_gamma, bn_beta, bn_mmean,
                               bn_mvar], bn_output.astuple())
        mod = tvm.IRModule()
        mod["main"] = func
        op_list = ["nn.batch_norm", "nn.conv2d"]
        mod = WhiteListAnnotator(op_list, "test_compiler")(mod)

        opt_pass = tvm.transform.Sequential([
            transform.InferType(),
            transform.PartitionGraph(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.AlterOpLayout(),
        ])

        with relay.build_config(opt_level=3):
            mod = opt_pass(mod)

        return mod
def test_tflite_anistropic_strides():
    # uint8 input
    data_shape = (1, 1, 3, 6)
    data_dtype = 'uint8'
    kernel_shape = (1, 1, 2, 2)
    kernel_dtype = 'uint8'
    ref_func, qnn_func = get_funcs(data_shape=data_shape,
                                   data_dtype=data_dtype,
                                   kernel_shape=kernel_shape,
                                   kernel_dtype=kernel_dtype,
                                   input_zero_point=127,
                                   kernel_zero_point=127,
                                   kernel_size=(2, 2),
                                   padding=(0, 0),
                                   strides=(1, 3),
                                   dilation=(1, 1),
                                   data_layout="NCHW",
                                   kernel_layout="OIHW",
                                   out_dtype="int32")
    golden_data = np.array(
        (133, 131, 129, 125, 123, 121, 135, 133, 131, 123, 121, 119, 137, 135,
         133, 121, 119, 117)).reshape(data_shape)
    golden_data = golden_data.astype('uint8')
    golden_weight = np.array((129, 131, 133, 135)).reshape(kernel_shape)
    golden_weight = golden_weight.astype('uint8')

    with relay.build_config(opt_level=2):
        params = {'kernel': golden_weight}
        graph, lib, params = relay.build(qnn_func, "llvm", params=params)
        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
        mod.set_input("data", golden_data)
        mod.set_input(**params)
        mod.run()
        qnn_output = mod.get_output(0).asnumpy()
    golden_output = np.array((124, -92, 164, -132)).reshape(1, 1, 2, 2)
    np.testing.assert_equal(qnn_output, golden_output)
Example #26
def test_dynamic_dequantize():
    x = relay.var("x", shape=(1, 2, 3, 4), dtype="int8")
    scale_var = relay.var("scale", shape=(), dtype="float32")
    zp_var = relay.var("zp", shape=(), dtype="int32")

    deq_x = relay.qnn.op.dequantize(x, scale_var * scale_var, zp_var + zp_var)
    tt = run_infer_type(deq_x)

    assert tt.checked_type == relay.TensorType((1, 2, 3, 4), "float32")
    func = relay.Function([x, scale_var, zp_var], deq_x)
    data = np.random.uniform(size=(1, 2, 3, 4)).astype("int8")
    scale = np.array(1).astype("float32")
    zp = np.array(0).astype("int32")

    mod = tvm.ir.IRModule.from_expr(func)

    for target, dev in tvm.testing.enabled_targets():
        # TODO: (electriclilies) enable AlterOpLayout when it is fixed
        with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
            lib = relay.build(mod, target=target)

    module = graph_runtime.GraphModule(lib["default"](dev))
    module.set_input(**{"x": data, "scale": scale, "zp": zp})
    module.run()
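
run_infer_type() above is the usual TVM test-suite helper; presumably:

def run_infer_type(expr):
    # wrap in a module, run type inference, and hand back the typed expression
    mod = tvm.ir.IRModule.from_expr(expr)
    mod = relay.transform.InferType()(mod)
    entry = mod["main"]
    return entry if isinstance(expr, relay.Function) else entry.body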
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params,
                                              ops=(relay.op.nn.conv2d, ))

    # run tuning tasks
    print("Tuning...")
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod,
                                                          target=target,
                                                          params=params)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input(input_name, data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    net, params, input_shape, out_shape = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(net, target=target,
                                              params=params, ops=(relay.op.nn.conv2d,))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                net, target=target, params=params)

        # export library
        tmp = tempdir()
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

        # load parameters
        ctx = tvm.device(str(target), 0)
        module = runtime.create(graph, lib, ctx)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
def test_broadcast_layout():
    with TempOpAttr("qnn.conv2d", "FTVMQnnLegalize", legalize_qnn_conv2d):

        # Test broadcast support for NHWC layout.
        data_shape = (1, 229, 229, 3) # NHWC
        data_dtype = 'uint8'
        kernel_shape = (7, 7, 3, 64) # HWIO
        kernel_dtype = 'int8'
        _, qnn_func = get_funcs(data_shape=data_shape,
                                data_dtype=data_dtype,
                                kernel_shape=kernel_shape,
                                kernel_dtype=kernel_dtype,
                                input_zero_point=8,
                                kernel_zero_point=3,
                                input_scale=1.0,
                                kernel_scale=1.0,
                                kernel_size=(7, 7),
                                padding=(1, 1),
                                strides=(1, 1),
                                dilation=(1, 1),
                                data_layout="NHWC",
                                kernel_layout="HWIO",
                                out_dtype="int32")
        func = qnn_func['main'].body
        bias = relay.var("bias", shape=(64,), dtype="int32")
        bias2 = relay.var("bias2", shape=(1, 225, 225, 1), dtype="int32")

        # Check broadcast support on both lhs and rhs
        func = relay.add(func, bias2)
        func = relay.add(bias2, func)
        func = relay.add(bias, func)
        func = relay.add(func, bias)
        func = relay.Function(relay.analysis.free_vars(func), func)
        mod = tvm.IRModule.from_expr(func)
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(mod, "llvm -mcpu=skylake-avx512")
def test_tflite_large_irregular():
    with TempOpAttr("qnn.conv2d", "FTVMQnnLegalize", legalize_qnn_conv2d):

        # uint8 input
        data_shape = (1, 1024, 1, 1)
        data_dtype = 'uint8'
        kernel_shape = (1001, 1024, 1, 1)
        kernel_dtype = 'uint8'
        ref_func, qnn_func = get_funcs(data_shape=data_shape,
                                       data_dtype=data_dtype,
                                       kernel_shape=kernel_shape,
                                       kernel_dtype=kernel_dtype,
                                       input_zero_point=127,
                                       kernel_zero_point=127,
                                       input_scale=1.0,
                                       kernel_scale=1.0,
                                       kernel_size=(1, 1),
                                       padding=(0, 0),
                                       strides=(1, 1),
                                       dilation=(1, 1),
                                       data_layout="NCHW",
                                       kernel_layout="OIHW",
                                       out_dtype="int32")
        golden_data = np.full(data_shape, 127).astype('uint8')
        golden_weight = np.full(kernel_shape, 127).astype('uint8')

        with relay.build_config(opt_level=2):
            params = {'kernel': golden_weight}
            graph, lib, params = relay.build(qnn_func, "llvm", params=params)
            mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
            mod.set_input("data", golden_data)
            mod.set_input(**params)
            mod.run()
            qnn_output = mod.get_output(0).asnumpy()
        golden_output = np.full((1, 1001, 1, 1), 0).astype('uint8')
        np.testing.assert_equal(qnn_output, golden_output)
def quantize_test_driver(in_dtype, quant_args, out_dtype, in_data,
                         verify_output_data):
    shape = in_data.shape
    input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
    output_zero_point = quant_args['out_zero_point']
    output_scale = quant_args['out_scale']
    quantized_output = relay.qnn.op.quantize(
        input_data,
        output_scale=output_scale,
        output_zero_point=output_zero_point,
        out_dtype=out_dtype)
    mod = relay.Function(relay.analysis.free_vars(quantized_output),
                         quantized_output)
    mod = relay.Module.from_expr(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
        rt_mod.set_input(input_data=in_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).asnumpy()
        np.testing.assert_equal(res, verify_output_data)
        assert res.dtype == out_dtype
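
A plausible invocation of the driver, with values chosen so the expected output can be checked by hand via Q = clip(round(x / scale) + zero_point, 0, 255):

in_data = np.array([-63.5, -63, 0, 64, 127.5]).astype('float32').reshape((1, 5))
quantize_test_driver(in_dtype='float32',
                     quant_args={'out_zero_point': 127, 'out_scale': 0.5},
                     out_dtype='uint8',
                     in_data=in_data,
                     verify_output_data=np.array([0, 1, 127, 255, 255])
                         .astype('uint8').reshape((1, 5)))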
Example #32
def tune_and_evaluate(tuning_opt):

    if env.TARGET != "sim":
        # Get remote from fleet node
        remote = autotvm.measure.request_remote(env.TARGET,
                                                tracker_host,
                                                tracker_port,
                                                timeout=10000)
        # Reconfigure the JIT runtime and FPGA.
        vta.reconfig_runtime(remote)
        vta.program_fpga(remote, bitstream=None)
    else:
        # In simulation mode, host the RPC server locally.
        remote = rpc.LocalSession()

    # Register VTA tuning tasks
    register_vta_tuning_tasks()

    # Perform task extraction on Relay program
    print("Extract tasks...")
    relay_prog, params = compile_network(env, target, network, start_pack,
                                         stop_pack)
    mod = tvm.IRModule.from_expr(relay_prog)
    tasks = autotvm.task.extract_from_program(mod,
                                              params=params,
                                              ops=(tvm.relay.op.nn.conv2d, ),
                                              target=target,
                                              target_host=env.target_host)

    # We should have extracted 10 convolution tasks
    assert len(tasks) == 10
    print("Extracted {} conv2d tasks:".format(len(tasks)))
    for tsk in tasks:
        inp = tsk.args[0][1]
        wgt = tsk.args[1][1]
        batch = inp[0] * inp[4]
        in_filter = inp[1] * inp[5]
        out_filter = wgt[0] * wgt[4]
        height, width = inp[2], inp[3]
        hkernel, wkernel = wgt[2], wgt[3]
        hstride, wstride = tsk.args[2][0], tsk.args[2][1]
        hpad, wpad = tsk.args[3][0], tsk.args[3][1]
        print("({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})".format(
            batch, height, width, in_filter, out_filter, hkernel, wkernel,
            hpad, wpad, hstride, wstride))

    # We do not run the tuning in our webpage server since it takes too long.
    # Comment out the following line to run it yourself.
    return

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[log_file]):
        # Compile network
        print("Compile...")
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            if target.device_name != "vta":
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)

        # Export library
        print("Upload...")
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # Generate the graph runtime
        ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
        m = graph_runtime.create(graph, lib, ctx)

        # upload parameters to device
        image = tvm.nd.array(
            (np.random.uniform(size=(1, 3, 224, 224))).astype('float32'))
        m.set_input(**params)
        m.set_input('data', image)

        # evaluate
        print("Evaluate inference time cost...")
        timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
        tcost = timer()
        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #33
        'n_trial': 1e9,
        'early_stopping': None,
        'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(build_func=vta.vta_autotvm_build_func),
                runner=autotvm.RPCRunner(env.TARGET, tracker_host, tracker_port,
                    number=4, min_repeat_ms=150, repeat=opt.measurements, timeout=60,
                    check_correctness=True))
    }
    tune_tasks(tasks, **tuning_opt)

    # Compile kernels with history best records
    with autotvm.tophub.context(target, extra_files=[opt.log_filename]): 

        # Compile network
        print("Compiling network with best tuning parameters...")
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            if target.device_name != "vta":
                graph, lib, params = relay.build(
                    relay_prog, target=target,
                    params=params, target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog, target=target,
                        params=params, target_host=env.target_host)

        # Export library
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")
def compile(target):
    net, params = relay.frontend.from_mxnet(block, {"data": dshape})
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, target, params=params)
    return graph, lib, params
Example #35
def compile(info):
    if info['model_path'].endswith('.onnx'):
        is_onnx = True
    elif info['model_path'].endswith('.pb'):
        is_onnx = False
    else:
        raise Exception('Model file format not supported')

    # Load model
    if is_onnx:
        onnx_model = onnx.load(info['model_path'])
        mod, params = relay.frontend.from_onnx(onnx_model, info['input_dict'])
        optimization_level = 3
    else:
        with tf.compat.v1.Session() as sess:
            with tf.io.gfile.GFile(info['model_path'], 'rb') as f:
                graph_def = tf.compat.v1.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, name='')
                graph_def = sess.graph.as_graph_def()
                graph_def = tf_testing.ProcessGraphDefParam(graph_def)

        input_shape_dict = {'DecodeJpeg/contents': info['input_list']}
        mod, params = relay.frontend.from_tensorflow(
            graph_def, shape=input_shape_dict, outputs=info['output_names'])
        optimization_level = 2

    # Set compilation params
    target = 'llvm'
    if info['cross_compile']:
        target += ' -target=aarch64-linux-gnu'

    # Compile model
    # Note opt_level cannot be higher than 2 because of a bug:
    # https://discuss.tvm.ai/t/tvm-0-6-1-compile-yolo-v2-tiny-fail-worked-in-v0-5-2/7244
    with relay.build_config(opt_level=optimization_level):
        graph, lib, params = relay.build(mod, target=target, params=params)

    # Write the compiled model to files
    output_model_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_MODULE_FILENAME)
    output_graph_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_GRAPH_FILENAME)
    output_param_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_PARAM_FILENAME)

    print('Writing library to', output_model_path)
    if info['cross_compile']:
        lib.export_library(
            output_model_path,
            cc.build_create_shared_func(options=[
                '--target=aarch64-linux-gnu', '-march=armv8-a', '-mfpu=NEON'
            ],
                                        compile_cmd='/usr/bin/clang'))
    else:
        lib.export_library(output_model_path)

    print('Writing graph to', output_graph_path)
    with open(output_graph_path, 'w') as graph_file:
        graph_file.write(graph)

    print('Writing weights to', output_param_path)
    with open(output_param_path, 'wb') as param_file:
        param_file.write(relay.save_param_dict(params))
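
An illustrative info dict for the ONNX path; the file name, shapes, and output directory are hypothetical placeholders:

info = {
    'model_path': 'model.onnx',                  # hypothetical model file
    'input_dict': {'input': (1, 3, 224, 224)},   # ONNX input name -> shape
    'input_list': None,      # only consulted on the TensorFlow (.pb) path
    'output_names': None,    # only consulted on the TensorFlow (.pb) path
    'cross_compile': False,
    'output_path': '.',
}
compile(info)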
Example #36
def export_tvm(path,
               block,
               data_shape,
               epoch=0,
               preprocess=True,
               layout='HWC',
               ctx=mx.cpu(),
               target='llvm',
               opt_level=3,
               use_autotvm=False):
    """Helper function to export a HybridBlock to TVM executable. Note that tvm package needs
    to be installed(https://tvm.ai/).

    Parameters
    ----------
    path : str
        Path to save model.
        Three files path_deploy_lib.so, path_deploy_graph.json and path_deploy_xxxx.params
        will be created, where xxxx is the 4-digit epoch number.
    block : mxnet.gluon.HybridBlock
        The hybridizable block. Note that normal gluon.Block is not supported.
    data_shape : tuple of int, required
        Unlike `export_block`, `data_shape` is required here for the purpose of optimization.
        If dynamic shape is required, you can use the shape that most fits the inference tasks,
        but the optimization won't accommodate all situations.
    epoch : int
        Epoch number of saved model.
    preprocess : mxnet.gluon.HybridBlock, default is True.
        Preprocess block prior to the network.
        By default (True), it will subtract mean [123.675, 116.28, 103.53], divide
        std [58.395, 57.12, 57.375], and convert original image (B, H, W, C and range [0, 255]) to
        tensor (B, C, H, W) as network input. This is the default preprocess behavior of all GluonCV
        pre-trained models.
        You can use custom pre-process hybrid block or disable by set ``preprocess=None``.
    layout : str, default is 'HWC'
        The layout for raw input data. By default is HWC. Supports 'HWC' and 'CHW'.
        Note that image channel order is always RGB.
    ctx: mx.Context, default mx.cpu()
        Network context.
    target : str, default is 'llvm'
        Runtime type for code generation, can be ('llvm', 'cuda', 'opencl', 'metal'...)
    opt_level : int, default is 3
        TVM optimization level. If supported, a higher `opt_level` may generate a more
        efficient runtime library; however, some operators may not support high-level
        optimization and will fall back to a lower `opt_level`.
    use_autotvm : bool, default is False
        Use autotvm for performance tuning. Note that this can take very long time, since it's a
        search and model based tuning process.

    Returns
    -------
    None

    """
    try:
        import tvm
        from tvm import autotvm
        from tvm import relay
        from tvm.relay import testing
        from tvm.autotvm.tuner import XGBTuner, RandomTuner
        import tvm.contrib.graph_runtime as runtime
    except ImportError:
        print(
            "TVM package required, please refer https://tvm.ai/ for installation guide."
        )
        raise

    # add preprocess block if necessary
    if preprocess:
        # add preprocess block
        if preprocess is True:
            preprocess = _DefaultPreprocess()
        else:
            if not isinstance(preprocess, HybridBlock):
                raise TypeError(
                    "preprocess must be HybridBlock, given {}".format(
                        type(preprocess)))
        wrapper_block = nn.HybridSequential()
        preprocess.initialize(ctx=ctx)
        wrapper_block.add(preprocess)
        wrapper_block.add(block)
    else:
        wrapper_block = block
    wrapper_block.collect_params().reset_ctx(ctx)

    # convert to relay graph
    sym, params = relay.frontend.from_mxnet(wrapper_block,
                                            shape={"data": data_shape})

    if use_autotvm:

        def tune_kernels(tasks,
                         measure_option,
                         tuner='gridsearch',
                         early_stopping=None,
                         log_filename='tuning.log'):
            for i, tsk in enumerate(tasks):
                prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

                # converting conv2d tasks to conv2d_NCHWc tasks
                op_name = tsk.workload[0]
                if op_name == 'conv2d':
                    func_create = 'topi_x86_conv2d_NCHWc'
                elif op_name == 'depthwise_conv2d_nchw':
                    func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
                else:
                    raise ValueError(
                        "Tuning {} is not supported on x86".format(op_name))

                task = autotvm.task.create(func_create,
                                           args=tsk.args,
                                           target=target,
                                           template_key='direct')
                task.workload = tsk.workload

                # create tuner
                if tuner in ('xgb', 'xgb-rank'):
                    tuner_obj = XGBTuner(task, loss_type='rank')
                elif tuner == 'ga':
                    tuner_obj = GATuner(task, pop_size=50)
                elif tuner == 'random':
                    tuner_obj = RandomTuner(task)
                elif tuner == 'gridsearch':
                    tuner_obj = GridSearchTuner(task)
                else:
                    raise ValueError("Invalid tuner: " + tuner)

                # do tuning
                n_trial = len(task.config_space)
                tuner_obj.tune(n_trial=n_trial,
                               early_stopping=early_stopping,
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.progress_bar(
                                       n_trial, prefix=prefix),
                                   autotvm.callback.log_to_file(log_filename)
                               ])

        # extract conv2d tuning tasks from the relay program
        tasks = autotvm.task.extract_from_program(sym,
                                                  target=target,
                                                  params=params,
                                                  ops=(relay.op.nn.conv2d, ))
        logging.warning('Start tuning, this can be slow...')
        tuning_option = {
            'log_filename': 'tune.log',
            'tuner': 'random',
            'early_stopping': None,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(number=10,
                                           repeat=1,
                                           min_repeat_ms=1000),
            ),
        }
        tune_kernels(tasks, **tuning_option)

        # compile with the best schedules found during tuning
        with autotvm.apply_history_best(tuning_option['log_filename']):
            with relay.build_config(opt_level=opt_level):
                graph, lib, params = relay.build_module.build(sym,
                                                              target=target,
                                                              params=params)

    else:
        with relay.build_config(opt_level=opt_level):
            graph, lib, params = relay.build_module.build(sym,
                                                          target,
                                                          params=params)

    # export library, json graph and parameters
    lib.export_library(path + '_deploy_lib.so')
    with open(path + '_deploy_graph.json', 'w') as fo:
        fo.write(graph)
    with open(path + '_deploy_{:04n}.params'.format(epoch), 'wb') as fo:
        try:
            # older TVM releases expose save_param_dict under relay.compiler
            fo.write(relay.compiler.save_param_dict(params))
        except AttributeError:
            fo.write(relay.save_param_dict(params))
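
# A minimal sketch of loading the three exported artifacts back for inference.
# It assumes the same TVM 0.6-era graph_runtime API used throughout these
# examples; `path`, `epoch` and `data_shape` stand in for the values passed to
# the export function above, and the 'data' input name is an assumption.
import numpy as np
import tvm
from tvm.contrib import graph_runtime

loaded_lib = tvm.module.load(path + '_deploy_lib.so')
with open(path + '_deploy_graph.json') as f:
    loaded_graph = f.read()
with open(path + '_deploy_{:04n}.params'.format(epoch), 'rb') as f:
    loaded_params = bytearray(f.read())

module = graph_runtime.create(loaded_graph, loaded_lib, tvm.cpu())
module.load_params(loaded_params)
module.set_input('data', tvm.nd.array(np.zeros(data_shape, dtype='float32')))
module.run()
out = module.get_output(0).asnumpy()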
Example #37
def test_alter_layout_strided_slice():
    """Test rewriting strided_slice during alter_iop_layout"""
    def before():
        x = relay.var("x", shape=(1, 32, 28, 28))
        weight = relay.var('weight', shape=(32, 32, 3, 3))
        y = relay.nn.conv2d(x,
                            weight,
                            channels=32,
                            kernel_size=(3, 3),
                            padding=(1, 1))
        y = relay.strided_slice(y,
                                begin=relay.const([0, 16], "int32"),
                                end=relay.const([1, 33], "int32"),
                                strides=relay.const([1, 1], "int32"))
        y = relay.Function(analysis.free_vars(y), y)
        return y

    def alter_conv2d(attrs, inputs, tinfos, out_type):
        data, weight = inputs
        new_attrs = dict(attrs)
        new_attrs['data_layout'] = 'NCHW4c'
        return relay.nn.conv2d(data, weight, **new_attrs)

    def expected():
        x = relay.var("x", shape=(1, 32, 28, 28))
        weight = relay.var("weight", shape=(32, 32, 3, 3))
        weight = relay.layout_transform(weight, "OIHW", "OIHW4i4o")
        x = relay.layout_transform(x, "NCHW", "NCHW4c")
        y = relay.op.nn.contrib_conv2d_nchwc(x,
                                             weight,
                                             channels=32,
                                             kernel_size=(3, 3),
                                             padding=(1, 1),
                                             data_layout="NCHW4c")

        y = relay.strided_slice(y,
                                begin=relay.const([0, 4], "int32"),
                                end=relay.const([1, 21], "int32"),
                                strides=relay.const([1, 1], "int32"))

        y = relay.layout_transform(y, "NCHW4c", "NCHW")
        y = relay.Function(analysis.free_vars(y), y)
        return y

    with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv2d):
        a = before()
        b = run_opt_pass(expected(), transform.InferType())

    # Verify inference result
    mod_before = tvm.IRModule()
    mod_new = tvm.IRModule()
    mod_before['main'] = a
    mod_new['main'] = b
    with relay.build_config(opt_level=3):
        for target, ctx in ctx_list():
            for kind in ["graph", "debug", "vm"]:
                ex_before = relay.create_executor(kind,
                                                  mod=mod_before,
                                                  ctx=ctx,
                                                  target=target)
                ex_new = relay.create_executor(kind,
                                               mod=mod_new,
                                               ctx=ctx,
                                               target=target)
                np_data = np.random.uniform(size=(1, 32, 28, 28)).astype("float32")
                np_weight = np.random.uniform(size=(32, 32, 3, 3)).astype("float32")
                result_before = ex_before.evaluate()(np_data, np_weight)
                result_new = ex_new.evaluate()(np_data, np_weight)
                tvm.testing.assert_allclose(result_before.asnumpy(),
                                            result_new.asnumpy(),
                                            rtol=1e-5,
                                            atol=1e-5)
Example #38
img_path = download_testdata(img_url, 'cat.png', module='data')
img = Image.open(img_path).resize((224, 224))
img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
img_y, img_cb, img_cr = img_ycbcr.split()
x = np.array(img_y)[np.newaxis, np.newaxis, :, :]

######################################################################
# Compile the model with relay
# ---------------------------------------------
target = 'llvm'

input_name = '1'
shape_dict = {input_name: x.shape}
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)

with relay.build_config(opt_level=1):
    intrp = relay.build_module.create_executor('graph', sym, tvm.cpu(0), target)

######################################################################
# Execute on TVM
# ---------------------------------------------
dtype = 'float32'
tvm_output = intrp.evaluate(sym)(tvm.nd.array(x.astype(dtype)), **params).asnumpy()

######################################################################
# Display results
# ---------------------------------------------
# We put the input and output images side by side
from matplotlib import pyplot as plt
out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode='L')
out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
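
# The snippet ends before the channels are recombined. A plausible
# continuation using standard PIL and matplotlib calls (not part of the
# original snippet) is to upsample Cr the same way, merge the three channels
# back to RGB, and show input and output next to each other:
out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
result = Image.merge('YCbCr', [out_y, out_cb, out_cr]).convert('RGB')
fig, axes = plt.subplots(1, 2)
axes[0].imshow(img)
axes[0].set_title('input')
axes[1].imshow(result)
axes[1].set_title('output')
plt.show()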
Example #39
def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
                   shape=None, dtype=None, in_range=None, values=None,
                   exclude_targets=None, only_targets=None,
                   additional_params=None,
                   numerical_grads=None, numerical_grads_params=None,
                   atol=1e-5, rtol=1e-5, quiet=False):
    """Compute the function and/or its gradients on a random input and raise
    an exception if the result doesn't match the reference implementation.

    Parameters
    ----------
    symbol : nnvm.Symbol
        A symbol representing the output.

    forward : Callable[..., List[numpy.ndarray]], optional
        A reference implementation to compare with.

    backward : Callable[..., List[numpy.ndarray] or Dict[str, numpy.ndarray]], optional
        A reference implementation of gradients. Besides the normal inputs it should also
        accept head_grads: a list of gradients of some scalar wrt the outputs, or just a
        single gradient if there is only one output.
        Should return either a dict mapping input variable names to the respective
        gradients or a list of gradients wrt the variables from grad_input_vars in
        exactly the same order (alphabetical by default).

    grad_input_vars : List[nnvm.Symbol or str], optional
        A list of variables with respect to which the gradients will be computed.
        None (default) means that all input variables will be used in an alphabetical order.

    shape : Dict[nnvm.Symbol or str, Tuple[int]] or Tuple[int], optional
        A dict mapping input variable names to shapes, or just a single shape.
        By default shapes will be inferred from variables' attributes (see the Examples).
        Note that this parameter takes precedence over variables' attributes.

    dtype : Dict[nnvm.Symbol or str, str] or str, optional
        A dict mapping input variable names to dtypes, or just a single dtype.
        By default dtypes will be inferred from variables' attributes (see the Examples).
        If dtypes cannot be inferred for some variables then float32 will be used as a fallback.
        Note that this parameter takes precedence over variables' attributes.

    in_range : Dict[nnvm.Symbol or str, (float, float)] or (float, float), optional
        A dict mapping input variable names to ranges or just a single range
        (the same for all variables). Input values will be generated from
        uniform distributions on these ranges. `head_grads` can also be
        assigned a range this way.

    values : Dict[nnvm.Symbol or str, numpy.ndarray], optional
        A dict explicitly providing values for some variables instead of random generation.

    exclude_targets : Set[str], optional
        Skip compiling and running anything for these targets.

    only_targets : Set[str], optional
        Test only for those targets from `ctx_list()` that are also in this set.

    additional_params : dict, optional
        A dict of additional parameters which will be passed to forward and backward.

    numerical_grads : bool or 'if_possible', optional
        Whether to additionally check against numerically computed gradients. If 'if_possible'
        or None (the default) is passed, a gradient computation graph will be created first,
        and gradients will be checked numerically only if that graph can be created (if some
        operations lack implemented gradients, only a warning is issued).
        Checking against numerical gradients is done via the `check_numerical_grads` function.

    numerical_grads_params : dict, optional
        Additional parameters for `check_numerical_grads`.

    atol : float, optional
        Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.

    rtol : float, optional
        Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.

    quiet : bool, optional
        Don't dump additional information to stdout on failure.

    Examples
    --------
    .. code-block:: python

        x = sym.Variable("x", shape=(1, 2))
        y = sym.Variable("y", shape=(1, 2))

        # check the function and its gradients both numerically and using a reference function
        check_function(x + 2*y,
                       lambda x, y: x + 2*y,
                       lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads})

        # just check gradients numerically
        check_function(x + 2*y, numerical_grads=True)

        # just check the forward computation
        check_function(x + 2*y, lambda x, y: x + 2*y, numerical_grads=False)

        # specifying dtype
        check_function(x + 2*y, lambda x, y: x + 2*y, dtype='float64')

        # dtypes can also be specified during variable creation with dtype codes
        x = sym.Variable("x", dtype=0)
        check_function(x + 1, shape=(2, 2), numerical_grads=True)
    """
    # validate and preprocess the input params
    if numerical_grads is None and forward is None and backward is None:
        raise ValueError("No reference function was passed to check_function. If you only want to "
                         "check gradients numerically, pass numerical_grads=True explicitly.")

    if numerical_grads is None:
        numerical_grads = 'if_possible'

    if numerical_grads not in [False, True, 'if_possible']:
        raise ValueError("numerical_grads must be a bool or 'if_possible', not {}"
                         .format(numerical_grads))

    if additional_params is None:
        additional_params = {}

    input_vars = symbol.list_input_variables()
    input_dict = {x.attr('name'): x for x in input_vars}

    if grad_input_vars is None:
        grad_input_vars = sorted(input_vars, key=lambda x: x.attr('name'))
    else:
        grad_input_vars = [input_dict[x] if isinstance(x, str) else x for x in grad_input_vars]

    in_range = _dict_var_to_dict_str(in_range)
    values = _dict_var_to_dict_str(values)

    out_len = len(symbol.list_output_names())

    # Infer the output shapes and dtypes, and preprocess the shape and dtype params
    forward_graph, shape, dtype, out_shapes, out_dtypes = \
        infer_shapes_dtypes(nnvm.graph.create(symbol), shape=shape, dtype=dtype,
                            fallback_dtype='float32')

    if not all(out_shapes) or not all(out_dtypes):
        if not quiet:
            print(forward_graph.ir(join_node_attrs=['shape', 'dtype']))
        raise ValueError("Could not infer shapes or dtypes for outputs.\n"
                         "out_shapes = {}\nout_dtypes = {}".format(out_shapes, out_dtypes))

    backward_graph = None

    # If we want gradients, we have to recreate the graph, but now with gradient computations
    # Note that here we need out_shapes for defining the shape of head grads, so we have to
    # create the graph twice
    if backward is not None or numerical_grads:
        try:
            head_grads_symbols = [nnvm.symbol.Variable("head_grads_" + str(i),
                                                       shape=out_shapes[i],
                                                       dtype=DTYPE_TO_TCODE[out_dtypes[i]])
                                  for i in range(out_len)]
            grad_symbols = graph_util.gradients([symbol], grad_input_vars,
                                                grad_ys=head_grads_symbols)
            # Sometimes grads do not depend on head_grads, so head_grads does not appear
            # in the variable list; adding it manually prevents this, making things a bit easier
            backward_graph = \
                nnvm.graph.create(nnvm.symbol.Group([symbol] + grad_symbols + head_grads_symbols))

            backward_graph, shape, dtype, out_shapes, out_dtypes = \
                infer_shapes_dtypes(backward_graph, shape=shape, dtype=dtype,
                                    fallback_dtype='float32')
        except nnvm._base.NNVMError as err:
            if backward is None and numerical_grads == "if_possible":
                logging.warning("Won't check gradients because: %s", str(err).split('\n', 1)[0])
                numerical_grads = False
                backward_graph = None
            else:
                raise

    main_graph = backward_graph if backward_graph is not None else forward_graph

    # Generate random data for inputs (including head_grads)

    np_inputs = {}

    for x in main_graph.symbol.list_input_variables():
        x_name = x.attr('name')
        x_shape = shape[x_name]
        x_dtype = dtype[x_name]

        if values is not None and x_name in values:
            np_inputs[x_name] = values[x_name].astype(x_dtype)
            continue

        low = -1.0
        high = 1.0
        if in_range is not None:
            if isinstance(in_range, dict):
                if x_name in in_range:
                    low = in_range[x_name][0]
                    high = in_range[x_name][1]
            else:
                low = in_range[0]
                high = in_range[1]

        np_inputs[x_name] = np.random.uniform(size=x_shape, low=low, high=high).astype(x_dtype)

    np_inputs_without_head_grads = {k: np_inputs[k] for k in np_inputs
                                    if not k.startswith('head_grads_')}

    nothing_was_done = True

    # Compute and compare the results
    for target, ctx in ctx_list():
        if exclude_targets is not None:
            if target in exclude_targets or str(target) in exclude_targets:
                logging.info("Skipping target = %s, ctx = %s", target, ctx)
                continue
        if only_targets is not None:
            if target not in only_targets and str(target) not in only_targets:
                logging.info("Skipping target = %s, ctx = %s", target, ctx)
                continue

        logging.info("Checking computation on target = %s, ctx = %s", target, ctx)

        debug_stage = None

        try:
            nnvm_res = None

            debug_stage = "compiling"
            main_function = graph_to_function(main_graph, target, ctx)

            # nnvm_res contains the output and gradients (if they are needed)
            debug_stage = "running"
            nnvm_res = main_function(**np_inputs)

            try:
                logging.debug("checking to_relay conversion")
                inputs = np_inputs_without_head_grads.copy()
                func, inputs = to_relay(main_graph, shape, dtype, params=inputs)
                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build(func, target=target)
                m = graph_runtime.create(graph, lib, ctx)
                m.set_input(**inputs)
                m.set_input(**params)
                m.run()
                for i in range(out_len):
                    relay_out = m.get_output(i).asnumpy()
                    tvm.testing.assert_allclose(nnvm_res[i], relay_out, atol=atol, rtol=rtol)
            except NotImplementedError as err:
                # the NNVM operator is not supported yet
                logging.warning(err)

            if backward_graph is not None:
                grad_var_names = [x.attr('name') for x in grad_input_vars]
                nnvm_grads = {x: v for x, v in zip(grad_var_names, nnvm_res[out_len:])}

            if forward is not None:
                nothing_was_done = False
                debug_stage = "checking forward computation"
                logging.debug(debug_stage)

                params = {}
                params.update(np_inputs_without_head_grads)
                params.update(additional_params)
                numpy_res = forward(**params)

                if isinstance(numpy_res, tuple):
                    numpy_res = list(numpy_res)

                if not isinstance(numpy_res, list):
                    numpy_res = [numpy_res]

                if len(numpy_res) != out_len:
                    raise ValueError("Forward function returned {} values, but "
                                     "the nnvm graph returns {} values"
                                     .format(len(numpy_res), out_len))

                for i in range(out_len):
                    tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)

            if backward is not None:
                nothing_was_done = False
                debug_stage = "checking gradients"
                logging.debug(debug_stage)

                np_head_grads = [np_inputs["head_grads_" + str(i)] for i in range(out_len)]

                if out_len == 1:
                    np_head_grads = np_head_grads[0]

                params = {'head_grads': np_head_grads}
                params.update(np_inputs_without_head_grads)
                params.update(additional_params)
                numpy_grads = backward(**params)

                if not isinstance(numpy_grads, dict):
                    if isinstance(numpy_grads, tuple):
                        numpy_grads = list(numpy_grads)
                    if not isinstance(numpy_grads, list):
                        numpy_grads = [numpy_grads]
                    numpy_grads = {x: v for x, v in zip(grad_var_names, numpy_grads)}
                    if len(numpy_grads) != len(grad_var_names):
                        raise ValueError("The backward function returns a list of gradients which "
                                         "does not contain gradients for these variables: {}"
                                         .format(set(grad_var_names) - set(numpy_grads)))

                for x_name in numpy_grads:
                    tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
                                                atol=atol, rtol=rtol)

            if numerical_grads:
                nothing_was_done = False
                debug_stage = "checking gradients numerically"
                logging.debug(debug_stage)

                forward_function = graph_to_function(forward_graph, target, ctx)

                # Since the result may be non-scalar, we have to put another operation on top,
                # so we just multiply by the randomly generated head_grads and then sum everything.
                # This way we can reuse the gradient values that have already been computed.
                def scalar_function(**kwargs):
                    res = forward_function(**kwargs)
                    return np.sum([np.dot(np_inputs['head_grads_' + str(i)].ravel(), res[i].ravel())
                                   for i in range(out_len)])

                if numerical_grads_params is None:
                    numerical_grads_params = {}

                check_numerical_grads(
                    scalar_function,
                    input_values=np_inputs_without_head_grads,
                    grad_values=nnvm_grads,
                    **numerical_grads_params)

        except:
            if not quiet:
                print("\ncheck_function failed while {}, here is the main graph"
                      .format(debug_stage))
                print(main_graph.ir(join_node_attrs=['shape', 'dtype']))
                if nnvm_res is not None:
                    print("Generated inputs:")
                    print(np_inputs)
                    print()
            raise

    if nothing_was_done:
        logging.warning("Nothing was done in check_function. Check ctx_list().")
    def compile_model(self):
        if device == 'vta':
            self.remote = rpc.connect(self.pynq_addr, 9091)
            vta.reconfig_runtime(self.remote)
            vta.program_fpga(self.remote, bitstream=None)
        else:
            self.remote = rpc.LocalSession()

        self.ctx = (self.remote.ext_dev(0)
                    if device == 'vta' else self.remote.cpu(0))

        # Load pre-configured AutoTVM schedules
        with autotvm.tophub.context(target):

            # Populate the shape and data type dictionary for ResNet input
            dtype_dict = {'data': 'float32'}
            shape_dict = {'data': (env.BATCH, 3, 224, 224)}

            gluon_model = vision.resnet18_v1(
                pretrained=True, ctx=ctx
            ).features if args.nonsplit else splitnet.resnet18_v1_split(
                self.id + 1)

            # Measure build start time
            build_start = time.time()

            # Start front end compilation
            mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

            # Update shape and type dictionary
            shape_dict.update({k: v.shape for k, v in params.items()})
            dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

            # Perform quantization in Relay
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
                relay_prog = relay.quantize.quantize(mod['main'],
                                                     params=params)

            # Perform graph packing and constant folding for VTA target
            if target.device_name == 'vta':
                assert env.BLOCK_IN == env.BLOCK_OUT
                relay_prog = graph_pack(relay_prog,
                                        env.BATCH,
                                        env.BLOCK_OUT,
                                        env.WGT_WIDTH,
                                        start_name=start_pack,
                                        stop_name=stop_pack)

            # Compile Relay program with AlterOpLayout disabled
            with relay.build_config(opt_level=3,
                                    disabled_pass={'AlterOpLayout'}):
                if target.device_name != 'vta':
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)
                else:
                    with vta.build_config():
                        graph, lib, params = relay.build(
                            relay_prog,
                            target=target,
                            params=params,
                            target_host=env.target_host)

            self.params = params

            # Measure Relay build time
            build_time = time.time() - build_start
            print(f'inference graph for thread {self.id} built in {build_time:.4f}s!')

            # Send the inference library over to the remote RPC server
            temp = util.tempdir()
            lib.save(temp.relpath('graphlib.o'))
            self.remote.upload(temp.relpath('graphlib.o'))
            lib = self.remote.load_module('graphlib.o')

            # Graph runtime
            self.m = graph_runtime.create(graph, lib, self.ctx)
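
    # A hedged companion sketch (not part of the original class): once
    # compile_model has populated self.m and self.params, inference on the
    # remote context typically looks like this; the method name and the
    # 'data' input name are assumptions.
    def run_inference(self, image):
        self.m.set_input('data', tvm.nd.array(image.astype('float32')))
        self.m.set_input(**self.params)
        self.m.run()
        return self.m.get_output(0).asnumpy()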

# print(func.astext())
###############################################################################
# Tuning
# -----------------
tuning_enable = args.tuning_enable
# log_filename = "./mixed_precision_models/tuning_logs/resnet%d_%s_%s_batch_%d.log" % (num_layers, data_layout, model_type, batch_size)
log_filename = "./mixed_precision_models/tuning_logs/resnet%d_%s_mixed_batch_%d.log" % (
    num_layers, data_layout, batch_size)
tmp_log_file = log_filename + '.temp'

if tuning_enable:
    print("Extracting tasks ...")

    with relay.build_config(opt_level=3):
        tasks = autotvm.task.extract_from_program(func,
                                                  target=TARGET_NAME,
                                                  params=params)

    print(tasks)

    measure_option = autotvm.measure_option(
        builder='local',
        runner=autotvm.LocalRunner(number=20, repeat=3, min_repeat_ms=150)
        # runner=autotvm.RPCRunner(
        #    'T4',  # change the device key to your key
        #    '0.0.0.0', 9190,
        #    number=20, repeat=3, min_repeat_ms=150),
    )
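
    # The snippet stops before the tuning loop itself. A minimal sketch of how
    # the extracted tasks are typically tuned with the options above (standard
    # autotvm API; the trial and early-stopping counts are placeholders):
    for i, tsk in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        tuner = autotvm.tuner.XGBTuner(tsk, loss_type='rank')
        n_trial = min(2000, len(tsk.config_space))
        tuner.tune(n_trial=n_trial,
                   early_stopping=600,
                   measure_option=measure_option,
                   callbacks=[
                       autotvm.callback.progress_bar(n_trial, prefix=prefix),
                       autotvm.callback.log_to_file(tmp_log_file)
                   ])

    # keep only the best record per workload
    autotvm.record.pick_best(tmp_log_file, log_filename)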
# `relay.build_module.build` returns three components: the execution graph in
# json format, the TVM module library of compiled functions specifically
# for this graph on the target hardware, and the parameter blobs of
# the model. During the compilation, Relay does the graph-level
# optimization while TVM does the tensor-level optimization, resulting
# in an optimized runtime module for model serving.
#
# We'll first compile for an Nvidia GPU. Behind the scenes, `relay.build_module.build`
# first does a number of graph-level optimizations, e.g. pruning, fusing, etc.,
# then registers the operators (i.e. the nodes of the optimized graphs) to
# TVM implementations to generate a `tvm.module`.
# To generate the module library, TVM will first transform the high-level IR
# into the lower intrinsic IR of the specified target backend, which is CUDA
# in this example. Then the machine code will be generated as the module library.

opt_level = 3
target = tvm.target.cuda()
with relay.build_config(opt_level=opt_level):
    graph, lib, params = relay.build_module.build(mod, target, params=params)

#####################################################################
# Run the generated library
# ------------------------
# Now we can create graph runtime and run the module on Nvidia GPU.

# create random input
ctx = tvm.gpu()
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
# create module
module = graph_runtime.create(graph, lib, ctx)
# set input and parameters
module.set_input("data", data)
module.set_input(**params)
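# Finally, run the module and fetch the first output (standard graph_runtime
# calls; printing the first few values is just an illustrative check, not
# part of the original snippet).
module.run()
out = module.get_output(0).asnumpy()
print(out.flatten()[0:10])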
Example #43
def run_unpropagatable_graph(dev, tgt):
    R""" The network is as following:
            a     b  c     d
             \   /    \   /
              add      mul
                \      /
                subtract
    """
    
    a = relay.var("a", shape=(10, 10))
    b = relay.var("b", shape=(10, 10))
    c = relay.var("c", shape=(10, 10))
    d = relay.var("d", shape=(10, 10))
    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')
    c_data = np.random.rand(10, 10).astype('float32')
    d_data = np.random.rand(10, 10).astype('float32')
    tmp_add = a_data + b_data
    tmp_mul = np.multiply(c_data, d_data)
    ref_res = np.subtract(tmp_add, tmp_mul)
    
    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", dev: tgt}
    cpu_ctx = fallback_device
    dev_ctx = tvm.context(dev)
    
    def annotated():    
        add = relay.add(a, b)
        _add = relay.annotation.on_device(add, dev_ctx)
        mul = relay.multiply(c, d)
        _mul = relay.annotation.on_device(mul, cpu_ctx)
        sub = relay.subtract(add, mul)
        _sub = relay.annotation.on_device(sub, dev_ctx)
        func = relay.Function([a, b, c, d],
                              relay.Tuple(tvm.convert([_add, _mul,
                                                       _sub, sub])))
        func = relay.ir_pass.infer_type(func)
        func = relay.ir_pass.rewrite_annotated_ops(func,
                                                   dev_ctx.device_type)
        func = relay.ir_pass.infer_type(func)
        return relay.Function(relay.ir_pass.free_vars(func.body[3]),
                              func.body[3])
        
    def expected():    
        add = relay.add(a, b)
        mul = relay.multiply(c, d)
        copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
        sub = relay.subtract(add, copy_mul_sub)
        func = relay.Function([a, b, c, d], sub)
        return func
    
    annotated_func = annotated()
    expected_func = expected()
    expected_index = [2, 2, 2, 1, 1, 1, 2, 2]
    check_annotated_graph(annotated_func, expected_func)
    params = {"a": a_data, "b": b_data, "c": c_data, "d": d_data}
    config = {"opt_level": 0}
    config["fallback_device"] = fallback_device
    with relay.build_config(**config):
        graph, lib, params = relay.build(annotated_func, target, params=params)
        contexts = [tvm.cpu(0), tvm.context(dev)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
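
# A hedged usage sketch: a helper like the one above is typically driven once
# per device in the heterogeneous-execution tests. The 'cuda' key below is an
# illustrative choice and requires a GPU-enabled TVM build.
def test_run_unpropagatable_graph_cuda():
    if tvm.context("cuda").exist:
        run_unpropagatable_graph("cuda", "cuda")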