Example #1
import time

import tvm
from tvm.contrib.util import tempdir


def test_min_repeat_ms():
    tmp = tempdir()
    filename = tmp.relpath("log")

    @tvm.register_func
    def my_debug(filename):
        """one call lasts for 100 ms and writes one character to a file"""
        time.sleep(0.1)
        with open(filename, "a") as fout:
            fout.write("c")

    X = tvm.compute((), lambda : tvm.call_packed("my_debug", filename))
    s = tvm.create_schedule(X.op)
    func = tvm.build(s, [X])

    x = tvm.nd.empty((), dtype="int32")
    ftimer = func.time_evaluator(func.entry_name, tvm.cpu(),
                                 number=1, repeat=1)
    ftimer(x)

    with open(filename, "r") as fin:
        ct = len(fin.readline())

    # the time evaluator performs one warm-up call, so number=1, repeat=1 writes two characters
    assert ct == 2


    ftimer = func.time_evaluator(func.entry_name, tvm.cpu(),
                                 number=1, repeat=1, min_repeat_ms=1000)
    ftimer(x)

    # make sure we get more than 10 calls
    with open(filename, "r") as fin:
        ct = len(fin.readline())

    assert ct > 10 + 2
Example #2
def tune_and_evaluate(tuning_opt):
    # extract workloads from nnvm graph
    print("Extract tasks...")
    net, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_graph(net, target=target,
                                            shape={'data': data_shape}, dtype=dtype,
                                            symbols=(nnvm.sym.conv2d,))

    # run tuning tasks
    print("Tuning...")
    tune_kernels(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with nnvm.compiler.build_config(opt_level=3):
            graph, lib, params = nnvm.compiler.build(
                net, target=target, shape={'data': data_shape}, params=params, dtype=dtype)

        # upload parameters to device
        ctx = tvm.cpu()
        data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
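Example #3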
    def check_verify():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        try:
            mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n,)).astype(A.dtype)
        mod.set_input(x=a)

        #verify dumproot created
        directory = mod._dump_path
        assert(os.path.exists(directory))

        #verify graph is there
        GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
        assert(len(os.listdir(directory)) == 1)

        #verify the file name is proper
        assert(os.path.exists(os.path.join(directory, GRAPH_DUMP_FILE_NAME)))

        mod.run()
        #Verify the tensors are dumped
        assert(len(os.listdir(directory)) > 1)

        #verify the output is correct
        out = mod.get_output(0, tvm.nd.empty((n,)))
        np.testing.assert_equal(out.asnumpy(), a + 1)

        mod.exit()
        #verify dump root delete after cleanup
        assert(not os.path.exists(directory))
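Example #4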
def verify_bitserial_dense(batch, in_dim, out_dim, activation_bits, weight_bits, unipolar):
    input_dtype = 'uint32'
    out_dtype = 'int16'

    with tvm.target.create('llvm'):
        A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A')
        B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B')
        C = topi.nn.bitserial_dense(A, B, activation_bits, weight_bits, out_dtype=out_dtype,
                                    unipolar=unipolar)
        s = topi.generic.schedule_bitserial_dense([C])

    a_shape = get_const_tuple(A.shape)
    b_shape = get_const_tuple(B.shape)

    @memoize("topi.tests.test_topi_bitseral_dense")
    def get_ref_data():
        a_np = generate_quantized_np(get_const_tuple(a_shape), activation_bits, input_dtype)
        b_np = generate_quantized_np(get_const_tuple(b_shape), weight_bits, input_dtype)
        if unipolar:
            b_ = np.copy(b_np).astype(out_dtype)
            for x in np.nditer(b_, op_flags=['readwrite']):
                x[...] = 1 if x == 1 else -1
            c_np = np.dot(a_np, b_.T)
        else:
            c_np = np.dot(a_np, b_np.T)
        return a_np, b_np, c_np
    a_np, b_np, c_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
    func = tvm.build(s, [A, B, C], "llvm")
    func(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #5
def check_c():
    if not tvm.module.enabled("llvm"):
        return
    # Specifically allow offset to test codepath when offset is available
    Ab = tvm.decl_buffer(
        A.shape, A.dtype,
        elem_offset=tvm.var('Aoffset'),
        offset_factor=8,
        name='A')
    binds = {A: Ab}
    # BUILD and invoke the kernel.
    f1 = tvm.lower(s, [A, B, C], name="fadd_pipeline")
    fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
    fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
    mhost = tvm.codegen.build_module(fsplits[0], "c")
    temp = util.tempdir()
    path_dso = temp.relpath("temp.so")
    mhost.export_library(path_dso)
    m = tvm.module.load(path_dso)
    fadd = m["fadd_pipeline"]
    ctx = tvm.cpu(0)
    # launch the kernel.
    n = nn
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    fadd(a, b, c)
    tvm.testing.assert_allclose(
        c.asnumpy(), a.asnumpy() + b.asnumpy())
Example #6
import numpy as np
import tvm


def test_in_bounds_vectorize_llvm():
    n = 512
    lanes = 2
    A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
    B = tvm.compute((n,), lambda i: A[i], name='B')
    C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
    s = tvm.create_schedule(C.op)
    xo, xi = s[C].split(C.op.axis[0], nparts=2)
    _, xi = s[C].split(xi, factor=2)
    s[C].parallel(xo)
    s[C].vectorize(xi)
    s[B].compute_at(s[C], xo)
    xo, xi = s[B].split(B.op.axis[0], factor=2)
    s[B].vectorize(xi)
    # build and invoke the kernel.
    lowered_func = tvm.lower(s, [A, C], "llvm", simple_mode=False)
    print(lowered_func.body)
    f = tvm.build(s, [A, C], "llvm")
    ctx = tvm.cpu(0)
    # launch the kernel.
    a = tvm.nd.empty((n,), A.dtype).copyfrom(
        np.random.uniform(size=(n, lanes)))
    c = tvm.nd.empty((n,), C.dtype, ctx)
    f(a, c)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
Example #7
def verify_conv2d(batch, in_size, in_channel, num_filter, kernel, stride, padding):
    in_height = in_width = in_size

    with tvm.target.rasp():
        A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
        W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
        B = topi.nn.conv2d(A, W, stride, padding)
        s = topi.generic.schedule_conv2d_nchw([B])

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d.verify_conv2d")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv2d_nchw_python(a_np, w_np, stride, padding)
        return a_np, w_np, b_np

    a_np, w_np, b_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    func = tvm.build(s, [A, W, B], "llvm")
    func(a, w, b)
    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #8
    def verify(target="llvm",
               algorithm=nnpack.ConvolutionAlgorithm.AUTO,
               with_bias=True):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
            print("skip because extern function is not available")
            return
        if not nnpack.is_available():
            return

        ctx = tvm.cpu(0)
        transformed_kernel = nnpack.convolution_inference_weight_transform(
            kernel, algorithm=algorithm)
        output = nnpack.convolution_inference_without_weight_transform(
            data, transformed_kernel, bias if with_bias else None,
            [PAD, PAD, PAD, PAD], [STRIDE, STRIDE],
            algorithm=algorithm)

        s = tvm.create_schedule(output.op)

        f = tvm.build(s, [data, kernel, bias, output], target)

        na = np.random.uniform(size=dshape).astype(data.dtype)
        nb = np.random.uniform(size=kshape).astype(kernel.dtype)
        nc = np.random.uniform(size=bshape).astype(bias.dtype) if with_bias else np.zeros(bshape, dtype=bias.dtype)
        ta = tvm.nd.array(na, ctx)
        tb = tvm.nd.array(nb, ctx)
        tc = tvm.nd.array(nc, ctx)
        td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx)
        f(ta, tb, tc, td)
        nd = np_conv(np.reshape(na, (BATCH, IC, IH, IW)), nb, PAD, STRIDE) + nc.reshape(1, bshape[0], 1, 1)
        tvm.testing.assert_allclose(
            td.asnumpy(), nd.reshape(BATCH, IC, IH, IW), rtol=1e-5)
Example #9
import numpy as np
import tvm


def test_sort_np():
    dshape = (1, 2, 3, 4, 5, 6)
    axis = 4
    reduced_shape = (1, 2, 3, 4, 6)
    is_descend = False
    data = tvm.placeholder(dshape, name='data')
    sort_num = tvm.placeholder(reduced_shape, name="sort_num", dtype="int32")
    out = tvm.extern(data.shape, [data, sort_num],
                     lambda ins, outs: tvm.call_packed(
                         "tvm.contrib.sort.argsort", ins[0],
                         ins[1], outs[0], axis, is_descend),
                     dtype='int32', name="sort_tensor")

    ctx = tvm.cpu(0)
    target = "llvm"
    s = tvm.create_schedule(out.op)
    f = tvm.build(s, [data, sort_num, out], target)

    np_data = np.random.uniform(size=dshape)
    np_out = np.argsort(np_data, axis=axis)
    sort_num_input = np.full(reduced_shape, dshape[axis])
    a = tvm.nd.array(np.array(np_data).astype(data.dtype), ctx)
    b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
    c = tvm.nd.array(np.zeros(a.shape, dtype=out.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), np_out, rtol=1e-5)
Example #10
import numpy as np
import tvm


def test_log_pow_llvm():
    # graph
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: tvm.power(tvm.log(A(*i)), 2.0), name='B')
    s = tvm.create_schedule(B.op)
    # create iter var and assign them tags.
    bx, tx = s[B].split(B.op.axis[0], factor=32)
    # one line to build the function.
    if not tvm.module.enabled("llvm"):
        return

    flog = tvm.build(s, [A, B],
                     "llvm", name="mylog")
    ctx = tvm.cpu(0)
    # launch the kernel.
    n = 1028
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
    repeat = 10
    ftimer = flog.time_evaluator(flog.entry_name, ctx, number=1, repeat=repeat)
    res = ftimer(a, b)
    assert(len(res.results) == repeat)
    np.testing.assert_allclose(
        b.asnumpy(), np.power(np.log(a.asnumpy()), 2.0), rtol=1e-5)
Example #11
import numpy as np
import nnvm.compiler
import nnvm.symbol as sym
import tvm
from tvm.contrib import graph_runtime


def test_nms():
    dshape = (1, 5, 6)
    data = sym.Variable("data")
    valid_count = sym.Variable("valid_count", dtype="int32")
    nms_threshold = 0.7
    force_suppress = True
    nms_topk = 2
    out = sym.nms(data=data, valid_count=valid_count, nms_threshold=nms_threshold,
                  force_suppress=force_suppress, nms_topk=nms_topk)

    np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80],
                         [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79],
                         [1, 0.5, 100, 60, 70, 110]]]).astype("float32")
    np_valid_count = np.array([4]).astype("int32")
    np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45],
                           [0, 0.4, 4, 21, 19, 40], [-1, 0.9, 35, 61, 52, 79],
                           [-1, -1, -1, -1, -1, -1]]])

    target = "llvm"
    ctx = tvm.cpu()
    graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)},
                                        dtype={"data": "float32", "valid_count": "int32"})
    m = graph_runtime.create(graph, lib, ctx)
    m.set_input(**{"data": np_data, "valid_count": np_valid_count})
    m.run()
    out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32"))
    tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)
Example #12
import numpy as np
import nnvm.compiler
import nnvm.symbol as sym
import tvm
from tvm.contrib import graph_runtime


def test_multibox_transform_loc():
    batch_size = 1
    num_anchors = 3
    num_classes = 3
    cls_prob = sym.Variable("cls_prob")
    loc_preds = sym.Variable("loc_preds")
    anchors = sym.Variable("anchors")
    transform_loc_data, valid_count = sym.multibox_transform_loc(cls_prob=cls_prob, loc_pred=loc_preds,
                                                                 anchor=anchors)
    out = sym.nms(data=transform_loc_data, valid_count=valid_count)

    # Manually create test case
    np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]])
    np_loc_preds = np.array([[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]])
    np_anchors = np.array([[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]])

    expected_np_out = np.array([[[1, 0.69999999, 0, 0, 0.10818365, 0.10008108],
                                 [0, 0.44999999, 1, 1, 1, 1],
                                 [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]])

    target = "llvm"
    dtype = "float32"
    ctx = tvm.cpu()
    graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes),
                                                      "loc_preds": (batch_size, num_anchors * 4),
                                                      "anchors": (1, num_anchors, 4)})
    m = graph_runtime.create(graph, lib, ctx)
    m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)})
    m.run()
    out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype))
    tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5)
Example #13
import numpy as np
import tvm


def test_in_bounds_conv_llvm(loop_tiling=False):
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
        out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)

    n, oc, oh, ow = conv.op.axis
    if loop_tiling:
        oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    lowered_func = tvm.lower(s, [data, kernel, conv], simple_mode=True)
    print(lowered_func.body)
    ctx = tvm.cpu(0)

    f = tvm.build(s, [data, kernel, conv], "llvm")
    data_input = tvm.nd.array(np.random.uniform(
          size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
    kernel_input = tvm.nd.array(np.random.uniform(
          size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
    conv_out = tvm.nd.empty((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
    f(data_input, kernel_input, conv_out)
Example #14
import numpy as np
import topi
import topi.testing
import topi.util
import tvm


def test_dilate():
    target = 'llvm'
    ctx = tvm.cpu(0)

    def _test_dilate(input_size, strides):
        Input = tvm.placeholder((input_size))
        Output = topi.nn.dilate(Input, strides)
        schedule = tvm.create_schedule(Output.op)
        input_np = np.random.uniform(size=input_size).astype(Input.dtype)
        output_np = topi.testing.dilate_python(input_np, strides)
        input_tvm = tvm.nd.array(input_np, ctx=ctx)
        output_size = topi.util.get_const_tuple(Output.shape)
        output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx)
        f = tvm.build(schedule, [Input, Output], target)
        f(input_tvm, output_tvm)
        tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)

    _test_dilate((32,), (2,))
    _test_dilate((32,32), (2,2))
    _test_dilate((1,3,32,32), (1,1,1,1))
    _test_dilate((1,3,32,32), (2,2,2,2))
    _test_dilate((1,32,32,3,3), (1,1,1,1,1))
    _test_dilate((1,32,32,3,3), (2,2,2,2,2))
    _test_dilate((1,32,32,32,3,3), (1,1,1,2,2,2))
    _test_dilate((1,32,32,32,3,3), (2,2,2,1,1,1))
Example #15
import numpy as np
import tvm


def test_sort():
    n = 2
    l = 5
    m = 3
    data = tvm.placeholder((n, l, m), name='data')
    sort_num = tvm.placeholder((n, m), name="sort_num", dtype="int32")
    axis = 1
    is_descend = True
    out = tvm.extern(data.shape, [data, sort_num],
                     lambda ins, outs: tvm.call_packed(
                         "tvm.contrib.sort.argsort", ins[0],
                         ins[1], outs[0], axis, is_descend),
                     dtype='int32', name="sort_tensor")
    input = [[[1, 2, 3], [2, 4.5, 3.5], [1.1, 0.5, 1], [3.2, -5, 0.5], [1.5, 0, 0]],
             [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]]
    sort_num_input = [[1, 2, 3], [4, 5, 5]]
    sorted_index = [[[0, 1, 1], [1, 0, 0], [2, 2, 2], [3, 3, 3], [4, 4, 4]],
                    [[3, 4, 4], [2, 3, 3], [1, 2, 2], [0, 1, 1], [4, 0, 0]]]

    ctx = tvm.cpu(0)
    target = "llvm"
    s = tvm.create_schedule(out.op)
    f = tvm.build(s, [data, sort_num, out], target)
    a = tvm.nd.array(np.array(input).astype(data.dtype), ctx)
    b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
    c = tvm.nd.array(np.zeros(a.shape, dtype=out.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), np.array(sorted_index).astype(out.dtype), rtol=1e-5)
Example #16
import os

import tvm


def ctx_list():
    """Get context list for testcases"""
    device_list = os.environ.get("NNVM_TEST_TARGETS", "")
    device_list = (device_list.split(",") if device_list
                   else ["llvm", "cuda"])
    device_list = set(device_list)
    res = [("llvm", tvm.cpu(0)), ("cuda", tvm.gpu(0))]
    return [x for x in res if x[1].exist and x[0] in device_list]
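A minimal usage sketch, assuming ctx_list is importable from this helper module; the environment variable name comes straight from the function body above.

import os

# Restrict the targets under test; when NNVM_TEST_TARGETS is unset,
# ctx_list() falls back to ["llvm", "cuda"].
os.environ["NNVM_TEST_TARGETS"] = "llvm"
for target, ctx in ctx_list():
    print("testing on", target, "with context", ctx)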
Example #17
def build_and_run(sym, params, data, out_shape):
    ctx = tvm.cpu(0)
    graph, lib, params = nnvm.compiler.build(sym, "llvm", shape={"data":data.shape}, params=params)
    module = runtime.create(graph, lib, ctx)
    module.set_input(**params)
    module.set_input("data", data)
    module.run()
    out = module.get_output(0, tvm.nd.empty(out_shape))
    return out.asnumpy()
Example #18
import tvm


def test_ctx():
    def test_ctx_func(ctx):
        assert tvm.gpu(7) == ctx
        return tvm.cpu(0)
    x = test_ctx_func(tvm.gpu(7))
    assert x == tvm.cpu(0)
    x = tvm.opencl(10)
    x = tvm._api_internal._context_test(x, x.device_type, x.device_id)
    assert x == tvm.opencl(10)
Example #19
def verify(graph, lib):
    m = graph_runtime.create(graph, lib, tvm.cpu(0))
    # get member functions
    na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
    nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
    m.run(x=na, y=nb)
    # get outputs
    out = m.get_output(0, tvm.nd.empty(shape, dtype))
    tvm.testing.assert_allclose(
        out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
Example #20
def check_llvm():
    if not tvm.module.enabled("llvm"):
        return
    f = tvm.build(s, [A, B], "llvm")
    ctx = tvm.cpu(0)
    # launch the kernel.
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
    f(a, b)
    tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1)
Example #21
def check_verify():
    if not tvm.module.enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mlib = tvm.build(s, [A, B], "llvm", name="myadd")
    mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
    a = np.random.uniform(size=(n,)).astype(A.dtype)
    mod.run(x=a)
    out = mod.get_output(0, tvm.nd.empty((n,)))
    np.testing.assert_equal(out.asnumpy(), a + 1)
Example #22
def check_device(device):
    if not tvm.module.enabled(device):
        print("Skip because %s is not enabled" % device)
        return
    ctx = tvm.cpu(0) if device == "llvm" else tvm.gpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
    f = tvm.build(s, [A, B], device, name="clip")
    f(a, b)
    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #23
    def check_verify():
        if not tvm.module.enabled("llvm"):
            print("Skip because llvm is not enabled")
            return
        mlib = tvm.build(s, [A, B], "llvm", name="myadd")
        try:
            mod = graph_runtime.create(graph, mlib, tvm.cpu(0))
        except ValueError:
            return

        a = np.random.uniform(size=(n,)).astype(A.dtype)
        mod.set_input(x=a)

        #verify dumproot created
        directory = mod._dump_path
        assert(os.path.exists(directory))

        #verify graph is there
        GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
        assert(len(os.listdir(directory)) == 1)

        #verify the file name is proper
        assert(os.path.exists(os.path.join(directory, GRAPH_DUMP_FILE_NAME)))

        mod.run()
        #Verify the tensors are dumped
        assert(len(os.listdir(directory)) > 1)

        CHROME_TRACE_FILE_NAME = '_tvmdbg_execution_trace.json'
        assert(os.path.exists(os.path.join(directory, CHROME_TRACE_FILE_NAME)))

        with open(os.path.join(directory, CHROME_TRACE_FILE_NAME)) as f:
            trace = json.load(f)
        assert trace["displayTimeUnit"] == "ns"
        events = trace["traceEvents"]
        assert len(events) == 4
        assert all(event["ph"] in ('B', 'E') for event in events)
        assert all(event["pid"] == 1 for event in events)
        assert all(event["tid"] == 1 for event in events)
        assert all(event["name"] == 'x' for event in events[:2])
        assert all(event["name"] == 'add' for event in events[2:])
        assert events[0]["ts"] == 0
        assert events[0]["ph"] == 'B'

        #verify the output is correct
        out = mod.get_output(0, tvm.nd.empty((n,)))
        np.testing.assert_equal(out.asnumpy(), a + 1)

        #test individual run
        mod.run_individual(20, 2, 1)

        mod.exit()
        #verify dump root delete after cleanup
        assert(not os.path.exists(directory))
Example #24
import numpy as np
import nnvm.compiler
import nnvm.symbol as sym
import tvm
from tvm.contrib import graph_runtime


def test_num_outputs():
    x = sym.Variable('x')
    z = sym.split(x, indices_or_sections=5, axis=1)
    shape = (10, 10)
    dtype = tvm.float32
    nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
    params = {"x": nx}
    graph, lib, params = nnvm.compiler.build(
        z, "llvm", shape={"x": nx.shape}, params=params)
    m = graph_runtime.create(graph, lib, tvm.cpu(0))
    assert m.get_num_outputs() == 5
Example #25
import tvm


def enabled_ctx_list():
    ctx_list = [('cpu', tvm.cpu(0)),
                ('gpu', tvm.gpu(0)),
                ('cl', tvm.opencl(0)),
                ('metal', tvm.metal(0)),
                ('rocm', tvm.rocm(0)),
                ('vulkan', tvm.vulkan(0)),
                ('vpi', tvm.vpi(0))]
    for k, v in ctx_list:
        assert tvm.context(k, 0) == v
    ctx_list = [x[1] for x in ctx_list if x[1].exist]
    return ctx_list
Example #26
def check_device(device):
    if not tvm.module.enabled(device):
        print("Skip because %s is not enabled" % device)
        return
    target = topi.cpp.TEST_create_target(device)
    s = topi.cpp.generic.default_schedule(target, [B], False)
    ctx = tvm.cpu(0) if device == "llvm" else tvm.gpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
    f = tvm.build(s, [A, B], device, name="clip")
    f(a, b)
    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #27
def verify_leaky_relu(m, alpha):
    A = tvm.placeholder((m,), name='A')
    B = topi.nn.leaky_relu(A, alpha)
    s = tvm.create_schedule([B.op])

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [A, B], "llvm", name="leaky_relu")
    foo(a, b)
    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #28
def verify(target):
    if not tvm.module.enabled(target):
        print("Target %s is not enabled" % target)
        return
    f = tvm.codegen.build_module(fapi, target)
    # verify
    ctx = tvm.cpu(0)
    a = tvm.nd.array(np.random.uniform(size=(nn,)).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=(nn,)).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros((1,), dtype=C.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(
        c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-4)
Example #29
def test_nhwc():
    data_shape = (1, 3, 224, 224)
    out_channel = 8
    nchw_sym = get_sym("NCHW", "OIHW", out_channel)
    nhwc_sym = get_sym("NHWC", "HWIO", out_channel)
    conv_weight = np.random.uniform(-1, 1, (out_channel, 3, 3, 3)).astype(np.float32)
    conv_bias = np.random.uniform(-1, 1, (out_channel)).astype(np.float32)
    nchw_params = {
        "conv2d0_weight" : tvm.nd.array(conv_weight, ctx=tvm.cpu(0)),
        "conv2d0_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0))
    }
    nhwc_params = {
        "conv2d1_weight" : tvm.nd.array(conv_weight.transpose(2, 3, 1, 0), ctx=tvm.cpu(0)),
        "conv2d1_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0))
    }

    data = np.random.uniform(-1, 1, data_shape).astype(np.float32)
    oshape = (1, out_channel, 224, 224)
    oshape_nhwc = (1, 224, 224, out_channel)
    nchw_output = build_and_run(nchw_sym, nchw_params, data, oshape)
    nhwc_output = build_and_run(nhwc_sym, nhwc_params, data.transpose(0, 2, 3, 1), oshape_nhwc)
    tvm.testing.assert_allclose(nchw_output, nhwc_output.transpose(0, 3, 1, 2), rtol=1e-5, atol=1e-5)
Example #30
def check_llvm():
    if not tvm.module.enabled("llvm"):
        return
    # build and invoke the kernel.
    f = tvm.build(s, [A, C], "llvm")
    ctx = tvm.cpu(0)
    # launch the kernel.
    n = nn
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    f(a, c)
    tvm.testing.assert_allclose(
        c.asnumpy(), a.asnumpy() + 1 + 1)
Example #31
import tvm
from tvm.relay import Function, transform
from tvm.relay.testing import inception_v3
import pytest

cpu_scope = tvm.target.make_se_scope(tvm.cpu(), tvm.target.Target("llvm"))
metatable = {"SEScope": [cpu_scope]}
core = tvm.IRModule()
core.import_from_std("core.rly")


def optimize_and_check(before_program, after_program, passes):
    if isinstance(before_program, str):
        before_program = tvm.parser.parse(before_program)
    if isinstance(after_program, str):
        after_program = tvm.parser.parse(after_program)
    if not isinstance(passes, list):
        passes = [passes]
    optimize = tvm.transform.Sequential(passes)
    optimized_program = optimize(before_program)
    print("Actual:")
Example #32
def test_extern_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = 'float32'
    ishape = (1, 32, 14, 14)
    w1shape = (32, 1, 3, 3)

    def expected():
        data0 = relay.var("data", shape=(ishape), dtype=dtype)
        input0 = relay.var("input0", shape=(w1shape), dtype=dtype)
        input1 = relay.var("input1", shape=(w1shape), dtype=dtype)
        depthwise_conv2d_1 = relay.nn.conv2d(data0,
                                             input0,
                                             kernel_size=(3, 3),
                                             padding=(1, 1),
                                             groups=32)
        depthwise_conv2d_2 = relay.nn.conv2d(depthwise_conv2d_1,
                                             input1,
                                             kernel_size=(3, 3),
                                             padding=(1, 1),
                                             groups=32)
        out = relay.add(depthwise_conv2d_1, depthwise_conv2d_2)

        func = relay.Function([data0, input0, input1], out)
        func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func = func.with_attr("Compiler", "dnnl")
        func = func.with_attr("global_symbol", "dnnl_0")
        glb_var = relay.GlobalVar("dnnl_0")
        mod = tvm.IRModule()
        mod[glb_var] = func

        data = relay.var("data", shape=(ishape), dtype=dtype)
        weight = relay.var("input", shape=(w1shape), dtype=dtype)
        main_f = relay.Function([data, weight], glb_var(data, weight, weight))
        mod["main"] = main_f

        return mod

    def get_func():
        data = relay.var("data", shape=(ishape), dtype=dtype)
        weight1 = relay.var("weight1", shape=(w1shape), dtype=dtype)
        depthwise_conv2d_1 = relay.nn.conv2d(data,
                                             weight1,
                                             kernel_size=(3, 3),
                                             padding=(1, 1),
                                             groups=32)
        depthwise_conv2d_2 = relay.nn.conv2d(depthwise_conv2d_1,
                                             weight1,
                                             kernel_size=(3, 3),
                                             padding=(1, 1),
                                             groups=32)
        out = relay.add(depthwise_conv2d_1, depthwise_conv2d_2)

        return relay.Function([data, weight1], out)

    mod = tvm.IRModule()
    mod["main"] = WholeGraphAnnotator("dnnl").visit(get_func())
    mod = transform.PartitionGraph()(mod)

    assert tvm.ir.structural_equal(mod, expected(), map_free_vars=True)

    ref_mod = tvm.IRModule()
    ref_mod["main"] = get_func()

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu())
    ref_res = ref_ex.evaluate()(i_data, w1_data)
    check_result(mod, {
        "data": i_data,
        "weight1": w1_data
    }, (1, 32, 14, 14),
                 ref_res.asnumpy(),
                 tol=1e-5)
Example #33
shape = (oc, ic, h, w)
oc_bn, ic_bn = 3, 2
# (OC, IC, h, w, ic, oc)
out_shape = (oc // oc_bn, ic // ic_bn, h, w, ic_bn, oc_bn)

x = sym.Variable("x")
# y = sym.Variable("y")
# z = sym.elemwise_add(x, sym.sqrt(y))
# z = sym.reshape(x, shape=out_shape)
z = sym.reorder(x, oc_bn=3, ic_bn=2)
compute_graph = nnvm.graph.create(z)
print("-------compute graph-------")
print(compute_graph.ir())

deploy_graph, lib, params = nnvm.compiler.build(compute_graph,
                                                target="llvm",
                                                shape={"x": shape},
                                                dtype="float32")

module = graph_runtime.create(deploy_graph, lib, tvm.cpu(0))
x_np = np.random.uniform(0, 255, size=shape).astype("float32")
print(x_np)
# y_np = np.array([[4, 4], [4, 4], [4, 4]]).astype("float32")
# set input to the graph module
module.set_input(x=x_np)  #, y=y_np)
# run forward computation
module.run()
# get the first output
out = module.get_output(0, out=tvm.nd.empty(out_shape))
print(out.asnumpy())
Example #34
)

# Build the module against to x86 CPU
target = "llvm"
with transform.PassContext(opt_level=3):
    lib = relay.build(mod, target, params=params)

######################################################################
# Execute on TVM
# --------------
import tvm
from tvm import te
from tvm.contrib import graph_runtime as runtime

# Create a runtime executor module
module = runtime.GraphModule(lib["default"](tvm.cpu()))

# Feed input data
module.set_input(input_tensor, tvm.nd.array(image_data))

# Run
module.run()

# Get output
tvm_output = module.get_output(0).asnumpy()

######################################################################
# Display results
# ---------------

# Load label file
Example #35
graph_json_path = "../tvm_output_lib/mobilenet.json"
# with open(graph_json_path, 'w') as fo:
#     fo.write(graph)

param_path = "../tvm_output_lib/mobilenet.params"
# with open(param_path, 'wb') as fo:
#     fo.write(relay.save_param_dict(params))


# load the module back.
loaded_json = open(graph_json_path).read()
loaded_lib = tvm.module.load(libpath)
loaded_params = bytearray(open(param_path, "rb").read())

ctx = tvm.cpu()

module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.set_input("0", x)
module.run()
out_deploy = module.get_output(0).asnumpy()

print(out_deploy)
Example #36
import tvm
import numpy as np

# The size of the square matrix
N = 1024
nstep = 1<<16
# The default tensor type in tvm
dtype = "float32"
target = "llvm -mcpu=skylake-avx512"
# target = "llvm"
# Random generated tensor for testing
a = tvm.nd.array(np.random.rand(N,).astype(dtype), tvm.cpu(0))
c = tvm.nd.array(np.zeros((N,), dtype = dtype), tvm.cpu(0))

# The expected answer
answer = a.asnumpy() * nstep

x = tvm.placeholder((N,), name="x")
k = tvm.reduce_axis((0, nstep))
y = tvm.compute((N,), lambda i: tvm.sum(x[i], axis=k), name="y")

s = tvm.create_schedule(y.op)

i, = s[y].op.axis
io, ii = s[y].split(i, factor=32)
s[y].vectorize(ii)

print(tvm.lower(s, [x, y], simple_mode=True))

func = tvm.build(s, [x, y], target=target, name = 'adddd')
assert func
Example #37
def build_and_run(
    mod, inputs, outputs, params, ctx=tvm.cpu(), npu=True, expected_host_ops=0, npu_partitions=1
):
    lib = build(mod, params, npu, expected_host_ops, npu_partitions)
    return run(lib, inputs, outputs, npu)
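Example #38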
###############################################################################
# Print the top-5 labels for TVM and TFLite inference.
# Checking the labels because the requantize implementation is different between
# TFLite and Relay. This causes the final output numbers to mismatch, so we
# compare accuracy via labels instead.

print("TVM Top-5 labels:", tvm_pred)
print("TFLite Top-5 labels:", tflite_pred)

##########################################################################
# Measure performance
# -------------------
# Here we give an example of how to measure performance of TVM compiled models.
n_repeat = 100  # should be bigger to make the measurement more accurate
dev = tvm.cpu(0)
ftimer = rt_mod.module.time_evaluator("run", dev, number=1, repeat=n_repeat)
prof_res = np.array(ftimer().results) * 1e3
print("Elapsed average ms:", np.mean(prof_res))

######################################################################
# .. note::
#
#   Unless the hardware has special support for fast 8 bit instructions, quantized models are
#   not expected to be any faster than FP32 models. Without fast 8 bit instructions, TVM does
#   quantized convolution in 16 bit, even if the model itself is 8 bit.
#
#   For x86, the best performance can be achieved on CPUs with the AVX512 instruction set.
#   In this case, TVM utilizes the fastest available 8 bit instructions for the given target.
#   This includes support for the VNNI 8 bit dot product instruction (CascadeLake or newer).
#   For EC2 C5.12x large instance, TVM latency for this tutorial is ~2 ms.
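A minimal sketch of the note above, assuming an LLVM build of TVM: the instruction set is selected through the -mcpu flag in the LLVM target string, as Example #36 already does with skylake-avx512; the exact value must match your CPU.

# Assumed example target strings; pick the -mcpu matching your machine.
target_avx512 = "llvm -mcpu=skylake-avx512"  # AVX512 without the VNNI instructions
target_vnni = "llvm -mcpu=cascadelake"       # AVX512 plus VNNI 8-bit dot product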
Example #39
# Compilation
#print(tvm.lower(s, [x, y, x_expand, w, dot, gradient, new_w],
#    simple_mode=True))
func = tvm.build(s, [x, y, w, new_w])
assert func

#print("------func code------")
#print(func.imported_modules[0].get_source())

# Generate x
np_x = np.random.uniform(size=(in_n, in_d), low=0, high=100)
golden_w = np.random.uniform(size=in_d, low=-1, high=1)
noise = np.random.uniform(size=in_n, low=-5, high=5)
np_y = np.array([golden_w.dot(np_x[i]) + noise[i] for i in range(in_n)])

in_y = tvm.nd.array(np_y.astype(y.dtype), tvm.cpu(0))
in_x = tvm.nd.array(np_x.astype(x.dtype), tvm.cpu(0))
in_w = tvm.nd.array(np.zeros(in_d + 1, dtype=w.dtype), tvm.cpu(0))

# Evaluation
display_animation = display_on
if display_on and in_d != 1:
    display_animation = False
    print("WARNING: Will only display the MSE trend " +
          "due to high dimensional data points.")

if display_on:
    import matplotlib.pyplot as plt
    from matplotlib import animation
    fig = plt.figure()
    ax = plt.axes()
Example #40
def build_and_run(s,
                  Tensor,
                  control_f,
                  shape,
                  time_count,
                  timeout_build,
                  timeout_cal,
                  count=20,
                  device_id=0,
                  tar="llvm"):
    """ Build and record the time of running.

        Args:
        -----------------------------
        s           : schedule.Schedule get form the student's auto_schedule

        Tensor      : (list)
        the input tensors and the output tensor

        control_f   : the torch function

        shape       : arg for control_f

        time_count  : used for record the running time

        timeout_build:time limit for building
        
        timeout_cal : time limit for culation

        count       : the number rounds repeat testing

        device_id   : the id of CPU
        -----------------------------

        Returns:
        -----------------------------
        [tvm_time, torch_time]:
            [float , flaot]
        which indicates
        the total time of running scheduled tvm calculation and
        the total time of running torch calculation
        -----------------------------
        """
    # Create ctx.
    try:
        ctx = tvm.cpu(device_id)
    except:
        print("Can not found device !!!")
        time_count.put([-1, -1])
        return -1
    # Build function form s and Tensor.
    try:
        timelimit = ceil(timeout_build)
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timelimit)
        begin = time.time()
        f = tvm.build(s, Tensor, name="my_op")
        timepass = time.time() - begin
        signal.signal(signal.SIGALRM, signal.SIG_IGN)
        if timepass > timeout_build:
            print("Timeout in building!")
            return -1
    except:
        traceback.print_exc()
        print("Can not build successfully !!!")
        time_count.put([-1, -1])
        return -1
    try:
        Output_tensor = Tensor[-1]
        del Tensor[-1]
    except:
        print("The input is not correct !!!")
        time_count.put([-1, -1])
        return -1
    # Craft input data.
    try:
        Input_tvm_batch = []
        Input_torch_batch = []
        for it in range(0, count):
            Input_tvm_data = []
            Input_torch_data = []

            for i in Tensor:
                data = np.random.random([int(j) for j in i.shape]).astype(
                    np.float32) * 100
                tvm_data = tvm.nd.array(data, ctx)
                torch_data = torch.tensor(data)
                Input_tvm_data.append(tvm_data)
                Input_torch_data.append(torch_data)

            Output_holder = tvm.nd.array(
                np.zeros([int(j) for j in Output_tensor.shape],
                         dtype=Output_tensor.dtype), ctx)

            Input_tvm_batch.append(Input_tvm_data + [Output_holder])
            Input_torch_batch.append(Input_torch_data)
    except:
        traceback.print_exc()
        print("Can not create input datas !!!")
        time_count.put([-1, -1])
        return -1

    try:
        f(*Input_tvm_batch[0])
        timelimit = ceil(timeout_cal)
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timelimit)
        begin = time.time()
        for i in range(0, count):
            f(*Input_tvm_batch[i])
        tvm_time = time.time() - begin
        signal.signal(signal.SIGALRM, signal.SIG_IGN)
        if tvm_time > timeout_cal:
            print("Results of shape", shape, "Timeout!")
            tvm_time = -1
        else:
            tvm_time /= count
    except TimeoutError:
        tvm_time = -1
        print("Results of shape", shape, "Timeout!")
    except:
        tvm_time = -1
        print("Results of shape", shape, "\n| The culation is not correct !!!")

    try:
        control_f(*(Input_torch_batch[0] + [shape]))
        begin = time.time()
        for i in range(0, count):
            control_f(*(Input_torch_batch[i] + [shape]))
        torch_time = time.time() - begin
        torch_time /= count
    except TimeoutError:
        torch_time = -1
        print("Results of shape", shape, "Timeout!")
    except:
        torch_time = -1
        print("Results of shape", shape, "\n| The culation is not correct !!!")

    print("Results of shape", shape, " \n| your time:", tvm_time,
          " s| pytorch time:", torch_time, "s\n")
    time_count.put([tvm_time, torch_time])
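A hypothetical invocation sketch, assuming the schedule s, the tensor list [A, B, C], and the torch reference function torch_ref come from the surrounding harness; a multiprocessing.Queue satisfies the time_count interface, since build_and_run only calls .put on it.

import multiprocessing

# Assumed setup: s, [A, B, C] (inputs plus the output tensor last) and
# torch_ref are provided by the test harness.
time_count = multiprocessing.Queue()
build_and_run(s, [A, B, C], torch_ref, (1024,), time_count,
              timeout_build=10.0, timeout_cal=10.0)
tvm_time, torch_time = time_count.get()
print("tvm:", tvm_time, "s | torch:", torch_time, "s")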
Example #41
def veval(vm, *args, ctx=tvm.cpu()):
    assert isinstance(vm, _vm.VirtualMachine), "expected VirtualMachine"
    vm.init(ctx)
    ret = vm.run(*args)
    return ret
Example #42
def run_timing(device, platform, model, remote=None, autotvm_log=None, batch=1, runs=3, reps=5, log=None):
    """
    Run a time trial on TVM

    :param device: The device to run this on
    :param platform: The platform to get the machine learning model from
    :param model: The machine learning model to use
    :param remote: Details about the remote device
    :param autotvm_log: The path to the auto TVM file
    :param batch: The number of pictures to run in one go
    :param runs: The number of runs to run the picture through
    :param reps: The number of times the measurement should be repeated
    :param log: The output file
    """

    # Output details of run
    from cpuinfo import get_cpu_info
    from datetime import datetime

    print("\n──────────────────────────── TVMUI ────────────────────────────\n")
    log.write("TVM Time Trial\n")
    log_print(log, "Started on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    if remote is None:
        log_print(log, 'Hardware: ' + device)
        if device == 'x86':
            log_print(log, 'CPU Type: ' + get_cpu_info().get('brand_raw'))
    else:
        log_print(log, 'Remote Name: ' + remote["name"])
        log_print(log, 'Remote Device: ' + remote["type"])
        log_print(log, 'Remote Hardware: ' + remote["hardware"])
    log_print(log, 'Backend: ' + platform)
    log_print(log, 'Model: ' + model)
    log_print(log, str(batch) + " picture(s) per run")
    log_print(log, str(runs) + " run average, repeated " + str(reps) + " times.")
    if autotvm_log is None:
        log_print(log, 'AutoTVM: No\n')
    else:
        log_print(log, 'AutoTVM: Yes\n')

    # Get the model and image data
    import numpy as np
    from PIL import Image
    from tvm import relay
    import tvm
    from tvm.contrib.download import download_testdata

    print("Loading models and images...")

    pictures = get_pics(batch)
    dataset = []

    if platform == "MXNet":
        from mxnet.gluon.model_zoo.vision import get_model

        block = get_model(model, pretrained=True)

        synset_url = "".join(
            [
                "https://gist.githubusercontent.com/zhreshold/",
                "4d0b62f3d01426887599d4f7ede23ee5/raw/",
                "596b27d23537e5a1b5751d2b0481ef172f58b539/",
                "imagenet1000_clsid_to_human.txt",
            ]
        )
        synset_name = "imagenet1000_clsid_to_human.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            synset = eval(f.read())

        def transform_image(image):
            image = np.array(image) - np.array([123.0, 117.0, 104.0])
            image /= np.array([58.395, 57.12, 57.375])
            image = image.transpose((2, 0, 1))
            image = image[np.newaxis, :]
            return image

        if model == 'resnet18_v1' or model == 'mobilenetv2_1.0':
            for img in pictures:
                dataset.append(transform_image(Image.open(img).resize((224, 224))))
            input_shape = [batch, 3, 224, 224]

        elif model == 'inceptionv3':
            for img in pictures:
                dataset.append(transform_image(Image.open(img).resize((299, 299))))
            input_shape = [batch, 3, 299, 299]
        else:
            raise Exception("Invalid Model")

        shape_dict = {"data": input_shape}

        mod, params = relay.frontend.from_mxnet(block, shape_dict)
        func = mod["main"]
        func = relay.Function(func.params, relay.nn.softmax(func.body), None, func.type_params, func.attrs)

    elif platform == "PyTorch":
        import torch
        import torchvision

        model = getattr(torchvision.models, model)(pretrained=True)
        model = model.eval()

        # We grab the TorchScripted model via tracing
        input_shape = [batch, 3, 224, 224]
        input_data = torch.randn(input_shape)
        scripted_model = torch.jit.trace(model, input_data).eval()

        synset_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_synsets.txt",
            ]
        )
        synset_name = "imagenet_synsets.txt"
        synset_path = download_testdata(synset_url, synset_name, module="data")
        with open(synset_path) as f:
            synsets = f.readlines()
        synsets = [x.strip() for x in synsets]
        splits = [line.split(" ") for line in synsets]
        key_to_classname = {spl[0]: " ".join(spl[1:]) for spl in splits}

        class_url = "".join(
            [
                "https://raw.githubusercontent.com/Cadene/",
                "pretrained-models.pytorch/master/data/",
                "imagenet_classes.txt",
            ]
        )
        class_name = "imagenet_classes.txt"
        class_path = download_testdata(class_url, class_name, module="data")
        with open(class_path) as f:
            class_id_to_key = f.readlines()
        class_id_to_key = [x.strip() for x in class_id_to_key]

        def transform_image(image):
            from torchvision import transforms

            my_preprocess = transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ]
            )
            img = my_preprocess(image)
            return np.expand_dims(img, 0)

        for img in pictures:
            dataset.append(transform_image(Image.open(img).resize((224, 224))))

        input_name = "data"
        shape_list = [(input_name, input_shape)]
        func, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    elif platform == "TensorFlow":
        import tensorflow as tf
        import os

        try:
            tf_compat_v1 = tf.compat.v1
        except ImportError:
            tf_compat_v1 = tf
        import tvm.relay.testing.tf as tf_testing

        # Base location for model related files.
        repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
        model_name = "classify_image_graph_def-with_shapes.pb"
        model_url = os.path.join(repo_base, model_name)

        # Image label map
        map_proto = "imagenet_2012_challenge_label_map_proto.pbtxt"
        map_proto_url = os.path.join(repo_base, map_proto)

        # Human readable text for labels
        label_map = "imagenet_synset_to_human_label_map.txt"
        label_map_url = os.path.join(repo_base, label_map)

        model_path = download_testdata(model_url, model_name, module=["tf", "InceptionV1"])
        map_proto_path = download_testdata(map_proto_url, map_proto, module="data")
        label_path = download_testdata(label_map_url, label_map, module="data")

        with tf_compat_v1.gfile.GFile(model_path, "rb") as f:
            graph_def = tf_compat_v1.GraphDef()
            graph_def.ParseFromString(f.read())
            graph = tf.import_graph_def(graph_def, name="")
            # Call the utility to import the graph definition into default graph.
            graph_def = tf_testing.ProcessGraphDefParam(graph_def)
            # Add shapes to the graph.
            with tf_compat_v1.Session() as sess:
                graph_def = tf_testing.AddShapesToGraphDef(sess, "softmax")
        for img in pictures:
            dataset.append(np.array(Image.open(img).resize((299, 299))))
        shape_dict = {"data": [batch, 3, 299, 299]}
        dtype_dict = {"DecodeJpeg/contents": "uint8"}
        mod, params = relay.frontend.from_tensorflow(graph_def, layout=None, shape=shape_dict)
    else:
        raise Exception('Not Supported!')

    # Build the graph
    if device == 'x86':
        target = "llvm"
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + target)
    elif device == 'Metal':
        target = "metal"
        ctx = tvm.metal(0)
        log_print(log, 'Target: ' + target)
    elif device == 'arm_cpu':
        target = tvm.target.arm_cpu(remote["type"])
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + remote["type"])
    else:
        target = device
        ctx = tvm.cpu(0)
        log_print(log, 'Target: ' + device)
    log_print(log, 'Actual Model: ' + model + '\n')
    print('Making the graph...')
    if autotvm_log is not None:
        from tvm import autotvm
        log_print(log, 'Using AutoTVM file ' + autotvm_log)
        with autotvm.apply_graph_best(autotvm_log):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(func, target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(func, target, params=params)

    print("\nSetting up TVM...")
    from tvm.contrib import graph_runtime

    # Remote upload
    if remote is not None:
        from tvm import rpc
        from tvm.contrib import utils, graph_runtime as runtime
        print("Exporting graph...")
        tmp = utils.tempdir()
        lib_fname = tmp.relpath("net.tar")
        lib.export_library(lib_fname)
        print("Connecting to device...")
        remote = rpc.connect(str(remote["ip"]), int(remote["port"]))
        print("Uploading to device...")
        remote.upload(lib_fname)
        lib = remote.load_module("net.tar")
        if device == 'x86':
            ctx = remote.cpu(0)
        elif device == 'Metal':
            ctx = remote.metal(0)
        elif device == 'arm_cpu':
            ctx = remote.cpu(0)
        else:
            ctx = remote.cpu(0)
    dtype = "float32"
    m = graph_runtime.GraphModule(lib["default"](ctx))

    def run_tvm(pics, number, repeat):
        """
        Runs a single inference and gives back the time

        :param pics: The images(s) to run
        :param number: The number of times to run the inference
        :param repeat:  The number of times to repeat the measurement
        :return: An array with the time and the result
        """

        # combine pictures
        arr = np.ndarray(shape=input_shape, dtype=dtype)
        p = 0
        for ip in pics:
            arr[p] = ip.astype(dtype)
            p = p + 1
        m.set_input("data", tvm.nd.array(arr))

        #Actually run inference
        time = m.module.time_evaluator("run", ctx, number=number, repeat=repeat)()

        #Get output
        res = []
        if platform == 'MXNet':
            for i in range(len(pics)):
                res.append(synset[np.argmax(m.get_output(0).asnumpy()[i])])
        if platform == 'PyTorch':
            # Get top-1 result for TVM
            for i in range(len(pics)):
                top1_tvm = np.argmax(m.get_output(0).asnumpy()[i])
                tvm_class_key = class_id_to_key[top1_tvm]
                res.append(key_to_classname[tvm_class_key])
        if platform == 'TensorFlow':
            pre = np.squeeze(m.get_output(0, tvm.nd.empty((1, 1008), "float32")).asnumpy())
            node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, uid_lookup_path=label_path)
            top_k = pre.argsort()[-5:][::-1]
            res = node_lookup.id_to_string(top_k[0])
        return [time, res]

    # Run the inferences
    output = []
    total = 0

    print("\nRunning inferences...")
    for i in range(int(len(dataset) / batch)):
        log_print(log, "\nSet " + str(i + 1) + ":")
        inp = []
        # Create the next batch
        for j in range(batch):
            inp.append(dataset[batch * i + j])
        # Run inference here
        output = run_tvm(inp, runs, reps)
        # Output results
        e = 0
        for rl in output[1]:
            log_print(log, "Image " + str(e + 1) + " Path: " + pictures[batch * i + e])
            log_print(log, "Image " + str(e + 1) + " ID: " + rl)
            e = e + 1
        log_print(log, "Time taken: " + str('%.2f' % (1000 * output[0].mean)) + " ms")
        total = total + output[0].mean
    ave = total / int(len(dataset) / batch)
    log_print(log, '\nAVERAGE TIME: ' + str(ave * 1000) + " ms")
    log_print(log, "Finished on " + str(datetime.now().strftime("%m/%d/%Y at %H:%M:%S")))
    log.close()
    return
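A hypothetical call sketch, assuming the log file name and model choice; note that run_timing writes to and closes the log file itself.

# Assumed usage: time resnet18_v1 from MXNet on a local x86 CPU.
logfile = open("tvm_time_trial.log", "w")
run_timing("x86", "MXNet", "resnet18_v1", batch=1, runs=3, reps=5, log=logfile)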
Example #43
img = Image.open('cat.png').resize((224, 224))
img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
img_y, img_cb, img_cr = img_ycbcr.split()
x = np.array(img_y)[np.newaxis, np.newaxis, :, :]

######################################################################
# Compile the model with relay
# ---------------------------------------------
target = 'llvm'

input_name = '1'
shape_dict = {input_name: x.shape}
sym, params = relay.frontend.from_onnx(onnx_model, shape_dict)

with relay.build_config(opt_level=1):
    intrp = relay.build_module.create_executor('graph', sym, tvm.cpu(0),
                                               target)

######################################################################
# Execute on TVM
# ---------------------------------------------
dtype = 'float32'
tvm_output = intrp.evaluate(sym)(tvm.nd.array(x.astype(dtype)),
                                 **params).asnumpy()

######################################################################
# Display results
# ---------------------------------------------
# We put input and output image neck to neck
from matplotlib import pyplot as plt
out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode='L')
Example #44
   [ 0.,  1.,  0., -2., -1.,  1., -1.],
   [-1.,  2.,  1.,  0.,  1.,  2.,  1.],
   [-1., -1.,  2.,  0.,  1.,  1., -1.],
   [ 1., -1.,  0.,  0.,  1.,  0., -1.],
   [-1.,  2.,  0.,  1.,  2., -2.,  0.],
   [ 2., -1.,  1., -1.,  1.,  0.,  1.]],

  [[ 1., -1.,  2.,  2.,  0.,  1., -1.],
   [-2.,  1.,  0., -2., -1.,  1.,  0.],
   [ 1.,  2., -1.,  0.,  0.,  2., -1.],
   [-1., -1.,  2.,  0.,  1.,  1., -1.],
   [ 0.,  1., -2.,  0.,  1.,  0., -1.],
   [-1.,  2.,  0.,  1.,  2., -2.,  1.],
   [ 0., -1.,  0., -1., -2., -1.,  1.]]]]).astype("float32")
   
params['p0'] = tvm.nd.array(temp_param, ctx=tvm.cpu(0))
module = runtime.create(graph, lib, ctx)
module.set_input("data", data)
module.set_input(**params)
print("%%%%%%params%%%%%%%")
print(params)
module.run()
print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%test8%%%%%%%%%%%%%%%%%%%%%%%")
out_shape = (2,2,3,3)
out = (module.get_output(5712, tvm.nd.empty(out_shape))).asnumpy()
print("----------TEST9----------")
print(out)

batch, in_channel, in_height, in_width = data.shape
print(batch, in_channel, in_height, in_width)
Example #45
def test_pipe_runtime_error_check():
    # This function triggers runtime errors by applying incorrect configuration logic.
    if pipeline_executor.pipeline_executor_enabled():
        # Get three pipeline modules here.
        (mod1, mod2, mod3), dshape = get_mannual_mod()

        # An illegal input or output name should raise a runtime error.
        pipe_error = pipeline_executor.PipelineConfig()
        with pytest.raises(RuntimeError):
            pipe_error[mod1]["output"][9]

        with pytest.raises(RuntimeError):
            pipe_error[mod1]["input"]["data_9"]

        # This module connection would create a cycle in the DAG, so a runtime error is expected.
        with pytest.raises(RuntimeError):
            pipe_error[mod1]["output"][0].connect(
                pipe_error[mod2]["input"]["data_0"])
            pipe_error[mod2]["output"][0].connect(
                pipe_error[mod1]["input"]["data_0"])

        # The following module connections are illegal, so runtime errors are expected.

        with pytest.raises(RuntimeError):
            pipe_error[mod1]["output"][0].connect(
                pipe_error[mod1]["input"]["data_0"])

        with pytest.raises(RuntimeError):
            pipe_error[mod1]["input"]["data_0"].connect(
                pipe_error[mod1]["input"]["data_0"])

        with pytest.raises(RuntimeError):
            pipe_error[mod1]["input"]["data_0"].connect(
                pipe_error[mod2]["input"]["data_0"])

        with pytest.raises(RuntimeError):
            pipe_error[mod1]["output"][0].connect(
                pipe_error["input"]["data_0"])

        with pytest.raises(RuntimeError):
            pipe_error["input"]["data_0"].connect(
                pipe_error[mod1]["output"][0])

        with pytest.raises(RuntimeError):
            pipe_error["output"]["0"].connect(pipe_error[mod1]["output"][0])

        # Create pipeline executor to check the executor runtime errors.
        pipe_config = pipeline_executor.PipelineConfig()
        pipe_config[mod1].target = "llvm"
        pipe_config[mod1].dev = tvm.cpu(0)
        pipe_config["param_group"]["param_0"].connect(
            pipe_config[mod1]["param"])
        pipe_config[mod1]["output"][0].connect(pipe_config["output"]["0"])
        # Build and create a pipeline module.
        with tvm.transform.PassContext(opt_level=3):
            pipeline_mod_factory = pipeline_executor.build(pipe_config)
        pipeline_module = pipeline_executor.PipelineModule(
            pipeline_mod_factory)
        customized_parameters, _ = recreate_parameters(mod1)

        # Checking the pipeline executor runtime errors.
        with pytest.raises(RuntimeError):
            pipeline_module.set_params("param_0", None)

        with pytest.raises(RuntimeError):
            pipeline_module.set_params("param_1", customized_parameters)
Example #46
def test_pipeline():
    if pipeline_executor.pipeline_executor_enabled():
        target_list = tvm.testing.enabled_targets()
        for target in target_list:
            # Get the three pipeline modules here.
            (mod1, mod2, mod3), dshape = get_mannual_mod()

            # Prepare batch data for pipeline computation.
            datas = []
            for i in range(5):
                datas.append(np.full(dshape, 3 + i).astype("float32"))

            pipe_config = pipeline_executor.PipelineConfig()

            customized_parameters, customized_parameters_mod = recreate_parameters(
                mod1)
            assert customized_parameters_mod == mod1
            # The global parameters group named "param_0" will be connected to "mod1" as parameters.
            pipe_config["param_group"]["param_0"].connect(
                pipe_config[mod1]["param"])
            # The pipeline input named "data_a" will be connected to an input named
            # "data_0" of mod1.
            pipe_config["input"]["data_a"].connect(
                pipe_config[mod1]["input"]["data_0"])

            # The pipeline input named "data_b" will be connected to an input named
            # "data_1" of mod2.
            pipe_config["input"]["data_b"].connect(
                pipe_config[mod2]["input"]["data_1"])

            # The mod1 output[0] will be connected to an input named "data_0" of mod2.
            pipe_config[mod1]["output"][0].connect(
                pipe_config[mod2]["input"]["data_0"])

            # The mod1 output[1] will be connected to an input named "data_0" of mod3.
            pipe_config[mod1]["output"][1].connect(
                pipe_config[mod3]["input"]["data_0"])

            # The mod2 output[0] will be connected to an input named "data_1" of mod3.
            pipe_config[mod2]["output"][0].connect(
                pipe_config[mod3]["input"]["data_1"])

            # The mod1 output[2] will be connected to pipeline output[0].
            pipe_config[mod1]["output"][2].connect(pipe_config["output"]["0"])

            # The mod3 output[0] will be connected to pipeline output[1].
            pipe_config[mod3]["output"][0].connect(pipe_config["output"]["1"])
            # Printing the configuration with print(pipe_config) gives output like the following.
            #
            # Inputs
            #   |data_a: mod1:data_0
            #   |data_b: mod2:data_1
            #
            # output
            #   |output(1) : mod1.output(2)
            #   |output(2) : mod3.output(0)
            #
            # connections
            #   |mod1.output(0)-> mod2.data_0
            #   |mod1.output(1)-> mod3.data_0
            #   |mod2.output(0)-> mod3.data_1

            # Set other parameters.
            pipe_config[mod1].target = target[0]
            pipe_config[mod1].dev = target[1]

            pipe_config[mod2].target = "llvm"
            pipe_config[mod2].dev = tvm.cpu(0)

            pipe_config[mod3].target = "llvm"
            pipe_config[mod3].dev = tvm.cpu(0)

            # Check the correctness of the configuration generated by the API.
            mconfig = pipe_config.get_config()
            assert mconfig["module_connection"] == get_manual_conf(
                [mod1, mod2, mod3], target)

            # Build and create a pipeline module.
            with tvm.transform.PassContext(opt_level=3):
                pipeline_mod_factory = pipeline_executor.build(pipe_config)

            # Export the parameter configuration to a file.
            directory_path = tvm.contrib.utils.tempdir().temp_dir
            # If the directory does not exist, create it.
            if not os.path.exists(directory_path):
                os.makedirs(directory_path)
            config_file_name = pipeline_mod_factory.export_library(
                directory_path)

            # Use the output of build to create and initialize PipelineModule.
            pipeline_module = pipeline_executor.PipelineModule(
                pipeline_mod_factory)
            assert pipeline_module

            # Use the import function to create and initialize PipelineModule.
            pipeline_module_test = pipeline_executor.PipelineModule.load_library(
                config_file_name)
            assert pipeline_module_test.num_outputs == 2

            input_map = pipeline_module_test.get_input_pipeline_map("data_b")
            assert input_map[0] == "1" and input_map[1] == "data_1"
            input_map = pipeline_module_test.get_input_pipeline_map("data_a")
            assert input_map[0] == "0" and input_map[1] == "data_0"
            module_index = pipeline_module_test.get_params_group_pipeline_map(
                "param_0")
            assert module_index == 0
            # Use the parameter group name to set the parameters.
            pipeline_module_test.set_params("param_0", customized_parameters)
            for data in datas:
                # Getting the result without setting customized parameters.
                wrong_output = run_modules(
                    mconfig["module_connection"],
                    tvm.cpu(),
                    "llvm",
                    "data_0",
                    data,
                    mod2,
                    "data_1",
                    data,
                )
                # Getting the result with setting customized parameters.
                normal_output = run_modules(
                    mconfig["module_connection"],
                    tvm.cpu(),
                    "llvm",
                    "data_0",
                    data,
                    mod2,
                    "data_1",
                    data,
                    customized_parameters_mod,
                    customized_parameters,
                )
                pipeline_module_test.set_input("data_a", data)
                pipeline_module_test.set_input("data_b", data)
                input_data = pipeline_module_test.get_input("data_a")
                tvm.testing.assert_allclose(data, input_data.numpy())
                # Running the pipeline executor in sequential mode.
                pipeline_module_test.run(True)
                outputs = pipeline_module_test.get_output()
                for i in range(len(outputs)):
                    tvm.testing.assert_allclose(normal_output[i],
                                                outputs[i].numpy())
                    assert not (normal_output[i] == wrong_output[i]).all()
            pipeline_module_test.stop()
Example #47
def test_cpu(func, cpu_args):
    evaluator = func.time_evaluator(func.entry_name, tvm.cpu(0), number=5)
    ms = evaluator(*cpu_args).mean
    print('CPU Convolution: %.2f ms' % (ms * 1000))
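
# For context, a minimal sketch of producing a `func` and `cpu_args` that could
# be passed to test_cpu above. This assumes the legacy TE API used elsewhere in
# these examples (tvm.compute / tvm.create_schedule / tvm.build); the names and
# sizes are illustrative, not from the original source.
import numpy as np
import tvm

n = 1024
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)
func = tvm.build(s, [A, B], "llvm", name="vector_add")

ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
b = tvm.nd.empty((n,), B.dtype, ctx)
test_cpu(func, [a, b])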
Example #48
def get_manual_conf(mods, target):
    # This function is used to generate manual pipeline configuration.
    mod_config = {}
    # The third output is the final output; the second output feeds mod3 and
    # the first output feeds mod2.
    pipe_config1 = {
        "mod_idx":
        0,
        "output": [
            {
                "output_idx": 0,
                "dependencies": [{
                    "mod_idx": 1,
                    "input_name": "data_0"
                }]
            },
            {
                "output_idx": 1,
                "dependencies": [{
                    "mod_idx": 2,
                    "input_name": "data_0"
                }]
            },
            {
                "output_idx": 2,
                "dependencies": [{
                    "global_output_index": 0
                }]
            },
        ],
    }
    mod_config[mods[0]] = {
        "pipeline": pipe_config1,
        "target_host": None,
        "mod_name": "default",
        "build": None,
        "params": None,
        "target": target[0],
        "dev": target[1],
    }

    pipe_config2 = {
        "mod_idx":
        1,
        "output": [
            {
                "output_idx": 0,
                "dependencies": [{
                    "mod_idx": 2,
                    "input_name": "data_1"
                }]
            },
        ],
    }
    mod_config[mods[1]] = {
        "pipeline": pipe_config2,
        "target_host": None,
        "mod_name": "default",
        "build": None,
        "params": None,
        "target": "llvm",
        "dev": tvm.cpu(0),
    }

    pipe_config3 = {
        "mod_idx":
        2,
        "output": [{
            "output_idx": 0,
            "dependencies": [{
                "global_output_index": 1
            }]
        }],
    }
    mod_config[mods[2]] = {
        "pipeline": pipe_config3,
        "target_host": None,
        "mod_name": "default",
        "build": None,
        "params": None,
        "target": "llvm",
        "dev": tvm.cpu(0),
    }
    return mod_config
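
# In summary, the configuration above encodes the same DAG that test_pipeline
# builds through the PipelineConfig API: mods[0].output(0) -> mods[1].data_0,
# mods[0].output(1) -> mods[2].data_0, mods[0].output(2) -> global output 0,
# mods[1].output(0) -> mods[2].data_1, and mods[2].output(0) -> global output 1.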
Example #49
def run_unpropagatable_graph(dev, tgt):
    R""" The network is as following:
            a     b  c     d
             \   /    \   /
              add      mul
                \      /
                subtract
    """

    a = relay.var("a", shape=(10, 10))
    b = relay.var("b", shape=(10, 10))
    c = relay.var("c", shape=(10, 10))
    d = relay.var("d", shape=(10, 10))
    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')
    c_data = np.random.rand(10, 10).astype('float32')
    d_data = np.random.rand(10, 10).astype('float32')
    tmp_add = a_data + b_data
    tmp_mul = np.multiply(c_data, d_data)
    ref_res = np.subtract(tmp_add, tmp_mul)

    fallback_device = tvm.context("cpu")
    target = {"cpu": "llvm", dev: tgt}
    cpu_ctx = fallback_device
    dev_ctx = tvm.context(dev)

    def annotated():
        add = relay.add(a, b)
        _add = relay.annotation.on_device(add, dev_ctx)
        mul = relay.multiply(c, d)
        _mul = relay.annotation.on_device(mul, cpu_ctx)
        sub = relay.subtract(add, mul)
        _sub = relay.annotation.on_device(sub, dev_ctx)
        func = relay.Function([a, b, c, d],
                              relay.Tuple(tvm.convert([_add, _mul, _sub,
                                                       sub])))
        func = relay.ir_pass.infer_type(func)
        func = relay.ir_pass.rewrite_annotated_ops(func, dev_ctx.device_type)
        func = relay.ir_pass.infer_type(func)
        return relay.Function(relay.ir_pass.free_vars(func.body[3]),
                              func.body[3])

    def expected():
        add = relay.add(a, b)
        mul = relay.multiply(c, d)
        copy_mul_sub = relay.device_copy(mul, cpu_ctx, dev_ctx)
        sub = relay.subtract(add, copy_mul_sub)
        func = relay.Function([a, b, c, d], sub)
        return func

    annotated_func = annotated()
    expected_func = expected()
    expected_index = [2, 2, 2, 1, 1, 1, 2, 2]
    check_annotated_graph(annotated_func, expected_func)
    params = {"a": a_data, "b": b_data, "c": c_data, "d": d_data}
    config = {"opt_level": 0}
    config["fallback_device"] = fallback_device
    with relay.build_config(**config):
        graph, lib, params = relay.build(annotated_func, target, params=params)
        contexts = [tvm.cpu(0), tvm.context(dev)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example #50
def test_saturation():
    # Same params
    data_dtype = 'uint8'
    x = relay.var("x", shape=(1, 4), dtype=data_dtype)
    y = relay.var("y", shape=(1, 4), dtype=data_dtype)
    z = relay.qnn.op.add(lhs=x,
                         rhs=y,
                         lhs_scale=relay.const(0.125, 'float32'),
                         lhs_zero_point=relay.const(0, 'int32'),
                         rhs_scale=relay.const(0.125, 'float32'),
                         rhs_zero_point=relay.const(0, 'int32'),
                         output_scale=relay.const(0.125, 'float32'),
                         output_zero_point=relay.const(0, 'int32'))

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 1, 1, 0)).reshape((1, 4))
    y_data = np.array((255, 255, 128, 0)).reshape((1, 4))
    golden_output = np.array((255, 255, 129, 0)).reshape((1, 4))

    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
    op_res = intrp.evaluate(func)(x_data, y_data)
    np.testing.assert_equal(op_res.asnumpy(), golden_output)
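
    # A note on golden_output (reasoning added for clarity): with identical
    # input and output quantization params (scale 0.125, zero point 0),
    # qnn.add reduces to integer addition saturated to the uint8 range:
    # 255 + 255 = 510 -> 255, 1 + 255 = 256 -> 255, 1 + 128 = 129, 0 + 0 = 0.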

    # Same params, different scale
    z = relay.qnn.op.add(lhs=x,
                         rhs=y,
                         lhs_scale=relay.const(0.125, 'float32'),
                         lhs_zero_point=relay.const(0, 'int32'),
                         rhs_scale=relay.const(0.125, 'float32'),
                         rhs_zero_point=relay.const(0, 'int32'),
                         output_scale=relay.const(0.25, 'float32'),
                         output_zero_point=relay.const(0, 'int32'))

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 1, 1, 0)).reshape((1, 4))
    y_data = np.array((255, 255, 127, 0)).reshape((1, 4))
    golden_output = np.array((255, 129, 65, 0)).reshape((1, 4))

    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
    op_res = intrp.evaluate(func)(x_data, y_data)
    np.testing.assert_equal(op_res.asnumpy(), golden_output)
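
    # Here the output scale (0.25) is twice the input scale (0.125), so each
    # operand appears to be requantized to the output scale first (halved,
    # rounding half away from zero) and then added with saturation, e.g.
    # round(1 * 0.5) + round(255 * 0.5) = 1 + 128 = 129 for the second element.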

    # Same io params, different output scale
    z = relay.qnn.op.add(lhs=x,
                         rhs=y,
                         lhs_scale=relay.const(0.125, 'float32'),
                         lhs_zero_point=relay.const(0, 'int32'),
                         rhs_scale=relay.const(0.125, 'float32'),
                         rhs_zero_point=relay.const(0, 'int32'),
                         output_scale=relay.const(0.25, 'float32'),
                         output_zero_point=relay.const(0, 'int32'))

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 1, 1, 0)).reshape((1, 4))
    y_data = np.array((255, 255, 127, 0)).reshape((1, 4))
    golden_output = np.array((255, 129, 65, 0)).reshape((1, 4))

    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
    op_res = intrp.evaluate(func)(x_data, y_data)
    np.testing.assert_equal(op_res.asnumpy(), golden_output)

    # All params different
    z = relay.qnn.op.add(lhs=x,
                         rhs=y,
                         lhs_scale=relay.const(0.5, 'float32'),
                         lhs_zero_point=relay.const(0, 'int32'),
                         rhs_scale=relay.const(0.25, 'float32'),
                         rhs_zero_point=relay.const(0, 'int32'),
                         output_scale=relay.const(0.125, 'float32'),
                         output_zero_point=relay.const(0, 'int32'))

    func = relay.Function([x, y], z)
    mod = tvm.IRModule.from_expr(func)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    func = mod["main"]

    x_data = np.array((255, 0, 1, 0)).reshape((1, 4))
    y_data = np.array((0, 128, 64, 0)).reshape((1, 4))
    golden_output = np.array((255, 255, 132, 0)).reshape((1, 4))

    intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
    op_res = intrp.evaluate(func)(x_data, y_data)
    np.testing.assert_equal(op_res.asnumpy(), golden_output)
Example #51
def setup_gmod():
    loaded_lib = tvm.runtime.load_module(path_lib)
    dev = tvm.cpu(0)
    return loaded_lib["default"](dev)
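
# `path_lib` is assumed to be defined earlier in the original test. A typical
# way to produce it (a sketch, not from this snippet; `temp_dir` and the file
# name are hypothetical) is to export a built factory module:
#
#     lib = relay.build(mod, target="llvm", params=params)
#     path_lib = temp_dir.relpath("deploy_lib.so")
#     lib.export_library(path_lib)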
Example #52
def _run_unlinked(lib_mod):
    graph_rt = tvm.contrib.graph_runtime.GraphModule(
        lib_mod["default"](tvm.cpu(0)))
    graph_rt.set_input("rand_input", rand_input, **params)
    graph_rt.run()
    return graph_rt.get_output(0)
Example #53
# Typically ONNX models mix model input values with parameter values, with
# the input having the name `1`. This is model dependent, and you should check
# with the documentation for your model to determine the full input and
# parameter name space.
#
# Passing the shape dictionary to the `relay.frontend.from_onnx` method
# tells relay which ONNX names are inputs and which are parameters, and
# provides a static definition of the input size.
target = "llvm"

input_name = "1"
shape_dict = {input_name: x.shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

with tvm.transform.PassContext(opt_level=1):
    intrp = relay.build_module.create_executor("graph", mod, tvm.cpu(0), target)

######################################################################
# Execute on TVM
# ---------------------------------------------
dtype = "float32"
tvm_output = intrp.evaluate()(tvm.nd.array(x.astype(dtype)), **params).asnumpy()

######################################################################
# Display results
# ---------------------------------------------
# We put input and output image neck to neck. The luminance channel, `Y` is the output
# from the model. The chroma channels `Cb` and `Cr` are resized to match with a simple
# bicubic algorithm. The image is then recombined and converted back to `RGB`.
from matplotlib import pyplot as plt
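
# A sketch of the recombination described above, assuming `img_cb` and `img_cr`
# from the earlier preprocessing step and `out_y` built from `tvm_output` as in
# the similar example above (illustrative, not from the original source):
from PIL import Image

out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
result = Image.merge("YCbCr", [out_y, out_cb, out_cr]).convert("RGB")
plt.imshow(result)
plt.show()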
Example #54
def test_correctness_layout_rewrite_rewrite_for_preTransformed():
    N = 128
    target = tvm.target.Target("llvm")
    task = auto_scheduler.create_task(matmul_auto_scheduler_test, (N, N, N),
                                      target)
    dag = task.compute_dag

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        search_policy = auto_scheduler.SketchPolicy(task)

        measure_ctx = auto_scheduler.LocalRPCMeasureContext()
        tuning_options = auto_scheduler.TuningOptions(
            num_measure_trials=2,
            runner=measure_ctx.runner,
            verbose=1,
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        auto_scheduler.auto_schedule(task, search_policy, tuning_options)
        inp, _ = auto_scheduler.load_best(log_file, task.workload_key, target)
        s, bufs = dag.apply_steps_from_state(
            inp.state,
            layout_rewrite=auto_scheduler.compute_dag.ComputeDAG.RewriteForPreTransformed)
        s_ref, bufs_ref = dag.apply_steps_from_state(inp.state)
        np_args = [
            np.random.randn(*topi.get_const_tuple(x.shape)).astype(x.dtype)
            for x in bufs
        ]
        np_args_ref = [np.array(x) for x in np_args]

        weight = np_args_ref[1]
        # infer shape for the rewritten layout
        if len(weight.shape) >= 6:
            # For cpu tile structure SSRSRS
            base = len(weight.shape) - 6
            red_dim = weight.shape[2 + base] * weight.shape[4 + base]
            out_dim = weight.shape[3 + base] * weight.shape[5 + base]
            for i in range(base + 2):
                out_dim *= weight.shape[i]
            new_order = ([
                2 + base,
                4 + base,
            ] + list(range(base + 2)) + [
                3 + base,
                5 + base,
            ])
            np_args_ref[1] = np_args_ref[1].transpose(new_order)
            np_args_ref[1] = np_args_ref[1].reshape((red_dim, out_dim))
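            # Worked example (illustrative): for a 6-D weight of shape
            # (2, 2, 4, 4, 8, 8), base = 0, red_dim = 4 * 8 = 32,
            # out_dim = 4 * 8 * 2 * 2 = 128, new_order = [2, 4, 0, 1, 3, 5],
            # and the reference weight is reshaped to (32, 128).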

        func = tvm.build(s, bufs, target=target)
        func_ref = tvm.build(s_ref, bufs_ref, target=target)

        ctx = tvm.context(str(target))
        ctx_ref = tvm.cpu()

        args = [tvm.nd.array(x, ctx=ctx) for x in np_args]
        args_ref = [tvm.nd.array(x, ctx=ctx_ref) for x in np_args_ref]
        ctx.sync()

        func(*args)
        func_ref(*args_ref)
        ctx.sync()

        tvm.testing.assert_allclose(args[0].asnumpy(),
                                    args_ref[0].asnumpy(),
                                    atol=1e-3,
                                    rtol=1e-3)
        tvm.testing.assert_allclose(args[2].asnumpy(),
                                    args_ref[2].asnumpy(),
                                    atol=1e-3,
                                    rtol=1e-3)
        del measure_ctx
Example #55
    def convert(self, lst, *, target='cpu', dev_id=0):
        """Converts the list of nodes to a runnable form.

        All the nodes in the list must represent linear flow (no calls,
        branches, ...)

        Returns:
            (fn, inputs, outputs):

            - fn: A callable function
            - inputs: the list of inputs nodes whose values should be
                      provided to the function
            - outputs: the list of output nodes corresponding to the
                       outputs of the function

        Notes:
            This implementation converts the nodes to NNVM and compiles it.

        """
        self.c = count()
        self.eqv = {}
        self.inputs = []
        self.input_names = []
        self.constants = {}
        self.constant_vars = {}
        self.shapes = {}
        self.types = {}

        for n in lst:
            assert n.is_apply()
            assert n.inputs[0].is_constant(Primitive)
            fn = n.inputs[0].value
            conv = self.mapping.get(fn, None)
            if conv is not None:
                self.eqv[n] = conv(self, *n.inputs[1:])
            else:
                raise NotImplementedError(fn)

        outputs = get_outputs(lst, lst[0].graph.manager.uses,
                              set(self.eqv.keys()))

        inmap = dict((self.eqv[i], i) for i in self.inputs)

        # Check for empty functions
        if all(self.eqv[o] in inmap for o in outputs):
            return None, [inmap[self.eqv[o]] for o in outputs], outputs

        if target == 'cpu':
            target = 'llvm'

        g = nnvm.graph.create(sym.Group(list(self.eqv[o] for o in outputs)))
        dg, lib, params = nnvm.compiler.build(g,
                                              target=target,
                                              shape=self.shapes,
                                              dtype=self.types,
                                              params=self.constants)

        shape = dg.json_attr('shape')
        types = dg.json_attr('dtype')
        index = dg.index

        def spec(entry_id):
            return (shape[entry_id],
                    graph_attr.TCODE_TO_DTYPE[types[entry_id]])

        output_specs = [spec(index.entry_id(x)) for x in index.output_entries]
        assert len(output_specs) == len(outputs)

        if target == 'llvm':
            context = tvm.cpu(dev_id)
        elif target == 'cuda':  # pragma: no cover
            context = tvm.gpu(dev_id)
        else:  # pragma: no cover
            raise Exception(f"Unsupported target: {target}")

        module = graph_runtime.create(dg, lib, context)

        for n, p in params.items():
            module.set_input(n, p)

        input_types = [self.types[i] for i in self.input_names]
        return (NNVMRunner(module, self.input_names, input_types, output_specs,
                           context), self.inputs, outputs)
Example #56
def test_meta_schedule_tune_relay(model_name: str, batch_size: int,
                                  target: str):
    if model_name == "inception_v3" and batch_size == 1:
        pytest.skip("inception_v3 does not handle batch_size of 1")

    input_shape: Tuple[int, ...]
    input_name = "input0"
    dev = tvm.cpu() if str(target).startswith("llvm") else tvm.cuda()
    if MODEL_TYPES[model_name] == MODEL_TYPE.TEXT_CLASSIFICATION:
        seq_length = 128
        input_name = "input_ids"
        input_shape = (batch_size, seq_length)
        data = tvm.nd.array(np.random.randint(0, 30521, size=input_shape),
                            dev)  # embedding size
    else:
        if MODEL_TYPES[model_name] == MODEL_TYPE.IMAGE_CLASSIFICATION:
            input_shape = (batch_size, 3, 299, 299)
        elif MODEL_TYPES[model_name] == MODEL_TYPE.SEGMENTATION:
            input_shape = (batch_size, 3, 299, 299)
        elif MODEL_TYPES[model_name] == MODEL_TYPE.OBJECT_DETECTION:
            input_shape = (1, 3, 300, 300)
        elif MODEL_TYPES[model_name] == MODEL_TYPE.VIDEO_CLASSIFICATION:
            input_shape = (batch_size, 3, 3, 299, 299)
        else:
            raise ValueError("Unsupported model: " + model_name)
        data = tvm.nd.array(
            np.random.randn(*input_shape).astype("float32"), dev)

    output_shape: Tuple[int, int] = (batch_size, 1000)

    mod, params = get_torch_model(
        model_name=model_name,
        input_shape=input_shape,
        output_shape=output_shape,
        dtype="float32",
    )

    with tempfile.TemporaryDirectory() as work_dir:
        target = Target(target)
        database = DummyDatabase()
        rt_mod: tvm.runtime.Module = tune_relay(
            mod=mod,
            params=params,
            target=target,
            config=ReplayTraceConfig(
                num_trials_per_iter=32,
                num_trials_total=32,
            ),
            work_dir=work_dir,
            database=database,
        )
        # Compile without meta-scheduler for correctness check
        with tvm.transform.PassContext(opt_level=0):
            rt_mod2 = relay.build(mod, target=target, params=params)

        def get_output(data, lib):
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input(input_name, data)
            module.run()
            return module.get_output(0).numpy()

        # Check correctness
        actual_output = get_output(data, rt_mod)
        expected_output = get_output(data, rt_mod2)
        assert np.allclose(actual_output,
                           expected_output,
                           rtol=1e-4,
                           atol=2e-4)
Example #57
print(model_jit.graph)

print("run torchscript...")
for i in range(20):
    t = time.time()
    model_jit(x)
    print(time.time() - t)

option = {
    "input_infos": [
        ("x", (1, 3, 224, 224)),
    ],
    "default_dtype": "float16",
    "export_dir": "pytorch_compiled",
    "num_outputs": 1,
    "tuning_n_trials": 1,  # set zero to skip tuning
    "tuning_log_file": "tuning.log",
    "target": "llvm",
    "device": tvm.cpu(),
}

pytorch_tvm_module = compile(model_jit, option)
torch.jit.script(pytorch_tvm_module).save("model_tvm.pt")

print("Run PyTorch...")
for i in range(20):
    t = time.time()
    outputs = pytorch_tvm_module.forward([x.cpu()])
    print(1000 * (time.time() - t))
print(outputs[0].shape)
Example #58
## need to be quantized upfront. In the ONNXRuntime Vitis-AI
## execution provider we make use of On-The-Fly (OTF) Quantization
## to remove this additional preprocessing step. In this flow,
## one doesn't need to quantize the model upfront but can
## make use of the typical inference execution calls
## (InferenceSession.run) to quantize the model on-the-fly
## using the first N inputs. This will set up and calibrate
## the Vitis-AI DPU, and from that point onwards inference
## will be accelerated for all subsequent inputs.
## Set the number of inputs used for quantization to e.g. 8
## using the PX_QUANT_SIZE environment variable if you want
## to quantize on fewer inputs. The default is 128.
############################################################

print("Create InferenceSession for OTF Quantization")
InferenceSession = graph_runtime.GraphModule(lib["default"](tvm.cpu()))

px_quant_size = int(os.environ.get('PX_QUANT_SIZE', 128))

print("Start OTF Quantization on first {} images".format(px_quant_size))

quant_files = [
    os.path.join(QUANT_DIR, f) for f in os.listdir(QUANT_DIR)
    if f.endswith(('JPEG', 'jpg', 'png'))
][:px_quant_size]
quant_images = inputs_func(quant_files)
print('Loaded {} inputs successfully.'.format(len(quant_images)))

for i in range(px_quant_size):
    InferenceSession.set_input(input_name, quant_images[i])
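
# For example, to calibrate on only the first 8 inputs, one could set the
# environment variable before running this script (illustrative, not from the
# original source):
#
#     import os
#     os.environ["PX_QUANT_SIZE"] = "8"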
Example #59
map_proto_url = os.path.join(repo_base, map_proto)

# Human readable text for labels
label_map = "imagenet_synset_to_human_label_map.txt"
label_map_url = os.path.join(repo_base, label_map)

# Target settings
# Use these commented settings to build for cuda.
# target = 'cuda'
# target_host = 'llvm'
# layout = "NCHW"
# ctx = tvm.gpu(0)
target = "llvm"
target_host = "llvm"
layout = None
ctx = tvm.cpu(0)

######################################################################
# Download required files
# -----------------------
# Download files listed above.
from tvm.contrib.download import download_testdata

img_path = download_testdata(image_url, img_name, module="data")
model_path = download_testdata(model_url,
                               model_name,
                               module=["tf", "InceptionV1"])
map_proto_path = download_testdata(map_proto_url, map_proto, module="data")
label_path = download_testdata(label_map_url, label_map, module="data")

######################################################################
Example #60
# target x86 CPU
target = "llvm"
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod[mod.entry_func],
                                     target,
                                     params=params)

######################################################################
# Execute on TVM
# ---------------------------------------------
import tvm
from tvm.contrib import graph_runtime as runtime

# create a runtime executor module
module = runtime.create(graph, lib, tvm.cpu())

# feed input data
module.set_input(input_tensor, tvm.nd.array(image_data))

# feed related params
module.set_input(**params)

# run
module.run()

# get output
tvm_output = module.get_output(0).asnumpy()

######################################################################
# Display results