Code Example #1
File: common.py Project: KnowingNothing/FlexTensor
def evaluate(name,
             s,
             bufs,
             target,
             dev_id,
             number=10,
             rpc_info=None,
             result_generator=None):
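    """Build (s, bufs) for `target`, run the compiled function `number` times
    (optionally over RPC), and return the mean runtime in milliseconds;
    returns inf if building or running fails."""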
    if rpc_info is not None:
        use_rpc = rpc_info.use_rpc
        target_host = rpc_info.target_host
        fcompile = rpc_info.fcompile
    else:
        use_rpc, target_host, fcompile = None, None, None

    remote = rpc_info.get_remote() if rpc_info is not None else None
    dev = (remote if remote else tvm).device(target, dev_id)

    np_arys = [
        np.random.uniform(-10, 10, size=to_tuple(buf.shape)).astype(buf.dtype)
        for buf in bufs
    ]
    tvm_arys = [tvm.nd.array(arr, dev) for arr in np_arys]
    func_file = f"{name}.so"
    time_cost = float("inf")
    try:
        func = tvm.build(s, bufs, target=target, target_host=target_host)
        if use_rpc:
            func.export_library(os.path.join(LIB_DIR, func_file), fcompile)
            remote.upload(os.path.join(LIB_DIR, func_file))
            func = remote.load_module(func_file)
        func(*tvm_arys)
        if result_generator is not None:
            print("Test whether computed...")
            result = tvm_arys[-1].asnumpy()
            test_allclose(result, np_arys[-1], rtol=1e-3, print_diff=True)
            print("Test correctness...")
            expected = result_generator(np_arys)
            test_allclose(result, expected, rtol=1e-3, print_diff=True)
        evaluator = func.time_evaluator(func.entry_name, dev, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    except Exception as e:
        print(e)
    finally:
        # drop the TVM array handles before removing the library file
        tvm_arys.clear()
        if os.path.exists(os.path.join(LIB_DIR, func_file)):
            try:
                os.remove(os.path.join(LIB_DIR, func_file))
            except Exception as e:
                print(e)

    return time_cost
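A minimal sketch of how this helper might be invoked, assuming current TVM and a FlexTensor-style operator (the import path and the conv2d_nchw call below follow the other examples on this page, but are assumptions, not part of common.py):

import tvm
from flextensor.nn import conv2d_nchw  # assumed import path

# declare a small conv workload and a default (unoptimized) schedule
inputs = tvm.te.placeholder((1, 3, 32, 32), dtype="float32")
weight = tvm.te.placeholder((8, 3, 3, 3), dtype="float32")
bias = tvm.te.placeholder((8,), dtype="float32")
output = conv2d_nchw(inputs, weight, bias, stride=1, padding=1, dilation=1, groups=1)
s = tvm.te.create_schedule(output.op)

# measure the mean runtime in milliseconds on the local CPU
cost = evaluate("conv_demo", s, [inputs, weight, bias, output], "llvm", 0, number=5)
print("mean runtime:", cost, "ms")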
Code Example #2
def test_conv2d_nchw():
    #################################
    # test basic case
    inputs_np = np.random.uniform(-1, 1, size=[4, 6, 7, 7]).astype(np.float32) * 1000
    weight_np = np.random.uniform(-1, 1, size=[9, 2, 3, 3]).astype(np.float32) * 1000
    bias_np = np.random.uniform(-1, 1, size=[9]).astype(np.float32) * 1000
    
    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)
    output_torch = torch.nn.functional.conv2d(
        inputs_torch, weight_torch, bias_torch, stride=2, padding=1, dilation=2, groups=3)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    weight_tvm = tvm.nd.array(weight_np, tvm_ctx)
    bias_tvm = tvm.nd.array(bias_np, tvm_ctx)
    output_tvm = tvm.nd.array(np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    weight_t = tvm.te.placeholder(weight_np.shape, dtype="float32")
    bias_t = tvm.te.placeholder(bias_np.shape, dtype="float32")
    output_t = conv2d_nchw(inputs_t, weight_t, bias_t, stride=2, padding=1, dilation=2, groups=3)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, weight_t, bias_t, output_t], "llvm")
    func(inputs_tvm, weight_tvm, bias_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(), output_torch.numpy(), rtol=1e-5 * 1000, print_diff=True)
    if passed == 1:
        print("Conv2d_nchw basic case passed!")
    else:
        print("Conv2d_nchw basic case failed!")
Code Example #3
def test_bilinear():
    #################################
    # test basic case
    inputs_np = np.random.random([2, 3, 2, 3, 17]).astype(np.float32) * 100
    another_np = np.random.random([2, 3, 2, 3, 8]).astype(np.float32) * 100
    weight_np = np.random.random([5, 17, 8]).astype(np.float32) * 100
    bias_np = np.random.random([5]).astype(np.float32) * 100
    
    inputs_torch = torch.tensor(inputs_np)
    another_torch = torch.tensor(another_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)
    output_torch = torch.nn.functional.bilinear(inputs_torch, another_torch, weight_torch, bias_torch)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    another_tvm = tvm.nd.array(another_np, tvm_ctx)
    weight_tvm = tvm.nd.array(weight_np, tvm_ctx)
    bias_tvm = tvm.nd.array(bias_np, tvm_ctx)
    output_tvm = tvm.nd.array(np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    another_t = tvm.te.placeholder(another_np.shape, dtype="float32")
    weight_t = tvm.te.placeholder(weight_np.shape, dtype="float32")
    bias_t = tvm.te.placeholder(bias_np.shape, dtype="float32")
    output_t = bilinear(inputs_t, another_t, weight_t, bias_t)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, another_t, weight_t, bias_t, output_t], "llvm")
    func(inputs_tvm, another_tvm, weight_tvm, bias_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(), output_torch.numpy(), rtol=1e-5, print_diff=True)
    if passed == 1:
        print("Bilinear basic case passed!")
    else:
        print("Bilinear basic case failed!")
Code Example #4
def test_gemm_conv2d_nchw():
    #################################
    # test basic case
    inputs_np = np.random.random([1, 384, 27, 27]).astype(np.float32) * 100
    weight_np = np.random.random([64, 384, 1, 1]).astype(np.float32) * 100
    bias_np = np.random.random([64]).astype(np.float32) * 100
    
    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)
    output_torch = torch.nn.functional.conv2d(
        inputs_torch, weight_torch, bias_torch, stride=1, padding=0, dilation=1, groups=1)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    weight_tvm = tvm.nd.array(weight_np, tvm_ctx)
    bias_tvm = tvm.nd.array(bias_np, tvm_ctx)
    output_tvm = tvm.nd.array(np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    weight_t = tvm.te.placeholder(weight_np.shape, dtype="float32")
    bias_t = tvm.te.placeholder(bias_np.shape, dtype="float32")
    output_t = gemm_conv2d_nchw(inputs_t, weight_t, bias_t, stride=1, padding=0, dilation=1, groups=1)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, weight_t, bias_t, output_t], "llvm")
    func(inputs_tvm, weight_tvm, bias_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(), output_torch.numpy(), rtol=1e-5, print_diff=True)
    if passed == 1:
        print("Gemm_conv2d_nchw basic case passed!")
    else:
        print("Gemm_conv2d_nchw basic case failed!")
Code Example #5
File: test_ops.py Project: KnowingNothing/FlexTensor
def test_batch_norm():
    #################################
    # test basic case
    inputs_np = np.random.random([100, 200]).astype(np.float32) * 100

    inputs_torch = torch.tensor(inputs_np)
    running_mean = torch.mean(inputs_torch, dim=0)
    running_var = inputs_torch.var(dim=0)
    output_torch = torch.nn.functional.batch_norm(inputs_torch, running_mean,
                                                  running_var)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    output_tvm = tvm.nd.array(
        np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    output_t = batch_normalization2d(inputs_t)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, output_t], "llvm")
    func(inputs_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(),
                           output_torch.numpy(),
                           rtol=1e-2,
                           print_diff=True)
    if passed == 1:
        print("Batch_norm basic case passed!")
    else:
        print("Batch_norm basic case failed!")
Code Example #6
File: test_ops.py Project: KnowingNothing/FlexTensor
def test_variance():
    #################################
    # test basic case
    inputs_np = np.random.random([2, 3, 27, 3, 17]).astype(np.float32) * 100

    inputs_torch = torch.tensor(inputs_np)
    output_torch = inputs_torch.var(dim=2)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    output_tvm = tvm.nd.array(
        np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    output_t = variance(inputs_t, dim=2)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, output_t], "llvm")
    func(inputs_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(),
                           output_torch.numpy(),
                           rtol=1e-5,
                           print_diff=True)
    if passed == 1:
        print("Variance basic case passed!")
    else:
        print("Variance basic case failed!")
Code Example #7
def check_result(configs, shape, target="cuda", dev_id=0):
    ctx = tvm.device(target, dev_id)
    name, configs = configs
    batch, in_channel, H, W, out_channel, k, _, stride, padding, dilation, groups = shape
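    # shape unpacks as (batch, C_in, H, W, C_out, kernel_h, kernel_w (unused),
    # stride, padding, dilation, groups)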
    A_np = np.random.uniform(-10, 10, size=[batch, in_channel, H,
                                            W]).astype("float32")
    A_tvm = tvm.nd.array(A_np, ctx)
    A_torch = torch.tensor(A_np)  # .cuda("cuda:" + str(dev_id))
    W_np = np.random.uniform(-10,
                             10,
                             size=[out_channel, in_channel // groups, k,
                                   k]).astype("float32")
    W_tvm = tvm.nd.array(W_np, ctx)
    W_torch = torch.tensor(W_np)  # .cuda("cuda:" + str(dev_id))
    Output_torch = torch.nn.functional.conv2d(A_torch,
                                              W_torch,
                                              stride=stride,
                                              padding=padding,
                                              dilation=dilation,
                                              groups=groups)
    Output_np = np.zeros(Output_torch.shape).astype(np.float32)
    Output_tvm = tvm.nd.array(Output_np, ctx)
    s, bufs = schedule_with_config(name, configs)
    func = tvm.build(s, bufs, target)
    func(A_tvm, W_tvm, Output_tvm)
    passed = test_allclose(Output_tvm.asnumpy(),
                           Output_torch.cpu().numpy(),
                           rtol=1e-5,
                           print_diff=True)
    if passed == 1:
        print("Passed!")
    else:
        print("Failed!")
Code Example #8
File: test_ops.py Project: KnowingNothing/FlexTensor
def test_conv2d_nchwc():
    #################################
    # test basic case
    inputs_np = np.random.uniform(-1, 1, size=[4, 6, 8, 8, 4]).astype(
        np.float32) * 1000
    weight_np = np.random.uniform(-1, 1, size=[9, 2, 3, 3, 4, 4]).astype(
        np.float32) * 1000
    bias_np = np.random.uniform(-1, 1, size=[9, 4]).astype(np.float32) * 1000
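    # NCHWc layout (inferred from the shapes): inputs are [N, C//vc, H, W, vc]
    # with vc = 4 (so C = 24), weight is [K//vk, C//(groups*vc), kh, kw, vc, vk]
    # (so K = 36 with groups = 3), and bias is [K//vk, vk]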
    # inputs_np = np.ones([1, 1, 3, 3, 1], dtype=np.float32)
    # weight_np = np.ones([1, 1, 3, 3, 1, 1], dtype=np.float32)
    # bias_np = np.zeros([1, 1], dtype=np.float32)

    output_np = pyimpl.conv2d_nchwc(inputs_np,
                                    weight_np,
                                    bias_np,
                                    stride=2,
                                    padding=1,
                                    dilation=2,
                                    groups=3)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    weight_tvm = tvm.nd.array(weight_np, tvm_ctx)
    bias_tvm = tvm.nd.array(bias_np, tvm_ctx)
    output_tvm = tvm.nd.array(
        np.zeros(output_np.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    weight_t = tvm.te.placeholder(weight_np.shape, dtype="float32")
    bias_t = tvm.te.placeholder(bias_np.shape, dtype="float32")
    output_t = conv2d_nchwc(inputs_t,
                            weight_t,
                            bias_t,
                            stride=2,
                            padding=1,
                            dilation=2,
                            groups=3)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, weight_t, bias_t, output_t], "llvm")
    func(inputs_tvm, weight_tvm, bias_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(),
                           output_np,
                           rtol=1e-5 * 1000,
                           print_diff=True)

    if passed == 1:
        print("Conv2d_nchwc basic case passed!")
    else:
        print("Conv2d_nchwc basic case failed!")
Code Example #9
File: test_ops.py Project: KnowingNothing/FlexTensor
def test_conv_transpose1d():
    #################################
    # test basic case
    inputs_np = np.random.random([4, 9, 10]).astype(np.float32) * 100
    weight_np = np.random.random([9, 2, 3]).astype(np.float32) * 100
    bias_np = np.random.random([6]).astype(np.float32) * 100
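    # torch's conv_transpose1d weight layout is [C_in, C_out//groups, k];
    # with groups=3 this gives 3 * 2 = 6 output channels, matching bias [6]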

    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)
    output_torch = torch.nn.functional.conv_transpose1d(inputs_torch,
                                                        weight_torch,
                                                        bias_torch,
                                                        stride=2,
                                                        padding=1,
                                                        output_padding=1,
                                                        dilation=1,
                                                        groups=3)

    tvm_ctx = tvm.device("llvm", 0)
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    weight_tvm = tvm.nd.array(weight_np, tvm_ctx)
    bias_tvm = tvm.nd.array(bias_np, tvm_ctx)
    output_tvm = tvm.nd.array(
        np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    weight_t = tvm.te.placeholder(weight_np.shape, dtype="float32")
    bias_t = tvm.te.placeholder(bias_np.shape, dtype="float32")
    output_t = conv_transpose1d(inputs_t,
                                weight_t,
                                bias_t,
                                stride=2,
                                padding=1,
                                output_padding=1,
                                dilation=1,
                                groups=3)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, weight_t, bias_t, output_t], "llvm")
    func(inputs_tvm, weight_tvm, bias_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(),
                           output_torch.numpy(),
                           rtol=1e-5,
                           print_diff=True)
    if passed == 1:
        print("Conv_transpose1d basic case passed!")
    else:
        print("Conv_transpose1d basic case failed!")
Code Example #10
File: test_ops.py Project: KnowingNothing/FlexTensor
def test_depthwise_conv2d_nchw():
    #################################
    # test basic case
    inputs_np = np.random.random([4, 6, 7, 7]).astype(np.float32) * 100
    weight_np = np.random.random([18, 1, 3, 3]).astype(np.float32) * 100
    bias_np = np.random.random([18]).astype(np.float32) * 100

    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)
    output_torch = torch.nn.functional.conv2d(inputs_torch,
                                              weight_torch,
                                              bias_torch,
                                              stride=2,
                                              padding=1,
                                              dilation=2,
                                              groups=6)

    tvm_ctx = tvm.device("llvm", 0)
    # torch stores depthwise weights as [C * multiplier, 1, kh, kw]; the
    # project's depthwise kernel expects [C, multiplier, kh, kw], so reshape
    weight_np = np.reshape(weight_np, [6, 3, 3, 3])
    inputs_tvm = tvm.nd.array(inputs_np, tvm_ctx)
    weight_tvm = tvm.nd.array(weight_np, tvm_ctx)
    bias_tvm = tvm.nd.array(bias_np, tvm_ctx)
    output_tvm = tvm.nd.array(
        np.zeros(output_torch.shape).astype(np.float32), tvm_ctx)
    inputs_t = tvm.te.placeholder(inputs_np.shape, dtype="float32")
    weight_t = tvm.te.placeholder(weight_np.shape, dtype="float32")
    bias_t = tvm.te.placeholder(bias_np.shape, dtype="float32")
    output_t = depthwise_conv2d_nchw(inputs_t,
                                     weight_t,
                                     bias_t,
                                     stride=2,
                                     padding=1,
                                     dilation=2)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [inputs_t, weight_t, bias_t, output_t], "llvm")
    func(inputs_tvm, weight_tvm, bias_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(),
                           output_torch.numpy(),
                           rtol=1e-5,
                           print_diff=True)
    if passed == 1:
        print("Depthwise_conv2d_nchw basic case passed!")
    else:
        print("Depthwise_conv2d_nchw basic case failed!")
Code Example #11
File: test_ops.py Project: KnowingNothing/FlexTensor
def test_block_circulant_matrix():
    ROW, COL, FFT = 1024, 40, 16
    input_np = np.random.random([ROW, COL]).astype(np.float32)
    # input_np = np.ones([ROW, COL], dtype=np.float32)
    output_np = np.zeros([ROW, COL], dtype=np.float32)

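    # reference implementation: for each FFT-row block, first average the
    # input along the wrapped diagonals of every FFT-wide tile into `vec` ...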
    for i in range(ROW // FFT):
        sub_vec = np.zeros([FFT], dtype=np.float32)
        vec = np.zeros([COL], dtype=np.float32)
        for t in range(COL // FFT):
            for m in range(FFT):
                for n in range(FFT):
                    vec[t * FFT + m] += \
                        input_np[FFT * i + n][t * FFT + (m + n) % FFT] / FFT

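        # ... then emit a circulant block: output row j is `vec` with every
        # FFT-wide segment rotated right j times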
        for j in range(FFT):
            for k in range(COL // FFT):
                if j >= 1:
                    sub_vec[0] = vec[FFT * (k + 1) - 1]
                    sub_vec[1:FFT] = vec[FFT * k:FFT * (k + 1) - 1]
                    vec[FFT * k:FFT * (k + 1)] = sub_vec
            output_np[FFT * i + j][:] = copy.deepcopy(vec)

    tvm_ctx = tvm.device('llvm', 0)
    input_tvm = tvm.nd.array(input_np, tvm_ctx)
    output_tvm = tvm.nd.array(
        np.zeros(output_np.shape).astype(np.float32), tvm_ctx)
    input_t = tvm.te.placeholder(input_np.shape, dtype='float32')
    output_t = block_circulant_matrix(input_t, FFT)
    s = tvm.te.create_schedule(output_t.op)
    func = tvm.build(s, [input_t, output_t], 'llvm')
    func(input_tvm, output_tvm)

    passed = test_allclose(output_tvm.asnumpy(),
                           output_np,
                           rtol=1e-5,
                           print_diff=True)
    if passed == 1:
        print("Block_circulant_matrix basic case passed")
    else:
        print("Block_circulant_matrix case failed")
Code Example #12
def try_yolo_conv(batch_size=2, number=100):
    # get the compute
    yolo_conv = SqueezeNetFire8Gemm()
    input_shape = yolo_conv.get_intput_shape()
    inputs = tvm.te.placeholder((batch_size, *input_shape),
                                dtype="float32",
                                name='inputs')
    weight = yolo_conv.get_weight()
    outputs = yolo_conv(inputs)
    bias = yolo_conv.get_bias()

    s = tvm.te.create_schedule(outputs.op)

    schedule_yolo_conv_x86(s, outputs, inputs, weight, bias)

    arg_bufs = [inputs, weight, bias, outputs]
    stmt = tvm.lower(s, arg_bufs, simple_mode=True)
    print(stmt)
    dev_id = 0
    time_cost = _evaluate(s, arg_bufs, "llvm", dev_id, number=number)
    print("Yolo conv24 use", time_cost, "ms")
    """ For pytorch """
    out_channel, in_channel, kernel_height, kernel_width = yolo_conv.weight_shape
    padding, stride, dilation, groups = (yolo_conv.padding, yolo_conv.stride,
                                         yolo_conv.dilation, yolo_conv.groups)
    conv2d_torch = torch.nn.Conv2d(in_channel,
                                   out_channel, (kernel_height, kernel_width),
                                   padding=padding,
                                   stride=stride,
                                   dilation=dilation,
                                   groups=groups)

    # warm up
    inputs = torch.rand(batch_size, *input_shape)  # note: shadows the TVM placeholder above
    res = conv2d_torch(inputs)

    times = time.time()
    for _ in range(number):
        res = conv2d_torch(inputs)
    times = time.time() - times
    print("Pytorch on cpu use: {}ms".format(times / number * 1e3))

    # to test the correctness; currently the result is wrong because of the
    # schedule. If you change line 148 to
    # 'outer = s[write_cache].fuse(gemm_g, gemm_go)', the result is correct.
    ctx = tvm.device("llvm", 0)
    inputs_np = np.random.random(inputs.shape).astype("float32") * 100
    weight_np = np.random.random(to_tuple(weight.shape)).astype(
        weight.dtype) * 100
    outputs_np = np.zeros(shape=to_tuple(outputs.shape), dtype=np.float32)
    bias_np = np.random.random(size=to_tuple(bias.shape)).astype(
        bias.dtype) * 100

    inputs_tvm = tvm.nd.array(inputs_np, ctx)
    weight_tvm = tvm.nd.array(weight_np, ctx)
    outputs_tvm = tvm.nd.array(outputs_np, ctx)
    bias_tvm = tvm.nd.array(bias_np, ctx)

    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)

    func_tvm = tvm.build(s, arg_bufs, "llvm")
    func_tvm(inputs_tvm, weight_tvm, bias_tvm, outputs_tvm)
    outputs_torch = torch.nn.functional.conv2d(inputs_torch,
                                               weight_torch,
                                               bias=bias_torch,
                                               padding=padding,
                                               stride=stride,
                                               dilation=dilation,
                                               groups=groups)
    the_same = test_allclose(outputs_tvm.asnumpy(),
                             outputs_torch.numpy(),
                             rtol=1e-5,
                             print_diff=True)
    if the_same:
        print("The same!")
    else:
        print("Not the same!")