Example #1
def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False):
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))

    in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
    bias = tvm.placeholder((num_filter, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d_nchw.verify_conv2d_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = topi.testing.dilate_python(w_np, (1, 1, dilation, dilation))
        c_np = topi.testing.conv2d_nchw_python(a_np, dw_np, stride, padding)
        if add_bias:
            b_np = np.random.uniform(size=bias_shape).astype(dtype)
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.conv2d(A, W, (stride, stride), (padding, padding),
                               (dilation, dilation), layout='NCHW', out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_conv2d_nchw([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
            check_device(device)
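A verify helper like the one above is normally driven from a test entry point; a minimal sketch of such a call, with purely illustrative workload values for (batch, in_channel, in_size, num_filter, kernel, stride, padding), might be:

# Hypothetical invocation with illustrative workload values (not from the original test suite):
verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1, add_bias=True, add_relu=True)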
Example #2
File: nn.py Project: bddppq/tvm
def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
    """Compute definition of conv2d NCHWc"""
    padding = attrs.get_int_tuple("padding")
    strides = attrs.get_int_tuple("strides")
    dilation = attrs.get_int_tuple("dilation")
    out_channel = attrs.get_int("channels")
    groups = attrs.get_int("groups")
    layout = attrs.get_str("layout")
    out_layout = attrs.get_str("out_layout")
    out_dtype = attrs.get_str("out_dtype")
    out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
    if layout == "NCHW":
        _, in_channel, _, _ = get_const_tuple(inputs[0].shape)
    else:
        _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
        in_channel = in_channel_chunk * in_channel_block
    assert dilation == (1, 1), "not support dilate now"
    if groups == 1:
        # pylint: disable=assignment-from-no-return
        out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], strides, padding, dilation,
                                   layout, out_layout, out_dtype)
        # pylint: enable=assignment-from-no-return
    elif groups == in_channel and groups == out_channel:
        # pylint: disable=assignment-from-no-return
        out = topi.nn.depthwise_conv2d_NCHWc(inputs[0], inputs[1], strides, padding,
                                             dilation, layout, out_layout, out_dtype)
        # pylint: enable=assignment-from-no-return
    else:
        raise ValueError("not support arbitrary group number > 1 for now")
    if attrs.get_bool("use_bias"):
        bias = inputs[2]
        bias = topi.expand_dims(bias, axis=1, num_newaxis=2)
        out = topi.add(out, bias)
    return out
Example #3
def verify_conv2d(batch, in_size, in_channel, num_filter, kernel, stride, padding):
    in_height = in_width = in_size

    with tvm.target.rasp():
        A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
        W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
        B = topi.nn.conv2d(A, W, stride, padding)
        s = topi.generic.schedule_conv2d_nchw([B])

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d.verify_conv2d")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv2d_nchw_python(a_np, w_np, stride, padding)
        return a_np, w_np, b_np

    a_np, w_np, b_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    func = tvm.build(s, [A, W, B], "llvm")
    func(a, w, b)
    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #4
def verify_bitserial_dense(batch, in_dim, out_dim, activation_bits, weight_bits, unipolar):
    input_dtype = 'uint32'
    out_dtype = 'int16'

    with tvm.target.create('llvm'):
        A = tvm.placeholder((batch, in_dim), dtype=input_dtype, name='A')
        B = tvm.placeholder((out_dim, in_dim), dtype=input_dtype, name='B')
        C = topi.nn.bitserial_dense(A, B, activation_bits, weight_bits, out_dtype=out_dtype,
                                    unipolar=unipolar)
        s = topi.generic.schedule_bitserial_dense([C])

    a_shape = get_const_tuple(A.shape)
    b_shape = get_const_tuple(B.shape)

    @memoize("topi.tests.test_topi_bitseral_dense")
    def get_ref_data():
        a_np = generate_quantized_np(get_const_tuple(a_shape), activation_bits, input_dtype)
        b_np = generate_quantized_np(get_const_tuple(b_shape), weight_bits, input_dtype)
        if unipolar:
            b_ = np.copy(b_np).astype(out_dtype)
            for x in np.nditer(b_, op_flags=['readwrite']):
                x[...] = 1 if x == 1 else -1
            c_np = np.dot(a_np, b_.T)
        else:
            c_np = np.dot(a_np, b_np.T)
        return a_np, b_np, c_np
    a_np, b_np, c_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
    func = tvm.build(s, [A, B, C], "llvm")
    func(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #5
def test_bilayout_index():
    bilayout = tvm.bijective_layout("NCHW", "NCHW16c")

    dst_index = bilayout.forward_index([0, 18, 6, 6])
    assert get_const_tuple(dst_index) == (0, 1, 6, 6, 2)

    src_index = bilayout.backward_index([0, 1, 6, 6, 2])
    assert get_const_tuple(src_index) == (0, 18, 6, 6)
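The asserted tuple follows from splitting the channel axis into a chunk of 16 and an in-chunk block; a small standalone check of that arithmetic, shown for illustration only:

# NCHW -> NCHW16c maps channel c to chunk c // 16 and block c % 16; for c = 18:
c, block = 18, 16
assert (c // block, c % block) == (1, 2)   # matches forward_index([0, 18, 6, 6]) -> (0, 1, 6, 6, 2)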
Example #6
def test_bilayout_shape():
    bilayout = tvm.bijective_layout("NCHW", "NCHW16c")
    assert isinstance(bilayout, tvm.tensor.BijectiveLayout)

    dst_shape = bilayout.forward_shape((1, 32, 7, 7))
    assert get_const_tuple(dst_shape) == (1, 2, 7, 7, 16)

    src_shape = bilayout.backward_shape(dst_shape)
    assert get_const_tuple(src_shape) == (1, 32, 7, 7)
Example #7
def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode):
    iw = ih
    kw = kh
    sw = sh
    ph, pw = padding
    A = tvm.placeholder((n, ic, ih, iw), name='A')
    B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
                     pool_type=pool_type, ceil_mode=ceil_mode)
    B = topi.nn.relu(B)
    dtype = A.dtype

    bshape = get_const_tuple(B.shape)
    ashape = get_const_tuple(A.shape)
    if ceil_mode:
        assert bshape[2] == int(math.ceil(float(ashape[2] - kh + ph * 2) / sh) + 1)
        assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pw * 2) / sw) + 1)
    else:
        assert bshape[2] == int(math.floor(float(ashape[2] - kh + ph * 2) / sh) + 1)
        assert bshape[3] == int(math.floor(float(ashape[3] - kw + pw * 2) / sw) + 1)


    a_np = np.random.uniform(size=(n, ic, ih, iw)).astype(dtype)
    pad_np = np.zeros(shape=(n, ic, ih+2*ph, iw+2*pw)).astype(dtype)
    no_zero = (range(n), range(ic), (range(ph, ih+ph)), (range(pw, iw+pw)))
    pad_np[np.ix_(*no_zero)] = a_np
    _, oc, oh, ow = get_const_tuple(B.shape)
    b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype)

    if pool_type == 'avg':
        for i in range(oh):
            for j in range(ow):
                b_np[:,:,i,j] = np.mean(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3))
    elif pool_type =='max':
        for i in range(oh):
            for j in range(ow):
                b_np[:,:,i,j] = np.max(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3))
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_pool(B)
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        print(tvm.lower(s, [A, B], simple_mode=True))

        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['opengl']:
        check_device(device)
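The shape assertions above encode the usual pooling output-size formula; a worked numeric instance of the floor (non-ceil) case, with illustrative values:

import math
ih, kh, ph, sh = 28, 2, 0, 2
oh = int(math.floor(float(ih - kh + 2 * ph) / sh) + 1)   # -> 14
assert oh == 14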
Example #8
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            # declare
            DepthwiseConv2d = topi.nn.depthwise_conv2d_NCHWc(Input, Filter,
                                                             (stride_h, stride_w),
                                                             padding_args,
                                                             (dilation, dilation),
                                                             in_layout,
                                                             out_layout, dtype)
            # TODO: add a scale_shift implementation for NCHWc and add a test here
            Relu = topi.nn.relu(DepthwiseConv2d)
            # schedule
            s1 = topi.generic.schedule_depthwise_conv2d_nchw(DepthwiseConv2d)
            s2 = topi.generic.schedule_depthwise_conv2d_nchw(Relu)
        # build the kernels
        f1 = tvm.build(s1, [Input, Filter, DepthwiseConv2d], device)
        f2 = tvm.build(s2, [Input, Filter, Relu], device)

        # Prepare pod type for test data closure
        input_shape = (batch, in_channel, in_height, in_width)
        filter_shape = (filter_channel, channel_multiplier, filter_height, filter_width)

        # Use memoize, pickle the test data for next time use.
        @memoize("topi.tests.test_topi_depthwise_conv2d.NCHWc")
        def get_ref_data():
            input_np = np.random.uniform(size=input_shape).astype(dtype)
            filter_np = np.random.uniform(size=filter_shape).astype(dtype)
            # correctness with scipy
            depthwise_conv2d_scipy = topi.testing.depthwise_conv2d_python_nchw(
                input_np, filter_np, stride, padding)
            relu_scipy = np.maximum(depthwise_conv2d_scipy, 0)
            return (_transform_data(input_np, ic_block),
                    _transform_kernel(filter_np, oc_block),
                    _transform_data(depthwise_conv2d_scipy, oc_block),
                    _transform_data(relu_scipy, oc_block))

        # Get the test data
        (input_np, filter_np, depthwise_conv2d_scipy, relu_scipy) = get_ref_data()

        input_tvm = tvm.nd.array(input_np, ctx)
        filter_tvm = tvm.nd.array(filter_np, ctx)
        depthwise_conv2d_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(DepthwiseConv2d.shape),
                                                     dtype=DepthwiseConv2d.dtype), ctx)
        relu_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(Relu.shape), dtype=Relu.dtype), ctx)
        # launch kernel 1 (depthwise_conv2d)
        f1(input_tvm, filter_tvm, depthwise_conv2d_tvm)
        # launch kernel 2 (depthwise_conv2d + relu)
        f2(input_tvm, filter_tvm, relu_tvm)
        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
Example #9
 def get_ref_data():
     a_np = generate_quantized_np(get_const_tuple(a_shape), activation_bits, input_dtype)
     b_np = generate_quantized_np(get_const_tuple(b_shape), weight_bits, input_dtype)
     if unipolar:
         b_ = np.copy(b_np).astype(out_dtype)
         for x in np.nditer(b_, op_flags=['readwrite']):
             x[...] = 1 if x == 1 else -1
         c_np = np.dot(a_np, b_.T)
     else:
         c_np = np.dot(a_np, b_np.T)
     return a_np, b_np, c_np
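The nditer loop above simply remaps stored weight bits {0, 1} to signed values {-1, +1} before the reference dot product; an equivalent vectorized form, shown standalone for illustration:

import numpy as np
bits = np.array([0, 1, 1, 0], dtype=np.uint32)
signed = np.where(bits == 1, 1, -1).astype(np.int16)   # -> [-1, 1, 1, -1]
assert list(signed) == [-1, 1, 1, -1]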
Example #10
 def get_ref_data():
     a_np = generate_quantized_np(get_const_tuple(A.shape), activation_bits, input_type)
     w_np = generate_quantized_np(get_const_tuple(W.shape), weight_bits, input_type)
     if unipolar:
         w_ = np.copy(w_np).astype(out_dtype)
         for x in np.nditer(w_, op_flags=['readwrite']):
             x[...] = 1 if x == 1 else -1
         b_np = topi.testing.conv2d_nhwc_python(a_np, w_, stride, padding).astype(out_dtype)
     else:
         b_np = topi.testing.conv2d_nhwc_python(a_np, w_np, stride, padding).astype(out_dtype)
     return a_np, w_np, b_np
Example #11
File: _nn.py Project: bddppq/tvm
def compute_deformable_conv2d(attrs, inputs, out_dtype, target):
    """Compute definition of deformable_conv2d"""
    padding = get_const_tuple(attrs.padding)
    strides = get_const_tuple(attrs.strides)
    dilation = get_const_tuple(attrs.dilation)
    deformable_groups = attrs.deformable_groups
    groups = attrs.groups
    out_dtype = attrs.out_dtype
    out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
    with target:
        out = topi.nn.deformable_conv2d_nchw(inputs[0], inputs[1], inputs[2], strides, padding,
                                             dilation, deformable_groups, groups, out_dtype)
    return [out]
Example #12
def verify_leaky_relu(m, alpha):
    A = tvm.placeholder((m,), name='A')
    B = topi.nn.leaky_relu(A, alpha)
    s = tvm.create_schedule([B.op])

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [A, B], "llvm", name="leaky_relu")
    foo(a, b)
    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
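The reference expression a_np * (a_np > 0) + a_np * (a_np < 0) * alpha is just leaky ReLU written with boolean masks; a standalone sanity check against np.where, for illustration:

import numpy as np
a = np.array([-2.0, 0.0, 3.0])
alpha = 0.1
assert np.allclose(np.where(a > 0, a, a * alpha),
                   a * (a > 0) + a * (a < 0) * alpha)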
Example #13
def verify_deformable_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, deformable_groups=1, groups=1):
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size,
            num_filter, kernel, stride, padding, dilation, deformable_groups, groups))

    A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A')
    out_size = (in_size - (kernel - 1) * dilation - 1 + 2 * padding) // stride + 1
    Offset = tvm.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset')
    W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
    bias = tvm.placeholder((num_filter, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    offset_shape = get_const_tuple(Offset.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_deformable_conv2d_nchw.verify_deformable_conv2d_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        offset_np = np.random.randn(*offset_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        c_np = topi.testing.deformable_conv2d_nchw_python(a_np, offset_np, w_np, stride, padding,
                                                          dilation, deformable_groups, groups)

        return a_np, offset_np, w_np, c_np

    a_np, offset_np, w_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.deformable_conv2d_nchw(A, Offset, W, stride, padding, dilation,
                    deformable_groups, groups, out_dtype=dtype)
            s = topi.generic.schedule_deformable_conv2d_nchw([C])

            a = tvm.nd.array(a_np, ctx)
            offset = tvm.nd.array(offset_np, ctx)
            w = tvm.nd.array(w_np, ctx)
            c = tvm.nd.empty(c_np.shape, dtype=c_np.dtype, ctx=ctx)

            func = tvm.build(s, [A, Offset, W, C], device)
            func(a, offset, w, c)
            tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
Example #14
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        # build the kernel
        f = tvm.build(schedule, [Filter, Out_grad, In_grad], device)
        # prepare pod type for test data closure
        dtype = Out_grad.dtype
        out_grad_shape = get_const_tuple(Out_grad.shape)
        filter_shape = get_const_tuple(Filter.shape)

        # use memoize to pickle the test data for next time use
        @memoize("topi.tests.test_topi_depthwise_conv2d_backward_input.nhwc")
        def get_ref_data():
            out_grad_np = np.random.uniform(size=out_grad_shape).astype(dtype)
            filter_np = np.random.uniform(size=filter_shape).astype(dtype)
            dilated_out_grad_np = topi.testing.dilate_python(out_grad_np, [1, stride_h, stride_w, 1])
            # padding params in forward propagation
            fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple([padding_h, padding_w], (filter_h, filter_w))
            # padding params in backward propagation
            bpad_top = filter_h - 1 - fpad_top
            bpad_bottom = (filter_h - 1 - fpad_bottom) + (stride_h - 1)
            bpad_left = filter_w - 1 - fpad_left
            bpad_right = (filter_w - 1 - fpad_right) + (stride_w - 1)

            padded_out_grad = np.zeros((batch, dilated_out_grad_np.shape[1]+bpad_top+bpad_bottom,
                dilated_out_grad_np.shape[2]+bpad_left+bpad_right, out_channel))
            padded_out_grad[:, bpad_top:dilated_out_grad_np.shape[1]+bpad_top,
                bpad_left:dilated_out_grad_np.shape[2]+bpad_left, :] = dilated_out_grad_np

            in_grad_np = np.zeros((batch, in_h, in_w, in_channel))
            for b in range(batch):
                for c in range(in_channel):
                    for m in range(channel_multiplier):
                        in_grad_np[b, :, :, c] += signal.convolve2d(padded_out_grad[b, :, :, c*channel_multiplier+m], \
                                filter_np[:, :, c, m], mode='valid')[0:in_h, 0:in_w]
            return (out_grad_np, filter_np, in_grad_np)

        (out_grad_np, filter_np, in_grad_np) = get_ref_data()

        out_grad_tvm = tvm.nd.array(out_grad_np, ctx)
        filter_tvm = tvm.nd.array(filter_np, ctx)
        in_grad_tvm = tvm.nd.array(np.zeros(shape=ishape, dtype=dtype), ctx)
        # launch the kernel
        timer = f.time_evaluator(f.entry_name, ctx, number=1)
        tcost = timer(filter_tvm, out_grad_tvm, in_grad_tvm).mean
        tvm.testing.assert_allclose(in_grad_np, in_grad_tvm.asnumpy(), rtol=1e-5)
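The backward-padding values above are derived from the forward padding and stride; a worked numeric instance with illustrative parameters:

filter_h, fpad_top, fpad_bottom, stride_h = 3, 1, 1, 2
bpad_top = filter_h - 1 - fpad_top                             # -> 1
bpad_bottom = (filter_h - 1 - fpad_bottom) + (stride_h - 1)    # -> 2
assert (bpad_top, bpad_bottom) == (1, 2)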
Example #15
def verify_leaky_relu(m, alpha):
    A = tvm.placeholder((m,), name='A')
    B = topi.cpp.nn.leaky_relu(A, alpha)
    device = "llvm"
    target = topi.cpp.TEST_create_target(device)
    s = topi.cpp.generic.schedule_injective(target, [B])

    a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [A, B], device, name="leaky_relu")
    foo(a, b)
    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #16
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        k = 10.0
        dilation = (1, 1)
        with tvm.target.create(device):
            A = tvm.placeholder((batch, in_channel, in_size, in_size), name='A')
            W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
            B = topi.nn.conv2d(A, W, stride, padding, dilation)
            if typ == "add":
                C = B + k
            elif typ == "sub":
                C = B - k
            elif typ == "mul":
                C = B * k
            elif typ == "div":
                C = B / k
            else:
                raise NotImplementedError()
            s = topi.generic.schedule_conv2d_nchw([C])

        foo = tvm.build(s, [A, W, B, C], device, name="conv2d_scalar_" + typ)

        a_npy = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
        w_npy = np.random.uniform(size=get_const_tuple(W.shape)).astype(W.dtype)
        b_npy = topi.testing.conv2d_nchw_python(a_npy, w_npy, stride, padding)
        c_npy = np.random.uniform(size=get_const_tuple(B.shape)).astype(B.dtype)
        if typ == "add":
            c_npy = b_npy + k
        elif typ == "sub":
            c_npy = b_npy - k
        elif typ == "mul":
            c_npy = b_npy * k
        elif typ == "div":
            c_npy = b_npy / k
        else:
            raise NotImplementedError()

        a_nd = tvm.nd.array(a_npy, ctx)
        w_nd = tvm.nd.array(w_npy, ctx)
        b_nd = tvm.nd.array(np.empty(b_npy.shape).astype(B.dtype), ctx)
        c_nd = tvm.nd.array(np.empty(c_npy.shape).astype(C.dtype), ctx)
        foo(a_nd, w_nd, b_nd, c_nd)
        tvm.testing.assert_allclose(c_nd.asnumpy(), c_npy, rtol=1E-4, atol=1E-4)
Example #17
 def check_device(device):
     ctx = tvm.context(device, 0)
     if not ctx.exist:
         print("Skip because %s is not enabled" % device)
         return
     print("Running on target: %s" % device)
     a = tvm.nd.array(a_np, ctx)
     w = tvm.nd.array(w_np, ctx)
     b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
     c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
     func1 = tvm.build(s1, [A, W, B], device)
     func2 = tvm.build(s2, [A, W, C], device)
     func1(a, w, b)
     func2(a, w, c)
     tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
     tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #18
def verify_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)
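For reference, the values produced by topi.testing.softmax_python for the 2-D input used here are assumed to match the standard numerically stable numpy formulation over the last axis; a minimal sketch of that reference, shown for illustration only:

import numpy as np
def softmax_ref(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)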
Example #19
def verify_global_pool(n, c, h, w, pool_type):
    A = tvm.placeholder((n, c, h, w), name='A')
    B = topi.cpp.nn.global_pool(A, pool_code[pool_type])
    B = topi.cpp.nn.relu(B)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    if pool_type == 'avg':
        b_np = np.mean(a_np, axis=(2,3), keepdims=True)
    elif pool_type =='max':
        b_np = np.max(a_np, axis=(2,3), keepdims=True)
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            s = topi.cpp.generic.default_schedule(target, [B], False)
        else:
            s = topi.cpp.cuda.schedule_global_pool(target, [B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)
Example #20
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
            print("Skip because int8 intrinsics are not available")
            return

        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.group_conv2d_nchw(A, W, stride, padding, dilation, groups, out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_group_conv2d_nchw([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %\
                (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" % \
            (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #21
def verify_relu(m, n, dtype):
    A = tvm.placeholder((m, n), name='A', dtype=dtype)
    B = topi.cpp.nn.relu(A)
    assert B.dtype == dtype

    a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            s = topi.cpp.generic.schedule_injective(target, [B])
        else:
            s = topi.cpp.cuda.schedule_injective(target, [B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)
Example #22
def verify_log_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)
    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ["opengl"]:
        check_device(device)
Example #23
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.conv2d_NCHWc(A, W, (stride, stride), (padding, padding),
                                     (dilation, dilation),
                                     layout='NCHW%dc'%ic_block,
                                     out_layout="NCHW%dc"%oc_block,
                                     out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_conv2d_NCHWc([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                                  (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                                  (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-3)
Example #24
def verify_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.cpp.nn.softmax(A, 1)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            s = topi.cpp.generic.default_schedule(target, [B], False)
        else:
            s = topi.cpp.cuda.schedule_softmax(target, [B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)
Example #25
def verify_global_pool(n, c, h, w, pool_type):
    A = tvm.placeholder((n, c, h, w), name='A')
    B = topi.nn.global_pool(A, pool_type=pool_type)
    B = topi.nn.relu(B)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    if pool_type == 'avg':
        b_np = np.mean(a_np, axis=(2,3), keepdims=True)
    elif pool_type =='max':
        b_np = np.max(a_np, axis=(2,3), keepdims=True)
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_adaptive_pool(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
Example #26
def verify_region(batch, in_size, in_channel, n, classes, coords, background, l_softmax):
    '''Verify the region operator by comparing outputs from the TVM and numpy implementations'''
    in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    B = topi.vision.yolo2.region(A, n, classes, coords, background, l_softmax)

    a_shape = get_const_tuple(A.shape)
    dtype = A.dtype

    def get_ref_data_region():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        b_np = topi.testing.region_python(a_np, n, classes, coords, background, l_softmax)
        return a_np, b_np

    a_np, b_np = get_ref_data_region()
    def check_device(device):
        '''Check whether the device is enabled'''
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.vision.schedule_region([B])
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device)
        func(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
Example #27
 def check_device(device):
     if not tvm.module.enabled(device):
         print("Skip because %s is not enabled" % device)
         return
     ctx = tvm.context(device, 0)
     a = tvm.nd.array(a_np, ctx)
     w = tvm.nd.array(w_np, ctx)
     b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
     c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
     with tvm.build_config(auto_unroll_max_step=128,
                           unroll_explicit=device == 'rocm'):
         func1 = tvm.build(s1, [A, W, B], device)
         func1(a, w, b)
         tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
         func2 = tvm.build(s2, [A, W, C], device)
         func2(a, w, c)
         tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #28
def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
    in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
    B = topi.nn.conv2d_nchw(A, W, stride, padding)
    C = topi.nn.relu(B)

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d.verify_con2d_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv2d_nchw_python(a_np, w_np, stride, padding)
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s1 = topi.generic.schedule_conv2d_nchw([B])
            s2 = topi.generic.schedule_conv2d_nchw([C])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        with tvm.build_config(auto_unroll_max_step=1400,
                              unroll_explicit=(device != "cuda")):
            func1 = tvm.build(s1, [A, W, B], device, name="conv2d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
            func2 = tvm.build(s2, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
            func1(a, w, b)
            func2(a, w, c)
            np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
            np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['opengl']:
        check_device(device)
Example #29
def compute_conv2d_transpose(attrs, inputs, out_dtype, target):
    """Compute definition of conv2d_transpose"""
    padding = get_const_tuple(attrs.padding)
    strides = get_const_tuple(attrs.strides)
    dilation = get_const_tuple(attrs.dilation)
    groups = attrs.groups
    layout = attrs.data_layout
    out_dtype = attrs.out_dtype
    out_dtype = (inputs[0].dtype if (out_dtype == "same" or out_dtype == "")
                 else out_dtype)
    assert layout == "NCHW", "only support nchw for now"
    assert dilation == (1, 1), "not support dilate now"
    assert groups == 1, "only support groups == 1 for now"
    out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding, out_dtype)
    output_padding = get_const_tuple(attrs.output_padding)
    out = topi.nn.pad(out,
                      [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
    return [out]
Example #30
def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1):
    in_height = in_width = in_size

    A = tvm.placeholder((in_height, in_width, in_channel, batch), name='A')
    W = tvm.placeholder((kernel, kernel, in_channel, num_filter), name='W')
    B = topi.nn.conv2d_hwcn(A, W, stride, padding, dilation)
    C = topi.nn.relu(B)
    s1 = topi.cuda.schedule_conv2d_hwcn([B])
    s2 = topi.cuda.schedule_conv2d_hwcn([C])

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d_hwcn.verify_hwcn")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        dw_np = topi.testing.dilate_python(w_np, (dilation, dilation, 1, 1))
        b_np = topi.testing.conv2d_hwcn_python(a_np, dw_np, stride, padding)
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np
    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        func1 = tvm.build(s1, [A, W, B], device)
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)
Example #31
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if dtype == "float16" and device == "cuda" and not have_fp16(
                tvm.gpu(0).compute_version):
            print("Skip because %s does not have fp16 support" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_elemwise(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #32
 def check_device(device):
     ctx = tvm.context(device, 0)
     if not ctx.exist:
         print("Skip because %s is not enabled" % device)
         return
     print("Running on target: %s" % device)
     fcompute, fschedule = topi.testing.dispatch(device,
                                                 _conv3d_ndhwc_implement)
     with tvm.target.create(device):
         B = fcompute(A, W, stride, padding, dilation, dtype)
         s = fschedule([B])
     ctx = tvm.context(device, 0)
     a = tvm.nd.array(a_np, ctx)
     w = tvm.nd.array(w_np, ctx)
     b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                      ctx)
     func = tvm.build(s, [A, W, B], device)
     func(a, w, b)
     tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #33
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        with tvm.target.create(device):
            b = topi.vision.rcnn.roi_align_nchw(a, rois, pooled_size=pooled_size,
                                                spatial_scale=spatial_scale,
                                                sample_ratio=sample_ratio)
            s = topi.generic.schedule_roi_align(b)

        tvm_a = tvm.nd.array(a_np, ctx)
        tvm_rois = tvm.nd.array(rois_np, ctx)
        tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), ctx=ctx)
        f = tvm.build(s, [a, rois, b], device)
        f(tvm_a, tvm_rois, tvm_b)
        tvm.testing.assert_allclose(tvm_b.asnumpy(), b_np, rtol=1e-3)
Example #34
def split_shape_func(attrs, inputs, _):
    """
    Shape function for split op.
    """
    if isinstance(attrs.indices_or_sections, (int, tvm.tir.IntImm)):
        indices_or_sections = get_const_int(attrs.indices_or_sections)
    else:
        indices_or_sections = get_const_tuple(attrs.indices_or_sections)

    axis = get_const_int(attrs.axis)

    num_out = indices_or_sections if isinstance(indices_or_sections, int) \
        else len(indices_or_sections) + 1
    if isinstance(indices_or_sections, int):
        indices_or_sections = [indices_or_sections]
    return [_split_shape_func(inputs[0],
                              convert(i),
                              convert(indices_or_sections),
                              convert(axis)) for i in range(num_out)]
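The output count used above depends on the form of indices_or_sections: an integer N means N equal sections, while a tuple of k split points yields k + 1 outputs; a standalone illustration of that rule:

indices_or_sections = (2, 5, 8)                 # three split points along the axis
num_out = len(indices_or_sections) + 1          # -> 4 output tensors
assert num_out == 4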
Example #35
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s_func = topi.testing.dispatch(device, _pool_grad_schedule)
            s = s_func(PoolGrad)

        a = tvm.nd.array(a_np, ctx)
        out_grad = tvm.nd.array(out_grad_np, ctx)
        pool_grad = tvm.nd.array(
            np.zeros(get_const_tuple(PoolGrad.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, OutGrad, PoolGrad], device)
        f(a, out_grad, pool_grad)
        tvm.testing.assert_allclose(pool_grad.asnumpy(),
                                    pool_grad_np,
                                    rtol=1e-5)
Example #36
    def check_device(device):
        if not tvm.module.enabled(device.split(' ')[0]):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        ctx = tvm.context(device.split(' ')[0], 0)
        with tvm.target.create(device):
            B = topi.cuda.conv2d_cuda(A, W, stride, 0,
                                      layout='NCHW')  # Return NCHW
            s = topi.cuda.schedule_conv2d_nchw([B])  # Borrow NCHW schedule

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        func = tvm.build(s, [A, W, B], target="cuda", target_host="llvm")
        func(a, w, b)

        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #37
    def check_device(device):
        if not tvm.runtime.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.context(device, 0)
        # Build the kernel
        f1 = tvm.build(s1, [Input, Filter, DepthwiseConv2d], device)
        f2 = tvm.build(s2, [Input, Filter, Scale, Shift, ScaleShift], device)
        f3 = tvm.build(s3, [Input, Filter, Scale, Shift, Relu], device)
        # Prepare data
        input_tvm = tvm.nd.array(input_np, ctx)
        filter_tvm = tvm.nd.array(filter_np, ctx)
        scale_tvm = tvm.nd.array(scale_np, ctx)
        shift_tvm = tvm.nd.array(shift_np, ctx)

        depthwise_conv2d_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(DepthwiseConv2d.shape),dtype=DepthwiseConv2d.dtype), ctx)
        scale_shift_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(ScaleShift.shape), dtype=ScaleShift.dtype), ctx)
        relu_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(Relu.shape), dtype=Relu.dtype), ctx)
        # Measure time cost of kernel 1 (depthwise_conv2d)
        timer_1 = f1.time_evaluator(f1.entry_name, ctx, number=1000)
        tcost_1 = timer_1(input_tvm, filter_tvm, depthwise_conv2d_tvm).mean
        # Measure time cost of kernel 2 (depthwise_conv2d + scale_shift)
        timer_2 = f2.time_evaluator(f2.entry_name, ctx, number=1000)
        tcost_2 = timer_2(input_tvm, filter_tvm, scale_tvm, shift_tvm, scale_shift_tvm).mean
        # Measure time cost of kernel 3 (depthwise_conv2d + scale_shift + relu)
        timer_3 = f3.time_evaluator(f3.entry_name, ctx, number=1000)
        tcost_3 = timer_3(input_tvm, filter_tvm, scale_tvm, shift_tvm, relu_tvm).mean
        print("Input shape = " + str(get_const_tuple(Input.shape)))
        print("Filter shape = " + str(get_const_tuple(Filter.shape)))
        print("Stride = (%d, %d)" % (stride_h, stride_w))
        print("padding = %s\n" % padding)
        print("Output shape = " + str(get_const_tuple(DepthwiseConv2d.shape)))
        print("average time cost of 1000 runs (depthwise_conv2d) = %g us" % (tcost_1*1e6))
        print("average time cost of 1000 runs (depthwise_conv2d + scale_shift) = %g us" % (tcost_2*1e6))
        print("average time cost of 1000 runs (depthwise_conv2d + scale_shift + relu) = %g us" % (tcost_3*1e6))
        # correctness
        depthwise_conv2d_scipy = topi.testing.depthwise_conv2d_python_nchw(input_np, filter_np, stride=[stride_h, stride_w], padding=padding)
        scale_shift_scipy = np.zeros(shape=get_const_tuple(ScaleShift.shape))
        for c in range(in_channel * channel_multiplier):
            scale_shift_scipy[:,c,:,:] = depthwise_conv2d_scipy[:,c,:,:] * scale_np[c] + shift_np[c]
        relu_scipy = np.maximum(scale_shift_scipy, 0)
        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
        tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
        print("success")
Example #38
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.conv2d(A,
                               W,
                               stride,
                               padding,
                               dilation,
                               layout='NCHW',
                               out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_conv2d_nchw([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)
Example #39
 def check_device(device):
     ctx = tvm.context(device, 0)
     if not ctx.exist:
         print("Skip because %s is not enabled" % device)
         return
     print("Running on target: %s" % device)
     for fcompute, fschedule in topi.testing.dispatch(
             device, _dense_implement):
         with tvm.target.create(device):
             D = fcompute(A, B, C if use_bias else None)
             D = topi.nn.relu(D)
             s = fschedule([D])
         a = tvm.nd.array(a_np, ctx)
         b = tvm.nd.array(b_np, ctx)
         c = tvm.nd.array(c_np, ctx)
         d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype),
                          ctx)
         f = tvm.build(s, [A, B, C, D], device, name="dense")
         f(a, b, c, d)
         tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
Example #40
 def check_device(device):
     if not tvm.runtime.enabled(device):
         print("Skip because %s is not enabled" % device)
         return
     print("Running on target: %s" % device)
     with tvm.target.create(device):
         B = topi.nn.conv2d(A,
                            W, (stride, stride),
                            padding, (dilation, dilation),
                            layout='NHWC',
                            out_dtype=dtype)
         s = topi.generic.schedule_conv2d_nhwc([B])
     ctx = tvm.context(device, 0)
     a = tvm.nd.array(a_np, ctx)
     w = tvm.nd.array(w_np, ctx)
     b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                      ctx)
     func = tvm.build(s, [A, W, B], device)
     func(a, w, b)
     tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #41
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        with tvm.target.create(device):
            B = topi.nn.conv1d(A, W, stride, padding, dilation, layout,
                               'float32')
            if layout == 'NCW':
                s = topi.generic.schedule_conv1d_ncw([B])
            else:
                s = topi.generic.schedule_conv1d_nwc([B])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)

        func = tvm.build(s, [A, W, B], device)
        func(a, w, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #42
 def check_device(device):
     if not tvm.module.enabled(device):
         print("Skip because %s is not enabled" % device)
         return
     print("Running on target: %s" % device)
     target = topi.cpp.TEST_create_target(device)
     if device == "llvm":
         s = topi.cpp.generic.schedule_dense(target, [D])
     elif device == "rocm":
         s = topi.cpp.rocm.schedule_dense(target, [D])
     else:
         s = topi.cpp.cuda.schedule_dense(target, [D])
     ctx = tvm.context(device, 0)
     a = tvm.nd.array(a_np, ctx)
     b = tvm.nd.array(b_np, ctx)
     c = tvm.nd.array(c_np, ctx)
     d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
     f = tvm.build(s, [A, B, C, D], device, name="dense")
     f(a, b, c, d)
     np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
Example #43
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if not nvcc.have_tensorcore(ctx.compute_version):
            print("skip because gpu does not support Tensor Cores")
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            fcompute, fschedule = topi.testing.dispatch(
                device, _conv3d_ndhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, 'float32')
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)
Example #44
def run_tvm(nwarmup: int,
            nloops: int,
            args: dict,
            out,
            verbose: bool = False,
            debug: bool = False,
            scheduling=None) -> Result:
    """ Take dict[TVM_Tensor, np_array] as args, convert them to TVM tensors and
  call `lam`.  Result of lambda is converted back to numpy array and returned.
  """

    ctx = tvm.cpu(0)
    pls = []  # placeholders
    vals_nd = []  # initial values
    for pl, val in args.items():
        pls.append(pl)
        vals_nd.append(tvm.nd.array(val, ctx=ctx))

    sout = tvm.create_schedule(out.op)
    if scheduling is not None:
        scheduling(sout)

    ir = tvm.lower(sout, pls + [out], simple_mode=True)
    # print(type(ir), ir.__str__)
    if debug:
        print(ir)
    mout = tvm.build(sout, pls + [out])
    out_nd = tvm.nd.array(np.zeros(get_const_tuple(out.shape),
                                   dtype=out.dtype),
                          ctx=ctx)

    perfs: List[float] = []
    for i in range(nwarmup + nloops):
        tb = perf_counter()

        mout(*(vals_nd + [out_nd]))

        te = perf_counter()
        if i >= nwarmup:
            perfs.append(te - tb)
        if verbose:
            print("TVM", te - tb)
    return Result.fromPasses(out_nd.asnumpy(), perfs)
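A minimal usage sketch of run_tvm (the element-wise workload, sizes, and the scheduling callback below are hypothetical; it assumes the same old-style tvm.placeholder/tvm.compute API used above):

n = 1024
A = tvm.placeholder((n,), name='A')
B = tvm.compute((n,), lambda i: A[i] * 2.0, name='B')
a_np = np.random.uniform(size=(n,)).astype(A.dtype)

def parallel_outer(s):
    # optional scheduling hook applied to the created schedule
    s[B].parallel(B.op.axis[0])

res = run_tvm(nwarmup=2, nloops=10, args={A: a_np}, out=B,
              verbose=True, scheduling=parallel_outer)  # Result with output and timings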
Ejemplo n.º 45
0
def verify_region(batch, in_size, in_channel, n, classes, coords, background,
                  l_softmax):
    '''Verify the region operator by comparing the TVM output with the NumPy
    reference implementation (a sample invocation follows this example)'''
    in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    B = topi.cpp.yolo.region(A, n, classes, coords, background, l_softmax)

    a_shape = get_const_tuple(A.shape)
    dtype = A.dtype

    def get_ref_data_region():
        '''Randomly initialize the input data and get the reference output for the region operation'''
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        b_np = topi.testing.region_python(a_np, n, classes, coords, background,
                                          l_softmax)
        return a_np, b_np

    a_np, b_np = get_ref_data_region()

    def check_device(device):
        '''Check whether the device is available and, if so, build and run the program'''
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            s = topi.cpp.generic.default_schedule(target, [B], False)
        else:
            s = topi.cpp.rocm.schedule_region(target, [B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        func = tvm.build(s, [A, B], device, name="region")
        func(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
        check_device(device)
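A hedged sample invocation of verify_region (the values below are hypothetical, sized like a YOLOv2-style region layer, and are not taken from the original test suite):

# batch=1, 19x19 feature map, 425 channels = n * (coords + classes + 1) = 5 * (4 + 80 + 1)
verify_region(1, 19, 425, 5, 80, 4, 0, 1)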
Ejemplo n.º 46
0
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            if bgemm == "direct":
                fcompute, fschedule = topi.testing.dispatch(
                    device, _conv2d_nhwc_winograd_direct)
            elif bgemm == "tensorcore":
                fcompute, fschedule = topi.testing.dispatch(
                    device, _conv2d_nhwc_winograd_tensorcore)
            C = fcompute(A, W, stride, padding, dilation, 'float32')
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, c)

        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=2e-3)
Ejemplo n.º 47
0
def conv1d_strategy(attrs, inputs, out_type, target):
    """conv1d generic strategy"""
    logger.warning("conv1d is not optimized for this platform.")
    layout = attrs.data_layout
    dilation = get_const_tuple(attrs.dilation)
    if dilation[0] < 1:
        raise ValueError("dilation should be a positive value")
    strategy = _op.OpStrategy()
    if layout == "NCW":
        strategy.add_implementation(
            wrap_compute_conv1d(topi.nn.conv1d_ncw),
            wrap_topi_schedule(topi.generic.schedule_conv1d_ncw),
            name="conv1d_ncw.generic")
    elif layout == "NWC":
        strategy.add_implementation(
            wrap_compute_conv1d(topi.nn.conv1d_nwc),
            wrap_topi_schedule(topi.generic.schedule_conv1d_nwc),
            name="conv1d_nwc.generic")
    else:
        raise ValueError("Unsupported conv1d layout {}".format(layout))
    return strategy
Ejemplo n.º 48
0
def squeeze_shape_func(attrs, inputs, _):
    """
    Shape function for squeeze op.
    """
    axis = attrs.axis if attrs.axis is None else get_const_tuple(attrs.axis)
    keep_axes = []
    if axis is not None:
        for i in range(inputs[0].shape[0].value):
            if i not in axis:
                keep_axes.append(i)

    # Due to the current Relay type system, it is possible that even a
    # statically shaped kernel function needs a shape function. To handle
    # this case, we allow axis to be None in the squeeze shape func for now.
    # A plain-Python sketch of the axis filtering follows this function.
    # TODO(kevinthesun): Enhance relay type system to avoid this.
    if keep_axes:
        out = _squeeze_shape_func(inputs[0], convert(keep_axes))
    else:
        out = te.compute((), lambda *indices: 0)
    return [out]
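For intuition, a plain-Python sketch of the same axis filtering on a concrete, hypothetical shape (independent of the TE helpers used above):

# squeezing axes (0, 2) out of shape (1, 32, 1, 7) keeps axes 1 and 3 -> (32, 7)
shape = (1, 32, 1, 7)
axis = (0, 2)
keep_axes = [i for i in range(len(shape)) if i not in axis]
assert tuple(shape[i] for i in keep_axes) == (32, 7)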
Ejemplo n.º 49
0
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
            print("Skip because int8 intrinsics are not available")
            return

        print("Running on target: %s" % device)
        with tvm.target.create(device):
            D = topi.nn.dense(A, B, C if use_bias else None, out_dtype=out_dtype)
            D = topi.nn.relu(D)
            s = topi.generic.schedule_dense([D])
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(c_np, ctx)
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=out_dtype), ctx)
        f = tvm.build(s, [A, B, C, D], device, name="dense")
        f(a, b, c, d)
        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
Ejemplo n.º 50
0
def verify_region(batch, in_size, in_channel, n, classes, coords, background,
                  l_softmax):
    '''Verify the region operator by comparing the TVM output with the NumPy
    reference implementation'''
    in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    B = topi.vision.yolo2.region(A, n, classes, coords, background, l_softmax)

    a_shape = get_const_tuple(A.shape)
    dtype = A.dtype

    def get_ref_data_region():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        b_np = topi.testing.region_python(a_np, n, classes, coords, background,
                                          l_softmax)
        return a_np, b_np

    a_np, b_np = get_ref_data_region()

    def check_device(device):
        '''Check whether the device is enabled; if so, build and run the program'''
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            if device == 'llvm':
                s = topi.generic.vision.schedule_region([B])
            else:
                s = topi.cuda.vision.schedule_region([B])
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        func = tvm.build(s, [A, B], device)
        func(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
Ejemplo n.º 51
0
def verify_shortcut(batch, in_size, in_channel):
    '''Verify the shortcut operator by comparing the TVM output with the NumPy
    reference implementation (a sample invocation follows this example)'''
    in_height = in_width = in_size

    A1 = tvm.placeholder((batch, in_channel, in_height, in_width), name='A1')
    A2 = tvm.placeholder((batch, in_channel, in_height, in_width), name='A2')
    B = topi.vision.shortcut(A1, A2)

    a_shape = get_const_tuple(A1.shape)
    dtype = A1.dtype

    def get_ref_data_shortcut():
        a_np1 = np.random.uniform(size=a_shape).astype(dtype)
        a_np2 = np.random.uniform(size=a_shape).astype(dtype)
        b_np = topi.testing.shortcut_python(a_np1, a_np2)
        return a_np1, a_np2, b_np

    a_np1, a_np2, b_np = get_ref_data_shortcut()

    def check_device(device):
        '''Check whether the device is enabled; if so, build and run the program'''
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective([B])

        a1 = tvm.nd.array(a_np1, ctx)
        a2 = tvm.nd.array(a_np2, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        func = tvm.build(s, [A1, A2, B], device)
        func(a1, a2, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
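A hedged sample invocation of verify_shortcut (hypothetical sizes):

verify_shortcut(1, 64, 32)  # batch=1, 64x64 feature map, 32 channels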
Ejemplo n.º 52
0
def weight_prepack_conv2d(attrs, inputs, tinfos):
    import ast
    data = tinfos[0]
    kernel = tinfos[1]
    padding = ast.literal_eval(attrs['padding'])
    stride = ast.literal_eval(attrs['strides'])
    wkl = _get_workload(data, kernel, stride, padding, 'float32')
    sch = _get_schedule_conv(wkl)
    is_kernel_1x1 = isinstance(sch, AVX512Conv1x1Fwd)

    ic_bn, oc_bn = sch.ic_bn, sch.oc_bn

    new_attrs = {k: attrs[k] for k in attrs.keys()}
    new_attrs.pop('layout', None)

    kernel_sym = inputs[1]
    oc, ic, h, w = get_const_tuple(tinfos[1].shape)
    OC = oc // oc_bn
    IC = ic // ic_bn
    trans_kernel = sym.transpose(kernel_sym, axes=(1, 2, 3, 0))
    trans_kernel = sym.reshape(trans_kernel, shape=(ic, h, w, OC, oc_bn))
    trans_kernel = sym.transpose(trans_kernel, axes=(1, 2, 3, 4, 0))
    trans_kernel = sym.reshape(trans_kernel,
                               shape=(h, w, OC, oc_bn, IC, ic_bn))
    if is_kernel_1x1:
        # (oc, ic, h, w) -> (OC, IC, ic, oc, h, w)
        trans_kernel = sym.transpose(trans_kernel, axes=(2, 4, 5, 3, 0, 1))
    else:
        # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
        trans_kernel = sym.transpose(trans_kernel, axes=(2, 4, 0, 1, 5, 3))

    if attrs.get_bool('use_bias'):
        bias = inputs[2]
        bias = sym.reshape(bias, shape=(OC, oc_bn))
        return sym.contrib.conv2d_nchw_kernel_packed(inputs[0], trans_kernel,
                                                     bias, **new_attrs)
    else:
        return sym.contrib.conv2d_nchw_kernel_packed(inputs[0], trans_kernel,
                                                     **new_attrs)
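For reference, a hedged NumPy sketch of the non-1x1 packing produced by the symbolic transpose/reshape chain above, i.e. (oc, ic, h, w) -> (OC, IC, h, w, ic_bn, oc_bn); the sizes are hypothetical and it assumes oc and ic divide evenly by oc_bn and ic_bn:

def pack_kernel_numpy(kernel, ic_bn, oc_bn):
    # (oc, ic, h, w) -> (oc // oc_bn, ic // ic_bn, h, w, ic_bn, oc_bn)
    oc, ic, h, w = kernel.shape
    return kernel.reshape(oc // oc_bn, oc_bn, ic // ic_bn, ic_bn, h, w) \
                 .transpose(0, 2, 4, 5, 3, 1)

k = np.random.rand(64, 32, 3, 3).astype('float32')
packed = pack_kernel_numpy(k, ic_bn=8, oc_bn=16)
assert packed.shape == (4, 4, 3, 3, 8, 16)
# element (OC, IC, h, w, ic_idx, oc_idx) maps back to (OC*oc_bn + oc_idx, IC*ic_bn + ic_idx, h, w)
assert packed[1, 2, 0, 0, 3, 5] == k[1 * 16 + 5, 2 * 8 + 3, 0, 0]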
Ejemplo n.º 53
0
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if device == "cuda" and not tvm.contrib.nvcc.have_int8(
                ctx.compute_version):
            print("Skip because int8 intrinsics are not available")
            return

        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.group_conv2d_nchw(A,
                                          W,
                                          stride,
                                          padding,
                                          dilation,
                                          groups,
                                          out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_group_conv2d_nchw([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding, dilation, groups))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding, dilation, groups))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Ejemplo n.º 54
0
def verify_relu(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.nn.relu(A)
    s = topi.cuda.schedule_elemwise(B)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal']:
        check_device(device)
Ejemplo n.º 55
0
    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        ctx = tvm.context(device, 0)
        input = tvm.nd.array(input_np, ctx)
        filter_d = tvm.nd.array(filter_np_d, ctx)
        filter_1 = tvm.nd.array(filter_np_1, ctx)
        output = tvm.nd.array(
            np.zeros(get_const_tuple(Output.shape), dtype=Output.dtype), ctx)

        with tvm.target.create(device):
            if layout == "NCHW":
                s = schedule_depth_1by1_fused_nchw([Output])
            else:
                s = schedule_depth_1by1_fused_nhwc([Output])
        print(
            tvm.lower(s, [Input, Filter_d, Filter_1, Output],
                      simple_mode=True))

        func = tvm.build(s, [Input, Filter_d, Filter_1, Output],
                         device,
                         name=("Depthwise1by1Fused_%d_%d" %
                               (Input.shape[1], Input.shape[2])))
        print(func.imported_modules[0].get_source())
        # func(a, w, b)
        timer_1 = func.time_evaluator(func.entry_name, ctx, number=10)
        tcost_1 = timer_1(input, filter_d, filter_1, output).mean
        # np.testing.assert_allclose(output.asnumpy(), output_np, rtol=1e-5)
        d = ~np.isclose(output.asnumpy(), output_np, rtol=1e-5)
        print(output.asnumpy()[d])
        print(output_np[d])
        print(np.where(d))
        print(
            "Depthwise & 1by1 Fused ({}): average running time is {:.2f} us.".
            format(layout, tcost_1 * 1e6))
Ejemplo n.º 56
0
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.conv2d_NCHWc(A, W, (stride, stride), (padding, padding),
                                     (dilation, dilation),
                                     layout='NCHW%dc'%ic_block,
                                     out_layout="NCHW%dc"%oc_block,
                                     out_dtype=dtype)
            s = topi.generic.schedule_conv2d_NCHWc([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        func = tvm.build(s, [A, W, C], device,
                         name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                              (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
        # print(tvm.lower(s, [A, W, C], simple_mode=True))
        func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-3)
Ejemplo n.º 57
0
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        with tvm.target.create(device):
            B = topi.nn.conv2d(A,
                               W,
                               stride,
                               padding,
                               dilation,
                               layout='NHWC',
                               out_dtype="int32")
            s = topi.x86.schedule_conv2d_nhwc_pack_int8([B])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        func = tvm.build(s, [A, W, B], device)
        func(a, w, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Ejemplo n.º 58
0
def verify_global_pool(dshape, pool_type, layout='NCHW'):
    """verify function of global_pool"""
    assert layout in ["NCHW", "NHWC"]
    A = te.placeholder(shape=dshape, name='A')
    B = topi.nn.global_pool(A, pool_type=pool_type, layout=layout)
    B = topi.nn.relu(B)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)

    axis = (layout.find('H'), layout.find('W'))
    if pool_type == 'avg':
        b_np = np.mean(a_np, axis=axis, keepdims=True)
    elif pool_type == 'max':
        b_np = np.max(a_np, axis=axis, keepdims=True)
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s_func = topi.testing.dispatch(device, _adaptive_pool_schedule)
            if device == "cuda":
                s = s_func(B, layout)
            else:
                s = s_func(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
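A hedged sample invocation of verify_global_pool (hypothetical shapes):

verify_global_pool((1, 1024, 7, 7), 'avg')                # default NCHW layout
verify_global_pool((1, 7, 7, 1024), 'max', layout='NHWC')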
Ejemplo n.º 59
0
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        fcompute, fschedule = topi.testing.dispatch(device,
                                                    _conv3d_ncdhw_implement)
        with tvm.target.create(device):
            C = fcompute(A, W, (stride, stride, stride), padding,
                         (dilation, dilation, dilation), dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              space_kernel, stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter,
                              space_kernel, stride, padding_sum, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)
Ejemplo n.º 60
0
    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        if default_schedule:
            device += " -libs=cudnn"
        print("Running on target: %s" % device)

        with tvm.target.create(device):
            if default_schedule:
                B = topi.cuda.conv2d.conv2d_cuda(autotvm.get_config(),
                                                 A, W, stride, 0,
                                                 dilation=1, algo=cudnn_algo)
                s = topi.cuda.schedule_conv2d_nchw_cuda(None, [B])
            else:
                B = topi.nn.conv2d_nhwc(A, W, stride, padding, dilation=1)
                s = schedule_conv2d_nhwc([B], A)

        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)

        func = tvm.build(s, [A, W, B],
                         device,
                         name=("Conv2d_%d_%d" % (in_height, in_width)))
        # func(a, w, b)
        timer_1 = func.time_evaluator(func.entry_name, ctx, number=10)
        tcost_1 = timer_1(a, w, b).mean
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        print("1x1 convolution: average running time is {:.2f} us.".format(
            tcost_1 * 1e6))