def test_layout_transform():
    in_shape = (1, 32, 8, 8)
    A = tvm.placeholder(shape=in_shape, dtype="float32", name="A")
    B = topi.layout_transform(A, "NCHW", "NCHW16c")

    input = np.random.uniform(size=in_shape).astype(A.dtype)
    output = np.transpose(input, axes=(0, 2, 3, 1))
    output = np.reshape(output, newshape=(1, 8, 8, 2, 16))
    output = np.transpose(output, axes=(0, 3, 1, 2, 4))

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        tvm_input = tvm.nd.array(input, ctx)
        tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=B.dtype)
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        f = tvm.build(s, [A, B], device, name="layout_transform")
        f(tvm_input, tvm_output)
        tvm.testing.assert_allclose(tvm_output.asnumpy(), output)

    for backend in get_all_backend():
        check_device(backend)

def verify_gather_nd(src_shape, indices_src, indices_dtype):
    src_dtype = "float32"
    indices_src = np.array(indices_src, dtype=indices_dtype)
    A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A")
    indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
    out_tensor = topi.gather_nd(a=A, indices=indices)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(out_tensor)

        func = tvm.build(s, [A, indices, out_tensor], device, name="gather_nd")

        shape_size = 1
        for i in range(len(src_shape)):
            shape_size = shape_size * src_shape[i]
        data_npy = np.arange(shape_size, dtype=src_dtype).reshape(src_shape)
        out_npys = topi.testing.gather_nd_python(data_npy, indices_src)

        data_nd = tvm.nd.array(data_npy, ctx)
        indices_nd = tvm.nd.array(indices_src, ctx)
        out_nd = tvm.nd.empty(out_npys.shape, ctx=ctx, dtype=src_dtype)
        func(data_nd, indices_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)

    for device in get_all_backend():
        check_device(device)

def verify_arange(start, stop, step):
    if start is None and step is None:
        A = topi.arange(stop)
        a_np = np.arange(stop)
    elif start is None:
        A = topi.arange(stop, step=step)
        a_np = np.arange(stop, step=step)
    elif step is None:
        A = topi.arange(start, stop)
        a_np = np.arange(start, stop)
    else:
        A = topi.arange(start, stop, step)
        a_np = np.arange(start, stop, step)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(A)
        f = tvm.build(s, [A], device, name="arange")
        a_nd = tvm.nd.empty(a_np.shape, dtype='float32', ctx=ctx)
        f(a_nd)
        tvm.testing.assert_allclose(a_nd.asnumpy(), a_np)

    for device in get_all_backend():
        check_device(device)

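# A minimal driver sketch for verify_arange; the argument combinations below
# are illustrative (not from the original suite) and chosen to hit all four
# start/stop/step branches above.
def test_arange():
    verify_arange(None, 20, None)
    verify_arange(None, 20, 2)
    verify_arange(1, 20, None)
    verify_arange(1, 20, 2)
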
def verify_relu(m, n, dtype="float32"):
    A = te.placeholder((m, n), name='A', dtype=dtype)
    B = topi.nn.relu(A)

    a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if dtype == "float16" and device == "cuda" and not have_fp16(tvm.gpu(0).compute_version):
            print("Skip because %s does not have fp16 support" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = tvm.topi.testing.get_elemwise_schedule(device)(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_relu(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.nn.relu(A)

    a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_elemwise(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify(from_dtype, to_dtype, low=-100, high=100):
    shape = (5, 4)
    A = tvm.placeholder(shape, dtype=from_dtype, name="A")
    B = topi.cast(A, to_dtype)

    if from_dtype == "bool":
        a_np = np.random.choice([True, False], size=shape)
    else:
        a_np = np.random.uniform(low, high, size=shape).astype(from_dtype)
    if to_dtype == "bool":
        a_np = a_np - a_np[2, 3]
    b_np = a_np.astype(to_dtype)

    for device in get_all_backend():
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            continue
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.empty(shape=shape, dtype=to_dtype, ctx=ctx)
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np)

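# A hedged usage sketch for verify; the dtype pairs are illustrative and cover
# both the bool-source and bool-destination branches above.
def test_cast():
    verify("int32", "float32")
    verify("float32", "bool")
    verify("bool", "int32")
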
def test_shape():
    in_shape = (8, 7, 13)
    dtype = "int32"
    A = tvm.placeholder(shape=in_shape, dtype="float32", name="A")
    B = topi.shape(A, dtype)

    input = np.random.uniform(size=in_shape).astype(A.dtype)
    output = np.asarray(in_shape).astype(dtype)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        tvm_input = tvm.nd.array(input, ctx)
        tvm_output = tvm.nd.empty(output.shape, ctx=ctx, dtype=dtype)
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        f = tvm.build(s, [A, B], device, name="shape")
        f(tvm_input, tvm_output)
        tvm.testing.assert_allclose(tvm_output.asnumpy(), output)

    for backend in get_all_backend():
        check_device(backend)

def test_apply(func, name, f_numpy, indata, dtype="bool"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=indata.shape, name="A", dtype=dtype)
    B = func(A)
    if isinstance(A, tvm.expr.PrimExpr):
        assert isinstance(B, tvm.expr.PrimExpr)
        return

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        foo = tvm.build(s, [A, B], device, name=name)

        data_npy = indata.astype(A.dtype)
        data_nd = tvm.nd.array(data_npy, ctx)

        out_npy = f_numpy(indata)
        out_nd = tvm.nd.array(np.empty(data_npy.shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

def verify_log_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_batch_matmul(batch, M, N, K):
    x = tvm.placeholder((batch, M, K), name='x')
    y = tvm.placeholder((batch, N, K), name='y')
    dtype = x.dtype

    # use memoize to pickle the test data for next time use
    @memoize("topi.tests.test_topi_batch_matmul")
    def get_ref_data():
        a_np = np.random.uniform(size=(batch, M, K)).astype(dtype)
        b_np = np.random.uniform(size=(batch, N, K)).astype(dtype)
        c_np = topi.testing.batch_matmul(a_np, b_np)
        return (a_np, b_np, c_np)

    # get the test data
    a_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            out = topi.nn.batch_matmul(x, y)
            s = topi.generic.schedule_batch_matmul([out])
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(out.shape), dtype=dtype), ctx)
        f = tvm.build(s, [x, y, out], device, name="batch_matmul")
        f(a, b, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

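# A minimal driver sketch; the batch/M/N/K values are hypothetical, picked so
# that x is (batch, M, K) and y is (batch, N, K) as required above.
def test_batch_matmul():
    verify_batch_matmul(1, 16, 16, 32)
    verify_batch_matmul(5, 16, 20, 32)
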
def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding,
                       dilation=1, add_bias=False, add_relu=False):
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))

    in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    W = tvm.placeholder((num_filter, in_channel, kernel, kernel), name='W')
    bias = tvm.placeholder((num_filter, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d_nchw.verify_conv2d_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = topi.testing.dilate_python(w_np, (1, 1, dilation, dilation))
        c_np = topi.testing.conv2d_nchw_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.conv2d(A, W, (stride, stride), (padding, padding),
                               (dilation, dilation), layout='NCHW', out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_conv2d_nchw([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        name = "relu_%d_%d_%d_%d_%d_%d_%d_%d" % \
               (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name=name)
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name=name)
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
            check_device(device)

def verify_clip(N, a_min, a_max, dtype):
    A = tvm.placeholder((N, N), dtype=dtype, name='A')
    B = topi.clip(A, a_min, a_max)
    s = tvm.create_schedule([B.op])

    # use memoize to pickle the test data for next time use
    @memoize("topi.tests.test_topi_clip")
    def get_ref_data():
        a_np = np.random.uniform(a_min * 2, a_max * 2, size=(N, N)).astype(dtype)
        b_np = np.clip(a_np, a_min, a_max)
        return a_np, b_np

    a_np, b_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device, name="clip")
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

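# A hedged driver sketch for verify_clip; the bounds and dtypes are illustrative.
# Note that get_ref_data draws from (2*a_min, 2*a_max), so both clipped and
# unclipped values are exercised.
def test_clip():
    verify_clip(1024, -127.0, 127.0, "float32")
    verify_clip(1024, -127, 127, "int16")
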
def verify_global_pool(n, c, h, w, pool_type):
    A = tvm.placeholder((n, c, h, w), name='A')
    B = topi.nn.global_pool(A, pool_type=pool_type)
    B = topi.nn.relu(B)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    if pool_type == 'avg':
        b_np = np.mean(a_np, axis=(2, 3), keepdims=True)
    elif pool_type == 'max':
        b_np = np.max(a_np, axis=(2, 3), keepdims=True)
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_adaptive_pool(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

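# A minimal driver sketch; the (n, c, h, w) values are hypothetical and exercise
# both the 'avg' and 'max' reference branches above.
def test_global_pool():
    verify_global_pool(1, 1024, 7, 7, 'avg')
    verify_global_pool(4, 1024, 7, 7, 'max')
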
def verify_log_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride, padding,
                        dilation=1, add_bias=False, add_relu=False):
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))

    in_depth = in_height = in_width = in_size

    A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
    W = tvm.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W')
    bias = tvm.placeholder((num_filter, 1, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ncdhw.verify_conv3d_ncdhw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = topi.testing.dilate_python(w_np, (1, 1, dilation, dilation, dilation))
        c_np = topi.testing.conv3d_ncdhw_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            C = topi.nn.conv3d(A, W, (stride, stride, stride), (padding, padding, padding),
                               (dilation, dilation, dilation), layout='NCDHW', out_dtype=dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.generic.schedule_conv3d_ncdhw([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        name = "relu_%d_%d_%d_%d_%d_%d_%d_%d" % \
               (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name=name)
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name=name)
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
            check_device(device)

def verify_conv1d(batch, in_channels, in_width, filters, kernel_size=3, stride=1,
                  dilation=1, padding='VALID', layout='NCW'):
    if layout == 'NCW':
        in_shape = [batch, in_channels, in_width]
        kernel_shape = [filters, in_channels, kernel_size]
    else:
        in_shape = [batch, in_width, in_channels]
        kernel_shape = [kernel_size, in_channels, filters]

    dtype = 'float32'
    A = te.placeholder(in_shape, name='A', dtype=dtype)
    W = te.placeholder(kernel_shape, name='W', dtype=dtype)

    def get_ref_data(layout):
        a_np = np.random.uniform(size=in_shape).astype(dtype)
        w_np = np.random.uniform(size=kernel_shape).astype(dtype)
        if layout == 'NWC':
            np_in = np.transpose(a_np, [0, 2, 1])
            np_w = np.transpose(w_np, [2, 1, 0])
        else:
            np_in = a_np
            np_w = w_np
        b_np = tvm.topi.testing.conv1d_ncw_python(np_in, np_w, stride, padding, dilation)
        if layout == 'NWC':
            b_np = np.transpose(b_np, [0, 2, 1])
        return a_np, w_np, b_np

    a_np, w_np, b_np = get_ref_data(layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        if layout == "NCW":
            fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_ncw_implement)
        else:
            fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv1d_nwc_implement)
        with tvm.target.create(device):
            B = fcompute(A, W, stride, padding, dilation, 'float32')
            s = fschedule([B])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)

        func = tvm.build(s, [A, W, B], device)
        func(a, w, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_resize(batch, in_channel, in_height, in_width, out_height, out_width,
                  layout='NCHW', align_corners=False, method="BILINEAR"):
    if layout == 'NCHW':
        A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32')
        dtype = A.dtype
        out_shape = (batch, in_channel, out_height, out_width)
        a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype)
    elif layout == 'NHWC':
        A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32')
        dtype = A.dtype
        out_shape = (batch, out_height, out_width, in_channel)
        a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype)
    else:
        raise NotImplementedError('Layout not supported {}'.format(layout))

    B = topi.image.resize(A, (out_height, out_width), layout=layout,
                          align_corners=align_corners, method=method)

    if method == "BILINEAR":
        b_np = topi.testing.bilinear_resize_python(a_np, (out_height, out_width),
                                                   layout, align_corners)
    else:
        scale_h = out_height / in_height
        scale_w = out_width / in_width
        b_np = topi.testing.upsampling_python(a_np, (scale_h, scale_w), layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)

    for device in get_all_backend():
        check_device(device)

def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
                                 stride, padding, output_padding):
    in_height, in_width = in_size
    kernel_height, kernel_width = kernel
    stride_height, stride_width = stride
    pad_top, pad_left, pad_bottom, pad_right = padding

    A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
    W = te.placeholder((in_channel, num_filter, kernel_height, kernel_width), name='W')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d_transpose.verify_conv2d_transpose_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv2d_transpose_nchw_python(
            a_np, w_np, stride, padding, output_padding)
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            fcompute, fschedule = topi.testing.dispatch(device, _conv2d_transpose_nchw_implement)
            B = fcompute(A, W,
                         [stride_height, stride_width],
                         [pad_top, pad_left, pad_bottom, pad_right],
                         A.dtype, output_padding)
            C = topi.nn.relu(B)
            s1 = fschedule([B])
            s2 = fschedule([C])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)

        func1 = tvm.build(s1, [A, W, B], device)
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_broadcast_binary_ele(lhs_shape, rhs_shape, ftopi, fnumpy,
                                lhs_min=-100, lhs_max=100,
                                rhs_min=-100, rhs_max=100,
                                dtype="float32"):
    # Build the logic and compile the function
    A = (te.var("A", dtype=dtype) if lhs_shape is None
         else te.placeholder(shape=lhs_shape, name="A", dtype=dtype))
    B = (te.var("B", dtype=dtype) if rhs_shape is None
         else te.placeholder(shape=rhs_shape, name="B", dtype=dtype))
    C = ftopi(A, B)
    if isinstance(A, tvm.tir.PrimExpr) and isinstance(B, tvm.tir.PrimExpr):
        assert isinstance(C, tvm.tir.PrimExpr)
        return

    def gen_operand(shape, low, high, ctx):
        if shape is None:
            npy = float(np.random.uniform(low=low, high=high))
            if dtype.startswith('int'):
                npy = int(npy)
            nd = npy
        else:
            npy = np.random.uniform(low=low, high=high, size=shape).astype(dtype)
            nd = tvm.nd.array(npy, ctx)
        return npy, nd

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_broadcast_schedule(device)(C)
        foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + ftopi.__name__)

        lhs_npy, lhs_nd = gen_operand(lhs_shape, lhs_min, lhs_max, ctx)
        rhs_npy, rhs_nd = gen_operand(rhs_shape, rhs_min, rhs_max, ctx)
        out_npy = fnumpy(lhs_npy, rhs_npy)

        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(C.dtype), ctx)
        foo(lhs_nd, rhs_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    for target in get_all_backend():
        check_device(target)
    check_device("sdaccel")

def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode,
                     count_include_pad=True, add_relu=False):
    iw = ih
    kw = kh
    sw = sh
    pt, pl, pb, pr = padding
    layout = "NCHW"
    A = tvm.placeholder((n, ic, ih, iw), name='A')
    B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
                     pool_type=pool_type, ceil_mode=ceil_mode,
                     layout="NCHW", count_include_pad=count_include_pad)
    dtype = A.dtype

    bshape = get_const_tuple(B.shape)
    ashape = get_const_tuple(A.shape)
    if ceil_mode:
        assert bshape[2] == int(math.ceil(float(ashape[2] - kh + pt + pb) / sh) + 1)
        assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pl + pr) / sw) + 1)
    else:
        assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1)
        assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1)

    OutGrad = tvm.placeholder(bshape, name='OutGrad')
    PoolGrad = topi.nn.pool_grad(OutGrad, A, kernel=[kh, kw], stride=[sh, sw],
                                 padding=padding, pool_type=pool_type, ceil_mode=ceil_mode,
                                 layout="NCHW", count_include_pad=count_include_pad)
    if add_relu:
        PoolGrad = topi.nn.relu(PoolGrad)

    a_np = np.random.uniform(low=0.001, size=(n, ic, ih, iw)).astype(dtype)
    out_grad_np = np.random.uniform(low=0.001, size=bshape).astype(dtype)
    pool_grad_np = topi.testing.pool_grad_nchw(a_np, out_grad_np, pool_size=(kh, kw),
                                               strides=(sh, sw), padding=padding,
                                               pool_type=pool_type, ceil_mode=ceil_mode,
                                               count_include_pad=count_include_pad)
    if add_relu:
        pool_grad_np = np.maximum(pool_grad_np, 0.)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_pool_grad(PoolGrad)
        a = tvm.nd.array(a_np, ctx)
        out_grad = tvm.nd.array(out_grad_np, ctx)
        pool_grad = tvm.nd.array(np.zeros(get_const_tuple(PoolGrad.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, OutGrad, PoolGrad], device)
        f(a, out_grad, pool_grad)
        tvm.testing.assert_allclose(pool_grad.asnumpy(), pool_grad_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_log_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    for device in get_all_backend():
        check_device(A, B, a_np, b_np, device, "log_softmax")

def verify_log_softmax(m, n, dtype="float32"):
    A = te.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = te.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = tvm.topi.testing.log_softmax_python(a_np)

    for device in get_all_backend():
        check_device(A, B, a_np, b_np, device, "log_softmax")

def verify_softmax_4d(shape, dtype="float32"):
    A = te.placeholder(shape, dtype=dtype, name='A')
    B = topi.nn.softmax(A, axis=1)

    _, c, h, w = shape
    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np.transpose(0, 2, 3, 1).reshape(h * w, c))
    b_np = b_np.reshape(1, h, w, c).transpose(0, 3, 1, 2)

    for device in get_all_backend():
        check_device(A, B, a_np, b_np, device, "softmax")

def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type, ceil_mode,
                  count_include_pad=True, layout='NCDHW'):
    """Verify the pool3d operator against the NumPy reference."""
    id = iw = ih
    kd = kw = kh
    sd = sw = sh
    input_shape = (n, ic, id, ih, iw)
    kernel = [kd, kh, kw]
    stride = [sd, sh, sw]
    A = te.placeholder(input_shape, name='A')
    B = topi.nn.pool3d(A, kernel=kernel, stride=stride, padding=padding,
                       pool_type=pool_type, ceil_mode=ceil_mode,
                       layout=layout, count_include_pad=count_include_pad)
    B = topi.nn.relu(B)
    dtype = A.dtype
    output_shape = [int(i) for i in B.shape]

    input_np = np.random.uniform(low=0.001, size=input_shape).astype(dtype)
    ref_np = tvm.topi.testing.pool3d_ncdhw_python(input_np, kernel, stride, padding,
                                                  output_shape, pool_type,
                                                  count_include_pad, ceil_mode)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s_func = tvm.topi.testing.dispatch(device, _pool_schedule)
            s = s_func(B, layout)

        a = tvm.nd.array(input_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), ref_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_broadcast_binary_ele(lhs_shape, rhs_shape, ftopi, fnumpy,
                                lhs_min=-100, lhs_max=100,
                                rhs_min=-100, rhs_max=100,
                                dtype="float32"):
    # Build the logic and compile the function
    A = (tvm.var("A", dtype=dtype) if lhs_shape is None
         else tvm.placeholder(shape=lhs_shape, name="A", dtype=dtype))
    B = (tvm.var("B", dtype=dtype) if rhs_shape is None
         else tvm.placeholder(shape=rhs_shape, name="B", dtype=dtype))
    C = ftopi(A, B)
    if isinstance(A, tvm.expr.Expr) and isinstance(B, tvm.expr.Expr):
        assert isinstance(C, tvm.expr.Expr)
        return

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(C)
        foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + ftopi.__name__)

        if lhs_shape is None:
            lhs_npy = float(np.random.uniform(low=lhs_min, high=lhs_max))
            if dtype.startswith('int'):
                lhs_npy = int(lhs_npy)
            lhs_nd = lhs_npy
        else:
            lhs_npy = np.random.uniform(low=lhs_min, high=lhs_max, size=lhs_shape).astype(A.dtype)
            lhs_nd = tvm.nd.array(lhs_npy, ctx)

        if rhs_shape is None:
            rhs_npy = float(np.random.uniform(low=rhs_min, high=rhs_max))
            if dtype.startswith('int'):
                rhs_npy = int(rhs_npy)
            rhs_nd = rhs_npy
        else:
            rhs_npy = np.random.uniform(low=rhs_min, high=rhs_max, size=rhs_shape).astype(A.dtype)
            rhs_nd = tvm.nd.array(rhs_npy, ctx)

        out_npy = fnumpy(lhs_npy, rhs_npy)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(C.dtype), ctx)
        foo(lhs_nd, rhs_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    for target in get_all_backend():
        check_device(target)
    check_device("sdaccel")

def verify_depth_to_space(block_size, batch, in_channel, in_height, in_width,
                          layout='NCHW', mode='DCR'):
    out_channel = int(in_channel / (block_size * block_size))
    out_height = int(in_height * block_size)
    out_width = int(in_width * block_size)

    if layout == 'NCHW':
        in_shape = [batch, in_channel, in_height, in_width]
        out_shape = [batch, out_channel, out_height, out_width]
    elif layout == 'NHWC':
        in_shape = [batch, in_height, in_width, in_channel]
        out_shape = [batch, out_height, out_width, out_channel]
    else:
        raise NotImplementedError('Layout not supported {}'.format(layout))

    A = te.placeholder(in_shape, name='A', dtype='float32')
    dtype = A.dtype
    a_np = np.random.uniform(size=in_shape).astype(dtype)

    B = topi.nn.depth_to_space(A, block_size=block_size, layout=layout, mode=mode)
    if layout == 'NHWC':
        a_np = np.transpose(a_np, axes=[0, 3, 1, 2])

    b_np = tvm.topi.testing.depth_to_space_python(a_np, block_size, mode=mode)
    if layout == 'NHWC':
        a_np = np.transpose(a_np, axes=[0, 2, 3, 1])
        b_np = np.transpose(b_np, axes=[0, 2, 3, 1])

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = tvm.topi.testing.get_injective_schedule(device)(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)

    for device in get_all_backend():
        check_device(device)

def verify_adaptive_pool(dshape, out_size, pool_type, layout="NCHW", dtype="float32"):
    def start_index(index, odim, idim):
        return int(np.floor(index * idim / odim))

    def end_index(index, odim, idim):
        return int(np.ceil((index + 1) * idim / odim))

    np_data = np.random.uniform(low=0, high=255, size=dshape).astype(dtype)
    n, c, h, w = dshape
    oh, ow = out_size
    oshape = (n, c) + out_size
    np_out = np.zeros(oshape).astype(dtype)
    np_op = np.mean if pool_type == "avg" else np.max
    for i in range(n):
        for j in range(c):
            for k in range(oh):
                k_start = start_index(k, oh, h)
                k_end = end_index(k, oh, h)
                k_sl = slice(k_start, k_end)
                for l in range(ow):
                    l_start = start_index(l, ow, w)
                    l_end = end_index(l, ow, w)
                    l_sl = slice(l_start, l_end)
                    np_out[i, j, k, l] = np_op(np_data[i, j, k_sl, l_sl])

    data = te.placeholder(dshape, name="data", dtype=dtype)
    out = topi.nn.adaptive_pool(data, out_size, pool_type, layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s_func = topi.testing.dispatch(device, _adaptive_pool_schedule)
            s = s_func(out)
        a = tvm.nd.array(np_data, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(oshape), dtype=out.dtype), ctx)
        f = tvm.build(s, [data, out], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), np_out, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

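# A hedged driver sketch; out_size must be a tuple because the reference code
# above concatenates it into oshape. All values here are illustrative.
def test_adaptive_pool():
    verify_adaptive_pool((1, 3, 224, 224), (1, 1), "max")
    verify_adaptive_pool((1, 3, 224, 224), (2, 3), "avg")
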
def verify_upsampling(batch, in_channel, in_height, in_width, scale_h, scale_w,
                      layout='NCHW', method="nearest_neighbor",
                      in_batch_block=0, in_channel_block=0):
    if layout == 'NCHW':
        A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
        dtype = A.dtype
        out_shape = (batch, in_channel, int(round(in_height * scale_h)),
                     int(round(in_width * scale_w)))
        a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype)
    elif nchw_pack_layout(layout):
        A = te.placeholder((batch, in_channel, in_height, in_width,
                            in_batch_block, in_channel_block), name='A')
        dtype = A.dtype
        out_shape = (batch, in_channel, int(round(in_height * scale_h)),
                     int(round(in_width * scale_w)), in_batch_block, in_channel_block)
        a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width,
                                       in_batch_block, in_channel_block)).astype(dtype)
    elif layout == 'NHWC':
        A = te.placeholder((batch, in_height, in_width, in_channel), name='A')
        dtype = A.dtype
        out_shape = (batch, int(round(in_height * scale_h)),
                     int(round(in_width * scale_w)), in_channel)
        a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype)
    else:
        raise NotImplementedError('Layout not supported {}'.format(layout))

    B = topi.nn.upsampling(A, scale_h, scale_w, layout=layout,
                           method=method, align_corners=False)

    if method == "bilinear":
        out_size = (int(round(in_height * scale_h)), int(round(in_width * scale_w)))
        b_np = tvm.topi.testing.bilinear_resize_python(a_np, out_size, layout, "asymmetric")
    else:
        b_np = tvm.topi.testing.upsampling_python(a_np, (scale_h, scale_w), layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = tvm.topi.testing.get_injective_schedule(device)(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_pool1d(n, ic, iw, kw, sw, padding, pool_type, ceil_mode,
                  count_include_pad=True, layout='NCW'):
    input_shape = (n, ic, iw)
    kernel = [kw]
    stride = [sw]
    A = tvm.placeholder(input_shape, name='A')
    B = topi.nn.pool1d(A, kernel=kernel, stride=stride, padding=padding,
                       pool_type=pool_type, ceil_mode=ceil_mode,
                       layout=layout, count_include_pad=count_include_pad)
    B = topi.nn.relu(B)
    dtype = A.dtype
    output_shape = [int(i) for i in B.shape]

    input_np = np.random.uniform(low=0.001, size=input_shape).astype(dtype)
    ref_np = topi.testing.pool1d_ncw_python(input_np, kernel, stride, padding,
                                            output_shape, pool_type,
                                            count_include_pad, ceil_mode)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_pool(B, layout)
        a = tvm.nd.array(input_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), ref_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
    in_width = in_size
    A = tvm.placeholder((batch, in_channel, in_width), name='A')
    W = tvm.placeholder((in_channel, num_filter, kernel), name='W')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv1d_transpose.verify_conv1d_transpose_ncw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv1d_transpose_ncw_python(a_np, w_np, stride, padding, (0,))
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        with tvm.target.create(device):
            B = topi.nn.conv1d_transpose_ncw(A, W, stride, padding, A.dtype)
            C = topi.nn.relu(B)
            s1 = topi.generic.schedule_conv1d_transpose_ncw([B])
            s2 = topi.generic.schedule_conv1d_transpose_ncw([C])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)

        func1 = tvm.build(s1, [A, W, B], device)
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, expected):
    sparse_indices_data = np.array(sparse_indices)
    sparse_values_data = np.array(sparse_values)
    output_shape_data = np.array(output_shape)
    default_value_data = np.array(default_value)

    A = te.placeholder(shape=sparse_indices_data.shape, name="sparse_indices",
                       dtype=str(sparse_indices_data.dtype))
    B = te.placeholder(shape=sparse_values_data.shape, name="sparse_values",
                       dtype=str(sparse_values_data.dtype))
    if default_value is None:
        args = [A, B]
        D = topi.sparse_to_dense(A, output_shape, B)
    else:
        C = te.placeholder(shape=(), name="default_value",
                           dtype=str(default_value_data.dtype))
        args = [A, B, C]
        D = topi.sparse_to_dense(A, output_shape, B, C)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_injective_schedule(device)(D)

        foo = tvm.build(s, args + [D], device, name="sparse_to_dense")

        sparse_indices_nd = tvm.nd.array(sparse_indices_data, ctx)
        sparse_values_nd = tvm.nd.array(sparse_values_data, ctx)
        out_nd = tvm.nd.empty(output_shape_data, ctx=ctx, dtype=B.dtype)

        if default_value is None:
            foo(sparse_indices_nd, sparse_values_nd, out_nd)
        else:
            default_value_nd = tvm.nd.array(default_value_data, ctx)
            foo(sparse_indices_nd, sparse_values_nd, default_value_nd, out_nd)

        tvm.testing.assert_allclose(out_nd.asnumpy(), np.array(expected))

    for device in get_all_backend():
        check_device(device)

def test_depthwise_conv2d():
    # load tophub pre-tuned parameters
    ctx = autotvm.apply_history_best([])
    for device in get_all_backend():
        context = autotvm.tophub.context(device)
        context.__enter__()

    # mobilenet workloads
    depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 64, 112, 1, 3, 2, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 128, 56, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 128, 56, 1, 3, 2, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 256, 28, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 256, 28, 1, 3, 2, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 512, 14, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 512, 14, 1, 3, 2, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 1024, 7, 1, 3, 1, "SAME")

    # NCHW
    depthwise_conv2d_with_workload_nchw(1, 728, 32, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(4, 256, 64, 2, 5, 2, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 728, 32, 1, 3, 1, "VALID")
    depthwise_conv2d_with_workload_nchw(4, 256, 64, 2, 5, 2, "VALID")
    # dilation = 2
    depthwise_conv2d_with_workload_nchw(1, 728, 64, 1, 3, 1, "SAME", dilation=2)

    # NHWC
    depthwise_conv2d_with_workload_nhwc(1, 728, 32, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nhwc(4, 256, 64, 2, 5, 2, "SAME")
    depthwise_conv2d_with_workload_nhwc(1, 728, 32, 1, 3, 1, "VALID")
    depthwise_conv2d_with_workload_nhwc(4, 256, 64, 2, 5, 2, "VALID")
    # dilation = 2
    depthwise_conv2d_with_workload_nhwc(1, 728, 64, 1, 3, 1, "SAME", dilation=2)

def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
    in_height = in_width = in_size
    A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
    W = tvm.placeholder((in_channel, num_filter, kernel, kernel), name='W')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d_transpose.verify_conv2d_transpose_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = topi.testing.conv2d_transpose_nchw_python(a_np, w_np, stride, padding)
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            B = topi.nn.conv2d_transpose_nchw(A, W, [stride, stride], [padding, padding], A.dtype)
            C = topi.nn.relu(B)
            s1 = topi.generic.schedule_conv2d_transpose_nchw([B])
            s2 = topi.generic.schedule_conv2d_transpose_nchw([C])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)

        func1 = tvm.build(s1, [A, W, B], device)
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def test_apply(func, name, f_numpy, low, high, shape=(20, 3), dtype=tvm.float32,
               check_round=False, skip_name_check=False):
    m = tvm.var("m")
    l = tvm.var("l")
    A = tvm.placeholder((m, l), dtype=dtype, name="A")
    B = func(A)
    assert tuple(B.shape) == tuple(A.shape)
    if not skip_name_check:
        assert B.op.body[0].name == name
    a_np = np.random.uniform(low=low, high=high, size=shape).astype(A.dtype) * 10
    # avoid round check too close to boundary
    if check_round:
        a_np += ((np.fmod(a_np, 1) - 0.5) < 1e-6) * 1e-5
    b_np = f_numpy(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name=name)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros_like(b_np), ctx)
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for device in get_all_backend():
        check_device(device)

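# A hedged usage sketch for test_apply: pairing a TOPI unary op with its NumPy
# reference. topi.floor/np.floor is one valid pairing; the bounds are illustrative.
def test_floor():
    test_apply(topi.floor, "floor", np.floor, -100, 100)
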
def verify_upsampling3d(batch, in_channel, in_depth, in_height, in_width,
                        scale_d, scale_h, scale_w,
                        layout='NCDHW', method="nearest_neighbor"):
    if layout == 'NCDHW':
        A = tvm.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
        dtype = A.dtype
        out_shape = (batch, in_channel, int(round(in_depth * scale_d)),
                     int(round(in_height * scale_h)), int(round(in_width * scale_w)))
        a_np = np.random.uniform(size=(batch, in_channel, in_depth,
                                       in_height, in_width)).astype(dtype)
    elif layout == 'NDHWC':
        A = tvm.placeholder((batch, in_depth, in_height, in_width, in_channel), name='A')
        dtype = A.dtype
        out_shape = (batch, int(round(in_depth * scale_d)), int(round(in_height * scale_h)),
                     int(round(in_width * scale_w)), in_channel)
        a_np = np.random.uniform(size=(batch, in_depth, in_height,
                                       in_width, in_channel)).astype(dtype)
    else:
        raise NotImplementedError('Layout not supported {}'.format(layout))

    B = topi.nn.upsampling3d(A, scale_d, scale_h, scale_w, layout=layout, method=method,
                             coordinate_transformation_mode="half_pixel")

    if method == "trilinear":
        out_size = (int(round(in_depth * scale_d)), int(round(in_height * scale_h)),
                    int(round(in_width * scale_w)))
        b_np = topi.testing.trilinear_resize3d_python(
            a_np, out_size, layout, coordinate_transformation_mode="half_pixel")
    else:
        b_np = topi.testing.upsampling3d_python(a_np, (scale_d, scale_h, scale_w), layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_crop_and_resize(image_shape, np_boxes, np_box_indices, np_crop_size,
                           layout='NHWC', method="bilinear", extrapolation_value=0.0):
    images = tvm.placeholder(image_shape, name='images', dtype='float32')
    np_images = np.random.uniform(size=image_shape).astype("float32")
    boxes = tvm.placeholder(np_boxes.shape, name="boxes", dtype="float32")
    box_ind = tvm.placeholder(np_box_indices.shape, name="box_ind", dtype="int32")

    batch = len(np_box_indices)
    target_height, target_width = np_crop_size[0], np_crop_size[1]
    if layout == 'NHWC':
        channel = image_shape[3]
        out_shape = (batch, target_height, target_width, channel)
    elif layout == 'NCHW':
        channel = image_shape[1]
        out_shape = (batch, channel, target_height, target_width)
    else:
        raise NotImplementedError('Layout {} is not supported.'.format(layout))

    out = topi.image.crop_and_resize(images, boxes, box_ind, np_crop_size,
                                     layout=layout, method=method,
                                     extrapolation_value=extrapolation_value)
    baseline_np = topi.testing.crop_and_resize_python(np_images, np_boxes, np_box_indices,
                                                      np_crop_size, layout, method,
                                                      extrapolation_value)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_injective_schedule(device)(out)
        tvm_images = tvm.nd.array(np_images, ctx)
        tvm_boxes = tvm.nd.array(np_boxes, ctx)
        tvm_indices = tvm.nd.array(np_box_indices, ctx)
        tvm_out = tvm.nd.array(np.zeros(out_shape, dtype="float32"), ctx)

        f = tvm.build(s, [images, boxes, box_ind, out], device, name="crop_and_resize")
        f(tvm_images, tvm_boxes, tvm_indices, tvm_out)
        tvm.testing.assert_allclose(tvm_out.asnumpy(), baseline_np, rtol=1e-3, atol=1e-3)

    for device in get_all_backend():
        check_device(device)

def test_isnan(low, high, shape=(20, 3), dtype="float32",
               check_round=False, skip_name_check=False):
    m = te.var("m")
    l = te.var("l")
    A = te.placeholder((m, l), dtype=dtype, name="A")
    B = topi.isnan(A)
    assert tuple(B.shape) == tuple(A.shape)
    if not skip_name_check:
        assert B.op.body[0].name == "isnan"
    a_np = np.random.uniform(low=low, high=high, size=shape).astype(A.dtype) * 10
    a_np.ravel()[np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False)] = np.nan
    # avoid round check too close to boundary
    if check_round:
        a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-5
    b_np = np.isnan(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.testing.get_injective_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name="isnan")
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros_like(b_np), ctx)
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for target in get_all_backend():
        check_device(target)

def verify_correlation_nchw(data_shape, kernel_size, max_displacement, stride1, stride2,
                            pad_size, is_multiply):
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" %
          (data_shape[0], data_shape[1], data_shape[2], data_shape[3], kernel_size,
           max_displacement, stride1, stride2, pad_size, is_multiply))
    A = te.placeholder(data_shape, name='data1')
    B = te.placeholder(data_shape, name='data2')
    dtype = A.dtype

    @memoize("topi.tests.test_topi_correlation_nchw.verify_correlation_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=data_shape).astype(dtype)
        b_np = np.random.uniform(size=data_shape).astype(dtype)
        c_np = tvm.topi.testing.correlation_nchw_python(a_np, b_np, kernel_size,
                                                        max_displacement, stride1,
                                                        stride2, pad_size, is_multiply)
        return a_np, b_np, c_np

    a_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        fcompute, fschedule = tvm.topi.testing.dispatch(device, _correlation_implement)
        with tvm.target.create(device):
            C = fcompute(A, B, kernel_size, max_displacement, stride1, stride2,
                         pad_size, is_multiply)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.empty(c_np.shape, dtype=dtype, ctx=ctx)
        func = tvm.build(s, [A, B, C], device)
        func(a, b, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_adaptive_pool(dshape, out_size, pool_type, layout="NCHW", dtype="float32"):
    def start_index(index, odim, idim):
        return int(np.floor(index * idim / odim))

    def end_index(index, odim, idim):
        return int(np.ceil((index + 1) * idim / odim))

    np_data = np.random.uniform(low=0, high=255, size=dshape).astype(dtype)
    n, c, h, w = dshape
    oh, ow = out_size
    oshape = (n, c) + out_size
    np_out = np.zeros(oshape).astype(dtype)
    np_op = np.mean if pool_type == "avg" else np.max
    for i in range(n):
        for j in range(c):
            for k in range(oh):
                k_start = start_index(k, oh, h)
                k_end = end_index(k, oh, h)
                k_sl = slice(k_start, k_end)
                for l in range(ow):
                    l_start = start_index(l, ow, w)
                    l_end = end_index(l, ow, w)
                    l_sl = slice(l_start, l_end)
                    np_out[i, j, k, l] = np_op(np_data[i, j, k_sl, l_sl])

    data = tvm.placeholder(dshape, name="data", dtype=dtype)
    out = topi.nn.adaptive_pool(data, out_size, pool_type, layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_adaptive_pool(out)
        a = tvm.nd.array(np_data, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(oshape), dtype=out.dtype), ctx)
        f = tvm.build(s, [data, out], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), np_out, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_upsampling(batch, in_channel, in_height, in_width, scale,
                      layout='NCHW', method="NEAREST_NEIGHBOR"):
    if layout == 'NCHW':
        A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
        dtype = A.dtype
        out_shape = (batch, in_channel, in_height * scale, in_width * scale)
        a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype)
    elif layout == 'NHWC':
        A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A')
        dtype = A.dtype
        out_shape = (batch, in_height * scale, in_width * scale, in_channel)
        a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype)
    else:
        raise NotImplementedError('Layout not supported {}'.format(layout))

    B = topi.nn.upsampling(A, scale, layout=layout, method=method)

    if method == "BILINEAR":
        out_size = (in_height * scale, in_width * scale)
        b_np = topi.testing.bilinear_resize_python(a_np, out_size, layout)
    else:
        b_np = topi.testing.upsampling_python(a_np, scale, layout)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for device in get_all_backend():
        check_device(device)

def verify_tile(in_shape, reps):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.tile(A, reps)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        foo = tvm.build(s, [A, B], device, name="tile")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = np.tile(data_npy, reps)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

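# A minimal driver sketch; shapes and reps are illustrative, including a reps
# tuple shorter than the input rank, which np.tile and topi.tile both accept.
def test_tile():
    verify_tile((3, 4, 4), (2,))
    verify_tile((3, 4, 4), (2, 2))
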
def verify_transpose(in_shape, axes):
    A = tvm.placeholder(shape=in_shape, name="A")
    B = topi.transpose(A, axes)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name="transpose")
        data_npy = np.arange(np.prod(in_shape)).reshape(in_shape).astype(A.dtype)
        out_npy = data_npy.transpose(axes)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

def verify_reshape(src_shape, dst_shape):
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.reshape(A, dst_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        foo = tvm.build(s, [A, B], device, name="reshape")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.reshape(data_npy, newshape=dst_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

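# A minimal driver sketch; the shape pairs are illustrative and preserve the
# total element count, as reshape requires.
def test_reshape():
    verify_reshape((1, 2, 3, 4), (2, 3, 4))
    verify_reshape((4, 2, 3, 4), (2, 4, 12))
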
def verify_dense(batch, in_dim, out_dim, use_bias=True):
    A = tvm.placeholder((batch, in_dim), name='A')
    B = tvm.placeholder((out_dim, in_dim), name='B')
    C = tvm.placeholder((out_dim,), name='C')
    dtype = A.dtype

    # use memoize to pickle the test data for next time use
    @memoize("topi.tests.test_topi_dense")
    def get_ref_data():
        a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype)
        b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype)
        c_np = np.random.uniform(size=(out_dim,)).astype(dtype)
        if use_bias:
            d_np = np.maximum(np.dot(a_np, b_np.T) + c_np, 0.0)
        else:
            d_np = np.maximum(np.dot(a_np, b_np.T), 0.0)
        return (a_np, b_np, c_np, d_np)

    # get the test data
    a_np, b_np, c_np, d_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            D = topi.nn.dense(A, B, C if use_bias else None)
            D = topi.nn.relu(D)
            s = topi.generic.schedule_dense([D])
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(c_np, ctx)
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B, C, D], device, name="dense")
        f(a, b, c, d)
        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

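# A hedged driver sketch covering both the bias and no-bias paths; the
# batch/in_dim/out_dim values are hypothetical.
def test_dense():
    verify_dense(1, 1024, 1000, use_bias=True)
    verify_dense(1, 1024, 1000, use_bias=False)
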
def verify_split(src_shape, indices_or_sections, axis):
    A = tvm.placeholder(shape=src_shape, name="A")
    tensor_l = topi.split(A, indices_or_sections, axis=axis)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(tensor_l)
        foo = tvm.build(s, [A] + list(tensor_l), device, name="split")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npys = np.split(data_npy, indices_or_sections, axis=axis)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype)
                   for out_npy in out_npys]
        foo(*([data_nd] + out_nds))
        for out_nd, out_npy in zip(out_nds, out_npys):
            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

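# A minimal driver sketch; indices_or_sections may be a section count or a list
# of split points, mirroring np.split. Values are illustrative.
def test_split():
    verify_split((2, 12, 3), 3, 1)
    verify_split((2, 12, 3), [2, 4], 1)
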
def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, out_width,
                          layout='NCHW', align_corners=False):
    if layout == 'NCHW':
        A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A', dtype='float32')
        dtype = A.dtype
        out_shape = (batch, in_channel, out_height, out_width)
        a_np = np.random.uniform(size=(batch, in_channel, in_height, in_width)).astype(dtype)
    elif layout == 'NHWC':
        A = tvm.placeholder((batch, in_height, in_width, in_channel), name='A', dtype='float32')
        dtype = A.dtype
        out_shape = (batch, out_height, out_width, in_channel)
        a_np = np.random.uniform(size=(batch, in_height, in_width, in_channel)).astype(dtype)
    else:
        raise NotImplementedError('Layout not supported {}'.format(layout))

    B = topi.image.resize(A, (out_height, out_width), layout=layout, align_corners=align_corners)
    b_np = topi.testing.bilinear_resize_python(a_np, (out_height, out_width),
                                               layout, align_corners)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_injective(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(out_shape, dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)

    for device in get_all_backend():
        check_device(device)

def verify_stack(shapes, axis):
    tensor_l = []
    for i, shape in enumerate(shapes):
        tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
    out_tensor = topi.stack(tensor_l, axis)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(out_tensor)
        foo = tvm.build(s, tensor_l + [out_tensor], device, name="stack")
        data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype)
                     for shape in shapes]
        out_npy = np.stack(data_npys, axis=axis)
        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
        foo(*(data_nds + [out_nd]))
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)
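# A hypothetical driver for the stack test. np.stack (and topi.stack) requires
# every input to share the same shape, so each case repeats one shape.
def test_stack():
    verify_stack([(2,), (2,), (2,)], -1)
    verify_stack([(2,), (2,), (2,)], 1)
    verify_stack([(2, 2, 4), (2, 2, 4), (2, 2, 4)], 0)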
def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    B = fbcast(A, out_shape)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_broadcast(B)
        foo = tvm.build(s, [A, B], device, name="broadcast_to")
        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
        out_npy = np.broadcast_to(data_npy, out_shape)
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for target in get_all_backend():
        check_device(target)
    check_device("sdaccel")
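# A sketch of how the broadcast helper might be driven. topi.broadcast_to is
# the natural fbcast argument; the shape pairs are illustrative only.
def test_broadcast_to():
    verify_broadcast_to_ele((1,), (10,), topi.broadcast_to)
    verify_broadcast_to_ele((1, 1, 5), (4, 3, 5), topi.broadcast_to)
    verify_broadcast_to_ele((1, 128, 1, 32), (64, 128, 64, 32), topi.broadcast_to)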
def verify_relu(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.nn.relu(A)

    a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_elemwise(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
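# A minimal driver sketch; the (m, n) shape is an assumption.
def test_relu():
    verify_relu(10, 128)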
def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True):
    iw = ih
    kw = kh
    sw = sh
    pt, pl, pb, pr = padding
    layout = "NCHW"
    A = tvm.placeholder((n, ic, ih, iw), name='A')
    B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
                     pool_type=pool_type, ceil_mode=ceil_mode,
                     layout="NCHW", count_include_pad=count_include_pad)
    B = topi.nn.relu(B)
    dtype = A.dtype

    bshape = get_const_tuple(B.shape)
    ashape = get_const_tuple(A.shape)
    if ceil_mode:
        assert bshape[2] == int(math.ceil(float(ashape[2] - kh + pt + pb) / sh) + 1)
        assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pl + pr) / sw) + 1)
    else:
        assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1)
        assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1)

    a_np = np.random.uniform(low=0.001, size=(n, ic, ih, iw)).astype(dtype)
    pad_np = np.zeros(shape=(n, ic, ih+pt+pb, iw+pl+pr)).astype(dtype)
    no_zero = (range(n), range(ic), (range(pt, ih+pt)), (range(pl, iw+pl)))
    pad_np[np.ix_(*no_zero)] = a_np
    _, oc, oh, ow = get_const_tuple(B.shape)
    b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype)

    if pool_type == 'avg':
        for i in range(oh):
            for j in range(ow):
                if count_include_pad:
                    b_np[:, :, i, j] = np.mean(
                        pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2, 3))
                else:
                    pad_count = np.sum(
                        pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2, 3))
                    b_np[:, :, i, j] = np.sum(
                        pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw],
                        axis=(2, 3)) / np.maximum(pad_count, 1)
    elif pool_type == 'max':
        for i in range(oh):
            for j in range(ow):
                b_np[:, :, i, j] = np.max(
                    pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2, 3))
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_pool(B, layout)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
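# Illustrative pooling cases (assumed shapes): padding is (pt, pl, pb, pr),
# and the cases exercise avg/max pooling, ceil_mode, and count_include_pad.
def test_pool():
    verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'avg', False, True)
    verify_pool(1, 256, 31, 3, 3, [1, 2, 1, 2], 'avg', False, False)
    verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'max', False)
    verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', True)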
def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_multiplier,
                                        filter_height, stride, padding, dilation=1):
    in_width = in_height
    filter_channel = in_channel
    filter_width = filter_height
    stride_h = stride_w = stride

    if dilation == 1:
        # Transform the padding argument from 'str' to 'tuple', because we need
        # it to match the "workload" tuple against the records in TopHub.
        pad_h, pad_w, _, _ = get_pad_tuple(padding, (filter_height, filter_width))
        padding_args = (pad_h, pad_w)
    else:
        padding_args = padding

    # placeholder
    Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
    Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
    Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
    Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
    dtype = 'float32'

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            # declare
            DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(
                Input, Filter, (stride_h, stride_w), padding_args, dilation, dtype)
            ScaleShift = topi.nn.scale_shift_nchw(DepthwiseConv2d, Scale, Shift)
            Relu = topi.nn.relu(ScaleShift)
            # schedule
            s1 = topi.generic.schedule_depthwise_conv2d_nchw(DepthwiseConv2d)
            s2 = topi.generic.schedule_depthwise_conv2d_nchw(ScaleShift)
            s3 = topi.generic.schedule_depthwise_conv2d_nchw(Relu)
        # build the kernels
        f1 = tvm.build(s1, [Input, Filter, DepthwiseConv2d], device)
        f2 = tvm.build(s2, [Input, Filter, Scale, Shift, ScaleShift], device)
        f3 = tvm.build(s3, [Input, Filter, Scale, Shift, Relu], device)

        # Prepare pod-type shapes for the test-data closure
        input_shape = get_const_tuple(Input.shape)
        filter_shape = get_const_tuple(Filter.shape)
        scale_shape = get_const_tuple(Scale.shape)
        shift_shape = get_const_tuple(Shift.shape)
        scale_shift_shape = get_const_tuple(ScaleShift.shape)

        # Use memoize to pickle the reference data so it can be reused on later runs.
        @memoize("topi.tests.test_topi_depthwise_conv2d.nchw")
        def get_ref_data():
            input_np = np.random.uniform(size=input_shape).astype(dtype)
            filter_np = np.random.uniform(size=filter_shape).astype(dtype)
            dilated_filter_np = topi.testing.dilate_python(filter_np, (1, 1, dilation, dilation))
            scale_np = np.random.uniform(size=scale_shape).astype(dtype)
            shift_np = np.random.uniform(size=shift_shape).astype(dtype)
            # correctness reference computed with scipy
            depthwise_conv2d_scipy = topi.testing.depthwise_conv2d_python_nchw(
                input_np, dilated_filter_np, stride, padding)
            scale_shift_scipy = np.zeros(shape=scale_shift_shape)
            for c in range(in_channel * channel_multiplier):
                scale_shift_scipy[:, c, :, :] = depthwise_conv2d_scipy[:, c, :, :] * scale_np[c] + shift_np[c]
            relu_scipy = np.maximum(scale_shift_scipy, 0)
            return (input_np, filter_np, scale_np, shift_np,
                    depthwise_conv2d_scipy, scale_shift_scipy, relu_scipy)

        # Get the test data
        (input_np, filter_np, scale_np, shift_np,
         depthwise_conv2d_scipy, scale_shift_scipy, relu_scipy) = get_ref_data()

        input_tvm = tvm.nd.array(input_np, ctx)
        filter_tvm = tvm.nd.array(filter_np, ctx)
        scale_tvm = tvm.nd.array(scale_np, ctx)
        shift_tvm = tvm.nd.array(shift_np, ctx)
        depthwise_conv2d_tvm = tvm.nd.array(
            np.zeros(shape=get_const_tuple(DepthwiseConv2d.shape), dtype=DepthwiseConv2d.dtype), ctx)
        scale_shift_tvm = tvm.nd.array(
            np.zeros(shape=get_const_tuple(ScaleShift.shape), dtype=ScaleShift.dtype), ctx)
        relu_tvm = tvm.nd.array(
            np.zeros(shape=get_const_tuple(Relu.shape), dtype=Relu.dtype), ctx)

        # launch kernel 1 (depthwise_conv2d)
        timer_1 = f1.time_evaluator(f1.entry_name, ctx, number=1)
        tcost_1 = timer_1(input_tvm, filter_tvm, depthwise_conv2d_tvm).mean
        # launch kernel 2 (depthwise_conv2d + scale_shift)
        timer_2 = f2.time_evaluator(f2.entry_name, ctx, number=1)
        tcost_2 = timer_2(input_tvm, filter_tvm, scale_tvm, shift_tvm, scale_shift_tvm).mean
        # launch kernel 3 (depthwise_conv2d + scale_shift + relu)
        timer_3 = f3.time_evaluator(f3.entry_name, ctx, number=1)
        tcost_3 = timer_3(input_tvm, filter_tvm, scale_tvm, shift_tvm, relu_tvm).mean

        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
        tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
            check_device(device)
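# A hedged usage sketch: the workload parameters below are illustrative
# assumptions, not necessarily the original suite's exact cases.
def test_depthwise_conv2d_nchw():
    depthwise_conv2d_with_workload_nchw(1, 728, 32, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(4, 256, 64, 2, 5, 2, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 728, 32, 1, 3, 1, "VALID")
    # dilation = 2
    depthwise_conv2d_with_workload_nchw(1, 728, 64, 1, 3, 1, "SAME", dilation=2)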
def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A", dtype=dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = dtype
    if type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_reduce(B)
        foo = tvm.build(s, [A, B], device, name=type)

        # Test
        in_npy = np.random.uniform(size=in_shape).astype(dtype)
        in_npy_map = np.sqrt(np.exp(in_npy)).astype(dtype)
        if type == "sum":
            out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
        elif type == "max":
            out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
        elif type == "min":
            out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
        elif type == "argmax":
            out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
        elif type == "argmin":
            out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
        else:
            raise NotImplementedError

        data_tvm = tvm.nd.array(in_npy, ctx=ctx)
        out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
        foo(data_tvm, out_tvm)

        if type in ("argmax", "argmin"):
            out_tvm_indices = out_tvm.asnumpy()
            if keepdims:
                out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
            if axis is None:
                out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
            else:
                other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis+1):]))
                sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
                out_tvm_val = in_npy_map[sel_indices]
            if type == "argmax":
                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
            elif type == "argmin":
                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
        else:
            tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)

    for device in get_all_backend():
        check_device(device)
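# Hypothetical cases for the fused map-reduce test above (assumed shapes).
# _my_npy_argmax and _my_npy_argmin are helpers defined elsewhere in this
# suite; the arg-reduction cases use an integer axis because the index check
# above indexes with it directly.
def test_reduce_map():
    verify_reduce_map_ele(in_shape=(32, 128), axis=1, keepdims=True, type="sum")
    verify_reduce_map_ele(in_shape=(32, 24, 32, 24), axis=(1, 2, 3), keepdims=True, type="sum")
    verify_reduce_map_ele(in_shape=(128, 24, 128, 24), axis=(0, 2), keepdims=False, type="min")
    verify_reduce_map_ele(in_shape=(128, 24, 128, 24), axis=2, keepdims=False, type="argmax")
    verify_reduce_map_ele(in_shape=(128, 24, 128, 24), axis=2, keepdims=False, type="argmin")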