def verify_squeeze(src_shape, axis):
    """Build ``topi.squeeze`` for several GPU backends and compare with NumPy.

    Args:
        src_shape: shape of the input placeholder and of the random test data.
        axis: axis argument forwarded to both ``topi.squeeze`` and
            ``np.squeeze``.
    """
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.squeeze(A, axis=axis)
    s = topi.cuda.schedule_injective(B)

    def check_device(device):
        """Compile and run the squeeze kernel on ``device`` if it is enabled."""
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        # Look the context up by device name so that every backend checked
        # below (including "metal") runs on its own device instead of
        # falling back to an OpenCL context.
        ctx = tvm.context(device, 0)
        foo = tvm.build(s, [A, B], device, name="squeeze")
        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
        out_npy = np.squeeze(data_npy, axis=axis)
        data_nd = tvm.nd.array(data_npy, ctx)
        # When NumPy squeezes everything away, the output buffer is
        # allocated with a single element instead of an empty shape.
        if out_npy.shape == ():
            out_nd_shape = (1,)
        else:
            out_nd_shape = out_npy.shape
        out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ("cuda", "opencl", "metal"):
        check_device(device)
def verify_squeeze(src_shape, axis):
    """Check ``topi.squeeze`` against ``np.squeeze`` on a fixed list of targets.

    Args:
        src_shape: shape of the input placeholder and of the random test data.
        axis: axis argument passed to both ``topi.squeeze`` and ``np.squeeze``.
    """
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.squeeze(A, axis=axis)

    def check_device(device):
        """Build, run and verify the kernel on one target; skip if absent."""
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        # The schedule is picked while the target is the current one.
        with tvm.target.create(device):
            sched = topi.generic.schedule_injective(B)
        kernel = tvm.build(sched, [A, B], device, name="squeeze")

        host_input = np.random.normal(size=src_shape).astype(A.dtype)
        expected = np.squeeze(host_input, axis=axis)
        dev_input = tvm.nd.array(host_input, ctx)

        # A fully squeezed result is stored in a one-element buffer.
        result_shape = (1,) if expected.shape == () else expected.shape
        dev_output = tvm.nd.empty(result_shape, ctx=ctx, dtype=B.dtype)

        kernel(dev_input, dev_output)
        np.testing.assert_allclose(dev_output.asnumpy(), expected)

    targets = ("llvm", "nvptx", "cuda", "opencl", "metal", "rocm", "vulkan")
    for target_name in targets:
        check_device(target_name)
def Mean(device="llvm", lib_path="./", ndim=None, dtype=None, axis=None, keep_dims=None):
    """Define a mean-reduction operator and pass it to ``Genlib``.

    The input is a placeholder whose extents are symbolic ``tvm.var`` values.
    The operator sums over the dimensions listed in ``axis`` and divides the
    sum by the product of the reduced extents.

    Args:
        device: forwarded unchanged to ``Genlib``.
        lib_path: forwarded unchanged to ``Genlib``.
        ndim: number of dimensions of the input tensor.
        dtype: element type of the input placeholder.
        axis: sequence of dimension indices to reduce over. Only its last
            entry is compared against ``ndim``.
        keep_dims: when truthy the reduced dimensions stay with extent 1;
            otherwise they are removed with ``topi.squeeze``.

    Returns:
        None. Nothing is generated when ``axis[-1] >= ndim``.
    """
    if axis[-1] >= ndim:
        return

    shape = [tvm.var("n" + str(i)) for i in range(ndim)]
    axis_str = "".join(str(dim) for dim in axis)
    keep_tag = "keepDims" if keep_dims else "notkeepDims"
    opname = "Mean_ndim%d_%s_axis%s_%s" % (ndim, dtype, axis_str, keep_tag)
    print(opname)

    # define compute
    in_tensor = tvm.placeholder(shape, dtype=dtype, name='in_tensor')
    c_shape = list(shape)
    reduced_num = 1
    for dim in axis:
        c_shape[dim] = 1
        reduced_num *= shape[dim]

    def _ComputeSum(*b_idx):
        # Swap each reduced output index for a fresh reduction axis.
        reduce_axis = [tvm.reduce_axis((0, shape[dim])) for dim in axis]
        a_idx = list(b_idx)
        for dim, r_axis in zip(axis, reduce_axis):
            a_idx[dim] = r_axis
        return tvm.sum(in_tensor[tuple(a_idx)], axis=reduce_axis)

    sum_tensor = tvm.compute(c_shape, _ComputeSum)
    mean_tensor = tvm.compute(c_shape, lambda *idx: sum_tensor(*idx) / reduced_num)
    if keep_dims:
        out_tensor = mean_tensor
    else:
        out_tensor = topi.squeeze(mean_tensor, axis)

    # define schedule & generate lib
    s = tvm.create_schedule(out_tensor.op)
    Genlib(s, [in_tensor, out_tensor], device, opname, lib_path)
def test_squeeze():
    """Run the squeeze checks, plus one case that indexes a squeezed tensor."""
    cases = [
        ((1, 2, 3, 4), 0),
        ((1, 2, 1, 4), None),
        ((1, 1, 1, 4), (1, 2)),
        ((1, 1, 1, 1), None),
    ]
    for src_shape, axis in cases:
        verify_squeeze(src_shape, axis)

    # a special case to trigger inline let expression
    A = tvm.placeholder((2, ), 'float32', 'A')
    E = topi.squeeze(A)
    C = tvm.compute((1, ), lambda i: E[(2 * A[0] - 1).astype('int32')])
    for device in ['cuda', 'opencl']:
        ctx = tvm.context(device, 0)
        if ctx.exist:
            # No explicit target is given to tvm.build, so it is called
            # inside the target scope.
            with tvm.target.create(device):
                sched = topi.generic.schedule_injective(C)
                func = tvm.build(sched, [A, C])
            host_a = np.array((1, 2)).astype('float32')
            a = tvm.nd.array(host_a, ctx=ctx)
            c = tvm.nd.empty((1, ), dtype='float32', ctx=ctx)
            func(a, c)
            assert c.asnumpy()[0] == 2
def test_squeeze():
    """Exercise ``verify_squeeze`` on four shapes, then a data-dependent index.

    The final case builds a compute whose index into the squeezed tensor is
    derived from the input data, and checks the value read back on each
    available GPU target.
    """
    verify_squeeze((1, 2, 3, 4), 0)
    verify_squeeze((1, 2, 1, 4), None)
    verify_squeeze((1, 1, 1, 4), (1, 2))
    verify_squeeze((1, 1, 1, 1), None)

    # a special case to trigger inline let expression
    A = tvm.placeholder((2,), 'float32', 'A')
    E = topi.squeeze(A)
    C = tvm.compute((1,), lambda i: E[(2 * A[0] - 1).astype('int32')])

    for device in ('cuda', 'opencl'):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            continue
        # tvm.build gets no explicit target here, so it is kept inside the
        # target scope.
        with tvm.target.create(device):
            schedule = topi.generic.schedule_injective(C)
            func = tvm.build(schedule, [A, C])
        a = tvm.nd.array(np.array((1, 2)).astype('float32'), ctx=ctx)
        c = tvm.nd.empty((1,), dtype='float32', ctx=ctx)
        func(a, c)
        first = c.asnumpy()[0]
        assert first == 2
def verify_squeeze(src_shape, axis):
    """Compare ``topi.squeeze`` with ``np.squeeze`` on every listed backend.

    Args:
        src_shape: shape of the ``te.placeholder`` input and the random data.
        axis: axis argument given to both ``topi.squeeze`` and ``np.squeeze``.
    """
    A = te.placeholder(shape=src_shape, name="A")
    B = topi.squeeze(A, axis=axis)

    def check_device(device):
        """Build and verify the kernel on ``device``; skip if it is missing."""
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        with tvm.target.create(device):
            schedule_fn = topi.testing.get_injective_schedule(device)
            sched = schedule_fn(B)
        kernel = tvm.build(sched, [A, B], device, name="squeeze")

        host_input = np.random.normal(size=src_shape).astype(A.dtype)
        expected = np.squeeze(host_input, axis=axis)
        dev_input = tvm.nd.array(host_input, ctx)
        dev_output = tvm.nd.empty(expected.shape, ctx=ctx, dtype=B.dtype)

        kernel(dev_input, dev_output)
        tvm.testing.assert_allclose(dev_output.asnumpy(), expected)

    for backend in get_all_backend():
        check_device(backend)
def verify_squeeze(src_shape, axis):
    """Validate ``topi.squeeze`` against NumPy for each available backend.

    Args:
        src_shape: shape used for the input placeholder and the random data.
        axis: axis argument passed to ``topi.squeeze`` and ``np.squeeze``.
    """
    A = tvm.placeholder(shape=src_shape, name="A")
    B = topi.squeeze(A, axis=axis)

    def check_device(device):
        """Run one build-and-compare round on ``device`` when it exists."""
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        # Pick the injective schedule with the target as the current one.
        with tvm.target.create(device):
            sched = topi.generic.schedule_injective(B)
        compiled = tvm.build(sched, [A, B], device, name="squeeze")

        sample = np.random.normal(size=src_shape).astype(A.dtype)
        reference = np.squeeze(sample, axis=axis)
        sample_nd = tvm.nd.array(sample, ctx)
        result_nd = tvm.nd.empty(reference.shape, ctx=ctx, dtype=B.dtype)

        compiled(sample_nd, result_nd)
        tvm.testing.assert_allclose(result_nd.asnumpy(), reference)

    for backend in get_all_backend():
        check_device(backend)
def compute_squeeze(attrs, inputs, out_info):
    """Compute definition of squeeze.

    Reads the ``"axis"`` attribute as an int tuple and applies
    ``topi.squeeze`` to the first input. A falsy (e.g. empty) axis value is
    passed on as ``None``. ``out_info`` is not used.
    """
    raw_axis = attrs.get_int_tuple("axis")
    if raw_axis:
        squeeze_axis = tuple(raw_axis)
    else:
        squeeze_axis = None
    return topi.squeeze(inputs[0], squeeze_axis)