def verify(hexagon_session: Session, schedule, x_tensor, y_tensor, z_tensor, size):
    """Verify correctness with reference from numpy"""
    print(tvm.lower(schedule, [x_tensor, y_tensor, z_tensor]))

    target_hexagon = tvm.target.hexagon("v68", link_params=True)
    func = tvm.build(
        schedule,
        [x_tensor, y_tensor, z_tensor],
        tvm.target.Target(target_hexagon, host=target_hexagon),
        name="dmacpy",
    )
    mod = hexagon_session.load_module(func)

    x_array = tvm.nd.array(
        np.random.randint(low=-128, high=127, size=size, dtype=x_tensor.dtype),
        device=hexagon_session.device,
    )
    y_array = tvm.nd.array(
        np.random.randint(low=-128, high=127, size=size, dtype=y_tensor.dtype),
        device=hexagon_session.device,
    )
    z_array = tvm.nd.array(
        np.random.randint(low=-128, high=127, size=size, dtype=z_tensor.dtype),
        device=hexagon_session.device,
    )
    mod["dmacpy"](x_array, y_array, z_array)

    ref = x_array.numpy() + y_array.numpy()
    np.testing.assert_equal(z_array.numpy(), ref)

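# A hypothetical usage sketch for verify(), not part of the original suite:
# the real callers in this file appear to pass in DMA-based schedules (the
# built kernel is named "dmacpy"), but any schedule computing z = x + y has
# the call shape verify() expects. The function name, size, and dtype below
# are illustrative assumptions.
def _example_verify_usage(hexagon_session: Session):
    size = 128
    x = tvm.te.placeholder((size,), dtype="int8", name="x")
    y = tvm.te.placeholder((size,), dtype="int8", name="y")
    z = tvm.te.compute((size,), lambda i: x[i] + y[i], name="z")
    sched = tvm.te.create_schedule(z.op)
    verify(hexagon_session, sched, x, y, z, size)
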
def test_add_vtcm(hexagon_session: Session): """Test add on VTCM""" dtype = "int8" placeholder_a = tvm.te.placeholder((2, ), dtype=dtype) placeholder_b = tvm.te.placeholder((1, ), dtype=dtype) compute_c = tvm.te.compute(placeholder_a.shape, lambda i: placeholder_a[i] + placeholder_b[0], name="C") sched = tvm.te.create_schedule(compute_c.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build( sched, [placeholder_a, placeholder_b, compute_c], tvm.target.Target(target_hexagon, host=target_hexagon), name="add", ) mod = hexagon_session.load_module(func) a_data = tvm.nd.empty(placeholder_a.shape, placeholder_a.dtype, hexagon_session.device, "global.vtcm") a_data.copyfrom(np.array([2, 3])) b_data = tvm.nd.empty(placeholder_b.shape, placeholder_b.dtype, hexagon_session.device, "global.vtcm") b_data.copyfrom(np.array([4])) c_data = tvm.nd.empty(compute_c.shape, compute_c.dtype, hexagon_session.device, "global.vtcm") c_data.copyfrom(np.array([0, 0])) mod["add"](a_data, b_data, c_data) result = c_data.numpy() assert (result == np.array([6, 7])).all()
def test_add(hexagon_session: Session): """Test simple add""" dtype = "int8" placeholder_a = tvm.te.placeholder((2, ), dtype=dtype) placeholder_b = tvm.te.placeholder((1, ), dtype=dtype) compute_c = tvm.te.compute(placeholder_a.shape, lambda i: placeholder_a[i] + placeholder_b[0], name="C") sched = tvm.te.create_schedule(compute_c.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build( sched, [placeholder_a, placeholder_b, compute_c], tvm.target.Target(target_hexagon, host=target_hexagon), name="add", ) mod = hexagon_session.load_module(func) a_data = tvm.nd.array(np.array([2, 3], dtype=dtype), device=hexagon_session.device) assert (a_data.numpy() == np.array([2, 3])).all() b_data = tvm.nd.array(np.array([4], dtype=dtype), device=hexagon_session.device) assert (b_data.numpy() == np.array([4])).all() c_data = tvm.nd.array(np.array([0, 0], dtype=dtype), device=hexagon_session.device) assert (c_data.numpy() == np.array([0, 0])).all() mod["add"](a_data, b_data, c_data) assert (c_data.numpy() == np.array([6, 7])).all()
def test_adaptive_pool(self, hexagon_session: Session, dshape, out_size, pool_type, layout):
    """Test adaptive pooling against the numpy reference"""
    dtype = "float32"
    np_data = np.random.uniform(low=0, high=255, size=dshape).astype(dtype)
    np_out = tvm.topi.testing.adaptive_pool(np_data, out_size, pool_type, layout)
    oshape = np_out.shape

    data = te.placeholder(dshape, name="data", dtype=dtype)
    if len(out_size) == 2:
        out = topi.nn.adaptive_pool(data, out_size, pool_type, layout)
    else:
        assert len(out_size) == 3
        out = topi.nn.adaptive_pool3d(data, out_size, pool_type, layout)

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        fschedule = topi.hexagon.schedule_adaptive_pool
        s = fschedule(out)

    func = tvm.build(
        s,
        [data, out],
        tvm.target.Target(target_hexagon, host=target_hexagon),
        name="adaptive-pool",
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    a = tvm.nd.array(np_data, dev)
    b = tvm.nd.array(np.zeros(get_const_tuple(oshape), dtype=out.dtype), dev)
    mod["adaptive-pool"](a, b)

    tvm.testing.assert_allclose(b.numpy(), np_out, rtol=4e-5, atol=1e-6)

def test_elemwise_sum_parallel(hexagon_session: Session):
    """Test the parallel elementwise sum"""
    target_hexagon = tvm.target.hexagon("v68", link_params=True)
    func = tvm.build(
        ElemwiseSumIRModule, target=tvm.target.Target(target_hexagon, host=target_hexagon)
    )
    mod = hexagon_session.load_module(func)

    (a, b, c, n) = generate_add_test_data(hexagon_session)
    mod["elemwise_sum_parallel"](a, b, c, n)
    tvm.testing.assert_allclose(c.numpy(), a.numpy() + b.numpy())

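# For context, a minimal sketch of what the ElemwiseSumIRModule referenced by
# these tests could look like. The real TVMScript module is defined elsewhere
# in this file; the float32 dtype, block names, and an assumed
# `from tvm.script import tir as T` import are illustrative, not authoritative.
@tvm.script.ir_module
class _ElemwiseSumSketch:
    @T.prim_func
    def elemwise_sum_serial(a: T.handle, b: T.handle, c: T.handle, n: T.int32):
        T.func_attr({"global_symbol": "elemwise_sum_serial", "tir.noalias": True})
        A = T.match_buffer(a, (n,), dtype="float32")
        B = T.match_buffer(b, (n,), dtype="float32")
        C = T.match_buffer(c, (n,), dtype="float32")
        for i in T.serial(n):
            with T.block("C"):
                vi = T.axis.spatial(n, i)
                C[vi] = A[vi] + B[vi]

    @T.prim_func
    def elemwise_sum_parallel(a: T.handle, b: T.handle, c: T.handle, n: T.int32):
        T.func_attr({"global_symbol": "elemwise_sum_parallel", "tir.noalias": True})
        A = T.match_buffer(a, (n,), dtype="float32")
        B = T.match_buffer(b, (n,), dtype="float32")
        C = T.match_buffer(c, (n,), dtype="float32")
        # Same computation, but the loop is marked parallel so the Hexagon
        # runtime can split it across worker threads.
        for i in T.parallel(n):
            with T.block("C"):
                vi = T.axis.spatial(n, i)
                C[vi] = A[vi] + B[vi]
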
def test_speedup(hexagon_session: Session, capsys):
    """Measure and report the parallel-over-serial speedup"""
    target_hexagon = tvm.target.hexagon("v68", link_params=True)
    func = tvm.build(
        ElemwiseSumIRModule, target=tvm.target.Target(target_hexagon, host=target_hexagon)
    )
    mod = hexagon_session.load_module(func)
    args = generate_add_test_data(hexagon_session)
    parallel_mean = benchmark_func(mod, "elemwise_sum_parallel", args, hexagon_session)
    serial_mean = benchmark_func(mod, "elemwise_sum_serial", args, hexagon_session)

    with capsys.disabled():
        print("... speedup of {:.2f}".format(serial_mean / parallel_mean), end=" ")

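# The benchmark_func helper used by test_speedup is defined elsewhere in this
# file. A minimal sketch consistent with its call sites (module, function
# name, argument tuple, session), timing the packed function with TVM's
# time_evaluator and returning the mean runtime in seconds:
def _benchmark_func_sketch(mod, name, args, hexagon_session):
    evaluator = mod.time_evaluator(name, hexagon_session.device, number=100)
    return evaluator(*args).mean
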
def test_conv2d_nhwc(
    self,
    hexagon_session: Session,
    ref_data,
    batch,
    in_channel,
    in_size,
    num_filter,
    kernel,
    dtype,
    stride,
    padding,
    dilation,
):
    """Test conv2d in NHWC layout"""
    target_hexagon = tvm.target.hexagon("v68")

    a_np, w_np, b_np = ref_data

    A = te.placeholder(a_np.shape, name="A", dtype=dtype)
    W = te.placeholder(w_np.shape, name="W", dtype=dtype)

    with tvm.target.Target(target_hexagon):
        fcompute = topi.nn.conv2d_nhwc
        fschedule = topi.hexagon.schedule_conv2d_nhwc
        B = fcompute(A, W, stride, padding, dilation, dtype)
        s = fschedule([B])

    func_name = "conv2d_{}_{}_{}_{}_{}_{}_{}_{}_{}".format(
        dtype,
        batch,
        in_channel,
        in_size,
        num_filter,
        kernel,
        stride,
        padding,
        dilation,
    )
    func = tvm.build(
        s, [A, W, B], tvm.target.Target(target_hexagon, host=target_hexagon), name=func_name
    )

    mod = hexagon_session.load_module(func)
    dev = hexagon_session.device
    a = tvm.nd.array(a_np, dev)
    w = tvm.nd.array(w_np, dev)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)

    mod[func_name](a, w, b)
    tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)

def test_dense(
    hexagon_session: Session,
    batch_size,
    in_dim,
    out_dim,
    use_bias,
    in_dtype,
    out_dtype,
    dense_ref_data,
):
    """Test dense with optional bias and fused ReLU"""
    if in_dtype == "float16":
        pytest.xfail("float16 is not supported.")

    if "int" in in_dtype:
        tol = {"atol": 0, "rtol": 0}
    elif in_dtype == "float32":
        tol = {"rtol": 1e-5, "atol": 1e-5}

    A = te.placeholder((batch_size, in_dim), name="A", dtype=in_dtype)
    B = te.placeholder((out_dim, in_dim), name="B", dtype=in_dtype)
    C = te.placeholder((out_dim,), name="C", dtype=out_dtype)

    a_np, b_np, c_np, d_np = dense_ref_data

    fcompute = topi.nn.dense
    fschedule = topi.hexagon.schedule_dense

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        D = fcompute(A, B, C if use_bias else None, out_dtype)
        D = topi.nn.relu(D)
        s = fschedule([D])

    func = tvm.build(
        s, [A, B, C, D], tvm.target.Target(target_hexagon, host=target_hexagon), name="dense"
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    a = tvm.nd.array(a_np, dev)
    b = tvm.nd.array(b_np, dev)
    c = tvm.nd.array(c_np, dev)
    d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=out_dtype), dev)
    mod["dense"](a, b, c, d)
    tvm.testing.assert_allclose(d.numpy(), d_np, **tol)

def test_batch_matmul_int8(self, hexagon_session: Session, x_batch, y_batch, M, N, K):
    """Test batch matmul for int8 inputs"""
    dtype = "int8"
    out_dtype = "int8"
    assert x_batch == y_batch or x_batch == 1 or y_batch == 1
    x = te.placeholder((x_batch, M, K), name="x", dtype=dtype)
    y = te.placeholder((y_batch, N, K), name="y", dtype=dtype)

    def get_ref_data():
        a_np = np.random.randint(low=-128, high=127, size=(x_batch, M, K)).astype(dtype)
        b_np = np.random.randint(low=-128, high=127, size=(y_batch, N, K)).astype(dtype)
        c_np = tvm.topi.testing.batch_matmul(a_np, b_np, out_dtype=out_dtype)
        return (a_np, b_np, c_np)

    # get the test data
    a_np, b_np, c_np = get_ref_data()

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        fcompute = topi.nn.batch_matmul
        fschedule = topi.hexagon.schedule_batch_matmul
        out = fcompute(x, y)
        s = fschedule([out])

    func = tvm.build(
        s,
        [x, y, out],
        tvm.target.Target(target_hexagon, host=target_hexagon),
        name="batch_matmul_int8",
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    a = tvm.nd.array(a_np, dev)
    b = tvm.nd.array(b_np, dev)
    c = tvm.nd.array(np.zeros(get_const_tuple(out.shape), dtype=out_dtype), dev)
    mod["batch_matmul_int8"](a, b, c)
    tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5)

def test_batch_matmul(self, hexagon_session: Session, x_batch, y_batch, M, N, K, dtype):
    """Test batch matmul"""
    if dtype == "float16":
        pytest.xfail("float16 is not supported.")

    x = te.placeholder((x_batch, M, K), name="x")
    y = te.placeholder((y_batch, N, K), name="y")

    def get_ref_data():
        a_np = np.random.uniform(size=(x_batch, M, K)).astype(dtype)
        b_np = np.random.uniform(size=(y_batch, N, K)).astype(dtype)
        c_np = tvm.topi.testing.batch_matmul(a_np, b_np)
        return (a_np, b_np, c_np)

    # get the test data
    a_np, b_np, c_np = get_ref_data()

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        fcompute = topi.nn.batch_matmul
        fschedule = topi.hexagon.schedule_batch_matmul
        out = fcompute(x, y)
        s = fschedule([out])
        out_shape = out.shape

    func = tvm.build(
        s,
        [x, y, out],
        tvm.target.Target(target_hexagon, host=target_hexagon),
        name="batch_matmul",
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    a = tvm.nd.array(a_np, dev)
    b = tvm.nd.array(b_np, dev)
    c = tvm.nd.array(np.zeros(get_const_tuple(out_shape), dtype=dtype), dev)
    mod["batch_matmul"](a, b, c)
    tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5)

def test_softmax(hexagon_session: Session, shape, dtype, softmax_operation):
    """Test softmax operations"""
    if dtype == "float16":
        pytest.xfail("float16 is not supported.")
    A = te.placeholder(shape, dtype=dtype, name="A")
    topi_op = configs[softmax_operation]["topi"]
    B = topi_op(A, axis=1)

    def get_ref_data(shape):
        ref_func = tvm.topi.testing.softmax_python
        a_np = np.random.uniform(size=shape).astype(dtype)
        if len(shape) == 2:
            b_np = ref_func(a_np)
        elif len(shape) == 4:
            # Apply the 2-D reference over the channel axis of an NCHW tensor.
            _, c, h, w = a_np.shape
            a_np_2d = a_np.transpose(0, 2, 3, 1).reshape(h * w, c)
            b_np_2d = ref_func(a_np_2d)
            b_np = b_np_2d.reshape(1, h, w, c).transpose(0, 3, 1, 2)
        return a_np, b_np

    # get the test data
    a_np, b_np = get_ref_data(shape)

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        fschedule = topi.hexagon.schedule_softmax
        s = fschedule(B)

    func = tvm.build(
        s, [A, B], tvm.target.Target(target_hexagon, host=target_hexagon), name="softmax"
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    a = tvm.nd.array(a_np, dev)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)
    mod["softmax"](a, b)
    tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)

def test_conv2d_nchw(
    self,
    hexagon_session: Session,
    batch,
    in_channel,
    in_size,
    num_filter,
    kernel,
    stride,
    padding,
    dtype,
    ref_data,
    dilation,
    add_bias,
    apply_relu,
):
    """Test conv2d in NCHW layout"""
    target_hexagon = tvm.target.hexagon("v68")

    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (kernel, kernel))
    padding_sum = pad_top + pad_left + pad_bottom + pad_right

    a_np, w_np, b_np, c_np = ref_data

    A = te.placeholder(a_np.shape, name="A", dtype=dtype)
    W = te.placeholder(w_np.shape, name="W", dtype=dtype)
    bias = te.placeholder(b_np.shape, name="bias", dtype=dtype)

    if "int" in dtype:
        tol = {"atol": 0, "rtol": 0}
    elif dtype == "float32":
        tol = {"rtol": 1e-4, "atol": 2e-4}
    elif dtype == "float16":
        # A summation in float16 with a single accumulator very
        # quickly runs into large rounding errors.  At some point,
        # this tolerance should be schedule-dependent to avoid
        # false negatives.
        num_values_summed = in_channel * kernel * kernel
        gap_size = np.nextafter(c_np.max(), np.inf, dtype=c_np.dtype) - c_np.max()
        tol = {"rtol": 1e-3, "atol": num_values_summed * gap_size / 2}

    with tvm.target.Target(target_hexagon):
        fcompute = topi.nn.conv2d_nchw
        fschedule = topi.hexagon.schedule_conv2d_nchw
        C = fcompute(A, W, (stride, stride), padding, (dilation, dilation), dtype)
        if add_bias:
            C = topi.add(C, bias)
        if apply_relu:
            C = topi.nn.relu(C)
        s = fschedule([C])

    func_name = "conv2d_{}_{}_{}_{}_{}_{}_{}_{}_{}".format(
        dtype,
        batch,
        in_channel,
        in_size,
        num_filter,
        kernel,
        stride,
        padding_sum,
        dilation,
    )
    func = tvm.build(
        s,
        [A, W, bias, C],
        tvm.target.Target(target_hexagon, host=target_hexagon),
        name=func_name,
    )

    mod = hexagon_session.load_module(func)
    dev = hexagon_session.device
    a = tvm.nd.array(a_np, dev)
    w = tvm.nd.array(w_np, dev)
    b = tvm.nd.array(b_np, dev)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev)
    mod[func_name](a, w, b, c)
    tvm.testing.assert_allclose(c.numpy(), c_np, **tol)

def test_conv2d(
    self,
    hexagon_session: Session,
    batch,
    in_channel,
    in_size,
    num_filter,
    stride,
    padding,
    output_padding,
    random_seed,
):
    """Test conv2d transpose in NCHW layout"""
    target_hexagon = tvm.target.hexagon("v68")

    in_height, in_width = in_size
    kernel_height, kernel_width = (1, 1)
    stride_height, stride_width = stride
    pad_top, pad_left, pad_bottom, pad_right = padding

    A = te.placeholder((batch, in_channel, in_height, in_width), name="A")
    W = te.placeholder((in_channel, num_filter, kernel_height, kernel_width), name="W")

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    def get_ref_data():
        np.random.seed(random_seed)
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = tvm.topi.testing.conv2d_transpose_nchw_python(
            a_np, w_np, stride, padding, output_padding
        )
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    fcompute_args = (
        A,
        W,
        [stride_height, stride_width],
        [pad_top, pad_left, pad_bottom, pad_right],
        A.dtype,
        output_padding,
    )

    with tvm.target.Target(target_hexagon):
        fcompute = topi.nn.conv2d_transpose_nchw
        fschedule = topi.hexagon.schedule_conv2d_transpose_nchw
        B = fcompute(*fcompute_args)
        C = topi.nn.relu(B)
        s1 = fschedule([B])
        s2 = fschedule([C])

    dev = hexagon_session.device

    a = tvm.nd.array(a_np, dev)
    w = tvm.nd.array(w_np, dev)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev)

    func1 = tvm.build(s1, [A, W, B], tvm.target.Target(target_hexagon, host=target_hexagon))
    func2 = tvm.build(s2, [A, W, C], tvm.target.Target(target_hexagon, host=target_hexagon))

    mod1 = hexagon_session.load_module(func1)
    mod2 = hexagon_session.load_module(func2)

    mod1(a, w, b)
    mod2(a, w, c)

    tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)
    tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5)

def test_maxpool2d_nhwc(
    self,
    N,
    H,
    W,
    C,
    DTYPE,
    KERNEL,
    STRIDE,
    DILATION,
    PADDING,
    IO_TENSOR_MEM_SCOPE,
    hexagon_session: Session,
):
    """Benchmark one max_pool2d NHWC configuration"""
    keys_dict = {
        "basic_kernel": "max_pool2d",
        "sched_type": 1,
        "input_shape_4d": [N, H, W, C],
        "block_shape": [8, 8, 32],
        "DTYPE": DTYPE,
        "KERNEL": KERNEL,
        "STRIDE": STRIDE,
        "DILATION": DILATION,
        "PADDING": PADDING,
        "IO_TENSOR_MEM_SCOPE": IO_TENSOR_MEM_SCOPE,
    }

    desc = bu.get_benchmark_decription(keys_dict)

    # Create the host-side directory for this benchmark run's files / logs...
    host_files_dir_name = bu.get_benchmark_id(keys_dict)
    host_files_dir_path = os.path.join(self.working_dir, host_files_dir_name)
    os.mkdir(host_files_dir_path)

    keys_dict["host_files_dir_path"] = host_files_dir_path

    log_file_path = os.path.join(host_files_dir_path, "out.txt")
    with open(log_file_path, "w") as log_file:
        print(f"CONFIGURATION: {desc}")
        log_file.write(f"CONFIGURATION: {desc}\n")

        try:
            input_tensor_shape_4d = [N, H, W, C]
            input_tensor_shape_7d = _int8_nhwc_8h8w32c_shape(N, H, W, C)

            data = te.placeholder(tuple(input_tensor_shape_4d), dtype=DTYPE)

            output = topi.nn.pool2d(
                data, KERNEL, STRIDE, DILATION, PADDING, "max", layout="NHWC"
            )
            primfunc = te.create_prim_func([data, output])

            sch = tir.Schedule(primfunc, debug_mask="all")
            sch.transform_layout(
                block="tensor", buffer="placeholder", index_map=_int8_nhwc_8h8w32c_map
            )

            target_hexagon = tvm.target.hexagon("v69", link_params=True)
            built_module = tvm.build(
                sch.mod, target=tvm.target.Target(target_hexagon, host=target_hexagon)
            )

            # Save a local copy of the Hexagon object code (in the form of a .so file)
            # to allow post-mortem inspection.
            host_dso_binary_path = os.path.join(host_files_dir_path, "test_binary.so")
            built_module.save(host_dso_binary_path)
            print(f"SAVED BINARY TO HOST PATH: {host_dso_binary_path}")

            hexagon_mod = hexagon_session.load_module(built_module)

            # Generate the input tensor's data.
            # Note that we'll eventually need it in two different layouts:
            # (1) NHWC as an argument to testing.poolnd_python.
            # (2) NHWC_8h8w32c as an argument to our Hexagon primfunc.
            a_numpy_4d = _create_test_input(input_tensor_shape_4d, DTYPE)

            ref_output_4d = testing.poolnd_python(
                a_numpy_4d.astype("int32"),
                KERNEL,
                STRIDE,
                DILATION,
                PADDING[0:2],
                PADDING[2:],
                pool_type="max",
                dtype="int32",
                layout="NHWC",
            ).astype(DTYPE)

            output_tensor_shape_4d = ref_output_4d.shape

            a_numpy_7d = _int8_nhwc_8h8w32c_xform_immediate(a_numpy_4d)

            a_hexagon_7d = allocate_hexagon_array(
                hexagon_session.device,
                tensor_shape=input_tensor_shape_7d,
                axis_separators=[4],
                dtype=DTYPE,
                mem_scope=IO_TENSOR_MEM_SCOPE,
            )

            c_hexagon_4d = allocate_hexagon_array(
                hexagon_session.device,
                tensor_shape=output_tensor_shape_4d,
                axis_separators=[],
                dtype=DTYPE,
                mem_scope=IO_TENSOR_MEM_SCOPE,
            )

            a_hexagon_7d.copyfrom(a_numpy_7d)

            if DTYPE == "int8":
                rel_tolerance = 0
                abs_tolerance = 0
            else:
                assert False, f"TODO: decide acceptable tolerances for DTYPE {DTYPE}"

            timer = hexagon_mod.time_evaluator(
                "main", hexagon_session.device, number=10, repeat=1
            )
            timing_result = timer(a_hexagon_7d, c_hexagon_4d)

            try:
                tvm.testing.assert_allclose(
                    ref_output_4d, c_hexagon_4d.numpy(), rtol=rel_tolerance, atol=abs_tolerance
                )
            except AssertionError as e:
                raise bu.NumericalAccuracyException(str(e))

        except bu.NumericalAccuracyException as e:
            print()
            print("FAIL: Numerical accuracy error. See log file.")

            log_file.write("\n")
            log_file.write(f"FAIL: {e}\n")

            self.benchmark_table.record_fail(
                **keys_dict, comments="Numerical accuracy error. See log file."
            )

        except bu.UnsupportedException as e:
            print()
            print(f"SKIP: {e}")

            log_file.write("\n")
            log_file.write(f"SKIP: {e}\n")

            self.benchmark_table.record_skip(
                **keys_dict, comments=f"Unsupported configuration: {e}"
            )

        else:
            # Only record success when the try-block ran to completion;
            # without this `else:`, a fail/skip would also be recorded as a
            # success (and timing_result could be undefined).
            self.benchmark_table.record_success(timing_result, **keys_dict)

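# The _int8_nhwc_8h8w32c_* helpers used above are defined elsewhere in this
# file. A plausible sketch, inferred from "block_shape": [8, 8, 32] and from
# axis_separators=[4] splitting the 7-D layout after its first four axes; the
# actual helpers may differ in detail:
def _int8_nhwc_8h8w32c_shape_sketch(n, h, w, c):
    # Physical 7-D shape: H, W, and C are ceil-divided into 8 x 8 x 32 blocks.
    return [n, (h + 7) // 8, (w + 7) // 8, (c + 31) // 32, 8, 8, 32]


def _int8_nhwc_8h8w32c_map_sketch(n, h, w, c):
    # Index map for sch.transform_layout: logical NHWC -> blocked 7-D index.
    return [n, h // 8, w // 8, c // 32, h % 8, w % 8, c % 32]
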
def test_reduce_map(
    hexagon_session: Session, ref_data, in_shape, axis, keepdims, reduce_type, dtype
):
    """Test reduce operations"""
    in_npy, in_npy_map, out_npy = ref_data

    # Build the logic and compile the function
    A = te.placeholder(shape=in_shape, name="A", dtype=dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = dtype
    if reduce_type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "all":
        B = topi.all(A, axis=axis, keepdims=keepdims)
    elif reduce_type == "any":
        B = topi.any(A, axis=axis, keepdims=keepdims)
    elif reduce_type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif reduce_type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        fschedule = topi.hexagon.schedule_reduce
        s = fschedule(B)

    func = tvm.build(
        s, [A, B], tvm.target.Target(target_hexagon, host=target_hexagon), name=reduce_type
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    data_tvm = tvm.nd.array(in_npy, device=dev)
    out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=out_dtype)

    mod[reduce_type](data_tvm, out_tvm)

    if reduce_type in ("argmax", "argmin"):
        # For argmax/argmin, check that the returned indices select the true
        # extremum rather than comparing indices directly.
        out_tvm_indices = out_tvm.numpy()
        if keepdims:
            out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
        if axis is None:
            out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
        else:
            other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis + 1):]))
            sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
            out_tvm_val = in_npy_map[sel_indices]
        if reduce_type == "argmax":
            tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1e-3, 1e-3)
        elif reduce_type == "argmin":
            tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1e-3, 1e-3)
    else:
        tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3)

def test_avg_pool2d_slice(
    self,
    stride,
    kernel,
    dtype,
    dilation,
    padding,
    count_include_pad,
    input_layout,
    output_layout,
    output_shape,
    input_shape,
    input_shape_padded,
    input_np,
    input_np_padded,
    transformed_input_np_padded,
    transformed_expected_output_np,
    expected_output_np,
    hexagon_session: Session,
):
    """Test the avg_pool2d slice op"""
    if hexagon_session._launcher._serial_number != "simulator":
        pytest.skip(msg="Due to https://github.com/apache/tvm/issues/11928")

    target_hexagon = tvm.target.hexagon("v69")
    A = te.placeholder(input_shape_padded, name="A", dtype=dtype)

    M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation)

    # tir schedule
    tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout)
    sch = tir_schedule.mod

    input_axis_separator = [4]
    if output_layout == "nhwc-8h2w32c2w-2d":
        output_axis_separator = [4]
    elif output_layout == "n11c-1024c-2d":
        output_axis_separator = [4]
    else:
        raise RuntimeError(f"Unexpected layout '{output_layout}'")

    with tvm.transform.PassContext(opt_level=3):
        func = tvm.build(
            sch,
            [A, M],
            tvm.target.Target(target_hexagon, host=target_hexagon),
            name="avg_pool2d",
        )

    input_arr = allocate_hexagon_array(
        hexagon_session.device,
        data=transformed_input_np_padded,
        axis_separators=input_axis_separator,
        mem_scope="global.vtcm",
    )
    output_arr = allocate_hexagon_array(
        hexagon_session.device,
        transformed_expected_output_np.shape,
        dtype,
        axis_separators=output_axis_separator,
        mem_scope="global.vtcm",
    )

    mod = hexagon_session.load_module(func)
    mod(input_arr, output_arr)
    b, h, w, c = output_shape
    if output_layout == "nhwc-8h2w32c2w-2d":
        output_np = output_arr.numpy().reshape([b, h // 8, w // 4, c // 32, 8, 2, 32, 2])
    elif output_layout == "n11c-1024c-2d":
        output_np = output_arr.numpy().reshape([b, 1, 1, c // 1024, 1024])
    else:
        raise RuntimeError(f"Unexpected layout '{output_layout}'")

    np.testing.assert_allclose(
        output_np, transformed_expected_output_np, rtol=1e-3, atol=1e-3
    )

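# For reference, the "nhwc-8h2w32c2w-2d" reshape above implies a blocking of
# the logical (n, h, w, c) index along these lines (a sketch inferred from the
# reshape dimensions [b, h // 8, w // 4, c // 32, 8, 2, 32, 2]; the
# authoritative index map lives in the slice-op utilities):
#
#   (n, h, w, c) -> [n, h // 8, w // 4, c // 32, h % 8, (w % 4) // 2, c % 32, w % 2]
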
def test_conv2d(
    self,
    hexagon_session: Session,
    in_dtype,
    out_dtype,
    layout,
    input_shape,
    filter_shape,
    scale_shape,
    shift_shape,
    use_scale_shift,
    apply_relu,
    batch,
    in_channel,
    channel_multiplier,
    kernel,
    stride,
    padding,
    dilation,
    ref_data,
):
    """Test depthwise conv2d with optional scale-shift and ReLU"""
    target_hexagon = tvm.target.hexagon("v68")

    # Transform the padding argument from 'str' to 'tuple' to
    # match the "workload" tuple in TopHub.  The padding_args used
    # for each layout are chosen to reproduce previous behavior.
    if dilation == 1:
        padding_args = get_pad_tuple(padding, (kernel, kernel))
        padding_args_i = [0, 1, 2, 3] if layout == "NCHW" else [0, 1]
        padding_args = [padding_args[i] for i in padding_args_i]
    else:
        padding_args = padding

    # placeholder
    Input = te.placeholder(input_shape, name="Input", dtype=in_dtype)
    Filter = te.placeholder(filter_shape, name="Filter", dtype=in_dtype)
    Scale = te.placeholder(scale_shape, name="Scale", dtype=out_dtype)
    Shift = te.placeholder(shift_shape, name="Shift", dtype=out_dtype)

    if layout == "NCHW":
        topi_scale_shift = topi.nn.scale_shift_nchw
        fcompute_args = (Input, Filter, stride, padding_args, dilation, out_dtype)
    elif layout == "NHWC":
        topi_scale_shift = topi.nn.scale_shift_nhwc
        fcompute_args = (Input, Filter, stride, padding_args, dilation, out_dtype)
    elif layout == "NCHWc":
        topi_scale_shift = topi.nn.scale_shift_nchwc
        in_layout = "NCHW{}c".format(input_shape[-1])
        out_layout = "NCHW{}c".format(filter_shape[-1])
        fcompute_args = (
            Input,
            Filter,
            stride,
            padding,
            dilation,
            in_layout,
            out_layout,
            out_dtype,
        )

    with tvm.target.Target(target_hexagon):
        # Declare, build schedule
        if layout == "NCHW":
            fcompute = topi.nn.depthwise_conv2d_nchw
            fschedule = topi.hexagon.schedule_depthwise_conv2d_nchw
        elif layout == "NHWC":
            fcompute = topi.nn.depthwise_conv2d_nhwc
            fschedule = topi.hexagon.schedule_depthwise_conv2d_nhwc
        C = fcompute(*fcompute_args)
        if use_scale_shift:
            C = topi_scale_shift(C, Scale, Shift)
        if apply_relu:
            C = topi.nn.relu(C)

        s = fschedule([C])

    # Build and run
    f = tvm.build(
        s,
        [Input, Filter, Scale, Shift, C],
        tvm.target.Target(target_hexagon, host=target_hexagon),
    )
    mod = hexagon_session.load_module(f)

    input_np, filter_np, scale_np, shift_np, output_np = ref_data

    dev = hexagon_session.device
    input_tvm = tvm.nd.array(input_np, dev)
    filter_tvm = tvm.nd.array(filter_np, dev)
    scale_tvm = tvm.nd.array(scale_np, dev)
    shift_tvm = tvm.nd.array(shift_np, dev)
    output_tvm = tvm.nd.array(
        np.zeros(shape=get_const_tuple(C.shape), dtype=C.dtype),
        dev,
    )

    mod(input_tvm, filter_tvm, scale_tvm, shift_tvm, output_tvm)

    tol = {"rtol": 1e-4, "atol": 1e-5}
    tvm.testing.assert_allclose(output_np, output_tvm.numpy(), **tol)