import numpy as np
import pytest

import tvm
import tvm.testing
import tvm.topi.testing
from tvm import te, topi


def test_reduce_map(target, dev, ref_data, in_shape, axis, keepdims, reduce_type, dtype):
    target = tvm.target.Target(target)
    if target.kind.name == "vulkan" and reduce_type in ["sum", "any", "all"]:
        pytest.xfail(f"Vulkan backend has known errors on {reduce_type}")

    in_npy, in_npy_map, out_npy = ref_data

    # Build the logic and compile the function
    A = te.placeholder(shape=in_shape, name="A", dtype=dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = dtype
    if reduce_type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "all":
        B = topi.all(A, axis=axis, keepdims=keepdims)
    elif reduce_type == "any":
        B = topi.any(A, axis=axis, keepdims=keepdims)
    elif reduce_type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif reduce_type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    with tvm.target.Target(target):
        s = tvm.topi.testing.get_reduce_schedule(target)(B)

    foo = tvm.build(s, [A, B], target, name=reduce_type)

    data_tvm = tvm.nd.array(in_npy, device=dev)
    out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=out_dtype)
    foo(data_tvm, out_tvm)

    if reduce_type in ("argmax", "argmin"):
        out_tvm_indices = out_tvm.numpy()
        if keepdims:
            out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
        if axis is None:
            out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
        else:
            other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis + 1):]))
            sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
            out_tvm_val = in_npy_map[sel_indices]
        if reduce_type == "argmax":
            tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1e-3, 1e-3)
        elif reduce_type == "argmin":
            tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1e-3, 1e-3)
    else:
        tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3)

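# The test above consumes an (in_npy, in_npy_map, out_npy) triple from a
# ref_data fixture that is not shown here. A minimal sketch of how such a
# fixture could generate it, mirroring the sqrt(exp(x)) mapping above and
# NumPy's reductions for the expected output; the @tvm.testing.fixture
# decorator is assumed available, and argmax/argmin are assumed to use an
# integer (or None) axis:
@tvm.testing.fixture(cache_return_value=True)
def ref_data(in_shape, axis, keepdims, reduce_type, dtype):
    if dtype == "bool":
        in_npy_map = in_npy = np.random.choice([True, False], size=in_shape)
    else:
        in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype)
        in_npy_map = np.sqrt(np.exp(in_npy)).astype(dtype)

    if reduce_type == "sum":
        out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
    elif reduce_type == "all" and dtype == "bool":
        out_npy = in_npy_map.all(axis=axis, keepdims=keepdims)
    elif reduce_type == "any" and dtype == "bool":
        out_npy = in_npy_map.any(axis=axis, keepdims=keepdims)
    elif reduce_type == "max":
        out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
    elif reduce_type == "min":
        out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
    elif reduce_type in ("argmax", "argmin"):
        out_npy = getattr(in_npy_map, reduce_type)(axis=axis)
        if keepdims:
            # older NumPy argmax/argmin have no keepdims, so restore the axis
            out_shape = (
                [1] * len(in_shape)
                if axis is None
                else [1 if i == axis else s for i, s in enumerate(in_shape)]
            )
            out_npy = out_npy.reshape(out_shape)
    else:
        raise NotImplementedError
    return in_npy, in_npy_map, out_npy
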
def batch_norm_bwd(N, C, H, W, dtype="float32"):
    dshape = (N, C, H, W)
    oshape = (C,)
    bshape = (1, C, 1, 1)
    sshape = (1,)
    data = te.placeholder(dshape, name="data", dtype=dtype)
    scale = te.placeholder(oshape, name="scale", dtype=dtype)
    saved_mean = te.placeholder(oshape, name="saved_mean", dtype=dtype)
    saved_var = te.placeholder(oshape, name="saved_var", dtype=dtype)
    eps = te.placeholder(sshape, name="eps", dtype=dtype)
    grad_output = te.placeholder(dshape, name="grad_output", dtype=dtype)

    axis = (0, 2, 3)
    num_ele = dshape[0] * dshape[2] * dshape[3]
    frac_num_ele = 1.0 / num_ele

    # compute grad_input:
    # (1 / (N * sqrt(var + eps))) * (N * dx_hat - sum(dx_hat) - x_hat * sum(dx_hat * x_hat))
    mean_sum = topi.sum(data, axis, True)
    mean = topi.multiply(mean_sum, frac_num_ele)
    var_sub = topi.subtract(data, mean)
    var_mul = topi.multiply(var_sub, var_sub)
    var_sum = topi.sum(var_mul, axis, True)
    var = topi.multiply(var_sum, frac_num_ele)
    var_eps = topi.add(var, eps)
    output_sqrt = topi.sqrt(var_eps)
    x_norm = topi.subtract(data, mean)
    x_hat = topi.divide(x_norm, output_sqrt)
    dx_hat = topi.multiply(grad_output, topi.reshape(scale, bshape))
    grad_input_sum1 = topi.sum(dx_hat * x_hat, axis, True)
    grad_input_sum2 = topi.sum(dx_hat, axis, True)
    grad_input_left = topi.divide(frac_num_ele, topi.sqrt(var_eps))
    grad_input_right1 = topi.subtract(topi.multiply(dx_hat, num_ele), grad_input_sum2)
    grad_input_right2 = topi.multiply(x_hat, grad_input_sum1)
    grad_input = topi.multiply(
        grad_input_left, topi.subtract(grad_input_right1, grad_input_right2)
    )

    # compute grad_scale and grad_bias
    grad_scale = topi.sum(grad_output * x_hat, axis)
    grad_bias = topi.sum(grad_output, axis)

    return [
        data, scale, saved_mean, saved_var, eps, grad_output,
        grad_input, grad_scale, grad_bias,
    ]

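# A minimal NumPy cross-check for batch_norm_bwd (a sketch, not part of the TE
# graph above): it mirrors the grad_input/grad_scale/grad_bias formulas so a
# compiled build of the graph could be validated with
# tvm.testing.assert_allclose. Here eps is assumed to be a plain float.
def batch_norm_bwd_ref(data, scale, grad_output, eps):
    axis = (0, 2, 3)
    n = data.shape[0] * data.shape[2] * data.shape[3]
    mean = data.mean(axis=axis, keepdims=True)
    var = ((data - mean) ** 2).mean(axis=axis, keepdims=True)
    x_hat = (data - mean) / np.sqrt(var + eps)
    dx_hat = grad_output * scale.reshape(1, -1, 1, 1)
    # same grouping as the TE graph: left factor times (right1 - right2)
    grad_input = (
        dx_hat * n
        - dx_hat.sum(axis=axis, keepdims=True)
        - x_hat * (dx_hat * x_hat).sum(axis=axis, keepdims=True)
    ) / (n * np.sqrt(var + eps))
    grad_scale = (grad_output * x_hat).sum(axis=axis)
    grad_bias = grad_output.sum(axis=axis)
    return grad_input, grad_scale, grad_bias
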
def batch_norm_fwd(N, C, H, W, dtype="float32"):
    dshape = (N, C, H, W)
    oshape = (C,)
    bshape = (1, C, 1, 1)
    sshape = (1,)
    data = te.placeholder(dshape, name="data", dtype=dtype)
    scale = te.placeholder(oshape, name="scale", dtype=dtype)
    bias = te.placeholder(oshape, name="bias", dtype=dtype)
    running_mean = te.placeholder(oshape, name="running_mean", dtype=dtype)
    running_var = te.placeholder(oshape, name="running_var", dtype=dtype)
    eps = te.placeholder(sshape, name="eps", dtype=dtype)
    momentum = te.placeholder(sshape, name="momentum", dtype=dtype)

    axis = (0, 2, 3)
    num_ele = dshape[0] * dshape[2] * dshape[3]
    frac_num_ele = 1.0 / num_ele

    # compute batch mean
    mean_sum = topi.sum(data, axis, keepdims=True)
    saved_mean = topi.multiply(mean_sum, frac_num_ele)

    # compute batch rvars: sqrt(var + eps)
    var_sub = topi.subtract(data, saved_mean)
    var_mul = topi.multiply(var_sub, var_sub)
    var_sum = topi.sum(var_mul, axis, keepdims=True)
    var = topi.multiply(var_sum, frac_num_ele)
    output_add = topi.add(var, eps)
    saved_rvars = topi.sqrt(output_add)

    # compute output: (data - mean) / sqrt(var + eps) * scale + bias
    output_sub = topi.subtract(data, saved_mean)
    output_norm = topi.divide(output_sub, saved_rvars)
    scale_board = topi.reshape(scale, bshape)
    bias_board = topi.reshape(bias, bshape)
    output = topi.add(topi.multiply(output_norm, scale_board), bias_board)

    # reshape saved_rvars
    saved_rvars = topi.reshape(saved_rvars, oshape)

    # update running mean: (1 - momentum) * running_mean + momentum * batch_mean
    running_mean_mul1 = topi.multiply(running_mean, topi.subtract(1.0, momentum))
    running_mean_mul2 = topi.multiply(topi.reshape(saved_mean, oshape), momentum)
    running_mean_out = topi.add(running_mean_mul1, running_mean_mul2)

    # update running var: (1 - momentum) * running_var + momentum * batch_var
    saved_var_mul1 = topi.multiply(running_var, topi.subtract(1.0, momentum))
    saved_var_mul2 = topi.multiply(topi.reshape(var, oshape), momentum)
    running_var_out = topi.add(saved_var_mul1, saved_var_mul2)

    # reshape saved_mean
    saved_mean = topi.reshape(saved_mean, oshape)

    return [
        data, scale, bias, running_mean, running_var, momentum, eps,
        output, saved_mean, saved_rvars, running_mean_out, running_var_out,
    ]

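# A minimal NumPy cross-check for batch_norm_fwd (a sketch): it follows the
# same normalization and running-statistics updates as the TE graph above.
# eps and momentum are assumed to be plain floats here, and the running-var
# update uses the biased batch variance (before eps), matching the graph.
def batch_norm_fwd_ref(data, scale, bias, running_mean, running_var, momentum, eps):
    axis = (0, 2, 3)
    mean = data.mean(axis=axis)
    var = data.var(axis=axis)  # biased variance, matching the 1/N factor above
    rvars = np.sqrt(var + eps)
    out = (data - mean.reshape(1, -1, 1, 1)) / rvars.reshape(1, -1, 1, 1)
    out = out * scale.reshape(1, -1, 1, 1) + bias.reshape(1, -1, 1, 1)
    new_running_mean = (1.0 - momentum) * running_mean + momentum * mean
    new_running_var = (1.0 - momentum) * running_var + momentum * var
    return out, mean, rvars, new_running_mean, new_running_var
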
# Note: this variant predates the fixture-based tests above and uses the
# legacy TVM API (ctx= keyword arguments, asnumpy(), enabled_targets()).
def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32"):
    # Build the logic and compile the function
    A = te.placeholder(shape=in_shape, name="A", dtype=dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = dtype
    if type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif type == "all":
        B = topi.all(A, axis=axis, keepdims=keepdims)
    elif type == "any":
        B = topi.any(A, axis=axis, keepdims=keepdims)
    elif type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            s = tvm.topi.testing.get_reduce_schedule(device)(B)
        foo = tvm.build(s, [A, B], device, name=type)

        # Test
        if dtype == "bool":
            in_npy_map = in_npy = np.random.choice([True, False], size=in_shape)
        else:
            in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype)
            in_npy_map = np.sqrt(np.exp(in_npy)).astype(dtype)

        if type == "sum":
            out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
        elif type == "all" and dtype == "bool":
            out_npy = in_npy_map.all(axis=axis, keepdims=keepdims)
        elif type == "any" and dtype == "bool":
            out_npy = in_npy_map.any(axis=axis, keepdims=keepdims)
        elif type == "max":
            out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
        elif type == "min":
            out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
        elif type == "argmax":
            out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
        elif type == "argmin":
            out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
        else:
            raise NotImplementedError

        data_tvm = tvm.nd.array(in_npy, ctx=ctx)
        out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
        foo(data_tvm, out_tvm)
        if type == "argmax" or type == "argmin":
            out_tvm_indices = out_tvm.asnumpy()
            if keepdims:
                out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
            if axis is None:
                out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
            else:
                other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis + 1):]))
                sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
                out_tvm_val = in_npy_map[sel_indices]
            if type == "argmax":
                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1e-3, 1e-3)
            elif type == "argmin":
                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1e-3, 1e-3)
        else:
            tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1e-3, 1e-3)

    for device, ctx in tvm.testing.enabled_targets():
        check_device(device, ctx)

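# verify_reduce_map_ele references _my_npy_argmax/_my_npy_argmin without
# defining them. A plausible sketch, assuming the helpers exist only to add
# keepdims support, which older NumPy argmax/argmin lack:
def _my_npy_argmax(arr, axis, keepdims):
    out = arr.argmax(axis=axis)
    if not keepdims:
        return out
    if axis is None:
        return out.reshape((1,) * arr.ndim)
    return np.expand_dims(out, axis=axis)


def _my_npy_argmin(arr, axis, keepdims):
    out = arr.argmin(axis=axis)
    if not keepdims:
        return out
    if axis is None:
        return out.reshape((1,) * arr.ndim)
    return np.expand_dims(out, axis=axis)
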
from tvm.contrib.hexagon.session import Session


def test_reduce_map(hexagon_session: Session, ref_data, in_shape, axis, keepdims, reduce_type, dtype):
    in_npy, in_npy_map, out_npy = ref_data

    # Build the logic and compile the function
    A = te.placeholder(shape=in_shape, name="A", dtype=dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = dtype
    if reduce_type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "all":
        B = topi.all(A, axis=axis, keepdims=keepdims)
    elif reduce_type == "any":
        B = topi.any(A, axis=axis, keepdims=keepdims)
    elif reduce_type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif reduce_type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif reduce_type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    target_hexagon = tvm.target.hexagon("v68")
    with tvm.target.Target(target_hexagon):
        fschedule = topi.hexagon.schedule_reduce
        s = fschedule(B)

    func = tvm.build(
        s, [A, B], tvm.target.Target(target_hexagon, host=target_hexagon), name=reduce_type
    )
    mod = hexagon_session.load_module(func)

    dev = hexagon_session.device
    data_tvm = tvm.nd.array(in_npy, device=dev)
    out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=out_dtype)
    mod[reduce_type](data_tvm, out_tvm)

    if reduce_type in ("argmax", "argmin"):
        out_tvm_indices = out_tvm.numpy()
        if keepdims:
            out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
        if axis is None:
            out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
        else:
            other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis + 1):]))
            sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
            out_tvm_val = in_npy_map[sel_indices]
        if reduce_type == "argmax":
            tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1e-3, 1e-3)
        elif reduce_type == "argmin":
            tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1e-3, 1e-3)
    else:
        tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3)

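# The parameters consumed by the fixture-based tests above (in_shape, axis,
# keepdims, reduce_type, dtype) are expected to come from pytest fixtures.
# A sketch of a plausible declaration via tvm.testing.parameters; the concrete
# shape/axis tuples below are illustrative, not the project's actual matrix:
in_shape, axis, keepdims, reduce_type, dtype = tvm.testing.parameters(
    ((32,), 0, False, "argmax", "float32"),
    ((128, 24, 128, 24), (1, 2, 3), True, "sum", "float32"),
    ((2, 3), None, True, "all", "bool"),
    ((128, 24, 128, 24), (0, 2), False, "min", "float32"),
    ((32, 128), 1, True, "argmax", "float32"),
)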