def kldiv_loss_run(shape, dtype, reduction='none', kernel_name="kldiv_loss", attrs=None):
    """Build the kldiv_loss op and, outside tuning mode, launch and check it.

    Args:
        shape: Shape used for both op inputs (prediction and target).
        dtype: Data type used for both op inputs.
        reduction: Value forwarded to the op as its single op attribute.
        kernel_name: Kernel name for the build. When ``attrs`` has a "tuning"
            key it is replaced by ``attrs.get("kernel_name", False)``.
        attrs: Build attribute dict; ``None`` is treated as an empty dict.
            The dict is modified in place ("enable_multicore" is set when
            ``product_is_mini()`` is false).

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, args)`` when the
        tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((prediction, target), output, expect, compare_result)``.
    """
    # A None default used to crash on the first attrs access below.
    if attrs is None:
        attrs = {}

    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    op_attrs = [reduction]
    if not product_is_mini():
        attrs['enable_multicore'] = True

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(kldiv_loss.kldiv_loss, input_shape, input_dtype, op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if not t:
            return mod
        expect, output, prediction, target = gen_data(dtype, reduction, shape)
        return mod, expect, (prediction, target, output)

    mod = utils.op_build_test(kldiv_loss.kldiv_loss, input_shape, input_dtype, op_attrs,
                              kernel_name=kernel_name, attrs=attrs)
    expect, output, prediction, target = gen_data(dtype, reduction, shape)
    output = utils.mod_launch(mod, (prediction, target, output), expect=expect)
    return (prediction, target), output, expect, compare_tensor(output, expect, rtol=0.005, atol=0.005)
def test_fused_bn_reduce_grad(in_shape, in_dtype="float16", layout='NHWC', out_dtype='float16', poly_sch=False):
    """Build, launch, check and profile the fused_bn_reduce_grad op.

    Args:
        in_shape: Shape forwarded to ``gen_data``.
        in_dtype: Dtype used for the 4th and 8th op inputs.
        layout: Either "NHWC" or "NCHW"; forwarded as an op attribute.
        out_dtype: Output dtype; forwarded as an op attribute.
        poly_sch: If true, build the ``_auto`` op with ``{"target": "cuda"}``
            attrs; otherwise build the ``_manual`` op.

    Raises:
        NotImplementedError: If ``layout`` is neither "NHWC" nor "NCHW".
        AssertionError: If the launched output does not match ``expect``.
    """
    if layout not in ("NHWC", "NCHW"):
        raise NotImplementedError('Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout, out_dtype)
    shape_list = [tensor.shape for tensor in inputs]
    # Same pattern twice: three inter_dtype entries followed by one in_dtype.
    dtype_list = ([inter_dtype] * 3 + [in_dtype]) * 2
    op_attrs = [layout, out_dtype]

    if poly_sch:
        mod = utils.op_build(fused_bn_reduce_grad_auto, shape_list, dtype_list,
                             op_attrs=op_attrs, attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_bn_reduce_grad_manual, shape_list, dtype_list, op_attrs=op_attrs)

    output = utils.mod_launch(mod, inputs + [output], expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
def strided_slice_execute(shape, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask,
                          shrink_axis_mask, dtype, attrs):
    """Compile the strided_slice op and, outside tuning mode, launch and check it.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, (input, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``(input, output, expect, compare_result)``.
    """
    compile_args = (shape, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask,
                    shrink_axis_mask, dtype, attrs)

    def _make_data():
        return gen_data(begin, begin_mask, dtype, ellipsis_mask, end, end_mask, new_axis_mask, shape,
                        shrink_axis_mask, strides)

    if 'tuning' not in attrs.keys():
        mod = strided_slice_compile(*compile_args)
        expect, data, out_buf = _make_data()
        result = utils.mod_launch(mod, (data, out_buf), expect=expect)
        rtol, atol = get_rtol_atol("strided_slice", dtype)
        return data, result, expect, compare_tensor(result, expect, rtol=rtol, atol=atol, equal_nan=True)

    tuning = attrs.get("tuning", False)
    mod = strided_slice_compile(*compile_args, kernel_name=attrs.get("kernel_name", False), tuning=tuning)
    if not tuning:
        return mod
    expect, data, out_buf = _make_data()
    return mod, expect, (data, out_buf)
def gelu_ad_run(shape, dtype, attrs):
    """Build the gelu_ad op and, outside tuning mode, launch and check it.

    ``gelu_ad.gelu_ad_custom`` is built when ``dtype`` is 'float16' and
    ``product_is_mini()`` is false; otherwise ``gelu_ad.gelu_ad`` is built.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, (head, input, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((input, head), output, expect, compare_result)``.
    """
    tuning_mode = 'tuning' in attrs.keys()

    if dtype == 'float16' and not product_is_mini():
        op = gelu_ad.gelu_ad_custom
    else:
        op = gelu_ad.gelu_ad
    shapes = [shape, shape]
    dtypes = [dtype, dtype]

    if tuning_mode:
        t = attrs.get("tuning", False)
        mod = utils.op_build_test(op, shapes, dtypes, kernel_name=attrs.get("kernel_name", False),
                                  attrs=attrs, tuning=t)
        if not t:
            return mod
        input_np, head_np, output, expect = gelu_grad_data(shape, dtype)
        return mod, expect, (head_np, input_np, output)

    mod = utils.op_build_test(op, shapes, dtypes, kernel_name="gelu_ad", attrs=attrs)
    input_np, head_np, output, expect = gelu_grad_data(shape, dtype)
    output = utils.mod_launch(mod, (head_np, input_np, output), expect=expect)
    rtol, atol = get_rtol_atol("gelu_ad", dtype)
    matched = compare_tensor(output, expect, rtol=rtol, atol=atol)
    return (input_np, head_np), output, expect, matched
def insn_vec_binary_elemwise_run(shape, dtype, attrs):
    """Build the insn_vec_binary_elemwise op and, outside tuning mode, launch and check it.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, exp_output, args)`` when the
        tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``(inputs, actual_output, exp_output, compare_result)``.
    """
    op = insn_vec_binary_elemwise.insn_vec_binary_elemwise
    shapes = [shape, shape]
    dtypes = [dtype, dtype]

    if 'tuning' not in attrs.keys():
        mod = utils.op_build_test(op, shapes, dtypes, kernel_name='insn_vec_binary_elemwise', attrs=attrs)
        args, exp_output, inputs = gen_data(dtype, shape)
        actual = utils.mod_launch(mod, args, expect=exp_output)
        matched = compare_tensor(actual, exp_output, rtol=5e-03, equal_nan=True)
        return inputs, actual, exp_output, matched

    t = attrs.get("tuning", False)
    mod = utils.op_build_test(op, shapes, dtypes, kernel_name=attrs.get("kernel_name", False),
                              attrs=attrs, tuning=t)
    if not t:
        return mod
    args, exp_output, inputs = gen_data(dtype, shape)
    return mod, exp_output, args
def test_ms_divide(shape, dtype, poly_sch=False):
    """Build, launch, check and profile the divide op.

    Args:
        shape: Shape used for both operands.
        dtype: Data type used for both operands.
        poly_sch: If true, build ``divide_auto`` with ``{"target": "cuda"}``
            attrs; otherwise build ``divide_manual``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    if poly_sch:
        mod = utils.op_build(divide_auto, [shape, shape], [dtype, dtype], attrs={"target": "cuda"})
    else:
        mod = utils.op_build(divide_manual, [shape, shape], [dtype, dtype])

    lhs, rhs, output, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ret = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:==========================")
        # Was misspelled `get_soure`, which raised AttributeError here and
        # masked the real failure.
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
def approximate_equal_run(x_shape, x_dtype, y_shape, y_dtype, tolerance=None, attrs=None):
    """Build and launch the approximate_equal op and compare it with the benchmark.

    Args:
        x_shape, y_shape: Shapes of the two op inputs.
        x_dtype, y_dtype: Dtypes of the two op inputs.
        tolerance: Forwarded as the single op attribute when truthy;
            otherwise no op attributes are passed.
        attrs: Build attributes forwarded unchanged to ``utils.op_build_test``.

    Returns:
        ``(inputs, output, benchmark, equal)`` where ``equal`` is the result
        of ``np.array_equal(output, benchmark)``.
    """
    shapes = [x_shape, y_shape]
    dtypes = [x_dtype, y_dtype]
    op_attrs = [tolerance] if tolerance else None

    mod = utils.op_build_test(approximate_equal, shapes, dtypes, op_attrs,
                              kernel_name="approximate_equal", attrs=attrs)
    bench_mark, inputs, output = gen_data(x_dtype, shapes, tolerance)
    output = utils.mod_launch(mod, inputs + [output], expect=bench_mark)
    return inputs, output, bench_mark, np.array_equal(output, bench_mark)
def test_fused_is_finite(shape, layout='NHWC', poly_sch=False):
    """Build, launch, check and profile the fused_is_finite op (float32 input).

    Args:
        shape: Shape of the single op input.
        layout: Forwarded as the single op attribute and to ``gen_data``.
        poly_sch: Must be true; only a build with ``{"target": "cuda"}``
            attrs is defined in this function.

    Raises:
        NotImplementedError: If ``poly_sch`` is false. Previously this path
            reached ``mod_launch`` with ``mod`` unbound and failed with
            UnboundLocalError.
        AssertionError: If the launched output does not match ``expect``.
    """
    if not poly_sch:
        raise NotImplementedError("fused_is_finite is only built when poly_sch=True")

    dtype = "float32"
    mod = utils.op_build_test(fused_is_finite, [shape], [dtype], op_attrs=[layout],
                              kernel_name="fused_is_finite", attrs={"target": "cuda"})

    data, expect, output = gen_data(shape, dtype, layout)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
def test_ms_trans_data(shape, axes, dtype, poly_sch=False):
    """Build, launch, check and profile the trans_data op.

    Args:
        shape: Shape of the single op input.
        axes: Forwarded as the single op attribute and to ``gen_data``.
        dtype: Data type of the op input.
        poly_sch: If true, build ``trans_data_auto`` with
            ``{"target": "cuda"}`` attrs; otherwise build ``trans_data_manual``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    if poly_sch:
        mod = utils.op_build_test(trans_data_auto, [shape], [dtype], op_attrs=[axes],
                                  kernel_name="trans_data_auto", attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(trans_data_manual, [shape], [dtype], op_attrs=[axes],
                                  kernel_name="trans_data_manual")

    data, output, expect = gen_data(shape, axes, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        # A mismatch used to be printed only, so the test could never fail.
        # Handle it the same way as the other test_* functions in this file.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
def focalloss_ad_run(shape, p_dtype, t_dtype, gamma, kernel_name, attrs):
    """Build the focalloss_ad op and, outside tuning mode, launch and check it.

    A random ``head`` array of shape ``shape[:2]`` and dtype ``p_dtype`` is
    generated first and used as the first op input.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, (head, pred, targ, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``([head, pred, targ, gamma], output, expect, compare_result)``.
    """
    head = np.random.rand(*shape[:2]).astype(p_dtype)
    shapes = [head.shape, shape, shape]
    dtypes = [p_dtype, p_dtype, t_dtype]

    if 'tuning' not in attrs.keys():
        mod = utils.op_build_test(focalloss_ad, shapes, dtypes, op_attrs=[gamma],
                                  kernel_name=kernel_name, attrs=attrs)
        expect, output, pred, targ = gen_data(gamma, head, p_dtype, shape, t_dtype)
        output = utils.mod_launch(mod, [head, pred, targ, output], expect=expect)
        matched = compare_tensor(output, expect, rtol=5e-03, atol=5e-03, equal_nan=True)
        return [head, pred, targ, gamma], output, expect, matched

    t = attrs.get("tuning", False)
    kernel_name = attrs.get("kernel_name", False)
    mod = utils.op_build_test(focalloss_ad, shapes, dtypes, op_attrs=[gamma],
                              kernel_name=kernel_name, attrs=attrs, tuning=t)
    if not t:
        return mod
    expect, output, pred, targ = gen_data(gamma, head, p_dtype, shape, t_dtype)
    return mod, expect, (head, pred, targ, output)
def test_ms_addn(shape, dtype, n, poly_sch=False):
    """Build, launch, check and profile the addn op over ``n`` inputs of ``shape``.

    Args:
        shape: Shape of every input.
        dtype: Data type passed to the build and to ``gen_data``.
        n: Number of inputs.
        poly_sch: If true, build ``addn_auto`` with ``{"target": "cuda"}``
            attrs; otherwise build ``addn_manual``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    shapes = [shape for _ in range(n)]

    if poly_sch:
        mod = utils.op_build_test(addn_auto, [shapes], [dtype], attrs={"target": "cuda"},
                                  kernel_name="addn_auto")
    else:
        mod = utils.op_build_test(addn_manual, [shapes], [dtype], kernel_name="addn_manual")

    expect, inputs, output = gen_data(shape, shapes, dtype, n)
    output = utils.mod_launch(mod, (*inputs, output), expect=expect)
    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
def cholesky_run(shape, dtype, attrs):
    """Build the cholesky op and, outside tuning mode, launch and check it.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, exp_output, (inputs, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``(inputs, actual_output, exp_output, allclose_result)``.
    """
    if 'tuning' not in attrs.keys():
        mod = utils.op_build(cholesky.cholesky, [shape], [dtype], kernel_name='cholesky', attrs=attrs)
        exp_output, inputs, output = gen_data(dtype, shape)
        actual = utils.mod_launch(mod, (inputs, output), expect=exp_output)
        matched = np.allclose(actual, exp_output, rtol=5e-03, equal_nan=True)
        return inputs, actual, exp_output, matched

    t = attrs.get("tuning", False)
    mod = utils.op_build(cholesky.cholesky, [shape], [dtype], kernel_name=attrs.get("kernel_name", False),
                         attrs=attrs, tuning=t)
    if not t:
        return mod
    exp_output, inputs, output = gen_data(dtype, shape)
    return mod, exp_output, (inputs, output)
def test_ms_select(shape_cond, shape_x, dtype_cond, dtype_x, poly_sch=False):
    """Build, launch and check the select op.

    Args:
        shape_cond: Shape of the condition input.
        shape_x: Shape of both value inputs.
        dtype_cond: Data type of the condition input.
        dtype_x: Data type of both value inputs.
        poly_sch: If true, build ``select_auto`` with ``{"target": "cuda"}``
            attrs; otherwise build ``select_manual``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    shapes = [shape_cond, shape_x, shape_x]
    dtypes = [dtype_cond, dtype_x, dtype_x]

    if poly_sch:
        mod = utils.op_build_test(select_auto, shapes, dtypes, kernel_name="select_auto",
                                  attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(select_manual, shapes, dtypes, kernel_name="select_manual")

    expect, cond, x1, x2, output = gen_data(shape_cond, shape_x, dtype_cond, dtype_x)
    output = utils.mod_launch(mod, (cond, x1, x2, output), expect=expect)
    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if passed:
        return
    print("Error cuda:========================")
    print(mod.imported_modules[0].get_source())
    raise AssertionError("Test fail")
def test_ms_exp(shape, dtype, poly_sch=False):
    """Build, launch and check the exp op.

    Args:
        shape: Shape of the single op input.
        dtype: Data type of the op input.
        poly_sch: Must be true; only a build with ``{"target": "cuda"}``
            attrs is defined in this function.

    Returns:
        ``True`` when the launched output matches ``expect``.

    Raises:
        NotImplementedError: If ``poly_sch`` is false. Previously this path
            reached ``mod_launch`` with ``mod`` unbound and failed with
            UnboundLocalError.
        AssertionError: If the launched output does not match ``expect``.
    """
    if not poly_sch:
        raise NotImplementedError("exp is only built when poly_sch=True")

    mod = utils.op_build_test(exp, [shape], [dtype], attrs={"target": "cuda"}, kernel_name="exp")

    data, output, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    return True
def reverse_run(shape, dtype, axis, attrs=None):
    """Build and launch the reverse op and compare it with the expected output.

    Args:
        shape: Shape of the single op input.
        dtype: Data type of the op input; also selects the tolerances.
        axis: Forwarded as the single op attribute and to ``gen_data``.
        attrs: Build attributes; ``None`` is replaced by an empty dict.

    Returns:
        ``(x, actual_output, exp_output, compare_result)``.
    """
    build_attrs = {} if attrs is None else attrs
    mod = utils.op_build_test(reverse.reverse, [shape], [dtype], kernel_name='reverse',
                              op_attrs=[axis], attrs=build_attrs)
    args, exp_output, x = gen_data(dtype, shape, axis)
    actual = utils.mod_launch(mod, args, expect=exp_output)

    rtol, atol = get_rtol_atol("reverse", dtype)
    matched = compare_tensor(actual, exp_output, rtol=rtol, atol=atol, equal_nan=True)
    return x, actual, exp_output, matched
def test_ms_rsqrt(shape1, dtype, poly_sch=False):
    """Build, launch, check and profile the rsqrt op.

    Args:
        shape1: Shape of the single op input.
        dtype: Data type of the op input.
        poly_sch: If true, build ``rsqrt_auto`` with ``{"target": "cuda"}``
            attrs; otherwise build ``rsqrt_manual``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    if poly_sch:
        mod = utils.op_build_test(rsqrt_auto, (shape1, ), (dtype, ), attrs={"target": "cuda"},
                                  kernel_name="rsqrt_auto")
    else:
        # The manual build used the name "rsqrt_auto" (copy-paste slip); name
        # the kernel after the op that is actually built, as sibling tests do.
        mod = utils.op_build_test(rsqrt_manual, (shape1, ), (dtype, ), kernel_name="rsqrt_manual")

    expect, input1, output = gen_data(dtype, shape1)
    args = (input1, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    input1, expect = to_tvm_nd_array([input1, expect])
    gpu_profiling(mod, input1, expect, 400)
def less_equal_execute(shapes, dtype, kernel_name, attrs):
    """Compile the less_equal op and, outside tuning mode, launch and check it.

    Args:
        shapes: Input shapes forwarded to the compile step and ``gen_data``.
        dtype: Data type of the inputs; also selects the tolerances.
        kernel_name: Kernel name for the build. When ``attrs`` has a "tuning"
            key it is replaced by ``attrs.get("kernel_name", False)``.
        attrs: Build attribute dict.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, args)`` when the
        tuning value is truthy, otherwise only ``mod``. ``args`` is the list
        of inputs followed by the output buffer.
        Otherwise: ``(inputs, output, expect, compare_result)``.
    """
    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = less_equal_compile(shapes, dtype, kernel_name, attrs, tuning=t)
        if not t:
            return mod
        expect, inputs, output = gen_data(dtype, shapes)
        # Was `inputs + output`, which does not append the output buffer to
        # the input list; build the argument list as the launch path does.
        return mod, expect, inputs + [output]

    mod = less_equal_compile(shapes, dtype, kernel_name, attrs)
    expect, inputs, output = gen_data(dtype, shapes)
    output = utils.mod_launch(mod, inputs + [output], expect=expect)
    rtol, atol = get_rtol_atol("less_equal", dtype)
    return inputs, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
def get_result(desc, attrs=None):
    """Build a composite op from ``desc``, launch it and report whether it matches.

    Args:
        desc: Description passed to ``gen_json_data`` and ``composite.build``.
        attrs: Optional build attributes; passed to ``composite.build`` only
            when truthy.

    Returns:
        ``True`` if every output compares equal to its expected value within
        the "FUSED"/"float32" tolerances, else ``False``.
    """
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    mod = composite.build(desc, attrs) if attrs else composite.build(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    rtol, atol = get_rtol_atol("FUSED", "float32")

    if len(output_indexes) <= 1:
        return bool(compare_tensor(output, expect, rtol=rtol, atol=atol))

    # Several outputs: compare each output with its expected value pairwise.
    return all(compare_tensor(actual, wanted, rtol=rtol, atol=atol)
               for actual, wanted in zip(output, expect))
def strided_slice_ad_run(input_shape, begin, end, strides, dtype, attrs_op=None, cce_path="./", attrs=None):
    """Build the strided_slice_ad op and, outside tuning mode, launch and check it.

    Args:
        input_shape: Shape of the second op input.
        begin, end, strides: Per-axis slice parameters; the first input's
            shape is computed as ``(end - begin) // stride`` per axis.
        dtype: Data type of both inputs.
        attrs_op: Optional extra attributes merged into ``attrs``.
        cce_path: Unused in this function.
        attrs: Build attribute dict; ``None`` means a fresh empty dict.
            A dict passed by the caller is updated in place with ``attrs_op``.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, (H_data, input1, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((H_data, input1), output, expect, compare_result)``.
    """
    # The defaults used to be shared `{}` literals; `attrs.update` mutated the
    # shared default, so keys such as "tuning" leaked into later calls.
    if attrs is None:
        attrs = {}
    out_shape = [(e - b) // s for b, e, s in zip(begin, end, strides)]
    if attrs_op:
        attrs.update(attrs_op)

    op = strided_slice_ad.strided_slice_ad
    shapes = [out_shape, input_shape]
    dtypes = [dtype, dtype]
    op_attrs = [begin, end, strides, dtype]

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(op, shapes, dtypes, op_attrs, kernel_name, attrs, tuning=t)
        if not t:
            return mod
        H_data, expect, input1, output = gen_data(begin, dtype, end, input_shape, out_shape, strides)
        return mod, expect, (H_data, input1, output)

    mod = utils.op_build_test(op, shapes, dtypes, op_attrs, "strided_slice_ad", attrs)
    H_data, expect, input1, output = gen_data(begin, dtype, end, input_shape, out_shape, strides)
    output = utils.mod_launch(mod, (H_data, input1, output), expect=expect)
    return (H_data, input1), output, expect, compare_tensor(output, expect, rtol=0.1, equal_nan=True)
def square_difference_ad_run(shape1, shape2, dtype, kernel_name, attrs, cce_path="./"):
    """Build the square_difference_ad op and, outside tuning mode, launch and check it.

    ``kernel_name`` is only a placeholder: in tuning mode it is replaced by
    ``attrs.get("kernel_name", False)``, otherwise the fixed name
    'square_difference_ad' is used. ``cce_path`` is unused.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, (head, input1, input2, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((head, input1, input2), output, expect, compare_result)``.
    """
    tuning_mode = 'tuning' in attrs.keys()
    op = square_difference_ad.square_difference_ad
    expect, input1, input2, out_shape, support_list = gen_input_data(dtype, shape1, shape2)
    shapes = [out_shape, shape1, shape2]
    dtypes = [dtype, dtype, dtype]

    if tuning_mode:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(op, input_shapes=shapes, input_types=dtypes,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if not t:
            return mod
        expect, head_np, output = gen_data(dtype, expect, out_shape, support_list)
        return mod, expect, (head_np, input1, input2, output)

    expect, head_np, output = gen_data(dtype, expect, out_shape, support_list)
    mod = utils.op_build_test(op, input_shapes=shapes, input_types=dtypes,
                              kernel_name='square_difference_ad', attrs=attrs)
    output = utils.mod_launch(mod, (head_np, input1, input2, output), expect=expect)
    matched = compare_tensor(output, expect, rtol=5e-03, equal_nan=True)
    return (head_np, input1, input2), output, expect, matched
def csr_gather_run(shape, dtype1, dtype2, nnz=-1, poly_sch=True, attrs=None):
    """Build, launch and check the csr_gather op, optionally profiling it.

    Args:
        shape: Shape of the dense input; also the single op attribute.
        dtype1: Data type of the dense input; also selects the tolerances.
        dtype2: Data type of both index inputs.
        nnz: Forwarded to ``gen_data``.
        poly_sch: Forwarded to the build as ``polyhedral``.
        attrs: Build attribute dict. A falsy value is replaced by
            ``{"target": "cuda"}``. "is_csr" is set to True on it. If it has
            a truthy "profiling" entry, "repeat_time" must be present too.

    Returns:
        ``((dense, col_idx, row_idx), output, expect, compare_result)``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    if not attrs:
        attrs = {"target": "cuda"}

    op_attrs = [shape]
    dense, col_idx, row_idx, expect = gen_data(shape, dtype1, dtype2, nnz=nnz)
    attrs["is_csr"] = True
    mod = utils.op_build_test(csr_gather, [shape, col_idx.shape, row_idx.shape], [dtype1, dtype2, dtype2],
                              op_attrs=op_attrs, polyhedral=poly_sch, attrs=attrs, kernel_name="csr_gather")

    # A 0-d expected result is launched with a one-element output buffer.
    output_shape = expect.shape if len(expect.shape) != 0 else (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (dense, col_idx, row_idx, output), expect=expect)

    # Was unpacked as `atol, rtol`, which swapped the two tolerances relative
    # to every other get_rtol_atol call site.
    rtol, atol = get_rtol_atol("csr_gather", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))

    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod if target_name == "llvm" else mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    # `attrs["profiling"]` raised KeyError whenever the key was absent,
    # including with the default attrs built above.
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array([dense, col_idx, row_idx, output, expect],
                                    akg.tvm.context(target_name, 0))
        # NOTE(review): minimum_run reads attrs["repeat_times"] for the same
        # purpose; confirm which key callers actually set.
        target_profiling(mod, *args_list, target=target_name, repeat_time=attrs["repeat_time"])
    return (dense, col_idx, row_idx), output, expect, res
def test_composite_stitch(ci_path):
    """Build and check every composite description file found in ``ci_path``.

    Each file is built once with ``enable_akg_reduce_lib`` enabled and then
    launched up to three times on freshly generated data; a file passes as
    soon as one run matches the expected outputs.

    Raises:
        ValueError: If at least one file never matched.
    """
    rtol = 0.001
    atol = 0.005
    max_run_times = 3

    def _run_once(mod, desc):
        """Generate data, launch ``mod`` once and report whether it matched."""
        input_for_mod, expect, output_indexes = gen_json_data(desc)
        output = utils.mod_launch(mod, input_for_mod, output_indexes)
        if len(output_indexes) > 1:
            return all(compare_tensor(actual, wanted, rtol=rtol, atol=atol)
                       for actual, wanted in zip(output, expect))
        return bool(compare_tensor(output, expect, rtol=rtol, atol=atol))

    all_passed = True
    for fi in os.listdir(ci_path):
        with open(os.path.join(ci_path, fi), 'r') as f:
            print(
                "\033[94m%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%file: \033[0m",
                fi)
            desc = f.read()

        poly = True
        attrs = {"enable_akg_reduce_lib": poly}
        mod = composite.build(desc, attrs, poly=poly)

        # Stops at the first matching run; later runs are not executed.
        case_passed = any(_run_once(mod, desc) for _ in range(max_run_times))
        if case_passed:
            logging.info("\033[92mComposite Json {} pass!\033[0m".format(fi))
        else:
            logging.info("\033[91mComposite Json {} fail!\033[0m".format(fi))
        all_passed = all_passed and case_passed

    if not all_passed:
        raise ValueError("Precision Error")
    logging.info("All ops are ok!")
def test_fused_bn_update_grad(shape, out_shape, dtype="float16", out_dtype="float32", layout="NHWC", poly_sch=False):
    """Build, launch, check and profile the fused_bn_update_grad op.

    Args:
        shape: Shape of the first and third op inputs.
        out_shape: Shape of the second op input.
        dtype: Data type of the first and third op inputs.
        out_dtype: Data type of the second op input.
        layout: Forwarded as the single op attribute and to ``gen_data``.
        poly_sch: Must be true; only a build with cuda target attrs is
            defined in this function.

    Raises:
        NotImplementedError: If ``poly_sch`` is false. Previously this path
            reached ``mod_launch`` with ``mod`` unbound and failed with
            UnboundLocalError.
        AssertionError: If the launched outputs do not match ``expect``.
    """
    if not poly_sch:
        raise NotImplementedError("fused_bn_update_grad is only built when poly_sch=True")

    shape_list = [shape, out_shape, shape]
    dtype_list = [dtype, out_dtype, dtype]
    op_attrs = [layout]
    mod = utils.op_build_test(fused_bn_update_grad, shape_list, dtype_list, op_attrs=op_attrs,
                              kernel_name="fused_bn_update_grad",
                              attrs={
                                  "target": "cuda",
                                  "enable_akg_reduce_lib": True,
                                  "enable_atomic_add": True
                              })

    head, data_sum, in_bn, output, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    # The same buffer object is passed for both output slots, as before.
    outputs = [output, output]
    inputs = [head, data_sum, in_bn]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)), expect=expect)
    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
def abs_run(shape, dtype, attrs):
    """Build the abs op and, outside tuning mode, launch and check it.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, exp_output, (inputs, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``(inputs, actual_output, exp_output, compare_result)``.
    """
    shapes = [shape]
    dtypes = [dtype]

    if 'tuning' not in attrs.keys():
        mod = utils.op_build_test(abs.abs_value, shapes, dtypes, kernel_name='abs', attrs=attrs)
        exp_output, inputs, output = gen_date(dtype, shape)
        actual = utils.mod_launch(mod, (inputs, output), expect=exp_output)
        rtol, atol = get_rtol_atol("abs", dtype)
        matched = compare_tensor(actual, exp_output, rtol=rtol, atol=atol, equal_nan=True)
        return inputs, actual, exp_output, matched

    t = attrs.get("tuning", False)
    mod = utils.op_build_test(abs.abs_value, shapes, dtypes, kernel_name=attrs.get("kernel_name", False),
                              attrs=attrs, tuning=t)
    if not t:
        return mod
    exp_output, inputs, output = gen_date(dtype, shape)
    return mod, exp_output, (inputs, output)
def matmul4d_ad_run(shape_x, shape_y, bias, adj_x, adj_y, dtype, out_dtype, kernel_name, attrs):
    """Build and launch the matmul4d_ad op and compare dX with its expected value.

    The m, n and k dimensions extracted from the input shapes are rounded up
    to multiples of 16 before the converted shapes are computed. Input data
    is generated as float16.

    Returns:
        ``((input_x, input_y, input_head), dX, dX_expected, compare_result)``.
    """
    def _round_up_16(dim):
        return (dim + 15) // 16 * 16

    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)
    m, n, k = _round_up_16(m), _round_up_16(n), _round_up_16(k)

    shape_xx, shape_yy, bias_shape, output_shape, k = get_converted_shapes(
        m, n, k, batch_tuple, adj_x, adj_y, bias)

    def _random_fp16(data_shape):
        return random_gaussian(data_shape, miu=0.5, sigma=0.01).astype(np.float16)

    # Generation order is kept (x, y, head) so the random stream is unchanged.
    input_x = _random_fp16(shape_xx)
    input_y = _random_fp16(shape_yy)
    input_head = _random_fp16(output_shape)
    dX_expected = compute_expected(input_y, input_head, adj_x, adj_y, shape_xx)

    if bias_shape is None:
        # Without a bias the bias input is dropped and None leads the op attrs.
        input_shapes = [output_shape, shape_xx, shape_yy]
        input_types = [out_dtype, dtype, dtype]
        op_attrs = [None, out_dtype, adj_x, adj_y]
    else:
        input_shapes = [output_shape, shape_xx, shape_yy, bias_shape]
        input_types = [out_dtype, dtype, dtype, dtype]
        op_attrs = [out_dtype, adj_x, adj_y]

    mod = utils.op_build_test(matmul4d_ad.matmul4d_ad, input_shapes, input_types, op_attrs, kernel_name, attrs)

    dX = np.full(shape_xx, np.nan, dtype)
    dX = utils.mod_launch(mod, (input_head, input_x, input_y, dX), expect=dX_expected)
    matched = compare_tensor(dX, dX_expected, rtol=0.01, equal_nan=True)
    return (input_x, input_y, input_head), dX, dX_expected, matched
def test_fused_bn_update(shape, dtype="float32", c1=(1 / (256 * 7 * 7)), c2=1.001e-05, c3=1.00007975, c4=0.100000024, poly_sch=False):
    """Build, launch, check and profile the fused_bn_update op.

    Args:
        shape: Shape forwarded to ``gen_data`` and used for the output buffer.
        dtype: Data type of all four op inputs.
        c1, c2, c3, c4: Constants forwarded to ``compute_expect`` and, after
            ``dtype``, to the op as op attributes.
        poly_sch: If true, build ``fused_bn_update_auto`` with
            ``{"target": "cuda"}`` attrs; otherwise build
            ``fused_bn_update_manual``.

    Raises:
        AssertionError: If the launched outputs do not match ``expect``.
    """
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs, c1, c2, c3, c4)
    op_attrs = [dtype, c1, c2, c3, c4]
    shapes = [inputs[0].shape] * 4
    dtypes = [dtype] * 4

    if poly_sch:
        mod = utils.op_build_test(fused_bn_update_auto, shapes, dtypes, kernel_name="fused_bn_update_auto",
                                  op_attrs=op_attrs, attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(fused_bn_update_manual, shapes, dtypes, kernel_name="fused_bn_update_manual",
                                  op_attrs=op_attrs)

    # All three output slots refer to the same NaN-filled array object.
    nan_buffer = np.full(shape, np.nan, dtype)
    outputs = [nan_buffer, nan_buffer, nan_buffer]
    launch_args = inputs + outputs
    output = utils.mod_launch(mod, launch_args, outputs=(range(-len(outputs), 0)), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Failed"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)
def minimum_run(shape1, shape2, dtype, attrs_op=None, attrs=None):
    """Build the minimum op and, outside tuning mode, launch, optionally profile and check it.

    Args:
        shape1, shape2: Shapes of the two op inputs.
        dtype: Data type of both inputs.
        attrs_op: Optional extra attributes. Merged into ``attrs`` in place
            when both are given; used as ``attrs`` when only it is given.
        attrs: Build attribute dict. When both ``attrs`` and ``attrs_op`` are
            ``None`` an empty dict is used. A truthy "profiling" entry
            requires "target" and "repeat_times" entries as well.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expect, (lhd, rhd, output))``
        when the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((lhd, rhd), output, expect, compare_result)``.
    """
    if attrs_op is not None:
        if attrs is not None:
            attrs.update(attrs_op)
        else:
            attrs = attrs_op
    # With both arguments left at None the code below crashed on attrs.keys().
    if attrs is None:
        attrs = {}

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(minimum, [shape1, shape2], [dtype, dtype], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            expect, lhd, output, rhd = gen_data(dtype, shape1, shape2)
            return mod, expect, (lhd, rhd, output)
        return mod

    mod = utils.op_build_test(minimum, [shape1, shape2], [dtype, dtype], kernel_name='minimum', attrs=attrs)
    expect, lhd, output, rhd = gen_data(dtype, shape1, shape2)
    output = utils.mod_launch(mod, (lhd, rhd, output), expect=expect)

    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([lhd, rhd, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name, repeat_time=attrs["repeat_times"])

    compare_result = compare_tensor(output, expect, rtol=5e-03, equal_nan=True)
    return (lhd, rhd), output, expect, compare_result
def confusion_matrix_run(actual_shape, actual_dtype, predict_shape, predict_dtype, num_class,
                         kernel_name="confusion_matrix", attrs=None):
    """Build the confusion_matrix op and, outside tuning mode, launch and check it.

    Args:
        actual_shape, actual_dtype: Shape and dtype of the first op input.
        predict_shape, predict_dtype: Shape and dtype of the second op input.
        num_class: Forwarded as the single op attribute and to ``gen_data``.
        kernel_name: Kernel name for the build. When ``attrs`` has a "tuning"
            key it is replaced by ``attrs.get("kernel_name", False)``.
        attrs: Build attribute dict; ``None`` is treated as an empty dict.

    Returns:
        If ``attrs`` has a "tuning" key:
        ``(mod, expect_data, (actual_data, predict_data, output_data))`` when
        the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((actual_data, predict_data), output_data, expect_data, compare_result)``.
    """
    # A None default used to crash on attrs.keys() below.
    if attrs is None:
        attrs = {}

    op = confusion_matrix.confusion_matrix
    shapes = [actual_shape, predict_shape]
    dtypes = [actual_dtype, predict_dtype]

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(op, shapes, dtypes, op_attrs=[num_class], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if not t:
            return mod
        actual_data, expect_data, output_data, predict_data = gen_data(
            actual_dtype, actual_shape, num_class, predict_dtype, predict_shape)
        return mod, expect_data, (actual_data, predict_data, output_data)

    mod = utils.op_build_test(op, shapes, dtypes, op_attrs=[num_class], kernel_name=kernel_name, attrs=attrs)
    actual_data, expect_data, output_data, predict_data = gen_data(
        actual_dtype, actual_shape, num_class, predict_dtype, predict_shape)
    output_data = utils.mod_launch(mod, (actual_data, predict_data, output_data), expect=expect_data)
    # Argument order (expect first) is kept exactly as in the original call.
    return (actual_data, predict_data), output_data, expect_data, compare_tensor(expect_data, output_data)
def fused_batch_norm_grad_run(shape, dtype, eps, data_format, axis, kernel_name, attrs):
    """Build the fused_batch_norm_grad op and, outside tuning mode, launch and check it.

    For ``data_format == "NC1HWC0"`` the reduce axes are ``(0, 2, 3)`` and the
    parameter shape is ``[1, shape[1], 1, 1, shape[4]]``; otherwise the axes
    are every axis except ``axis`` and the parameter shape is ``[shape[axis]]``.

    Returns:
        If ``attrs`` has a "tuning" key: ``(mod, expects, launch_kwargs)`` when
        the tuning value is truthy, otherwise only ``mod``.
        Otherwise: ``((dy, data, gamma), outputs, expects, all_close)``.
    """
    if data_format == "NC1HWC0":
        axes = (0, 2, 3)
        param_shape = [1, shape[1], 1, 1, shape[4]]
    else:
        rank = len(shape)
        positive_axis = axis if axis >= 0 else rank + axis
        axes = tuple(i for i in range(rank) if i != positive_axis)
        param_shape = [shape[axis]]

    shapes = [shape, shape] + [param_shape] * 3
    dtypes = [dtype] * 5
    op_attrs = [eps, data_format, axis]

    def _make_data():
        return gen_data(axes, dtype, eps, data_format, param_shape, shape, axis)

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_batch_norm_grad, [shapes], dtypes, op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if not t:
            return mod
        data, dy, expects, gamma, mean, outputs, var = _make_data()
        launch_kwargs = {"args": (dy, data, mean, var, gamma, *outputs),
                         'outputs': tuple(range(-len(outputs), 0)),
                         'tuning': False}
        return mod, expects, launch_kwargs

    mod = utils.op_build_test(fused_batch_norm_grad, [shapes], dtypes, op_attrs,
                              kernel_name=kernel_name, attrs=attrs)
    data, dy, expects, gamma, mean, outputs, var = _make_data()
    outputs = utils.mod_launch(mod, (dy, data, mean, var, gamma, *outputs),
                               outputs=tuple(range(-len(outputs), 0)), expect=expects)
    # A single expected tensor means the launch result is wrapped in a list.
    outputs = [outputs] if len(expects) == 1 else list(outputs)
    rtol, atol = get_rtol_atol("fused_batch_norm_grad", dtype)
    results = [np.allclose(actual, wanted, rtol=rtol, atol=atol) for actual, wanted in zip(outputs, expects)]
    print("results", results)
    return (dy, data, gamma), outputs, expects, all(results)
def test_ms_less_equal(shape1, shape2, in_dtype, poly_sch=False):
    """Build, launch, check and profile the less_equal op.

    Args:
        shape1, shape2: Shapes of the two operands.
        in_dtype: Data type of both operands.
        poly_sch: If true, build ``less_equal_auto`` with
            ``{"target": "cuda"}`` attrs; otherwise build ``less_equal_manual``.

    Raises:
        AssertionError: If the launched output does not match ``expect``.
    """
    shapes = (shape1, shape2)
    dtypes = (in_dtype, in_dtype)

    if poly_sch:
        mod = utils.op_build_test(less_equal_auto, shapes, dtypes, kernel_name="less_equal_auto",
                                  attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(less_equal_manual, shapes, dtypes, kernel_name="less_equal_manual")

    lhs, rhs, output, expect = gen_data(shape1, shape2, in_dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)