def fused_is_finite_run(shape, layout='NHWC', poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    dtype = "float32"
    mod = utils.op_build_test(fused_is_finite, [shape], [dtype], op_attrs=[layout],
                              kernel_name="fused_is_finite", polyhedral=poly_sch, attrs=attrs)
    data, expect, output = gen_data(shape, dtype, layout)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):  # .get avoids a KeyError when profiling is unset
        data, output = to_tvm_nd_array([data, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, data, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, res

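# Editor's sketch (not from the source): a minimal NumPy reference for the check
# fused_is_finite is assumed to perform -- reducing a tensor to a single boolean
# that is True only when every element is finite. The helper name _is_finite_ref
# is hypothetical; gen_data above is assumed to produce an equivalent `expect`.
import numpy as np

def _is_finite_ref(data):
    # np.isfinite is False for NaN, +inf and -inf; np.all reduces over all axes.
    return np.array([np.all(np.isfinite(data))])
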
def select_run(shape_cond, shape_x, dtype_cond, dtype_x, attrs=None):
    """select_run implementation"""
    if attrs is None:
        attrs = {}
    mod = utils.op_build_test(select, [shape_cond, shape_x, shape_x],
                              [dtype_cond, dtype_x, dtype_x], kernel_name='select',
                              op_attrs=[], attrs=attrs)
    args, exp_output, cond, x1, x2 = gen_data(shape_cond, shape_x, dtype_cond, dtype_x)
    acu_output = utils.mod_launch(mod, args, expect=exp_output)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    # compare result
    rtol, atol = get_rtol_atol("select", dtype_x)
    testcase_result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol,
                                     equal_nan=True)
    return [cond, x1, x2], acu_output, exp_output, testcase_result

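# Editor's sketch (not from the source): `select` is assumed to match NumPy's
# element-wise selection semantics. The helper name _select_ref is hypothetical.
import numpy as np

def _select_ref(cond, x1, x2):
    # Where cond is true take x1, otherwise take x2 (shapes broadcast).
    return np.where(cond.astype(bool), x1, x2)
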
def expand_dims_run(shape, axis, dtype, kernel_name="expand_dims", attrs=None):
    if attrs is None:  # avoid a mutable default argument
        attrs = {}
    op_attr = [axis]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(ExpandDims, [shape], [dtype], op_attr,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, output = gen_data(axis, dtype, shape)
            return mod, expect, (input, output)
        return mod
    mod = utils.op_build_test(ExpandDims, [shape], [dtype], op_attr,
                              kernel_name=kernel_name, attrs=attrs)
    expect, input, output = gen_data(axis, dtype, shape)
    output = utils.mod_launch(mod, (input, output), expect=expect)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([input, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return input, output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)

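# Editor's sketch (not from the source): ExpandDims is assumed to insert a
# size-1 axis, matching np.expand_dims; _expand_dims_ref is hypothetical.
import numpy as np

def _expand_dims_ref(x, axis):
    # Inserts a new axis of length 1 at position `axis`.
    return np.expand_dims(x, axis)
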
def fused_mul_div_rsqrt_mul_isfinite_red_run(shape, dtype='float32', poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs)
    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    mod = utils.op_build_test(fused_mul_div_rsqrt_mul_isfinite_red, input_shape, input_dtype,
                              kernel_name="fused_mul_div_rsqrt_mul_isfinite_red",
                              polyhedral=poly_sch, attrs=attrs)
    # Distinct buffers for the four outputs (a `* 3` list repeat would alias them).
    outputs = [np.full((1,), False, 'bool')] + [np.full(shape, np.nan, dtype) for _ in range(3)]
    output = utils.mod_launch(mod, [*inputs, *outputs],
                              outputs=list(range(-len(outputs), 0)), expect=expect)
    ret = all(compare_tensor(output[i], expect[i], rtol=5e-03, atol=1.e-08) for i in range(4))
    print("Test {}".format("Pass" if ret else "Fail"))
    target_name = attrs["target"].split()[0]
    if not ret:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        data = to_tvm_nd_array([*inputs, *outputs], akg.tvm.context(target_name, 0))
        target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, ret

def standard_normal_run(seed, shape, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    mod = utils.op_build_test(standard_normal, [], [], kernel_name="standard_normal",
                              op_attrs=[seed, shape], attrs=attrs)
    output, expect = gen_data(shape)
    output = utils.mod_launch(mod, (output,), expect=expect)
    # Statistical acceptance: right shape, sample mean near 0, sample std near 1.
    res = output.shape == expect.shape
    res &= abs(np.mean(output) - 0) < 1e-1
    res &= abs(np.std(output) - 1) < 1e-1
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        output = to_tvm_nd_array(output, akg.tvm.context(target_name, 0))
        target_profiling(mod, output, target=target_name, repeat_time=attrs["repeat_times"])
    return output, output, expect, res

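# Editor's sketch (not from the source): the acceptance test above is purely
# statistical -- for N(0, 1) samples the sample mean and sample standard
# deviation should land within 0.1 of 0 and 1 for reasonably large shapes.
# _looks_standard_normal is a hypothetical restatement of that check.
import numpy as np

def _looks_standard_normal(sample, tol=1e-1):
    return abs(np.mean(sample)) < tol and abs(np.std(sample) - 1) < tol
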
def fused_relu_grad_run(shape, c1=0, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    dtype = 'float16'
    input = gen_data(shape, dtype)
    expect = compute_expect(input, c1)
    shapes = [shape] * 3
    dtypes = [dtype] * 3
    op_attrs = [c1]
    mod = utils.op_build_test(fused_relu_grad, shapes, dtypes, op_attrs=op_attrs,
                              kernel_name="fused_relu_grad", polyhedral=poly_sch, attrs=attrs)
    output = np.full(shape, np.nan, dtype)
    output = utils.mod_launch(mod, (*input, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        data = to_tvm_nd_array([*input, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
    return input, output, expect, res

def reciprocal_run(shape, dtype, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = reciprocal_compile(shape, dtype, attrs, kernel_name=kernel_name, tuning=t)
        if t:
            expect, input1, output = gen_data(dtype, shape)
            return mod, expect, (input1, output)
        return mod
    mod = reciprocal_compile(shape, dtype, attrs)
    expect, input1, output = gen_data(dtype, shape)
    output = utils.mod_launch(mod, (input1, output), expect=expect)
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([input1, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    rtol, atol = get_rtol_atol("reciprocal", dtype)
    return (input1,), output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol,
                                                     equal_nan=True)

def fused_bn_update_grad_run(shape, out_shape, dtype="float16", out_dtype="float32",
                             layout="NHWC", poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    shape_list = [shape, out_shape, shape]
    dtype_list = [dtype, out_dtype, dtype]
    op_attrs = [layout]
    mod = utils.op_build_test(fused_bn_update_grad, shape_list, dtype_list, op_attrs=op_attrs,
                              kernel_name="fused_bn_update_grad", polyhedral=poly_sch,
                              attrs=attrs)
    head, data_sum, in_bn, output, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    outputs = [output, output]
    inputs = [head, data_sum, in_bn]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)),
                               expect=expect)
    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        arg_list = to_tvm_nd_array(arg_list, akg.tvm.context(target_name, 0))
        target_profiling(mod, *arg_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res

def sqrt_run(shape, dtype, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(sqrt, [shape], [dtype], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            expect, input, output = gen_data(dtype, shape)
            return mod, expect, (input, output)
        return mod
    expect, input, output = gen_data(dtype, shape)
    mod = utils.op_build_test(sqrt, [shape], [dtype], kernel_name='sqrt', attrs=attrs)
    output = utils.mod_launch(mod, (input, output), expect=expect)
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([input, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return input, output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)

def csrmv_run(shape1, dtype1, shape2, dtype2, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    if attrs["target"] == "cuda":
        attrs["enable_akg_reduce_lib"] = True
        attrs["enable_atomic_add"] = True
    data, indices, indptr, weight, expect = gen_data(shape1, dtype1, shape2, dtype2)
    attrs["is_csr"] = True
    mod = utils.op_build_test(csr_mv, [data.shape, indices.shape, indptr.shape, weight.shape],
                              ["float32", "int32", "int32", "float32"],
                              polyhedral=poly_sch, attrs=attrs, kernel_name='csrmv')
    output_shape = expect.shape
    output = np.zeros(output_shape, dtype="float32")
    output = utils.mod_launch(mod, (data, indices, indptr, weight, output), expect=expect)
    res = compare_tensor(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array([data, indices, indptr, weight, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, indices, indptr, weight), output, expect, res

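# Editor's sketch (not from the source): a plain-loop CSR matrix-vector product
# as a reference for csr_mv. The helper name _csr_mv_ref is hypothetical;
# gen_data above is assumed to build (data, indices, indptr) in standard CSR layout.
import numpy as np

def _csr_mv_ref(data, indices, indptr, weight):
    num_rows = indptr.shape[0] - 1
    out = np.zeros((num_rows,) + weight.shape[1:], dtype=weight.dtype)
    for row in range(num_rows):
        # Nonzeros of this row live in data[indptr[row]:indptr[row + 1]].
        for k in range(indptr[row], indptr[row + 1]):
            out[row] += data[k] * weight[indices[k]]
    return out
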
def tensor_scatter_add_run(data_shape, data_type, indices_shape, indices_type, axis,
                           poly_sch=True, attrs=None):
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    if len(indices_shape) > 1:
        updates_shape = indices_shape[:-1] + data_shape[indices_shape[-1]:]
    else:
        updates_shape = indices_shape + data_shape[1:]
    mod = utils.op_build_test(tensor_scatter_add, [data_shape, indices_shape, updates_shape],
                              [data_type, indices_type, data_type], attrs=default_attrs,
                              kernel_name="tensor_scatter_add", polyhedral=poly_sch)
    # gen data
    indices_shape = indices_shape + (1,) if len(indices_shape) == 1 else indices_shape
    params, indices, updates, expect = gen_data(data_shape, data_type, indices_shape,
                                                indices_type)
    output_shape = expect.shape
    if len(expect.shape) == 0:
        output_shape = (1,)
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, updates, output), expect=expect)
    atol, rtol = get_rtol_atol("tensor_scatter_add", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    # attrs may be None here, so read the target from default_attrs
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if default_attrs.get("profiling", False):
        params, indices, updates, output = to_tvm_nd_array(
            [params, indices, updates, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, params, indices, updates, output, target=target_name,
                         repeat_time=default_attrs["repeat_times"])
    return (params, indices, updates), output, expect, res

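# Editor's sketch (not from the source): tensor_scatter_add is assumed to follow
# TensorFlow-style tensor_scatter_nd_add semantics; np.add.at gives an unbuffered
# (duplicate-safe) reference. _tensor_scatter_add_ref is hypothetical.
import numpy as np

def _tensor_scatter_add_ref(params, indices, updates):
    out = params.copy()
    if indices.ndim > 1:
        # Each row of `indices` addresses the leading indices.shape[-1] axes of params.
        np.add.at(out, tuple(indices.T), updates)
    else:
        np.add.at(out, indices, updates)
    return out
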
def batch_matmul_run(shape1, shape2, dtype, out_dtype="float32", layout1="NHDT",
                     layout2="NHDT", layout_out="NHDT", shape_bias=None, add_bias=False,
                     tensor_core=True, poly_sch=True, attrs=None):
    op_attrs = [out_dtype, layout1, layout2, layout_out, tensor_core, add_bias]
    default_attrs = attrs if attrs else {"target": "cuda"}
    if default_attrs["target"] == "cuda" and tensor_core:
        default_attrs.update({
            "pragma_enable_matmul": True,
            "enable_auto_inline": False
        })
    elif default_attrs["target"] == "llvm":
        if "pragma_enable_matmul" not in default_attrs.keys():
            default_attrs["pragma_enable_matmul"] = True
        if "feature" not in default_attrs.keys():
            default_attrs["feature"] = "avx"
    mod = utils.op_build_test(BatchMatMul, (shape1, shape2, shape_bias),
                              (dtype, dtype, out_dtype), op_attrs=op_attrs,
                              attrs=default_attrs, polyhedral=poly_sch,
                              kernel_name="batch_matmul")
    lhs, rhs, bias, output, expect = gen_data(shape1, shape2, dtype, out_dtype, layout1,
                                              layout2, layout_out, shape_bias, add_bias)
    args = (lhs, rhs, bias, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if default_attrs.get("profiling", False):  # attrs may be None, so check default_attrs
        args = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod, *args, target=target_name,
                         repeat_time=default_attrs["repeat_times"])
    return (lhs, rhs, bias), output, expect, res

def fused_gather_gather_add_mul_max_exp_scatter_add_run(
        input1_shape, input2_shape, input3_shape, input4_shape, data_dtype, indices_type,
        axis, poly_sch=True, attrs=None):
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    mod = utils.op_build_test(
        fused_gather_gather_add_mul_max_exp_scatter_add,
        [input1_shape, input2_shape, input3_shape, input4_shape],
        [data_dtype, indices_type, data_dtype, indices_type],
        op_attrs=op_attrs, attrs=default_attrs, polyhedral=poly_sch,
        kernel_name="fused_gather_gather_add_mul_max_exp_scatter_add",
    )
    # gen data
    input1, input2, input3, input4, expect1, expect2 = gen_data(
        input1_shape, input2_shape, input3_shape, input4_shape, data_dtype, indices_type, axis)
    output1 = np.zeros(expect1.shape, expect1.dtype)
    output2 = deepcopy(input1)
    output1, output2 = utils.mod_launch(
        mod, (input1, input2, input3, input4, output1, output2), outputs=(-2, -1))
    atol, rtol = get_rtol_atol("fused_gather_gather_add_mul_max_exp_scatter_add", data_dtype)
    res = compare_tensor(output1, expect1, rtol=rtol, atol=atol)
    res &= compare_tensor(output2, expect2, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    # attrs may be None here, so read the target from default_attrs
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if default_attrs.get("profiling", False):
        inputs = to_tvm_nd_array([input1, input2, input3, input4, output1, output2],
                                 akg.tvm.context(target_name, 0))
        target_profiling(mod, *inputs, target=target_name,
                         repeat_time=default_attrs["repeat_times"])
    return (input1, input2, input3, input4), (output1, output2), (expect1, expect2), res

def conv_fusion_run(shape_data, shape_filter1, shape_filter2, stride1, stride2, padding1,
                    padding2, dilation1, dilation2, dtype, out_dtype="float32",
                    poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [stride1, stride2, padding1, padding2, dilation1, dilation2]
    attrs.update({
        "enable_auto_fuse": False,
        "shared_memory_tensors": "out input_1 input_2 input_3",
        "pragma_disable_loop_fusion": True,
        "dim": "3 0 1 1 3 1 1 1 3 2 4 4 3 3 52 52 3 4 64 64"
    })
    mod = utils.op_build_test(ConvFusion, (shape_data, shape_filter1, shape_filter2),
                              (dtype, dtype, dtype), op_attrs=op_attrs, attrs=attrs,
                              polyhedral=poly_sch, kernel_name="conv_fusion_auto")
    data, weight1, weight2, output, expect = fusion_gen_data(
        shape_data, shape_filter1, shape_filter2, stride1, stride2, padding1, padding2,
        dilation1, dilation2, dtype, out_dtype)
    args = (data, weight1, weight2, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        data, weight1, weight2, output = to_tvm_nd_array(
            [data, weight1, weight2, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, data, weight1, weight2, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, weight1, weight2), output, expect, res

def csr_reduce_sum_run(shape, dtype1, dtype2, axis, nnz=-1, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    if attrs["target"] == "cuda":
        attrs["enable_akg_reduce_lib"] = True
        attrs["enable_atomic_add"] = True
    op_attrs = [axis, shape]
    # gen data
    data, col_idx, row_idx, expect = gen_data(shape, dtype1, dtype2, axis, nnz=nnz)
    output_shape = expect.shape
    attrs["is_csr"] = True
    mod = utils.op_build_test(csr_reduce_sum, [data.shape, col_idx.shape, row_idx.shape],
                              [dtype1, dtype2, dtype2], op_attrs=op_attrs,
                              polyhedral=poly_sch, attrs=attrs, kernel_name="csr_reduce_sum")
    if len(expect.shape) == 0:
        output_shape = (1,)
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (data, col_idx, row_idx, output), expect=expect)
    atol, rtol = get_rtol_atol("csr_reduce_sum", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array([data, col_idx, row_idx, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, col_idx, row_idx), output, expect, res

def cast_run(shape, srcType, dstType, attrs=None):
    if attrs is None:  # avoid a mutable default argument
        attrs = {}
    op_attrs = [dstType]
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Cast, [build_shape], [srcType], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        return mod
    mod = utils.op_build_test(Cast, [build_shape], [srcType], op_attrs,
                              kernel_name='cast', attrs=attrs)
    args, exp_output, input = gen_data(dstType, shape, srcType)
    if attrs.get("dynamic"):
        for i in range(len(shape)):
            args.append(shape[i])
        block_dim = compute_blockdim(shape)
        args.append(block_dim)
    acu_output = utils.mod_launch(mod, args, outputs=(1,), expect=exp_output)
    # compare result
    rtol, atol = get_rtol_atol("cast", dstType)
    TestCase_Result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol,
                                     equal_nan=True)
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return input, acu_output, exp_output, TestCase_Result

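# Editor's sketch (not from the source): Cast is assumed to match NumPy's astype
# conversion, which is what the tolerances from get_rtol_atol("cast", dstType)
# are compared against. _cast_ref is hypothetical.
import numpy as np

def _cast_ref(x, dst_type):
    return x.astype(dst_type)
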
def equal_run(shapes, dtype, kernel_name="equal", attrs_op=None, cce_path="./", attrs=None):
    attrs = {} if attrs is None else attrs  # avoid mutable default arguments
    attrs.update(attrs_op or {})
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Equal, shapes, [dtype, dtype], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            benchMark1, inputs1, output1 = gen_data(dtype, shapes)
            return mod, benchMark1, inputs1 + [output1]
        return mod
    mod = utils.op_build_test(Equal, shapes, [dtype, dtype], kernel_name=kernel_name,
                              attrs=attrs)
    benchMark1, inputs1, output1 = gen_data(dtype, shapes)
    output1 = utils.mod_launch(mod, inputs1 + [output1], expect=benchMark1)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        # pass a flat argument list, not a nested one
        args_list = to_tvm_nd_array(inputs1 + [output1], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    # Also test the case where the two inputs are identical
    if shapes[0] == shapes[1]:
        inputs2 = [inputs1[0], inputs1[0]]
        benchMark2 = np.equal(inputs2[0], inputs2[1])
        output2 = np.full(benchMark2.shape, 0, bool)
        output2 = utils.mod_launch(mod, inputs2 + [output2], expect=benchMark2)
        testPass = (np.array_equal(output1, benchMark1)
                    and np.array_equal(output2, benchMark2))
        return (inputs1, inputs2), (output1, output2), (benchMark1, benchMark2), testPass
    return inputs1, output1, benchMark1, np.array_equal(output1, benchMark1)

def get_result(desc, poly, attrs=None, profiling=True, need_compare=True):
    backend = _get_backend(desc)
    mod = composite.build(desc, attrs, poly=poly)
    if not need_compare:
        return True
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    # In profiling mode, mod_launch returns the compute outputs plus a profiling
    # record; only the compute outputs are needed here.
    if isinstance(output, tuple) and len(output) > 0 and isinstance(output[-1], dict):
        output = output[0]
    output = list(output) if isinstance(output, (list, tuple)) else [output]
    expect = list(expect) if isinstance(expect, (list, tuple)) else [expect]
    for i, _ in enumerate(expect):
        if expect[i].dtype in ("complex128", "complex64"):
            # Device output interleaves real and imaginary parts; split the
            # even/odd elements and rebuild the complex tensor.
            final_shape = functools.reduce(lambda x, y: x * y, output[i].shape)
            flat_output = output[i].reshape((final_shape,))
            output_real = []
            output_imag = []
            for k, _ in enumerate(flat_output):
                if k % 2 == 0:
                    output_real.append(flat_output[k])
                else:
                    output_imag.append(flat_output[k])
            output[i] = np.vectorize(complex)(output_real, output_imag)
            output[i] = output[i].reshape(expect[i].shape)
    if len(output) != len(expect):
        raise RuntimeError("output and expect have different length, {} vs {}".format(
            len(output), len(expect)))
    compare_tolerance = get_compare_tolerance(desc, output_indexes)
    compare_res = list(map(_compare_func, output, expect, compare_tolerance))
    if not all(compare_res):
        source = (mod.imported_modules[0] if backend == "cuda" else mod).get_source()
        logging.debug(source)
        _dump_info(desc, attrs, poly, input_for_mod, output, expect)
        logging.warning("Compare results: %s", str(compare_res))
        return False
    if profiling and backend in ["cuda", "cpu"]:
        ctx = tvm.context(backend, 0)
        has_complex = any(i.dtype in ("complex64", "complex128") for i in input_for_mod)
        if not has_complex:
            inputs = to_tvm_nd_array(input_for_mod, ctx)
            target_profiling(mod, *inputs, target=backend, repeat_time=1000)
    return True

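# Editor's sketch (not from the source): the real/imag de-interleaving loop in
# get_result can be expressed with strided slices; _deinterleave_complex is a
# hypothetical, vectorized restatement of that step.
import numpy as np

def _deinterleave_complex(raw, target_shape):
    flat = raw.reshape(-1)
    # Even positions hold real parts, odd positions imaginary parts.
    return (flat[0::2] + 1j * flat[1::2]).reshape(target_shape)
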
def cumsum_run(shape, dtype, axis=0, exclusive=False, reverse=False, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}

    def cumsum(data):
        op_attrs = {
            "axis": axis if isinstance(axis, list) else [axis],
            "exclusive": exclusive,
            "reverse": reverse
        }
        return cumsum_ir_builder([data, ], op_attrs)

    mod = utils.op_build_test(cumsum, [shape], [dtype], kernel_name="cumsum",
                              polyhedral=poly_sch, attrs=attrs)
    data, output, expect = gen_data(shape, dtype, axis, exclusive, reverse)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if ret else "Fail"))
    target_name = attrs["target"].split()[0]
    if not ret:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array([data, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, ret

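# Editor's sketch (not from the source): a NumPy reference for cumsum with the
# op_attrs used above. `exclusive` shifts each prefix sum so the current element
# is excluded; `reverse` scans from the end. _cumsum_ref is hypothetical.
import numpy as np

def _cumsum_ref(data, axis=0, exclusive=False, reverse=False):
    x = np.flip(data, axis) if reverse else data
    out = np.cumsum(x, axis=axis)
    if exclusive:
        out = out - x  # inclusive scan minus the current element = exclusive scan
    return np.flip(out, axis) if reverse else out
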
def abs_run(shape, dtype, attrs=None):
    if attrs is None:  # avoid a mutable default argument
        attrs = {}
    input_shape = [shape]
    input_dtype = [dtype]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Abs, input_shape, input_dtype, kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            exp_output, inputs, output = gen_date(dtype, shape)
            return mod, exp_output, (inputs, output)
        return mod
    mod = utils.op_build_test(Abs, input_shape, input_dtype, kernel_name='abs', attrs=attrs)
    exp_output, inputs, output = gen_date(dtype, shape)
    acu_output = utils.mod_launch(mod, (inputs, output), expect=exp_output)
    # compare result
    rtol, atol = get_rtol_atol("abs", dtype)
    TestCase_Result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol,
                                     equal_nan=True)
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        data, output = to_tvm_nd_array([inputs, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, data, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, acu_output, exp_output, TestCase_Result

def fused_bn_update_run(in_shape, dtype="float32", c1=(1 / (256 * 7 * 7)), c2=1.001e-05,
                        c3=1.00007975, c4=0.100000024, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    inputs = gen_data(in_shape, dtype)
    expect = compute_expect(inputs, c1, c2, c3, c4)
    op_attrs = [dtype, c1, c2, c3, c4]
    shapes = [in_shape] * 4
    dtypes = [dtype] * 4
    mod = utils.op_build_test(fused_bn_update, shapes, dtypes, kernel_name="fused_bn_update",
                              op_attrs=op_attrs, polyhedral=poly_sch, attrs=attrs)
    # Distinct buffers for the three outputs (a `* 3` list repeat would alias them).
    outputs = [np.full(in_shape, np.nan, dtype) for _ in range(3)]
    args_list = inputs + outputs
    output = utils.mod_launch(mod, args_list, outputs=tuple(range(-len(outputs), 0)),
                              expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array(args_list, akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res

def one_hot_run(shape, depth, dtype, on_value, off_value, axis, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cce"}
    if attrs["target"] == CCE:
        return one_hot_ascend(shape, depth, dtype, on_value, off_value, axis, attrs)
    mod = utils.op_build_test(one_hot, [shape], [dtype], kernel_name="one_hot",
                              op_attrs=[on_value, off_value, depth, axis, dtype],
                              polyhedral=poly_sch, attrs=attrs)
    # gen data
    expect, data_tmp, _, _, output = gen_data(axis, depth, dtype, shape, on_value, off_value)
    data = data_tmp.astype(dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    res = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array([data, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, res

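# Editor's sketch (not from the source): a NumPy reference for one_hot with
# on/off values and an insertion axis; _one_hot_ref is hypothetical.
import numpy as np

def _one_hot_ref(indices, depth, on_value, off_value, axis, dtype):
    hot = np.arange(depth) == indices[..., None]  # the new axis is last
    out = np.where(hot, on_value, off_value).astype(dtype)
    # Move the one-hot axis into position unless it already belongs at the end.
    return out if axis in (-1, indices.ndim) else np.moveaxis(out, -1, axis)
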
def fused_bn_reduce_grad_run(in_shape, layout='NHWC', in_dtype="float16", out_dtype='float16',
                             poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    if layout != "NHWC" and layout != "NCHW":
        raise NotImplementedError('Layout not supported {}'.format(layout))
    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout, out_dtype)
    input_shape_list = [i.shape for i in inputs]
    input_dtype_list = [inter_dtype] * 3 + [in_dtype] + [inter_dtype] * 3 + [in_dtype]
    op_attrs = [layout, out_dtype]
    mod = utils.op_build_test(fused_bn_reduce_grad, input_shape_list, input_dtype_list,
                              kernel_name="fused_bn_reduce_grad", op_attrs=op_attrs,
                              polyhedral=poly_sch, attrs=attrs)
    outputs = [output]
    arglist = inputs + outputs
    output = utils.mod_launch(mod, arglist, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        arglist = to_tvm_nd_array(arglist, akg.tvm.context(target_name, 0))
        target_profiling(mod, *arglist, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res

def gather_run(shape1, dtype1, shape2, dtype2, axis, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [axis]
    mod = utils.op_build_test(gather, [shape1, shape2], [dtype1, dtype2], op_attrs=op_attrs,
                              polyhedral=poly_sch, attrs=attrs, kernel_name="gather")
    # gen data
    params, indices, expect = gen_data(shape1, dtype1, shape2, dtype2, axis)
    output_shape = expect.shape
    if len(expect.shape) == 0:
        output_shape = (1,)
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, output), expect=expect)
    atol, rtol = get_rtol_atol("gather", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        params, indices, output = to_tvm_nd_array([params, indices, output],
                                                  akg.tvm.context(target_name, 0))
        target_profiling(mod, params, indices, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (params, indices), output, expect, res

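# Editor's sketch (not from the source): gather along an axis is assumed to
# match np.take; _gather_ref is hypothetical.
import numpy as np

def _gather_ref(params, indices, axis):
    return np.take(params, indices, axis=axis)
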
def reduce_max_run(shape, dtype, axis, keepdims, kernel_name="reduce_max", attrs=None):
    """run function for dsl function reduce_max"""
    if attrs is None:
        attrs = {}
    op_attrs = [axis, keepdims]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(reduce_max, [shape], [dtype], op_attrs=op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
            return mod, expect, (inputs, output)
        return mod
    mod = utils.op_build_test(reduce_max, [shape], [dtype], op_attrs=op_attrs,
                              kernel_name=kernel_name, attrs=attrs)
    expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("reduce_max", dtype)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([inputs, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol,
                                                  equal_nan=True)

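# Editor's sketch (not from the source): a NumPy reference for reduce_max with
# the op_attrs used above; _reduce_max_ref is hypothetical.
import numpy as np

def _reduce_max_ref(x, axis, keepdims):
    axis = tuple(axis) if isinstance(axis, (list, tuple)) else axis
    return np.max(x, axis=axis, keepdims=keepdims)
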
def unsorted_segment_sum_run_others(data_shape, data_type, indices_shape, indices_type, num,
                                    attrs=None):
    if not attrs:  # downstream code reads attrs["target"], so give it a default
        attrs = {"target": "cuda"}
    mod = unsortedsegmentsum_compile(data_shape, indices_shape, num, data_type, attrs,
                                     kernel_name='unsortedsegmentsum_run', tuning=False)
    # gen data
    input1, input2, expect = gen_data(data_shape, data_type, indices_shape, indices_type, num)
    output_shape = expect.shape
    if len(expect.shape) == 0:
        output_shape = (1,)
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (input1, input2, output), expect=expect)
    atol, rtol = get_rtol_atol("unsorted_segment_sum", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        input1, input2, output = to_tvm_nd_array([input1, input2, output],
                                                 akg.tvm.context(target_name, 0))
        target_profiling(mod, input1, input2, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (input1, input2), output, expect, res

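# Editor's sketch (not from the source): unsorted_segment_sum is assumed to
# follow TensorFlow semantics -- slices of `data` are summed into
# out[segment_id], and np.add.at handles repeated ids correctly.
# _unsorted_segment_sum_ref is hypothetical.
import numpy as np

def _unsorted_segment_sum_ref(data, segment_ids, num_segments):
    out = np.zeros((num_segments,) + data.shape[segment_ids.ndim:], dtype=data.dtype)
    np.add.at(out, segment_ids, data)
    return out
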
def assign_run(ref_shape, val_shape, dtype, kernel_name="assign", attrs_op=None,
               cce_path="./", attrs=None):
    attrs = {} if attrs is None else attrs  # avoid mutable default arguments
    attrs.update(attrs_op or {})
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Assign, [ref_shape, val_shape], [dtype, dtype],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            ref, val, expect = gen_data(dtype, ref_shape, val_shape)
            return mod, expect, (ref, val)
        return mod
    ref, val, expect = gen_data(dtype, ref_shape, val_shape)
    mod = utils.op_build_test(Assign, [ref_shape, val_shape], [dtype, dtype],
                              kernel_name=kernel_name, attrs=attrs)
    fake_output = np.full(val_shape, np.nan, dtype)
    result, _ = utils.mod_launch(mod, (ref, val, fake_output), outputs=(0, -1), expect=expect)
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        ref, val, output = to_tvm_nd_array([ref, val, fake_output],
                                           akg.tvm.context(target_name, 0))
        target_profiling(mod, ref, val, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (ref, val), result, expect, compare_tensor(result, expect, atol=5e-01, rtol=5e-03,
                                                      equal_nan=True)

def reduce_sum_run(shape, reduce_axis, keepdims, dtype, attrs):
    if attrs is None:
        attrs = {}
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = sum_compile(shape, reduce_axis, keepdims, dtype, attrs,
                          kernel_name=kernel_name, tuning=t)
        if t:
            expect, input1, output = gen_data(dtype, keepdims, reduce_axis, shape)
            return mod, expect, (input1, output)
        return mod
    mod = sum_compile(shape, reduce_axis, keepdims, dtype, attrs)
    expect, input1, output = gen_data(dtype, keepdims, reduce_axis, shape)
    args = [input1, output]
    if attrs.get("dynamic"):
        for i in range(len(shape)):
            args.append(shape[i])
        block_dim = compute_blockdim(shape)
        args.append(block_dim)
    output = utils.mod_launch(mod, args, outputs=(1,), expect=expect)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([input1, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    rtol, atol = get_rtol_atol("sum", dtype)
    return input1, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol,
                                                  equal_nan=True)

def log_run(shape, dtype, kernel_name, attrs_op=None, attrs=None):
    input_shape = [shape]
    input_dtype = [dtype]
    if attrs_op is not None:
        if attrs is not None:
            attrs.update(attrs_op)
        else:
            attrs = attrs_op
    if attrs is None:  # both attrs and attrs_op were omitted
        attrs = {}
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(log, input_shape, input_dtype, kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            expect, input_, output = gen_data(dtype, shape)
            return mod, expect, (input_, output)
        return mod
    mod = utils.op_build_test(log, input_shape, input_dtype, kernel_name=kernel_name,
                              attrs=attrs)
    expect, input_, output = gen_data(dtype, shape)
    output = utils.mod_launch(mod, (input_, output), expect=expect)
    rtol, atol = get_rtol_atol("log", dtype)
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([input_, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return input_, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol,
                                                  equal_nan=True)

def conv_run(shape_data, shape_weight, stride=(1, 1), padding=(0, 0, 0, 0), dilation=(1, 1),
             dtype="float16", out_dtype="float16", layout="NHWC", tensor_core=True,
             poly_sch=True, attrs=None):
    if layout != "NHWC" and layout != "NCHW":
        raise ValueError("Only layouts NHWC and NCHW are supported")
    use_tensor_core = False
    if tensor_core and layout == "NHWC" and dtype == "float16":
        use_tensor_core = True
    op_attrs = [stride, padding, dilation]
    default_attrs = {"target": "cuda", "enable_auto_fuse": False}
    if attrs:
        default_attrs.update(attrs)
    if use_tensor_core:
        op_attrs += [out_dtype]
        default_attrs.update({"pragma_enable_matmul": True,
                              "pragma_enable_conv_tensor_core": True})
        if poly_sch:
            mod = utils.op_build_test(TensorcoreConv, (shape_data, shape_weight),
                                      (dtype, dtype), op_attrs=op_attrs, attrs=default_attrs,
                                      kernel_name="tensorcore_conv_auto")
    elif poly_sch:
        mod = utils.op_build_test(Conv, (shape_data, shape_weight), (dtype, dtype),
                                  op_attrs=op_attrs, attrs=default_attrs,
                                  kernel_name="conv_auto")
    data, weight, output, expect = gen_data(shape_data, shape_weight, layout, stride,
                                            padding, dilation, dtype, out_dtype)
    args = (data, weight, output)
    output = utils.mod_launch(mod, args, expect=expect)
    rtol = 1e-3 if dtype == "float16" else 1e-4
    atol = 1e-3 if dtype == "float16" else 1e-4
    res = np.allclose(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    # attrs may be None here, so read the target from default_attrs
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if default_attrs.get("profiling", False):
        data, weight, output = to_tvm_nd_array([data, weight, output],
                                               akg.tvm.context(target_name, 0))
        target_profiling(mod, data, weight, output, target=target_name,
                         repeat_time=default_attrs["repeat_times"])
    return (data, weight), output, expect, res