def test_fused_mul_div_rsqrt_mul_isfinite_red(shape, dtype='float32', poly_sch=False):
    input = gen_data(shape, dtype)
    expect = compute_expect(input)
    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    if poly_sch:
        mod = utils.op_build_test(
            fused_mul_div_rsqrt_mul_isfinite_red, input_shape, input_dtype,
            kernel_name="fused_mul_div_rsqrt_mul_isfinite_red",
            attrs={"target": "cuda"})

    outputs = [np.full((1,), False, 'bool')] + [np.full(shape, np.nan, dtype)] * 3
    output = utils.mod_launch(mod, [*input, *outputs],
                              outputs=list(range(-len(outputs), 0)), expect=expect)

    ret = compare_tensor(output[0], expect[0], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[1], expect[1], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[2], expect[2], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[3], expect[3], rtol=5e-03, atol=1.e-08)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(input)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)
def globalavgpool_run(n, c, h, w, pool_type, dtype, attrs):
    dshape = (n, c, h, w)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(globalavgpool.globalavgpool, [dshape], [pool_type],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            args, exp_output, input = gen_data(c, dshape, dtype, h, n, pool_type, w)
            return mod, exp_output, args
        else:
            return mod
    else:
        # Result_globalavgpool
        mod = globalavgpool.globalavgpool(n, c, h, w, pool_type, attrs)
        args, exp_output, input = gen_data(c, dshape, dtype, h, n, pool_type, w)
        acu_output = utils.mod_launch(mod, args, expect=exp_output)
        # compare result
        TestCase_Result = compare_tensor(acu_output, exp_output, rtol=5e-03, equal_nan=True)
        return input, acu_output, exp_output, TestCase_Result
def test_ms_resize_grad(shape, size, dtype, align_corners, poly_sch=False):
    op_attr = [size, align_corners]
    if poly_sch:
        mod = utils.op_build_test(
            resize_nearest_neighbor_grad_auto, [shape], [dtype], op_attr,
            kernel_name="resize_nearest_neighbor_grad_auto", attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(
            resize_nearest_neighbor_grad_manual, [shape], [dtype], op_attr,
            kernel_name="resize_nearest_neighbor_grad_manual")

    data, output, expect = gen_data(shape, size, align_corners, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    compare_res = compare_tensor(output, expect, rtol=5e-03, atol=1e-08)
    # report and assert on the comparison, following the other test_* functions in this file
    print("Test {}".format("Pass" if compare_res else "Fail"))
    if not compare_res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
def apply_ftrl_run(shape, dtype, attrs=None):
    """run function for dsl function apply_ftrl."""
    scalar_shape = (1,)
    var_shape, accum_shape, linear_shape, grad_shape = [shape] * 4
    lr_shape, l1_shape, l2_shape, lr_power_shape = [scalar_shape] * 4
    shapes = [var_shape, accum_shape, linear_shape, grad_shape,
              lr_shape, l1_shape, l2_shape, lr_power_shape]
    # one dtype per input tensor (8 inputs)
    dtypes = [dtype] * 8
    mod = utils.op_build_test(apply_ftrl, shapes, dtypes, kernel_name='apply_ftrl', attrs=attrs)
    expects, (var, accum, linear, grad), (lr, l1, l2, lr_power) = gen_data(dtype, shape)
    outputs = utils.mod_launch(mod, (var, accum, linear, grad, lr, l1, l2, lr_power),
                               outputs=(0, 1, 2))
    rtol, atol = get_rtol_atol("apply_ftrl", dtype)
    compare_result = list(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol),
                              outputs, expects))
    inputs = (var, accum, linear, grad, lr, l1, l2, lr_power)
    return inputs, outputs, expects, all(compare_result)
def test_ms_equal(shapes, dtype, poly_sch=False):
    if poly_sch:
        mod = utils.op_build_test(equal, shapes, [dtype, dtype], kernel_name="equal",
                                  attrs={"target": "cuda"})
    inputs1, output1, expect1 = gen_data(shapes, dtype)
    output1 = utils.mod_launch(mod, (*inputs1, output1), expect=expect1)

    if shapes[0] == shapes[1]:
        # run a second time with identical inputs to check the equal-shapes path
        inputs2 = [inputs1[0], inputs1[0]]
        expect2 = np.equal(inputs2[0], inputs2[1])
        output2 = np.full(expect2.shape, 0, bool)
        output2 = utils.mod_launch(mod, (*inputs2, output2), expect=expect2)
        res = np.allclose(output1, expect1, rtol=5e-03, atol=1.e-8) and \
            np.allclose(output2, expect2, rtol=5e-03, atol=1.e-8)
        print("Test {}".format("Pass" if res else "Fail"))
        if not res:
            print("Error cuda:========================")
            print(mod.imported_modules[0].get_source())
            raise AssertionError("Test fail")
        return True
    else:
        res = np.allclose(output1, expect1, rtol=5e-03, atol=1.e-8)
        print("Test {}".format("Pass" if res else "Fail"))
        if not res:
            print("Error cuda:========================")
            print(mod.imported_modules[0].get_source())
            raise AssertionError("Test fail")
        return True
def logprob_ad_run(shape, dtype, kernel_name="", attrs=None):
    expects, head, x, mean, scale, outputs = gen_data(dtype, shape)
    mod = utils.op_build_test(
        distr_normal_diag_logprob_ad.normal_diag_logprob_ad,
        [head.shape, x.shape, mean.shape, scale.shape],
        [dtype, dtype, dtype, dtype],
        kernel_name=kernel_name,
        op_attrs=None,
        attrs=attrs,
        log_code=True,
        dump_code=True,
        polyhedral=True,
    )
    outputs = utils.mod_launch(mod, [head, x, mean, scale, *outputs],
                               outputs=tuple(range(-len(outputs), 0)), expect=expects)
    outputs = list(outputs)
    result = True
    for i in range(len(outputs)):
        result &= compare_tensor(outputs[i], expects[i], rtol=5e-03, equal_nan=True)
    return (head, x, mean, scale), outputs, expects, result
def reduce_max_run(shape, dtype, axis, keepdims, kernel_name="reduce_max", attrs=None):
    """run function for dsl function reduce_max"""
    if attrs is None:
        attrs = {}
    op_attrs = [axis, keepdims]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(reduce_max, [shape], [dtype], op_attrs=op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
            return mod, expect, (inputs, output)
        return mod

    mod = utils.op_build_test(reduce_max, [shape], [dtype], op_attrs=op_attrs,
                              kernel_name=kernel_name, attrs=attrs)
    expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("reduce_max", dtype)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([inputs, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
def softmax_ad_run(shape, dtype, axis, kernel_name, optimized, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        if optimized:
            mod = utils.op_build_test(softmax_ad.softmax_ad_optimized, [shape, shape],
                                      [dtype, dtype], kernel_name=kernel_name,
                                      op_attrs=[axis], attrs=attrs, tuning=t)
        else:
            mod = utils.op_build_test(softmax_ad.softmax_ad, [shape, shape],
                                      [dtype, dtype], kernel_name=kernel_name,
                                      op_attrs=[axis], attrs=attrs, tuning=t)
        if t:
            expect, input_head, inputs, output = gen_data(dtype, shape)
            return mod, expect, (input_head, inputs, output)
        else:
            return mod
    else:
        expect, input_head, inputs, output = gen_data(dtype, shape)
        if optimized:
            mod = utils.op_build_test(softmax_ad.softmax_ad_optimized, [shape, shape],
                                      [dtype, dtype], kernel_name="softmax_ad_optimized",
                                      op_attrs=[axis], attrs=attrs)
        else:
            mod = utils.op_build_test(softmax_ad.softmax_ad, [shape, shape],
                                      [dtype, dtype], kernel_name="softmax_ad",
                                      op_attrs=[axis], attrs=attrs)
        print(mod.imported_modules[0].get_source())
        output = utils.mod_launch(mod, [input_head, inputs, output], expect=expect)
        return [input_head, inputs], output, expect, np.allclose(output, expect, rtol=5e-03, atol=0.1, equal_nan=True)
def test_fused_relu_grad_bn_update_grad(shape, out_shape, dtype="float16", layout="NHWC",
                                        out_dtype="float32", poly_sch=False):
    shape_list = [out_shape, shape, shape, shape]
    dtype_list = [out_dtype, dtype, dtype, dtype]
    op_attrs = [layout]
    if poly_sch:
        mod = utils.op_build_test(
            fused_relu_grad_bn_update_grad_auto, shape_list, dtype_list, op_attrs=op_attrs,
            kernel_name="fused_relu_grad_bn_update_grad_auto",
            attrs={"target": "cuda", "register_memory_depth": 3})
    else:
        mod = utils.op_build_test(
            fused_relu_grad_bn_update_grad_manual, shape_list, dtype_list,
            kernel_name="fused_relu_grad_bn_update_grad_manual", op_attrs=op_attrs)

    head, data_sum, in_bn, in_active, output, expect = gen_data(
        shape, out_shape, dtype, out_dtype, layout)
    outputs = [output, output]
    inputs = [data_sum, in_bn, head, in_active]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)),
                               expect=expect)

    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
def batch_matmul_run(shape1, shape2, dtype, out_dtype="float32", layout1="NHDT",
                     layout2="NHDT", layout_out="NHDT", shape_bias=None, add_bias=False,
                     tensor_core=True, poly_sch=True, attrs=None):
    op_attrs = [out_dtype, layout1, layout2, layout_out, tensor_core, add_bias]
    default_attrs = attrs
    if not attrs:
        default_attrs = {"target": "cuda"}
    if default_attrs["target"] == "cuda" and tensor_core:
        default_attrs.update({
            "pragma_enable_matmul": True,
            "enable_auto_inline": False
        })
    elif default_attrs["target"] == "llvm":
        if "pragma_enable_matmul" not in default_attrs.keys():
            default_attrs["pragma_enable_matmul"] = True
        if "feature" not in default_attrs.keys():
            default_attrs["feature"] = "avx"

    mod = utils.op_build_test(BatchMatMul, (shape1, shape2, shape_bias),
                              (dtype, dtype, out_dtype), op_attrs=op_attrs,
                              attrs=default_attrs, polyhedral=poly_sch,
                              kernel_name="batch_matmul")
    lhs, rhs, bias, output, expect = gen_data(shape1, shape2, dtype, out_dtype, layout1,
                                              layout2, layout_out, shape_bias, add_bias)
    args = (lhs, rhs, bias, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    # only profile when the caller passed attrs that request it
    if attrs and attrs.get("profiling", False):
        args = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod, *args, target=target_name, repeat_time=attrs["repeat_times"])
    return (lhs, rhs, bias), output, expect, res
def fused_gather_gather_add_mul_max_exp_scatter_add_run(
        input1_shape, input2_shape, input3_shape, input4_shape, data_dtype,
        indices_type, axis, poly_sch=True, attrs=None):
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    mod = utils.op_build_test(
        fused_gather_gather_add_mul_max_exp_scatter_add,
        [input1_shape, input2_shape, input3_shape, input4_shape],
        [data_dtype, indices_type, data_dtype, indices_type],
        op_attrs=op_attrs, attrs=default_attrs, polyhedral=poly_sch,
        kernel_name="fused_gather_gather_add_mul_max_exp_scatter_add",
    )

    # gen data
    input1, input2, input3, input4, expect1, expect2 = gen_data(
        input1_shape, input2_shape, input3_shape, input4_shape,
        data_dtype, indices_type, axis)
    output1 = np.zeros(expect1.shape, expect1.dtype)
    output2 = deepcopy(input1)
    output1, output2 = utils.mod_launch(
        mod, (input1, input2, input3, input4, output1, output2), outputs=(-2, -1))

    rtol, atol = get_rtol_atol("fused_gather_gather_add_mul_max_exp_scatter_add", data_dtype)
    res = compare_tensor(output1, expect1, rtol=rtol, atol=atol)
    res &= compare_tensor(output2, expect2, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    # only profile when the caller passed attrs that request it
    if attrs and attrs.get("profiling", False):
        inputs = to_tvm_nd_array([input1, input2, input3, input4, output1, output2],
                                 akg.tvm.context(target_name, 0))
        target_profiling(mod, *inputs, target=target_name, repeat_time=attrs["repeat_times"])
    return (input1, input2, input3, input4), (output1, output2), (expect1, expect2), res
def assign_run(ref_shape, val_shape, dtype, kernel_name="assign", attrs_op={}, cce_path="./", attrs={}):
    attrs.update(attrs_op)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Assign, [ref_shape, val_shape], [dtype, dtype],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            ref, val, expect = gen_data(dtype, ref_shape, val_shape)
            return mod, expect, (ref, val)
        else:
            return mod
    else:
        ref, val, expect = gen_data(dtype, ref_shape, val_shape)
        mod = utils.op_build_test(Assign, [ref_shape, val_shape], [dtype, dtype],
                                  kernel_name=kernel_name, attrs=attrs)
        fake_output = np.full(val_shape, np.nan, dtype)
        result, _ = utils.mod_launch(mod, (ref, val, fake_output), outputs=(0, -1), expect=expect)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            ref, val, output = to_tvm_nd_array([ref, val, fake_output],
                                               akg.tvm.context(target_name, 0))
            target_profiling(mod, ref, val, output, target=target_name,
                             repeat_time=attrs["repeat_times"])
        return (ref, val), result, expect, compare_tensor(result, expect, atol=5e-01, rtol=5e-03, equal_nan=True)
def test_fused_bn_double_follow_relu(in_shape, in_dtype='float16', layout='NHWC',
                                     out_dtype='float16', poly_sch=False):
    if layout != "NHWC" and layout != "NCHW":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout, out_dtype)
    input_shape_list = [i.shape for i in inputs]
    input_dtype_list = [inter_dtype] * 4 + [in_dtype] + [inter_dtype] * 4 + [in_dtype]
    op_attrs = [layout, out_dtype]
    if poly_sch:
        mod = utils.op_build_test(
            fused_bn_double_follow_relu_auto, input_shape_list, input_dtype_list,
            kernel_name="fused_bn_double_follow_relu_auto", op_attrs=op_attrs,
            attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(
            fused_bn_double_follow_relu_manual, input_shape_list, input_dtype_list,
            kernel_name="fused_bn_double_follow_relu_manual", op_attrs=op_attrs)

    outputs = [output]
    arglist = inputs + outputs
    output = utils.mod_launch(mod, arglist, expect=expect)

    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
def ones_like_run(shape, dtype, attrs):
    mod = utils.op_build_test(ones_like.ones_like, [shape], [dtype],
                              kernel_name='ones_like', attrs=attrs)
    input, expect, output = gen_data(dtype, shape)
    output = utils.mod_launch(mod, (input, output), expect=expect)
    rtol, atol = get_rtol_atol("ones_like", dtype)
    # compare result
    compare_res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    return input, output, expect, compare_res
def softplus_ad_run(shape, dtype, kernel_name, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(softplus_ad, [shape, shape], [dtype, dtype],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, head_np, input_np, output = gen_data(dtype, shape)
            return mod, expect, (head_np, input_np, output)
        else:
            return mod
    else:
        expect, head_np, input_np, output = gen_data(dtype, shape)
        mod = utils.op_build_test(softplus_ad, [shape, shape], [dtype, dtype],
                                  kernel_name="softplus", attrs=attrs)
        output = utils.mod_launch(mod, [head_np, input_np, output], expect=expect)
        rtol, atol = get_rtol_atol("softplus", dtype)
        return (head_np, input_np), output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
def test_ms_maximum(shape1, shape2, dtype, poly_sch=False):
    if poly_sch:
        mod = utils.op_build_test(maximum_auto, (shape1, shape2), (dtype, dtype),
                                  kernel_name="maximum_auto", attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(maximum_manual, (shape1, shape2), (dtype, dtype),
                                  kernel_name="maximum_manual")
    lhs, rhs, output, expect = gen_data(shape1, shape2, dtype)
    args = (lhs, rhs, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
def test_expand_dims(shape1, axis, dtype, poly_sch=False):
    if poly_sch:
        mod = utils.op_build_test(expand_dims_auto, [shape1], [dtype], op_attrs=[axis],
                                  attrs={"target": "cuda"}, kernel_name="expand_dims_auto")
    else:
        mod = utils.op_build_test(expand_dims_manual, [shape1], [dtype], op_attrs=[axis],
                                  kernel_name="expand_dims_manual")
    expect, input1, output = gen_data(axis, dtype, shape1)
    args = (input1, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    input1, expect = to_tvm_nd_array([input1, expect])
    gpu_profiling(mod, input1, expect, 400)
def test_ms_cast(shape, srcType, dstType, poly_sch=False):
    if poly_sch:
        mod = utils.op_build_test(cast_auto, [shape], [srcType], [dstType],
                                  attrs={"target": "cuda"}, kernel_name="cast_auto")
    else:
        mod = utils.op_build_test(cast_manual, [shape], [srcType], [dstType],
                                  kernel_name="cast_manual")
    output, expect, inputs = gen_data(shape, srcType, dstType)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    inputs, expect = to_tvm_nd_array([inputs, expect])
    gpu_profiling(mod, inputs, expect, 400)
def addn_compile(shape, dtype, n, attrs, kernel_name="addn", tuning=False):
    shapes = []
    for _ in range(n):
        shapes.append(shape)
    return utils.op_build_test(Addn, [shapes], [dtype], kernel_name=kernel_name,
                               attrs=attrs, tuning=tuning), shapes
def slice_run(shape, begin, size, dtype, attrs):
    op_attr = [begin, size]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(op_slice, [shape], [dtype], op_attr,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, output = gen_data(begin, dtype, shape, size)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(op_slice, [shape], [dtype], op_attr,
                                  kernel_name='slice', attrs=attrs)
        expect, input, output = gen_data(begin, dtype, shape, size)
        output = utils.mod_launch(mod, (input, output), expect=expect)  # unified launch
        return input, output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)
def log1p_run(shape, dtype, kernel_name, attrs):
    """run function for dsl function log1p."""
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(log1p.log1p, [shape], [dtype], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            expect, inputs, output = gen_data(dtype, shape)
            return mod, expect, (inputs, output)
        return mod

    mod = utils.op_build_test(log1p.log1p, [shape], [dtype], kernel_name=kernel_name, attrs=attrs)
    expect, inputs, output = gen_data(dtype, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    return inputs, output, expect, compare_tensor(output, expect, rtol=5e-03, atol=5e-03, equal_nan=True)
def mean_square_run(shape, reduce_axis, keepdims, dtype, attrs):
    op_attrs = [reduce_axis, keepdims]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(mean_square.mean_square, [shape], [dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input1, output = gen_data(dtype, keepdims, reduce_axis, shape)
            return mod, expect, (input1, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(mean_square.mean_square, [shape], [dtype], op_attrs, attrs=attrs)
        expect, input1, output = gen_data(dtype, keepdims, reduce_axis, shape)
        output = utils.mod_launch(mod, (input1, output), expect=expect)
        return input1, output, expect, compare_tensor(output, expect, rtol=5e-3, atol=5e-3, equal_nan=True)
def avgpool_run(shape, kernel, stride, strategy, dtype, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(avgpool.avgpool, [shape], [dtype],
                                  op_attrs=[kernel, stride, strategy],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, output = gen_data(dtype, kernel, shape, strategy, stride)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(avgpool.avgpool, [shape], [dtype],
                                  op_attrs=[kernel, stride, strategy],
                                  kernel_name='avgpool', attrs=attrs)
        expect, input, output = gen_data(dtype, kernel, shape, strategy, stride)
        output = utils.mod_launch(mod, [input, output], expect=expect)
        return input, output, expect, compare_tensor(output, expect, rtol=5e-03, atol=5e-03, equal_nan=True)
def add_ad_run(ashape, bshape, dtype, kernel_name="add", scale=1.0, attrs={}, polyhedral=True):
    # shift arguments when callers pass attrs (and optionally polyhedral) in place of scale
    if not isinstance(scale, (float, int)):
        if type(attrs) is not bool:
            scale, attrs = 1.0, scale
        else:
            scale, attrs, polyhedral = 1.0, scale, attrs

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        a = random_gaussian(ashape, miu=1, sigma=0.1).astype(dtype)
        b = random_gaussian(bshape, miu=1, sigma=0.1).astype(dtype)
        out = np.add(a, b * scale)
        mod = utils.op_build_test(add_ad, [out.shape, ashape, bshape], [dtype, dtype, dtype],
                                  kernel_name=kernel_name, op_attrs=[scale], attrs=attrs,
                                  polyhedral=polyhedral, tuning=t)
        if t:
            expect, head_np, output = gen_data(dtype, out)
            return mod, expect, (head_np, a, b, output)
        else:
            return mod
    else:
        a = random_gaussian(ashape, miu=1, sigma=0.1).astype(dtype)
        b = random_gaussian(bshape, miu=1, sigma=0.1).astype(dtype)
        out = np.add(a, b * scale)
        mod = utils.op_build_test(add_ad, [out.shape, ashape, bshape], [dtype, dtype, dtype],
                                  kernel_name='add_ad', op_attrs=[scale], attrs=attrs,
                                  polyhedral=polyhedral)
        expect, head_np, output = gen_data(dtype, out)
        output = utils.mod_launch(mod, (head_np, a, b, output), expect=expect)
        return (head_np, a, b), output, expect, compare_tensor(output, expect, atol=0.1)
def logprob_run(shape, dtype, kernelname="", attrs=None):
    expect, x, mean, scale, output = gen_data(dtype, shape)
    mod = utils.op_build_test(logprob_op, [x.shape, mean.shape, scale.shape],
                              [dtype, dtype, dtype], kernel_name=kernelname,
                              op_attrs=[], attrs=attrs,
                              log_cce=True, dump_cce=True, polyhedral=True)
    output = utils.mod_launch(mod, [x, mean, scale, output], expect=expect)
    return (x, mean, scale), output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)
def conv_fusion_run(shape_data, shape_filter1, shape_filter2, stride1, stride2, padding1,
                    padding2, dilation1, dilation2, dtype, out_dtype="float32",
                    poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [stride1, stride2, padding1, padding2, dilation1, dilation2]
    attrs.update({
        "enable_auto_fuse": False,
        "shared_memory_tensors": "out input_1 input_2 input_3",
        "pragma_disable_loop_fusion": True,
        "dim": "3 0 1 1 3 1 1 1 3 2 4 4 3 3 52 52 3 4 64 64"
    })
    mod = utils.op_build_test(ConvFusion, (shape_data, shape_filter1, shape_filter2),
                              (dtype, dtype, dtype), op_attrs=op_attrs, attrs=attrs,
                              polyhedral=poly_sch, kernel_name="conv_fusion_auto")
    data, weight1, weight2, output, expect = fusion_gen_data(
        shape_data, shape_filter1, shape_filter2, stride1, stride2, padding1, padding2,
        dilation1, dilation2, dtype, out_dtype)
    args = (data, weight1, weight2, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs.get("profiling", False):
        data, weight1, weight2, output = to_tvm_nd_array(
            [data, weight1, weight2, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, data, weight1, weight2, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, weight1, weight2), output, expect, res
def conv_bn1_run(fmap_shape, filter_shape, pad, stride, dilation, use_bias=False, attrs=None):
    vc_util.convolution_format_check(fmap_shape, filter_shape, pad, stride, dilation)
    if use_bias:
        raise ValueError("do not support bias yet !!!")

    conv_dtype = 'float16'
    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fmap_shape, filter_shape, pad, stride, dilation)
    IN, IC, IH, IW = fm_shape
    WN, WC, WH, WW = w_shape
    C0 = 16
    input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0)]
    mod = utils.op_build_test(conv_bn1.conv_bn1, [input_shape], [conv_dtype],
                              op_attrs=[fmap_shape, filter_shape, pad, stride, dilation,
                                        use_bias, attrs],
                              kernel_name='conv_bn1', attrs=attrs)

    fmap_data, filter_data, bias_data, conv_expect = \
        gen_data(fmap_shape, filter_shape, pad, stride, dilation, use_bias)
    axes = (0, 2, 3)
    conv_mean = np.mean(conv_expect, axis=axes, keepdims=True)
    conv_square = np.power(conv_expect, 2)
    conv_var_part = np.mean(conv_square, axis=axes, keepdims=True)

    expects = (conv_expect, conv_var_part, conv_mean)
    out_datas = [np.full(e.shape, 0, 'float16') for e in expects]
    out_datas[1] = out_datas[1].astype(np.float32)
    out_datas[2] = out_datas[2].astype(np.float32)

    in_data = [fmap_data, filter_data]
    args = in_data
    for out in out_datas:
        args.append(out)
    args = tuple(args)

    outputs = utils.mod_launch(mod, args, outputs=(-3, -2, -1), expect=expects)
    rtol, atol = get_rtol_atol("conv_bn1", conv_dtype)
    cmp_res = list(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), outputs, expects))
    return (fmap_data, filter_data, bias_data), outputs, expects, all(cmp_res)
def common_run(shape, dtype, axis, attrs, method):
    if attrs is None:
        attrs = {}
    attrs["enable_algebra_simplify"] = True
    if attrs.get("dynamic"):
        build_shape = []
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        if method == "min":
            mod = utils.op_build_test(argmin.argmin, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name=kernel_name, attrs=attrs, tuning=t)
        elif method == "max":
            mod = utils.op_build_test(argmax.argmax, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name=kernel_name, attrs=attrs, tuning=t)
        else:
            raise RuntimeError("not support " + method)
        if t:
            args, exp_output, input = gen_data(axis, dtype, method, shape)
            return mod, exp_output, args
        else:
            return mod
    else:
        if method == "min":
            mod = utils.op_build_test(argmin.argmin, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name="argmin", attrs=attrs)
        elif method == "max":
            mod = utils.op_build_test(argmax.argmax, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name="argmax", attrs=attrs)
        else:
            raise RuntimeError("not support " + method)
        args, exp_output, input = gen_data(axis, dtype, method, shape)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        res = utils.mod_launch(mod, args, outputs=(1,), expect=exp_output)
        acu_output = res.astype("int32")
        rtol, atol = get_rtol_atol("argmax_min_common", dtype)
        return input, acu_output, exp_output, compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)
def bounding_box_encode_run(anchor_box_shape, groundtruth_box_shape, anchor_samples_shape,
                            dtype, scale_factors, epsilon, kernel_name, attrs={}):
    # shape checks:
    # batch size
    assert groundtruth_box_shape[0] == anchor_samples_shape[0]
    # number of anchors
    assert anchor_box_shape[0] == anchor_samples_shape[1]
    assert (not scale_factors or len(scale_factors) == COORDINATES_LEN) and \
        (anchor_box_shape[-1] == COORDINATES_PAD_LEN) and \
        (groundtruth_box_shape[-1] == COORDINATES_PAD_LEN)

    op_attrs = [scale_factors, epsilon]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(bounding_box_encode.bouding_box_encode,
                                  [anchor_box_shape, groundtruth_box_shape, anchor_samples_shape],
                                  [dtype, dtype, "int32"], op_attrs, kernel_name=kernel_name,
                                  attrs=attrs, dump_code=True, tuning=t)
        if t:
            anchor_box_data, anchor_samples_data, expect, groundtruth_box_data, output_data = \
                gen_data(anchor_box_shape, anchor_samples_shape, dtype, epsilon,
                         groundtruth_box_shape, scale_factors)
            return mod, expect, (anchor_box_data, groundtruth_box_data, anchor_samples_data, output_data)
        else:
            return mod
    else:
        mod = utils.op_build_test(bounding_box_encode.bouding_box_encode,
                                  [anchor_box_shape, groundtruth_box_shape, anchor_samples_shape],
                                  [dtype, dtype, "int32"], op_attrs, kernel_name=kernel_name,
                                  attrs=attrs, dump_code=True)
        anchor_box_data, anchor_samples_data, expect, groundtruth_box_data, output_data = \
            gen_data(anchor_box_shape, anchor_samples_shape, dtype, epsilon,
                     groundtruth_box_shape, scale_factors)
        output = utils.mod_launch(mod, (anchor_box_data, groundtruth_box_data,
                                        anchor_samples_data, output_data), expect=expect)
        # compare result
        compare_result = compare_tensor(output, expect, rtol=5e-3, equal_nan=True)
        return (anchor_box_data, groundtruth_box_data, anchor_samples_data), output, expect, compare_result
def bn_2_run(shape, dtype, momentum, eps, kernel_name, attrs):
    """Test run function for the second part of the split batch norm."""
    in_shapes, in_dtypes = get_compile_param(shape, dtype, 2)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn2, in_shapes, in_dtypes, op_attrs=[momentum],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            inputs, output_buffers, expects = gen_data(shape, dtype, momentum, eps, 2)
            inplace_binds = ((2, 1), (3, 2))
            output_places2 = list(range(-len(output_buffers), 0))
            if inplace_binds is not None:
                for bind in inplace_binds:
                    output_places2[bind[1]] = bind[0]
            return mod, expects, {
                "args": (*inputs, *output_buffers),
                'outputs': output_places2,
                'tuning': False
            }
        return mod

    mod_2 = utils.op_build_test(fused_bn2, in_shapes, in_dtypes, op_attrs=[momentum],
                                kernel_name="fusedbn2_" + kernel_name, attrs=attrs)
    inputs, output_buffers, expects = gen_data(shape, dtype, momentum, eps, 2)
    inplace_binds = ((2, 1), (3, 2))
    output_places2 = list(range(-len(output_buffers), 0))
    if inplace_binds is not None:
        for bind in inplace_binds:
            output_places2[bind[1]] = bind[0]
    res_2 = utils.mod_launch(mod_2, [*inputs, *output_buffers], outputs=output_places2,
                             expect=expects)
    rtol, atol = get_rtol_atol("bn_split", dtype)
    cmp_res = list(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), res_2, expects))
    return inputs, res_2, expects, all(cmp_res)