def reshape_compile(in_shape, out_shape, dtype, attrs, kernel_name='reshape', tuning=False):
    if attrs.get("dynamic"):
        var_shape = []
        for i in range(len(in_shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_in_shape = var_shape
        total_size = reduce(lambda x, y: x * y, var_shape)
        out_var_shape = []
        if len(out_shape) >= 2:
            for i in range(len(out_shape) - 1):
                out_var_shape.append(tvm.var("O" + str(i)))
            out_size = reduce(lambda x, y: x * y, out_var_shape)
            # Derive the last output dim so the total element count is preserved.
            out_var_shape.append(tvm.div(total_size, out_size))
        else:
            out_var_shape.append(total_size)
        build_out_shape = out_var_shape
    else:
        build_in_shape = in_shape
        build_out_shape = out_shape
    op_attr = [build_out_shape]
    return utils.op_build_test(reshape.reshape, [build_in_shape], [dtype], op_attr,
                               kernel_name=kernel_name, attrs=attrs, tuning=tuning)
def my_dsl(dtype, kernel_name, attrs):
    # `insn` and `insnType` are read as module-level names set by the caller.
    m = tvm.var("M")
    n = tvm.var("N")
    A = tvm.placeholder((m, ), name="A", dtype=dtype)
    B = tvm.placeholder((m, ), name="B", dtype=dtype)
    if insn == "add":
        C = topi.add(A, B)
    elif insn == "sub":
        C = topi.subtract(A, B)
    elif insn == "mul":
        C = topi.multiply(A, B)
    elif insn == "div":
        C = topi.divide(A, B)
    elif insn == "max":
        C = topi.maximum(A, B)
    elif insn == "min":
        C = topi.minimum(A, B)
    elif insn == "abs":
        C = tvm.compute(A.shape, lambda *index: tvm.abs(A(*index)), name='C')
    elif insn == "exp":
        C = topi.exp(A)
    elif insn == "log":
        C = topi.log(A)
    elif insn == "sqrt":
        C = topi.sqrt(A)
    elif insn == "adds":
        C = A + tvm.const(2, dtype)
    elif insn == "muls":
        C = A * tvm.const(2, dtype)
    # C = tvm.compute((m, ), lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule([C.op])
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        if insnType == "binary":
            mod = akg.build(s, [A, B, C], "cce", name=kernel_name, attrs=attrs, polyhedral=True)
        else:
            mod = akg.build(s, [A, C], "cce", name=kernel_name, attrs=attrs, polyhedral=True)
    return mod
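# A minimal driver sketch for my_dsl (hypothetical, not from the original file):
# the body above reads `insn` and `insnType` as module-level names, so a caller
# in the same module would set them before building, roughly like this.
insn, insnType = "add", "binary"
mod = my_dsl("float16", "my_dsl_add", attrs={})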
def process_dynamic_shape(shapes, attrs, keep_axis=None):
    dynamic_shape_args = []
    if len(shapes) == 0 or not attrs.get("dynamic"):
        return shapes, dynamic_shape_args
    new_shapes = []
    prefix = "I"
    keep_axis_local = keep_axis
    if isinstance(keep_axis, int):
        keep_axis_local = [keep_axis]
    if keep_axis_local is None:
        keep_axis_local = []
    for shape in shapes:
        dynamic_shape = []
        for i in range(len(shape)):
            # An axis listed in keep_axis (by positive or negative index) stays static.
            if (i in keep_axis_local) or ((i - len(shape)) in keep_axis_local):
                dynamic_shape.append(shape[i])
            else:
                dynamic_shape.append(tvm.var(prefix + str(i)))
                dynamic_shape_args.append(shape[i])
        new_shapes.append(dynamic_shape)
        # Grow the prefix so vars of different tensors get distinct names.
        prefix += "I"
    return new_shapes, dynamic_shape_args
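# A worked sketch of process_dynamic_shape (values are illustrative): with
# dynamic mode on and keep_axis=-1, the last axis stays static while the other
# axes become tvm vars whose concrete sizes are collected for the launch args.
new_shapes, dyn_args = process_dynamic_shape([[16, 32, 128]], {"dynamic": True}, keep_axis=-1)
# new_shapes -> [[I0, I1, 128]]; dyn_args -> [16, 32]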
def squeeze_run(shape, axis, dtype, kernel_name="squeeze", attrs=None):
    op_attrs = [axis]
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        build_shape = []
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(squeeze.squeeze, [build_shape], [dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, output = gen_data(axis, dtype, shape)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        expect, input, output = gen_data(axis, dtype, shape)
        mod = utils.op_build_test(squeeze.squeeze, [build_shape], [dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs)
        args = [input, output]
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
        output = utils.mod_launch(mod, args, outputs=(1,), expect=expect)
        return input, output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)
def maxpool_run(shape, kernel, stride, pad, hybrid, dtype, attrs=None, polyhedral=True):
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        var_shape = []
        for i in range(len(shape)):
            # The last axis stays static; the others become dynamic.
            if i == len(shape) - 1:
                var_shape.append(shape[i])
            else:
                var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(maxpool.maxpool, [build_shape], [dtype], op_attrs=[kernel, stride, pad],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, out_shape, res = gen_data(dtype, kernel, pad, shape, stride)
            return mod, expect, {"args": (input, res), 'outputs': (-1, ), 'tuning': False}
        else:
            return mod
    else:
        if polyhedral:
            if hybrid:
                mod = utils.op_build_test(maxpool.maxpool, [build_shape], [dtype],
                                          op_attrs=[kernel, stride, pad], kernel_name='maxpool', attrs=attrs)
            else:
                mod = utils.op_build_test(maxpool.old_maxpool, [build_shape], [dtype],
                                          op_attrs=[kernel, stride, pad], kernel_name='maxpool_old', attrs=attrs)
        else:
            mod = maxpool.maxpool_manual_schedule(build_shape, kernel, stride, pad, dtype,
                                                  attrs=attrs, polyhedral=polyhedral)
        expect, input, out_shape, res = gen_data(dtype, kernel, pad, shape, stride)
        output = utils.mod_launch(mod, [input, res], expect=expect)
        rtol, atol = get_rtol_atol("maxpool", dtype)
        return input, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
def four2five_compile(shape, dtype, op_attrs, attrs, kernel_name='four2five', tuning=False):
    if attrs.get("dynamic"):
        var_shape = []
        format, dst_type = op_attrs
        # Keep the channel axis static; its position depends on the data format.
        channel_idx = 1
        if format == 'NCHW':
            channel_idx = 1
        elif format == 'NHWC':
            channel_idx = len(shape) - 1
        for i in range(len(shape)):
            if i == channel_idx:
                var_shape.append(shape[i])
            else:
                var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    return utils.op_build_test(four2five.four2five, [build_shape], [dtype], op_attrs,
                               kernel_name=kernel_name, attrs=attrs, tuning=tuning)
def maxpool_with_argmax_run(shape, kernel, stride, pad, dsl, dtype, attrs=None, polyhedral=True):
    build_shape = []
    arg_list = []
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        for i in range(len(shape)):
            if i == len(shape) - 1:
                build_shape.append(shape[i])
            else:
                tmp_var = tvm.var("I" + str(i))
                build_shape.append(tmp_var)
                arg_list.append(shape[i])
    else:
        build_shape = shape
    arg_len = len(arg_list)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(maxpool.maxpool_with_argmax, [shape], [dtype],
                                  op_attrs=[kernel, stride, pad], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            input, expects, outputs = gen_data(dtype, kernel, pad, shape, stride)
            return mod, expects, {"args": (input, outputs[0], outputs[1]),
                                  'outputs': (-2 - arg_len, -1 - arg_len), 'tuning': False}
        else:
            return mod
    else:
        if polyhedral:
            if attrs.get("dynamic") and len(build_shape) > 0:
                mod = utils.op_build_test(maxpool.maxpool_with_argmax_dynamic, [build_shape], [dtype],
                                          op_attrs=[kernel, stride, pad], kernel_name='maxpool', attrs=attrs)
            else:
                mod = utils.op_build_test(maxpool.maxpool_with_argmax, [shape], [dtype],
                                          op_attrs=[kernel, stride, pad], kernel_name='maxpool', attrs=attrs)
        else:
            mod = maxpool.maxpool_manual_schedule(shape, kernel, stride, pad, dtype,
                                                  attrs=attrs, polyhedral=polyhedral)
        input, expects, outputs = gen_data(dtype, kernel, pad, shape, stride, attrs)
        args = [input, outputs[0], outputs[1]]
        if attrs.get("dynamic"):
            args = args + arg_list
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
            outputs = utils.mod_launch(mod, args, (-3 - arg_len, -2 - arg_len), expect=expects)
        else:
            outputs = utils.mod_launch(mod, args, (-2 - arg_len, -1 - arg_len), expect=expects)
        rtol, atol = get_rtol_atol("maxpool", dtype)
        results = list(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), outputs, expects))
        return input, outputs, expects, all(results)
def cast_run(shape, srcType, dstType, attrs=None):
    op_attrs = [dstType]
    # Avoid a mutable default: attrs is mutated below when dynamic mode is on.
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Cast, [build_shape], [srcType], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = utils.op_build_test(Cast, [build_shape], [srcType], op_attrs,
                                  kernel_name='cast', attrs=attrs)
        args, exp_output, input = gen_data(dstType, shape, srcType)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        acu_output = utils.mod_launch(mod, args, outputs=(1, ), expect=exp_output)
        # compare result
        rtol, atol = get_rtol_atol("cast", dstType)
        TestCase_Result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
            target_profiling(mod, *args_list, target=target_name, repeat_time=attrs["repeat_times"])
        return input, acu_output, exp_output, TestCase_Result
def cast_run(shape, srcType, dstType, attrs):
    op_attrs = [dstType]
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(cast.cast, [build_shape], [srcType], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = utils.op_build_test(cast.cast, [build_shape], [srcType], op_attrs,
                                  kernel_name='cast', attrs=attrs)
        args, exp_output, input = gen_data(dstType, shape, srcType)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        acu_output = utils.mod_launch(mod, args, outputs=(1, ), expect=exp_output)
        # compare result
        rtol, atol = get_rtol_atol("cast", dstType)
        TestCase_Result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)
        return input, acu_output, exp_output, TestCase_Result
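# A hypothetical invocation of cast_run in dynamic mode (shapes and dtypes are
# example values; assumes the surrounding test utilities are importable):
input, acu_output, exp_output, ok = cast_run((8, 1024), "float16", "float32", {"dynamic": True})
assert ok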
def common_run(shape, dtype, axis, attrs, method):
    if attrs is None:
        attrs = {}
    attrs["enable_algebra_simplify"] = True
    if attrs.get("dynamic"):
        build_shape = []
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        # Compare strings with ==, not identity (`is`).
        if method == "min":
            mod = utils.op_build_test(argmin.argmin, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name=kernel_name, attrs=attrs, tuning=t)
        elif method == "max":
            mod = utils.op_build_test(argmax.argmax, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name=kernel_name, attrs=attrs, tuning=t)
        else:
            raise RuntimeError("unsupported method: " + method)
        if t:
            args, exp_output, input = gen_data(axis, dtype, method, shape)
            return mod, exp_output, args
        else:
            return mod
    else:
        if method == "min":
            mod = utils.op_build_test(argmin.argmin, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name="argmin", attrs=attrs)
        elif method == "max":
            mod = utils.op_build_test(argmax.argmax, [build_shape], [dtype], op_attrs=[axis],
                                      kernel_name="argmax", attrs=attrs)
        else:
            raise RuntimeError("unsupported method: " + method)
        args, exp_output, input = gen_data(axis, dtype, method, shape)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        res = utils.mod_launch(mod, args, outputs=(1,), expect=exp_output)
        acu_output = res.astype("int32")
        rtol, atol = get_rtol_atol("argmax_min_common", dtype)
        return input, acu_output, exp_output, compare_tensor(acu_output, exp_output,
                                                             rtol=rtol, atol=atol, equal_nan=True)
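# compute_blockdim is defined elsewhere in these test utilities. A plausible
# sketch, consistent only with how it is used in the runners here (one launch
# block count derived from the element count, capped at 32); this is an
# assumption, not the actual implementation:
def compute_blockdim(shape):
    size = 1
    for dim in shape:
        size *= dim
    # Hypothetical heuristic: roughly one block per 8K elements, clamped to [1, 32].
    return max(1, min(32, size // 8192))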
def equal_count_run(shapes, dtype, kernel_name, attrs):
    # shape check
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        var_size = tvm.var("I0")
        var_shape = []
        for shape in shapes:
            assert len(shape) == 1
            var_shape.append([var_size])
        build_shape = var_shape
    else:
        build_shape = shapes
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(equal_count.equal_count, build_shape, [dtype, dtype],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            benchMark1, inputs1, output1 = gen_data(dtype, shapes)
            return mod, benchMark1, inputs1 + [output1]
        else:
            return mod
    else:
        mod = utils.op_build_test(equal_count.equal_count, build_shape, [dtype, dtype],
                                  kernel_name=kernel_name, attrs=attrs)
        benchMark1, inputs1, output1 = gen_data(dtype, shapes)
        if attrs.get("dynamic"):
            args = inputs1.copy()
            args.append(output1)
            # All inputs are 1-D with the same dynamic size, so append the dims
            # of the last shape in reverse (the original read the loop variable
            # `shape` leaked from the loop above).
            last_shape = shapes[-1]
            for i in range(len(last_shape) - 1, -1, -1):
                args.append(last_shape[i])
            block_dim = compute_blockdim(shapes)
            args.append(block_dim)
        else:
            args = inputs1 + [output1]
        output1 = utils.mod_launch(mod, args, outputs=(2, ), expect=benchMark1)
        return inputs1, output1, benchMark1, (output1[0] == benchMark1)
def sum_compile(shape, reduce_axis, keepdims, dtype, attrs, kernel_name="sum", tuning=False):
    op_attrs = [reduce_axis, keepdims]
    if attrs is not None and attrs.get("dynamic"):
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        attrs["enable_post_poly_loop_partition"] = False
        build_shape = var_shape
    else:
        build_shape = shape
    return utils.op_build_test(sum.sum_value, [build_shape], [dtype], op_attrs,
                               kernel_name=kernel_name, attrs=attrs, tuning=tuning)
def relu_run(shape, dtype, rtol, attrs):
    if attrs is not None and attrs.get("dynamic"):
        build_shape = []
        attrs['enable_post_poly_loop_partition'] = False
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Relu, [build_shape], [dtype], kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            input_np, expect = gen_data(dtype, shape)
            return mod, (input_np, expect)
        else:
            return mod
    else:
        mod = utils.op_build_test(Relu, [build_shape], [dtype], kernel_name='relu', attrs=attrs)
        input_np, expect = gen_data(dtype, shape)
        output = np.full(expect.shape, np.nan, dtype=expect.dtype)
        args = [input_np, output]
        if attrs is not None and attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(1, ), expect=expect)
        # The `rtol` parameter is superseded by the tolerance table lookup below.
        rtol, atol = get_rtol_atol("relu", dtype)
        return input_np, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol)
def five2four_compile(shape_5d, dtype, op_attrs, attrs, kernel_name='five2four', tuning=False):
    if attrs.get("dynamic"):
        var_shape = []
        shape4d, dst_type, _ = op_attrs
        channel_idx = 1  # unused
        for i in range(len(shape_5d)):
            # Dims of size 1 stay static; everything else becomes a tvm.var.
            if shape_5d[i] == 1:
                var_shape.append(shape_5d[i])
            else:
                var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape_5d
    return utils.op_build_test(five2four.five2four, [build_shape], [dtype], op_attrs,
                               kernel_name=kernel_name, attrs=attrs, tuning=tuning)
def batchmatmul_compile(bs, m, n, k, bias_shape, dtype, trans_a, trans_b, kernel_name, attrs, tuning=False):
    # Maps each tvm.var introduced for a dynamic dim to its concrete runtime value.
    args_dict = {}
    if attrs.get("dynamic"):
        bs_vars = ()
        for i in range(len(bs)):
            tmp = tvm.var('I' + str(i))
            bs_vars = bs_vars + (tmp, )
            args_dict[tmp] = bs[i]
        bs = bs_vars
        index = len(bs)
        m_var = tvm.var('I' + str(index))
        n_var = tvm.var('I' + str(index + 1))
        k_var = tvm.var('I' + str(index + 2))
        args_dict[m_var] = m
        args_dict[n_var] = n
        args_dict[k_var] = k
        m = m_var
        n = n_var
        k = k_var
        index = index + 3
        bias_vars = ()
        if len(bias_shape) > 0:
            for i in range(len(bias_shape)):
                tmp = tvm.var('I' + str(i + index))
                args_dict[tmp] = bias_shape[i]
                bias_vars = bias_vars + (tmp, )
            bias_shape = bias_vars
    data_shape, weight_shape, out_shape = get_shape(bs, m, n, k, trans_a, trans_b)
    if len(bias_shape) > 0:
        input_shapes = [data_shape, weight_shape, bias_shape]
        input_types = [dtype, dtype, dtype]
    else:
        input_shapes = [data_shape, weight_shape]
        input_types = [dtype, dtype]
    op_attrs = [trans_a, trans_b]
    # Collect the concrete values in the order the vars appear in the kernel signature.
    args = get_vars(input_shapes, input_types)
    args_value = []
    for item in args:
        if item in args_dict:
            args_value.append(args_dict[item])
    if len(bias_shape) > 0:
        return utils.op_build_test(batchmatmul.batchmatmul_bias, input_shapes, input_types, op_attrs,
                                   kernel_name, attrs, tuning=tuning), args_value
    else:
        return utils.op_build_test(batchmatmul.batchmatmul, input_shapes, input_types, op_attrs,
                                   kernel_name, attrs, tuning=tuning), args_value
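# A hypothetical sketch of consuming the pair returned by batchmatmul_compile
# (argument values are illustrative): in dynamic mode the collected shape values
# are appended to the launch args, mirroring the other *_run helpers above.
mod, dyn_args = batchmatmul_compile((2,), 32, 64, 128, (), "float16",
                                    False, False, "batchmatmul", {"dynamic": True})
# launch_args = [lhs, rhs, out] + dyn_args  (plus a block_dim if the kernel expects one)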
def conv_run(fmap_shape, filter_shape, pad, stride, dilation, use_bias=False, attrs=None, dump_data=False):
    conv_dtype = 'float16'
    vc_util.convolution_format_check(fmap_shape, filter_shape, pad, stride, dilation)
    if attrs is None:
        attrs = {}

    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fmap_shape, filter_shape, pad, stride, dilation)
    IN, IC, IH, IW = fm_shape
    WN, WC, WH, WW = w_shape
    C0 = 16

    if use_bias:
        input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0),
                       (1, WN // 16, 1, 1, 16)]
    else:
        input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0)]

    input_file = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    expect_file = input_file + "/" + gen_kernel_name([input_shape], [conv_dtype],
                                                     op_attrs=[fmap_shape, filter_shape, pad, stride,
                                                               dilation, use_bias, attrs],
                                                     kernel_name='conv', attrs=attrs) + ".bin"

    all_dynamic = 0      # kh kw pad stride
    partial_dynamic = 0  # fn fc1 fh fw wN wC
    if attrs.get("dynamic"):
        all_dynamic = 1
        print("=================all dynamic==================")
    if attrs.get("partial_dynamic"):
        partial_dynamic = 1
        print("=================partial dynamic==================")
    dynamic = partial_dynamic or all_dynamic
    if not dynamic:
        print("=================static shape==================")

    if dynamic:
        fmap_shape_real = fmap_shape
        filter_shape_real = filter_shape
        pad_real = pad
        stride_real = stride
        dilation_real = dilation
        if partial_dynamic or all_dynamic:
            N = tvm.var("N")
            C = tvm.var("CI")
            CI1 = tvm.var("CI1")
            H = tvm.var("H")
            W = tvm.var("W")
            COUT = tvm.var("CO")
            CO1 = tvm.var("CO1")
            _, _, KH, KW = filter_shape
            SH, SW = stride
            PT, PB, PL, PR = pad
        params = ()
        if all_dynamic:
            PARAM_KH = tvm.var("KH")
            PARAM_KW = tvm.var("KW")
            PARAM_PT = tvm.var("PT")
            PARAM_PB = tvm.var("PB")
            PARAM_PL = tvm.var("PL")
            PARAM_PR = tvm.var("PR")
            PARAM_SH = tvm.var("SH")
            PARAM_SW = tvm.var("SW")
            PARAM_T1_0_H = tvm.var("T1_0_H")
            PARAM_T1_0_W = tvm.var("T1_0_W")
            PARAM_T1_0_C1 = tvm.var("T1_0_C1")
            PARAM_T0_0_MO = tvm.var("T0_0_MO")
            PARAM_T0_0_NO = tvm.var("T0_0_NO")
            PARAM_T0_0_KO = tvm.var("T0_0_KO")
            params = (PARAM_KH, PARAM_KW, PARAM_PT, PARAM_PB, PARAM_PL, PARAM_PR, PARAM_SH, PARAM_SW,
                      PARAM_T1_0_H, PARAM_T1_0_W, PARAM_T1_0_C1, PARAM_T0_0_MO, PARAM_T0_0_NO, PARAM_T0_0_KO)
        DEBUG = 1
        KH_FAKE = 11
        KW_FAKE = 31
        fmap_shape = (N, C, H, W)
        filter_shape = (COUT, C, KH, KW)
        if not DEBUG:
            CO1 = (COUT + 15) // 16
            CI1 = (C + 15) // 16
        if use_bias:
            # input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0), (1, WN // 16, 1, 1, 16)]
            if all_dynamic:
                input_shape = [(N, CI1, H, W, 16), (CI1 * KH_FAKE * KW_FAKE, CO1, 16, 16), (1, CO1, 1, 1, 16)]
            else:
                input_shape = [(N, CI1, H, W, 16), (CI1 * KH * KW, CO1, 16, 16), (1, CO1, 1, 1, 16)]
        else:
            # input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0)]
            if all_dynamic:
                input_shape = [(N, CI1, H, W, 16), (CI1 * KH_FAKE * KW_FAKE, CO1, 16, 16)]
            else:
                input_shape = [(N, CI1, H, W, 16), (CI1 * KH * KW, CO1, 16, 16)]
        mod = utils.op_build_test(Conv, [input_shape], [conv_dtype],
                                  op_attrs=[fmap_shape, filter_shape, pad, stride, dilation,
                                            use_bias, attrs, params],
                                  kernel_name='conv', attrs=attrs)
        fmap_data, filter_data, bias_data, expect = gen_data(fmap_shape_real, filter_shape_real, pad_real,
                                                             stride_real, dilation_real, use_bias, expect_file)
    else:
        mod = utils.op_build_test(Conv, [input_shape], [conv_dtype],
                                  op_attrs=[fmap_shape, filter_shape, pad, stride, dilation,
                                            use_bias, attrs],
                                  kernel_name='conv', attrs=attrs)
        fmap_data, filter_data, bias_data, expect = gen_data(fmap_shape, filter_shape, pad, stride,
                                                             dilation, use_bias, expect_file)

    if dump_data:
        with open('input.bin', 'wb') as fo:
            fo.write(fmap_data.astype(np.float16, copy=False))
        with open('filter.bin', 'wb') as fo:
            fo.write(filter_data.astype(np.float16, copy=False))
        with open('bias.bin', 'wb') as fo:
            fo.write(bias_data.astype(np.float16, copy=False))
        with open('output.bin', 'wb') as fo:
            fo.write(expect.astype(np.float16, copy=False))

    out_data = np.full(expect.shape, np.nan, 'float16')
    if use_bias:
        input = [fmap_data, filter_data, bias_data]
    else:
        input = [fmap_data, filter_data]
    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "Yes":
        return input, out_data, expect, True

    if not dynamic:
        args = input
        args.append(out_data)
        args = tuple(args)
        out_data = utils.mod_launch(mod, args, expect=expect)
    else:
        args = []
        args.append(fmap_data)
        args.append(filter_data)
        args.append(out_data)
        if partial_dynamic or all_dynamic:
            args.append(IN)
            args.append(IC)
            args.append(IH)
            args.append(IW)
            args.append(WN)
        if all_dynamic:
            args.append(KH)
            args.append(KW)
            args.append(PT)
            args.append(PB)
            args.append(PL)
            args.append(PR)
            args.append(SH)
            args.append(SW)
            if attrs.get("conv_tile") and len(attrs["conv_tile"]) == 7:
                T1_0_H = attrs["conv_tile"][0]
                T1_0_C1 = attrs["conv_tile"][1]
                T0_0_MO = attrs["conv_tile"][2]
                T0_0_KO = attrs["conv_tile"][3]
                T0_0_NO = attrs["conv_tile"][4]
                T1_0_W = attrs["conv_tile"][5]
                # Tiles that span the whole padded axis must include the padding.
                if T1_0_H == IH:
                    T1_0_H += PT + PB
                T1_0_H_cut = (T1_0_H - KH) // SH + 1
                if T1_0_W == IW:
                    T1_0_W += PL + PR
                T1_0_W_cut = (T1_0_W - KW) // SW + 1
                args.append(T1_0_H_cut)
                args.append(T1_0_W_cut)
                args.append((T1_0_C1 + 15) // 16)
                args.append((T0_0_MO + 15) // 16)
                args.append((T0_0_NO + 15) // 16)
                args.append((T0_0_KO + 15) // 16)
        if DEBUG:
            args.append(IC // 16)
            args.append(WN // 16)
        block_dim = min(32, IN)
        args.append(block_dim)
        out_data = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)

    rtol, atol = get_rtol_atol("conv", conv_dtype)
    return input, out_data, expect, compare_tensor(out_data, expect, rtol=rtol, atol=atol, equal_nan=True)
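# A hypothetical call sketch for conv_run (example values; NCHW feature map and
# OIHW filter, as the format check above expects):
inputs, out, expect, ok = conv_run(fmap_shape=(1, 32, 28, 28), filter_shape=(64, 32, 3, 3),
                                   pad=(1, 1, 1, 1), stride=(1, 1), dilation=(1, 1),
                                   use_bias=False, attrs={})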
def add_run(shape1, shape2, dtype, kernel_name="add", scale=1.0, attrs=None, polyhedral=True):
    # Legacy call sites pass attrs in the `scale` slot; shift the arguments over.
    if not isinstance(scale, (float, int)):
        if type(attrs) is not bool:
            scale, attrs = 1.0, scale
        else:
            scale, attrs, polyhedral = 1.0, scale, attrs
    if attrs is None:
        attrs = {}
    op_attrs = [scale]
    if not polyhedral:
        op_attrs = op_attrs + [polyhedral, attrs]
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        if shape1 != shape2:
            raise TypeError("Input tensors have different shapes; broadcast isn't supported for dynamic.")
        var_shape = []
        for i in range(len(shape1)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape1 = var_shape
        build_shape2 = var_shape
    else:
        build_shape1 = shape1
        build_shape2 = shape2
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(add.add, [build_shape1, build_shape2], [dtype, dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, polyhedral=polyhedral, tuning=t)
        if t:
            args, expect, input1, input2 = gen_data(shape1, shape2, dtype, scale)
            return mod, expect, args
        else:
            return mod
    else:
        args, expect, input1, input2 = gen_data(shape1, shape2, dtype, scale)
        mod = utils.op_build_test(add.add, [build_shape1, build_shape2], [dtype, dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, polyhedral=polyhedral)
        if attrs.get("dynamic"):
            for i in range(len(shape1)):
                args.append(shape1[i])
            block_dim = compute_blockdim(shape1)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)
        rtol, atol = get_rtol_atol("add", dtype)
        return (input1, input2), output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol,
                                                                equal_nan=True)
def add_run(shape1, shape2, dtype, kernel_name="add", scale=1.0, attrs_op=None, polyhedral=True, attrs=None):
    # Legacy call sites pass attrs in the `scale` slot; shift the arguments over.
    if not isinstance(scale, (float, int)):
        if type(attrs_op) is not bool:
            scale, attrs_op = 1.0, scale
        else:
            scale, attrs_op, polyhedral = 1.0, scale, attrs_op
    op_attrs = [scale]
    if not polyhedral:
        op_attrs = op_attrs + [polyhedral, attrs_op]
    attrs = {} if attrs is None else attrs
    if isinstance(attrs_op, dict):
        attrs.update(attrs_op)
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        if shape1 != shape2:
            raise TypeError("Input tensors have different shapes; broadcast isn't supported for dynamic.")
        var_shape = []
        for i in range(len(shape1)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape1 = var_shape
        build_shape2 = var_shape
    else:
        build_shape1 = shape1
        build_shape2 = shape2
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Add, [build_shape1, build_shape2], [dtype, dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, polyhedral=polyhedral, tuning=t)
        if t:
            args, expect, input1, input2 = gen_data(shape1, shape2, dtype, scale)
            return mod, expect, args
        else:
            return mod
    else:
        args, expect, input1, input2 = gen_data(shape1, shape2, dtype, scale)
        mod = utils.op_build_test(Add, [build_shape1, build_shape2], [dtype, dtype], op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, polyhedral=polyhedral)
        if attrs.get("dynamic"):
            for i in range(len(shape1)):
                args.append(shape1[i])
            block_dim = compute_blockdim(shape1)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            data = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
            target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
        rtol, atol = get_rtol_atol("add", dtype)
        return (input1, input2), output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol,
                                                                equal_nan=True)
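# A hypothetical invocation of add_run in dynamic mode (example shapes):
inputs, output, expect, ok = add_run((16, 16), (16, 16), "float16", attrs={"dynamic": True})
assert ok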