def case_1(data_shape, dtype, kernel_name, attrs):
    """elemwise chain case 1"""
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.FLOAT16)
    utils.check_shape_length_equal("data", data_shape, 2)

    m, k = data_shape
    A = akg.tvm.placeholder((m, k), name='A', dtype=dtype)
    B = akg.tvm.placeholder((k,), name='B', dtype=dtype)
    C = akg.tvm.placeholder((m, k), name='C', dtype=dtype)

    E = akg.tvm.compute((m, k), lambda i, j: A[i, j] * (B[j] + C[i, j]), name="E")

    forward_s = akg.tvm.create_schedule(E.op)
    op_vars = [A, B, C, E]
    akg.lower(forward_s, op_vars, simple_mode=True, polyhedral=True)

    kernel_name = gen_name_kernel(kernel_name, dtype, data_shape)
    with akg.build_config(add_lower_pass=debug_mode(0), dump_pass_ir=True):
        mod = akg.build(forward_s, op_vars, "cce", name="test",
                        attrs=attrs, polyhedral=True)
    return mod
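
# Usage sketch for case_1 (hypothetical shape/dtype/kernel name; assumes a
# working AKG/CCE build environment). Passing empty attrs lets the polyhedral
# pass choose its own tiling.
def _demo_case_1():
    return case_1((16, 16), "float16", "elemwise_chain", attrs={})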

def div_mod_issue(data_shape, weight_shape, case_number):
    """Build kernels whose index expressions contain division,
    to exercise div/mod handling in the polyhedral pipeline."""
    if case_number == 0:
        A = akg.tvm.placeholder(data_shape, dtype='float16', name='input0')
        divisor = 2
        stage1 = akg.tvm.compute(
            data_shape,
            lambda n, c, h, w: A[n, c / divisor, h, w] + 1,
            name="stage1")
        op_vars = [A, stage1]
        s = akg.tvm.create_schedule([stage1.op])
        akg.lower(s, op_vars, simple_mode=True, polyhedral=True)
        with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
            mod = akg.build(s, op_vars, "cce", name="test1", polyhedral=True)
        return mod
    else:
        A = akg.tvm.placeholder(data_shape, dtype='float16', name='input0')
        B = akg.tvm.placeholder(weight_shape, dtype='float16', name='input1')
        divisor = 3
        stage1 = akg.tvm.compute(
            data_shape,
            lambda n, c, h, w: A[n, c / divisor, h, w] + 1,
            name="stage1")
        stage2 = akg.tvm.compute(
            weight_shape,
            lambda n, c, h, w: stage1[0, c, 0, 0] + B[n, c, h, w],
            name="stage2")
        op_vars = [A, B, stage2]
        s = akg.tvm.create_schedule([stage2.op])
        akg.lower(s, op_vars, simple_mode=True, polyhedral=True)
        with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
            mod_stage2 = akg.build(s, op_vars, "cce", name="test2", polyhedral=True)
        return mod_stage2
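
# Usage sketch (hypothetical NCHW shapes; assumes an AKG/CCE environment).
# Case 0 only needs data_shape; case 1 also reads a weight tensor. The chosen
# shapes keep the divided channel index A[n, c / divisor, h, w] in bounds.
def _demo_div_mod_issue():
    mod0 = div_mod_issue((2, 4, 8, 8), None, 0)
    mod1 = div_mod_issue((2, 6, 8, 8), (2, 6, 8, 8), 1)
    return mod0, mod1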

def gen_spaces_dim_key(op_func, args, s, op_var, kernel_name, attrs,
                       polyhedral, tuning, target):
    """
    Generate the tiling search space and its set_dim key.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build function.
        args (list or tuple): the arguments that were passed to op_func,
            used to look up or build the set_dim key.
        s (tvm.schedule.Schedule): schedule of op.
        op_var (list): the akg.tvm.tensor of inputs and outputs for op.
        kernel_name (str): name of op.
        attrs (dict): tiling parameter.
        polyhedral (bool): True by default.
        tuning (bool): False by default.
        target (str): build target.

    Return:
        tuple of (spaces, set_dim_key): the tiling search space produced by
        akg.lower, and the key identifying this op configuration.
    """
    set_dim_key = ""
    if op_func.__name__ in ct_util.set_dim_func_map.keys():
        func_ = ct_util.set_dim_func_map[op_func.__name__]
        if inspect.isfunction(func_):
            set_dim_key = func_(*args)[1]
    elif op_func.__name__ in ct_util.gen_key_func_map.keys():
        func_ = ct_util.gen_key_func_map[op_func.__name__]
        if inspect.isfunction(func_):
            set_dim_key = func_(*args)
    with akg.build_config(dump_pass_ir=True):
        spaces = akg.lower(s, op_var, name=kernel_name, attrs=attrs,
                           polyhedral=polyhedral, tuning=tuning, target=target)
    if set_dim_key == "":
        set_dim_key = str(args)
    return spaces, set_dim_key
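
# Usage sketch (hypothetical op and tensor; assumes an AKG environment). The
# op name is not registered in set_dim_func_map here, so the key falls back
# to str(args).
def _demo_gen_spaces_dim_key():
    data = akg.tvm.placeholder((16, 16), "float16", name="data")

    def _square(x):
        # Trivial elementwise op, for illustration only.
        return akg.tvm.compute(x.shape, lambda *i: x(*i) * x(*i), name="square")

    out = _square(data)
    s = akg.tvm.create_schedule(out.op)
    return gen_spaces_dim_key(_square, [data], s, [data, out], "square_demo",
                              {}, True, True, "cce")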

def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
             attrs=None, log_cce=False, dump_ir=True, dump_cce=True,
             polyhedral=True, tuning=False):
    """
    Return module built from op_func with given inputs.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build function.
        input_shapes (iterable of iterable of int): the dim sizes for input for op.
        input_types (iterable of str): the dtypes for each input.
        op_attrs (list or tuple): extra attributes for the op.
        kernel_name (str): name of op.
        attrs (dict): tiling parameter.
        log_cce (bool): False by default.
        dump_ir (bool): True by default.
        dump_cce (bool): True by default.
        polyhedral (bool): True by default.
        tuning (bool): False by default.

    Return:
        module.
    """
    inputs = []
    set_dim_key = ""
    shape_params = []  # collects all shape vars for dynamic-shape cases
    for i, (shape, dtype) in enumerate(zip(input_shapes, input_types)):
        if isinstance(shape, (list, tuple)) and shape and isinstance(shape[0], (list, tuple)):
            tmp_input = []
            for j, tmp_shape in enumerate(shape):
                tmp_input.append(
                    akg.tvm.placeholder(tmp_shape, dtype, "input_%d_%d" % (i + 1, j + 1)))
                for tmp in tmp_shape:
                    if isinstance(tmp, akg.tvm.expr.Var):
                        shape_params.append(tmp)
            inputs.append(tmp_input)
        elif isinstance(shape, (list, tuple)) and shape and isinstance(shape[0], akg.tvm.expr.Var):
            inputs.append(akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
            for tmp_shape in shape:
                if isinstance(tmp_shape, akg.tvm.expr.Var):
                    shape_params.append(tmp_shape)
        elif isinstance(shape, akg.tvm.tensor.Tensor):
            inputs.append(shape)
            for tmp_shape in shape.shape:
                shape_params.append(tmp_shape)
        else:
            inputs.append(akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))

    attrs_params = []
    if op_attrs is not None:
        args = inputs + op_attrs
        for tmp_attr in op_attrs:
            if isinstance(tmp_attr, (list, tuple)) and tmp_attr and isinstance(tmp_attr[0], akg.tvm.expr.Var):
                for attr_param in tmp_attr:
                    if isinstance(attr_param, akg.tvm.expr.Var):
                        attrs_params.append(attr_param)
            elif isinstance(tmp_attr, akg.tvm.expr.Var):
                attrs_params.append(tmp_attr)
    else:
        args = inputs

    # back up inputs because the tensor names may be updated inside op_func
    inputs_backup = recursive_copy(inputs)

    output = op_func(*args)

    # restore inputs to make sure that tensor names are not changed by op_func
    inputs = inputs_backup

    if attrs is None or 'dim' not in attrs or not attrs['dim']:
        dim_info = ""
        if attrs is None:
            attrs = dict()
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            value = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(value):
                dim_info = value(*args)
            elif isinstance(value, dict):
                key = []
                key.append(ft_util.convert_to_list(input_shapes))
                key.append(ft_util.convert_to_list(input_types))
                if op_attrs is not None:
                    key.append(op_attrs)
                key = str(tuple(key))
                if key in value.keys():
                    dim_info = ct_util.set_dims(value[key])
            else:
                raise RuntimeError(
                    "Registered set_dim_map is invalid. Must be a function or a dict!")
        if isinstance(dim_info, (list, tuple)):
            dim_info = dim_info[0]
        attrs['dim'] = dim_info

    compute_func = None  # func which is defined in dsl for doing compute_inline or other
    sch_tmpl = None
    if isinstance(output, (list, tuple)):
        from inspect import isfunction
        new_outputs = []
        for elem in output:
            if isfunction(elem):
                compute_func = elem
            elif isinstance(elem, dict):
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            elif isinstance(elem, (list, tuple)):
                new_outputs += elem
            else:
                new_outputs.append(elem)
        output = new_outputs
    elif isinstance(output, dict):
        sch_tmpl = output
        output = sch_tmpl['output']

    binds = None if not attrs else attrs.pop(BINDS, None)

    op_var = []
    for xx in inputs:
        if isinstance(xx, list):
            for x in xx:
                op_var.append(x)
        else:
            op_var.append(xx)

    shape_var = []
    if attrs_params:
        for param in attrs_params:
            if param not in shape_var:
                shape_var.append(param)
    for param in shape_params:
        if param not in shape_var:
            shape_var.append(param)

    if isinstance(output, (list, tuple)):
        op_var = op_var + [i for i in output if TensorUtils.is_output_value(i)]
    else:
        if TensorUtils.is_output_value(output):
            op_var = op_var + [output]

    if sch_tmpl is not None:
        assert sch_tmpl['target'] == 'cuda'
        kernel_name = kernel_name if kernel_name != "" else sch_tmpl['op_name']
        with akg.tvm.target.cuda() as target:
            s = sch_tmpl['schedule'](sch_tmpl['output'])
            with akg.tvm.build_config(dump_pass_ir=True):
                mod = akg.tvm.build(s, op_var, target, target_host='stackvm', name=kernel_name)
            dump_cuda_meta.dump(mod, kernel_name, s, op_var)
            return mod

    if isinstance(output, (list, tuple)):
        tmp = []
        for x in list(output):
            if isinstance(x, tuple):
                tmp.append(x[0].op)
            else:
                tmp.append(x.op)
        s = akg.tvm.create_schedule(tmp)
    else:
        s = akg.tvm.create_schedule(output.op)

    if compute_func is not None:
        compute_func(s)
        polyhedral = False

    kernel_name = kernel_name if kernel_name != "" else op_func.__name__
    mode = get_runtime_mode()

    level = attrs.get("help_tiling")
    if tuning or (level is not None and level > help_tiling_level['None']):
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            func_ = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)[1]
        elif op_func.__name__ in ct_util.gen_key_func_map.keys():
            func_ = ct_util.gen_key_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)
        with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
            spaces = akg.lower(s, op_var, name=kernel_name, attrs=attrs,
                               polyhedral=polyhedral, tuning=tuning)
        if set_dim_key == "":
            set_dim_key = str(args)
        return spaces, set_dim_key

    if mode == "cpu":
        mod = akg.tvm.build(s, op_var, "llvm")
        if not os.path.isdir("./cpu/ir/"):
            os.makedirs("./cpu/ir/")
        with os.fdopen(os.open("./cpu/ir/" + kernel_name + ".cc",
                               os.O_WRONLY | os.O_CREAT, 0o400), 'w') as irf:
            irf.write(akg.tvm.lower(s, op_var, shape_var, simple_mode=True))
        return mod

    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=dump_ir):
        mod = akg.build(s, op_var, "cce", shape_var, name=kernel_name,
                        attrs=attrs, polyhedral=polyhedral, binds=binds)
    if mod is None:
        return None
    source_code = mod.imported_modules[0].get_source()
    if log_cce:
        logging.debug("#################cce code####################")
        logging.debug(source_code)
    if dump_cce:
        cce_path = "./"
        create_cce(kernel_name, cce_path, source_code)
    return mod
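
# Usage sketch for the op_build variant above (hypothetical op; assumes an
# AKG/CCE build environment). op_build creates the placeholders itself from
# input_shapes/input_types, calls the op function, and lowers the result.
def _demo_op_build_cce():
    def _add_one(data):
        # Trivial elementwise op, for illustration only.
        return akg.tvm.compute(data.shape, lambda *i: data(*i) + 1, name="add_one")

    return op_build(_add_one, [(16, 16)], ["float16"], kernel_name="add_one_demo")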

def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
             attrs=None, log_cce=False, dump_ir=True, dump_code=True,
             polyhedral=True, tuning=False):
    """
    Return module built from op_func with given inputs.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build function.
        input_shapes (iterable of iterable of int): the dim sizes for input for op.
        input_types (iterable of str): the dtypes for each input.
        op_attrs (list or tuple): extra attributes for the op.
        kernel_name (str): name of op.
        attrs (dict): tiling parameter and build options;
            attrs['target'] selects 'cce' (default) or 'cuda'.
        log_cce (bool): False by default.
        dump_ir (bool): True by default.
        dump_code (bool): True by default.
        polyhedral (bool): True by default.
        tuning (bool): False by default.

    Return:
        module.
    """
    inputs = []
    set_dim_key = ""
    shape_params = []  # save all the shape params for dynamic_shape cases
    for i, (shape, dtype) in enumerate(zip(input_shapes, input_types)):
        if isinstance(shape, (list, tuple)) and shape and isinstance(shape[0], (list, tuple)):
            tmp_input = []
            for j, tmp_shape in enumerate(shape):
                tmp_input.append(
                    akg.tvm.placeholder(tmp_shape, dtype, "input_%d_%d" % (i + 1, j + 1)))
                for tmp in tmp_shape:
                    if isinstance(tmp, akg.tvm.expr.Var):
                        shape_params.append(tmp)
            inputs.append(tmp_input)
        elif isinstance(shape, (list, tuple)) and shape and isinstance(shape[0], akg.tvm.expr.Var):
            inputs.append(akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
            for tmp_shape in shape:
                if isinstance(tmp_shape, akg.tvm.expr.Var):
                    shape_params.append(tmp_shape)
        elif isinstance(shape, akg.tvm.tensor.Tensor):
            inputs.append(shape)
            for tmp_shape in shape.shape:
                shape_params.append(tmp_shape)
        else:
            inputs.append(akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))

    attrs_params = []
    if op_attrs is not None:
        args = inputs + op_attrs
        for tmp_attr in op_attrs:
            if isinstance(tmp_attr, (list, tuple)) and tmp_attr and isinstance(tmp_attr[0], akg.tvm.expr.Var):
                for attr_param in tmp_attr:
                    if isinstance(attr_param, akg.tvm.expr.Var):
                        attrs_params.append(attr_param)
            elif isinstance(tmp_attr, akg.tvm.expr.Var):
                attrs_params.append(tmp_attr)
    else:
        args = inputs

    # back up inputs because the tensor names may be updated inside op_func
    inputs_backup = recursive_copy(inputs)

    output = op_func(*args)

    # restore inputs to make sure that tensor names are not changed by op_func
    inputs = inputs_backup

    if attrs is None or 'dim' not in attrs or not attrs['dim']:
        dim_info = ""
        if attrs is None:
            attrs = dict()
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            value = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(value):
                dim_info = value(*args)
            elif isinstance(value, dict):
                key = []
                key.append(ft_util.convert_to_list(input_shapes))
                key.append(ft_util.convert_to_list(input_types))
                if op_attrs is not None:
                    key.append(op_attrs)
                key = str(tuple(key))
                if key in value.keys():
                    dim_info = ct_util.set_dims(value[key])
            else:
                raise RuntimeError(
                    "Registered set_dim_map is invalid. Must be a function or a dict!")
        if isinstance(dim_info, (list, tuple)):
            dim_info = dim_info[0]
        attrs['dim'] = dim_info

    compute_func = None  # func which is defined in dsl for doing compute_inline or other
    sch_tmpl = None
    gpu_binds = None
    if isinstance(output, (list, tuple)):
        from inspect import isfunction
        new_outputs = []
        for elem in output:
            if isfunction(elem):
                compute_func = elem
            elif isinstance(elem, dict):
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            elif isinstance(elem, (list, tuple)):
                new_outputs += elem
            else:
                new_outputs.append(elem)
        output = new_outputs
    elif isinstance(output, dict):
        sch_tmpl = output
        output = sch_tmpl['output']
        gpu_binds = sch_tmpl['binds']

    binds = None if not attrs else attrs.pop(BINDS, None)

    op_var = []
    for xx in inputs:
        if isinstance(xx, list):
            for x in xx:
                op_var.append(x)
        else:
            op_var.append(xx)

    shape_var = []
    if attrs_params:
        for param in attrs_params:
            if param not in shape_var:
                shape_var.append(param)
    for param in shape_params:
        if param not in shape_var:
            shape_var.append(param)

    if isinstance(output, (list, tuple)):
        op_var = op_var + [i for i in output if TensorUtils.is_output_value(i)]
    else:
        if TensorUtils.is_output_value(output):
            op_var = op_var + [output]

    if sch_tmpl is not None or (attrs and attrs.get("target", "cce") == "cuda"):
        if kernel_name == "":
            kernel_name = op_func.__name__ if sch_tmpl is None else sch_tmpl['op_name']
        if sch_tmpl is not None:
            if sch_tmpl['target'] != CUDA:
                raise ValueError("Only support cuda as target when using schedule template.")
            global kc_air_mode
            kc_air_mode = "CUDA"
            with akg.tvm.target.cuda() as target:
                if not tuning:
                    s = sch_tmpl['schedule'](sch_tmpl['output'])
                    with akg.tvm.build_config(dump_pass_ir=dump_ir):
                        mod = akg.build(s, op_var, "cuda", shape_var, name=kernel_name,
                                        attrs=attrs, polyhedral=False, binds=gpu_binds)
                else:
                    @autotvm.template
                    def _autotune_template():
                        s = sch_tmpl['schedule'](sch_tmpl['output'])
                        return (s, op_var)

                    # create autotune task
                    task = autotvm.task.create(_autotune_template, args=list(), target='cuda')
                    print("task config: ", task.config_space)

                    # set measure_option
                    measure_option = autotvm.measure_option(
                        builder=autotvm.LocalBuilder(),
                        runner=autotvm.LocalRunner(repeat=5, min_repeat_ms=150, timeout=4))

                    # begin tuning, logging records to file `kernel_name.log`
                    tuner = autotvm.tuner.RandomTuner(task)
                    if not os.path.exists(kernel_name + '.log'):
                        tuner.tune(n_trial=len(task.config_space),
                                   measure_option=measure_option,
                                   callbacks=[autotvm.callback.log_to_file(kernel_name + '.log')])

                    # query best config
                    dispatch_context = autotvm.apply_history_best(kernel_name + '.log')
                    best_config = dispatch_context.query(task.target, task.workload)
                    print("\nBest config is:")
                    print(best_config)

                    # apply best config
                    with autotvm.apply_history_best(kernel_name + '.log'):
                        s, op_var = _autotune_template()
                        mod = akg.build(s, op_var, "cuda", shape_var, name=kernel_name,
                                        attrs=attrs, polyhedral=False, binds=gpu_binds)
            if dump_code:
                source_code = mod.imported_modules[0].get_source()
                create_code(kernel_name, "./", source_code, CUDA)
            return mod

    if isinstance(output, (list, tuple)):
        tmp = []
        for x in list(output):
            if isinstance(x, tuple):
                tmp.append(x[0].op)
            else:
                tmp.append(x.op)
        s = akg.tvm.create_schedule(tmp)
    else:
        s = akg.tvm.create_schedule(output.op)

    if compute_func is not None:
        compute_func(s)
        polyhedral = False

    kernel_name = kernel_name if kernel_name != "" else op_func.__name__
    mode = get_runtime_mode()

    level = attrs.get("help_tiling")
    if tuning or (level is not None and level > help_tiling_level['None']):
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            func_ = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)[1]
        elif op_func.__name__ in ct_util.gen_key_func_map.keys():
            func_ = ct_util.gen_key_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)
        with akg.build_config(dump_pass_ir=True):
            spaces = akg.lower(s, op_var, name=kernel_name, attrs=attrs,
                               polyhedral=polyhedral, tuning=tuning)
        if set_dim_key == "":
            set_dim_key = str(args)
        return spaces, set_dim_key

    if mode == "cpu":
        mod = akg.tvm.build(s, op_var, "llvm")
        if not os.path.isdir("./cpu/ir/"):
            os.makedirs("./cpu/ir/")
        with os.fdopen(os.open("./cpu/ir/" + kernel_name + ".cc",
                               os.O_WRONLY | os.O_CREAT, 0o400), 'w') as irf:
            irf.write(akg.tvm.lower(s, op_var, shape_var, simple_mode=True))
        return mod

    target = CUDA if attrs and attrs.get("target", "cce") == CUDA else CCE
    with akg.build_config(dump_pass_ir=dump_ir):
        mod = akg.build(s, op_var, target, shape_var, name=kernel_name,
                        attrs=attrs, polyhedral=polyhedral, binds=binds)
    if mod is None:
        return None
    source_code = mod.imported_modules[0].get_source()
    if log_cce:
        logging.debug("#################cce code####################")
        logging.debug(source_code)
    if dump_code:
        create_code(kernel_name, "./", source_code, target)
    return mod
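
# Usage sketch for the schedule-template path of the op_build variant above
# (hypothetical op and factors; assumes an AKG build with CUDA support). An
# op function may return a dict carrying the keys this op_build reads:
# 'output', 'schedule', 'op_name', 'target' and 'binds'.
def _demo_op_build_cuda_template():
    def _scale(data):
        out = akg.tvm.compute(data.shape, lambda *i: data(*i) * 2, name="scale")

        def _schedule(tensor):
            # Hand-written CUDA schedule: bind the outer axis to blocks
            # and the inner axis to threads.
            s = akg.tvm.create_schedule(tensor.op)
            bx, tx = s[tensor].split(tensor.op.axis[0], factor=64)
            s[tensor].bind(bx, akg.tvm.thread_axis("blockIdx.x"))
            s[tensor].bind(tx, akg.tvm.thread_axis("threadIdx.x"))
            return s

        return {'output': out, 'schedule': _schedule, 'op_name': 'scale',
                'target': CUDA, 'binds': None}

    return op_build(_scale, [(1024,)], ["float32"], kernel_name="scale_demo")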