def gen_op_var(inputs, output, op_var):
    """
    Combine inputs and outputs about the op.

    Entries of ``inputs`` are appended to ``op_var`` in place; an entry that
    is itself a list is flattened by one level. Outputs accepted by
    ``TensorUtils.is_output_value`` are then added through list
    concatenation, so in that case the returned list is a new object.

    Args:
        inputs (list): the inputs of op.
        output (list): the outputs of op (a single value is accepted too).
        op_var (list): inputs and outputs for the op.

    Returns:
        list, the flattened inputs followed by the accepted outputs.
    """
    for item in inputs:
        if isinstance(item, list):
            op_var.extend(item)
        else:
            op_var.append(item)

    if isinstance(output, (list, tuple)):
        return op_var + [out for out in output
                         if TensorUtils.is_output_value(out)]
    if TensorUtils.is_output_value(output):
        return op_var + [output]
    return op_var
def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
             attrs=None, log_cce=False, dump_ir=True, dump_cce=True,
             polyhedral=True, tuning=False):
    """
    Return module built from op_func with given inputs.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build
            function. It may also return a dict schedule template with the
            keys 'output', 'target', 'op_name' and 'schedule'.
        input_shapes (iterable of iterable of int): the dim sizes for input
            for op. An entry may also be a list of shapes (one placeholder is
            created per shape) or an existing tensor, which is used as-is.
        input_types (iterable of iterable of str): the dtypes for each input.
        op_attrs (list or tuple): extra attributes for the op.
        kernel_name (str): name of op. When empty, the template's 'op_name'
            or ``op_func.__name__`` is used.
        attrs (dict): tiling parameter. The dict is updated in place: 'dim'
            is filled in when missing or empty, dicts returned by op_func are
            merged in, and the BINDS entry is popped.
        log_cce (bool): False by default.
        dump_ir (bool): True by default.
        dump_cce (bool): True by default.
        polyhedral (bool): True by default. Forced to False when op_func
            returns a compute function.
        tuning (bool): False by default.

    Return:
        module. When ``tuning`` is set or the 'help_tiling' attr is above the
        'None' level, a tuple ``(spaces, set_dim_key)`` is returned instead.
        None is returned when ``akg.build`` yields no module.

    Raises:
        RuntimeError: if the entry registered for op_func in
            ``ct_util.set_dim_func_map`` is neither a function nor a dict.
        ValueError: if a schedule template targets anything but 'cuda'.
    """
    inputs = []
    set_dim_key = ""
    shape_params = []
    for i, (shape, dtype) in enumerate(zip(input_shapes, input_types)):
        if isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], (list, tuple)):
            # A list of shapes: one placeholder per nested shape.
            tmp_input = []
            for j, tmp_shape in enumerate(shape):
                tmp_input.append(
                    akg.tvm.placeholder(tmp_shape, dtype,
                                        "input_%d_%d" % (i + 1, j + 1)))
                for tmp in tmp_shape:
                    if isinstance(tmp, akg.tvm.expr.Var):
                        shape_params.append(tmp)
            inputs.append(tmp_input)
        elif isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], akg.tvm.expr.Var):
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
            for tmp_shape in shape:
                if isinstance(tmp_shape, akg.tvm.expr.Var):
                    shape_params.append(tmp_shape)
        elif isinstance(shape, akg.tvm.tensor.Tensor):
            # An already-built tensor is passed through unchanged.
            inputs.append(shape)
            for tmp_shape in shape.shape:
                shape_params.append(tmp_shape)
        else:
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))

    attrs_params = []
    if op_attrs is not None:
        # list() so that a tuple of attributes (allowed by the contract) can
        # be concatenated with the list of inputs.
        args = inputs + list(op_attrs)
        for tmp_attr in op_attrs:
            if isinstance(tmp_attr, (list, tuple)) and tmp_attr and isinstance(
                    tmp_attr[0], akg.tvm.expr.Var):
                for attr_param in tmp_attr:
                    if isinstance(attr_param, akg.tvm.expr.Var):
                        attrs_params.append(attr_param)
            elif isinstance(tmp_attr, akg.tvm.expr.Var):
                attrs_params.append(tmp_attr)
    else:
        args = inputs

    # backup inputs because the tensor names may be updated inside op_func
    inputs_backup = recursive_copy(inputs)
    output = op_func(*args)
    # restore inputs to make sure that tensor names are not changed by op_func
    inputs = inputs_backup

    if attrs is None or 'dim' not in attrs or not attrs['dim']:
        dim_info = ""
        if attrs is None:
            attrs = dict()
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            value = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(value):
                dim_info = value(*args)
            elif isinstance(value, dict):
                key = [ft_util.convert_to_list(input_shapes),
                       ft_util.convert_to_list(input_types)]
                if op_attrs is not None:
                    key.append(op_attrs)
                key = str(tuple(key))
                if key in value:
                    dim_info = ct_util.set_dims(value[key])
            else:
                raise RuntimeError(
                    "Registered set_dim_map is invalid. Must be a function or a dict!"
                )
        if isinstance(dim_info, (list, tuple)):
            dim_info = dim_info[0]
        attrs['dim'] = dim_info

    compute_func = None  # func which is defined in dsl for doing compute_inline or other
    sch_tmpl = None
    if isinstance(output, (list, tuple)):
        new_outputs = []
        for elem in output:
            if inspect.isfunction(elem):
                compute_func = elem
            elif isinstance(elem, dict):
                # Attributes returned by the op only fill in missing/empty keys.
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            elif isinstance(elem, (list, tuple)):
                new_outputs += elem
            else:
                new_outputs.append(elem)
        output = new_outputs
    elif isinstance(output, dict):
        sch_tmpl = output
        output = sch_tmpl['output']

    binds = None if not attrs else attrs.pop(BINDS, None)

    # Flattened inputs followed by the outputs accepted by TensorUtils.
    op_var = gen_op_var(inputs, output, [])

    # Symbolic shape variables, de-duplicated, attribute vars first.
    shape_var = []
    for var in attrs_params + shape_params:
        if var not in shape_var:
            shape_var.append(var)

    if sch_tmpl is not None:
        # Explicit check instead of assert, which is stripped under -O.
        if sch_tmpl['target'] != 'cuda':
            raise ValueError(
                "Only support cuda as target when using schedule template.")
        if kernel_name == "":
            kernel_name = sch_tmpl['op_name']
        with akg.tvm.target.cuda() as target:
            s = sch_tmpl['schedule'](sch_tmpl['output'])
            with akg.tvm.build_config(dump_pass_ir=True):
                mod = akg.tvm.build(s, op_var, target, target_host='stackvm',
                                    name=kernel_name)
                dump_cuda_meta.dump(mod, kernel_name, s, op_var)
            return mod

    if isinstance(output, (list, tuple)):
        tmp = []
        for x in output:
            if isinstance(x, tuple):
                tmp.append(x[0].op)
            else:
                tmp.append(x.op)
        s = akg.tvm.create_schedule(tmp)
    else:
        s = akg.tvm.create_schedule(output.op)

    if compute_func is not None:
        compute_func(s)
        polyhedral = False

    if kernel_name == "":
        kernel_name = op_func.__name__

    mode = get_runtime_mode()
    level = attrs.get("help_tiling")
    if tuning or (level is not None and level > help_tiling_level['None']):
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            func_ = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)[1]
        elif op_func.__name__ in ct_util.gen_key_func_map.keys():
            func_ = ct_util.gen_key_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)
        with akg.build_config(add_lower_pass=cce.debug_mode(0),
                              dump_pass_ir=True):
            spaces = akg.lower(s, op_var, name=kernel_name, attrs=attrs,
                               polyhedral=polyhedral, tuning=tuning)
            if set_dim_key == "":
                set_dim_key = str(args)
            return spaces, set_dim_key

    if mode == "cpu":
        mod = akg.tvm.build(s, op_var, "llvm")
        os.makedirs("./cpu/ir/", exist_ok=True)
        with os.fdopen(
                os.open("./cpu/ir/" + kernel_name + ".cc",
                        os.O_WRONLY | os.O_CREAT, 0o400), 'w') as irf:
            irf.write(akg.tvm.lower(s, op_var, shape_var, simple_mode=True))
        return mod

    with akg.build_config(add_lower_pass=cce.debug_mode(0),
                          dump_pass_ir=dump_ir):
        mod = akg.build(s, op_var, "cce", shape_var, name=kernel_name,
                        attrs=attrs, polyhedral=polyhedral, binds=binds)
    if mod is None:
        return None

    source_code = mod.imported_modules[0].get_source()
    if log_cce:
        logging.debug("#################cce code####################")
        logging.debug(source_code)
    if dump_cce:
        cce_path = "./"
        create_cce(kernel_name, cce_path, source_code)
    return mod
def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
             attrs=None, log_cce=False, dump_ir=True, dump_code=True,
             polyhedral=True, tuning=False):
    """
    Return module built from op_func with given inputs.

    Args:
        op_func (function returning an op or (op, [op_vars])): The op build
            function. It may also return a dict schedule template with the
            keys 'output', 'binds', 'target', 'op_name' and 'schedule'.
        input_shapes (iterable of iterable of int): the dim sizes for input
            for op. An entry may also be a list of shapes (one placeholder is
            created per shape) or an existing tensor, which is used as-is.
        input_types (iterable of iterable of str): the dtypes for each input.
        op_attrs (list or tuple): extra attributes for the op.
        kernel_name (str): name of op. When empty, the template's 'op_name'
            or ``op_func.__name__`` is used.
        attrs (dict): tiling parameter. The dict is updated in place: 'dim'
            is filled in when missing or empty, dicts returned by op_func are
            merged in, and the BINDS entry is popped.
        log_cce (bool): False by default.
        dump_ir (bool): True by default.
        dump_code (bool): True by default.
        polyhedral (bool): True by default. Forced to False when op_func
            returns a compute function.
        tuning (bool): False by default.

    Return:
        module. Without a schedule template, when ``tuning`` is set or the
        'help_tiling' attr is above the 'None' level, a tuple
        ``(spaces, set_dim_key)`` is returned instead. None is returned when
        ``akg.build`` yields no module on the non-template path.

    Raises:
        RuntimeError: if the entry registered for op_func in
            ``ct_util.set_dim_func_map`` is neither a function nor a dict.
        ValueError: if a schedule template targets anything but CUDA.
    """
    global kc_air_mode

    inputs = []
    set_dim_key = ""
    shape_params = []  # save all the shape params for dynamic_shape cases
    for i, (shape, dtype) in enumerate(zip(input_shapes, input_types)):
        if isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], (list, tuple)):
            # A list of shapes: one placeholder per nested shape.
            tmp_input = []
            for j, tmp_shape in enumerate(shape):
                tmp_input.append(
                    akg.tvm.placeholder(tmp_shape, dtype,
                                        "input_%d_%d" % (i + 1, j + 1)))
                for tmp in tmp_shape:
                    if isinstance(tmp, akg.tvm.expr.Var):
                        shape_params.append(tmp)
            inputs.append(tmp_input)
        elif isinstance(shape, (list, tuple)) and shape and isinstance(
                shape[0], akg.tvm.expr.Var):
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))
            for tmp_shape in shape:
                if isinstance(tmp_shape, akg.tvm.expr.Var):
                    shape_params.append(tmp_shape)
        elif isinstance(shape, akg.tvm.tensor.Tensor):
            # An already-built tensor is passed through unchanged.
            inputs.append(shape)
            for tmp_shape in shape.shape:
                shape_params.append(tmp_shape)
        else:
            inputs.append(
                akg.tvm.placeholder(shape, dtype, "input_%d" % (i + 1)))

    attrs_params = []
    if op_attrs is not None:
        # list() so that a tuple of attributes (allowed by the contract) can
        # be concatenated with the list of inputs.
        args = inputs + list(op_attrs)
        for tmp_attr in op_attrs:
            if isinstance(tmp_attr, (list, tuple)) and tmp_attr and isinstance(
                    tmp_attr[0], akg.tvm.expr.Var):
                for attr_param in tmp_attr:
                    if isinstance(attr_param, akg.tvm.expr.Var):
                        attrs_params.append(attr_param)
            elif isinstance(tmp_attr, akg.tvm.expr.Var):
                attrs_params.append(tmp_attr)
    else:
        args = inputs

    # backup inputs because the tensor names may be updated inside op_func
    inputs_backup = recursive_copy(inputs)
    output = op_func(*args)
    # restore inputs to make sure that tensor names are not changed by op_func
    inputs = inputs_backup

    if attrs is None or 'dim' not in attrs or not attrs['dim']:
        dim_info = ""
        if attrs is None:
            attrs = dict()
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            value = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(value):
                dim_info = value(*args)
            elif isinstance(value, dict):
                key = [ft_util.convert_to_list(input_shapes),
                       ft_util.convert_to_list(input_types)]
                if op_attrs is not None:
                    key.append(op_attrs)
                key = str(tuple(key))
                if key in value:
                    dim_info = ct_util.set_dims(value[key])
            else:
                raise RuntimeError(
                    "Registered set_dim_map is invalid. Must be a function or a dict!"
                )
        if isinstance(dim_info, (list, tuple)):
            dim_info = dim_info[0]
        attrs['dim'] = dim_info

    compute_func = None  # func which is defined in dsl for doing compute_inline or other
    sch_tmpl = None
    gpu_binds = None
    if isinstance(output, (list, tuple)):
        new_outputs = []
        for elem in output:
            if inspect.isfunction(elem):
                compute_func = elem
            elif isinstance(elem, dict):
                # Attributes returned by the op only fill in missing/empty keys.
                for key, value in elem.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            elif isinstance(elem, (list, tuple)):
                new_outputs += elem
            else:
                new_outputs.append(elem)
        output = new_outputs
    elif isinstance(output, dict):
        sch_tmpl = output
        output = sch_tmpl['output']
        gpu_binds = sch_tmpl['binds']

    binds = None if not attrs else attrs.pop(BINDS, None)

    # Flattened inputs followed by the outputs accepted by TensorUtils.
    op_var = gen_op_var(inputs, output, [])

    # Symbolic shape variables, de-duplicated, attribute vars first.
    shape_var = []
    for var in attrs_params + shape_params:
        if var not in shape_var:
            shape_var.append(var)

    # Default the kernel name on every path: an empty name would otherwise
    # reach akg.lower/akg.build and the "./cpu/ir/<name>.cc" dump below.
    if kernel_name == "":
        kernel_name = op_func.__name__ if sch_tmpl is None else sch_tmpl[
            'op_name']

    if sch_tmpl is not None:
        if sch_tmpl['target'] != CUDA:
            raise ValueError(
                "Only support cuda as target when using schedule template.")
        kc_air_mode = "CUDA"
        with akg.tvm.target.cuda():
            if not tuning:
                s = sch_tmpl['schedule'](sch_tmpl['output'])
                with akg.tvm.build_config(dump_pass_ir=dump_ir):
                    mod = akg.build(s, op_var, "cuda", shape_var,
                                    name=kernel_name, attrs=attrs,
                                    polyhedral=False, binds=gpu_binds)
            else:
                @autotvm.template
                def _autotune_template():
                    s = sch_tmpl['schedule'](sch_tmpl['output'])
                    return (s, op_var)

                # create autotune task
                task = autotvm.task.create(_autotune_template, args=list(),
                                           target='cuda')
                print("task config: ", task.config_space)

                # set measure_option
                measure_option = autotvm.measure_option(
                    builder=autotvm.LocalBuilder(),
                    runner=autotvm.LocalRunner(repeat=5, min_repeat_ms=150,
                                               timeout=4))

                # Begin tuning, log records to file `kernel_name.log`.
                # An existing log file is reused and tuning is skipped.
                tuner = autotvm.tuner.RandomTuner(task)
                if not os.path.exists(kernel_name + '.log'):
                    tuner.tune(n_trial=len(task.config_space),
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.log_to_file(kernel_name +
                                                                '.log')
                               ])

                # query best config
                dispatch_context = autotvm.apply_history_best(kernel_name +
                                                              '.log')
                best_config = dispatch_context.query(task.target,
                                                     task.workload)
                print("\nBest config is:")
                print(best_config)

                # apply best config
                with autotvm.apply_history_best(kernel_name + '.log'):
                    s, op_var = _autotune_template()
                    mod = akg.build(s, op_var, "cuda", shape_var,
                                    name=kernel_name, attrs=attrs,
                                    polyhedral=False, binds=gpu_binds)
            if dump_code:
                source_code = mod.imported_modules[0].get_source()
                create_code(kernel_name, "./", source_code, CUDA)
            return mod

    if isinstance(output, (list, tuple)):
        tmp = []
        for x in output:
            if isinstance(x, tuple):
                tmp.append(x[0].op)
            else:
                tmp.append(x.op)
        s = akg.tvm.create_schedule(tmp)
    else:
        s = akg.tvm.create_schedule(output.op)

    if compute_func is not None:
        compute_func(s)
        polyhedral = False

    mode = get_runtime_mode()
    level = attrs.get("help_tiling")
    if tuning or (level is not None and level > help_tiling_level['None']):
        if op_func.__name__ in ct_util.set_dim_func_map.keys():
            func_ = ct_util.set_dim_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)[1]
        elif op_func.__name__ in ct_util.gen_key_func_map.keys():
            func_ = ct_util.gen_key_func_map[op_func.__name__]
            if inspect.isfunction(func_):
                set_dim_key = func_(*args)
        with akg.build_config(dump_pass_ir=True):
            spaces = akg.lower(s, op_var, name=kernel_name, attrs=attrs,
                               polyhedral=polyhedral, tuning=tuning)
            if set_dim_key == "":
                set_dim_key = str(args)
            return spaces, set_dim_key

    if mode == "cpu":
        mod = akg.tvm.build(s, op_var, "llvm")
        os.makedirs("./cpu/ir/", exist_ok=True)
        with os.fdopen(
                os.open("./cpu/ir/" + kernel_name + ".cc",
                        os.O_WRONLY | os.O_CREAT, 0o400), 'w') as irf:
            irf.write(akg.tvm.lower(s, op_var, shape_var, simple_mode=True))
        return mod

    target = CUDA if attrs and attrs.get("target", "cce") == CUDA else CCE
    with akg.build_config(dump_pass_ir=dump_ir):
        mod = akg.build(s, op_var, target, shape_var, name=kernel_name,
                        attrs=attrs, polyhedral=polyhedral, binds=binds)
    if mod is None:
        return None

    source_code = mod.imported_modules[0].get_source()
    if log_cce:
        logging.debug("#################cce code####################")
        logging.debug(source_code)
    if dump_code:
        create_code(kernel_name, "./", source_code, target)
    return mod
def _compilewithjson_to_module_op(kernel_info, attrs, processor):
    """
    Compile a single op described by an already-parsed kernel json.

    Args:
        kernel_info (dict): parsed kernel description; the keys read here are
            'name', 'op', 'input_desc', 'attr' and, optionally, 'impl_path'.
        attrs (dict): build attributes; 'target' is read, and dicts returned
            by the op function fill in keys that are missing or empty.
        processor (str): 'cuda' builds through ``kernel_exec.op_build``; any
            other value goes through ``op_build``.

    Returns:
        bool, False when the op cannot be found or the build result is falsy,
        True otherwise.
    """

    def _load_op(name):
        """Look the op up in the custom impl file first, then in built-ins."""
        found = None
        # get custom ops implementation first.
        custom_path = (kernel_info['impl_path']
                       if 'impl_path' in kernel_info else None)
        if custom_path is not None:
            real_path = os.path.realpath(custom_path)
            if os.path.isfile(real_path):
                spec = importlib.util.spec_from_file_location(
                    Path(real_path).resolve().stem, real_path)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                found = getattr(module, name, None)
        # get built-in ops.
        if found is None:
            found = get_op(name, attrs["target"])
        return found

    def _build_cuda(func):
        """Build through kernel_exec.op_build using each input's first desc."""
        shapes = []
        dtypes = []
        for desc in kernel_info['input_desc']:
            shapes.append(desc[0]['shape'])
            dtypes.append(desc[0]['data_type'])
        attr_values = []
        if kernel_info['attr']:
            attr_values = [item['value'] for item in kernel_info['attr']]
        want_ir = os.getenv(get_dump_ir_flag()) == "on"
        want_code = os.getenv(get_dump_code_flag()) == "on"
        kernel_exec.op_build(func, shapes, dtypes, attr_values,
                             kernel_info['op'], attrs=attrs,
                             dump_ir=want_ir, dump_code=want_code)
        return True

    def _split_output(raw):
        """Separate the schedule function and attr dicts from the tensors."""
        if not isinstance(raw, (list, tuple)):
            return None, [raw]
        from inspect import isfunction
        sched = None
        tensors = []
        for item in raw:
            if isfunction(item):
                sched = item
            elif isinstance(item, dict):
                # Only fill in attrs that are missing or empty.
                for key, value in item.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            else:
                tensors.append(item)
        return sched, tensors

    def _placeholder(desc):
        """Create the placeholder for one tensor description."""
        # An empty shape is replaced by (1,).
        shape = desc['shape'] or (1,)
        utils.shape_dtype_max_size_check(shape, desc['data_type'])
        return akg.tvm.placeholder(shape=shape,
                                   name=desc['tensor_name'],
                                   dtype=desc['data_type'])

    op_name = kernel_info['name']
    op_func = _load_op(op_name)
    if op_func is None:
        logging.error(
            "this op not support by akg, please check op name %s",
            str(op_name))
        return False

    if processor == 'cuda':
        return _build_cuda(op_func)

    call_kwargs = {}
    tensors = []
    for input_desc in kernel_info['input_desc']:
        if len(input_desc) == 1:
            single = _placeholder(input_desc[0])
            call_kwargs[input_desc[0]['name']] = single
            tensors.append(single)
        else:
            # Several descs form one list argument keyed by the first name.
            group = [_placeholder(desc) for desc in input_desc]
            call_kwargs[input_desc[0]['name']] = group
            tensors = tensors + group

    if kernel_info['attr']:
        for ext_arg in kernel_info['attr']:
            call_kwargs[ext_arg['name']] = ext_arg['value']

    raw_output = op_func(**call_kwargs, target=attrs["target"])
    schedule_func, output = _split_output(raw_output)
    tensors = tensors + [out for out in output
                         if TensorUtils.is_output_value(out)]
    build_res = op_build([op_name], output, tensors, schedule_func, processor,
                         kernel_info['op'], attrs)
    return bool(build_res)
def compilewithjson(json_str):
    """
    Compile the op described by a kernel json string.

    Args:
        json_str (str): json text; the keys read here are 'name', 'op',
            'input_desc', 'attr' and, optionally, 'process' and 'impl_path'.

    Returns:
        False when the json cannot be decoded or no op implementation is
        found; otherwise the result of ``op_build``.
    """

    def _placeholder(desc):
        """Create the placeholder for one tensor description."""
        # An empty shape is replaced by (1,).
        shape = desc['shape'] or (1, )
        vc_util.shape_dtype_max_size_check(shape)
        return akg.tvm.placeholder(shape=shape,
                                   name=desc['tensor_name'],
                                   dtype=desc['data_type'])

    def _load_custom_op(name):
        """Return the op from the 'impl_path' file, or None."""
        custom_path = (kernel_info['impl_path']
                       if 'impl_path' in kernel_info else None)
        if custom_path is None:
            return None
        real_path = os.path.realpath(custom_path)
        if not os.path.isfile(real_path):
            return None
        spec = importlib.util.spec_from_file_location(
            Path(real_path).resolve().stem, real_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return getattr(module, name, None)

    try:
        kernel_info = json.loads(json_str)
    except jd.JSONDecodeError:
        logging.error(traceback.format_exc())
        return False

    op_name = kernel_info['name']
    processor = kernel_info['process'] if 'process' in kernel_info else 'aicore'

    # get custom ops implementation first.
    op_func = _load_custom_op(op_name)
    # get built-in ops.
    if op_func is None and processor == 'cuda':
        op_func = getattr(gpu, op_name, None)
    if op_func is None:
        logging.error("this op not supported, please check op name %s",
                      str(op_name))
        return False

    call_kwargs = {}
    tensors = []
    for input_desc in kernel_info['input_desc']:
        if len(input_desc) == 1:
            single = _placeholder(input_desc[0])
            call_kwargs[input_desc[0]['name']] = single
            tensors.append(single)
        else:
            # Several descs form one list argument keyed by the first name.
            group = [_placeholder(desc) for desc in input_desc]
            call_kwargs[input_desc[0]['name']] = group
            tensors = tensors + group

    if kernel_info['attr']:
        for ext_arg in kernel_info['attr']:
            call_kwargs[ext_arg['name']] = ext_arg['value']

    output = op_func(**call_kwargs)

    schedule_func = None
    attrs = {}
    if isinstance(output, (list, tuple)):
        from inspect import isfunction
        kept = []
        for item in output:
            if isfunction(item):
                schedule_func = item
            elif isinstance(item, dict):
                # Only fill in attrs that are missing or empty.
                for key, value in item.items():
                    if key not in attrs or not attrs[key]:
                        attrs[key] = value
            else:
                kept.append(item)
        output = kept
    else:
        output = [output]

    tensors = tensors + [out for out in output
                         if TensorUtils.is_output_value(out)]
    return op_build([op_name], output, tensors, schedule_func, processor,
                    kernel_info['op'], attrs)