def _compilewithjson_to_module(kernel_info, attrs):
    """Compile with json."""
    def _get_target_from_processor(processor):
        if processor is None:
            return None
        if processor == "aicore":
            return utils.CCE
        if processor == "cuda":
            return utils.CUDA
        if processor == "cpu":
            return utils.LLVM
        return None

    processor = kernel_info['process'] if 'process' in kernel_info else utils.CUDA
    attrs["target"] = _get_target_from_processor(processor)

    if kernel_info.get('composite', False):
        try:
            composite.build(kernel_info, attrs)
            return True
        except Exception:
            logging.error(traceback.format_exc())
            return False
    return _compilewithjson_to_module_op(kernel_info, attrs, processor)
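
# A minimal, self-contained sketch (illustration only) of the processor-to-target
# mapping above. The string values are hypothetical stand-ins for utils.CCE,
# utils.CUDA, and utils.LLVM; the real constants live in the utils module.
_TARGET_BY_PROCESSOR_SKETCH = {"aicore": "cce", "cuda": "cuda", "cpu": "llvm"}

def _get_target_sketch(processor):
    # Unknown or missing processors fall through to None, mirroring the
    # fallback branches in _get_target_from_processor.
    return _TARGET_BY_PROCESSOR_SKETCH.get(processor)

assert _get_target_sketch("aicore") == "cce"
assert _get_target_sketch("unknown") is None
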
def get_result(desc, poly, attrs=None):
    # attrs defaults to None; normalize it so the poly branch below can
    # insert keys without crashing.
    if attrs is None:
        attrs = {}
    if poly:
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs.keys():
            attrs[reduce_lib_key] = poly

    if attrs == {}:
        mod = composite.build(desc, {'dim': "0 0 9728 9728"}, poly=poly)
    else:
        mod = composite.build(desc, attrs, poly=poly)

    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    flag = True
    if len(output_indexes) > 1:
        if not all(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol),
                       output, expect)):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    else:
        if not compare_tensor(output, expect, rtol=rtol, atol=atol):
            logging.info(mod.imported_modules[0].get_source())
            flag = False

    desc_d = json.loads(desc)
    if desc_d["process"] == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return flag
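
# The pass/fail decision above rests on compare_tensor's mixed tolerance. A
# rough, self-contained sketch of that criterion, assuming compare_tensor
# behaves like numpy.allclose (|actual - expect| <= atol + rtol * |expect|);
# the helper name and tolerances here are illustrative, not the real API:
import numpy as np

def _close_enough_sketch(actual, expect, rtol=1e-4, atol=1e-4):
    # Element-wise mixed tolerance: a fixed absolute slack plus a slack that
    # scales with the magnitude of the expected value.
    return bool(np.all(np.abs(actual - expect) <= atol + rtol * np.abs(expect)))

assert _close_enough_sketch(np.array([1.00009]), np.array([1.0]))
assert not _close_enough_sketch(np.array([1.1]), np.array([1.0]))
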
def get_result(desc, poly, attrs=None):
    backend = _get_backend(desc)
    if backend == "cuda" and not attrs:
        attrs = _add_attrs_from_json(desc, attrs, poly)
    # Normalize attrs so the poly branch can insert keys even when the caller
    # passed None for a non-cuda backend.
    if attrs is None:
        attrs = {}
    if poly:
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs.keys():
            attrs[reduce_lib_key] = poly

    build_attrs = attrs if attrs else None
    mod = composite.build(desc, build_attrs, poly=poly)
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    if not all(map(_compare_func,
                   output if isinstance(output, (list, tuple)) else [output],
                   expect if isinstance(expect, (list, tuple)) else [expect])):
        logging.info(mod.imported_modules[0].get_source())
        return False

    if backend == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return True
def get_result(desc, poly, attrs=None, profiling=True, need_compare=True):
    backend = _get_backend(desc)

    mod = composite.build(desc, attrs, poly=poly)
    if not need_compare:
        return True
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    # In profiling mode, mod_launch returns the compute outputs plus a dict of
    # profiling values; only the compute outputs are needed here.
    if isinstance(output, tuple) and len(output) > 0 and isinstance(output[-1], dict):
        output = output[0]
    output = list(output) if isinstance(output, (list, tuple)) else [output]
    expect = list(expect) if isinstance(expect, (list, tuple)) else [expect]

    for i, _ in enumerate(expect):
        if expect[i].dtype in ("complex128", "complex64"):
            # Complex results come back as interleaved (real, imag) float
            # pairs; split the flat buffer and rebuild a complex array of
            # the expected shape.
            final_shape = functools.reduce(lambda x, y: x * y, output[i].shape)
            flatten_output = output[i].reshape((final_shape,))
            output_real = flatten_output[0::2]
            output_imag = flatten_output[1::2]
            output[i] = np.vectorize(complex)(output_real, output_imag)
            output[i] = output[i].reshape(expect[i].shape)

    if len(output) != len(expect):
        raise RuntimeError("output and expect have different lengths, {} vs {}".format(
            len(output), len(expect)))

    compare_tolerance = get_compare_tolerance(desc, output_indexes)
    compare_res = list(map(_compare_func, output, expect, compare_tolerance))
    if not all(compare_res):
        source = (mod.imported_modules[0] if backend == "cuda" else mod).get_source()
        logging.debug(source)
        _dump_info(desc, attrs, poly, input_for_mod, output, expect)
        logging.warning("Compare results: %s", str(compare_res))
        return False

    if profiling and backend in ["cuda", "cpu"]:
        ctx = tvm.context(backend, 0)
        has_complex = any(i.dtype in ("complex64", "complex128") for i in input_for_mod)
        if not has_complex:
            inputs = to_tvm_nd_array(input_for_mod, ctx)
            target_profiling(mod, *inputs, target=backend, repeat_time=1000)
    return True
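
# Worked example of the complex reconstruction above: complex results come back
# as a flat float buffer of interleaved (real, imag) pairs, and stride-2 slices
# rebuild the complex array. Self-contained numpy sketch:
import numpy as np

interleaved = np.array([1.0, 2.0, 3.0, 4.0])  # encodes (1+2j, 3+4j)
rebuilt = np.vectorize(complex)(interleaved[0::2], interleaved[1::2])
assert np.array_equal(rebuilt, np.array([1 + 2j, 3 + 4j]))
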
def get_result(desc, attrs=None):
    input_for_mod, expect, output_indexes = gen_json_data(desc)

    if attrs:
        mod = composite.build(desc, attrs)
    else:
        mod = composite.build(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    flag = True
    if len(output_indexes) > 1:
        if not all(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol),
                       output, expect)):
            flag = False
    else:
        if not compare_tensor(output, expect, rtol=rtol, atol=atol):
            flag = False
    return flag
def test_composite_stitch(ci_path):
    files = os.listdir(ci_path)
    flag = True
    for fi in files:
        with open(os.path.join(ci_path, fi), 'r') as f:
            print("\033[94m%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%file: \033[0m", fi)
            desc = f.read()
        poly = True
        attrs = {"enable_akg_reduce_lib": poly}
        mod = composite.build(desc, attrs, poly=poly)
        rtol = 0.001
        atol = 0.005
        max_run_times = 3
        case_flag = False
        # Retry up to max_run_times; the case passes if any run matches.
        for _ in range(max_run_times):
            input_for_mod, expect, output_indexes = gen_json_data(desc)
            output = utils.mod_launch(mod, input_for_mod, output_indexes)
            if len(output_indexes) > 1:
                if all(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol),
                           output, expect)):
                    case_flag = True
                    break
            else:
                if compare_tensor(output, expect, rtol=rtol, atol=atol):
                    case_flag = True
                    break
        if case_flag:
            logging.info("\033[92mComposite Json {} pass!\033[0m".format(fi))
        else:
            logging.info("\033[91mComposite Json {} fail!\033[0m".format(fi))
        flag &= case_flag
    if not flag:
        raise ValueError("Precision Error")
    logging.info("All ops are ok!")
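
# The loop above implements a retry-until-pass pattern for flaky precision
# checks. Generic sketch (the helper name is illustrative): any() short-circuits
# on the first successful run, like the break above.
def _passes_with_retries_sketch(run_once, max_run_times=3):
    # run_once: zero-argument callable returning True when outputs match.
    return any(run_once() for _ in range(max_run_times))

assert _passes_with_retries_sketch(lambda: True)
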
def get_result(desc, poly, attrs=None):
    backend = _get_backend(desc)
    if attrs is None:
        attrs = {}

    build_attrs = attrs if attrs else None
    mod = composite.build(desc, build_attrs, poly=poly)
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    if not all(map(_compare_func,
                   output if isinstance(output, (list, tuple)) else [output],
                   expect if isinstance(expect, (list, tuple)) else [expect])):
        logging.info(mod.imported_modules[0].get_source())
        return False

    if backend == "cce":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        target_profiling(mod, *inputs, *expect, repeat_time=400)
    return True
def run_one_kernel(self, run_times, idx, config, best_time=np.inf, is_auto=False):
    """Compile and execute a config of the operator on device."""
    time_one_kernel_start = time.time()
    logger.debug('compile %dth kernel', idx)
    try:
        time_start_build = time.time()
        if self.op_type == "json":
            if is_auto:
                mod = composite.build(self.op_desc)
            else:
                # Build the tiling description: each config value becomes a
                # [value, 1] pair prefixed by its index-table entry.
                tiling = []
                for value in config.input._asdict().values():
                    item = [value, 1]
                    tiling.append(item)
                tiling_param = []
                for i, element in enumerate(tiling):
                    tiling_param.append(self._index_table[i] + element)
                dim_info = ct_util.set_dims(tuple(tiling_param))
                attrs = {'dim': dim_info}
                mod = composite.build(self.op_desc, attrs)
        else:
            mod = compile_kernel(self.op_type, self.op_desc, self.input_shape,
                                 self._index_table, None if is_auto else config.input, idx)
        time_end_build = time.time()
        logger.debug("build module time: %f", time_end_build - time_start_build)
        logger.debug('finished compile %dth kernel', idx)
    except BaseException as e:
        logger.debug("Compile Failed: [%s] : %s",
                     "origin" if is_auto else str(config.input), str(e))
        run_times[idx] = compile_fail_time
        return

    # Assume failure until a successful launch updates it.
    run_times[idx] = run_failed_time
    # Get an available device; with several devices, spread kernels across
    # consecutive device ids.
    if utils.get_available_devices_num() == 1:
        device_id = utils.get_device_id()
    else:
        device_id = idx + utils.get_device_id()
    os.environ['PROFILING_DIR'] = "/var/log/npu/profiling/container/" + str(device_id)
    os.environ['DEVICE_ID'] = str(device_id)

    logger.debug('run %dth kernel', idx)
    logger.debug('device_id: %s', device_id)
    try:
        for _ in range(self.repeat_times):
            stat_info = {}
            try:
                time_start_launch = time.time()
                if self.mod_output_param is not None:
                    output, stat_info = utils.mod_launch(mod, list(self.input), self.mod_output_param,
                                                         tuning=True, device_id=device_id)
                    # Only verify precision for configs that beat the best time so far.
                    if stat_info['run_time'] < best_time:
                        if not all(map(lambda x, y: np.allclose(x, y, rtol=5e-03, atol=5e-03,
                                                                equal_nan=True),
                                       output, self.expect)):
                            stat_info['run_time'] = precision_error_time
                            logger.debug("Precision Error: [%s]",
                                         "origin" if config is None else str(config.input))
                else:
                    output, stat_info = utils.mod_launch(mod, self.input, tuning=True,
                                                         device_id=device_id)
                    if stat_info['run_time'] < best_time:
                        if not np.allclose(output, self.expect, rtol=5e-03, atol=5e-03,
                                           equal_nan=True):
                            stat_info['run_time'] = precision_error_time
                            logger.debug("Precision Error: [%s]",
                                         "origin" if config is None else str(config.input))
                time_end_launch = time.time()
                logger.debug("mod launch time: %f", time_end_launch - time_start_launch)
            except BaseException as e:
                logger.debug("Run Failed: [%s] : %s", str(config.input), str(e))
                stat_info['run_time'] = run_failed_time
            run_times[idx] = np.minimum(run_times[idx], stat_info['run_time'])
    finally:
        logger.debug('end of %dth kernel', idx)
        time_one_kernel_end = time.time()
        logger.debug('run one kernel time: %f',
                     time_one_kernel_end - time_one_kernel_start)
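
# Worked example of the tiling construction in the "json" branch above, with a
# hypothetical two-axis config. Each config value v becomes a [v, 1] pair and
# is prefixed by its row from the index table before being passed to set_dims:
_index_table_example = [[0, 0], [0, 1]]  # hypothetical axis descriptors
_config_values_example = [16, 32]        # hypothetical tiling factors

_tiling_example = [[v, 1] for v in _config_values_example]
_tiling_param_example = [_index_table_example[i] + t
                         for i, t in enumerate(_tiling_example)]
assert _tiling_param_example == [[0, 0, 16, 1], [0, 1, 32, 1]]
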
def run_one_kernel(self, run_times, idx, config, best_time=np.inf, is_auto=False):
    """Compile and execute a config of the operator on device."""
    time_one_kernel_start = time.time()
    logger.debug('compile %dth kernel', idx)

    # Get an available device; with several devices, spread kernels across
    # consecutive device ids.
    if utils.get_available_devices_num() == 1:
        device_id = utils.get_device_id()
    else:
        device_id = idx + utils.get_device_id()
    os.environ['PROFILING_DIR'] = "/var/log/npu/profiling/container/" + str(device_id)
    os.environ['DEVICE_ID'] = str(device_id)

    logger.debug('run %dth kernel', idx)
    logger.debug('device_id: %s', device_id)
    try:
        time_start_build = time.time()
        logger.debug(config)
        if self.op_type in ["json", "extra_tune"]:
            if is_auto:
                mod = composite.build(self.op_desc)
                if self.op_type == "extra_tune":
                    del os.environ['MS_GRAPH_KERNEL_TILING']
            else:
                attrs = get_attr_from_config(config.input, self._index_table)
                if os.environ['RUNTIME_MODE'] == "gpu":
                    attrs['target'] = "cuda"
                mod = composite.build(self.op_desc, attrs, use_repo=False)
        else:
            mod = compile_kernel(self.op_type, self.op_desc, self.input_shape,
                                 self._index_table, None if is_auto else config.input, idx)
        time_end_build = time.time()
        logger.debug("build module time: %f", time_end_build - time_start_build)
        logger.debug('finished compile %dth kernel', idx)
    except BaseException as e:
        logger.debug("Compile Failed: [%s] : %s",
                     "origin" if is_auto else str(config.input), str(e))
        run_times[idx] = compile_fail_time
        return

    # Assume failure until a successful launch updates it.
    run_times[idx] = run_failed_time
    try:
        for _ in range(self.repeat_times):
            stat_info = {}
            try:
                time_start_launch = time.time()
                if self.mod_output_param is not None:
                    output, stat_info = utils.mod_launch(mod, list(self.input), self.mod_output_param,
                                                         tuning=True, device_id=device_id)
                    # Only verify precision for configs that beat the best time so far.
                    if stat_info['run_time'] < best_time:
                        if not all(map(lambda x, y: np.allclose(x, y, rtol=5e-03, atol=5e-03,
                                                                equal_nan=True),
                                       output, self.expect)):
                            stat_info['run_time'] = precision_error_time
                            logger.debug("Precision Error: [%s]",
                                         "origin" if config is None else str(config.input))
                else:
                    output, stat_info = utils.mod_launch(mod, self.input, tuning=True,
                                                         device_id=device_id)
                    if stat_info['run_time'] < best_time:
                        if not np.allclose(output, self.expect, rtol=5e-03, atol=5e-03,
                                           equal_nan=True):
                            stat_info['run_time'] = precision_error_time
                            logger.debug("Precision Error: [%s]",
                                         "origin" if config is None else str(config.input))
                time_end_launch = time.time()
                logger.debug("mod launch time: %f", time_end_launch - time_start_launch)
            except BaseException as e:
                logger.debug("Run Failed: [%s] : %s", str(config.input), str(e))
                stat_info['run_time'] = run_failed_time
            run_times[idx] = np.minimum(run_times[idx], stat_info['run_time'])
    finally:
        logger.debug('end of %dth kernel', idx)
        time_one_kernel_end = time.time()
        logger.debug('run one kernel time: %f',
                     time_one_kernel_end - time_one_kernel_start)