Example #1
def _compilewithjson_to_module(kernel_info, attrs):
    """compile with json."""

    def _get_target_from_processor(processor):
        if processor is None:
            return None
        elif processor == "aicore":
            return utils.CCE
        elif processor == "cuda":
            return utils.CUDA
        elif processor == "cpu":
            return utils.LLVM
        else:
            return None

    processor = kernel_info.get('process', utils.CUDA)
    attrs["target"] = _get_target_from_processor(processor)

    if kernel_info.get('composite', False):
        try:
            composite.build(kernel_info, attrs)
            return True
        except Exception:
            logging.error(traceback.format_exc())
            return False
    else:
        return _compilewithjson_to_module_op(kernel_info, attrs, processor)
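
Note: a minimal sketch of the kernel_info dict this entry point reads, limited to the two keys used above; a real composite JSON description carries the full operator definition as well.

# Hypothetical minimal input; only the keys read above are shown.
kernel_info = {
    "process": "cuda",   # mapped to utils.CUDA by _get_target_from_processor
    "composite": True,   # True routes compilation through composite.build
}
ok = _compilewithjson_to_module(kernel_info, attrs={})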
Example #2
def get_result(desc, poly, attrs=None):
    if attrs is None:
        attrs = {}
    if poly:
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs:
            attrs[reduce_lib_key] = poly
    if not attrs:
        mod = composite.build(desc, {'dim': "0 0 9728 9728"}, poly=poly)
    else:
        mod = composite.build(desc, attrs, poly=poly)
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    flag = True
    if len(output_indexes) > 1:
        if not all(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), output, expect)):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    else:
        if not compare_tensor(output, expect, rtol=rtol, atol=atol):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    desc_d = json.loads(desc)
    if desc_d["process"] == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return flag
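
Note: a hedged usage sketch; desc is assumed to be the text of a composite kernel JSON file, and the file name below is illustrative only.

with open("fused_op.info") as f:    # hypothetical file name
    desc = f.read()
assert get_result(desc, poly=True, attrs={})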
Example #3
def get_result(desc, poly, attrs=None):
    backend = _get_backend(desc)
    if backend == "cuda" and not attrs:
        attrs = _add_attrs_from_json(desc, attrs, poly)
    if poly:
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs.keys():
            attrs[reduce_lib_key] = poly

    build_attrs = attrs if attrs else None
    mod = composite.build(desc, build_attrs, poly=poly)

    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    outputs = output if isinstance(output, (list, tuple)) else [output]
    expects = expect if isinstance(expect, (list, tuple)) else [expect]
    if not all(map(_compare_func, outputs, expects)):
        logging.info(mod.imported_modules[0].get_source())
        return False
    if backend == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return True
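
Note: the scalar-to-list normalization above recurs in Example #7; a shared helper would remove the duplication (the helper name is mine, not from the source).

def _as_list(x):
    """Wrap a lone tensor so outputs and expects always zip pairwise."""
    return list(x) if isinstance(x, (list, tuple)) else [x]

# Equivalent check:
#   if not all(map(_compare_func, _as_list(output), _as_list(expect))): ...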
Example #4
def get_result(desc, poly, attrs=None, profiling=True, need_compare=True):
    backend = _get_backend(desc)

    mod = composite.build(desc, attrs, poly=poly)
    if not need_compare:
        return True
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    # In profiling mode, mod_launch returns both the compute outputs and the
    # profiling stats; only the compute outputs are needed here.
    if isinstance(output, tuple) and len(output) > 0 and isinstance(
            output[-1], dict):
        output = output[0]
    output = output if isinstance(output, (list, tuple)) else [output]
    expect = expect if isinstance(expect, (list, tuple)) else [expect]
    output = list(output)
    expect = list(expect)
    if len(output) != len(expect):
        raise RuntimeError(
            "output and expect have different length, {} vs {}".format(
                len(output), len(expect)))
    for i, _ in enumerate(expect):
        if expect[i].dtype in ("complex64", "complex128"):
            # The launched kernel returns complex results as a flat buffer of
            # interleaved real/imag scalars; rebuild the complex array first.
            final_shape = functools.reduce(lambda x, y: x * y, output[i].shape)
            flattened_output = output[i].reshape((final_shape, ))
            output_real = []
            output_imag = []
            for k, _ in enumerate(flattened_output):
                if k % 2 == 0:
                    output_real.append(flattened_output[k])
                else:
                    output_imag.append(flattened_output[k])
            output[i] = np.vectorize(complex)(output_real, output_imag)
            output[i] = output[i].reshape(expect[i].shape)

    compare_tolerance = get_compare_tolerance(desc, output_indexes)
    compare_res = list(map(_compare_func, output, expect, compare_tolerance))
    if not all(compare_res):
        source = (mod.imported_modules[0]
                  if backend == "cuda" else mod).get_source()
        logging.debug(source)
        _dump_info(desc, attrs, poly, input_for_mod, output, expect)
        logging.warning("Compare results: %s", str(compare_res))
        return False
    if profiling and backend in ["cuda", "cpu"]:
        ctx = tvm.context(backend, 0)
        # Skip profiling when any input is complex.
        has_complex = any(
            i.dtype in ("complex64", "complex128") for i in input_for_mod)
        if not has_complex:
            inputs = to_tvm_nd_array(input_for_mod, ctx)
            target_profiling(mod, *inputs, target=backend, repeat_time=1000)
    return True
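
Note: the even/odd split in the complex branch above can be done with NumPy slicing instead of a Python loop; a minimal equivalent sketch, under the same assumption that the device buffer interleaves real and imaginary parts.

import numpy as np

def interleaved_to_complex(raw, shape):
    """Rebuild a complex array from a flat buffer of (real, imag) pairs."""
    flat = np.asarray(raw).reshape(-1)
    # Even indexes carry real parts, odd indexes imaginary parts.
    return (flat[0::2] + 1j * flat[1::2]).reshape(shape)

# Drop-in for the loop above:
#   output[i] = interleaved_to_complex(output[i], expect[i].shape)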
Example #5
def get_result(desc, attrs=None):
    input_for_mod, expect, output_indexes = gen_json_data(desc)

    if attrs:
        mod = composite.build(desc, attrs)
    else:
        mod = composite.build(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    flag = True
    if len(output_indexes) > 1:
        if not all(
                map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol),
                    output, expect)):
            flag = False
    else:
        if not compare_tensor(output, expect, rtol=rtol, atol=atol):
            flag = False
    return flag
Example #6
def test_composite_stitch(ci_path):
    files = os.listdir(ci_path)
    flag = True
    for fi in files:
        with open(os.path.join(ci_path, fi), 'r') as f:
            print(
                "\033[94m%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%file: \033[0m",
                fi)
            desc = f.read()
        poly = True
        attrs = {}
        reduce_lib_key = "enable_akg_reduce_lib"
        attrs[reduce_lib_key] = poly
        mod = composite.build(desc, attrs, poly=poly)
        rtol = 0.001
        atol = 0.005
        max_run_times = 3
        case_flag = False

        for i in range(max_run_times):
            input_for_mod, expect, output_indexes = gen_json_data(desc)
            output = utils.mod_launch(mod, input_for_mod, output_indexes)
            if len(output_indexes) > 1:
                if all(
                        map(
                            lambda x, y: compare_tensor(
                                x, y, rtol=rtol, atol=atol), output, expect)):
                    case_flag = True
                    break
            else:
                if compare_tensor(output, expect, rtol=rtol, atol=atol):
                    case_flag = True
                    break
        if not case_flag:
            logging.info("\033[91mComposite Json {} fail!\033[0m".format(fi))
        else:
            logging.info("\033[92mComposite Json {} pass!\033[0m".format(fi))
        flag &= case_flag
    if not flag:
        raise ValueError("Precision Error")
    logging.info("All ops are ok!")
Example #7
def get_result(desc, poly, attrs=None):
    backend = _get_backend(desc)
    if attrs is None:
        attrs = {}

    build_attrs = attrs if attrs else None
    mod = composite.build(desc, build_attrs, poly=poly)

    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    outputs = output if isinstance(output, (list, tuple)) else [output]
    expects = expect if isinstance(expect, (list, tuple)) else [expect]
    if not all(map(_compare_func, outputs, expects)):
        logging.info(mod.imported_modules[0].get_source())
        return False
    if backend == "cce":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        target_profiling(mod, *inputs, *expect, repeat_time=400)
    return True
Example #8
    def run_one_kernel(self,
                       run_times,
                       idx,
                       config,
                       best_time=np.inf,
                       is_auto=False):
        """Compile and execute a config of the operator on device"""
        time_one_kernel_start = time.time()
        logger.debug('compile %dth kernel', idx)
        try:
            time_start_build = time.time()
            if self.op_type == "json":
                if is_auto:
                    mod = composite.build(self.op_desc)
                else:
                    tiling = []
                    for value in config.input._asdict().values():
                        item = [value, 1]
                        tiling.append(item)
                    tiling_param = []
                    for i, element in enumerate(tiling):
                        tiling_param.append(self._index_table[i] + element)
                    dim_info = ct_util.set_dims(tuple(tiling_param))
                    attrs = {'dim': dim_info}
                    mod = composite.build(self.op_desc, attrs)
            else:
                mod = compile_kernel(self.op_type, self.op_desc,
                                     self.input_shape, self._index_table,
                                     None if is_auto else config.input, idx)
            time_end_build = time.time()
            logger.debug("build module time: %f",
                         time_end_build - time_start_build)
            logger.debug('finished compile %dth kernel', idx)
        except BaseException as e:
            logger.debug("Compile Failed: [%s] : %s",
                         "origin" if is_auto else str(config.input), str(e))
            run_times[idx] = compile_fail_time
            return

        run_times[idx] = run_failed_time
        # get available device
        if utils.get_available_devices_num() == 1:
            device_id = utils.get_device_id()
        else:
            device_id = idx + utils.get_device_id()
        os.environ['PROFILING_DIR'] = "/var/log/npu/profiling/container/" + str(device_id)
        os.environ['DEVICE_ID'] = str(device_id)
        logger.debug('run %dth kernel', idx)
        logger.debug('device_id: %s', device_id)
        try:
            for _ in range(self.repeat_times):
                stat_info = {}
                try:
                    time_start_launch = time.time()
                    if self.mod_output_param is not None:
                        output, stat_info = utils.mod_launch(
                            mod,
                            list(self.input),
                            self.mod_output_param,
                            tuning=True,
                            device_id=device_id)
                        if stat_info['run_time'] < best_time:
                            if not all(map(lambda x, y: np.allclose(x, y, rtol=5e-03, atol=5e-03, equal_nan=True),
                                           output, self.expect)):
                                stat_info['run_time'] = precision_error_time
                                logger.debug("Precision Error: [%s]",
                                             "origin" if config is None else str(config.input))

                    else:
                        output, stat_info = utils.mod_launch(
                            mod, self.input, tuning=True, device_id=device_id)
                        if stat_info['run_time'] < best_time:
                            if not np.allclose(output, self.expect, rtol=5e-03, atol=5e-03, equal_nan=True):
                                stat_info['run_time'] = precision_error_time
                                logger.debug("Precision Error: [%s]",
                                             "origin" if config is None else str(config.input))
                    time_end_launch = time.time()
                    logger.debug("mod launch time: %f",
                                 time_end_launch - time_start_launch)
                except BaseException as e:
                    logger.debug("Run Failed: [%s] : %s", str(config.input),
                                 str(e))
                    stat_info['run_time'] = run_failed_time
                run_times[idx] = np.minimum(run_times[idx],
                                            stat_info['run_time'])
        finally:
            logger.debug('end of %dth kernel', idx)
            time_one_kernel_end = time.time()
            logger.debug('run one kernel time: %f',
                         time_one_kernel_end - time_one_kernel_start)
        return
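
Note: a worked sketch of the tiling assembly in the manual branch above, using hypothetical values; config.input is assumed to be a namedtuple of tile sizes and self._index_table a list of per-axis index prefixes.

from collections import namedtuple

Config = namedtuple("Config", ["tile_h", "tile_w"])   # hypothetical axes
config_input = Config(tile_h=16, tile_w=32)
index_table = [[0], [1]]                               # one prefix per axis

tiling = [[value, 1] for value in config_input._asdict().values()]
# -> [[16, 1], [32, 1]]
tiling_param = [index_table[i] + item for i, item in enumerate(tiling)]
# -> [[0, 16, 1], [1, 32, 1]]; ct_util.set_dims(tuple(tiling_param)) renders
# the 'dim' string that becomes the attrs passed to composite.build.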
Example #9
File: runner.py Project: wxyhv/akg
    def run_one_kernel(self, run_times, idx, config, best_time=np.inf, is_auto=False):
        """Compile and execute a config of the operator on device"""
        time_one_kernel_start = time.time()
        logger.debug('compile %dth kernel', idx)
        # get available device
        if utils.get_available_devices_num() == 1:
            device_id = utils.get_device_id()
        else:
            device_id = idx + utils.get_device_id()
        os.environ['PROFILING_DIR'] = "/var/log/npu/profiling/container/" + str(device_id)
        os.environ['DEVICE_ID'] = str(device_id)
        logger.debug('run %dth kernel', idx)
        logger.debug('device_id: %s', device_id)
        try:
            time_start_build = time.time()
            logger.debug(config)
            if self.op_type in ["json", "extra_tune"]:
                if is_auto:
                    mod = composite.build(self.op_desc)
                    if self.op_type == "extra_tune":
                        # pop() avoids a KeyError if the variable is unset.
                        os.environ.pop('MS_GRAPH_KERNEL_TILING', None)
                else:
                    attrs = get_attr_from_config(config.input, self._index_table)
                    if os.environ.get('RUNTIME_MODE') == "gpu":
                        attrs['target'] = "cuda"
                    mod = composite.build(self.op_desc, attrs, use_repo=False)
            else:
                mod = compile_kernel(self.op_type, self.op_desc, self.input_shape, self._index_table,
                                     None if is_auto else config.input, idx)
            time_end_build = time.time()
            logger.debug("build module time: %f", time_end_build - time_start_build)
            logger.debug('finished compile %dth kernel', idx)
        except BaseException as e:
            logger.debug("Compile Failed: [%s] : %s", "origin" if is_auto else str(config.input), str(e))
            run_times[idx] = compile_fail_time
            return

        run_times[idx] = run_failed_time

        try:
            for _ in range(self.repeat_times):
                stat_info = {}
                try:
                    time_start_launch = time.time()
                    if self.mod_output_param is not None:
                        output, stat_info = utils.mod_launch(mod, list(self.input), self.mod_output_param,
                                                             tuning=True, device_id=device_id)
                        if stat_info['run_time'] < best_time:
                            if not all(map(lambda x, y: np.allclose(x, y, rtol=5e-03, atol=5e-03, equal_nan=True),
                                           output, self.expect)):
                                stat_info['run_time'] = precision_error_time
                                logger.debug("Precision Error: [%s]",
                                             "origin" if config is None else str(config.input))

                    else:
                        output, stat_info = utils.mod_launch(mod, self.input, tuning=True, device_id=device_id)
                        if stat_info['run_time'] < best_time:
                            if not np.allclose(output, self.expect, rtol=5e-03, atol=5e-03, equal_nan=True):
                                stat_info['run_time'] = precision_error_time
                                logger.debug("Precision Error: [%s]",
                                             "origin" if config is None else str(config.input))
                    time_end_launch = time.time()
                    logger.debug("mod launch time: %f", time_end_launch - time_start_launch)
                except BaseException as e:
                    logger.debug("Run Failed: [%s] : %s", str(config.input), str(e))
                    stat_info['run_time'] = run_failed_time
                run_times[idx] = np.minimum(run_times[idx], stat_info['run_time'])
        finally:
            logger.debug('end of %dth kernel', idx)
            time_one_kernel_end = time.time()
            logger.debug('run one kernel time: %f', time_one_kernel_end - time_one_kernel_start)
        return
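
Note: a worked sketch of the device assignment both runners share, with hypothetical numbers.

base = 2            # utils.get_device_id()
num_devices = 4     # utils.get_available_devices_num()
for idx in range(num_devices):
    device_id = base if num_devices == 1 else idx + base
    # parallel kernels idx = 0..3 land on devices 2, 3, 4, 5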