Beispiel #1
0
def launch_json(debug_mode: bool = True, save_res: bool = False, json_input_dir=""):
    """Tune every composite json description found in a directory.

    The directory is ``json_load.format(json_input_dir)``. Each file in it is
    parsed as json, its tiling space is requested from
    ``composite.get_tiling_space`` and a ``ModelBasedTuner`` is run on it.

    Args:
        debug_mode: when true the tuning budgets are ``[3, 3, 3]``, otherwise
            ``[80, 160, 320]``; the budget is picked by the space size.
        save_res: when true the result is passed to ``save_tuning_result``.
        json_input_dir: value substituted into ``json_load``.

    Raises:
        RuntimeError: when the tiling space returned for a file is empty.
    """
    if debug_mode:
        iter_times = [3, 3, 3]
    else:
        iter_times = [80, 160, 320]

    json_dir = json_load.format(json_input_dir)
    for input_file in os.listdir(json_dir):
        with open("/".join((json_dir, input_file)), 'r') as desc_file:
            json_content = json.load(desc_file)

        # an empty shape list on the first entry of an input is replaced by [1]
        for input_desc in json_content["input_desc"]:
            first_desc = input_desc[0]
            if first_desc["shape"] == []:
                first_desc["shape"] = [1]
        json_input = json.dumps(json_content)

        space_res = composite.get_tiling_space(json_input, 2)
        index_table = space_res['index']
        tiling_spaces = space_res['tuning_space']
        if not tiling_spaces:
            raise RuntimeError('empty tiling spaces')

        # one namedtuple field per tiling dimension: tiling_0, tiling_1, ...
        dim_count = len(tiling_spaces[0])
        input_type = namedtuple("json", ['tiling_%d' % i for i in range(dim_count)])
        space = ListConfigSpace(input_type)
        for tiling_space in tiling_spaces:
            space.add(input_type(*tiling_space))

        key = json_content["op"]
        input_for_mod, expect = gen_data(op_type="json", op_desc=json_input)

        print('space size:', space.length)
        print('index table:', index_table)

        # this is for multi-output: negative indices -n .. -1, one per output
        output_count = len(json_content["output_desc"])
        output_para = list(range(-output_count, 0)) if output_count > 1 else None

        runner = KernelRunner(op_type="json", op_desc=json_input, index_table=index_table,
                              input_data=input_for_mod, expect=expect,
                              mod_output_param=output_para, timeout=180, repeat_times=1)

        # we can only get a valid tiling, or accurate get cycles
        is_truly_profiling = utils.get_profiling_mode()

        # available device numbers, normally is 8 or 1
        available_device_numbers = utils.get_available_devices_num()

        if is_truly_profiling:
            n_parallel = available_device_numbers
        else:
            n_parallel = 1
        tuner = ModelBasedTuner(runner, index_table, space,
                                n_parallel=n_parallel, plan_size=64, pre_model=None)

        # the budget grows with the size of the space
        if space.length < 10 ** 4:
            least_try_times = iter_times[0]
        elif space.length < 10 ** 5:
            least_try_times = iter_times[1]
        else:
            least_try_times = iter_times[2]
        tuner.tune(least_try_times, output_file="json.log")

        print_tuning_result("json", space, index_table, tuner, key)

        if save_res:
            save_tuning_result(key, "json", None, index_table, tuner)
Beispiel #2
0
def jobs(op_type: str = 'add', desc=None, debug_mode: bool = True,
         save_res: bool = False, insert_key='', conf_of_set_dim=""):
    """Run one auto-tuning job for an operator.

    Args:
        op_type: operator type, forwarded to ``get_space``, ``KernelRunner``
            and the reporting helpers; also names the ``<op_type>.log`` file.
        desc: operator description forwarded alongside ``op_type``.
        debug_mode: when true the tuning budgets are ``[3, 3, 3]``, otherwise
            ``[80, 160, 320]``; the budget is picked by the space size.
        save_res: when true the result is passed to ``save_tuning_result``.
        insert_key: when not ``''`` it replaces the key returned by
            ``get_space``.
        conf_of_set_dim: when it is a dict holding a non-empty list/tuple or
            any dict under the key, the job returns without tuning.
    """
    if debug_mode:
        iter_times = [3, 3, 3]
    else:
        iter_times = [80, 160, 320]

    index_table, space, key, expect, input_for_mod = get_space(op_type, desc)
    print('space size:', space.length)
    print('index table:', index_table)
    if insert_key != '':
        key = insert_key

    # filter already tuned shape
    if isinstance(conf_of_set_dim, dict) and key in conf_of_set_dim:
        tuned = conf_of_set_dim[key]
        has_entries = isinstance(tuned, (list, tuple)) and bool(tuned)
        if has_entries or isinstance(tuned, dict):
            return

    # this is for multi-output: a dict bundles the args with the output indices
    if isinstance(input_for_mod, dict):
        output_para = input_for_mod['outputs']
        input_for_mod = input_for_mod['args']
    else:
        output_para = None
    runner = KernelRunner(op_type, desc, index_table, input_data=input_for_mod,
                          expect=expect, mod_output_param=output_para,
                          timeout=180, repeat_times=1)

    # we can only get a valid tiling, or accurate get cycles
    is_truly_profiling = utils.get_profiling_mode()

    # available device numbers, normally is 8 or 1
    available_device_numbers = utils.get_available_devices_num()

    if is_truly_profiling:
        n_parallel = available_device_numbers
    else:
        n_parallel = 1
    tuner = ModelBasedTuner(runner, index_table, space,
                            n_parallel=n_parallel, plan_size=64, pre_model=None)

    # the budget grows with the size of the space
    if space.length < 10 ** 4:
        least_try_times = iter_times[0]
    elif space.length < 10 ** 5:
        least_try_times = iter_times[1]
    else:
        least_try_times = iter_times[2]
    tuner.tune(least_try_times, output_file=op_type + ".log")

    print_tuning_result(op_type, space, index_table, tuner, key)

    if save_res:
        save_tuning_result(key, op_type, desc, index_table, tuner)
Beispiel #3
0
Datei: job.py Projekt: wxyhv/akg
def launch_json(debug_mode: bool = True,
                save_res: bool = False,
                json_dir="",
                repo_path="",
                all_space=False,
                skip_exist=True,
                extra_tune=False,
                self_attrs=None,
                tuning_attrs=None):
    """Tune every composite json description found in ``json_dir``.

    Files that cannot be processed are not fatal: their name is appended to
    a list file under ``res/`` and the loop moves on to the next file.

    Args:
        debug_mode: when true the tuning budgets are ``[3, 3, 3]``, otherwise
            ``[80, 160, 320]``; the budget is picked by the space size.
        save_res: when true the result is passed to ``save_tuning_result``.
        json_dir: directory whose entries are each loaded as a json file.
        repo_path: json file loaded once up front; the parsed content is
            queried through ``get_repo`` and the path is forwarded to
            ``save_tuning_result``.
        all_space: when true a plain ``Tuner`` visits the whole space
            (``space.length`` tries) instead of a ``ModelBasedTuner``.
        skip_exist: when true, files for which ``get_repo`` returns a truthy
            value are skipped and listed in ``res/skip_file.txt``.
        extra_tune: when true the space is built from
            ``gen_bool_list(self_attrs)`` instead of the tiling space.
        self_attrs: field names of the ``extra_tune`` space; also forwarded
            to ``KernelRunner``. ``None`` means no attributes.
        tuning_attrs: extra field names appended to the tiling dimensions
            when the tiling space has fewer than ``10**5`` entries. ``None``
            means no attributes.
    """
    # None replaces the former mutable ``[]`` defaults
    self_attrs = [] if self_attrs is None else self_attrs
    tuning_attrs = [] if tuning_attrs is None else tuning_attrs

    def _record(list_name, *parts):
        """Append one line made of ``parts`` to ``res/<list_name>``."""
        with open('res/' + list_name, 'a') as fe:
            fe.write("".join(parts))
            fe.write("\n")

    # same effect as ``mkdir -p res/`` without spawning a shell; unlike the
    # shell call this raises if ``res`` exists but is not a directory
    os.makedirs("res", exist_ok=True)
    iter_times = [3, 3, 3] if debug_mode else [80, 160, 320]
    files = os.listdir(json_dir)
    with open(repo_path, 'r') as f:
        repo = json.load(f)

    for input_file in files:
        print("----Start tuning for ", input_file)
        with open(os.path.join(json_dir, input_file), 'r') as f:
            json_content = json.load(f)
        # an empty shape list on the first entry of an input is replaced by [1]
        for input_desc in json_content["input_desc"]:
            if input_desc[0]["shape"] == []:
                input_desc[0]["shape"] = [1]
        json_input = json.dumps(json_content)

        # skip tuning for info in repo
        if skip_exist:
            compute, shape, dtype = generate_trait(json_content)
            if get_repo(repo, [compute, shape, dtype]):
                print("Info for %s already exists" % input_file)
                print("ops are ", str(compute))
                print("shape is ", str(shape))
                print("dtype is ", str(dtype))
                _record('skip_file.txt', input_file)
                continue

        # generate tuning space
        if not extra_tune:
            time_start_get_space = time.time()
            with Manager() as manager:
                space_dict = manager.dict()
                p = Process(target=get_json_space,
                            args=(json_input, space_dict))
                p.start()
                p.join(600)
                # the join above may have timed out: do not leave the child
                # running against a manager that is about to shut down
                if p.is_alive():
                    p.terminate()
                    p.join()
                if 'res' not in space_dict:
                    _record('error_space_list.txt', input_file)
                    continue
                space_res = space_dict['res']
            time_end_get_space = time.time()
            print("get space time: ",
                  time_end_get_space - time_start_get_space)
            index_table = space_res['index']
            tiling_spaces = space_res['tuning_space']
            # an empty list is treated like a missing space; it would
            # otherwise fail on tiling_spaces[0] below
            if not isinstance(tiling_spaces, list) or not tiling_spaces:
                _record('empty_space_list.txt', input_file)
                continue
            dim_names = [
                'tiling_' + str(i) for i in range(len(tiling_spaces[0]))
            ]
            # attributes multiply the space, so only add them to small spaces
            with_attrs = bool(tuning_attrs) and len(tiling_spaces) < 10**5
            if with_attrs:
                dim_names.extend(tuning_attrs)
            input_type = namedtuple("json", dim_names)
            space = ListConfigSpace(input_type)
            if with_attrs:
                attr_options = gen_bool_list(tuning_attrs)
                for tiling_space in tiling_spaces:
                    for attr_option in attr_options:
                        space.add(input_type(*tiling_space, *attr_option))
            else:
                for tiling_space in tiling_spaces:
                    space.add(input_type(*tiling_space))
        else:
            index_table = []
            pre_input_type = namedtuple("extra_tune", self_attrs)
            space = ListConfigSpace(pre_input_type)
            for item in gen_bool_list(self_attrs):
                space.add(pre_input_type(*item))

        key = json_content["op"]
        try:
            input_for_mod, expect = gen_data(op_type="json",
                                             op_desc=json_input)
        except Exception as e:
            # Exception instead of BaseException so Ctrl-C / SystemExit
            # still abort the whole run
            logger.debug("gen numpy data from [%s] failed: %s", input_file,
                         str(e))
            _record('error_gen_data_list.txt', input_file, ": ", str(e))
            continue
        print('space size:', space.length)
        print('index table:', index_table)

        # this is for multi-output: negative indices -n .. -1, one per output
        output_count = len(json_content["output_desc"])
        output_para = list(range(-output_count, 0)) if output_count > 1 else None
        runner = KernelRunner(op_type="json",
                              op_desc=json_input,
                              index_table=index_table,
                              self_attrs=self_attrs,
                              input_data=input_for_mod,
                              expect=expect,
                              mod_output_param=output_para,
                              timeout=180,
                              repeat_times=1)

        # we can only get a valid tiling, or accurate get cycles;
        # an unset RUNTIME_MODE is treated as "not gpu"
        is_truly_profiling = (utils.get_profiling_mode()
                              or os.environ.get('RUNTIME_MODE') == "gpu")

        # available device numbers, normally is 8 or 1
        available_device_numbers = utils.get_available_devices_num()

        if all_space:
            tuner = Tuner(runner,
                          index_table,
                          space,
                          n_parallel=available_device_numbers)
            least_try_times = space.length
        else:
            tuner = ModelBasedTuner(runner,
                                    index_table,
                                    space,
                                    n_parallel=available_device_numbers
                                    if is_truly_profiling else 1,
                                    plan_size=64,
                                    pre_model=None)
            least_try_times = iter_times[0 if space.length < 10**4 else
                                         1 if space.length < 10**5 else 2]
        tuner.tune(least_try_times, output_file="json.log")

        print_tuning_result("json", space, index_table, tuner, key)

        if save_res:
            save_tuning_result(key, "extra_tune" if extra_tune else "json",
                               json_content, index_table, tuner, repo_path)
Beispiel #4
0
    def run_one_kernel(self,
                       run_times,
                       idx,
                       config,
                       best_time=np.inf,
                       is_auto=False):
        """Compile and execute a config of the operator on device.

        The outcome is written to ``run_times[idx]``: ``compile_fail_time``
        when the build raises, otherwise the minimum over ``repeat_times``
        launches of the measured run time, where a launch counts as
        ``run_failed_time`` when it raises and as ``precision_error_time``
        when it beats ``best_time`` but its output does not match
        ``self.expect``.

        Args:
            run_times: indexable container receiving the result at ``idx``.
            idx: slot in ``run_times``; also offsets the device id when more
                than one device is available.
            config: object whose ``input`` namedtuple holds the tiling values;
                may be ``None`` when ``is_auto`` is true.
            best_time: outputs are only verified for launches faster than
                this value.
            is_auto: when true the kernel is built without explicit tiling.
        """
        time_one_kernel_start = time.time()
        # ``config`` may be None for the auto build, so never dereference it
        # in a log message without this guard
        if is_auto or config is None:
            config_label = "origin"
        else:
            config_label = str(config.input)

        logger.debug('compile %dth kernel', idx)
        try:
            time_start_build = time.time()
            if self.op_type == "json":
                if is_auto:
                    mod = composite.build(self.op_desc)
                else:
                    # each tiling value is appended as [value, 1] to the
                    # index-table entry at the same position
                    tiling_param = [
                        self._index_table[i] + [value, 1]
                        for i, value in enumerate(
                            config.input._asdict().values())
                    ]
                    dim_info = ct_util.set_dims(tuple(tiling_param))
                    mod = composite.build(self.op_desc, {'dim': dim_info})
            else:
                mod = compile_kernel(self.op_type, self.op_desc,
                                     self.input_shape, self._index_table,
                                     None if is_auto else config.input, idx)
            time_end_build = time.time()
            logger.debug("build module time: %f",
                         time_end_build - time_start_build)
            logger.debug('finished compile %dth kernel', idx)
        except BaseException as e:
            # NOTE(review): BaseException also swallows KeyboardInterrupt and
            # SystemExit; kept as in the original - confirm this is intended
            logger.debug("Compile Failed: [%s] : %s", config_label, str(e))
            run_times[idx] = compile_fail_time
            return

        # pessimistic default, lowered below by every successful launch
        run_times[idx] = run_failed_time
        # get available device
        if utils.get_available_devices_num() == 1:
            device_id = utils.get_device_id()
        else:
            device_id = idx + utils.get_device_id()
        os.environ[
            'PROFILING_DIR'] = "/var/log/npu/profiling/container/" + str(
                device_id)
        os.environ['DEVICE_ID'] = str(device_id)
        logger.debug('run %dth kernel', idx)
        logger.debug('++++++++++++++++++++++=device_id')
        logger.debug(device_id)
        logger.debug('++++++++++++++++++++++=device_id')

        multi_output = self.mod_output_param is not None
        try:
            for _ in range(self.repeat_times):
                stat_info = {}
                try:
                    time_start_launch = time.time()
                    if multi_output:
                        output, stat_info = utils.mod_launch(
                            mod,
                            list(self.input),
                            self.mod_output_param,
                            tuning=True,
                            device_id=device_id)
                    else:
                        output, stat_info = utils.mod_launch(
                            mod, self.input, tuning=True, device_id=device_id)

                    # only candidates for a new best time are verified
                    if stat_info['run_time'] < best_time:
                        if multi_output:
                            matched = all(
                                np.allclose(out,
                                            exp,
                                            rtol=5e-03,
                                            atol=5e-03,
                                            equal_nan=True)
                                for out, exp in zip(output, self.expect))
                        else:
                            matched = np.allclose(output,
                                                  self.expect,
                                                  rtol=5e-03,
                                                  atol=5e-03,
                                                  equal_nan=True)
                        if not matched:
                            stat_info['run_time'] = precision_error_time
                            logger.debug("Precision Error: [%s]",
                                         config_label)
                    time_end_launch = time.time()
                    logger.debug("mod launch time: %f",
                                 time_end_launch - time_start_launch)
                except BaseException as e:
                    logger.debug("Run Failed: [%s] : %s", config_label,
                                 str(e))
                    stat_info['run_time'] = run_failed_time
                run_times[idx] = np.minimum(run_times[idx],
                                            stat_info['run_time'])
        finally:
            logger.debug('end of %dth kernel', idx)
            time_one_kernel_end = time.time()
            logger.debug('run one kernel time: %f',
                         time_one_kernel_end - time_one_kernel_start)
Beispiel #5
0
    def run_one_kernel(self, run_times, idx, config, best_time=np.inf, is_auto=False):
        """Compile and execute a config of the operator on device.

        The outcome is written to ``run_times[idx]``: ``compile_fail_time``
        when the build raises, otherwise the minimum over ``repeat_times``
        launches of the measured run time, where a launch counts as
        ``run_failed_time`` when it raises and as ``precision_error_time``
        when it beats ``best_time`` but its output does not match
        ``self.expect``.

        Args:
            run_times: indexable container receiving the result at ``idx``.
            idx: slot in ``run_times``; also offsets the device id when more
                than one device is available.
            config: object whose ``input`` holds the values handed to
                ``get_attr_from_config``; may be ``None`` when ``is_auto``
                is true.
            best_time: outputs are only verified for launches faster than
                this value.
            is_auto: when true the kernel is built without explicit attrs.
        """
        time_one_kernel_start = time.time()
        # ``config`` may be None for the auto build, so never dereference it
        # in a log message without this guard
        if is_auto or config is None:
            config_label = "origin"
        else:
            config_label = str(config.input)

        logger.debug('compile %dth kernel', idx)
        # get available device
        if utils.get_available_devices_num() == 1:
            device_id = utils.get_device_id()
        else:
            device_id = idx + utils.get_device_id()
        os.environ['PROFILING_DIR'] = "/var/log/npu/profiling/container/" + str(device_id)
        os.environ['DEVICE_ID'] = str(device_id)
        logger.debug('run %dth kernel', idx)
        logger.debug('++++++++++++++++++++++=device_id')
        logger.debug(device_id)
        logger.debug('++++++++++++++++++++++=device_id')
        try:
            time_start_build = time.time()
            logger.debug(config)
            if self.op_type in ["json", "extra_tune"]:
                if is_auto:
                    mod = composite.build(self.op_desc)
                    if self.op_type == "extra_tune":
                        # pop() instead of del: an unset variable must not be
                        # reported as a compile failure after a good build
                        os.environ.pop('MS_GRAPH_KERNEL_TILING', None)
                else:
                    attrs = get_attr_from_config(config.input, self._index_table)
                    # an unset RUNTIME_MODE is treated as "not gpu"
                    if os.environ.get('RUNTIME_MODE') == "gpu":
                        attrs['target'] = "cuda"
                    mod = composite.build(self.op_desc, attrs, use_repo=False)
            else:
                mod = compile_kernel(self.op_type, self.op_desc, self.input_shape, self._index_table,
                                     None if is_auto else config.input, idx)
            time_end_build = time.time()
            logger.debug("build module time: %f", time_end_build - time_start_build)
            logger.debug('finished compile %dth kernel', idx)
        except BaseException as e:
            # NOTE(review): BaseException also swallows KeyboardInterrupt and
            # SystemExit; kept as in the original - confirm this is intended
            logger.debug("Compile Failed: [%s] : %s", config_label, str(e))
            run_times[idx] = compile_fail_time
            return

        # pessimistic default, lowered below by every successful launch
        run_times[idx] = run_failed_time

        multi_output = self.mod_output_param is not None
        try:
            for _ in range(self.repeat_times):
                stat_info = {}
                try:
                    time_start_launch = time.time()
                    if multi_output:
                        output, stat_info = utils.mod_launch(mod, list(self.input), self.mod_output_param,
                                                             tuning=True, device_id=device_id)
                    else:
                        output, stat_info = utils.mod_launch(mod, self.input, tuning=True, device_id=device_id)

                    # only candidates for a new best time are verified
                    if stat_info['run_time'] < best_time:
                        if multi_output:
                            matched = all(np.allclose(out, exp, rtol=5e-03, atol=5e-03, equal_nan=True)
                                          for out, exp in zip(output, self.expect))
                        else:
                            matched = np.allclose(output, self.expect, rtol=5e-03, atol=5e-03, equal_nan=True)
                        if not matched:
                            stat_info['run_time'] = precision_error_time
                            logger.debug("Precision Error: [%s]", config_label)
                    time_end_launch = time.time()
                    logger.debug("mod launch time: %f", time_end_launch - time_start_launch)
                except BaseException as e:
                    logger.debug("Run Failed: [%s] : %s", config_label, str(e))
                    stat_info['run_time'] = run_failed_time
                run_times[idx] = np.minimum(run_times[idx], stat_info['run_time'])
        finally:
            logger.debug('end of %dth kernel', idx)
            time_one_kernel_end = time.time()
            logger.debug('run one kernel time: %f', time_one_kernel_end - time_one_kernel_start)