Exemple #1
0
def get_tiling_space(kernel_desc, level=1, attr=None):
    """
    Query the tiling space of a composite kernel.

    Args:
       kernel_desc : str of compute description
       level       : info level; it is stored under attr['help_tiling'] and,
                     when it is at least 2, the tiling candidates are
                     returned as well
       attr        : dict of build attributes; a fresh dict is used when it
                     is None, otherwise the given dict is modified in place

    Returns:
       dict with the keys 'index', 'l1_range', 'l0_range', 'l1_mod' and
       'l0_mod' (plus 'tuning_space' when level >= 2). Every value is the
       matching table of the 'composite_lower' result converted with
       .asnumpy().tolist().
    """
    build_attr = {} if attr is None else attr
    build_attr['help_tiling'] = level
    lowered = tvm.get_global_func('composite_lower')(kernel_desc, build_attr)

    # output key -> attribute of the object returned by 'composite_lower'
    table_fields = (
        ('index', 'index_table'),
        ('l1_range', 'l1_tile_range_table'),
        ('l0_range', 'l0_tile_range_table'),
        ('l1_mod', 'l1_tile_mod_table'),
        ('l0_mod', 'l0_tile_mod_table'),
    )
    spaces = {
        key: getattr(lowered, field).asnumpy().tolist()
        for key, field in table_fields
    }
    if level >= 2:
        spaces['tuning_space'] = lowered.tiling_candidate.asnumpy().tolist()
    return spaces
Exemple #2
0
def _build_to_gpu_func(desc_s, desc_d, attr=None, poly=False):
    """
    Build a GPU kernel from a compute description in json format.

    Attributes that the caller left unset (or falsy) are completed from
    repository_gpu before the "composite_with_json" global function is called.

    Args:
       desc_s : str of compute description
       desc_d : dict of compute description
       attr   : dict of build attributes; {'dim': ''} is used when it is
                None, otherwise the given dict is modified in place
       poly   : forwarded unchanged as the third argument of
                "composite_with_json"

    Returns:
       Whatever the "composite_with_json" global function returns.
    """
    def get_repo(keys, default=None):
        # Descend repository_gpu along `keys`; stop with `default` as soon as
        # a level is missing or falsy.
        node = repository_gpu
        for k in keys:
            node = node.get(k)
            if not node:
                return default
        return node

    if attr is None:
        attr = {'dim': ''}
    compute, shape, dtype = generate_trait(desc_d)

    # Prefer the shape/dtype specific attrs, fall back to the per-compute ones.
    repo_attr = (get_repo([compute, shape, dtype, 'metadata', 'attrs'], {})
                 or get_repo([compute, 'metadata', 'attrs'], {}))
    # Repository values only fill entries that are missing or falsy in attr.
    attr.update({k: repo_attr[k] for k in repo_attr if not attr.get(k)})

    for item in ('dim', 'bind_block', 'bind_thread'):
        if attr.get(item) not in (None, ''):
            continue
        value = get_repo([compute, shape, dtype, item])
        if value:
            attr[item] = value

    return tvm.get_global_func("composite_with_json")(desc_s, attr, poly)
Exemple #3
0
    def execute(self):
        """
        Execute the parser, accumulate the cycles of the labelled kernel, and
        optionally write the parsed records to the output file.

        The source file is read record by record (64 bytes per read). For
        records whose "<stream_id>_<task_id>" equals the label returned by the
        "ascend_get_kernel_label" global function, the syscnt of a
        'Start of task' record is remembered and the difference to it is added
        to the cycle count on every 'End of task' record. When self._is_print
        is set, every parsed record is additionally formatted and written to
        self._output_filename together with the title and column-title lines.

        Returns:
            The accumulated cycle count, or max_time_consume when the count
            is 0.
        """

        log_type = ['Start of task', 'End of task', 'Start of block', 'End of block', 'Block PMU']

        # Formatted records are collected in a list and joined once at the
        # end, so the growing text is not re-copied for every record.
        result_lines = []

        self._source_flie_name = validate_and_normalize_path(self._source_flie_name)
        last_syscnt = 0
        cycles = 0

        kernel_label = tvm.get_global_func("ascend_get_kernel_label")()
        with open(self._source_flie_name, 'rb') as hwts_data:
            while True:
                # read the next record of up to 64 bytes; an empty read means EOF
                line = hwts_data.read(64)
                if not line:
                    break
                if not line.strip():
                    # record made up of whitespace bytes only: nothing to parse
                    continue
                byte_first_four = struct.unpack('BBHHH', line[0:8])
                # byte_first[0:4] refers to count. byte_first[4] refers to is_warn_res0_0v.
                # byte_first[5:8] refers to the type of ms.
                byte_first = bin(byte_first_four[0]).replace('0b', '').zfill(8)
                ms_type = byte_first[-3:]
                is_warn_res0_ov = byte_first[4]
                cnt = int(byte_first[0:4], 2)
                core_id = byte_first_four[1]
                blk_id, task_id = byte_first_four[3], byte_first_four[4]
                stream_id, syscnt = self._parse_struct(ms_type, line, is_warn_res0_ov)
                if stream_id is None:
                    logging.info("Profiling: invalid hwts log record type %s", ms_type)
                    continue
                # NOTE(review): this conversion does not change the output below,
                # because both later uses stringify task_id anyway.
                if int(task_id) < 25000:
                    task_id = str(task_id)
                if kernel_label == (str(stream_id) + '_' + str(task_id)):
                    record_type = log_type[int(ms_type, 2)]
                    if record_type == "Start of task":
                        last_syscnt = syscnt
                    elif record_type == "End of task":
                        cycles += syscnt - last_syscnt

                if self._is_print:
                    result_lines.append(
                        "%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id,
                                                                  blk_id, task_id, syscnt, stream_id))

        if self._is_print:
            fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
            fwrite_format(self._output_filename, data_source=self._dst_file_column_title)
            fwrite_format(self._output_filename, data_source="".join(result_lines))

        return cycles if cycles != 0 else max_time_consume
Exemple #4
0
def _build_to_gpu_func(desc_s, desc_d, attrs=None, poly=False):
    """
    Build a GPU kernel from a compute description in json format.

    The tiling repository comes from the file named by the
    MS_GRAPH_KERNEL_TILING environment variable when it is set; otherwise it
    is empty for descriptions containing 'buffer_stitch', and read from
    "repository_gpu.json" in every other case. Attributes that the caller
    left unset (or falsy) are then completed from that repository.

    Args:
       desc_s : str of compute description
       desc_d : dict of compute description
       attrs  : dict of build attributes; {'dim': ''} is used when it is
                None, otherwise the given dict is modified in place
       poly   : forwarded unchanged to the build function

    Returns:
       The result of _build_json_list_func(..., 'cuda') when desc_d contains
       'parallel_fusion' or 'buffer_stitch', otherwise the result of the
       "composite_with_json" global function.
    """
    tiling_file = os.getenv('MS_GRAPH_KERNEL_TILING')
    if tiling_file:
        repository_gpu = read_repo_file(str(tiling_file))
    elif 'buffer_stitch' in desc_d:
        repository_gpu = {}
    else:
        repository_gpu = read_repo_file(
            _get_repository_file_path("repository_gpu.json"))

    def get_repo(keys, default=None):
        # Descend the repository along `keys`; stop with `default` as soon as
        # a level is missing or falsy.
        node = repository_gpu
        for k in keys:
            node = node.get(k)
            if not node:
                return default
        return node

    if attrs is None:
        attrs = {'dim': ''}
    compute, shape, dtype = generate_trait(desc_d)
    batchmatmul = _is_batchmatmul(desc_d)
    if batchmatmul:
        # batchmatmul entries are looked up under a shape-independent key
        shape = "any_shape"

    repo_attr = get_repo([compute, shape, dtype, 'metadata', 'attrs'], {})
    if batchmatmul and repo_attr:
        output_shape = desc_d['output_desc'][0]['shape']
        repo_attr = _set_tiling_attrs(output_shape, repo_attr)
    if not repo_attr:
        repo_attr = get_repo([compute, 'metadata', 'attrs'], {})
    # Repository values only fill entries that are missing or falsy in attrs.
    attrs.update({k: repo_attr[k] for k in repo_attr if not attrs.get(k)})

    for item in ('dim', 'bind_block', 'bind_thread'):
        if attrs.get(item) not in (None, ''):
            continue
        value = get_repo([compute, shape, dtype, item])
        if value:
            attrs[item] = value

    if 'parallel_fusion' in desc_d or 'buffer_stitch' in desc_d:
        return _build_json_list_func(desc_d, attrs, poly, 'cuda')
    return tvm.get_global_func("composite_with_json")(desc_s, attrs, poly)
Exemple #5
0
def _get_feature(target, segment_tree, segment_infos):
    """
    Return the feature of the statement produced by "tune_composite".

    The "tune_composite" global function is called with
    (target, True, segment_tree, segment_infos); its statement and the binds
    derived from its args are handed to get_features_from_stmts, and the
    first entry of that result is returned.
    """
    lowered_stmt, lowered_args = tvm.get_global_func("tune_composite")(
        target, True, segment_tree, segment_infos)
    from akg.tvm import build_module
    binds, _ = build_module.get_binds(lowered_args)
    from akg.utils.auto_tuning import get_features_from_stmts
    features = get_features_from_stmts(target=target,
                                       stmts=[lowered_stmt],
                                       binds=[binds],
                                       n_skip_cache=0)
    return features[0]
Exemple #6
0
def _build_to_func(desc_s, desc_d, attr=None, use_repo=True):
    """
    Build a kernel function from a compute description in json format.

    The tiling repository is read from the file named by the
    MS_GRAPH_KERNEL_TILING environment variable when it is set, otherwise
    from "repository.json".

    Args:
       desc_s   : str of compute description
       desc_d   : dict of compute description
       attr     : dict of build attributes; {'dim': ''} is used when it is
                  None, otherwise the given dict is modified in place
       use_repo : when true, attributes that are missing or falsy in attr
                  are completed from the repository

    Returns:
       The result of _build_json_list_func(..., True, 'cce') when desc_d
       contains 'parallel_fusion' or 'buffer_stitch', otherwise the result of
       the "composite_with_json_to_func" global function.
    """
    tiling_file = os.getenv('MS_GRAPH_KERNEL_TILING')
    if tiling_file:
        repository = read_repo_file(str(tiling_file))
    else:
        repository = read_repo_file(
            _get_repository_file_path("repository.json"))

    def get_repo(keys, default=None):
        # Descend the repository along `keys`; stop with `default` as soon as
        # a level is missing or falsy.
        node = repository
        for k in keys:
            node = node.get(k)
            if not node:
                return default
        return node

    if attr is None:
        attr = {'dim': ''}
    # 'enable_auto_inline' stays off for composite ops unless the caller set it.
    attr.setdefault('enable_auto_inline', False)

    if use_repo:
        compute, shape, dtype = generate_trait(desc_d)
        # Prefer the shape/dtype specific attrs, fall back to per-compute ones.
        repo_attr = (get_repo([compute, shape, dtype, 'metadata', 'attrs'], {})
                     or get_repo([compute, 'metadata', 'attrs'], {}))
        attr.update({k: repo_attr[k] for k in repo_attr if not attr.get(k)})
        if attr.get('dim') in (None, ''):
            tiling = get_repo([compute, shape, dtype, 'dim'])
            if tiling:
                attr['dim'] = tiling

    if 'parallel_fusion' in desc_d or 'buffer_stitch' in desc_d:
        return _build_json_list_func(desc_d, attr, True, 'cce')
    return tvm.get_global_func("composite_with_json_to_func")(desc_s, attr)
Exemple #7
0
def get_tiling_space(kernel_desc, level=1, attr=None):
    """
    Query the tiling space of a composite kernel.

    Args:
       kernel_desc : str of compute description (json); its 'process' entry
                     selects the backend and its 'op_desc' entries provide
                     the op names
       level       : info level; it is stored under attr['help_tiling'] and,
                     when it is at least 2, the tiling candidates are
                     returned as well
       attr        : dict of build attributes; a fresh dict is used when it
                     is None, otherwise 'help_tiling' and 'tuning' are set on
                     the given dict

    Returns:
       dict. When the updated attrs have a truthy "use_new_space" entry it
       holds the raw "tune_composite" result under 'tune_space'. Otherwise it
       holds 'index', 'c1_range', 'c0_range', 'c1_mod' and 'c0_mod' (plus
       'tuning_space' when level >= 2), each converted from the result with
       .asnumpy().tolist().
    """
    build_attr = {} if attr is None else attr
    build_attr['help_tiling'] = level
    build_attr['tuning'] = 'on'

    desc_d = json.loads(kernel_desc)
    backend = desc_d['process']
    op_names = {op['name'] for op in desc_d['op_desc']}
    if backend == "cuda":
        build_attr = _update_attrs_gpu(op_names, build_attr, True)
    elif backend == "cpu":
        build_attr = _update_attrs_cpu(op_names, build_attr, True)
    else:
        # every other backend is handled by the ascend attribute update
        build_attr = _update_attrs_ascend(op_names, build_attr)

    segment_tree, segment_infos = get_tune_construct_args(kernel_desc,
                                                          build_attr)
    tuned = tvm.get_global_func("tune_composite")(
        backend, True, segment_tree, segment_infos)

    if build_attr.get("use_new_space", False):
        return {'tune_space': tuned}

    # output key -> attribute of the object returned by "tune_composite"
    table_fields = (
        ('index', 'index_table'),
        ('c1_range', 'c1_tile_range_table'),
        ('c0_range', 'c0_tile_range_table'),
        ('c1_mod', 'c1_tile_mod_table'),
        ('c0_mod', 'c0_tile_mod_table'),
    )
    spaces = {
        key: getattr(tuned, field).asnumpy().tolist()
        for key, field in table_fields
    }
    if level >= 2:
        spaces['tuning_space'] = tuned.tiling_candidate.asnumpy().tolist()
    return spaces
Exemple #8
0
def _build_to_func(desc_s, desc_d, attr=None):
    """
    Build a kernel function from a compute description in json format.

    Attributes that the caller left unset (or falsy) are completed from the
    module-level `repository` before "composite_with_json_to_func" is called.

    Args:
       desc_s : str of compute description
       desc_d : dict of compute description
       attr   : dict of build attributes; {'dim': ''} is used when it is
                None, otherwise the given dict is modified in place

    Returns:
       Whatever the "composite_with_json_to_func" global function returns.
    """
    def get_repo(keys, default=None):
        # Descend the repository along `keys`; stop with `default` as soon as
        # a level is missing or falsy.
        node = repository
        for k in keys:
            node = node.get(k)
            if not node:
                return default
        return node

    if attr is None:
        attr = {'dim': ''}
    # 'enable_auto_inline' stays off for composite ops unless the caller set it.
    attr.setdefault('enable_auto_inline', False)

    compute, shape, dtype = generate_trait(desc_d)
    # Prefer the shape/dtype specific attrs, fall back to the per-compute ones.
    repo_attr = (get_repo([compute, shape, dtype, 'metadata', 'attrs'], {})
                 or get_repo([compute, 'metadata', 'attrs'], {}))
    attr.update({k: repo_attr[k] for k in repo_attr if not attr.get(k)})

    if attr.get('dim') in (None, ''):
        tiling = get_repo([compute, shape, dtype, 'dim'])
        if tiling:
            attr['dim'] = tiling

    return tvm.get_global_func("composite_with_json_to_func")(desc_s, attr)
Exemple #9
0
def _build(desc_s, desc_d, attr=None):
    """
    Build a kernel from a compute description in json format.

    For desc_d['process'] == 'cuda' the "composite_with_json" global function
    is called directly with (desc_s, attr). Every other process goes through
    _build_to_func and its result is passed to _api_internal._BuildToModule.
    """
    if desc_d['process'] != 'cuda':
        lowered = _build_to_func(desc_s, desc_d, attr)
        return _api_internal._BuildToModule(lowered)
    return tvm.get_global_func("composite_with_json")(desc_s, attr)
Exemple #10
0
def _build_json_list_func(desc_d, attrs, poly, target):
    """
    Split the description with _json_need_split and build the pieces with the
    "composite_with_json_list" global function.

    _json_need_split returns seven items. They are forwarded in a different
    order than they are returned: the attrs list goes after the three map
    lists, followed by poly and target.
    """
    func = tvm.get_global_func("composite_with_json_list")
    split_result = _json_need_split(desc_d, attrs)
    (block_jsons, input_names, output_names, attrs_list,
     alloc_maps, reuse_maps, clean_op_maps) = split_result
    return func(block_jsons, input_names, output_names,
                alloc_maps, reuse_maps, clean_op_maps,
                attrs_list, poly, target)
Exemple #11
0
def _build_to_module_ascend(desc_s_in, desc_d_in, attr=None, use_repo=True):
    """
    build kernel with compute description in json format
    Args:
       desc_s_in : str of compute description
       desc_d_in : dict of compute description
       attr      : dict of build attributes; may be None
       use_repo  : whether segment attributes may be completed from the
                   repository (only consulted when no explicit given_attrs
                   are supplied for a segment)

    Returns:
       The result of _build_for_tuning when attr contains "ret_mode",
       otherwise the result of the "lower_composite_to_module" global
       function.
    """

    repository = _get_repository("repository.json", desc_d_in)

    def _update_attr_by_repo(desc_s,
                             desc_d,
                             attr,
                             given_attrs=None,
                             support_online_tuning=True):
        """Complete the attrs of one segment and return (desc_s, attr).

        Entries from given_attrs (when it is not None) fill attrs that are
        missing or falsy. Otherwise, if use_repo is set, the repository (and
        possibly online tuning) is consulted and desc_s is replaced by the
        second value returned from _set_compute_attrs.
        """
        def _auto_set_single_block(desc_d, attr):
            # Turn multicore off when it is not enabled in attr and the
            # description's "extra" section asks for single_block mode.
            if not attr.get("enable_multicore", None) and desc_d.get(
                    "extra", None):
                if desc_d["extra"].get("BlockMode", "") == "single_block":
                    attr["enable_multicore"] = 0
            return attr

        if attr is None:
            attr = {'dim': ''}
        all_ops = set(op['name'] for op in desc_d['op_desc'])
        attr = _update_attrs_ascend(all_ops, attr)
        attr = _auto_set_single_block(desc_d, attr)
        if given_attrs is not None:
            # An empty dict is still "given": it skips the repository branch.
            for key, value in given_attrs.items():
                if not attr.get(key):
                    attr[key] = value
        elif use_repo:
            compute, shape, dtype = generate_trait(desc_d)
            repo_attr = _get_repo_attr(desc_d, compute, shape, dtype,
                                       repository, False)
            attr = merge_attrs(attr, repo_attr)
            if attr.get('dim') in (None, ''):
                tiling = get_attr_from_dict([compute, shape, dtype, 'dim'],
                                            repository)
                if tiling:
                    attr['dim'] = tiling
                elif support_online_tuning and 'online_tuning' in attr:
                    attr = _get_online_tune_attr(
                        desc_s, attr,
                        get_repository_file_path("repository.json"))
            _, desc_s = _set_compute_attrs(desc_d, attr)
        return desc_s, attr

    def _get_parallel_repo(desc_d):
        # 'BlockPlan' entry of the repository for this description ({} if absent).
        compute, shape, dtype = generate_trait(desc_d)
        repo_attr = get_attr_from_dict([compute, shape, dtype, 'BlockPlan'],
                                       repository, {})
        return repo_attr

    def _get_stitch_repo(desc_d):
        # Whole repository entry for this description ({} if absent).
        compute, shape, dtype = generate_trait(desc_d)
        repo_attr = get_attr_from_dict([compute, shape, dtype], repository, {})
        return repo_attr

    def _parallel_postprocess(desc_d, json_str_list, attrs_list, _):
        """Post-process the segments of a parallel construct in place."""
        parallel_repo = _get_parallel_repo(desc_d)
        if parallel_repo:
            # "BlockPlan" should be: [{"block_plan": x1, attr1: x2, attr2: x3}, ...]
            # zip() stops at the shortest input, so segments without a plan
            # entry are left untouched.
            for i, [cur_json, cur_attr, cur_plan] in enumerate(
                    zip(json_str_list, attrs_list, parallel_repo)):
                # When BlockPlan is active, the body should be run as single block
                cur_attr["enable_multicore"] = 0
                json_str_list[i], attrs_list[i] = _update_attr_by_repo(
                    cur_json, json.loads(cur_json), cur_attr,
                    cur_plan[ConstructKey.ATTRS], False)
        else:
            for i, [cur_json,
                    cur_attr] in enumerate(zip(json_str_list, attrs_list)):
                json_str_list[i], attrs_list[i] = _update_attr_by_repo(
                    cur_json, json.loads(cur_json), cur_attr, None, False)

        return json_str_list, attrs_list

    def _stitch_postprocess(desc_d, stitch_jsons, attrs_list, _):
        """Post-process the segments of a stitch construct.

        Returns the updated json strings and one combined attr dict per
        sub-segment.
        """
        def _stitch_combine_attrs(common_attr, sub_attrs):
            # Each result starts as a copy of common_attr; a non-empty sub attr
            # dict is nested under the 1-based key "sub_attr_<i>".
            combine_attrs = []
            for i, a in enumerate(sub_attrs):
                new_sub_attrs = {}
                for k, v in common_attr.items():
                    new_sub_attrs[k] = v
                if a:
                    key = "sub_attr_" + str(i + 1)
                    new_sub_attrs[key] = {}
                    for k, v in a.items():
                        new_sub_attrs.get(key)[k] = v
                combine_attrs.append(new_sub_attrs)
            return combine_attrs

        origin_stitch_attrs = attrs_list[0]
        if origin_stitch_attrs.get("peeling") is None:
            # Read buffer stitch attr from repo
            stitch_repo = _get_stitch_repo(desc_d)
            if stitch_repo.get("peeling") is not None:
                origin_stitch_attrs.update(stitch_repo)
            # `attr` is the outer function's argument and defaults to None,
            # so it must be checked before the membership test.
            elif attr is not None and "online_tuning" in attr:
                # If buffer stitch attr not in repo, use online tuning
                tuning_attr = _get_online_tune_attr(
                    json.dumps(desc_d), origin_stitch_attrs,
                    get_repository_file_path("repository.json"))
                origin_stitch_attrs.update(tuning_attr)
        # Update sub json attr
        common_attr, stitch_sub_attrs = split_stitch_attr(
            origin_stitch_attrs, len(stitch_jsons))
        for i, cur_json_str in enumerate(stitch_jsons):
            # The empty given_attrs dict keeps the repository from being
            # consulted for the individual sub json.
            stitch_jsons[i], stitch_sub_attrs[i] = _update_attr_by_repo(
                cur_json_str, json.loads(cur_json_str), stitch_sub_attrs[i],
                {})
        stitch_attrs = _stitch_combine_attrs(common_attr, stitch_sub_attrs)

        return stitch_jsons, stitch_attrs

    def _normal_postprocess(desc_d, json_str_list, attrs_list, poly):
        """Post-process the segments of a normal construct in place."""
        _ = (desc_d, poly)  # For unused warning...
        for i, (cur_json_str,
                cur_attr) in enumerate(zip(json_str_list, attrs_list)):
            json_str_list[i], attrs_list[i] = _update_attr_by_repo(
                cur_json_str, json.loads(cur_json_str), cur_attr)
        return json_str_list, attrs_list

    post_funcs = {
        ConstructType.PARALLEL: _parallel_postprocess,
        ConstructType.STITCH: _stitch_postprocess,
        ConstructType.NORMAL: _normal_postprocess,
    }
    segment_tree, segment_infos = get_construct_args(desc_s_in, attr,
                                                     post_funcs)
    process = desc_d_in["process"]

    func = tvm.get_global_func("lower_composite_to_module")
    # attr defaults to None; `"ret_mode" in None` would raise TypeError.
    if attr is not None and "ret_mode" in attr:
        return _build_for_tuning(attr, func, process, segment_tree,
                                 segment_infos)
    return func(process, True, segment_tree, segment_infos)
Exemple #12
0
def _build_to_module(desc_s, desc_d, attrs=None, poly=True):
    """
    build kernel with compute description in json format
    Args:
       desc_s : str of compute description
       desc_d : dict of compute description
       attrs  : dict of build attributes; may be None
       poly   : forwarded to the attribute updates and to the lowering
                function; tuning mode is only entered when it is truthy

    Returns:
       The result of _build_for_tuning when attrs contains "ret_mode" and
       poly is truthy, otherwise the result of the
       "lower_composite_to_module" global function.
    """
    def _update_attr_by_repo(desc_s, attrs):
        """Complete attrs from "repository_<process>.json".

        Returns the parsed description together with the updated attrs.
        """
        desc_d = json.loads(desc_s)
        process = desc_d["process"]
        file_name = "repository_" + process + ".json"
        repository = _get_repository(file_name, desc_d)
        all_ops = set(op["name"] for op in desc_d["op_desc"])

        if attrs is None:
            attrs = {"dim": ""}
        compute, shape, dtype = generate_trait(desc_d)
        batchmatmul = "BatchMatMul" in all_ops
        if batchmatmul:
            # BatchMatMul entries are looked up under a shape-independent key
            shape = "any_shape"
        repo_attr = _get_repo_attr(desc_d, compute, shape, dtype, repository,
                                   batchmatmul)
        attrs = merge_attrs(attrs, repo_attr)
        # Only cuda additionally takes its block/thread binding from the repo.
        attr_list = ["dim", "bind_block", "bind_thread"
                     ] if process == "cuda" else ["dim"]
        for item in attr_list:
            if attrs.get(item) in (None, ""):
                value = get_attr_from_dict([compute, shape, dtype, item],
                                           repository)
                if value:
                    attrs[item] = value
        # Still no tiling: fall back to online tuning when it was requested.
        if attrs.get("dim") in (None, "") and "online_tuning" in attrs:
            attrs = _get_online_tune_attr(desc_s, attrs,
                                          get_repository_file_path(file_name))
        return desc_d, attrs

    def _post_update_attr(desc_s, attrs, poly):
        # Repository update first, then the backend specific update for
        # cuda / cpu; other processes keep the repository result as is.
        desc_d, attrs = _update_attr_by_repo(desc_s, attrs)
        all_ops = set(op["name"] for op in desc_d["op_desc"])
        if desc_d["process"] == "cuda":
            attrs = _update_attrs_gpu(all_ops, attrs, poly)
        elif desc_d["process"] == "cpu":
            attrs = _update_attrs_cpu(all_ops, attrs, poly)
        return attrs

    def _common_postprocess(_, json_str_list, attrs_list, poly):
        # Every segment's attrs are updated from that segment's own json.
        for i, (cur_json_str,
                cur_attr) in enumerate(zip(json_str_list, attrs_list)):
            attrs_list[i] = _post_update_attr(cur_json_str, cur_attr, poly)
        return json_str_list, attrs_list

    def _stitch_postprocess(desc_d, json_str_list, attrs_list, poly):
        # Stitch segments are all updated against the whole description.
        for i, cur_attr in enumerate(attrs_list):
            attrs_list[i] = _post_update_attr(json.dumps(desc_d), cur_attr,
                                              poly)
        return json_str_list, attrs_list

    post_funcs = {
        ConstructType.PARALLEL: _common_postprocess,
        ConstructType.STITCH: _stitch_postprocess,
        ConstructType.NORMAL: _common_postprocess,
        ConstructType.TOT: _common_postprocess,
        ConstructType.CONCAT: _common_postprocess
    }
    segment_tree, segment_infos = get_construct_args(desc_s, attrs, post_funcs)
    process = desc_d["process"]

    func = tvm.get_global_func("lower_composite_to_module")
    # attrs defaults to None; `"ret_mode" in None` would raise TypeError.
    if attrs is not None and "ret_mode" in attrs and poly:
        return _build_for_tuning(attrs, func, process, segment_tree,
                                 segment_infos)
    return func(process, poly, segment_tree, segment_infos)