# Code example #1
# File: depthwise_conv2d.py  Project: gekowa/ascend-opp
def depthwise_conv2d_fusion_para(inputs, outputs):
    """
    Get the L1 fusion parameters for depthwise_conv2d.

    Parameters
    ----------
    inputs : object exposing ``op.attrs``
        The attrs mapping may carry "addr_type", "valid_shape",
        "slice_offset", "L1_fusion_type", "L1_addr_flag" and
        "L1_valid_size"; a missing key falls back to a default.
    outputs : mapping
        May carry "addr_type"; defaults to 0 when absent.

    Returns
    -------
    dict
        Keys "input_memory_type", "output_memory_type", "valid_shape",
        "slice_offset", "l1_fusion_type", "fmap_l1_addr_flag" and
        "fmap_l1_valid_size".

    Notes
    -----
    Invalid memory types, or a valid_shape without a slice_offset, are
    reported through ``err_man``.
    """
    attrs = inputs.op.attrs

    def _attr_or(key, default):
        # Membership test + indexing is the only access pattern the original
        # code relies on for this attrs container, so keep to it.
        return attrs[key] if key in attrs else default

    input_memory_type = _attr_or("addr_type", 0)
    output_memory_type = outputs["addr_type"] if "addr_type" in outputs else 0
    valid_shape = _attr_or("valid_shape", ())
    slice_offset = _attr_or("slice_offset", ())
    l1_fusion_type = _attr_or("L1_fusion_type", -1)
    fmap_l1_addr_flag = _attr_or("L1_addr_flag", -1)
    fmap_l1_valid_size = _attr_or("L1_valid_size", -1)

    l1_fusion_enable_flag = get_L1_info("L1_fusion_enabled")

    valid_shape = shape_to_list(valid_shape)
    slice_offset = shape_to_list(slice_offset)

    # With L1 fusion switched off every fusion-related setting is neutralised.
    if not l1_fusion_enable_flag:
        input_memory_type = 0
        output_memory_type = 0
        valid_shape = []
        slice_offset = []
        l1_fusion_type = -1

    if int(input_memory_type) not in (0, 1, 2):
        err_man.raise_err_input_mem_type("depthwise_conv2d", input_memory_type)
    # NOTE(review): the two error calls below were missing from this copy of
    # the function; they are restored to mirror _conv2d_fusion_para - confirm
    # against the upstream file.
    if int(output_memory_type) not in (0, 1, 2):
        err_man.raise_err_output_mem_type("depthwise_conv2d",
                                          output_memory_type)
    if valid_shape and not slice_offset:
        err_man.raise_err_specific_user(
            "depthwise_conv2d",
            "if valid_shape exists slice_offset can not be []")

    fusion_para = {
        "input_memory_type": input_memory_type,
        "output_memory_type": output_memory_type,
        "valid_shape": valid_shape,
        "slice_offset": slice_offset,
        "l1_fusion_type": l1_fusion_type,
        "fmap_l1_addr_flag": fmap_l1_addr_flag,
        "fmap_l1_valid_size": fmap_l1_valid_size,
    }

    return fusion_para
# Code example #2
def _conv2d_fusion_para(inputs, outputs):
    """
    Get the L1/L2 fusion parameters for conv2d from dict-style descriptors.

    Parameters
    ----------
    inputs : dict
        May carry "addr_type", "valid_shape", "slice_offset",
        "L1_fusion_type", "L1_addr_flag" and "L1_valid_size".
    outputs : dict
        May carry "addr_type".

    Returns
    -------
    dict
        Keys "input_memory_type", "output_memory_type", "valid_shape",
        "slice_offset", "l1_fusion_type", "fmap_l1_addr_flag" and
        "fmap_l1_valid_size".

    Notes
    -----
    Invalid memory types, or a valid_shape without a slice_offset, are
    reported through ``err_man``.
    """
    input_memory_type = inputs.get("addr_type", 0)
    output_memory_type = outputs.get("addr_type", 0)
    valid_shape = inputs.get("valid_shape", ())
    slice_offset = inputs.get("slice_offset", ())
    l1_fusion_type = inputs.get("L1_fusion_type", -1)

    fmap_l1_addr_flag = inputs.get("L1_addr_flag", "nothing")
    fmap_l1_valid_size = inputs.get("L1_valid_size", -1)

    l1_fusion_enable_flag = get_L1_info("L1_fusion_enabled")
    if not l1_fusion_enable_flag:
        l1_fusion_type = -1

    valid_shape = _shape_to_list(valid_shape)
    slice_offset = _shape_to_list(slice_offset)

    l2_fusion_enable_flag = get_L1_info("L2_fusion_enabled")

    # Only when both fusion modes are off are the memory/slice settings
    # reset; the L1 fusion type is already cleared above when L1 is off.
    if not l2_fusion_enable_flag and not l1_fusion_enable_flag:
        input_memory_type = 0
        output_memory_type = 0
        valid_shape = []
        slice_offset = []
        l1_fusion_type = -1

    if input_memory_type not in (0, 1, 2):
        err_man.raise_err_input_mem_type("conv2d", input_memory_type)
    if output_memory_type not in (0, 1, 2):
        err_man.raise_err_output_mem_type("conv2d", output_memory_type)
    if valid_shape and not slice_offset:
        err_man.raise_err_specific_user(
            "conv2d", "if valid_shape exists slice_offset can not be []")

    fusion_para = {
        "input_memory_type": input_memory_type,
        "output_memory_type": output_memory_type,
        "valid_shape": valid_shape,
        "slice_offset": slice_offset,
        "l1_fusion_type": l1_fusion_type,
        "fmap_l1_addr_flag": fmap_l1_addr_flag,
        "fmap_l1_valid_size": fmap_l1_valid_size,
    }

    return fusion_para
# Code example #3
def _conv2d_compute_fusion_para(inputs):
    """
    Get the L1/L2 fusion parameters for conv2d_compute from a tensor.

    Parameters
    ----------
    inputs : object exposing ``op.attrs``
        The attrs mapping may carry "addr_type", "valid_shape",
        "slice_offset", "L1_fusion_type", "L1_addr_flag" and
        "L1_valid_size". Scalar attributes are read through ``.value``.

    Returns
    -------
    dict
        Keys "input_memory_type", "output_memory_type" (always the string
        "fuse_flag"), "valid_shape", "slice_offset", "l1_fusion_type",
        "fmap_l1_addr_flag" and "fmap_l1_valid_size".

    Notes
    -----
    An invalid input memory type, or a valid_shape without a slice_offset,
    is reported through ``err_man``.
    """
    attrs = inputs.op.attrs

    def _scalar_attr(key, default):
        # Scalar attributes are wrapped objects; unwrap with ``.value``.
        return attrs[key].value if key in attrs else default

    input_memory_type = _scalar_attr("addr_type", 0)
    valid_shape = attrs["valid_shape"] if "valid_shape" in attrs else ()
    slice_offset = attrs["slice_offset"] if "slice_offset" in attrs else ()
    l1_fusion_type = _scalar_attr("L1_fusion_type", -1)

    fmap_l1_addr_flag = _scalar_attr("L1_addr_flag", "nothing")
    fmap_l1_valid_size = _scalar_attr("L1_valid_size", -1)

    # The original fetched this flag twice; one lookup is enough
    # (assumes get_L1_info is a plain lookup).
    l1_fusion_enable_flag = get_L1_info("L1_fusion_enabled")
    if not l1_fusion_enable_flag:
        l1_fusion_type = -1

    valid_shape = _shape_to_list(valid_shape)
    slice_offset = _shape_to_list(slice_offset)

    l2_fusion_enable_flag = get_L1_info("L2_fusion_enabled")

    if not l2_fusion_enable_flag and not l1_fusion_enable_flag:
        input_memory_type = 0
        valid_shape = []
        slice_offset = []
        l1_fusion_type = -1

    if input_memory_type not in (0, 1, 2):
        err_man.raise_err_input_mem_type("conv2d", input_memory_type)
    if valid_shape and not slice_offset:
        err_man.raise_err_specific_user(
            "conv2d", "if valid_shape exists slice_offset can not be []")

    fusion_para = {
        "input_memory_type": input_memory_type,
        "output_memory_type": "fuse_flag",
        "valid_shape": valid_shape,
        "slice_offset": slice_offset,
        "l1_fusion_type": l1_fusion_type,
        "fmap_l1_addr_flag": fmap_l1_addr_flag,
        "fmap_l1_valid_size": fmap_l1_valid_size,
    }

    return fusion_para
# Code example #4
def conv_layer_cce_para_check(shape_in, shape_w, padh, padw, strideh, stridew,
                              in_dtype, w_dtype, res_dtype, offset_w_dtype,
                              bias, kernel_name, dilateh=1, dilatew=1,
                              optim_dict=None, fusion_para=None):
    """
    Validate the conv2d parameters and return the checked shapes.

    Parameters
    ----------
    shape_in: shape of feature map

    shape_w: shape of weight

    padh: H direction padding, an int or a [top, bottom] list

    padw: W direction padding, an int or a [left, right] list

    strideh: H direction stride

    stridew: W direction stride

    in_dtype: the feature map data type

    w_dtype: the weight data type

    res_dtype: the result data type

    offset_w_dtype: weight offset data type, must be 'int32'

    bias: the tag for bias or not (not used by the checks below)

    kernel_name: cce kernel name (not used by the checks below)

    dilateh: H direction spacing between kernel

    dilatew: W direction spacing between kernel

    optim_dict: optimize feature dict, defaults to {"c0_optim_flg": False}

    fusion_para: the config for L1 or L2 Fusion, defaults to "no fusion"

    Returns
    -------
    (shape_in, shape_w) as returned by check_conv_shape
    """
    def _split_pad(pad, pad_name):
        """Return the (begin, end) padding pair for one direction."""
        if not isinstance(pad, list):
            # A scalar pads both sides of the axis equally.
            return pad, pad
        if len(pad) != PAD_SHAPE_DIM:
            err_man.raise_err_specific_user(
                "conv2d",
                "Dimension must be " + str(PAD_SHAPE_DIM)
                + " when " + pad_name + " is a list.")
        return pad[0], pad[1]

    check_dtype_rule(offset_w_dtype, ['int32'])

    # These SoC versions accept int8 as well as float16 data; every other
    # version is restricted to float16. The ``else`` was lost in this copy,
    # which made the float16-only checks run unconditionally after the
    # int8/float16 ones.
    mixed_dtype_socs = ("Ascend310", "Hi3796CV300ES", "Ascend710",
                        "Ascend615", "Ascend610", "Hi3796CV300CS")
    if cce_conf.get_soc_spec("SOC_VERSION") in mixed_dtype_socs:
        check_dtype_rule(in_dtype, ('int8', "float16"))
        check_dtype_rule(w_dtype, ('int8', "float16"))
        check_dtype_rule(res_dtype, ('int32', "float16"))
    else:
        check_dtype_rule(in_dtype, ['float16'])
        check_dtype_rule(w_dtype, ['float16'])
        check_dtype_rule(res_dtype, ['float16'])

    pad_top, pad_bottom = _split_pad(padh, "padh")
    pad_left, pad_right = _split_pad(padw, "padw")

    if optim_dict is None:
        optim_dict = {"c0_optim_flg": False}
    # The c0=4 optimisation is rejected for more than 4 channels and for
    # 1x1 kernels.
    optim_off = shape_in[1] > 4 or shape_w[1] > 4 or \
        (shape_w[2] == 1 and shape_w[3] == 1)
    if optim_dict.get("c0_optim_flg") is True and optim_off:
        err_man.raise_err_specific_user(
            "conv2d", "Invalid config for c0=4 optimize feature.")

    if fusion_para is None:
        fusion_para = {"input_memory_type": 0, "output_memory_type": 0,
                       "valid_shape": (), "slice_offset": (),
                       "l1_fusion_type": -1,
                       "fmap_l1_addr_flag": 0,
                       "fmap_l1_valid_size": -1}

    dilation_not_pass = (dilateh > 1 or dilatew > 1) and w_dtype == 'int8'
    if dilation_not_pass:
        err_man.raise_err_specific_user(
            "conv2d", "Quant conv does not support dilate > 1.")

    shape_in, shape_w = check_conv_shape(shape_in, shape_w,
                                         pad_top, pad_bottom,
                                         pad_left, pad_right, strideh, stridew,
                                         in_dtype, w_dtype, fusion_para,
                                         optim_dict, dilateh, dilatew)

    return shape_in, shape_w
# Code example #5
def calc_para_from_tensor(inputs, weights, bias, offset_w, strides, pads,
                          dilations, offset_x, kernel_name,
                          data_format="NCHW"):
    """
    Derive the conv2d compute parameters from tensor-style inputs.

    NOTE(review): this copy of the function had lost the end of its
    signature, the bodies of the two shape loops and several ``else``
    branches. ``data_format="NCHW"`` mirrors calc_para_from_dict and the use
    of ``data_format`` in the body; confirm all restorations upstream.

    Parameters
    ----------
    inputs : feature-map tensor; ``shape`` and ``op.attrs`` are read
    weights : weight tensor; ``dtype`` and ``op.attrs`` are read
    bias : bias tensor, forwarded as "bias_tensor"
    offset_w : weight offset tensor, forwarded as "offset_w_tensor"
    strides : sequence of 4 strides, indexed by ``data_format``
    pads : sequence of 4 paddings, split as [0:2] for H and [2:4] for W
    dilations : sequence of 4 dilations, indexed by ``data_format``
    offset_x : forwarded as "offset_x"
    kernel_name : forwarded as "kernel_name"
    data_format : layout string used to locate H and W in strides/dilations

    Returns
    -------
    (para_dict, optim_dict)
    """
    # NOTE(review): loop bodies were missing; ``.value`` matches how the other
    # attrs in this function are unwrapped - confirm.
    shape_w = [dim.value for dim in weights.op.attrs['ori_shape']]
    shape_fm = [dim.value for dim in inputs.shape]

    input_h = shape_fm[2]
    input_w = shape_fm[3]

    format_w = weights.op.attrs['ori_format'].value
    weight_formats = ["NCHW", "NHWC", "HWCN"]
    if format_w not in weight_formats:
        err_man.raise_err_input_format_invalid(
            "conv2d", "weights", weight_formats, format_w)

    weight_h = shape_w[format_w.find('H')]
    weight_w = shape_w[format_w.find('W')]
    shape_c = shape_w[format_w.find('C')]

    if len(strides) != 4:
        err_man.raise_err_should_be_4d("conv2d", "strides")
    if len(dilations) != 4:
        # Was reported as "directions"; calc_para_from_dict says "dilations".
        err_man.raise_err_should_be_4d("conv2d", "dilations")

    format_x = inputs.op.attrs['ori_format'].value
    input_formats = ["NCHW", "NHWC"]
    if format_x not in input_formats:
        err_man.raise_err_input_format_invalid(
            "conv2d", "input", input_formats, format_x)

    pos_h = data_format.find('H')
    pos_w = data_format.find('W')
    strideh = strides[pos_h]
    stridew = strides[pos_w]
    dlt_h = dilations[pos_h]
    dlt_w = dilations[pos_w]

    if len(pads) == 4:
        padh = [pads[0], pads[1]]
        padw = [pads[2], pads[3]]
    else:
        err_man.raise_err_should_be_4d("conv2d", "pads shape")

    fusion_para = _conv2d_compute_fusion_para(inputs)

    valid_shape = fusion_para.get("valid_shape")
    # A valid_shape whose H equals the full feature-map H is dropped,
    # together with its slice offset.
    if valid_shape and valid_shape[2] == shape_fm[2]:
        valid_shape = ()
        fusion_para["valid_shape"] = ()
        fusion_para["slice_offset"] = ()
    if valid_shape:
        input_h = valid_shape[2]
        input_w = valid_shape[3]

    strideh = _trans_stride(input_h, weight_h, strideh, padh, dlt_h)
    stridew = _trans_stride(input_w, weight_w, stridew, padw, dlt_w)

    para_dict = {"pad_h": padh, "pad_w": padw, "stride_h": strideh,
                 "stride_w": stridew, "dilate_h": dlt_h, "dilate_w": dlt_w,
                 "offset_x": offset_x, "filter_h": weight_h,
                 "filter_w": weight_w, "bias_tensor": bias,
                 "offset_w_tensor": offset_w,
                 "fusion_para": fusion_para,
                 "kernel_name": kernel_name}

    # NOTE(review): the ``else`` separating the two SoC families was missing,
    # which left the Ascend310 test nested where it could never match.
    soc_version = cce_conf.get_soc_spec("SOC_VERSION")
    if soc_version in ("Hi3796CV300ES", "Hi3796CV300CS"):
        para_dict["mad_dtype"] = "float16"
        if weights.dtype != "float16":
            para_dict["mad_dtype"] = "int32"
    elif soc_version in ("Ascend310",) and weights.dtype == "int8":
        para_dict["mad_dtype"] = "int32"

    c0_optim_flg = False
    if shape_c <= 4 and ("format" in weights.op.attrs and
                         weights.op.attrs['format'].value == "FRACTAL_Z_C04"):
        c0_optim_flg = True
        if (weight_h == 1) and (weight_w == 1):
            err_man.raise_err_specific_user(
                "conv2d",
                "weight shape does not support that H and W are both "
                "equal to 1 when C0=4.")

        if fusion_para["input_memory_type"] == 1:
            err_man.raise_err_specific_input_shape(
                "conv2d",
                "c0 optim not support fmap from L1 directly (instead of DDR)")

    optim_dict = {"c0_optim_flg": c0_optim_flg}

    return para_dict, optim_dict
# Code example #6
def calc_para_from_dict(inputs, weights, strides, pads,
                        dilations, outputs, data_format="NCHW"):
    """
    Derive the conv2d parameters from dict-style descriptors.

    Parameters
    ----------
    inputs : dict with "ori_shape" and "ori_format" (NCHW or NHWC)
    weights : dict with "ori_shape", "ori_format" (NCHW, NHWC or HWCN) and
        optionally "format"
    strides : sequence of 4 strides, indexed by ``data_format``
    pads : sequence of 4 paddings, split as [0:2] for H and [2:4] for W
    dilations : sequence of 4 dilations, indexed by ``data_format``
    outputs : dict forwarded to _conv2d_fusion_para
    data_format : layout string used to locate H and W in strides/dilations

    Returns
    -------
    (shape_fm, shape_filter, padh, padw, strideh, stridew,
     dlt_h, dlt_w, optim_dict, fusion_para)
        shape_fm and shape_filter are re-ordered to [N, C, H, W].
    """
    shape_x = inputs.get("ori_shape")
    shape_w = weights.get("ori_shape")

    if len(strides) != 4:
        err_man.raise_err_should_be_4d("conv2d", "strides")
    if len(dilations) != 4:
        err_man.raise_err_should_be_4d("conv2d", "dilations")

    # The ``else`` was missing here, so valid pads always hit the error call
    # and invalid pads left padh/padw undefined.
    if len(pads) == 4:
        padh = [pads[0], pads[1]]
        padw = [pads[2], pads[3]]
    else:
        err_man.raise_err_should_be_4d("conv2d", "pads shape")

    if (not isinstance(shape_x, (tuple, list))) or len(shape_x) != 4:
        err_man.raise_err_should_be_4d("conv2d", "inputs")

    if (not isinstance(shape_w, (tuple, list))) or len(shape_w) != 4:
        err_man.raise_err_should_be_4d("conv2d", "weights")

    format_x = inputs.get("ori_format")
    input_formats = ["NCHW", "NHWC"]
    if format_x not in input_formats:
        err_man.raise_err_input_format_invalid(
            "conv2d", "inputs", input_formats, format_x)
    shape_fm = [shape_x[format_x.find(axis)] for axis in "NCHW"]

    pos_attr_h = data_format.find('H')
    pos_attr_w = data_format.find('W')
    strideh = strides[pos_attr_h]
    stridew = strides[pos_attr_w]
    dlt_h = dilations[pos_attr_h]
    dlt_w = dilations[pos_attr_w]

    format_w = weights.get("ori_format")
    weight_formats = ["NCHW", "NHWC", "HWCN"]
    if format_w not in weight_formats:
        err_man.raise_err_input_format_invalid(
            "conv2d", "weights", weight_formats, format_w)
    shape_filter = [shape_w[format_w.find(axis)] for axis in "NCHW"]
    _, filter_c, filter_h, filter_w = shape_filter

    fusion_para = _conv2d_fusion_para(inputs, outputs)

    valid_shape = fusion_para.get("valid_shape")
    # A valid_shape whose H equals the full feature-map H is dropped,
    # together with its slice offset.
    if valid_shape and valid_shape[2] == shape_fm[2]:
        valid_shape = ()
        fusion_para["valid_shape"] = ()
        fusion_para["slice_offset"] = ()

    # The ``else`` was missing here as well: H/W must come from valid_shape
    # when it is set and from the full feature map otherwise.
    if valid_shape:
        input_h = valid_shape[2]
        input_w = valid_shape[3]
    else:
        input_h = shape_fm[2]
        input_w = shape_fm[3]

    strideh = _trans_stride(input_h, filter_h, strideh, padh, dlt_h)
    stridew = _trans_stride(input_w, filter_w, stridew, padw, dlt_w)

    c0_optim_flg = False
    if filter_c <= 4 and weights.get("format") == "FRACTAL_Z_C04":
        c0_optim_flg = True
        if (filter_h == 1) and (filter_w == 1):
            err_man.raise_err_specific_user(
                "conv2d",
                "weight shape does not support that H and W are both "
                "equal to 1 when C0=4.")
        if fusion_para["input_memory_type"] == 1:
            err_man.raise_err_specific_input_shape(
                "conv2d",
                "c0 optim not support fmap from L1 directly (instead of DDR)")
    optim_dict = {"c0_optim_flg": c0_optim_flg}

    return shape_fm, shape_filter, padh, padw, strideh, stridew, \
        dlt_h, dlt_w, optim_dict, fusion_para
# Code example #7
# File: depthwise_conv2d.py  Project: gekowa/ascend-opp
def depthwise_conv2d(
        x,
        filter,
        bias,
        offset_w,
        y,
        strides,
        dilations=(1, 1, 1, 1),
        pads=(0, 0, 0, 0),
        data_format='NHWC',
        offset_x=0,
        kernel_name="depthwise_conv2d",
):
    """
    algorithm: depthwise conv2d

    Validate the arguments, build the depthwise convolution compute and
    compile the kernel.

    NOTE(review): this copy of the function was truncated in many places.
    The parameter order follows the original parameter documentation and the
    two surviving defaults (dilations, pads); the defaults of data_format,
    offset_x and kernel_name, the placeholder keyword arguments and the
    N/C stride error call are reconstructions - confirm against upstream.

    Parameters
    ----------
    x : a dict of featureMap
        {"shape", "dtype", "format"}
        shape of input tensor [N, C1, H, W, C0],
        dtype float16 or int8, format must be NC1HWC0.

    filter : a dict of filter
        {"shape", "dtype"}
        shape of filter tensor [C1, H, W, K, Co, C0],
        K is depthwise_multiplier and must be 1; dtype float16 or int8.

    bias : a dict of bias, None or {} when there is no bias
        {"shape", "dtype"}
        shape of bias tensor [C1*C0,]

    offset_w : a dict of filter offset
        {"shape", "dtype"}
        shape of offset tensor [C1, H, W, K, Co, C0]
        (not read by this function)

    y : a dict of output
        {"shape", "dtype"}
        shape of output tensor [N, C1, H, W, C0],
        dtype float16 or int32.

    strides : a list/tuple of four ints
        strides size, [1, 1, stride_height, stride_width] or
        [1, stride_height, stride_width, 1]

    dilations : a list/tuple of four ints
        dilation size, [1, 1, dilation_height, dilation_width] or
        [1, dilation_height, dilation_width, 1]

    pads : padding added to each dimension of the input, four values

    data_format : a str of featuremap original shape
        original layout of the featuremap, 'NCHW' or 'NHWC'

    offset_x : offset of the input

    kernel_name : str
       cce kernel name

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        When a format, shape, stride, dilation or pad check fails.
    """
    def _fail(dict_args):
        """Raise the structured error used by every check below."""
        raise RuntimeError(dict_args, err_mana.get_error_message(dict_args))

    shape_w = filter.get("shape")
    shape_in = x.get("shape")
    output_dtype = y.get("dtype")
    in_dtype = x.get("dtype")
    w_dtype = filter.get("dtype")
    fmap_data_format = x.get("format")

    op_utils.check_dtype(in_dtype, ('float16', 'int8'), param_name="x")
    op_utils.check_dtype(w_dtype, ('float16', 'int8'), param_name="filter")
    op_utils.check_dtype(output_dtype, ('float16', 'int32'), param_name="y")

    if fmap_data_format != "NC1HWC0":
        _fail({
            'errCode': 'E60008',
            'op_name': 'depthwise_conv2d',
            'param_name': 'featuremap',
            'expected_format_list': '[{}]'.format('NC1HWC0'),
            'format': fmap_data_format
        })

    def _check_shape(fmap_shape, filter_shape):
        """check input shape"""
        # The rank must be verified before unpacking, otherwise a wrong rank
        # surfaces as a bare ValueError and this check is never reached.
        if len(fmap_shape) != FEATURE_MAP_DIM:
            _fail({
                'errCode': 'E60008',
                'op_name': 'depthwise_conv2d',
                'param_name': 'featuremap',
                'expected_format_list': '[{}]'.format('NC1HWC0'),
                'format': fmap_data_format
            })

        _, in_c1, _, _, _ = fmap_shape
        filter_c1, _, _, filter_k, _, _ = filter_shape

        # check feature map shape of c, equal filter of c
        if in_c1 != filter_c1:
            _fail({
                'errCode': 'E60002',
                'op_name': 'depthwise_conv2d',
                'attr_name': 'channel',
                'param1_name': 'fmap',
                'param2_name': 'filter',
                'param1_value': str(in_c1),
                'param2_value': str(filter_c1)
            })

        # check multiplier equal 1
        if filter_k != 1:
            _fail({
                'errCode': 'E60000',
                'op_name': 'depthwise_conv2d',
                'param_name': 'filter_k',
                'expected_value': '1',
                'input_value': str(filter_k)
            })

    if data_format != 'NCHW' and data_format != 'NHWC':
        _fail({
            'errCode': 'E50002',
            'op_name': 'depthwise_conv2d',
            'param': 'featuremap',
            'expected_format_list': '[{}, {}]'.format('NCHW', 'NHWC'),
            'format': data_format
        })

    _check_shape(shape_in, shape_w)

    # fmap shape: NC1HWC0; filter shape: C1HWNCoC0 with the multiplier axis
    # (index 3) dropped for the compute.
    in_n, in_c1, in_h, in_w, in_c0 = shape_in
    fmap_shape_5d = in_n, in_c1, in_h, in_w, in_c0
    shape_w_5d = shape_w[0], shape_w[1], shape_w[2], shape_w[4], shape_w[5]
    filter_c1 = shape_w[0]

    dim_n, dim_c, dim_h, dim_w = 0, 1, 2, 3  # NCHW
    if data_format == 'NHWC':
        dim_n, dim_h, dim_w, dim_c = 0, 1, 2, 3

    # strides must be a list/tuple of exactly four values. The original
    # condition (``not isinstance(...) and len(...) == 4``) could never be
    # true for a list or tuple, so wrong lengths slipped through.
    if not isinstance(strides, (list, tuple)) or len(strides) != 4:
        _fail({
            'errCode': 'E60107',
            'op_name': 'depthwise_conv2d',
            'param_name': 'strides'
        })

    if strides[dim_n] != 1 or strides[dim_c] != 1:
        # NOTE(review): only the message survived in this copy; the call is
        # restored in the err_man style used elsewhere in the file.
        err_man.raise_err_specific_user(
            "depthwise_conv2d",
            "stride only support 1 in N axis and C axis.")
    if strides[dim_h] != strides[dim_w]:
        _fail({
            'errCode': 'E60002',
            'op_name': 'depthwise_conv2d',
            'attr_name': 'stride value',
            'param1_name': 'strides[DIM_H]',
            'param2_name': 'strides[DIM_W]',
            'param1_value': str(strides[dim_h]),
            'param2_value': str(strides[dim_w])
        })
    if dilations[dim_n] != 1 or dilations[dim_c] != 1:
        _fail({
            'errCode': 'E60023',
            'op_name': 'depthwise_conv2d',
            'dilation_n': str(dilations[dim_n]),
            'dilation_c': str(dilations[dim_c])
        })
    if dilations[dim_h] != dilations[dim_w]:
        _fail({
            'errCode': 'E60002',
            'op_name': 'depthwise_conv2d',
            'attr_name': 'dilations value',
            'param1_name': 'dilations[DIM_H]',
            'param2_name': 'dilations[DIM_W]',
            'param1_value': str(dilations[dim_h]),
            'param2_value': str(dilations[dim_w])
        })

    # check pad parameter
    if len(pads) != 4:
        _fail({
            'errCode': 'E50001',
            'param': 'pads',
            'op_name': 'depthwise_conv2d',
            'expected_length': "4",
            'length': str(len(pads))
        })

    strides_2d = strides[dim_h], strides[dim_w]
    dilations_2d = dilations[dim_h], dilations[dim_w]

    # NOTE(review): the keyword arguments of the three placeholders were cut
    # off in this copy; names and dtypes below are assumptions - confirm.
    bias_tensor = None
    if bias is not None and bias != {}:
        bias_tensor = tvm.placeholder((filter_c1 * 16, ),
                                      name='bias_tensor',
                                      dtype=output_dtype.lower())
    fmap_placeholder = tvm.placeholder(fmap_shape_5d,
                                       dtype=in_dtype.lower(),
                                       name='fmap')
    filter_placeholder = tvm.placeholder(shape_w_5d,
                                         dtype=w_dtype.lower(),
                                         name='filter')
    dsl_flag = False
    out = te.lang.cce.te_compute.depthwise_conv2d_compute(
        fmap_placeholder, filter_placeholder, output_dtype.lower(), strides_2d,
        pads, dilations_2d, {
            "bias_tensor": bias_tensor,
            "dsl_flag": dsl_flag,
            "offset_x": offset_x
        }, None, kernel_name)

    tensor_list = [fmap_placeholder, filter_placeholder, out]
    if bias_tensor is not None:
        tensor_list = [fmap_placeholder, filter_placeholder, bias_tensor, out]

    with tvm.target.cce():
        sch = generic.auto_schedule(out)

    with tbe_platform.build_config:
        tvm.build(sch, tensor_list, "cce", name=kernel_name)