Example #1
def conv_bn1_run(fmap_shape,
                 filter_shape,
                 pad,
                 stride,
                 dilation,
                 use_bias=False,
                 attrs=None):
    vc_util.convolution_format_check(fmap_shape, filter_shape, pad, stride,
                                     dilation)
    if use_bias:
        raise ValueError("bias is not supported yet")

    conv_dtype = 'float16'
    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fmap_shape, filter_shape, pad,
                                                 stride, dilation)
    IN, IC, IH, IW = fm_shape
    WN, WC, WH, WW = w_shape
    C0 = 16

    input_shape = [(IN, IC // C0, IH, IW, C0),
                   (WC // C0 * WH * WW, WN // 16, 16, C0)]
    mod = utils.op_build_test(conv_bn1.conv_bn1, [input_shape], [conv_dtype],
                              op_attrs=[
                                  fmap_shape, filter_shape, pad, stride,
                                  dilation, use_bias, attrs
                              ],
                              kernel_name='conv_bn1',
                              attrs=attrs)

    fmap_data, filter_data, bias_data, conv_expect = \
        gen_data(fmap_shape, filter_shape, pad, stride, dilation, use_bias)

    axes = (0, 2, 3)
    conv_mean = np.mean(conv_expect, axis=axes, keepdims=True)
    conv_square = np.power(conv_expect, 2)
    conv_var_part = np.mean(conv_square, axis=axes, keepdims=True)

    expects = (conv_expect, conv_var_part, conv_mean)

    out_datas = [np.full(e.shape, 0, 'float16') for e in expects]
    out_datas[1] = out_datas[1].astype(np.float32)
    out_datas[2] = out_datas[2].astype(np.float32)

    in_data = [fmap_data, filter_data]

    args = tuple(in_data + out_datas)

    outputs = utils.mod_launch(mod, args, outputs=(-3, -2, -1), expect=expects)
    rtol, atol = get_rtol_atol("conv_bn1", conv_dtype)
    cmp_res = list(
        map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), outputs,
            expects))
    return (fmap_data, filter_data, bias_data), outputs, expects, all(cmp_res)
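
A quick NumPy-only sanity check (not part of the original test; all names below are illustrative) of the decomposition this test relies on: the batch variance can be recovered from the two extra outputs of conv_bn1, since var(x) = E[x^2] - (E[x])^2.

import numpy as np

x = np.random.randn(2, 32, 4, 4).astype(np.float32)  # NCHW
axes = (0, 2, 3)
mean = np.mean(x, axis=axes, keepdims=True)                 # conv_mean above
var_part = np.mean(np.square(x), axis=axes, keepdims=True)  # conv_var_part above
np.testing.assert_allclose(var_part - np.square(mean),
                           np.var(x, axis=axes, keepdims=True),
                           rtol=1e-5, atol=1e-6)
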
Example #2
def conv_backprop_filter(data, fmap_shape, filter_shape, pad_, stride_, dilation_, attrs=None):
    """
    Computes dw according "conv forward".

    Args:
        data (list[tvm.tensor.Tensor]): list with length 2.
              data[0](consider as dy) Tensor of type float16 ,shape 5D(out_n, out_c//C0, out_h, out_w,C0)
              data[1](consider as x)  Tensor of type float16 ,shape 5D(fN,fC//C0,fH,fW,C0)
        fmap_shape (list[int]): [fN, fC, fH, fW]
        filter_shape (list[int]): [wN, wC, wH, wW]
        pad_ (list[int]): [pad_left, pad_right, pad_top, pad_bottom]
        stride_ (list[int]): [stride_h, stride_w]
        dilation_ (list[int]): [dilation_h, dilation_w]
        attrs (dict): a dict with keys like conv_tile,bypass.

    Returns:
        tvm.tensor.Tensor.
        configs.
    """

    if len(data) != 2:
        raise IndexError(
            "data must contain exactly 2 tensors: dy and the feature map")

    vc_util.convolution_format_check(fmap_shape, filter_shape, pad_, stride_, dilation_)

    block_size = 16

    in_n, in_c, in_h, in_w = fmap_shape
    cout, _, w_h, w_w = filter_shape

    in_c = (in_c + block_size - 1) // block_size * block_size
    cout = (cout + block_size - 1) // block_size * block_size

    pad_top, pad_bottom, pad_left, pad_right = pad_
    stride_h, stride_w = stride_

    dilation_h, dilation_w = dilation_
    if dilation_h != 1 or dilation_w != 1:
        raise ValueError("The value of elements in dilation must be 1")

    out_n = in_n
    out_c = cout
    out_h = (in_h + pad_top + pad_bottom - w_h) // stride_h + 1
    out_w = (in_w + pad_left + pad_right - w_w) // stride_w + 1

    dy_shape = (out_n, out_c, out_h, out_w)
    dx_shape = (in_n, in_c, in_h, in_w)
    dw_shape = (cout, in_c, w_h, w_w)

    key = gen_key(fmap_shape, filter_shape, pad_, stride_, dilation_)
    res_c, configs = conv_backprop_filter_compute(data, dx_shape, dw_shape, dy_shape, pad_, stride_, dilation_,
                                                  block_size=block_size, attrs=attrs, key=key)

    return res_c, configs
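
The out_h/out_w arithmetic above is the standard convolution output-size formula; a minimal standalone restatement (hypothetical helper name, dilation fixed at 1 as the function enforces):

def conv_out_hw(in_h, in_w, k_h, k_w, pad_, stride_):
    # out = (in + pad_before + pad_after - kernel) // stride + 1
    pad_top, pad_bottom, pad_left, pad_right = pad_
    stride_h, stride_w = stride_
    out_h = (in_h + pad_top + pad_bottom - k_h) // stride_h + 1
    out_w = (in_w + pad_left + pad_right - k_w) // stride_w + 1
    return out_h, out_w

assert conv_out_hw(224, 224, 7, 7, (3, 3, 3, 3), (2, 2)) == (112, 112)
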
Example #3
def conv_core(data,
              fmap_shape,
              filter_shape,
              pad,
              stride,
              dilation,
              use_bias=False,
              attrs=None):
    """core computation for op conv."""
    if use_bias:
        if len(data) != 3:
            raise IndexError(
                "data should contain 3 tensors, i.e. feature map, filter and bias"
            )
        if data[2].dtype != "float16":
            raise TypeError("data type of bias should be float16")
    else:
        if len(data) != 2:
            raise IndexError(
                "data should contain 2 tensors, i.e. feature map and filter")
    if data[0].dtype != "float16":
        raise TypeError("data type of feature map should be float16")
    if data[1].dtype != "float16":
        raise TypeError("data type of filter should be float16")
    if not isinstance(use_bias, bool):
        raise TypeError("use_bias should be a bool (True or False)")

    all_dynamic = 0  # kh kw pad stride
    partial_dynamic = 0  # fn fc1 fh fw wN wC
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        all_dynamic = 1
    if attrs.get("partial_dynamic"):
        partial_dynamic = 1
    dynamic = partial_dynamic or all_dynamic
    dynamic_tiling = 1 if attrs.get("dynamic") else 0
    use_autotiling = 1 if dynamic and not dynamic_tiling else 0
    block_size = 16

    if not dynamic:
        vc_util.convolution_format_check(fmap_shape, filter_shape, pad, stride,
                                         dilation)
        for tmp_data in data:
            shape = [x.value for x in tmp_data.shape]
            vc_util.check_shape(shape)
        vc_util.check_shape(fmap_shape)
        vc_util.check_shape(filter_shape)

    stride_len = 2
    pad_len = 4
    dilation_len = 2
    zero = 0
    max_s = 63
    max_d = 255

    if isinstance(stride, int):
        stride = [stride] * stride_len
    elif isinstance(stride, (list, tuple)) and len(stride) == 1:
        # only has one element
        stride = list(stride) * stride_len
    elif isinstance(stride, (list, tuple)) and len(stride) == stride_len:
        pass
    else:
        raise IndexError("illegal stride parameter")

    if not dynamic:
        for val in stride:
            if val <= zero:
                raise ValueError(
                    "elements in stride should be greater than zero")
            if val > max_s:
                raise ValueError(
                    "elements in stride should be less than 64")

    if isinstance(pad, int):
        pad = [pad] * pad_len
    elif isinstance(pad, (list, tuple)) and len(pad) == 1:
        # only has one element
        pad = list(pad) * pad_len
    elif isinstance(pad, (list, tuple)) and len(pad) == pad_len:
        pass
    else:
        raise IndexError("illegal pad parameter")

    if not dynamic:
        for val in pad:
            if val < zero:
                raise ValueError(
                    "elements in pad should not be less than zero")
            if val > max_d:
                raise ValueError("elements in pad should be less than 256")

    if isinstance(dilation, int):
        dilation = [dilation] * dilation_len
    elif isinstance(dilation, (list, tuple)) and len(dilation) == 1:
        # only has one element
        dilation = list(dilation) * dilation_len
    elif isinstance(dilation, (list, tuple)) and len(dilation) == dilation_len:
        pass
    else:
        raise IndexError("illegal dilation parameter")

    for val in dilation:
        if val <= zero:
            raise ValueError(
                "elements in dilation should be greater than zero")
        if val > max_d:
            raise ValueError(
                "elements in dilation should be less than 256")

    if (len(stride) != stride_len or len(pad) != pad_len
            or len(dilation) != dilation_len):
        raise IndexError("stride, pad and dilation must have the expected lengths")

    block_size_sub_one = block_size - 1
    # input shape (NCHW -> NC1HWC0)
    in_n, in_c, in_h, in_w = fmap_shape
    in_c = (in_c + block_size_sub_one) // block_size * block_size

    # kernel shape (NCHW -> NC1HWC0 -> Fractal)
    k_n, k_c, k_h, k_w = filter_shape
    k_c = (k_c + block_size_sub_one) // block_size * block_size
    k_n = (k_n + block_size_sub_one) // block_size * block_size

    # padding(padding_top, padding_bottom, padding_left, padding_right)
    p_top, p_bottom, p_left, p_right = pad

    # stride (stride_h, stride_w)
    s_h, s_w = stride

    k_h_real = k_h
    k_w_real = k_w
    p_top_real = p_top
    p_bottom_real = p_bottom
    p_left_real = p_left
    p_right_real = p_right
    s_h_real = s_h
    s_w_real = s_w

    if dynamic_tiling:
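        # NOTE: the *_fake names used below (k_h_fake, s_h_fake, p_top_fake, ...,
        # and tile_out_h_fake, m_cut_fake, etc. further down) are not defined in
        # this excerpt; they are assumed to be placeholder variables provided at
        # module level in the original source.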
        k_h = k_h_fake
        k_w = k_w_fake
        p_top = p_top_fake
        p_bottom = p_bottom_fake
        p_left = p_left_fake
        p_right = p_right_fake
        s_h = s_h_fake
        s_w = s_w_fake

    # dilation (dilation_h, dilation_w)
    d_h, d_w = dilation

    # tiling
    key = []
    key.append(tuple(fmap_shape))
    key.append(tuple(filter_shape))
    key.append(tuple(pad))
    key.append(tuple(stride))
    key.append(tuple(dilation))
    key.append(use_bias)

    hash_key = str(tuple(key))

    k_w_d = (k_w - 1) * d_w + 1
    out_w = (in_w + p_left + p_right - k_w_d) // (s_w) + 1

    bypass_list = [0, 1]
    bypass = 0 if dynamic else 1

    # (NC1HWCO)
    a_value = data[0]

    # (fractal)
    b_value = data[1]
    setdim_map = conv_set_dim_map

    conv_tile_num = 5
    if attrs is not None and "conv_tile" in attrs and len(
            attrs["conv_tile"]) >= conv_tile_num:
        use_autotiling = 0
        tile_hh = attrs["conv_tile"][0]
        tile_coco = attrs["conv_tile"][1]
        tile_mm = attrs["conv_tile"][2]
        tile_kk = attrs["conv_tile"][3]
        tile_nn = attrs["conv_tile"][4]
        if len(attrs["conv_tile"]) > conv_tile_num:
            tile_ww = attrs["conv_tile"][conv_tile_num]
        else:
            tile_ww = (out_w - 1) * s_w + k_w_d
        if "bypass" in attrs:
            bypass = attrs["bypass"]
    elif hash_key in setdim_map:
        configs = setdim_map[hash_key]
        if isinstance(configs, tuple):
            tiles = configs[0]
            if "bypass" in configs[1]:
                bypass = configs[1]["bypass"]
        else:
            tiles = configs
        if len(tiles) > conv_tile_num:
            tile_hh, tile_coco, tile_mm, tile_kk, tile_nn, tile_ww = tiles
        else:
            tile_hh, tile_coco, tile_mm, tile_kk, tile_nn = tiles
            tile_ww = (out_w - 1) * s_w + k_w_d
    else:
        win_cut_h = 1
        k_h_d = (k_h - 1) * d_h + 1
        win_h = (in_h + p_top + p_bottom - k_h_d) // (s_h) + 1
        if not dynamic:
            while win_cut_h <= win_h:
                if (((win_h + win_cut_h - 1) // win_cut_h - 1) * win_cut_h -
                        1) * s_h + k_h_d <= in_h + p_top:
                    break
                win_cut_h += 1
        tile_hh = (win_cut_h - 1) * s_h + k_h_d
        tile_ww = (out_w - 1) * s_w + k_w_d
        tile_coco = block_size
        tile_mm = block_size
        tile_kk = block_size
        tile_nn = block_size
    if bypass not in bypass_list:
        raise ValueError("bypass of conv only supports %s" %
                         (",".join(str(bypass_list))))

    if tile_hh == in_h:
        tile_hh += p_top + p_bottom

    if tile_ww == in_w:
        tile_ww += p_left + p_right

    tile_coco = (tile_coco + block_size_sub_one) // block_size * block_size
    tile_mm = (tile_mm + block_size_sub_one) // block_size * block_size
    tile_kk = (tile_kk + block_size_sub_one) // block_size * block_size
    tile_nn = (tile_nn + block_size_sub_one) // block_size * block_size

    input_shape_nc1hwc0 = get_shape(data[0])
    if not dynamic and input_shape_nc1hwc0 != [
            in_n, in_c // block_size, in_h, in_w, block_size
    ]:
        raise ValueError("feature map tensor data[0] shape illegal !!!")
    in_n, c1_in, in_h, in_w, _ = input_shape_nc1hwc0

    if not dynamic:
        kernel_shape_nc1hwc0 = (k_n, k_c // block_size, k_h, k_w, block_size)
    else:
        kernel_shape_nc1hwc0 = (k_n, c1_in, k_h, k_w,
                                block_size)  # simplified for the dynamic case
    k_n, k_c1, k_h, k_w, k_c0 = kernel_shape_nc1hwc0
    kernel_shape_fractal = get_shape(data[1])
    if not dynamic and kernel_shape_fractal != [
            k_c1 * k_h * k_w, k_n // block_size, block_size, k_c0
    ]:
        raise ValueError("filter tensor data[1] shape illegal !!!")

    if use_bias:
        bias_value = data[2]
        bias_name = bias_value.op.name
        bias_shape = [x.value for x in data[2].shape]
        if bias_shape != [1, k_n // block_size, 1, 1, block_size]:
            raise ValueError("bias tensor data[2] shape illegal !!!")
    else:
        bias_name = "None"
        bias_value = None

    # Create reduction variables
    kc1 = akg.tvm.reduce_axis((0, k_c1), name="kc1")
    kh = akg.tvm.reduce_axis((0, k_h), name="kh")
    kw = akg.tvm.reduce_axis((0, k_w), name="kw")
    kc0 = akg.tvm.reduce_axis((0, k_c0), name="kc0")

    k_h_d = (k_h - 1) * d_h + 1
    k_h_d_real = (k_h_real - 1) * d_h + 1
    k_w_d = (k_w - 1) * d_w + 1
    k_w_d_real = (k_w_real - 1) * d_w + 1
    out_h = (in_h + p_top + p_bottom - k_h_d) // (s_h) + 1
    tile_out_h = (tile_hh - k_h_d) // s_h + 1
    tile_out_h_real = (tile_hh - k_h_d_real) // s_h_real + 1
    out_w = (in_w + p_left + p_right - k_w_d) // (s_w) + 1
    tile_out_w = (tile_ww - k_w_d) // s_w + 1
    tile_out_w_real = (tile_ww - k_w_d_real) // s_w_real + 1

    if not dynamic:
        out_shape_nc1hwc0 = (in_n, k_n // block_size, out_h, out_w, block_size)
    else:
        _, c1_out, _, _ = data[1].shape
        out_shape_nc1hwc0 = (in_n, c1_out, out_h, out_w, block_size)
    _, out_c1, out_h, out_w, _ = out_shape_nc1hwc0

    if tile_coco > 0:
        c1_cut = tile_coco // block_size
    else:
        c1_cut = out_c1

    # Compute the convolution
    output_name = "output0"
    conv_attr = {
        "pragma_conv_kernel_n": k_n,
        "pragma_conv_kernel_h": k_h,
        "pragma_conv_kernel_w": k_w,
        "pragma_conv_padding_top": p_top,
        "pragma_conv_padding_bottom": p_bottom,
        "pragma_conv_padding_left": p_left,
        "pragma_conv_padding_right": p_right,
        "pragma_conv_bypass_l1": bypass,
        "pragma_conv_stride_h": s_h,
        "pragma_conv_stride_w": s_w,
        "pragma_conv_dilation_h": d_h,
        "pragma_conv_dilation_w": d_w,
        "pragma_conv_fm_n": in_n,
        "pragma_conv_fm_c": in_c,
        "pragma_conv_fm_h": in_h,
        "pragma_conv_fm_w": in_w,
        "feature": a_value.op.name,
        "filter": b_value.op.name,
        "bias": bias_name,
        "res": output_name
    }

    if dynamic_tiling:
        conv_attr["pragma_conv_h_cut"] = (tile_out_h_fake - 1) * s_h + k_h_d
        conv_attr["pragma_conv_w_cut"] = (tile_out_w_fake - 1) * s_w + k_w_d
        conv_attr["pragma_conv_co_cut"] = c1_cut_fake * 16
        conv_attr["pragma_conv_m_cut"] = m_cut_fake
        conv_attr["pragma_conv_k_cut"] = k_cut_fake
        conv_attr["pragma_conv_n_cut"] = n_cut_fake
        conv_attr["pragma_conv_tile_co"] = c1_cut
        conv_attr["pragma_conv_tile_ho"] = tile_out_h_real
        conv_attr["pragma_conv_tile_wo"] = tile_out_w_real
        conv_attr["pragma_conv_tile_mo"] = tile_mm // 16
        conv_attr["pragma_conv_tile_ko"] = tile_kk // 16
        conv_attr["pragma_conv_tile_no"] = tile_nn // 16
        conv_attr["pragma_conv_real_kh"] = k_h_real
        conv_attr["pragma_conv_real_kw"] = k_w_real
        conv_attr["pragma_conv_real_sh"] = s_h_real
        conv_attr["pragma_conv_real_sw"] = s_w_real
        conv_attr["pragma_conv_real_pt"] = p_top_real
        conv_attr["pragma_conv_real_pb"] = p_bottom_real
        conv_attr["pragma_conv_real_pl"] = p_left_real
        conv_attr["pragma_conv_real_pr"] = p_right_real
    elif not use_autotiling:
        conv_attr["pragma_conv_h_cut"] = (tile_out_h - 1) * s_h + k_h_d
        conv_attr["pragma_conv_w_cut"] = (tile_out_w - 1) * s_w + k_w_d
        conv_attr["pragma_conv_co_cut"] = c1_cut * k_c0
        conv_attr["pragma_conv_m_cut"] = tile_mm
        conv_attr["pragma_conv_k_cut"] = tile_kk
        conv_attr["pragma_conv_n_cut"] = tile_nn
    c_value = akg.tvm.compute(
        out_shape_nc1hwc0,
        lambda n, c1, h, w, c0: akg.lang.cce.mmad(
            (akg.tvm.if_then_else(
                akg.tvm.any((h * s_h + kh) < p_top,
                            (h * s_h + kh) > (in_h + p_top - 1),
                            (w * s_w + kw) < p_left,
                            (w * s_w + kw) > (in_w + p_left - 1)),
                akg.tvm.const(0.0, "float16"),
                a_value[n, kc1, h * s_h + kh * d_h - p_top,
                        w * s_w + kw * d_w - p_left, kc0]) *
             b_value[(kc1 * k_h + kh) * k_w + kw, c1, c0, kc0]).astype("float32"),
            axis=[kc1, kh, kw, kc0]),
        name=output_name,
        attrs=conv_attr)
    return c_value
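
Channel counts above are rounded up to multiples of the 16-element block before the NC1HWC0 shapes are formed; a small standalone sketch of that bookkeeping (illustrative names, not from the source):

def round_up(x, block=16):
    # ceil(x / block) * block, as applied to in_c, k_c and k_n above
    return (x + block - 1) // block * block

def nc1hwc0_shape(n, c, h, w, block=16):
    c = round_up(c, block)
    return (n, c // block, h, w, block)

assert round_up(3) == 16
assert nc1hwc0_shape(1, 3, 224, 224) == (1, 1, 224, 224, 16)
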
Example #4
def conv_run(fmap_shape,
             filter_shape,
             pad,
             stride,
             dilation,
             use_bias=False,
             attrs=None,
             dump_data=False):
    conv_dtype = 'float16'

    vc_util.convolution_format_check(fmap_shape, filter_shape, pad, stride,
                                     dilation)

    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fmap_shape, filter_shape, pad,
                                                 stride, dilation)
    IN, IC, IH, IW = fm_shape
    WN, WC, WH, WW = w_shape
    C0 = 16

    if use_bias:
        input_shape = [(IN, IC // C0, IH, IW, C0),
                       (WC // C0 * WH * WW, WN // 16, 16, C0),
                       (1, WN // 16, 1, 1, 16)]
    else:
        input_shape = [(IN, IC // C0, IH, IW, C0),
                       (WC // C0 * WH * WW, WN // 16, 16, C0)]

    input_file = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    expect_file = input_file + "/" + gen_kernel_name(
        [input_shape], [conv_dtype],
        op_attrs=[
            fmap_shape, filter_shape, pad, stride, dilation, use_bias, attrs
        ],
        kernel_name='conv',
        attrs=attrs) + ".bin"

    all_dynamic = 0  # kh kw pad stride
    partial_dynamic = 0  # fn fc1 fh fw wN wC
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        all_dynamic = 1
        print("=================all dynamic==================")
    if attrs.get("partial_dynamic"):
        partial_dynamic = 1
        print("=================partial dynamic==================")
    dynamic = partial_dynamic or all_dynamic

    if not dynamic:
        print("=================static shape==================")
    if dynamic:
        fmap_shape_real = fmap_shape
        filter_shape_real = filter_shape
        pad_real = pad
        stride_real = stride
        dilation_real = dilation

        if partial_dynamic or all_dynamic:
            N = tvm.var("N")
            C = tvm.var("CI")
            CI1 = tvm.var("CI1")
            H = tvm.var("H")
            W = tvm.var("W")

            COUT = tvm.var("CO")
            CO1 = tvm.var("CO1")
            _, _, KH, KW = filter_shape
            SH, SW = stride
            PT, PB, PL, PR = pad

        params = ()
        if all_dynamic:
            PARAM_KH = tvm.var("KH")
            PARAM_KW = tvm.var("KW")
            PARAM_PT = tvm.var("PT")
            PARAM_PB = tvm.var("PB")
            PARAM_PL = tvm.var("PL")
            PARAM_PR = tvm.var("PR")
            PARAM_SH = tvm.var("SH")
            PARAM_SW = tvm.var("SW")

            PARAM_T1_0_H = tvm.var("T1_0_H")
            PARAM_T1_0_W = tvm.var("T1_0_W")
            PARAM_T1_0_C1 = tvm.var("T1_0_C1")
            PARAM_T0_0_MO = tvm.var("T0_0_MO")
            PARAM_T0_0_NO = tvm.var("T0_0_NO")
            PARAM_T0_0_KO = tvm.var("T0_0_KO")

            params = (PARAM_KH, PARAM_KW, PARAM_PT, PARAM_PB, PARAM_PL,
                      PARAM_PR, PARAM_SH, PARAM_SW, PARAM_T1_0_H, PARAM_T1_0_W,
                      PARAM_T1_0_C1, PARAM_T0_0_MO, PARAM_T0_0_NO,
                      PARAM_T0_0_KO)

        DEBUG = 1
        if dynamic:
            KH_FAKE = 11
            KW_FAKE = 31
            fmap_shape = (N, C, H, W)
            filter_shape = (COUT, C, KH, KW)
            if not DEBUG:
                CO1 = (COUT + 15) // 16
                CI1 = (C + 15) // 16
            if use_bias:
                # input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0), (1, WN // 16, 1, 1, 16)]
                if all_dynamic:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH_FAKE * KW_FAKE, CO1, 16, 16),
                                   (1, CO1, 1, 1, 16)]
                else:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH * KW, CO1, 16, 16),
                                   (1, CO1, 1, 1, 16)]
            else:
                # input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0)]
                if all_dynamic:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH_FAKE * KW_FAKE, CO1, 16, 16)]
                else:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH * KW, CO1, 16, 16)]

        mod = utils.op_build_test(Conv, [input_shape], [conv_dtype],
                                  op_attrs=[
                                      fmap_shape, filter_shape, pad, stride,
                                      dilation, use_bias, attrs, params
                                  ],
                                  kernel_name='conv',
                                  attrs=attrs)
        fmap_data, filter_data, bias_data, expect = gen_data(
            fmap_shape_real, filter_shape_real, pad_real, stride_real,
            dilation_real, use_bias, expect_file)
    else:
        mod = utils.op_build_test(Conv, [input_shape], [conv_dtype],
                                  op_attrs=[
                                      fmap_shape, filter_shape, pad, stride,
                                      dilation, use_bias, attrs
                                  ],
                                  kernel_name='conv',
                                  attrs=attrs)
        fmap_data, filter_data, bias_data, expect = gen_data(
            fmap_shape, filter_shape, pad, stride, dilation, use_bias,
            expect_file)

    if dump_data:
        with open('input.bin', 'wb') as fo:
            fo.write(fmap_data.astype(np.float16, copy=False))
        with open('filter.bin', 'wb') as fo:
            fo.write(filter_data.astype(np.float16, copy=False))
        with open('bias.bin', 'wb') as fo:
            fo.write(bias_data.astype(np.float16, copy=False))
        with open('output.bin', 'wb') as fo:
            fo.write(expect.astype(np.float16, copy=False))

    out_data = np.full(expect.shape, np.nan, 'float16')

    if use_bias:
        input = [fmap_data, filter_data, bias_data]
    else:
        input = [fmap_data, filter_data]

    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "Yes":
        return input, out_data, expect, True

    if not dynamic:
        args = input
        args.append(out_data)
        args = tuple(args)
        out_data = utils.mod_launch(mod, args, expect=expect)
    else:
        args = []
        args.append(fmap_data)
        args.append(filter_data)
        args.append(out_data)
        if partial_dynamic or all_dynamic:
            args.append(IN)
            args.append(IC)
            args.append(IH)
            args.append(IW)
            args.append(WN)
        if all_dynamic:
            args.append(KH)
            args.append(KW)
            args.append(PT)
            args.append(PB)
            args.append(PL)
            args.append(PR)
            args.append(SH)
            args.append(SW)
            if attrs.get("conv_tile") and len(attrs["conv_tile"]) == 7:
                T1_0_H = attrs["conv_tile"][0]
                T1_0_C1 = attrs["conv_tile"][1]
                T0_0_MO = attrs["conv_tile"][2]
                T0_0_KO = attrs["conv_tile"][3]
                T0_0_NO = attrs["conv_tile"][4]
                T1_0_W = attrs["conv_tile"][5]
                if T1_0_H == IH:
                    T1_0_H += PT + PB
                T1_0_H_cut = (T1_0_H - KH) // SH + 1
                if T1_0_W == IW:
                    T1_0_W += PL + PR
                T1_0_W_cut = (T1_0_W - KW) // SW + 1
                args.append(T1_0_H_cut)
                args.append(T1_0_W_cut)
                args.append((T1_0_C1 + 15) // 16)
                args.append((T0_0_MO + 15) // 16)
                args.append((T0_0_NO + 15) // 16)
                args.append((T0_0_KO + 15) // 16)
        if DEBUG:
            args.append(IC // 16)
            args.append(WN // 16)
        block_dim = min(32, IN)
        args.append(block_dim)
        out_data = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)

    rtol, atol = get_rtol_atol("conv", conv_dtype)
    return input, out_data, expect, compare_tensor(out_data,
                                                   expect,
                                                   rtol=rtol,
                                                   atol=atol,
                                                   equal_nan=True)
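
gen_data (not shown here) is expected to deliver the feature map already packed in NC1HWC0; a NumPy-only illustration of that packing (hypothetical helper, assumes C is divisible by 16):

import numpy as np

def to_nc1hwc0(x, c0=16):
    n, c, h, w = x.shape
    assert c % c0 == 0
    # split C into C1 blocks of C0 channels, then move C0 innermost
    return x.reshape(n, c // c0, c0, h, w).transpose(0, 1, 3, 4, 2)

x = np.zeros((2, 32, 4, 4), np.float16)
print(to_nc1hwc0(x).shape)  # (2, 2, 4, 4, 16)
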
Example #5
def _get_space_conv(op_desc: ConvDesc):
    """get config space of convolution"""
    if not isinstance(op_desc, ConvDesc):
        raise TypeError('op_desc must be ConvDesc')

    stride_ = op_desc.stride
    pad_ = op_desc.pad
    dilation_ = op_desc.dilation
    vc_util.convolution_format_check(op_desc.fmap_shape, op_desc.filter_shape,
                                     pad_, stride_, dilation_)
    config_space = ListConfigSpace(ConvConfig)

    # if double buffering is not enabled, set its value to 1
    size_scale = 1

    l1_max_size = (1024 * 1024) // size_scale
    l0a_max_size = (64 * 1024) // size_scale
    l0b_max_size = (64 * 1024) // size_scale
    l0c_max_size = ((256 - 8) * 1024) // size_scale // 2

    _, in_c, in_h, in_w = op_desc.fmap_shape
    k_n, _, k_h, k_w = op_desc.filter_shape
    padding = (pad_[0], pad_[1], pad_[2], pad_[3])
    p_top, p_bottom, p_left, p_right = padding
    s_h, s_w = stride_

    in_c = ((in_c - 1) // 16 + 1) * 16
    tile_c = in_c
    tile_co_start = 16

    data_len = 2

    h_max = in_h + p_top + p_bottom
    win_h = (h_max - k_h) // s_h + 1
    h_max = (h_max - k_h) // s_h * s_h + k_h
    w_max = in_w + p_left + p_right
    win_w = (w_max - k_w) // s_w + 1
    w_max = (w_max - k_w) // s_w * s_w + k_w

    bypass_options = [0, 1]

    for bypass in bypass_options:
        for tile_h in range(h_max, k_h - 1, -s_h):
            size_h = tile_h
            if tile_h == h_max:
                w_range = range(w_max, k_w - 1, -s_w)
                size_h = in_h
            else:
                w_range = [w_max]
                win_tile_h = (tile_h - k_h) // s_h + 1
                h_tiles = (win_h + win_tile_h - 1) // win_tile_h
                if h_tiles == 2:
                    size_h = max(tile_h - p_top,
                                 in_h + p_top - tile_h + k_h - s_h)

            for tile_w in w_range:
                size_w = tile_w
                if size_w == w_max:
                    size_w = in_w
                else:
                    win_tile_w = (tile_w - k_w) // s_w + 1
                    w_tiles = (win_w + win_tile_w - 1) // win_tile_w
                    if w_tiles == 2:
                        size_w = max(tile_w - p_left,
                                     in_w + p_left - tile_w + k_w - s_w)

                k_n_ = ((k_n - 1) // 16 + 1) * 16
                co_range = range(k_n_, tile_co_start - 1, -16)
                for tile_co in co_range:
                    if bypass == 1:
                        if tile_co != k_n:
                            continue
                        l1_size = data_len * (size_h * size_w * in_c)
                    else:
                        l1_size = data_len * (size_h * size_w * in_c +
                                              tile_co * tile_c * k_h * k_w)

                    if l1_size > l1_max_size:
                        continue

                    tile_co_ = ((tile_co - 1) // 16 + 1) * 16
                    for tile_n in range(tile_co_, 15, -16):
                        k_max = in_c * k_h * k_w
                        k_max_ = ((k_max - 1) // 16 + 1) * 16
                        k_size = l0b_max_size // data_len // tile_n
                        k_size_ = k_size // 16 * 16
                        for tile_k in range(min(k_max_, k_size_), 15, -16):
                            m_max = ((tile_h - k_h) // s_h + 1) * (
                                (tile_w - k_w) // s_w + 1)
                            m_max_ = ((m_max - 1) // 16 + 1) * 16
                            m_size1 = l0a_max_size // data_len // tile_k
                            m_size1_ = m_size1 // 16 * 16
                            m_size2 = l0c_max_size // data_len // tile_n
                            m_size2_ = m_size2 // 16 * 16
                            for tile_m in range(
                                    min(m_max_, m_size1_, m_size2_), 15, -16):
                                config_space.add(
                                    ConvConfig(tile_h, tile_co, tile_m, tile_k,
                                               tile_n, tile_w, bypass))

    return None, config_space, op_desc.__str__(), None, None
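
Every candidate tile must fit the 1 MiB L1 buffer in float16; a back-of-the-envelope check with illustrative numbers, mirroring the l1_size test in the bypass == 0 branch above:

data_len = 2               # bytes per float16 element
l1_max_size = 1024 * 1024  # L1 capacity in bytes
size_h, size_w, in_c = 56, 56, 64
tile_co, tile_c, k_h, k_w = 64, 64, 3, 3
l1_size = data_len * (size_h * size_w * in_c + tile_co * tile_c * k_h * k_w)
print(l1_size, l1_size <= l1_max_size)  # 475136 True
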
Example #6
def _get_space_conv_backprop_filter(op_desc: ConvBackpropDesc):
    """get config space of convolution backwprop filter"""
    if not isinstance(op_desc, ConvBackpropDesc):
        raise TypeError('op_desc must be ConvBackpropDesc')

    stride_ = op_desc.stride
    pad_ = op_desc.pad
    dilation_ = op_desc.dilation
    vc_util.convolution_format_check(op_desc.fmap_shape, op_desc.filter_shape,
                                     pad_, stride_, dilation_)
    config_space = ListConfigSpace(ConvBackpropFilterConfig)

    # if double buffering is not enabled, set its value to 1
    size_scale = 1
    block_size = 16

    l1_max_size = (1024 * 1024) // size_scale
    l0a_max_size = (64 * 1024) // size_scale
    l0b_max_size = (64 * 1024) // size_scale
    l0c_max_size = ((256 - 8) * 1024) // size_scale // 2

    in_n, in_c, in_h, in_w = op_desc.fmap_shape
    cout, _, k_h, k_w = op_desc.filter_shape
    k_n = cout

    in_c = (in_c + block_size - 1) // block_size * block_size
    cout = (cout + block_size - 1) // block_size * block_size

    pad_top, pad_bottom, pad_left, pad_right = pad_
    s_h, s_w = stride_
    tile_co_start = 16
    tile_ci_start = 16
    data_len = 2
    h_max = in_h + pad_top + pad_bottom
    win_h = (h_max - k_h) // s_h + 1
    h_max = (h_max - k_h) // s_h * s_h + k_h
    w_max = in_w + pad_left + pad_right
    win_w = (w_max - k_w) // s_w + 1
    w_max = (w_max - k_w) // s_w * s_w + k_w

    for tile_h in range(h_max, k_h - 1, -s_h):
        size_h = tile_h
        win_tile_h = (tile_h - k_h) // s_h + 1
        # Only one head for cut H axis
        if win_tile_h * s_h < pad_top:
            continue
        # Only one tail for cut H axis
        if (((win_h + win_tile_h - 1) // win_tile_h - 1) * win_tile_h -
                1) * s_h + k_h > in_h + pad_top:
            continue
        if tile_h == h_max:
            w_range = range(w_max, k_w - 1, -s_w)
            size_h = in_h
        else:
            w_range = [w_max]
            h_tiles = (win_h + win_tile_h - 1) // win_tile_h
            if h_tiles == 2:
                size_h = max(tile_h - pad_top,
                             in_h + pad_top - tile_h + k_h - s_h)

        for tile_w in w_range:
            size_w = tile_w
            win_tile_w = (tile_w - k_w) // s_w + 1
            # Only one head for cut W axis
            if win_tile_w * s_w < pad_left:
                continue
            # Only one tail for cut W axis
            if (((win_w + win_tile_w - 1) // win_tile_w - 1) * win_tile_w -
                    1) * s_w + k_w > in_w + pad_left:
                continue
            if size_w == w_max:
                size_w = in_w
            else:
                w_tiles = (win_w + win_tile_w - 1) // win_tile_w
                if w_tiles == 2:
                    size_w = max(tile_w - pad_left,
                                 in_w + pad_left - tile_w + k_w - s_w)
            for tile_kh in range(k_h, 0, -1):
                for tile_kw in range(k_w, 0, -1):
                    k_n_ = ((k_n - 1) // 16 + 1) * 16
                    co_range = range(k_n_, tile_co_start - 1, -16)
                    for tile_co in co_range:
                        in_c_ = ((in_c - 1) // 16 + 1) * 16
                        ci_range = range(in_c_, tile_ci_start - 1, -16)
                        for tile_ci in ci_range:
                            tile_batch = 1
                            l1_size = data_len * tile_batch * (
                                tile_co * win_tile_h * win_tile_w +
                                tile_ci * size_h * size_w)
                            if l1_size > l1_max_size:
                                continue

                            if (tile_batch != in_n or tile_co != k_n_
                                    or tile_ci != in_c_):
                                tile_m = tile_co
                                tile_n = tile_ci * tile_kh * tile_kw
                                l0c_size = data_len * tile_n * tile_m
                                if l0c_size > l0c_max_size:
                                    continue
                                k_max = tile_batch * tile_h * tile_w
                                k_max_ = ((k_max - 1) // 16 + 1) * 16
                                k_size1 = l0a_max_size // data_len // tile_m
                                k_size1_ = k_size1 // 16 * 16
                                k_size2 = l0b_max_size // data_len // tile_n
                                k_size2_ = k_size2 // 16 * 16
                                for tile_k in range(
                                        min(k_max_, k_size1_, k_size2_), 15,
                                        -16):
                                    config_space.add(
                                        ConvBackpropFilterConfig(
                                            tile_ci, tile_kh, tile_kw, tile_co,
                                            tile_batch, tile_h, tile_w, tile_m,
                                            tile_k, tile_n))
                            else:
                                for tile_n in range(
                                        tile_ci * tile_kh * tile_kw, 15, -16):
                                    k_max = tile_batch * tile_h * tile_w
                                    k_max_ = ((k_max - 1) // 16 + 1) * 16
                                    k_size = l0b_max_size // data_len // tile_n
                                    k_size_ = k_size // 16 * 16
                                    for tile_k in range(
                                            min(k_max_, k_size_), 15, -16):
                                        m_max = tile_co
                                        m_max_ = ((m_max - 1) // 16 + 1) * 16
                                        m_size1 = l0a_max_size // data_len // tile_k
                                        m_size1_ = m_size1 // 16 * 16
                                        m_size2 = l0c_max_size // data_len // tile_n
                                        m_size2_ = m_size2 // 16 * 16
                                        for tile_m in range(
                                                min(m_max_, m_size1_,
                                                    m_size2_), 15, -16):
                                            config_space.add(
                                                ConvBackpropFilterConfig(
                                                    tile_ci, tile_kh, tile_kw,
                                                    tile_co, tile_batch,
                                                    tile_h, tile_w, tile_m,
                                                    tile_k, tile_n))
    return None, config_space, op_desc.__str__(), None, None
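
The innermost loops bound tile_k by both the reduction extent and the L0B capacity, each rounded to a multiple of 16; a standalone restatement with illustrative numbers:

data_len, l0b_max_size = 2, 64 * 1024
tile_batch, tile_h, tile_w, tile_n = 1, 7, 7, 144
k_max = tile_batch * tile_h * tile_w                        # 49
k_max_ = ((k_max - 1) // 16 + 1) * 16                       # rounded up to 64
k_size_ = (l0b_max_size // data_len // tile_n) // 16 * 16   # capacity bound: 224
print(list(range(min(k_max_, k_size_), 15, -16)))           # [64, 48, 32, 16]
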
Example #7
def _get_space_conv_backprop_input(op_desc: ConvBackpropDesc):
    """get config space of convolution backprop input"""
    if not isinstance(op_desc, ConvBackpropDesc):
        raise TypeError('op_desc must be ConvBackpropDesc')

    stride_ = op_desc.stride
    pad_ = op_desc.pad
    dilation_ = op_desc.dilation
    vc_util.convolution_format_check(op_desc.fmap_shape, op_desc.filter_shape,
                                     pad_, stride_, dilation_)
    config_space = ListConfigSpace(ConvBackpropInputConfig)

    # if double buffering is not enabled, set its value to 1
    size_scale = 1
    block_size = 16

    l1_max_size = (1024 * 1024) // size_scale
    l0a_max_size = (64 * 1024) // size_scale
    l0b_max_size = (64 * 1024) // size_scale
    l0c_max_size = ((256 - 8) * 1024) // size_scale // 2
    ub_max_size = l0c_max_size

    _, in_c, in_h, in_w = op_desc.fmap_shape
    k_n, _, k_h, k_w = op_desc.filter_shape

    in_c = (in_c + block_size - 1) // block_size * block_size
    k_n = (k_n + block_size - 1) // block_size * block_size

    pad_top, pad_bottom, pad_left, pad_right = pad_
    stride_h, stride_w = stride_

    out_c = k_n
    out_h = (in_h + pad_top + pad_bottom - k_h) // stride_h + 1
    out_w = (in_w + pad_left + pad_right - k_w) // stride_w + 1

    out_h = out_h * stride_h
    out_w = out_w * stride_w

    p_top = k_h - pad_[0] - 1
    p_bottom = in_h + pad_[0] - stride_[0] * (
        (in_h + pad_[0] + pad_[1] - k_h) // stride_[0] + 1)
    p_left = k_w - pad_[2] - 1
    p_right = in_w + pad_[2] - stride_[1] * (
        (in_w + pad_[2] + pad_[3] - k_w) // stride_[1] + 1)

    s_h = 1
    s_w = 1

    tile_c = out_c
    tile_co_start = 16

    data_len = 2

    h_max = out_h + p_top + p_bottom
    win_h = (h_max - k_h) // s_h + 1
    h_max = (h_max - k_h) // s_h * s_h + k_h
    w_max = out_w + p_left + p_right
    win_w = (w_max - k_w) // s_w + 1
    w_max = (w_max - k_w) // s_w * s_w + k_w

    for tile_h in range(h_max, k_h - 1, -s_h):
        size_h = tile_h
        if tile_h == h_max:
            w_range = range(w_max, k_w - 1, -s_w)
            size_h = in_h
        else:
            w_range = [w_max]
            win_tile_h = (tile_h - k_h) // s_h + 1
            h_tiles = (win_h + win_tile_h - 1) // win_tile_h
            if h_tiles == 2:
                size_h = max(tile_h - p_top, in_h + p_top - tile_h + k_h - s_h)

        for tile_w in w_range:
            size_w = tile_w
            if size_w == w_max:
                size_w = in_w
            else:
                win_tile_w = (tile_w - k_w) // s_w + 1
                w_tiles = (win_w + win_tile_w - 1) // win_tile_w
                if w_tiles == 2:
                    size_w = max(tile_w - p_left,
                                 in_w + p_left - tile_w + k_w - s_w)

            k_n_ = ((k_n - 1) // 16 + 1) * 16
            co_range = range(k_n_, tile_co_start - 1, -16)
            for tile_co in co_range:
                l1_size = data_len * (size_h * size_w * out_c +
                                      tile_co * tile_c * k_h * k_w)
                if l1_size > l1_max_size:
                    continue
                ub_size = data_len * (size_h * size_w * out_c)
                if ub_size > ub_max_size:
                    continue

                tile_co_ = ((tile_co - 1) // 16 + 1) * 16
                for tile_n in range(tile_co_, 15, -16):
                    k_max = out_c * k_h * k_w
                    k_base = 16 * k_h * k_w
                    k_max_ = ((k_max - 1) // k_base + 1) * k_base
                    k_size = l0b_max_size // data_len // tile_n
                    k_size_ = k_size // k_base * k_base
                    for tile_k in range(min(k_max_, k_size_), k_base - 1,
                                        -k_base):
                        m_max = ((tile_h - k_h) // s_h + 1) * (
                            (tile_w - k_w) // s_w + 1)
                        m_max_ = ((m_max - 1) // 16 + 1) * 16
                        m_size1 = l0a_max_size // data_len // tile_k
                        m_size1_ = m_size1 // 16 * 16
                        m_size2 = l0c_max_size // data_len // tile_n
                        m_size2_ = m_size2 // 16 * 16
                        for tile_m in range(min(m_max_, m_size1_, m_size2_),
                                            15, -16):
                            config_space.add(
                                ConvBackpropInputConfig(
                                    tile_h, tile_co, tile_m, tile_k, tile_n,
                                    tile_w))
    return None, config_space, op_desc.__str__(), None, None
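
The p_top/p_bottom/p_left/p_right expressions above convert the forward pads into those of the equivalent transposed convolution over the stride-dilated output; a standalone restatement (hypothetical helper name):

def backprop_input_pads(in_h, in_w, k_h, k_w, pad_, stride_):
    p_top = k_h - pad_[0] - 1
    p_bottom = in_h + pad_[0] - stride_[0] * (
        (in_h + pad_[0] + pad_[1] - k_h) // stride_[0] + 1)
    p_left = k_w - pad_[2] - 1
    p_right = in_w + pad_[2] - stride_[1] * (
        (in_w + pad_[2] + pad_[3] - k_w) // stride_[1] + 1)
    return p_top, p_bottom, p_left, p_right

# a 3x3, stride-1, pad-1 ("same") conv keeps pads (1, 1, 1, 1) in the backward pass
print(backprop_input_pads(28, 28, 3, 3, (1, 1, 1, 1), (1, 1)))  # (1, 1, 1, 1)
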
Example #8
def conv_backprop_input_run(fmap_shape,
                            filter_shape,
                            pad_,
                            stride_,
                            dilation_,
                            attrs=None):
    conv_dtype = 'float16'
    block_size = 16

    vc_util.convolution_format_check(fmap_shape, filter_shape, pad_, stride_,
                                     dilation_)

    in_n, in_c, in_h, in_w = fmap_shape
    cout, cin, w_h, w_w = filter_shape

    in_c = (in_c + block_size - 1) // block_size * block_size
    cout = (cout + block_size - 1) // block_size * block_size

    pad_top, pad_bottom, pad_left, pad_right = pad_
    stride_h, stride_w = stride_

    out_n = in_n
    out_c = cout
    out_h = (in_h + pad_top + pad_bottom - w_h) // stride_h + 1
    out_w = (in_w + pad_left + pad_right - w_w) // stride_w + 1

    x_shape = (out_n, out_c, out_h, out_w)
    w_shape = (cout, in_c, w_h, w_w)
    inN, inC, inH, inW = x_shape
    input_shape_nc1hwc0 = (inN, inC // block_size, inH, inW, block_size)
    k_n, k_c, k_h, k_w = w_shape
    kernel_shape_nc1hwc0 = (k_n, k_c // block_size, k_h, k_w, block_size)
    k_n, k_c1, k_h, k_w, k_c0 = kernel_shape_nc1hwc0
    kernel_shape_fractal = (k_c // block_size * k_h * k_w, k_n // block_size,
                            block_size, block_size)

    input_shape = [input_shape_nc1hwc0, kernel_shape_fractal]

    input_file = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    expect_file = input_file + "/" + gen_kernel_name(
        [input_shape], [conv_dtype],
        op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_, attrs],
        kernel_name='conv_backprop_input',
        attrs=attrs) + ".bin"
    fmap_data, filter_data, expect = gen_data(fmap_shape,
                                              filter_shape,
                                              pad_,
                                              stride_,
                                              dilation_,
                                              expect_file,
                                              attrs=attrs)

    out_data = np.full(expect.shape, np.nan, 'float16')
    input = (fmap_data, filter_data)

    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "Yes":
        return input, out_data, expect, True

    mod = utils.op_build_test(
        conv_backprop_input, [input_shape], [conv_dtype],
        op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_, attrs],
        kernel_name='conv_backprop_input',
        attrs=attrs)

    args = (fmap_data, filter_data, out_data)
    out_data = utils.mod_launch(mod, args, expect=expect)
    rtol, atol = get_rtol_atol("conv_backprop_input", conv_dtype)
    return input, out_data, expect, compare_tensor(out_data,
                                                   expect,
                                                   rtol=rtol,
                                                   atol=atol,
                                                   equal_nan=True)
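
The fractal filter layout built above packs an NCHW filter into (C1*KH*KW, N1, 16, 16); a one-line restatement (illustrative name, assumes channel counts already padded to multiples of 16):

def fractal_filter_shape(k_n, k_c, k_h, k_w, block=16):
    return (k_c // block * k_h * k_w, k_n // block, block, block)

print(fractal_filter_shape(64, 64, 3, 3))  # (36, 4, 16, 16)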