Example #1
def matmul_execute(shape_x, shape_y, bias, left_format, right_format, out_format, adj_x, adj_y, dtype, out_dtype, kernel_name, attrs):
    '''
    The Davinci core supports four fractal formats: zZ, zN, nZ, nN.
    The general matmul format rules are:
    left_trans: False, right_trans: False -> zZ * nZ = zN
    left_trans: True,  right_trans: False -> nN * nZ = zN
    left_trans: False, right_trans: True  -> zZ * zN = zN
    left_trans: True,  right_trans: True  -> nN * zN = zN

    Here we need to support zN * nZ = zN:
    use left_format to specify the left matrix data format and
    right_format to specify the right matrix data format
    (see the usage sketch after this function).
    '''
    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)
    # round m, n and k up to multiples of 16, the fractal tile size
    m = (m + 15) // 16 * 16
    n = (n + 15) // 16 * 16
    k = (k + 15) // 16 * 16
    shape_xx, shape_yy, bias_shape, out_shape, k = get_converted_shapes(m, n, k, batch_tuple, adj_x, adj_y, bias, left_format, right_format, out_format)
    mod = dynamic_matmul_compile(shape_x, shape_y, bias, left_format, right_format, out_format, adj_x, adj_y, dtype, out_dtype, kernel_name, attrs)
    # Generate data
    m_x, m_y, bench_mark, bias_data = matmul_data(batch_tuple, m, k, n, dtype, out_dtype, bias, adj_x, adj_y, left_format, right_format, out_format)

    # mod launch
    output = np.full(out_shape, np.nan, out_dtype)
    if bias == 0:
        # the trailing 1s appear to be extra runtime arguments expected by the
        # dynamically compiled kernel (see dynamic_matmul_compile below)
        output = utils.mod_launch(mod, (m_x, m_y, output, 1, 1, 1, 1, 1, 1, 1, 1, 1), outputs=(2,), expect=bench_mark)
    elif bias == 1:
        output = utils.mod_launch(mod, (m_x, m_y, bias_data, output), expect=bench_mark)

    # compare result
    rtol, atol = get_rtol_atol("matmul", dtype)
    compare_result = compare_tensor(output, bench_mark, rtol=rtol, atol=atol, equal_nan=True)
    return (m_x, m_y), output, bench_mark, compare_result
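
Below is a minimal, hypothetical invocation of matmul_execute for the zN * nZ = zN case described in its docstring. Every argument value here is an assumption for illustration, not a configuration taken from the real test suite.

# Hypothetical call into matmul_execute (all values are illustrative
# assumptions; real suites supply their own shapes, formats and attrs).
inputs, output, expect, ok = matmul_execute(
    shape_x=(64, 128),            # m x k before fractal conversion
    shape_y=(128, 256),           # k x n
    bias=0,                       # 0: no bias tensor, 1: bias is supplied
    left_format="zN", right_format="nZ", out_format="zN",
    adj_x=False, adj_y=False,     # no transpose on either operand
    dtype="float16", out_dtype="float16",
    kernel_name="matmul_example", attrs={})
assert ok                         # compare_tensor verdict vs. the benchmark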
Example #2
def dynamic_matmul_compile(shape_x, shape_y, bias, left_format, right_format, output_format, adj_x, adj_y, dtype, out_dtype, kernel_name, attrs):
    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)
    m = akg.tvm.var("I2")
    n = akg.tvm.var("I1")
    k = akg.tvm.var("KO")
    x = akg.tvm.placeholder((1, m, k, 16, 16), name='A', dtype=dtype)
    y = akg.tvm.placeholder((1, k, n, 16, 16), name='B', dtype=dtype)
    """
    m = (m + 15) // 16 * 16
    n = (n + 15) // 16 * 16
    k = (k + 15) // 16 * 16
    """
    shape_xx, shape_yy, bias_shape, out_shape, k = get_converted_shapes(m, n, k, batch_tuple, adj_x, adj_y, bias,
                                                                        left_format, right_format, output_format)
    has_bias = (bias == 1)
    if has_bias:
        input_shapes = [shape_xx, shape_yy, bias_shape]
        input_types = [dtype, dtype, dtype]
        op_attrs = [out_dtype, left_format, right_format, output_format, adj_x, adj_y, has_bias, attrs]
    else:
        # without bias, the tvm placeholders themselves are passed as the input spec
        input_shapes = [x, y]
        input_types = [dtype, dtype]
        op_attrs = [None, out_dtype, left_format, right_format, output_format, adj_x, adj_y, has_bias, attrs]
    return utils.op_build_test(matmul, input_shapes, input_types, op_attrs, kernel_name, attrs)
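
The key point in dynamic_matmul_compile is that the block counts along m, n and k are tvm symbolic variables rather than concrete integers, so one compiled kernel can serve many shapes. Below is a sketch of that idea, assuming akg.tvm exposes the placeholder/var API used above.

# Sketch of the symbolic-shape pattern (assumes the akg.tvm API used above).
import akg

m = akg.tvm.var("I2")   # 16x16 blocks along M, bound at launch time
n = akg.tvm.var("I1")   # blocks along N
k = akg.tvm.var("KO")   # blocks along K
# Fractal layout (batch, outer0, outer1, 16, 16): the trailing 16x16 tile is
# the fixed cube fragment; only the block counts stay symbolic.
A = akg.tvm.placeholder((1, m, k, 16, 16), name="A", dtype="float16")
B = akg.tvm.placeholder((1, k, n, 16, 16), name="B", dtype="float16")
# A kernel built from these placeholders takes the concrete values of
# I2/I1/KO as extra runtime arguments, which is presumably why
# matmul_execute above appends trailing 1s when launching the bias-free kernel.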
Example #3
def matmul4d_ad_run(shape_x, shape_y, bias, adj_x, adj_y, dtype, out_dtype,
                    kernel_name, attrs):

    # calculate the shape in fractal type and create the data
    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)

    m = (m + 15) // 16 * 16
    n = (n + 15) // 16 * 16
    k = (k + 15) // 16 * 16

    shape_xx, shape_yy, bias_shape, output_shape, k = get_converted_shapes(
        m, n, k, batch_tuple, adj_x, adj_y, bias)

    input_x = random_gaussian(shape_xx, miu=0.5, sigma=0.01).astype(np.float16)
    input_y = random_gaussian(shape_yy, miu=0.5, sigma=0.01).astype(np.float16)
    input_head = random_gaussian(output_shape, miu=0.5,
                                 sigma=0.01).astype(np.float16)

    dX_expected = compute_expected(input_y, input_head, adj_x, adj_y, shape_xx)

    if bias_shape is None:
        input_shapes = [output_shape, shape_xx, shape_yy]
        input_types = [out_dtype, dtype, dtype]
        op_attrs = [None, out_dtype, adj_x, adj_y]
    else:
        input_shapes = [output_shape, shape_xx, shape_yy, bias_shape]
        input_types = [out_dtype, dtype, dtype, dtype]
        op_attrs = [out_dtype, adj_x, adj_y]

    mod = utils.op_build_test(matmul4d_ad.matmul4d_ad, input_shapes,
                              input_types, op_attrs, kernel_name, attrs)

    # calculate the backward kernel
    dX = np.full(shape_xx, np.nan, dtype)
    dX = utils.mod_launch(mod, (input_head, input_x, input_y, dX),
                          expect=dX_expected)

    compare_result = compare_tensor(dX, dX_expected, rtol=0.01, equal_nan=True)
    return (input_x, input_y, input_head), dX, dX_expected, compare_result
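
matmul4d_ad_run validates a backward matmul kernel against compute_expected. For the plain case (adj_x = adj_y = False) the underlying math is the standard gradient dX = dZ @ Y^T; the numpy sketch below states that 2D reference, leaving the fractal layout and transpose variants (which compute_expected also handles) out of scope.

import numpy as np

# 2D reference for the gradient the kernel computes: for Z = X @ Y,
# dX = dZ @ Y^T.  This sketch ignores the fractal (batch, blocks, 16, 16)
# layout and the adj_x/adj_y variants that compute_expected covers.
def dx_reference(head, y):
    return head @ y.T

m, k, n = 32, 48, 64
y = np.random.randn(k, n).astype(np.float16)
head = np.random.randn(m, n).astype(np.float16)  # gradient w.r.t. the output
dx = dx_reference(head, y)
assert dx.shape == (m, k)                        # matches the shape of X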
Example #4
def _gen_data_matmul_cube(op_desc: MatmulCubeDesc):
    """Generating test data for matmul_cube"""
    batch_tuple, m, k, n = matmul_run.extract_dim(op_desc.x_shape, op_desc.y_shape, op_desc.adj_x, op_desc.adj_y)
    m = (m + 15) // 16 * 16
    n = (n + 15) // 16 * 16
    k = (k + 15) // 16 * 16
    _, _, _, out_shape, k = matmul_run.get_converted_shapes(m, n, k, batch_tuple, op_desc.adj_x, op_desc.adj_y,
                                                            op_desc.bias, op_desc.left_format, op_desc.right_format,
                                                            op_desc.out_format)
    m_x, m_y, bench_mark, bias_data = matmul_run.matmul_data(batch_tuple, m, k, n, op_desc.dtype, op_desc.out_dtype,
                                                             op_desc.bias, op_desc.adj_x, op_desc.adj_y,
                                                             op_desc.left_format, op_desc.right_format,
                                                             op_desc.out_format)

    out_data = np.full(out_shape, np.nan, op_desc.out_dtype)

    if op_desc.bias:
        args = (m_x, m_y, bias_data, out_data)
    else:
        args = (m_x, m_y, out_data)
    return args, bench_mark
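
A hedged sketch of how the (args, bench_mark) pair from _gen_data_matmul_cube is typically consumed; mod and op_desc are assumed to come from the surrounding tuning harness, and the launch/compare helpers are the same ones used in matmul_execute above.

# Hypothetical consumer of _gen_data_matmul_cube (mod and op_desc are assumed
# to be provided by the tuning harness; the output tensor is the last arg).
args, expect = _gen_data_matmul_cube(op_desc)
out_idx = len(args) - 1
result = utils.mod_launch(mod, args, outputs=(out_idx,), expect=expect)
rtol, atol = get_rtol_atol("matmul", op_desc.dtype)
ok = compare_tensor(result, expect, rtol=rtol, atol=atol, equal_nan=True)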
Example #5
def _get_space_matmul_cube(op_desc: MatmulCubeDesc):
    """get config space of matmul_cube"""
    if not isinstance(op_desc, MatmulCubeDesc):
        raise TypeError('op_desc must be MatmulCubeDesc')
    config_space = ListConfigSpace(MatmulCubeConfig)
    batch_tuple, m, k, n = matmul_run.extract_dim(op_desc.x_shape,
                                                  op_desc.y_shape,
                                                  op_desc.adj_x, op_desc.adj_y)

    # number of 16-wide fractal blocks along each dimension
    mmax = (m + 15) // 16
    nmax = (n + 15) // 16
    kmax = (k + 15) // 16

    double_buffer = True
    mad_fp32 = True

    l1_max_size = (1024 * 1024)  # L1  MEM 1024KB
    l0a_max_size = (64 * 1024)  # L0A MEM 64KB
    l0b_max_size = (64 * 1024)  # L0B MEM 64KB
    l0c_max_size = (256 * 1024)  # L0C MEM 256KB
    ub_max_size = (256 - 8) * 1024  # UB  MEM 248KB, 8KB reserved for compiler

    if double_buffer:
        l1_max_size = l1_max_size // 2
        l0a_max_size = l0a_max_size // 2
        l0b_max_size = l0b_max_size // 2
        l0c_max_size = l0c_max_size // 2
        ub_max_size = ub_max_size // 2

    if mad_fp32:
        l0c_max_size = l0c_max_size // 2
    if op_desc.out_dtype == 'float32':
        ub_max_size = ub_max_size // 2

    bypass_options = [0, 1, 2]

    for bypass in bypass_options:
        # bypass 2 streams the left matrix past L1 (see the l1_size computation
        # below); skip it when the left layout conflicts with the transpose flag
        if bypass == 2 and (
                (not op_desc.adj_x and op_desc.left_format[0].lower() == 'n') or
                (op_desc.adj_x and op_desc.left_format[0].lower() == 'z')):
            continue

        # bypass 1 streams the right matrix past L1; mirrored condition
        if bypass == 1 and (
                (not op_desc.adj_y and op_desc.right_format[0].lower() == 'z') or
                (op_desc.adj_y and op_desc.right_format[0].lower() == 'n')):
            continue

        for k_l1 in range(1, kmax + 1):
            if kmax % k_l1 != 0:
                continue
            for k_l0 in range(1, k_l1 + 1):
                if k_l1 % k_l0 != 0:
                    continue

                # when k is cut at L1 (k_l1 != kmax), m and n must not be
                # re-tiled from L1 to L0; the buffer checks below are restated
                # as a standalone predicate after this function
                for m_l1 in range(1, mmax + 1):
                    if mmax % m_l1 != 0:
                        continue
                    m_l0_range = [m_l1] if k_l1 != kmax else range(1, m_l1 + 1)
                    for m_l0 in m_l0_range:
                        if m_l1 % m_l0 != 0:
                            continue
                        for n_l1 in range(1, nmax + 1):
                            if nmax % n_l1 != 0:
                                continue
                            n_l0_range = [n_l1] if k_l1 != kmax else range(
                                1, n_l1 + 1)
                            for n_l0 in n_l0_range:
                                if n_l1 % n_l0 != 0:
                                    continue

                                if m_l0 * 16 * k_l0 * 16 > l0a_max_size:
                                    continue

                                if n_l0 * 16 * k_l0 * 16 > l0b_max_size:
                                    continue

                                if m_l0 * 16 * n_l0 * 16 > l0c_max_size:
                                    continue

                                if m_l0 * 16 * n_l0 * 16 > ub_max_size:
                                    continue

                                if bypass == 2:
                                    l1_size = n_l1 * 16 * k_l1 * 16
                                elif bypass == 1:
                                    l1_size = m_l1 * 16 * k_l1 * 16
                                else:
                                    l1_size = (m_l1 * 16 +
                                               n_l1 * 16) * k_l1 * 16
                                if l1_size > l1_max_size:
                                    continue

                                if nmax == 1:
                                    n_l1 = 0
                                    n_l0 = 0
                                if mmax == 1:
                                    m_l1 = 0
                                    m_l0 = 0
                                if kmax == 1:
                                    k_l1 = 16
                                    k_l0 = 16
                                config_space.add(
                                    MatmulCubeConfig(n_l1, n_l0, m_l1, m_l0,
                                                     k_l1, k_l0, bypass))
    shape_xx, shape_yy, _, _, k = matmul_run.get_converted_shapes(
        m, n, k, batch_tuple, op_desc.adj_x, op_desc.adj_y, op_desc.bias,
        op_desc.left_format, op_desc.right_format, op_desc.out_format)
    key = str((shape_xx, shape_yy, op_desc.bias, op_desc.left_format,
               op_desc.right_format, op_desc.out_format, op_desc.adj_x,
               op_desc.adj_y, op_desc.dtype, op_desc.out_dtype))
    return None, config_space, key, None, None
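
The nested loops above enumerate (bypass, k_l1, k_l0, m_l1, m_l0, n_l1, n_l0) candidates and prune any tiling whose tiles overflow the on-chip buffers. The sketch below restates just that pruning predicate in isolation; the default budgets mirror the constants computed above under double buffering and fp32 mad, and are assumptions if those flags differ.

# Standalone restatement of the buffer-budget pruning used in the loops
# above (a sketch; defaults assume double_buffer=True and mad_fp32=True).
def fits_on_chip(m_l0, n_l0, k_l0, m_l1, n_l1, k_l1, bypass,
                 l1=1024 * 1024 // 2, l0a=64 * 1024 // 2,
                 l0b=64 * 1024 // 2, l0c=256 * 1024 // 4,
                 ub=(256 - 8) * 1024 // 2):
    if m_l0 * 16 * k_l0 * 16 > l0a:    # left tile must fit L0A
        return False
    if n_l0 * 16 * k_l0 * 16 > l0b:    # right tile must fit L0B
        return False
    acc = m_l0 * 16 * n_l0 * 16
    if acc > l0c or acc > ub:          # accumulator must fit L0C and UB
        return False
    if bypass == 2:                    # left matrix skips L1
        l1_size = n_l1 * 16 * k_l1 * 16
    elif bypass == 1:                  # right matrix skips L1
        l1_size = m_l1 * 16 * k_l1 * 16
    else:                              # both operands staged through L1
        l1_size = (m_l1 * 16 + n_l1 * 16) * k_l1 * 16
    return l1_size <= l1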