Example No. 1
def reshape_compile(in_shape,
                    out_shape,
                    dtype,
                    attrs,
                    kernel_name='reshape',
                    tuning=False):
    if attrs.get("dynamic"):
        var_shape = []
        for i in range(len(in_shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_in_shape = var_shape
        total_size = reduce(lambda x, y: x * y, var_shape)
        out_var_shape = []
        if len(out_shape) >= 2:
            for i in range(len(out_shape) - 1):
                out_var_shape.append(tvm.var("O" + str(i)))
            out_size = reduce(lambda x, y: x * y, out_var_shape)
            out_var_shape.append(tvm.div(total_size, out_size))
        else:
            out_var_shape.append(total_size)
        build_out_shape = out_var_shape
    else:
        build_in_shape = in_shape
        build_out_shape = out_shape
    op_attr = [build_out_shape]
    return utils.op_build_test(reshape.reshape, [build_in_shape], [dtype],
                               op_attr,
                               kernel_name=kernel_name,
                               attrs=attrs,
                               tuning=tuning)
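A framework-free sketch of the dynamic output-shape arithmetic above, with plain integers standing in for the hypothetical I*/O* vars: every output dim except the last becomes symbolic, and the last one is derived as the total input size divided by the product of the leading output dims.

from functools import reduce

in_shape = [4, 8, 16]                                # stands in for I0, I1, I2
total_size = reduce(lambda x, y: x * y, in_shape)    # 512
out_leading = [32, 4]                                # stands in for O0, O1
out_size = reduce(lambda x, y: x * y, out_leading)   # 128
out_shape = out_leading + [total_size // out_size]   # [32, 4, 4]
assert reduce(lambda x, y: x * y, out_shape) == total_size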
Example No. 2
def my_dsl(dtype, kernel_name, attrs, insn, insnType):
    # insn selects the elementwise op; insnType is "binary" or "unary".
    m = tvm.var("M")
    n = tvm.var("N")
    A = tvm.placeholder((m, ), name="A", dtype=dtype)
    B = tvm.placeholder((m, ), name="B", dtype=dtype)

    if insn == "add":
        C = topi.add(A, B)
    elif insn == "sub":
        C = topi.subtract(A, B)
    if insn == "mul":
        C = topi.multiply(A, B)
    elif insn == "div":
        C = topi.divide(A, B)
    elif insn == "max":
        C = topi.maximum(A, B)
    elif insn == "min":
        C = topi.minimum(A, B)

    elif insn == "abs":
        C = tvm.compute(A.shape, lambda *index: tvm.abs(A(*index)), name='C')
    elif insn == "exp":
        C = topi.exp(A)
    elif insn == "log":
        C = topi.log(A)
    elif insn == "sqrt":
        C = topi.sqrt(A)
        C = topi.log(A)
    elif insn == "sqrt":
        C = topi.sqrt(A)

    elif insn == "adds":
        C = A + tvm.const(2, dtype)
    elif insn == "muls":
        C = A * tvm.const(2, dtype)

    # C = tvm.compute((m, ), lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule([C.op])
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        if insnType == "binary":
            mod = akg.build(s, [A, B, C],
                            "cce",
                            name=kernel_name,
                            attrs=attrs,
                            polyhedral=True)
        else:
            mod = akg.build(s, [A, C],
                            "cce",
                            name=kernel_name,
                            attrs=attrs,
                            polyhedral=True)
    return mod
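A hypothetical invocation, assuming akg, tvm, and topi are importable and using the insn/insnType parameters added to the signature above:

# Build a polyhedral "cce" kernel for elementwise add (illustrative values).
mod = my_dsl("float16", "vadd_demo", {}, insn="add", insnType="binary")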
Example No. 3
def process_dynamic_shape(shapes, attrs, keep_axis=None):
    dynamic_shape_args = []

    if len(shapes) == 0 or not attrs.get("dynamic"):
        return shapes, dynamic_shape_args

    new_shapes = []
    prefix = "I"

    keep_axis_local = keep_axis

    if isinstance(keep_axis, int):
        keep_axis_local = [keep_axis]
    elif keep_axis is None:
        keep_axis_local = []

    for shape in shapes:
        dynamic_shape = []
        for i in range(len(shape)):
            if (i in keep_axis_local) or ((i - len(shape)) in keep_axis_local):
                dynamic_shape.append(shape[i])
            else:
                dynamic_shape.append(tvm.var(prefix + str(i)))
                dynamic_shape_args.append(shape[i])

        new_shapes.append(dynamic_shape)
        prefix += "I"

    return new_shapes, dynamic_shape_args
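A hypothetical call, assuming tvm is importable: keep the last axis static (for example the fixed C0 = 16 axis of a 5D tensor) while every other axis becomes a symbolic var.

shapes = [[32, 4, 56, 56, 16]]
new_shapes, args = process_dynamic_shape(shapes, {"dynamic": True}, keep_axis=-1)
# new_shapes -> [[I0, I1, I2, I3, 16]]; args -> [32, 4, 56, 56]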
Example No. 4
def squeeze_run(shape, axis, dtype, kernel_name="squeeze", attrs=None):
    op_attrs = [axis]
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        build_shape = []
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(squeeze.squeeze, [build_shape], [dtype], op_attrs, kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, output = gen_data(axis, dtype, shape)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        expect, input, output = gen_data(axis, dtype, shape)
        mod = utils.op_build_test(squeeze.squeeze, [build_shape], [dtype], op_attrs, kernel_name=kernel_name, attrs=attrs)
        args = [input, output]
        if attrs is not None and attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
        output = utils.mod_launch(mod, args, outputs=(1,), expect=expect)
        return input, output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)
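A hypothetical static-shape call, assuming the surrounding test utilities are importable: drop the singleton axis of a (32, 1, 16) tensor.

inp, out, expect, ok = squeeze_run((32, 1, 16), 1, "float16", attrs={})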
Example No. 5
def maxpool_run(shape, kernel, stride, pad, hybrid, dtype, attrs=None, polyhedral=True):
    if attrs.get("dynamic"):
        var_shape = []
        for i in range(len(shape)):
            if i == len(shape) - 1:
                var_shape.append(shape[i])
            else:
                var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(maxpool.maxpool, [build_shape], [dtype], op_attrs=[kernel, stride, pad],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            expect, input, out_shape, res = gen_data(dtype, kernel, pad, shape, stride)
            return mod, expect,  {"args": (input, res), 'outputs': (-1, ), 'tuning': False}
        else:
            return mod
    else:
        if polyhedral:
            if hybrid:
                mod = utils.op_build_test(maxpool.maxpool, [build_shape], [dtype], op_attrs=[kernel, stride, pad],
                                          kernel_name='maxpool', attrs=attrs)
            else:
                mod = utils.op_build_test(maxpool.old_maxpool, [build_shape], [dtype], op_attrs=[kernel, stride, pad],
                                          kernel_name='maxpool_old', attrs=attrs)
        else:
            mod = maxpool.maxpool_manual_schedule(build_shape, kernel, stride, pad, dtype,
                                                  attrs=attrs, polyhedral=polyhedral)
        expect, input, out_shape, res = gen_data(dtype, kernel, pad, shape, stride)
        output = utils.mod_launch(mod, [input, res], expect=expect)
        rtol, atol = get_rtol_atol("maxpool", dtype)
        return input, output, expect, compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
Example No. 6
def four2five_compile(shape,
                      dtype,
                      op_attrs,
                      attrs,
                      kernel_name='four2five',
                      tuning=False):
    if attrs.get("dynamic"):
        var_shape = []
        format, dst_type = op_attrs
        channel_idx = 1
        if format == 'NCHW':
            channel_idx = 1
        elif format == 'NHWC':
            channel_idx = len(shape) - 1
        for i in range(len(shape)):
            if i == channel_idx:
                var_shape.append(shape[i])
            else:
                var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape
    return utils.op_build_test(four2five.four2five, [build_shape], [dtype],
                               op_attrs,
                               kernel_name=kernel_name,
                               attrs=attrs,
                               tuning=tuning)
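A hypothetical dynamic build, assuming tvm, utils, and four2five are importable: with the 'NCHW' layout the channel axis (index 1) stays static, so the C -> C1*C0 split remains a compile-time constant.

mod = four2five_compile((32, 64, 56, 56), "float16",
                        ["NCHW", "float16"], {"dynamic": True})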
Example No. 7
def maxpool_with_argmax_run(shape, kernel, stride, pad, dsl, dtype, attrs=None, polyhedral=True):
    build_shape = []
    arg_list = []
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        for i in range(len(shape)):
            if i == len(shape) - 1:
                build_shape.append(shape[i])
            else:
                tmp_var = tvm.var("I" + str(i))
                build_shape.append(tmp_var)
                arg_list.append(shape[i])
    else:
        build_shape = shape
    arg_len = len(arg_list)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(maxpool.maxpool_with_argmax,
                                  [shape], [dtype], op_attrs=[kernel, stride, pad],
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if t:
            input, expects, outputs = \
                gen_data(dtype, kernel, pad, shape, stride)
            return mod, expects, \
                {"args": (input, outputs[0], outputs[1]), 'outputs': (-2 - arg_len, -1 - arg_len), 'tuning': False}
        else:
            return mod
    else:
        if polyhedral:
            if attrs.get("dynamic") and len(build_shape) > 0:
                mod = utils.op_build_test(maxpool.maxpool_with_argmax_dynamic,
                                          [build_shape], [dtype], op_attrs=[kernel, stride, pad],
                                          kernel_name='maxpool', attrs=attrs)
            else:
                mod = utils.op_build_test(maxpool.maxpool_with_argmax,
                                          [shape], [dtype], op_attrs=[kernel, stride, pad],
                                          kernel_name='maxpool', attrs=attrs)
        else:
            mod = maxpool.maxpool_manual_schedule(shape, kernel, stride, pad, dtype,
                                                  attrs=attrs, polyhedral=polyhedral)
        input, expects, outputs = \
            gen_data(dtype, kernel, pad, shape, stride, attrs)
        args = [input, outputs[0], outputs[1]]
        if attrs is not None and attrs.get("dynamic"):
            args = args + arg_list
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
            outputs = utils.mod_launch(mod, args, (-3 - arg_len, -2 - arg_len), expect=expects)
        else:
            outputs = utils.mod_launch(mod, args, (-2 - arg_len, -1 - arg_len), expect=expects)

        rtol, atol = get_rtol_atol("maxpool", dtype)
        results = list(map(lambda x, y:
                           compare_tensor(x, y, rtol=rtol, atol=atol),
                           outputs, expects))
        return input, outputs, expects, all(results)
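compute_blockdim is never defined in these snippets. Purely as an assumption for readers, a minimal stand-in that caps the launch dimension at 32 blocks (the framework's actual helper may differ) could look like:

from functools import reduce

def compute_blockdim(shape):
    # Hypothetical stand-in for a flat shape tuple: roughly one block
    # per 8192 elements, clamped to a 32-block limit (cf. min(32, IN)
    # in the conv example further below).
    size = reduce(lambda x, y: x * y, shape, 1)
    return max(1, min(32, size // 8192))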
Example No. 8
def cast_run(shape, srcType, dstType, attrs=None):
    op_attrs = [dstType]
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = utils.op_build_test(Cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name='cast',
                                  attrs=attrs)
        args, exp_output, input = gen_data(dstType, shape, srcType)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        acu_output = utils.mod_launch(mod,
                                      args,
                                      outputs=(1, ),
                                      expect=exp_output)
        # compare result
        rtol, atol = get_rtol_atol("cast", dstType)
        TestCase_Result = compare_tensor(acu_output,
                                         exp_output,
                                         rtol=rtol,
                                         atol=atol,
                                         equal_nan=True)

        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, acu_output, exp_output, TestCase_Result
Example No. 9
def cast_run(shape, srcType, dstType, attrs):
    op_attrs = [dstType]
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(cast.cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = utils.op_build_test(cast.cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name='cast',
                                  attrs=attrs)
        args, exp_output, input = gen_data(dstType, shape, srcType)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        acu_output = utils.mod_launch(mod,
                                      args,
                                      outputs=(1, ),
                                      expect=exp_output)
        # compare result
        rtol, atol = get_rtol_atol("cast", dstType)
        TestCase_Result = compare_tensor(acu_output,
                                         exp_output,
                                         rtol=rtol,
                                         atol=atol,
                                         equal_nan=True)
        return input, acu_output, exp_output, TestCase_Result
Example No. 10
def common_run(shape, dtype, axis, attrs, method):
    if attrs is None:
        attrs = {}
    attrs["enable_algebra_simplify"] = True
    if attrs.get("dynamic"):
        build_shape = []
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        if method is "min":
            mod = utils.op_build_test(argmin.argmin, [build_shape], [dtype], op_attrs=[axis], kernel_name=kernel_name,
                                      attrs=attrs, tuning=t)
        elif method is "max":
            mod = utils.op_build_test(argmax.argmax, [build_shape], [dtype], op_attrs=[axis], kernel_name=kernel_name,
                                      attrs=attrs, tuning=t)
        else:
            raise RuntimeError("not support " + method)
        if t:
            args, exp_output, input = gen_data(axis, dtype, method, shape)
            return mod, exp_output, args
        else:
            return mod
    else:
        if method is "min":
            mod = utils.op_build_test(argmin.argmin, [build_shape], [dtype], op_attrs=[axis], kernel_name="argmin",
                                      attrs=attrs)
        elif method is "max":
            mod = utils.op_build_test(argmax.argmax, [build_shape], [dtype], op_attrs=[axis], kernel_name="argmax",
                                      attrs=attrs)
        else:
            raise RuntimeError("not support " + method)
        args, exp_output, input = gen_data(axis, dtype, method, shape)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        res = utils.mod_launch(mod, args, outputs=(1,), expect=exp_output)
        acu_output = res.astype("int32")
        rtol, atol = get_rtol_atol("argmax_min_common", dtype)
        return input, acu_output, exp_output, compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)
Example No. 11
def equal_count_run(shapes, dtype, kernel_name, attrs):
    # shape check
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        var_size = tvm.var("I0")
        var_shape = []
        for shape in shapes:
            assert len(shape) == 1
            var_shape.append([var_size])
        build_shape = var_shape
    else:
        build_shape = shapes
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(equal_count.equal_count,
                                  build_shape, [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            benchMark1, inputs1, output1 = gen_data(dtype, shapes)
            return mod, benchMark1, inputs1 + [output1]
        else:
            return mod
    else:
        mod = utils.op_build_test(equal_count.equal_count,
                                  build_shape, [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        benchMark1, inputs1, output1 = gen_data(dtype, shapes)
        if attrs.get("dynamic"):
            args = inputs1.copy()
            args.append(output1)
            # `shape` was undefined here; the runtime dims come from the
            # first input shape (all inputs share the same 1-D var).
            for i in range(len(shapes[0]) - 1, -1, -1):
                args.append(shapes[0][i])
            block_dim = compute_blockdim(shapes)
            args.append(block_dim)
        else:
            args = inputs1 + [output1]
        output1 = utils.mod_launch(mod, args, outputs=(2, ), expect=benchMark1)
        return inputs1, output1, benchMark1, (output1[0] == benchMark1)
Example No. 12
def sum_compile(shape,
                reduce_axis,
                keepdims,
                dtype,
                attrs,
                kernel_name="sum",
                tuning=False):
    op_attrs = [reduce_axis, keepdims]
    if attrs is not None and attrs.get("dynamic"):
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        attrs["enable_post_poly_loop_partition"] = False
        build_shape = var_shape
    else:
        build_shape = shape
    return utils.op_build_test(sum.sum_value, [build_shape], [dtype],
                               op_attrs,
                               kernel_name=kernel_name,
                               attrs=attrs,
                               tuning=tuning)
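A hypothetical call, assuming the same imports as above: build a dynamic-shape sum over axis 1 with keepdims=False.

mod = sum_compile((32, 128), reduce_axis=1, keepdims=False,
                  dtype="float16", attrs={"dynamic": True})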
Example No. 13
def relu_run(shape, dtype, rtol, attrs):
    if attrs is not None and attrs.get("dynamic"):
        build_shape = []
        attrs['enable_post_poly_loop_partition'] = False
        for i in range(len(shape)):
            build_shape.append(tvm.var("I" + str(i)))
    else:
        build_shape = shape
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Relu, [build_shape], [dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            input_np, expect = gen_data(dtype, shape)
            return mod, (input_np, expect)
        else:
            return mod
    else:
        mod = utils.op_build_test(Relu, [build_shape], [dtype],
                                  kernel_name='relu',
                                  attrs=attrs)
        input_np, expect = gen_data(dtype, shape)
        output = np.full(expect.shape, np.nan, dtype=expect.dtype)
        args = [input_np, output]
        if attrs is not None and attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(1, ), expect=expect)
        rtol, atol = get_rtol_atol("relu", dtype)
        return input_np, output, expect, compare_tensor(output,
                                                        expect,
                                                        rtol=rtol,
                                                        atol=atol)
Example No. 14
def five2four_compile(shape_5d,
                      dtype,
                      op_attrs,
                      attrs,
                      kernel_name='five2four',
                      tuning=False):
    if attrs.get("dynamic"):
        var_shape = []
        shape4d, dst_type, _ = op_attrs
        for i in range(len(shape_5d)):
            if shape_5d[i] == 1:
                var_shape.append(shape_5d[i])
            else:
                var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape_5d
    return utils.op_build_test(five2four.five2four, [build_shape], [dtype],
                               op_attrs,
                               kernel_name=kernel_name,
                               attrs=attrs,
                               tuning=tuning)
Example No. 15
def batchmatmul_compile(bs,
                        m,
                        n,
                        k,
                        bias_shape,
                        dtype,
                        trans_a,
                        trans_b,
                        kernel_name,
                        attrs,
                        tuning=False):
    args_dict = {}
    if attrs.get("dynamic"):
        bs_vars = ()
        for i in range(len(bs)):
            tmp = tvm.var('I' + str(i))
            bs_vars = bs_vars + (tmp, )
            args_dict[tmp] = bs[i]

        bs = bs_vars
        index = len(bs)
        m_var = tvm.var('I' + str(index))
        n_var = tvm.var('I' + str(index + 1))
        k_var = tvm.var('I' + str(index + 2))
        args_dict[m_var] = m
        args_dict[n_var] = n
        args_dict[k_var] = k
        m = m_var
        n = n_var
        k = k_var

        index = index + 3
        bias_vars = ()
        if len(bias_shape) > 0:
            for i in range(len(bias_shape)):
                tmp = tvm.var('I' + str(i + index))
                args_dict[tmp] = bias_shape[i]
                bias_vars = bias_vars + (tmp, )
            bias_shape = bias_vars

    data_shape, weight_shape, out_shape = get_shape(bs, m, n, k, trans_a,
                                                    trans_b)
    if len(bias_shape) > 0:
        input_shapes = [data_shape, weight_shape, bias_shape]
        input_types = [dtype, dtype, dtype]
    else:
        input_shapes = [data_shape, weight_shape]
        input_types = [dtype, dtype]

    op_attrs = [trans_a, trans_b]
    args = get_vars(input_shapes, input_types)
    args_value = []
    for item in args:
        if item in args_dict:
            args_value.append(args_dict[item])

    if len(bias_shape) > 0:
        return utils.op_build_test(batchmatmul.batchmatmul_bias,
                                   input_shapes,
                                   input_types,
                                   op_attrs,
                                   kernel_name,
                                   attrs,
                                   tuning=tuning), args_value
    else:
        return utils.op_build_test(batchmatmul.batchmatmul,
                                   input_shapes,
                                   input_types,
                                   op_attrs,
                                   kernel_name,
                                   attrs,
                                   tuning=tuning), args_value
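A framework-free sketch of the args_dict bookkeeping above, with string keys standing in for the tvm.var objects (hypothetical values): get_vars yields the vars in the order the built kernel expects, and args_value collects the matching runtime dims in that same order.

args_dict = {"I0": 16, "I1": 64, "I2": 128, "I3": 32}   # bs, then m, n, k
args = ["I0", "I3", "I1", "I2"]      # kernel's expected var order
args_value = [args_dict[item] for item in args if item in args_dict]
# args_value -> [16, 32, 64, 128]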
Example No. 16
def conv_run(fmap_shape,
             filter_shape,
             pad,
             stride,
             dilation,
             use_bias=False,
             attrs=None,
             dump_data=False):
    conv_dtype = 'float16'

    vc_util.convolution_format_check(fmap_shape, filter_shape, pad, stride,
                                     dilation)

    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fmap_shape, filter_shape, pad,
                                                 stride, dilation)
    IN, IC, IH, IW = fm_shape
    WN, WC, WH, WW = w_shape
    C0 = 16

    if use_bias:
        input_shape = [(IN, IC // C0, IH, IW, C0),
                       (WC // C0 * WH * WW, WN // 16, 16, C0),
                       (1, WN // 16, 1, 1, 16)]
    else:
        input_shape = [(IN, IC // C0, IH, IW, C0),
                       (WC // C0 * WH * WW, WN // 16, 16, C0)]

    input_file = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    expect_file = input_file + "/" + gen_kernel_name(
        [input_shape], [conv_dtype],
        op_attrs=[
            fmap_shape, filter_shape, pad, stride, dilation, use_bias, attrs
        ],
        kernel_name='conv',
        attrs=attrs) + ".bin"

    all_dynamic = 0  # kh kw pad stride
    partial_dynamic = 0  # fn fc1 fh fw wN wC
    if attrs.get("dynamic"):
        all_dynamic = 1
        print("=================all dynamic==================")
    if attrs.get("partial_dynamic"):
        partial_dynamic = 1
        print("=================partial dynamic==================")
    dynamic = partial_dynamic or all_dynamic

    if not dynamic:
        print("=================static shape==================")
    if dynamic:
        fmap_shape_real = fmap_shape
        filter_shape_real = filter_shape
        pad_real = pad
        stride_real = stride
        dilation_real = dilation

        if partial_dynamic or all_dynamic:
            N = tvm.var("N")
            C = tvm.var("CI")
            CI1 = tvm.var("CI1")
            H = tvm.var("H")
            W = tvm.var("W")

            COUT = tvm.var("CO")
            CO1 = tvm.var("CO1")
            _, _, KH, KW = filter_shape
            SH, SW = stride
            PT, PB, PL, PR = pad

        params = ()
        if all_dynamic:
            PARAM_KH = tvm.var("KH")
            PARAM_KW = tvm.var("KW")
            PARAM_PT = tvm.var("PT")
            PARAM_PB = tvm.var("PB")
            PARAM_PL = tvm.var("PL")
            PARAM_PR = tvm.var("PR")
            PARAM_SH = tvm.var("SH")
            PARAM_SW = tvm.var("SW")

            PARAM_T1_0_H = tvm.var("T1_0_H")
            PARAM_T1_0_W = tvm.var("T1_0_W")
            PARAM_T1_0_C1 = tvm.var("T1_0_C1")
            PARAM_T0_0_MO = tvm.var("T0_0_MO")
            PARAM_T0_0_NO = tvm.var("T0_0_NO")
            PARAM_T0_0_KO = tvm.var("T0_0_KO")

            params = (PARAM_KH, PARAM_KW, PARAM_PT, PARAM_PB, PARAM_PL,
                      PARAM_PR, PARAM_SH, PARAM_SW, PARAM_T1_0_H, PARAM_T1_0_W,
                      PARAM_T1_0_C1, PARAM_T0_0_MO, PARAM_T0_0_NO,
                      PARAM_T0_0_KO)

        DEBUG = 1
        if dynamic:
            KH_FAKE = 11
            KW_FAKE = 31
            fmap_shape = (N, C, H, W)
            filter_shape = (COUT, C, KH, KW)
            if not DEBUG:
                CO1 = (COUT + 15) // 16
                CI1 = (C + 15) // 16
            if use_bias:
                # input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0), (1, WN // 16, 1, 1, 16)]
                if all_dynamic:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH_FAKE * KW_FAKE, CO1, 16, 16),
                                   (1, CO1, 1, 1, 16)]
                else:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH * KW, CO1, 16, 16),
                                   (1, CO1, 1, 1, 16)]
            else:
                # input_shape = [(IN, IC // C0, IH, IW, C0), (WC // C0 * WH * WW, WN // 16, 16, C0)]
                if all_dynamic:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH_FAKE * KW_FAKE, CO1, 16, 16)]
                else:
                    input_shape = [(N, CI1, H, W, 16),
                                   (CI1 * KH * KW, CO1, 16, 16)]

        mod = utils.op_build_test(Conv, [input_shape], [conv_dtype],
                                  op_attrs=[
                                      fmap_shape, filter_shape, pad, stride,
                                      dilation, use_bias, attrs, params
                                  ],
                                  kernel_name='conv',
                                  attrs=attrs)
        fmap_data, filter_data, bias_data, expect = gen_data(
            fmap_shape_real, filter_shape_real, pad_real, stride_real,
            dilation_real, use_bias, expect_file)
    else:
        mod = utils.op_build_test(Conv, [input_shape], [conv_dtype],
                                  op_attrs=[
                                      fmap_shape, filter_shape, pad, stride,
                                      dilation, use_bias, attrs
                                  ],
                                  kernel_name='conv',
                                  attrs=attrs)
        fmap_data, filter_data, bias_data, expect = gen_data(
            fmap_shape, filter_shape, pad, stride, dilation, use_bias,
            expect_file)

    if dump_data:
        with open('input.bin', 'wb') as fo:
            fo.write(fmap_data.astype(np.float16, copy=False))
        with open('filter.bin', 'wb') as fo:
            fo.write(filter_data.astype(np.float16, copy=False))
        with open('bias.bin', 'wb') as fo:
            fo.write(bias_data.astype(np.float16, copy=False))
        with open('output.bin', 'wb') as fo:
            fo.write(expect.astype(np.float16, copy=False))

    out_data = np.full(expect.shape, np.nan, 'float16')

    if use_bias:
        input = [fmap_data, filter_data, bias_data]
    else:
        input = [fmap_data, filter_data]

    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "Yes":
        return input, out_data, expect, True

    if not dynamic:
        args = input
        args.append(out_data)
        args = tuple(args)
        out_data = utils.mod_launch(mod, args, expect=expect)
    else:
        args = []
        args.append(fmap_data)
        args.append(filter_data)
        args.append(out_data)
        if partial_dynamic or all_dynamic:
            args.append(IN)
            args.append(IC)
            args.append(IH)
            args.append(IW)
            args.append(WN)
        if all_dynamic:
            args.append(KH)
            args.append(KW)
            args.append(PT)
            args.append(PB)
            args.append(PL)
            args.append(PR)
            args.append(SH)
            args.append(SW)
            if attrs.get("conv_tile") and len(attrs["conv_tile"]) == 7:
                T1_0_H = attrs["conv_tile"][0]
                T1_0_C1 = attrs["conv_tile"][1]
                T0_0_MO = attrs["conv_tile"][2]
                T0_0_KO = attrs["conv_tile"][3]
                T0_0_NO = attrs["conv_tile"][4]
                T1_0_W = attrs["conv_tile"][5]
                if T1_0_H == IH:
                    T1_0_H += PT + PB
                T1_0_H_cut = (T1_0_H - KH) // SH + 1
                if T1_0_W == IW:
                    T1_0_W += PL + PR
                T1_0_W_cut = (T1_0_W - KW) // SW + 1
                args.append(T1_0_H_cut)
                args.append(T1_0_W_cut)
                args.append((T1_0_C1 + 15) // 16)
                args.append((T0_0_MO + 15) // 16)
                args.append((T0_0_NO + 15) // 16)
                args.append((T0_0_KO + 15) // 16)
        if DEBUG:
            args.append(IC // 16)
            args.append(WN // 16)
        block_dim = min(32, IN)
        args.append(block_dim)
        out_data = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)

    rtol, atol = get_rtol_atol("conv", conv_dtype)
    return input, out_data, expect, compare_tensor(out_data,
                                                   expect,
                                                   rtol=rtol,
                                                   atol=atol,
                                                   equal_nan=True)
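A worked numeric check of the tile-to-output-window conversion above (hypothetical numbers): a tile of T1_0_H padded input rows with kernel height KH and stride SH yields (T1_0_H - KH) // SH + 1 output rows.

T1_0_H, KH, SH = 16, 3, 2
T1_0_H_cut = (T1_0_H - KH) // SH + 1   # (16 - 3) // 2 + 1 = 7
assert T1_0_H_cut == 7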
Example No. 17
def add_run(shape1,
            shape2,
            dtype,
            kernel_name="add",
            scale=1.0,
            attrs=None,
            polyhedral=True):
    if not isinstance(scale, (float, int)):
        if type(attrs) is not bool:
            scale, attrs = 1.0, scale
        else:
            scale, attrs, polyhedral = 1.0, scale, attrs

    op_attrs = [scale]
    if not polyhedral:
        op_attrs = op_attrs + [polyhedral, attrs]

    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        if shape1 != shape2:
            raise TypeError(
                "Input tensors have different shapes; broadcast is not "
                "supported for dynamic shape.")
        var_shape = []
        for i in range(len(shape1)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape1 = var_shape
        build_shape2 = var_shape
    else:
        build_shape1 = shape1
        build_shape2 = shape2

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(add.add, [build_shape1, build_shape2],
                                  [dtype, dtype],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  polyhedral=polyhedral,
                                  tuning=t)
        if t:
            args, expect, input1, input2 = gen_data(shape1, shape2, dtype,
                                                    scale)
            return mod, expect, args
        else:
            return mod
    else:
        args, expect, input1, input2 = gen_data(shape1, shape2, dtype, scale)
        mod = utils.op_build_test(add.add, [build_shape1, build_shape2],
                                  [dtype, dtype],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  polyhedral=polyhedral)
        if attrs.get("dynamic"):
            for i in range(len(shape1)):
                args.append(shape1[i])
            block_dim = compute_blockdim(shape1)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)
        rtol, atol = get_rtol_atol("add", dtype)
        return (input1, input2), output, expect, compare_tensor(output,
                                                                expect,
                                                                rtol=rtol,
                                                                atol=atol,
                                                                equal_nan=True)
Example No. 18
def add_run(shape1,
            shape2,
            dtype,
            kernel_name="add",
            scale=1.0,
            attrs_op=None,
            polyhedral=True,
            attrs=None):
    if not isinstance(scale, (float, int)):
        if type(attrs_op) is not bool:
            scale, attrs_op = 1.0, scale
        else:
            scale, attrs_op, polyhedral = 1.0, scale, attrs_op

    op_attrs = [scale]
    if not polyhedral:
        op_attrs = op_attrs + [polyhedral, attrs_op]

    attrs = {} if attrs is None else attrs
    if isinstance(attrs_op, dict):
        attrs.update(attrs_op)
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        if shape1 != shape2:
            raise TypeError(
                "Input tensors have different shapes; broadcast is not "
                "supported for dynamic shape.")
        var_shape = []
        for i in range(len(shape1)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape1 = var_shape
        build_shape2 = var_shape
    else:
        build_shape1 = shape1
        build_shape2 = shape2

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Add, [build_shape1, build_shape2],
                                  [dtype, dtype],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  polyhedral=polyhedral,
                                  tuning=t)
        if t:
            args, expect, input1, input2 = gen_data(shape1, shape2, dtype,
                                                    scale)
            return mod, expect, args
        else:
            return mod
    else:
        args, expect, input1, input2 = gen_data(shape1, shape2, dtype, scale)
        mod = utils.op_build_test(Add, [build_shape1, build_shape2],
                                  [dtype, dtype],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  polyhedral=polyhedral)
        if attrs.get("dynamic"):
            for i in range(len(shape1)):
                args.append(shape1[i])
            block_dim = compute_blockdim(shape1)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(2, ), expect=expect)

        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            data = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *data,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])

        rtol, atol = get_rtol_atol("add", dtype)
        return (input1, input2), output, expect, compare_tensor(output,
                                                                expect,
                                                                rtol=rtol,
                                                                atol=atol,
                                                                equal_nan=True)
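A hypothetical dynamic-shape invocation of this second add_run variant, assuming the surrounding AKG test utilities are importable:

(inp1, inp2), out, expect, ok = add_run((16, 16), (16, 16), "float16",
                                        attrs={"dynamic": True})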