Example #1
def fused_is_finite_run(shape, layout='NHWC', poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    dtype = "float32"
    mod = utils.op_build_test(fused_is_finite, [shape], [dtype],
                              op_attrs=[layout],
                              kernel_name="fused_is_finite",
                              polyhedral=poly_sch,
                              attrs=attrs)

    data, expect, output = gen_data(shape, dtype, layout)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data, output = to_tvm_nd_array([data, output],
                                       akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         data,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, res
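A minimal NumPy sketch of the property checked above, assuming fused_is_finite reduces an is-finite test over the whole tensor (the shape below is illustrative, not taken from the test suite):

import numpy as np

x = np.random.rand(4, 32, 32, 16).astype("float32")  # NHWC, illustrative shape
x[0, 0, 0, 0] = np.nan
# The reduction is True only when every element is finite (no NaN/Inf).
print(np.all(np.isfinite(x)))  # False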
Example #2
def select_run(shape_cond, shape_x, dtype_cond, dtype_x, attrs=None):
    """select_run implementation"""
    if attrs is None:
        attrs = {}

    mod = utils.op_build_test(select, [shape_cond, shape_x, shape_x],
                              [dtype_cond, dtype_x, dtype_x],
                              kernel_name='select',
                              op_attrs=[],
                              attrs=attrs)
    args, exp_output, cond, x1, x2 = gen_data(shape_cond, shape_x, dtype_cond,
                                              dtype_x)
    acu_output = utils.mod_launch(mod, args, expect=exp_output)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    # compare result
    rtol, atol = get_rtol_atol("select", dtype_x)
    testcase_result = compare_tensor(acu_output,
                                     exp_output,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)

    return [cond, x1, x2], acu_output, exp_output, testcase_result
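For reference, the select op tested above presumably behaves like np.where: elements of x1 are taken where the condition holds, otherwise elements of x2. A minimal sketch with placeholder shapes:

import numpy as np

cond = np.array([True, False, True])
x1 = np.array([1., 2., 3.], dtype="float32")
x2 = np.array([10., 20., 30.], dtype="float32")
print(np.where(cond, x1, x2))  # [ 1. 20.  3.]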
Example #3
def expand_dims_run(shape, axis, dtype, kernel_name="expand_dims", attrs={}):
    op_attr = [axis]
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(ExpandDims, [shape], [dtype],
                                  op_attr,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, input, output = gen_data(axis, dtype, shape)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(ExpandDims, [shape], [dtype],
                                  op_attr,
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        expect, input, output = gen_data(axis, dtype, shape)
        output = utils.mod_launch(mod, (input, output), expect=expect)
        if attrs.get("profiling", False):
            import akg
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, output, expect, compare_tensor(output,
                                                     expect,
                                                     rtol=5e-03,
                                                     equal_nan=True)
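The ExpandDims op exercised above presumably matches numpy.expand_dims, inserting a length-1 axis at the given position; a minimal sketch with an illustrative shape:

import numpy as np

x = np.random.rand(8, 16).astype("float32")
print(np.expand_dims(x, axis=1).shape)  # (8, 1, 16)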
Example #4
def fused_mul_div_rsqrt_mul_isfinite_red_run(shape, dtype='float32', poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs)
    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    mod = utils.op_build_test(fused_mul_div_rsqrt_mul_isfinite_red, input_shape, input_dtype,
                          kernel_name="fused_mul_div_rsqrt_mul_isfinite_red", polyhedral=poly_sch, attrs=attrs)

    outputs = [np.full((1,), False, 'bool')] + [np.full(shape, np.nan, dtype)] * 3
    output = utils.mod_launch(mod, [*inputs, *outputs], outputs=list(range(-len(outputs), 0)), expect=expect)
    ret = compare_tensor(output[0], expect[0], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[1], expect[1], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[2], expect[2], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[3], expect[3], rtol=5e-03, atol=1.e-08)
    print("Test {}".format("Pass" if ret else "Failed"))
    target_name = attrs["target"].split()[0]
    if not ret:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data = to_tvm_nd_array([*inputs, *outputs], akg.tvm.context(target_name, 0))
        target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, ret
Example #5
def standard_normal_run(seed, shape, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    mod = utils.op_build_test(standard_normal, [], [],
                              kernel_name="standard_normal",
                              op_attrs=[seed, shape],
                              attrs=attrs)

    output, expect = gen_data(shape)
    output = utils.mod_launch(mod, (output, ), expect=expect)
    res = output.shape == expect.shape
    res &= abs(np.mean(output) - 0) < 1e-1
    res &= abs(np.std(output) - 1) < 1e-1
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        output = to_tvm_nd_array(output, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return output, output, expect, res
Example #6
def fused_relu_grad_run(shape, c1=0, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    dtype='float16'
    input = gen_data(shape, dtype)
    expect = compute_expect(input, c1)
    shapes = [shape] * 3
    dtypes = [dtype] * 3
    op_attrs = [c1]
    mod = utils.op_build_test(fused_relu_grad, shapes, dtypes, op_attrs=op_attrs, kernel_name="fused_relu_grad",
                        polyhedral=poly_sch, attrs=attrs)

    output = np.full(shape, np.nan, dtype)
    output = utils.mod_launch(mod, (*input, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data = to_tvm_nd_array([*input, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
    return input, output, expect, res
Example #7
def reciprocal_run(shape, dtype, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = reciprocal_compile(shape,
                                 dtype,
                                 attrs,
                                 kernel_name=kernel_name,
                                 tuning=t)
        if t:
            expect, input1, output = gen_data(dtype, shape)
            return mod, expect, (input1, output)
        else:
            return mod
    else:
        mod = reciprocal_compile(shape, dtype, attrs)
        expect, input1, output = gen_data(dtype, shape)
        output = utils.mod_launch(mod, (input1, output), expect=expect)
        if attrs["profiling"]:
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input1, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        rtol, atol = get_rtol_atol("reciprocal", dtype)
        return (input1, ), output, expect, compare_tensor(output,
                                                          expect,
                                                          rtol=rtol,
                                                          atol=atol,
                                                          equal_nan=True)
Example #8
def fused_bn_update_grad_run(shape, out_shape, dtype="float16", out_dtype="float32", layout="NHWC", poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    shape_list = [shape, out_shape, shape]
    dtype_list = [dtype, out_dtype, dtype]
    op_attrs = [layout]
    mod = utils.op_build_test(fused_bn_update_grad, shape_list, dtype_list, op_attrs=op_attrs, kernel_name="fused_bn_update_grad",
                              polyhedral=poly_sch, attrs=attrs)

    head, data_sum, in_bn, output, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    outputs = [output, output]
    inputs = [head, data_sum, in_bn]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)), expect=expect)

    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        arg_list = to_tvm_nd_array(arg_list, akg.tvm.context(target_name, 0))
        target_profiling(mod, *arg_list, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res
Example #9
def sqrt_run(shape, dtype, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(sqrt, [shape], [dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, input, output = gen_data(dtype, shape)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        expect, input, output = gen_data(dtype, shape)
        mod = utils.op_build_test(sqrt, [shape], [dtype],
                                  kernel_name='sqrt',
                                  attrs=attrs)
        output = utils.mod_launch(mod, (input, output), expect=expect)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, output, expect, compare_tensor(output,
                                                     expect,
                                                     rtol=5e-03,
                                                     equal_nan=True)
Example #10
def csrmv_run(shape1, dtype1, shape2, dtype2, poly_sch=True, attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    if attrs["target"] == "cuda":
        attrs["enable_akg_reduce_lib"] = True
        attrs["enable_atomic_add"] = True
    data, indices, indptr, weight, expect = gen_data(shape1, dtype1, shape2, dtype2)
    attrs["is_csr"] = True

    mod = utils.op_build_test(csr_mv, [data.shape, indices.shape, indptr.shape, weight.shape],
                              ["float32", "int32", "int32", "float32"], polyhedral=poly_sch,
                              attrs=attrs, kernel_name='csrmv')
    
    output_shape = expect.shape
    output = np.zeros(output_shape, dtype="float32")
    output = utils.mod_launch(mod, (data, indices, indptr, weight, output), expect=expect)
    res = compare_tensor(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs["profiling"]:
        args_list = to_tvm_nd_array([data, indices, indptr, weight, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,  repeat_time=attrs["repeat_times"])
    return (data, indices, indptr, weight), output, expect, res
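A self-contained NumPy sketch of the CSR mat-vec that csr_mv presumably computes, using the same data/indices/indptr layout that gen_data produces (the tiny matrix here is illustrative):

import numpy as np

# 3x3 CSR matrix [[1, 0, 2], [0, 0, 3], [4, 0, 0]] times a dense vector.
data = np.array([1., 2., 3., 4.], dtype="float32")
indices = np.array([0, 2, 2, 0], dtype="int32")
indptr = np.array([0, 2, 3, 4], dtype="int32")
weight = np.array([[1.], [1.], [1.]], dtype="float32")

out = np.zeros((3, 1), dtype="float32")
for row in range(3):
    for k in range(indptr[row], indptr[row + 1]):
        out[row] += data[k] * weight[indices[k]]
print(out.ravel())  # [3. 3. 4.]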
Example #11
def tensor_scatter_add_run(data_shape,
                           data_type,
                           indices_shape,
                           indices_type,
                           axis,
                           poly_sch=True,
                           attrs=None):
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    if len(indices_shape) > 1:
        updates_shape = indices_shape[:-1] + data_shape[indices_shape[-1]:]
    else:
        updates_shape = indices_shape + data_shape[1:]

    mod = utils.op_build_test(tensor_scatter_add,
                              [data_shape, indices_shape, updates_shape],
                              [data_type, indices_type, data_type],
                              attrs=default_attrs,
                              kernel_name="tensor_scatter_add",
                              polyhedral=poly_sch)

    # gen data
    indices_shape = indices_shape + (1, ) if len(
        indices_shape) == 1 else indices_shape
    params, indices, updates, expect = gen_data(data_shape, data_type,
                                                indices_shape, indices_type)
    output_shape = expect.shape

    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, updates, output),
                              expect=expect)

    atol, rtol = get_rtol_atol("tensor_scatter_add", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        params, indices, updates, output = to_tvm_nd_array(
            [params, indices, updates, output],
            akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         params,
                         indices,
                         updates,
                         output,
                         target=target_name,
                     repeat_time=default_attrs["repeat_times"])
    return (params, indices, updates), output, expect, res
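A minimal NumPy sketch of the scatter-add semantics being validated, for the 1-D index case where duplicate indices accumulate (shapes are placeholders):

import numpy as np

params = np.zeros((4, 3), dtype="float32")
indices = np.array([1, 3, 1], dtype="int32")
updates = np.ones((3, 3), dtype="float32")
expect = params.copy()
np.add.at(expect, indices, updates)  # rows 1 and 3 receive updates; row 1 twice
print(expect[:, 0])  # [0. 2. 0. 1.]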
Example #12
def batch_matmul_run(shape1,
                     shape2,
                     dtype,
                     out_dtype="float32",
                     layout1="NHDT",
                     layout2="NHDT",
                     layout_out="NHDT",
                     shape_bias=None,
                     add_bias=False,
                     tensor_core=True,
                     poly_sch=True,
                     attrs=None):
    op_attrs = [out_dtype, layout1, layout2, layout_out, tensor_core, add_bias]

    default_attrs = attrs
    if not attrs:
        default_attrs = {"target": "cuda"}

    if default_attrs["target"] == "cuda" and tensor_core:
        default_attrs.update({
            "pragma_enable_matmul": True,
            "enable_auto_inline": False
        })
    elif default_attrs["target"] == "llvm":
        if "pragma_enable_matmul" not in default_attrs.keys():
            default_attrs["pragma_enable_matmul"] = True
        if "feature" not in default_attrs.keys():
            default_attrs["feature"] = "avx"

    mod = utils.op_build_test(BatchMatMul, (shape1, shape2, shape_bias),
                              (dtype, dtype, out_dtype),
                              op_attrs=op_attrs,
                              attrs=default_attrs,
                              polyhedral=poly_sch,
                              kernel_name="batch_matmul")

    lhs, rhs, bias, output, expect = gen_data(shape1, shape2, dtype, out_dtype,
                                              layout1, layout2, layout_out,
                                              shape_bias, add_bias)
    args = (lhs, rhs, bias, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        args = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args,
                         target=target_name,
                     repeat_time=default_attrs["repeat_times"])
    return (lhs, rhs, bias), output, expect, res
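As a point of reference, a plain NumPy batched matmul; the actual "NHDT" layouts, bias handling, and tensor-core path are defined by BatchMatMul and gen_data in the repository, so the shapes below are only illustrative:

import numpy as np

lhs = np.random.rand(2, 4, 8, 16).astype("float16")
rhs = np.random.rand(2, 4, 16, 32).astype("float16")
# Accumulate in float32, as the out_dtype="float32" default above suggests.
out = np.matmul(lhs.astype("float32"), rhs.astype("float32"))
print(out.shape)  # (2, 4, 8, 32)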
Example #13
def fused_gather_gather_add_mul_max_exp_scatter_add_run(
        input1_shape,
        input2_shape,
        input3_shape,
        input4_shape,
        data_dtype,
        indices_type,
        axis,
        poly_sch=True,
        attrs=None):
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    mod = utils.op_build_test(
        fused_gather_gather_add_mul_max_exp_scatter_add,
        [input1_shape, input2_shape, input3_shape, input4_shape],
        [data_dtype, indices_type, data_dtype, indices_type],
        op_attrs=op_attrs,
        attrs=default_attrs,
        polyhedral=poly_sch,
        kernel_name="fused_gather_gather_add_mul_max_exp_scatter_add",
    )

    # gen data
    input1, input2, input3, input4, expect1, expect2 = gen_data(
        input1_shape, input2_shape, input3_shape, input4_shape, data_dtype,
        indices_type, axis)

    output1 = np.zeros(expect1.shape, expect1.dtype)
    output2 = deepcopy(input1)
    output1, output2 = utils.mod_launch(
        mod, (input1, input2, input3, input4, output1, output2),
        outputs=(-2, -1))

    atol, rtol = get_rtol_atol(
        "fused_gather_gather_add_mul_max_exp_scatter_add", data_dtype)
    res = compare_tensor(output1, expect1, rtol=rtol, atol=atol)
    res &= compare_tensor(output2, expect2, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        inputs = to_tvm_nd_array(
            [input1, input2, input3, input4, output1, output2],
            akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *inputs,
                         target=target_name,
                     repeat_time=default_attrs["repeat_times"])
    return (input1, input2, input3, input4), (output1, output2), (expect1,
                                                                  expect2), res
Example #14
def conv_fusion_run(shape_data,
                    shape_filter1,
                    shape_filter2,
                    stride1,
                    stride2,
                    padding1,
                    padding2,
                    dilation1,
                    dilation2,
                    dtype,
                    out_dtype="float32",
                    poly_sch=True,
                    attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [stride1, stride2, padding1, padding2, dilation1, dilation2]
    attrs.update({
        "enable_auto_fuse": False,
        "shared_memory_tensors": "out input_1 input_2 input_3",
        "pragma_disable_loop_fusion": True,
        "dim": "3 0 1 1 3 1 1 1 3 2 4 4 3 3 52 52 3 4 64 64"
    })

    mod = utils.op_build_test(ConvFusion,
                              (shape_data, shape_filter1, shape_filter2),
                              (dtype, dtype, dtype),
                              op_attrs=op_attrs,
                              attrs=attrs,
                              polyhedral=poly_sch,
                              kernel_name="conv_fusion_auto")

    data, weight1, weight2, output, expect = fusion_gen_data(
        shape_data, shape_filter1, shape_filter2, stride1, stride2, padding1,
        padding2, dilation1, dilation2, dtype, out_dtype)
    args = (data, weight1, weight2, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1.e-8)
    print("Test {}".format("Pass"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data, weight1, weight2, output = to_tvm_nd_array(
            [data, weight1, weight2, output], akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         data,
                         weight1,
                         weight2,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, weight1, weight2), output, expect, res
Example #15
def csr_reduce_sum_run(shape,
                       dtype1,
                       dtype2,
                       axis,
                       nnz=-1,
                       poly_sch=True,
                       attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    if attrs["target"] == "cuda":
        attrs["enable_akg_reduce_lib"] = True
        attrs["enable_atomic_add"] = True
    op_attrs = [axis, shape]

    # gen data
    data, col_idx, row_idx, expect = gen_data(shape,
                                              dtype1,
                                              dtype2,
                                              axis,
                                              nnz=nnz)
    output_shape = expect.shape
    attrs["is_csr"] = True

    mod = utils.op_build_test(csr_reduce_sum,
                              [data.shape, col_idx.shape, row_idx.shape],
                              [dtype1, dtype2, dtype2],
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs,
                              kernel_name="csr_reduce_sum")

    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (data, col_idx, row_idx, output),
                              expect=expect)
    atol, rtol = get_rtol_atol("csr_reduce_sum", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs["profiling"]:
        args_list = to_tvm_nd_array([data, col_idx, row_idx, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, col_idx, row_idx), output, expect, res
Example #16
def cast_run(shape, srcType, dstType, attrs={}):
    op_attrs = [dstType]
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = utils.op_build_test(Cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name='cast',
                                  attrs=attrs)
        args, exp_output, input = gen_data(dstType, shape, srcType)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        acu_output = utils.mod_launch(mod,
                                      args,
                                      outputs=(1, ),
                                      expect=exp_output)
        # compare result
        rtol, atol = get_rtol_atol("cast", dstType)
        TestCase_Result = compare_tensor(acu_output,
                                         exp_output,
                                         rtol=rtol,
                                         atol=atol,
                                         equal_nan=True)

        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, acu_output, exp_output, TestCase_Result
Example #17
def equal_run(shapes,
              dtype,
              kernel_name="equal",
              attrs_op={},
              cce_path="./",
              attrs={}):
    attrs.update(attrs_op)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Equal,
                                  shapes, [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            benchMark1, inputs1, output1 = gen_data(dtype, shapes)
            return mod, benchMark1, inputs1 + [output1]
        else:
            return mod
    else:
        mod = utils.op_build_test(Equal,
                                  shapes, [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        benchMark1, inputs1, output1 = gen_data(dtype, shapes)
        output1 = utils.mod_launch(mod, inputs1 + [output1], expect=benchMark1)
        if attrs.get("profiling", False):
            import akg
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array(inputs1 + [output1],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        # Also test the case where the inputs are equal
        if shapes[0] == shapes[1]:
            inputs2 = []
            inputs2.append(inputs1[0])
            inputs2.append(inputs1[0])
            benchMark2 = np.equal(inputs2[0], inputs2[1])
            output2 = np.full(benchMark2.shape, 0, bool)
            output2 = utils.mod_launch(mod,
                                       inputs2 + [output2],
                                       expect=benchMark1)
            testPass = (np.array_equal(output1, benchMark1)
                        and np.array_equal(output2, benchMark2))
            return (inputs1, inputs2), (output1,
                                        output2), (benchMark1,
                                                   benchMark2), testPass
        else:
            return inputs1, output1, benchMark1, np.array_equal(
                output1, benchMark1)
Example #18
def get_result(desc, poly, attrs=None, profiling=True, need_compare=True):
    backend = _get_backend(desc)

    mod = composite.build(desc, attrs, poly=poly)
    if not need_compare:
        return True
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)
    # In profiling mode, mod_launch will return compute outputs and profiling value, only compute outputs needed here
    if isinstance(output, tuple) and len(output) > 0 and isinstance(
            output[-1], dict):
        output = output[0]
    output = output if isinstance(output, (list, tuple)) else [output]
    expect = expect if isinstance(expect, (list, tuple)) else [expect]
    output = list(output)
    expect = list(expect)
    for i, _ in enumerate(expect):
        if expect[i].dtype == "complex128" or expect[i].dtype == "complex64":
            final_shape = functools.reduce(lambda x, y: x * y, output[i].shape)
            flattern_output = output[i].reshape((final_shape, ))
            output_real = []
            output_imag = []
            for k, _ in enumerate(flattern_output):
                if k % 2 == 0:
                    output_real.append(flattern_output[k])
                else:
                    output_imag.append(flattern_output[k])
            output[i] = np.vectorize(complex)(output_real, output_imag)
            output[i] = output[i].reshape(expect[i].shape)
    if len(output) != len(expect):
        raise RuntimeError(
            "output and expect have different length, {} vs {}".format(
                len(output), len(expect)))

    compare_tolerance = get_compare_tolerance(desc, output_indexes)
    compare_res = list(map(_compare_func, output, expect, compare_tolerance))
    if not all(compare_res):
        source = (mod.imported_modules[0]
                  if backend == "cuda" else mod).get_source()
        logging.debug(source)
        _dump_info(desc, attrs, poly, input_for_mod, output, expect)
        logging.warning("Compare results: %s", str(compare_res))
        return False
    if profiling and backend in ["cuda", "cpu"]:
        ctx = tvm.context(backend, 0)
        has_complex = False
        for i in input_for_mod:
            if i.dtype == "complex64" or i.dtype == "complex128":
                has_complex = True
                break
        if not has_complex:
            inputs = to_tvm_nd_array(input_for_mod, ctx)
            target_profiling(mod, *inputs, target=backend, repeat_time=1000)
    return True
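The complex-output branch above rebuilds a complex array from a buffer that stores real and imaginary parts interleaved; a small standalone sketch of that reconstruction:

import numpy as np

flat = np.array([1., 2., 3., 4.], dtype="float64")  # [re0, im0, re1, im1]
rebuilt = np.vectorize(complex)(flat[0::2], flat[1::2])
print(rebuilt)  # [1.+2.j 3.+4.j]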
Example #19
def cumsum_run(shape,
               dtype,
               axis=0,
               exclusive=False,
               reverse=False,
               poly_sch=True,
               attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}

    def cumsum(data):
        op_attrs = {
            "axis": axis if isinstance(axis, list) else [axis],
            "exclusive": exclusive,
            "reverse": reverse
        }
        return cumsum_ir_builder([
            data,
        ], op_attrs)

    mod = utils.op_build_test(cumsum, [shape], [dtype],
                              kernel_name="cumsum",
                              polyhedral=poly_sch,
                              attrs=attrs)

    data, output, expect = gen_data(shape, dtype, axis, exclusive, reverse)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    target_name = attrs["target"].split()[0]
    if not ret:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        args_list = to_tvm_nd_array([data, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, ret
Example #20
def abs_run(shape, dtype, attrs={}):
    # Result_Numpy
    input_shape = [shape]
    input_dtype = [dtype]

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Abs,
                                  input_shape,
                                  input_dtype,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            exp_output, inputs, output = gen_date(dtype, shape)
            return mod, exp_output, (inputs, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(Abs,
                                  input_shape,
                                  input_dtype,
                                  kernel_name='abs',
                                  attrs=attrs)
        exp_output, inputs, output = gen_date(dtype, shape)
        acu_output = utils.mod_launch(mod, (inputs, output), expect=exp_output)

        # compare result
        rtol, atol = get_rtol_atol("abs", dtype)
        TestCase_Result = compare_tensor(acu_output,
                                         exp_output,
                                         rtol=rtol,
                                         atol=atol,
                                         equal_nan=True)

        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            data, output = to_tvm_nd_array([inputs, output],
                                           akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             data,
                             output,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])

        return inputs, acu_output, exp_output, TestCase_Result
Example #21
def fused_bn_update_run(in_shape,
                        dtype="float32",
                        c1=(1 / (256 * 7 * 7)),
                        c2=1.001e-05,
                        c3=1.00007975,
                        c4=0.100000024,
                        poly_sch=True,
                        attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    inputs = gen_data(in_shape, dtype)
    expect = compute_expect(inputs, c1, c2, c3, c4)
    op_attrs = [dtype, c1, c2, c3, c4]
    shapes = [in_shape] * 4
    dtypes = [dtype] * 4
    mod = utils.op_build_test(fused_bn_update,
                              shapes,
                              dtypes,
                              kernel_name="fused_bn_update",
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs)

    outputs = [np.full(in_shape, np.nan, dtype)] * 3
    attrs_list = inputs + outputs
    output = utils.mod_launch(mod,
                              attrs_list,
                              outputs=(range(-len(outputs), 0)),
                              expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        attrs_list = to_tvm_nd_array(attrs_list,
                                     akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *attrs_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res
Example #22
def one_hot_run(shape,
                depth,
                dtype,
                on_value,
                off_value,
                axis,
                poly_sch=True,
                attrs=None):
    if not attrs:
        attrs = {"target": "cce"}
    if attrs["target"] == CCE:
        return one_hot_ascend(shape, depth, dtype, on_value, off_value, axis,
                              attrs)

    mod = utils.op_build_test(
        one_hot, [shape], [dtype],
        kernel_name="one_hot",
        op_attrs=[on_value, off_value, depth, axis, dtype],
        polyhedral=poly_sch,
        attrs=attrs)
    # gen data
    expect, data_tmp, _, _, output = gen_data(axis, depth, dtype, shape,
                                              on_value, off_value)
    data = data_tmp.astype(dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    res = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs["profiling"]:
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([data, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, res
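A minimal NumPy sketch of the one-hot expansion being tested, assuming the new axis is appended last (depth, on/off values, and the index vector are placeholders):

import numpy as np

indices = np.array([0, 2, 1], dtype="int32")
depth, on_value, off_value = 3, 1.0, 0.0
expect = np.full((indices.size, depth), off_value, dtype="float32")
expect[np.arange(indices.size), indices] = on_value
print(expect)  # rows of off_value with on_value at the indexed column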
Example #23
def fused_bn_reduce_grad_run(in_shape,
                             layout='NHWC',
                             in_dtype="float16",
                             out_dtype='float16',
                             poly_sch=True,
                             attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    if layout != "NHWC" and layout != "NCHW":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout,
                                      out_dtype)
    input_shape_list = [i.shape for i in inputs]
    input_dtype_list = [inter_dtype] * 3 + [in_dtype] + [inter_dtype] * 3 + [in_dtype]
    op_attrs = [layout, out_dtype]
    mod = utils.op_build_test(fused_bn_reduce_grad,
                              input_shape_list,
                              input_dtype_list,
                              kernel_name="fused_bn_reduce_grad",
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs)

    outputs = [output]
    arglist = inputs + outputs
    output = utils.mod_launch(mod, arglist, expect=expect)

    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        arglist = to_tvm_nd_array(arglist, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *arglist,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res
Example #24
def gather_run(shape1,
               dtype1,
               shape2,
               dtype2,
               axis,
               poly_sch=True,
               attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [axis]
    mod = utils.op_build_test(gather, [shape1, shape2], [dtype1, dtype2],
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs,
                              kernel_name="gather")

    # gen data
    params, indices, expect = gen_data(shape1, dtype1, shape2, dtype2, axis)
    output_shape = expect.shape

    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, output), expect=expect)
    atol, rtol = get_rtol_atol("gather", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        params, indices, output = to_tvm_nd_array([params, indices, output],
                                                  akg.tvm.context(
                                                      target_name, 0))
        target_profiling(mod,
                         params,
                         indices,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (params, indices), output, expect, res
Example #25
def reduce_max_run(shape,
                   dtype,
                   axis,
                   keepdims,
                   kernel_name="reduce_max",
                   attrs=None):
    """run function for dsl function reduce_max"""
    if attrs is None:
        attrs = {}

    op_attrs = [axis, keepdims]

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(reduce_max, [shape], [dtype],
                                  op_attrs=op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
            return mod, expect, (inputs, output)

        return mod

    mod = utils.op_build_test(reduce_max, [shape], [dtype],
                              op_attrs=op_attrs,
                              kernel_name=kernel_name,
                              attrs=attrs)
    expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("reduce_max", dtype)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([inputs, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, output, expect, compare_tensor(output,
                                                  expect,
                                                  rtol=rtol,
                                                  atol=atol,
                                                  equal_nan=True)
Example #26
def unsorted_segment_sum_run_others(data_shape,
                                    data_type,
                                    indices_shape,
                                    indices_type,
                                    num,
                                    attrs=None):
    mod = unsortedsegmentsum_compile(data_shape,
                                     indices_shape,
                                     num,
                                     data_type,
                                     attrs,
                                     kernel_name='unsortedsegmentsum_run',
                                     tuning=False)
    # gen data
    input1, input2, expect = gen_data(data_shape, data_type, indices_shape,
                                      indices_type, num)
    output_shape = expect.shape

    if len(expect.shape) == 0:
        output_shape = (1, )
    #output = np.full(output_shape, np.nan, expect.dtype)
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (input1, input2, output), expect=expect)

    atol, rtol = get_rtol_atol("unsorted_segment_sum", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        input1, input2, output = to_tvm_nd_array([input1, input2, output],
                                                 akg.tvm.context(
                                                     target_name, 0))
        target_profiling(mod,
                         input1,
                         input2,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (input1, input2), output, expect, res
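For reference, a NumPy sketch of unsorted segment sum: each row of the data is accumulated into the output bucket named by its segment id (tiny placeholder inputs):

import numpy as np

data = np.arange(6, dtype="float32").reshape(3, 2)
segment_ids = np.array([0, 2, 0], dtype="int32")
num = 3
expect = np.zeros((num, 2), dtype="float32")
np.add.at(expect, segment_ids, data)
print(expect)  # [[4. 6.], [0. 0.], [2. 3.]]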
Example #27
def assign_run(ref_shape,
               val_shape,
               dtype,
               kernel_name="assign",
               attrs_op={},
               cce_path="./",
               attrs={}):
    attrs.update(attrs_op)
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Assign, [ref_shape, val_shape],
                                  [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            ref, val, expect = gen_data(dtype, ref_shape, val_shape)
            return mod, expect, (ref, val)
        else:
            return mod
    else:
        ref, val, expect = gen_data(dtype, ref_shape, val_shape)
        mod = utils.op_build_test(Assign, [ref_shape, val_shape],
                                  [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        fake_output = np.full(val_shape, np.nan, dtype)
        result, _ = utils.mod_launch(mod, (ref, val, fake_output),
                                     outputs=(0, -1),
                                     expect=expect)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            ref, val, output = to_tvm_nd_array([ref, val, fake_output],
                                               akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             ref,
                             val,
                             output,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return (ref, val), result, expect, compare_tensor(result,
                                                          expect,
                                                          atol=5e-01,
                                                          rtol=5e-03,
                                                          equal_nan=True)
Example #28
def reduce_sum_run(shape, reduce_axis, keepdims, dtype, attrs):
    if attrs is None:
        attrs = {}
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = sum_compile(shape,
                          reduce_axis,
                          keepdims,
                          dtype,
                          attrs,
                          kernel_name=kernel_name,
                          tuning=t)
        if t:
            expect, input1, output = gen_data(dtype, keepdims, reduce_axis,
                                              shape)
            return mod, expect, (input1, output)
        else:
            return mod
    else:
        # op_attrs = [reduce_axis, keepdims]
        mod = sum_compile(shape, reduce_axis, keepdims, dtype, attrs)
        expect, input1, output = gen_data(dtype, keepdims, reduce_axis, shape)
        args = [input1, output]
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(1, ), expect=expect)
        if attrs.get("profiling", False):
            import akg
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input1, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        rtol, atol = get_rtol_atol("sum", dtype)
        return input1, output, expect, compare_tensor(output,
                                                      expect,
                                                      rtol=rtol,
                                                      atol=atol,
                                                      equal_nan=True)
Example #29
def log_run(shape, dtype, kernel_name, attrs_op=None, attrs=None):
    input_shape = [shape]
    input_dtype = [dtype]
    if attrs_op is not None:
        if attrs is not None:
            attrs.update(attrs_op)
        else:
            attrs = attrs_op
    if attrs is None:
        attrs = {}
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(log,
                                  input_shape,
                                  input_dtype,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, input_, output = gen_data(dtype, shape)
            return mod, expect, (input_, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(log,
                                  input_shape,
                                  input_dtype,
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        expect, input_, output = gen_data(dtype, shape)
        output = utils.mod_launch(mod, (input_, output), expect=expect)
        rtol, atol = get_rtol_atol("log", dtype)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input_, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input_, output, expect, compare_tensor(output,
                                                      expect,
                                                      rtol=rtol,
                                                      atol=atol,
                                                      equal_nan=True)
Example #30
def conv_run(shape_data, shape_weight, stride=(1,1), padding=(0,0,0,0), dilation=(1,1), dtype="float16",
        out_dtype="float16", layout="NHWC", tensor_core=True, poly_sch=True, attrs=None):
    if layout != "NHWC" and layout != "NCHW":
        raise ValueError("Layout NHWC and NCHW supported")
    use_tensor_core = False
    if tensor_core and layout == "NHWC" and dtype == "float16":
        use_tensor_core = True
    op_attrs = [stride, padding, dilation]
    default_attrs = {"target": "cuda", "enable_auto_fuse": False}
    if attrs:
        default_attrs.update(attrs)
    if use_tensor_core:
        op_attrs += [out_dtype]
        default_attrs.update({"pragma_enable_matmul": True, "pragma_enable_conv_tensor_core": True})
        if poly_sch:
            mod = utils.op_build_test(
                TensorcoreConv, (shape_data, shape_weight), (dtype, dtype),
                op_attrs=op_attrs, attrs=default_attrs, kernel_name="tensorcore_conv_auto")
    elif poly_sch:
        mod = utils.op_build_test(Conv, (shape_data, shape_weight), (dtype, dtype),
                                    op_attrs=op_attrs, attrs=default_attrs, kernel_name="conv_auto")

    data, weight, output, expect = gen_data(
        shape_data, shape_weight, layout, stride, padding, dilation, dtype, out_dtype)
    args = (data, weight, output)
    output = utils.mod_launch(mod, args, expect=expect)
    rtol = 1e-3 if dtype == "float16" else 1e-4
    atol = 1e-3 if dtype == "float16" else 1e-4
    res = np.allclose(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data, weight, output = to_tvm_nd_array(
            [data, weight, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, data, weight, output, target=target_name, repeat_time=default_attrs["repeat_times"])
    return (data, weight), output, expect, res