Example #1
def _atan_compute(data):
    """compute for atan"""
    dtype = data.dtype

    if dtype == "float16":
        data = topi.cast(data, "float32")

    abs_data = topi.abs(data)
    tensor_one = dc.one_const(abs_data.dtype)

    abs_data_sub_one = topi.subtract(abs_data, tensor_one)
    abs_data_add_one = topi.add(abs_data, tensor_one)
    abs_data2 = topi.abs(topi.divide(abs_data_sub_one, abs_data_add_one))

    # calculate the result for |data| <= 1
    res = _do_atan_taylor(abs_data)
    # calculate the result for |data| > 1 via atan(x) = pi/4 + atan((x-1)/(x+1))
    res_mt_one = topi.add(_do_atan_taylor(abs_data2),
                          tvm.const(CONST_PI_BY_FOUR, abs_data2.dtype))
    res = topi.minimum(res, res_mt_one)

    if utils.product_is_mini() and data.dtype == "float32":
        sign_mask = topi.cast(topi.sign(topi.cast(data, "float16")), "float32")
    else:
        sign_mask = topi.sign(data)

    res = topi.multiply(res, sign_mask)

    if dtype == "float16":
        res = topi.cast(res, "float16")

    return res
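
The elementwise minimum on the two candidates is a branchless range reduction: for x >= 0, atan(x) = pi/4 + atan((x - 1)/(x + 1)), and the minimum selects whichever expansion applies to the given magnitude. A quick NumPy check of that identity (a verification sketch, not part of the kernel):

import numpy as np

# range-reduction identity behind res_mt_one: for x >= 0,
# atan(x) = pi/4 + atan((x - 1) / (x + 1))
x = np.linspace(0.0, 10.0, 50)
assert np.allclose(np.arctan(x), np.pi / 4 + np.arctan((x - 1.0) / (x + 1.0)))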
Example #2
def selu_compute(input_data):
    """selu compute implemention"""
    # if input_dtype is float16,convert it to float32
    dtype = input_data.dtype
    if dtype == "float16" or dtype == "float32":
        input_data = topi.cast(input_data, "float32")
        type_tmp = "float32"
    else:
        input_data = topi.cast(input_data, "float16")
        type_tmp = "float16"

    # generate a zero tensor to compare against
    tensor_zero = topi.multiply(input_data, tvm.const(0, dtype=type_tmp))
    # split the input into its negative part (min with 0)
    # and its positive part (max with 0)
    negative_res = topi.minimum(input_data, tensor_zero)
    positive_res = topi.maximum(input_data, tensor_zero)
    exp_res = exp(negative_res)
    sub_res = topi.add(exp_res, tvm.const(SCALAR_NEGATIVE_ONE, dtype=type_tmp))
    negative_muls_res = topi.multiply(sub_res, tvm.const(SCALE_ALPHA_PRODUCT, dtype=type_tmp))
    if dtype == "int8":
        negative_muls_res = akg.lang.cce.ceil(negative_muls_res)

    positive_muls_res = topi.multiply(positive_res, tvm.const(SCALE, dtype=type_tmp))
    res = topi.add(negative_muls_res, positive_muls_res)
    # cast back to the original dtype
    if dtype in ("float16", "int8", "int32"):
        res = topi.cast(res, dtype)

    return res
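
For reference, the split above reproduces the standard SELU definition selu(x) = scale * max(x, 0) + scale * alpha * (exp(min(x, 0)) - 1). A minimal NumPy sketch, assuming the usual constants (the snippet's SCALE and SCALE_ALPHA_PRODUCT presumably hold scale and scale * alpha; neither is defined here):

import numpy as np

# assumed SELU constants (Klambauer et al., 2017)
SCALE = 1.0507009873554805
ALPHA = 1.6732632423543772

def selu_reference(x):
    neg = np.minimum(x, 0.0)   # negative_res in the kernel
    pos = np.maximum(x, 0.0)   # positive_res in the kernel
    return SCALE * ALPHA * (np.exp(neg) - 1.0) + SCALE * pos

print(selu_reference(np.array([-2.0, -0.5, 0.0, 0.5, 2.0])))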
Example #3
def fake_quant_with_min_max_args(input_data,
                                 min_=-6,
                                 max_=6,
                                 num_bits=8,
                                 narrow_range=False):
    """
    Computes Fake-quantize the 'input_data' tensor,
    type float32 to 'output_data' tensor of same type

    output_data = (floor(clamped_shifted * inv_nudged_scale + 0.5f))) * scale
                  + nudged_min
    scale = (max-min) / (quant_max-quant_min)

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float32"
        min ([float, int]): scalar, defaults to -6
        max ([float, int]): scalar, defaults to 6. [min; max] define the
                            clamping range for the input_data data
        num_bits ([float, int]): Defaults to 8. num_bits is the bitwidth
                                 of the quantization,between 2 and 16
        narrow_range ([bool]):
            True, quantized into the quantization range [1; 2^num_bits - 1]
            False,quantized into the quantization range [0; 2^num_bits - 1]

    Returns:
        tvm.tensor.Tensor
    """
    shape = get_shape(input_data)
    utils.check_shape(shape)

    dtype = input_data.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.FLOAT32)

    nudged_min, nudged_max, scale = nudge_min_max(min_, max_, num_bits,
                                                  narrow_range)

    zero_tensor = tvm.compute(input_data.shape,
                              lambda *i: tvm.const(0, dtype="float32"),
                              name="zero_tensor")
    nudged_max_tensor = topi.add(zero_tensor, nudged_max)
    nudged_min_tensor = topi.add(zero_tensor, nudged_min)
    inv_nudged_scale = 1.00 / scale

    # Transform the input between nudged_max and nudged_min
    clamped_vmin = topi.minimum(input_data, nudged_max_tensor)
    clamped = topi.maximum(clamped_vmin, nudged_min_tensor)

    # Calculate the quantized and dequantized results
    clamped_shifted = topi.subtract(clamped, nudged_min_tensor)
    vmul_shifted = topi.multiply(clamped_shifted, inv_nudged_scale)
    vadds_shifted = topi.add(vmul_shifted, 0.5)
    floor_vadds_shifted = floor(vadds_shifted)
    floor_cast = akg.lang.ascend.cast_to(floor_vadds_shifted, dtype)
    res_scale = topi.multiply(floor_cast, scale)
    res = topi.add(res_scale, nudged_min_tensor)

    return res
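
A minimal NumPy sketch of the quantize-dequantize pipeline above, assuming nudged_min, nudged_max and scale come from nudge_min_max (whose body is not shown here):

import numpy as np

def fake_quant_reference(x, nudged_min, nudged_max, scale):
    # clamp into [nudged_min, nudged_max], as the minimum/maximum pair does
    clamped = np.clip(x, nudged_min, nudged_max)
    # floor(v + 0.5) rounds to the nearest quantization step (vadds + floor)
    quantized = np.floor((clamped - nudged_min) / scale + 0.5)
    # rescale back into the original value range
    return quantized * scale + nudged_min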
Example #4
def my_dsl(dtype, kernel_name, attrs, insn, insnType):
    m = tvm.var("M")
    n = tvm.var("N")
    A = tvm.placeholder((m, ), name="A", dtype=dtype)
    B = tvm.placeholder((m, ), name="B", dtype=dtype)

    if insn == "add":
        C = topi.add(A, B)
    elif insn == "sub":
        C = topi.subtract(A, B)
    elif insn == "mul":
        C = topi.multiply(A, B)
    elif insn == "div":
        C = topi.divide(A, B)
    elif insn == "max":
        C = topi.maximum(A, B)
    elif insn == "min":
        C = topi.minimum(A, B)

    elif insn == "abs":
        C = tvm.compute(A.shape, lambda *index: tvm.abs(A(*index)), name='C')
    elif insn == "exp":
        C = topi.exp(A)
    elif insn == "log":
        C = topi.log(A)
    elif insn == "sqrt":
        C = topi.sqrt(A)

    elif insn == "adds":
        C = A + tvm.const(2, dtype)
    elif insn == "muls":
        C = A * tvm.const(2, dtype)

    # C = tvm.compute((m, ), lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule([C.op])
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        if insnType == "binary":
            mod = akg.build(s, [A, B, C],
                            "cce",
                            name=kernel_name,
                            attrs=attrs,
                            polyhedral=True)
        else:
            mod = akg.build(s, [A, C],
                            "cce",
                            name=kernel_name,
                            attrs=attrs,
                            polyhedral=True)
    return mod
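
With insn and insnType lifted into the signature (they were undefined free variables in the snippet), a hypothetical invocation looks like this; the kernel name and empty attrs are illustrative:

# "add" is a binary op, so the module is built over A, B and C
mod = my_dsl("float16", "vector_add", attrs={}, insn="add", insnType="binary")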
Example #5
def _asin_compute(data_input):
    """Compute asin"""

    dtype = data_input.dtype
    boundary = tvm.const(BOUNDARY, "float32")

    # Change dtype to float32
    if dtype == "float16":
        data_input = topi.cast(data_input, "float32")

    # Sign mask
    data_sign = sign(data_input)

    # All positive
    data1 = topi.multiply(data_input, data_sign)

    # x belongs to (0, 2^(-0.5))
    choice_1 = topi.minimum(data1, boundary)
    choice_1 = topi.subtract(choice_1, boundary)
    choice_1_floor = akg.lang.cce.floor(choice_1)
    # the dtype of choice_1_floor is int32; it needs to be cast to fp32.
    if utils.product_is_mini():
        choice_1_floor = topi.cast(choice_1_floor, "float16")
        choice_1_floor = topi.cast(choice_1_floor, "float32")
    else:
        choice_1_floor = topi.cast(choice_1_floor, "float32")
    choice_1 = topi.multiply(choice_1_floor, neg_one_const("float32"))

    taylor1 = _taylor_compute(data1)
    res_1 = topi.multiply(taylor1, choice_1)

    # x belongs to (2^(-0.5), 1)
    choice_2 = topi.subtract(one_const("float32"), choice_1)
    data2 = topi.subtract(one_const("float32"), topi.multiply(data1, data1))
    data2_sqrt = _sqrt(data2)

    taylor2 = _taylor_compute(data2_sqrt, data2)

    res_2 = topi.multiply(taylor2, neg_one_const("float32"))
    res_2 = topi.add(res_2, tvm.const(HALF_PI, "float32"))
    res_2 = topi.multiply(res_2, choice_2)

    # Restore sign
    res_1 = topi.add(res_1, res_2)
    res_1 = topi.multiply(res_1, data_sign)

    # Restore dtype
    if dtype == "float16":
        res_1 = topi.cast(res_1, "float16")

    return res_1
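
The second branch rests on the complement identity asin(x) = pi/2 - asin(sqrt(1 - x^2)), which keeps the Taylor argument small for x in (2^(-0.5), 1). A NumPy check of the identity (a verification sketch, not part of the kernel):

import numpy as np

x = np.linspace(0.72, 0.99, 5)
assert np.allclose(np.arcsin(x), np.pi / 2 - np.arcsin(np.sqrt(1.0 - x**2)))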
Example #6
def truncate_div_compute(input_x1, input_x2):
    """compute for truncate_div"""
    int_list = ("int32", "int8", "uint8")

    if input_x1.dtype in int_list:
        data_zero = dc.zero_const("float32")
        data_x_broad = cast(input_x1, "float32")
        data_y_broad = cast(input_x2, "float32")
        res_div = topi.divide(data_x_broad, data_y_broad)
        res_min_int = ceil(topi.minimum(res_div, data_zero))
        res_max_int = floor(topi.maximum(res_div, data_zero))
        res_trunc = topi.add(res_min_int, res_max_int)
        res_trunc = cast(res_trunc, "float32")
    else:
        res_trunc = topi.divide(input_x1, input_x2)

    return cast(res_trunc, input_x1.dtype)
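
The ceil-of-min plus floor-of-max pair implements truncation toward zero without a sign branch: one of the two terms is always zero, so ceil(min(d, 0)) + floor(max(d, 0)) == trunc(d). A NumPy check:

import numpy as np

d = np.array([-2.7, -0.3, 0.0, 0.3, 2.7])
trunc = np.ceil(np.minimum(d, 0)) + np.floor(np.maximum(d, 0))
assert np.array_equal(trunc, np.trunc(d))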
Example #7
def less_compare_float32(data_x, data_y):
    """if x is less than y, then return 1, else return 0"""
    shape_inputs = get_shape(data_x)
    # minimum positive normal float32 value: 2**(-126)
    data_min = akg.lang.ascend.broadcast(tvm.const(2**(-126), dtype="float32"),
                                         shape_inputs, "float32")
    data_zero = akg.lang.ascend.broadcast(dc.zero_const("float32"),
                                          shape_inputs, "float32")
    res_sub = topi.subtract(data_y, data_x)
    res_min = topi.minimum(res_sub, data_min)
    res_max = topi.maximum(res_min, data_zero)
    # the scale factor needed is 2**126, but cce constants only go up to
    # 2**62, so multiply by 2**62 * 2**62 * 2**2 (exponents sum to 126)
    res_mul_first = topi.multiply(res_max, tvm.const(2**62, dtype="float32"))
    res_mul_second = topi.multiply(res_mul_first,
                                   tvm.const(2**62, dtype="float32"))
    res = topi.multiply(res_mul_second, tvm.const(2**2, dtype="float32"))

    return res
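
After the subtract/minimum/maximum chain, the value is 0 where x >= y and is clamped to at most 2**(-126) where x < y; the three multiplies scale it by 2**126 in total, mapping the clamp ceiling exactly to 1:

# the exponents cancel: -126 + 62 + 62 + 2 == 0
assert (2.0**-126) * (2.0**62) * (2.0**62) * (2.0**2) == 1.0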
Example #8
def fake_quant_with_min_max_vars_per_channel_compute(input_data,
                                                     input_min,
                                                     input_max,
                                                     num_bits=8,
                                                     narrow_range=False):
    """fake_quant_with_min_max_vars_per_channel compute implemention"""
    shape = get_shape(input_data.shape)
    dtype = input_data.dtype
    min_broadcast = akg.lang.ascend.broadcast(input_min, shape, dtype)
    max_broadcast = akg.lang.ascend.broadcast(input_max, shape, dtype)
    # get nudged_min and nudged_max by nudged_min_max_compute function
    nudged_min_nudged_max = nudged_min_max_compute(min_broadcast,
                                                   max_broadcast, num_bits,
                                                   narrow_range)
    # transform the input between nudged_max and nudged_min
    clamped_tmp = topi.minimum(input_data, nudged_min_nudged_max[1])
    clamped = topi.maximum(clamped_tmp, nudged_min_nudged_max[0])

    # calculate the quantized and dequantized results
    clamped_shifted = topi.subtract(clamped, nudged_min_nudged_max[0])
    if product_is_mini():
        clamped_shifted_div_scale = mul(clamped_shifted,
                                        reciprocal(nudged_min_nudged_max[2]),
                                        target=utils.CCE)
    else:
        clamped_shifted_div_scale = Divide(clamped_shifted,
                                           nudged_min_nudged_max[2],
                                           target=utils.CCE)
    result_tmp = topi.add(clamped_shifted_div_scale, dc.half_const(dtype))
    floor_result_tmp = akg.lang.ascend.floor(result_tmp)
    if product_is_mini():
        floor_result_tmp = topi.cast(floor_result_tmp, "float16")

    floor_result_tmp = topi.cast(floor_result_tmp, "float32")
    scale_product = topi.multiply(floor_result_tmp, nudged_min_nudged_max[2])
    tmp_res = topi.add(scale_product, nudged_min_nudged_max[0])
    # get bool_both_zero_value by bool_both_zero_compute function
    bool_both_zero_value = bool_both_zero_compute(min_broadcast, max_broadcast)
    res = topi.multiply(tmp_res, bool_both_zero_value)

    return res
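
On mini targets the divide by the scale (nudged_min_nudged_max[2]) is presumably replaced by a multiply with its reciprocal because that is the supported path there; the two forms agree up to one float rounding step. A tiny sketch:

import numpy as np

z = np.float32(3.1)
s = np.float32(0.047)
# divide vs. multiply-by-reciprocal: same value up to rounding
print(z / s, z * (np.float32(1.0) / s))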
Example #9
def _cmpare_value(input_data, nudged_min, nudged_max):
    """
    where((input_data <= nudged_max) & (input_data >= nudged_min), 1, 0)

    Args:
        input_data (tvm.tensor.Tensor): Input data
        nudged_min (tvm.tensor.Tensor): Minimum value of comparison
        nudged_max (tvm.tensor.Tensor): Maximum value of comparison

    Returns:
        tvm.tensor.Tensor
    """
    min_value = tvm.const(2**(-126), dtype="float32")
    # (2**(-126))*(2**(62))*(2**(62))*(2**(2)) = 1
    # so min_value*max_value*max_value*max_value_one = 1
    max_value = tvm.const(2**(62), dtype="float32")
    max_value_one = tvm.const(2**(2), dtype="float32")
    data_zero = topi.multiply(input_data, 0)
    max_value_tensor = topi.add(data_zero, max_value)
    min_value_tensor = topi.add(data_zero, min_value)
    max_value_one_tensor = topi.add(data_zero, max_value_one)

    sub_tmp = topi.subtract(input_data, nudged_min)
    sub_min = topi.add(sub_tmp, min_value)
    vmax_tmp = topi.maximum(sub_min, data_zero)

    sub_tmp_max = topi.subtract(nudged_max, input_data)
    sub_max = topi.add(sub_tmp_max, min_value)
    vmin_tmp = topi.maximum(sub_max, data_zero)

    one_tmp = topi.multiply(vmax_tmp, vmin_tmp)
    one_min = topi.minimum(one_tmp, min_value_tensor)

    vmul_max_value = topi.multiply(one_min, max_value_tensor)
    vmul_max_value_one = topi.multiply(vmul_max_value, max_value_tensor)
    between_nudged_min_max = topi.multiply(vmul_max_value_one,
                                           max_value_one_tensor)

    return between_nudged_min_max
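
A NumPy sketch of the whole helper, using the same clamp-then-scale trick as less_compare_float32 to turn the products into a 0/1 mask (function and variable names here are mine):

import numpy as np

def in_range_mask(x, lo, hi):
    tiny = np.float32(2.0**-126)
    ge_lo = np.maximum(x - lo + tiny, 0.0)   # positive iff x >= lo
    le_hi = np.maximum(hi - x + tiny, 0.0)   # positive iff x <= hi
    both = np.minimum(ge_lo * le_hi, tiny)   # clamp to at most 2**-126
    # scale by 2**62 * 2**62 * 2**2 = 2**126 so the clamp ceiling becomes 1
    return both * np.float32(2.0**62) * np.float32(2.0**62) * np.float32(2.0**2)

x = np.array([-1.0, 0.5, 2.0], dtype=np.float32)
print(in_range_mask(x, 0.0, 1.0))   # expected: [0. 1. 0.]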
Example #10
def _do_atan_taylor(data):
    """
    Taylor algorithm for atan.

        if x > 0 and x < tan(pi/8):
            atan(x) = x - x^3/3 + x^5/5 - x^7/7 ...
        elif x > tan(pi/8) and x < tan(pi/4):
            atan(x) = atan(y) + atan((x-y)/(1+xy)), where y = tan(pi/8)

    Args:
        data (tvm.tensor.Tensor): Input data.

    Returns:
        A tvm.tensor.Tensor of atan(x).
    """
    dtype = data.dtype

    tensor_offset = tvm.const(TAN_PI_BY_EIGHT, dtype)
    deno = topi.multiply(data, tvm.const(TAN_PI_BY_EIGHT, dtype))
    deno = topi.add(deno, dc.one_const(dtype))
    molecule = topi.subtract(data, tensor_offset)
    ddata = topi.divide(molecule, deno)
    ddata = topi.abs(ddata)

    square_ddata = topi.multiply(ddata, ddata)
    res = tvm.const(ATAN_TAYLOR_COEF[CONST_ITERTOR], dtype)
    for i in reversed(range(CONST_ITERTOR)):
        res = topi.multiply(res, square_ddata)
        res = topi.add(res, tvm.const(ATAN_TAYLOR_COEF[i], dtype))
    res = topi.multiply(res, ddata)
    res = topi.add(res, tvm.const(CONST_PI_BY_EIGHT, dtype))

    square_data = topi.multiply(data, data)
    res2 = tvm.const(ATAN_TAYLOR_COEF[CONST_ITERTOR2], dtype)
    for i in reversed(range(CONST_ITERTOR2)):
        res2 = topi.multiply(res2, square_data)
        res2 = topi.add(res2, tvm.const(ATAN_TAYLOR_COEF[i], dtype))
    return topi.minimum(res, topi.multiply(res2, data))
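
Both loops are Horner evaluations of the odd-power Taylor series atan(x) ≈ x * (c0 + c1*x^2 + c2*x^4 + ...), with coefficients stored lowest order first. A NumPy sketch assuming ATAN_TAYLOR_COEF holds the usual values 1, -1/3, 1/5, ... (an assumption; the constant is not shown here):

import numpy as np

# hypothetical coefficients: ATAN_TAYLOR_COEF[i] = (-1)**i / (2*i + 1)
coef = [(-1.0)**i / (2 * i + 1) for i in range(7)]

def atan_taylor(x):
    x2 = x * x
    res = coef[-1]
    for c in reversed(coef[:-1]):
        res = res * x2 + c   # fold one coefficient per step, as in the loops above
    return res * x           # final multiply restores the odd powers

x = np.linspace(0.0, 0.4, 9)
assert np.allclose(atan_taylor(x), np.arctan(x), atol=1e-6)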