Ejemplo n.º 1
0
def bool_both_zero_compute(juduged_min, juduged_max):
    """if input min and max are both zero then output_data will be all zero,so need a juduge compute tensor"""
    dtype = juduged_min.dtype
    tensor_zero = topi.full(juduged_min.shape, dtype, dc.zero_const(dtype))
    min_abs = topi.abs(juduged_min)
    max_abs = topi.abs(juduged_max)
    min_max_replace = topi.add(min_abs, max_abs)
    # just check wether min and max are all zero, if true  return 0
    bool_min_max_product_less_zero = less_compare_float32(
        min_max_replace, tensor_zero)
    bool_min_max_product_more_zero = less_compare_float32(
        tensor_zero, min_max_replace)
    bool_both_zero = topi.add(bool_min_max_product_less_zero,
                              bool_min_max_product_more_zero)

    return bool_both_zero
Ejemplo n.º 2
0
def nudged_min_max_compute(min_broadcast, max_broadcast, num_bits,
                           narrow_range):
    """
    Calculate the maximum and minimum values of the quantization.

    Notes:
        Each channel scale[i] euqal to (max_broadcast[i] - min_broadcast[i]) / (quant_max - quant_min).
        Then compute nudged_zero_point:
                nudged_zero_point = floor(between_min_max_float + 0.5) + less_quant_min_float + more_quant_max_float,
        between_min_max_float is first calculated by:
                zero_point_from_min = (quant_min_float - min_broadcast) / scale,
        then between_min_max_float = zero_point_from_min, which min_broadcast <= zero_point_from_min <= max_broadcast.
        Besides, the value of less_quant_min_float is equal to quant_min or zero, zero_point_from_min < quant_min_float,
        the value is quant_min, else is 0. The same as more_quant_max_float.
        Finally according to scale and nudged_zero_point to compute nudged_min and nudged_max:
                 nudged_min = (quant_min - nudged_zero_point) * scale
                 nudged_max = (quant_max - nudged_zero_point) * scale

    Args:
        min_broadcast (tvm.tensor.Tensor): minimum value to be quantified for each channel.
        max_broadcast (tvm.tensor.Tensor): maximum value to be quantified for each channel.
        num_bits (int): num_bits is the bitwidth of the quantization, range [2,16].
        narrow_range (bool): if True, for each channel, quantized into the quantization range [0, 2^num_bits - 1] else
                      quantized into the quantization range [1, 2^num_bits - 1].

    Returns:
        nudged_min (tvm.tensor.Tensor): The same type and shape as min_broadcast.
        nudged_max (tvm.tensor.Tensor): The same type and shape as max_broadcast.
        scale (tvm.tensor.Tensor): The same type and shape as max_broadcast.
    """

    dtype = min_broadcast.dtype
    quant_min = 1 if narrow_range else 0
    quant_max = (2**num_bits) - 1

    # because of need compute each channel, so quant_min and quant_max need to broadcast.
    quant_min_float = topi.full(min_broadcast.shape, dtype,
                                tvm.const(quant_min, dtype))
    quant_max_float = topi.full(min_broadcast.shape, dtype,
                                tvm.const(quant_max, dtype))

    # caculate each channel max and min difference.
    max_sub_min = topi.subtract(max_broadcast, min_broadcast)
    quant_max_sub_quant_min = topi.subtract(quant_max_float, quant_min_float)
    # compute scale = (max_broadcast - min_broadcast) / (quant_max - quant_min)
    # and min_div_scale = min_broadcast / scale
    if product_is_mini():
        scale = mul(max_sub_min,
                    reciprocal(quant_max_sub_quant_min),
                    target=utils.CCE)
        min_div_scale = Mul(min_broadcast, reciprocal(scale), target=utils.CCE)
    else:
        scale = Divide(max_sub_min, quant_max_sub_quant_min, target=utils.CCE)
        min_div_scale = Divide(min_broadcast, scale, target=utils.CCE)

    # zero_point_from_min = quant_min_float - min_broadcast / scale
    zero_point_from_min = topi.subtract(quant_min_float, min_div_scale)
    # if zero_point_from_min < quant_min_float, bool_less_quant_min_float = 1 else 0
    bool_less_quant_min_float = less_compare_float32(zero_point_from_min,
                                                     quant_min_float)
    # if quant_max_float < zero_point_from_min, bool_more_quant_max_float = 1 else 0
    bool_more_quant_max_float = less_compare_float32(quant_max_float,
                                                     zero_point_from_min)

    # according to above bool param to select effective value
    less_quant_min_float = topi.multiply(quant_min_float,
                                         bool_less_quant_min_float)
    more_quant_max_float = topi.multiply(quant_max_float,
                                         bool_more_quant_max_float)

    # compute which num is not less than quant_min_float and not large than quant_max_float
    tensor_one = topi.full(min_broadcast.shape, dtype, dc.one_const(dtype))
    bool_not_less_quant_min_float = topi.subtract(tensor_one,
                                                  bool_less_quant_min_float)
    bool_not_more_quant_max_float = topi.subtract(tensor_one,
                                                  bool_more_quant_max_float)
    bool_between_min_max = topi.multiply(bool_not_less_quant_min_float,
                                         bool_not_more_quant_max_float)
    between_min_max_float = topi.multiply(zero_point_from_min,
                                          bool_between_min_max)
    # add 0.5 to num which min <= num <= max and then floor them.
    between_min_max_add_half_one = topi.add(between_min_max_float,
                                            dc.half_const(dtype))
    between_min_max_round = akg.lang.ascend.floor(between_min_max_add_half_one)
    if product_is_mini():
        between_min_max_round = topi.cast(between_min_max_round, "float16")

    between_min_max_round = topi.cast(between_min_max_round, "float32")

    # calculate the maximum and minimum values of the quantization
    nudged_zero_point_tmp = topi.add(less_quant_min_float,
                                     more_quant_max_float)
    nudged_zero_point = topi.add(nudged_zero_point_tmp, between_min_max_round)

    nudged_min_tmp = topi.subtract(quant_min_float, nudged_zero_point)
    nudged_max_tmp = topi.subtract(quant_max_float, nudged_zero_point)
    nudged_min = topi.multiply(nudged_min_tmp, scale)
    nudged_max = topi.multiply(nudged_max_tmp, scale)
    res = [nudged_min, nudged_max, scale]

    return res
Ejemplo n.º 3
0
def _erf_compute(input_x):
    r"""
    Compute erf.

    .. math::
        \operatorname{erf}(x) = sign(x) \left(
            1 - (a_1t+a_2t^2+a_3t^3+a_4t^4+a_5t^5) e^{-x^2} + \epsilon(|x|)
            \right), \\
        t = \dfrac{1}{1+p|x|} \\
        \left|\epsilon(|x|)\right| \le 1.5 \times 10^{-7} \\
        where \; p=.3275911 \quad a_1=.254829592 \quad a_2=-.284496736 \\
        a_3=1.421413741 \quad a_4=-1.453152027 \quad a_5=1.061405429

    Args:
        input_x (tvm.tensor.Tensor): Input tensor.

    Returns:
        tvm.tensor.Tensor as rational approximation.
    """

    dtype = input_x.dtype
    shape = get_shape(input_x)

    cst_one = dc.one_const("float32")
    cst_neg_one = dc.neg_one_const("float32")
    cst_p = tvm.const(SCALER_P, "float32")
    cst_a1 = tvm.const(SCALER_A1, "float32")
    cst_a2 = tvm.const(SCALER_A2, "float32")
    cst_a3 = tvm.const(SCALER_A3, "float32")
    cst_a4 = tvm.const(SCALER_A4, "float32")
    cst_a5 = tvm.const(SCALER_A5, "float32")
    fp16_max = tvm.const(SCALER_FP16_MAX, "float32")
    fp16_min = tvm.const(SCALER_FP16_MIN, "float32")

    if dtype == "float16":
        input_x = topi.cast(input_x, "float32")

    # calculate: sign = floor[(x*fp16max) / (|x*fp16max| + fp16min)]
    data_sign_vmuls = topi.multiply(input_x, fp16_max)
    data_sign_abs = topi.abs(data_sign_vmuls)
    data_adds = topi.add(data_sign_abs, fp16_min)
    data_sign_div = div(data_sign_vmuls, data_adds)
    data_round = round_value(data_sign_div)
    # mini device should cast to fp16 first
    if utils.product_is_mini():
        data_round = topi.cast(data_round, "float16")
    tensor_sign = topi.cast(data_round, "float32")

    # t = 1 / (1 + px)
    tensor_abs = topi.abs(input_x)
    one_plus_px = topi.add(cst_one, topi.multiply(tensor_abs, cst_p))
    data_t = div(topi.full(shape, "float32", 1.0), one_plus_px)

    # e^{-x^2}
    abs_square = topi.multiply(tensor_abs, tensor_abs)
    neg_square = topi.multiply(abs_square, cst_neg_one)
    exp_neg_square = exp(neg_square)

    # a1t + a2t^2 + a3t^3 + a4t^4 + a5t^5 = ((((a5t + a4)t + a3)t + a2)t + a1)t
    tmp_a5 = topi.multiply(cst_a5, data_t)
    tmp_a5a4 = topi.multiply(topi.add(tmp_a5, cst_a4), data_t)
    tmp_a5a4a3 = topi.multiply(topi.add(tmp_a5a4, cst_a3), data_t)
    tmp_a5a4a3a2 = topi.multiply(topi.add(tmp_a5a4a3, cst_a2), data_t)
    data_muladd = topi.multiply(topi.add(tmp_a5a4a3a2, cst_a1), data_t)

    # erf = sign(x) * (1 - data_muladd * e^{-x^2})
    erf_res = topi.multiply(
        tensor_sign,
        topi.add(
            cst_one,
            topi.multiply(cst_neg_one,
                          topi.multiply(data_muladd, exp_neg_square))))

    if dtype == "float16":
        erf_res = topi.cast(erf_res, dtype)

    return erf_res