Example #1
def _tan_2x_multi(input_x, times):
    """calculating tan x by calculating tan (x/2^times) and using double angle formula multiple times"""
    # Calculate tan(x / 2^times); on mini, an fp16 input needs an explicitly typed constant
    if input_x.dtype == FLOAT_16 and utils.product_is_mini():
        factor = tvm.const(1.0 / (2.0**times), FLOAT_16)
    else:
        factor = 1.0 / (2.0**times)
    res = _tan_expand(topi.multiply(input_x, factor))
    while times != 0:
        # double-angle formula: tan(2x) = 2*tan(x) / (1 - tan(x)^2)
        if input_x.dtype == FLOAT_16 and utils.product_is_mini():
            res_numerator = topi.multiply(res, tvm.const(2.0, FLOAT_16))
            tanx_square = topi.multiply(res, res)
            res_denominator = topi.add(topi.multiply(tanx_square, tvm.const(-1.0, FLOAT_16)), tvm.const(1.0, FLOAT_16))
        else:
            res_numerator = topi.multiply(res, 2.0)
            tanx_square = topi.multiply(res, res)
            res_denominator = topi.add(topi.multiply(tanx_square, -1.0), 1.0)

        if utils.product_is_mini():
            res = mul(res_numerator, reciprocal(res_denominator))
        else:
            res = div(res_numerator, res_denominator)
        times = times - 1
    return res
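The loop above is plain range reduction: shrink the argument until the polynomial expansion is accurate near zero, then double back up. A minimal NumPy sketch of the same idea, with np.tan standing in for the polynomial helper _tan_expand (not shown in this example):

import numpy as np

def tan_by_doubling(x, times=5):
    # reduce the argument so the base approximation is evaluated near zero
    t = np.tan(x / 2.0**times)  # stand-in for the polynomial _tan_expand
    # apply tan(2t) = 2*tan(t) / (1 - tan(t)**2) once per halving
    for _ in range(times):
        t = 2.0 * t / (1.0 - t * t)
    return t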
Example #2
    def softmax_cross_entropy_with_logits(labels,
                                          logits,
                                          axis,
                                          reduction="mean",
                                          scale=1.0):
        max_logits = reduce_max(logits, axis, keepdims=True, target=utils.CCE)
        data_sub = sub(logits, max_logits, target=utils.CCE)
        akg.register_variables("minus_max", [logits], data_sub)
        data_exp = Exp(data_sub, target=utils.CCE)
        data_expsum = sum(data_exp, axis, keepdims=True, target=utils.CCE)
        data_expsum_log = log(data_expsum, target=utils.CCE)
        sub_value = sub(data_sub, data_expsum_log, target=utils.CCE)
        neg_labels = neg(labels, target=utils.CCE)
        cross_entropy = mul(neg_labels, sub_value, target=utils.CCE)
        # backprop: prob - labels, where prob = softmax(logits)
        prob = Exp(sub_value, target=utils.CCE)
        backprop = sub(prob, labels, target=utils.CCE)

        if reduction.lower() == "none":
            loss = sum_v2(cross_entropy, axis, keepdims=True)
        elif reduction.lower() == "mean":
            loss = sum_v2(cross_entropy, axis=None)
            factor = logits.shape[0].value
            loss = loss * akg.tvm.const(1 / factor, logits.dtype)
            backprop = backprop * akg.tvm.const(1 / factor, logits.dtype)
        elif reduction.lower() == "sum":
            loss = sum_v2(cross_entropy, axis=None)
        else:
            raise ValueError(
                "reduction method {0} is not supported".format(reduction))
        backprop = akg.topi.multiply(backprop,
                                     akg.tvm.const(scale, backprop.dtype))
        return loss, backprop
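The kernel computes a numerically stable log-softmax, (x - max(x)) - log(sum(exp(x - max(x)))), and reuses sub_value both for the loss and for the gradient prob - labels. A NumPy reference for the "mean" reduction, useful for checking outputs (a sketch; the function name is illustrative):

import numpy as np

def softmax_xent_reference(labels, logits, axis=-1):
    # stable log-softmax: (x - max) - log(sum(exp(x - max)))
    shifted = logits - logits.max(axis=axis, keepdims=True)
    log_prob = shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))
    loss = -(labels * log_prob).sum(axis=axis)
    backprop = (np.exp(log_prob) - labels) / logits.shape[0]  # prob - labels, mean-scaled
    return loss.mean(), backprop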
Example #3
def _bessel_i1e_compute(input_data):
    """bessel i1e compute"""

    shape = vc_util.get_shape(input_data)
    dtype = input_data.dtype

    # promote float16 input to float32 before computing
    if dtype == "float16":
        input_data = cast(input_data, "float32")

    abs_data = abs_value(input_data)
    # compute bessel_i1e for data in (-3.75, 3.75)
    before_res = _before_res_compute(abs_data)
    # compute bessel_i1e for data in other domain
    after_res = _after_res_compute(abs_data)

    # The vcmp_lt and vsel instructions do not support fp32 on mini,
    # so do the select in fp16 and cast back to fp32 afterwards
    # (an "auto cast" pass could simplify this).
    if utils.product_is_mini():
        res = akg.tvm.compute(
            shape,
            lambda *indice: akg.tvm.expr.Select(
                abs_data[indice].astype("float16") < akg.tvm.const(CONST_LIMIT, "float16"),
                before_res[indice].astype("float16"),
                after_res[indice].astype("float16")))
        res = cast(res, "float32")
    else:
        res = akg.tvm.compute(
            shape,
            lambda *indice: akg.tvm.expr.Select(
                abs_data[indice] < CONST_LIMIT,
                before_res[indice], after_res[indice]))
    data_sign = sign(input_data)
    res = mul(res, data_sign)
    if dtype == "float16":
        res = cast(res, "float16")
    return res
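bessel_i1e is the exponentially scaled modified Bessel function exp(-|x|) * I1(x), so SciPy's special.i1e can serve as ground truth when validating this kernel:

import numpy as np
from scipy import special

x = np.linspace(-10.0, 10.0, 9, dtype=np.float32)
print(special.i1e(x))  # reference values for exp(-|x|) * I1(x)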
Example #4
def mul_unsortedsegmentsum(input1, input2, ids_tensor, num_segments):
    import akg.tvm
    temp = mul.mul(input1, input2)
    output = unsortedsegmentsum.unsortedsegmentsum(temp, ids_tensor,
                                                   num_segments)[0]
    output = akg.tvm.compute(output.shape, lambda *i: output(*i),
                             "fused_mul_unsorted")
    return output
Example #5
def Mul(x, x_shape, y, y_shape, data_format=None):
    """mul"""
    if data_format:
        x_new = broadcast_by_format(x, x_shape, data_format[0], y_shape)
        y_new = broadcast_by_format(y, y_shape, data_format[1], x_shape)
    else:
        x_new = x
        y_new = y

    return mul.mul(x_new, y_new)
Example #6
def _after_res_compute(abs_data):
    """
    compute bessel_i1e for abs value of data greater than or equal to 3.75

    Algorithm:
    t = 3.75 / x
    I1(x) = (1 / sqrt(x))*(0.39894228 - 0.03988024t - 0.00362018t^2
                           + 0.00163801t^3 - 0.01031555t^4 + 0.02282967t^5
                           - 0.02895312t^6 + 0.01787654t^7 - 0.00420059t^8)
    """
    broad_const_limit = akg.lang.cce.broadcast(
        akg.tvm.const(CONST_LIMIT, abs_data.dtype), abs_data.shape)
    data = div(broad_const_limit, abs_data)
    after_res = topi.multiply(data, ITR_AFTER[LEN_AFTER - 1])
    after_res = topi.add(after_res, ITR_AFTER[LEN_AFTER - 2])
    for iter_number in ITR_AFTER[LEN_AFTER - 3::-1]:
        after_res = mul(after_res, data)
        after_res = topi.add(after_res, iter_number)
    abs_data_rsqrt = rsqrt(abs_data)
    after_res = mul(after_res, abs_data_rsqrt)
    return after_res
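The loop is Horner's method over the docstring's coefficients. A standalone NumPy sketch, assuming ITR_AFTER stores the coefficients with the constant term first (the coefficient tables are not shown in this example):

import numpy as np

# coefficients from the docstring, constant term first (assumed layout of ITR_AFTER)
ITR_AFTER = [0.39894228, -0.03988024, -0.00362018, 0.00163801,
             -0.01031555, 0.02282967, -0.02895312, 0.01787654, -0.00420059]

def i1e_large(x):
    t = 3.75 / x
    poly = 0.0
    for c in reversed(ITR_AFTER):  # Horner's method in t
        poly = poly * t + c
    return poly / np.sqrt(x)  # the leading 1/sqrt(x) factor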
Example #7
def _before_res_compute(abs_data):
    """
    compute bessel_i1e for abs value of data less than or equal to 3.75

    Algorithm:
    t = x / 3.75
    I1(x) = e^-|x|*x*(0.5 + 0.87890594t^2 + 0.51498869t^4 + 0.15084934t^6
                    + 0.02658773t^8 + 0.00301532t^10 + 0.00032411t^12)
    """

    data = topi.multiply(abs_data, 1.0 / CONST_LIMIT)
    data_square = mul(data, data)
    before_res = topi.multiply(data_square, ITR_BEFORE[LEN_BEFORE - 1])
    before_res = topi.add(before_res, ITR_BEFORE[LEN_BEFORE - 2])
    for iter_number in ITR_BEFORE[LEN_BEFORE - 3::-1]:
        before_res = mul(before_res, data_square)
        before_res = topi.add(before_res, iter_number)
    exp_value = exp(neg(abs_data))
    before_res = mul(before_res, exp_value)
    before_res = mul(before_res, abs_data)
    return before_res
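The same Horner pattern, this time in t^2. A sketch under the same assumption about the coefficient table ITR_BEFORE:

import numpy as np

# coefficients from the docstring, constant term (0.5) first (assumed layout of ITR_BEFORE)
ITR_BEFORE = [0.5, 0.87890594, 0.51498869, 0.15084934,
              0.02658773, 0.00301532, 0.00032411]

def i1e_small(x):
    t2 = (x / 3.75)**2
    poly = 0.0
    for c in reversed(ITR_BEFORE):  # Horner's method in t^2
        poly = poly * t2 + c
    return np.exp(-np.abs(x)) * x * poly  # e^-|x| * x * polynomial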
Example #8
def sigmoid_cross_entropy_with_logits(labels=None, logits=None):
    ##
    # \brief Computes sigmoid cross entropy given `logits`.
    #
    # \f[
    #   cost = labels * -log(sigmoid(logits)) + (1 - labels) * -log(1 - sigmoid(logits))
    # \f]
    # \param labels akg.tvm.Tensor of the same type and shape as `logits`.
    # \param  logits akg.tvm.Tensor of type float16, float32
    #
    # \return akg.tvm.Tensor of the same shape as `logits` with the componentwise logistic losses.
    ##

    if get_shape(logits) != get_shape(labels):
        raise ValueError(
            "logits and labels must have the same shape  (%s vs %s)" %
            (get_shape(logits), get_shape(labels)))
    if logits.dtype != labels.dtype:
        raise ValueError(
            "logits and labels must have the same dtype  (%s vs %s)" %
            (logits.dtype, labels.dtype))

    shape = logits.shape
    dtype = logits.dtype

    check_list = ["float16", "float32"]
    if not (dtype.lower() in check_list):
        raise RuntimeError(
            "sigmoid_cross_entropy_with_logits only supports %s, got %s"
            % (",".join(check_list), dtype))

    #    z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
    # =  z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
    # =  max(x, 0) - x * z + log(1 + exp(-abs(x)))

    zero = akg.tvm.const(0, dtype=dtype)
    relu_logits = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.expr.Select(
            logits(*indice) < zero, zero, logits(*indice)),
        name="relu_logits")
    neg_abs_logits = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.expr.Select(
            logits(*indice) < zero, logits(*indice),
            logits(*indice) * -1),
        name="neg_abs_logits")
    sigmoid_logits = exp(neg_abs_logits) + akg.tvm.const(1, dtype=dtype)
    ln_sigmoid_logits = log(sigmoid_logits)
    logits_mul_labels = mul(logits, labels)
    res = relu_logits - logits_mul_labels + ln_sigmoid_logits
    return res
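The comment block derives the standard numerically stable form, max(x, 0) - x*z + log(1 + exp(-|x|)), which is exactly what the computes above assemble. A one-function NumPy reference for comparison:

import numpy as np

def sigmoid_xent_reference(labels, logits):
    # max(x, 0) - x*z + log(1 + exp(-|x|)); log1p keeps the last term stable
    return np.maximum(logits, 0.0) - logits * labels + np.log1p(np.exp(-np.abs(logits)))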
Example #9
def mul_conv(data,
             fmap_shape,
             filter_shape,
             pad_,
             stride_,
             dilation_,
             bypass_l1=False,
             use_bias=False,
             block_size=16,
             attrs=None):
    a = mul.mul(data[0], data[1])
    b = data[2]
    if use_bias:
        conv_data = [a, b, data[3]]
    else:
        conv_data = [a, b]
    res = conv.conv(conv_data, fmap_shape, filter_shape, pad_, stride_,
                    dilation_, use_bias, block_size, attrs)
    return res
Example #10
def fake_quant_with_min_max_vars_per_channel_compute(input_data,
                                                     input_min,
                                                     input_max,
                                                     num_bits=8,
                                                     narrow_range=False):
    """fake_quant_with_min_max_vars_per_channel compute implemention"""
    shape = get_shape(input_data.shape)
    dtype = input_data.dtype
    min_broadcast = akg.lang.cce.broadcast(input_min, shape, dtype)
    max_broadcast = akg.lang.cce.broadcast(input_max, shape, dtype)
    # get nudged_min and nudged_max by nudged_min_max_compute function
    nudged_min_nudged_max = nudged_min_max_compute(min_broadcast,
                                                   max_broadcast, num_bits,
                                                   narrow_range)
    # transform the input between nudged_max and nudged_min
    clamped_tmp = topi.minimum(input_data, nudged_min_nudged_max[1])
    clamped = topi.maximum(clamped_tmp, nudged_min_nudged_max[0])

    # calculate the quantized and dequantized results
    clamped_shifted = topi.subtract(clamped, nudged_min_nudged_max[0])
    if utils.product_is_mini():
        clamped_shifted_div_scale = mul(clamped_shifted,
                                        reciprocal(nudged_min_nudged_max[2]))
    else:
        clamped_shifted_div_scale = div(clamped_shifted,
                                        nudged_min_nudged_max[2])
    result_tmp = topi.add(clamped_shifted_div_scale, dc.half_const(dtype))
    floor_result_tmp = akg.lang.cce.floor(result_tmp)
    if utils.product_is_mini():
        floor_result_tmp = topi.cast(floor_result_tmp, "float16")

    floor_result_tmp = topi.cast(floor_result_tmp, "float32")
    scale_product = topi.multiply(floor_result_tmp, nudged_min_nudged_max[2])
    tmp_res = topi.add(scale_product, nudged_min_nudged_max[0])
    # get bool_both_zero_value by bool_both_zero_compute function
    bool_both_zero_value = bool_both_zero_compute(min_broadcast, max_broadcast)
    res = topi.multiply(tmp_res, bool_both_zero_value)

    return res
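Stripping away the per-channel broadcasting, the mini-specific casts, and the degenerate-range masking (bool_both_zero_value), the quantize/dequantize core is clamp, round half up, rescale. A NumPy sketch of that core, taking the outputs of nudged_min_max_compute as inputs:

import numpy as np

def fake_quant_reference(x, nudged_min, nudged_max, scale):
    clamped = np.clip(x, nudged_min, nudged_max)                 # clamp into the nudged range
    quantized = np.floor((clamped - nudged_min) / scale + 0.5)   # round half up
    return quantized * scale + nudged_min                        # dequantize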
Example #11
def mean_mul(first_input, second_input, axis=None, keepdims=False):
    temp, _ = mean.mean(first_input, axis, keepdims)
    output = mul.mul(temp, second_input)
    return output
Example #12
def mul_mean(first_input, second_input, axis=None, keepdims=False):
    temp = mul.mul(first_input, second_input)
    output, _ = mean.mean(temp, axis, keepdims)
    return output
Example #13
def mul_sub_mutioutput(first_input, second_input, third_input):
    temp = mul.mul(first_input, second_input)
    output = sub.sub(temp, third_input)
    return [temp, output]
Example #14
def nudged_min_max_compute(min_broadcast, max_broadcast, num_bits,
                           narrow_range):
    """
    Calculate the maximum and minimum values of the quantization.

    Notes:
        For each channel, scale[i] equals (max_broadcast[i] - min_broadcast[i]) / (quant_max - quant_min).
        Then compute nudged_zero_point:
                nudged_zero_point = floor(between_min_max_float + 0.5) + less_quant_min_float + more_quant_max_float,
        where between_min_max_float comes from:
                zero_point_from_min = (quant_min_float - min_broadcast) / scale,
        and equals zero_point_from_min when quant_min_float <= zero_point_from_min <= quant_max_float, else 0.
        less_quant_min_float equals quant_min when zero_point_from_min < quant_min_float, else 0;
        more_quant_max_float likewise equals quant_max when zero_point_from_min > quant_max_float, else 0.
        Finally, compute nudged_min and nudged_max from scale and nudged_zero_point:
                 nudged_min = (quant_min - nudged_zero_point) * scale
                 nudged_max = (quant_max - nudged_zero_point) * scale

    Args:
        min_broadcast (tvm.tensor.Tensor): minimum value to be quantified for each channel.
        max_broadcast (tvm.tensor.Tensor): maximum value to be quantified for each channel.
        num_bits (int): num_bits is the bitwidth of the quantization, range [2,16].
        narrow_range (bool): if True, quantize each channel into the range [1, 2^num_bits - 1];
                      otherwise into the range [0, 2^num_bits - 1].

    Returns:
        nudged_min (tvm.tensor.Tensor): The same type and shape as min_broadcast.
        nudged_max (tvm.tensor.Tensor): The same type and shape as max_broadcast.
        scale (tvm.tensor.Tensor): The same type and shape as max_broadcast.
    """

    dtype = min_broadcast.dtype
    quant_min = 1 if narrow_range else 0
    quant_max = (2**num_bits) - 1

    # quant_min and quant_max are broadcast so the computation is done per channel.
    quant_min_float = topi.full(min_broadcast.shape, dtype,
                                tvm.const(quant_min, dtype))
    quant_max_float = topi.full(min_broadcast.shape, dtype,
                                tvm.const(quant_max, dtype))

    # calculate each channel's max - min difference.
    max_sub_min = topi.subtract(max_broadcast, min_broadcast)
    quant_max_sub_quant_min = topi.subtract(quant_max_float, quant_min_float)
    # compute scale = (max_broadcast - min_broadcast) / (quant_max - quant_min)
    # and min_div_scale = min_broadcast / scale
    if utils.product_is_mini():
        scale = mul(max_sub_min, reciprocal(quant_max_sub_quant_min))
        min_div_scale = mul(min_broadcast, reciprocal(scale))
    else:
        scale = div(max_sub_min, quant_max_sub_quant_min)
        min_div_scale = div(min_broadcast, scale)

    # zero_point_from_min = quant_min_float - min_broadcast / scale
    zero_point_from_min = topi.subtract(quant_min_float, min_div_scale)
    # if zero_point_from_min < quant_min_float, bool_less_quant_min_float = 1 else 0
    bool_less_quant_min_float = less_compare_float32(zero_point_from_min,
                                                     quant_min_float)
    # if quant_max_float < zero_point_from_min, bool_more_quant_max_float = 1 else 0
    bool_more_quant_max_float = less_compare_float32(quant_max_float,
                                                     zero_point_from_min)

    # according to above bool param to select effective value
    less_quant_min_float = topi.multiply(quant_min_float,
                                         bool_less_quant_min_float)
    more_quant_max_float = topi.multiply(quant_max_float,
                                         bool_more_quant_max_float)

    # mask the values that are neither below quant_min_float nor above quant_max_float
    tensor_one = topi.full(min_broadcast.shape, dtype, dc.one_const(dtype))
    bool_not_less_quant_min_float = topi.subtract(tensor_one,
                                                  bool_less_quant_min_float)
    bool_not_more_quant_max_float = topi.subtract(tensor_one,
                                                  bool_more_quant_max_float)
    bool_between_min_max = topi.multiply(bool_not_less_quant_min_float,
                                         bool_not_more_quant_max_float)
    between_min_max_float = topi.multiply(zero_point_from_min,
                                          bool_between_min_max)
    # add 0.5 to the in-range values, then floor (round half up).
    between_min_max_add_half_one = topi.add(between_min_max_float,
                                            dc.half_const(dtype))
    between_min_max_round = akg.lang.cce.floor(between_min_max_add_half_one)
    if utils.product_is_mini():
        between_min_max_round = topi.cast(between_min_max_round, "float16")

    between_min_max_round = topi.cast(between_min_max_round, "float32")

    # calculate the maximum and minimum values of the quantization
    nudged_zero_point_tmp = topi.add(less_quant_min_float,
                                     more_quant_max_float)
    nudged_zero_point = topi.add(nudged_zero_point_tmp, between_min_max_round)

    nudged_min_tmp = topi.subtract(quant_min_float, nudged_zero_point)
    nudged_max_tmp = topi.subtract(quant_max_float, nudged_zero_point)
    nudged_min = topi.multiply(nudged_min_tmp, scale)
    nudged_max = topi.multiply(nudged_max_tmp, scale)
    res = [nudged_min, nudged_max, scale]

    return res
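The three bool_* masks are a branch-free clip of the zero point (likely because fp32 compare-select is unavailable on mini, as Example #3 notes). In plain NumPy the whole function collapses to a few lines (a sketch, not the kernel):

import numpy as np

def nudged_min_max_reference(min_val, max_val, num_bits=8, narrow_range=False):
    quant_min = 1.0 if narrow_range else 0.0
    quant_max = 2.0**num_bits - 1.0
    scale = (max_val - min_val) / (quant_max - quant_min)
    zero_point_from_min = quant_min - min_val / scale
    # the bool_* masks implement this clip; then round half up
    nudged_zero_point = np.floor(np.clip(zero_point_from_min, quant_min, quant_max) + 0.5)
    return (quant_min - nudged_zero_point) * scale, (quant_max - nudged_zero_point) * scale, scale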
Example #15
def mul_ad(head, a, b):
    output = mul.mul(a, b)
    jacs_ = list(akg.differentiate(output, [a], head))
    return jacs_[0]
Example #16
def Mul(x, y):
    """mul."""
    return mul.mul(x, y)
Example #17
def mul_sub(first_input, second_input, third_input):
    temp = mul.mul(first_input, second_input)
    output = sub.sub(temp, third_input)
    return output