Example 1
def _taylor_compute(data_x, x_square=None):
    """
    compute asin(x) using a 15th-order Taylor expansion when 0 <= x <= BOUNDARY_1
    asin(x) = x + 1/6*x^3 + 3/40*x^5 + 5/112*x^7 + ... + 13!!/(14!!*15)*x^15

    Parameters:
    ----------
    data_x : the placeholder of data input

    x_square : the placeholder of the square of data_x

    Returns : A Tensor. Has the same type as data.
    -------
    """

    if x_square is None:
        x_square = te.lang.cce.vmul(data_x, data_x)

    res = te.lang.cce.vmuls(x_square,
                            tvm.const(COEF[TAYLOR_COUNT], x_square.dtype))
    for temp in reversed(range(TAYLOR_COUNT)):
        res = te.lang.cce.vadds(res, tvm.const(COEF[temp], x_square.dtype))
        if temp == 0:
            res = te.lang.cce.vmul(res, data_x)
        else:
            res = te.lang.cce.vmul(x_square, res)

    return res
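The loop above is Horner's rule applied to the odd-power series. Below is a minimal NumPy sketch of the same evaluation; COEF and TAYLOR_COUNT are not shown in this excerpt, so the values here are assumptions reconstructed from the docstring (COEF[k] is the coefficient of x^(2k+1)):

import numpy as np

# Assumed constants (not part of the excerpt): COEF[k] = (2k-1)!!/((2k)!!*(2k+1))
TAYLOR_COUNT = 7
COEF = [1.0]
num, den = 1.0, 1.0
for k in range(1, TAYLOR_COUNT + 1):
    num *= 2 * k - 1
    den *= 2 * k
    COEF.append(num / (den * (2 * k + 1)))

def taylor_asin(x):
    # Mirror of the TE loop: fold in one coefficient per step (Horner),
    # multiplying by x^2 between steps and by x on the final step.
    x_square = x * x
    res = COEF[TAYLOR_COUNT] * x_square
    for temp in reversed(range(TAYLOR_COUNT)):
        res = res + COEF[temp]
        res = res * x if temp == 0 else res * x_square
    return res

x = np.linspace(0.0, 0.5, 6)
assert np.allclose(taylor_asin(x), np.arcsin(x), atol=1e-6)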
Example 2
def _newton_taylor_iter(input_x, input_y, input_z):
    """
    do element-wise Newton compute
    z(n+1) = z(n) - (e^(z(n)/x(n)) - y(n)) / (x(n)^-1 * e^(z(n)/x(n)))

    Parameters:
    ----------
    input_x: TVM tensor, the placeholder of input_x
    input_y: TVM tensor, the placeholder of input_y
    input_z: start value of Newton iteration

    Returns : A Tensor. Has the same type as input_z.
    -------
    """
    # Newton begin: z(n+1) = z(n) - x(n) + x(n)*y(n)*e^(-z(n)/x(n))
    input_x_mul = te.lang.cce.vmuls(input_x,
                                    tvm.const(SCALAR_NEG_ONE, "float32"))
    newton_taylor = te.lang.cce.vadd(input_x_mul, input_z)
    input_xy = te.lang.cce.vmul(input_x, input_y)
    input_x_rec = te.lang.cce.vrec(input_x)
    input_x_res = te.lang.cce.vmuls(input_x_rec,
                                    tvm.const(SCALAR_NEG_ONE, "float32"))
    input_z_mul = te.lang.cce.vmul(input_x_res, input_z)
    input_z_taylor = _exp_taylor_compute(input_z_mul)
    input_z_res = te.lang.cce.vmul(input_z_taylor, input_xy)
    newton_taylor = te.lang.cce.vadd(newton_taylor, input_z_res)

    return newton_taylor
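The simplification noted in the comment can be checked numerically; a small NumPy sketch, with np.exp standing in for _exp_taylor_compute:

import numpy as np

# Newton on f(z) = e^(z/x) - y gives
#   z' = z - (e^(z/x) - y) / ((1/x) * e^(z/x)) = z - x + x*y*e^(-z/x),
# which is the simplified form built up by the code above.
x, y, z = 2.0, 3.0, 1.0
newton = z - (np.exp(z / x) - y) / ((1.0 / x) * np.exp(z / x))
simplified = z - x + x * y * np.exp(-z / x)
assert np.isclose(newton, simplified)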
Example 3
def _get_pd_x_front_nz(data, param_nz, cast_dtype):
    """
    compute front part of pd_x according to data, params and shape_x

    """
    pd_xl = _get_pd_xl_nz(data, param_nz)

    pd_var, var_elta_2, sub_x_mean = _get_pd_var_nz(data, param_nz, pd_xl,
                                                    cast_dtype)

    pd_mean = _get_pd_mean_nz(param_nz, pd_xl, pd_var, var_elta_2,
                              sub_x_mean, cast_dtype)

    var_elta_2_cast = _broadcast_nz(var_elta_2, param_nz.get("shape_x_nz"))
    pd_x_1 = te.lang.cce.vmul(var_elta_2_cast, pd_xl)
    pdx2_broad = _broadcast_nz(pd_var, param_nz.get("shape_x_nz"))
    pdx2_mul = te.lang.cce.vmul(pdx2_broad, sub_x_mean)
    pd_x_2 = te.lang.cce.vmuls(
        pdx2_mul,
        tvm.const(2 * (param_nz.get("mean_nz_num") ** (-1)), dtype=cast_dtype))
    pd_x_3 = te.lang.cce.vmuls(
        pd_mean,
        tvm.const(param_nz.get("mean_nz_num") ** (-1), dtype=cast_dtype))

    return pd_x_1, pd_x_2, pd_x_3
Example 4
def l1_loss_grad_compute(grad_out,
                         predict,
                         target,
                         y,
                         reduction="mean",
                         kernel_name="l1_loss_grad"):
    predict_dtype = predict.dtype.lower()
    zero_tensor = te.lang.cce.vmuls(predict, tvm.const(0, dtype=predict_dtype))
    one_tensor = te.lang.cce.vadds(zero_tensor,
                                   tvm.const(1, dtype=predict_dtype))
    neg_one_tensor = te.lang.cce.vadds(zero_tensor,
                                       tvm.const(-1, dtype=predict_dtype))
    # sign is 1 where predict is greater than target, else -1
    sign = te.lang.cce.vcmpsel(predict, target, "gt", one_tensor,
                               neg_one_tensor)
    # rectify sign to 0 where predict equals target
    sign = te.lang.cce.vcmpsel(predict, target, "eq", zero_tensor, sign)
    grad_shape = te.lang.cce.util.shape_to_list(grad_out.shape)
    n = reduce(lambda x, y: x * y, grad_shape)
    norm = grad_out
    # for "mean" reduction, grad_out is divided by n
    if reduction == "mean":
        norm = te.lang.cce.vmuls(norm, tvm.const(1 / n, dtype=predict_dtype))
    # chain rule: multiply the sign by the (scaled) incoming gradient grad_out
    res = te.lang.cce.vmul(sign, norm)
    return res
Example 5
def fake_quant_perchannel_compute(x, min_val, max_val, y, quant_min, quant_max,
                                  kernel_name="fake_quant_perchannel"):
    """FakeQuantPerChannel"""
    x_shape = te.lang.cce.util.shape_to_list(x.shape)
    minmax_shape = te.lang.cce.util.shape_to_list(min_val.shape)
    quant_min = tvm.const(quant_min, x.dtype)
    quant_max = tvm.const(quant_max, x.dtype)
    quant_min = te.lang.cce.broadcast(quant_min, minmax_shape, x.dtype)
    quant_max = te.lang.cce.broadcast(quant_max, minmax_shape, x.dtype)

    # CalNudge(NudgeMinMax)
    scale = te.lang.cce.vdiv(te.lang.cce.vsub(
        max_val, min_val), te.lang.cce.vsub(quant_max, quant_min))
    zp_from_min = te.lang.cce.vsub(quant_min, te.lang.cce.vdiv(min_val, scale))

    # Nudge zero point
    nudge_zp_ = te.lang.cce.vmin(
        quant_max, te.lang.cce.vmax(quant_min, zp_from_min))
    nudge_zp = te.lang.cce.floor(te.lang.cce.vadds(nudge_zp_, 0.5))
    nudge_min = te.lang.cce.vmul(te.lang.cce.vsub(quant_min, nudge_zp), scale)
    nudge_max = te.lang.cce.vmul(te.lang.cce.vsub(quant_max, nudge_zp), scale)

    # FakeQuant
    nudge_min_b = te.lang.cce.broadcast(nudge_min, x_shape)
    nudge_max_b = te.lang.cce.broadcast(nudge_max, x_shape)
    scale_b = te.lang.cce.broadcast(scale, x_shape)

    input_x = te.lang.cce.vmin(nudge_max_b, te.lang.cce.vmax(nudge_min_b, x))
    nudge_input_ = te.lang.cce.vdiv(
        te.lang.cce.vsub(input_x, nudge_min_b), scale_b)
    nudge_input = te.lang.cce.floor(te.lang.cce.vadds(nudge_input_, 0.5))
    res = te.lang.cce.vadd(te.lang.cce.vmul(nudge_input, scale_b), nudge_min_b)

    return res
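A NumPy sketch of the same nudge-and-quantize arithmetic for a single channel; the scalar min/max and the 8-bit quant_min/quant_max defaults below are illustrative assumptions:

import numpy as np

def fake_quant(x, min_val, max_val, quant_min=0.0, quant_max=255.0):
    scale = (max_val - min_val) / (quant_max - quant_min)
    zp_from_min = quant_min - min_val / scale
    # Nudge the zero point onto the integer grid, as above.
    nudge_zp = np.floor(np.clip(zp_from_min, quant_min, quant_max) + 0.5)
    nudge_min = (quant_min - nudge_zp) * scale
    nudge_max = (quant_max - nudge_zp) * scale
    # Clamp, round to the nearest quantization step, and map back to float.
    clamped = np.clip(x, nudge_min, nudge_max)
    quantized = np.floor((clamped - nudge_min) / scale + 0.5)
    return quantized * scale + nudge_min

print(fake_quant(np.array([-1.0, 0.0, 0.1, 2.5]), min_val=-0.5, max_val=1.5))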
Example 6
    def _trans_input_shape(self, axis):
        """
        trans the input shape into three dimensions (left, mid, right) and
        get the range of different dims of the input shape.
        Returns:
        -------
        x_reshape: new input shape of format with (left, mid, right)
        left_range:left dim range
        right_range:right dim range
        """
        real_axis = axis + len(self.dim_vars) if axis < 0 else axis
        left_dim = tvm.const(1)
        left_upper = 1
        for idx in range(real_axis):
            left_dim *= self.dim_vars[idx]
            left_upper *= self.dim_bounds[idx][1]
        self.left_range = (1, left_upper)

        right_dim = tvm.const(1)
        right_upper = 1
        for idx in range(real_axis + 1, len(self.dim_vars)):
            right_dim *= self.dim_vars[idx]
            right_upper *= self.dim_bounds[idx][1]
        self.right_range = (1, right_upper)
        self.x_reshape = (left_dim, self.dim_vars[real_axis], right_dim)
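An illustrative NumPy analogue of the reshape logic for static shapes (the method itself works on symbolic dim_vars):

import numpy as np

def trans_input_shape(shape, axis):
    # Collapse everything left/right of `axis` into single dimensions,
    # giving a (left, mid, right) view of the input.
    axis = axis + len(shape) if axis < 0 else axis
    left = int(np.prod(shape[:axis], dtype=np.int64))
    right = int(np.prod(shape[axis + 1:], dtype=np.int64))
    return (left, shape[axis], right)

assert trans_input_shape((2, 3, 4, 5), axis=1) == (2, 3, 20)
assert trans_input_shape((2, 3, 4, 5), axis=-1) == (24, 5, 1)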
Example 7
def _get_pd_var_front(data, cast_dtype):
    """
    compute front part of pd_var according to data_variance

    Parameters
    ----------
    data: dict
        placeholders after cast

    Returns
    -------
    pd_var_1: tvm.tensor
        np.power((data_variance + EPSLON), (-1.5))
    var_elta_2: tvm.tensor
        np.power((data_variance + EPSLON), (-0.5))
    """
    var_elta = te.lang.cce.vadds(data.get("data_variance"),
                                 tvm.const(EPSLON, dtype=cast_dtype))
    var_elta_log = te.lang.cce.vlog(var_elta)
    var_elta_mul = te.lang.cce.vmuls(var_elta_log,
                                     tvm.const(-0.5, dtype=cast_dtype))
    var_elta_2 = te.lang.cce.vexp(var_elta_mul)
    pdvar1_mul = te.lang.cce.vmul(var_elta_2, var_elta_2)
    pd_var_1 = te.lang.cce.vmul(pdvar1_mul, var_elta_2)

    return pd_var_1, var_elta_2
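The vlog/vexp pair implements a fractional power, since exp(p*log(v)) = v**p. A NumPy check of the two outputs described in the docstring (EPSLON's real value is defined outside this excerpt; the one below is an assumption):

import numpy as np

EPSLON = 1e-12  # assumed; defined elsewhere in the original module
v = np.array([0.5, 1.0, 4.0])
var_elta = v + EPSLON
var_elta_2 = np.exp(-0.5 * np.log(var_elta))     # (v + eps)**(-0.5)
pd_var_1 = var_elta_2 * var_elta_2 * var_elta_2  # (v + eps)**(-1.5)
assert np.allclose(var_elta_2, var_elta ** -0.5)
assert np.allclose(pd_var_1, var_elta ** -1.5)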
Example 8
def _less_compare_float32(data_x, data_y):
    """
    Compare data_x and data_y to determine whether data_x is less than data_y.
    If an element of data_x is less than the corresponding element of data_y,
    return 1 at that position, else return 0.

    The smallest positive normal float32 is 2**(-126), so a positive
    difference must be scaled by 2**126 to reach 1, but cce scalars only
    support up to 2**62; the factor is therefore split as 62/62/2:
    (2**(-126))*(2**62)*(2**62)*(2**2) = 1
    so min_value*max_value*max_value*factor_value = 1
    """
    shape_inputs = te.lang.cce.util.shape_to_list(data_x.shape)
    min_value = tvm.const(2 ** (-126), dtype=D_TYPE)
    max_value = tvm.const(2 ** 62, dtype=D_TYPE)
    factor_value = tvm.const(2 ** 2, dtype=D_TYPE)

    if api_check_support("te.lang.cce.vmaxs", data_x.dtype):
        res_sub = te.lang.cce.vsub(data_y, data_x)
        res_min = te.lang.cce.vmins(res_sub, min_value)
        res_max = te.lang.cce.vmaxs(res_min, tvm.const(0, dtype=D_TYPE))
    else:
        data_zero = te.lang.cce.vmuls(data_x, 0)
        min_value_tensor = te.lang.cce.vadds(data_zero, min_value)

        res_sub = te.lang.cce.vsub(data_y, data_x)
        res_min = te.lang.cce.vmin(res_sub, min_value_tensor)
        res_max = te.lang.cce.vmax(res_min, data_zero)

    res_max_mul = te.lang.cce.vmuls(res_max, max_value)
    res_max_mul_max = te.lang.cce.vmuls(res_max_mul, max_value)
    res = te.lang.cce.vmuls(res_max_mul_max, factor_value)

    return res
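A NumPy sketch of the scaling trick: clamp the difference into [0, 2**-126], then multiply by 2**62 twice and by 2**2 once so any positive difference becomes exactly 1:

import numpy as np

def less_compare_float32(x, y):
    diff = (y - x).astype(np.float32)
    clamped = np.maximum(np.minimum(diff, np.float32(2.0 ** -126)),
                         np.float32(0.0))
    # 2**-126 * 2**62 * 2**62 * 2**2 == 1, mirroring the three vmuls above.
    return (clamped * np.float32(2.0 ** 62) * np.float32(2.0 ** 62)
            * np.float32(4.0))

print(less_compare_float32(np.float32([1, 2, 3]), np.float32([2, 2, 2])))
# -> [1. 0. 0.]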
Example 9
def rsqrt_grad_compute(input_y, input_dy, output_z, kernel_name="rsqrt_grad"):
    """
    compute for rsqrt_grad

    Parameters
    ----------
    input_y: TVM tensor
        the placeholder of input_y
    input_dy: TVM tensor
        the placeholder of input_dy
    output_z: dict
        dict info of output_z
    kernel_name: str
        cce kernel name, default value is "rsqrt_grad"

    Returns
    -------
    res: TVM tensor
        the result of compute
    """
    dtype_input_y = input_y.dtype
    rsqrt_const = tvm.const(SCALAR, dtype=dtype_input_y)
    if dtype_input_y in ("int8", "float16"):
        rsqrt_const = tvm.const(SCALAR, dtype="float32")
        input_y = te.lang.cce.cast_to(input_y, "float32")
        input_dy = te.lang.cce.cast_to(input_dy, "float32")
    res_vmul = te.lang.cce.vmul(input_y, input_y)
    res_vmul1 = te.lang.cce.vmul(res_vmul, input_y)
    res_vmul2 = te.lang.cce.vmul(res_vmul1, input_dy)
    res = te.lang.cce.vmuls(res_vmul2, rsqrt_const)
    if dtype_input_y in ("int8", "int32", "float16"):
        res = te.lang.cce.cast_to(res, dtype_input_y, f1628IntegerFlag=True)
    return res
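For y = rsqrt(x) = x**(-1/2), the derivative is dy/dx = -0.5*x**(-3/2) = -0.5*y**3, which is what the three vmul calls build. SCALAR is not shown in this excerpt, so the -0.5 below is an assumption:

import numpy as np

SCALAR = -0.5  # assumed value of the excerpt's SCALAR constant
x = np.array([0.25, 1.0, 4.0])
y = x ** -0.5            # forward rsqrt
dy = np.ones_like(x)     # incoming gradient
res = (y * y * y) * dy * SCALAR
assert np.allclose(res, -0.5 * x ** -1.5 * dy)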
Example 10
def sigmoid_compute(input_x):
    """
    calculating sigmoid
    """
    data_input = input_x
    dtype = input_x.dtype
    exp_support = cce.cce_conf.api_check_support(
        "te.lang.cce.vexp", "float32")
    mul_support = cce.cce_conf.api_check_support(
        "te.lang.cce.vmuls", "float32")
    if dtype == "float32" and not mul_support:
        error_manager_vector.raise_err_specific_reson(
            "DynamicLSTM",
            "input dtype only supports float16, but got float32")

    const_num_neg_one = tvm.const(-1, dtype=dtype)
    const_num_one = tvm.const(1, dtype=dtype)
    tmp_negative = te.lang.cce.vmuls(data_input, const_num_neg_one)
    if dtype == "float32" and not exp_support:
        tmp_negative = te.lang.cce.cast_to(tmp_negative, "float16")
    tmp_exp = te.lang.cce.vexp(tmp_negative)
    if dtype == "float32" and not exp_support:
        tmp_exp = te.lang.cce.cast_to(tmp_exp, "float32")
    tmp_sum = te.lang.cce.vadds(tmp_exp, const_num_one)
    if dtype == "float32":
        inp_shape = tmp_sum.shape
        tensor_one = te.lang.cce.broadcast(tvm.const(1, dtype), inp_shape)
        res = te.lang.cce.vdiv(tensor_one, tmp_sum)
    else:
        res = te.lang.cce.vrec(tmp_sum)

    return res
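The NumPy equivalent of the computation above is a one-liner, checked here against the identity sigmoid(x) = 0.5*(1 + tanh(x/2)):

import numpy as np

x = np.linspace(-4, 4, 9, dtype=np.float32)
res = 1.0 / (1.0 + np.exp(-x))  # sigmoid via 1 / (1 + e**-x)
assert np.allclose(res, 0.5 * (1 + np.tanh(x / 2)), atol=1e-6)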
Example 11
def _negative_compute(input_x, input_y):
    """
    compute the result of pow when data_x is less than 0,
    using [-2 * (|y| % 2) + 1] * exp(y * ln|x|)
    """
    dtype = input_x.dtype
    shape = input_x.shape
    abs_value = te.lang.cce.vabs(input_y)

    if not tbe_platform.cce_conf.api_check_support("te.lang.cce.vmod",
                                                   "float32"):
        dtype = "float16"
        abs_value = te.lang.cce.cast_to(abs_value, "float16")

    data_two = te.lang.cce.broadcast(tvm.const(2, dtype), shape, dtype)
    mod_value = te.lang.cce.vmod(abs_value, data_two)
    mul_value = te.lang.cce.vmuls(mod_value, tvm.const(-2, dtype))
    add_value = te.lang.cce.vadds(mul_value, tvm.const(1, dtype))

    if tbe_platform.cce_conf.api_check_support("te.lang.cce.vexp", "float32"):
        add_value = te.lang.cce.cast_to(add_value, "float32")

    abs_data_x = te.lang.cce.vabs(input_x)
    log_value = te.lang.cce.vlog(abs_data_x)
    mul_value = te.lang.cce.vmul(input_y, log_value)
    exp_value = te.lang.cce.vexp(mul_value)
    res = te.lang.cce.vmul(add_value, exp_value)

    return res
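A NumPy check of the identity used above for x < 0 and integer y: the factor -2*(|y| % 2) + 1 is -1 for odd y and +1 for even y, restoring the sign that exp(y*ln|x|) discards:

import numpy as np

x = -3.0
y = np.array([0.0, 1.0, 2.0, 3.0])
sign = -2.0 * (np.abs(y) % 2.0) + 1.0
res = sign * np.exp(y * np.log(np.abs(x)))
assert np.allclose(res, np.power(x, y.astype(np.int64)))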
Example 12
def _compute(data_input, shape):
    """
    Algorithm: atanh(x) = 0.5*log((1+x)/(1-x))

    Parameters
    ----------
    data_input: the placeholder of data input

    shape: the shape of data_input

    Returns
    -------
    data_res: the result of atanh
    """

    data_1_sum_x = te.lang.cce.vadds(data_input, tvm.const(CONST_ONE,
                                                           data_input.dtype))
    data_sub_x = te.lang.cce.vmuls(data_input, tvm.const(CONST_NEG_ONE,
                                                         data_input.dtype))
    data_1_sub_x = te.lang.cce.vadds(data_sub_x, tvm.const(CONST_ONE,
                                                           data_input.dtype))
    data_x_mul = te.lang.cce.vdiv(data_1_sum_x, data_1_sub_x)
    data_x_log = te.lang.cce.vlog(data_x_mul, 1)
    data_res = te.lang.cce.vmuls(data_x_log, tvm.const(CONST_HALF,
                                                       data_input.dtype))

    return data_res
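A NumPy check of the formula:

import numpy as np

x = np.linspace(-0.9, 0.9, 7)
res = 0.5 * np.log((1 + x) / (1 - x))  # atanh(x)
assert np.allclose(res, np.arctanh(x))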
Example 13
def _compute_process(var, m, lr_broad, alpha_broad, sign_decay_broad,
                     beta_broad, grad):
    """
    calculate
    m_t <- beta * m_{t-1} + (1 - beta) * g
    update <- (alpha + sign_decay * sign(g) * sign(m_t)) * g
    var <- var - lr * update

    Parameters:
    ----------
    var: TVM tensor of var, support float16, float32
    m: TVM tensor of m, support float16, float32
    lr_broad: broadcast tensor of lr, support float16, float32
    alpha_broad: broadcast tensor of alpha, support float16, float32
    sign_decay_broad: broadcast tensor of sign_decay, support float16, float32
    beta_broad: broadcast tensor of beta, support float16, float32
    grad: TVM tensor of grad, support float16, float32

    Returns
    -------
    m_out, var_out: the new values of m and var
    output_data, m_output_data: copies of the new values for the outputs
    """
    m_out = _update_m(m, beta_broad, grad)
    sign_gm = te.lang.cce.vmul(_sign_compute(grad), _sign_compute(m_out))
    decay_gm = te.lang.cce.vmul(sign_gm, sign_decay_broad)
    var_out = _update_var(decay_gm, alpha_broad, lr_broad, grad, var)

    output_data = te.lang.cce.vadds(var_out, tvm.const(CONST_ZERO, "float32"))
    m_output_data = te.lang.cce.vadds(m_out, tvm.const(CONST_ZERO, "float32"))

    return m_out, var_out, output_data, m_output_data
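The helpers _update_m, _sign_compute and _update_var are not shown, so the NumPy sketch below is a reconstruction of the three docstring formulas, not their exact implementation:

import numpy as np

def addsign_update(var, m, lr, alpha, sign_decay, beta, grad):
    m_t = beta * m + (1 - beta) * grad
    update = (alpha + sign_decay * np.sign(grad) * np.sign(m_t)) * grad
    var_t = var - lr * update
    return var_t, m_t

var_t, m_t = addsign_update(var=np.array([1.0]), m=np.array([0.1]),
                            lr=0.01, alpha=1.0, sign_decay=0.5,
                            beta=0.9, grad=np.array([0.2]))
print(var_t, m_t)  # var steps against grad; m moves toward grad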
Example 14
def _sigmoid_compute(input_x):
    """
    calculating sigmoid
    """
    data_input = input_x
    dtype = input_x.dtype
    exp_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vexp", "float32")
    mul_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vmuls", "float32")
    if dtype == "float32" and not mul_support:
        error_manager_vector.raise_err_check_params_rules(
            "DynamicGRU", 'vmuls should support float32', 'mul_support',
            str(mul_support))

    const_num_neg_one = tvm.const(-1, dtype=dtype)
    const_num_one = tvm.const(1, dtype=dtype)
    tmp_negative = tbe.vmuls(data_input, const_num_neg_one)
    if dtype == "float32" and not exp_support:
        tmp_negative = tbe.cast_to(tmp_negative, "float16")
    tmp_exp = tbe.vexp(tmp_negative)
    if dtype == "float32" and not exp_support:
        tmp_exp = tbe.cast_to(tmp_exp, "float32")
    tmp_sum = tbe.vadds(tmp_exp, const_num_one)
    if dtype == "float32":
        inp_shape = tmp_sum.shape
        tensor_one = tbe.broadcast(tvm.const(1, dtype), inp_shape)
        res = tbe.vdiv(tensor_one, tmp_sum)
    else:
        res = tbe.vrec(tmp_sum)

    return res
Example 15
def less_compute(input_x, input_y, output_z, kernel_name="less"):
    """
    if x is less than y, then return 1, else return 0.

    Parameters:
    ----------
    input_x: TVM tensor
        the placeholder of first input data
    input_y: TVM tensor
        the placeholder of second input data
    output_z: dict
        shape and dtype of output, should be broadcast shape and type as input
    kernel_name: str
        cce kernel name, default value is less

    Returns
    -------
    the result
    """
    shape_x = te.lang.cce.util.shape_to_list(input_x.shape)
    shape_y = te.lang.cce.util.shape_to_list(input_y.shape)
    shape_x, shape_y, shape = broadcast_shapes(shape_x,
                                               shape_y,
                                               param_name_input1="input_x",
                                               param_name_input2="input_y")
    cce_product = tbe_platform.cce_conf.get_soc_spec("SOC_VERSION")
    dtype = input_x.dtype
    if dtype in ("uint8", "int8"):
        input_x = te.lang.cce.cast_to(input_x, "float16")
        input_y = te.lang.cce.cast_to(input_y, "float16")
        dtype = "float16"

    if dtype == "float32":
        # minimum num of float32 is 2**(-126)
        data_min = te.lang.cce.broadcast(tvm.const(2**(-126), dtype=dtype),
                                         shape, dtype)
    elif dtype == "float16" and cce_product not in ("Ascend910", "Ascend710"):
        # minimum num of float16 is 2**(-24)
        data_min = te.lang.cce.broadcast(tvm.const(2**(-24), dtype=dtype),
                                         shape, dtype)
    elif dtype == "float16" and cce_product in ("Ascend910", "Ascend710"):
        input_x = te.lang.cce.cast_to(input_x, "float32")
        input_y = te.lang.cce.cast_to(input_y, "float32")
        dtype = "float32"
        data_min = te.lang.cce.broadcast(tvm.const(2**(-126), dtype=dtype),
                                         shape, dtype)
    elif dtype == "int32" and cce_product not in ("Ascend910", "Ascend710"):
        data_min = te.lang.cce.broadcast(tvm.const(1, dtype=dtype), shape,
                                         dtype)
    else:
        input_x = te.lang.cce.cast_to(input_x, "float32")
        input_y = te.lang.cce.cast_to(input_y, "float32")
        dtype = "float32"
        data_min = te.lang.cce.broadcast(tvm.const(2**(-126), dtype=dtype),
                                         shape, dtype)

    input_x = te.lang.cce.broadcast(input_x, shape)
    input_y = te.lang.cce.broadcast(input_y, shape)

    return _less_compare((input_x, input_y), shape, dtype, data_min)
Example 16
def _taylor_compute(data):
    """
    algorithm: log(x) = ((((0.2x - 0.25)x + 0.33333)x - 0.5)x + 1)x

    Parameters
    ----------
    data: input tensor that we want to calculate log

    Returns
    -------
    taylor: TVM tensor, the result of the Taylor computation

    """
    # 0.2x - 0.25
    taylor_five = te.lang.cce.vmuls(data, tvm.const(CONST_ONE_FIVE, "float32"))
    taylor_four_1 = te.lang.cce.vadds(taylor_five,
                                      tvm.const(CONST_ONE_FOUR_NEG, "float32"))
    # (0.2x - 0.25)x + 0.33333
    taylor_four_2 = te.lang.cce.vmul(taylor_four_1, data)
    taylor_three_1 = te.lang.cce.vadds(taylor_four_2,
                                       tvm.const(CONST_ONE_THREE, "float32"))
    # ((0.2x - 0.25)x + 0.33333)x - 0.5
    taylor_three_2 = te.lang.cce.vmul(taylor_three_1, data)
    taylor_two_1 = te.lang.cce.vadds(
        taylor_three_2, tvm.const(CONST_NEWTON_FACTOR_NEG, "float32"))
    # (((0.2x - 0.25)x + 0.33333)x - 0.5)x+1
    taylor_two_2 = te.lang.cce.vmul(taylor_two_1, data)
    taylor_one = te.lang.cce.vadds(taylor_two_2,
                                   tvm.const(CONST_ONE, "float32"))
    # ((((0.2x - 0.25)x + 0.33333)x - 0.5)x + 1)x
    taylor = te.lang.cce.vmul(taylor_one, data)

    return taylor
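The nested polynomial is the fifth-order Maclaurin series of log(1 + x), i.e. x - x^2/2 + x^3/3 - x^4/4 + x^5/5, so the caller presumably passes data = x - 1 to approximate log(x) near 1. A NumPy check:

import numpy as np

data = np.linspace(-0.1, 0.1, 5)
taylor = ((((0.2 * data - 0.25) * data + 1.0 / 3) * data - 0.5) * data
          + 1) * data
assert np.allclose(taylor, np.log(1 + data), atol=1e-6)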
Example 17
def _cosh_taylor_compute(data):
    """
    Calculate cosh(x) ~= 1 + x^2(1/2! + x^2(1/4! + x^2/6!))

    Parameters:
    ----------
    data : the placeholder of data input

    Returns
    -------
    A Tensor represents cosh(data). Has the same type as data.
    """

    # x^2 / 6!
    pow_2 = te.lang.cce.vmul(data, data)
    pow_2_div = te.lang.cce.vmuls(pow_2, tvm.const(TAYLOR_SIXTH, data.dtype))

    # 1/4! + x^2 / 6!
    pow_2_plus = te.lang.cce.vadds(pow_2_div,
                                   tvm.const(TAYLOR_FOURTH, data.dtype))

    # 1/2! + x^2( 1/4! + x^2/6!)
    pow_4 = te.lang.cce.vmul(pow_2_plus, pow_2)
    pow_4_plus = te.lang.cce.vadds(pow_4, tvm.const(TAYLOR_SECOND, data.dtype))

    # 1 + x^2( 1/2! + x^2( 1/4! + x^2/6!))
    pow_6 = te.lang.cce.vmul(pow_4_plus, pow_2)
    res = te.lang.cce.vadds(pow_6, tvm.const(NUM_ONE, data.dtype))

    return res
Example 18
def _compare_value_float(x_data, y_data):
    """
    The input data type of the function only supports float32;
    The return value of the function: if x_data >= y_data return 1; else return 0.
    """
    # The smallest positive subnormal number of float32 is 2**(-126)
    min_value = tvm.const(2**(-126), dtype="float32")
    # (2**(-126))*(2**(62))*(2**(62))*(2**(2)) = 1
    # so min_value*max_value*max_value*max_value_1 = 1
    max_value = tvm.const(2**(62), dtype="float32")
    max_value_1 = tvm.const(2**(2), dtype="float32")

    data_zero = te.lang.cce.vmuls(x_data, 0)
    min_value_tensor = te.lang.cce.vadds(data_zero, min_value)
    max_value_tensor = te.lang.cce.vadds(data_zero, max_value)
    max_value_1_tensor = te.lang.cce.vadds(data_zero, max_value_1)
    sub_xy = te.lang.cce.vsub(x_data, y_data)
    add_min_value = te.lang.cce.vadds(sub_xy, min_value)
    vmax_zero = te.lang.cce.vmax(add_min_value, data_zero)
    vmin_min_value = te.lang.cce.vmin(vmax_zero, min_value_tensor)
    vmul_max_value = te.lang.cce.vmul(vmin_min_value, max_value_tensor)
    vmul_max_value_1 = te.lang.cce.vmul(vmul_max_value, max_value_tensor)
    result = te.lang.cce.vmul(vmul_max_value_1, max_value_1_tensor)

    return result
Example 19
def _log1p_mini_compute(mini_res, input_x, shape):
    """
    do element-wise log(x + 1) compute in the mini scene
    f(y) = exp-based Newton iteration, when y(n) <= TAYLOR_NEGATIVE_THRESHOLD or y(n) >= TAYLOR_POSITIVE_THRESHOLD
    f(y) = seventh-order Taylor-based Newton iteration, when TAYLOR_NEGATIVE_THRESHOLD < y(n) < TAYLOR_POSITIVE_THRESHOLD

    Parameters:
    ----------
    mini_res: TVM tensor, the tensor of log(x + 1)
    input_x : TVM tensor, the placeholder of input_x
    shape : tuple, the shape of input_x

    Returns : A Tensor. Has the same type as mini_res.
    -------
    """
    input_y = mini_res
    newton_taylor_res = _newton_taylor_log1p(input_x, input_y)
    newton_exp_res = _newton_exp_log1p(input_x, input_y)

    input_left_border = tvm.const(TAYLOR_NEGATIVE_THRESHOLD, input_y.dtype)
    tensor_input_left_border = te.lang.dynamic.broadcast(
        input_left_border, shape)
    input_right_border = tvm.const(TAYLOR_POSITIVE_THRESHOLD, input_y.dtype)
    tensor_input_right_border = te.lang.dynamic.broadcast(
        input_right_border, shape)
    exp_taylor_neg = te.lang.dynamic.vcmpsel(input_y, tensor_input_left_border,
                                             'gt', newton_taylor_res,
                                             newton_exp_res)
    mini_res = te.lang.dynamic.vcmpsel(input_y,
                                       tensor_input_right_border, 'lt',
                                       exp_taylor_neg, newton_exp_res)
    return mini_res
Example 20
def prelu_compute(input_x, weight_input, output_y, kernel_name="prelu"):
    """
    calculating data

    Parameters
    ----------
    input_x : TVM tensor
        the placeholder of input_x
    weight_input : TVM tensor
        the placeholder of weight_input
    output_y : dict
        dict of output_y, include keys(shape and dtype)
    kernel_name : str
        kernel name, default value is "prelu"

    Returns
    -------
    output tensor
    """
    shape_x = te.lang.cce.util.shape_to_list(input_x.shape)
    if input_x.dtype == "float16":
        scalar_zero = tvm.const(0, dtype="float16")
    else:
        scalar_zero = tvm.const(0, dtype="float32")
    val_max = te.lang.cce.vmaxs(input_x, scalar_zero)
    val_min = te.lang.cce.vmins(input_x, scalar_zero)
    weight_input = te.lang.cce.broadcast(weight_input, shape_x)
    val_prod = te.lang.cce.vmul(val_min, weight_input)
    res = te.lang.cce.vadd(val_max, val_prod)
    return res
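The composition above is the usual PReLU decomposition; the NumPy equivalent:

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 3.0])
weight = 0.25
res = np.maximum(x, 0) + weight * np.minimum(x, 0)
assert np.allclose(res, np.where(x > 0, x, weight * x))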
Example 21
def abs_grad_compute(y, dy, z, kernel_name="abs_grad"):
    """
    do abs_grad compute
    Parameters:
    ----------------
    y: input tensor y
    dy: input tensor dy
    z: output dict
    kernel_name: cce kernel name, default value is "abs_grad"
    return: data_dy * sign(data_y)
    ----------------
    """

    dtype = dy.dtype

    if dtype == "float16":
        fp_max = tvm.const(2**15, dtype)
        fp_min = tvm.const(2**(-15), dtype)
    else:
        fp_max = tvm.const(2**62, dtype)
        fp_min = tvm.const(2**(-127), dtype)
    new_data = te.lang.cce.vmuls(y, fp_max)
    abs_data = te.lang.cce.vabs(new_data)
    denominator = te.lang.cce.vadds(abs_data, fp_min)
    res = te.lang.cce.vdiv(new_data, denominator)
    res = te.lang.cce.round(res)
    data1_res = te.lang.cce.vmul(res, dy)
    return data1_res
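A NumPy sketch of the sign trick above: scale y up so that |scaled| dwarfs fp_min, divide, and round, giving an exact -1/0/+1 mask to multiply with dy:

import numpy as np

def sign_like(y, fp_max=2.0 ** 62, fp_min=2.0 ** -127):
    new_data = y * fp_max
    # new_data / (|new_data| + fp_min) saturates to roughly +/-1 (or 0 at 0).
    return np.round(new_data / (np.abs(new_data) + fp_min))

y = np.array([-3.0, 0.0, 0.5])
dy = np.ones_like(y)
print(sign_like(y) * dy)  # -> [-1.  0.  1.]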
Example 22
def tanh_grad_compute(y, dy, z, kernel_name="tanh_grad"):
    """
    do element-wise tanh_grad operation between two input tensors

    Parameters
    ----------
    y: TVM tensor
        the placeholder of y input data
    dy: TVM tensor
        the placeholder of dy input data
    z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        cce kernel name, default value is tanh_grad

    Returns
    -------
    res : tvm.tensor
        the result of tanh_grad
    """
    dtype = y.dtype

    if dtype == "float16":
        y = te.lang.cce.cast_to(y, "float32")
        dy = te.lang.cce.cast_to(dy, "float32")

    data1_square = te.lang.cce.vmul(y, y)
    data_mul = te.lang.cce.vmuls(data1_square, tvm.const(-1, dtype=y.dtype))
    one_minus_square = te.lang.cce.vadds(data_mul, tvm.const(1, dtype=y.dtype))
    res = te.lang.cce.vmul(one_minus_square, dy)

    if dtype == "float16":
        res = te.lang.cce.cast_to(res, "float16")

    return res
Example 23
def mish_compute(input_x, output_y, kernel_name="mish"):
    """
    algorithm: mish
    calculating mish: y = x*(1 - 2/(1 + (1 + exp(x))^2))

    Parameters
    ----------
    input_x: TVM tensor
        the placeholder of input data
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        cce kernel name, default value is mish

    Returns
    -------
    res : tvm.tensor
        the result of mish
    """
    dtype = input_x.dtype
    exp_val = te.lang.cce.vexp(input_x)
    add_exp_val = te.lang.cce.vadds(exp_val, tvm.const(1, dtype))
    pow_var = te.lang.cce.vmul(add_exp_val, add_exp_val)
    add_val = te.lang.cce.vadds(pow_var, tvm.const(1, dtype))
    rec_val = te.lang.cce.vrec(add_val)
    mul_val = te.lang.cce.vmuls(rec_val, tvm.const(-2, dtype=dtype))
    add_val2 = te.lang.cce.vadds(mul_val, tvm.const(1, dtype=dtype))
    res = te.lang.cce.vmul(input_x, add_val2)

    return res
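A NumPy check that the closed form above equals the usual definition mish(x) = x * tanh(softplus(x)):

import numpy as np

x = np.linspace(-3, 3, 7)
res = x * (1 - 2 / (1 + (1 + np.exp(x)) ** 2))
assert np.allclose(res, x * np.tanh(np.log1p(np.exp(x))))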
Example 24
def acos_grad_compute(y, dy, z, kernel_name="acos_grad"):
    """
    do acos_grad compute with sqrt and div
    Parameters:
    ----------------
    y: input tensor y
    dy: input tensor dy
    z: output dict
    kernel_name: cce kernel name, default value is "acos_grad"
    return: dy * (-1 / (1 - y^2)^(1/2))
    ----------------
    """

    dtype = y.dtype
    dtype_1 = dtype
    if dtype == "float16" and \
       api_check_support("te.lang.cce.vadd", "float32"):
        y = te.lang.cce.cast_to(y, "float32")
        dy = te.lang.cce.cast_to(dy, "float32")
        dtype = "float32"

    data1_square = te.lang.cce.vmul(y, y)
    data1_square = te.lang.cce.vmuls(data1_square,
                                     tvm.const(NUM_MINUS_ONE, dtype=dtype))
    data1_square = te.lang.cce.vadds(data1_square,
                                     tvm.const(NUM_ONE, dtype=dtype))

    data1_reciprocal = te.lang.cce.vsqrt(data1_square, 1)
    data1_reciprocal = te.lang.cce.vdiv(dy, data1_reciprocal)
    res = te.lang.cce.vmuls(data1_reciprocal,
                            tvm.const(NUM_MINUS_ONE, dtype=dtype))

    if dtype_1 == "float16":
        res = te.lang.cce.cast_to(res, "float16")
    return res
Example 25
def _compute_positive(prox_v, alpha_broad, l1_broad, l2_broad):
    """
    the operator's compute
    var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

    Parameters:
    ----------
    prox_v: the value of prox_v
    alpha_broad: the value of alpha_broad
    l1_broad: the value of l1_broad
    l2_broad: the value of l2_broad

    Returns
    the value of var_res
    """
    prox_v_abs = te.lang.cce.vabs(prox_v)
    prox_v_sign = sign(prox_v)
    # 1+alpha*l2
    alpha_l2 = te.lang.cce.vmul(alpha_broad, l2_broad)
    alpha_l2_1 = te.lang.cce.vadds(alpha_l2, tvm.const(CONST_ONE, "float32"))
    # max{|prox_v|-alpha*l1,0}
    alpha_l1 = te.lang.cce.vmul(alpha_broad, l1_broad)
    alpha_l1_neg = te.lang.cce.vmuls(alpha_l1,
                                     tvm.const(CONST_ONE_NEG, "float32"))
    prox_v_l1 = te.lang.cce.vadd(prox_v_abs, alpha_l1_neg)
    max_value = te.lang.cce.vmax(
        prox_v_l1,
        te.lang.cce.broadcast(tvm.const(CONST_ZERO, "float32"), prox_v.shape))
    # sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
    res = te.lang.cce.vdiv(prox_v_sign, alpha_l2_1)
    var_res = te.lang.cce.vmul(res, max_value)

    return var_res
Example 26
def fake_quant_with_min_max_grad_compute(dout, x, min_val, max_val, quant_min, quant_max,
                                         kernel_name="fake_quant_with_min_max_grad"):
    """FakeQuantWithMinMaxGrad"""
    shape = te.lang.cce.util.shape_to_list(x.shape)
    shape_min = te.lang.cce.util.shape_to_list(min_val.shape)
    quant_min = tvm.const(quant_min, x.dtype)
    quant_max = tvm.const(quant_max, x.dtype)
    quant_min = te.lang.cce.broadcast(quant_min, shape_min)
    quant_max = te.lang.cce.broadcast(quant_max, shape_min)

    # CalNudge(NudgeMinMax)
    scale = te.lang.cce.vdiv(te.lang.cce.vsub(max_val, min_val), te.lang.cce.vsub(quant_max, quant_min))
    zp_from_min = te.lang.cce.vsub(quant_min, te.lang.cce.vdiv(min_val, scale))
    # Nudge zero point
    nudge_zp = te.lang.cce.round(te.lang.cce.vmin(quant_max, te.lang.cce.vmax(quant_min, zp_from_min)))
    nudge_min = te.lang.cce.vmul(te.lang.cce.vsub(quant_min, nudge_zp), scale)
    nudge_max = te.lang.cce.vmul(te.lang.cce.vsub(quant_max, nudge_zp), scale)
    nudge_min = te.lang.cce.broadcast(nudge_min, shape)
    nudge_max = te.lang.cce.broadcast(nudge_max, shape)

    bool_over_min = _less_compare_float32(nudge_min, x)
    bool_less_max = _less_compare_float32(x, nudge_max)
    bool_between = te.lang.cce.vmul(bool_over_min, bool_less_max)
    res = te.lang.cce.vmul(dout, bool_between)

    return res
Example 27
def eltwise_compute(x, y, mode=1, coeff=[], kernel_name="eltwise"):
    '''
    Compute elementwise operation
    '''
    tensor_num = len(x)
    inp_dtype = x[0].dtype
    data0_tmp = x[0]

    tmp_y = {}
    tmp_y["addr_type"] = 0
    tmp_y["valid_shape"] = []
    tmp_y["slice_offset"] = []
    fuse_y = tmp_y if y is None else y
    fusion_params = get_fusion_params(x, fuse_y, tensor_num)

    if mode == 1:
        if len(coeff) != 0 and len(coeff) != tensor_num:
            error_info = {
                'errCode': "E81002",
                'op_name': 'eltwise',
                'coeff_length': str(len(coeff)),
                'input_num': str(tensor_num)
            }
            raise RuntimeError(error_info,
                               "In op[%s], the parameter[coeff]'s length[%s] "
                               "should be equal to inputs' num[%s]." %
                               (error_info['op_name'],
                                error_info['coeff_length'],
                                error_info['input_num']))
        if len(coeff) == tensor_num:
            if not isinstance(coeff[0], (int, float)):
                raise RuntimeError("each element of coeff must be a number.")
            if coeff[0] != 1:
                coeff1 = tvm.const(coeff[0], dtype=inp_dtype)
                data0_tmp = te.lang.cce.vmuls(data0_tmp, coeff1)

    res = None
    if tensor_num == 1:
        const_val_0 = tvm.const(0, dtype=inp_dtype)
        data0_tmp = te.lang.cce.vadds(data0_tmp, const_val_0)
        res = data0_tmp
    elif tensor_num > 1:
        for i in range(1, tensor_num):
            datan_tmp = x[i]
            if mode == 0:
                data0_tmp = te.lang.cce.vmul(data0_tmp, datan_tmp)
            elif mode == 2:
                data0_tmp = te.lang.cce.vmax(data0_tmp, datan_tmp)
            else:
                if len(coeff) == 0:
                    data0_tmp = te.lang.cce.vadd(data0_tmp, datan_tmp)
                elif coeff[i] == 1:
                    data0_tmp = te.lang.cce.vadd(data0_tmp, datan_tmp)
                else:
                    coeff2 = tvm.const(coeff[i], dtype=inp_dtype)
                    datan_tmp = te.lang.cce.vmuls(datan_tmp, coeff2)
                    data0_tmp = te.lang.cce.vadd(data0_tmp, datan_tmp)
        res = data0_tmp

    res.op.attrs["ele_fusion_params"] = fusion_params
    return res
Example 28
def newton_iteration(shape, tensor_x_rec, tensor_x, symbol, iter_num):
    """
    the function of newton_iteration
    Parameters
    ----------
    shape: tensor shape
    tensor_x_rec: tensor holding the initial estimate of the reciprocal
    tensor_x: tensor
    symbol: tensor name symbol
    iter_num: number of Newton iterations

    Returns
    -------
    tensor_list: dict
    scope_list: dict
    emit_list: dict
    """
    dtype_c = tensor_x_rec.dtype
    num_two = tvm.const(2, dtype=dtype_c)
    neg_one = tvm.const(-1, dtype=dtype_c)
    tmp = tensor_x_rec

    tensor_list = {}
    scope_list = {}
    emit_list = {}
    tmp_mul = None
    tmp_neg = None
    tmp_add = None
    for index in range(0, iter_num):
        key = "tmp_mul_" + symbol + str(index)
        tmp_mul = tvm.compute(shape,
                              lambda *i: tensor_x(*i) * tmp(*i),
                              name=key)
        tensor_list[key] = tmp_mul
        scope_list[key] = cce.scope_ubuf
        emit_list[key] = "vector_mul"

        key = "tmp_neg_" + symbol + str(index)
        tmp_neg = tvm.compute(shape,
                              lambda *i: tmp_mul(*i) * neg_one,
                              name=key)
        tensor_list[key] = tmp_neg
        scope_list[key] = cce.scope_ubuf
        emit_list[key] = "vector_muls"

        key = "tmp_add_" + symbol + str(index)
        tmp_add = tvm.compute(shape,
                              lambda *i: tmp_neg(*i) + num_two,
                              name=key)
        tensor_list[key] = tmp_add
        scope_list[key] = cce.scope_ubuf
        emit_list[key] = "vector_adds"

        key = "tmp_" + symbol + str(index)
        tmp = tvm.compute(shape, lambda *i: tmp_add(*i) * tmp(*i), name=key)
        tensor_list[key] = tmp
        scope_list[key] = cce.scope_ubuf
        emit_list[key] = "vector_mul"

    return tensor_list, scope_list, emit_list
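The loop body is the classic Newton iteration for the reciprocal, r(n+1) = r(n)*(2 - x*r(n)), which converges quadratically to 1/x from a rough seed; a NumPy sketch:

import numpy as np

x = np.array([0.5, 2.0, 7.0])
r = np.array([1.0, 0.4, 0.1])  # rough initial estimates of 1/x
for _ in range(5):
    r = (2.0 - x * r) * r      # mirrors tmp_mul -> tmp_neg -> tmp_add -> tmp
assert np.allclose(r, 1.0 / x)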
Example 29
def select_compute(condition, x1, x2, kernel_name="select"):
    """
    compute for select

    Parameters
    ----------
    condition: TVM tensor
        the placeholder of input condition
    x1: TVM tensor
        the placeholder of first input data
    x2: TVM tensor
        the placeholder of second input data
    kernel_name: str
        cce kernel name, default value is "select"

    Returns
    -------
    res : output of the result of select compute
    """
    shape = te.lang.cce.util.shape_to_list(x1.shape)
    x1_dtype = x1.dtype
    con_shape = te.lang.cce.util.shape_to_list(condition.shape)
    bool_dtype = condition.dtype

    if x1_dtype in ("int8", "uint8"):
        x1_dtype = "float32"
        ones = te.lang.cce.broadcast(tvm.const(1, dtype=x1_dtype),
                                     shape,
                                     output_dtype=x1_dtype)
        x1 = te.lang.cce.cast_to(x1, "float32")
        x2 = te.lang.cce.cast_to(x2, "float32")
    else:
        ones = te.lang.cce.broadcast(tvm.const(1, dtype=x1_dtype),
                                     shape,
                                     output_dtype=x1_dtype)

    if bool_dtype == "int8":
        if x1_dtype == "int32":
            condition_dtype = te.lang.cce.ceil(condition)
        else:
            condition_dtype = te.lang.cce.cast_to(condition, x1_dtype)
    else:
        if x1_dtype == "int32":
            condition_dtype = condition
        else:
            condition_dtype = te.lang.cce.cast_to(condition, x1_dtype)

    if list(con_shape) != list(shape):
        condition_dtype = te.lang.cce.broadcast(condition_dtype, shape)

    condition_opp = te.lang.cce.vsub(ones, condition_dtype)

    temp_x = te.lang.cce.vmul(x1, condition_dtype)
    temp_y = te.lang.cce.vmul(x2, condition_opp)
    res = te.lang.cce.vadd(temp_x, temp_y)
    if x1_dtype in ("int8", "uint8"):
        res = te.lang.cce.cast_to(res, x1_dtype)

    return res
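The selection is done arithmetically rather than with a branch; the NumPy equivalent:

import numpy as np

condition = np.array([1.0, 0.0, 1.0])
x1 = np.array([10.0, 20.0, 30.0])
x2 = np.array([-1.0, -2.0, -3.0])
res = x1 * condition + x2 * (1 - condition)
assert np.allclose(res, np.where(condition > 0, x1, x2))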
Example 30
def xlogy_grad_compute(placeholders, shape_max, dtype, rx, ry):
    """
    do element-wise xlogy_grad compute

    Parameters
    ----------
    placeholders : the placeholder of data input
    shape_max : the shape of broadcast
    dtype : the type of data input
    rx : the reduction indices for x1 under broadcast
    ry : the reduction indices for x2 under broadcast

    Returns
    -------
    output_y1 : gradient of xlogy with respect to x1
    output_y2 : gradient of xlogy with respect to x2
    """
    x1_ori = placeholders[0]
    x2_ori = placeholders[1]
    grad_ori = placeholders[2]

    fp32_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vdiv", "float32")
    if dtype == "float32" and not fp32_support:
        raise RuntimeError("Don't support float32 in the platform.")

    if dtype == "float16" and fp32_support:
        x1 = te.lang.cce.cast_to(x1_ori, "float32")
        x2 = te.lang.cce.cast_to(x2_ori, "float32")
        grad = te.lang.cce.cast_to(grad_ori, "float32")
        x1 = te.lang.cce.broadcast(x1, shape_max)
        x2 = te.lang.cce.broadcast(x2, shape_max)
        grad = te.lang.cce.broadcast(grad, shape_max)
    else:
        x1 = te.lang.cce.broadcast(x1_ori, shape_max)
        x2 = te.lang.cce.broadcast(x2_ori, shape_max)
        grad = te.lang.cce.broadcast(grad_ori, shape_max)

    if dtype == "float16" and not fp32_support:
        esp_min = tvm.const(1.18e-7, dtype="float16")
    else:
        esp_min = tvm.const(1.18e-38, dtype="float32")
    x1_addespmin = te.lang.cce.vadds(x1, esp_min)
    not_zero_x1 = te.lang.cce.vdiv(x1, x1_addespmin)
    log_x2 = te.lang.cce.vlog(x2)
    partial_x1 = te.lang.cce.vmul(not_zero_x1, log_x2)
    partial_x1g = te.lang.cce.vmul(partial_x1, grad)

    partial_x2 = te.lang.cce.vdiv(x1, x2)
    partial_x2g = te.lang.cce.vmul(partial_x2, grad)

    output_y1 = te.lang.cce.sum(partial_x1g, rx, keepdims=True)
    output_y2 = te.lang.cce.sum(partial_x2g, ry, keepdims=True)

    if dtype == "float16" and fp32_support:
        output_y1 = te.lang.cce.cast_to(output_y1, "float16")
        output_y2 = te.lang.cce.cast_to(output_y2, "float16")
    return output_y1, output_y2
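A NumPy sketch of the partials assembled above: for f(x1, x2) = x1*log(x2), df/dx1 = log(x2) (masked to 0 where x1 == 0 via the x1/(x1 + esp_min) trick) and df/dx2 = x1/x2, each multiplied by the incoming gradient before the reductions:

import numpy as np

x1 = np.array([0.0, 1.0, 2.0])
x2 = np.array([1.0, 2.0, 4.0])
grad = np.ones_like(x1)
esp_min = 1.18e-38
not_zero_x1 = x1 / (x1 + esp_min)          # ~0 where x1 == 0, else ~1
partial_x1 = not_zero_x1 * np.log(x2) * grad
partial_x2 = (x1 / x2) * grad
print(partial_x1)  # [0.     0.693... 1.386...]
print(partial_x2)  # [0.  0.5  0.5]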