Example #1
def _tan_2x_multi(input_x, times):
    """calculating tan x by calculating tan (x/2^times) and using double angle formula multiple times"""
    # Calculate tan (x/2^times)
    if input_x.dtype == FLOAT_16 and utils.product_is_mini():
        input_x_divide = topi.multiply(input_x, tvm.const(1.0/(2.0**times), FLOAT_16))
        res = _tan_expand(input_x_divide)
    else:
        input_x_divide = topi.multiply(input_x, 1.0/(2.0**times))
        res = _tan_expand(input_x_divide)
    while times != 0:
        # using double angle formula: tan 2x = 2*tan x/(1-tan x*tan x)
        if input_x.dtype == FLOAT_16 and utils.product_is_mini():
            res_numerator = topi.multiply(res, tvm.const(2.0, FLOAT_16))
            tanx_square = topi.multiply(res, res)
            res_denominator = topi.add(topi.multiply(tanx_square, tvm.const(-1.0, FLOAT_16)), tvm.const(1.0, FLOAT_16))
        else:
            res_numerator = topi.multiply(res, 2.0)
            tanx_square = topi.multiply(res, res)
            res_denominator = topi.add(topi.multiply(tanx_square, -1.0), 1.0)

        if utils.product_is_mini():
            res = mul(res_numerator, reciprocal(res_denominator))
        else:
            res = div(res_numerator, res_denominator)
        times = times - 1
    return res
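
The doubling loop above relies only on the identity tan 2x = 2*tan x/(1 - tan x*tan x). A minimal NumPy sketch of the same idea, using np.tan in place of the _tan_expand Taylor approximation (an assumption made here just for the check):

import numpy as np

def tan_by_doubling(x, times=6):
    """Approximate tan(x) from tan(x / 2**times) via repeated doubling."""
    t = np.tan(x / 2.0 ** times)        # stand-in for _tan_expand's series expansion
    for _ in range(times):
        t = 2.0 * t / (1.0 - t * t)     # tan 2x = 2*tan x / (1 - tan x * tan x)
    return t

x = np.linspace(-1.0, 1.0, 5)
print(np.allclose(tan_by_doubling(x), np.tan(x)))   # True
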
Example #2
def matrix_set_diag_compute(input_matrix, input_diagonal, input_help):
    """matrix_set_diag compute implemention"""
    shape_input = get_shape(input_matrix)
    input_dtype = input_matrix.dtype

    if input_dtype == "int8" or input_dtype == "uint8":
        input_matrix = topi.cast(input_matrix, "float16")
        input_diagonal = topi.cast(input_diagonal, "float16")
        input_help = topi.cast(input_help, "float16")
    if input_dtype == "int32" and product_is_mini():
        input_matrix = topi.cast(input_matrix, "float16")
        input_diagonal = topi.cast(input_diagonal, "float16")
        input_help = topi.cast(input_help, "float16")
        input_matrix = topi.cast(input_matrix, "float32")
        input_diagonal = topi.cast(input_diagonal, "float32")
        input_help = topi.cast(input_help, "float32")
    if input_dtype == "int32" and not product_is_mini():
        input_matrix = topi.cast(input_matrix, "float32")
        input_diagonal = topi.cast(input_diagonal, "float32")
        input_help = topi.cast(input_help, "float32")
    diag_tmp = topi.broadcast_to(input_diagonal, shape_input)
    help_tmp = topi.add(input_help, -1)
    help_y = topi.abs(help_tmp)

    res_vmul_x = topi.multiply(input_matrix, help_y)
    res_vmul_y = topi.multiply(diag_tmp, input_help)
    res = topi.add(res_vmul_x, res_vmul_y)

    if input_dtype == "int32" and product_is_mini():
        res = topi.cast(res, "float16")

    res = topi.cast(res, input_dtype)

    return res
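
A minimal NumPy sketch of the same masking trick, assuming input_help is a 0/1 tensor with ones on the diagonal (which is what the kernel expects): off-diagonal entries keep the original matrix, diagonal entries are replaced.

import numpy as np

matrix = np.arange(9.0).reshape(3, 3)
diagonal = np.array([10.0, 20.0, 30.0])
help_mask = np.eye(3)                                # assumed 0/1 mask with ones on the diagonal

diag_tmp = np.broadcast_to(diagonal, matrix.shape)   # mirrors topi.broadcast_to
help_y = np.abs(help_mask - 1)                       # 1 off the diagonal, 0 on it
res = matrix * help_y + diag_tmp * help_mask
print(np.diag(res))                                  # [10. 20. 30.]
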
Example #3
def less(data1, data2):
    """
    Compute element-wise whether data1 is less than data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.

    Returns:
        tvm.tensor.Tensor. True where data1 is less than data2, False elsewhere.
    """

    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)

    # check types
    vc_util.elemwise_dtype_check(
        data1.dtype, data2.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    # check runtime mode, and change dtype
    if utils.product_is_mini() and data1.dtype != "float16":
        data1 = akg.topi.cast(data1, "float16")
        data2 = akg.topi.cast(data2, "float16")
    if (not utils.product_is_mini()) and data1.dtype == "int32":
        data1 = akg.topi.cast(data1, "float32")
        data2 = akg.topi.cast(data2, "float32")

    res = akg.topi.less(data1, data2)
    return res
Example #4
def matrix_diag_part_compute(input_diagonal, input_help):
    """matrix_diag_part compute implemention"""
    shape_input_diagonal = get_shape(input_diagonal)
    dtype_input_diagonal = input_diagonal.dtype
    if dtype_input_diagonal == "int8" or dtype_input_diagonal == "uint8":
        input_diagonal = topi.cast(input_diagonal, "float16")
        input_help = topi.cast(input_help, "float16")
    if dtype_input_diagonal == "int32" and product_is_mini():
        input_diagonal = topi.cast(input_diagonal, "float16")
        input_help = topi.cast(input_help, "float16")
        input_diagonal = topi.cast(input_diagonal, "float32")
        input_help = topi.cast(input_help, "float32")
    if dtype_input_diagonal == "int32" and not product_is_mini():
        input_diagonal = topi.cast(input_diagonal, "float32")
        input_help = topi.cast(input_help, "float32")
    res_vmul = topi.multiply(input_help, input_diagonal)

    if shape_input_diagonal[-2] < shape_input_diagonal[-1]:
        res = topi.sum(res_vmul, -1)
    else:
        res = topi.sum(res_vmul, -2)

    if dtype_input_diagonal == "int32" and product_is_mini():
        res = topi.cast(res, "float16")

    res = topi.cast(res, dtype_input_diagonal)
    return res
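
The diagonal extraction uses the same 0/1 helper mask: multiply by the mask and sum over the shorter of the last two axes. A small NumPy sketch (the identity-like mask is an assumption about input_help):

import numpy as np

matrix = np.arange(12.0).reshape(3, 4)   # last two dims 3 x 4, so rows < cols
help_mask = np.eye(3, 4)                 # assumed 0/1 mask on the main diagonal

masked = matrix * help_mask
diag_part = masked.sum(axis=-1)          # rows < cols, so reduce the last axis
print(diag_part)                         # [ 0.  5. 10.]
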
Example #5
def kldiv_loss_grad(pre_deriv, inputs, outputs):
    """
    do backprop for kldiv loss

    Args:
        pre_deriv (tvm.tensor.Tensor): Gradient tensor for forward output.
        inputs (tvm.tensor.Tensor): Forward input tensor.
        outputs (tvm.tensor.Tensor): Forward output tensor.

    Returns:
        Gradient tensor for forward input.
    """
    inputs_dtype = inputs.dtype
    target_dtype = outputs.dtype
    pre_deriv_dtype = pre_deriv.dtype
    utils.ops_dtype_check([inputs_dtype, target_dtype, pre_deriv_dtype],
                            utils.DtypeForDavinci.ALL_FLOAT)

    if get_const_tuple(outputs.shape) != get_const_tuple(inputs.shape):
        raise RuntimeError("Please ensure inputs have the same size."
                           "", outputs.shape, inputs.shape)

    inputs_dtype_old = inputs_dtype

    if product_is_mini() and inputs_dtype == 'float32':
        inputs = akg.topi.cast(inputs, "float16")
        outputs = akg.topi.cast(outputs, "float16")
        inputs_dtype = "float16"

    cur_deriv = akg.topi.divide(outputs, inputs)
    cur_deriv = akg.topi.multiply(cur_deriv, pre_deriv)
    if product_is_mini() and inputs_dtype_old == 'float32':
        cur_deriv = akg.topi.cast(cur_deriv, inputs_dtype_old)
    return cur_deriv
Example #6
def exp(in_data):
    """
    Compute exponential of in_data element-wise

    :math:`e^x`

    Args:
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor of same type and shape as in_data.

    Raises:
        ValueError: If the type of input is invalid.
    """
    dtype = in_data.dtype
    vc_util.check_shape(in_data.shape)
    if dtype == "float32" and utils.product_is_mini():
        in_data = akg.tvm.compute(
            in_data.shape,
            lambda *indice: in_data(*indice).astype("float16"),
            name='type_cast')

    output = akg.tvm.compute(in_data.shape,
                             lambda *index: akg.tvm.exp(in_data(*index)),
                             name='exp')

    if dtype == "float32" and utils.product_is_mini():
        output = akg.tvm.compute(
            in_data.shape,
            lambda *indice: output(*indice).astype("float32"),
            name='res')

    return output
Example #7
def tan_compute(input_x):
    """tan compute implemention"""
    dtype = input_x.dtype

    # cast to type float32 when type is float16 in cloud and mini, or int32 in cloud
    if dtype == FLOAT_16 or dtype == FLOAT_32 or (dtype == INT_32
                                                  and not product_is_mini()):
        input_x = topi.cast(input_x, FLOAT_32)
        # adjust x to [-pi/2,pi/2] using x = x-round(x/pi)*pi
        round_pi_div = akg.lang.ascend.round(
            topi.multiply(input_x, tvm.const(1.0 / PI, FLOAT_32)))
        round_pi_div = akg.lang.ascend.cast_to(round_pi_div, FLOAT_32)
        input_x = topi.subtract(
            input_x, topi.multiply(round_pi_div, tvm.const(PI, FLOAT_32)))
    # cast to type float16 when type is int32 in mini
    elif dtype == INT_32 and product_is_mini():
        input_x = topi.cast(input_x, FLOAT_16)
        # adjust x to [-pi/2,pi/2] using x = x-round(x/pi)*pi
        round_pi_div = akg.lang.ascend.round(
            topi.multiply(input_x, tvm.const(1.0 / PI, FLOAT_16)))
        round_pi_div = akg.lang.ascend.cast_to(round_pi_div, FLOAT_16)
        input_x = topi.subtract(
            input_x, topi.multiply(round_pi_div, tvm.const(PI, FLOAT_16)))

    res = _tan_2x_multi(input_x, TAN_2X_TIMES)
    # cast the dtype to original dtype
    res = topi.cast(res, dtype)
    return res
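
The range reduction x = x - round(x/pi)*pi maps any argument into [-pi/2, pi/2] without changing tan, since tan has period pi. A quick NumPy check of just that step:

import numpy as np

x = np.array([5.0, -7.3, 12.56])
x_reduced = x - np.round(x / np.pi) * np.pi

print(np.all(np.abs(x_reduced) <= np.pi / 2))      # True
print(np.allclose(np.tan(x_reduced), np.tan(x)))   # True
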
Example #8
def less(data1, data2, target=utils.CCE):
    """
    Compute element-wise whether data1 is less than data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.

    Returns:
        tvm.tensor.Tensor. True where data1 is less than data2, False elsewhere.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data1.shape)
    utils.check_shape(data2.shape)

    # check types
    if target == utils.CCE:
        utils.elemwise_dtype_check(
            data1.dtype, data2.dtype,
            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
        # check runtime mode, and change dtype
        if product_is_mini() and data1.dtype != "float16":
            data1 = akg.topi.cast(data1, "float16")
            data2 = akg.topi.cast(data2, "float16")
        if (not product_is_mini()) and data1.dtype == "int32":
            data1 = akg.topi.cast(data1, "float32")
            data2 = akg.topi.cast(data2, "float32")
    res = akg.topi.less(data1, data2)
    return res
Example #9
def tanh_ad(head, in_data):
    """
    Compute gradient of tanh operator using automatic differentiate.

    Args:
        head (tvm.tensor.Tensor): Tensor of type float16, float32.
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor has the same shape as input.
    """
    in_dtype = in_data.dtype

    # On the cloud environment, casting the data type from 'float16' to 'float32'
    # and casting the result back to 'float16' achieves higher precision.
    if in_dtype == 'float16' and not utils.product_is_mini():
        in_data = akg.topi.cast(in_data, "float32")
        head = akg.topi.cast(head, "float32")

    out_data = tanh.tanh(in_data)
    jacs = list(akg.differentiate(out_data, [in_data], head))
    jacs_res = jacs[0]
    if in_dtype == 'float16' and not utils.product_is_mini():
        jacs_res = akg.topi.cast(jacs_res, 'float16')
    return jacs_res
Example #10
def _log_ascend(data):
    """
    Compute natural logarithm of x element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor of same type and shape as data
    """

    in_data = data
    dtype = in_data.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)

    if dtype == "float32" and product_is_mini():
        in_data = akg.tvm.compute(
            in_data.shape,
            lambda *indice: in_data(*indice).astype("float16"),
            name='type_cast')

    output = akg.tvm.compute(in_data.shape,
                             lambda *index: akg.tvm.log(in_data(*index)),
                             name='log')

    if dtype == "float32" and product_is_mini():
        output = akg.tvm.compute(
            in_data.shape,
            lambda *indice: output(*indice).astype("float32"),
            name='res')

    return output
Example #11
def _equal_ascend(input1, input2, target=utils.CCE):
    # check shapes
    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    shapes = [shape1, shape2]
    for _, shp in enumerate(shapes):
        utils.check_shape(shp)

    utils.ops_dtype_check([input1.dtype, input2.dtype],
                            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32,
                             utils.DtypeForDavinci.INT8, utils.DtypeForDavinci.UINT8])

    dtype = input1.dtype
    orig_dtype = dtype
    if product_is_mini() and dtype != "float16":
        dtype = "float16"
    if (not product_is_mini()) and dtype not in ("float16", "float32"):
        # for int32, if cast to float16, there may be overflow
        dtype = "float32"

    if orig_dtype == "float32" and dtype == "float16":
        input_sub = sub(input1, input2, target)
        input_sub = Cast(input_sub, dtype, target)
        zero = akg.tvm.const(0.0, dtype)
        res = akg.topi.equal(input_sub, zero)
    else:
        input1 = Cast(input1, dtype, target)
        input2 = Cast(input2, dtype, target)
        res = akg.topi.equal(input1, input2)
    return res
Example #12
def asinh(x, target=utils.CCE):
    r"""
    Compute asinh function.

    .. math:: asinh(x) = log(x+\sqrt{x*x+1})

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32. 

    Returns:
       tvm.tensor.Tensor, has the same type and shape as x.
    
    Supported Platforms:
        'Ascend'
    """
    # check shape
    utils.check_shape(x)

    # check input tensor data_type
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    dtype = x.dtype

    # Known that, asinh(x) = log(x + sqrt(x*x+1)), and, asinh(-x) = -asinh(x)
    # If x is a large negative number, (x + sqrt(x*x+1)) will be close to zero.
    # So, asinh(x) = sign(x) * log(|x| + sqrt(|x|*|x| + 1))
    compute_dtype = dtype
    if dtype == "float16":
        # To avoid overflow and achieve higher accuracy, x is cast to float32
        compute_dtype = "float32"
        x = topi.cast(x, compute_dtype)

    x_abs = topi.abs(x)

    if product_is_mini():
        # sqrt(|x|*|x| + 1) = |x| * sqrt(1 + 1/(|x|*|x|))
        vsquare_add_one = topi.add(1,
                                   topi.divide(1, topi.multiply(x_abs, x_abs)))
        sqrt_compute_value = sqrt_mini_newton_iter_impl(vsquare_add_one)
        sqrt_value = topi.multiply(x_abs, sqrt_compute_value)
    else:
        x_abs_square_add_one = topi.add(topi.multiply(x_abs, x_abs), 1)
        sqrt_value = topi.sqrt(x_abs_square_add_one)

    x_add_sqrt = topi.add(x_abs, sqrt_value)

    if product_is_mini():
        log_value = log_compute_mini_impl(x_add_sqrt, target)
    else:
        log_value = topi.log(x_add_sqrt)

    res = topi.multiply(Sign(x, target), log_value)

    if res.dtype != dtype:
        res = topi.cast(res, dtype)

    if product_is_mini():
        attrs = {"enable_auto_inline": False}
        return res, attrs
    return res
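
The sign/abs reformulation above avoids the cancellation in log(x + sqrt(x*x+1)) for large negative x. A NumPy sketch comparing it against np.arcsinh:

import numpy as np

def asinh_stable(x):
    """asinh(x) = sign(x) * log(|x| + sqrt(|x|*|x| + 1))."""
    x_abs = np.abs(x)
    return np.sign(x) * np.log(x_abs + np.sqrt(x_abs * x_abs + 1.0))

x = np.array([-1.0e6, -1.5, 0.0, 2.0, 1.0e6])
print(np.allclose(asinh_stable(x), np.arcsinh(x)))   # True
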
Example #13
def xlogy_grad_compute(placeholders, shape_max, dtype, rx, ry):
    """
    do element-wise xlogy_grad compute

    Args:
        placeholders (Union[list, tuple]): the placeholders of the data inputs
        shape_max (Union[list, tuple]): the broadcast shape
        dtype (string): the type of data input
        rx (list): the reduction indices of data input with broadcast
        ry (list): the reduction indices for data input with broadcast

    Returns:
        output_y1 (tvm.tensor.Tensor): result of xlogy_grad
        output_y2 (tvm.tensor.Tensor): result of xlogy_grad
    """
    x1_ori = placeholders[0]
    x2_ori = placeholders[1]
    grad_ori = placeholders[2]

    if dtype == "float16":
        x1 = akg.lang.cce.cast_to(x1_ori, "float32")
        x2 = akg.lang.cce.cast_to(x2_ori, "float32")
        grad = akg.lang.cce.cast_to(grad_ori, "float32")
        x1 = akg.lang.cce.broadcast(x1, shape_max)
        x2 = akg.lang.cce.broadcast(x2, shape_max)
        grad = akg.lang.cce.broadcast(grad, shape_max)
    else:
        x1 = akg.lang.cce.broadcast(x1_ori, shape_max)
        x2 = akg.lang.cce.broadcast(x2_ori, shape_max)
        grad = akg.lang.cce.broadcast(grad_ori, shape_max)

    esp_min = tvm.const(1.18e-38, dtype="float32")
    x1_addespmin = akg.lang.cce.vadds(x1, esp_min)

    if utils.product_is_mini():
        not_zero_x1 = akg.lang.cce.vmul(x1, reciprocal(x1_addespmin))
        log_x2 = tvm.compute(
            x2.shape,
            lambda *i: (tvm.log(x2(*i).astype("float16"))).astype("float32"),
            name="log_x2")
    else:
        not_zero_x1 = div(x1, x1_addespmin)
        log_x2 = akg.lang.cce.vlog(x2)

    partial_x1 = akg.lang.cce.vmul(not_zero_x1, log_x2)
    partial_x1g = akg.lang.cce.vmul(partial_x1, grad)

    partial_x2 = div(x1, x2) if not utils.product_is_mini() else \
        akg.lang.cce.vmul(x1, reciprocal(x2))
    partial_x2g = akg.lang.cce.vmul(partial_x2, grad)

    output_y1 = akg.lang.cce.sum(partial_x1g, rx, keepdims=True)
    output_y2 = akg.lang.cce.sum(partial_x2g, ry, keepdims=True)

    if dtype == "float16":
        output_y1 = akg.lang.cce.cast_to(output_y1, "float16")
        output_y2 = akg.lang.cce.cast_to(output_y2, "float16")
    return output_y1, output_y2
Example #14
def kldiv_loss(inputs, outputs, reduction='none'):
    """
    Computes Kullback-Leibler divergence loss between outputs and inputs.

    By default, loss = outputs*(log(outputs) - log(inputs));
    how the loss is reduced is controlled by the reduction argument.

    Args:
        inputs (tvm.tensor.Tensor): Tensor with type float16, float32
        outputs (tvm.tensor.Tensor): Tensor with same type as inputs.
        reduction (str): one of 'none', 'sum', 'mean', 'batchmean'.

    Returns:
        Tensor with same type as input tensors.
    """

    inputs_dtype = inputs.dtype
    target_dtype = outputs.dtype
    utils.ops_dtype_check([inputs_dtype, target_dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)

    if get_const_tuple(outputs.shape) != get_const_tuple(inputs.shape):
        raise RuntimeError("Please ensure inputs have the same size.",
                           outputs.shape, inputs.shape)

    inputs_dtype_old = inputs_dtype

    if product_is_mini() and inputs_dtype == 'float32':
        inputs = akg.topi.cast(inputs, "float16")
        outputs = akg.topi.cast(outputs, "float16")
        inputs_dtype = "float16"

    log_inputs = akg.topi.log(inputs)
    log_target = akg.topi.log(outputs)
    loss = akg.topi.subtract(log_target, log_inputs)
    loss = akg.topi.multiply(outputs, loss)
    if reduction == 'sum':
        loss = akg.topi.sum(loss)
    if reduction == 'mean':
        loss = akg.topi.sum(loss)
        deno = 1.0
        for num in inputs.shape:
            deno = deno * num
        deno = akg.topi.cast(deno, dtype=inputs_dtype)
        loss = akg.topi.divide(loss, deno)
    if reduction == 'batchmean':
        reduce_axis = tuple(numpy.arange(1, len(inputs.shape)))
        loss = akg.topi.sum(loss, axis=reduce_axis, keepdims=False)
        deno = 1.0
        for num in inputs.shape[1:]:
            deno = deno * num
        deno = akg.topi.cast(deno, dtype=inputs_dtype)
        loss = akg.topi.divide(loss, deno)

    if product_is_mini() and inputs_dtype_old == 'float32':
        loss = akg.topi.cast(loss, inputs_dtype_old)
    return loss
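
The three reduction modes can be written down directly in NumPy (the sample values below are made up; inputs and outputs are assumed to be positive):

import numpy as np

inputs = np.array([[0.4, 0.6], [0.5, 0.5]])          # predictions, assumed positive
outputs = np.array([[0.3, 0.7], [0.2, 0.8]])         # targets

loss = outputs * (np.log(outputs) - np.log(inputs))  # element-wise KL term

loss_sum = loss.sum()                                # reduction='sum'
loss_mean = loss.sum() / loss.size                   # reduction='mean'
loss_batchmean = loss.sum(axis=1) / inputs.shape[1]  # reduction='batchmean', per sample
print(loss_sum, loss_mean, loss_batchmean)
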
Example #15
def select(l1, tmp_val, gradient_accum):
    """Returns tmp_val if l1 > 0 else gradient_accum."""
    if product_is_mini():
        l1 = topi.cast(l1, "float16")
        tmp_val = topi.cast(tmp_val, "float16")
        gradient_accum = topi.cast(gradient_accum, "float16")
    tmp_val = akg.tvm.compute(
        tmp_val.shape, lambda *i: tvm.expr.Select(l1[0] > 0, tmp_val(*i),
                                                  gradient_accum(*i)))
    return topi.cast(tmp_val, "float32") if product_is_mini() else tmp_val
Example #16
def _exp_ascend(in_data):
    dtype = in_data.dtype
    utils.check_shape(in_data.shape)
    if dtype == "float32" and product_is_mini():
        in_data = akg.tvm.compute(in_data.shape, lambda *indice: in_data(*indice).astype("float16"), name='type_cast')

    output = akg.tvm.compute(in_data.shape, lambda *index: akg.tvm.exp(in_data(*index)), name='exp')

    if dtype == "float32" and product_is_mini():
        output = akg.tvm.compute(in_data.shape, lambda *indice: output(*indice).astype("float32"), name='res')

    return output
Example #17
def Tanh(in_data, target=utils.CCE):
    """
    Compute tanh function. This version is able to avoid exp(x) overflow when x is large.

    .. math:: res = sign(in_data) * (1 - exp(-2*abs(in_data))) / (1 + exp(-2*abs(in_data)))

    Args:
        in_data (tvm.tensor.Tensor): input tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as in_data.
    
    Supported Platforms:
        'Ascend'
    """

    utils.check_shape(in_data.shape)

    dtype = in_data.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    ori_dtype = dtype
    in_data_compute = in_data
    if ori_dtype == "float32" and product_is_mini():
        in_data_compute = akg.tvm.compute(
            in_data.shape,
            lambda *indice: in_data(*indice).astype("float16"),
            name='type_cast')
        dtype = 'float16'

    in_data_abs = akg.lang.ascend.vabs(in_data_compute)
    exponent = akg.lang.ascend.vmuls(in_data_abs, akg.tvm.const(-2, dtype))
    exp_value = akg.lang.ascend.vexp(exponent)

    exp_value_add_one = akg.lang.ascend.vadds(exp_value,
                                              akg.tvm.const(1, dtype))
    one_sub_exp_value = akg.topi.subtract(akg.tvm.const(1, dtype), exp_value)
    exp_value_add_one_rec = RecPositive(exp_value_add_one, target)
    tanh_value_pos = akg.topi.multiply(one_sub_exp_value,
                                       exp_value_add_one_rec)
    output_shape = in_data_compute.shape
    sign = akg.tvm.compute(
        output_shape, lambda *indice: akg.tvm.expr.Select(
            in_data_compute(*indice) < akg.tvm.const(0, dtype),
            akg.tvm.const(-1, dtype), akg.tvm.const(1, dtype)))

    tanh_value = akg.topi.multiply(sign, tanh_value_pos)
    if ori_dtype == "float32" and product_is_mini():
        tanh_value = akg.tvm.compute(
            tanh_value.shape,
            lambda *indice: tanh_value(*indice).astype("float32"),
            name='res')

    return tanh_value
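
The reformulation keeps the exp argument non-positive, so it cannot overflow for large |x|. A NumPy check against np.tanh:

import numpy as np

def tanh_safe(x):
    """tanh(x) = sign(x) * (1 - exp(-2|x|)) / (1 + exp(-2|x|))."""
    e = np.exp(-2.0 * np.abs(x))
    return np.sign(x) * (1.0 - e) / (1.0 + e)

x = np.array([-50.0, -0.5, 0.0, 0.5, 50.0])
print(np.allclose(tanh_safe(x), np.tanh(x)))   # True
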
Example #18
def acosh(x, target=utils.CCE):
    r"""
    Compute acosh function.

    .. math:: acosh(x) = log(x+\sqrt{x*x-1})

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32. Each entry
        in it must be in `[1, inf)`

    Returns:
       tvm.tensor.Tensor, has the same type and shape as x.
    
    Supported Platforms:
        'Ascend'
    """
    # check shape
    utils.check_shape(x)

    # check input tensor data_type
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    dtype = x.dtype

    if dtype == "float16":
        # To avoid overflow and achieve higher accuracy, x is cast to float32
        x = akg.topi.cast(x, "float32")

    """acosh(x) = log(x + sqrt(x*x-1))"""
    x_square = akg.topi.multiply(x, x)
    x_square_sub = akg.topi.subtract(x_square, 1)

    if product_is_mini():
        sqrt_value = _sqrt_mini_vsqrt_newton_iter(x_square_sub)
    else:
        sqrt_value = akg.topi.sqrt(x_square_sub)

    sqrt_add = akg.topi.add(sqrt_value, x)

    if product_is_mini():
        res = log_compute_mini_impl(sqrt_add, target)
    else:
        res = akg.topi.log(sqrt_add)

    if res.dtype != dtype:
        res = akg.topi.cast(res, dtype)

    if product_is_mini():
        attrs = {"enable_auto_inline": False}
        return res, attrs
    return res
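
The log form of acosh is easy to check against np.arccosh for arguments in [1, inf):

import numpy as np

x = np.array([1.0, 1.5, 10.0, 1.0e4])
res = np.log(x + np.sqrt(x * x - 1.0))   # acosh(x) = log(x + sqrt(x*x - 1))
print(np.allclose(res, np.arccosh(x)))   # True
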
Example #19
def cosh_call(x):
    """Compute cosh by the call method."""
    dtype = x.dtype
    shape = get_shape(x)
    # in order to get a precise result
    if product_is_mini() and dtype == "float32":
        x = akg.lang.ascend.cast_to(x, "float16")

    res = akg.tvm.compute(shape, lambda *indice: akg.lang.ascend.cosh(x(*indice)), name="res")

    if product_is_mini() and dtype == "float32":
        res = akg.lang.ascend.cast_to(res, "float32")

    return res, get_attrs()
Example #20
def l1_loss_grad(pre_deriv, inputs, target):
    """
    do backprop for L1 loss (MAE)
    """
    inputs_dtype = inputs.dtype
    target_dtype = target.dtype
    pre_deriv_dtype = pre_deriv.dtype

    # check inputs data types
    check_list = ["float16", "float32"]
    if not inputs_dtype.lower() in check_list:
        raise RuntimeError("inputs only support %s while dtype is %s" % (
            ",".join(check_list), inputs_dtype))

    if not target_dtype.lower() in check_list:
        raise RuntimeError("target only support %s while dtype is %s" % (
            ",".join(check_list), target_dtype))

    if not pre_deriv_dtype.lower() in check_list:
        raise RuntimeError("prev Derivative only support %s while dtype is %s" % (
            ",".join(check_list), pre_deriv_dtype))

    if get_const_tuple(target.shape) != get_const_tuple(inputs.shape):
        raise RuntimeError(
            "Please ensure inputs have the same size.", target.shape, inputs.shape)

    inputs_dtype_old = inputs_dtype

    if utils.product_is_mini() and inputs_dtype == 'float32':
        inputs = akg.topi.cast(inputs, "float16")
        target = akg.topi.cast(target, "float16")
        inputs_dtype = "float16"

    def grad_dsl(inputs, target, pre_deriv):
        # broadcast is done outside, because tvm needs a shape check; if the shape is not fixed it cannot be checked here
        #pre_deriv = akg.topi.broadcast_to(pre_deriv, inputs.shape)
        coefficient = akg.tvm.const(-1.0, dtype=inputs_dtype)
        res = akg.tvm.compute(inputs.shape,
                          lambda *i: akg.tvm.if_then_else(
                              inputs(*i) >= target(*i),
                              pre_deriv(*i), coefficient * pre_deriv(*i))
                          )
        return res

    cur_deriv = grad_dsl(inputs, target, pre_deriv)
    if utils.product_is_mini() and inputs_dtype_old == 'float32':
        cur_deriv = akg.topi.cast(cur_deriv, inputs_dtype_old)
    return cur_deriv
Example #21
def smooth_l1_loss_grad_run(shape, dtype, attrs=None, kernel_name="smooth_l1_loss_grad"):
    assert len(shape) >= 2, "last dimension of the shape will be reduced, so the shape length should be >= 2"
    sample_shape = shape[:-1]

    anchor_samples_dtype = "int32"
    # sigma is a constant parameter
    sigma = 1.0
    anchor_sample_correct = 0

    if not utils.product_is_mini():
        attrs['enable_align_fix'] = True
        attrs['enable_multicore'] = True

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(smooth_l1_loss_grad.smooth_l1_loss_grad, [sample_shape, shape, shape, sample_shape],
                                  [dtype, dtype, dtype, anchor_samples_dtype], op_attrs=[sigma, anchor_sample_correct],
                                  attrs=attrs, kernel_name=kernel_name, dump_code=True, tuning=t)
        if t:
            anchor_samples, dloss, expect, output, prediction, prediction_, target, target_ = gen_data(
                anchor_sample_correct, anchor_samples_dtype, dtype, sample_shape, shape, sigma)
            return mod, expect, (dloss, prediction, target, anchor_samples, output)
        else:
            return mod
    else:
        anchor_samples, dloss, expect, output, prediction, prediction_, target, target_ = gen_data(
            anchor_sample_correct, anchor_samples_dtype, dtype, sample_shape, shape, sigma)
        mod = utils.op_build_test(smooth_l1_loss_grad.smooth_l1_loss_grad,
                                  [sample_shape, shape, shape, sample_shape],
                                  [dtype, dtype, dtype, anchor_samples_dtype], op_attrs=[sigma, anchor_sample_correct],
                                  attrs=attrs, kernel_name=kernel_name, dump_code=True)
        output = utils.mod_launch(mod, (dloss, prediction, target, anchor_samples, output), expect=expect)
        return (dloss, prediction, target, anchor_samples), output, expect, compare_tensor(output, expect, atol=5e-3,
                                                                                           rtol=5e-3)
Example #22
def _asin_grad_compute(x, dy):
    """Compute asin_grad."""

    dtype = x.dtype
    if dtype == "float16":
        x = topi.cast(x, "float32")
        dy = topi.cast(dy, "float32")

    # step 1: calculate num_to_vrsqrt = 1 - x^2
    data = topi.multiply(x, x)
    data = topi.multiply(data, tvm.const(-1, "float32"))
    num_to_vrsqrt = topi.add(data, tvm.const(1, "float32"))

    # step 2: calculate dy * (1 / sqrt(1 - x^2))
    if utils.product_is_mini():
        # mini: use newton's method for high accuracy result
        res = _vrsqrt_newton(num_to_vrsqrt)
        res = topi.multiply(res, dy)
    else:
        # cloud: use vdiv for high efficiency computation
        vsqrt_res = topi.sqrt(num_to_vrsqrt)
        res = topi.divide(dy, vsqrt_res)

    if dtype == "float16":
        res = topi.cast(res, "float16")

    return res
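
The analytic gradient dy / sqrt(1 - x^2) used above can be sanity-checked against a central finite difference of np.arcsin:

import numpy as np

x = np.array([-0.7, -0.2, 0.0, 0.3, 0.8])
dy = np.ones_like(x)

analytic = dy / np.sqrt(1.0 - x * x)               # dy * 1/sqrt(1 - x^2)

eps = 1e-6
numeric = (np.arcsin(x + eps) - np.arcsin(x - eps)) / (2 * eps)
print(np.allclose(analytic, numeric, atol=1e-4))   # True
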
Example #23
def truncate_div(input_x1, input_x2):
    """
    Calculate truncate_div: res = floor(x1/x2) if x1/x2 > 0 else ceil(x1/x2), i.e. truncation toward zero.

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, support float16,
                                      float32 on mini device, while support
                                      int32, int8, uint8, float16, float32 on
                                      cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.
    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    vc_util.check_shape(get_shape(input_x1))
    vc_util.check_shape(get_shape(input_x2))
    vc_util.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)
    vc_util.ops_dtype_check(
        input_x1.dtype,
        (vc_util.DtypeForDavinci.ALL_FLOAT) if utils.product_is_mini() \
            else (vc_util.DtypeForDavinci.ALL_FLOAT,
                  vc_util.DtypeForDavinci.INT32,
                  vc_util.DtypeForDavinci.INT8,
                  vc_util.DtypeForDavinci.UINT8))

    return truncate_div_compute(input_x1, input_x2)
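
Truncating division rounds toward zero, which is exactly floor for positive quotients and ceil for negative ones; in plain NumPy terms:

import numpy as np

x1 = np.array([7.0, -7.0, 7.0, -7.0])
x2 = np.array([2.0, 2.0, -2.0, -2.0])
print(np.trunc(x1 / x2))   # [ 3. -3. -3.  3.]
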
Example #24
def floordiv(data1, data2):
    """
    Calculate x/y, always returning the floored integer result.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    vc_util.ops_dtype_check([data1.dtype, data2.dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    shape1 = [x.value for x in data1.shape]
    vc_util.check_shape(shape1)
    shape2 = [x.value for x in data2.shape]
    vc_util.check_shape(shape2)

    if utils.product_is_mini():
        rec = reciprocal(data2, high_precision=True)
        res = data1 * rec
    else:
        res = akg.topi.divide(data1, data2)
    res = akg.lang.cce.floor(res)
    return res
Example #25
def reciprocal(data, high_precision=True):
    """
    Computes the reciprocal of data element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        high_precision (bool): a bool value, whether to use high-precision version.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.
    """

    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, data.dtype) / (data(*indice)), name="res")

    # When the product is mini, use Newton's iteration to achieve higher precision.
    if utils.product_is_mini() and high_precision:
        steps = 1
        for _ in range(steps):
            temp1 = data * res
            temp2 = temp1 * akg.tvm.const(-1, data.dtype)
            temp3 = temp2 + akg.tvm.const(2, data.dtype)
            res = temp3 * res

    return res
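
The loop body is one step of Newton's iteration for 1/a, r <- r*(2 - a*r), which roughly squares the relative error per step. A NumPy sketch with a float16 seed (standing in for the low-precision hardware reciprocal, an assumption made here):

import numpy as np

a = np.float32(3.0)
r = np.float32(np.float16(1.0) / np.float16(3.0))    # low-precision reciprocal seed

r_refined = (np.float32(2.0) - a * r) * r            # one Newton step, as in the loop above

print(abs(1.0 / 3.0 - float(r)), abs(1.0 / 3.0 - float(r_refined)))   # error shrinks
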
Example #26
def _bessel_i1e_compute(input_data):
    """bessel i1e compute"""

    shape = vc_util.get_shape(input_data)
    dtype = input_data.dtype

    # choose the compute data type at the beginning
    if dtype == "float16":
        input_data = cast(input_data, "float32")

    abs_data = abs_value(input_data)
    # compute bessel_i1e for data in (-3.75, 3.75)
    before_res = _before_res_compute(abs_data)
    # compute bessel_i1e for data in other domain
    after_res = _after_res_compute(abs_data)

    # The vcmp_lt and vsel instructions don't support fp32 on mini,
    # so the comparison is done in fp16; this could be simplified by methods such as "auto cast"
    if utils.product_is_mini():
        res = akg.tvm.compute(
            shape, lambda *indice: akg.tvm.expr.Select(
                abs_data[indice].astype("float16") < akg.tvm.const(
                    CONST_LIMIT, "float16"), before_res[indice].astype(
                        "float16"), after_res[indice].astype("float16")))
        res = cast(res, "float32")
    else:
        res = akg.tvm.compute(
            shape,
            lambda *indice: akg.tvm.expr.Select(abs_data[
                indice] < CONST_LIMIT, before_res[indice], after_res[indice]))
    data_sign = sign(input_data)
    res = mul(res, data_sign)
    if dtype == "float16":
        res = cast(res, "float16")
    return res
Example #27
def floor_div(data1, data2, target=utils.CCE):
    """
    Calculate x/y, always returning the floored integer result.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has type of int32.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check([data1.dtype, data2.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)
    shape1 = [x.value for x in data1.shape]
    utils.check_shape(shape1)
    shape2 = [x.value for x in data2.shape]
    utils.check_shape(shape2)

    if product_is_mini():
        rec = reciprocal(data2, high_precision=True, target=target)
        res = data1 * rec
    else:
        res = akg.topi.divide(data1, data2)
    res = akg.lang.ascend.floor(res)
    return res
Example #28
def _atan_compute(data):
    """compute for atan"""
    dtype = data.dtype

    if dtype == "float16":
        data = topi.cast(data, "float32")

    abs_data = topi.abs(data)
    tensor_one = dc.one_const(abs_data.dtype)

    abs_data_sub_one = topi.subtract(abs_data, tensor_one)
    abs_data_add_one = topi.add(abs_data, tensor_one)
    abs_data2 = topi.abs(topi.divide(abs_data_sub_one, abs_data_add_one))

    # calculate for data less than one
    res = _do_atan_taylor(abs_data)
    # calculate for data greater than one
    res_mt_one = topi.add(_do_atan_taylor(abs_data2),
                          tvm.const(CONST_PI_BY_FOUR, abs_data2.dtype))
    res = topi.minimum(res, res_mt_one)

    if utils.product_is_mini() and data.dtype == "float32":
        sign_mask = topi.cast(topi.sign(topi.cast(data, "float16")), "float32")
    else:
        sign_mask = topi.sign(data)

    res = topi.multiply(res, sign_mask)

    if dtype == "float16":
        res = topi.cast(res, "float16")

    return res
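
For |x| >= 1 the code relies on the identity atan(x) = pi/4 + atan((x-1)/(x+1)), which brings the Taylor argument back below one. A quick NumPy check of that identity:

import numpy as np

x = np.array([1.0, 2.5, 10.0, 100.0])
reduced = np.pi / 4 + np.arctan((x - 1.0) / (x + 1.0))
print(np.allclose(reduced, np.arctan(x)))   # True
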
Example #29
def compute_blockdim(shape, axis, dtype):
    # strategy: all shape dimensions except the reduce axes can be used for multicore
    blockdim_limit = 2 if utils.product_is_mini() else 32
    blockdim = 1
    if isinstance(shape, int):
        shape = [shape]
    if isinstance(axis, int):
        axis = [axis]
    elif not isinstance(axis, list):
        axis = list(axis)
    # normalize negative axes so the membership test below works
    axis = sorted(a + len(shape) if a < 0 else a for a in axis)
    red_sh = 1
    if isinstance(shape, (list, tuple)):
        for i, sh in enumerate(shape):
            if not isinstance(sh, int):
                raise TypeError(
                    "Shape to compute blockdim must be a list/tuple of integer"
                )
            if i in axis:
                red_sh *= sh
            else:
                blockdim = blockdim * sh
    else:
        raise TypeError(
            "Shape to compute blockdim must be a list/tuple of integer")
    if red_sh < 32 / get_bytes(dtype):
        # when the reduce axis is too small, multicore may not improve performance
        blockdim = 1

    return min(blockdim_limit, blockdim)
Example #30
def atanh(input_data):
    """
    Return atanh(x)=0.5*ln((1+x)/(1-x)) if abs(x)<1.

    Args:
        input_data (tvm.tensor.Tensor): Input tensor, only support float16, float32.

    Returns:
        A tvm.tensor.Tensor as result of atanh.

    Supported Platforms:
        'Ascend'
    """
    shape = get_shape(input_data)
    utils.check_shape(shape)

    inp_dtype = input_data.dtype
    utils.ops_dtype_check(inp_dtype, utils.DtypeForDavinci.ALL_FLOAT)

    if inp_dtype == "float16":
        input_data = topi.cast(input_data, "float32")

    if product_is_mini():
        res = _compute_mini(input_data, shape)
    else:
        res = _compute_cloud(input_data)

    res = topi.cast(res, inp_dtype)

    return res
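
The closed form in the docstring is straightforward to verify with NumPy for |x| < 1:

import numpy as np

x = np.array([-0.9, -0.3, 0.0, 0.5, 0.99])
res = 0.5 * np.log((1.0 + x) / (1.0 - x))   # atanh(x) = 0.5*ln((1+x)/(1-x))
print(np.allclose(res, np.arctanh(x)))      # True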