Example #1
def mul(l_input, r_input, target=utils.CCE):
    """
    Calculate l_input * r_input element-wise.

    Note:
        mul supports broadcasting.

    Args:
        l_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        r_input (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has the same type as l_input and r_input.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.ops_dtype_check([l_input.dtype, r_input.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)

    shape1 = [x.value for x in l_input.shape]
    shape2 = [x.value for x in r_input.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    utils.auto_broadcast_check(shape1, shape2)
    utils.elemwise_dtype_check(l_input.dtype, r_input.dtype)
    output = akg.topi.multiply(l_input, r_input)

    return output
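
A minimal usage sketch (not from the source; it assumes akg bundles TVM as akg.tvm, as the tvm.const/tvm.compute calls elsewhere in these examples suggest, and the shapes are illustrative):

import akg.tvm as tvm

# hypothetical inputs: a (1, 16) row broadcast across a (32, 16) matrix
lhs = tvm.placeholder((32, 16), name="lhs", dtype="float16")
rhs = tvm.placeholder((1, 16), name="rhs", dtype="float16")
prod = mul(lhs, rhs)  # result: a (32, 16) tensor of dtype float16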
Example #2
def Divide(lhs, rhs, target=utils.CCE):
    """
    Calculate lhs / rhs element-wise.

    Args:
        lhs (tvm.tensor.Tensor): The dividend tensor.
        rhs (tvm.tensor.Tensor): The divisor tensor.

    Returns:
        tvm.tensor.Tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _div_ascend(lhs, rhs)
    shape_l = [x.value for x in lhs.shape]
    shape_r = [x.value for x in rhs.shape]
    utils.check_shape(shape_l)
    utils.check_shape(shape_r)
    utils.auto_broadcast_check(shape_l, shape_r)
    utils.elemwise_dtype_check(lhs.dtype, rhs.dtype)
    output = akg.topi.divide(lhs, rhs)

    return output
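
On utils.CCE the call above dispatches to _div_ascend (Example #4); the sketch below exercises the topi.divide path instead (assuming "cuda" is among the targets accepted by utils.check_supported_target, with hypothetical shapes):

import akg.tvm as tvm

num = tvm.placeholder((8, 8), name="num", dtype="float32")
den = tvm.placeholder((8,), name="den", dtype="float32")
quot = Divide(num, den, target="cuda")  # den broadcasts across rows before dividing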
Example #3
def maximum(data1, data2, target=utils.CCE):
    """
    Take element-wise maximum of two tensors with auto-broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): First input tensor.
        data2 (tvm.tensor.Tensor): Second input tensor, with the same dtype as data1.

    Returns:
        tvm.tensor.Tensor of maximum of two tensors.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    utils.auto_broadcast_check(shape1, shape2)
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)

    dtype = data1.dtype
    need_cast = target == utils.CCE and dtype in ("int8", "uint8")
    if need_cast:
        data1 = Cast(data1, "float16")
        data2 = Cast(data2, "float16")
    res = topi.maximum(data1, data2)
    if need_cast:
        res = Cast(res, dtype)
    return res
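
A sketch of the cast path above (hypothetical shapes; on the CCE target, int8/uint8 inputs are promoted to float16 for the comparison and the result is cast back):

import akg.tvm as tvm

a = tvm.placeholder((4, 4), name="a", dtype="int8")
b = tvm.placeholder((4,), name="b", dtype="int8")
m = maximum(a, b)  # on utils.CCE: cast to float16, topi.maximum, cast back to int8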
Example #4
def _div_ascend(data1, data2):
    """
    Calculates x/y, and returns an integer when inputs are all integers.

    When both arguments are integers, use integer division (also known as "floor division").
    When both arguments are floats, use normal floating-point division.

    Note:
        div supports broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.

    Returns:
        tvm.tensor.Tensor, has the same type as data1 and data2.
    """

    utils.ops_dtype_check([data1.dtype, data2.dtype],
                          utils.DtypeForDavinci.ALL_TYPES)
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    dtype = data1.dtype

    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)

    utils.auto_broadcast_check(shape1, shape2)
    n_shape1, n_shape2, out_shape = produce_shapes(shape1, shape2)
    if n_shape1 != out_shape:
        input1_cast = akg.topi.broadcast_to(data1, out_shape)
    else:
        input1_cast = data1
    if n_shape2 != out_shape:
        input2_cast = akg.topi.broadcast_to(data2, out_shape)
    else:
        input2_cast = data2

    if dtype in ("int32", "int8", "uint8"):
        input1p = Cast(input1_cast, "float16", utils.CCE)
        input2p = Cast(input2_cast, "float16", utils.CCE)
    else:
        input1p = input1_cast
        input2p = input2_cast

    if product_is_mini():
        input2p_rec = reciprocal(input2p, target=utils.CCE)
        res = akg.topi.multiply(input1p, input2p_rec)
    else:
        res = akg.topi.divide(input1p, input2p)

    if dtype in ("int8", "uint8"):
        res = floor(res, utils.CCE)
        res = Cast(res, "float16", utils.CCE)
    if dtype in ("int32", "int8", "uint8"):
        res = Cast(res, dtype, utils.CCE)

    return res
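
A NumPy reference for the int8/uint8 branch above (a sketch of the intended floor-division semantics, not the Ascend kernel itself):

import numpy as np

x = np.array([7, -7, 5], dtype=np.int8)
y = np.array([2, 2, -3], dtype=np.int8)
# divide in float16, floor, cast back -- mirroring the int8/uint8 branch
ref = np.floor(x.astype(np.float16) / y.astype(np.float16)).astype(np.int8)
# ref == [3, -4, -2]: floor division, not C-style truncation toward zero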
Example #5
def _add(data1, data2):
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    utils.check_shape(data1.shape)
    utils.check_shape(data2.shape)
    utils.auto_broadcast_check(data1.shape, data2.shape)

    res = akg.topi.add(data1, data2)

    return res
Example #6
def _pow(data1, data2):
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    utils.check_shape(data1.shape)
    utils.check_shape(data2.shape)
    utils.auto_broadcast_check(data1.shape, data2.shape)

    in_dtype = data1.dtype
    if in_dtype == 'float16':
        data1 = akg.topi.cast(data1, 'float32')
        data2 = akg.topi.cast(data2, 'float32')
    res = akg.topi.power(data1, data2)
    if in_dtype == 'float16':
        res = akg.topi.cast(res, 'float16')

    return res
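
A usage sketch (hypothetical placeholders): fp16 inputs are widened to fp32 for topi.power and narrowed back afterwards, trading two extra casts for better intermediate precision.

import akg.tvm as tvm

a = tvm.placeholder((64,), name="a", dtype="float16")
b = tvm.placeholder((64,), name="b", dtype="float16")
p = _pow(a, b)  # computed in float32 internally, returned as float16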
Example #7
def fake_quant_with_min_max_vars_per_channel(input_data,
                                             input_min,
                                             input_max,
                                             num_bits=8,
                                             narrow_range=False):
    """
    Generate fake_quantize the input_data for every channel.

    Note:
        The last dimension of input_data must equal d, and the inputs must satisfy input_min <= 0 <= input_max.

    Args:
        input_data (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [b, d] or [b, h, w, d] or [d].
        input_min (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [d].
        input_max (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [d].
        num_bits (int):  The quantization bits, must be int, defaults to 8.
        narrow_range (bool): If True, quant_min is 1, otherwise 0. Defaults to False.

    Returns:
        tvm.tensor.Tensor of same type and shape as input_data.
    """

    # get shape and check
    shape_inputs = get_shape(input_data)
    shape_min = get_shape(input_min)
    shape_max = get_shape(input_max)
    utils.elemwise_shape_check(shape_min, shape_max)
    utils.auto_broadcast_check(shape_min, shape_inputs)
    if shape_min[0] != shape_inputs[-1]:
        raise RuntimeError(
            "The last dimension of input_data must match the shape of input_min and input_max!"
        )

    # check dtype
    utils.ops_dtype_check(input_data.dtype, utils.DtypeForDavinci.FLOAT32)
    utils.elemwise_dtype_check(input_min.dtype, input_max.dtype,
                               utils.DtypeForDavinci.FLOAT32)
    # check num_bits range
    if num_bits > 16 or num_bits < 2:
        raise ValueError("numbits should be in range [2, 16]!")

    # get output by fake_quant_with_min_max_vars_per_channel_compute function
    res = fake_quant_with_min_max_vars_per_channel_compute(
        input_data, input_min, input_max, num_bits, narrow_range)
    return res
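
For intuition, a hedged NumPy sketch of the usual fake-quant semantics (mirroring TensorFlow's op of the same name; the compute function called above may differ in details such as zero-point clamping and rounding mode):

import numpy as np

def fake_quant_ref(x, x_min, x_max, num_bits=8, narrow_range=False):
    # integer grid [qmin, qmax] and the per-channel scale
    qmin = 1 if narrow_range else 0
    qmax = 2 ** num_bits - 1
    scale = (x_max - x_min) / (qmax - qmin)
    zero_point = np.round(qmin - x_min / scale)  # nudge onto the grid
    nudged_min = (qmin - zero_point) * scale
    nudged_max = (qmax - zero_point) * scale
    clamped = np.clip(x, nudged_min, nudged_max)
    return np.round((clamped - nudged_min) / scale) * scale + nudged_min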
Example #8
def RealDiv(input1, input2, target=utils.CCE):
    """
    Returns input1 / input2 element-wise for real types.

    Note:
        RealDiv supports broadcasting.

    Args:
        input1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        input2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has the same type as input1, with shape given by broadcasting.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check([input1.dtype, input2.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)
    utils.elemwise_dtype_check(input1.dtype, input2.dtype)

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)

    utils.auto_broadcast_check(shape1, shape2)
    n_shape1, n_shape2, out_shape = produce_shapes(shape1, shape2)

    if n_shape1 != out_shape:
        input1_cast = akg.topi.broadcast_to(input1, out_shape)
    else:
        input1_cast = input1
    if n_shape2 != out_shape:
        input2_cast = akg.topi.broadcast_to(input2, out_shape)
    else:
        input2_cast = input2

    res = akg.topi.divide(input1_cast, input2_cast)
    return res
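
Unlike Divide in Example #2, RealDiv broadcasts both inputs explicitly with topi.broadcast_to before dividing. A usage sketch with hypothetical shapes:

import akg.tvm as tvm

a = tvm.placeholder((2, 3, 4), name="a", dtype="float32")
b = tvm.placeholder((4,), name="b", dtype="float32")
q = RealDiv(a, b)  # b is broadcast to (2, 3, 4), then a / b is computed elementwise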
Example #9
def _pow_ascend(data, scale, target):
    shape1 = [x.value for x in data.shape]
    shape2 = [x.value for x in scale.shape]

    check_list = ["float16", "float32", "int32", "int8", "uint8"]
    dtype = data.dtype
    if dtype.lower() not in check_list:
        raise RuntimeError("pow only supports %s while dtype is %s" %
                           (",".join(check_list), dtype))

    utils.check_shape(shape1)
    utils.auto_broadcast_check(shape1, shape2)
    compute_dtype = "float32"
    if product_is_mini():
        compute_dtype = "float16"
    data = Cast(data, compute_dtype, target)
    scale = Cast(scale, compute_dtype, target)

    c = akg.topi.power(data, scale)
    c = Cast(c, dtype, target)
    return c
Example #10
def minimum(input1, input2, target=utils.CCE):
    """
    Return the min value of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1 (tvm.tensor.Tensor): First input tensor.
        input2 (tvm.tensor.Tensor): Second input tensor, with the same dtype as input1.

    Returns:
        Tensor, has the same type as inputs.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.ops_dtype_check([input1.dtype, input2.dtype],
                          utils.DtypeForDavinci.ALL_TYPES)
    utils.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)

    utils.auto_broadcast_check(shape1, shape2)

    need_cast = target == utils.CCE and dtype in ("int8", "uint8")
    if need_cast:
        input1 = Cast(input1, "float16", target)
        input2 = Cast(input2, "float16", target)
    res = akg.topi.minimum(input1, input2)
    if need_cast:
        res = Cast(res, dtype, target)
    return res
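
The same cast round-trip as maximum in Example #3; a brief sketch with hypothetical inputs:

import akg.tvm as tvm

a = tvm.placeholder((16,), name="a", dtype="uint8")
b = tvm.placeholder((16,), name="b", dtype="uint8")
lo = minimum(a, b)  # on utils.CCE: computed in float16, cast back to uint8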
Example #11
def div_no_nan(data_x, data_y, target=utils.CCE):
    """
    Returns 0 where the denominator is zero; elsewhere behaves like Divide.

    Args:
        data_x (tvm.tensor.Tensor): tensor with type int32/int8/uint8, float16/float32.
        data_y (tvm.tensor.Tensor): tensor with type int32/int8/uint8, float16/float32.

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data_x.dtype
    if dtype != data_y.dtype:
        raise TypeError("input dtype should be the same")
    utils.ops_dtype_check(dtype, [utils.DtypeForDavinci.ALL_FLOAT,
                                  utils.DtypeForDavinci.INT8,
                                  utils.DtypeForDavinci.UINT8,
                                  utils.DtypeForDavinci.INT32])

    utils.check_shape(data_x.shape)
    utils.check_shape(data_y.shape)
    utils.auto_broadcast_check(data_x.shape, data_y.shape)

    # dtype for vsel and vcmp
    if product_is_mini():
        compute_dtype = "float16"
    else:
        compute_dtype = "float32"

    # fp16 div returns 0 if y < 2^-12; fp32 div returns 0 if y < 2^-64
    min_val = tvm.const(2**(-12) if product_is_mini() else 2**(-64),
                        dtype=compute_dtype)

    tvm_one = tvm.const(1, dtype=compute_dtype)
    tvm_zero = tvm.const(0, dtype=compute_dtype)

    if not product_is_mini() and dtype == "float16":
        min_val = tvm.const(2**(-12), "float32")

    data_y_fp32 = akg.lang.ascend.cast_to(data_y, "float32")
    # clip first so that the later cast from fp32 to fp16 on mini does not
    # overflow when y > 2^15
    clip_y_fp32 = akg.topi.clip(data_y_fp32, -1.0, 1.0)
    abs_clip_y_fp32 = Abs(clip_y_fp32, target)
    y_cmp = akg.lang.ascend.cast_to(abs_clip_y_fp32, compute_dtype)

    is_zero = tvm.compute(data_y.shape,
                          lambda *i: tvm.expr.Select(
                              y_cmp(*i) < min_val, tvm_one, tvm_zero),
                          name="is_zero")

    # for fp32 inputs on mini, cast(y, fp16) == 0 whenever y < 2^-24,
    # so refine is_zero to also catch y in (2^-64, 2^-24):
    if product_is_mini() and dtype == "float32":
        is_zero = _refine_is_zero(is_zero, abs_clip_y_fp32)

    is_zero = akg.lang.ascend.cast_to(is_zero, "float32")
    not_zero = tvm.compute(data_y.shape,
                           lambda *i: (1 - is_zero(*i)).astype("float32"),
                           name="not_zero")

    # replace [x1, x2] / [y1, 0] with [x1, 0] / [y1, 1]
    data_x = mul(akg.lang.ascend.cast_to(data_x, "float32"), not_zero, target=target)
    data_y = akg.lang.ascend.cast_to(data_y, "float32") + is_zero
    res = Divide(data_x, data_y, target=target)

    if dtype in ("int8", "uint8", "int32"):
        res = akg.lang.ascend.floor(res)
        res = akg.lang.ascend.cast_to(res, dtype)
    else:
        res = akg.lang.ascend.cast_to(res, dtype)
    return res
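
A NumPy reference for the zero-denominator trick in the comments above (a sketch of the semantics, not the Ascend implementation):

import numpy as np

x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
y = np.array([2.0, 0.0, 0.5], dtype=np.float32)
safe_y = np.where(y == 0, 1.0, y)    # [y1, 0] -> [y1, 1]
masked_x = np.where(y == 0, 0.0, x)  # [x1, x2] -> [x1, 0]
ref = masked_x / safe_y              # [0.5, 0.0, 6.0]: no inf/nan produced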