Code example #1
def apply_rms_prop_mixed_precision(var, ms, mom, grad, lr, momentum, rho,
                                   epsilon):
    """
    Mixed precision version for apply_rms_prop.

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        ms (tvm.tensor.Tensor): Mean square, a tensor of same shape and type as var.
        mom (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (float): Coefficient for calculating the new mom, 0.0 <= momentum <= 1.0.
        rho (float): Coefficient for calculating the new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var of type float32.
        tvm.tensor.Tensor, Updated var of type float16.
        tvm.tensor.Tensor, Updated ms.
        tvm.tensor.Tensor, Updated mom.
        dict, compile attrs containing the in-place buffer binding info.
    """

    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.FLOAT32)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    out_var, out_var_fp16, out_ms, out_mom = _apply_rms_prop_mixed_precision_compute(
        var, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ms, binds_info2 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info3 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_var_fp16, out_ms, out_mom, attrs
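
For reference, a NumPy sketch of the standard RMSProp update that _apply_rms_prop_mixed_precision_compute is assumed to implement (the compute helper itself is not shown above, so treat the exact semantics as an assumption):

import numpy as np

def rms_prop_reference(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """NumPy reference of the assumed RMSProp update; not part of the original op."""
    ms_new = rho * ms + (1.0 - rho) * grad * grad            # running mean of squared gradients
    mom_new = momentum * mom + lr * grad / np.sqrt(ms_new + epsilon)
    var_new = var - mom_new
    # The mixed-precision variant additionally returns var_new cast to float16.
    return var_new, var_new.astype(np.float16), ms_new, mom_new
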
Code example #2
def gather(params_shape,
           indices_shape,
           params_dtype,
           indices_dtype,
           axis,
           kernel_name,
           cce_path="./"):
    """Gather data by indices"""
    vc_util.check_shape(params_shape, length=2)
    vc_util.check_shape(indices_shape, length=1)
    vc_util.ops_dtype_check(params_dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.ops_dtype_check(indices_dtype, vc_util.DtypeForDavinci.INT32)
    vc_util.check_equal("axis", "zero", axis, 0)

    # construct compute
    o_shape = (indices_shape[0], params_shape[1])
    xx = akg.tvm.placeholder(params_shape, dtype=params_dtype, name="X")
    yy = akg.tvm.placeholder(indices_shape, dtype=indices_dtype, name="Y")
    res = akg.tvm.extern(o_shape, [xx, yy],
                         lambda ins, outs: kernel_ir(outs[0], ins[0], ins[1]),
                         name="res",
                         dtype=params_dtype)
    s = akg.tvm.create_schedule(res.op)

    # create cce
    attrs = {"enable_multicore": False}
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [xx, yy, res], "cce", name=kernel_name, attrs=attrs)

    source_code = mod.imported_modules[0].get_source()
    utils.create_code(kernel_name, cce_path, source_code)

    return mod
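
A minimal call sketch, assuming an environment where akg and its CCE backend are available; the shapes, dtypes, and kernel name below are illustrative only:

mod = gather(params_shape=(1024, 16),
             indices_shape=(32,),
             params_dtype="float16",
             indices_dtype="int32",
             axis=0,
             kernel_name="gather_demo")
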
Code example #3
def dropout_do_mask(data_tensor, data_mask, keep_prob):
    """Apply the dropout mask to data_tensor and rescale the kept values by 1 / keep_prob."""
    dtype = data_tensor.dtype
    shape_tensor = [x.value for x in data_tensor.shape]
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(shape_tensor)

    strides = [1]
    for x in reversed(shape_tensor):
        strides.append(strides[-1] * x)

    if keep_prob <= 0 or keep_prob > 1:
        raise RuntimeError("keep_prob must be in (0, 1]")

    keep_prob_const = akg.tvm.const(1.0 / keep_prob, dtype=dtype)
    data_scale_ub = akg.tvm.compute(
        shape_tensor,
        lambda *indices: data_tensor(*indices) * keep_prob_const,
        name='data_scale_ub')

    def get_index(indices):
        idx = 0
        for i in range(len(indices)):
            idx += indices[len(indices) - i - 1] * strides[i]
        return idx // 8

    if dtype == "float32":
        data_scale_ub_16 = akg.topi.cast(data_scale_ub, "float16")
        res_ub_16 = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub_16(*indice)))
        res = akg.topi.cast(res_ub_16, "float32")
    else:
        res = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub(*indice)))

    return res
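
An illustrative usage sketch; the shapes and the mask dtype are assumptions (the op reads one mask element per 8 elements of the flattened data, since get_index divides the flat index by 8):

shape = (16, 128)
data = akg.tvm.placeholder(shape, dtype="float16", name="data")
mask = akg.tvm.placeholder((shape[0] * shape[1] // 8,), dtype="uint8", name="mask")
out = dropout_do_mask(data, mask, keep_prob=0.9)
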
Code example #4
File: logical_not.py Project: zhuyawen/akg
def logical_not(inputs):
    vc_util.ops_dtype_check(inputs.dtype, vc_util.DtypeForDavinci.BOOL)
    vc_util.check_shape(inputs.shape)

    res = akg.topi.logical_not(inputs)

    return res
Code example #5
def discontinous_mov(data, out_shape):
    """
    Extract every second element (those at even 0-based indices) from the original data and copy them into a
    tensor of shape 2 * (original length / 2).

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        out_shape (list): a list of output's shape.

    Returns:
           tvm.tensor.Tensor, has the same type as data, but its shape is out_shape rather than data's shape.

    Example:
           if data = [1,2,3,4,5,6,7,8,9,10] then the output = [[1,3,5,7,9],[1,3,5,7,9]].
    """

    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    output = akg.tvm.compute(out_shape,
                             lambda j, i: data[i * 2],
                             name="output")

    return output
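
An illustrative usage sketch matching the docstring example (shape values are assumptions):

data = akg.tvm.placeholder((10,), dtype="float16", name="data")
out = discontinous_mov(data, out_shape=(2, 5))   # each row holds data[0], data[2], ..., data[8]
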
Code example #6
File: round.py Project: zhuyawen/akg
def round_value(input):
    """
    rounds the values of an akg.tvm.tensor to the nearest integer (ties to even), element-wise

    Args:
        input: akg.tvm.Tensor of type float16, float32

    Returns:
        akg.tvm.Tensor of same shape as input, of type int32

    Raises:
        ValueError: If the type of input is invalid.
    """
    dtype = input.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    shape = input.shape
    vc_util.check_shape(shape)

    if dtype == "float16":
        data_f16 = input
    else:
        data_f16 = akg.tvm.compute(shape,
                                   lambda *i: input(*i).astype("float16"),
                                   name="data_f16")

    res = akg.lang.cce.round(data_f16)

    return res
Code example #7
File: xdivy.py Project: x200510iong/akg
def xdivy(data_x1, data_x2):
    """
    Calculate data_x1 divided by data_x2.

    .. math::
        y = \\left\\{
            \\begin{aligned}
                0, && if \\quad x1 == 0 \\\\
                \\dfrac{x1}{x2}, && otherwise
            \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"

    Returns:
        tvm.tensor.Tensor
    """
    shape_x1 = get_shape(data_x1)
    shape_x2 = get_shape(data_x2)

    vc_util.check_shape(shape_x1)
    vc_util.check_shape(shape_x2)

    vc_util.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    dtype = data_x1.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
Code example #8
def truncate_div(input_x1, input_x2):
    """
    Calculate truncate_div of the data: res = floor(x1/x2) if x1/x2 > 0 else ceil(x1/x2), i.e. truncation toward zero.

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, support float16,
                                      float32 on mini device, while support
                                      int32, int8, uint8, float16, float32 on
                                      cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.
    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    vc_util.check_shape(get_shape(input_x1))
    vc_util.check_shape(get_shape(input_x2))
    vc_util.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)
    vc_util.ops_dtype_check(
        input_x1.dtype,
        (vc_util.DtypeForDavinci.ALL_FLOAT) if utils.product_is_mini() \
            else (vc_util.DtypeForDavinci.ALL_FLOAT,
                  vc_util.DtypeForDavinci.INT32,
                  vc_util.DtypeForDavinci.INT8,
                  vc_util.DtypeForDavinci.UINT8))

    return truncate_div_compute(input_x1, input_x2)
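
As a concrete check of the truncation rule above, a scalar reference of the assumed semantics (not part of the op itself):

import math

def truncate_div_reference(a, b):
    """Truncate toward zero: floor for positive quotients, ceil for negative ones."""
    q = a / b
    return math.floor(q) if q > 0 else math.ceil(q)

assert truncate_div_reference(7, 2) == 3
assert truncate_div_reference(-7, 2) == -3   # a floor division would give -4 here
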
Code example #9
File: reduce_any_d.py Project: zhuyawen/akg
def reduce_any_d(x, axis=None, keepdims=False):
    """
    Reduce a tensor along the given axis using max (a logical "any" over int8 values of 0/1).

    Args:
        x (tvm.tensor.Tensor): The input tensor to reduce. Should be of type int8.
        axis (Union[list, tuple, int, None]): The dimensions to reduce. If None, all dimensions will be reduced.
                                              Each dim must be in the range [-len(data.shape), len(data.shape) - 1].
        keepdims (Union[bool, None]): If True, retains reduced dimensions with length 1, defaults to False.

    Returns:
        tvm.tensor.Tensor of same type as input tensor x.
    """
    # check type
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.INT8)
    vc_util.check_shape(x.shape)
    # check axis
    vc_util.reduce_axis_check(x.shape, axis)
    refined_axis = refine_reduce_axis(x, axis)
    if len(set(refined_axis)) == len(x.shape) and not keepdims:
        keepdims = True
    res = _reduce_any_d_compute(x, refined_axis, keepdims)
    if len(set(refined_axis)) == len(x.shape):
        res = topi.reshape(res, (1, ))
    return res
Code example #10
def apply_adagrad(var, accum, learning_rate, grad, update_slots=True):
    """
    Update `var` according to the Adagrad algorithm.

    .. math::
        accum += grad^2
        var -= learning_rate * grad / accum.sqrt()

    Args:
        var (tvm.tensor.Tensor): input var to be updated of type float16, float32
        accum (tvm.tensor.Tensor): accumulation of the squared gradients of type float16, float32
        learning_rate (tvm.tensor.Tensor): A scalar tensor of type float16, float32
        grad (tvm.tensor.Tensor): input grad of type float16, float32
        update_slots (bool): If True, the accum tensor will be updated;
            otherwise the accum tensor will not be updated.
            Defaults to True.

    Returns:
        tvm.tensor.Tensor, the updated var.
        tvm.tensor.Tensor, the updated accum.
    """

    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (accum, learning_rate, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (accum, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    if tuple(get_shape(learning_rate)) != (1,):
        raise RuntimeError("learning_rate only support scalar tensor")

    return _apply_adagrad_compute(var, accum, learning_rate, grad, update_slots)
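
For reference, a NumPy sketch of the Adagrad update described in the docstring; _apply_adagrad_compute is not shown above, so treat this as an assumption about its semantics:

import numpy as np

def adagrad_reference(var, accum, learning_rate, grad, update_slots=True):
    """NumPy reference of the assumed Adagrad update; not part of the original op."""
    if update_slots:
        accum = accum + grad * grad        # accum += grad^2
    var = var - learning_rate * grad / np.sqrt(accum)
    return var, accum
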
Code example #11
def matrix_diag_part(input_diagonal, input_help):
    """
    Calculate the batched diagonal part of a batched tensor.
    Note:
        input_help is a tensor whose diagonal elements are 1 and all other positions are 0;
        its last two dimensions can be unequal.

    Args:
        input_diagonal (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8. The last two dimensions
                                            can be unequal.
        input_help (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8, whose diagonal elements are 1
                                        and all other positions are 0.
    Returns:
        tvm.tensor.Tensor, has the same type as input_diagonal; its number of dimensions is dims(input_diagonal) - 1.
    """
    dtype_input_diagonal = input_diagonal.dtype
    dtype_input_help = input_help.dtype

    vc_util.elemwise_shape_check(input_help.shape, input_diagonal.shape)

    if len(input_help.shape) < 2:
        raise ValueError("Only input tensors of rank >= 2 are supported!")

    vc_util.ops_dtype_check([dtype_input_diagonal, dtype_input_help], [vc_util.DtypeForDavinci.ALL_FLOAT,
                                                                       vc_util.DtypeForDavinci.INT8,
                                                                       vc_util.DtypeForDavinci.INT32,
                                                                       vc_util.DtypeForDavinci.UINT8])
    res = matrix_diag_part_compute(input_diagonal, input_help)
    return res
Code example #12
File: reverse.py Project: zhuyawen/akg
def reverse(input_data, axis):
    """
    Reverse a tensor along the given dimensions.
    Args:
        input_data (tvm.tensor.Tensor): Tensor of float16, float32 and int32.
        axis (Union[list, tuple, int]): Reversing along the last dimension is not supported, so axis cannot be None.
    Returns:
        tvm.tensor.Tensor, has the same type and shape as input_data.
    """
    shape = get_shape(input_data)
    dtype = input_data.dtype
    # check dtype and shape
    vc_util.check_shape(shape)
    vc_util.ops_dtype_check(
        dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    # check axis
    shape_len = len(shape)
    if hasattr(axis, 'index'):
        axis = list(axis)
    if isinstance(axis, int):
        axis = [axis]
    vc_util.axis_check(shape_len, axis)
    _check_axis(axis, shape)
    # compute res
    res = reverse_compute(input_data, axis)
    return res
Code example #13
def sparse_softmax_cross_entropy_with_logits(labels, logits, reduction='mean'):
    """
    Computes sparse softmax cross entropy between `logits` and `labels`.

    Note:
        Softmax calculation of Logits is done inside the op.

    Args:
        labels (tvm.tensor.Tensor): int32 tensor of shape [batch_size].
                                    Each entry in it must be an index in `[0, num_classes)`.
        logits (tvm.tensor.Tensor): float32 or float16 tensor of shape [batch_size, num_class].
        reduction (str): Specifies the reduction to apply to the output: 'none' or 'mean' or 'sum'. Default: 'mean'.
            'none': no reduction for the output
            'sum': the sum for the output
            'mean': the mean for the output.

    Returns:
        tvm.tensor.Tensor, has the same dtype as logits.
        If reduction is 'none', shape of the tensor is the same as logits,
        otherwise shape of the tensor is the same as labels.
    """
    vc_util.ops_dtype_check(logits.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    strategy, cost, _ = sparse_softmax_cross_entropy_with_logits_impl(
        labels, logits, reduction)
    attr_map = {"custom_tiling": strategy}
    return cost, attr_map
Code example #14
def matrix_diag(data, out_shape):
    """
    Generate a batched tensor whose value in diagonal lines are defined in `data`.

    Args:
        data (tvm.tensor.Tensor): A tensor of type float16, float32 or int32. Rank is L.
        out_shape (Union[list, tuple]): Output shape of length L + 1.
            The value of `out_shape[0, ..., L-1]` should be equal to `data.shape[0, ..., L-1]`.

    Returns:
        tvm.tensor.Tensor, has same type as "data", shape is "out_shape".
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.ALL_FLOAT,
                                    vc_util.DtypeForDavinci.INT32])

    shape = get_shape(data)
    vc_util.check_shape(data)
    vc_util.check_shape(out_shape, length=len(shape) + 1)
    if tuple(shape[:-1]) != tuple(out_shape[:-2]):
        raise RuntimeError("The value of out_shape[:-2] should be equal to data.shape[:-1]")

    res = akg.tvm.compute(out_shape,
                          lambda *i: akg.tvm.if_then_else(akg.tvm.all(i[-1] == i[-2], i[-1] < shape[-1]),
                                                          data(*i[:-1]),
                                                          zero_const(dtype)),
                          name="diag")

    return res
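
An illustrative usage sketch (shapes are assumptions): place a length-4 vector on the diagonal of a 4 x 5 output.

data = akg.tvm.placeholder((4,), dtype="float32", name="data")
out = matrix_diag(data, out_shape=(4, 5))   # out[i, j] = data[i] if i == j else 0
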
Code example #15
File: pad.py Project: zhuyawen/akg
def pad(data, paddings, padtype):
    """add paddings to the tensor
    :shape: The shape of the tensor, now only support two dimension Tensor
    :paddings: The shape of the paddings, shape [N,2], N is the dimension of the tensor,
     For each dimension D of input, paddings[D, 0] indicates how many values to add before
     the contents of tensor in that dimension, and paddings[D, 1] indicates how many values to
     add after the contents of tensor in that dimension.
    :dtype: The type of the input, float16, float32
    :padtype: One of "CONSTANT", "REFLECT", or "SYMMETRIC".
    """
    # check shape
    vc_util.check_shape(data.shape)
    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    # check padding types
    ptype_checklist = ['constant']
    if not (padtype in ptype_checklist):
        raise RuntimeError("pad_cce only support %s while padtype is %s" % (",".join(ptype_checklist), padtype))

    dtype = data.dtype
    if dtype == 'int8' or dtype == 'uint8':
        data = cast(data, "float16")

    rank = len(data.shape)
    pad_before = []
    pad_after = []
    for i in range(rank):
        pad_before.append(paddings[i][0])
        pad_after.append(paddings[i][1])
    B = tvm_pad(data, pad_before, pad_after=pad_after, name='B')

    if dtype == 'int8' or dtype == 'uint8':
        B = cast(B, dtype)
    return B
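
An illustrative usage sketch (shape and padding values are assumptions):

data = akg.tvm.placeholder((8, 16), dtype="float16", name="data")
out = pad(data, paddings=[[1, 1], [2, 2]], padtype="constant")   # result shape: (10, 20)
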
Code example #16
File: reciprocal.py Project: zhuyawen/akg
def reciprocal(data, high_precision=True):
    """
    Computes the reciprocal of data element-wise.

    Args:
        data (tvm.tensor.Tensor): a tvm.tensor.Tensor of type float16, float32.
        high_precision (bool): a bool value, whether to use high-precision version.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.
    """

    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, data.dtype) / (data(*indice)), name="res")

    # When the product is mini, use the Newton iteration method to achieve higher precision.
    if utils.product_is_mini() and high_precision:
        steps = 1
        for _ in range(steps):
            temp1 = data * res
            temp2 = temp1 * akg.tvm.const(-1, data.dtype)
            temp3 = temp2 + akg.tvm.const(2, data.dtype)
            res = temp3 * res

    return res
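
The refinement loop above performs one Newton-Raphson step per iteration, r_new = r * (2 - x * r); a scalar sketch of the same idea (values chosen for illustration):

def refine_reciprocal(x, r, steps=1):
    """Newton-Raphson refinement of an initial estimate r toward 1/x (scalar sketch)."""
    for _ in range(steps):
        r = r * (2.0 - x * r)
    return r

# refine_reciprocal(1.9, 0.5) returns 0.525, closer to the exact 1/1.9 ~= 0.5263
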
Code example #17
File: elemwise_chain.py Project: zhuyawen/akg
def case_1(data_shape, dtype, kernel_name, attrs):
    """elemwise chain case 1"""
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.FLOAT16)
    vc_util.check_shape_length_equal("data", data_shape, 2)

    m, k = data_shape

    A = akg.tvm.placeholder((m, k), name='A', dtype=dtype)
    B = akg.tvm.placeholder((k, ), name='B', dtype=dtype)
    C = akg.tvm.placeholder((m, k), name='C', dtype=dtype)

    E = akg.tvm.compute((m, k),
                        lambda i, j: A[i, j] * (B[j] + C[i, j]),
                        name="E")

    forward_s = akg.tvm.create_schedule(E.op)
    op_vars = [A, B, C, E]
    forward_low = akg.lower(forward_s,
                            op_vars,
                            simple_mode=True,
                            polyhedral=True)

    kernel_name = utils.gen_name_kernel(kernel_name, dtype, data_shape)

    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(forward_s,
                        op_vars,
                        "cce",
                        name="test",
                        attrs=attrs,
                        polyhedral=True)
        source_code = mod.imported_modules[0].get_source()
        return mod
Code example #18
File: tanh_grad.py Project: zhuyawen/akg
def tanh_grad(data_y, data_dy):
    """
    Compute the backpropagation gradient of tanh.

    Args:
        data_y: Tensor, which equals the output of tanh.
        data_dy: Tensor, the initial gradients.

    Return:
        Tensor, overall gradients.
    """
    dtype = data_y.dtype
    vc_util.ops_dtype_check(data_y.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data_y.shape]
    vc_util.check_shape(shape)

    # dx = dy * (1 - y*y)
    tmp1 = akg.tvm.const(-1, dtype=dtype)
    tmp2 = akg.tvm.const(1, dtype=dtype)
    data1_square = akg.lang.cce.vmul(data_y, data_y)
    data_tmp = akg.lang.cce.vmuls(data1_square, tmp1)
    anuminate = akg.lang.cce.vadds(data_tmp, tmp2)
    res = akg.lang.cce.vmul(anuminate, data_dy)

    return res
Code example #19
File: broadcast_to.py Project: zhuyawen/akg
def broadcast_to(x, shape):
    """
    Broadcast a tensor to a compatible shape.

    Args:
        x (tvm.tensor.Tensor): Tensor of type float32, float16, int8, uint8, int32.
        shape (list, tuple): The shape of the output tensor.

    Returns:
        A tvm.tensor.Tensor with the same type as x.

    """
    # check shape
    vc_util.check_shape(x)
    vc_util.check_shape(shape)

    # check dtype
    dtype = x.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    # The vector_dup instruction doesn't support int8 and uint8.
    # It could be simplified by some methods, such as "auto cast".
    x_shape = get_shape(x)
    if len(x_shape) == 1 and x_shape[0] == 1 and dtype in ["int8", "uint8"]:
        x = cast(x, "float16")

    res = topi.broadcast_to(x, shape)
    if res.dtype != dtype:
        res = cast(res, dtype)
    return res
Code example #20
File: reduce_prod.py Project: zhuyawen/akg
def reduce_prod(data, axis=None, keepdims=False):
    """
    Computes the product of elements along specific axis

    Args:
        data (tvm.tensor.Tensor): indicating the input tensor.
        axis (Union[list, tuple, int, None]): indicating the dimensions to reduce at. if it's None, all dimensions
                                               will be reduced.
        keepdims (Union[bool, None]): if true, keep the dimensions with length 1.

    Returns:
    Tensor, the product of elements of input tensor.
    """
    shape = [x.value for x in data.shape]
    ops_dtype_check(data.dtype, [
        DtypeForDavinci.ALL_FLOAT, DtypeForDavinci.INT8, DtypeForDavinci.UINT8
    ])

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")

    axis_new = ft_util.refine_reduce_axis(data, axis)

    check_shape(shape)
    dtype = data.dtype
    if dtype in ["int8", "uint8"]:
        data = akg.topi.cast(data, "float16")

    vlog_t = akg_log(data)
    res = akg.topi.sum(vlog_t, axis=axis_new, keepdims=keepdims)
    res = akg_exp(res)

    if dtype in ["int8", "uint8"]:
        res = akg.topi.cast(res, dtype)
    return res
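
A quick NumPy check of the exp(sum(log(x))) identity this implementation relies on (positive inputs assumed, since log is undefined otherwise):

import numpy as np

x = np.array([1.5, 2.0, 4.0], dtype=np.float32)
assert np.isclose(np.exp(np.log(x).sum()), np.prod(x))   # both sides equal 12.0
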
Code example #21
File: logsoftmax_grad.py Project: zhuyawen/akg
def logsoftmax_grad(Y, dY, axis):
    """
    Computes the back propagation gradients by chain rule.

    Args:
        Y: Tensor, holds the logsoftmax activation output.
        dY: Tensor, holds the initial gradients.
        axis: Integer, on which dimension the softmax is applied.

    Returns:
        Tensor, the overall gradients.
    """
    shape = [x.value for x in Y.shape]
    vc_util.check_shape(shape)
    dtype = Y.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    if axis == -1:
        axis = len(shape) + axis
    if axis >= len(shape):
        raise RuntimeError("axis should be less than dimension")
    if axis < -1:
        raise RuntimeError(
            "negative axis only support -1, please specify the axis in positive value"
        )

    softmax = akg.topi.exp(Y)
    dy_sum = akg.lang.cce.sum(dY, axis=axis)
    dy_sum_broadcast = akg.lang.cce.broadcast(dy_sum, shape)
    mul_result = akg.lang.cce.vmul(softmax, dy_sum_broadcast)
    res = akg.lang.cce.vsub(dY, mul_result)
    attrs = {"pragma_reschedule": 1, "pragma_modshift": 1}
    return res, attrs
Code example #22
File: floordiv.py Project: zhuyawen/akg
def floordiv(data1, data2):
    """
    Calculate data1 / data2 and always return an integer result rounded down (floor).

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    vc_util.ops_dtype_check([data1.dtype, data2.dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    shape1 = [x.value for x in data1.shape]
    vc_util.check_shape(shape1)
    shape2 = [x.value for x in data2.shape]
    vc_util.check_shape(shape2)

    if utils.product_is_mini():
        rec = reciprocal(data2, high_precision=True)
        res = data1 * rec
    else:
        res = akg.topi.divide(data1, data2)
    res = akg.lang.cce.floor(res)
    return res
Code example #23
File: acos_grad.py Project: zhuyawen/akg
def acos_grad(x, dy):
    """
    Gradient for acos.

    .. math::
        dx = \\frac{-1}{\\sqrt{1 - x^2}} \\cdot dy

    Args:
        x (tvm.tensor.Tensor): tensor of type float16, float32.
        dy (tvm.tensor.Tensor): tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, same type and shape as x.
    """
    dtype = x.dtype
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.ops_dtype_check(dy.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(dy.shape)

    one = akg.tvm.const(1.0, dtype=dtype)
    mid_square = akg.tvm.compute(x.shape,
                                 lambda *i: (one - x(*i) * x(*i)),
                                 name="mid_square")
    rsq = rsqrt.rsqrt(mid_square)
    dx = akg.tvm.compute(x.shape, lambda *i: -rsq(*i) * dy(*i), name="dx")

    return dx
Code example #24
File: concat.py Project: zhuyawen/akg
def concat(data, axis):
    """
    Concatenates data along the dimension set by axis.

    Args:
        data (Union[list, tuple]): list or tuple of tvm.tensor.Tensor of type float16, float32, int32, int8, uint8
        axis (int): Specifies the axis along which to concatenate. Must be in the range [-rank(data), rank(data))

    Returns:
        tvm.tensor.Tensor of same type as data.
    """

    data_size = len(data)
    min_size = 1
    if data_size < min_size:
        raise RuntimeError("The size of data must be greater than or equal to 1")

    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    shape_0 = data[0].shape
    vc_util.check_shape(shape_0)
    if axis < 0:
        axis += len(shape_0)

    for i in range(1, data_size):
        shape_i = data[i].shape
        vc_util.check_shape(shape_i)
        if len(shape_i) != len(shape_0):
            raise ValueError("Input tensors must have same dimensions.")

    res = akg.lang.cce.concat(data, axis)
    return res
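
An illustrative usage sketch (shapes are assumptions):

a = akg.tvm.placeholder((4, 8), dtype="float16", name="a")
b = akg.tvm.placeholder((4, 16), dtype="float16", name="b")
out = concat([a, b], axis=1)   # result shape: (4, 24)
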
Code example #25
def select(condition, x1, x2):
    """
    Selects elements from x1 or x2, depending on condition.
    Note:
        Every parameter's shape must be valid; broadcasting of condition's shape is supported.

    Args:
        condition (tvm.tensor.Tensor): Tensor of type int8, int32; values must be 0 or 1.
        x1 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.
        x2 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as x1.

    """
    shape_x1 = get_shape(x1)
    shape_x2 = get_shape(x2)
    con_shape = get_shape(condition)
    vc_util.elemwise_shape_check(shape_x1, shape_x2)
    vc_util.elemwise_dtype_check(x1.dtype, x2.dtype, [
        vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT8,
        vc_util.DtypeForDavinci.INT32, vc_util.DtypeForDavinci.UINT8
    ])
    vc_util.ops_dtype_check(
        condition.dtype,
        [vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.INT32])
    vc_util.auto_broadcast_check(con_shape, shape_x1)
    res = select_compute(condition, x1, x2)
    return res
Code example #26
def blas_axby(x, y, alpha, beta):
    r"""
    Blas axby.

    :math:`\alpha x + \beta y`

    Args:
        x (tvm.tensor.Tensor): Input `x` of type float16 or float32.
        y (tvm.tensor.Tensor): Input `y` of type float16 or float32.
        alpha (Union[int, float]): Scale of `x`.
        beta (Union[int, float]): Scale of `y`.

    Returns:
        tvm.tensor.Tensor, has the same shape and type as inputs.
    """
    vc_util.ops_dtype_check([x.dtype, y.dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(y.shape)

    ax = akg.lang.cce.vmuls(x, alpha)
    by = akg.lang.cce.vmuls(y, beta)
    res = akg.lang.cce.vadd(ax, by)

    return res
Code example #27
File: leaky_relu.py Project: zhuyawen/akg
def leaky_relu(data, negative_slop=0):
    """
    leaky_relu op for input tensor (N,C,H,W) OR (N,C1,H,W,C0).

    :math:`max(x, negative_slop*x)`

    Args:
        data (tvm.tensor.Tensor): tensor with type float16 or float32.
        negative_slop (float): 0<=negative_slop<1

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    vc_util.check_shape(data.shape)

    if negative_slop >= 1 or negative_slop < 0:
        raise RuntimeError(
            "leaky_relu only support negative_slop between [0,1)")

    slop_tmp = akg.tvm.const(negative_slop, dtype=dtype)
    tmp = akg.lang.cce.vmuls(data, slop_tmp)
    res = akg.lang.cce.vmax(tmp, data)

    return res
Code example #28
File: kldiv_loss_grad.py Project: zhuyawen/akg
def kldiv_loss_grad(pre_deriv, inputs, target):
    """
    Do the backpropagation for the KL divergence loss.

    Args:
        pre_deriv (tvm.tensor.Tensor): Gradient tensor with respect to the forward output.
        inputs (tvm.tensor.Tensor): Forward input tensor.
        target (tvm.tensor.Tensor): Target tensor of the forward pass.

    Returns:
        Gradient tensor for forward input.
    """
    inputs_dtype = inputs.dtype
    target_dtype = target.dtype
    pre_deriv_dtype = pre_deriv.dtype
    vc_util.ops_dtype_check([inputs_dtype, target_dtype, pre_deriv_dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)

    if get_const_tuple(target.shape) != get_const_tuple(inputs.shape):
        raise RuntimeError("Please ensure inputs have the same size: "
                           "{} vs {}".format(target.shape, inputs.shape))

    inputs_dtype_old = inputs_dtype

    if utils.product_is_mini() and inputs_dtype == 'float32':
        inputs = akg.topi.cast(inputs, "float16")
        target = akg.topi.cast(target, "float16")
        inputs_dtype = "float16"

    cur_deriv = akg.topi.divide(target, inputs)
    cur_deriv = akg.topi.multiply(cur_deriv, pre_deriv)
    if utils.product_is_mini() and inputs_dtype_old == 'float32':
        cur_deriv = akg.topi.cast(cur_deriv, inputs_dtype_old)
    return cur_deriv
Code example #29
File: minimum.py Project: zhuyawen/akg
def minimum(input1, input2):
    """
    Return the min value of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1: Tensor.
        input2: Tensor. Has the same type as input1.

    Returns:
        Tensor, has the same type as inputs.
    """

    vc_util.ops_dtype_check([input1.dtype, input2.dtype], vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)

    vc_util.auto_broadcast_check(shape1, shape2)

    if dtype in ("int8", "uint8"):
        input1 = cast(input1, "float16")
        input2 = cast(input2, "float16")
    res = akg.topi.minimum(input1, input2)
    if dtype in ("int8", "uint8"):
        res = cast(res, dtype)

    return res
Code example #30
def bitwise_or(x1, x2):
    """
    Computes the bitwise or of `x1` and `x2`.

    Args:
        x1 (tvm.tensor.Tensor): Tensor of type int16, uint16.
        x2 (tvm.tensor.Tensor): Tensor of type int16, uint16.

    Returns:
        tvm.tensor.Tensor, has the same type as x1.
    """
    # check shape
    vc_util.check_shape(x1)
    vc_util.check_shape(x2)
    _, _, output_shape = produce_shapes(get_shape(x1), get_shape(x2))

    # check input tensor data_type
    vc_util.ops_dtype_check(
        [x1.dtype, x2.dtype],
        [vc_util.DtypeForDavinci.INT16, vc_util.DtypeForDavinci.UINT16])
    dtype = x1.dtype
    if dtype != x2.dtype:
        raise RuntimeError("input types must be the same, but got %s vs %s" %
                           (dtype, x2.dtype))

    x1 = akg.topi.broadcast_to(x1, output_shape)
    x2 = akg.topi.broadcast_to(x2, output_shape)
    res = akg.tvm.compute(output_shape,
                          lambda *indice: x1(*indice) | x2(*indice))
    return res