def reciprocal(data, high_precision=True, target=utils.CCE):
    """
    Computes the reciprocal (1 / data) of data element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        high_precision (bool): Whether to refine the result with one Newton
            iteration step. Only takes effect when product_is_mini() is true.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.

    Supported Platforms:
        'Ascend', 'GPU'
    """
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    dims = [extent.value for extent in data.shape]
    utils.check_shape(dims)

    def _inverse(*idx):
        return akg.tvm.const(1, data.dtype) / data(*idx)

    res = akg.tvm.compute(dims, _inverse, name="res")

    if not (product_is_mini() and high_precision):
        return res

    # When product is mini, use the Newton iteration method to achieve higher
    # precision: res <- res * (2 - data * res).
    newton_steps = 1
    for _ in range(newton_steps):
        scaled = data * res
        negated = scaled * akg.tvm.const(-1, data.dtype)
        correction = negated + akg.tvm.const(2, data.dtype)
        res = correction * res
    return res
def mul(l_input, r_input, target=utils.CCE):
    """
    Multiply two tensors element-wise (l_input * r_input).

    Note:
        mul supports broadcasting; the two shapes are validated with
        utils.auto_broadcast_check.

    Args:
        l_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        r_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Target name, validated by utils.check_supported_target.

    Returns:
        tvm.tensor.Tensor, has the same type as l_input and r_input.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.ops_dtype_check([l_input.dtype, r_input.dtype], utils.DtypeForDavinci.ALL_FLOAT)

    lhs_dims = [extent.value for extent in l_input.shape]
    rhs_dims = [extent.value for extent in r_input.shape]
    for dims in (lhs_dims, rhs_dims):
        utils.check_shape(dims)
    utils.auto_broadcast_check(lhs_dims, rhs_dims)

    # Both operands must share one dtype.
    utils.elemwise_dtype_check(l_input.dtype, r_input.dtype)
    return akg.topi.multiply(l_input, r_input)
def Gather(params_shape, indices_shape, params_dtype, indices_dtype, axis, kernel_name, cce_path="./",
           target=utils.CCE):
    """
    Build a gather kernel for the "cce" backend and dump its generated source.

    Args:
        params_shape (Union[list, tuple]): Shape of the params input; must have length 2.
        indices_shape (Union[list, tuple]): Shape of the indices input; must have length 1.
        params_dtype (str): Data type of params, checked against DtypeForDavinci.ALL_TYPES.
        indices_dtype (str): Data type of indices, checked against DtypeForDavinci.INT32.
        axis (int): Must be 0; any other value is rejected by utils.check_equal.
        kernel_name (str): Name given to the built kernel and forwarded to create_code.
        cce_path (str): Forwarded to create_code (presumably the output directory for
            the generated source; confirm against create_code).
        target (str): Not read by this function; the build always uses "cce".

    Returns:
        The module returned by akg.build. The output tensor has shape
        (indices_shape[0], params_shape[1]) and dtype params_dtype.
    """
    utils.check_shape(params_shape, length=2)
    utils.check_shape(indices_shape, length=1)
    utils.ops_dtype_check(params_dtype, utils.DtypeForDavinci.ALL_TYPES)
    utils.ops_dtype_check(indices_dtype, utils.DtypeForDavinci.INT32)
    utils.check_equal("axis", "zero", axis, 0)

    # construct compute
    out_dims = (indices_shape[0], params_shape[1])
    xx = akg.tvm.placeholder(params_shape, dtype=params_dtype, name="X")
    yy = akg.tvm.placeholder(indices_shape, dtype=indices_dtype, name="Y")

    def _emit_ir(ins, outs):
        # kernel_ir receives (output, params, indices).
        return kernel_ir(outs[0], ins[0], ins[1])

    res = akg.tvm.extern(out_dims, [xx, yy], _emit_ir, name="res", dtype=params_dtype)
    sch = akg.tvm.create_schedule(res.op)

    # create cce
    build_attrs = {"enable_multicore": False}
    with akg.build_config(add_lower_pass=debug_mode(0), dump_pass_ir=True):
        mod = akg.build(sch, [xx, yy, res], "cce", name=kernel_name, attrs=build_attrs)

    # Persist the generated kernel source.
    create_code(kernel_name, cce_path, mod.imported_modules[0].get_source())
    return mod
def sqrt(data, target=utils.CUDA):
    """
    Computes square root of x element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Target name, validated by utils.check_supported_target.
            When it equals utils.CCE the work is delegated to _sqrt_ascend.

    Returns:
        tvm.tensor.Tensor, has same type and shape as data.

    Raises:
        RuntimeError: On the non-CCE path, if data.dtype is neither float16 nor float32.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    # Validate the target once (the original repeated this call back to back).
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _sqrt_ascend(data)

    check_list = ["float16", "float32"]
    dtype = data.dtype
    if dtype not in check_list:
        raise RuntimeError("Sqrt cce only support %s while dtype is %s" % (
            ",".join(check_list), dtype))

    shape = [x.value for x in data.shape]
    utils.check_shape(shape)
    res = akg.topi.sqrt(data)
    return res
def relu6(inputs, target="cce"):
    """
    Computes Rectified Linear 6: min(max(features, 0), 6).

    Args:
        inputs (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor, which has same type and shape as input.

    Raises:
        RuntimeError: If inputs.dtype is neither float16 nor float32.
    """
    check_list = ["float16", "float32"]
    dtype = inputs.dtype
    if dtype not in check_list:
        raise RuntimeError("relu6 only support %s while dtype is %s" % (",".join(check_list), dtype))

    in_shape = inputs.shape
    utils.check_shape(in_shape)

    # Clamp from below at 0 ...
    lower = lang.ascend.broadcast(akg.tvm.const(0, dtype=dtype), in_shape)
    clipped_low = lang.ascend.vmax(inputs, lower)
    # ... then from above at 6.
    upper = lang.ascend.broadcast(akg.tvm.const(6, dtype=dtype), in_shape)
    return lang.ascend.vmin(clipped_low, upper)
def Divide(lhs, rhs, target=utils.CCE):
    """
    Divide lhs by rhs element-wise.

    Args:
        lhs (tvm.tensor.Tensor): The left tensor (dividend).
        rhs (tvm.tensor.Tensor): The right tensor (divisor).
        target (str): Target name, validated by utils.check_supported_target.
            When it equals utils.CCE the work is delegated to _div_ascend.

    Returns:
        tvm.tensor.Tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _div_ascend(lhs, rhs)

    lhs_dims = [extent.value for extent in lhs.shape]
    rhs_dims = [extent.value for extent in rhs.shape]
    for dims in (lhs_dims, rhs_dims):
        utils.check_shape(dims)
    utils.auto_broadcast_check(lhs_dims, rhs_dims)

    # Both operands must share one dtype.
    utils.elemwise_dtype_check(lhs.dtype, rhs.dtype)
    return akg.topi.divide(lhs, rhs)
def reshape(data, out_shape, target=utils.CUDA):
    """
    Rearranges input tensor data to new shape out_shape.

    Args:
        data (tvm.tensor.Tensor): The tensor to be reshaped.
        out_shape (list, tuple): The new shape applied on the input tensor data,
            should be compatible with the original shape of data. If it contains
            -1, the final shape is resolved by get_out_shape.
        target (str): When it equals utils.CCE the work is delegated to
            _reshape_ascend.

    Returns:
        The reshaped akg.tvm.tensor of same type as input tensor data.

    Supported Platforms:
        'Ascend', 'GPU'
    """
    if target == utils.CCE:
        return _reshape_ascend(data, out_shape)

    utils.check_shape(data.shape)
    src_dims = get_shape(data)
    dst_dims = list(out_shape)
    if -1 in dst_dims:
        # Resolve the placeholder dimension from the input shape.
        dst_dims = get_out_shape(src_dims, dst_dims)
    return akg.topi.reshape(data, dst_dims)
def round_(data, target=utils.CCE):
    """
    Round elements of x to nearest integer.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32, int8, uint8, int32.
        target (str): Target name, validated by utils.check_supported_target.

    Returns:
        tvm.tensor.Tensor with the same shape as data. On non-CCE targets the
        result has the same dtype as data; on CCE the result is whatever
        akg.lang.ascend.round yields for a float16 input.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data.shape)
    src_dtype = data.dtype
    is_half = src_dtype == 'float16'

    if target == utils.CCE:
        half_data = data
        if not is_half:
            # The Ascend round intrinsic is fed float16 data.
            half_data = akg.tvm.compute(data.shape, lambda *idx: data(*idx).astype("float16"), name="data_f16")
        return akg.lang.ascend.round(half_data)

    # On other targets float16 is rounded in float32 and cast back afterwards.
    value = akg.topi.cast(data, 'float32') if is_half else data
    rounded = akg.topi.round(value)
    if is_half:
        rounded = akg.topi.cast(rounded, 'float16')
    return rounded
def flatten(x):
    """
    Reshape x into 2-D (batch, product of all remaining dimensions).

    Args:
        x (akg.tvm.tensor.Tensor): Input tensor; the first dimension is batch.

    Returns:
        akg.tvm.tensor.Tensor of shape [shape[0], shape[1] * shape[2] * ...].
    """
    # check shape
    utils.check_shape(x)
    dims = get_shape(x)

    # check input tensor data_type
    supported_dtypes = [
        utils.DtypeForDavinci.ALL_FLOAT,
        utils.DtypeForDavinci.INT8, utils.DtypeForDavinci.INT16,
        utils.DtypeForDavinci.INT32, utils.DtypeForDavinci.INT64,
        utils.DtypeForDavinci.UINT8, utils.DtypeForDavinci.UINT16,
        utils.DtypeForDavinci.UINT32, utils.DtypeForDavinci.UINT64,
    ]
    utils.ops_dtype_check(x.dtype, supported_dtypes)

    # Collapse every dimension after the first into a single extent.
    inner = 1
    for extent in dims[1:]:
        inner = inner * extent
    return akg.topi.reshape(x, [dims[0], inner])
def resize_nearest(input, output_shape):
    """
    Resize images using Nearest-neighbor interpolation.

    Args:
        input (tvm.tensor.Tensor): 4-D tensor of type float16 or float32 `("NHWC")`.
        output_shape (Union[tuple, list]): New size of image 4 integers `("NHWC")`.

    Note:
        The batch_num("N") of input and output must be equal, channel_num("C") is also.

    Returns:
        tvm.tensor.Tensor, has the same type as `input`.
    """
    input_shape = get_shape(input)
    utils.check_shape(input, 4, "input")
    utils.check_shape(output_shape, 4, "output_shape")
    utils.ops_dtype_check(input.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_equal("input batchsize", "output batchsize", input_shape[0], output_shape[0])
    utils.check_equal("input channel num", "output channel num", input_shape[3], output_shape[3])

    # Try the integer-scale path first; it signals "not applicable" with None.
    res = process_integer_scale(input, output_shape)
    # Identity test: `== None` would dispatch to the tensor's overloaded `==`.
    if res is None:
        res = process_non_integer_scale(input, output_shape)
    return res
def pad(data, paddings, padtype, target="cce"):
    """
    Add paddings to the tensor.

    Args:
        data (tvm.tensor.Tensor): Input tensor; dtype is checked against
            DtypeForDavinci.ALL_TYPES.
        paddings (Union[list, tuple]): Indexable of shape [N, 2], N is the rank of
            data. For each dimension D of input, paddings[D][0] indicates how many
            values to add before the contents of the tensor in that dimension, and
            paddings[D][1] indicates how many values to add after.
        padtype (str): Padding mode. Only 'constant' is accepted.
        target (str): Forwarded to Cast when int8/uint8 data is converted.

    Returns:
        tvm.tensor.Tensor, the padded tensor with the same dtype as data.

    Raises:
        RuntimeError: If padtype is not 'constant'.
    """
    # check shape
    utils.check_shape(data.shape)

    # check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_TYPES)

    # check padding types
    ptype_checklist = ['constant']
    if padtype not in ptype_checklist:
        raise RuntimeError("pad_cce only support %s while padtype is %s" % (",".join(ptype_checklist), padtype))

    dtype = data.dtype
    # 8-bit integer data is padded in float16 and converted back afterwards.
    via_half = dtype in ('int8', 'uint8')
    if via_half:
        data = Cast(data, "float16", target=target)

    rank = len(data.shape)
    pad_before = [paddings[axis][0] for axis in range(rank)]
    pad_after = [paddings[axis][1] for axis in range(rank)]
    padded = tvm_pad(data, pad_before, pad_after=pad_after, name='B')

    if via_half:
        padded = Cast(padded, dtype, target=target)
    return padded
def sigmoid(data, target="cce"):
    """
    Computes sigmoid of x element-wise.

    \\f[
        y = \\frac{1}{e^{-x} + 1}
    \\f]

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor, has same type and shape as data.

    Raises:
        RuntimeError: If data.dtype is neither float16 nor float32.
    """
    check_list = ["float16", "float32"]
    dtype = data.dtype
    if dtype not in check_list:
        raise RuntimeError("sigmoid_cce only support %s while dtype is %s" % (",".join(check_list), dtype))

    utils.check_shape(data.shape)

    # 1 / (exp(-x) + 1), built step by step.
    negated = vmuls(data, -1.0)
    denominator = vadds(vexp(negated), 1.0)
    return vrec(denominator)
def Sin(x, target=utils.CCE):
    """
    Computes sine value of a tensor.

    The Taylor expansion of sine, for reference:

    .. math::
        \\begin{array}{ll} \\\\
            sin(x) = x - \\frac{x^3}{3!} + \\frac{x^5}{5!} + ... +
                (-1)^k \\cdot \\frac{x^{2k+1}}{(2k+1)!}
        \\end{array}

    Note:
        The computation is dispatched to sin_call. sin_compute is kept as the
        alternative path and is only reached if use_call is switched to False.

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor of same type and shape as x.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(x.shape)

    # Hard-wired implementation switch.
    use_call = True
    return sin_call(x) if use_call else sin_compute(x)
def discontinous_mov(data, out_shape, target=utils.CCE):
    """
    Copy every second element of data (indices 0, 2, 4, ...) into each row of a 2-D output.

    The output element at [j, i] is data[i * 2], so every row holds the same values.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        out_shape (list): a list of output's shape; must be 2-D because the
            compute function takes exactly two indices.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor, has the same type as data, but its shape is out_shape
        rather than data's shape.

    Example:
        if data = [1,2,3,4,5,6,7,8,9,10] then the output = [[1,3,5,7,9],[1,3,5,7,9]].
    """
    # check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    in_dims = [extent.value for extent in data.shape]
    utils.check_shape(in_dims)

    def _pick(j, i):
        # The row index j is intentionally unused: all rows are identical.
        return data[i * 2]

    return akg.tvm.compute(out_shape, _pick, name="output")
def bitwise_or(x1, x2, target=utils.CCE):
    """
    Computes the bitwise or of `x1` and `x2`.

    Both inputs are broadcast to the common output shape produced by
    produce_shapes before the element-wise `|` is applied.

    Args:
        x1 (tvm.tensor.Tensor): Tensor of type int16, uint16.
        x2 (tvm.tensor.Tensor): Tensor of type int16, uint16.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor, has the same type as x1.

    Raises:
        RuntimeError: If x1 and x2 have different dtypes.
    """
    # check shape
    utils.check_shape(x1)
    utils.check_shape(x2)
    _, _, output_shape = produce_shapes(get_shape(x1), get_shape(x2))

    # check input tensor data_type
    utils.ops_dtype_check(
        [x1.dtype, x2.dtype],
        [utils.DtypeForDavinci.INT16, utils.DtypeForDavinci.UINT16])
    dtype = x1.dtype
    if dtype != x2.dtype:
        # Format the message with `%`; passing the values as extra positional
        # arguments would leave the placeholders unfilled in the error text.
        raise RuntimeError("input type must be same, but got %s vs %s" % (dtype, x2.dtype))

    x1 = akg.topi.broadcast_to(x1, output_shape)
    x2 = akg.topi.broadcast_to(x2, output_shape)
    res = akg.tvm.compute(output_shape, lambda *indice: x1(*indice) | x2(*indice))
    return res
def reduce_logsumexp(data, axis=None, keepdims=False, target="cce"):
    """
    Compute `log(sum(exp(elements across dimensions of a tensor)))`
    of elements over a given axis or a list of axes of a tensor.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16.
        axis: The dimensions to reduce. Could be None (by default), int, list or tuple.
            If None, all dimensions will be reduced.
            If int or list, must be in the range of [-len(data.shape), len(data.shape) - 1].
        keepdims: Boolean. If true, retains reduced dimensions with length 1.
            False by default.
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor, has the same type as data; its shape is data's shape
        reduced over axis by the `sum` operator.

    Raises:
        RuntimeError: If data.dtype is not float16.
    """
    check_list = ["float16"]
    dtype = data.dtype
    if dtype not in check_list:
        raise RuntimeError(
            "reduce_logsumexp_cce only support %s while dtype is %s"
            % (",".join(check_list), dtype))

    dims = [extent.value for extent in data.shape]
    utils.check_shape(dims)

    # NOTE: `sum` here is the reduction operator in scope in this module, called
    # with axis/keepdims keywords that Python's builtin sum does not accept.
    exponentials = vexp(data)
    reduced = sum(exponentials, axis=axis, keepdims=keepdims)
    return vlog(reduced)
def broadcast_to(x, shape, target=utils.CCE):
    """
    Broadcast a tensor to a compatible shape.

    Args:
        x (tvm.tensor.Tensor): Tensor of type float32, float16, int8, uint8, int32.
        shape (list, tuple): The shape of output tensor.
        target (str): Forwarded to Cast when a dtype conversion is required.

    Returns:
        A tvm.tensor.Tensor with the same type as x.

    Supported Platforms:
        'Ascend'
    """
    # check shape
    utils.check_shape(x)
    utils.check_shape(shape)

    # check dtype
    src_dtype = x.dtype
    utils.ops_dtype_check(src_dtype, utils.DtypeForDavinci.ALL_TYPES)

    # The vector_dup instruction doesn't support int8 and uint8, so a
    # single-element 8-bit tensor is broadcast in float16 instead.
    # It can be simplified by some methods, such as "auto cast".
    src_dims = get_shape(x)
    is_scalar_like = len(src_dims) == 1 and src_dims[0] == 1
    if is_scalar_like and src_dtype in ["int8", "uint8"]:
        x = Cast(x, "float16", target)

    res = topi.broadcast_to(x, shape)
    # Restore the caller's dtype if the float16 detour was taken.
    if res.dtype != src_dtype:
        res = Cast(res, src_dtype, target)
    return res
def clip(data, min_val, max_val, target=utils.CCE):
    """
    Clip the data in range(min_val, max_val).

    Change values less than min_val in data to min_val, and change values greater
    than max_val to max_val.

    Note:
        min_val should be smaller or equal to max_val.

    Args:
        data: Tensor of type float16 or float32.
        min_val: Float. When data < min_val, set data to min_val.
        max_val: Float. When data > max_val, set data to max_val.
        target (str): Not read by this function.

    Returns:
        Tensor, has the same type and shape as data.

    Raises:
        RuntimeError: If data.dtype (case-insensitively) is neither float16 nor float32.
    """
    check_list = ["float16", "float32"]
    dtype = data.dtype
    if dtype.lower() not in check_list:
        raise RuntimeError("clip only support %s while dtype is %s" % (",".join(check_list), dtype))

    utils.check_shape(data.shape)
    return akg.topi.clip(data, min_val, max_val)
def l2loss(data, target="cce"):
    """
    Computes half the sum of squares of data: sum(data ** 2) / 2.

    Each element is scaled by 1 / sqrt(2) and squared, then the squares are
    reduced with the `sum` operator in scope in this module (presumably over
    all axes, since no axis is passed; confirm against that operator).
    float16 input is computed in float32 and the result is cast back.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Forwarded to the `sum` operator.

    Returns:
        tvm.tensor.Tensor with the same dtype as data.

    Raises:
        RuntimeError: If data.dtype (case-insensitively) is neither float16 nor float32.
    """
    dtype = data.dtype
    check_list = ["float16", "float32"]
    if dtype.lower() not in check_list:
        # The message previously named "tile_cce", a copy-paste slip.
        raise RuntimeError("l2loss only support %s while dtype is %s" % (",".join(check_list), dtype))
    utils.check_shape(data.shape)

    orig_dtype = dtype
    if dtype.lower() == "float16":
        # Accumulate in float32.
        dtype = "float32"
        data = akg.topi.cast(data, dtype)

    # NOTE: axis/shape simplification via simplify_axis_shape was disabled by the
    # original author with the remark "code has bug".
    coeff_sqrt = akg.tvm.const(1.0 / (2 ** (0.5)), dtype=dtype)
    res = akg.lang.ascend.vmuls(data, coeff_sqrt)
    res = akg.lang.ascend.vmul(res, res)
    res = sum(res, target=target)

    if dtype != orig_dtype:
        res = akg.topi.cast(res, orig_dtype)
    return res
def matrix_diag(data, out_shape):
    """
    Generate a batched tensor whose values on the diagonal are taken from `data`.

    For an output index (..., r, c) the value is data[..., r] when r == c and
    c < data.shape[-1], and zero otherwise.

    Args:
        data (tvm.tensor.Tensor): A tensor of type float16, float32 or int32. Rank is L.
        out_shape (Union[list, tuple]): Output shape of length L + 1.
            `out_shape[:-2]` must be equal to `data.shape[:-1]`.

    Returns:
        tvm.tensor.Tensor, has same type as "data", shape is "out_shape".

    Raises:
        RuntimeError: If out_shape[:-2] differs from data.shape[:-1].
    """
    dtype = data.dtype
    utils.ops_dtype_check(dtype, [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])

    in_dims = get_shape(data)
    utils.check_shape(data)
    utils.check_shape(out_shape, length=len(in_dims) + 1)
    if tuple(in_dims[:-1]) != tuple(out_shape[:-2]):
        raise RuntimeError("The value of out_shape[:-2] should be equal to data.shape[:-1]")

    def _diag_value(*idx):
        # On the diagonal and still within data's last dimension.
        on_diagonal = akg.tvm.all(idx[-1] == idx[-2], idx[-1] < in_dims[-1])
        return akg.tvm.if_then_else(on_diagonal, data(*idx[:-1]), zero_const(dtype))

    return akg.tvm.compute(out_shape, _diag_value, name="diag")
def _equal_ascend(input1, input2, target=utils.CCE):
    """
    Element-wise equality comparison of input1 and input2 (Ascend implementation).

    The comparison dtype is chosen from input1.dtype and the runtime mode:
    float16 on mini products; otherwise float16/float32 are kept and every other
    dtype is compared in float32. When float32 inputs must be compared in float16,
    their difference is cast and compared against zero instead of casting each input.

    Args:
        input1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8, uint8.
        input2 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8, uint8.
        target (str): Forwarded to sub and Cast.

    Returns:
        tvm.tensor.Tensor, the result of akg.topi.equal.
    """
    # check shapes
    lhs_dims = [extent.value for extent in input1.shape]
    rhs_dims = [extent.value for extent in input2.shape]
    for dims in (lhs_dims, rhs_dims):
        utils.check_shape(dims)

    supported_dtypes = [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32,
                        utils.DtypeForDavinci.INT8, utils.DtypeForDavinci.UINT8]
    utils.ops_dtype_check([input1.dtype, input2.dtype], supported_dtypes)

    orig_dtype = input1.dtype
    dtype = orig_dtype
    if product_is_mini():
        if dtype != "float16":
            dtype = "float16"
    elif dtype not in ("float16", "float32"):
        # for int32, if cast to float16, there may be overflow
        dtype = "float32"

    if orig_dtype == "float32" and dtype == "float16":
        # Compare (input1 - input2) with zero in the narrower type.
        diff = Cast(sub(input1, input2, target), dtype, target)
        return akg.topi.equal(diff, akg.tvm.const(0.0, dtype))

    return akg.topi.equal(Cast(input1, dtype, target), Cast(input2, dtype, target))
def xdivy(data_x1, data_x2, target=utils.CCE):
    """
    Calculate data_x1 divided by data_x2.

    .. math::
        y = \\left\\{
            \\begin{aligned}
                0, && if \\quad x1 == 0 \\\\
                \\dfrac{x1}{x2}, && otherwise
            \\end{aligned}
        \\right.

    The inputs are validated here; the computation itself is done by xdivy_compute.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        target (str): Not read by this function.

    Returns:
        tvm.tensor.Tensor
    """
    x1_dims = get_shape(data_x1)
    x2_dims = get_shape(data_x2)
    for dims in (x1_dims, x2_dims):
        utils.check_shape(dims)

    # Both operands must share one floating-point dtype.
    utils.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    utils.ops_dtype_check(data_x1.dtype, utils.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
def floor_div(data1, data2, target=utils.CCE):
    """
    Calculate x/y, and always returns an integer which is floored.

    On mini products the quotient is formed as data1 * reciprocal(data2) with the
    high-precision reciprocal; otherwise akg.topi.divide is used. The quotient is
    then passed through akg.lang.ascend.floor.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Forwarded to reciprocal on mini products.

    Returns:
        tvm.tensor.Tensor, has type of int32.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check([data1.dtype, data2.dtype], utils.DtypeForDavinci.ALL_FLOAT)
    dividend_dims = [extent.value for extent in data1.shape]
    utils.check_shape(dividend_dims)
    divisor_dims = [extent.value for extent in data2.shape]
    utils.check_shape(divisor_dims)

    if product_is_mini():
        inverse = reciprocal(data2, high_precision=True, target=target)
        quotient = data1 * inverse
    else:
        quotient = akg.topi.divide(data1, data2)

    return akg.lang.ascend.floor(quotient)
def greater_equal(data1, data2, target=utils.CCE):
    """
    Check element-wise whether data1 is greater than or equal to data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor with the same dtype as data1.
        target (str): Target name, validated by utils.check_supported_target.
            On utils.CCE the dtype is additionally checked against
            DtypeForDavinci.FLOAT16.

    Returns:
        tvm.tensor.Tensor. True where data1 >= data2, else False.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)

    # check shapes
    lhs_dims = [extent.value for extent in data1.shape]
    rhs_dims = [extent.value for extent in data2.shape]
    for dims in (lhs_dims, rhs_dims):
        utils.check_shape(dims)

    # check types
    lhs_dtype = data1.dtype
    rhs_dtype = data2.dtype
    utils.elemwise_dtype_check(lhs_dtype, rhs_dtype)
    if target == utils.CCE:
        utils.ops_dtype_check(lhs_dtype, utils.DtypeForDavinci.FLOAT16)

    return akg.topi.greater_equal(data1, data2)
def acos_grad(x, dy, target=utils.CCE):
    """
    Gradient for acos.

    .. math::
        dx = \\frac{-1}{(1 - x^2)^{0.5}} \\cdot dy

    The factor 1 / (1 - x^2)^0.5 is obtained from rsqrt applied to (1 - x * x).

    Args:
        x (tvm.tensor.Tensor): tensor of type float16, float32.
        dy (tvm.tensor.Tensor): tensor of type float16, float32.
        target (str): Forwarded to rsqrt.

    Returns:
        tvm.tensor.Tensor, same type and shape as x.

    Supported Platforms:
        'Ascend'
    """
    dtype = x.dtype
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.ops_dtype_check(dy.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(x.shape)
    utils.check_shape(dy.shape)

    unit = akg.tvm.const(1.0, dtype=dtype)

    def _one_minus_square(*idx):
        return (unit - x(*idx) * x(*idx))

    mid_square = akg.tvm.compute(x.shape, _one_minus_square, name="mid_square")
    inv_root = rsqrt(mid_square, target)

    def _grad(*idx):
        return -inv_root(*idx) * dy(*idx)

    return akg.tvm.compute(x.shape, _grad, name="dx")
def neg(data, target=utils.CCE):
    """
    Computes negative value of input tensor.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32, int32.
        target (str): Target name, validated by utils.check_supported_target.

    Returns:
        tvm.tensor.Tensor of same type and shape as input tensor data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data.shape)

    if target != utils.CCE:
        return akg.topi.negative(data)

    # Ascend path: multiply by the constant -1.
    in_dtype = data.dtype
    utils.ops_dtype_check(in_dtype, [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    minus_one = akg.tvm.const(-1.0, dtype=in_dtype)
    negated = akg.lang.ascend.vmuls(data, minus_one)
    if in_dtype == "int32":
        # Cast the product back to int32.
        negated = akg.topi.cast(negated, "int32")
    return negated
def ExpandDims(data, axis, target=utils.CCE):
    """
    Expand the dimensions of data via akg.topi.expand_dims at position axis.

    Args:
        data (tvm.tensor.Tensor): Tensor. On utils.CCE its dtype is checked
            against ALL_FLOAT and INT32.
        axis (int): axis passed to akg.topi.expand_dims.
        target (str): Target name, validated by utils.check_supported_target.

    Returns:
        tvm.tensor.Tensor, data with its dimensions expanded.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data.shape)

    if target != utils.CCE:
        return akg.topi.expand_dims(data, axis)

    utils.ops_dtype_check(data.dtype, [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    # The Ascend path passes 1 as the third argument explicitly.
    return akg.topi.expand_dims(data, axis, 1)
def atanh(input_data):
    """
    Return atanh(x)=0.5*ln((1+x)/(1-x)) if abs(x)<1.

    float16 input is cast to float32 before the computation, and the result is
    cast back to the input dtype. The computation is done by _compute_mini on
    mini products and by _compute_cloud otherwise.

    Args:
        input_data (tvm.tensor.Tensor): Input tensor, only support float16, float32.

    Returns:
        A tvm.tensor.Tensor as result of atanh.

    Supported Platforms:
        'Ascend'
    """
    dims = get_shape(input_data)
    utils.check_shape(dims)

    src_dtype = input_data.dtype
    utils.ops_dtype_check(src_dtype, utils.DtypeForDavinci.ALL_FLOAT)

    if src_dtype == "float16":
        input_data = topi.cast(input_data, "float32")

    if product_is_mini():
        result = _compute_mini(input_data, dims)
    else:
        result = _compute_cloud(input_data)

    # Restore the caller's dtype.
    return topi.cast(result, src_dtype)
def TanhGrad(data_y, data_dy, target=utils.CCE):
    """
    Compute the backpropagation gradient of tanh: dx = dy * (1 - y * y).

    Args:
        data_y: Tensor of type float16 or float32, which equals the output of tanh.
        data_dy: Tensor, the initial gradients.
        target (str): Not read by this function.

    Return:
        Tensor, overall gradients.

    Supported Platforms:
        'Ascend'
    """
    dtype = data_y.dtype
    utils.ops_dtype_check(data_y.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    y_dims = [extent.value for extent in data_y.shape]
    utils.check_shape(y_dims)

    # dx = dy * (1 - y*y)
    minus_one = akg.tvm.const(-1, dtype=dtype)
    plus_one = akg.tvm.const(1, dtype=dtype)
    y_squared = akg.lang.ascend.vmul(data_y, data_y)
    neg_y_squared = akg.lang.ascend.vmuls(y_squared, minus_one)
    one_minus_y_squared = akg.lang.ascend.vadds(neg_y_squared, plus_one)
    return akg.lang.ascend.vmul(one_minus_y_squared, data_dy)
def less(data1, data2, target=utils.CCE):
    """
    Check element-wise whether data1 is less than data2.

    On utils.CCE the inputs may be cast before the comparison: to float16 on
    mini products (when not already float16), and from int32 to float32 otherwise.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        target (str): Target name, validated by utils.check_supported_target.

    Returns:
        tvm.tensor.Tensor. If data1 less than data2, return True, else return False.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data1.shape)
    utils.check_shape(data2.shape)

    # check types
    if target == utils.CCE:
        supported_dtypes = [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32]
        utils.elemwise_dtype_check(data1.dtype, data2.dtype, supported_dtypes)

        # check runtime mode, and change dtype
        compare_dtype = None
        if product_is_mini():
            if data1.dtype != "float16":
                compare_dtype = "float16"
        elif data1.dtype == "int32":
            compare_dtype = "float32"

        if compare_dtype is not None:
            data1 = akg.topi.cast(data1, compare_dtype)
            data2 = akg.topi.cast(data2, compare_dtype)

    return akg.topi.less(data1, data2)