def apply_rms_prop_mixed_precision(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Mixed precision variant of apply_rms_prop.

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Must be float32.
        ms (tvm.tensor.Tensor): Mean square, same shape and type as var.
        mom (tvm.tensor.Tensor): Momentum buffer, same shape and type as var.
        grad (tvm.tensor.Tensor): Gradient, same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (float): Coefficient for the new mom, 0.0 <= momentum <= 1.0.
        rho (float): Coefficient for the new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, updated var of type float32.
        tvm.tensor.Tensor, updated var of type float16.
        tvm.tensor.Tensor, updated ms.
        tvm.tensor.Tensor, updated mom.
        dict, attrs mapping utils.BINDS to the merged bind info returned by
        TensorUtils.inplace_set for var, ms and mom.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.FLOAT32)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    new_var, new_var_fp16, new_ms, new_mom = _apply_rms_prop_mixed_precision_compute(
        var, ms, mom, grad, lr, momentum, rho, epsilon)

    # Bind each updated tensor to its input buffer and collect the bind info.
    new_var, merged_binds = TensorUtils.inplace_set(var, new_var, "var_buf")
    new_ms, ms_binds = TensorUtils.inplace_set(ms, new_ms, "ms_buf")
    new_mom, mom_binds = TensorUtils.inplace_set(mom, new_mom, "mom_buf")
    for extra_binds in (ms_binds, mom_binds):
        merged_binds.update(extra_binds)

    return new_var, new_var_fp16, new_ms, new_mom, {utils.BINDS: merged_binds}
def gather(params_shape, indices_shape, params_dtype, indices_dtype, axis, kernel_name, cce_path="./"):
    """
    Build a gather kernel that picks rows of a 2-D params tensor by 1-D indices.

    Args:
        params_shape (Union[list, tuple]): Shape of params, must have length 2.
        indices_shape (Union[list, tuple]): Shape of indices, must have length 1.
        params_dtype (str): Data type of params.
        indices_dtype (str): Data type of indices, must be int32.
        axis (int): Gather axis, only 0 is accepted.
        kernel_name (str): Name of the generated kernel.
        cce_path (str): Directory passed to utils.create_code for the source.

    Returns:
        The module returned by akg.build.
    """
    vc_util.check_shape(params_shape, length=2)
    vc_util.check_shape(indices_shape, length=1)
    vc_util.ops_dtype_check(params_dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.ops_dtype_check(indices_dtype, vc_util.DtypeForDavinci.INT32)
    vc_util.check_equal("axis", "zero", axis, 0)

    # Describe the computation: one output row per index.
    out_shape = (indices_shape[0], params_shape[1])
    params = akg.tvm.placeholder(params_shape, dtype=params_dtype, name="X")
    indices = akg.tvm.placeholder(indices_shape, dtype=indices_dtype, name="Y")

    def _emit_ir(ins, outs):
        return kernel_ir(outs[0], ins[0], ins[1])

    res = akg.tvm.extern(out_shape, [params, indices], _emit_ir,
                         name="res", dtype=params_dtype)
    sch = akg.tvm.create_schedule(res.op)

    # Build for cce and dump the generated source.
    build_attrs = {"enable_multicore": False}
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(sch, [params, indices, res], "cce", name=kernel_name, attrs=build_attrs)
        source_code = mod.imported_modules[0].get_source()
    utils.create_code(kernel_name, cce_path, source_code)
    return mod
def dropout_do_mask(data_tensor, data_mask, keep_prob):
    """
    Scale data_tensor by 1 / keep_prob and apply a dropout mask to it.

    Args:
        data_tensor (tvm.tensor.Tensor): Input tensor of type float16 or float32.
        data_mask (tvm.tensor.Tensor): Mask tensor, read at position
            (flattened element index // 8); presumably one mask bit per
            element packed eight to an entry -- TODO confirm against caller.
        keep_prob (float): Probability of keeping an element, 0 < keep_prob <= 1.

    Returns:
        tvm.tensor.Tensor with the same shape and dtype as data_tensor.

    Raises:
        RuntimeError: If keep_prob is outside [0, 1], or is exactly 0
            (the scale 1 / keep_prob would be undefined).
    """
    dtype = data_tensor.dtype
    shape_tensor = [x.value for x in data_tensor.shape]
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(shape_tensor)

    # strides[k] is the element stride of the k-th axis counted from the innermost one.
    strides = [1]
    for dim in reversed(shape_tensor):
        strides.append(strides[-1] * dim)

    if keep_prob < 0 or keep_prob > 1:
        raise RuntimeError("keep_prob must in [0,1]")
    # keep_prob == 0 passes the range check above but would raise an opaque
    # ZeroDivisionError below; fail with an explicit message instead.
    if keep_prob == 0:
        raise RuntimeError("keep_prob must not be 0, 1 / keep_prob is undefined")

    keep_prob_const = akg.tvm.const(1.0 / keep_prob, dtype=dtype)
    data_scale_ub = akg.tvm.compute(
        shape_tensor,
        lambda *indices: data_tensor(*indices) * keep_prob_const,
        name='data_scale_ub')

    def get_index(indices):
        """Map a multi-dimensional index to its position in data_mask."""
        idx = 0
        # Pair the innermost index with strides[0], the next with strides[1], ...
        for stride, index in zip(strides, reversed(indices)):
            idx += index * stride
        return idx // 8

    if dtype == "float32":
        # The dropout intrinsic is applied on float16 data, then cast back.
        data_scale_ub_16 = akg.topi.cast(data_scale_ub, "float16")
        res_ub_16 = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub_16(*indice)))
        res = akg.topi.cast(res_ub_16, "float32")
    else:
        res = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub(*indice)))

    return res
def logical_not(inputs):
    """
    Compute the element-wise logical NOT of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Input tensor of type bool.

    Returns:
        tvm.tensor.Tensor, result of akg.topi.logical_not on inputs.
    """
    vc_util.ops_dtype_check(inputs.dtype, vc_util.DtypeForDavinci.BOOL)
    vc_util.check_shape(inputs.shape)
    return akg.topi.logical_not(inputs)
def discontinous_mov(data, out_shape):
    """
    Copy every second element of data into each row of a 2-D output.

    The output is defined as output[j, i] = data[i * 2], i.e. the 1st, 3rd,
    5th, ... elements of data are repeated on every row.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        out_shape (list): Shape of the output (two dimensions).

    Returns:
        tvm.tensor.Tensor, has the same type as data and shape out_shape.

    Example:
        if data = [1,2,3,4,5,6,7,8,9,10] and out_shape = [2, 5] then
        the output = [[1,3,5,7,9],[1,3,5,7,9]].
    """
    # Validate dtype and the input shape before building the compute.
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    in_shape = [dim.value for dim in data.shape]
    vc_util.check_shape(in_shape)

    return akg.tvm.compute(out_shape, lambda j, i: data[i * 2], name="output")
def round_value(input):
    """
    Round the values of a tensor element-wise via akg.lang.cce.round.

    Args:
        input (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor of same shape as input; the original documentation
        states the result type is int32 with round-half-to-even semantics.
    """
    dtype = input.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = input.shape
    vc_util.check_shape(shape)

    # The rounding is applied on float16 data; convert other inputs first.
    data_f16 = input
    if dtype != "float16":
        data_f16 = akg.tvm.compute(shape, lambda *i: input(*i).astype("float16"), name="data_f16")

    return akg.lang.cce.round(data_f16)
def xdivy(data_x1, data_x2):
    """
    Calculate data_x1 divided by data_x2, yielding 0 where data_x1 is 0.

    .. math::
        y = \\left\\{
            \\begin{aligned}
                0, && if \\quad x1 == 0 \\\\
                \\dfrac{x1}{x2}, && otherwise
            \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"

    Returns:
        tvm.tensor.Tensor
    """
    # Both shapes must be valid on their own.
    for shape in (get_shape(data_x1), get_shape(data_x2)):
        vc_util.check_shape(shape)

    # Inputs must share one floating point dtype.
    vc_util.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    vc_util.ops_dtype_check(data_x1.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
def truncate_div(input_x1, input_x2):
    """
    Calculate truncate_div, res = floor(x1/x2) if x1/x2>0 else ceil(x1/x2).

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, support float16,
            float32 on mini device, while support int32, int8, uint8,
            float16, float32 on cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.

    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    vc_util.check_shape(get_shape(input_x1))
    vc_util.check_shape(get_shape(input_x2))
    vc_util.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)

    # The mini product only accepts floating point inputs.
    if utils.product_is_mini():
        supported_dtypes = vc_util.DtypeForDavinci.ALL_FLOAT
    else:
        supported_dtypes = (vc_util.DtypeForDavinci.ALL_FLOAT,
                            vc_util.DtypeForDavinci.INT32,
                            vc_util.DtypeForDavinci.INT8,
                            vc_util.DtypeForDavinci.UINT8)
    vc_util.ops_dtype_check(input_x1.dtype, supported_dtypes)

    return truncate_div_compute(input_x1, input_x2)
def reduce_any_d(x, axis=None, keepdims=False):
    """
    Reduce a tensor on a certain axis based on max.

    Args:
        x (tvm.tensor.Tensor): The input tensor to reduce. Should be of type int8.
        axis (Union[list, tuple, int, None]): The dimensions to reduce.
            If None, all dimensions will be reduced. Each dim must be in
            the range [-len(data.shape), len(data.shape) - 1].
        keepdims (Union[bool, None]): If True, retains reduced dimensions
            with length 1, defaults to False.

    Returns:
        tvm.tensor.Tensor of same type as input tensor x. When every
        dimension is reduced the result is reshaped to (1, ).
    """
    # Validate dtype, shape and the requested axes.
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.INT8)
    vc_util.check_shape(x.shape)
    vc_util.reduce_axis_check(x.shape, axis)

    refined_axis = refine_reduce_axis(x, axis)
    reduces_all_dims = len(set(refined_axis)) == len(x.shape)

    # A full reduction is always computed with kept dims and flattened afterwards.
    if reduces_all_dims and not keepdims:
        keepdims = True

    res = _reduce_any_d_compute(x, refined_axis, keepdims)

    # NOTE(review): this reshape also runs when the caller passed
    # keepdims=True, so a full reduction never keeps its unit dims -- confirm
    # whether that is intended.
    if reduces_all_dims:
        res = topi.reshape(res, (1, ))
    return res
def apply_adagrad(var, accum, learning_rate, grad, update_slots=True):
    """
    Update `var` according to the Adagrad algorithm.

    .. math:
        accum += grad^2
        var -= learning_rate * grad / accum.sqrt()

    Args:
        var (tvm.tensor.Tensor): input var to be updated of type float16, float32
        accum (tvm.tensor.Tensor): accumulation of the squared gradients of type float16, float32
        learning_rate (tvm.tensor.Tensor): A scalar tensor of type float16, float32
        grad (tvm.tensor.Tensor): input grad of type float16, float32
        update_slots (Bool): If True, the accum tensor will be updated;
            otherwise the accum tensor will not be updated. Defaults to 'True'.

    Returns:
        tvm.tensor.Tensor, the updated var.
        tvm.tensor.Tensor, the updated accum.

    Raises:
        RuntimeError: If learning_rate does not have shape (1,).
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    # Every other tensor must share var's dtype.
    vc_util.elemwise_dtype_check(var.dtype, accum.dtype)
    vc_util.elemwise_dtype_check(var.dtype, learning_rate.dtype)
    vc_util.elemwise_dtype_check(var.dtype, grad.dtype)

    # accum and grad must share var's shape.
    vc_util.elemwise_shape_check(var.shape, accum.shape)
    vc_util.elemwise_shape_check(var.shape, grad.shape)

    lr_shape = tuple(get_shape(learning_rate))
    if lr_shape != (1,):
        raise RuntimeError("learning_rate only support scalar tensor")

    return _apply_adagrad_compute(var, accum, learning_rate, grad, update_slots)
def matrix_diag_part(input_diagonal, input_help):
    """
    Calculate the batched diagonal part of a batched tensor.

    Note:
        input_help is a tensor with a diagonal element of 1 and other
        positions of 0, the last two dimensions can be unequal.

    Args:
        input_diagonal (tvm.tensor.Tensor): Tensor of float32, float16,
            int32, int8, uint8. The last two dimensions can be unequal.
        input_help (tvm.tensor.Tensor): Tensor of float32, float16, int32,
            int8, uint8, and with a diagonal element of 1 and other
            positions of 0.

    Returns:
        tvm.tensor.Tensor, has the same type as input_diagonal, the shape
        dims is equal to dims(input_diagonal) - 1.

    Raises:
        ValueError: If the inputs have rank lower than 2.
    """
    vc_util.elemwise_shape_check(input_help.shape, input_diagonal.shape)

    rank = len(input_help.shape)
    if rank < 2:
        raise ValueError("Input tensors of rank>=2 are supported!")

    supported_dtypes = [vc_util.DtypeForDavinci.ALL_FLOAT,
                        vc_util.DtypeForDavinci.INT8,
                        vc_util.DtypeForDavinci.INT32,
                        vc_util.DtypeForDavinci.UINT8]
    vc_util.ops_dtype_check([input_diagonal.dtype, input_help.dtype], supported_dtypes)

    return matrix_diag_part_compute(input_diagonal, input_help)
def reverse(input_data, axis):
    """
    Reverse a tensor on some dimension.

    Args:
        input_data (tvm.tensor.Tensor): Tensor of float16, float32 and int32.
        axis (Union[list, tuple, int]): Axes to reverse. Reversing the last
            dimension is not supported, so axis cannot be None.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as input_data
    """
    shape = get_shape(input_data)

    # Validate shape and dtype.
    vc_util.check_shape(shape)
    vc_util.ops_dtype_check(
        input_data.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    # Normalize axis to a list: sequences (anything exposing `index`) are
    # copied, a single int is wrapped.
    if hasattr(axis, 'index'):
        axis = list(axis)
    if isinstance(axis, int):
        axis = [axis]
    vc_util.axis_check(len(shape), axis)
    _check_axis(axis, shape)

    return reverse_compute(input_data, axis)
def sparse_softmax_cross_entropy_with_logits(labels, logits, reduction='mean'):
    """
    Compute sparse softmax cross entropy between `logits` and `labels`.

    Note:
        Softmax calculation of Logits is done inside the op.

    Args:
        labels (tvm.tensor.Tensor): int32 tensor of shape [batch_size].
            Each entry in it must be an index in `[0, num_classes)`.
        logits (tvm.tensor.Tensor): float32 or float16 tensor of shape
            [batch_size, num_class].
        reduction (str): Specifies the reduction to apply to the output:
            'none' or 'mean' or 'sum'. Default: 'mean'.
            'none': no reduction for the output
            'sum': the sum for the output
            'mean': the mean for the output.

    Returns:
        tvm.tensor.Tensor, the cost, has the same dtype as logits.
        dict, attrs carrying the tiling strategy under "custom_tiling".
    """
    vc_util.ops_dtype_check(logits.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    # The third value returned by the implementation is not needed here.
    impl_result = sparse_softmax_cross_entropy_with_logits_impl(labels, logits, reduction)
    strategy, cost, _ = impl_result

    return cost, {"custom_tiling": strategy}
def matrix_diag(data, out_shape):
    """
    Generate a batched tensor whose diagonal values are taken from `data`.

    Args:
        data (tvm.tensor.Tensor): A tensor of type float16, float32 or int32. Rank is L.
        out_shape (Union[list, tuple]): Output shape of length L + 1.
            The value of `out_shape[0, ..., L-1]` should be equal to
            `data.shape[0, ..., L-1]`.

    Returns:
        tvm.tensor.Tensor, has same type as "data", shape is "out_shape".

    Raises:
        RuntimeError: If out_shape[:-2] differs from data.shape[:-1].
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.ALL_FLOAT,
                                    vc_util.DtypeForDavinci.INT32])

    shape = get_shape(data)
    vc_util.check_shape(data)
    vc_util.check_shape(out_shape, length=len(shape) + 1)
    if tuple(out_shape[:-2]) != tuple(shape[:-1]):
        raise RuntimeError("The value of out_shape[:-2] should be equal to data.shape[:-1]")

    def _diag_element(*i):
        # On the diagonal (and inside data's last dimension) read from data,
        # everywhere else emit zero.
        on_diagonal = akg.tvm.all(i[-1] == i[-2], i[-1] < shape[-1])
        return akg.tvm.if_then_else(on_diagonal, data(*i[:-1]), zero_const(dtype))

    return akg.tvm.compute(out_shape, _diag_element, name="diag")
def pad(data, paddings, padtype):
    """
    Add paddings to a tensor.

    Args:
        data (tvm.tensor.Tensor): Input tensor; its dtype is validated against
            vc_util.DtypeForDavinci.ALL_TYPES.
        paddings (Union[list, tuple]): Sequence of [before, after] pairs, one
            per dimension of data. For each dimension D, paddings[D][0] is how
            many values to add before the contents of the tensor in that
            dimension and paddings[D][1] how many to add after.
        padtype (str): Padding mode. Only constant padding is supported; the
            name is matched case-insensitively, so both "constant" and
            "CONSTANT" are accepted.

    Returns:
        tvm.tensor.Tensor, the padded tensor with the same dtype as data.

    Raises:
        RuntimeError: If padtype is not a supported padding mode.
    """
    # check shape
    vc_util.check_shape(data.shape)
    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    # check padding types; compare case-insensitively so the documented
    # upper-case spelling is accepted as well as the lower-case one.
    ptype_checklist = ['constant']
    if not (isinstance(padtype, str) and padtype.lower() in ptype_checklist):
        raise RuntimeError("pad_cce only support %s while padtype is %s" % (",".join(ptype_checklist), padtype))

    dtype = data.dtype
    # int8/uint8 inputs are padded in float16 and cast back afterwards.
    needs_fp16 = dtype in ('int8', 'uint8')
    if needs_fp16:
        data = cast(data, "float16")

    rank = len(data.shape)
    pad_before = [paddings[i][0] for i in range(rank)]
    pad_after = [paddings[i][1] for i in range(rank)]
    B = tvm_pad(data, pad_before, pad_after=pad_after, name='B')

    if needs_fp16:
        B = cast(B, dtype)
    return B
def reciprocal(data, high_precision=True):
    """
    Compute the reciprocal of data element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        high_precision (bool): Whether to use the high-precision version.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.
    """
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [dim.value for dim in data.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape,
                          lambda *indice: akg.tvm.const(1, data.dtype) / (data(*indice)),
                          name="res")

    # On the mini product, refine the result with Newton iteration for
    # higher precision: res <- res * (2 - data * res).
    if utils.product_is_mini() and high_precision:
        newton_steps = 1
        minus_one = akg.tvm.const(-1, data.dtype)
        two = akg.tvm.const(2, data.dtype)
        for _ in range(newton_steps):
            correction = data * res * minus_one + two
            res = correction * res

    return res
def case_1(data_shape, dtype, kernel_name, attrs):
    """
    Elemwise chain case 1: E[i, j] = A[i, j] * (B[j] + C[i, j]).

    Args:
        data_shape (Union[list, tuple]): Shape (m, k) of A, C and E; B has shape (k, ).
        dtype (str): Data type of all tensors, must be float16.
        kernel_name (str): Base name handed to utils.gen_name_kernel.
        attrs (dict): Attributes passed to akg.build.

    Returns:
        The module returned by akg.build.
    """
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.FLOAT16)
    vc_util.check_shape_length_equal("data", data_shape, 2)

    m, k = data_shape
    A = akg.tvm.placeholder((m, k), name='A', dtype=dtype)
    B = akg.tvm.placeholder((k, ), name='B', dtype=dtype)
    C = akg.tvm.placeholder((m, k), name='C', dtype=dtype)
    E = akg.tvm.compute((m, k), lambda i, j: A[i, j] * (B[j] + C[i, j]), name="E")

    forward_s = akg.tvm.create_schedule(E.op)
    op_vars = [A, B, C, E]

    # The lowered result is not used afterwards; the call itself is kept.
    akg.lower(forward_s, op_vars, simple_mode=True, polyhedral=True)

    # NOTE(review): the generated kernel_name is not passed to akg.build,
    # which uses the fixed name "test" -- confirm this is intended.
    kernel_name = utils.gen_name_kernel(kernel_name, dtype, data_shape)

    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(forward_s, op_vars, "cce", name="test", attrs=attrs, polyhedral=True)
        # The source is fetched but not stored or written anywhere.
        mod.imported_modules[0].get_source()
    return mod
def tanh_grad(data_y, data_dy):
    """
    Compute the backpropagation gradient of tanh: dx = dy * (1 - y * y).

    Args:
        data_y (tvm.tensor.Tensor): Output of tanh, of type float16 or float32.
        data_dy (tvm.tensor.Tensor): The incoming gradients.

    Returns:
        tvm.tensor.Tensor, the overall gradients.
    """
    dtype = data_y.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape([dim.value for dim in data_y.shape])

    neg_one = akg.tvm.const(-1, dtype=dtype)
    one = akg.tvm.const(1, dtype=dtype)

    # 1 - y*y is built as (y*y) * -1 + 1.
    y_square = akg.lang.cce.vmul(data_y, data_y)
    neg_y_square = akg.lang.cce.vmuls(y_square, neg_one)
    one_minus_y_square = akg.lang.cce.vadds(neg_y_square, one)

    return akg.lang.cce.vmul(one_minus_y_square, data_dy)
def broadcast_to(x, shape):
    """
    Broadcast a tensor to a compatible shape.

    Args:
        x (tvm.tensor.Tensor): Tensor of type float32, float16, int8, uint8, int32
        shape (list, tuple): The shape of output tensor.

    Returns:
        A tvm.tensor.Tensor with the same type as x.
    """
    # Validate input and target shapes, then the dtype.
    vc_util.check_shape(x)
    vc_util.check_shape(shape)
    dtype = x.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    # The vector_dup instruction does not support int8 and uint8, so a
    # single-element int8/uint8 input is broadcast in float16 instead.
    x_shape = get_shape(x)
    is_single_element = len(x_shape) == 1 and x_shape[0] == 1
    if is_single_element and dtype in ["int8", "uint8"]:
        x = cast(x, "float16")

    res = topi.broadcast_to(x, shape)
    # Restore the original dtype if the input was cast above.
    return res if res.dtype == dtype else cast(res, dtype)
def reduce_prod(data, axis=None, keepdims=False):
    """
    Compute the product of elements along specific axis.

    The product is evaluated as exp(sum(log(data))).

    Args:
        data (tvm.tensor.Tensor): indicating the input tensor.
        axis (Union[list, tuple, int, None]): indicating the dimensions to
            reduce at. If it's None, all dimensions will be reduced.
        keepdims (Union[bool, None]): if true, keep the dimensions with length 1.

    Returns:
        Tensor, the product of elements of input tensor.

    Raises:
        ValueError: If axis is None while keepdims is False.
    """
    shape = [dim.value for dim in data.shape]
    ops_dtype_check(data.dtype, [
        DtypeForDavinci.ALL_FLOAT, DtypeForDavinci.INT8, DtypeForDavinci.UINT8
    ])

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")
    axis_new = ft_util.refine_reduce_axis(data, axis)
    check_shape(shape)

    dtype = data.dtype
    # int8/uint8 inputs are processed in float16 and cast back at the end.
    is_8bit_int = dtype in ["int8", "uint8"]
    if is_8bit_int:
        data = akg.topi.cast(data, "float16")

    log_data = akg_log(data)
    log_sum = akg.topi.sum(log_data, axis=axis_new, keepdims=keepdims)
    res = akg_exp(log_sum)

    if is_8bit_int:
        res = akg.topi.cast(res, dtype)
    return res
def logsoftmax_grad(Y, dY, axis):
    """
    Compute the back propagation gradients of logsoftmax by chain rule.

    The result is dY - exp(Y) * sum(dY, axis).

    Args:
        Y (tvm.tensor.Tensor): holds the logsoftmax activation output.
        dY (tvm.tensor.Tensor): holds the initial gradients.
        axis (int): on which dimension the softmax is applied. The only
            negative value accepted is -1.

    Returns:
        tvm.tensor.Tensor, the overall gradients.
        dict, attrs with "pragma_reschedule" and "pragma_modshift" set to 1.

    Raises:
        RuntimeError: If axis is not smaller than the rank or is below -1.
    """
    shape = [dim.value for dim in Y.shape]
    vc_util.check_shape(shape)
    vc_util.ops_dtype_check(Y.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    rank = len(shape)
    if axis == -1:
        axis = rank - 1
    if axis >= rank:
        raise RuntimeError("axis should be less than dimension")
    if axis < -1:
        raise RuntimeError(
            "negative axis only support -1, please specify the axis in positive value"
        )

    # exp(logsoftmax) recovers the softmax output.
    softmax = akg.topi.exp(Y)
    dy_sum = akg.lang.cce.sum(dY, axis=axis)
    dy_sum_broadcast = akg.lang.cce.broadcast(dy_sum, shape)
    scaled_softmax = akg.lang.cce.vmul(softmax, dy_sum_broadcast)
    res = akg.lang.cce.vsub(dY, scaled_softmax)

    return res, {"pragma_reschedule": 1, "pragma_modshift": 1}
def floordiv(data1, data2):
    """
    Calculate x/y, and always return an integer which is floored.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    vc_util.ops_dtype_check([data1.dtype, data2.dtype], vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape([dim.value for dim in data1.shape])
    vc_util.check_shape([dim.value for dim in data2.shape])

    if not utils.product_is_mini():
        quotient = akg.topi.divide(data1, data2)
    else:
        # On the mini product the division is done as a multiplication by
        # the high-precision reciprocal.
        quotient = data1 * reciprocal(data2, high_precision=True)

    return akg.lang.cce.floor(quotient)
def acos_grad(x, dy):
    """
    Gradient for acos.

    .. math:
        dx = \\frac{-1}{(1 - x^2)^{0.5}} \\cdot dy

    Args:
        x (tvm.tensor.Tensor): tensor of type float16, float32.
        dy (tvm.tensor.Tensor): tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, same type and shape as x.
    """
    for tensor in (x, dy):
        vc_util.ops_dtype_check(tensor.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for tensor in (x, dy):
        vc_util.check_shape(tensor.shape)

    one = akg.tvm.const(1.0, dtype=x.dtype)
    # 1 - x^2, then its reciprocal square root.
    mid_square = akg.tvm.compute(x.shape, lambda *i: (one - x(*i) * x(*i)), name="mid_square")
    rsq = rsqrt.rsqrt(mid_square)

    return akg.tvm.compute(x.shape, lambda *i: -rsq(*i) * dy(*i), name="dx")
def concat(data, axis):
    """
    Concatenate data along the dimension set by axis.

    Args:
        data (Union[list, tuple]): list or tuple of tvm.tensor.Tensor of type
            float16, float32, int32, int8, uint8
        axis (int): Specifies the axis along which to concatenate. Must be in
            the range [-rank(data), rank(data))

    Returns:
        tvm.tensor.Tensor of same type as data.

    Raises:
        RuntimeError: If data holds fewer than min_size tensors.
        ValueError: If the input tensors do not all have the same rank.
    """
    num_inputs = len(data)
    # NOTE(review): min_size is not defined in this function; it is expected
    # to come from module scope -- confirm it exists and equals 1, as the
    # message below states.
    if num_inputs < min_size:
        raise RuntimeError("The size of data must be greater equal 1")

    first = data[0]
    vc_util.ops_dtype_check(first.dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    first_shape = first.shape
    vc_util.check_shape(first_shape)
    rank = len(first_shape)

    # Map a negative axis onto its non-negative equivalent.
    if axis < 0:
        axis += rank

    # Every remaining input must be valid and have the same rank as the first.
    for tensor in data[1:]:
        tensor_shape = tensor.shape
        vc_util.check_shape(tensor_shape)
        if len(tensor_shape) != rank:
            raise ValueError("Input tensors must have same dimensions.")

    return akg.lang.cce.concat(data, axis)
def select(condition, x1, x2):
    """
    Select elements from x1 or x2, depending on condition.

    Note:
        every param's shape needs to be legal; broadcasting of condition's
        shape is supported.

    Args:
        condition (tvm.tensor.Tensor): Tensor of type int8, int32, must be 0 or 1.
        x1 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.
        x2 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as x1.
    """
    shape_x1 = get_shape(x1)
    shape_x2 = get_shape(x2)
    con_shape = get_shape(condition)

    # x1 and x2 must agree in shape and dtype.
    vc_util.elemwise_shape_check(shape_x1, shape_x2)
    value_dtypes = [
        vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT8,
        vc_util.DtypeForDavinci.INT32, vc_util.DtypeForDavinci.UINT8
    ]
    vc_util.elemwise_dtype_check(x1.dtype, x2.dtype, value_dtypes)

    # condition must be an integer mask that can broadcast to x1.
    condition_dtypes = [vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.INT32]
    vc_util.ops_dtype_check(condition.dtype, condition_dtypes)
    vc_util.auto_broadcast_check(con_shape, shape_x1)

    return select_compute(condition, x1, x2)
def blas_axby(x, y, alpha, beta):
    r"""
    Blas axby: compute :math:`\alpha x + \beta y`.

    Args:
        x (tvm.tensor.Tensor): Input `x` of type float16 or float32.
        y (tvm.tensor.Tensor): Input `y` of type float16 or float32.
        alpha (Union[int, float]): Scale of `x`.
        beta (Union[int, float]): Scale of `y`.

    Returns:
        tvm.tensor.Tensor, has the same shape and type as inputs.
    """
    vc_util.ops_dtype_check([x.dtype, y.dtype], vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(y.shape)

    scaled_x = akg.lang.cce.vmuls(x, alpha)
    scaled_y = akg.lang.cce.vmuls(y, beta)
    return akg.lang.cce.vadd(scaled_x, scaled_y)
def leaky_relu(data, negative_slop=0):
    """
    leaky_relu op for input tensor (N,C,H,W) OR (N,C1,H,W,C0).

    ..math:`max(x,negative_slop*x)`

    Args:
        data (tvm.tensor.Tensor): tensor with type float16 or float32.
        negative_slop (float): 0<=negative_slop<1

    Returns:
        tvm.tensor.Tensor.

    Raises:
        RuntimeError: If negative_slop is outside [0, 1).
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(data.shape)

    if negative_slop < 0 or negative_slop >= 1:
        raise RuntimeError(
            "leaky_relu only support negative_slop between [0,1)")

    slope = akg.tvm.const(negative_slop, dtype=dtype)
    scaled = akg.lang.cce.vmuls(data, slope)
    return akg.lang.cce.vmax(scaled, data)
def kldiv_loss_grad(pre_deriv, inputs, target):
    """
    Do backprop for kldiv loss: grad = target / inputs * pre_deriv.

    Args:
        pre_deriv (tvm.tensor.Tensor): Gradient tensor for forward output.
        inputs (tvm.tensor.Tensor): Forward input tensor.
        target (tvm.tensor.Tensor): Forward output tensor.

    Returns:
        Gradient tensor for forward input.

    Raises:
        RuntimeError: If inputs and target do not have the same shape.
    """
    inputs_dtype = inputs.dtype
    vc_util.ops_dtype_check([inputs_dtype, target.dtype, pre_deriv.dtype],
                            vc_util.DtypeForDavinci.ALL_FLOAT)

    if get_const_tuple(target.shape) != get_const_tuple(inputs.shape):
        raise RuntimeError("Please ensure inputs have the same size.",
                           target.shape, inputs.shape)

    # On the mini product float32 operands are divided in float16 and the
    # result is cast back to the original dtype afterwards.
    compute_in_fp16 = utils.product_is_mini() and inputs_dtype == 'float32'
    if compute_in_fp16:
        inputs = akg.topi.cast(inputs, "float16")
        target = akg.topi.cast(target, "float16")

    ratio = akg.topi.divide(target, inputs)
    cur_deriv = akg.topi.multiply(ratio, pre_deriv)

    if compute_in_fp16:
        cur_deriv = akg.topi.cast(cur_deriv, inputs_dtype)
    return cur_deriv
def minimum(input1, input2):
    """
    Return the min value of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1: Tensor.
        input2: Tensor. Has the same type as input1.

    Returns:
        Tensor, has the same type as inputs.
    """
    vc_util.ops_dtype_check([input1.dtype, input2.dtype], vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    lhs_shape = [dim.value for dim in input1.shape]
    rhs_shape = [dim.value for dim in input2.shape]
    vc_util.check_shape(lhs_shape)
    vc_util.check_shape(rhs_shape)
    vc_util.auto_broadcast_check(lhs_shape, rhs_shape)

    # int8/uint8 inputs are compared in float16 and cast back afterwards.
    via_fp16 = dtype in ("int8", "uint8")
    if via_fp16:
        input1 = cast(input1, "float16")
        input2 = cast(input2, "float16")

    res = akg.topi.minimum(input1, input2)
    return cast(res, dtype) if via_fp16 else res
def bitwise_or(x1, x2):
    """
    Compute the bitwise or of `x1` and `x2`.

    Both inputs are broadcast to their common shape before the element-wise
    `|` is applied.

    Args:
        x1 (tvm.tensor.Tensor): Tensor of type int16, uint16.
        x2 (tvm.tensor.Tensor): Tensor of type int16, uint16.

    Returns:
        tvm.tensor.Tensor, has the same type as x1.

    Raises:
        RuntimeError: If x1 and x2 have different dtypes.
    """
    # check shape
    vc_util.check_shape(x1)
    vc_util.check_shape(x2)
    _, _, output_shape = produce_shapes(get_shape(x1), get_shape(x2))

    # check input tensor data_type
    vc_util.ops_dtype_check(
        [x1.dtype, x2.dtype],
        [vc_util.DtypeForDavinci.INT16, vc_util.DtypeForDavinci.UINT16])
    dtype = x1.dtype
    if dtype != x2.dtype:
        # Interpolate the dtypes into the message; passing them as separate
        # exception arguments would leave the "%s" placeholders unformatted.
        raise RuntimeError("input type must be same, but got %s vs %s" % (dtype, x2.dtype))

    x1 = akg.topi.broadcast_to(x1, output_shape)
    x2 = akg.topi.broadcast_to(x2, output_shape)
    res = akg.tvm.compute(output_shape, lambda *indice: x1(*indice) | x2(*indice))
    return res