def greater_equal(data1, data2, target=utils.CCE):
    """
    Element-wise test of whether data1 is greater than or equal to data2.

    Args:
        data1 (tvm.tensor.Tensor): Left-hand operand.
        data2 (tvm.tensor.Tensor): Right-hand operand; its dtype is checked against
            data1's dtype by utils.elemwise_dtype_check.
        target (str): Backend target, validated by utils.check_supported_target.
            Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, the result of akg.topi.greater_equal(data1, data2).

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)

    # Extract both shapes first, then validate each of them in turn.
    lhs_shape = [dim.value for dim in data1.shape]
    rhs_shape = [dim.value for dim in data2.shape]
    for candidate in (lhs_shape, rhs_shape):
        utils.check_shape(candidate)

    # The operand dtypes are checked against each other; on CCE the dtype is
    # additionally checked against DtypeForDavinci.FLOAT16.
    lhs_dtype = data1.dtype
    rhs_dtype = data2.dtype
    utils.elemwise_dtype_check(lhs_dtype, rhs_dtype)
    if target == utils.CCE:
        utils.ops_dtype_check(lhs_dtype, utils.DtypeForDavinci.FLOAT16)

    return akg.topi.greater_equal(data1, data2)
def maximum(data1, data2, target=utils.CCE):
    """
    Take the element-wise maximum of two tensors with auto-broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): First operand.
        data2 (tvm.tensor.Tensor): Second operand; its dtype is checked against
            data1's dtype.
        target (str): Backend target, validated by utils.check_supported_target.
            Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor holding the maximum of the two tensors, in the dtype
        of data1.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)

    lhs_shape = [dim.value for dim in data1.shape]
    rhs_shape = [dim.value for dim in data2.shape]
    utils.check_shape(lhs_shape)
    utils.check_shape(rhs_shape)
    utils.auto_broadcast_check(lhs_shape, rhs_shape)
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)

    origin_dtype = data1.dtype
    # 8-bit integer inputs on CCE are routed through float16 and cast back.
    via_fp16 = target == utils.CCE and origin_dtype in ("int8", "uint8")
    if not via_fp16:
        return topi.maximum(data1, data2)

    data1 = Cast(data1, "float16")
    data2 = Cast(data2, "float16")
    return Cast(topi.maximum(data1, data2), origin_dtype)
def xdivy(data_x1, data_x2, target=utils.CCE):
    """
    Validate the inputs and delegate the x1 / x2 computation to xdivy_compute.

    .. math::
        y = \\left\\{
            \\begin{aligned}
                0, && if \\quad x1 == 0 \\\\
                \\dfrac{x1}{x2}, && otherwise
            \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor
    """
    # Fetch both shapes before validating either one.
    first_shape = get_shape(data_x1)
    second_shape = get_shape(data_x2)
    for candidate in (first_shape, second_shape):
        utils.check_shape(candidate)

    utils.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    utils.ops_dtype_check(data_x1.dtype, utils.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
def Addn(data, target=utils.CCE):
    """
    Sum a sequence of tensors element-wise via akg.topi.elemwise_sum.

    Args:
        data (sequence of tvm.tensor.Tensor): Tensors to add. Every tensor is
            checked against the first one for matching dtype and shape.
        target (str): Backend target, validated by utils.check_supported_target.
            Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, the result of akg.topi.elemwise_sum(data).

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)

    reference = data[0]
    reference_dtype = reference.dtype
    # On CCE the dtype of the first tensor is checked against ALL_FLOAT.
    if target == utils.CCE:
        utils.ops_dtype_check(reference_dtype, utils.DtypeForDavinci.ALL_FLOAT)

    # All remaining tensors are compared with the first one.
    for position in range(1, len(data)):
        current = data[position]
        utils.elemwise_dtype_check(reference.dtype, current.dtype)
        utils.elemwise_shape_check(reference.shape, current.shape)

    return akg.topi.elemwise_sum(data)
def minimum_ad(head, data_x, data_y, grad_x=True, grad_y=True):
    """
    Compute the backward outputs of the minimum operator by automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor of float32, float16 and int32.
        data_x (tvm.tensor.Tensor): Input tensor of float32, float16 and int32.
        data_y (tvm.tensor.Tensor): Input tensor of float32, float16 and int32.
        grad_x (bool): Default is True, whether to differentiate x.
        grad_y (bool): Default is True, whether to differentiate y.

    Returns:
        A pair (jacs[0], jacs[1]) when both grad_x and grad_y are set; otherwise the
        single tvm.tensor.Tensor belonging to whichever flag is set.

    Raises:
        ValueError: If both grad_x and grad_y are false.
    """
    utils.elemwise_shape_check(data_x.shape, data_y.shape)
    utils.elemwise_shape_check(head.shape, data_x.shape)

    # head and data_y are each dtype-checked against data_x.
    for counterpart in (head, data_y):
        utils.elemwise_dtype_check(
            data_x.dtype, counterpart.dtype,
            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])

    if not (grad_x or grad_y):
        raise ValueError("At least one of grad_x and grad_y is True.")

    forward = minimum(data_x, data_y)
    gradients = list(akg.differentiate(forward, [data_x, data_y], head))

    if not grad_x:
        return gradients[1]
    if not grad_y:
        return gradients[0]
    return gradients[0], gradients[1]
def less(data1, data2, target=utils.CCE):
    """
    Element-wise test of whether data1 is less than data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        target (str): Backend target, validated by utils.check_supported_target.
            Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, the result of akg.topi.less on the (possibly cast) inputs.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    for operand in (data1, data2):
        utils.check_shape(operand.shape)

    if target == utils.CCE:
        # dtype validation only happens on the CCE target
        utils.elemwise_dtype_check(
            data1.dtype, data2.dtype,
            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])

        # Depending on the runtime mode, the comparison runs in float16 (mini)
        # or int32 inputs are promoted to float32 (non-mini).
        to_fp16 = product_is_mini() and data1.dtype != "float16"
        if to_fp16:
            data1 = akg.topi.cast(data1, "float16")
            data2 = akg.topi.cast(data2, "float16")
        to_fp32 = (not product_is_mini()) and data1.dtype == "int32"
        if to_fp32:
            data1 = akg.topi.cast(data1, "float32")
            data2 = akg.topi.cast(data2, "float32")

    return akg.topi.less(data1, data2)
def atan_grad(head, input_x):
    """
    Compute the gradient of atan with respect to input_x.

    .. math::
        dx = \\frac{1}{1 + x^2} \\cdot dy

    Args:
        head (tvm.tensor.Tensor): Gradient tensor of forward's output with
                                  the same shape and dtype as input_x.
        input_x (tvm.tensor.Tensor): Forward's input tensor support float16
                                     and float32.

    Returns:
        A tvm.tensor.Tensor as gradient of forward's input.

    Supported Platforms:
        'Ascend'
    """
    utils.elemwise_shape_check(head.shape, input_x.shape)
    utils.elemwise_dtype_check(head.dtype, input_x.dtype,
                               utils.DtypeForDavinci.ALL_FLOAT)

    # Constant 1 in the input dtype, used for both the numerator and the "1 +" term.
    one = dc.one_const(input_x.dtype)

    return tvm.compute(
        input_x.shape,
        lambda *idx: one / (one + input_x(*idx) * input_x(*idx)) * head(*idx),
        name="out")
def truncate_div(input_x1, input_x2):
    """
    Validate the inputs and delegate truncated division to truncate_div_compute.

    Note:
        Documented contract: res = floor(x1/x2) if x1/x2>0 else ceil(x1/x2).

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, support float16,
                                      float32 on mini device, while support
                                      int32, int8, uint8, float16, float32 on
                                      cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.

    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    utils.check_shape(get_shape(input_x1))
    utils.check_shape(get_shape(input_x2))
    utils.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)

    # The set of accepted dtypes depends on the product type.
    if product_is_mini():
        accepted = utils.DtypeForDavinci.ALL_FLOAT
    else:
        accepted = (utils.DtypeForDavinci.ALL_FLOAT,
                    utils.DtypeForDavinci.INT32,
                    utils.DtypeForDavinci.INT8,
                    utils.DtypeForDavinci.UINT8)
    utils.ops_dtype_check(input_x1.dtype, accepted)

    return truncate_div_compute(input_x1, input_x2)
def mul(l_input, r_input, target=utils.CCE):
    """
    Calculate x * y element-wise.

    Note:
        mul supports broadcasting.

    Args:
        l_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        r_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Backend target, validated by utils.check_supported_target.
            Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, has the same type as l_input and r_input.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    # The ALL_FLOAT dtype check is applied regardless of the target.
    utils.ops_dtype_check([l_input.dtype, r_input.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)

    left_shape = [dim.value for dim in l_input.shape]
    right_shape = [dim.value for dim in r_input.shape]
    for candidate in (left_shape, right_shape):
        utils.check_shape(candidate)
    utils.auto_broadcast_check(left_shape, right_shape)
    utils.elemwise_dtype_check(l_input.dtype, r_input.dtype)

    return akg.topi.multiply(l_input, r_input)
def Divide(lhs, rhs, target=utils.CCE):
    """
    Calculate lhs / rhs element-wise.

    Args:
        lhs: The left tensor.
        rhs: The right tensor.
        target (str): Backend target, validated by utils.check_supported_target.
            On utils.CCE the work is delegated to _div_ascend.

    Returns:
        tvm.tensor.Tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)

    if target != utils.CCE:
        # Non-CCE path: validate here and use the topi division directly.
        left_shape = [dim.value for dim in lhs.shape]
        right_shape = [dim.value for dim in rhs.shape]
        utils.check_shape(left_shape)
        utils.check_shape(right_shape)
        utils.auto_broadcast_check(left_shape, right_shape)
        utils.elemwise_dtype_check(lhs.dtype, rhs.dtype)
        return akg.topi.divide(lhs, rhs)

    # The CCE path performs its own validation inside _div_ascend.
    return _div_ascend(lhs, rhs)
def _div_ascend(data1, data2):
    """
    Calculates x/y, and returns an integer when inputs are all integers.

    When both arguments are integers, use integer division (also known as "floor division").
    When arguments are float numbers, use normal floating point division

    Note:
        div supports broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.

    Returns:
        tvm.tensor.Tensor, has the same type as data1 and data2.
    """
    utils.ops_dtype_check([data1.dtype, data2.dtype], utils.DtypeForDavinci.ALL_TYPES)
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    dtype = data1.dtype

    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    utils.auto_broadcast_check(shape1, shape2)

    # Bring both operands to the common broadcast shape before dividing.
    n_shape1, n_shape2, out_shape = produce_shapes(shape1, shape2)
    if n_shape1 != out_shape:
        input1_cast = akg.topi.broadcast_to(data1, out_shape)
    else:
        input1_cast = data1
    if n_shape2 != out_shape:
        input2_cast = akg.topi.broadcast_to(data2, out_shape)
    else:
        input2_cast = data2

    # Integer inputs are divided in float16 and cast back at the end.
    if dtype in ("int32", "int8", "uint8"):
        # Fix: the first cast previously called the undefined name `Case`,
        # which raised NameError for every integer input.
        input1p = Cast(input1_cast, "float16", utils.CCE)
        input2p = Cast(input2_cast, "float16", utils.CCE)
    else:
        input1p = input1_cast
        input2p = input2_cast

    if product_is_mini():
        # On the mini product the division is expressed as x * (1 / y).
        input2p_rec = reciprocal(input2p, target=utils.CCE)
        res = akg.topi.multiply(input1p, input2p_rec)
    else:
        res = akg.topi.divide(input1p, input2p)

    # 8-bit results are floored, then brought back to float16 before the
    # final cast to the original integer dtype.
    if dtype in ("int8", "uint8"):
        res = floor(res, utils.CCE)
        res = Cast(res, "float16", utils.CCE)
    if dtype in ("int32", "int8", "uint8"):
        res = Cast(res, dtype, utils.CCE)

    return res
def apply_ftrl(var, accum, linear, grad, lr, l1, l2, lr_power, target=utils.CCE):
    """
    Ftrl-proximal optimization algorithm.

    Note:
        accum_new = accum + grad * grad
        linear_new = linear +  grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
        x = clip(linear_new, -l1, l1) - linear_new
        y = accum_new^(-lr_power) / lr + 2 * l2
        var_new = x / y

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry in it must be
                                   greater or equal to zero.
        linear (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        lr_power (tvm.tensor.Tensor): A scalar tensor of the same type as `var`. Value of it
                                      must be less or equal to zero.
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
        tvm.tensor.Tensor, updated linear.
        dict, attrs carrying the in-place binding info under the utils.BINDS key.

    Raises:
        RuntimeError: If running on the mini product.
    """
    # The vlog instruction on the mini product has a precision problem, and the
    # mini product is used for inference rather than training.
    if product_is_mini():
        raise RuntimeError("The apply_ftrl operator does not support the mini product")

    # shape validation
    utils.check_shape(var)
    var_shape = get_shape(var)
    for same_shaped in (accum, linear, grad):
        utils.elemwise_shape_check(var_shape, same_shaped.shape)
    scalar_shape = (1,)
    for scalar in (lr, l1, l2, lr_power):
        utils.elemwise_shape_check(scalar.shape, scalar_shape)

    # dtype validation
    var_dtype = var.dtype
    utils.ops_dtype_check(var_dtype, [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.FLOAT32])
    for operand in (var, accum, linear, grad, lr, l1, l2, lr_power):
        utils.elemwise_dtype_check(operand.dtype, var_dtype)

    new_var, new_accum, new_linear = apply_ftrl_impl(
        var, accum, linear, grad, lr, l1, l2, None, lr_power, with_l2_shrinkage=False)

    # Write the results back onto the input buffers (in-place update).
    outputs, binds_info = TensorUtils.inplace_set_tensors(
        (var, accum, linear), (new_var, new_accum, new_linear))
    new_var, new_accum, new_linear = outputs

    return new_var, new_accum, new_linear, {utils.BINDS: binds_info}
def _add(data1, data2):
    """Validate dtype, shapes and broadcastability, then return akg.topi.add(data1, data2)."""
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    for operand in (data1, data2):
        utils.check_shape(operand.shape)
    utils.auto_broadcast_check(data1.shape, data2.shape)
    return akg.topi.add(data1, data2)
def _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Validate the inputs of the RMSProp update.

    Raises:
        RuntimeError: If lr, rho or momentum does not have shape (1,).
        ValueError: If epsilon is not strictly positive.
    """
    utils.check_shape(var)

    # every tensor argument is dtype-checked against var
    for operand in (ms, mom, grad, lr, momentum, rho):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)

    # state tensors and the gradient are shape-checked against var
    for same_shaped in (ms, mom, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)

    # hyper-parameters must be one-element tensors
    if any(tuple(get_shape(scalar)) != (1,) for scalar in (lr, rho, momentum)):
        raise RuntimeError(
            "lr, rho and momentum only support scalar tensor.")

    if epsilon <= 0:
        raise ValueError("epsilon should greater than zero.")
def apply_power_sign(var, m, grad, lr, logbase, sign_decay, beta, target=utils.CCE):
    """
    Update 'var' according to the PowerSign update

    m_out = beta * m + (1 - beta) * grad
    var_out = var - lr_t * (exp(logbase * sign_decay * Sign(grad) * Sign(m_out)) * grad)

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of same type as var.
        logbase (tvm.tensor.Tensor): A scalar tensor of same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of same type as var.
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
        dict, attrs carrying the in-place binding info under the utils.BINDS key.

    Raises:
        RuntimeError: If lr, logbase, sign_decay or beta does not have shape (1,).
    """
    # dtype validation
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for operand in (m, grad, lr, logbase, sign_decay, beta):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)

    # shape validation
    for same_shaped in (m, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)
    if any(tuple(get_shape(scalar)) != (1,)
           for scalar in (lr, logbase, sign_decay, beta)):
        raise RuntimeError(
            "lr, logbase, sign_decay and beta only support scalar tensor.")

    new_var, new_m = _apply_power_sign_compute(var, m, grad, lr, logbase, sign_decay, beta)

    # Reuse the var and m buffers for the outputs and merge both binding maps.
    new_var, binds = TensorUtils.inplace_set(var, new_var, "var_buf")
    new_m, m_binds = TensorUtils.inplace_set(m, new_m, "m_buf")
    binds.update(m_binds)

    return new_var, new_m, {utils.BINDS: binds}
def eltwise(data, mode=1, coeff=()):
    """
    Compute elementwise modes, such as 0:PRODUCT, 1:SUM and 2:MAX.

    Args:
        data (list of tvm.tensor.Tensor): a list of tensor, tensor support fp16 and fp32.
            Must hold between 1 and 120 tensors of identical dtype and shape.
        mode (int): 0:product, 1:sum, 2:max.
        coeff (tuple): either empty or one coefficient per tensor in data; only
            used by sum, support int and float.

    Returns:
        tvm.tensor.Tensor.

    Raises:
        RuntimeError: If the tensor count is outside [1, 120], mode is not
            0/1/2, coeff has a mismatching length, or coeff holds a
            non-number element.
    """
    # Validate the tensor count before touching data[0]; otherwise an empty
    # list fails with IndexError and the lower bound check is never reached.
    tensor_num = len(data)
    if tensor_num < 1 or tensor_num > 120:
        raise RuntimeError("tensor_num need in range [1,120].")

    dtype = data[0].dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(data[0].shape)

    if mode not in (0, 1, 2):
        raise RuntimeError("mode only support 0, 1, or 2")

    if len(coeff) != 0 and len(coeff) != tensor_num:
        raise RuntimeError(
            "coeff should be [] or its length be same as data")

    # Plain sum without coefficients is delegated to addn.
    if mode == 1 and len(coeff) == 0:
        return addn.addn(data)

    # Check every coefficient, not just the first one. The exact-type test is
    # kept (rather than isinstance) so that bool is still rejected.
    for value in coeff:
        if type(value) not in (int, float):
            raise RuntimeError("ele of coeff must be a number.")

    for i in range(1, tensor_num):
        utils.elemwise_dtype_check(data[0].dtype, data[i].dtype)
        utils.elemwise_shape_check(data[0].shape, data[i].shape)

    if mode == 1:
        # coeff is guaranteed non-empty here (the empty case returned above)
        return _addn(data, coeff)
    if mode == 0:
        return _product(data)
    return _max(data)
def apply_proximal_adagrad(var, accum, lr, l1, l2, grad, target=utils.CCE):
    """
    The FOBOS optimization algorithm with Adagrad learning rate.

    Note:
        accum_new = accum + grad * grad
        ada_lr = lr * rsqrt(accum_new)
        prox_var = var - ada_lr * grad
        if l1 > 0:
            var_new = Sign(prox_var)/(1+ada_lr*l2) * max{|prox_var|-ada_lr*l1,0}
        else:
            var_new = prox_var/(1+ada_lr*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry in it must be
                                   greater or equal to zero.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
        dict, attrs carrying the in-place binding info under the utils.BINDS key.
    """
    # shape validation
    utils.check_shape(var)
    var_shape = get_shape(var)
    for same_shaped in (accum, grad):
        utils.elemwise_shape_check(var_shape, same_shaped.shape)
    scalar_shape = (1, )
    for scalar in (lr, l1, l2):
        utils.elemwise_shape_check(scalar.shape, scalar_shape)

    # dtype validation
    var_dtype = var.dtype
    utils.ops_dtype_check(
        var_dtype,
        [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.FLOAT32])
    for operand in (var, accum, lr, l1, l2, grad):
        utils.elemwise_dtype_check(operand.dtype, var_dtype)

    new_var, new_accum = _apply_proximal_adagrad_compute(
        var, accum, lr, l1, l2, grad)

    # Write the results back onto the var/accum buffers.
    outputs, binds_info = TensorUtils.inplace_set_tensors(
        [var, accum], [new_var, new_accum])
    new_var, new_accum = outputs

    return new_var, new_accum, {utils.BINDS: binds_info}
def apply_centered_rms_prop(var, mg, ms, mom, grad, lr, momentum, rho, epsilon, target=utils.CCE):
    """
    Update `var` according to the centered RMSProp algorithm.

    out_mean_grad = decay * mg + (1-decay) * grad
    out_mean_square = decay * ms + (1-decay) * grad * grad
    out_mom = momentum * mom + lr * grad / sqrt(out_mean_square - out_mean_grad^2 + epsilon)
    out_var = var - out_mom

    Args:
        var (tvm.tensor.Tensor): Input data of type float16 or float32.
        mg (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        ms (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        mom (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        grad (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        momentum (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        rho (tvm.tensor.Tensor): A scalar tensor of the same type as `var`
            (presumably the `decay` term of the formulas above - TODO confirm
            against _apply_centered_rms_prop_compute).
        epsilon (float): A positive number; compared against 0 with `<=`.
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated mean_grad.
        tvm.tensor.Tensor, updated mean_square.
        tvm.tensor.Tensor, updated mom.
        dict, attrs carrying the in-place binding info under the utils.BINDS key.

    Raises:
        RuntimeError: If lr, rho or momentum does not have shape (1,).
        ValueError: If epsilon is not strictly positive.
    """
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for operand in (mg, ms, mom, lr, rho, momentum, grad):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)
    for same_shaped in (mg, ms, mom, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)
    if any(tuple(get_shape(scalar)) != (1,) for scalar in (lr, rho, momentum)):
        raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than 0.")

    new_var, new_mg, new_ms, new_mom = _apply_centered_rms_prop_compute(
        var, mg, ms, mom, grad, lr, momentum, rho, epsilon)

    # Bind each output onto its input buffer; all binding maps are merged
    # into the one returned for var.
    new_var, binds = TensorUtils.inplace_set(var, new_var, "var_buf")
    new_mg, mg_binds = TensorUtils.inplace_set(mg, new_mg, "mg_buf")
    new_ms, ms_binds = TensorUtils.inplace_set(ms, new_ms, "ms_buf")
    new_mom, mom_binds = TensorUtils.inplace_set(mom, new_mom, "mom_buf")
    for extra in (mg_binds, ms_binds, mom_binds):
        binds.update(extra)

    return new_var, new_mg, new_ms, new_mom, {utils.BINDS: binds}
def _pow(data1, data2):
    """
    Validate the operands and compute akg.topi.power(data1, data2).

    float16 inputs are computed in float32 and the result is cast back to float16.
    """
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    for operand in (data1, data2):
        utils.check_shape(operand.shape)
    utils.auto_broadcast_check(data1.shape, data2.shape)

    if data1.dtype != 'float16':
        return akg.topi.power(data1, data2)

    # float16 path: widen, compute, narrow
    base = akg.topi.cast(data1, 'float32')
    exponent = akg.topi.cast(data2, 'float32')
    widened = akg.topi.power(base, exponent)
    return akg.topi.cast(widened, 'float16')
def _check_inputs(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step):
    """
    Check op inputs.

    Raises:
        RuntimeError: If lr, l1, l2 or global_step does not have shape (1,).
    """
    # dtype validation: float tensors against var, global_step against INT32
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for operand in (grad_accum, grad_squared_accum, grad, lr, l1, l2):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)
    utils.ops_dtype_check(global_step.dtype, utils.DtypeForDavinci.INT32)

    # shape validation
    for same_shaped in (grad_accum, grad_squared_accum, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)
    if any(tuple(get_shape(scalar)) != (1,)
           for scalar in (lr, l1, l2, global_step)):
        raise RuntimeError(
            "lr, l1, l2 and global_step only support scalar tensor.")
def _check_inputs(var, accum, accum_update, grad, lr, rho, epsilon):
    """
    Check op inputs.

    Raises:
        RuntimeError: If lr or rho does not have shape (1,).
        ValueError: If epsilon is not strictly positive.
    """
    # dtype validation
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for operand in (accum, accum_update, grad, lr, rho):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)

    # shape validation
    for same_shaped in (accum, accum_update, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)
    if any(tuple(get_shape(scalar)) != (1,) for scalar in (lr, rho)):
        raise RuntimeError("lr and rho only support scalar tensor.")

    # value validation
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def apply_add_sign(var, m, grad, lr, alpha, sign_decay, beta, target=utils.CCE):
    """
    Update 'var' according to the AddSign update.

    m_out = m * beta + grad * (1 - beta)
    var_out = var - lr * (alpha + sign_decay * Sign(grad) *Sign(m)) * grad

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32
        m (tvm.tensor.Tensor): A tensor of type float16 or float32
        grad (tvm.tensor.Tensor): A tensor of type float16 or float32
        lr (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        alpha (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        sign_decay (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        beta (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
        dict, attrs carrying the in-place binding info under the utils.BINDS key.

    Raises:
        RuntimeError: If lr, alpha, sign_decay or beta does not have shape (1,).
    """
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for operand in (m, lr, alpha, sign_decay, beta, grad):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)
    for same_shaped in (m, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)
    if any(tuple(get_shape(scalar)) != (1,)
           for scalar in (lr, alpha, sign_decay, beta)):
        raise RuntimeError(
            "lr, alpha, sign_decay and beta only support scalar.")

    new_var, new_m = _apply_add_sign_compute(var, m, grad, lr, alpha,
                                             sign_decay, beta)

    # Reuse the var and m buffers for the outputs and merge both binding maps.
    new_var, binds = TensorUtils.inplace_set(var, new_var, "var_buf")
    new_m, m_binds = TensorUtils.inplace_set(m, new_m, "m_buf")
    binds.update(m_binds)

    return new_var, new_m, {utils.BINDS: binds}
def inv_grad(input_y, input_dy):
    """
    Calculate the gradient of Reciprocal: dx = -1 * input_dy * input_y * input_y.

    The arithmetic itself is delegated to inv_grad_compute.

    Args:
        input_y (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32.
        input_dy (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as input_y.
    """
    # Both tensors must agree in shape and in one of the supported dtypes.
    utils.elemwise_shape_check(input_y.shape, input_dy.shape)
    utils.elemwise_dtype_check(
        input_y.dtype, input_dy.dtype,
        supported_type=["float16", "float32", "int8", "int32"])

    return inv_grad_compute(input_y, input_dy)
def fake_quant_with_min_max_vars_per_channel(input_data, input_min, input_max,
                                             num_bits=8, narrow_range=False):
    """
    Generate fake_quantize the input_data for every channel.

    Note:
        For input_data last dim must be equal to d. And need to satisfy: input_min <= 0 <= input_max.

    Args:
        input_data (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [b, d] or [b, h, w, d] or [d].
        input_min (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [d].
        input_max (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [d].
        num_bits (int):  The quantization bits, must be int, defaults to 8.
        narrow_range (Union[bool, None]): if True, quant_min equal to 1, else 0, defaults to False.

    Returns:
        tvm.tensor.Tensor of same type and shape as input_data.

    Raises:
        RuntimeError: If the length of input_min differs from the last dimension of input_data.
        ValueError: If num_bits is outside [2, 16].
    """
    # shape validation
    data_shape = get_shape(input_data)
    min_shape = get_shape(input_min)
    max_shape = get_shape(input_max)
    utils.elemwise_shape_check(min_shape, max_shape)
    utils.auto_broadcast_check(min_shape, data_shape)
    if min_shape[0] != data_shape[-1]:
        raise RuntimeError(
            "The shapes of min,max and shape_inputs last one dimension should be same!"
        )

    # dtype validation
    utils.ops_dtype_check(input_data.dtype, utils.DtypeForDavinci.FLOAT32)
    utils.elemwise_dtype_check(input_min.dtype, input_max.dtype,
                               utils.DtypeForDavinci.FLOAT32)

    # num_bits range validation
    too_wide = num_bits > 16
    if too_wide or num_bits < 2:
        raise ValueError("numbits should be in range [2, 16]!")

    return fake_quant_with_min_max_vars_per_channel_compute(
        input_data, input_min, input_max, num_bits, narrow_range)
def _check_inputs(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon):
    """
    Check op inputs.

    Raises:
        RuntimeError: If beta1_power, lr, beta1 or beta2 does not have shape (1,).
        ValueError: If epsilon is not strictly positive.
    """
    # dtype validation
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for operand in (m, v, grad, beta1_power, lr, beta1, beta2):
        utils.elemwise_dtype_check(var.dtype, operand.dtype)

    # shape validation
    for same_shaped in (m, v, grad):
        utils.elemwise_shape_check(var.shape, same_shaped.shape)
    if any(tuple(get_shape(scalar)) != (1,)
           for scalar in (beta1_power, lr, beta1, beta2)):
        raise RuntimeError(
            "beta1_power, lr, beta1 and beta2 only support scalar tensor.")

    # value validation
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def make_input_and_value(data1, data2):
    """
    Build the broadcast inputs and constant tensors for an element-wise comparison.

    Returns:
        tuple: (t_value, f_value, input1_bro, input2_bro, shape) where t_value and
        f_value are tensors filled with 1 and 0 in the dtype of data1, the two
        inputs are broadcast to the common shape, and shape is that common
        shape as produced by produce_shapes.
    """
    raw_shape1 = [dim.value for dim in data1.shape]
    raw_shape2 = [dim.value for dim in data2.shape]
    utils.check_shape(raw_shape1)
    utils.check_shape(raw_shape2)

    # Only the common output shape of the triple is needed afterwards.
    _, _, out_shape = produce_shapes(raw_shape1, raw_shape2)

    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    dtype = data1.dtype

    def _filled(value, name):
        # Tensor of out_shape whose every element is the constant `value`.
        return akg.tvm.compute(out_shape, lambda *indice: akg.tvm.const(value, dtype), name)

    t_value = _filled(1, "T")
    f_value = _filled(0, "F")
    input1_bro = akg.topi.broadcast_to(data1, out_shape)
    input2_bro = akg.topi.broadcast_to(data2, out_shape)

    return (t_value, f_value, input1_bro, input2_bro, out_shape)
def truncatemod(x, y, target=utils.CCE):
    """
    Computes remainder of division(x / y).

    Note:
        res = x - y*trunc(x/y)

    Args:
        x(tvm.tensor.Tensor): Input tensor, support float16 on mini device, while support
                           int32, int8, uint8, float16, float32 on cloud ones.
        y(tvm.tensor.Tensor): Tensor with same type as input tensor x.
        target (str): Forwarded to Cast. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor of same type as input tensors.
    """
    utils.check_shape(x)
    utils.check_shape(y)
    utils.elemwise_dtype_check(x.dtype, y.dtype)
    origin_dtype = x.dtype

    # The accepted dtypes depend on the product type.
    if product_is_mini():
        accepted = [utils.DtypeForDavinci.FLOAT16]
    else:
        accepted = [
            utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32,
            utils.DtypeForDavinci.INT8, utils.DtypeForDavinci.UINT8
        ]
    utils.ops_dtype_check(origin_dtype, accepted)

    if product_is_mini():
        result = _truncatemod_compute_mini(x, y)
    else:
        # High-precision compute is required here.
        # For brevity, let x = 132.05, y = 131.95; x and y are very close, but the
        # difference between trunc(x)=132 and trunc(y)=131 is 1.
        if origin_dtype != "float32":
            x = Cast(x, "float32", target=target)
            y = Cast(y, "float32", target=target)
        result = akg.topi.mod(x, y)

    # Restore the caller's dtype if the computation changed it.
    if result.dtype != origin_dtype:
        result = Cast(result, origin_dtype, target=target)
    return result
def acosh_grad(y, dy):
    """
    Gradient for acosh.

    Note:
        dx = dy * 1/sinh(y)

    Args:
        y (tvm.tensor.Tensor): tensor of type float16, float32.
        dy (tvm.tensor.Tensor): same type and shape as y.

    Returns:
        tvm.tensor.Tensor, same type and shape as y.
        dict, attrs with "enable_auto_inline" set to False.

    Raises:
        RuntimeError: If running on the mini product.

    Supported Platforms:
        'Ascend'
    """
    # The mini product is only used for inference.
    if product_is_mini():
        raise RuntimeError(
            "The mini product does not support the acosh_grad operator")

    origin_dtype = y.dtype
    utils.ops_dtype_check(y.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.elemwise_dtype_check(origin_dtype, dy.dtype)
    utils.check_shape(y.shape)
    utils.elemwise_shape_check(y.shape, dy.shape)

    # float16 inputs are computed in float32
    if origin_dtype == "float16":
        y = topi.cast(y, "float32")
        dy = topi.cast(dy, "float32")

    # Using sinh(y) = (exp(y) - exp(-y))/2 directly causes precision problems:
    # since dx = dy/sinh(y), when exp(y) and exp(-y) are close the small error
    # of the exp calculation is greatly enlarged in the final result. A Taylor
    # based helper is used instead.
    grad = topi.divide(dy, _sinh_taylor(y))

    if grad.dtype != origin_dtype:
        grad = topi.cast(grad, origin_dtype)

    return grad, {"enable_auto_inline": False}
def apply_proximal_gradient_descent(var, alpha, l1, l2, delta, target=utils.CCE):
    """
    The FOBOS algorithm with fixed learning rate.

    Note:
        prox_var = var - alpha * delta
        if l1 > 0:
            var_new = Sign(prox_var)/(1+alpha*l2) * max{|prox_var|-alpha*l1,0}
        else:
            var_new = prox_var/(1+alpha*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        delta (tvm.tensor.Tensor): A tensor of same shape and type as var.
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, updated var.
        dict, attrs carrying the in-place binding info under the utils.BINDS key.
    """
    # shape validation
    utils.check_shape(var)
    var_shape = get_shape(var)
    utils.elemwise_shape_check(var_shape, delta.shape)
    scalar_shape = (1,)
    for scalar in (alpha, l1, l2):
        utils.elemwise_shape_check(scalar.shape, scalar_shape)

    # dtype validation
    var_dtype = var.dtype
    utils.ops_dtype_check(var_dtype, [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.FLOAT32])
    for operand in (var, alpha, l1, l2, delta):
        utils.elemwise_dtype_check(operand.dtype, var_dtype)

    updated = apply_proximal_gradient_descent_impl(var, alpha, l1, l2, delta)

    # Write the result back onto the var buffer.
    updated, binds_info = TensorUtils.inplace_set(var, updated, "var_buf")
    return updated, {utils.BINDS: binds_info}
def RealDiv(input1, input2, target=utils.CCE):
    """
    Returns input1 / input2 element-wise for real types.

    Note:
        Realdiv supports broadcasting.

    Args:
        input1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        input2 (tvm.tensor.Tensor): Tensor of type float16, float32.
        target (str): Not read inside this function. Defaults to utils.CCE.

    Returns:
        tvm.tensor.Tensor, has the same type of input1 and shaped by broadcasting.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check([input1.dtype, input2.dtype], utils.DtypeForDavinci.ALL_FLOAT)
    utils.elemwise_dtype_check(input1.dtype, input2.dtype)

    raw_shape1 = [dim.value for dim in input1.shape]
    raw_shape2 = [dim.value for dim in input2.shape]
    utils.check_shape(raw_shape1)
    utils.check_shape(raw_shape2)
    utils.auto_broadcast_check(raw_shape1, raw_shape2)

    # Broadcast an operand only when its normalized shape differs from the
    # common output shape.
    norm_shape1, norm_shape2, out_shape = produce_shapes(raw_shape1, raw_shape2)
    numerator = input1 if norm_shape1 == out_shape else akg.topi.broadcast_to(input1, out_shape)
    denominator = input2 if norm_shape2 == out_shape else akg.topi.broadcast_to(input2, out_shape)

    return akg.topi.divide(numerator, denominator)