def minimum_ad(head, data_x, data_y, grad_x=True, grad_y=True):
    """
    Compute the reversed outputs of the operator minimum by automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor of float32, float16 and int32.
        data_x (tvm.tensor.Tensor): Input tensor of float32, float16 and int32.
        data_y (tvm.tensor.Tensor): Input tensor of float32, float16 and int32.
        grad_x (bool): Whether to differentiate with respect to x. Defaults to True.
        grad_y (bool): Whether to differentiate with respect to y. Defaults to True.

    Returns:
        tvm.tensor.Tensor, with the same type and shape as the inputs. If both grad_x and
        grad_y are True, a pair (jacs[0], jacs[1]) is returned.
    """
    vc_util.elemwise_shape_check(data_x.shape, data_y.shape)
    vc_util.elemwise_shape_check(head.shape, data_x.shape)
    vc_util.elemwise_dtype_check(
        data_x.dtype, head.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    vc_util.elemwise_dtype_check(
        data_x.dtype, data_y.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    if not grad_x and not grad_y:
        raise ValueError("At least one of grad_x and grad_y must be True.")

    op = minimum.minimum(data_x, data_y)
    jacs = list(akg.differentiate(op, [data_x, data_y], head))
    if grad_x and grad_y:
        return jacs[0], jacs[1]
    if grad_x:
        return jacs[0]
    return jacs[1]
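# Illustrative sketch only (a hypothetical helper, not part of this module): the reverse-mode
# rule applied to minimum routes the upstream gradient to whichever input is smaller
# element-wise. A NumPy reference of that idea, assuming ties are routed to x:
def _minimum_ad_numpy_reference(head, x, y):
    """Return (dx, dy) for z = minimum(x, y), given upstream gradient head."""
    import numpy as np  # local import keeps the sketch self-contained
    dx = np.where(x <= y, head, 0)
    dy = np.where(x <= y, 0, head)
    return dx, dy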
def less_equal(input1, input2):
    """
    Check whether input1 is less than or equal to input2 element-wise.

    Args:
        input1 (tvm.tensor.Tensor): Tensor.
        input2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor. Element is True where input1 is less than or equal to input2,
        False otherwise.
    """
    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)

    shape1, shape2, shape = produce_shapes(shape1, shape2)

    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    # get less_equal compute
    t_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, dtype), "T")
    f_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(0, dtype), "F")

    input1_bro = akg.topi.broadcast_to(input1, shape)
    input2_bro = akg.topi.broadcast_to(input2, shape)
    c_out = akg.tvm.compute(shape,
                            lambda *indice: akg.tvm.expr.Select(
                                input1_bro[indice] <= input2_bro[indice],
                                t_value[indice], f_value[indice]),
                            name="C")
    res = akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res")

    return res
def minimum(input1, input2):
    """
    Return the min value of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1: Tensor.
        input2: Tensor. Has the same type as input1.

    Returns:
        Tensor, has the same type as inputs.
    """
    vc_util.ops_dtype_check([input1.dtype, input2.dtype], vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)
    vc_util.auto_broadcast_check(shape1, shape2)

    if dtype in ("int8", "uint8"):
        input1 = cast(input1, "float16")
        input2 = cast(input2, "float16")
    res = akg.topi.minimum(input1, input2)
    if dtype in ("int8", "uint8"):
        res = cast(res, dtype)

    return res
def apply_gradient_descent(var, alpha, delta):
    """
    Update var by subtracting alpha * delta from it.

    .. math::
        var_{t} = var_{t-1} - \\alpha \\delta

    Args:
        var (tvm.tensor.Tensor): Input var of dtype float16, float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of same type as input var.
        delta (tvm.tensor.Tensor): A tensor of same shape and dtype as input var.

    Returns:
        tvm.tensor.Tensor, updated var.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (alpha, delta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    vc_util.elemwise_shape_check(var.shape, delta.shape)
    if tuple(get_shape(alpha)) != (1,):
        raise RuntimeError("input alpha only supports scalar tensor.")

    # compute
    out_var = _apply_gradient_descent_compute(var, alpha, delta)

    # reuse var
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return out_var, attrs
def pow(data1, data2):
    """
    Computes power(data1, data2) elementwise, broadcast is supported.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1, if shape(data2) != shape(data1),
            broadcast will happen.

    Returns:
        tvm.tensor.Tensor, powered result, with same type as input tensors and broadcasted
        shape of data1 and data2.
    """
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)
    vc_util.auto_broadcast_check(data1.shape, data2.shape)

    in_dtype = data1.dtype
    if in_dtype == 'float16':
        data1 = akg.topi.cast(data1, 'float32')
        data2 = akg.topi.cast(data2, 'float32')
    res = akg.topi.power(data1, data2)
    if in_dtype == 'float16':
        res = akg.topi.cast(res, 'float16')

    return res
def mul(l_input, r_input):
    """
    Calculate x * y element-wise.

    Note:
        mul supports broadcasting.

    Args:
        l_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        r_input (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has the same type as l_input and r_input.
    """
    vc_util.ops_dtype_check([l_input.dtype, r_input.dtype], vc_util.DtypeForDavinci.ALL_FLOAT)

    shape1 = [x.value for x in l_input.shape]
    shape2 = [x.value for x in r_input.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)
    vc_util.auto_broadcast_check(shape1, shape2)
    vc_util.elemwise_dtype_check(l_input.dtype, r_input.dtype)
    output = akg.topi.multiply(l_input, r_input)

    return output
def atan_grad(head, input_x):
    """
    Compute gradient of input_x in atan.

    .. math::
        dx = \\frac{1}{1 + x^2} \\cdot dy

    Args:
        head (tvm.tensor.Tensor): Gradient tensor of forward's output, with the same shape and
            dtype as input_x.
        input_x (tvm.tensor.Tensor): Forward's input tensor; supports float16 and float32.

    Returns:
        A tvm.tensor.Tensor as gradient of forward's input.
    """
    vc_util.elemwise_shape_check(head.shape, input_x.shape)
    vc_util.elemwise_dtype_check(head.dtype, input_x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    dtype = input_x.dtype
    tensor_one = dc.one_const(dtype)

    def _compute(*i):
        return tensor_one / (tensor_one + input_x(*i) * input_x(*i)) * head(*i)

    out_tensor = tvm.compute(input_x.shape, _compute, name="out")

    return out_tensor
def xdivy(data_x1, data_x2):
    """
    Calculate data_x1 divided by data_x2.

    .. math::
        y = \\left\\{
        \\begin{aligned}
            0, && if \\quad x1 == 0 \\\\
            \\dfrac{x1}{x2}, && otherwise
        \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32".
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32".

    Returns:
        tvm.tensor.Tensor
    """
    shape_x1 = get_shape(data_x1)
    shape_x2 = get_shape(data_x2)
    vc_util.check_shape(shape_x1)
    vc_util.check_shape(shape_x2)

    vc_util.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    dtype = data_x1.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
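# Illustrative NumPy sketch of the xdivy semantics documented above (a hypothetical helper,
# not part of this module): the result is 0 wherever x1 == 0 and x1 / x2 elsewhere.
def _xdivy_numpy_reference(x1, x2):
    import numpy as np  # local import keeps the sketch self-contained
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)
    out = np.zeros_like(x1)
    # divide only where x1 != 0, so positions with x1 == 0 stay exactly 0
    np.divide(x1, x2, out=out, where=(x1 != 0))
    return out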
def assign_sub(data1, data2):
    """
    Computes data1 - data2 elementwise.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8, uint8.
        data2 (tvm.tensor.Tensor): Tensor of same shape and type as data1.

    Returns:
        Subtracted result, with same shape and type as input tensors.
    """
    dtype = data1.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    vc_util.elemwise_shape_check(data1.shape, data2.shape)

    need_cast_dtype = ["int8", "uint8"]
    cast_type = "float16"
    if dtype in need_cast_dtype:
        data1 = akg.topi.cast(data1, cast_type)
        data2 = akg.topi.cast(data2, cast_type)

    res = akg.topi.subtract(data1, data2)

    if dtype in need_cast_dtype:
        if dtype == "uint8":
            cons = akg.tvm.const(256, dtype=cast_type)
            res = akg.tvm.compute(res.shape,
                                  lambda *indice: akg.tvm.expr.Select(
                                      res(*indice) < 0,
                                      res(*indice) + cons,
                                      res(*indice)),
                                  name="positive_res")
        res = akg.topi.cast(res, dtype)

    return res
def less(data1, data2):
    """
    Check whether data1 is less than data2 element-wise.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.

    Returns:
        tvm.tensor.Tensor. Element is True where data1 is less than data2, False otherwise.
    """
    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)

    # check types
    vc_util.elemwise_dtype_check(
        data1.dtype, data2.dtype,
        [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    # check runtime mode, and change dtype
    if utils.product_is_mini() and data1.dtype != "float16":
        data1 = akg.topi.cast(data1, "float16")
        data2 = akg.topi.cast(data2, "float16")
    if (not utils.product_is_mini()) and data1.dtype == "int32":
        data1 = akg.topi.cast(data1, "float32")
        data2 = akg.topi.cast(data2, "float32")
    res = akg.topi.less(data1, data2)

    return res
def truncate_div(input_x1, input_x2):
    """
    Calculate truncate_div, i.e. res = floor(x1/x2) if x1/x2 > 0, else ceil(x1/x2).

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor; supports float16, float32 on mini devices,
            and int32, int8, uint8, float16, float32 on cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.

    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    vc_util.check_shape(get_shape(input_x1))
    vc_util.check_shape(get_shape(input_x2))
    vc_util.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)
    vc_util.ops_dtype_check(
        input_x1.dtype,
        (vc_util.DtypeForDavinci.ALL_FLOAT) if utils.product_is_mini()
        else (vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32,
              vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.UINT8))

    return truncate_div_compute(input_x1, input_x2)
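# Illustrative sketch of the truncate_div semantics documented above (a hypothetical helper,
# not part of this module): the quotient is rounded toward zero, so it floors positive results
# and ceils negative ones, e.g. truncate_div(-7, 2) -> -3 while floor(-7 / 2) -> -4.
def _truncate_div_numpy_reference(x1, x2):
    import numpy as np  # local import keeps the sketch self-contained
    return np.trunc(np.asarray(x1, dtype=float) / np.asarray(x2, dtype=float))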
def apply_adagrad(var, accum, learning_rate, grad, update_slots=True):
    """
    Update `var` according to the Adagrad algorithm.

    .. math::
        accum += grad^2
        var -= learning_rate * grad / accum.sqrt()

    Args:
        var (tvm.tensor.Tensor): Input var to be updated, of type float16, float32.
        accum (tvm.tensor.Tensor): Accumulation of the squared gradients, of type float16, float32.
        learning_rate (tvm.tensor.Tensor): A scalar tensor of type float16, float32.
        grad (tvm.tensor.Tensor): Input grad of type float16, float32.
        update_slots (bool): If True, the accum tensor will be updated; otherwise it is left
            unchanged. Defaults to True.

    Returns:
        tvm.tensor.Tensor, the updated var.
        tvm.tensor.Tensor, the updated accum.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (accum, learning_rate, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (accum, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    if tuple(get_shape(learning_rate)) != (1,):
        raise RuntimeError("learning_rate only supports scalar tensor")

    return _apply_adagrad_compute(var, accum, learning_rate, grad, update_slots)
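# Illustrative NumPy sketch of the Adagrad update written in the docstring above (a hypothetical
# helper describing the math, not the DaVinci implementation):
def _apply_adagrad_numpy_reference(var, accum, learning_rate, grad, update_slots=True):
    import numpy as np  # local import keeps the sketch self-contained
    if update_slots:
        accum = accum + grad * grad
    var = var - learning_rate * grad / np.sqrt(accum)
    return var, accum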
def select(condition, x1, x2):
    """
    Selects elements from x1 or x2, depending on condition.

    Note:
        Every parameter's shape must be valid; broadcasting of condition's shape is supported.

    Args:
        condition (tvm.tensor.Tensor): Tensor of type int8, int32, must be 0 or 1.
        x1 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.
        x2 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as x1.
    """
    shape_x1 = get_shape(x1)
    shape_x2 = get_shape(x2)
    con_shape = get_shape(condition)
    vc_util.elemwise_shape_check(shape_x1, shape_x2)
    vc_util.elemwise_dtype_check(x1.dtype, x2.dtype, [
        vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT8,
        vc_util.DtypeForDavinci.INT32, vc_util.DtypeForDavinci.UINT8
    ])
    vc_util.ops_dtype_check(
        condition.dtype,
        [vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.INT32])
    vc_util.auto_broadcast_check(con_shape, shape_x1)
    res = select_compute(condition, x1, x2)

    return res
def div(data1, data2):
    """
    Calculates x/y, and returns an integer when inputs are all integers.

    When both arguments are integers, use integer division (also known as "floor division").
    When arguments are float numbers, use normal floating point division.

    Note:
        div supports broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.

    Returns:
        tvm.tensor.Tensor, has the same type as data1 and data2.
    """
    vc_util.ops_dtype_check([data1.dtype, data2.dtype], vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    dtype = data1.dtype

    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)
    vc_util.auto_broadcast_check(shape1, shape2)
    n_shape1, n_shape2, out_shape = produce_shapes(shape1, shape2)
    if n_shape1 != out_shape:
        input1_cast = akg.topi.broadcast_to(data1, out_shape)
    else:
        input1_cast = data1
    if n_shape2 != out_shape:
        input2_cast = akg.topi.broadcast_to(data2, out_shape)
    else:
        input2_cast = data2

    if dtype in ("int32", "int8", "uint8"):
        input1p = cast(input1_cast, "float16")
        input2p = cast(input2_cast, "float16")
    else:
        input1p = input1_cast
        input2p = input2_cast

    if utils.product_is_mini():
        input2p_rec = reciprocal(input2p)
        res = akg.topi.multiply(input1p, input2p_rec)
    else:
        res = akg.topi.divide(input1p, input2p)

    if dtype in ("int8", "uint8"):
        res = floor(res)
        res = cast(res, "float16")
    if dtype in ("int32", "int8", "uint8"):
        res = cast(res, dtype)

    return res
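# Illustrative sketch of the div semantics documented above (a hypothetical helper, not the
# DaVinci implementation): integer inputs use floor division, floating-point inputs use
# ordinary division.
def _div_numpy_reference(x, y):
    import numpy as np  # local import keeps the sketch self-contained
    x, y = np.asarray(x), np.asarray(y)
    if np.issubdtype(x.dtype, np.integer):
        return x // y  # floor division, e.g. -7 // 2 == -4
    return x / y       # ordinary floating-point division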
def apply_ftrl(var, accum, linear, grad, lr, l1, l2, lr_power):
    """
    Ftrl-proximal optimization algorithm.

    Note:
        accum_new = accum + grad * grad
        linear_new = linear + grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
        x = clip(linear_new, -l1, l1) - linear_new
        y = accum_new^(-lr_power) / lr + 2 * l2
        var_new = x / y

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry in it must
            be greater or equal to zero.
        linear (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        lr_power (tvm.tensor.Tensor): A scalar tensor of the same type as `var`. Value of it
            must be less or equal to zero.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
        tvm.tensor.Tensor, updated linear.
    """
    # The vlog instruction on the mini product has a precision problem, and the mini product is
    # used for inference rather than training.
    if utils.product_is_mini():
        raise RuntimeError("The apply_ftrl operator does not support the mini product")

    # check shapes
    vc_util.check_shape(var)
    shape = get_shape(var)
    for tensor in (accum, linear, grad):
        vc_util.elemwise_shape_check(shape, tensor.shape)
    sclar_shape = (1,)
    for sclar in (lr, l1, l2, lr_power):
        vc_util.elemwise_shape_check(sclar.shape, sclar_shape)

    # check dtypes
    dtype = var.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.FLOAT16, vc_util.DtypeForDavinci.FLOAT32])
    for tensor in (var, accum, linear, grad, lr, l1, l2, lr_power):
        vc_util.elemwise_dtype_check(tensor.dtype, dtype)

    var_new, accum_new, linear_new = apply_ftrl_impl(var, accum, linear, grad, lr, l1, l2, None,
                                                     lr_power, with_l2_shrinkage=False)

    # update by inplace
    (var_new, accum_new, linear_new), binds_info = \
        utils.TensorUtils.inplace_set_tensors((var, accum, linear), (var_new, accum_new, linear_new))
    attrs = {utils.BINDS: binds_info}
    return var_new, accum_new, linear_new, attrs
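# Illustrative NumPy sketch of the FTRL-proximal update in the Note above (a hypothetical helper
# describing the math, not the DaVinci implementation); assumes lr_power <= 0 and accum >= 0.
def _apply_ftrl_numpy_reference(var, accum, linear, grad, lr, l1, l2, lr_power):
    import numpy as np  # local import keeps the sketch self-contained
    accum_new = accum + grad * grad
    linear_new = linear + grad - (accum_new ** (-lr_power) - accum ** (-lr_power)) / lr * var
    x = np.clip(linear_new, -l1, l1) - linear_new
    y = accum_new ** (-lr_power) / lr + 2 * l2
    var_new = x / y
    return var_new, accum_new, linear_new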
def _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """Check inputs"""
    vc_util.check_shape(var)
    for i in (ms, mom, grad, lr, momentum, rho):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def apply_centered_rms_prop(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Update `var` according to the centered RMSProp algorithm.

    Note:
        With rho as the decay rate,

        out_mean_grad = decay * mg + (1-decay) * grad
        out_mean_square = decay * ms + (1-decay) * grad * grad
        out_mom = momentum * mom + lr * grad / sqrt(out_mean_square - out_mean_grad^2 + epsilon)
        out_var = var - out_mom

    Args:
        var (tvm.tensor.Tensor): Input data of type float16 or float32.
        mg (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        ms (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        mom (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        grad (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        momentum (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        rho (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        epsilon (float): A small constant; must be greater than zero.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated mean_grad.
        tvm.tensor.Tensor, updated mean_square.
        tvm.tensor.Tensor, updated mom.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (mg, ms, mom, lr, rho, momentum, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (mg, ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than 0.")

    out_var, out_mg, out_ms, out_mom = _apply_centered_rms_prop_compute(
        var, mg, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_mg, binds_info2 = TensorUtils.inplace_set(mg, out_mg, "mg_buf")
    out_ms, binds_info3 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info4 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    binds_info.update(binds_info4)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_mg, out_ms, out_mom, attrs
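# Illustrative NumPy sketch of the centered RMSProp update listed above, taking rho as the decay
# rate (a hypothetical helper describing the math, not the DaVinci implementation):
def _apply_centered_rms_prop_numpy_reference(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    import numpy as np  # local import keeps the sketch self-contained
    mg_new = rho * mg + (1 - rho) * grad
    ms_new = rho * ms + (1 - rho) * grad * grad
    mom_new = momentum * mom + lr * grad / np.sqrt(ms_new - mg_new * mg_new + epsilon)
    var_new = var - mom_new
    return var_new, mg_new, ms_new, mom_new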
def eltwise(data, mode=1, coeff=()):
    """
    Compute element-wise modes, such as 0:PRODUCT, 1:SUM and 2:MAX.

    Args:
        data (list of tvm.tensor.Tensor): A list of tensors; tensors support fp16 and fp32.
        mode (int): 0:product, 1:sum, 2:max.
        coeff (tuple): Only used by sum. Must be empty or have the same length as data;
            elements support int and float.

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    vc_util.check_shape(data[0].shape)
    shape_data = get_shape(data[0])

    if mode not in (0, 1, 2):
        raise RuntimeError("mode only supports 0, 1, or 2")

    if len(coeff) != 0 and len(data) != len(coeff):
        raise RuntimeError("coeff should be empty or its length be same as data")

    tensor_num = len(data)
    # tensor num must be in [1, 120]
    if tensor_num < 1 or tensor_num > 120:
        raise RuntimeError("tensor_num need in range [1,120].")

    if mode == 1 and len(coeff) == 0:
        return addn.addn(data)

    if len(coeff) != 0:
        if type(coeff[0]) != int and type(coeff[0]) != float:
            raise RuntimeError("elements of coeff must be numbers.")

    for i in range(1, len(data)):
        vc_util.elemwise_dtype_check(data[0].dtype, data[i].dtype)
        vc_util.elemwise_shape_check(data[0].shape, data[i].shape)

    if mode == 1 and len(coeff) > 0:
        return _addn(data, coeff)

    if mode == 0:
        return _product(data)

    if mode == 2:
        return _max(data)
def _check_inputs(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step):
    """Check op inputs"""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (grad_accum, grad_squared_accum, grad, lr, l1, l2):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    vc_util.ops_dtype_check(global_step.dtype, vc_util.DtypeForDavinci.INT32)

    # check shape
    for i in (grad_accum, grad_squared_accum, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, l1, l2, global_step):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError(
                "lr, l1, l2 and global_step only support scalar tensor.")
def greater_equal(data1, data2):
    """
    Check whether data1 is greater than or equal to data2 element-wise.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16.
        data2 (tvm.tensor.Tensor): Tensor of type float16.

    Returns:
        tvm.tensor.Tensor. Element is True where data1 is greater than or equal to data2,
        False otherwise.
    """
    # check shapes
    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    shapes = [shape1, shape2]
    for shape in shapes:
        vc_util.check_shape(shape)

    # check types
    dtype = data1.dtype
    dtype2 = data2.dtype
    vc_util.elemwise_dtype_check(dtype, dtype2)
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.FLOAT16)

    res = akg.topi.greater_equal(data1, data2)

    return res
def logical_or(input1, input2):
    """
    Compute input1 or input2 element-wise.

    Args:
        input1 (tvm.tensor.Tensor): Tensor of type bool.
        input2 (tvm.tensor.Tensor): Tensor of type bool, with the same shape as input1.

    Returns:
        tvm.tensor.Tensor of type bool.
    """
    dtype1 = input1.dtype
    dtype2 = input2.dtype
    vc_util.elemwise_dtype_check(dtype1, dtype2)
    vc_util.ops_dtype_check(dtype1, vc_util.DtypeForDavinci.BOOL)

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)
    vc_util.elemwise_shape_check(shape1, shape2)

    res = akg.topi.logical_or(input1, input2)

    return res
def _check_inputs(var, accum, accum_update, grad, lr, rho, epsilon):
    """Check op inputs"""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (accum, accum_update, grad, lr, rho):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shape
    for i in (accum, accum_update, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr and rho only support scalar tensor.")

    # check value
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def _check_inputs(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon):
    """Check op inputs"""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, v, grad, beta1_power, lr, beta1, beta2):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shape
    for i in (m, v, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (beta1_power, lr, beta1, beta2):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("beta1_power, lr, beta1 and beta2 only support scalar tensor.")

    # check value
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def apply_proximal_adagrad(var, accum, lr, l1, l2, grad):
    """
    The FOBOS optimization algorithm with Adagrad learning rate.

    Note:
        accum_new = accum + grad * grad
        ada_lr = lr * rsqrt(accum_new)
        prox_var = var - ada_lr * grad
        if l1 > 0:
            var_new = sign(prox_var)/(1+ada_lr*l2) * max{|prox_var|-ada_lr*l1, 0}
        else:
            var_new = prox_var/(1+ada_lr*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry in it must
            be greater or equal to zero.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
    """
    # check shapes
    vc_util.check_shape(var)
    shape = get_shape(var)
    for tensor in (accum, grad):
        vc_util.elemwise_shape_check(shape, tensor.shape)
    sclar_shape = (1,)
    for sclar in (lr, l1, l2):
        vc_util.elemwise_shape_check(sclar.shape, sclar_shape)

    # check dtypes
    dtype = var.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.FLOAT16, vc_util.DtypeForDavinci.FLOAT32])
    for tensor in (var, accum, lr, l1, l2, grad):
        vc_util.elemwise_dtype_check(tensor.dtype, dtype)

    var_new, accum_new = _apply_proximal_adagrad_compute(var, accum, lr, l1, l2, grad)
    (var_new, accum_new), binds_info = utils.TensorUtils.inplace_set_tensors(
        [var, accum], [var_new, accum_new])
    attrs = {utils.BINDS: binds_info}
    return var_new, accum_new, attrs
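# Illustrative NumPy sketch of the FOBOS / proximal-Adagrad update in the Note above (a
# hypothetical helper describing the math, not the DaVinci implementation); l1, l2 and lr are
# taken as Python scalars here.
def _apply_proximal_adagrad_numpy_reference(var, accum, lr, l1, l2, grad):
    import numpy as np  # local import keeps the sketch self-contained
    accum_new = accum + grad * grad
    ada_lr = lr / np.sqrt(accum_new)          # lr * rsqrt(accum_new)
    prox_var = var - ada_lr * grad
    if l1 > 0:
        var_new = np.sign(prox_var) / (1 + ada_lr * l2) * np.maximum(np.abs(prox_var) - ada_lr * l1, 0)
    else:
        var_new = prox_var / (1 + ada_lr * l2)
    return var_new, accum_new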
def fake_quant_with_min_max_vars_per_channel(input_data, input_min, input_max,
                                             num_bits=8, narrow_range=False):
    """
    Generate fake-quantized output for input_data, per channel.

    Note:
        The last dim of input_data must be equal to d, and the inputs need to satisfy
        input_min <= 0 <= input_max.

    Args:
        input_data (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to
            [b, d] or [b, h, w, d] or [d].
        input_min (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [d].
        input_max (tvm.tensor.Tensor): Tensor of type float32, shape must be equal to [d].
        num_bits (int): The quantization bits, must be int, defaults to 8.
        narrow_range (Union[bool, None]): If True, quant_min is equal to 1, else 0;
            defaults to False.

    Returns:
        tvm.tensor.Tensor of same type and shape as input_data.
    """
    # get shapes and check
    shape_inputs = get_shape(input_data)
    shape_min = get_shape(input_min)
    shape_max = get_shape(input_max)
    vc_util.elemwise_shape_check(shape_min, shape_max)
    vc_util.auto_broadcast_check(shape_min, shape_inputs)
    if shape_min[0] != shape_inputs[-1]:
        raise RuntimeError(
            "The last dimension of input_data must match the shape of min and max!")

    # check dtype
    vc_util.ops_dtype_check(input_data.dtype, vc_util.DtypeForDavinci.FLOAT32)
    vc_util.elemwise_dtype_check(input_min.dtype, input_max.dtype, vc_util.DtypeForDavinci.FLOAT32)

    # check num_bits range
    if num_bits > 16 or num_bits < 2:
        raise ValueError("num_bits should be in range [2, 16]!")

    # get output by fake_quant_with_min_max_vars_per_channel_compute function
    res = fake_quant_with_min_max_vars_per_channel_compute(
        input_data, input_min, input_max, num_bits, narrow_range)
    return res
def sub(data1, data2):
    """
    Computes data1 - data2 elementwise, broadcast is supported.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1, if shape(data2) != shape(data1),
            broadcast will happen.

    Returns:
        tvm.tensor.Tensor, subtracted result, with same type as input tensors and broadcasted
        shape of data1 and data2.
    """
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)
    vc_util.auto_broadcast_check(data1.shape, data2.shape)

    res = akg.topi.subtract(data1, data2)

    return res
def atan2(y, x):
    """
    Compute arc tangent of y/x.

    .. math::
        \\arctan2(y, x) = \\arctan(\\frac{y}{x})

    Args:
        y (tvm.tensor.Tensor): Input tensor, only support float16, float32.
        x (tvm.tensor.Tensor): Input tensor, only support float16, float32.

    Returns:
        A tvm.tensor.Tensor as angles in radians.
    """
    vc_util.elemwise_shape_check(get_shape(y), get_shape(x))
    vc_util.elemwise_dtype_check(y.dtype, x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return _atan2_compute(y, x), {"enable_auto_inline": False}
def apply_power_sign(var, m, grad, lr, logbase, sign_decay, beta):
    """
    Update 'var' according to the PowerSign update.

    Note:
        m_out = beta * m + (1 - beta) * grad
        var_out = var - lr * (exp(logbase * sign_decay * sign(grad) * sign(m_out)) * grad)

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32.
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        logbase (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of the same type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, grad, lr, logbase, sign_decay, beta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    for i in (m, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, logbase, sign_decay, beta):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError(
                "lr, logbase, sign_decay and beta only support scalar tensor.")

    # compute
    out_var, out_m = _apply_power_sign_compute(var, m, grad, lr, logbase, sign_decay, beta)

    # reuse var, m
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
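# Illustrative NumPy sketch of the PowerSign update in the Note above (a hypothetical helper
# describing the math, not the DaVinci implementation):
def _apply_power_sign_numpy_reference(var, m, grad, lr, logbase, sign_decay, beta):
    import numpy as np  # local import keeps the sketch self-contained
    m_out = beta * m + (1 - beta) * grad
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    var_out = var - lr * update
    return var_out, m_out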
def asin_grad(x, dy):
    """
    Gradient for arcsin.

    .. math::
        \\frac {\\partial arcsin(x)} {\\partial x} = \\frac{1}{\\sqrt{1 - x^2}}

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32.
        dy (tvm.tensor.Tensor): Tensor of same type and shape as x.

    Returns:
        tvm.tensor.Tensor of same type and shape as x.
    """
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.elemwise_dtype_check(x.dtype, dy.dtype)
    vc_util.elemwise_shape_check(x.shape, dy.shape)

    return _asin_grad_compute(x, dy)
def truncatemod(x, y):
    """
    Computes the remainder of division (x / y).

    Note:
        res = x - y * trunc(x/y)

    Args:
        x (tvm.tensor.Tensor): Input tensor; supports float16 on mini devices, and int32, int8,
            uint8, float16, float32 on cloud ones.
        y (tvm.tensor.Tensor): Tensor with same type as input tensor x.

    Returns:
        tvm.tensor.Tensor of same type as input tensors.
    """
    vc_util.check_shape(x)
    vc_util.check_shape(y)
    vc_util.elemwise_dtype_check(x.dtype, y.dtype)
    dtype = x.dtype
    support_dtype = [
        vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32,
        vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.UINT8
    ]
    if utils.product_is_mini():
        support_dtype = [vc_util.DtypeForDavinci.FLOAT16]

    vc_util.ops_dtype_check(dtype, support_dtype)

    if not utils.product_is_mini():
        # High-precision compute is required. For example, let x = 132.05 and y = 131.95:
        # x and y are very close, but the difference between trunc(x) = 132 and trunc(y) = 131
        # is 1, so a small precision error can change the result.
        if dtype != "float32":
            x = cast(x, "float32")
            y = cast(y, "float32")
        res = akg.topi.mod(x, y)
    else:
        res = _truncatemod_compute_mini(x, y)

    if res.dtype != dtype:
        res = cast(res, dtype)
    return res
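# Illustrative sketch of the truncatemod semantics documented in the Note above (a hypothetical
# helper, not the DaVinci implementation): the remainder follows the sign of x, like C's fmod,
# unlike Python's %, which follows the sign of y (e.g. truncatemod(-7, 2) -> -1, while -7 % 2 -> 1).
def _truncatemod_numpy_reference(x, y):
    import numpy as np  # local import keeps the sketch self-contained
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return x - y * np.trunc(x / y)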