def apply_adagrad(var, accum, learning_rate, grad, update_slots=True):
    """
    Update `var` according to the Adagrad algorithm.

    .. math::
        accum += grad^2
        var -= learning_rate * grad / accum.sqrt()

    Args:
        var (tvm.tensor.Tensor): Input var to be updated, of type float16, float32.
        accum (tvm.tensor.Tensor): Accumulation of the squared gradients, of type float16, float32.
        learning_rate (tvm.tensor.Tensor): A scalar tensor of type float16, float32.
        grad (tvm.tensor.Tensor): Input grad of type float16, float32.
        update_slots (bool): If True, the accum tensor will be updated;
            if False, the accum tensor will not be updated. Defaults to True.

    Returns:
        tvm.tensor.Tensor, the updated var.
        tvm.tensor.Tensor, the updated accum.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (accum, learning_rate, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (accum, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    if tuple(get_shape(learning_rate)) != (1,):
        raise RuntimeError("learning_rate only supports scalar tensor")

    return _apply_adagrad_compute(var, accum, learning_rate, grad, update_slots)
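
# The Adagrad update above is small enough that a NumPy reference makes a handy
# golden model for testing _apply_adagrad_compute. This is a hedged sketch: the
# helper name and the use of NumPy are illustrative assumptions, not part of the
# operator itself.
def _adagrad_numpy_reference(var, accum, learning_rate, grad, update_slots=True):
    import numpy as np
    if update_slots:
        accum = accum + grad * grad
    var = var - learning_rate * grad / np.sqrt(accum)
    return var, accum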
def matrix_diag_part(input_diagonal, input_help):
    """
    Calculate the batched diagonal part of a batched tensor.

    Note:
        input_help is a tensor whose diagonal elements are 1 and all other
        positions are 0; its last two dimensions can be unequal.

    Args:
        input_diagonal (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8.
            The last two dimensions can be unequal.
        input_help (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8,
            whose diagonal elements are 1 and all other positions are 0.

    Returns:
        tvm.tensor.Tensor, has the same type as input_diagonal; its rank is
        equal to rank(input_diagonal) - 1.
    """
    dtype_input_diagonal = input_diagonal.dtype
    dtype_input_help = input_help.dtype

    vc_util.elemwise_shape_check(input_help.shape, input_diagonal.shape)
    if len(input_help.shape) < 2:
        raise ValueError("Only input tensors of rank >= 2 are supported!")

    vc_util.ops_dtype_check([dtype_input_diagonal, dtype_input_help],
                            [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT8,
                             vc_util.DtypeForDavinci.INT32, vc_util.DtypeForDavinci.UINT8])

    res = matrix_diag_part_compute(input_diagonal, input_help)
    return res
def assign_sub(data1, data2):
    """
    Computes data1 - data2 elementwise.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8, uint8.
        data2 (tvm.tensor.Tensor): Tensor of same shape and type as data1.

    Returns:
        tvm.tensor.Tensor, subtracted result, with same shape and type as the input tensors.
    """
    dtype = data1.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    vc_util.elemwise_shape_check(data1.shape, data2.shape)

    # int8 and uint8 are computed in float16 to avoid overflow, then cast back
    need_cast_dtype = ["int8", "uint8"]
    cast_type = "float16"
    if dtype in need_cast_dtype:
        data1 = akg.topi.cast(data1, cast_type)
        data2 = akg.topi.cast(data2, cast_type)

    res = akg.topi.subtract(data1, data2)

    if dtype in need_cast_dtype:
        if dtype == "uint8":
            # wrap negative results around by +256 to emulate uint8 modular arithmetic
            cons = akg.tvm.const(256, dtype=cast_type)
            res = akg.tvm.compute(res.shape,
                                  lambda *indice: akg.tvm.expr.Select(res(*indice) < 0,
                                                                      res(*indice) + cons,
                                                                      res(*indice)),
                                  name="positive_res")
        res = akg.topi.cast(res, dtype)

    return res
def atan_grad(head, input_x):
    """
    Compute gradient of input_x in atan.

    .. math::
        dx = \\frac{1}{1 + x^2} \\cdot dy

    Args:
        head (tvm.tensor.Tensor): Gradient tensor of forward's output, with the
            same shape and dtype as input_x.
        input_x (tvm.tensor.Tensor): Forward's input tensor, supports float16 and float32.

    Returns:
        A tvm.tensor.Tensor as gradient of forward's input.
    """
    vc_util.elemwise_shape_check(head.shape, input_x.shape)
    vc_util.elemwise_dtype_check(head.dtype, input_x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    dtype = input_x.dtype
    tensor_one = dc.one_const(dtype)

    def _compute(*i):
        return tensor_one / (tensor_one + input_x(*i) * input_x(*i)) * head(*i)

    out_tensor = tvm.compute(input_x.shape, _compute, name="out")

    return out_tensor
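
# A hedged NumPy cross-check of the formula dx = dy / (1 + x^2) used above,
# compared against a central finite difference of np.arctan. The helper name
# is an illustrative assumption; returns the maximum absolute deviation.
def _atan_grad_numpy_check(x, dy, eps=1e-6):
    import numpy as np
    analytic = dy / (1.0 + x * x)
    numeric = dy * (np.arctan(x + eps) - np.arctan(x - eps)) / (2 * eps)
    return np.max(np.abs(analytic - numeric))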
def apply_gradient_descent(var, alpha, delta):
    """
    Update var by subtracting alpha * delta from it.

    .. math::
        var_{t} = var_{t-1} - \\alpha \\delta

    Args:
        var (tvm.tensor.Tensor): Input var of dtype float16, float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of same type as input var.
        delta (tvm.tensor.Tensor): A tensor of same shape and dtype as input var.

    Returns:
        tvm.tensor.Tensor, updated var.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (alpha, delta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    vc_util.elemwise_shape_check(var.shape, delta.shape)
    if tuple(get_shape(alpha)) != (1,):
        raise RuntimeError("input alpha only supports scalar tensor.")

    # compute
    out_var = _apply_gradient_descent_compute(var, alpha, delta)

    # reuse var
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return out_var, attrs
def select(condition, x1, x2):
    """
    Selects elements from x1 or x2, depending on condition.

    Note:
        Every parameter's shape must be legal. Broadcast of condition's shape is supported.

    Args:
        condition (tvm.tensor.Tensor): Tensor of type int8, int32; elements must be 0 or 1.
        x1 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.
        x2 (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32, uint8.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as x1.
    """
    shape_x1 = get_shape(x1)
    shape_x2 = get_shape(x2)
    con_shape = get_shape(condition)
    vc_util.elemwise_shape_check(shape_x1, shape_x2)
    vc_util.elemwise_dtype_check(x1.dtype, x2.dtype,
                                 [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT8,
                                  vc_util.DtypeForDavinci.INT32, vc_util.DtypeForDavinci.UINT8])
    vc_util.ops_dtype_check(condition.dtype,
                            [vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.INT32])
    vc_util.auto_broadcast_check(con_shape, shape_x1)

    res = select_compute(condition, x1, x2)
    return res
def minimum_ad(head, data_x, data_y, grad_x=True, grad_y=True):
    """
    Calculate the reversed outputs of the operator minimum by automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor of float32, float16 or int32.
        data_x (tvm.tensor.Tensor): Input tensor of float32, float16 or int32.
        data_y (tvm.tensor.Tensor): Input tensor of float32, float16 or int32.
        grad_x (bool): Whether to differentiate with respect to x. Defaults to True.
        grad_y (bool): Whether to differentiate with respect to y. Defaults to True.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as head. If both grad_x
        and grad_y are True, a pair (jacs[0], jacs[1]) is returned.
    """
    vc_util.elemwise_shape_check(data_x.shape, data_y.shape)
    vc_util.elemwise_shape_check(head.shape, data_x.shape)
    vc_util.elemwise_dtype_check(data_x.dtype, head.dtype,
                                 [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    vc_util.elemwise_dtype_check(data_x.dtype, data_y.dtype,
                                 [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])
    if not grad_x and not grad_y:
        raise ValueError("At least one of grad_x and grad_y must be True.")

    op = minimum.minimum(data_x, data_y)
    jacs = list(akg.differentiate(op, [data_x, data_y], head))
    if grad_x and grad_y:
        return jacs[0], jacs[1]
    if grad_x:
        return jacs[0]
    return jacs[1]
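
# Hedged NumPy reference for the gradients of minimum: the head flows to x
# where x <= y and to y elsewhere. How ties at x == y are split between the two
# inputs is an assumption of this sketch (akg.differentiate may differ there);
# the helper name is illustrative. Inputs are np.ndarray.
def _minimum_ad_numpy_reference(head, x, y):
    dx = head * (x <= y)
    dy = head * (x > y)
    return dx, dy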
def apply_ftrl(var, accum, linear, grad, lr, l1, l2, lr_power):
    """
    Ftrl-proximal optimization algorithm.

    Note:
        accum_new = accum + grad * grad
        linear_new = linear + grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
        x = clip(linear_new, -l1, l1) - linear_new
        y = accum_new^(-lr_power) / lr + 2 * l2
        var_new = x / y

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry
            in it must be greater than or equal to zero.
        linear (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        lr_power (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
            Its value must be less than or equal to zero.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
        tvm.tensor.Tensor, updated linear.
    """
    # The vlog instruction on the mini product has a precision problem, and the
    # mini product is used for inference rather than training.
    if utils.product_is_mini():
        raise RuntimeError("The apply_ftrl operator does not support the mini product")

    # check shapes
    vc_util.check_shape(var)
    shape = get_shape(var)
    for tensor in (accum, linear, grad):
        vc_util.elemwise_shape_check(shape, tensor.shape)
    scalar_shape = (1,)
    for scalar in (lr, l1, l2, lr_power):
        vc_util.elemwise_shape_check(scalar.shape, scalar_shape)

    # check dtypes
    dtype = var.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.FLOAT16, vc_util.DtypeForDavinci.FLOAT32])
    for tensor in (var, accum, linear, grad, lr, l1, l2, lr_power):
        vc_util.elemwise_dtype_check(tensor.dtype, dtype)

    var_new, accum_new, linear_new = apply_ftrl_impl(var, accum, linear, grad, lr, l1, l2,
                                                     None, lr_power, with_l2_shrinkage=False)

    # update by inplace
    (var_new, accum_new, linear_new), binds_info = \
        utils.TensorUtils.inplace_set_tensors((var, accum, linear),
                                              (var_new, accum_new, linear_new))
    attrs = {utils.BINDS: binds_info}
    return var_new, accum_new, linear_new, attrs
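
# A hedged NumPy transcription of the FTRL-proximal update from the Note above,
# usable as a golden model in tests. The helper name is an assumption.
def _ftrl_numpy_reference(var, accum, linear, grad, lr, l1, l2, lr_power):
    import numpy as np
    accum_new = accum + grad * grad
    linear_new = linear + grad - (accum_new ** (-lr_power) - accum ** (-lr_power)) / lr * var
    x = np.clip(linear_new, -l1, l1) - linear_new
    y = accum_new ** (-lr_power) / lr + 2 * l2
    var_new = x / y
    return var_new, accum_new, linear_new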
def fake_quant_with_min_max_vars_per_channel_gradient(input_gradients, input_data,
                                                      input_min, input_max,
                                                      num_bits=8, narrow_range=False):
    """
    Compute gradients of fake-quantize on the 'input_data' tensor:

    output_backprops = input_gradients * (1 if nudged_min <= input_data <= nudged_max else 0)

    Args:
        input_gradients (tvm.tensor.Tensor): Input gradients from the previous operation.
        input_data (tvm.tensor.Tensor): Input of fake-quantize, only supports "float32".
        input_min (tvm.tensor.Tensor): Only supports float32. Its shape must equal the
            shape of input_max, and its last dimension must match the last dimension
            of input_data.
        input_max (tvm.tensor.Tensor): Only supports float32.
        num_bits (int): Bitwidth of the quantization, between 2 and 16. Defaults to 8.
        narrow_range (bool): If True, quantize into the range [1, 2^num_bits - 1];
            if False, quantize into the range [0, 2^num_bits - 1].

    Returns:
        tvm.tensor.Tensor
    """
    input_gradients_shape = get_shape(input_gradients)
    input_data_shape = get_shape(input_data)
    input_min_shape = get_shape(input_min)
    input_max_shape = get_shape(input_max)

    vc_util.check_shape(input_gradients_shape)
    vc_util.check_shape(input_data_shape)
    vc_util.check_shape(input_min_shape)
    vc_util.check_shape(input_max_shape)

    vc_util.elemwise_shape_check(input_gradients.shape, input_data.shape)
    vc_util.elemwise_shape_check(input_min_shape, input_max_shape)
    if input_min_shape[0] != input_data_shape[-1]:
        raise RuntimeError("The last dimension of min, max and shape_inputs should be the same")

    vc_util.ops_dtype_check(input_gradients.dtype, vc_util.DtypeForDavinci.FLOAT32)
    vc_util.ops_dtype_check(input_data.dtype, vc_util.DtypeForDavinci.FLOAT32)
    vc_util.ops_dtype_check(input_min.dtype, vc_util.DtypeForDavinci.FLOAT32)
    vc_util.ops_dtype_check(input_max.dtype, vc_util.DtypeForDavinci.FLOAT32)

    if num_bits > 16 or num_bits < 2:
        raise RuntimeError("num_bits should be in range [2, 16]")

    input_min_broadcast = topi.broadcast_to(input_min, input_data_shape)
    input_max_broadcast = topi.broadcast_to(input_max, input_data_shape)

    res = fake_quant_with_min_max_vars_per_channel_gradient_compute(input_gradients, input_data,
                                                                    input_min_broadcast,
                                                                    input_max_broadcast,
                                                                    num_bits, narrow_range)
    return res
def _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """Check inputs."""
    vc_util.check_shape(var)
    for i in (ms, mom, grad, lr, momentum, rho):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def apply_centered_rms_prop(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Update `var` according to the centered RMSProp algorithm.

    Note:
        out_mean_grad = rho * mg + (1 - rho) * grad
        out_mean_square = rho * ms + (1 - rho) * grad * grad
        out_mom = momentum * mom + lr * grad / sqrt(out_mean_square - out_mean_grad^2 + epsilon)
        out_var = var - out_mom

    Args:
        var (tvm.tensor.Tensor): Input data of type float16 or float32.
        mg (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        ms (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        mom (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        grad (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        momentum (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        rho (tvm.tensor.Tensor): A scalar tensor of the same type as `var`; the decay rate.
        epsilon (float): A small positive value to avoid division by zero.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated mean_grad.
        tvm.tensor.Tensor, updated mean_square.
        tvm.tensor.Tensor, updated mom.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (mg, ms, mom, lr, rho, momentum, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (mg, ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than 0.")

    out_var, out_mg, out_ms, out_mom = _apply_centered_rms_prop_compute(
        var, mg, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_mg, binds_info2 = TensorUtils.inplace_set(mg, out_mg, "mg_buf")
    out_ms, binds_info3 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info4 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    binds_info.update(binds_info4)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_mg, out_ms, out_mom, attrs
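
# Hedged NumPy reference for the centered RMSProp update listed above; the
# helper name is an assumption, intended as a golden model for testing.
def _centered_rms_prop_numpy_reference(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    import numpy as np
    mg_new = rho * mg + (1 - rho) * grad
    ms_new = rho * ms + (1 - rho) * grad * grad
    mom_new = momentum * mom + lr * grad / np.sqrt(ms_new - mg_new * mg_new + epsilon)
    return var - mom_new, mg_new, ms_new, mom_new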
def matrix_set_diag(input_matrix, input_diagonal, input_help):
    """
    Return a batched matrix tensor with new batched diagonal values.

    Args:
        input_matrix (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8.
            The last two dimensions can be unequal.
        input_diagonal (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8.
            Its last dimension must equal min(input_matrix.shape[-1], input_matrix.shape[-2]).
        input_help (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8,
            whose diagonal elements are 1 and all other positions are 0.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as input_matrix.
    """
    shape_input = get_shape(input_matrix)
    shape_diag = get_shape(input_diagonal)
    shape_help = get_shape(input_help)
    dtype = input_matrix.dtype

    vc_util.check_shape(shape_input)
    vc_util.check_shape(shape_diag)
    vc_util.check_shape(shape_help)

    # Check help matrix.
    if (len(shape_input) < 2) or (len(shape_help) < 2):
        raise RuntimeError("Only input tensors of rank >= 2 are supported!")
    vc_util.elemwise_shape_check(shape_input, shape_help)

    # Check supported dtypes.
    vc_util.ops_dtype_check(dtype,
                            [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT8,
                             vc_util.DtypeForDavinci.INT32, vc_util.DtypeForDavinci.UINT8])

    # Adjust the diagonal's shape according to the input shape: extend shape_diag
    # with a dimension of 1 for broadcast.
    # If shape_input is [2, 4, 7, 9] and shape_diag is [2, 4, 7], the new shape is [2, 4, 7, 1].
    # If shape_input is [2, 4, 9, 7] and shape_diag is [2, 4, 7], the new shape is [2, 4, 1, 7].
    if shape_input[-2] <= shape_input[-1]:
        shape_b_newshape = list(shape_diag) + [1]
    else:
        # The penultimate dimension of shape_diag is extended for broadcast.
        shape_b_newshape = list(shape_diag)
        shape_b_newshape.insert(-1, 1)

    input_diagonal = topi.reshape(input_diagonal, shape_b_newshape)
    res = matrix_set_diag_compute(input_matrix, input_diagonal, input_help)
    return res
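
# Hedged NumPy illustration of the broadcast trick above: depending on which of
# the last two dims is larger, the diagonal gets a trailing or a penultimate
# axis of length 1 so it broadcasts along rows or columns. The final combine
# step is an assumed model of matrix_set_diag_compute, not its actual code.
def _set_diag_numpy_sketch(matrix, diagonal, help_matrix):
    import numpy as np
    if matrix.shape[-2] <= matrix.shape[-1]:
        diag = diagonal[..., :, np.newaxis]   # [..., k] -> [..., k, 1]
    else:
        diag = diagonal[..., np.newaxis, :]   # [..., k] -> [..., 1, k]
    # help_matrix is 1 on the diagonal and 0 elsewhere
    return matrix * (1 - help_matrix) + diag * help_matrix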
def eltwise(data, mode=1, coeff=()):
    """
    Compute elementwise modes, such as 0:PRODUCT, 1:SUM and 2:MAX.

    Args:
        data (list of tvm.tensor.Tensor): A list of tensors of type float16 or float32.
        mode (int): 0:product, 1:sum, 2:max.
        coeff (tuple): Its length should equal the number of tensors in data; only
            used by sum; entries support int and float.

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    vc_util.check_shape(data[0].shape)
    shape_data = get_shape(data[0])

    if mode not in (0, 1, 2):
        raise RuntimeError("mode only supports 0, 1 or 2")

    if len(data) != len(coeff) and len(coeff) != 0:
        raise RuntimeError("coeff should be empty or its length should equal that of data")

    # tensor num must be in [1, 120]
    tensor_num = len(data)
    if tensor_num < 1 or tensor_num > 120:
        raise RuntimeError("tensor_num need in range [1, 120].")

    if mode == 1 and len(coeff) == 0:
        return addn.addn(data)

    if len(coeff) != 0 and not isinstance(coeff[0], (int, float)):
        raise RuntimeError("elements of coeff must be numbers.")

    for i in range(1, len(data)):
        vc_util.elemwise_dtype_check(data[0].dtype, data[i].dtype)
        vc_util.elemwise_shape_check(data[0].shape, data[i].shape)

    if mode == 1 and len(coeff) > 0:
        return _addn(data, coeff)

    if mode == 0:
        return _product(data)

    if mode == 2:
        return _max(data)
def logical_or(input1, input2):
    """
    Compute the elementwise logical OR of two boolean tensors.

    Args:
        input1 (tvm.tensor.Tensor): Tensor of type bool.
        input2 (tvm.tensor.Tensor): Tensor of same shape and type as input1.

    Returns:
        tvm.tensor.Tensor, has the same shape and type as the inputs.
    """
    dtype1 = input1.dtype
    dtype2 = input2.dtype
    vc_util.elemwise_dtype_check(dtype1, dtype2)
    vc_util.ops_dtype_check(dtype1, vc_util.DtypeForDavinci.BOOL)

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)
    vc_util.elemwise_shape_check(shape1, shape2)

    res = akg.topi.logical_or(input1, input2)
    return res
def _check_inputs(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step):
    """Check op inputs."""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (grad_accum, grad_squared_accum, grad, lr, l1, l2):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    vc_util.ops_dtype_check(global_step.dtype, vc_util.DtypeForDavinci.INT32)

    # check shape
    for i in (grad_accum, grad_squared_accum, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, l1, l2, global_step):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, l1, l2 and global_step only support scalar tensor.")
def _check_inputs(var, accum, accum_update, grad, lr, rho, epsilon):
    """Check op inputs."""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (accum, accum_update, grad, lr, rho):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shape
    for i in (accum, accum_update, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr and rho only support scalar tensor.")

    # check value
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def _check_inputs(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon):
    """Check op inputs."""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, v, grad, beta1_power, lr, beta1, beta2):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shape
    for i in (m, v, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (beta1_power, lr, beta1, beta2):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("beta1_power, lr, beta1 and beta2 only support scalar tensor.")

    # check value
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
def apply_proximal_adagrad(var, accum, lr, l1, l2, grad):
    """
    The FOBOS optimization algorithm with Adagrad learning rate.

    Note:
        accum_new = accum + grad * grad
        ada_lr = lr * rsqrt(accum_new)
        prox_var = var - ada_lr * grad
        if l1 > 0:
            var_new = sign(prox_var)/(1+ada_lr*l2) * max{|prox_var|-ada_lr*l1, 0}
        else:
            var_new = prox_var/(1+ada_lr*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry
            in it must be greater than or equal to zero.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
    """
    # check shapes
    vc_util.check_shape(var)
    shape = get_shape(var)
    for tensor in (accum, grad):
        vc_util.elemwise_shape_check(shape, tensor.shape)
    scalar_shape = (1,)
    for scalar in (lr, l1, l2):
        vc_util.elemwise_shape_check(scalar.shape, scalar_shape)

    # check dtypes
    dtype = var.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.FLOAT16, vc_util.DtypeForDavinci.FLOAT32])
    for tensor in (var, accum, lr, l1, l2, grad):
        vc_util.elemwise_dtype_check(tensor.dtype, dtype)

    var_new, accum_new = _apply_proximal_adagrad_compute(var, accum, lr, l1, l2, grad)
    (var_new, accum_new), binds_info = utils.TensorUtils.inplace_set_tensors(
        [var, accum], [var_new, accum_new])
    attrs = {utils.BINDS: binds_info}
    return var_new, accum_new, attrs
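
# Hedged NumPy reference for the FOBOS-with-Adagrad update in the Note above.
# The helper name is an assumption; l1 and l2 are taken as Python floats here.
def _proximal_adagrad_numpy_reference(var, accum, lr, l1, l2, grad):
    import numpy as np
    accum_new = accum + grad * grad
    ada_lr = lr / np.sqrt(accum_new)  # lr * rsqrt(accum_new)
    prox_var = var - ada_lr * grad
    if l1 > 0:
        var_new = np.sign(prox_var) / (1 + ada_lr * l2) * np.maximum(np.abs(prox_var) - ada_lr * l1, 0)
    else:
        var_new = prox_var / (1 + ada_lr * l2)
    return var_new, accum_new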
def l1_loss(inputs, target, reduction='none'):
    """
    Compute L1 loss.

    Args:
        inputs (akg.tvm.Tensor): Supported data types are float16 and float32.
        target (akg.tvm.Tensor): Tensor of same type as inputs.
        reduction (str): One of 'none' (default), 'sum' or 'mean'. With 'mean',
            the loss is divided by the number of elements of inputs.

    Returns:
        akg.tvm.Tensor of same type as the input tensors.
    """
    inputs_dtype = inputs.dtype
    target_dtype = target.dtype

    # check input data types
    vc_util.ops_dtype_check([inputs_dtype, target_dtype], vc_util.DtypeForDavinci.ALL_FLOAT)

    target_shape = [x.value for x in target.shape]
    inputs_shape = [x.value for x in inputs.shape]
    vc_util.elemwise_shape_check(target_shape, inputs_shape)

    # the mini product computes float32 in float16
    inputs_dtype_old = inputs_dtype
    if utils.product_is_mini() and inputs_dtype == 'float32':
        inputs = akg.topi.cast(inputs, "float16")
        target = akg.topi.cast(target, "float16")
        inputs_dtype = "float16"

    diff = akg.topi.subtract(inputs, target)
    loss = akg.topi.abs(diff)

    if reduction == 'sum':
        loss = akg.topi.sum(loss)
    if reduction == 'mean':
        loss = akg.topi.sum(loss)
        deno = 1.0
        for num in inputs.shape:
            deno = deno * num
        deno = akg.topi.cast(deno, dtype=inputs_dtype)
        loss = akg.topi.divide(loss, deno)

    if utils.product_is_mini() and inputs_dtype_old == 'float32':
        loss = akg.topi.cast(loss, inputs_dtype_old)

    return loss
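
# Hedged NumPy reference of the three reduction modes implemented above; the
# helper name is an assumption. 'mean' divides the summed loss by the number
# of elements, matching the deno computation in l1_loss.
def _l1_loss_numpy_reference(inputs, target, reduction='none'):
    import numpy as np
    loss = np.abs(inputs - target)
    if reduction == 'sum':
        return loss.sum()
    if reduction == 'mean':
        return loss.sum() / loss.size
    return loss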
def fake_quant_with_min_max_vars_per_channel(input_data, input_min, input_max,
                                             num_bits=8, narrow_range=False):
    """
    Generate fake-quantized input_data for every channel.

    Note:
        The last dimension of input_data must be equal to d, and it must hold
        that input_min <= 0 <= input_max.

    Args:
        input_data (tvm.tensor.Tensor): Tensor of type float32, its shape must be
            [b, d], [b, h, w, d] or [d].
        input_min (tvm.tensor.Tensor): Tensor of type float32, its shape must be [d].
        input_max (tvm.tensor.Tensor): Tensor of type float32, its shape must be [d].
        num_bits (int): The quantization bits, must be int. Defaults to 8.
        narrow_range (Union[bool, None]): If True, quant_min is 1, else 0. Defaults to False.

    Returns:
        tvm.tensor.Tensor of same type and shape as input_data.
    """
    # get shapes and check
    shape_inputs = get_shape(input_data)
    shape_min = get_shape(input_min)
    shape_max = get_shape(input_max)
    vc_util.elemwise_shape_check(shape_min, shape_max)
    vc_util.auto_broadcast_check(shape_min, shape_inputs)
    if shape_min[0] != shape_inputs[-1]:
        raise RuntimeError("The last dimension of min, max and shape_inputs should be the same!")

    # check dtypes
    vc_util.ops_dtype_check(input_data.dtype, vc_util.DtypeForDavinci.FLOAT32)
    vc_util.elemwise_dtype_check(input_min.dtype, input_max.dtype, vc_util.DtypeForDavinci.FLOAT32)

    # check num_bits range
    if num_bits > 16 or num_bits < 2:
        raise ValueError("num_bits should be in range [2, 16]!")

    # get output by fake_quant_with_min_max_vars_per_channel_compute function
    res = fake_quant_with_min_max_vars_per_channel_compute(input_data, input_min, input_max,
                                                           num_bits, narrow_range)
    return res
def fake_quant_with_min_max_args_gradient(input_gradients, input_data, min=-6, max=6,
                                          num_bits=8, narrow_range=False):
    """
    Compute gradients of fake-quantize on the 'input_data' tensor:

    output_backprops = input_gradients * (1 if nudged_min <= input_data <= nudged_max else 0)

    Args:
        input_gradients (tvm.tensor.Tensor): Input gradients from the previous operation.
        input_data (tvm.tensor.Tensor): Input of fake-quantize, only supports "float32".
        min (Union[float, int]): Scalar, defaults to -6.
        max (Union[float, int]): Scalar, defaults to 6. [min, max] define the clamping
            range for the input_data.
        num_bits (Union[float, int]): Bitwidth of the quantization, between 2 and 16.
            Defaults to 8.
        narrow_range (bool): If True, quantize into the range [1, 2^num_bits - 1];
            if False, quantize into the range [0, 2^num_bits - 1].

    Returns:
        tvm.tensor.Tensor
    """
    shape = get_shape(input_data)
    vc_util.check_shape(shape)
    vc_util.elemwise_shape_check(input_gradients.shape, input_data.shape)
    vc_util.ops_dtype_check(input_data.dtype, vc_util.DtypeForDavinci.FLOAT32)
    vc_util.ops_dtype_check(input_gradients.dtype, vc_util.DtypeForDavinci.FLOAT32)

    nudged_min, nudged_max, scale = nudge_min_max(min, max, num_bits, narrow_range)

    zero_tensor = tvm.compute(input_data.shape,
                              lambda *i: tvm.const(0, dtype="float32"),
                              name="zero_tensor")
    nudged_max_tensor = topi.add(zero_tensor, nudged_max)
    nudged_min_tensor = topi.add(zero_tensor, nudged_min)

    # where((input_data <= nudged_max) & (input_data >= nudged_min), 1, 0):
    # convert the input into a 0/1 tensor
    between_nudged_min_max = _cmpare_value(input_data, nudged_min_tensor, nudged_max_tensor)

    res = topi.multiply(input_gradients, between_nudged_min_max)
    return res
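
# Hedged NumPy sketch of the masking described above: the incoming gradient
# passes through only where input_data lies within [nudged_min, nudged_max].
# nudged_min and nudged_max are assumed to come from nudge_min_max; the helper
# name is illustrative.
def _fake_quant_grad_numpy_sketch(input_gradients, input_data, nudged_min, nudged_max):
    import numpy as np
    mask = np.logical_and(input_data >= nudged_min, input_data <= nudged_max)
    return input_gradients * mask.astype(input_gradients.dtype)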
def atan2(y, x):
    """
    Compute arc tangent of y/x.

    .. math::
        \\arctan2(y, x) = \\arctan(\\frac{y}{x})

    Args:
        y (tvm.tensor.Tensor): Input tensor, only supports float16, float32.
        x (tvm.tensor.Tensor): Input tensor, only supports float16, float32.

    Returns:
        A tvm.tensor.Tensor as angles in radians.
    """
    vc_util.elemwise_shape_check(get_shape(y), get_shape(x))
    vc_util.elemwise_dtype_check(y.dtype, x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return _atan2_compute(y, x), {"enable_auto_inline": False}
def apply_proximal_gradient_descent(var, alpha, l1, l2, delta):
    """
    The FOBOS algorithm with fixed learning rate.

    Note:
        prox_var = var - alpha * delta
        if l1 > 0:
            var_new = sign(prox_var)/(1+alpha*l2) * max{|prox_var|-alpha*l1, 0}
        else:
            var_new = prox_var/(1+alpha*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        delta (tvm.tensor.Tensor): A tensor of same shape and type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
    """
    # check shapes
    vc_util.check_shape(var)
    shape = get_shape(var)
    vc_util.elemwise_shape_check(shape, delta.shape)
    scalar_shape = (1,)
    for scalar in (alpha, l1, l2):
        vc_util.elemwise_shape_check(scalar.shape, scalar_shape)

    # check dtypes
    dtype = var.dtype
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.FLOAT16, vc_util.DtypeForDavinci.FLOAT32])
    for tensor in (var, alpha, l1, l2, delta):
        vc_util.elemwise_dtype_check(tensor.dtype, dtype)

    var_new = apply_proximal_gradient_descent_impl(var, alpha, l1, l2, delta)
    var_new, binds_info = utils.TensorUtils.inplace_set(var, var_new, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return var_new, attrs
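
# Hedged NumPy reference for the fixed-learning-rate FOBOS update in the Note
# above; the helper name is an assumption, and alpha, l1, l2 are floats here.
def _proximal_gd_numpy_reference(var, alpha, l1, l2, delta):
    import numpy as np
    prox_var = var - alpha * delta
    if l1 > 0:
        return np.sign(prox_var) / (1 + alpha * l2) * np.maximum(np.abs(prox_var) - alpha * l1, 0)
    return prox_var / (1 + alpha * l2)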
def apply_power_sign(var, m, grad, lr, logbase, sign_decay, beta):
    """
    Update 'var' according to the PowerSign update.

    Note:
        m_out = beta * m + (1 - beta) * grad
        var_out = var - lr * (exp(logbase * sign_decay * sign(grad) * sign(m_out)) * grad)

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32.
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of same type as var.
        logbase (tvm.tensor.Tensor): A scalar tensor of same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of same type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, grad, lr, logbase, sign_decay, beta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    for i in (m, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, logbase, sign_decay, beta):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, logbase, sign_decay and beta only support scalar tensor.")

    # compute
    out_var, out_m = _apply_power_sign_compute(var, m, grad, lr, logbase, sign_decay, beta)

    # reuse var, m
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
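
# Hedged NumPy reference of the PowerSign update above; the helper name is an
# assumption, intended as a golden model for testing the fused operator.
def _power_sign_numpy_reference(var, m, grad, lr, logbase, sign_decay, beta):
    import numpy as np
    m_out = beta * m + (1 - beta) * grad
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    return var - lr * update, m_out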
def asin_grad(x, dy):
    """
    Gradient for arcsin.

    .. math::
        \\frac{\\partial \\arcsin(x)}{\\partial x} = \\frac{1}{\\sqrt{1 - x^2}}

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32.
        dy (tvm.tensor.Tensor): Tensor of same type and shape as x.

    Returns:
        tvm.tensor.Tensor of same type and shape as x.
    """
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.elemwise_dtype_check(x.dtype, dy.dtype)
    vc_util.elemwise_shape_check(x.shape, dy.shape)

    return _asin_grad_compute(x, dy)
def accumulate_nv2(data):
    """
    Compute the elementwise sum of a list of tensors.

    Args:
        data (Union[tuple, list]): The list of input tensors of type float16,
            float32, int8, uint8, int32.

    Returns:
        tvm.tensor.Tensor, the elementwise sum of the inputs, with the same
        shape and type as each input.
    """
    for d in data:
        vc_util.ops_dtype_check(d.dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    for i in range(1, len(data)):
        vc_util.elemwise_dtype_check(data[0].dtype, data[i].dtype)
        vc_util.elemwise_shape_check(data[0].shape, data[i].shape)

    res = _accumulate_nv2_compute(data)
    return res
def inv_grad(input_y, input_dy):
    """
    Calculate the gradient of the reciprocal: dx = -1 * input_dy * input_y * input_y.

    Args:
        input_y (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32.
        input_dy (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as input_y.
    """
    # check shapes and dtypes
    vc_util.elemwise_shape_check(input_y.shape, input_dy.shape)
    vc_util.elemwise_dtype_check(input_y.dtype, input_dy.dtype,
                                 supported_type=["float16", "float32", "int8", "int32"])

    res = inv_grad_compute(input_y, input_dy)
    return res
def addn(data):
    """
    Compute the elementwise sum of a list of tensors.

    Args:
        data (list of tvm.tensor.Tensor): A list of tensors of type float16, float32.

    Returns:
        tvm.tensor.Tensor, the elementwise sum of the inputs.
    """
    # check types
    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    res = data[0]
    for i in range(1, len(data)):
        vc_util.elemwise_dtype_check(res.dtype, data[i].dtype)
        vc_util.elemwise_shape_check(res.shape, data[i].shape)
    res = akg.topi.elemwise_sum(data)

    return res
def acosh_grad(y, dy):
    """
    Gradient for acosh.

    Note:
        dx = dy * 1/sinh(y)

    Args:
        y (tvm.tensor.Tensor): Tensor of type float16, float32.
        dy (tvm.tensor.Tensor): Tensor of same type and shape as y.

    Returns:
        tvm.tensor.Tensor, same type and shape as y.
    """
    # the mini product is just used for inference
    if utils.product_is_mini():
        raise RuntimeError("The mini product does not support the acosh_grad operator")

    dtype = y.dtype
    vc_util.ops_dtype_check(y.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.elemwise_dtype_check(dtype, dy.dtype)
    vc_util.check_shape(y.shape)
    vc_util.elemwise_shape_check(y.shape, dy.shape)

    if dtype == "float16":
        y = topi.cast(y, "float32")
        dy = topi.cast(dy, "float32")

    # If we used sinh(y) = (exp(y) - exp(-y))/2 directly, there would be precision
    # problems: since dx = dy/sinh(y), when exp(y) and exp(-y) are close, the small
    # precision error of the exp calculation is greatly amplified in the final result.
    sinh_y = _sinh_taylor(y)
    dx = topi.divide(dy, sinh_y)

    if dx.dtype != dtype:
        dx = topi.cast(dx, dtype)
    attrs = {"enable_auto_inline": False}
    return dx, attrs
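
# Hedged NumPy cross-check of dx = dy / sinh(y). np.sinh stands in for the
# on-device Taylor expansion _sinh_taylor, so small numerical differences
# against the operator output are expected. The helper name is illustrative.
def _acosh_grad_numpy_check(y, dy):
    import numpy as np
    return dy / np.sinh(y)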
def apply_add_sign(var, m, grad, lr, alpha, sign_decay, beta):
    """
    Update 'var' according to the AddSign update.

    Note:
        m_out = m * beta + grad * (1 - beta)
        var_out = var - lr * (alpha + sign_decay * sign(grad) * sign(m_out)) * grad

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32.
        m (tvm.tensor.Tensor): A tensor of type float16 or float32.
        grad (tvm.tensor.Tensor): A tensor of type float16 or float32.
        lr (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.
        beta (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, lr, alpha, sign_decay, beta, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (m, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, alpha, sign_decay, beta):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, alpha, sign_decay and beta only support scalar.")

    out_var, out_m = _apply_add_sign_compute(var, m, grad, lr, alpha, sign_decay, beta)

    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
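
# Hedged NumPy reference of the AddSign update above; the helper name is an
# assumption. Note this sketch uses the updated moment sign(m_out), mirroring
# the PowerSign reference earlier in this file.
def _add_sign_numpy_reference(var, m, grad, lr, alpha, sign_decay, beta):
    import numpy as np
    m_out = beta * m + (1 - beta) * grad
    update = (alpha + sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    return var - lr * update, m_out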