def apply_rms_prop_mixed_precision(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Mixed precision version of apply_rms_prop.

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        ms (tvm.tensor.Tensor): Mean square, a tensor of same shape and type as var.
        mom (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (float): Coefficient for calculating new mom, 0.0 <= momentum <= 1.0.
        rho (float): Coefficient for calculating new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, updated var of type float32.
        tvm.tensor.Tensor, updated var of type float16.
        tvm.tensor.Tensor, updated ms.
        tvm.tensor.Tensor, updated mom.
    """
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.FLOAT32)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    out_var, out_var_fp16, out_ms, out_mom = _apply_rms_prop_mixed_precision_compute(
        var, ms, mom, grad, lr, momentum, rho, epsilon)

    # reuse var, ms and mom
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ms, binds_info2 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info3 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_var_fp16, out_ms, out_mom, attrs

def sgd(parameters, gradient, accum, stat, learning_rate, momentum, dampening=0.0,
        weight_decay=0.0, nesterov=False):
    """
    Update parameters, accum and stat according to the SGD algorithm.

    accum = accum * momentum + grad
    if nesterov is True:
        parameters -= grad * lr + accum * momentum * lr
    else:
        parameters -= accum * lr

    Args:
        parameters (tvm.tensor.Tensor): parameters tensor of float32, float16, to be updated.
        gradient (tvm.tensor.Tensor): gradient tensor of float32, float16.
        accum (tvm.tensor.Tensor): accum tensor of float32, float16, to be updated.
        stat (tvm.tensor.Tensor): stat tensor of float32, float16, to be updated.
        learning_rate (tvm.tensor.Tensor): learning_rate tensor of float32, float16, shape must be equal to (1,).
        momentum (tvm.tensor.Tensor): momentum tensor of float32, float16, shape must be equal to (1,).
        dampening (float): Default value is 0.0.
        weight_decay (float): Default value is 0.0.
        nesterov (bool): Default is False.

    Returns:
        accum_t (tvm.tensor.Tensor): updated accum with same type and shape as accum.
        stat_t (tvm.tensor.Tensor): updated stat with same type and shape as stat.
        parameters_t (tvm.tensor.Tensor): updated parameters with same type and shape as parameters.
    """
    if nesterov and dampening != 0:
        raise ValueError("Nesterov requires zero dampening!")
    if weight_decay < 0:
        raise ValueError("weight_decay must be >= 0.")

    # shape check
    utils.elemwise_shape_check(parameters.shape, gradient.shape)
    utils.elemwise_shape_check(parameters.shape, accum.shape)
    utils.elemwise_shape_check(parameters.shape, stat.shape)

    # dtype check
    utils.ops_dtype_check([parameters.dtype, gradient.dtype, accum.dtype, stat.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)

    parameters_t, accum_t, stat_t = sgd_compute(parameters, gradient, learning_rate, accum, momentum,
                                                stat, dampening, weight_decay, nesterov)
    parameters_t, binds_info = TensorUtils.inplace_set(parameters, parameters_t, "parameters_buf")
    accum_t, binds_info2 = TensorUtils.inplace_set(accum, accum_t, "accum_buf")
    stat_t, binds_info3 = TensorUtils.inplace_set(stat, stat_t, "stat_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return parameters_t, accum_t, stat_t, attrs

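# The following is a reference sketch, not part of the operator: a minimal
# plain-Python version of the SGD update rule documented above, useful for
# sanity-checking sgd_compute. The name `sgd_reference` is illustrative only.
def sgd_reference(parameters, grad, accum, lr, momentum, nesterov=False):
    """Plain-Python sketch of the SGD update described in sgd()'s docstring."""
    accum = accum * momentum + grad  # accumulate momentum
    if nesterov:
        parameters = parameters - (grad * lr + accum * momentum * lr)
    else:
        parameters = parameters - accum * lr
    return parameters, accum
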
def apply_adagrad_da(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step, target=utils.CCE):
    """
    Update var according to the Adagrad Dual Averaging algorithm.

    grad_accum += grad
    grad_squared_accum += grad * grad
    tmp_val = Sign(grad_accum) * max(|grad_accum| - l1 * global_step, 0) if l1 > 0 else grad_accum
    x_value = -1 * lr * tmp_val
    y_value = l2 * global_step * lr + sqrt(grad_squared_accum)
    var = x_value / y_value

    Args:
        var (tvm.tensor.Tensor): Input var to be updated of type float16, float32.
        grad_accum (tvm.tensor.Tensor): Accumulation of the gradients, of same shape and type as var.
        grad_squared_accum (tvm.tensor.Tensor): Accumulation of the squared gradients, of same shape and type as var.
        grad (tvm.tensor.Tensor): Input grad of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        l1 (tvm.tensor.Tensor): L1 regularization, a scalar tensor of same type as var.
        l2 (tvm.tensor.Tensor): L2 regularization, a scalar tensor of same type as var.
        global_step (tvm.tensor.Tensor): Training step number, a scalar tensor of type int32.

    Returns:
        tvm.tensor.Tensor, the updated var.
        tvm.tensor.Tensor, the updated grad_accum.
        tvm.tensor.Tensor, the updated grad_squared_accum.
    """
    _check_inputs(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step)

    out_var, out_ga, out_gsa = _apply_adagrad_da_compute(
        var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step)

    # reuse var, grad_accum and grad_squared_accum
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ga, binds_info2 = TensorUtils.inplace_set(grad_accum, out_ga, "grad_accum_buf")
    out_gsa, binds_info3 = TensorUtils.inplace_set(grad_squared_accum, out_gsa, "grad_squared_accum_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_ga, out_gsa, attrs

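# Reference sketch (not the kernel): a NumPy version of the Adagrad-DA update
# documented above, with lr/l1/l2/global_step taken as plain scalars. The name
# `adagrad_da_reference` is illustrative only.
import numpy as np

def adagrad_da_reference(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step):
    """NumPy sketch of the Adagrad Dual Averaging update."""
    grad_accum = grad_accum + grad
    grad_squared_accum = grad_squared_accum + grad * grad
    if l1 > 0:
        tmp_val = np.sign(grad_accum) * np.maximum(np.abs(grad_accum) - l1 * global_step, 0)
    else:
        tmp_val = grad_accum
    x_value = -1.0 * lr * tmp_val
    y_value = l2 * global_step * lr + np.sqrt(grad_squared_accum)
    var = x_value / y_value
    return var, grad_accum, grad_squared_accum
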
def apply_power_sign(var, m, grad, lr, logbase, sign_decay, beta, target=utils.CCE):
    """
    Update 'var' according to the PowerSign update.

    m_out = beta * m + (1 - beta) * grad
    var_out = var - lr * (exp(logbase * sign_decay * Sign(grad) * Sign(m_out)) * grad)

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32.
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of same type as var.
        logbase (tvm.tensor.Tensor): A scalar tensor of same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of same type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    # check dtypes
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (m, grad, lr, logbase, sign_decay, beta):
        utils.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    for i in (m, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, logbase, sign_decay, beta):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, logbase, sign_decay and beta only support scalar tensor.")

    # compute
    out_var, out_m = _apply_power_sign_compute(var, m, grad, lr, logbase, sign_decay, beta)

    # reuse var, m
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs

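# Reference sketch (not the kernel): a NumPy version of the PowerSign update
# documented above, with the scalar tensors taken as plain floats. The name
# `power_sign_reference` is illustrative only.
import numpy as np

def power_sign_reference(var, m, grad, lr, logbase, sign_decay, beta):
    """NumPy sketch of the PowerSign update."""
    m_out = beta * m + (1 - beta) * grad
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    return var - lr * update, m_out
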
def apply_centered_rms_prop(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Update `var` according to the centered RMSProp algorithm.

    out_mean_grad = rho * mg + (1 - rho) * grad
    out_mean_square = rho * ms + (1 - rho) * grad * grad
    out_mom = momentum * mom + lr * grad / sqrt(out_mean_square - out_mean_grad^2 + epsilon)
    out_var = var - out_mom

    Args:
        var (tvm.tensor.Tensor): Input data of type float16 or float32.
        mg (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        ms (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        mom (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        grad (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        momentum (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        rho (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated mean_grad.
        tvm.tensor.Tensor, updated mean_square.
        tvm.tensor.Tensor, updated mom.
    """
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (mg, ms, mom, lr, rho, momentum, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (mg, ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than 0.")

    out_var, out_mg, out_ms, out_mom = _apply_centered_rms_prop_compute(
        var, mg, ms, mom, grad, lr, momentum, rho, epsilon)

    # reuse var, mg, ms and mom
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_mg, binds_info2 = TensorUtils.inplace_set(mg, out_mg, "mg_buf")
    out_ms, binds_info3 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info4 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    binds_info.update(binds_info4)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_mg, out_ms, out_mom, attrs

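# Reference sketch (not the kernel): a NumPy version of the centered RMSProp
# update documented above, with the scalar tensors taken as plain floats. The
# name `centered_rms_prop_reference` is illustrative only.
import numpy as np

def centered_rms_prop_reference(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """NumPy sketch of the centered RMSProp update."""
    mg = rho * mg + (1 - rho) * grad
    ms = rho * ms + (1 - rho) * grad * grad
    mom = momentum * mom + lr * grad / np.sqrt(ms - mg * mg + epsilon)
    return var - mom, mg, ms, mom
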
def apply_rms_prop(var, ms, mom, grad, lr, momentum, rho, epsilon, target=utils.CCE):
    """
    Updates var using the RMSProp algorithm.

    .. math::
        \\begin{array}{ll} \\\\
            \\hat{ms} &= rho \\cdot ms + (1 - rho) \\cdot grad^2 \\\\
            \\hat{mom} &= momentum \\cdot mom + \\frac{lr \\cdot grad}{\\sqrt{\\hat{ms} + epsilon}} \\\\
            var &= var - \\hat{mom}
        \\end{array}

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        ms (tvm.tensor.Tensor): Mean square, a tensor of same shape and type as var.
        mom (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (tvm.tensor.Tensor): Coefficient for calculating new mom, 0.0 <= momentum <= 1.0.
        rho (tvm.tensor.Tensor): Coefficient for calculating new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated ms.
        tvm.tensor.Tensor, updated mom.
    """
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    out_var, out_ms, out_mom = _apply_rms_prop_compute(var, ms, mom, grad, lr, momentum, rho, epsilon)

    # reuse var, ms and mom
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ms, binds_info2 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info3 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_ms, out_mom, attrs

def hpl_cholesky(a):
    attrs = {"RewriteVarTensorIdx": True}

    @script
    def func(a):
        w = a.shape[0]
        tmp = allocate((a.shape[0], ), a.dtype, "local")
        tmp_0 = allocate((a.shape[0], ), a.dtype, "local")
        tmp_1 = allocate((a.shape[0], ), a.dtype, "local")
        out_0 = allocate(a.shape, a.dtype, "local")
        out_1 = allocate(a.shape, a.dtype, "local")
        for i in range(w):
            # scale row i by the square root of the diagonal element
            for j in range(w):
                tmp_0[j] = a[i, i]
                tmp_1[j] = sqrt(tmp_0[j])
                tmp[j] = a[i, j] / tmp_1[j]
            # keep the upper-triangular part of row i, zero the rest
            for j in range(w):
                if j >= i:
                    a[i, j] = tmp[j]
                else:
                    a[i, j] = float16(0.0)
            # rank-1 update of the trailing submatrix
            for k in range(a.shape[0]):
                for l in range(a.shape[1]):
                    if k > i and l > i:
                        out_0[k, l] = a[i, k]
                        out_1[k, l] = out_0[k, l] * a[i, l]
                        a[k, l] = a[k, l] - out_1[k, l]
        return a

    out = func(a)
    out, binds_info = TensorUtils.inplace_set(a, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs

def apply_gradient_descent(var, alpha, delta):
    """
    Update var by subtracting alpha * delta from it.

    .. math::
        var_{t} = var_{t-1} - \\alpha \\delta

    Args:
        var (tvm.tensor.Tensor): Input var of dtype float16, float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of same type as input var.
        delta (tvm.tensor.Tensor): A tensor of same shape and dtype as input var.

    Returns:
        tvm.tensor.Tensor, Updated var.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (alpha, delta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    vc_util.elemwise_shape_check(var.shape, delta.shape)
    if tuple(get_shape(alpha)) != (1, ):
        raise RuntimeError("input alpha only support scalar tensor.")

    # compute
    out_var = _apply_gradient_descent_compute(var, alpha, delta)

    # reuse var
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return out_var, attrs

def apply_ada_max(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon, target=utils.CCE):
    """
    Update var according to the AdaMax algorithm.

    m_t <- beta1 * m_{t-1} + (1 - beta1) * g
    v_t <- max(beta2 * v_{t-1}, abs(g))
    variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        v (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        beta1 (tvm.tensor.Tensor): A scalar tensor of same type as var, 0.0 <= beta1 <= 1.0.
        beta1_power (tvm.tensor.Tensor): The value of :math:`beta1^t`, a scalar tensor of same type as var.
        beta2 (tvm.tensor.Tensor): A scalar tensor of same type as var, 0.0 <= beta2 <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var.
        tvm.tensor.Tensor, Updated m.
        tvm.tensor.Tensor, Updated v.
    """
    _check_inputs(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon)

    out_var, out_m, out_v = _apply_ada_max_compute(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon)

    # reuse var, m and v
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    out_v, binds_info3 = TensorUtils.inplace_set(v, out_v, "v_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, out_v, attrs

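# Reference sketch (not the kernel): a NumPy version of the AdaMax update
# documented above, with beta1_power standing for beta1^t and the scalar
# tensors taken as plain floats. The name `ada_max_reference` is illustrative only.
import numpy as np

def ada_max_reference(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon):
    """NumPy sketch of the AdaMax update."""
    m = beta1 * m + (1 - beta1) * grad
    v = np.maximum(beta2 * v, np.abs(grad))
    var = var - lr / (1 - beta1_power) * m / (v + epsilon)
    return var, m, v
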
def apply_add_sign(var, m, grad, lr, alpha, sign_decay, beta, target=utils.CCE):
    """
    Update 'var' according to the AddSign update.

    m_out = m * beta + grad * (1 - beta)
    var_out = var - lr * (alpha + sign_decay * Sign(grad) * Sign(m)) * grad

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32.
        m (tvm.tensor.Tensor): A tensor of type float16 or float32.
        grad (tvm.tensor.Tensor): A tensor of type float16 or float32.
        lr (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.
        beta (tvm.tensor.Tensor): A scalar tensor of type float16 or float32.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (m, lr, alpha, sign_decay, beta, grad):
        utils.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (m, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, alpha, sign_decay, beta):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, alpha, sign_decay and beta only support scalar tensor.")

    out_var, out_m = _apply_add_sign_compute(var, m, grad, lr, alpha, sign_decay, beta)

    # reuse var, m
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs

def apply_adadelta(var, accum, accum_update, grad, lr, rho, epsilon, target=utils.CCE):
    """
    Update var according to the adadelta scheme.

    accum = rho * accum + (1 - rho) * grad^2
    update = sqrt(accum_update + epsilon) / sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1 - rho) * update^2
    var -= update * lr

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        accum (tvm.tensor.Tensor): The accumulated gradient, a tensor of same shape and type as var.
        accum_update (tvm.tensor.Tensor): The accumulated updates, a tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        rho (tvm.tensor.Tensor): Coefficient for calculating new accum, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var.
        tvm.tensor.Tensor, Updated accum.
        tvm.tensor.Tensor, Updated accum_update.
    """
    _check_inputs(var, accum, accum_update, grad, lr, rho, epsilon)

    out_var, out_accum, out_accum_update = _apply_adadelta_compute(var, accum, accum_update, grad, lr, rho, epsilon)

    # reuse var, accum and accum_update
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_accum, binds_info2 = TensorUtils.inplace_set(accum, out_accum, "accum_buf")
    out_accum_update, binds_info3 = TensorUtils.inplace_set(accum_update, out_accum_update, "accum_update_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_accum, out_accum_update, attrs

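# Reference sketch (not the kernel): a NumPy version of the adadelta scheme
# documented above, with lr/rho taken as plain floats. The name
# `adadelta_reference` is illustrative only.
import numpy as np

def adadelta_reference(var, accum, accum_update, grad, lr, rho, epsilon):
    """NumPy sketch of the adadelta update."""
    accum = rho * accum + (1 - rho) * grad * grad
    update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1 - rho) * update * update
    return var - update * lr, accum, accum_update
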
def inplace_operate_bind(in_tensors, out_tensors, inplace_binds):
    """
    Bind output tensors to the input tensors that need to be computed in place.

    Args:
        in_tensors (Union[list, tuple]): Origin input tensors.
        out_tensors (Union[list, tuple]): Origin output tensors.
        inplace_binds (tuple): Should be a tuple of tuples; the first value of each
            element is an input tensor index, the second an output tensor index,
            i.e. (in_id, out_id), meaning the out_id-th output tensor is updated
            in place into the in_id-th input tensor.

    Returns:
        A two-element tuple, one for the output tensors, the other for the tensor bind relations.
    """
    for in_id, out_id in inplace_binds:
        if in_id >= len(in_tensors) or out_id >= len(out_tensors):
            raise RuntimeError("Inplace binds is invalid, while there are {} "
                               "input tensors and {} output tensors, but get "
                               "bind {}.".format(len(in_tensors), len(out_tensors),
                                                 inplace_binds))

    out_tensors = list(out_tensors)
    tensor_binds = {}
    inplaced_tensors = []
    for i, bind in enumerate(inplace_binds):
        in_tensor = in_tensors[bind[0]]
        out_tensor = out_tensors[bind[1]]
        out_tensor, binds_info = TensorUtils.inplace_set(
            in_tensor, out_tensor, buffer_name="inp_buf_{}".format(i))
        tensor_binds.update(binds_info)
        # The calculation is updated in place in the input tensor, but MindSpore
        # needs a related fake tensor (never used) in the output list...
        out_tensor_shape = out_tensor.shape
        fake_tensor = akg.tvm.compute(
            out_tensor_shape,
            lambda *index, o_tensor=out_tensor: o_tensor(*index),
            name="fake_tensor_{}".format(i))
        out_tensors[bind[1]] = fake_tensor
        inplaced_tensors.append(out_tensor)

    return (tuple(out_tensors + inplaced_tensors), tensor_binds)

def ClearZero(data, target=utils.CCE):
    """
    Sets all elements in tensor to zero.

    Args:
        data (tvm.tensor.Tensor): Tensor to be cleared to zero.

    Returns:
        out: tvm.tensor.Tensor with all elements set to zero.
        attrs: dict.
    """
    shape = [x for x in data.shape]
    zero = akg.tvm.const(0, data.dtype)
    out = akg.tvm.compute(shape, lambda *i: zero, "out")
    out, binds_info = TensorUtils.inplace_set(data, out)
    attrs = {utils.BINDS: binds_info}
    return out, attrs

def Assign(ref, val, target=utils.CUDA):
    """
    Assign val to ref.

    Args:
        ref: Tensor, which is mutable.
        val: Tensor, which will be assigned to ref.

    Returns:
        fake_output: Tensor with the same shape as ref, needed by ME.
        ref_val: Tensor, ref assigned with val.
        attrs: Dictionary, indicates that ref and ref_val share the same buffer.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    dtype = val.dtype
    utils.ops_dtype_check(dtype, [
        utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT8,
        utils.DtypeForDavinci.INT16, utils.DtypeForDavinci.INT32,
        utils.DtypeForDavinci.INT64, utils.DtypeForDavinci.UINT8,
        utils.DtypeForDavinci.UINT16, utils.DtypeForDavinci.UINT32,
        utils.DtypeForDavinci.UINT64
    ])
    shape1 = [x.value for x in ref.shape]
    shape2 = [x.value for x in val.shape]
    if shape1 != shape2:
        raise RuntimeError("assign operations need input shapes to be equal!")
    utils.check_shape(shape2)

    ref_val = akg.tvm.compute(shape2, lambda *indice: val(*indice), name="ref_val")
    ref_val, binds_info = TensorUtils.inplace_set(ref, ref_val)
    attrs = {utils.BINDS: binds_info}
    fake_output = akg.tvm.compute(ref.shape, lambda *indice: ref_val(*indice),
                                  name="fake_output")
    return fake_output, ref_val, attrs

def apply_proximal_gradient_descent(var, alpha, l1, l2, delta, target=utils.CCE):
    """
    The FOBOS algorithm with fixed learning rate.

    Note:
        prox_var = var - alpha * delta
        if l1 > 0:
            var_new = Sign(prox_var)/(1+alpha*l2) * max{|prox_var|-alpha*l1, 0}
        else:
            var_new = prox_var/(1+alpha*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        delta (tvm.tensor.Tensor): A tensor of same shape and type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
    """
    # check shape
    utils.check_shape(var)
    shape = get_shape(var)
    utils.elemwise_shape_check(shape, delta.shape)
    scalar_shape = (1,)
    for scalar in (alpha, l1, l2):
        utils.elemwise_shape_check(scalar.shape, scalar_shape)

    # check dtype
    dtype = var.dtype
    utils.ops_dtype_check(dtype, [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.FLOAT32])
    for tensor in (var, alpha, l1, l2, delta):
        utils.elemwise_dtype_check(tensor.dtype, dtype)

    var_new = apply_proximal_gradient_descent_impl(var, alpha, l1, l2, delta)

    # reuse var
    var_new, binds_info = TensorUtils.inplace_set(var, var_new, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return var_new, attrs

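# Reference sketch (not the kernel): a NumPy version of the FOBOS update in the
# note above, with alpha/l1/l2 taken as plain scalars. The name
# `proximal_gd_reference` is illustrative only.
import numpy as np

def proximal_gd_reference(var, alpha, l1, l2, delta):
    """NumPy sketch of the proximal gradient descent (FOBOS) update."""
    prox_var = var - alpha * delta
    if l1 > 0:
        return np.sign(prox_var) / (1 + alpha * l2) * np.maximum(np.abs(prox_var) - alpha * l1, 0)
    return prox_var / (1 + alpha * l2)
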
def hpl_trsm(a, b):
    attrs = {"RewriteVarTensorIdx": True}

    @script
    def func(a, b):
        inverse_0 = allocate(b.shape, b.dtype, "local")
        row = b.shape[0]
        col = b.shape[1]
        for l in range(col // 16):
            for i in serial(row):
                # eliminate the contribution of the already-solved rows
                for j in serial(i):
                    for k in range(16):
                        inverse_0[i, l * 16 + k] = a[i, j] * b[j, l * 16 + k]
                        b[i, l * 16 + k] = b[i, l * 16 + k] - inverse_0[i, l * 16 + k]
                # divide by the diagonal element to finish row i
                for k in range(16):
                    b[i, l * 16 + k] = b[i, l * 16 + k] / a[i, i]
        return b

    out = func(a, b)
    out, binds_info = TensorUtils.inplace_set(b, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs

def hpl_lu(a):
    attrs = {"RewriteVarTensorIdx": True}

    @script
    def func(a):
        out_0 = allocate(a.shape, a.dtype, "local")
        out_1 = allocate(a.shape, a.dtype, "local")
        for i in range(a.shape[0]):
            # compute the multipliers in column i
            for j in range(a.shape[1]):
                if j > i:
                    a[j, i] = a[j, i] / a[i, i]
            # update the trailing submatrix
            for k in range(a.shape[0]):
                for l in range(a.shape[1]):
                    if k > i and l > i:
                        out_0[k, l] = a[k, i]
                        out_1[k, l] = out_0[k, l] * a[i, l]
                        a[k, l] = a[k, l] - out_1[k, l]
        return a

    out = func(a)
    out, binds_info = TensorUtils.inplace_set(a, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs

def assign_add(data, value):
    """
    Computes data + value elementwise.

    Note:
        Only supports broadcast on input tensor value.

    Args:
        data (tvm.tensor.Tensor): Data tensor.
        value (tvm.tensor.Tensor): Value tensor, broadcast is allowed.

    Returns:
        res: assign add result, tvm.tensor.Tensor, with same type and shape as input tensor data.
        attrs: dict.
    """
    input_shape = [x.value for x in data.shape]
    value_shape = [x.value for x in value.shape]
    if len(input_shape) < len(value_shape):
        raise RuntimeError("Do not support broadcast on input tensor data!")
    for i in range(len(value_shape)):
        if input_shape[len(input_shape) - i - 1] < value_shape[len(value_shape) - i - 1]:
            raise RuntimeError("Only support broadcast on input tensor value!")

    # broadcast adds extra compute and stage, avoid it by checking the shapes beforehand
    if len(value_shape) < len(input_shape) or value_shape != input_shape:
        broadcasted_value = akg.topi.broadcast_to(value, input_shape)
        res = akg.lang.cce.vadd(data, broadcasted_value)
    else:
        res = akg.lang.cce.vadd(data, value)

    res, binds_info = TensorUtils.inplace_set(data, res)
    attrs = {utils.BINDS: binds_info}
    return res, attrs

def ApplyMomentum(weight, grad, accum, lr_mat, momt_mat, use_nesterov=False, grad_scale=1.0,
                  target=utils.CCE):
    """
    Apply momentum operator.

    Note:
        apply momentum is an op with inplace computing and binds is used.

    Args:
        weight (tvm.tensor.Tensor): weight tensor to be updated.
        grad (tvm.tensor.Tensor): gradient tensor.
        accum (tvm.tensor.Tensor): accum tensor to be updated.
        lr_mat (tvm.tensor.Tensor): learning rate tensor with shape (1,).
        momt_mat (tvm.tensor.Tensor): momentum tensor with shape (1,).
        use_nesterov (bool): Default value is False.
        grad_scale (float): Default value is 1.0.

    Returns:
        fake_output: Invalid value, just to suit the framework.
        accum_inplace: tvm.tensor.Tensor, updated accum.
        weight_inplace: tvm.tensor.Tensor, updated weight.
        attrs: dict.
    """
    shape = [x.value for x in weight.shape]

    # shape check
    utils.elemwise_shape_check(weight.shape, grad.shape)
    utils.elemwise_shape_check(weight.shape, accum.shape)

    # dtype check
    utils.ops_dtype_check([weight.dtype, grad.dtype, accum.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)

    grad = akg.tvm.compute(
        shape, lambda *indice: grad(*indice) * akg.tvm.const(grad_scale, grad.dtype),
        name="grad")
    momt_accum = akg.tvm.compute(shape,
                                 lambda *indice: accum(*indice) * momt_mat[0],
                                 name="momt_accum")
    accum_inplace = akg.tvm.compute(
        shape, lambda *indice: momt_accum(*indice) + grad(*indice),
        name="accum_inplace")

    if not use_nesterov:
        sum_grad = akg.tvm.compute(
            shape, lambda *indice: accum_inplace(*indice) * lr_mat[0], name="nesterov_lr")
        weight_inplace = akg.tvm.compute(
            shape, lambda *indice: weight(*indice) - sum_grad(*indice),
            name="weight_inplace")
    else:
        weight_inplace = akg.tvm.compute(
            shape, lambda *indice: weight(*indice) - grad(*indice) * lr_mat[0]
            - accum_inplace(*indice) * momt_mat[0] * lr_mat[0],
            name="weight_inplace")

    weight_inplace, weight_binds_info = TensorUtils.inplace_set(
        weight, weight_inplace, "data_buf")
    accum_inplace, accum_binds_info = TensorUtils.inplace_set(
        accum, accum_inplace, "accum_buf")
    binds_info_all = weight_binds_info
    binds_info_all.update(accum_binds_info)
    attrs = {utils.BINDS: binds_info_all}
    fake_output = akg.tvm.compute(shape, lambda *indice: momt_accum(*indice),
                                  name="fake_output")
    # The variable fake_output is an invalid value, just to suit the framework of ME.
    # The variable weight_inplace is the updated value of weight.
    # The variable accum_inplace is the updated value of accum.
    return fake_output, accum_inplace, weight_inplace, attrs

def scatter_add(ref, indices, updates):
    """
    Add updates to ref based on the sparse index: indices.

    Note:
        updates.shape needs to be equal to indices.shape + ref.shape[1:].

    Args:
        ref (tvm.tensor.Tensor): Tensor of type float16, float32, int32.
        indices (tvm.tensor.Tensor): Tensor of type int32.
        updates (tvm.tensor.Tensor): Tensor with the same type as ref.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as ref.
    """
    shape_ref = get_shape(ref)
    shape_indices = get_shape(indices)
    shape_updates = get_shape(updates)

    utils.check_shape(shape_ref)
    utils.check_shape(shape_indices)
    utils.check_shape(shape_updates)
    utils.ops_dtype_check(
        [ref.dtype, updates.dtype],
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    utils.ops_dtype_check(indices.dtype, utils.DtypeForDavinci.INT32)

    new_shape_indices = (reduce(lambda x, y: x * y, shape_indices), )
    if len(shape_ref) > 1:
        # flatten ref to 2D and updates/indices accordingly
        new_shape_ref = (shape_ref[0], reduce(lambda x, y: x * y, shape_ref[1:]))
        new_indices = topi.reshape(indices, new_shape_indices)
        new_updates_shape = (tuple(new_indices.shape) + tuple(new_shape_ref[1:]))
        new_updates = topi.reshape(updates, new_updates_shape)
        new_ref = topi.reshape(ref, new_shape_ref)
    else:
        new_indices = topi.reshape(indices, new_shape_indices)
        new_updates_shape = (tuple(new_indices.shape) + tuple(shape_ref[1:]))
        new_updates = topi.reshape(updates, new_updates_shape)
        new_ref = ref

    # 1D case hybrid
    @script
    def scatter_add_1d(input, input_indices, input_updates):
        n, = input.shape
        idx_len = input_indices.shape[0]
        for i in range(n):
            for idx in range(idx_len):
                if i == input_indices[idx]:
                    input[input_indices[idx]] += input_updates[idx]
        return input

    # ND case reshaped to 2D's hybrid, now 2D -- 5D are OK
    @script
    def scatter_add_2d(input, input_indices, input_updates):
        n, h = input.shape
        idx_len = input_indices.shape[0]
        for i in range(n):
            for idx in range(idx_len):
                if i == input_indices[idx]:
                    for j in range(h):
                        input[input_indices[idx], j] += input_updates[idx, j]
        return input

    if len(shape_ref) == 1:
        out = scatter_add_1d(new_ref, new_indices, new_updates)
    else:
        out = scatter_add_2d(new_ref, new_indices, new_updates)
        out = topi.reshape(out, shape_ref)

    attrs = {"enable_feature_library": True}
    out, binds_info = TensorUtils.inplace_set(ref, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs

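# Reference sketch (not the kernel): NumPy scatter-add with the same semantics
# as documented above; np.add.at accumulates duplicate indices just like the
# hybrid-script loops. The name `scatter_add_reference` is illustrative only.
import numpy as np

def scatter_add_reference(ref, indices, updates):
    """NumPy sketch of scatter_add: out[indices[i], ...] += updates[i, ...]."""
    out = ref.copy()
    np.add.at(out, indices, updates)
    return out
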