def import_get_func(self, func, mode):
    """
    Import and return the run function of a case, e.g.
    ``from test_run.tile_run import tile_compile``.

    :param func: function name
    :param mode: case mode
    :return: the resolved run function
    """
    run_mod = "tests.common.test_run"
    func_py = None
    try:
        # prefer the backend-specific module: tests.common.test_run.<backend>.<func>
        backend_mod = run_mod + "." + utils.get_backend(self.target).lower()
        func_mod = backend_mod + "." + func
        func_py = __import__(func_mod, fromlist=func)
    except ModuleNotFoundError:
        # fall back to the backend-agnostic module
        func_mod = run_mod + "." + func
        func_py = __import__(func_mod, fromlist=func)
    if func_py is None:
        raise ImportError("failed to import %s, please check!" % func_mod)
    try:
        run_func = getattr(func_py, func)
    except (ImportError, AttributeError):
        # e.g. func "tile_run" with mode "compile" resolves to "tile_compile"
        new_func = func.split("_run")[0] + "_" + mode
        run_func = getattr(func_py, new_func)
    return run_func
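
# Usage sketch (hypothetical caller; assumes a test-suite object exposing a
# `target` attribute and the tests.common.test_run module layout used above):
#
#   run_func = suite.import_get_func("tile_run", "compile")
#   # resolves tests.common.test_run.<backend>.tile_run (or the backend-agnostic
#   # fallback) and returns tile_run, or tile_compile if tile_run is absent
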
def sum_by_shape(broadcast_data, original_shape, target=utils.CCE):
    """
    Sum broadcast_data over the broadcast axes so that it matches original_shape;
    this is the gradient of Broadcast.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' % utils.get_backend(target))
    broadcast_shape = get_shape(broadcast_data)
    original_shape = get_shape(original_shape)
    if broadcast_shape == original_shape:
        return broadcast_data
    if original_shape == [1]:
        data = sum(broadcast_data, target=target)
        return data

    utils.broadcast_check(original_shape, broadcast_shape)
    # first reduce the leading axes that broadcasting prepended
    axis_len = len(broadcast_shape) - len(original_shape)
    if axis_len > 0:
        axis = list(range(axis_len))
        broadcast_data = sum(broadcast_data, axis, False, target=target)
        broadcast_shape = get_shape(broadcast_data)

    # then reduce (keepdims=True) the axes that were expanded from size 1
    axis = []
    for i, _ in enumerate(original_shape):
        if original_shape[i] != broadcast_shape[i]:
            axis.append(i)
    res = sum(broadcast_data, axis, True, target=target)[0] if axis else broadcast_data
    return res
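
# Usage sketch for the Broadcast gradient (illustrative shapes; assumes, as in
# other callers, that get_shape also accepts a plain shape list for
# original_shape):
#
#   dy = akg.tvm.placeholder((4, 16), name="dy", dtype="float32")  # grad of broadcast output
#   dx = sum_by_shape(dy, [16], target=utils.CCE)                  # reduced back to shape (16,)
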
def sum_v2(inputs, axis=None, keepdims=True, target=utils.CCE):
    """
    Another implementation of sum with topi api.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' % utils.get_backend(target))
    dtype = inputs.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    axis = ft_util.refine_reduce_axis(inputs, axis)
    utils.check_shape(inputs.shape)

    if not axis:
        output = akg.topi.identity(inputs)
    else:
        if dtype == "float16":
            step_sum = Cast(inputs, "float32", target)
        else:
            step_sum = inputs
        step_sum = akg.topi.sum(step_sum, axis=axis, keepdims=keepdims)
        if dtype == "float16":
            output = Cast(step_sum, "float16", target)
        else:
            output = step_sum
    return output
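
# Usage sketch (illustrative shapes; float16 input is accumulated in float32 and
# cast back, as implemented above):
#
#   x = akg.tvm.placeholder((8, 1024), name="x", dtype="float16")
#   y = sum_v2(x, axis=1, keepdims=False, target=utils.CCE)        # shape (8,)
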
def prod_force_se_a(net_deriv_tensor, in_deriv_tensor, nlist_tensor, natoms=192, target=utils.CCE):
    """
    Compute per-atom forces from the network derivative, the descriptor
    derivative and the neighbor list.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' % utils.get_backend(target))
    net_deriv_tensor_shape = net_deriv_tensor.shape
    nlist_tensor_shape = nlist_tensor.shape
    # natoms is taken from the input shape; the default argument is only a placeholder
    natoms = net_deriv_tensor_shape[1]
    nframes = net_deriv_tensor_shape[0]
    ndescript = net_deriv_tensor_shape[2]
    nnei = nlist_tensor_shape[2]
    output_shape = [nframes, natoms, 3]

    @script
    def prod_force_se_a_compute(net_deriv_tensor, in_deriv_tensor, nlist_tensor):
        force = output_tensor(output_shape, dtype=net_deriv_tensor.dtype)
        for kk in range(nframes):
            for ii in range(natoms):
                for cc in range(3):
                    force[kk, ii, cc] = 0.0
            for ii in range(natoms):
                for aa in range(ndescript):
                    for cc in range(3):
                        force[kk, ii, cc] -= net_deriv_tensor[kk, ii, aa] * in_deriv_tensor[kk, ii, aa, cc]
                for jj in range(nnei):
                    j_idx = nlist_tensor[kk, ii, jj]
                    if j_idx > -1:
                        for aa in range(jj * 4, jj * 4 + 4):
                            for cc in range(3):
                                force[kk, j_idx, cc] += net_deriv_tensor[kk, ii, aa] * in_deriv_tensor[kk, ii, aa, cc]
        return force

    output = prod_force_se_a_compute(net_deriv_tensor, in_deriv_tensor, nlist_tensor)
    attrs = {
        'enable_post_poly_loop_partition': False,
        'enable_double_buffer': False,
        'enable_feature_library': True,
        'RewriteVarTensorIdx': True
    }
    return output, attrs
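
# Usage sketch (illustrative shapes following the layout read above:
# net_deriv (nframes, natoms, ndescript), in_deriv (nframes, natoms, ndescript, 3),
# nlist (nframes, natoms, nnei) with ndescript == 4 * nnei):
#
#   net_deriv = akg.tvm.placeholder((1, 192, 552), name="net_deriv", dtype="float32")
#   in_deriv = akg.tvm.placeholder((1, 192, 552, 3), name="in_deriv", dtype="float32")
#   nlist = akg.tvm.placeholder((1, 192, 138), name="nlist", dtype="int32")
#   force, attrs = prod_force_se_a(net_deriv, in_deriv, nlist)     # force: (1, 192, 3)
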
def floor(data, target=utils.CCE):
    """
    Returns element-wise largest integer not greater than data.

    Args:
        data (akg.tvm.tensor.Tensor): Tensor of type float16 or float32.

    Returns:
        akg.tvm.tensor.Tensor, has the same shape as data and type of int32.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' % utils.get_backend(target))
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    utils.check_shape(shape)

    if product_is_mini() and data.dtype == "float32":
        # solve the problem of 87 == floor(86.9996) when high precision is needed.
        # the problem is caused by fp16(86.9996) == 87.
        # detect the problem by fp32(86.9996) - fp32(floor(fp16(86.9996))) < 0

        # floor can only be applied to float16
        data_fp16 = akg.lang.ascend.cast_to(data, "float16")
        floor_data = akg.lang.ascend.floor(data_fp16)
        floor_fp16 = akg.lang.ascend.cast_to(floor_data, "float16")
        floor_fp32 = akg.lang.ascend.cast(floor_fp16, "float32")

        # if diff = 1e-7, we cannot get the right sign of fp16(diff),
        # but we can get the right sign of 10000 * diff = 1e-3, which has the
        # same sign as diff
        diff = (data - floor_fp32) * 10000
        diff_fp16 = akg.lang.ascend.cast_to(diff, "float16")

        # if diff < 0 and floor == ceil, then it is the 87 == floor(86.99999) case
        res = akg.tvm.compute(shape,
                              lambda *i: akg.tvm.expr.Select(
                                  diff_fp16(*i) < akg.tvm.const(0, "float16"),
                                  floor_fp16(*i) - akg.tvm.const(1, "float16"),
                                  floor_fp16(*i)),
                              name="res")
        res = akg.lang.ascend.cast_to(res, "int32")
    else:
        res = akg.lang.ascend.floor(data)
    return res
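
# Usage sketch (illustrative shape; on mini form the float32 path goes through
# the high-precision correction above):
#
#   x = akg.tvm.placeholder((16, 16), name="x", dtype="float32")
#   y = floor(x, target=utils.CCE)                                 # int32, same shape
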
def prod_force_se_a_grad(grad_tensor, in_deriv_tensor, nlist_tensor, natoms=192, target=utils.CCE):
    """
    Gradient of prod_force_se_a: back-propagate the force gradient onto the
    network derivative (grad_net).

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' % utils.get_backend(target))
    net_deriv_tensor_shape = grad_tensor.shape
    # natoms is taken from the input shape; the default argument is only a placeholder
    natoms = net_deriv_tensor_shape[1]
    nframes = net_deriv_tensor_shape[0]
    nnei = nlist_tensor.shape[2]
    ndescript = nnei * 4
    output_shape = [nframes, natoms, ndescript]

    @script
    def prod_force_se_a_grad_compute(grad_tensor, in_deriv_tensor, nlist_tensor):
        grad_net = output_tensor(output_shape, dtype=grad_tensor.dtype)
        for kk in range(nframes):
            for ii in range(natoms):
                for jj in range(nnei):
                    for aa in range(jj * 4, jj * 4 + 4):
                        grad_net[kk, ii, aa] = 0.0
                        for cc in range(3):
                            grad_net[kk, ii, aa] -= grad_tensor[kk, ii, cc] * in_deriv_tensor[kk, ii, aa, cc]
                            j_idx = nlist_tensor[kk, ii, jj]
                            if j_idx > -1:
                                grad_net[kk, ii, aa] += grad_tensor[kk, j_idx, cc] * in_deriv_tensor[kk, ii, aa, cc]
        return grad_net

    output = prod_force_se_a_grad_compute(grad_tensor, in_deriv_tensor, nlist_tensor)
    attrs = {'enable_post_poly_loop_partition': False,
             'enable_double_buffer': False,
             'enable_cover_protect_optimize': False,
             'enable_feature_library': True,
             'RewriteVarTensorIdx': False}
    if nframes.value > 1:
        attrs['dim'] = "0 0 1 1 0 1 192 1 0 2 1 1 0 3 1 1"
    else:
        attrs['dim'] = "0 0 192 1 0 1 1 1 0 2 1 1 0 3 1 1"
    return output, attrs
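
# Usage sketch (illustrative shapes mirroring the forward op: grad is
# (nframes, natoms, 3), in_deriv is (nframes, natoms, 4 * nnei, 3), nlist is
# (nframes, natoms, nnei)):
#
#   grad = akg.tvm.placeholder((1, 192, 3), name="grad", dtype="float32")
#   in_deriv = akg.tvm.placeholder((1, 192, 552, 3), name="in_deriv", dtype="float32")
#   nlist = akg.tvm.placeholder((1, 192, 138), name="nlist", dtype="int32")
#   grad_net, attrs = prod_force_se_a_grad(grad, in_deriv, nlist)  # (1, 192, 552)
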