def reduce_any_d(x, axis=None, keepdims=False):
    """
    Reduce a tensor on certain axes based on max (a logical 'any' reduction over int8 input).

    Args:
        x (tvm.tensor.Tensor): The input tensor to reduce. Should be of type int8.
        axis (Union[list, tuple, int, None]): The dimensions to reduce. If None, all dimensions
            will be reduced. Each dim must be in the range [-len(data.shape), len(data.shape) - 1].
        keepdims (Union[bool, None]): If True, retains reduced dimensions with length 1.
            Defaults to False.

    Returns:
        tvm.tensor.Tensor of same type as input tensor x.
    """
    # check type
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.INT8)
    vc_util.check_shape(x.shape)
    # check axis
    vc_util.reduce_axis_check(x.shape, axis)
    refined_axis = refine_reduce_axis(x, axis)
    if len(set(refined_axis)) == len(x.shape) and not keepdims:
        keepdims = True

    res = _reduce_any_d_compute(x, refined_axis, keepdims)

    if len(set(refined_axis)) == len(x.shape):
        res = topi.reshape(res, (1, ))

    return res

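# A minimal usage sketch for reduce_any_d, assuming `akg` is installed and the helpers used
# above (vc_util, refine_reduce_axis, topi, _reduce_any_d_compute) are imported at module
# scope as in the original file. The shape and axis below are arbitrary placeholders.
def _example_reduce_any_d():
    """Hypothetical example: 'any' reduction of an int8 mask along its last axis."""
    import akg
    mask = akg.tvm.placeholder((8, 1024), name="mask", dtype="int8")
    # keepdims defaults to False, so the result is an int8 tensor of shape (8,)
    return reduce_any_d(mask, axis=1)
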
def softmax(data, axis):
    """
    Map each element of data into (0, 1) so that the values along `axis` sum to 1.

    Args:
        data (tvm.tensor.Tensor): input.
        axis (int): axis along which normalization is applied.

    Returns:
        tvm.tensor.Tensor, output.
    """
    vc_util.check_shape(data.shape)
    shape = data.shape

    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)
    if isinstance(axis, (list, tuple)):
        if len(axis) != 1:
            raise RuntimeError("Reduce axis for softmax op must be 1-dimension, while current is %d-dimension"
                               % (len(axis)))
        axis = axis[0]
    output = softmax_op(data, axis, shape)
    attr_map = {}
    if ds.shape_is_dynamic(data):
        # For shifted loops, we should have:
        #     dynamic_shape_bound mod tile_size_prime == 2
        # This aims to ensure that the shift constant is a multiple of tile_size_prime,
        # so the generated IR will not have complicated head and tail for shifted blocks.
        attr_map = {
            "pragma_modshift": 1,
            "pragma_outerband_need_split": 1,
            "enable_post_poly_loop_partition": False,
            "dynamic_shape": ds.set_dynamic_shape_limit_for_tensor(output, 2048, axis) +
                             ds.set_poly_upper_bound_for_tensor(output, 2048, axis),
            "custom_tiling": ct.create_constraint_on_tensor(
                tensor=output,
                values=[1 for i, _ in enumerate(shape) if i != axis],
                constraints=ct.TileConstraint.FACTOR,
                tensor_pos=[i for i, _ in enumerate(shape) if i != axis])
        }
    return output, attr_map

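# A minimal usage sketch for softmax, assuming `akg` is installed and softmax_op, vc_util,
# ft_util, ds and ct are imported at module scope as in the original file. The logits shape
# is an arbitrary placeholder.
def _example_softmax():
    """Hypothetical example: softmax over the class dimension of a float16 batch."""
    import akg
    logits = akg.tvm.placeholder((16, 1000), name="logits", dtype="float16")
    # attr_map carries tiling/dynamic-shape hints and stays empty for static shapes
    out, attr_map = softmax(logits, axis=-1)
    return out, attr_map
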
def reduction_layer(data, axis, op, coeff):
    """
    Reduce data on axis and scale by coeff.

    Args:
        data (tvm.tensor.Tensor): tensor of type float16, float32, int8 or uint8.
        axis (int): the beginning axis to reduce; -1 means the last axis, 0 reduces to a scalar.
        op (str): one of "SUM", "ASUM" (abs and sum), "SUMSQ" (square and sum), "MEAN".
        coeff (Union[int, float]): scale factor.

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(data.dtype, [vc_util.DtypeForDavinci.ALL_FLOAT,
                                         vc_util.DtypeForDavinci.INT8,
                                         vc_util.DtypeForDavinci.UINT8])

    vc_util.check_shape(data.shape)

    if op not in ["SUM", "ASUM", "SUMSQ", "MEAN"]:
        raise RuntimeError("op can only be one of SUM, ASUM, SUMSQ, MEAN")

    shape = get_shape(data)
    vc_util.reduce_axis_check(shape, axis)
    axis = _get_axis_list(axis, shape)

    if dtype in ["int8", "uint8"]:
        data = topi.cast(data, "float16")
    data = topi.cast(data, "float32")
    cof = tvm.const(coeff, "float32")

    if op == "ASUM":
        tmp = _asum(data, axis, cof)
    elif op == "SUMSQ":
        tmp = _sumsq(data, axis, cof)
    elif op == "MEAN":
        tmp = _mean(data, axis, cof, shape)
    elif op == "SUM":
        tmp = _sum(data, axis, cof)

    if dtype in ["int8", "uint8"]:
        tmp = topi.cast(tmp, "float16")
    res = topi.cast(tmp, dtype)

    return res

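# A minimal usage sketch for reduction_layer, assuming `akg` is installed and the private
# helpers (_get_axis_list, _asum, _sumsq, _mean, _sum) are defined in this module as in the
# original file. Shape, op and coeff below are arbitrary placeholders.
def _example_reduction_layer():
    """Hypothetical example: sum of squares starting from axis 1, scaled by 0.5."""
    import akg
    x = akg.tvm.placeholder((4, 16, 16), name="x", dtype="float32")
    # axis=1 is the beginning axis of the reduction, per the docstring above
    return reduction_layer(x, axis=1, op="SUMSQ", coeff=0.5)
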
def mean(data, axis=None, keepdims=False):
    """
    Computes the mean of the values of a Tensor over the given axes.

    Note:
        If the tuple's elements are unsorted, this function calls preprocess_axis first to sort them.
        If the tuple is empty, this function sums over all elements.
        If the data type is float16 and the total reduced size is not less than 65536, this function
        computes the mean by dividing by 65535 first, to avoid the reduced size being too large.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        axis (Union[list, tuple, int, None]): If the tuple is empty, axis is treated as None.
        keepdims (bool): If True, the result has the same rank as the input.

    Returns:
        tvm.tensor.Tensor, has the same type as data. If keepdims is True, all reduced dimensions
        are retained with length 1; otherwise the reduced axes are eliminated.
    """
    # Check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = ft_util.get_shape(data)
    vc_util.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    count = 1
    for i in axis:
        count *= shape[i]
    output, _ = sum.sum_value(data, axis, keepdims)

    if shape_is_dynamic(data):
        res = akg.tvm.compute(output.shape, lambda *i: akg.lang.cce.divide_var(output(*i), count), name="res")
    else:
        res = akg.topi.divide(output, count)

    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs

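# A minimal usage sketch for mean, assuming `akg` is installed and sum, vc_util, ft_util,
# get_attrs, shape_is_dynamic and mean_dynamic_tiling_strategy are imported at module scope
# as in the original file. Shape and axis below are arbitrary placeholders.
def _example_mean():
    """Hypothetical example: average a float16 tensor over its last axis, keeping the dim."""
    import akg
    x = akg.tvm.placeholder((32, 64), name="x", dtype="float16")
    # result shape is (32, 1) because keepdims=True; attrs carries tiling hints
    res, attrs = mean(x, axis=-1, keepdims=True)
    return res, attrs
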
def mean_v2(data, axis=None, keepdims=False):
    """Simple implementation of mean."""
    # Check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = [x.value for x in data.shape]
    vc_util.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    dtype = data.dtype
    count = 1
    for i in axis:
        count *= shape[i]

    count_rec = 1 / count
    output, _ = sum.sum_v2(data, axis, keepdims)
    res = output * akg.tvm.const(count_rec, dtype)
    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs

def mean(data, axis=None, keepdims=False):
    """
    Computes the mean of the values of a Tensor over the given axes.

    Args:
        data (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): If the tuple is empty, axis is treated as None.
        keepdims (bool): If True, the result has the same rank as the input.

    Returns:
        tvm.tensor.Tensor, has the same type as data. If keepdims is True, all reduced dimensions
        are retained with length 1; otherwise the reduced axes are eliminated.
    """
    shape = [x.value for x in data.shape]
    vc_util.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    count = 1
    for i in axis:
        count *= shape[i]
    output, _ = sum.sum_value(data, axis, keepdims)
    res = akg.topi.divide(output, count)

    return res

def common(data, axis, method="min"):
    """
    Returns the index with the max or min value across axes of a tensor.

    Note:
        method can be "max" or "min" to get argmax or argmin.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32.
        axis (int): Describes the axis of the input tensor to reduce over.
        method (str): Can be "max" or "min".

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    shape = get_shape(data)
    dtype = data.dtype

    utils.ops_dtype_check(data.dtype, [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.ALL_INT])
    utils.reduce_axis_check(shape, axis)
    real_axis = refine_reduce_axis(shape, axis)[0]
    out_shape = get_reduce_out_shape(shape, axis=axis)
    attr_map = {}
    if shape_is_dynamic(data):
        attr_map["dynamic_shape"] = set_dynamic_shape_limit_for_tensor(data, 4096, real_axis)
    if dtype != "float16":
        data = akg.topi.cast(data, "float16")
    k = akg.tvm.reduce_axis((0, data.shape[real_axis]), "k")
    if axis in (len(shape) - 1, -1):
        # Last-axis reduction: use a commutative reducer built on the Davinci argmin/argmax intrinsics.
        if method == "min":
            reducer = akg.tvm.comm_reducer(lambda x, y: dav.fargmin(x, y), lambda t: akg.tvm.max_value(t))
        elif method == "max":
            reducer = akg.tvm.comm_reducer(lambda x, y: dav.fargmax(x, y), lambda t: akg.tvm.min_value(t))
        else:
            raise ValueError("method {} is not supported".format(method))

        if len(data.shape) == 1:
            res = akg.tvm.compute((1, ), lambda i: reducer(data[k], axis=k))
        else:
            res = akg.tvm.compute(out_shape, lambda *indice: reducer(data(*indice, k), axis=k))
        res = akg.tvm.compute(out_shape, lambda *indice: res(*indice).astype("int32"), "argred_output")
    elif axis in (0, -len(shape)):
        # First-axis reduction: iterate over the reduced axis, keeping a running best value and index.
        tmp_idx = akg.tvm.compute(shape[1:], lambda *indice: akg.tvm.const(0.0, "float16"), name='tmp_index')
        local_data = akg.tvm.compute(shape[1:], lambda *indice: data(0, *indice), name="tmp_data")
        for idx in range(shape[axis] - 1):
            if method == 'min':
                tmp_idx = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) > data(ite_idx + 1, *indice),
                        akg.tvm.const(ite_idx + 1, "float16"),
                        tmp_idx(*indice)))
                local_data = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) > data(ite_idx + 1, *indice),
                        data(ite_idx + 1, *indice),
                        local_data(*indice)))
            elif method == "max":
                tmp_idx = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) < data(ite_idx + 1, *indice),
                        akg.tvm.const(ite_idx + 1, "float16"),
                        tmp_idx(*indice)))
                local_data = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) < data(ite_idx + 1, *indice),
                        data(ite_idx + 1, *indice),
                        local_data(*indice)))
            else:
                raise ValueError("method {} is not supported".format(method))

        res = akg.tvm.compute(out_shape, lambda *indice: tmp_idx(*indice).astype("int32"), "cast1")
    else:
        raise ValueError("Argmax/argmin only supports the first axis and the last axis for now!")

    lager = out_shape if len(out_shape) > len(shape) else shape
    strategy = argminmax_tiling_strategy(lager, real_axis)
    if strategy:
        attr_map["custom_tiling"] = strategy
    return res, attr_map

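# A minimal usage sketch for common, assuming `akg` is installed and the helpers used above
# (dav, utils, get_shape, refine_reduce_axis, get_reduce_out_shape, shape_is_dynamic,
# set_dynamic_shape_limit_for_tensor, argminmax_tiling_strategy) are imported at module scope
# as in the original file. Shape and axis below are arbitrary placeholders.
def _example_argmax():
    """Hypothetical example: argmax over the last axis of a float16 tensor."""
    import akg
    x = akg.tvm.placeholder((32, 128), name="x", dtype="float16")
    # method="max" selects argmax; idx is an int32 tensor of shape (32,)
    idx, attr_map = common(x, axis=-1, method="max")
    return idx, attr_map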