def custom_l2_loss(shape, dtype, kernel_name="cce_tf_l2_loss",
                   need_build=False, need_print=False):
    """
    Computes half the L2 norm of a tensor without the sqrt:

        output = sum(t ** 2) / 2

    Parameters
    ----------
    shape : shape of data

    dtype : source data type, only support float16, float32

    kernel_name : cce kernel name, default value is "cce_tf_l2_loss"

    need_build : if need to build CCEC kernel, default value is False

    need_print : if need to print the ir, default value is False

    Returns
    -------
    None
    """
    # Validate kernel name and shape before building any compute graph.
    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)
    util.check_shape_size(shape, SHAPE_SIZE_LIMIT)
    util.check_reduce_shape_rule(shape)

    check_list = ["float16", "float32"]
    if not dtype.lower() in check_list:
        raise RuntimeError("tf_l2_loss_cce only support %s while dtype is %s"
                           % (",".join(check_list), dtype))

    # Reduce over every axis: fold the full shape into its simplest form.
    shape, axis = util.simplify_axis_shape(shape, range(len(shape)))

    inp_dtype = dtype.lower()
    data_input = tvm.placeholder(shape, name="data_input", dtype=inp_dtype)
    # Scale by 1/sqrt(2) before squaring, so the summed squares come out
    # already divided by 2: (t / sqrt(2))**2 == t**2 / 2.
    coeff_sqrt = tvm.const(1.0 / (2**(0.5)), dtype=inp_dtype)

    data_mul = te.lang.cce.vmuls(data_input, coeff_sqrt)
    data_sqr = te.lang.cce.vmul(data_mul, data_mul)
    res = te.lang.cce.sum(data_sqr, axis)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"print_ir": need_print,
              "need_build": need_build,
              "name": kernel_name,
              "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)
def reduce_sum_d(x, y, axis, keepdims=None, kernel_name="reduce_sum_d"):
    """Sum-reduce a tensor along the given axes.

    Parameters
    ----------
    x: dict
        description of the input tensor (shape and dtype).
    y: dict
        description of the output tensor.
    axis: int, list, tuple or None
        axes to reduce over; None or empty means every axis.
    keepdims: bool or None
        when true, reduced dimensions are retained with length 1.
    kernel_name: str
        cce kernel name, default value is "reduce_sum_d".

    Returns
    -------
    None
    """
    input_shape = x.get("shape")
    input_dtype = x.get("dtype").lower()

    check_shape(input_shape, param_name="x")
    check_dtype(input_dtype, ("float16", "float32"), param_name="x")

    rank = len(input_shape)
    # Empty/None axis means a full reduction over every dimension.
    reduce_axes = list(axis) if axis else list(range(rank))
    reduce_axes = util.axis_check(rank, reduce_axes)

    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(x, axis)
    if not keepdims and not is_5hdc:
        input_shape, reduce_axes = util.shape_refine(list(input_shape),
                                                     reduce_axes, keepdims)
        input_shape, reduce_axes = util.simplify_axis_shape(input_shape,
                                                            reduce_axes)

    data_input = tvm.placeholder(input_shape,
                                 name="data_input_" + kernel_name,
                                 dtype=input_dtype)
    res = reduce_sum_d_compute(data_input, y, reduce_axes, keepdims,
                               is_5hdc=is_5hdc)
    if is_5hdc:
        res.ori_shape = x["ori_shape"]
        res.ori_format = x["ori_format"]

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    te.lang.cce.cce_build_code(sch, {"name": kernel_name,
                                     "tensor_list": [data_input, res]})
def reduce_prod_d(x, y, axes, keep_dims=None, kernel_name="reduce_prod_d"):
    """Product-reduce a tensor along the given axes.

    Parameters
    ----------
    x : dict
        shape and dtype of input.
    y : dict
        shape and dtype of output.
    axes : int, list, tuple or None
        dimensions to reduce; None (the default) reduces every dimension.
        Must be in the range [-rank(input_tensor), rank(input_tensor)).
    keep_dims : bool or None
        when true, reduced dimensions are retained with length 1.
    kernel_name : str
        cce kernel name, default value is "reduce_prod_d".

    Returns
    -------
    None
    """
    input_shape = x.get("shape")
    check_shape(input_shape, param_name="x")

    input_dtype = x.get("dtype").lower()
    check_dtype(input_dtype, ["float16", "float32", "int8", "uint8"],
                param_name="x")

    rank = len(input_shape)
    # None/empty -> reduce everything; otherwise normalize to a plain list.
    if not axes:
        axes = list(range(rank))
    elif hasattr(axes, 'index'):
        axes = list(axes)
    axes = util.axis_check(rank, axes)

    util.check_reduce_shape_rule(input_shape)

    input_shape, axes = util.shape_refine(list(input_shape), axes)
    input_shape, axes = util.simplify_axis_shape(input_shape, axes)

    data_input = tvm.placeholder(input_shape, name="data_input",
                                 dtype=input_dtype)

    with tvm.target.cce():
        res = reduce_prod_d_compute(data_input, y, axes, keep_dims,
                                    kernel_name)
        sch = generic.auto_schedule(res)

    te.lang.cce.cce_build_code(sch, {"print_ir": False,
                                     "name": kernel_name,
                                     "tensor_list": [data_input, res]})
def l2_loss(x, y, kernel_name="l2_loss"):
    """
    Computes half the squared L2 norm of a tensor: output = sum(x ** 2) / 2.

    Parameters
    ----------
    x : dict
        shape and dtype of input, only support float16, float32
    y : dict
        shape and dtype of output, reserved parameter, not used now
    kernel_name : str
        kernel name, default value is "l2_loss"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")

    # Use the common dtype checker, consistent with the other ops in this
    # file, instead of a hand-rolled membership test + RuntimeError.
    inp_dtype = dtype.lower()
    check_list = ("float16", "float32")
    check_dtype(inp_dtype, check_list, param_name="x")

    # Reduce over every axis of the (simplified) shape.
    shape, axis = util.simplify_axis_shape(shape, range(len(shape)))

    data_input = tvm.placeholder(shape, name="data_input", dtype=inp_dtype)
    # Scale each element by 1/sqrt(2) before squaring so the summed squares
    # come out already divided by 2: (x / sqrt(2))**2 == x**2 / 2.
    coeff_sqrt = tvm.const(1.0 / (2**(0.5)), dtype=inp_dtype)

    data_mul = te.lang.cce.vmuls(data_input, coeff_sqrt)
    data_sqr = te.lang.cce.vmul(data_mul, data_mul)
    res = te.lang.cce.sum(data_sqr, axis)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)
def log_softmax_v2(input_x, output_y, axis=-1, kernel_name="log_softmax_v2",
                   impl_mode="high_performance"):
    """
    algorithm: log_softmax

    Computes log_softmax of the input: x - log(sum(exp(x))).

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y : dict
        shape and dtype of output, same shape and type as input
    axis : int, list or tuple
        the data's axis, range is [-d, d-1]
    kernel_name : str
        cce kernel name, default value is "log_softmax_v2"
    impl_mode : str
        implementation mode, default value is "high_performance"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    input_dtype = input_x.get("dtype").lower()
    rank = len(shape)
    dims = list(shape)

    # Normalize a tuple/list axis to a plain list before validation.
    if not isinstance(axis, int):
        axis = list(axis)

    check_shape(shape, param_name="input_x")
    check_dtype(input_dtype, ("float16", "float32"), param_name="input_x")

    axis = util.axis_check(rank, axis)

    # A length-1 dimension cannot be soft-maxed over.
    reduce_axes = [axis] if isinstance(axis, int) else axis
    for ax in reduce_axes:
        if dims[ax] == 1:
            raise RuntimeError("Cannot reduce on an axis with dimension 1")

    shape, axis = util.shape_refine(list(shape), axis)
    shape, axis = util.simplify_axis_shape(shape, axis)

    data_input = tvm.placeholder(shape, name="data_input", dtype=input_dtype)
    result = log_softmax_v2_compute(data_input, output_y, axis=axis,
                                    kernel_name=kernel_name,
                                    impl_mode=impl_mode)

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    te.lang.cce.cce_build_code(sch, {"print_ir": False,
                                     "name": kernel_name,
                                     "tensor_list": [data_input, result]})
def reduce_all_d(input_data, output_data, axes, keepdims=None,
                 kernel_name="reduce_all_d"):
    """
    Reduce a tensor on certain axes based on logical-and ("all")

    Parameters:
    ----------
    input_data: dict
        shape and dtype of input_data, only support int8 (bool inputs are
        mapped to int8)
    output_data: dict
        source data type, only support int8
    axes : int, list, tuple or None
        the axes to reduce, may be negative to index from the end
        (e.g., -1 for the last axis). axes may be int or list (e.g. [1,2])
    keepdims : bool or None
        if true, retains reduced dimensions with length 1,
        default value is None
    kernel_name : str
        cce kernel name, default value is "reduce_all_d"

    Returns
    -------
    None
    """
    input_shape = input_data.get("shape")
    input_dtype = input_data.get("dtype").lower()
    # bool tensors are handled on the device as int8
    if input_dtype == "bool":
        input_dtype = "int8"

    check_shape(input_shape, param_name="input_data")
    # BUGFIX: ("int8") is just the string "int8", not a one-element tuple —
    # string membership would also accept substrings like "t8". The trailing
    # comma makes it a real tuple.
    check_dtype(input_dtype, ("int8",), param_name="input_data")

    shape_len = len(input_shape)

    if not axes:
        axes = range(shape_len)

    if hasattr(axes, 'index'):
        axes = list(axes)

    axes = util.axis_check(shape_len, axes)

    # Defensive range re-check after normalization.
    if not isinstance(axes, int):
        for i in axes:
            if i >= len(input_shape):
                raise RuntimeError("axes should be less than dimension")
    else:
        if axes >= len(input_shape):
            raise RuntimeError("axes should be less than dimension")

    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(input_data, axes)
    if not is_5hdc:
        input_shape, axes = util.shape_refine(list(input_shape), axes)
        input_shape, axes = util.simplify_axis_shape(input_shape, axes)

    data_input = tvm.placeholder(input_shape,
                                 name="data_input_" + kernel_name,
                                 dtype=input_dtype)
    result = reduce_all_d_compute(data_input, output_data, axes,
                                  keepdims, kernel_name)
    if is_5hdc:
        result.ori_shape = input_data["ori_shape"]
        result.ori_format = input_data["ori_format"]

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": [data_input, result]}
    te.lang.cce.cce_build_code(sch, config)
def reduce_any_d(x, y, axes, keepdims=None, kernel_name="reduce_any_d"):
    """
    Reduce a tensor on certain axes based on logical-or ("any")

    Parameters:
    ----------
    x : dict
        shape and dtype of input_data, only support int8 (bool inputs are
        mapped to int8)
    y : dict
        shape and dtype of output_res, reserved parameter, not used now
    axes : int, list, tuple or None
        the axes to reduce, may be negative to index from the end
        (e.g., -1 for the last axis). axes may be int or list (e.g. [1,2])
    keepdims : bool or None
        if true, retains reduced dimensions with length 1,
        default value is None
    kernel_name : str
        cce kernel name, default value is "reduce_any_d"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    # FIX: lowercase the dtype BEFORE comparing/validating, consistent with
    # reduce_all_d — otherwise "Bool" or "INT8" would be rejected here even
    # though the sibling op accepts them.
    dtype = x.get("dtype").lower()

    check_shape(shape, param_name="x")

    # bool tensors are handled on the device as int8
    if dtype == "bool":
        dtype = "int8"

    check_list = ("int8", )
    check_dtype(dtype, check_list, param_name="x")

    shape_len = len(shape)

    if not axes:
        axes = range(shape_len)

    if hasattr(axes, 'index'):
        axes = list(axes)

    axes = util.axis_check(shape_len, axes)

    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(x, axes)
    if not is_5hdc:
        shape, axes = util.shape_refine(list(shape), axes)
        shape, axes = util.simplify_axis_shape(shape, axes)

    data_input = tvm.placeholder(shape, name="data_input_" + kernel_name,
                                 dtype=dtype)
    res = reduce_any_d_compute(data_input, y, axes, keepdims, kernel_name)
    if is_5hdc:
        res.ori_shape = x["ori_shape"]
        res.ori_format = x["ori_format"]

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)
def reduce_min_d(input_min, output_min, axis, keep_dims=None,
                 kernel_name="reduce_min_d"):
    """
    Reduce a tensor on a certain axis based on min

    Parameters:
    ----------
    input_min: dict
        dict of input, which contains shape and dtype
    output_min: dict
        dict of output, which contains shape and dtype
    axis: int or None
        The dimensions to reduce. If None (the default),
        reduces all dimensions. Must be in the range
        (-rank(input_tensor), rank(input_tensor))
    keep_dims: True or False
        if true, retains reduced dimensions with length 1,
        default value is None
    kernel_name: str
        cce kernel name, default value is "reduce_min_d"

    Returns
    -------
    None
    """
    shape_input = input_min.get("shape")
    dtype_input = input_min.get("dtype")

    check_shape(shape_input, param_name="input_min")
    check_list = ("float16", "float32", "int8", "uint8")
    check_dtype(dtype_input.lower(), check_list, param_name="input_min")

    shape_len = len(shape_input)

    # None/empty axis means a full reduction; normalize to a list.
    if not axis:
        axis = range(shape_len)

    if hasattr(axis, 'index'):
        axis = list(axis)

    axis = util.axis_check(shape_len, axis)

    # 5HD schedule keeps the original layout; only refine/simplify otherwise.
    is_5hdc = util.check_and_init_5hdc_reduce_support(input_min, axis)
    if not is_5hdc:
        shape_input, axis = util.shape_refine(list(shape_input), axis)
        shape_input, axis = util.simplify_axis_shape(shape_input, axis)

    # NOTE(review): this placeholder is built unconditionally but is unused
    # on the TIK fast path below — presumably harmless; verify.
    data_input = tvm.placeholder(shape_input,
                                 name="data_input_" + kernel_name,
                                 dtype=dtype_input.lower())
    shape_len = len(shape_input)

    # TIK fast path: single reduction over the last axis.
    # NOTE(review): the "int32" arm is unreachable — check_list above does
    # not admit int32 — TODO confirm whether int32 support was intended.
    if dtype_input.lower() in ("float32", "int32") and len(axis) == 1 \
            and ((axis[0] == (shape_len - 1)) or (axis[0] == -1)):
        # The TIK kernel reads the (possibly simplified) shape from the
        # input dict, so it is written back before dispatch.
        input_min["shape"] = tuple(shape_input)
        reduce_min_d_tik.reduce_min_d_tik(input_min, output_min, -1,
                                          kernel_name)
    else:
        res = reduce_min_d_compute(data_input, output_min, axis, keep_dims,
                                   kernel_name)
        if is_5hdc:
            # Stash original layout info on the result for the 5HD schedule.
            res.ori_shape = input_min["ori_shape"]
            res.ori_format = input_min["ori_format"]
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        config = {"name": kernel_name, "tensor_list": [data_input, res]}
        te.lang.cce.cce_build_code(sch, config)
def reduce_max_d(x, y, axis, keepdims=False, kernel_name="reduce_max_d"):
    """
    Reduce a tensor on a certain axis based on max

    Parameters
    ----------
    x : dict
        shape and dtype of input
    y : dict
        shape and dtype of output, should be same shape and type as input
    axis: list
        the first axis to reduce, may be negative to index from the end
        (e.g., -1 for the last axis).
        axis may be int or list (e.g. [1,2])
    keepdims: bool
        if true, retains reduced dimensions with length 1,
        default value is False
    kernel_name : str
        kernel name, default value is "reduce_max_d"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")
    input_dtype = dtype.lower()

    check_shape(shape, param_name="x")
    check_list = ["float16", "float32", "int8", "uint8", "int32"]
    check_dtype(input_dtype, check_list, param_name="x")

    shape_len = len(shape)

    # None/empty axis means a full reduction; normalize to a list.
    if not axis:
        axis = range(shape_len)

    if hasattr(axis, 'index'):
        axis = list(axis)

    axis = util.axis_check(shape_len, axis)

    # Shape should not be modified in 5HD mode
    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(x, axis)
    if not is_5hdc:
        shape, axis = util.shape_refine(list(shape), axis)
        shape, axis = util.simplify_axis_shape(shape, axis)
        shape_len = len(shape)
        # The TIK path below reads the shape from the dict, so write the
        # refined shape back.
        x["shape"] = shape

    # TIK fast path: float32/int32 with a single reduction over the last
    # axis is dispatched to a hand-written TIK kernel.
    if input_dtype in ("float32", "int32") and len(axis) == 1 \
            and ((axis[0] == (shape_len - 1)) or (axis[0] == -1)):
        reduce_max_d_tik(x, y, axis[0], kernel_name)
    else:
        data_input = tvm.placeholder(shape, name="data_input_" + kernel_name,
                                     dtype=input_dtype)
        res = reduce_max_d_compute(data_input, y, axis, keepdims, kernel_name)
        if is_5hdc:
            # Stash original layout info on the result for the 5HD schedule.
            res.ori_shape = x["ori_shape"]
            res.ori_format = x["ori_format"]
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        config = {"name": kernel_name, "tensor_list": [data_input, res]}
        te.lang.cce.cce_build_code(sch, config)
def reduce_mean_d(input_x, output_y, axes, keepdims=None,
                  kernel_name="reduce_mean_d",
                  impl_mode="high_performance"):
    """
    Reduce a tensor on certain axes based on mean.

    Parameters:
    ----------
    input_x : dict
        shape and dtype of input
    output_y: dict
        shape and dtype of output
    axes : int, list, tuple, NoneType
        The dimensions to reduce.
        If None (the default), reduces all dimensions.
        Must be in the range [-rank(input_tensor), rank(input_tensor)).
    keepdims : bool, NoneType
        if true, retains reduced dimensions with length 1,
        default value is None.
    kernel_name : str
        cce kernel name, default value is reduce_mean_d
    impl_mode : str
        implementation mode, default value is "high_performance"

    Returns
    -------
    None
    """
    # NOTE(review): these module-level globals are written here and
    # presumably consumed by reduce_mean_d_compute (or a schedule hook) —
    # verify before refactoring them away.
    global ori_shape
    global ori_format

    shape = input_x.get("shape")
    check_shape(shape, param_name="input_x")
    check_list = ["float16", "float32"]
    shape_len = len(shape)

    # None/empty axes means a full reduction; normalize to a list.
    if not axes:
        axes = range(shape_len)

    if hasattr(axes, 'index'):
        axes = list(axes)

    inp_dtype = input_x.get("dtype").lower()
    check_dtype(inp_dtype, check_list, param_name="input_x")

    axes = util.axis_check(shape_len, axes)

    # Shape should not be modified in 5HD mode
    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(input_x, axes)
    if not is_5hdc:
        shape, axes = util.shape_refine(list(shape), axes)
        shape, axes = util.simplify_axis_shape(shape, axes)

    # Record both original and current layout for downstream consumers.
    ori_shape = [input_x["ori_shape"], input_x["shape"]]
    ori_format = [input_x["ori_format"], input_x["format"]]

    data_input = tvm.placeholder(shape, name="data_input", dtype=inp_dtype)
    res = reduce_mean_d_compute(data_input, output_y, axes, keepdims,
                                impl_mode=impl_mode, is_5hdc=is_5hdc)
    if is_5hdc:
        # Stash original layout info on the result for the 5HD schedule.
        res.ori_shape = input_x["ori_shape"]
        res.ori_format = input_x["ori_format"]

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)