def reduce_max_d(x, y, axes=None, keepdims=None, kernel_name="reduce_max_d"): """ reduce a tensor on certain axes based on max. Parameters ---------- x : dict shape and dtype of input y : dict shape and dtype of output, should be same shape and type as input axes: list the axes to reduce; may be negative to index from the end (e.g., -1 for the last axis). axes may be int or list (e.g. [1, 2]) keepdims: bool if true, retains reduced dimensions with length 1, default value is None kernel_name : str kernel name, default value is "reduce_max_d" Returns ------- None """ dtype = x["dtype"] dtype_lower = dtype.lower() check_list = ("float16", "float32", "int8", "uint8", "int32") check_dtype(dtype_lower, check_list) with te.op.compute(): shape = x["shape"] shape_range = x["range"] shape_len = len(shape) if not axes: axes = range(shape_len) if hasattr(axes, 'index'): axes = list(axes) axes = cce_util.axis_check(shape_len, axes) shape_new, shape_range_new, axes_new, fused_rel_dic = \ fused_reduce_axis(shape, shape_range, axes) add_compile_info("fused_rel_dic", fused_rel_dic) x["shape"] = shape_new x["range"] = shape_range_new shape_var_new = variable_shape([x])[0] data_input = tvm.placeholder(shape_var_new, name="data_input", dtype=dtype_lower) res = reduce_max_d_compute(data_input, y, axes_new, keepdims) with tvm.target.cce(): sch = generic.auto_schedule(res) # build config = {"name": kernel_name, "tensor_list": [data_input, res]} te.lang.dynamic.build(sch, config)
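# Illustrative usage sketch (not part of the original file): shows the dict-style
# arguments this dynamic-shape entry point consumes (x["shape"], x["range"],
# x["dtype"]). The concrete shapes, ranges and kernel_name are assumptions made
# up for demonstration only.
def _example_reduce_max_d():
    x = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float16"}
    y = {"shape": (-1,), "range": [(1, None)], "dtype": "float16"}
    # Reduce the last axis and drop it from the output shape.
    reduce_max_d(x, y, axes=[-1], keepdims=False, kernel_name="reduce_max_d_demo")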
def floor_div(input_x, input_y, output_z, kernel_name="floor_div"): """ algorithm: floordiv calculating data's floordiv, res =floor(x / y) Parameters ---------- input_x: dict input_y: dict output_z: dict kernel_name: str, default value is "floor_div" Returns ------- None """ # check dtype of input_x/input_y input_dtype_x = input_x.get("dtype").lower() input_dtype_y = input_y.get("dtype").lower() check_list = ('int8', 'uint8', 'int32', 'float16', 'float32') check_dtype(input_dtype_x, check_list, param_name="input_x") check_dtype(input_dtype_y, check_list, param_name="input_y") check_elewise_shape_range([input_x, input_y], support_broadcast=True) if input_dtype_x != input_dtype_y: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'floor_div' error_info['param_name1'] = 'input_dtype_x' error_info['param_name2'] = 'input_dtype_y' error_info['param1_dtype'] = str(input_dtype_x) error_info['param2_dtype'] = str(input_dtype_y) raise RuntimeError(error_info, "In op[%s], the parameter[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % ( error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (input_x, input_y) in ins: with te.op.compute(): x_shape, y_shape = variable_shape([input_x, input_y], support_broadcast=True) x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape) tensor_x = tvm.placeholder(x_shape, input_dtype_x, "tensor_x") tensor_y = tvm.placeholder(y_shape, input_dtype_y, "tensor_y") res = floor_div_compute(tensor_x, tensor_y, output_z, kernel_name) tensors.append([tensor_x, tensor_y, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
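# Illustrative usage sketch (assumption, not from the source): both inputs must
# share one dtype from the check_list; the shapes and ranges here are placeholders
# showing the broadcast-capable dict format the classifier expects.
def _example_floor_div():
    input_x = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float32"}
    input_y = {"shape": (1, -1), "range": [(1, 1), (1, None)], "dtype": "float32"}
    output_z = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float32"}
    floor_div(input_x, input_y, output_z, kernel_name="floor_div_demo")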
def reduce_sum_d(x, y, axis=None, keepdims=None, kernel_name="reduce_sum_d"): """reduce a tensor on a certain axis based on sum. Parameters: ---------- x: dict the dict of input tensor. y: dict the dict of output tensor. axis: int, list, tuple or NONETYPE the axis for reduce. keepdims: bool or NONETYPE if true, retains reduced dimensions with length 1. kernel_name: str cce kernel name, default value is "reduce_sum_d". Returns ------- None """ dtype = x["dtype"] dtype_lower = dtype.lower() check_list = ("float16", "float32") check_dtype(dtype_lower, check_list, param_name="x") with te.op.compute(): shape = x["shape"] shape_range = x["range"] axes = [] shape_len = len(shape) if not axis: for i, _ in enumerate(shape): axes.append(i) else: axes = list(axis) axes = cce_util.axis_check(shape_len, axes) shape_new, shape_range_new, axes_new, fused_rel_dic = \ fused_reduce_axis(shape, shape_range, axes) add_compile_info("fused_rel_dic", fused_rel_dic) x["shape"] = shape_new x["range"] = shape_range_new shape_var_new = variable_shape([x])[0] data_input = tvm.placeholder(shape_var_new, name="data_input", dtype=dtype_lower) res = reduce_sum_d_compute(data_input, y, axes_new, keepdims) with tvm.target.cce(): sch = generic.auto_schedule(res) # build config = {"name": kernel_name, "tensor_list": [data_input, res]} te.lang.dynamic.build(sch, config)
def bn_training_reduce(x, sum, square_sum, kernel_name="bn_training_reduce"): """ algorithm: part of fused_batch_norm_v2 The first step of batch_norm which to calculate the sum and square sum of x. The major component of this operator is reduce operation. Parameters ---------- x: dict dict of input, A 5HD Tensor for input data. sum: dict dict of sum, A `Tensor`. Sum of x. square_sum: dict dict of square_sum, A `Tensor`. Square sum of x. kernel_name: str kernel name, default value is "bn_training_reduce" Returns ------- None """ data_format = x.get("format").upper() origin_format = x.get("ori_format").upper() dtype = x.get("dtype").lower() # check and format check_list = ("NC1HWC0", "NCHW") check_format(data_format, check_list, param_name="x") if data_format == "NCHW" and origin_format not in ("NCHW", ): raise RuntimeError("The origin format only supports " "NCHW when format is NCHW") # check dtype check_list = ("float16", "float32") check_dtype(dtype, check_list, param_name="x") # get dynamic shape, x.get("shape"), x.get("range") shape_x = variable_shape([x])[0] # compute with te.op.compute(): data_input = tvm.placeholder(shape_x, name="data_input", dtype=dtype) res = bn_training_reduce_compute(data_input, sum, square_sum, kernel_name=kernel_name) # schedule with tvm.target.cce(): sch = generic.auto_schedule(res) # build tensor_list = [data_input] + list(res) config = {"name": kernel_name, "tensor_list": tensor_list} te.lang.dynamic.build(sch, config)
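# Illustrative usage sketch (assumed values): x is a 5HD tensor, so the dict also
# carries "format" and "ori_format", which this entry point checks before tracing
# the reduce compute. Shapes and ranges are invented for demonstration.
def _example_bn_training_reduce():
    x = {"shape": (-1, 4, -1, -1, 16),
         "range": [(1, None), (4, 4), (1, None), (1, None), (16, 16)],
         "dtype": "float16", "format": "NC1HWC0", "ori_format": "NHWC"}
    sum_out = {"shape": (1, 4, 1, 1, 16), "dtype": "float32"}
    square_sum_out = {"shape": (1, 4, 1, 1, 16), "dtype": "float32"}
    bn_training_reduce(x, sum_out, square_sum_out, kernel_name="bn_reduce_demo")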
def exp(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="exp"): """ algorithm: exp calculating data's exp if base == -1: y = exp(shift + scale * x) if base > 0: y = exp((shift+scale*x)*ln(base)) Parameters ---------- input_x : dict, shape and dtype of input, only support float16, float32 output_y: dict, shape and dtype of output, should be same shape and type as input base: (optional, default -1 for a value of e) the base gamma scale: (optional, default 1) the scale alpha shift: (optional, default 0) the shift beta kernel_name : str, kernel name, default value is "exp" Returns ------- None """ dtype = input_x.get("dtype") # input_x's dtype check, only supports fp16 and fp32 check_list = ("float16", "float32") input_dtype = dtype.lower() check_dtype(input_dtype, check_list, param_name="input_x") if base <= 0 and (not isclose(base, -1.0)): expect_value = "strictly positive or -1" real_value = "base <= 0 and base is not equal to -1" error_manager_vector.raise_err_input_value_invalid( kernel_name, "base", expect_value, real_value) ins = classify([input_x], Mode.ELEWISE) schedules, tensors = [], [] for (input_x,) in ins: with te.op.compute(): shape_x = variable_shape([input_x]) fuseshape = [1] fuseshape[0] = reduceIns(lambda x, y: x * y, shape_x[0]) data_input = tvm.placeholder(fuseshape, name="data_input", dtype=input_dtype) res = exp_compute(data_input, output_y, base, scale, shift, kernel_name) tensors.append([data_input, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
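# Illustrative usage sketch (assumed shapes/values): with the default base=-1.0
# the kernel computes exp(shift + scale * x); with a positive base it computes
# base**(shift + scale * x) via exp((shift + scale * x) * ln(base)).
def _example_exp():
    input_x = {"shape": (-1,), "range": [(1, None)], "dtype": "float16"}
    output_y = {"shape": (-1,), "range": [(1, None)], "dtype": "float16"}
    # y = 2 ** (0.5 * x), i.e. base=2.0, scale=0.5, shift=0.0
    exp(input_x, output_y, base=2.0, scale=0.5, shift=0.0, kernel_name="exp_base2_demo")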
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"): """ calculate the backpropagation of leaky_relu operation y = gradients(x>0) or negative_slope*gradients(x<=0). support dtype:float16,float32 Parameters ---------- g : dict the backpropagated gradients to the corresponding leaky_relu operation x : dict the x passed as output of leaky_relu operation y : dict the output of leaky_relu back propagation negative_slope : float or int allow non-zero slope for negative inputs to speed up optimization kernel_name : str kernel name, default value is "leaky_relu_grad" Returns ------- None """ g_dtype = g.get("dtype").lower() x_dtype = x.get("dtype").lower() check_list = ("float16", "float32") check_dtype(g_dtype, check_list, param_name="input_g") check_dtype(x_dtype, check_list, param_name="input_x") check_elewise_shape_range([g, x], support_broadcast=True) if g_dtype != x_dtype: error_manager_vector.raise_err_inputs_dtype_not_equal( kernel_name, "g", "x", g_dtype, x_dtype) ins = classify([g, x], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (g, x) in ins: with te.op.compute(): g_shape, x_shape = variable_shape([g, x], support_broadcast=True) g_shape, x_shape = refine_shapes_for_broadcast(g_shape, x_shape) tensor_g = tvm.placeholder(g_shape, g_dtype, "tensor_g") tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x") res = leaky_relu_grad_compute(tensor_g, tensor_x, y, negative_slope, kernel_name) tensors.append((tensor_g, tensor_x, res)) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def relu(x, y, kernel_name="relu"): """ Algorithm: relu(x) = max(x, 0) Parameters ---------- x: dict dynamic input, include shape, dtype and range y: dict the dict of output kernel_name: str kernel name, must be string, default value is "relu". Returns ------- None """ # check input tensor data_type dtype_x = x.get("dtype").lower() check_list = ("float16", "float32", "int8", "int32") check_dtype(dtype_x, check_list, param_name="x") ins = classify([x], Mode.ELEWISE) schedules, tensors = [], [] for (x, ) in ins: with te.op.compute(): shape_x = variable_shape([x]) fuse_shape = [1] fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_x[0]) input_data = tvm.placeholder(fuse_shape, name="input_data", dtype=dtype_x) res = relu_compute(input_data, y, kernel_name) tensors.append([input_data, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def real_div(x1, x2, y, kernel_name="real_div"): """ algorithm: real_div calculating data's real_div, c = a / b Parameters ---------- x1 : dict shape and dtype of first input, only support float16, float32 x2 : dict shape and dtype of second input, only support float16, float32 y: dict shape and dtype of output, should be broadcast shape and type as input kernel_name : str cce kernel name, default value is real_div Returns ------- None """ x_dtype = x1.get("dtype").lower() y_dtype = x2.get("dtype").lower() check_list = ("float16", "float32") check_dtype(x_dtype, check_list, param_name="input_x") check_dtype(y_dtype, check_list, param_name="input_y") check_elewise_shape_range([x1, x2], support_broadcast=True) if x_dtype != y_dtype: error_manager_vector.raise_err_inputs_dtype_not_equal( kernel_name, "x1", "x2", x_dtype, y_dtype) ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (x1, x2) in ins: with te.op.compute(): x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True) x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape) tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x") tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y") res = real_div_compute(tensor_x, tensor_y, y, kernel_name) tensors.append([tensor_x, tensor_y, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"): """ do sigmoid grad sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad Parameters: ---------- x : dictionary shape of sigmoid input dx : dictionary shape of grad out: dictionary output kernel_name : cce kernel name, default value is "sigmoid_grad_cce" Returns ------- None """ x_dtype = x.get("dtype").lower() dx_dtype = dx.get("dtype").lower() check_list = ("float16", "float32") check_dtype(x_dtype, check_list, param_name="input_x") check_dtype(dx_dtype, check_list, param_name="input_dx") check_elewise_shape_range([x, dx], support_broadcast=False) if x_dtype != dx_dtype: error_manager_vector.raise_err_inputs_dtype_not_equal( kernel_name, "x", "dx", x_dtype, dx_dtype) ins = classify([x, dx], Mode.ELEWISE) schedules, tensors = [], [] for (sig, dx) in ins: with te.op.compute(): shape_sig, shape_dx = variable_shape([sig, dx], support_broadcast=False) shape_sig, shape_dx = refine_shapes_for_broadcast( shape_sig, shape_dx) tensor_sig = tvm.placeholder(shape_sig, x_dtype, "tensor_x") tensor_dx = tvm.placeholder(shape_dx, dx_dtype, "tensor_dx") res = sigmoid_grad_compute(tensor_sig, tensor_dx, out, kernel_name) tensors.append([tensor_sig, tensor_dx, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def sqrt(input_x, output_y, kernel_name="sqrt"): """ algorithm: sqrt calculating data sqrt,y= x**0.5, mini not support vsqrt, use exp(0.5*log(x)) Parameters ---------- input_x : dict shape and dtype of input, only support float16, float32 output_y: dict shape and dtype of output, should be same shape and type as input kernel_name : str cce kernel name, default value is sqrt Returns ------- None """ # check dtype x_dtype = input_x.get("dtype").lower() check_list = ("float16", "float32") check_dtype(x_dtype, check_list, param_name="input_x") ins = classify([input_x], Mode.ELEWISE) schedules, tensors = [], [] for (input_x, ) in ins: with te.op.compute(): # shape x_shape = variable_shape([input_x]) fuseshape = [1] fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0]) # div_compute input_data = tvm.placeholder(fuseshape, name="input_data", dtype=x_dtype) res = sqrt_compute(input_data, output_y, kernel_name) tensors.append([input_data, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def square(input_x, output, kernel_name="square"): """ algorithm: square calculating data's square,y= x*x Parameters ---------- input_x : dict shape and dtype of input, only support float16, float32, int32 output_y: dict shape and dtype of output, should be same shape and type as input kernel_name : str kernel name, default value is "square" Returns ------- None """ # check dtype x_dtype = input_x.get("dtype").lower() check_list = ("float16", "float32", "int32") check_dtype(x_dtype, check_list, param_name="input_x") ins = classify([input_x], Mode.ELEWISE) schedules, tensors = [], [] for (input_x, ) in ins: with te.op.compute(): # shape x_shape = variable_shape([input_x]) fuseshape = [1] fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0]) # square_compute data_x = tvm.placeholder(fuseshape, x_dtype, name="data_x") res = square_compute(data_x, output, kernel_name) tensors.append((data_x, res)) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"): """ algorithm: sqrt_grad_cce Parameters ---------- x : dict of data: dict dx : dict of data_grad: dict out : dict of output: dict kernel_name : cce kernel name, default value is "sqrt_grad": str Returns ------- None """ x_dtype = x.get("dtype").lower() dx_dtype = dx.get("dtype").lower() check_list = ("float16", "float32") check_dtype(x_dtype, check_list, param_name="x") check_dtype(dx_dtype, check_list, param_name="dx") check_elewise_shape_range([x, dx], support_broadcast=False) if x_dtype != dx_dtype: error_manager_vector.raise_err_inputs_dtype_not_equal( kernel_name, "x", "dx", x_dtype, dx_dtype) ins = classify([x, dx], Mode.ELEWISE) schedules, tensors = [], [] for (x, dx) in ins: with te.op.compute(): x_shape, dx_shape = variable_shape([x, dx], support_broadcast=False) x_shape, dx_shape = refine_shapes_for_broadcast(x_shape, dx_shape) tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x") tensor_dx = tvm.placeholder(dx_shape, dx_dtype, "tensor_dx") res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name) tensors.append([tensor_x, tensor_dx, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def fill(dims, value, y, kernel_name="fill"): """ do fill operation Parameters: ---------- dims : the dict of input value : the dict of input y: the dict of output kernel_name : cce kernel name, default value is "fill" Returns ------- None """ # get the shape and dtype shape = value.get("shape") dtype = value.get("dtype").lower() dtype_dims = dims.get("dtype").lower() dims["shape"] = [-1] dims['range'] = [[1, None]] # check whether dtypes are right check_list = ("int32", "float16", "float32") check_dtype(dtype, check_list) schedules, tensors = [], [] with te.op.compute(): shape_dim = variable_shape([dims]) x_input = tvm.placeholder(shape, name="x_input", dtype=dtype) dim_input = tvm.placeholder(shape_dim[0], name="dim_input", dtype=dtype_dims) res = fill_compute(shape_dim[0], x_input, y, kernel_name=kernel_name) tensors.append([dim_input, x_input, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config) te.op.add_compile_info("_use_special_pattern", False)
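# Illustrative usage sketch (assumed values): "dims" is a runtime tensor holding
# the target shape (its static shape/range are overwritten to [-1] inside fill),
# while "value" supplies the data to broadcast. Dtypes must come from the
# check_list; everything else below is made up for demonstration.
def _example_fill():
    dims = {"shape": (2,), "range": [(2, 2)], "dtype": "int32"}
    value = {"shape": (1,), "range": [(1, 1)], "dtype": "float16"}
    y = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float16"}
    fill(dims, value, y, kernel_name="fill_demo")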
def log1p(input_x, output_y, kernel_name="log1p"): """ algorithm: log1p calculating data's log1p, y = log(x + 1) Parameters ---------- input_x: dict shape and dtype of input, only support float16, float32 output_y: dict shape and dtype of output, should be same shape and type as input kernel_name: str kernel name, default value is "log1p" Returns ------- None """ dtype = input_x.get("dtype") check_list = ("float16", "float32") input_dtype = dtype.lower() check_dtype(input_dtype, check_list, param_name="input_x") schedules, tensors = [], [] ins = classify([input_x], Mode.ELEWISE) for (input_x, ) in ins: with te.op.compute(): x_shape = variable_shape([input_x]) fuseshape = [1] fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0]) data_input = tvm.placeholder(fuseshape, dtype=input_dtype, name="data_input") res = log1p_compute(data_input, output_y, kernel_name) tensors.append([data_input, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = { "name": kernel_name, "tensor_list": tensors, "bool_storage_as_1bit": False } te.lang.dynamic.build(schedules, config)
def sub(input_x, input_y, output_z, kernel_name="sub"): """ do element-wise sub operation between two input tensors Parameters: ---------- input_x : dict shape and dtype of input, only support float16, float32, int32 input_y : dict shape and dtype of input, only support float16, float32, int32 output_z: dict shape and dtype of output, should be same shape and type as input kernel_name : kernel name, default value is "sub" Returns ------- None """ check_list = ["float16", "float32", "int32"] x_dtype = input_x.get("dtype").lower() y_dtype = input_y.get("dtype").lower() if x_dtype not in check_list or y_dtype not in check_list: error_detail = "sub only support float16, float32, int32" error_manager_vector.raise_err_two_input_dtype_invalid( kernel_name, "input_x", "input_y", error_detail) ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (x1, x2) in ins: with te.op.compute(): x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True) x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape) data1 = tvm.placeholder(x_shape, x_dtype, "data1") data2 = tvm.placeholder(y_shape, y_dtype, "data2") res = sub_compute(data1, data2, output_z, kernel_name) tensors.append([data1, data2, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def neg(input_x, output_y, kernel_name="neg"): """ Computes numerical negative value element-wise, y = -x. Parameters ---------- input_x: dict shape and dtype of input, only support float16, float32, int32, int8 output_y: dict shape and dtype of output, should be same type as input kernel_name: str kernel name, default value is "neg" Returns ------- None """ dtype_input = input_x.get("dtype").lower() check_list = ("float16", "float32", "int32", "int8") check_dtype(dtype_input, check_list, param_name="input_x") ins = classify([input_x], Mode.ELEWISE) schedules, tensors = [], [] for (input_x, ) in ins: with te.op.compute(): x_shape = variable_shape([input_x]) fuse_shape = [1] fuse_shape[0] = reduceIns(lambda x, y: x * y, x_shape[0]) data_input = tvm.placeholder(fuse_shape, name="data_input", dtype=dtype_input) res = neg_compute(data_input, output_y, kernel_name) tensors.append([data_input, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def zeros_like(x, y, kernel_name="zeros_like"): """ output a tensor of all zero, you can specify the output type Parameters ---------- x: dict shape and dtype of input, only support float16, float32, int32,int8,uint8,bool y: dict shape and dtype of output data kernel_name: str cce kernel name, default value is "zeros_like" Returns ------ None """ dtype_x = x.get("dtype") check_list_src = ("float16", "float32", "int32", "int8", "uint8", "bool") src_dtype = dtype_x.lower() check_dtype(src_dtype, check_list_src, param_name="x") schedules, tensors = [], [] ins = classify([x], Mode.ELEWISE) for (input_x, ) in ins: with te.op.compute(): shape_x = variable_shape([input_x]) shape_x = (functools_reduce(lambda x, y: x * y, shape_x[0]), ) x_input = tvm.placeholder(shape_x, name="x_input", dtype=src_dtype) res = zeros_like_compute(x_input, y, kernel_name=kernel_name) tensors.append([x_input, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def add_n(inputs, output, tensor_num, kernel_name="add_n"): """ algorithm: add_n calculating data's adds, z = a + b + c... Parameters ---------- inputs : list or tuple of dict A list of Tensor objects, each with same shape, range and dtype of first input, only support float16, float32, int32. output : dict shape, range and dtype of output, should be broadcast shape and type as input. tensor_num: nums of input kernel_name : string cce kernel name, default value is add_n Returns ------- None """ # check inputs num input_num = len(inputs) if input_num < 2: error_info = {} error_info['errCode'] = OP_ERROR_CODE_012 error_info['op_name'] = 'add_n' error_info['param_name'] = 'input_num' error_info['max_value'] = '8' error_info['min_value'] = '2' error_info['real_value'] = str(input_num) raise RuntimeError( error_info, "In op[%s], the num of dimensions of input[%s] " "should be in the range of [%s, %s], but actually " "is [%s]." % (error_info['op_name'], error_info['param_name'], error_info['min_value'], error_info['max_value'], error_info['real_value'])) if input_num != tensor_num: error_info = {} error_info['errCode'] = OP_ERROR_CODE_017 error_info['op_name'] = 'add_n' error_info['param_name1'] = 'input_num' error_info['param_name2'] = 'tensor_num' error_info['param1_shape'] = str(input_num) error_info['param2_shape'] = str(tensor_num) raise RuntimeError( error_info, "In op[%s], the parameter[%s][%s] is not match with" "the parameter[%s][%s],it should be the same." % (error_info['op_name'], error_info['param_name1'], error_info['param1_shape'], error_info['param_name2'], error_info['param2_shape'])) dtype_0 = inputs[0].get("dtype").lower() for index in range(0, tensor_num): shape_input = inputs[index].get("shape") check_shape(shape_input, param_name="inputs") dtype_input = inputs[index].get("dtype").lower() check_list = ("float16", "float32", "int32") check_dtype(dtype_input, check_list, param_name="inputs") if dtype_input != dtype_0: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'add_n' error_info['param_name1'] = 'dtype_input' error_info['param_name2'] = 'dtype_0' error_info['param1_dtype'] = str(dtype_input) error_info['param2_dtype'] = str(dtype_0) raise RuntimeError( error_info, "In op[%s], the parameter" "[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % (error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify(inputs, Mode.ELEWISE) schedules, tensors = [], [] for inputs in ins: with te.op.compute(): shape_normlize = variable_shape(inputs) fuse_shape = [1] datas = [] for (i, input_dict), shape_i in zip(enumerate(inputs), shape_normlize): fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_i) datas.append( tvm.placeholder(fuse_shape, name="data_%d" % i, dtype=dtype_0)) # add_n_compute res = add_n_compute(datas, output, kernel_name) tensors.append(datas) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build datas.append(res) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
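# Illustrative usage sketch (assumed values): every input dict must share the
# same dtype, and tensor_num must equal len(inputs) with at least two inputs,
# otherwise the checks above raise. Shapes and ranges are placeholders.
def _example_add_n():
    t = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float16"}
    inputs = [dict(t), dict(t), dict(t)]
    output = dict(t)
    add_n(inputs, output, tensor_num=3, kernel_name="add_n_demo")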
def add(input_x, input_y, output_z, kernel_name="add"): """ algorithm: add calculating data's add, c = a + b Parameters ---------- input_x : dict including shape, dtype and range, only support float16, float32, int32 input_y : dict including shape, dtype and range, only support float16, float32, int32 output_z: dict shape should be broadcast shape of input, and type equals to input kernel_name : str cce kernel name, default value is add Returns ------- None """ # check input tensor data_type x_dtype = input_x.get("dtype").lower() y_dtype = input_y.get("dtype").lower() check_list = ("float16", "float32", "int32") check_dtype(x_dtype, check_list, param_name="input_x") check_dtype(y_dtype, check_list, param_name="input_y") check_elewise_shape_range([input_x, input_y], support_broadcast=True) if x_dtype != y_dtype: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'add' error_info['param_name1'] = 'x_dtype' error_info['param_name2'] = 'y_dtype' error_info['param1_dtype'] = str(x_dtype) error_info['param2_dtype'] = str(y_dtype) raise RuntimeError(error_info, "In op[%s], the parameter[%s][%s] are not equal in" "dtype with dtype[%s][%s]" % (error_info['op_name'], error_info[ 'param_name1'], error_info[ 'param_name2'], error_info[ 'param1_dtype'], error_info[ 'param2_dtype'])) # format_pattern = 1 Nz and vector # format_pattern = 2 vector and Nz # format_pattern = 0 Nz scalar Nz Nz ND ND format_pattern = _add_check_format(input_x, input_y) # infer shape for supporting add shape_x, shape_y = _infer_shape(format_pattern, input_x, input_y) shape_x = scalar2tensor_one(shape_x) shape_y = scalar2tensor_one(shape_y) # normalize shape input_x["shape"] = shape_x input_y["shape"] = shape_y ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (input_x, input_y) in ins: with te.op.compute(): shape_x, shape_y = variable_shape([input_x, input_y], support_broadcast=True) shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y) data_x = tvm.placeholder(shape_x, name="data_1", dtype=x_dtype) data_y = tvm.placeholder(shape_y, name="data_2", dtype=y_dtype) res = add_compute(data_x, data_y, output_z, kernel_name) tensors.append((data_x, data_y, res)) with tvm.target.cce(): schedule = generic.auto_schedule(res) schedules.append(schedule) config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
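# Illustrative usage sketch (assumed keys and values): besides shape/range/dtype,
# this entry point also inspects the format and original shape through
# _add_check_format/_infer_shape to handle Nz-with-vector cases, so plain ND
# dicts are shown here. All concrete values are invented for demonstration.
def _example_add():
    input_x = {"shape": (-1, -1), "ori_shape": (-1, -1), "range": [(1, None), (1, None)],
               "dtype": "float16", "format": "ND", "ori_format": "ND"}
    input_y = {"shape": (-1,), "ori_shape": (-1,), "range": [(1, None)],
               "dtype": "float16", "format": "ND", "ori_format": "ND"}
    output_z = dict(input_x)
    add(input_x, input_y, output_z, kernel_name="add_broadcast_demo")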
def reduce_sum(x, axes, y, keepdims=False, kernel_name="reduce_sum"): """reduce a tensor on a certain axes based on sum. Parameters: ---------- x: dict the dict of input tensor. axes: dict the axes for reduce. y: dict the dict of output tensor. keepdims: bool or NONETYPE if true, retains reduced dimensions with length 1. kernel_name: str cce kernel name, default value is "reduce_sum". Returns ------- None """ dtype_x = x["dtype"] dtype_lower_x = dtype_x.lower() check_list_x = ("float16", "float32") check_dtype(dtype_lower_x, check_list_x, param_name="x") dtype_axes = axes["dtype"] dtype_lower_axes = dtype_axes.lower() check_list_axes = ("int32", "int64") check_dtype(dtype_lower_axes, check_list_axes, param_name="axes") input_shape = x.get("shape") if not _check_data_shape_const(input_shape): schedules = [] ins = classify([x, axes], Mode.REDUCE) tensors = [] shape_axes = variable_shape([axes])[0] data_input_axes = tvm.placeholder(shape_axes, name="data_input_axes", dtype=dtype_lower_axes) for (x, axes) in ins: with te.op.compute(): shape_x = variable_shape([x])[0] data_input_x = tvm.placeholder(shape_x, name="data_input_x", dtype=dtype_lower_x) shape_len = len(shape_x) axes_d = cce_util.axis_check(shape_len, axes) res = reduce_sum_compute(data_input_x, axes_d, y, keepdims) tensors.append([data_input_x, data_input_axes, res]) with tvm.target.cce(): schedule = generic.auto_schedule(res) schedules.append(schedule) # build config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config) add_compile_info("reduce_axis_unknown", 1) else: _reduce_sum_const(x, axes, keepdims, kernel_name)
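# Illustrative usage sketch (assumed values): unlike reduce_sum_d, the axes here
# are a tensor input described by their own dict (int32/int64 dtype); when the
# data shape is fully static, the const branch _reduce_sum_const is taken instead.
def _example_reduce_sum():
    x = {"shape": (-1, -1, -1), "range": [(1, None)] * 3, "dtype": "float16"}
    axes = {"shape": (1,), "range": [(1, 1)], "dtype": "int32"}
    y = {"shape": (-1, -1), "range": [(1, None)] * 2, "dtype": "float16"}
    reduce_sum(x, axes, y, keepdims=False, kernel_name="reduce_sum_demo")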
def maximum(x1, x2, y, kernel_name="maximum"): """ do element-wise maximum operation between two input tensors Parameters: ---------- x1 : dict first input dict, only support float16, float32, int32 x2 : dict second input dict, only support float16, float32, int32 y: dict output dict, should be the broadcast shape and type as input kernel_name : str cce kernel name, default value is maximum Returns ------- None """ # check input tensor data dtype check_list = ["float16", "float32", "int32"] dtype_x1 = x1.get("dtype").lower() dtype_x2 = x2.get("dtype").lower() check_dtype(dtype_x1, check_list, param_name="x1") check_dtype(dtype_x2, check_list, param_name="x2") check_elewise_shape_range([x1, x2], support_broadcast=True) if dtype_x1 != dtype_x2: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'maximum' error_info['param_name1'] = 'dtype_x1' error_info['param_name2'] = 'dtype_x2' error_info['param1_dtype'] = str(dtype_x1) error_info['param2_dtype'] = str(dtype_x2) raise RuntimeError( error_info, "In op[%s], the parameter[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % (error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (x1, x2) in ins: with te.op.compute(): shape_x1, shape_x2 = variable_shape([x1, x2], support_broadcast=True) shape_x1, shape_x2 = refine_shapes_for_broadcast( shape_x1, shape_x2) data1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data1") data2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data2") res = maximum_compute(data1, data2, y, kernel_name) tensors.append([data1, data2, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def mul(input1, input2, output, kernel_name="mul"): """ algorithm: mul calculating data's mul, c = a * b Parameters ---------- input1 : dict include ori_shape, shape, ori_format, format, dtype and range dtype only support float16, float32, int32 input2 : dict include ori_shape, shape, ori_format, format, dtype and range dtype only support float16, float32, int32 output: dict include ori_shape, shape, ori_format, format, dtype and range shape must be broadcast shape of input kernel_name : str cce kernel name, default value is mul Returns ------- None """ # check dtype dtype_x1 = input1.get("dtype").lower() dtype_x2 = input2.get("dtype").lower() check_list = ("float16", "float32", "int32") check_dtype(dtype_x1, check_list, param_name="input1") check_dtype(dtype_x2, check_list, param_name="input2") check_elewise_shape_range([input1, input2], support_broadcast=True) if dtype_x1 != dtype_x2: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'mul' error_info['param_name1'] = 'dtype_x1' error_info['param_name2'] = 'dtype_x2' error_info['param1_dtype'] = str(dtype_x1) error_info['param2_dtype'] = str(dtype_x2) raise RuntimeError(error_info, "In op[%s], the parameter[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % ( error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify([input1, input2], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (input1, input2) in ins: with te.op.compute(): # shape shape_x1, shape_x2 = variable_shape([input1, input2], support_broadcast=True) shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1, shape_x2) # mul_compute data_x1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data_x1") data_x2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data_x2") res = mul_compute(data_x1, data_x2, output, kernel_name) tensors.append((data_x1, data_x2, res)) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def floor_mod(x1, x2, y, kernel_name="floor_mod"): """ calculate the remainder of division, support fp16,fp32,int32 res = x1 -floor(input_data_x / input_data_y)* input_data_y Parameters ---------- x1: dict dict{"shape":tuple or list,"dtype":str, "range": tuple or list} shape of data the data type, src_dtype equals dst_dtype, support fp16,fp32,int32 x2: dict dict{"shape":tuple or list,"dtype":str, "range": tuple or list} shape of data the data type, src_dtype equals of dst_dtype, support fp16,fp32,int32 y: dict, reserved field dict with keys(shape, dtype and range) of output kernel_name: str cce kernel name, default value is "floor_mod" Returns ------ None """ # check input tensor data_type dtype_x = x1.get("dtype").lower() dtype_y = x2.get("dtype").lower() check_list = ("float16", "float32", "int32") check_dtype(dtype_x, check_list, param_name="x1") check_dtype(dtype_y, check_list, param_name="x2") check_elewise_shape_range([x1, x2], support_broadcast=True) if dtype_x != dtype_y: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'floor_mod' error_info['param_name1'] = 'dtype_x' error_info['param_name2'] = 'dtype_y' error_info['param1_dtype'] = str(dtype_x) error_info['param2_dtype'] = str(dtype_y) raise RuntimeError(error_info, "In op[%s], the parameter[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % ( error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (x1, x2) in ins: with te.op.compute(): shape_x, shape_y = variable_shape([x1, x2], support_broadcast=True) shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y) input_data_x = tvm.placeholder(shape_x, name="input_data_x", dtype=dtype_x) input_data_y = tvm.placeholder(shape_y, name="input_data_y", dtype=dtype_y) res = floor_mod_compute(input_data_x, input_data_y, y, kernel_name) tensors.append([input_data_x, input_data_y, res]) with tvm.target.cce(): auto_sch = generic.auto_schedule(res) schedules.append(auto_sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def div(input_x, input_y, output_z, kernel_name="div"): """ algorithm: div calculating data's div, res = x / y Parameters ---------- input_x: dict dict with keys(shape and dtype) of input_x input_y: dict dict with keys(shape and dtype) of input_y output_z: dict dict with keys(shape and dtype) of output kernel_name: str kernel name, default value is "div" Returns ------- None """ # check dtype x_dtype = input_x.get("dtype").lower() y_dtype = input_y.get("dtype").lower() check_list = ("float16", "float32", "int8", "uint8", "int32") check_dtype(x_dtype, check_list, param_name="input_x") check_dtype(y_dtype, check_list, param_name="input_y") check_elewise_shape_range([input_x, input_y], support_broadcast=True) if x_dtype != y_dtype: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'div' error_info['param_name1'] = 'x_dtype' error_info['param_name2'] = 'y_dtype' error_info['param1_dtype'] = str(x_dtype) error_info['param2_dtype'] = str(y_dtype) raise RuntimeError( error_info, "In op[%s], the parameter[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % (error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (input_x, input_y) in ins: with te.op.compute(): x_shape, y_shape = variable_shape([input_x, input_y], support_broadcast=True) x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape) tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x") tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y") res = div_compute(tensor_x, tensor_y, output_z, kernel_name) tensors.append([tensor_x, tensor_y, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) # build config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def cast(input_x, output_y, dst_type, kernel_name="cast"): """ cast a tensor/scalar with input shape from src data type to dst data type. restrictions of input algorithms are as follows: only the type groups below are supported tensor process: float16->float32 float16->int32 float32->float16 float32->int32 int8->float32 uint8->float32 int8->float16 uint8->float16 int8->int32 uint8->int32 int32->uint8 // number out of [0,255] can get unexpected result int32->int8 // number out of [-128,127] can get unexpected result int32->float32 // for conversion via fp16, only guarantees numbers in [-1023,1023] get correct result int32->float16 // only guarantees numbers in [-1023,1023] get correct result scalar convert support (means only support shape [1,]): int64->int32 int64->float32 Parameters ---------- input_x : dict shape and dtype of input, the src dtype must be one of the supported source types above output_y: dict shape and dtype of output, should be same shape as input, and the dtype is the dst dtype need to cast kernel_name : str cce kernel name, default value is cast Returns ------- None """ src_type = input_x.get("dtype").lower() if src_type == "bool": src_type = "int8" schedules, tensors = [], [] ins = classify([input_x], Mode.ELEWISE) for (input_x,) in ins: with te.op.compute(): x_shape = variable_shape([input_x]) dst_type = _cast_dsttype_conversion(dst_type) fuseshape = [1] fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0]) data = tvm.placeholder(fuseshape, name="data", dtype=src_type) if src_type == "int64": check_dtype(dst_type, ("float32", "int32"), param_name="dst_type") res = tvm.extern( [fuseshape], [data], lambda ins, outs: _kernel_ir(outs, ins, dst_type, "int64"), name="res", dtype=dst_type) tensor_list = [data, res] schedule = tvm.create_schedule(res.op) with build_config: tvm.build(schedule, tensor_list, "cce", name=kernel_name) else: res = cast_compute(data, output_y, dst_type, kernel_name) tensors.append([data, res]) if src_type != "int64": with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = { "print_ir": False, "name": kernel_name, "tensor_list": tensors } te.lang.dynamic.build(schedules, config)
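# Illustrative usage sketch (assumed values): dst_type is the destination-type
# code that _cast_dsttype_conversion understands (an enum-style value supplied by
# the framework); the concrete code used below is a placeholder assumption, not a
# value taken from this file.
def _example_cast():
    input_x = {"shape": (-1,), "range": [(1, None)], "dtype": "float16"}
    output_y = {"shape": (-1,), "range": [(1, None)], "dtype": "int32"}
    dst_type_code = 3  # assumed to map to int32 via _cast_dsttype_conversion
    cast(input_x, output_y, dst_type_code, kernel_name="cast_fp16_to_int32_demo")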
def less_equal(input_x, input_y, output_z, kernel_name="less_equal"): """ Returns the truth value of (x <= y) element-wise Parameters ---------- input_x: dict dict{"shape":tuple or list, "dtype":str, range: tuple or list}, shape, range, and dtype of first input, support float16,float32,int32,int8,uint8 input_y: dict dict{"shape":tuple or list, "dtype":str, range: tuple or list}, shape, range, and dtype of first input, support float16,float32,int32,int8,uint8 output_z: dict dict of output, should be broadcast shape and type as input kernel_name: str cce kernel name, default value is "less_equal" Returns ------- None """ # check input tensor data_type x_dtype = input_x.get("dtype").lower() y_dtype = input_y.get("dtype").lower() check_list = ("float16", "float32", "int32", "uint8", "int8") check_dtype(x_dtype, check_list, param_name="input_x") check_dtype(y_dtype, check_list, param_name="input_y") check_elewise_shape_range([input_x, input_y], support_broadcast=True) if x_dtype != y_dtype: error_info = {} error_info['errCode'] = OP_ERROR_CODE_018 error_info['op_name'] = 'less_equal' error_info['param_name1'] = 'x_dtype' error_info['param_name2'] = 'y_dtype' error_info['param1_dtype'] = str(x_dtype) error_info['param2_dtype'] = str(y_dtype) raise RuntimeError( error_info, "In op[%s], the parameter[%s][%s] are not equal in " "dtype with dtype[%s][%s]." % (error_info['op_name'], error_info['param_name1'], error_info['param_name2'], error_info['param1_dtype'], error_info['param2_dtype'])) ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST) schedules, tensors = [], [] for (input_x, input_y) in ins: with te.op.compute(): # shape x_shape, y_shape = variable_shape([input_x, input_y], support_broadcast=True) x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape) # less_equal compute tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x") tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y") res = less_equal_compute(tensor_x, tensor_y, output_z, kernel_name) tensors.append([tensor_x, tensor_y, res]) with tvm.target.cce(): sch = generic.auto_schedule(res) schedules.append(sch) config = {"name": kernel_name, "tensor_list": tensors} te.lang.dynamic.build(schedules, config)
def reduce_mean_d(input_x, output_y, axes, keepdims=None, kernel_name="reduce_mean_d", impl_mode="high_performance"): """ Reduce a tensor on certain axes based on mean. Parameters: ---------- input_x : dict shape and dtype of input output_y: dict shape and dtype of output axes : int, list, tuple, NoneType The dimensions to reduce. If None (the default), reduces all dimensions. Must be in the range [-rank(input_tensor), rank(input_tensor)). keepdims : bool, NoneType if true, retains reduced dimensions with length 1, default value is None. kernel_name : str cce kernel name, default value is reduce_mean_d Returns ------- None """ dtype = input_x["dtype"] dtype_lower = dtype.lower() check_list = ("float16", "float32", "int8", "uint8") check_dtype(dtype_lower, check_list) with te.op.compute(): shape = input_x["shape"] shape_range = input_x["range"] shape_len = len(shape) if not axes: axes = range(shape_len) if hasattr(axes, 'index'): axes = list(axes) # not support 5HD is_5hdc = False shape_new, shape_range_new, axes_new, fused_rel_dic = \ fused_reduce_axis(shape, shape_range, axes) add_compile_info("fused_rel_dic", fused_rel_dic) input_x["shape"] = shape_new input_x["range"] = shape_range_new shape_var_new = variable_shape([input_x])[0] data_input = tvm.placeholder(shape_var_new, name="data_input", dtype=dtype_lower) res = reduce_mean_d_compute(data_input, output_y, axes_new, keepdims, impl_mode=impl_mode, is_5hdc=is_5hdc) with tvm.target.cce(): sch = generic.auto_schedule(res) config = {"name": kernel_name, "tensor_list": [data_input, res]} te.lang.dynamic.build(sch, config)
def tile_d(input_x, output_x, multiples, kernel_name="tile_d"): """algorithm: tile. The tile in tensorflow can multiple the shape of the given tensor. For example, tiling [a b c d] by [2] produces [a b c d a b c d]. The tile op in TBE is different from tf.tile, tile of TBE use broadcast api, and only support that at least an axis in shape is 1.The '1' axis is to be multipled. For example, if shape = [51, 1] and multiples = [1, 77], after computation, the output shape will be [51, 77]. Abnormal condition: 1. The length of shape must be equal to or less than the shape of multiples. 2. The type of kernel_name is not string. 3. The shape is neither list nor tuple. 4. The dtype is not float32, float16, or int32. 5. All of the axises of the multiples is 1. Parameters ---------- input_x : dict shape and dtype of input output_x: dict dict of output. multiples : list or tuple. Number of the axis replicates. kernel_name : str. kernel name, default value is "tile_d". Returns ------- None """ dtype = input_x.get("dtype").lower() check_list = ("float16", "float32", "int32") check_dtype(dtype, check_list, param_name="input_x") unkown_shape = [] shape = input_x.get("shape") for i in range(0, len(shape)): if shape[i] == -1: unkown_shape.append(i) with te.op.compute(): shape = te.lang.dynamic.shape_to_list(variable_shape([input_x])[0]) multiples = te.lang.dynamic.shape_to_list(multiples) origin_multiples = multiples input_format = input_x.get("format") output_format = output_x.get("format") if input_format in ("NCHW", "NHWC") and output_format in ("NC1HWC0", ): # branch: 4D tile to 5HD ((N, 1, 1, 1) to (N, C1, H, W, C0)) # and output C is 16 align # change input shape from (N, 1, 1, 1) to (N, 1, 1, 1, 1) shape = shape + [1] if input_format == "NCHW": # change multiples from (1, C, H, W) to (1, C1, H, W, C0) multiples = [ multiples[0], multiples[1] // 16, multiples[2], multiples[3], 16 ] else: # change multiples from (1, H, W, C) to (1, C1, H, W, C0) multiples = [ multiples[0], multiples[3] // 16, multiples[1], multiples[2], 16 ] if len(shape) > len(multiples): error_info = {} error_info['errCode'] = OP_ERROR_CODE_012 error_info['op_name'] = 'tile_d' error_info['param_name'] = 'shape' error_info['max_value'] = str(len(multiples)) error_info['min_value'] = '1' error_info['real_value'] = str(len(shape)) raise RuntimeError( error_info, "In op[%s], the num of dimensions of input[%s] should be in the range of " "[%s, %s], but actually is [%s]." % (error_info['op_name'], error_info['param_name'], error_info['min_value'], error_info['max_value'], error_info['real_value'])) if len(shape) < len(multiples): len_error = len(multiples) - len(shape) shape = [1] * len_error + shape shape_adapt = [] multiples_adapt = [] for i, shape_i in enumerate(shape): multiples_i = multiples[i] if multiples_i != 1 and shape_i != 1: shape_adapt.append(1) multiples_adapt.append(multiples_i) multiples_i = 1 shape_adapt.append(shape_i) multiples_adapt.append(multiples_i) shape = shape_adapt multiples = multiples_adapt for shape_i, multiples_i in zip(shape, multiples): if not (shape_i == 1 or multiples_i == 1): error_info = {} error_info['errCode'] = OP_ERROR_CODE_009 error_info['op_name'] = 'tile_d' error_info[ 'rule_desc'] = "Any axis of either shape or multiples have to be 1" error_info['param_name1'] = 'shape_i' error_info['param_name2'] = 'multiples_i' error_info['param1_value'] = str(shape_i) error_info['param2_value'] = str(multiples_i) raise RuntimeError( error_info, "Op[%s] has rule: %s, but [%s] is [%s], [%s] is [%s]." 
% (error_info['op_name'], error_info['rule_desc'], error_info['param_name1'], error_info['param1_value'], error_info['param_name2'], error_info['param2_value'])) axis_not_multiple = 0 for multiples_i in multiples: if multiples_i == 1: axis_not_multiple += 1 if axis_not_multiple == len(multiples): error_info = {} error_info['errCode'] = OP_ERROR_CODE_005 error_info['op_name'] = 'tile_d' error_info['param_name'] = 'axis_not_multiple' error_info['min_len'] = '1' error_info['max_len'] = str(len(multiples) - 1) error_info['length'] = str(axis_not_multiple) raise RuntimeError( error_info, "In op[%s], the length of parameter[%s] be in the range of [%s, %s], but " "actually is [%s]." % (error_info['op_name'], error_info['param_name'], error_info['min_len'], error_info['max_len'], error_info['length'])) data = tvm.placeholder(shape, name="data", dtype=dtype) res = tile_d_compute(data, output_x, multiples, kernel_name) with tvm.target.cce(): sch = generic.auto_schedule(res) config = { "print_ir": False, "name": kernel_name, "tensor_list": [data, res] } te.lang.dynamic.build(sch, config) te.op.add_compile_info("_unknown_shape", unkown_shape) te.op.add_compile_info("_origin_multiples", origin_multiples) te.op.add_compile_info("_multiples_adapt", multiples_adapt)
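# Illustrative usage sketch (assumed values): following the docstring example, a
# (51, 1) input tiled by multiples (1, 77) yields (51, 77); in dynamic mode the
# axis to be multiplied must still be 1 in the shape. All values are placeholders.
def _example_tile_d():
    input_x = {"shape": (-1, 1), "range": [(1, None), (1, 1)], "dtype": "float16",
               "format": "ND", "ori_format": "ND"}
    output_x = {"shape": (-1, 77), "range": [(1, None), (77, 77)], "dtype": "float16",
                "format": "ND", "ori_format": "ND"}
    tile_d(input_x, output_x, multiples=[1, 77], kernel_name="tile_d_demo")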
def bn_training_update_v3(x, sum, square_sum, scale, offset, y, batch_mean, batch_variance, reserve_1, reserve_2, epsilon, kernel_name="bn_training_update_v3"): """ algorithm: fused_batch_norm_v2 Batch normalization. Parameters ---------- x: dict dict of input, A 5HD Tensor for input data. sum: dict dict of sum, A 5HD Tensor for sum. The output of batch_normalization_forward_training_reduce. square_sum: dict dict of square_sum, A 5HD Tensor for square_sum. The output of batch_normalization_forward_training_reduce. scale: dict dict of scale, A 5HD Tensor for mean. offset: dict dict of offset, A 5HD Tensor for variance. y: dict dict of output, A `Tensor`. Has the same type as `x`. batch_mean: dict dict of batch_mean, A `Tensor`. One of the result which is called save mean. batch_variance: dict dict of batch_variance, A `Tensor`. Has the same type as `batch_mean`. reserve_1: dict dict of batch_mean, A `Tensor`. Has the same type as `batch_mean`. reserve_2: dict dict of batch_variance, A `Tensor`. Has the same type as `batch_variance`. epsilon: float A small float number added to the variance of x. kernel_name: str kernel name, default value is "bn_training_update_v3" Returns ------- None """ dtype_x = x.get("dtype").lower() dtype_sum = sum.get("dtype").lower() dtype_sqrsum = square_sum.get("dtype").lower() dtype_scale = scale.get("dtype").lower() dtype_offset = offset.get("dtype").lower() shape_x = x.get("shape") shape_sum = sum.get("shape") shape_sqrsum = square_sum.get("shape") shape_scale = scale.get("shape") shape_offset = offset.get("shape") data_format = x.get("format").upper() origin_format = x.get("ori_format").upper() # check dtype _check_dtype(dtype_x, dtype_sum, dtype_sqrsum, dtype_scale, dtype_offset) # check format check_list = ("NC1HWC0", "NCHW") check_format(data_format, check_list, param_name="x") if data_format == "NCHW" and origin_format not in ("NCHW",): raise RuntimeError("The origin format only supports " "NCHW when format is NCHW") # check shape if data_format == "NC1HWC0": _check_shape_5hd(shape_x, shape_sum, shape_sqrsum, shape_scale, shape_offset) shape_list = [1, 1, 1, 1, 1] shape_list[1] = shape_x[1] shape_list[4] = shape_x[4] shape_sum = shape_list else: shape_list = [1, 1, 1, 1] shape_list[1] = shape_x[1] shape_sum = shape_list # get dynamic shape shape_x, shape_sum = variable_shape([x, sum]) log.debug("input_x shape: " + str(shape_x)) log.debug("input_sum shape: " + str(shape_sum)) # compute with te.op.compute(): in_x = tvm.placeholder(shape_x, name="x", dtype=dtype_x) in_sum = tvm.placeholder(shape_sum, name="sum", dtype=dtype_sum) in_sqrsum = tvm.placeholder(shape_sum, name="sqrsum", dtype=dtype_sum) in_scale = tvm.placeholder(shape_sum, name="scale", dtype=dtype_sum) in_offset = tvm.placeholder(shape_sum, name="offset", dtype=dtype_sum) res = bn_training_update_v3_compute(in_x, in_sum, in_sqrsum, in_scale, in_offset, y, batch_mean, batch_variance, reserve_1, reserve_2, epsilon, kernel_name=kernel_name) # schedule with tvm.target.cce(): sch = generic.auto_schedule(res) # build tensor_list = [in_x, in_sum, in_sqrsum, in_scale, in_offset] + list(res) config = {"name": kernel_name, "tensor_list": tensor_list} te.lang.dynamic.build(sch, config)
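# Illustrative usage sketch (assumed values): x is 5HD while sum/square_sum/scale/
# offset are per-channel (1, C1, 1, 1, C0) tensors; epsilon is the small constant
# added to the variance. All shapes, ranges and dtypes below are placeholders.
def _example_bn_training_update_v3():
    x = {"shape": (-1, 2, -1, -1, 16),
         "range": [(1, None), (2, 2), (1, None), (1, None), (16, 16)],
         "dtype": "float16", "format": "NC1HWC0", "ori_format": "NHWC"}
    chan = {"shape": (1, 2, 1, 1, 16),
            "range": [(1, 1), (2, 2), (1, 1), (1, 1), (16, 16)],
            "dtype": "float32", "format": "NC1HWC0", "ori_format": "NHWC"}
    bn_training_update_v3(x, dict(chan), dict(chan), dict(chan), dict(chan),
                          dict(x), dict(chan), dict(chan), dict(chan), dict(chan),
                          epsilon=1e-5, kernel_name="bn_update_v3_demo")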