def floor_div(input_x, input_y, output_z, kernel_name="floor_div"):
    """
    algorithm: floordiv
      calculating data's floordiv, res = floor(x / y)

    Parameters
    ----------
    input_x: dict
    input_y: dict
    output_z: dict
    kernel_name: str, default value is "floor_div"

    Returns
    -------
    None
    """
    # check dtype of input_x/input_y
    input_dtype_x = input_x.get("dtype").lower()
    input_dtype_y = input_y.get("dtype").lower()
    check_list = ('int8', 'uint8', 'int32', 'float16', 'float32')
    check_dtype(input_dtype_x, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if input_dtype_x != input_dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_div'
        error_info['param_name1'] = 'input_dtype_x'
        error_info['param_name2'] = 'input_dtype_y'
        error_info['param1_dtype'] = str(input_dtype_x)
        error_info['param2_dtype'] = str(input_dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, input_dtype_x, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, input_dtype_y, "tensor_y")
            res = floor_div_compute(tensor_x, tensor_y, output_z, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

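# Illustrative reference only (a NumPy sketch added for clarity, not part of
# the TBE kernel above): the documented semantics are res = floor(x / y),
# applied element-wise with broadcasting between the two inputs.
import numpy as np

def _floor_div_reference(x, y):
    """Element-wise floor(x / y) with NumPy broadcasting."""
    return np.floor(np.true_divide(x, y))

# e.g. _floor_div_reference(np.array([7, -7]), 2) -> array([ 3., -4.])
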
def exp(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="exp"):
    """
    algorithm: exp
      calculating data's exp
      if base == -1: y = exp(shift + scale * x)
      if base > 0:   y = exp((shift + scale * x) * ln(base))

    Parameters
    ----------
    input_x : dict, shape and dtype of input, only support float16, float32
    output_y: dict, shape and dtype of output, should be same shape and type
        as input
    base: (optional, default -1 for a value of e) the base gamma
    scale: (optional, default 1) the scale alpha
    shift: (optional, default 0) the shift beta
    kernel_name : str, kernel name, default value is "exp"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")

    # input_x's dtype check, only supports fp16 and fp32
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")

    if base <= 0 and (not isclose(base, -1.0)):
        expect_value = "strictly positive or -1"
        real_value = "base <= 0 and base != -1"
        error_manager_vector.raise_err_input_value_invalid(
            kernel_name, "base", expect_value, real_value)

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x,) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, shape_x[0])
            data_input = tvm.placeholder(fuseshape, name="data_input",
                                         dtype=input_dtype)
            res = exp_compute(data_input, output_y, base, scale, shift,
                              kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

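# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): the documented math is y = exp(shift + scale * x) when base == -1,
# otherwise y = exp((shift + scale * x) * ln(base)), i.e. base ** (shift + scale * x).
import numpy as np

def _exp_reference(x, base=-1.0, scale=1.0, shift=0.0):
    t = shift + scale * np.asarray(x, dtype=np.float32)
    return np.exp(t) if base == -1.0 else np.exp(t * np.log(base))
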
def relu(x, y, kernel_name="relu"):
    """
    Algorithm: relu(x) = max(x, 0)

    Parameters
    ----------
    x: dict
        dynamic input, include shape, dtype and range
    y: dict
        the dict of output
    kernel_name: str
        kernel name, must be string, default value is "relu".

    Returns
    -------
    None
    """
    # check input tensor data_type
    dtype_x = x.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "int32")
    check_dtype(dtype_x, check_list, param_name="x")

    ins = classify([x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([x])
            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_x[0])
            input_data = tvm.placeholder(fuse_shape, name="input_data",
                                         dtype=dtype_x)
            res = relu_compute(input_data, y, kernel_name)
            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

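# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): relu(x) = max(x, 0), applied element-wise.
import numpy as np

def _relu_reference(x):
    return np.maximum(x, 0)
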
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    calculate the backpropagation of leaky_relu operation
    y = gradients(x > 0) or negative_slope * gradients(x <= 0).
    support dtype: float16, float32

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    g_dtype = g.get("dtype").lower()
    x_dtype = x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(g_dtype, check_list, param_name="input_g")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_elewise_shape_range([g, x], support_broadcast=True)
    if g_dtype != x_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "g", "x", g_dtype, x_dtype)

    ins = classify([g, x], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (g, x) in ins:
        with te.op.compute():
            g_shape, x_shape = variable_shape([g, x], support_broadcast=True)
            g_shape, x_shape = refine_shapes_for_broadcast(g_shape, x_shape)
            tensor_g = tvm.placeholder(g_shape, g_dtype, "tensor_g")
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            res = leaky_relu_grad_compute(tensor_g, tensor_x, y,
                                          negative_slope, kernel_name)
            tensors.append((tensor_g, tensor_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

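# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): the documented backward rule is y = g where x > 0 and
# y = negative_slope * g where x <= 0.
import numpy as np

def _leaky_relu_grad_reference(g, x, negative_slope=0):
    return np.where(np.asarray(x) > 0, g, negative_slope * np.asarray(g))
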
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
      calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32
    x2 : dict
        shape and dtype of second input, only support float16, float32
    y: dict
        shape and dtype of output, should be broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """
    x_dtype = x1.get("dtype").lower()
    y_dtype = x2.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if x_dtype != y_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", x_dtype, y_dtype)

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = real_div_compute(tensor_x, tensor_y, y, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad
      sigmoid_grad = (sigmoid - sigmoid * sigmoid) * grad

    Parameters
    ----------
    x : dictionary
        shape of sigmoid input
    dx : dictionary
        shape of grad
    out: dictionary
        output
    kernel_name : str
        cce kernel name, default value is "sigmoid_grad"

    Returns
    -------
    None
    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(dx_dtype, check_list, param_name="input_dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)

    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (sig, dx) in ins:
        with te.op.compute():
            shape_sig, shape_dx = variable_shape([sig, dx],
                                                 support_broadcast=False)
            shape_sig, shape_dx = refine_shapes_for_broadcast(shape_sig,
                                                              shape_dx)
            tensor_sig = tvm.placeholder(shape_sig, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dx_dtype, "tensor_dx")
            res = sigmoid_grad_compute(tensor_sig, tensor_dx, out, kernel_name)
            tensors.append([tensor_sig, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

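# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): the documented formula is sigmoid_grad = (sigmoid - sigmoid * sigmoid) * grad,
# where `sig` is the forward sigmoid output and `dx` the incoming gradient.
import numpy as np

def _sigmoid_grad_reference(sig, dx):
    sig = np.asarray(sig, dtype=np.float32)
    return (sig - sig * sig) * dx
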
def sqrt(input_x, output_y, kernel_name="sqrt"):
    """
    algorithm: sqrt
      calculating data sqrt, y = x**0.5; mini does not support vsqrt,
      so exp(0.5 * log(x)) is used instead

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is sqrt

    Returns
    -------
    None
    """
    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # sqrt_compute
            input_data = tvm.placeholder(fuseshape, name="input_data",
                                         dtype=x_dtype)
            res = sqrt_compute(input_data, output_y, kernel_name)
            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

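# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): on chips without vsqrt the docstring's workaround exp(0.5 * log(x))
# is used; for x > 0 it matches x ** 0.5 numerically.
import numpy as np

def _sqrt_reference(x, use_exp_log=False):
    x = np.asarray(x, dtype=np.float32)
    return np.exp(0.5 * np.log(x)) if use_exp_log else np.sqrt(x)
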
def square(input_x, output, kernel_name="square"):
    """
    algorithm: square
      calculating data's square, y = x * x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """
    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # square_compute
            data_x = tvm.placeholder(fuseshape, x_dtype, name="data_x")
            res = square_compute(data_x, output, kernel_name)
            tensors.append((data_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"):
    """
    algorithm: sqrt_grad_cce

    Parameters
    ----------
    x : dict
        dict of data
    dx : dict
        dict of data_grad
    out : dict
        dict of output
    kernel_name : str
        cce kernel name, default value is "sqrt_grad"

    Returns
    -------
    None
    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="x")
    check_dtype(dx_dtype, check_list, param_name="dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)

    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, dx) in ins:
        with te.op.compute():
            x_shape, dx_shape = variable_shape([x, dx],
                                               support_broadcast=False)
            x_shape, dx_shape = refine_shapes_for_broadcast(x_shape, dx_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(dx_shape, dx_dtype, "tensor_dx")
            res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name)
            tensors.append([tensor_x, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def log1p(input_x, output_y, kernel_name="log1p"):
    """
    algorithm: log1p
      calculating data's log1p, y = log(x + 1)

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        kernel name, default value is "log1p"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")

    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuseshape, dtype=input_dtype,
                                         name="data_input")
            res = log1p_compute(data_input, output_y, kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {
        "name": kernel_name,
        "tensor_list": tensors,
        "bool_storage_as_1bit": False
    }
    te.lang.dynamic.build(schedules, config)

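# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): log1p computes y = log(x + 1); np.log1p is the numerically
# accurate form for small x.
import numpy as np

def _log1p_reference(x):
    return np.log1p(np.asarray(x, dtype=np.float32))
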
def sub(input_x, input_y, output_z, kernel_name="sub"):
    """
    do element-wise sub operation between two input tensors

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    input_y : dict
        shape and dtype of input, only support float16, float32, int32
    output_z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "sub"

    Returns
    -------
    None
    """
    check_list = ["float16", "float32", "int32"]
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    if x_dtype not in check_list or y_dtype not in check_list:
        error_detail = "sub only support float16, float32, int32"
        error_manager_vector.raise_err_two_input_dtype_invalid(
            kernel_name, "input_x", "input_y", error_detail)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            data1 = tvm.placeholder(x_shape, x_dtype, "data1")
            data2 = tvm.placeholder(y_shape, y_dtype, "data2")
            res = sub_compute(data1, data2, output_z, kernel_name)
            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def neg(input_x, output_y, kernel_name="neg"):
    """
    Computes numerical negative value element-wise, y = -x.

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32, int32, int8
    output_y: dict
        shape and dtype of output, should be same type as input
    kernel_name: str
        kernel name, default value is "neg"

    Returns
    -------
    None
    """
    dtype_input = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "int8")
    check_dtype(dtype_input, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuse_shape, name="data_input",
                                         dtype=dtype_input)
            res = neg_compute(data_input, output_y, kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def zeros_like(x, y, kernel_name="zeros_like"):
    """
    output a tensor of all zero, you can specify the output type

    Parameters
    ----------
    x: dict
        shape and dtype of input, only support float16, float32,
        int32, int8, uint8, bool
    y: dict
        shape and dtype of output data
    kernel_name: str
        cce kernel name, default value is "zeros_like"

    Returns
    -------
    None
    """
    dtype_x = x.get("dtype")
    check_list_src = ("float16", "float32", "int32", "int8", "uint8", "bool")
    src_dtype = dtype_x.lower()
    check_dtype(src_dtype, check_list_src, param_name="x")

    schedules, tensors = [], []
    ins = classify([x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            shape_x = (functools_reduce(lambda x, y: x * y, shape_x[0]), )
            x_input = tvm.placeholder(shape_x, name="x_input", dtype=src_dtype)
            res = zeros_like_compute(x_input, y, kernel_name=kernel_name)
            tensors.append([x_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def mul(input1, input2, output, kernel_name="mul"):
    """
    algorithm: mul
      calculating data's mul, c = a * b

    Parameters
    ----------
    input1 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    input2 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    output: dict
        include ori_shape, shape, ori_format, format, dtype and range
        shape must be broadcast shape of input
    kernel_name : str
        cce kernel name, default value is mul

    Returns
    -------
    None
    """
    # check dtype
    dtype_x1 = input1.get("dtype").lower()
    dtype_x2 = input2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x1, check_list, param_name="input1")
    check_dtype(dtype_x2, check_list, param_name="input2")
    check_elewise_shape_range([input1, input2], support_broadcast=True)
    if dtype_x1 != dtype_x2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'mul'
        error_info['param_name1'] = 'dtype_x1'
        error_info['param_name2'] = 'dtype_x2'
        error_info['param1_dtype'] = str(dtype_x1)
        error_info['param2_dtype'] = str(dtype_x2)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input1, input2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input1, input2) in ins:
        with te.op.compute():
            # shape
            shape_x1, shape_x2 = variable_shape([input1, input2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1,
                                                             shape_x2)
            # mul_compute
            data_x1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data_x1")
            data_x2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data_x2")
            res = mul_compute(data_x1, data_x2, output, kernel_name)
            tensors.append((data_x1, data_x2, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def floor_mod(x1, x2, y, kernel_name="floor_mod"):
    """
    calculate the remainder of division, support fp16, fp32, int32
      res = x1 - floor(x1 / x2) * x2

    Parameters
    ----------
    x1: dict
        dict{"shape": tuple or list, "dtype": str, "range": tuple or list}
        shape of data; the data type, src_dtype equals dst_dtype,
        support fp16, fp32, int32
    x2: dict
        dict{"shape": tuple or list, "dtype": str, "range": tuple or list}
        shape of data; the data type, src_dtype equals dst_dtype,
        support fp16, fp32, int32
    y: dict, reserved field
        dict with keys(shape, dtype and range) of output
    kernel_name: str
        cce kernel name, default value is "floor_mod"

    Returns
    -------
    None
    """
    # check input tensor data_type
    dtype_x = x1.get("dtype").lower()
    dtype_y = x2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x, check_list, param_name="x1")
    check_dtype(dtype_y, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if dtype_x != dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_mod'
        error_info['param_name1'] = 'dtype_x'
        error_info['param_name2'] = 'dtype_y'
        error_info['param1_dtype'] = str(dtype_x)
        error_info['param2_dtype'] = str(dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([x1, x2], support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            input_data_x = tvm.placeholder(shape_x, name="input_data_x",
                                           dtype=dtype_x)
            input_data_y = tvm.placeholder(shape_y, name="input_data_y",
                                           dtype=dtype_y)
            res = floor_mod_compute(input_data_x, input_data_y, y, kernel_name)
            tensors.append([input_data_x, input_data_y, res])
        with tvm.target.cce():
            auto_sch = generic.auto_schedule(res)
        schedules.append(auto_sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

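# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): the documented remainder is res = x1 - floor(x1 / x2) * x2, which
# takes the sign of the divisor (Python-style floor modulo).
import numpy as np

def _floor_mod_reference(x1, x2):
    x1 = np.asarray(x1, dtype=np.float32)
    x2 = np.asarray(x2, dtype=np.float32)
    return x1 - np.floor(x1 / x2) * x2

# e.g. _floor_mod_reference(-7, 3) -> 2.0, matching floor semantics
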
def cast(input_x, output_y, dst_type, kernel_name="cast"):
    """
    cast a tensor/scalar with input shape from src data type to dst data type

    restrictions of input algorithms are as follows:
    only the type groups below are supported for tensor process:
        float16 -> float32
        float16 -> int32
        float32 -> float16
        float32 -> int32
        int8    -> float32
        uint8   -> float32
        int8    -> float16
        uint8   -> float16
        int8    -> int32
        uint8   -> int32
        int32   -> uint8    // numbers out of [0, 255] can get unexpected results
        int32   -> int8     // numbers out of [-128, 127] can get unexpected results
        int32   -> float32  // for trans with fp16, only guarantees numbers in [-1023, 1023] get correct results
        int32   -> float16  // only guarantees numbers in [-1023, 1023] get correct results
    scalar convert support (means only support shape [1,]):
        int64 -> int32
        int64 -> float32

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape as input,
        and the dtype is the dst dtype to cast to
    kernel_name : str
        cce kernel name, default value is cast

    Returns
    -------
    None
    """
    src_type = input_x.get("dtype").lower()
    if src_type == "bool":
        src_type = "int8"

    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x,) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            dst_type = _cast_dsttype_conversion(dst_type)
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data = tvm.placeholder(fuseshape, name="data", dtype=src_type)
            if src_type == "int64":
                check_dtype(dst_type, ("float32", "int32"),
                            param_name="dst_type")
                res = tvm.extern(
                    [fuseshape], [data],
                    lambda ins, outs: _kernel_ir(outs, ins, dst_type, "int64"),
                    name="res", dtype=dst_type)
                tensor_list = [data, res]
                schedule = tvm.create_schedule(res.op)
                with build_config:
                    tvm.build(schedule, tensor_list, "cce", name=kernel_name)
            else:
                res = cast_compute(data, output_y, dst_type, kernel_name)
                tensors.append([data, res])
        if src_type != "int64":
            with tvm.target.cce():
                sch = generic.auto_schedule(res)
            schedules.append(sch)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": tensors
    }
    te.lang.dynamic.build(schedules, config)

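# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): the docstring's caveat that int32 -> uint8 / int8 casts give
# unexpected results outside [0, 255] / [-128, 127] corresponds to the usual
# modular wrap of a narrowing integer cast (behavior shown here is NumPy's,
# used purely as an analogy).
import numpy as np

def _cast_wraparound_demo():
    """Show the narrowing-cast wrap the docstring warns about."""
    print(np.array([300, -1], dtype=np.int32).astype(np.uint8))   # [ 44 255]
    print(np.array([200, -200], dtype=np.int32).astype(np.int8))  # [-56  56]
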
def less_equal(input_x, input_y, output_z, kernel_name="less_equal"):
    """
    Returns the truth value of (x <= y) element-wise

    Parameters
    ----------
    input_x: dict
        dict{"shape": tuple or list, "dtype": str, "range": tuple or list},
        shape, range, and dtype of first input,
        support float16, float32, int32, int8, uint8
    input_y: dict
        dict{"shape": tuple or list, "dtype": str, "range": tuple or list},
        shape, range, and dtype of second input,
        support float16, float32, int32, int8, uint8
    output_z: dict
        dict of output, should be broadcast shape and type as input
    kernel_name: str
        cce kernel name, default value is "less_equal"

    Returns
    -------
    None
    """
    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "uint8", "int8")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'less_equal'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            # shape
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            # less_equal compute
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = less_equal_compute(tensor_x, tensor_y, output_z, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def add_n(inputs, output, tensor_num, kernel_name="add_n"):
    """
    algorithm: add_n
      calculating data's adds, z = a + b + c...

    Parameters
    ----------
    inputs : list or tuple of dict
        A list of Tensor objects, each with same shape, range and dtype of
        first input, only support float16, float32, int32.
    output : dict
        shape, range and dtype of output,
        should be broadcast shape and type as input.
    tensor_num:
        nums of input
    kernel_name : string
        cce kernel name, default value is add_n

    Returns
    -------
    None
    """
    # check inputs num
    input_num = len(inputs)
    if input_num < 2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_012
        error_info['op_name'] = 'add_n'
        error_info['param_name'] = 'input_num'
        error_info['max_value'] = '8'
        error_info['min_value'] = '2'
        error_info['real_value'] = str(input_num)
        raise RuntimeError(error_info,
                           "In op[%s], the num of dimensions of input[%s] "
                           "should be in the range of [%s, %s], but actually "
                           "is [%s]." % (error_info['op_name'],
                                         error_info['param_name'],
                                         error_info['min_value'],
                                         error_info['max_value'],
                                         error_info['real_value']))
    if input_num != tensor_num:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_017
        error_info['op_name'] = 'add_n'
        error_info['param_name1'] = 'input_num'
        error_info['param_name2'] = 'tensor_num'
        error_info['param1_shape'] = str(input_num)
        error_info['param2_shape'] = str(tensor_num)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] is not match "
                           "with the parameter[%s][%s], it should be the "
                           "same." % (error_info['op_name'],
                                      error_info['param_name1'],
                                      error_info['param1_shape'],
                                      error_info['param_name2'],
                                      error_info['param2_shape']))

    dtype_0 = inputs[0].get("dtype").lower()
    for index in range(0, tensor_num):
        shape_input = inputs[index].get("shape")
        check_shape(shape_input, param_name="inputs")
        dtype_input = inputs[index].get("dtype").lower()
        check_list = ("float16", "float32", "int32")
        check_dtype(dtype_input, check_list, param_name="inputs")
        if dtype_input != dtype_0:
            error_info = {}
            error_info['errCode'] = OP_ERROR_CODE_018
            error_info['op_name'] = 'add_n'
            error_info['param_name1'] = 'dtype_input'
            error_info['param_name2'] = 'dtype_0'
            error_info['param1_dtype'] = str(dtype_input)
            error_info['param2_dtype'] = str(dtype_0)
            raise RuntimeError(error_info,
                               "In op[%s], the parameter[%s][%s] are not "
                               "equal in dtype with dtype[%s][%s]." %
                               (error_info['op_name'],
                                error_info['param_name1'],
                                error_info['param_name2'],
                                error_info['param1_dtype'],
                                error_info['param2_dtype']))

    ins = classify(inputs, Mode.ELEWISE)
    schedules, tensors = [], []
    for inputs in ins:
        with te.op.compute():
            shape_normlize = variable_shape(inputs)
            fuse_shape = [1]
            datas = []
            for (i, input_dict), shape_i in zip(enumerate(inputs),
                                                shape_normlize):
                fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_i)
                datas.append(tvm.placeholder(fuse_shape, name="data_%d" % i,
                                             dtype=dtype_0))
            # add_n_compute
            res = add_n_compute(datas, output, kernel_name)
            datas.append(res)
            tensors.append(datas)
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

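# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): add_n sums a list of same-shape tensors, z = a + b + c + ...
import numpy as np

def _add_n_reference(tensor_list):
    out = np.zeros_like(tensor_list[0])
    for t in tensor_list:
        out = out + t
    return out
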
def add(input_x, input_y, output_z, kernel_name="add"):
    """
    algorithm: add
      calculating data's add, c = a + b

    Parameters
    ----------
    input_x : dict
        including shape, dtype and range, only support float16, float32, int32
    input_y : dict
        including shape, dtype and range, only support float16, float32, int32
    output_z: dict
        shape should be broadcast shape of input, and type equals to input
    kernel_name : str
        cce kernel name, default value is add

    Returns
    -------
    None
    """
    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'add'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    # format_pattern = 1: Nz and vector
    # format_pattern = 2: vector and Nz
    # format_pattern = 0: Nz scalar / Nz Nz / ND ND
    format_pattern = _add_check_format(input_x, input_y)

    # infer shape for supporting add
    shape_x, shape_y = _infer_shape(format_pattern, input_x, input_y)
    shape_x = scalar2tensor_one(shape_x)
    shape_y = scalar2tensor_one(shape_y)

    # normalize shape
    input_x["shape"] = shape_x
    input_y["shape"] = shape_y

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            data_x = tvm.placeholder(shape_x, name="data_1", dtype=x_dtype)
            data_y = tvm.placeholder(shape_y, name="data_2", dtype=y_dtype)
            res = add_compute(data_x, data_y, output_z, kernel_name)
            tensors.append((data_x, data_y, res))
        with tvm.target.cce():
            schedule = generic.auto_schedule(res)
        schedules.append(schedule)

    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def reduce_sum(x, axes, y, keepdims=False, kernel_name="reduce_sum"):
    """reduce a tensor on certain axes based on sum.

    Parameters
    ----------
    x: dict
        the dict of input tensor.
    axes: dict
        the axes for reduce.
    y: dict
        the dict of output tensor.
    keepdims: bool or NONETYPE
        if true, retains reduced dimensions with length 1.
    kernel_name: str
        cce kernel name, default value is "reduce_sum".

    Returns
    -------
    None
    """
    dtype_x = x["dtype"]
    dtype_lower_x = dtype_x.lower()
    check_list_x = ("float16", "float32")
    check_dtype(dtype_lower_x, check_list_x, param_name="x")

    dtype_axes = axes["dtype"]
    dtype_lower_axes = dtype_axes.lower()
    check_list_axes = ("int32", "int64")
    check_dtype(dtype_lower_axes, check_list_axes, param_name="axes")

    input_shape = x.get("shape")
    if not _check_data_shape_const(input_shape):
        schedules = []
        ins = classify([x, axes], Mode.REDUCE)
        tensors = []
        shape_axes = variable_shape([axes])[0]
        data_input_axes = tvm.placeholder(shape_axes, name="data_input_axes",
                                          dtype=dtype_lower_axes)

        for (x, axes) in ins:
            with te.op.compute():
                shape_x = variable_shape([x])[0]
                data_input_x = tvm.placeholder(shape_x, name="data_input_x",
                                               dtype=dtype_lower_x)
                shape_len = len(shape_x)
                axes_d = cce_util.axis_check(shape_len, axes)
                res = reduce_sum_compute(data_input_x, axes_d, y, keepdims)
                tensors.append([data_input_x, data_input_axes, res])
            with tvm.target.cce():
                schedule = generic.auto_schedule(res)
            schedules.append(schedule)

        # build
        config = {"name": kernel_name, "tensor_list": tensors}
        te.lang.dynamic.build(schedules, config)
        add_compile_info("reduce_axis_unknown", 1)
    else:
        _reduce_sum_const(x, axes, keepdims, kernel_name)

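# Illustrative reference only (a NumPy sketch added for clarity, not the TBE
# kernel): reduce_sum sums over the given axes; keepdims=True retains the
# reduced dimensions with length 1, mirroring the keepdims flag above.
import numpy as np

def _reduce_sum_reference(x, axes, keepdims=False):
    return np.sum(x, axis=tuple(axes), keepdims=keepdims)

# e.g. _reduce_sum_reference(np.ones((2, 3)), [1], keepdims=True).shape -> (2, 1)
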
def maximum(x1, x2, y, kernel_name="maximum"):
    """
    do element-wise maximum operation between two input tensors

    Parameters
    ----------
    x1 : dict
        first input dict, only support float16, float32, int32
    x2 : dict
        second input dict, only support float16, float32, int32
    y: dict
        output dict, should be the broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is maximum

    Returns
    -------
    None
    """
    # check input tensor data dtype
    check_list = ["float16", "float32", "int32"]
    dtype_x1 = x1.get("dtype").lower()
    dtype_x2 = x2.get("dtype").lower()
    check_dtype(dtype_x1, check_list, param_name="x1")
    check_dtype(dtype_x2, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if dtype_x1 != dtype_x2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'maximum'
        error_info['param_name1'] = 'dtype_x1'
        error_info['param_name2'] = 'dtype_x2'
        error_info['param1_dtype'] = str(dtype_x1)
        error_info['param2_dtype'] = str(dtype_x2)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x1, shape_x2 = variable_shape([x1, x2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1,
                                                             shape_x2)
            data1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data1")
            data2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data2")
            res = maximum_compute(data1, data2, y, kernel_name)
            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)

def div(input_x, input_y, output_z, kernel_name="div"):
    """
    algorithm: div
      calculating data's div, res = x / y

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "div"

    Returns
    -------
    None
    """
    # check dtype
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'div'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = div_compute(tensor_x, tensor_y, output_z, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)