def floor_div(input_x, input_y, output_z, kernel_name="floor_div"):
    """
    algorithm: floordiv
    calculating data's floordiv, res = floor(x / y)

    Parameters
    ----------
    input_x: dict
        dict with keys(shape, dtype and range) of the dividend
    input_y: dict
        dict with keys(shape, dtype and range) of the divisor
    output_z: dict
        dict with keys(shape, dtype and range) of the output
    kernel_name: str, default value is "floor_div"

    Returns
    -------
    None
    """
    # check dtype of input_x/input_y
    input_dtype_x = input_x.get("dtype").lower()
    input_dtype_y = input_y.get("dtype").lower()
    check_list = ('int8', 'uint8', 'int32', 'float16', 'float32')
    check_dtype(input_dtype_x, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if input_dtype_x != input_dtype_y:
        # unified dtype-mismatch reporting, consistent with the other
        # elewise ops in this file (e.g. leaky_relu_grad, real_div)
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "input_x", "input_y",
            input_dtype_x, input_dtype_y)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, input_dtype_x, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, input_dtype_y, "tensor_y")
            res = floor_div_compute(tensor_x, tensor_y, output_z, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    Backpropagation of the leaky_relu operation:
    y = gradients where x > 0, negative_slope * gradients where x <= 0.
    Supported dtypes: float16, float32.

    Parameters
    ----------
    g : dict
        the backpropagated gradients of the corresponding leaky_relu
    x : dict
        the tensor that was fed to the forward leaky_relu
    y : dict
        the output of the back propagation
    negative_slope : float or int
        slope applied to negative inputs, default 0
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    dtype_g = g.get("dtype").lower()
    dtype_x = x.get("dtype").lower()

    allowed_types = ("float16", "float32")
    check_dtype(dtype_g, allowed_types, param_name="input_g")
    check_dtype(dtype_x, allowed_types, param_name="input_x")
    check_elewise_shape_range([g, x], support_broadcast=True)
    if dtype_g != dtype_x:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "g", "x", dtype_g, dtype_x)

    schedules = []
    tensors = []
    # classify splits the dynamic inputs into broadcast sub-cases; build
    # one compute/schedule per case
    for (ins_g, ins_x) in classify([g, x], Mode.ELEWISE_WITH_BROADCAST):
        with te.op.compute():
            shape_g, shape_x = variable_shape([ins_g, ins_x],
                                              support_broadcast=True)
            shape_g, shape_x = refine_shapes_for_broadcast(shape_g, shape_x)
            tensor_g = tvm.placeholder(shape_g, dtype_g, "tensor_g")
            tensor_x = tvm.placeholder(shape_x, dtype_x, "tensor_x")
            res = leaky_relu_grad_compute(tensor_g, tensor_x, y,
                                          negative_slope, kernel_name)
            tensors.append((tensor_g, tensor_x, res))
        with tvm.target.cce():
            schedules.append(generic.auto_schedule(res))

    te.lang.dynamic.build(schedules,
                          {"name": kernel_name, "tensor_list": tensors})
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
    calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32
    x2 : dict
        shape and dtype of second input, only support float16, float32
    y: dict
        shape and dtype of output, should be broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """
    # NOTE(review): the previous docstring claimed int32 support, but the
    # dtype whitelist below never allowed it; the docstring now matches
    # the code.
    x1_dtype = x1.get("dtype").lower()
    x2_dtype = x2.get("dtype").lower()
    check_list = ("float16", "float32")
    # report dtype errors against the real parameter names (x1/x2) rather
    # than the misleading "input_x"/"input_y" used previously
    check_dtype(x1_dtype, check_list, param_name="x1")
    check_dtype(x2_dtype, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if x1_dtype != x2_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", x1_dtype, x2_dtype)

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x1_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, x2_dtype, "tensor_y")
            res = real_div_compute(tensor_x, tensor_y, y, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"):
    """
    Compute the gradient of sigmoid:
    sigmoid_grad = (sigmoid - sigmoid * sigmoid) * grad

    Parameters
    ----------
    x : dictionary
        shape of sigmoid input
    dx : dictionary
        shape of grad
    out : dictionary
        output
    kernel_name : str
        cce kernel name, default value is "sigmoid_grad"

    Returns
    -------
    None
    """
    dtype_sig = x.get("dtype").lower()
    dtype_grad = dx.get("dtype").lower()

    supported = ("float16", "float32")
    check_dtype(dtype_sig, supported, param_name="input_x")
    check_dtype(dtype_grad, supported, param_name="input_dx")
    # pure elementwise op: both inputs must share one shape, no broadcast
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if dtype_sig != dtype_grad:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", dtype_sig, dtype_grad)

    schedules = []
    tensors = []
    for (ins_sig, ins_dx) in classify([x, dx], Mode.ELEWISE):
        with te.op.compute():
            shape_sig, shape_dx = variable_shape([ins_sig, ins_dx],
                                                 support_broadcast=False)
            shape_sig, shape_dx = refine_shapes_for_broadcast(shape_sig,
                                                              shape_dx)
            tensor_sig = tvm.placeholder(shape_sig, dtype_sig, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dtype_grad, "tensor_dx")
            res = sigmoid_grad_compute(tensor_sig, tensor_dx, out,
                                       kernel_name)
            tensors.append([tensor_sig, tensor_dx, res])
        with tvm.target.cce():
            schedules.append(generic.auto_schedule(res))

    te.lang.dynamic.build(schedules,
                          {"name": kernel_name, "tensor_list": tensors})
def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"):
    """
    algorithm: sqrt_grad_cce

    Parameters
    ----------
    x : dict
        dict of data
    dx : dict
        dict of data_grad
    out : dict
        dict of output
    kernel_name : str
        cce kernel name, default value is "sqrt_grad"

    Returns
    -------
    None
    """
    dtype_x = x.get("dtype").lower()
    dtype_dx = dx.get("dtype").lower()

    valid_types = ("float16", "float32")
    check_dtype(dtype_x, valid_types, param_name="x")
    check_dtype(dtype_dx, valid_types, param_name="dx")
    # pure elementwise op: inputs must match in shape, no broadcast
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if dtype_x != dtype_dx:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", dtype_x, dtype_dx)

    schedules = []
    tensors = []
    for (ins_x, ins_dx) in classify([x, dx], Mode.ELEWISE):
        with te.op.compute():
            shape_x, shape_dx = variable_shape([ins_x, ins_dx],
                                               support_broadcast=False)
            shape_x, shape_dx = refine_shapes_for_broadcast(shape_x,
                                                            shape_dx)
            tensor_x = tvm.placeholder(shape_x, dtype_x, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dtype_dx, "tensor_dx")
            res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name)
            tensors.append([tensor_x, tensor_dx, res])
        with tvm.target.cce():
            schedules.append(generic.auto_schedule(res))

    te.lang.dynamic.build(schedules,
                          {"name": kernel_name, "tensor_list": tensors})
def less_equal(input_x, input_y, output_z, kernel_name="less_equal"):
    """
    Returns the truth value of (x <= y) element-wise

    Parameters
    ----------
    input_x: dict
        dict{"shape":tuple or list, "dtype":str, range: tuple or list},
        shape, range, and dtype of first input,
        support float16,float32,int32,int8,uint8
    input_y: dict
        dict{"shape":tuple or list, "dtype":str, range: tuple or list},
        shape, range, and dtype of second input,
        support float16,float32,int32,int8,uint8
    output_z: dict
        dict of output, should be broadcast shape and type as input
    kernel_name: str
        cce kernel name, default value is "less_equal"

    Returns
    -------
    None
    """
    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "uint8", "int8")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        # unified dtype-mismatch reporting, consistent with the other
        # elewise ops in this file
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "input_x", "input_y", x_dtype, y_dtype)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            # shape
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            # less_equal compute
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = less_equal_compute(tensor_x, tensor_y, output_z,
                                     kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def maximum(x1, x2, y, kernel_name="maximum"):
    """
    do element-wise maximum operation between two input tensors

    Parameters
    ----------
    x1 : dict
        first input dict, only support float16, float32, int32
    x2 : dict
        second input dict, only support float16, float32, int32
    y: dict
        output dict, should be the broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is maximum

    Returns
    -------
    None
    """
    # check input tensor data dtype
    check_list = ["float16", "float32", "int32"]
    dtype_x1 = x1.get("dtype").lower()
    dtype_x2 = x2.get("dtype").lower()
    check_dtype(dtype_x1, check_list, param_name="x1")
    check_dtype(dtype_x2, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if dtype_x1 != dtype_x2:
        # unified dtype-mismatch reporting, consistent with the other
        # elewise ops in this file
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", dtype_x1, dtype_x2)

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x1, shape_x2 = variable_shape([x1, x2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(
                shape_x1, shape_x2)
            data1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data1")
            data2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data2")
            res = maximum_compute(data1, data2, y, kernel_name)
            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def floor_mod(x1, x2, y, kernel_name="floor_mod"):
    """
    calculate the remainder of division, support fp16,fp32,int32
    res = x1 - floor(input_data_x / input_data_y) * input_data_y

    Parameters
    ----------
    x1: dict
        dict{"shape":tuple or list,"dtype":str, "range": tuple or list}
        shape of data
        the data type, src_dtype equals dst_dtype,
        support fp16,fp32,int32
    x2: dict
        dict{"shape":tuple or list,"dtype":str, "range": tuple or list}
        shape of data
        the data type, src_dtype equals of dst_dtype,
        support fp16,fp32,int32
    y: dict, reserved field
        dict with keys(shape, dtype and range) of output
    kernel_name: str
        cce kernel name, default value is "floor_mod"

    Returns
    ------
    None
    """
    # check input tensor data_type
    dtype_x = x1.get("dtype").lower()
    dtype_y = x2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x, check_list, param_name="x1")
    check_dtype(dtype_y, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if dtype_x != dtype_y:
        # unified dtype-mismatch reporting, consistent with the other
        # elewise ops in this file
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", dtype_x, dtype_y)

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([x1, x2],
                                              support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            input_data_x = tvm.placeholder(shape_x, name="input_data_x",
                                           dtype=dtype_x)
            input_data_y = tvm.placeholder(shape_y, name="input_data_y",
                                           dtype=dtype_y)
            res = floor_mod_compute(input_data_x, input_data_y, y,
                                    kernel_name)
            tensors.append([input_data_x, input_data_y, res])
        with tvm.target.cce():
            auto_sch = generic.auto_schedule(res)
        schedules.append(auto_sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def mul(input1, input2, output, kernel_name="mul"):
    """
    algorithm: mul
    calculating data's mul, c = a * b

    Parameters
    ----------
    input1 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    input2 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    output: dict
        include ori_shape, shape, ori_format, format, dtype and range
        shape must be broadcast shape of input
    kernel_name : str
        cce kernel name, default value is mul

    Returns
    -------
    None
    """
    # check dtype
    dtype_x1 = input1.get("dtype").lower()
    dtype_x2 = input2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x1, check_list, param_name="input1")
    check_dtype(dtype_x2, check_list, param_name="input2")
    # BUG FIX: this previously validated [input1, input1], silently
    # skipping the shape-range check on input2
    check_elewise_shape_range([input1, input2], support_broadcast=True)
    if dtype_x1 != dtype_x2:
        # unified dtype-mismatch reporting, consistent with the other
        # elewise ops in this file
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "input1", "input2", dtype_x1, dtype_x2)

    ins = classify([input1, input2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input1, input2) in ins:
        with te.op.compute():
            # shape
            shape_x1, shape_x2 = variable_shape([input1, input2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1,
                                                             shape_x2)
            # mul_compute
            data_x1 = tvm.placeholder(shape_x1, dtype=dtype_x1,
                                      name="data_x1")
            data_x2 = tvm.placeholder(shape_x2, dtype=dtype_x2,
                                      name="data_x2")
            res = mul_compute(data_x1, data_x2, output, kernel_name)
            tensors.append((data_x1, data_x2, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def div(input_x, input_y, output_z, kernel_name="div"):
    """
    algorithm: div
    calculating data's div, res = x / y

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "div"

    Returns
    -------
    None
    """
    # check dtype
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        # unified dtype-mismatch reporting, consistent with the other
        # elewise ops in this file
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "input_x", "input_y", x_dtype, y_dtype)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = div_compute(tensor_x, tensor_y, output_z, kernel_name)
            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
def add(input_x, input_y, output_z, kernel_name="add"):
    """
    algorithm: add
    calculating data's add, c = a + b

    Parameters
    ----------
    input_x : dict
        including shape, dtype and range,
        only support float16, float32, int32
    input_y : dict
        including shape, dtype and range,
        only support float16, float32, int32
    output_z: dict
        shape should be broadcast shape of input, and type equals to input
    kernel_name : str
        cce kernel name, default value is add

    Returns
    -------
    None
    """
    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        # unified dtype-mismatch reporting; also fixes the garbled
        # "...not equal indtype..." text of the hand-built RuntimeError
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "input_x", "input_y", x_dtype, y_dtype)

    # format_pattern = 1  Nz and vector
    # format_pattern = 2  vector and Nz
    # format_pattern = 0  Nz scalar / Nz Nz / ND ND
    format_pattern = _add_check_format(input_x, input_y)
    # infer shape for supporting add
    shape_x, shape_y = _infer_shape(format_pattern, input_x, input_y)
    shape_x = scalar2tensor_one(shape_x)
    shape_y = scalar2tensor_one(shape_y)
    # normalize shape
    input_x["shape"] = shape_x
    input_y["shape"] = shape_y

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            data_x = tvm.placeholder(shape_x, name="data_1", dtype=x_dtype)
            data_y = tvm.placeholder(shape_y, name="data_2", dtype=y_dtype)
            res = add_compute(data_x, data_y, output_z, kernel_name)
            tensors.append((data_x, data_y, res))
        with tvm.target.cce():
            schedule = generic.auto_schedule(res)
        schedules.append(schedule)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)