def fills_compute(x, value, dtype, kernel_name="fills"):
    """
    Fill a tensor of the same shape as `x` with the scalar `value`.

    Parameters
    ----------
    x : TVM tensor
        the placeholder of input
    value : a number of float or int
        the value to fill the output with
    dtype : string
        the type of input
    kernel_name : str
        kernel name, default value is "fills"

    Returns
    -------
    res: TVM tensor
        the calculation results
    """
    res = te.lang.cce.broadcast(tvm.const(value, dtype=dtype), x.shape)
    # phony elementwise add keeps `x` in the dataflow graph as a dependency
    with tvm.tag_scope("elewise_binary_phony"):
        res = te.tvm.compute(res.shape,
                             lambda *indices: res[indices] + x[indices],
                             name="elewise_binary_phony_output")

    return res
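
# A minimal NumPy sketch (illustrative helper, not part of this operator) of
# the semantics fills_compute implements: the result is `value` broadcast to
# the shape of `x`. Judging by zeros_like_compute below, which uses the same
# "elewise_binary_phony" pattern and must return all zeros, the phony add is
# a scheduling placeholder rather than an arithmetic contribution.
import numpy as np


def _fills_reference(x, value, dtype):
    # hypothetical reference: `value` replicated over x's shape
    return np.full(np.shape(x), value, dtype=dtype)
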
def fill_v2_compute(data_x, x1, x2, y, kernel_name="fill_v2_d"):
    """
    Fill a tensor of shape `x2` with the scalar value `x1`.
    `data_x` is kept in the graph through a phony elementwise add.
    """
    # broadcast the fill value to the target shape
    res = te.lang.cce.broadcast(tvm.const(x1), x2)
    with tvm.tag_scope("elewise_binary_phony"):
        res = te.tvm.compute(res.shape,
                             lambda *indices: res[indices] + data_x[indices],
                             name="elewise_binary_phony_output")

    return res
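
# Hedged NumPy sketch of fill_v2_compute's intent (name is illustrative, not
# part of the operator): unlike fills_compute, the output shape comes from
# the dims argument `x2` rather than from an input placeholder, mirroring
# the tf.fill-style contract.
import numpy as np


def _fill_v2_reference(dims, value):
    # e.g. _fill_v2_reference([2, 3], 5.0) -> a 2x3 array of 5.0
    return np.full(tuple(dims), value)
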
def _mul_compute_ex(input_x, input_y, shape_x, shape_y, shape_max):
    """
    Multiply with a manually indexed broadcast: the smaller input is read
    at index 0 on its broadcast axes instead of being materialized at the
    full output shape. Returns None when the pattern does not apply.
    """
    if shape_x == shape_max:
        small_input = input_y
        large_input = input_x
    elif shape_y == shape_max:
        small_input = input_x
        large_input = input_y
    else:
        # neither input already matches the output shape
        return None

    # collect the axes the small input would be broadcast along
    small_index = []
    small_shape = 1
    for i in range(len(small_input.shape)):
        if int(small_input.shape[i]) < int(shape_max[i]):
            small_index.append(i)
            small_shape *= shape_max[i]
        elif int(small_input.shape[i]) == int(shape_max[i]):
            pass
        else:
            return None

    # only worthwhile for large broadcasts with a trailing axis of 1
    if small_shape < 10000:
        return None
    if int(small_input.shape[-1]) != 1:
        return None

    def get_tensor_slice(inp, small_index, is_large, *shapes):
        def get_index(inp_tensor, index):
            return inp_tensor[index]

        if is_large:
            for axis in shapes:
                inp = get_index(inp, axis)
        else:
            # broadcast axes of the small input are addressed at index 0
            for ind, _ in enumerate(shapes):
                if ind in small_index:
                    inp = get_index(inp, 0)
                else:
                    inp = get_index(inp, shapes[ind])
        return inp

    with tvm.tag_scope("elewise_binary_mul"):
        res = tvm.compute(
            shape_max,
            lambda *indices: get_tensor_slice(
                large_input, small_index, True, *indices) * get_tensor_slice(
                    small_input, small_index, False, *indices),
            name="manual_mul_without_broadcast_" + str(
                te.lang.cce.te_compute.elewise_compute.NAME_INDEX[0]))
    te.lang.cce.te_compute.elewise_compute.NAME_INDEX[0] += 1

    return res
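
# Hedged NumPy sketch (illustrative helper, not part of the operator) of the
# indexing trick in _mul_compute_ex: on every axis where the small input has
# extent 1, it is read at index 0 instead of being expanded to full shape.
# The loop below checks that this manual indexing matches what ordinary
# broadcasting would compute.
import numpy as np


def _manual_broadcast_mul_reference(large, small):
    # axes where the small input has extent 1 are read at index 0
    small_axes = [i for i, (s, m) in enumerate(zip(small.shape, large.shape))
                  if s < m]
    out = np.empty_like(large)
    for idx in np.ndindex(*large.shape):
        small_idx = tuple(0 if ax in small_axes else idx[ax]
                          for ax in range(len(idx)))
        out[idx] = large[idx] * small[small_idx]
    return out  # equals large * small under ordinary NumPy broadcasting
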
def zeros_like_compute(x, y, kernel_name="zeros_like"):
    """
    Given a tensor, return a tensor of the same type and shape with all
    elements set to zero.

    Parameters
    ----------
    x: TVM tensor
        the placeholder of input data
    y: TVM tensor
        the placeholder of output data
    kernel_name : str
        cce kernel name, default value is "zeros_like"

    Returns
    -------
    res: TVM tensor
        the result of zeros_like_compute
    """
    src_dtype = x.dtype.lower()
    dst_type = src_dtype
    src_type_list = ("int8", "uint8")
    dst_type_list = ("int8", "uint8")

    # int8/uint8 constants are produced in float16, then cast back
    if src_dtype in src_type_list:
        src_dtype = "float16"

    zero = tvm.const(0, dtype=src_dtype)
    zero_src = te.lang.cce.broadcast(zero, x.shape)
    # the destination dtype selects the integer-safe cast path
    if dst_type in dst_type_list:
        zero_src = te.lang.cce.cast_to(zero_src, dst_type,
                                       f1628IntegerFlag=True)
    else:
        zero_src = te.lang.cce.cast_to(zero_src, dst_type)

    with tvm.tag_scope("elewise_binary_phony"):
        res = te.tvm.compute(x.shape,
                             lambda *indices: zero_src[indices] + x[indices],
                             name="elewise_binary_phony_output")

    return res

def ones_like_compute(input_x, output_y, kernel_name="ones_like"):
    """
    Given a tensor, this operation returns a tensor of the same type and
    shape as `tensor` with all elements set to 1.

    Parameters
    ----------
    input_x: TVM tensor
        the placeholder of input data
    output_y: TVM tensor
        the placeholder of output data
    kernel_name : str
        cce kernel name, default value is "ones_like"

    Returns
    -------
    res: TVM tensor
        the result of ones_like_compute
    """
    src_dtype = input_x.dtype.lower()
    dst_type = src_dtype
    src_type_list = ("int8", "uint8")
    dst_type_list = ("int8", "uint8")

    # int8/uint8 constants are produced in float16, then cast back
    if src_dtype in src_type_list:
        src_dtype = "float16"

    one = tvm.const(1, dtype=src_dtype)
    one_src = te.lang.cce.broadcast(one, input_x.shape)
    # the destination dtype selects the integer-safe cast path
    if dst_type in dst_type_list:
        one_src = te.lang.cce.cast_to(one_src, dst_type,
                                      f1628IntegerFlag=True)
    else:
        one_src = te.lang.cce.cast_to(one_src, dst_type)

    with tvm.tag_scope("elewise_binary_phony"):
        res = te.tvm.compute(
            input_x.shape,
            lambda *indices: one_src[indices] + input_x[indices],
            name="elewise_binary_phony_output")

    return res
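
# Hedged NumPy sketch (illustrative only) of the dtype handling shared by
# zeros_like_compute and ones_like_compute above: int8/uint8 constants are
# produced in float16 first and then cast back to the integer dtype, which
# appears to be what the f1628IntegerFlag cast path expresses.
import numpy as np


def _const_like_reference(x, const_value):
    if x.dtype in (np.int8, np.uint8):
        # build the constant in float16, then cast back to the integer type
        return np.full(x.shape, const_value,
                       dtype=np.float16).astype(x.dtype)
    return np.full(x.shape, const_value, dtype=x.dtype)
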
def softmax_cross_entropy_with_logits_compute_ex(input_features,
                                                 input_labels):
    """
    Computes softmax cross entropy cost.
    softmax = e^(x-max) / ∑(e^(x-max))
    log(softmax) = (x-max) - log(∑e^(x-max))
    cross_entropy = -∑(y * log(softmax))

    Parameters
    ----------
    input_features: TVM tensor
        input tensor contains shape and dtype attributes.
        source data type support "float16", "float32".
    input_labels: TVM tensor
        input tensor contains shape and dtype attributes.
        Must have the same type as 'input_features'.

    Returns
    -------
    res: list of TVM tensors
        [loss, backprop]. Both have the same type as 'input_features'.
    """
    shape_features = te.lang.cce.util.shape_to_list(input_features.shape)
    shape_labels = te.lang.cce.util.shape_to_list(input_labels.shape)

    dtype = input_features.dtype.lower()
    if list(shape_features) != list(shape_labels):
        shape_features, shape_labels, shape_broadcast = \
            broadcast_shapes(shape_features, shape_labels,
                             param_name_input1="input_features",
                             param_name_input2="input_labels")
        input_features = te.lang.cce.broadcast(input_features,
                                               shape_broadcast, dtype)
        input_labels = te.lang.cce.broadcast(input_labels,
                                             shape_broadcast, dtype)
    else:
        shape_broadcast = shape_features

    if dtype == "float16":
        input_features = te.lang.cce.cast_to(input_features, "float32")
        input_labels = te.lang.cce.cast_to(input_labels, "float32")

    # max over the last axis for numerical stability
    with tvm.tag_scope("last_axis_reduce_max"):
        reduce_axis = tvm.reduce_axis((0, shape_broadcast[1]), name="rax0")
        data_max = tvm.compute(
            (shape_broadcast[0], 1),
            lambda upper, lower: tvm.max(input_features[upper, reduce_axis],
                                         axis=reduce_axis),
            name="last_axis_reduce_max")

    with tvm.tag_scope("elewise_binary_sub_scalar_L1"):
        data_sub = tvm.compute(input_features.shape,
                               lambda higher, lower:
                               input_features[higher][lower] -
                               data_max[higher][0],
                               name="manual_sub_0")

    data_exp = te.lang.cce.vexp(data_sub)
    data_sum = te.lang.cce.sum(data_exp, axis=-1, keepdims=True)

    with tvm.tag_scope("elewise_binary_div"):
        data_div = tvm.compute(data_exp.shape,
                               lambda higher, lower:
                               data_exp[higher][lower] /
                               data_sum[higher][0],
                               name="manual_div_0")

    data_log_tmp = te.lang.cce.vlog(data_sum)

    with tvm.tag_scope("elewise_get_L1_workspace"):
        fake_buffer = tvm.compute(
            data_sub.shape,
            lambda higher, lower: tvm.const(0, "float32"),
            name="get_L1_workspace")

    with tvm.tag_scope("elewise_binary_sub"):
        data_log = tvm.compute(data_sub.shape,
                               lambda higher, lower:
                               fake_buffer[higher][lower] -
                               data_log_tmp[higher][0],
                               name="manual_sub_1")

    data_mul = te.lang.cce.vmul(input_labels, data_log)

    with tvm.tag_scope("last_axis_reduce_sum_reuse"):
        reduce_axis = tvm.reduce_axis((0, shape_broadcast[1]), name="rax1")
        loss = tvm.compute(
            (shape_broadcast[0], 1),
            lambda upper, lower: tvm.sum(data_mul[upper, reduce_axis],
                                         axis=reduce_axis),
            name="last_axis_reduce_sum_reuse")
    loss = te.lang.cce.vmuls(loss, SCALAR_MINUS_ONE)
    backprop = te.lang.cce.vsub(data_div, input_labels)

    if dtype == "float16":
        loss = te.lang.cce.cast_to(loss, "float16")
        backprop = te.lang.cce.cast_to(backprop, "float16")

    res = [loss, backprop]

    return res
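
# Hedged NumPy reference (illustrative helper, not part of the operator) for
# the formulas in the docstring above: the stabilized softmax subtracts the
# per-row max, the loss is -∑(labels * log(softmax)) over the last axis, and
# the backprop is softmax - labels.
import numpy as np


def _softmax_xent_reference(features, labels):
    shifted = features - features.max(axis=-1, keepdims=True)
    exp = np.exp(shifted)
    softmax = exp / exp.sum(axis=-1, keepdims=True)
    # log(softmax) = (x - max) - log(sum(e^(x - max)))
    log_softmax = shifted - np.log(exp.sum(axis=-1, keepdims=True))
    loss = -(labels * log_softmax).sum(axis=-1, keepdims=True)
    backprop = softmax - labels
    return loss, backprop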