def lambda_func(*indice):
    # Index vectors for the NC1HWC0 layout: position 1 is C1, position 4 is C0.
    # The bias x1 is always indexed per channel; the scale only when it is a vector.
    deq_indice = [0] * 5
    x1_indice = [0] * 5
    x1_indice[4] = indice[c0_index]
    x1_indice[1] = indice[c1_index]
    if tensor_flag:
        deq_indice[4] = indice[c0_index]
        deq_indice[1] = indice[c1_index]

    if x1 is not None:
        if tensor_flag:
            func = tvm.vdeq_cast(x0(*indice), deq_scale(*deq_indice),
                                 "int16", do_relu=relu_flag) + x1(*x1_indice)
        else:
            func = tvm.deq_cast(x0(*indice), deq_scale(*deq_indice),
                                "int16") + x1(*x1_indice)
    else:
        if tensor_flag:
            func = tvm.vdeq_cast(x0(*indice), deq_scale(*deq_indice),
                                 "int16", do_relu=relu_flag)
        else:
            func = tvm.deq_cast(x0(*indice), deq_scale(*deq_indice), "int16")
    return func
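# A minimal wiring sketch for the closure above (assumption: in the original
# module it is nested inside a compute builder that supplies these names; the
# placeholders, index positions, flags and the float16 scale dtype below are
# illustrative stand-ins, not the original enclosing code).
from te import tvm  # TBE-patched tvm that provides deq_cast/vdeq_cast

align_shape = (1, 4, 16, 16, 16)   # N, C1, H, W, C0
x0 = tvm.placeholder(align_shape, "int32", name="x0")
x1 = None                          # no fused bias in this sketch
deq_scale = tvm.placeholder((1, 4, 1, 1, 16), "float16", name="deq_scale")
c1_index, c0_index = 1, 4
tensor_flag, relu_flag = True, False

res_s16 = tvm.compute(align_shape, lambda_func, name="dequant_to_s16")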
def _dequant_v200_v2(x_l0c, deq_ub, align_shape, x_shape, relu_flag,
                     tensor_flag):
    """
    dequant on v200, with a vector or scalar scale
    """
    if tensor_flag:
        # per-channel scale: vdeq_cast reads deq_ub along the C1 and C0 axes
        res_f16 = tvm.compute(
            align_shape,
            lambda i, j, k, l: tvm.vdeq_cast(x_l0c(i, j, k, l),
                                             deq_ub(0, j, 0, l),
                                             dtype="float16",
                                             do_relu=relu_flag),
            name='dequant_to_fp16',
            tag="dequant_vector")
    else:
        # scalar scale: deq_cast broadcasts the single value deq_ub(0, 0, 0, 0)
        res_f16 = tvm.compute(
            align_shape,
            lambda i, j, k, l: tvm.deq_cast(x_l0c(i, j, k, l),
                                            deq_ub(0, 0, 0, 0),
                                            dtype="float16"),
            name='dequant_to_fp16',
            tag="dequant_scale")

    is_scalar = 1
    if tensor_flag:
        is_scalar = 0

    # crop back to the unpadded shape and record the mode for the schedule
    res = tvm.compute(x_shape,
                      lambda *indice: res_f16(*indice),
                      name='res',
                      tag="dequant_res",
                      attrs={'is_scalar': is_scalar})
    return res
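# Usage sketch for the v200 path (assumption: the shapes and the float16
# scale dtype are illustrative; the real ones come from the op's parameter
# checks, and deq_ub is a rank-4 scale tensor).
from te import tvm

x_l0c = tvm.placeholder((1, 2, 16, 16), "int32", name="x_l0c")
deq_ub = tvm.placeholder((1, 2, 1, 16), "float16", name="deq_ub")
res = _dequant_v200_v2(x_l0c, deq_ub,
                       align_shape=(1, 2, 16, 16),
                       x_shape=(1, 2, 14, 16),
                       relu_flag=True, tensor_flag=True)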
def lambda_func(*indice):
    # Scale index in NC1HWC0: only a vector scale follows the C1/C0 position.
    new_indice = [0] * 5
    if tensor_flag:
        new_indice[4] = indice[c0_index]
        new_indice[1] = indice[c1_index]

    # Beyond the original C1 extent the output is padding, so emit zeros there.
    if tensor_flag:
        return tvm.select(
            indice[c1_index] < x_shape_list[c1_index],
            tvm.vdeq_cast(x(*indice), req_scale(*new_indice), "int8",
                          do_relu=relu_flag),
            tvm.const(0, dtype="int8"))
    return tvm.select(
        indice[c1_index] < x_shape_list[c1_index],
        tvm.deq_cast(x(*indice), req_scale(*new_indice), "int8"),
        tvm.const(0, dtype="int8"))
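# Why the tvm.select is needed: the int8 result is stored with C0 = 32, so the
# int32 input's C1 (counted in C0 = 16 blocks) is aligned up to an even value,
# and indices in the padded C1 tail must read as zero. A plain-Python check of
# the alignment arithmetic used by the caller:
x_shape_list = [1, 5, 4, 4, 16]            # N, C1, H, W, C0 of the s32 input
align_c1 = (x_shape_list[1] + 1) // 2 * 2  # 5 -> 6 channel blocks
assert align_c1 == 6
# lambda_func then emits requant values for c1 < 5 and tvm.const(0) for c1 == 5.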
def lambda_func(*indice):
    new_indice = [0] * 5
    if tensor_flag:
        new_indice[4] = indice[c0_index]
        new_indice[1] = indice[c1_index]

    if is_v200_flag:
        # v200 provides dedicated dequant instructions
        if tensor_flag:
            func = tvm.vdeq_cast(x(*indice), deq_scale(*new_indice),
                                 dtype="float16", do_relu=relu_flag)
        else:
            func = tvm.deq_cast(x(*indice), deq_scale(*new_indice),
                                dtype="float16")
    else:
        # pre-v200 fallback: plain cast-and-multiply in float16
        func = x(*indice).astype("float16") * deq_scale(*new_indice)
    return func
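# Illustrative numeric model of the two branches above (assumption: deq_scale
# holds a plain float scale; on real v200 silicon the scale operand may be a
# packed register value, so this models only the arithmetic, not the encoding).
import numpy as np

s32 = np.array([-300, 0, 1200], dtype=np.int32)
scale = np.float16(0.0125)
relu_flag = True

# pre-v200 branch: what x(*indice).astype("float16") * deq_scale(...) computes
fp16 = s32.astype(np.float16) * scale
# v200 branch: deq_cast/vdeq_cast fuse the same conversion into one
# instruction, with an optional relu when do_relu=True
fp16_v200 = np.maximum(fp16, np.float16(0)) if relu_flag else fp16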
def _scalar_dequant_v200(x, x_shape, align_shape, deq_scale):
    """
    dequant with a scalar scale in v200
    """
    res_f16 = tvm.compute(align_shape,
                          lambda i, j, k, l: tvm.deq_cast(
                              x(i, j, k, l), deq_scale(0, 0, 0, 0, 0),
                              dtype="float16"),
                          name='dequant', tag="dequant_scale")

    # crop the aligned result back to the original shape
    res = tvm.compute(x_shape, lambda *indice: res_f16(*indice),
                      name='dequant_remove_pad',
                      tag="dequant_remove_pad")
    return res
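# Usage sketch (assumption: the shapes and the float16 scale dtype are
# illustrative). In scalar mode the scale is one value broadcast from a
# rank-5 (1, 1, 1, 1, 1) tensor, hence the fixed deq_scale(0, 0, 0, 0, 0).
from te import tvm

x = tvm.placeholder((1, 2, 16, 16), "int32", name="x")
deq_scale = tvm.placeholder((1, 1, 1, 1, 1), "float16", name="deq_scale")
res = _scalar_dequant_v200(x, (1, 2, 14, 16), (1, 2, 16, 16), deq_scale)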
def _scalar_depthwise_fused_v200(x, x_shape, align_shape, deq_scale,
                                 relu_flag):
    """
    depthwise dequant with a scalar scale in v200
    """
    # relu_flag is accepted for interface parity; the scalar path does not use it.
    # The output C1 axis j flattens two input axes, recovered as (j // 2, j % 2).
    res_f16 = tvm.compute(align_shape,
                          lambda i, j, a, k, l: tvm.deq_cast(
                              x(i, j // 2, j % 2, k, l),
                              deq_scale(0, 0, 0, 0, 0),
                              dtype="float16"),
                          name='dequant1', tag="dequant1_scale")

    align_shape[3] = x_shape[3].value

    res = tvm.compute(align_shape, lambda *indice: res_f16(*indice),
                      name='dequant_remove_pad',
                      tag="dequant_remove_pad",
                      attrs={"sqrt_flag": 0})
    return res
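# Plain-Python view of the (j // 2, j % 2) unflattening used above: the
# depthwise input keeps its channel blocks on two axes (of sizes C1' and 2),
# while the dequant output uses one fused C1 axis of size 2 * C1'.
for j in range(4):
    print(j, "->", (j // 2, j % 2))   # 0 -> (0, 0), 1 -> (0, 1), 2 -> (1, 0), 3 -> (1, 1)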
def ascend_requant_compute(x, req_scale, y, relu_flag=False,
                           kernel_name='ascend_requant'):
    """
    int32 -> int8

    Parameters:
    ----------
    x : the placeholder of input
    req_scale : the placeholder of the requant scale
    y : the dict of output
    relu_flag : the relu mode; when True, relu is applied to the result
    kernel_name : cce kernel name, default value is "ascend_requant"

    Returns:
    -------
    res : the result of ascend_requant
    """
    x_shape = x.shape
    x_shape_list = te.lang.cce.util.shape_to_list(x_shape)
    align_shape = x_shape_list.copy()

    # decide from the original shape whether req_scale is a scalar or a
    # per-channel vector (function_reduce is the module's functools.reduce alias)
    ori_shape_req = req_scale.op.attrs['ori_shape']
    ori_shape_req_list = te.lang.cce.util.shape_to_list(ori_shape_req)
    req_dim = function_reduce(lambda a, b: a * b, ori_shape_req_list[:])
    tensor_flag = False
    if req_dim > 1:
        tensor_flag = True

    c1_index = 1
    if _is_nz_format(x):
        c1_index = len(x_shape) - 4

    if x.op.tag == "depthwise_conv2d":
        # depthwise output keeps its channel blocks on two axes; fuse them
        # into one C1 axis and align it for the int8 (C0 = 32) result
        align_shape[4] = 16
        align_shape[3] = (x_shape_list[3] + 15) // 16 * 16
        align_shape[2] = 1
        if tensor_flag:
            align_shape[1] = (x_shape_list[1] * x_shape_list[2] * 16 + 31) \
                             // 32 * 32 // 16
        else:
            align_shape[1] = x_shape_list[1] * x_shape_list[2]
        align_shape[0] = x_shape_list[0]
        if tensor_flag:
            res_ub = tvm.compute(
                align_shape,
                lambda i, j, a, k, l: tvm.vdeq_cast(x(i, j // 2, j % 2, k, l),
                                                    req_scale(0, j, 0, 0, l),
                                                    "int8",
                                                    do_relu=relu_flag),
                name='s32_to_s8', tag="requant_vector")
        else:
            res_ub = tvm.compute(
                align_shape,
                lambda i, j, a, k, l: tvm.deq_cast(x(i, j // 2, j % 2, k, l),
                                                   req_scale(0, 0, 0, 0, 0),
                                                   "int8"),
                name='s32_to_s8', tag="requant_scale")
    else:
        # align C1 to an even block count and the last-but-one axis to 16
        align_shape[c1_index] = (align_shape[c1_index] + 1) // 2 * 2
        align_shape[-2] = (align_shape[-2] + 15) // 16 * 16
        res_ub = _s32_to_s8_normal_compute(x, req_scale, align_shape,
                                           c1_index, tensor_flag, relu_flag)

    if _is_nz_format(x):
        res = _format_transfer_nz(align_shape, res_ub, c1_index)
        return res

    res_ub_reform = _format_transfer(align_shape, res_ub, c1_index)
    res_shape = te.lang.cce.util.shape_to_list(res_ub_reform.shape)
    res_shape[-2] = x.shape[-2]
    res = tvm.compute(res_shape, lambda *indice: res_ub_reform(*indice),
                      name='requant_remove_pad',
                      tag="requant_remove_pad")
    return res
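# End-to-end usage sketch (assumptions: the TBE-patched tvm.placeholder
# accepts an attrs dict, since the function reads req_scale.op.attrs
# ['ori_shape']; the uint64 scale dtype and all shapes are illustrative; the
# module helpers _is_nz_format, _format_transfer, etc. are in scope).
from te import tvm

x = tvm.placeholder((1, 2, 4, 4, 16), "int32", name="x")
req_scale = tvm.placeholder((1, 2, 1, 1, 16), "uint64", name="req_scale",
                            attrs={'ori_shape': [32]})
res = ascend_requant_compute(x, req_scale, y=None, relu_flag=False)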