Example #1
    def __init__(self, var, indices, updates, var_out, use_locking,
                 kernel_name):
        self.tik_instance = tik.Tik(tik.Dprofile())
        self.var_dtype = var.get("dtype").lower()
        self.indices_dtype = indices.get("dtype").lower()
        self.updates_dtype = updates.get("dtype").lower()
        self.out_dtype = var_out.get("dtype").lower()
        indices_support_dtype_list = ("int32", )
        var_support_dtype_list = ("float32", )
        check_dtype(self.indices_dtype,
                    indices_support_dtype_list,
                    param_name="indices")
        check_dtype(self.var_dtype, var_support_dtype_list, param_name="var")
        if self.var_dtype != self.updates_dtype:
            error_manager_vector.raise_err_inputs_dtype_not_equal(
                kernel_name, "updates", "var", self.updates_dtype,
                self.var_dtype)
        if self.var_dtype != self.out_dtype:
            error_manager_vector.raise_err_inputs_dtype_not_equal(
                kernel_name, "out", "var", self.out_dtype, self.var_dtype)
        self.kernel_name = kernel_name

        self.ai_core_num = tbe_platform.cce_conf.get_soc_spec(
            tbe_platform.cce_conf.CORE_NUM)
        self.ub_size_bytes = (
            tbe_platform.cce_conf.get_soc_spec(tbe_platform.cce_conf.UB_SIZE) -
            RESERVED_UB_SIZE)
        self.var_dtype_bytes_size = tbe_platform.cce_intrin.get_bit_len(
            self.var_dtype) // 8
        self.indices_dtype_bytes_size = tbe_platform.cce_intrin.get_bit_len(
            self.indices_dtype) // 8
        self.var_data_each_block = 32 // self.var_dtype_bytes_size
        self.indices_data_each_block = 32 // self.indices_dtype_bytes_size

        self.tiling_gm = self.tik_instance.Tensor("int32", (TILING_ARG_NUM, ),
                                                  name="tiling_gm",
                                                  scope=tik.scope_gm)
        self.var_gm = self.tik_instance.Tensor(self.var_dtype, (MAX_INT32, ),
                                               name="var_gm",
                                               scope=tik.scope_gm)
        self.indices_gm = self.tik_instance.Tensor(self.indices_dtype,
                                                   (MAX_INT32, ),
                                                   name="indices_gm",
                                                   scope=tik.scope_gm)
        self.updates_gm = self.tik_instance.Tensor(self.updates_dtype,
                                                   (MAX_INT32, ),
                                                   name="updates_gm",
                                                   scope=tik.scope_gm)
        self.out_gm = self.tik_instance.Tensor(self.var_dtype, (MAX_INT32, ),
                                               name="out_gm",
                                               scope=tik.scope_gm)

        self.updates_ub = None
        self.indices_ub = None
        self.var_read_index = None
        self.updates_read_index = None
        self.indices_loop_index = None
Example #2
    def __init__(self, var, indices, updates, var_out, use_locking,
                 kernel_name):
        self.tik_instance = tik.Tik(tik.Dprofile())
        self.indicesdtype = indices.get("dtype").lower()
        self.updatesdtype = updates.get("dtype").lower()
        self.vardtype = var.get("dtype").lower()
        self.var_out_dtype = var_out.get("dtype").lower()
        indices_support_dtype_list = ("int32", )
        check_dtype(self.indicesdtype,
                    indices_support_dtype_list,
                    param_name="indices")
        updates_support_dtype_list = ("float32", )
        check_dtype(self.updatesdtype,
                    updates_support_dtype_list,
                    param_name="updates")
        self.tiling_dtype = "int32"
        if self.updatesdtype != self.vardtype:
            error_manager_vector.raise_err_inputs_dtype_not_equal(
                kernel_name, "updates", "var", self.updatesdtype,
                self.vardtype)
        if self.vardtype != self.var_out_dtype:
            error_manager_vector.raise_err_inputs_dtype_not_equal(
                kernel_name, "var_out", "var", self.var_out_dtype,
                self.vardtype)
        self.kernel_name = kernel_name
        self.var_read_index = self.tik_instance.Scalar("int32")
        self.updates_read_index = self.tik_instance.Scalar("int32")
        self.indices_loop_index = self.tik_instance.Scalar("int32")
        self.zero_var = self.tik_instance.Scalar(dtype=self.updatesdtype,
                                                 name="zero_var")
        self.zero_var.set_as(0)
        self.indices_ub = None
        self.updates_ub = None
        self.core_num = self._tik_get_core_num()
        self.ub_size = self._tik_get_ub_size()

        self.tiling_gm = self.tik_instance.Tensor(self.tiling_dtype, (32, ),
                                                  name="tiling_gm",
                                                  scope=tik.scope_gm)
        self.input_var = self.tik_instance.Tensor(self.updatesdtype,
                                                  (MAX_ZERO_DIM_VAR, ),
                                                  name="input_var",
                                                  scope=tik.scope_gm)
        self.input_indices = self.tik_instance.Tensor(self.indicesdtype,
                                                      (MAX_ZERO_DIM_INDICE, ),
                                                      name="input_indices",
                                                      scope=tik.scope_gm)
        self.input_updates = self.tik_instance.Tensor(self.updatesdtype,
                                                      (MAX_ZERO_DIM_INDICE, ),
                                                      name="input_updates",
                                                      scope=tik.scope_gm)
        self.output_var = self.tik_instance.Tensor(self.updatesdtype,
                                                   (MAX_ZERO_DIM_VAR, ),
                                                   name="output_var",
                                                   scope=tik.scope_gm)
Example #3
def __check_params(input_values, axis):
    _check_shape(input_values, "shape")
    _check_shape(input_values, "ori_shape")

    dim_num = len(input_values[0].get("ori_shape"))

    if axis >= dim_num or axis < -dim_num:
        error_manager.raise_err_input_value_invalid(
            "concat", "concat_dim",
            "between " + str(min(-dim_num, dim_num - 1)) + " and " +
            str(max(-dim_num, dim_num - 1)), axis)

    shape_value = []
    for tensor_dict in input_values:
        shape_value.append(tensor_dict.get("ori_shape"))
    first_input_shape = input_values[0].get("ori_shape")

    # dims must equal except merge axis
    axis_new = axis % dim_num
    for j, _ in enumerate(first_input_shape):
        if j == axis_new:
            continue

        dim_values = set()
        for element_shape in shape_value:
            dim_values.add(element_shape[j])

        if -1 in dim_values:
            dim_values.remove(-1)

        if len(dim_values) > 1:
            error_manager.raise_err_check_params_rules(
                "concat",
                "Dims must be equal except merge concat axis[%s]" % axis,
                "input_values", shape_value)

    dtype_lists = []
    supported_formats = {"ND", "NHWC", "NCHW"}
    for input_value in input_values:
        input_format = input_value.get("format")
        dtype_lists.append(input_value.get("dtype"))
        if input_format not in supported_formats:
            error_manager.raise_err_input_format_invalid(
                'concat', 'input_values', ','.join(supported_formats),
                input_format)

    dtype = dtype_lists[0]
    for index, dtype_ in enumerate(dtype_lists):
        if dtype != dtype_:
            error_manager.raise_err_inputs_dtype_not_equal(
                "concat", "input_values[0]", "input_values[%s]" % index, dtype,
                dtype_)
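
A small stand-alone sketch of the rule the loop above enforces: every dimension except the concat axis must agree across all inputs, with unknown dims (-1) ignored. The helper name and the shapes are made up for illustration only.

def dims_match_except_axis(shapes, axis):
    """Return True when all shapes agree on every dim except `axis`."""
    dim_num = len(shapes[0])
    axis = axis % dim_num
    for j in range(dim_num):
        if j == axis:
            continue
        dims = {shape[j] for shape in shapes} - {-1}   # drop unknown dims
        if len(dims) > 1:
            return False
    return True

print(dims_match_except_axis([(4, 3, 8), (4, 5, 8), (4, -1, 8)], 1))  # True
print(dims_match_except_axis([(4, 3, 8), (2, 5, 8)], 1))              # False
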
Example #4
 def __init__(self, indices, x, shape, y, kernel_name):
     self.indices_dtype = indices.get("dtype").lower()
     self.updates_dtype = x.get("dtype").lower()
     self.shape_dtype = shape.get("dtype").lower()
     self.y_dtype = y.get("dtype").lower()
     indices_support_dtype_list = ("int32", )
     check_dtype(self.indices_dtype, indices_support_dtype_list, param_name="indices")
     updates_support_dtype_list = ("float32", )
     check_dtype(self.updates_dtype, updates_support_dtype_list, param_name="updates")
     shape_support_dtype_list = ("int32", )
     check_dtype(self.shape_dtype, shape_support_dtype_list, param_name="shape")
     if self.y_dtype != self.updates_dtype:
         error_manager_vector.raise_err_inputs_dtype_not_equal(kernel_name, "y", "x",
                                                               self.y_dtype, self.updates_dtype)
     self.tiling_dtype = "int32"
     self.tik_instance = tik.Tik(tik.Dprofile())
     self.kernel_name = kernel_name
     self.core_start = self.tik_instance.Scalar("int32")
     self.core_end = self.tik_instance.Scalar("int32")
     self.var_read_index = self.tik_instance.Scalar("int32")
     self.updates_read_index = self.tik_instance.Scalar("int32")
     self.indices_var = self.tik_instance.Scalar("int32")
     self.block_idx = self.tik_instance.Scalar("int32")
     self.zero_var = self.tik_instance.Scalar(self.updates_dtype)
     self.zero_var.set_as(0)
     self.var_ub = None
     self.indices_ub = None
     self.updates_ub = None
     self.shape_ub = None
     self.updates_ub_one = None
     self.indices_ub_one = None
     self.cur_var = self.tik_instance.Scalar(dtype=self.updates_dtype)
     self.cur_update = self.tik_instance.Scalar(dtype=self.updates_dtype)
     self.acc_var = self.tik_instance.Scalar(dtype=self.updates_dtype)
     self.updates_var = self.tik_instance.Scalar(dtype=self.updates_dtype)
     self.aicore_num = self._tik_get_core_num()
     self.ub_size = self._tik_get_ub_size()
     self.tbe_product = self._tik_get_platform()
     self.tiling_gm = self.tik_instance.Tensor(self.tiling_dtype, (32,), name="tiling_gm", scope=tik.scope_gm)
     self.input_indices = self.tik_instance.Tensor(self.indices_dtype, (MAX_INPUT_SIZE, ), name="input_indices",
                                                   scope=tik.scope_gm)
     self.input_updates = self.tik_instance.Tensor(self.updates_dtype, (MAX_INPUT_SIZE, ), name="input_updates",
                                                   scope=tik.scope_gm)
     self.input_shape = self.tik_instance.Tensor(self.indices_dtype, (MAX_SHAPE, ), name="input_shape",
                                                 scope=tik.scope_gm)
     #check platform
     if self.updates_dtype == "float32" and self.tbe_product in ("Ascend910", "Ascend610"):
         self.output_var = self.tik_instance.Tensor(self.updates_dtype, (MAX_SHAPE, ), name="output_var",
                                                    scope=tik.scope_gm, is_atomic_add=True)
     else:
         self.output_var = self.tik_instance.Tensor(self.updates_dtype, (MAX_SHAPE, ), name="output_var",
                                                    scope=tik.scope_gm)
Example #5
    def check_input_params(self):
        """
        check whether the input parameters are valid or not
        """
        if self.input_dtype != self.output_dtype:
            error_manager_vector.raise_err_inputs_dtype_not_equal(
                "split_d", "self.input_dtype", "self.output_dtype",
                self.input_dtype, self.output_dtype)

        dtype_list = ("float16", "float32", "int32", "int8", "int16", "int64",
                      "uint8", "uint16", "uint32", "uint64")
        check_dtype(self.input_dtype, dtype_list, param_name="x")
Example #6
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    calculate the backpropagation of the leaky_relu operation:
    y = gradients where x > 0, negative_slope * gradients where x <= 0.
    supported dtypes: float16, float32

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    g_dtype = g.get("dtype").lower()
    x_dtype = x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(g_dtype, check_list, param_name="input_g")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_elewise_shape_range([g, x], support_broadcast=True)
    if g_dtype != x_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "g", "x", g_dtype, x_dtype)
    ins = classify([g, x], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (g, x) in ins:
        with te.op.compute():
            g_shape, x_shape = variable_shape([g, x], support_broadcast=True)
            g_shape, x_shape = refine_shapes_for_broadcast(g_shape, x_shape)
            tensor_g = tvm.placeholder(g_shape, g_dtype, "tensor_g")
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            res = leaky_relu_grad_compute(tensor_g, tensor_x, y,
                                          negative_slope, kernel_name)
            tensors.append((tensor_g, tensor_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
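
A minimal NumPy sketch of the gradient rule stated in the docstring (y = gradients where x > 0, negative_slope * gradients where x <= 0). The reference function name is hypothetical; this only illustrates the math, not the TBE kernel.

import numpy as np

def leaky_relu_grad_ref(g, x, negative_slope=0.0):
    # pass the gradient through where x > 0, scale it by negative_slope elsewhere
    g = np.asarray(g, dtype=np.float32)
    x = np.asarray(x, dtype=np.float32)
    return np.where(x > 0, g, negative_slope * g)

# e.g. x = [-2., 3.], g = [1., 1.], slope 0.1  ->  [0.1, 1.0]
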
Example #7
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
    calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32
    x2 : dict
        shape and dtype of second input, only support float16, float32
    y: dict
        shape and dtype of output, should be broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """

    x_dtype = x1.get("dtype").lower()
    y_dtype = x2.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if x_dtype != y_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", x_dtype, y_dtype)
    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = real_div_compute(tensor_x, tensor_y, y, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
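
A quick NumPy illustration of the compute rule c = a / b with broadcasting, which is what the docstring describes; the array values are made up and this is not the TBE schedule.

import numpy as np

a = np.array([[2.0, 4.0], [6.0, 8.0]], dtype=np.float32)
b = np.array([2.0, 4.0], dtype=np.float32)   # broadcast across rows
print(a / b)                                 # [[1. 1.] [3. 2.]]
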
Example #8
def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad

    sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad

    Parameters:
    ----------
    x : dictionary shape of sigmoid input

    dx : dictionary shape of grad

    out: dictionary output

    kernel_name : cce kernel name, default value is "sigmoid_grad_cce"

    Returns
    -------
    None
    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(dx_dtype, check_list, param_name="input_dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (sig, dx) in ins:
        with te.op.compute():
            shape_sig, shape_dx = variable_shape([sig, dx],
                                                 support_broadcast=False)
            shape_sig, shape_dx = refine_shapes_for_broadcast(
                shape_sig, shape_dx)
            tensor_sig = tvm.placeholder(shape_sig, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dx_dtype, "tensor_dx")
            res = sigmoid_grad_compute(tensor_sig, tensor_dx, out, kernel_name)
            tensors.append([tensor_sig, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
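
A NumPy sketch of the formula in the docstring, sigmoid_grad = (sigmoid - sigmoid*sigmoid) * grad, where the first argument already holds the sigmoid output. The reference function name is hypothetical.

import numpy as np

def sigmoid_grad_ref(sig, grad):
    # sig is the forward sigmoid output, grad the upstream gradient
    sig = np.asarray(sig, dtype=np.float32)
    grad = np.asarray(grad, dtype=np.float32)
    return (sig - sig * sig) * grad

# sigmoid(0) = 0.5, so sigmoid_grad_ref(0.5, 1.0) == 0.25
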
Example #9
def pad_d(input_x, output_x, paddings, kernel_name="pad_d"):
    """ calculating pad tensor by paddings parameters

    Parameters
    ----------
    input_x : dict
        shape and dtype of input
    output_x: dict
        shape and dtype of output
    paddings: list or tuple.
        For each dimension D of input, paddings[D, 0] indicates how many
        values to add before the contents of tensor in that dimension, and
        paddings[D, 1] indicates how many values to add after the contents
        of tensor in that dimension.
    kernel_name : str
        cce kernel name, default value is "pad_d"

    Returns
    -------
    None.
    """
    in_shape = list(input_x.get("shape"))
    pads = []
    for i in paddings:
        pads.append(list(i))
    src_dtype = input_x.get("dtype").lower()
    dst_dtype = output_x.get("dtype").lower()

    if len(in_shape) != len(pads):
        error_detail = "Length of input must be as same as paddings"
        error_manager_vector.raise_err_two_input_shpae_invalid(
            "PadD", "input_x", "paddings", error_detail)

    if src_dtype != dst_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            "PadD", "src_dtype", "dst_dtype", src_dtype, dst_dtype)

    if src_dtype not in ["float32", "float16", "int32"]:
        error_detail = "Only support float, float16 and int32"
        error_manager_vector.raise_err_two_input_dtype_invalid(
            "PadD", "src_dtype", "dst_dtype", error_detail)

    tik_obj = tik.Tik()
    pad = pad_common.PadInit(pads, src_dtype, kernel_name, tik_obj, True)
    return pad_compute(pad)
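
The paddings layout described in the docstring (paddings[D, 0] values before, paddings[D, 1] values after, per dimension D) matches numpy.pad's pad_width argument, so a short NumPy sketch shows the effect on shapes; it is only an illustration, not the TIK implementation.

import numpy as np

x = np.arange(6, dtype=np.float32).reshape(2, 3)
paddings = [[1, 0], [0, 2]]   # 1 row before dim 0, 2 columns after dim 1
print(np.pad(x, paddings, mode="constant").shape)   # (3, 5)
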
Example #10
def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"):
    """
    algorithm: sqrt_grad_cce

    Parameters
    ----------
    x : dict
        dict of data

    dx : dict
        dict of data_grad

    out : dict
        dict of output

    kernel_name : str
        cce kernel name, default value is "sqrt_grad"

    Returns
    -------
    None

    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="x")
    check_dtype(dx_dtype, check_list, param_name="dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, dx) in ins:
        with te.op.compute():
            x_shape, dx_shape = variable_shape([x, dx],
                                               support_broadcast=False)
            x_shape, dx_shape = refine_shapes_for_broadcast(x_shape, dx_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(dx_shape, dx_dtype, "tensor_dx")
            res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name)
            tensors.append([tensor_x, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
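
A hedged NumPy sketch of the usual sqrt backward rule, assuming (as in common framework conventions) that `x` holds the forward output sqrt(input), so the input gradient is dx / (2 * x). The reference function name is hypothetical and this only illustrates the arithmetic.

import numpy as np

def sqrt_grad_ref(x, dx):
    # x: assumed forward sqrt output, dx: upstream gradient
    x = np.asarray(x, dtype=np.float32)
    dx = np.asarray(dx, dtype=np.float32)
    return dx / (2.0 * x)

# forward: sqrt(4.0) = 2.0; backward with upstream gradient 1.0 -> 0.25
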
Example #11
    def __init__(self, params_dict, indices_dict, axis_dict, y_dict, kernel_name):
        """
        constructor of GatherV2

        Parameters
        ----------
        params_dict: dict
            shape and dtype of input params
        indices_dict: dict
            shape and dtype of input indices
        axis_dict: dict
            shape and dtype of input axis
        y_dict: dict
            shape and dtype of output, should be same dtype as input
        kernel_name: str
            kernel name, default value is "GatherV2"

        Returns
        -------
        None
        """
        self.params_dtype = params_dict.get("dtype").lower()
        self.indices_dtype = indices_dict.get("dtype").lower()
        self.axis_dtype = axis_dict.get("dtype").lower()
        self.y_dtype = y_dict.get("dtype").lower()
        self.tiling_dtype = INT32
        dtype_list = ("int8", "int16", "int32", "int64", "uint8", "uint16",
                      "uint32", "uint64", "float16", "float32")
        indices_support_dtype_list = ("int32", "int64")
        check_dtype(self.params_dtype, dtype_list, param_name="x")
        check_dtype(self.indices_dtype, indices_support_dtype_list, param_name="indices")
        check_dtype(self.axis_dtype, (INT32,), param_name="axis")
        if self.y_dtype != self.params_dtype:
            error_manager_vector.raise_err_inputs_dtype_not_equal(kernel_name, "y", "x",
                                                                  self.y_dtype, self.params_dtype)

        profile = tik.Dprofile()
        self.ub_size = profile.get_unified_buffer_size()
        self.l1_size = profile.get_l1_buffer_size()
        self.core_num = profile.get_aicore_num()
        self.tik_instance = tik.Tik(profile, disable_debug=True)
        self.kernel_name = kernel_name

        self.axis_shape = (1,)
        self.x_shape = (PARAMS_SIZE,)
        self.indices_shape = (INDICES_NUM,)
        self.y_shape = (PARAMS_SIZE,)

        self.params_dsize = TYPE_LEN_DICT.get(self.params_dtype)
        self.indices_dsize = TYPE_LEN_DICT.get(self.indices_dtype)
        self.block_elem = BLOCK_SIZE // self.params_dsize

        self.x = None
        self.indices = None
        self.axis = None
        self.tiling_gm = None
        self.y = None

        self.params_pre = None
        self.params_axis = None
        self.params_row = None
        self.indices_num = None

        self.cache_params = None
        self.need_core_num = None
        self.tail_process_core = None
        self.indices_num_each_core = None
        self.indices_num_remaining = None
        self.indices_loop_num = None
        self.indices_row_num_once = None
        self.indices_row_num_last = None

        self.row_num_once_ub = None
        self.row_num_once_tail_ub = None
        self.inner_loop_num = None
        self.row_num_last_ub = None
        self.row_num_last_tail_ub = None
        self.inner_loop_num_last = None
Example #12
 # Nested helper excerpt: `bias_dtype` comes from the enclosing function's
 # scope; every bias-related input must share that dtype.
 def _check_equal_bias_dtype(p, name):
     if p["dtype"] != bias_dtype:
         error_manager_vector.raise_err_inputs_dtype_not_equal(
             "DynamicGRU", 'b', name, bias_dtype, p["dtype"])

def sparse_apply_proximal_adagrad_d(var_dict,
                                    accum_dict,
                                    lr_dict,
                                    l1_dict,
                                    l2_dict,
                                    grad_dict,
                                    indices_dict,
                                    var_out_dict,
                                    accum_out_dict,
                                    use_locking=False,
                                    kernel_name="SparseApplyProximalAdagradD"):
    """
    sparse_apply_proximal_adagrad_d op entry interface

    Parameters
    ----------
    var_dict: var params shape, dtype and range
    accum_dict: accum shape, dtype and range
    lr_dict: lr shape, dtype and range
    l1_dict: l1 shape, dtype and range
    l2_dict: l2 shape, dtype and range
    grad_dict: grad shape, dtype and range
    indices_dict: indices shape, dtype and range
    var_out_dict: var output shape, dtype and range
    accum_out_dict: accum output shape, dtype and range
    use_locking: default value is "False"
    kernel_name: kernel name of SparseApplyProximalAdagradD op

    Returns
    -------
    compile info
    """
    var_dtype_check_list = ("float32")
    indices_dtype_check_list = ("int32")

    var_dtype = var_dict.get("dtype").lower()
    check_dtype(var_dtype, var_dtype_check_list, param_name="var_dict")

    accum_dtype = accum_dict.get("dtype").lower()
    check_dtype(accum_dtype, var_dtype_check_list, param_name="accum_dict")

    lr_dtype = lr_dict.get("dtype").lower()
    check_dtype(lr_dtype, var_dtype_check_list, param_name="lr_dict")

    l1_dtype = l1_dict.get("dtype").lower()
    check_dtype(l1_dtype, var_dtype_check_list, param_name="l1_dict")

    l2_dtype = l2_dict.get("dtype").lower()
    check_dtype(l2_dtype, var_dtype_check_list, param_name="l2_dict")

    grad_dtype = grad_dict.get("dtype").lower()
    check_dtype(grad_dtype, var_dtype_check_list, param_name="grad_dict")

    indices_dtype = indices_dict.get("dtype").lower()
    check_dtype(indices_dtype,
                indices_dtype_check_list,
                param_name="indices_dict")

    var_out_dtype = var_out_dict.get("dtype").lower()
    check_dtype(var_out_dtype, var_dtype_check_list, param_name="var_out_dict")

    accum_out_dtype = accum_out_dict.get("dtype").lower()
    check_dtype(accum_out_dtype,
                var_dtype_check_list,
                param_name="accum_out_dict")

    if var_dtype != var_out_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "var", "var_out", var_dtype, var_out_dtype)
    if accum_dtype != accum_out_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "accum", "accum_out", accum_dtype, accum_out_dtype)

    obj = SparseApplyProximalAdagradD(var_dtype, indices_dtype, kernel_name)
    obj.sparse_apply_proximal_adagrad_d()
    # add compile info
    te.op.add_compile_info(
        "vars", {
            "ub_size": obj.ub_size,
            "core_num": obj.core_num,
            "ub_tensor_num": obj.ub_tensor_num
        })