Example No. 1
def squared_difference(x1, x2, y, kernel_name="squared_difference"):
    """
    algorithm: squared_difference

    calculating squared difference: y = (x1 - x2) * (x1 - x2)

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32, int32
    x2 : dict
        shape and dtype of second input, only support float16, float32, int32
    y : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is squared_difference

    Returns
    -------
    None
    """
    shape_x = x1.get("shape")
    shape_y = x2.get("shape")
    check_shape(shape_x, param_name="x1")
    check_shape(shape_y, param_name="x2")

    check_list = ["float16", "float32", "int32"]
    dtype = x1.get("dtype").lower()

    if dtype not in check_list:
        raise RuntimeError(
            "tf_squared_difference_cce only support float16, float32, int32")

    shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                   shape_y,
                                                   param_name_input1="x1",
                                                   param_name_input2="x2")

    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_x = tvm.placeholder(shape_x, dtype=dtype, name="data_x")
    data_y = tvm.placeholder(shape_y, dtype=dtype, name="data_y")

    with tvm.target.cce():
        # recompute the broadcast output shape for the refined shapes
        shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                       shape_y,
                                                       param_name_input1="x1",
                                                       param_name_input2="x2")
        data_x_tmp = te.lang.cce.broadcast(data_x, shape_max)
        data_y_tmp = te.lang.cce.broadcast(data_y, shape_max)
        data_sub = te.lang.cce.vsub(data_x_tmp, data_y_tmp)
        res = te.lang.cce.vmul(data_sub, data_sub)
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_x, data_y, res]
    }

    te.lang.cce.cce_build_code(sch, config)
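These functions are excerpted from larger TBE operator files, so the module-level imports are not shown. Below is a minimal sketch of the header the static-shape snippets appear to assume; the exact module paths are an assumption based on typical TBE operator code and may differ across CANN versions. The dynamic-shape examples (for instance Example No. 5) additionally rely on classify, Mode, variable_shape, check_elewise_shape_range and te.lang.dynamic, which come from further utility modules not shown here.

# assumed imports for the static-shape snippets; paths are not confirmed by the source
import te.lang.cce
from te import tvm
from topi import generic
from topi.cce import util
from te.utils.op_utils import (check_shape, check_dtype, broadcast_shapes,
                               refine_shapes_for_broadcast)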
Example No. 2
def floor_mod(x1, x2, y, kernel_name="floor_mod"):
    """
    calculate the remainder of division, support fp16, fp32, int32
    res = x1 - floor(x1 / x2) * x2

    Parameters
    ----------
    x1: dict
        dict{"shape":tuple or list,"dtype":str}
        shape of data
        the data type, src_dtype equals dst_dtype, support fp16,fp32,int32
    x2: dict
        dict{"shape":tuple or list,"dtype":str}
        shape of data
        the data type, src_dtype equals dst_dtype, support fp16,fp32,int32
    y: dict, reserved field
        dict with keys(shape and dtype) of output
    kernel_name: str
        cce kernel name, default value is "floor_mod"

    Returns
    ------
    None
    """
    # get dtype and shape attributes
    dtype_x = x1.get("dtype").lower()
    shape_x = x1.get("shape")
    dtype_y = x2.get("dtype").lower()
    shape_y = x2.get("shape")

    # check_kernel_name & shape
    check_shape(shape_x, param_name="x1")
    check_shape(shape_y, param_name="x2")

    # check input tensor data_type
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x, check_list, param_name="x1")
    check_dtype(dtype_y, check_list, param_name="x2")

    if dtype_x != dtype_y:
        raise RuntimeError("the type of dtype in two dict is not the same")

    shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                   shape_y,
                                                   param_name_input1="x1",
                                                   param_name_input2="x2")
    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)

    input_data_x = tvm.placeholder(shape_x, name="input_data_x", dtype=dtype_x)
    input_data_y = tvm.placeholder(shape_y, name="input_data_y", dtype=dtype_y)
    res = floor_mod_compute(input_data_x, input_data_y, y, kernel_name)
    with tvm.target.cce():
        auto_sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [input_data_x, input_data_y, res]
    }
    te.lang.cce.cce_build_code(auto_sch, config)
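Each entry function takes plain dicts describing shape and dtype and builds the CCE kernel as a side effect of being called. A usage sketch for the function above; the shapes and kernel name are made up for illustration:

floor_mod({"shape": (16, 32), "dtype": "float16"},
          {"shape": (16, 32), "dtype": "float16"},
          {"shape": (16, 32), "dtype": "float16"},
          kernel_name="floor_mod_fp16")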
Example No. 3
def softplus_grad(input_gradients,
                  input_features,
                  output_backprops,
                  kernel_name="softplus_grad"):
    """
    Computes softplus gradients for a softplus operation.
    The gradients: "dy * exp(x) / (1 + exp(x))".

    Parameters
    ----------
    input_gradients: dict
        The backpropagated gradients to the corresponding softplus operation.
    input_features: dict
        The input_features passed as input to the corresponding softplus operation.
        source data type support "float16", "float32", "int32", "int8", "uint8".
    output_backprops: dict
        data of output.
    kernel_name: str
        kernel name, default value is "softplus_grad".

    Returns
    -------
    None
    """
    shape_dy = input_gradients.get("shape")
    dtype_dy = input_gradients.get("dtype")
    shape_x = input_features.get("shape")
    dtype_x = input_features.get("dtype")

    if dtype_dy.lower() != dtype_x.lower():
        raise RuntimeError("type of dy and type of x must be same, \
             while the types are different")
    dtype = dtype_dy

    check_shape(shape_dy, param_name="input_gradients")
    check_shape(shape_x, param_name="input_features")

    check_list = ("float16", "float32", "int32", "int8", "uint8")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_gradients")
    shape_dy, shape_x, shape_max = broadcast_shapes(
        shape_dy,
        shape_x,
        param_name_input1="input_gradients",
        param_name_input2="input_features")
    reshape_dy, reshape_x = refine_shapes_for_broadcast(shape_dy, shape_x)

    data_dy = tvm.placeholder(reshape_dy, name="data_dy", dtype=input_dtype)
    data_x = tvm.placeholder(reshape_x, name="data_x", dtype=input_dtype)

    res = softplus_grad_compute(data_dy,
                                data_x,
                                output_backprops,
                                kernel_name=kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_dy, data_x, res]}
    te.lang.cce.cce_build_code(sch, config)
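The gradient formula in the docstring above, dy * exp(x) / (1 + exp(x)), is just dy * sigmoid(x). A small NumPy reference check of that identity (reference only, not the TBE kernel):

import numpy as np

dy = np.array([1.0, 0.5, 2.0], dtype=np.float32)
x = np.array([-2.0, 0.0, 3.0], dtype=np.float32)
grad = dy * np.exp(x) / (1.0 + np.exp(x))
# exp(x) / (1 + exp(x)) == 1 / (1 + exp(-x)) == sigmoid(x)
assert np.allclose(grad, dy / (1.0 + np.exp(-x)))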
Example No. 4
def mul(x, y, output, kernel_name="mul"):
    """
    do element-wise mul operation between two input tensors

    Parameters:
    ----------
    x : dict.
        shape, dtype of input x
    y : dict.
        shape, dtype of input y
    output : dict.
        shape, dtype of output
    kernel_name : str.
        cce kernel name, default value is "mul"

    Returns
    -------
    None
    """
    # format_pattern = 1  Nz and vector
    # format_pattern = 2  vector and Nz
    # format_pattern = 0  Nz scalar  Nz Nz  ND ND
    format_pattern = _mul_check_format(x, y)
    shape_x, shape_y = _infer_shape(format_pattern, x, y)

    shape_x = util.scalar2tensor_one(shape_x)
    dtype_x = x.get("dtype").lower()
    shape_y = util.scalar2tensor_one(shape_y)
    dtype_y = y.get("dtype").lower()

    op_utils.check_shape(shape_x, param_name="x")
    op_utils.check_shape(shape_y, param_name="y")

    if dtype_x != dtype_y:
        raise RuntimeError("dtype of inputs should be consistent")
    dtype = dtype_x
    check_list = ("int32", "float16", "float32", "int16")
    op_utils.check_dtype(dtype, check_list, param_name="x")

    vmul_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vmul", "float32")
    if dtype_x == "float32" and not vmul_support:
        raise RuntimeError(
            "Input dtype is float32, but do not support on the platform")

    shape_x, shape_y, shape_max = op_utils.broadcast_shapes(
        shape_x, shape_y, param_name_input1="x", param_name_input2="y")

    shape_x, shape_y = op_utils.refine_shapes_for_broadcast(shape_x, shape_y)
    input_x = tvm.placeholder(shape_x, dtype=dtype, name="x")
    input_y = tvm.placeholder(shape_y, dtype=dtype, name="y")

    res = _mul_compute(input_x, input_y, output, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (input_x, input_y, res)}
    te.lang.cce.cce_build_code(sch, config)
Example No. 5
def floor_div(input_x, input_y, output_z, kernel_name="floor_div"):
    """
      algorithm: floordiv
      calculating data's floordiv, res = floor(x / y)

      Parameters
      ----------
      input_x: dict
          dict with keys(shape and dtype) of input_x
      input_y: dict
          dict with keys(shape and dtype) of input_y
      output_z: dict
          dict with keys(shape and dtype) of output
      kernel_name: str, default value is "floor_div"

      Returns
      -------
      None
    """
    # check dtype of input_x/input_y
    input_dtype_x = input_x.get("dtype").lower()
    input_dtype_y = input_y.get("dtype").lower()
    check_list = ('int8', 'uint8', 'int32', 'float16', 'float32')
    check_dtype(input_dtype_x, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if input_dtype_x != input_dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_div'
        error_info['param_name1'] = 'input_dtype_x'
        error_info['param_name2'] = 'input_dtype_y'
        error_info['param1_dtype'] = str(input_dtype_x)
        error_info['param2_dtype'] = str(input_dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the dtypes of parameter[%s] and "
                           "parameter[%s] must be equal, but they are "
                           "[%s] and [%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, input_dtype_x, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, input_dtype_y, "tensor_y")
            res = floor_div_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
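Unlike the static-shape examples above, this variant goes through classify/variable_shape and te.lang.dynamic.build, so it is typically registered with symbolic shapes. A hedged sketch of what the input descriptors might look like; the -1 convention and the "range" key are assumptions based on common dynamic-shape usage, not confirmed by this snippet:

# hypothetical dynamic-shape descriptors (keys other than shape/dtype are assumptions)
x1 = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float16"}
x2 = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float16"}
y = {"shape": (-1, -1), "range": [(1, None), (1, None)], "dtype": "float16"}
floor_div(x1, x2, y, kernel_name="floor_div_dynamic")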
Example No. 6
def inv_grad(input_y, input_dy, output_z, kernel_name="inv_grad"):
    """
    algorithm: inv_grad
    calculating data's reciprocal grad, dx = -1 * dy * y * y, where `y = 1/x`
    and `dy` is the corresponding input gradient.

    Parameters
    ----------
    input_y: dict
        shape and dtype of input_y, only support float16, float32, int32, int8
    input_dy: dict
        shape and dtype of input_dy, should be same shape and type as input_y
    output_z: dict
        shape and dtype of output, should be same shape and type as input_y
    kernel_name: str
        kernel name, default value is "inv_grad"

    Returns
    -------
    None
    """
    shape_input_y = input_y.get("shape")
    shape_input_dy = input_dy.get("shape")
    dtype_input_y = input_y.get("dtype")
    dtype_input_dy = input_dy.get("dtype")

    check_shape(shape_input_y, param_name="input_y")
    check_shape(shape_input_dy, param_name="input_dy")

    shape_input_y = util.shape_refine(shape_input_y)
    shape_input_dy = util.shape_refine(shape_input_dy)

    if list(shape_input_y) != list(shape_input_dy):
        raise RuntimeError("the shape of input must be equal!")

    dtype_input_y = dtype_input_y.lower()
    dtype_input_dy = dtype_input_dy.lower()

    if dtype_input_dy != dtype_input_y:
        raise RuntimeError("the dtype of input must be equal!")

    check_list = ("float16", "float32", "int32", "int8")
    check_dtype(dtype_input_y, check_list, param_name="input_y")

    shape_input_dy, shape_input_y = refine_shapes_for_broadcast(shape_input_dy,
                                                                shape_input_y)
    data_dy = tvm.placeholder(shape_input_dy, name="data_dy",
                              dtype=dtype_input_dy)
    data_y = tvm.placeholder(shape_input_y, name="data_y", dtype=dtype_input_y)

    res = inv_grad_compute(data_y, data_dy, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_y, data_dy, res]}
    te.lang.cce.cce_build_code(sch, config)
Example No. 7
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    calculate the backpropagation of leaky_relu operation
    y = gradients if x > 0, else negative_slope * gradients
    support dtype: float16, float32

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """

    shape_g = g.get("shape")
    shape_x = x.get("shape")
    dtype_g = g.get("dtype").lower()
    dtype_x = x.get("dtype").lower()

    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape_g)
    util.check_shape_rule(shape_x)
    util.check_tensor_shape_size(shape_g)
    util.check_tensor_shape_size(shape_x)

    shape_list = util.produce_shapes(shape_g, shape_x)
    util.check_tensor_shape_size(shape_list[2])

    # check input tensor data_type
    check_list = ["float16", "float32"]
    util.check_dtype_rule(dtype_g, check_list)
    util.check_dtype_rule(dtype_x, check_list)
    util.compare_tensor_dict_key(g, x, "dtype")

    shape_g, shape_x = refine_shapes_for_broadcast(shape_list[0],
                                                   shape_list[1])
    data_g = tvm.placeholder(shape_g, name="data_g", dtype=dtype_g)
    data_x = tvm.placeholder(shape_x, name="data_x", dtype=dtype_g)
    res = leaky_relu_grad_compute(data_g, data_x, y, negative_slope,
                                  kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_g, data_x, res]}

    te.lang.cce.cce_build_code(schedule, config)
Example No. 8
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
    calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32
    x2 : dict
        shape and dtype of second input, only support float16, float32
    y: dict
        shape and dtype of output, should be the broadcast shape and same
        type as the inputs
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """
    shape_x = util.scalar2tensor_one(x1.get("shape"))
    shape_y = util.scalar2tensor_one(x2.get("shape"))
    check_shape(shape_x, param_name="x1")
    check_shape(shape_y, param_name="x2")

    check_tuple = ("float16", "float32")
    input_data_type = x1.get("dtype").lower()
    check_dtype(input_data_type, check_tuple, param_name="x1")
    input_data_type_x2 = x2.get("dtype").lower()
    check_dtype(input_data_type_x2, check_tuple, param_name="x2")

    shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                   shape_y,
                                                   param_name_input1="x1",
                                                   param_name_input2="x2")
    if shape_x[-1] == 1 and shape_y[-1] == 1 and shape_max[-1] == 1:
        shape_x = shape_x if len(shape_x) == 1 else shape_x[:-1]
        shape_y = shape_y if len(shape_y) == 1 else shape_y[:-1]
        shape_max = shape_max if len(shape_max) == 1 else shape_max[:-1]

    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_x = tvm.placeholder(shape_x, name="data_x", dtype=input_data_type)
    data_y = tvm.placeholder(shape_y, name="data_y", dtype=input_data_type)

    res = real_div_compute(data_x, data_y, y, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": (data_x, data_y, res)
    }

    te.lang.cce.cce_build_code(schedule, config)
Example No. 9
def rsqrt_grad(input_y, input_dy, output_z, kernel_name="rsqrt_grad"):
    """
    calculate the backpropagation of rsqrt operation
    rsqrt: y = 1 / sqrt(x)
    rsqrt_grad: -1/2 * y**3 * dy

    Parameters
    ----------
    input_y: dict
        dict of input_y, include keys(shape and dtype)
    input_dy: dict
        dict of input_dy, include keys(shape and dtype)
    output_z: dict
        dict of output
    kernel_name: str
        cce kernel name, default value is "rsqrt_grad"

    Returns
    -------
    None
    """
    shape_input_y = input_y.get("shape")
    dtype_input_y = input_y.get("dtype")
    shape_input_dy = input_dy.get("shape")
    dtype_input_dy = input_dy.get("dtype")

    check_shape(shape_input_y, param_name="input_y")
    check_shape(shape_input_dy, param_name="input_dy")
    util.compare_tensor_dict_key(input_y, input_dy, "shape")

    check_list = ("float16", "float32", "int32", "int8")
    dtype_input_y = dtype_input_y.lower()
    check_dtype(dtype_input_y, check_list, param_name="input_y")
    dtype_input_dy = dtype_input_dy.lower()
    check_dtype(dtype_input_dy, check_list, param_name="input_dy")
    util.compare_tensor_dict_key(input_y, input_dy, "dtype")
    reshape_y, reshape_dy = refine_shapes_for_broadcast(
        shape_input_y, shape_input_dy)

    data_input_y = tvm.placeholder(reshape_y,
                                   name="data_input_y",
                                   dtype=dtype_input_y)
    data_input_dy = tvm.placeholder(reshape_dy,
                                    name="data_input_dy",
                                    dtype=dtype_input_dy)

    res = rsqrt_grad_compute(data_input_y, data_input_dy, output_z,
                             kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input_y, data_input_dy, res]
    }
    te.lang.cce.cce_build_code(sch, config)
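The backward formula -1/2 * y**3 * dy follows from y = x**(-1/2), whose derivative is -1/2 * x**(-3/2) = -1/2 * y**3. A small NumPy sanity check of that derivation (reference only, not the TBE kernel):

import numpy as np

x = np.array([1.0, 4.0, 9.0])
dy = np.ones_like(x)
y = 1.0 / np.sqrt(x)
grad = -0.5 * y ** 3 * dy

# central finite difference of 1/sqrt(x) should match the analytic gradient
eps = 1e-6
numeric = ((x + eps) ** -0.5 - (x - eps) ** -0.5) / (2 * eps) * dy
assert np.allclose(grad, numeric)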
Example No. 10
def sigmoid_cross_entropy_with_logits(
        predict,
        target,
        loss,
        kernel_name="sigmoid_cross_entropy_with_logits"):
    """
    calculating the sigmoid cross entropy between predict and target

    Parameters
    ----------
    predict : dict
        shape and dtype of predict
    target : dict
        shape and dtype of target
    loss : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "sigmoid_cross_entropy_with_logits"

    Returns
    -------
    None
    """
    shape_predict = predict.get("shape")
    dtype_predict = predict.get("dtype")
    input_dtype_predict = dtype_predict.lower()
    check_shape(shape_predict, param_name="predict")

    shape_target = target.get("shape")
    dtype_target = target.get("dtype")
    input_dtype_target = dtype_target.lower()
    check_shape(shape_target, param_name="target")

    check_list = ("float16", "float32")
    check_dtype(input_dtype_predict, check_list, param_name="predict")
    check_dtype(input_dtype_target, check_list, param_name="target")
    shape_predict, shape_target = \
        refine_shapes_for_broadcast(shape_predict, shape_target)
    data_predict = tvm.placeholder(shape_predict,
                                   name="data_predict",
                                   dtype=input_dtype_predict)
    data_target = tvm.placeholder(shape_target,
                                  name="data_target",
                                  dtype=input_dtype_target)
    loss = sigmoid_cross_entropy_with_logits_compute(data_predict, data_target,
                                                     loss, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(loss)

    config = {
        "name": kernel_name,
        "tensor_list": [data_predict, data_target, loss]
    }

    te.lang.cce.cce_build_code(sch, config)
Example No. 11
def bitwise_xor(x1, x2, y, kernel_name="bitwise_xor"):
    """
    algorithm: bitwise_xor
    calculating: x1 ^ x2, the element-wise bitwise xor of x1 and x2

    Parameters
    ----------
    x1 : dict
              the shape and dtype of the tensor x1
    x2 : dict
              the shape and dtype of the tensor x2
    y :  dict
              the shape and dtype of the tensor y
    kernel_name : string
                  cce kernel name, default value is "bitwise_xor"
    Returns
    -------
    None
    """
    shape_x = x1.get("shape")
    shape_y = x2.get("shape")
    dtype_x = x1.get("dtype").lower()
    dtype_y = x2.get("dtype").lower()

    check_shape(shape_x, param_name="x1")
    check_shape(shape_y, param_name="x2")

    check_tuple = ("int16", "uint16", "int32")
    input_data_type = dtype_x.lower()
    check_dtype(input_data_type, check_tuple, param_name="x1")

    if dtype_x != dtype_y:
        raise RuntimeError("two input type must be the same")

    shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                   shape_y,
                                                   param_name_input1="x1",
                                                   param_name_input2="x2")
    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)

    if input_data_type == "int32":
        input_data_type = "int16"
        shape_x.append(2)
        shape_y.append(2)

    data_x = tvm.placeholder(shape_x, dtype=input_data_type, name="data_x")
    data_y = tvm.placeholder(shape_y, dtype=input_data_type, name="data_y")

    result = bitwise_xor_compute(data_x, data_y, y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    config = {"name": kernel_name, "tensor_list": [data_x, data_y, result]}
    te.lang.cce.cce_build_code(sch, config)
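The int32 branch above reinterprets each 32-bit element as two 16-bit lanes (hence the dtype switch to int16 and the trailing axis of 2 appended to the shapes), which is valid for pure bitwise operations. A NumPy sketch of the same trick (reference only, not the TBE kernel):

import numpy as np

a = np.array([0x12345678, -1], dtype=np.int32)
b = np.array([0x0000FFFF, 7], dtype=np.int32)

# view each int32 as two int16 lanes, XOR lane-wise, then view back as int32
a16 = a.view(np.int16).reshape(a.shape[0], 2)
b16 = b.view(np.int16).reshape(b.shape[0], 2)
xor16 = a16 ^ b16
assert np.array_equal(xor16.reshape(-1).view(np.int32), a ^ b)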
Example No. 12
def equal(input_x, input_y, output_z, kernel_name="equal"):
    """
    Returns the truth value of (x == y) element-wise

    Parameters
    ----------
    input_x: dict
        dict of input_x, include keys(shape and dtype)
    input_y: dict
        dict of input_y, include keys(shape and dtype)
    output_z: dict
        dict of output
    kernel_name: str
        cce kernel name, default value is "equal"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    dtype_x = input_x.get("dtype")
    shape_y = input_y.get("shape")
    dtype_y = input_y.get("dtype")
    shape_x, shape_y, shape_broadcast = broadcast_shapes(
        shape_x,
        shape_y,
        param_name_input1="input_x",
        param_name_input2="input_y")

    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    check_list = ("float16", "float32", "int32", "int8", "uint8")
    dtype_x = dtype_x.lower()
    check_dtype(dtype_x, check_list, param_name="input_x")
    dtype_y = dtype_y.lower()
    check_dtype(dtype_y, check_list, param_name="input_y")
    util.compare_tensor_dict_key(input_x, input_y, "dtype")

    shape_x = list(shape_x)
    shape_y = list(shape_y)
    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_input_x = tvm.placeholder(shape_x, name="data_input_x", dtype=dtype_x)
    data_input_y = tvm.placeholder(shape_y, name="data_input_y", dtype=dtype_y)

    res = equal_compute(data_input_x, data_input_y, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input_x, data_input_y, res]
    }
    te.lang.cce.cce_build_code(sch, config)
Example No. 13
def xlogy(input_x, input_y, output_z, kernel_name="xlogy"):
    """
    algorithm: xlogy
    calculating data's xlogy, res = 0 if x == 0 else x*log(y)

    Parameters
    ----------
    input_x: dict
        dict of input_x, include keys(shape and dtype)
    input_y: dict
        dict of input_y, include keys(shape and dtype)
    output_z: dict
        dict info of output_z
    kernel_name: str
        kernel name, default value is "xlogy"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    dtype = input_x.get("dtype")
    dtype_y = input_y.get("dtype")

    util.compare_tensor_dict_key(input_x, input_y, "dtype")
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    input_dtype = dtype.lower()
    input_dtype_y = dtype_y.lower()
    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")
    shape_list = broadcast_shapes(shape_x,
                                  shape_y,
                                  param_name_input1="input_x",
                                  param_name_input2="input_y")

    shape_x, shape_y = refine_shapes_for_broadcast(shape_list[0],
                                                   shape_list[1])
    data1 = tvm.placeholder(shape_x, name="data1", dtype=input_dtype)
    data2 = tvm.placeholder(shape_y, name="data2", dtype=input_dtype)
    res = xlogy_compute(data1, data2, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data1, data2, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)
Example No. 14
def atan2(x1, x2, y, kernel_name="atan2"):
    """
    Algorithm: arctan2
        arctan2(y, x) = arctan(y/x), adjusted to the correct quadrant
        by the signs of x and y
    ----------------------------------
    Parameters:

        x1: the dict of input data x1, only support float16, float32.

        x2: the dict of input data x2, only support float16, float32.

        y: the dict of output

        kernel_name: default value is "atan2".
    ----------------------------------
    Returns:
        None
    """

    y_shape = x1.get("shape")
    x_shape = x2.get("shape")

    y_dtype = x1.get("dtype")
    x_dtype = x2.get("dtype")

    check_shape(y_shape, param_name="x1")
    check_shape(x_shape, param_name="x2")

    shape_y, shape_x, shape_max = broadcast_shapes(
        y_shape, x_shape, param_name_input1="x1", param_name_input2="x2")

    check_list = ("float16", "float32")
    check_dtype(y_dtype, check_list, param_name="x1")
    check_dtype(x_dtype, check_list, param_name="x2")
    if y_dtype.lower() != x_dtype.lower():
        raise RuntimeError("The input tensor must have identical dtype!")
    shape_y, shape_x = refine_shapes_for_broadcast(shape_y, shape_x)
    input_y = tvm.placeholder(shape_y, dtype=y_dtype.lower(), name="input_y")
    input_x = tvm.placeholder(shape_x, dtype=x_dtype.lower(), name="input_x")

    res = atan2_compute(input_y, input_x, y, kernel_name)
    res = te.lang.cce.cast_to(res, x_dtype.lower())
    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (input_y, input_x, res),
        "print_ir": False,
        "bool_storage_as_1bit": False
    }

    te.lang.cce.cce_build_code(auto_sch, config)
Example No. 15
def logical_and(x1, x2, y, kernel_name="logical_and"):
    """
    calculating data

    Parameters
    ----------
    x1 : dict
        shape and dtype of input, only support int8
    x2 : dict
        shape and dtype of input, only support int8
    y : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "logical_and"

    Returns
    -------
    None
    """
    shape_x = x1.get("shape")
    shape_y = x2.get("shape")
    dtype_x = x1.get("dtype")
    dtype_y = x2.get("dtype")

    check_shape(shape_x, param_name="x1")
    check_shape(shape_y, param_name="x2")

    if dtype_x != dtype_y:
        raise RuntimeError("The type of input must be the same")

    input_data_type = dtype_x.lower()
    check_tuple = ("int8", )
    check_dtype(input_data_type, check_tuple, param_name="x1")

    shape_x, shape_y, _ = broadcast_shapes(shape_x,
                                           shape_y,
                                           param_name_input1="x1",
                                           param_name_input2="x2")
    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_x = tvm.placeholder(shape_x, dtype=dtype_x, name="data_x")
    data_y = tvm.placeholder(shape_y, dtype=dtype_y, name="data_y")

    res = logical_and_compute(data_x, data_y, y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_x, data_y, res)}

    te.lang.cce.cce_build_code(sch, config)
Example No. 16
def less(input_x, input_y, output_z, kernel_name="less"):
    """
    do element-wise less operation between two input tensors

    Parameters:
    ----------
    input_x : dict
        shape and dtype of first input, support float16,float32,int32,
        int8,uint8
    input_y : dict
        shape and dtype of second input, support float16,float32,int32,
        int8,uint8
    output_z: dict
        shape and dtype of output, should be the broadcast shape and same
        type as the inputs
    kernel_name : str
        cce kernel name, default value is less

    Returns
    -------
    None
    """
    shape_x = util.scalar2tensor_one(input_x.get("shape"))
    shape_y = util.scalar2tensor_one(input_y.get("shape"))
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    check_list = ("float16", "float32", "int32", "int8", "uint8")
    input_dtype = input_x.get("dtype").lower()
    check_dtype(input_dtype, check_list, param_name="input_x")

    shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                   shape_y,
                                                   param_name_input1="input_x",
                                                   param_name_input2="input_y")

    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_x = tvm.placeholder(shape_x, dtype=input_dtype, name="data_x")
    data_y = tvm.placeholder(shape_y, dtype=input_dtype, name="data_y")

    res = less_compute(data_x, data_y, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_x, data_y, res]
    }
    te.lang.cce.cce_build_code(sch, config)
Example No. 17
def pow(input_x, input_y, output_z, kernel_name="pow"):
    """
    algorithm: pow
    calculating data pow, res = x ** y

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "pow"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    if len(shape_x) == 0:
        shape_x = (1,)
    if len(shape_y) == 0:
        shape_y = (1,)
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")
    shape_list = broadcast_shapes(shape_x, shape_y, param_name_input1="input_x", param_name_input2="input_y")

    input_x_dtype = input_x.get("dtype").lower()
    input_y_dtype = input_y.get("dtype").lower()
    if input_x_dtype != input_y_dtype:
        raise RuntimeError("Dtype of input_x and input_y must be the same.")
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(input_x_dtype, check_list, param_name="input_x")

    shape_x, shape_y = refine_shapes_for_broadcast(shape_list[0],
                                                   shape_list[1])
    data_x = tvm.placeholder(shape_x, dtype=input_x_dtype, name="data_x")
    data_y = tvm.placeholder(shape_y, dtype=input_y_dtype, name="data_y")
    res = pow_compute(data_x, data_y, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    config = {"name": kernel_name,
              "tensor_list": [data_x, data_y, res],
              "bool_storage_as_1bit": False}
    te.lang.cce.cce_build_code(sch, config)
Example No. 18
def floor_div(input_x, input_y, output_z, kernel_name="floor_div"):
    """
      algorithm: floordiv
      calculating data's floordiv, res = floor(x / y)

      Parameters
      ----------
      input_x: dict
          dict with keys(shape and dtype) of input_x
      input_y: dict
          dict with keys(shape and dtype) of input_y
      output_z: dict
          dict with keys(shape and dtype) of output
      kernel_name: str
          kernel name, default value is "floordiv"

      Returns
      -------
      None
    """
    # check dtype of input_x/input_y
    input_dtype_x = input_x.get("dtype").lower()
    input_dtype_y = input_y.get("dtype").lower()
    check_list = ('int8', 'uint8', 'int32', 'float16', 'float32')
    check_dtype(input_dtype_x, check_list, param_name="input_x")
    if input_dtype_x != input_dtype_y:
        raise RuntimeError("The dtype of input_x and input_y must be the same")

    # check shape of input_x/input_y
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")
    shape_list = broadcast_shapes(shape_x,
                                  shape_y,
                                  param_name_input1="input_x",
                                  param_name_input2="input_y")

    # compute result for floordiv() with floordiv_compute()
    shape_x, shape_y = refine_shapes_for_broadcast(shape_list[0],
                                                   shape_list[1])
    data_x = tvm.placeholder(shape_x, dtype=input_dtype_x, name='data_x')
    data_y = tvm.placeholder(shape_y, dtype=input_dtype_y, name='data_y')
    res = floor_div_compute(data_x, data_y, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    config = {"name": kernel_name, "tensor_list": [data_x, data_y, res]}
    te.lang.cce.cce_build_code(sch, config)
Example No. 19
def mod(input_x, input_y, output_z, kernel_name="mod"):
    """
    Returns element-wise remainder of division.

    Parameters
    ----------
    input_x: dict
        input tensor contains shape and dtype attributes.
        source data type support "float16", "float32", "int8", "uint8", "int32".
    input_y: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'input_x'.
    output_z: dict
        data of output.
        Must have the same type as 'input_x'.
    kernel_name: str
        kernel name, default value is "mod"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")

    util.compare_tensor_dict_key(input_x, input_y, "dtype")
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    check_list = ("float16", "float32", "int8", "uint8", "int32")
    input_dtype = input_x.get("dtype").lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    shape_x, shape_y, shape_broadcast = broadcast_shapes(
        shape_x,
        shape_y,
        param_name_input1="input_x",
        param_name_input2="input_y")

    reshape_x, reshape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_x = tvm.placeholder(reshape_x, dtype=input_dtype, name="data_x")
    data_y = tvm.placeholder(reshape_y, dtype=input_dtype, name="data_y")
    res = mod_compute(data_x, data_y, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_x, data_y, res]}

    te.lang.cce.cce_build_code(sch, config)
Example No. 20
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    calculate the backpropagation of leaky_relu operation
    y = gradients if x > 0, else negative_slope * gradients
    support dtype: float16, float32

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    g_dtype = g.get("dtype").lower()
    x_dtype = x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(g_dtype, check_list, param_name="input_g")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_elewise_shape_range([g, x], support_broadcast=True)
    if g_dtype != x_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "g", "x", g_dtype, x_dtype)
    ins = classify([g, x], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (g, x) in ins:
        with te.op.compute():
            g_shape, x_shape = variable_shape([g, x], support_broadcast=True)
            g_shape, x_shape = refine_shapes_for_broadcast(g_shape, x_shape)
            tensor_g = tvm.placeholder(g_shape, g_dtype, "tensor_g")
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            res = leaky_relu_grad_compute(tensor_g, tensor_x, y,
                                          negative_slope, kernel_name)
            tensors.append((tensor_g, tensor_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example No. 21
def xdivy(input_x, input_y, output_z, kernel_name="xdivy"):
    """
    algorithm: xdivy
    calculating data's xdivy, res = 0 if x == 0 else x / y, element-wise

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name : str
        kernel name, default value is "xdivy"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    dtype = input_x.get("dtype")
    shape_y = input_y.get("shape")
    dtype_y = input_y.get("dtype")

    util.compare_tensor_dict_key(input_x, input_y, "dtype")
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")
    shape_list = broadcast_shapes(shape_x, shape_y, param_name_input1="input_x",
                                  param_name_input2="input_y")
    input_dtype = dtype.lower()
    input_dtype_y = dtype_y.lower()
    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")

    reshape_x, reshape_y = refine_shapes_for_broadcast(shape_list[0],
                                                       shape_list[1])
    data_x = tvm.placeholder(reshape_x, dtype=input_dtype, name="data_x")
    data_y = tvm.placeholder(reshape_y, dtype=input_dtype, name="data_y")

    res = xdivy_compute(data_x, data_y, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_x, data_y, res]}
    te.lang.cce.cce_build_code(sch, config)
Example No. 22
def truncate_div(input_x, input_y, output_x, kernel_name="truncate_div"):
    """
    algorithm: truncate_div
    calculating data's truncate_div, res = floor(x / y) if x/y>0 else ceil(x/y)

    Parameters
    ----------
    input_x: dict with keys(shape and dtype)
        only support {float16, float32, int8, uint8(on mini)},
        {float16, float32(on cloud)}
    input_y: dict with keys(shape and dtype)
        dict info of input_y
    output_x: dict with keys(shape and dtype)
        dict info of output_x
    kernel_name: str
        kernel name, default value is "truncate_div"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    dtype = input_x.get("dtype")

    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    input_dtype = dtype.lower()
    check_list = ("float16", "float32", "int32", "int8", "uint8")
    check_dtype(input_dtype, check_list, param_name="input_x")

    shape_list = broadcast_shapes(shape_x,
                                  shape_y,
                                  param_name_input1="input_x",
                                  param_name_input2="input_y")
    reshape_x, reshape_y = refine_shapes_for_broadcast(shape_list[0],
                                                       shape_list[1])
    data1 = tvm.placeholder(reshape_x, dtype=input_dtype, name="data1")
    data2 = tvm.placeholder(reshape_y, dtype=input_dtype, name="data2")
    res = truncate_div_compute(data1, data2, output_x, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data1, data2, res]}
    te.lang.cce.cce_build_code(sch, config)
Example No. 23
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
    calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32
    x2 : dict
        shape and dtype of second input, only support float16, float32
    y: dict
        shape and dtype of output, should be the broadcast shape and same
        type as the inputs
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """

    x_dtype = x1.get("dtype").lower()
    y_dtype = x2.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="x1")
    check_dtype(y_dtype, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if x_dtype != y_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", x_dtype, y_dtype)
    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = real_div_compute(tensor_x, tensor_y, y, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example No. 24
def truncate_mod(input_x, input_y, output_z, kernel_name="truncate_mod"):
    """
    algorithm: truncatemod
    calculating truncated remainder, res = x - truncate(x / y) * y

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "truncatemod"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    dtype_x = input_x.get("dtype").lower()
    shape_y = input_y.get("shape")
    dtype_y = input_y.get("dtype").lower()

    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    shape_list = broadcast_shapes(shape_x,
                                  shape_y,
                                  param_name_input1="input_x",
                                  param_name_input2="input_y")
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_x, check_list, param_name="input_x")
    check_dtype(dtype_y, check_list, param_name="input_y")

    shape_x, shape_y = refine_shapes_for_broadcast(shape_list[0],
                                                   shape_list[1])
    data_x = tvm.placeholder(shape_x, dtype=dtype_x, name="data_x")
    data_y = tvm.placeholder(shape_y, dtype=dtype_y, name="data_y")
    res = truncate_mod_compute(data_x, data_y, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_x, data_y, res]}
    te.lang.cce.cce_build_code(sch, config)
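truncate_mod differs from floor_mod (Example No. 2) only in how the quotient is rounded, which matters when the operands have mixed signs. A tiny NumPy illustration of the two formulas from the docstrings (reference only):

import numpy as np

x, y = np.float32(-7.0), np.float32(3.0)
trunc_mod = x - np.trunc(x / y) * y   # -1.0: remainder takes the sign of x
floor_mod = x - np.floor(x / y) * y   #  2.0: remainder takes the sign of y
print(trunc_mod, floor_mod)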
Example No. 25
def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad

    sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad

    Parameters:
    ----------
    x : dictionary shape of sigmoid input

    dx : dictionary shape of grad

    out: dictionary output

    kernel_name : cce kernel name, default value is "sigmoid_grad"

    Returns
    -------
    None
    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(dx_dtype, check_list, param_name="input_dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (sig, dx) in ins:
        with te.op.compute():
            shape_sig, shape_dx = variable_shape([sig, dx],
                                                 support_broadcast=False)
            shape_sig, shape_dx = refine_shapes_for_broadcast(
                shape_sig, shape_dx)
            tensor_sig = tvm.placeholder(shape_sig, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dx_dtype, "tensor_dx")
            res = sigmoid_grad_compute(tensor_sig, tensor_dx, out, kernel_name)
            tensors.append([tensor_sig, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example No. 26
def greater(x, y, z, kernel_name="greater"):
    """
    do element-wise greater operation between two input tensors

    Parameters:
    ----------
    x : dict
        shape and dtype of input data_x
    y : dict
        shape and dtype of input data_y
    z : dict
        shape and dtype of output data_z
    kernel_name : str
        cce kernel name, default value is "greater"

    Returns
    -------
    None
    """
    shape_input_x = util.scalar2tensor_one(x.get("shape"))
    dtype_input_x = x.get("dtype").lower()
    shape_input_y = util.scalar2tensor_one(y.get("shape"))
    dtype_input_y = y.get("dtype").lower()

    check_shape(shape_input_x, param_name="x")
    check_shape(shape_input_y, param_name="y")

    check_list = ("float16", "float32", "int32", "int8", "uint8")
    check_dtype(dtype_input_x, check_list, param_name="x")

    shape_list = broadcast_shapes(shape_input_x,
                                  shape_input_y,
                                  param_name_input1="x",
                                  param_name_input2="y")

    reshape_x, reshape_y = refine_shapes_for_broadcast(shape_list[0],
                                                       shape_list[1])
    data_x = tvm.placeholder(reshape_x, dtype=dtype_input_x, name="data_x")
    data_y = tvm.placeholder(reshape_y, dtype=dtype_input_y, name="data_y")

    res = greater_compute(data_x, data_y, z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_x, data_y, res]}
    te.lang.cce.cce_build_code(sch, config)
Example No. 27
def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"):
    """
    algorithm: sqrt_grad_cce

    Parameters
    ----------
    x : dict
        dict of input data
    dx : dict
        dict of input gradient
    out : dict
        dict of output
    kernel_name : str
        cce kernel name, default value is "sqrt_grad"

    Returns
    -------
    None

    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="x")
    check_dtype(dx_dtype, check_list, param_name="dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, dx) in ins:
        with te.op.compute():
            x_shape, dx_shape = variable_shape([x, dx],
                                               support_broadcast=False)
            x_shape, dx_shape = refine_shapes_for_broadcast(x_shape, dx_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(dx_shape, dx_dtype, "tensor_dx")
            res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name)
            tensors.append([tensor_x, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example No. 28
def div_no_nan(input_x, input_y, output_z, kernel_name="div_no_nan"):
    """
    algorithm: div_no_nan_cce
    Returns 0 if the denominator is zero, else, like Div.
    Supports broadcasting.

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        cce kernel name, default value is "div_no_nan"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    dtype = input_x.get("dtype")

    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")
    shape_x, shape_y, shape_max = broadcast_shapes(shape_x, shape_y,
                                                   param_name_input1="input_x",
                                                   param_name_input2="input_y")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, ("float16", "float32",
                                        "int32", "int8", "uint8"), param_name="input_x")
    reshape_x, reshape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    data_x = tvm.placeholder(reshape_x, name="data_x", dtype=input_dtype)
    data_y = tvm.placeholder(reshape_y, name="data_y", dtype=input_dtype)

    res = div_no_nan_compute(data_x, data_y, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_x, data_y, res]}
    te.lang.cce.cce_build_code(sch, config)
Example No. 29
def sub(input_x, input_y, output_z, kernel_name="sub"):
    """
    do element-wise sub operation between two input tensors

    Parameters:
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32,int32
    input_y : dict
        shape and dtype of input, only support float16, float32,int32
    output_z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "sub"

    Returns
    -------
    None
    """

    check_list = ["float16", "float32", "int32"]
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    if x_dtype not in check_list or y_dtype not in check_list:
        error_detail = "sub only support float16, float32, int32"
        error_manager_vector.raise_err_two_input_dtype_invalid(
            kernel_name, "input_x", "input_y", error_detail)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            data1 = tvm.placeholder(x_shape, x_dtype, "data1")
            data2 = tvm.placeholder(y_shape, y_dtype, "data2")
            res = sub_compute(data1, data2, output_z, kernel_name)
            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example No. 30
def bitwise_and(x1, x2, y, kernel_name="bitwise_and"):
    """
    algorithm: bitwise_and
    computes the bitwise and of `x1` and `x2`

    Parameters
    ----------
    x1 : dict
              the shape and dtype of the tensor x1, only support int16,uint16
    x2 : dict
              the shape and dtype of the tensor x2, only support int16,uint16
    y : dict
              the shape and dtype of the tensor y, only support int16,uint16
    kernel_name : string
                  cce kernel name, default value is "bitwise_and"

    Returns
    -------
    None
    """
    shape_x, shape_y, dtype = _check_parameters(x1, x2, y, kernel_name)
    shape_x, shape_y, shape_max = broadcast_shapes(shape_x,
                                                   shape_y,
                                                   param_name_input1="x1",
                                                   param_name_input2="x2")
    shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)

    if dtype == "int32":
        dtype = "int16"
        shape_x.append(2)
        shape_y.append(2)

    data_x = tvm.placeholder(shape_x, name="data_x", dtype=dtype)
    data_y = tvm.placeholder(shape_y, name="data_y", dtype=dtype)

    res = bitwise_and_compute(data_x, data_y, y, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_x, data_y, res)}
    te.lang.cce.cce_build_code(schedule, config)