Example 1
def floor_div(input_x, input_y, output_z, kernel_name="floor_div"):
    """
      algorithm: floordiv
      calculating data's floordiv, res = floor(x / y)

      Parameters
      ----------
      input_x: dict, shape and dtype of the first input
      input_y: dict, shape and dtype of the second input
      output_z: dict, shape and dtype of the output
      kernel_name: str, default value is "floor_div"

      Returns
      -------
      None
    """
    # check dtype of input_x/input_y
    input_dtype_x = input_x.get("dtype").lower()
    input_dtype_y = input_y.get("dtype").lower()
    check_list = ('int8', 'uint8', 'int32', 'float16', 'float32')
    check_dtype(input_dtype_x, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if input_dtype_x != input_dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_div'
        error_info['param_name1'] = 'input_dtype_x'
        error_info['param_name2'] = 'input_dtype_y'
        error_info['param1_dtype'] = str(input_dtype_x)
        error_info['param2_dtype'] = str(input_dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, input_dtype_x, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, input_dtype_y, "tensor_y")
            res = floor_div_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
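
A minimal NumPy sketch of the formula in the docstring above, res = floor(x / y), for illustration only; it is not part of the TBE kernel.

import numpy as np

x = np.array([7.0, -7.0, 5.5], dtype=np.float32)
y = np.array([2.0, 2.0, -2.0], dtype=np.float32)
# floor of the true quotient, which matches NumPy's floor_divide
expected = np.floor(x / y)                       # [ 3. -4. -3.]
assert np.array_equal(expected, np.floor_divide(x, y))
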
Example 2
def exp(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="exp"):
    """
    algorithm: exp
        calculating data's exp
    if base == -1:
       y = exp(shift + scale * x)
    if base > 0:
       y = exp((shift+scale*x)*ln(base))

    Parameters
    ----------
    input_x : dict, shape and dtype of input, only support float16, float32
    output_y: dict, shape and dtype of output, should be same shape and type as input
    base: (optional, default -1 for a base of e) the base gamma
    scale: (optional, default 1) the scale alpha
    shift: (optional, default 0) the shift beta
    kernel_name : str, kernel name, default value is "exp"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    # input_x' dtype check, only supports fp16 and fp32
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    if base <= 0 and (not isclose(base, -1.0)):
        expect_value = "strictly positive or -1"
        real_value = "base <= 0 and base not equal to -1"
        error_manager_vector.raise_err_input_value_invalid(
            kernel_name, "base", expect_value, real_value)
    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x,) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, shape_x[0])
            data_input = tvm.placeholder(fuseshape, name="data_input",
                                         dtype=input_dtype)
            res = exp_compute(data_input, output_y, base, scale, shift,
                              kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
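
A NumPy sketch of the two cases stated in the docstring (base == -1 selects the natural base e; base > 0 uses exp(t * ln(base)), which equals base ** t); illustration only, not the exp_compute implementation.

import numpy as np

def exp_reference(x, base=-1.0, scale=1.0, shift=0.0):
    t = shift + scale * x
    if np.isclose(base, -1.0):
        return np.exp(t)                  # y = exp(shift + scale * x)
    return np.exp(t * np.log(base))       # y = base ** (shift + scale * x)

x = np.array([0.0, 1.0, 2.0], dtype=np.float32)
assert np.allclose(exp_reference(x, base=2.0), 2.0 ** x)
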
Example 3
def relu(x, y, kernel_name="relu"):
    """
    Algorithm: relu(x) = max(x, 0)

    Parameters
    ----------
    x: dict
        dynamic input, include shape, dtype and range

    y: dict
        the dict of output

    kernel_name: str
        kernel name, must be string, default value is "relu".

    Returns
    -------
    None
    """

    # check input tensor data_type
    dtype_x = x.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "int32")
    check_dtype(dtype_x, check_list, param_name="x")

    ins = classify([x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([x])

            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_x[0])

            input_data = tvm.placeholder(fuse_shape,
                                         name="input_data",
                                         dtype=dtype_x)
            res = relu_compute(input_data, y, kernel_name)

            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}

    te.lang.dynamic.build(schedules, config)
Example 4
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    calculate the backpropagation of leaky_relu operation
    y = gradients(x>0) or negative_slope*gradients(x<=0).
    support dtype:float16,float32

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    g_dtype = g.get("dtype").lower()
    x_dtype = x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(g_dtype, check_list, param_name="input_g")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_elewise_shape_range([g, x], support_broadcast=True)
    if g_dtype != x_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "g", "x", g_dtype, x_dtype)
    ins = classify([g, x], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (g, x) in ins:
        with te.op.compute():
            g_shape, x_shape = variable_shape([g, x], support_broadcast=True)
            g_shape, x_shape = refine_shapes_for_broadcast(g_shape, x_shape)
            tensor_g = tvm.placeholder(g_shape, g_dtype, "tensor_g")
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            res = leaky_relu_grad_compute(tensor_g, tensor_x, y,
                                          negative_slope, kernel_name)
            tensors.append((tensor_g, tensor_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
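
A NumPy reference for the backpropagation rule quoted in the docstring (the gradient passes through where x > 0 and is scaled by negative_slope elsewhere); illustration only, not leaky_relu_grad_compute.

import numpy as np

def leaky_relu_grad_reference(g, x, negative_slope=0.0):
    return np.where(x > 0, g, negative_slope * g)

g = np.ones(3, dtype=np.float32)
x = np.array([-2.0, 0.0, 3.0], dtype=np.float32)
print(leaky_relu_grad_reference(g, x, negative_slope=0.1))   # [0.1 0.1 1. ]
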
Example 5
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
    calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32
    x2 : dict
        shape and dtype of second input, only support float16, float32
    y: dict
        shape and dtype of output, should be broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """

    x_dtype = x1.get("dtype").lower()
    y_dtype = x2.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if x_dtype != y_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", x_dtype, y_dtype)
    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = real_div_compute(tensor_x, tensor_y, y, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 6
def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad

    sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad

    Parameters:
    ----------
    x : dictionary shape of sigmoid input

    dx : dictionary shape of grad

    out: dictionary output

    kernel_name : cce kernel name, default value is "sigmoid_grad"

    Returns
    -------
    None
    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(dx_dtype, check_list, param_name="input_dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (sig, dx) in ins:
        with te.op.compute():
            shape_sig, shape_dx = variable_shape([sig, dx],
                                                 support_broadcast=False)
            shape_sig, shape_dx = refine_shapes_for_broadcast(
                shape_sig, shape_dx)
            tensor_sig = tvm.placeholder(shape_sig, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dx_dtype, "tensor_dx")
            res = sigmoid_grad_compute(tensor_sig, tensor_dx, out, kernel_name)
            tensors.append([tensor_sig, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
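
A NumPy sketch of the formula sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad, where the first input is the forward sigmoid output; illustration only, not sigmoid_grad_compute.

import numpy as np

def sigmoid_grad_reference(sig, grad):
    return (sig - sig * sig) * grad

sig = np.array([0.5, 0.9], dtype=np.float32)
grad = np.ones_like(sig)
print(sigmoid_grad_reference(sig, grad))   # approximately [0.25 0.09]
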
Example 7
def sqrt(input_x, output_y, kernel_name="sqrt"):
    """
    algorithm: sqrt
    calculating data's sqrt, y = x**0.5; mini chips do not support vsqrt, so exp(0.5*log(x)) is used

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is sqrt

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # div_compute
            input_data = tvm.placeholder(fuseshape,
                                         name="input_data",
                                         dtype=x_dtype)
            res = sqrt_compute(input_data, output_y, kernel_name)

            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
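
A quick NumPy check of the identity the docstring relies on when vsqrt is unavailable, sqrt(x) == exp(0.5 * log(x)) for x > 0; illustration only.

import numpy as np

x = np.array([1.0, 4.0, 10.0], dtype=np.float32)
assert np.allclose(np.sqrt(x), np.exp(0.5 * np.log(x)))
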
Example 8
def square(input_x, output, kernel_name="square"):
    """
    algorithm: square
    calculating data's square, y = x*x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # square_compute
            data_x = tvm.placeholder(fuseshape, x_dtype, name="data_x")
            res = square_compute(data_x, output, kernel_name)

            tensors.append((data_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 9
def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"):
    """
    algorithm: sqrt_grad_cce

    Parameters
    ----------
    x : dict
        dict of input data

    dx : dict
        dict of gradient data

    out : dict
        dict of output

    kernel_name : str
        cce kernel name, default value is "sqrt_grad"

    Returns
    -------
    None

    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="x")
    check_dtype(dx_dtype, check_list, param_name="dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, dx) in ins:
        with te.op.compute():
            x_shape, dx_shape = variable_shape([x, dx],
                                               support_broadcast=False)
            x_shape, dx_shape = refine_shapes_for_broadcast(x_shape, dx_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(dx_shape, dx_dtype, "tensor_dx")
            res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name)
            tensors.append([tensor_x, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 10
def log1p(input_x, output_y, kernel_name="log1p"):
    """
    algorithm: log1p
    calculating data's log1p, y = log(x + 1)

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        kernel name, default value is "log1p"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuseshape,
                                         dtype=input_dtype,
                                         name="data_input")
            res = log1p_compute(data_input, output_y, kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {
        "name": kernel_name,
        "tensor_list": tensors,
        "bool_storage_as_1bit": False
    }
    te.lang.dynamic.build(schedules, config)
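
A NumPy sketch of y = log(x + 1); np.log1p is the numerically stable form of the same formula for very small x. Illustration only, not log1p_compute.

import numpy as np

x = np.array([0.0, 1e-12, 1.0], dtype=np.float64)
print(np.log1p(x))        # stable even for tiny x
print(np.log(x + 1.0))    # loses precision when x + 1 rounds near 1
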
Example 11
def sub(input_x, input_y, output_z, kernel_name="sub"):
    """
    do element-wise sub operation between two input tensors

    Parameters:
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32,int32
    input_y : dict
        shape and dtype of input, only support float16, float32,int32
    output_z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : kernel name, default value is "sub"

    Returns
    -------
    None
    """

    check_list = ["float16", "float32", "int32"]
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    if x_dtype not in check_list or y_dtype not in check_list:
        error_detail = "sub only support float16, float32, int32"
        error_manager_vector.raise_err_two_input_dtype_invalid(
            kernel_name, "input_x", "input_y", error_detail)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            data1 = tvm.placeholder(x_shape, x_dtype, "data1")
            data2 = tvm.placeholder(y_shape, y_dtype, "data2")
            res = sub_compute(data1, data2, output_z, kernel_name)
            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 12
def neg(input_x, output_y, kernel_name="neg"):
    """
    Computes numerical negative value element-wise, y = -x.

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32, int32, int8
    output_y: dict
        shape and dtype of output, should be same type as input
    kernel_name: str
        kernel name, default value is "neg"

    Returns
    -------
    None
    """
    dtype_input = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "int8")
    check_dtype(dtype_input, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])

            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuse_shape,
                                         name="data_input",
                                         dtype=dtype_input)
            res = neg_compute(data_input, output_y, kernel_name)

            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 13
def zeros_like(x, y, kernel_name="zeros_like"):
    """
    output a tensor of all zero, you can specify the output type

    Parameters
    ----------
    x: dict
        shape and dtype of input, only support float16, float32,
        int32,int8,uint8,bool
    y: dict
        shape and dtype of output data
    kernel_name: str
        cce kernel name, default value is "zeros_like"

    Returns
    ------
    None
    """
    dtype_x = x.get("dtype")
    check_list_src = ("float16", "float32", "int32", "int8", "uint8", "bool")
    src_dtype = dtype_x.lower()
    check_dtype(src_dtype, check_list_src, param_name="x")
    schedules, tensors = [], []
    ins = classify([x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            shape_x = (functools_reduce(lambda x, y: x * y, shape_x[0]), )
            x_input = tvm.placeholder(shape_x, name="x_input", dtype=src_dtype)
            res = zeros_like_compute(x_input, y, kernel_name=kernel_name)
            tensors.append([x_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 14
def mul(input1, input2, output, kernel_name="mul"):
    """
    algorithm: mul
    calculating data's mul, c = a * b

    Parameters
    ----------
    input1 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    input2 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    output: dict
        include ori_shape, shape, ori_format, format, dtype and range
        shape must be broadcast shape of input
    kernel_name : str
        cce kernel name, default value is mul

    Returns
    -------
    None
    """

    # check dtype
    dtype_x1 = input1.get("dtype").lower()
    dtype_x2 = input2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x1, check_list, param_name="input1")
    check_dtype(dtype_x2, check_list, param_name="input2")
    check_elewise_shape_range([input1, input2], support_broadcast=True)
    if dtype_x1 != dtype_x2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'mul'
        error_info['param_name1'] = 'dtype_x1'
        error_info['param_name2'] = 'dtype_x2'
        error_info['param1_dtype'] = str(dtype_x1)
        error_info['param2_dtype'] = str(dtype_x2)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input1, input2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input1, input2) in ins:
        with te.op.compute():
            # shape
            shape_x1, shape_x2 = variable_shape([input1, input2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1,
                                                             shape_x2)
            # mul_compute
            data_x1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data_x1")
            data_x2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data_x2")
            res = mul_compute(data_x1, data_x2, output, kernel_name)

            tensors.append((data_x1, data_x2, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 15
def floor_mod(x1, x2, y, kernel_name="floor_mod"):
    """
    calculate the remainder of division, support fp16,fp32,int32
    res = x1 - floor(x1 / x2) * x2

    Parameters
    ----------
    x1: dict
        dict{"shape":tuple or list,"dtype":str, "range": tuple or list}
        shape of data
        the data type, src_dtype equals dst_dtype, support fp16,fp32,int32
    x2: dict
        dict{"shape":tuple or list,"dtype":str, "range": tuple or list}
        shape of data
        the data type, src_dtype equals dst_dtype, support fp16,fp32,int32
    y: dict, reserved field
        dict with keys(shape, dtype and range) of output
    kernel_name: str
        cce kernel name, default value is "floor_mod"

    Returns
    ------
    None
    """

    # check input tensor data_type
    dtype_x = x1.get("dtype").lower()
    dtype_y = x2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x, check_list, param_name="x1")
    check_dtype(dtype_y, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)

    if dtype_x != dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_mod'
        error_info['param_name1'] = 'dtype_x'
        error_info['param_name2'] = 'dtype_y'
        error_info['param1_dtype'] = str(dtype_x)
        error_info['param2_dtype'] = str(dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([x1, x2], support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            input_data_x = tvm.placeholder(shape_x, name="input_data_x",
                                           dtype=dtype_x)
            input_data_y = tvm.placeholder(shape_y, name="input_data_y",
                                           dtype=dtype_y)
            res = floor_mod_compute(input_data_x, input_data_y, y, kernel_name)

            tensors.append([input_data_x, input_data_y, res])
        with tvm.target.cce():
            auto_sch = generic.auto_schedule(res)
        schedules.append(auto_sch)

    config = {"name": kernel_name,
              "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
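
A NumPy reference for the docstring formula res = x1 - floor(x1 / x2) * x2 (the result takes the sign of x2, like Python's % operator); illustration only.

import numpy as np

x1 = np.array([7.0, -7.0], dtype=np.float32)
x2 = np.array([3.0, 3.0], dtype=np.float32)
res = x1 - np.floor(x1 / x2) * x2
assert np.array_equal(res, np.mod(x1, x2))   # [1. 2.]
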
Example 16
def cast(input_x, output_y, dst_type, kernel_name="cast"):
    """
    cast a tensor/scalar with input shape from src data type to dst data
    type. Restrictions of input algorithms are as follows;
    only the type groups below support tensor process:
        float16->float32
        float16->int32
        float32->float16
        float32->int32
        int8->float32
        uint8->float32
        int8->float16
        uint8->float16
        int8->int32
        uint8->int32
        int32->uint8 // number out of [0,255] can get unexpected result
        int32->int8 // number out of [-128,127] can get unexpected result
        int32->float32 // for conversion via fp16, only guarantees
                        numbers in [-1023,1023] get the correct result
        int32->float16 // only guarantees
                        numbers in [-1023,1023] get the correct result
    scalar convert support (means only support shape [1,]):
        int64->int32
        int64->float32

    Parameters
    ----------
    input_x : dict
        shape and dtype of input; supported source dtypes are listed above
    output_y: dict
        shape and dtype of output, should be same shape as input,
        and the dtype is the dst dtype need to cast
    kernel_name : str
        cce kernel name, default value is cast

    Returns
    -------
    None
    """

    src_type = input_x.get("dtype").lower()

    if src_type == "bool":
        src_type = "int8"

    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x,) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            dst_type = _cast_dsttype_conversion(dst_type)
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data = tvm.placeholder(fuseshape, name="data", dtype=src_type)
            if src_type == "int64":
                check_dtype(dst_type, ("float32", "int32"),
                            param_name="dst_type")
                res = tvm.extern(
                    [fuseshape], [data],
                    lambda ins, outs: _kernel_ir(outs, ins, dst_type, "int64"),
                    name="res",
                    dtype=dst_type)
                tensor_list = [data, res]
                schedule = tvm.create_schedule(res.op)
                with build_config:
                    tvm.build(schedule, tensor_list, "cce", name=kernel_name)
            else:
                res = cast_compute(data, output_y, dst_type, kernel_name)
                tensors.append([data, res])
        if src_type != "int64":
            with tvm.target.cce():
                sch = generic.auto_schedule(res)
            schedules.append(sch)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": tensors
    }
    te.lang.dynamic.build(schedules, config)
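
A hedged NumPy illustration of why the int32 -> int8 range note above matters: values outside [-128, 127] cannot be represented, so the result is implementation-defined (NumPy, used here only as an example, wraps modulo 256).

import numpy as np

vals = np.array([100, 200, -300], dtype=np.int32)
print(vals.astype(np.int8))   # [100, -56, -44] after wrap-around
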
Example 17
def less_equal(input_x, input_y, output_z, kernel_name="less_equal"):
    """
    Returns the truth value of (x <= y) element-wise

    Parameters
    ----------
    input_x: dict
        dict{"shape":tuple or list, "dtype":str, range: tuple or list},
        shape, range, and dtype of first input,
        support float16,float32,int32,int8,uint8
    input_y: dict
        dict{"shape":tuple or list, "dtype":str, range: tuple or list},
        shape, range, and dtype of second input,
        support float16,float32,int32,int8,uint8
    output_z: dict
        dict of output, should be broadcast shape and type as input
    kernel_name: str
        cce kernel name, default value is "less_equal"

    Returns
    -------
    None
    """
    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "uint8", "int8")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'less_equal'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] are not equal in "
            "dtype with dtype[%s][%s]." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param_name2'], error_info['param1_dtype'],
             error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            # shape
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)

            # less_equal compute
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = less_equal_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 18
def add_n(inputs, output, tensor_num, kernel_name="add_n"):
    """
    algorithm: add_n
    calculating data's adds, z = a + b + c...

    Parameters
    ----------
    inputs : list or tuple of dict
        A list of Tensor objects,
        each with same shape, range and dtype of first input,
        only support float16, float32, int32.
    output : dict
        shape, range and dtype of output,
        should be broadcast shape and type as input.
    tensor_num: int
        the number of inputs
    kernel_name : string
        cce kernel name, default value is add_n

    Returns
    -------
    None
    """
    # check inputs num
    input_num = len(inputs)
    if input_num < 2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_012
        error_info['op_name'] = 'add_n'
        error_info['param_name'] = 'input_num'
        error_info['max_value'] = '8'
        error_info['min_value'] = '2'
        error_info['real_value'] = str(input_num)
        raise RuntimeError(
            error_info, "In op[%s], the num of dimensions of input[%s] "
            "should be in the range of [%s, %s], but actually "
            "is [%s]." % (error_info['op_name'], error_info['param_name'],
                          error_info['min_value'], error_info['max_value'],
                          error_info['real_value']))
    if input_num != tensor_num:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_017
        error_info['op_name'] = 'add_n'
        error_info['param_name1'] = 'input_num'
        error_info['param_name2'] = 'tensor_num'
        error_info['param1_shape'] = str(input_num)
        error_info['param2_shape'] = str(tensor_num)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] is not match with"
            "the parameter[%s][%s],it should be the same." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param1_shape'], error_info['param_name2'],
             error_info['param2_shape']))

    dtype_0 = inputs[0].get("dtype").lower()
    for index in range(0, tensor_num):
        shape_input = inputs[index].get("shape")
        check_shape(shape_input, param_name="inputs")
        dtype_input = inputs[index].get("dtype").lower()
        check_list = ("float16", "float32", "int32")
        check_dtype(dtype_input, check_list, param_name="inputs")
        if dtype_input != dtype_0:
            error_info = {}
            error_info['errCode'] = OP_ERROR_CODE_018
            error_info['op_name'] = 'add_n'
            error_info['param_name1'] = 'dtype_input'
            error_info['param_name2'] = 'dtype_0'
            error_info['param1_dtype'] = str(dtype_input)
            error_info['param2_dtype'] = str(dtype_0)
            raise RuntimeError(
                error_info, "In op[%s], the parameter"
                "[%s][%s] are not equal in "
                "dtype with dtype[%s][%s]." %
                (error_info['op_name'], error_info['param_name1'],
                 error_info['param_name2'], error_info['param1_dtype'],
                 error_info['param2_dtype']))

    ins = classify(inputs, Mode.ELEWISE)
    schedules, tensors = [], []
    for inputs in ins:
        with te.op.compute():
            shape_normalize = variable_shape(inputs)
            fuse_shape = [1]
            datas = []
            for (i, input_dict), shape_i in zip(enumerate(inputs),
                                                shape_normalize):
                fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_i)
                datas.append(
                    tvm.placeholder(fuse_shape,
                                    name="data_%d" % i,
                                    dtype=dtype_0))

            # add_n_compute
            res = add_n_compute(datas, output, kernel_name)

            tensors.append(datas + [res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
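
A NumPy sketch of the element-wise multi-input sum z = a + b + c... described in the docstring; the operator above fuses this into one kernel, this is just the reference math.

import numpy as np

inputs = [np.full((4,), v, dtype=np.float32) for v in (1.0, 2.0, 3.0)]
res = inputs[0]
for tensor in inputs[1:]:
    res = res + tensor
assert np.array_equal(res, np.full((4,), 6.0, dtype=np.float32))
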
Example 19
def add(input_x, input_y, output_z, kernel_name="add"):
    """
    algorithm: add
    calculating data's add, c = a + b

    Parameters
    ----------
    input_x : dict
       including shape, dtype and range, only support float16, float32, int32
    input_y : dict
       including shape, dtype and range, only support float16, float32, int32
    output_z: dict
       shape should be broadcast shape of input, and type equals to input
    kernel_name : str
       cce kernel name, default value is add

    Returns
    -------
    None
    """

    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'add'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    # format_pattern = 1  Nz and vector
    # format_pattern = 2  vector and Nz
    # format_pattern = 0  Nz scalar  Nz Nz  ND ND
    format_pattern = _add_check_format(input_x, input_y)

    # infer shape for supporting add
    shape_x, shape_y = _infer_shape(format_pattern, input_x, input_y)
    shape_x = scalar2tensor_one(shape_x)
    shape_y = scalar2tensor_one(shape_y)

    # normalize shape
    input_x["shape"] = shape_x
    input_y["shape"] = shape_y

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            data_x = tvm.placeholder(shape_x, name="data_1", dtype=x_dtype)
            data_y = tvm.placeholder(shape_y, name="data_2", dtype=y_dtype)
            res = add_compute(data_x, data_y, output_z, kernel_name)

            tensors.append((data_x, data_y, res))
        with tvm.target.cce():
            schedule = generic.auto_schedule(res)
        schedules.append(schedule)

    config = {"print_ir": False, "name": kernel_name,
              "tensor_list": tensors}

    te.lang.dynamic.build(schedules, config)
Example 20
def reduce_sum(x, axes, y, keepdims=False, kernel_name="reduce_sum"):
    """reduce a tensor on a certain axes based on sum.

    Parameters:
    ----------
    x: dict
        the dict of input tensor.
    axes: dict
        the axes for reduce.
    y: dict
        the dict of output tensor.
    keepdims: bool or NONETYPE
        if true, retains reduced dimensions with length 1.
    kernel_name: str
        cce kernel name, default value is "reduce_sum".

    Returns
    -------
    None
    """

    dtype_x = x["dtype"]
    dtype_lower_x = dtype_x.lower()
    check_list_x = ("float16", "float32")
    check_dtype(dtype_lower_x, check_list_x, param_name="x")

    dtype_axes = axes["dtype"]
    dtype_lower_axes = dtype_axes.lower()
    check_list_axes = ("int32", "int64")
    check_dtype(dtype_lower_axes, check_list_axes, param_name="axes")
    input_shape = x.get("shape")

    if not _check_data_shape_const(input_shape):
        schedules = []
        ins = classify([x, axes], Mode.REDUCE)
        tensors = []
        shape_axes = variable_shape([axes])[0]
        data_input_axes = tvm.placeholder(shape_axes,
                                          name="data_input_axes",
                                          dtype=dtype_lower_axes)

        for (x, axes) in ins:
            with te.op.compute():
                shape_x = variable_shape([x])[0]
                data_input_x = tvm.placeholder(shape_x,
                                               name="data_input_x",
                                               dtype=dtype_lower_x)
                shape_len = len(shape_x)
                axes_d = cce_util.axis_check(shape_len, axes)
                res = reduce_sum_compute(data_input_x, axes_d, y, keepdims)

                tensors.append([data_input_x, data_input_axes, res])

            with tvm.target.cce():
                schedule = generic.auto_schedule(res)
            schedules.append(schedule)

        # build
        config = {"name": kernel_name, "tensor_list": tensors}
        te.lang.dynamic.build(schedules, config)
        add_compile_info("reduce_axis_unknown", 1)

    else:
        _reduce_sum_const(x, axes, keepdims, kernel_name)
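
A NumPy sketch of the reduce-on-axes semantics described in the docstring, including the keepdims flag; illustration only, not reduce_sum_compute.

import numpy as np

x = np.ones((2, 3, 4), dtype=np.float32)
print(np.sum(x, axis=1, keepdims=True).shape)    # (2, 1, 4)
print(np.sum(x, axis=1, keepdims=False).shape)   # (2, 4)
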
Example 21
def maximum(x1, x2, y, kernel_name="maximum"):
    """
    do element-wise maximum operation between two input tensors

    Parameters:
    ----------
    x1 : dict
        first input dict, only support float16, float32, int32
    x2 : dict
        second input dict, only support float16, float32, int32
    y: dict
        output dict, should be the broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is maximum

    Returns
    -------
    None
    """

    # check input tensor data dtype
    check_list = ["float16", "float32", "int32"]
    dtype_x1 = x1.get("dtype").lower()
    dtype_x2 = x2.get("dtype").lower()
    check_dtype(dtype_x1, check_list, param_name="x1")
    check_dtype(dtype_x2, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)

    if dtype_x1 != dtype_x2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'maximum'
        error_info['param_name1'] = 'dtype_x1'
        error_info['param_name2'] = 'dtype_x2'
        error_info['param1_dtype'] = str(dtype_x1)
        error_info['param2_dtype'] = str(dtype_x2)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] are not equal in "
            "dtype with dtype[%s][%s]." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param_name2'], error_info['param1_dtype'],
             error_info['param2_dtype']))

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x1, shape_x2 = variable_shape([x1, x2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(
                shape_x1, shape_x2)
            data1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data1")
            data2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data2")
            res = maximum_compute(data1, data2, y, kernel_name)

            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example 22
def div(input_x, input_y, output_z, kernel_name="div"):
    """
    algorithm: div
    calculating data's div, res = x / y

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "div"

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)

    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'div'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] are not equal in "
            "dtype with dtype[%s][%s]." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param_name2'], error_info['param1_dtype'],
             error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = div_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)