Code example #1
def sin(x, y, kernel_name="sin"):
    """
    algorithm: sin
    calculating data's sin: sin x = x - x^3/3! + x^5/5! - ... + (-1)^k*x^(2k+1)/(2k+1)! + ...

    Parameters
    ----------
    x : dict
        shape and dtype of input, only support float16, float32
    y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is "sin"

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype").lower()

    check_shape(shape_input, param_name="x")
    check_list = (FLOAT_16, FLOAT_32)
    check_dtype(dtype_input, check_list, param_name="x")
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape_input)
    data_input = tvm.placeholder(fuseshape,
                                 name="data_input",
                                 dtype=dtype_input)
    res = sin_compute(data_input, y, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_input, res)}
    te.lang.cce.cce_build_code(sch, config)
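These snippets all share an unshown module preamble. Below is a sketch of the imports they appear to rely on, an assumption based on the gekowa/ascend-opp sources; `reduceIns` is simply `functools.reduce`, and individual files import more (the dynamic-shape examples additionally pull classify, Mode, variable_shape and error_manager_vector from the dynamic-op utility modules).

# Assumed common preamble for these TBE operator entry points; requires the
# Huawei Ascend TBE toolchain. This is a sketch, not the verbatim header.
import operator
from functools import reduce as reduceIns  # the "reduceIns" used by every example
from math import isclose

import te.lang.cce
from te import tvm
from te import platform as tbe_platform
from te.utils import op_utils
from te.utils.op_utils import check_shape, check_dtype
from topi import generic
from topi.cce import util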
Code example #2
def check_supported(input_x, output_y, dst_type, kernel_name="cast"):
    """
    verify the types of cast supported by tbe
    """
    src_type = input_x.get("dtype").lower()
    check_result = False
    if src_type == "bool":
        src_type = "int8"

    dst_type = _cast_dsttype_conversion(dst_type)

    check_list = []
    if src_type == "float16":
        check_list = ["float32", "int32", "uint8"]
    elif src_type == "float32":
        check_list = ["float16", "int32"]
    elif src_type == "int8":
        check_list = ["float32", "float16", "int32", "uint8"]
    elif src_type == "uint8":
        check_list = ["float32", "float16", "int32"]
    elif src_type == "int32":
        check_list = ["bool", "uint8", "int8", "float32", "float16"]

    src_shape = input_x.get("shape")
    shape_size = reduceIns(lambda x, y: x * y, src_shape)
    if shape_size == 1 and src_type == "int64":
        check_list = ["int32", "float32"]

    if dst_type in check_list:
        check_result = True

    return check_result
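A usage sketch of the check above (hypothetical descriptors; `output_y` is unused by the check, and `_cast_dsttype_conversion` is assumed to pass dtype strings such as "int32" through unchanged):

x_desc = {"shape": (16, 16), "dtype": "float16"}
print(check_supported(x_desc, None, "int32"))  # True: float16 -> int32 is listed
print(check_supported(x_desc, None, "int8"))   # False: float16 -> int8 is not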
Code example #3
def log(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="log"):
    """
    calculating data

    Parameters
    ----------
    input_x : dict
        shape and dtype of input
    output_y : dict
        shape and dtype of output, should be same shape and type as input
    base : float
        (optional, default -1 for a value of e) the base gamma
    scale : float
        (optional, default 1) the scale alpha
    shift : float
        (optional, default 0) the shift beta
    kernel_name : str
        kernel name, default value is "log"

    Returns
    -------
    None
    """

    shape = input_x.get("shape")
    dtype = input_x.get("dtype")
    input_dtype = dtype.lower()

    # check input_x's shape
    op_utils.check_shape(shape, param_name="input_x")

    # check input_x's dtype; only fp16 and fp32 are supported
    check_list = ("float16", "float32")
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")

    if base <= 0 and (not isclose(base, -1.0)):
        error_info = {}
        error_info['errCode'] = 'E80000'
        error_info['param_name'] = 'base'
        error_info['op_name'] = 'log'
        error_info['expect_value'] = "strictly positive or -1"
        error_info['real_value'] = base
        raise RuntimeError("In op[%s], the parameter[%s] should be [%s], but actually is [%s]."
                           % (error_info['op_name'], error_info['param_name'], \
                              error_info['expect_value'], error_info['real_value']))

    fused_shape = [reduceIns(lambda x, y: x * y, shape[:])]
    data_input = tvm.placeholder(fused_shape,
                                 name="data_input",
                                 dtype=input_dtype)

    res = log_compute(data_input, output_y, base, scale, shift, kernel_name)

    # auto schedule
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # operator build
    config = {
        "name": kernel_name,
        "need_build": True,
        "tensor_list": (data_input, res)
    }

    te.lang.cce.cce_build_code(sch, config)
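`log_compute` itself is not shown above. A minimal sketch of a compute body consistent with this entry point, assuming the standard `te.lang.cce` element-wise API (`vmuls`, `vadds`, `vlog`) and ignoring any precision workarounds the real kernel may apply:

import math

def log_compute_sketch(data_input, output_y, base, scale, shift,
                       kernel_name="log"):
    # y = log_base(shift + scale * x); base == -1 selects the natural log.
    tmp = te.lang.cce.vmuls(data_input, scale)   # scale * x
    tmp = te.lang.cce.vadds(tmp, shift)          # shift + scale * x
    res = te.lang.cce.vlog(tmp)                  # ln(shift + scale * x)
    if base > 0:
        # change of base: log_b(t) = ln(t) / ln(b)
        res = te.lang.cce.vmuls(res, 1.0 / math.log(base))
    return res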
Code example #4
File: gelu_grad.py  Project: gekowa/ascend-opp
def gelu_grad(input_dy, input_x, input_y, output_z, kernel_name="gelu_grad"):
    """
    algorithm: gelu_grad
    calculating: dz = dy * res'
    res' = res/x +
           x*0.5*(1 - tanh(math_four)^2)*
           sqrt(2/pi)*(1 + 3*0.044715*x^2)
    math_four = sqrt(2/pi)*(x + 0.044715*x^3)

    Parameters
    ----------
    input_dy : dict
        shape and dtype of dy input, only support float16, float32
    input_x : dict
        shape and dtype of x input, only support float16, float32
    input_y : dict
        shape and dtype of y input, only support float16, float32
    output_z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is gelu_grad

    Returns:
    -------
    none.
    """
    shape_dy = input_dy.get("shape")
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")

    check_shape(shape_dy, param_name="input_dy")
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")
    input_dtype = input_dy.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="input_dy")
    shape_dy = list(shape_dy)
    shape_x = list(shape_x)
    shape_y = list(shape_y)
    if not (operator.eq(shape_dy, shape_x) and operator.eq(shape_dy, shape_y)):
        raise RuntimeError("all input shape must be equal")

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape_dy)
    data_dy = tvm.placeholder(fuseshape, name="data_dy", dtype=input_dtype)
    data_x = tvm.placeholder(fuseshape, name="data_x", dtype=input_dtype)
    data_gelu = tvm.placeholder(fuseshape, name="data_gelu", dtype=input_dtype)
    res = gelu_grad_compute(data_dy, data_x, data_gelu, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_dy, data_x, data_gelu, res]
    }

    te.lang.cce.cce_build_code(sch, config)
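The res' expression in the docstring can be sanity-checked against a numerical derivative of the tanh-based gelu approximation; a standalone numpy check, independent of the TBE kernel:

import numpy as np

c = np.sqrt(2.0 / np.pi)
gelu = lambda v: 0.5 * v * (1.0 + np.tanh(c * (v + 0.044715 * v**3)))

x = np.linspace(-3.0, 3.0, 6)                  # avoids x == 0 (res/x term)
math_four = c * (x + 0.044715 * x**3)
res = gelu(x)
analytic = (res / x
            + x * 0.5 * (1.0 - np.tanh(math_four)**2)
            * c * (1.0 + 3.0 * 0.044715 * x**2))
numeric = (gelu(x + 1e-6) - gelu(x - 1e-6)) / 2e-6
print(np.max(np.abs(analytic - numeric)))      # ~1e-9: the formulas agree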
Code example #5
def elu(x, y, alpha=1.0, kernel_name="elu"):
    """
    do element-wise elu operation

    Parameters:
    ----------
    x: the dict of input, only support float16, float32

    alpha: float, coefficient applied when the input tensor is less than zero

    y: the dict of output

    kernel_name: cce kernel name, default value is "elu"

    Returns
    -------
    None
    """

    shape_input = x.get("shape")
    dtype_input = x.get("dtype")
    input_dtype = dtype_input.lower()

    check_shape(shape_input, param_name="x")

    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="x")

    if not tbe_platform.cce_conf.api_check_support(
            "te.lang.cce.sum", "float32") and dtype_input == "float32":
        error_info = {}
        error_info['errCode'] = 'E80008'
        error_info['param_name'] = 'x'
        error_info['op_name'] = 'elu'
        error_info['expect_value'] = "float16"
        error_info['real_value'] = input_dtype
        raise RuntimeError(error_info, "In op[%s], the parameter[%s]'s dtype "
                                       "should be [%s], but actually is [%s]."
                           % (error_info['op_name'], error_info['param_name'], \
                              error_info['expect_value'], error_info['real_value']))

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape_input)
    data_input = tvm.placeholder(fuseshape,
                                 name="data_input",
                                 dtype=input_dtype)

    res = elu_compute(data_input, y, alpha, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(auto_sch, config)
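`elu_compute` is likewise not shown; a minimal sketch of the usual decomposition, assuming the `te.lang.cce` element-wise calls used elsewhere in this codebase (`vmaxs`, `vmins`, `vexp`, `vadds`, `vmuls`, `vadd`):

def elu_compute_sketch(data_input, y, alpha, kernel_name="elu"):
    # elu(x) = x                    for x > 0
    #        = alpha * (exp(x) - 1) for x <= 0
    pos = te.lang.cce.vmaxs(data_input, 0.0)              # max(x, 0)
    neg = te.lang.cce.vmins(data_input, 0.0)              # min(x, 0)
    neg = te.lang.cce.vadds(te.lang.cce.vexp(neg), -1.0)  # exp(min(x, 0)) - 1
    neg = te.lang.cce.vmuls(neg, alpha)
    return te.lang.cce.vadd(pos, neg)                     # sum of both branches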
Code example #6
File: exp.py  Project: gekowa/ascend-opp
def exp(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="exp"):
    """
    algorithm: exp
        calculating data's exp
    if base == -1:
       y = exp(shift + scale * x)
    if base > 0:
       y = exp((shift+scale*x)*ln(base))

    Parameters
    ----------
    input_x : dict,shape and dtype of input, only support float16,float32
    output_y: dict,shape and dtype of output, should be same shape and type as input
    base: (optional, default -1 for a value of e) the base gamma
    scale: (optional, default 1) the scale alpha
    shift: (optional, default 0) the shift beta
    kernel_name : str, kernel name, default value is "exp"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype")

    check_shape(shape, param_name="input_x")

    # input_x' dtype check, only supports fp16 and fp32
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")

    if base <= 0 and (not isclose(base, -1.0)):
        error_info = {}
        error_info['errCode'] = 'E80000'
        error_info['param_name'] = 'base'
        error_info['op_name'] = 'exp'
        error_info['expect_value'] = "strictly positive or -1"
        error_info['real_value'] = base
        raise RuntimeError(
            "In op[%s], the parameter[%s] should be [%s], but actually is [%s]."
            % (error_info['op_name'], error_info['param_name'],
               error_info['expect_value'], error_info['real_value']))
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data_input = tvm.placeholder(fuseshape,
                                 name="data_input",
                                 dtype=input_dtype)

    res = exp_compute(data_input, output_y, base, scale, shift, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)
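Invoking such an entry point needs only the dict descriptors; a hypothetical call (the kernel name is arbitrary):

x_desc = {"shape": (8, 1024), "dtype": "float16"}
y_desc = {"shape": (8, 1024), "dtype": "float16"}
# Builds a CCE kernel computing y = exp(x) (base -1 means natural exp).
exp(x_desc, y_desc, base=-1.0, scale=1.0, shift=0.0,
    kernel_name="exp_8x1024_fp16")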
Code example #7
def bnll(input_x, output_y, kernel_name="bnll"):
    """
    calculating data
    algorithm: y = x + log(1+exp(-x)) if x > 0; y = log(1+exp(x)) otherwise

    Parameters
    ----------
    input_x : dict
        shape and dtype of input
    output_y : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "bnll"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype")
    input_dtype = dtype.lower()
    check_shape(shape, param_name="x")

    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="x")
    product = tbe_platform.cce_conf.get_soc_spec("SOC_VERSION")
    if product in ["Ascend310", "Hi3796CV300ES", "Hi3796CV300CS"] and \
        input_dtype == "float32":
        error_info = {}
        error_info['errCode'] = 'E80008'
        error_info['param_name'] = 'input_x'
        error_info['op_name'] = 'bnll'
        error_info['expect_value'] = "float16"
        error_info['real_value'] = input_dtype
        raise RuntimeError(error_info, "In op[%s], the parameter[%s]'s dtype "
                                       "should be [%s], but actually is [%s]."
                           % (error_info['op_name'], error_info['param_name'],\
                              error_info['expect_value'], error_info['real_value']))

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x*y, shape)
    data_input = tvm.placeholder(fuseshape, name="data_input", dtype=input_dtype)

    res = _bnll_computer(data_input, product)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "print_ir": False,
              "bool_storage_as_1bit": False,
              "tensor_list": [data_input, res]}

    te.lang.cce.cce_build_code(schedule, config)
Code example #8
def exp(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="exp"):
    """
    algorithm: exp
        calculating data's exp
    if base == -1:
       y = exp(shift + scale * x)
    if base > 0:
       y = exp((shift+scale*x)*ln(base))

    Parameters
    ----------
    input_x : dict,shape and dtype of input, only support float16,float32
    output_y: dict,shape and dtype of output, should be same shape and type as input
    base: (optional, default -1 for a value of e) the base gamma
    scale: (optional, default 1) the scale alpha
    shift: (optional, default 0) the shift beta
    kernel_name : str, kernel name, default value is "exp"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    # input_x' dtype check, only supports fp16 and fp32
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    if base <= 0 and (not isclose(base, -1.0)):
        expect_value = "strictly positive or -1"
        real_value = "base <= 0 and base is not equal to -1"
        error_manager_vector.raise_err_input_value_invalid(
            kernel_name, "base", expect_value, real_value)
    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x,) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, shape_x[0])
            data_input = tvm.placeholder(fuseshape, name="data_input",
                                         dtype=input_dtype)
            res = exp_compute(data_input, output_y, base, scale, shift,
                              kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Code example #9
def relu(x, y, kernel_name="relu"):
    """
    Algorithm: relu(x) = max(x, 0)

    Parameters
    ----------
    x: dict
        dynamic input, include shape, dtype and range

    y: dict
        the dict of output

    kernel_name: str
        kernel name, must be string, default value is "relu".

    Returns
    -------
    None
    """

    # check input tensor data_type
    dtype_x = x.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "int32")
    check_dtype(dtype_x, check_list, param_name="x")

    ins = classify([x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([x])

            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_x[0])

            input_data = tvm.placeholder(fuse_shape,
                                         name="input_data",
                                         dtype=dtype_x)
            res = relu_compute(input_data, y, kernel_name)

            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}

    te.lang.dynamic.build(schedules, config)
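Unlike the static examples, these dynamic-shape entry points take descriptors carrying -1 dimensions plus a range; a hypothetical call (the exact range format is an assumption following the `variable_shape` convention):

# Unknown first dimension, bounded between 1 and 2048.
x_desc = {"shape": (-1,), "dtype": "float16", "range": ((1, 2048),)}
y_desc = {"shape": (-1,), "dtype": "float16"}
relu(x_desc, y_desc, kernel_name="relu_dynamic_fp16")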
Code example #10
def sqrt(input_x, output_y, kernel_name="sqrt"):
    """
    algorithm: sqrt
    calculating data's sqrt, y = x**0.5; the mini arch does not support
    vsqrt, so exp(0.5*log(x)) is used instead

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is sqrt

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # div_compute
            input_data = tvm.placeholder(fuseshape,
                                         name="input_data",
                                         dtype=x_dtype)
            res = sqrt_compute(input_data, output_y, kernel_name)

            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Code example #11
File: leaky_relu.py  Project: gekowa/ascend-opp
def leaky_relu(x, y, negative_slope=0, kernel_name="leaky_relu"):
    """leaky_relu op for input tensor

       f(x) = x                   if x >= 0
       f(x) = negative_slope * x  if x < 0

    Parameters
    ----------
    x : dict
        dict with keys(shape and dtype) of input
    y : dict
        dict with keys(shape and dtype) of output

    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization

    kernel_name : str
        cce kernel name, default value is "leaky_relu"

    Returns
    ------
    None
    """

    # check input tensor shape
    shape = x.get("shape")
    dtype = x.get("dtype")
    check_shape(shape, param_name="x")

    # check input tensor data_type
    check_list = ["float16", "float32", "int32", "int8"]
    check_dtype(dtype.lower(), check_list, param_name="x")
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    inp_dtype = dtype.lower()
    input_data_x = tvm.placeholder(fuseshape,
                                   name="input_data_x",
                                   dtype=inp_dtype)

    with tvm.target.cce():

        res = leaky_relu_compute(input_data_x, y, negative_slope, kernel_name)
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [input_data_x, res]}
    te.lang.cce.cce_build_code(sch, config)
Code example #12
def threshold(input_x, output_y, threshold=0.0, kernel_name="threshold"):
    """
    algorithm: threshold
    compare data with threshold: y = (x > threshold) ? 1 : 0

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be broadcast shape and type as input
    threshold: scalar
        parameter of the operator
    kernel_name : str
        kernel name, default value is "threshold"

    Returns
    -------
    None
    """

    # check shape
    shape = input_x.get("shape")
    op_utils.check_shape(shape, param_name="input_x")

    # check data type
    input_data_type = input_x.get("dtype").lower()
    op_utils.check_dtype(input_data_type, ["float16", "float32"],
                         param_name="input_x")

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data_x = tvm.placeholder(fuseshape, name="data_x", dtype=input_data_type)
    res = threshold_compute(data_x, threshold, output_y, kernel_name)

    with tvm.target.cce():
        schedule = cce.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "need_build": False,
        "tensor_list": (data_x, res)
    }
    te.lang.cce.cce_build_code(schedule, config)
Code example #13
def tanh_grad(y, dy, z, kernel_name="tanh_grad"):
    """
    do element-wise tanh_grad operation between two input tensors

    Parameters
    ----------
    y : dict
        shape and dtype of y input, only support float16, float32
    dy : dict
        shape and dtype of dy input, only support float16, float32
    z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is tanh_grad

    Returns
    -------
    None
    """
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")

    check_list = ("float16", "float32")
    dtype = y.get("dtype").lower()
    check_dtype(dtype, check_list, param_name="y")
    if list(shape_y) != list(shape_dy):
        raise RuntimeError("tanh_grad only support input shape"
                           "while input_shape1 equals to input_shape2")
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape_y)
    data_y = tvm.placeholder(fuseshape, dtype=dtype, name="data1")
    data_dy = tvm.placeholder(fuseshape, dtype=dtype, name="data2")
    res = tanh_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_y, data_dy, res]
    }
    te.lang.cce.cce_build_code(sch, config)
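tanh_grad conventionally computes dz = dy * (1 - y**2), the derivative of tanh expressed through its output; a quick numpy check against a numerical derivative (assuming that is what `tanh_grad_compute` implements):

import numpy as np

x = np.linspace(-2.0, 2.0, 9)
y = np.tanh(x)
dy = np.ones_like(x)                    # upstream gradient of 1
dz = dy * (1.0 - y * y)                 # expected tanh_grad output
numeric = (np.tanh(x + 1e-6) - np.tanh(x - 1e-6)) / 2e-6
print(np.max(np.abs(dz - numeric)))     # ~1e-10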
Code example #14
def square(input_x, output, kernel_name="square"):
    """
    algorithm: square
    calculating data's square,y= x*x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # square_compute
            data_x = tvm.placeholder(fuseshape, x_dtype, name="data_x")
            res = square_compute(data_x, output, kernel_name)

            tensors.append((data_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Code example #15
File: mish.py  Project: gekowa/ascend-opp
def mish(input_x, output_y, kernel_name="mish"):
    """
    algorithm: mish
    calculating data's mish,y= x*(1 - 2/(1+(1+exp(x))^2))

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is mish

    Returns
    -------
    None
    """

    input_shape = input_x.get("shape")
    input_format = input_x.get("format")
    input_dtype = input_x.get("dtype").lower()
    check_shape(input_shape, param_name="input_x")
    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="input_x")
    check_format(input_format)

    # fuse single axis
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, input_shape)

    data_x = tvm.placeholder(fuseshape, dtype=input_dtype, name="data_x")
    res = mish_compute(data_x, output_y, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_x, res]
    }
    te.lang.cce.cce_build_code(schedule, config)
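The docstring's algebraic form is equivalent to the usual definition mish(x) = x * tanh(softplus(x)); a numpy check of the identity:

import numpy as np

x = np.linspace(-4.0, 4.0, 9)
mish_ref = x * np.tanh(np.log1p(np.exp(x)))   # x * tanh(ln(1 + e^x))
t = 1.0 + np.exp(x)
mish_doc = x * (1.0 - 2.0 / (1.0 + t * t))    # form used by the kernel
print(np.max(np.abs(mish_ref - mish_doc)))    # ~1e-16: identical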
Code example #16
def log1p(input_x, output_y, kernel_name="log1p"):
    """
    algorithm: log1p
    calculating data's log1p, y = log(x + 1)

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        kernel name, default value is "log1p"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuseshape,
                                         dtype=input_dtype,
                                         name="data_input")
            res = log1p_compute(data_input, output_y, kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {
        "name": kernel_name,
        "tensor_list": tensors,
        "bool_storage_as_1bit": False
    }
    te.lang.dynamic.build(schedules, config)
Code example #17
File: abs.py  Project: gekowa/ascend-opp
def abs(x, y, kernel_name="abs"):
    """
    algorithm: abs

    calculating data's abs,y= |x|

    Parameters
    ----------
    x : dict
        shape and dtype of input, only support float16, float32, int32
    y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is abs

    Returns
    -------
    None
    """
    shape = x.get("shape")
    check_shape(shape, param_name="x")

    check_list = ["float16", "float32", "int32"]
    inp_dtype = x.get("dtype").lower()
    check_dtype(inp_dtype, check_list, param_name="x")

    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=inp_dtype)

    res = abs_compute(data, y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }

    te.lang.cce.cce_build_code(sch, config)
Code example #18
def rint(input_x, output_y, kernel_name="rint"):
    """
    algorithm: rint
    calculating rint(x):
    returns the element-wise integer nearest to x
    If the result is between two representable values,
     the even number should be used.
    For example:
    x :    [0.9, 2.5, 2.3, 1.5, -4.5]
    res : [ 1.0, 2.0, 2.0, 2.0, -4.0 ]

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    output_y: dict
        dict with keys(shape and dtype) of output_y
    kernel_name: str
        kernel name, default value is "rint"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    dtype = input_x.get("dtype")

    check_shape(shape_x, param_name="input_x")

    check_list = ("float16", "float32")
    check_dtype(dtype.lower(), check_list, param_name="input_x")
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x*y, shape_x)
    data_x = tvm.placeholder(fuseshape, dtype=dtype.lower(), name="data")
    res = rint_compute(data_x, output_y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_x, res]}
    te.lang.cce.cce_build_code(sch, config)
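The docstring example is round-half-to-even, which numpy's rint also implements; the expected output can be reproduced directly:

import numpy as np

x = np.array([0.9, 2.5, 2.3, 1.5, -4.5], dtype=np.float32)
print(np.rint(x))  # [ 1.  2.  2.  2. -4.]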
Code example #19
def square(input_x, output_y, kernel_name="square"):
    """
    algorithm: square
    calculating data's square,y= x*x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype").lower()
    check_shape(shape, param_name="input_x")

    check_list = ["float16", "float32", "int32"]
    if dtype not in check_list:
        raise RuntimeError("square only support float16, float32, int32")

    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=dtype)

    with tvm.target.cce():
        res = square_compute(data, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }

    te.lang.cce.cce_build_code(sch, config)
Code example #20
def sign(input_x, output_y, kernel_name="sign"):
    """
                                 x*32768
    algorithm: sign = round(-------------------------)
                             2 ** (-15) + |x*32768|

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is sign

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    check_shape(shape, param_name="input_x")

    check_list = ["float16", "float32", "int32"]
    inp_dtype = input_x.get("dtype").lower()
    if inp_dtype not in check_list:
        raise RuntimeError("sign only support float16, float32, int32")

    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=inp_dtype)

    res = sign_compute(data, output_y, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }
    te.lang.cce.cce_build_code(sch, config)
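The rounding trick in the docstring can be checked standalone: scaling by 32768 makes any nonzero input dominate the 2**(-15) bias, so the quotient rounds to +/-1 (a numpy check, not the TBE code path):

import numpy as np

def sign_formula(x):
    s = x * 32768.0
    return np.round(s / (2.0 ** -15 + np.abs(s)))

x = np.array([-3.5, -1e-4, 0.0, 1e-4, 7.0])
print(sign_formula(x))  # [-1. -1.  0.  1.  1.]
print(np.sign(x))       # matches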
Code example #21
def neg(input_x, output_y, kernel_name="neg"):
    """
    Computes numerical negative value element-wise, y = -x.

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32, int32, int8
    output_y: dict
        shape and dtype of output, should be same type as input
    kernel_name: str
        kernel name, default value is "neg"

    Returns
    -------
    None
    """
    dtype_input = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "int8")
    check_dtype(dtype_input, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])

            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuse_shape,
                                         name="data_input",
                                         dtype=dtype_input)
            res = neg_compute(data_input, output_y, kernel_name)

            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Code example #22
File: gelu.py  Project: gekowa/ascend-opp
def gelu(input_x, output_y, kernel_name="gelu"):
    """
    mathematical formula of gelu(x):
    gelu(x) = 0.5*x*(1.0+tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x,3))))
    tanh(y) = 2/(1+exp(-2y)) - 1
    convert gelu to result(x) =
     x/(1+e(-2*(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x,3)))))

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is gelu

    Returns
    -------
    none.
    """
    shape = input_x.get("shape")
    check_shape(shape, param_name="input_x")

    check_list = ("float16", "float32")
    input_dtype = input_x.get("dtype").lower()
    check_dtype(input_dtype, check_list, param_name="input_x")

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x*y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=input_dtype)
    result = gelu_compute(data, output_y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": [data, result]}

    te.lang.cce.cce_build_code(sch, config)
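The conversion in the docstring rests on the identity 0.5*x*(1 + tanh(u)) = x / (1 + exp(-2u)); a numpy check:

import numpy as np

x = np.linspace(-3.0, 3.0, 7)
u = np.sqrt(2.0 / np.pi) * (x + 0.044715 * x**3)
lhs = 0.5 * x * (1.0 + np.tanh(u))   # textbook tanh form
rhs = x / (1.0 + np.exp(-2.0 * u))   # sigmoid form used by the kernel
print(np.max(np.abs(lhs - rhs)))     # ~1e-16: identical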
Code example #23
File: threshold_v2_d.py  Project: gekowa/ascend-opp
def threshold_v2_d(x, y, threshold, value, kernel_name="threshold_v2_d_cce"):
    """
    Thresholds each element of the input Tensor
    y = (x > threshold) ? x : value

    Parameters
    ----------
    x : dict
        shape and dtype of input
    y : dict
        shape and dtype of output, should be same shape and type as input
    threshold : float
        scalar value to threshold at
    value : float
        scalar value to replace with
    kernel_name : str
        kernel name, default value is "threshold_v2_d_cce"

    Returns
    -------
    None
    """

    # get the shape and dtype
    shape_x = x.get("shape")
    dtype_x = x.get("dtype").lower()

    # check whether dtypes are right
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_x, check_list)

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape_x)

    data_x = tvm.placeholder(shape=fuseshape, name="data_x", dtype=dtype_x)
    res = threshold_v2_d_compute(data_x, y, threshold, value, kernel_name)
    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_x, res]}
    te.lang.cce.cce_build_code(schedule, config)
Code example #24
def sqrt(input_x, output_y, kernel_name="sqrt"):
    """
    algorithm: sqrt
    calculating data's sqrt, y = x**0.5; the mini arch does not support
    vsqrt, so exp(0.5*log(x)) is used instead

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is sqrt

    Returns
    -------
    None
    """
    input_shape = input_x.get("shape")
    input_dtype = input_x.get("dtype").lower()

    check_shape(input_shape, param_name="input_x")
    check_dtype(input_dtype, ("float16", "float32"), param_name="input_x")

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, input_shape)
    input_data = tvm.placeholder(fuseshape,
                                 name="input_data",
                                 dtype=input_dtype)
    result = sqrt_compute(input_data, output_y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [input_data, result]
    }

    te.lang.cce.cce_build_code(sch, config)
Code example #25
File: tanh.py  Project: gekowa/ascend-opp
def tanh(input_x, output_y, kernel_name="tanh"):
    """
    algorithm: tanh
    calculating data's tanh,y= (e^(2x)-1)/(e^(2x)+1)

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is tanh

    Returns
    -------
    None
    """
    input_shape = input_x.get("shape")
    input_dtype = input_x.get("dtype").lower()

    check_shape(input_shape, param_name="input_x")

    check_list = ("float16", "float32")
    check_dtype(input_dtype, check_list, param_name="input_x")
    # fuse single axis
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, input_shape)

    data = tvm.placeholder(fuseshape, name="data", dtype=input_dtype)
    res = tanh_compute(data, output_y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": [data, res]}

    te.lang.cce.cce_build_code(sch, config)
Code example #26
def fills(x, y, value, kernel_name="fills"):
    """
    do  fill operation

    Parameters:
    ----------
    x : the dict of input
    y : the dict of output
    value : scalar value to fill the tensor with
    kernel_name : cce kernel name, default value is "fills"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape = x.get("shape")
    dtype = x.get("dtype").lower()

    # check whether dtypes are right
    check_list = ("int32", "float16", "float32")
    check_dtype(dtype, check_list)

    # fuse shapes
    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data_x = tvm.placeholder(fuseshape, name="data_x", dtype=dtype)

    res = fills_compute(data_x, value, dtype)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (data_x, res),
        "print_ir": False
    }
    te.lang.cce.cce_build_code(sch, config)
Code example #27
File: tan.py  Project: gekowa/ascend-opp
def tan(x, y, kernel_name="tan"):
    """
    algorithm: tan
    calculating tan x = x + x^3/3 + 2*x^5/15 + 17*x^7/315 +
                        62*x^9/2835 + 1382*x^11/155925 ... (|x| < pi/2)

    Parameters
    ----------
    x: dict
        dict with keys(shape and dtype) of input
    y: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "tan"

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype").lower()

    check_shape(shape_input, param_name="x")
    check_list = (FLOAT_16, FLOAT_32, INT_32)
    check_dtype(dtype_input, check_list, param_name="x")

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape_input)
    data_input = tvm.placeholder(fuseshape,
                                 name="data_input",
                                 dtype=dtype_input)
    res = tan_compute(data_input, y, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_input, res)}
    te.lang.cce.cce_build_code(sch, config)
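The truncated series in the docstring can be compared against np.tan on a small interval (the first omitted term bounds the error):

import numpy as np

x = np.linspace(-0.5, 0.5, 11)
series = (x + x**3 / 3 + 2 * x**5 / 15 + 17 * x**7 / 315
          + 62 * x**9 / 2835 + 1382 * x**11 / 155925)
print(np.max(np.abs(series - np.tan(x))))  # small (~1e-6) for |x| <= 0.5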
Code example #28
File: ceil.py  Project: gekowa/ascend-opp
def ceil(input_x, output_y, kernel_name="ceil"):
    """
    algorithm: ceil
    calculating element-wise smallest integer not less than input_x,
    the type of input_x is float16 or float32

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input
    output_y: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "ceil"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype").lower()

    check_shape(shape, param_name="input_x")
    check_list = {"float16", "float32"}
    check_dtype(dtype, check_list, param_name="input_x")

    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, dtype=dtype, name="data")
    res = ceil_compute(data, output_y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data, res]}
    te.lang.cce.cce_build_code(sch, config)
Code example #29
File: sigmoid.py  Project: gekowa/ascend-opp
def sigmoid(x, y, kernel_name="sigmoid"):
    """
    calculating data

    Parameters
    ----------
    x : dict
        dict of x, include keys(shape and dtype)
    y : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "sigmoid"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")
    op_utils.check_shape(shape, param_name="x")
    input_dtype = dtype.lower()
    check_list = ("float16", "float32")
    op_utils.check_dtype(input_dtype, check_list, param_name="x")

    fused_shape = [reduceIns(lambda a, b: a * b, shape[:])]
    data_input = tvm.placeholder(fused_shape,
                                 name="data_input",
                                 dtype=input_dtype)

    res = sigmoid_compute(data_input, y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)
Code example #30
File: cast.py  Project: gekowa/ascend-opp
def cast(input_x, output_y, dst_type, kernel_name="cast"):
    """
    cast a tensor/scalar with the input shape from src data type to dst
    data type. restrictions of the input algorithms are as follows:
    only the type pairs below support tensor processing:
        float16->float32
        float16->int32
        float32->float16
        float32->int32
        int8->float32
        uint8->float32
        int8->float16
        uint8->float16
        int8->int32
        uint8->int32
        int32->uint8   // numbers outside [0,255] may give unexpected results
        int32->int8    // numbers outside [-128,127] may give unexpected results
        int32->float32 // on paths through fp16, only numbers
                        in [-1023,1023] are guaranteed correct
        int32->float16 // only numbers in [-1023,1023]
                        are guaranteed correct
    scalar conversion support (i.e. only shape [1,] is supported):
        int64->int32
        int64->float32

    Parameters
    ----------
    input_x : dict
        shape and dtype of input; supported dtypes are listed above
    output_y: dict
        shape and dtype of output, should be same shape as input,
        and the dtype is the dst dtype need to cast
    kernel_name : str
        cce kernel name, default value is cast

    Returns
    -------
    None
    """
    shape = util.scalar2tensor_one(input_x.get("shape"))
    src_type = input_x.get("dtype").lower()
    check_shape(shape, param_name="input_x")

    if src_type == "bool":
        src_type = "int8"

    dst_type = _cast_dsttype_conversion(dst_type)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=src_type)
    if src_type == "int64":
        check_dtype(dst_type, ("float32", "int32"), param_name="dst_type")
        res = tvm.extern(
            [fuseshape], [data],
            lambda ins, outs: _kernel_ir(outs, ins, dst_type, "int64"),
            name="res",
            dtype=dst_type)
        tensor_list = [data, res]
        schedule = tvm.create_schedule(res.op)
        with build_config:
            tvm.build(schedule, tensor_list, "cce", name=kernel_name)
    else:
        with tvm.target.cce():
            res = cast_compute(data, output_y, dst_type, kernel_name)
            sch = generic.auto_schedule(res)
        config = {
            "print_ir": False,
            "name": kernel_name,
            "tensor_list": [data, res]
        }
        te.lang.cce.cce_build_code(sch, config)