Example #1
def reduce_max_d(x, y, axes=None, keepdims=None, kernel_name="reduce_max_d"):
    """
    reduce a tensor over certain axes based on max.

    Parameters
    ----------
    x : dict
        shape and dtype of input
    y : dict
        shape and dtype of output, should be same shape and type as input
    axes: int or list
        the axes to reduce; may be negative to index from the end
        (e.g., -1 for the last axis).
        axes may be an int or a list (e.g. [1, 2])
    keepdims: bool
        if true, retains reduced dimensions with length 1,
        default value is None
    kernel_name : str
        kernel name, default value is "reduce_max_d"

    Returns
    -------
    None
    """

    dtype = x["dtype"]
    dtype_lower = dtype.lower()
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_lower, check_list)

    with te.op.compute():
        shape = x["shape"]
        shape_range = x["range"]

        shape_len = len(shape)
        if not axes:
            axes = range(shape_len)
        if hasattr(axes, 'index'):
            axes = list(axes)
        axes = cce_util.axis_check(shape_len, axes)

        shape_new, shape_range_new, axes_new, fused_rel_dic = \
            fused_reduce_axis(shape, shape_range, axes)
        add_compile_info("fused_rel_dic", fused_rel_dic)

        x["shape"] = shape_new
        x["range"] = shape_range_new
        shape_var_new = variable_shape([x])[0]

        data_input = tvm.placeholder(shape_var_new, name="data_input",
                                     dtype=dtype_lower)
        res = reduce_max_d_compute(data_input, y, axes_new, keepdims)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # build
    config = {"name": kernel_name,
              "tensor_list": [data_input, res]}
    te.lang.dynamic.build(sch, config)
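For orientation, the reduction the kernel above builds matches NumPy's max reduction over the chosen axes; this host-side sketch (an illustration with assumed shapes, not part of the TBE build flow) shows the expected numerics for one concrete case:

import numpy as np

# Reference semantics only: max over `axes` with keepdims, as reduce_max_d
# computes on device, shown here for a fixed (4, 8, 16) float16 input.
x_np = np.random.rand(4, 8, 16).astype(np.float16)
ref = np.max(x_np, axis=(1, 2), keepdims=True)
assert ref.shape == (4, 1, 1)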
Example #2
def floor_div(input_x, input_y, output_z, kernel_name="floor_div"):
    """
      algorithm: floordiv
      calculating data's floordiv, res = floor(x / y)

      Parameters
      ----------
      input_x: dict
      input_y: dict
      output_z: dict
      kernel_name: str, default value is "floor_div"

      Returns
      -------
      None
    """
    # check dtype of input_x/input_y
    input_dtype_x = input_x.get("dtype").lower()
    input_dtype_y = input_y.get("dtype").lower()
    check_list = ('int8', 'uint8', 'int32', 'float16', 'float32')
    check_dtype(input_dtype_x, check_list, param_name="input_x")
    check_dtype(input_dtype_y, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if input_dtype_x != input_dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_div'
        error_info['param_name1'] = 'input_dtype_x'
        error_info['param_name2'] = 'input_dtype_y'
        error_info['param1_dtype'] = str(input_dtype_x)
        error_info['param2_dtype'] = str(input_dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, input_dtype_x, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, input_dtype_y, "tensor_y")
            res = floor_div_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
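The docstring states res = floor(x / y); a minimal NumPy reference (illustration only, with assumed inputs) makes the rounding toward negative infinity explicit:

import numpy as np

# floor(x / y) with broadcasting; np.floor_divide matches it for the
# float dtypes in check_list.
x_np = np.array([7.0, -7.0, 7.0], dtype=np.float32)
y_np = np.array([2.0, 2.0, -2.0], dtype=np.float32)
ref = np.floor(x_np / y_np)                       # [ 3., -4., -4.]
assert np.array_equal(ref, np.floor_divide(x_np, y_np))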
Example #3
def reduce_sum_d(x, y, axis=None, keepdims=None, kernel_name="reduce_sum_d"):
    """reduce a tensor on a certain axis based on sum.

    Parameters:
    ----------
    x: dict
        the dict of input tensor.
    y: dict
        the dict of output tensor.
    axis: int, list, tuple or NONETYPE
        the axis for reduce.
    keepdims: bool or NONETYPE
        if true, retains reduced dimensions with length 1.
    kernel_name: str
        cce kernel name, default value is "reduce_sum_d".

    Returns
    -------
    None
    """

    dtype = x["dtype"]
    dtype_lower = dtype.lower()
    check_list = ("float16", "float32")
    check_dtype(dtype_lower, check_list, param_name="x")

    with te.op.compute():
        shape = x["shape"]
        shape_range = x["range"]

        axes = []
        shape_len = len(shape)
        if not axis:
            for i, _ in enumerate(shape):
                axes.append(i)
        else:
            axes = list(axis)
        axes = cce_util.axis_check(shape_len, axes)

        shape_new, shape_range_new, axes_new, fused_rel_dic = \
            fused_reduce_axis(shape, shape_range, axes)

        add_compile_info("fused_rel_dic", fused_rel_dic)
        x["shape"] = shape_new
        x["range"] = shape_range_new
        shape_var_new = variable_shape([x])[0]

        data_input = tvm.placeholder(shape_var_new,
                                     name="data_input",
                                     dtype=dtype_lower)
        res = reduce_sum_d_compute(data_input, y, axes_new, keepdims)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # build
    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.dynamic.build(sch, config)
Example #4
def bn_training_reduce(x, sum, square_sum, kernel_name="bn_training_reduce"):
    """
    algorithm: part of fused_batch_norm_v2
    The first step of batch_norm
    which to calculate the sum and square sum of x.
    The major component of this operator is reduce operation.

    Parameters
    ----------
    x: dict
        dict of input, A 5HD Tensor for input data.
    sum: dict
        dict of sum, A `Tensor`. Sum of x.
    square_sum: dict
        dict of square_sum, A `Tensor`. Square sum of x.
    kernel_name: str
        kernel name, default value is "bn_training_reduce"

    Returns
    -------
    None
    """
    data_format = x.get("format").upper()
    origin_format = x.get("ori_format").upper()
    dtype = x.get("dtype").lower()

    # check and format
    check_list = ("NC1HWC0", "NCHW")
    check_format(data_format, check_list, param_name="x")
    if data_format == "NCHW" and origin_format not in ("NCHW", ):
        raise RuntimeError("The origin format only supports "
                           "NCHW when format is NCHW")

    # check dtype
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="x")

    # get dynamic shape, x.get("shape"), x.get("range")
    shape_x = variable_shape([x])[0]

    # compute
    with te.op.compute():
        data_input = tvm.placeholder(shape_x, name="data_input", dtype=dtype)
        res = bn_training_reduce_compute(data_input,
                                         sum,
                                         square_sum,
                                         kernel_name=kernel_name)

    # schedule
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # build
    tensor_list = [data_input] + list(res)
    config = {"name": kernel_name, "tensor_list": tensor_list}
    te.lang.dynamic.build(sch, config)
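bn_training_reduce produces the per-channel sum and square sum that feed the later batch-norm steps. The exact reduce axes are not shown here, so the following NumPy sketch assumes the usual NC1HWC0 convention of reducing over the batch and spatial dims (an assumption for illustration only):

import numpy as np

# Assumed reduce axes: N, H, W of an NC1HWC0 tensor, keeping (C1, C0).
x_np = np.random.rand(2, 4, 5, 5, 16).astype(np.float32)
sum_ref = np.sum(x_np, axis=(0, 2, 3))
square_sum_ref = np.sum(x_np * x_np, axis=(0, 2, 3))
print(sum_ref.shape, square_sum_ref.shape)        # (4, 16) (4, 16)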
Example #5
def exp(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="exp"):
    """
    algorithm: exp
        calculating data's exp
    if base == -1:
       y = exp(shift + scale * x)
    if base > 0:
       y = exp((shift+scale*x)*ln(base))

    Parameters
    ----------
    input_x : dict, shape and dtype of input, only support float16, float32
    output_y: dict, shape and dtype of output, should be same shape and type as input
    base: (optional, default -1 for a base of e) the base gamma
    scale: (optional, default 1) the scale alpha
    shift: (optional, default 0) the shift beta
    kernel_name : str, kernel name, default value is "exp"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    # input_x' dtype check, only supports fp16 and fp32
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    if base <= 0 and (not isclose(base, -1.0)):
        expect_value = "strictly positive or -1"
        real_value = "base <= 0 and base is not equal to -1"
        error_manager_vector.raise_err_input_value_invalid(
            kernel_name, "base", expect_value, real_value)
    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x,) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, shape_x[0])
            data_input = tvm.placeholder(fuseshape, name="data_input",
                                         dtype=input_dtype)
            res = exp_compute(data_input, output_y, base, scale, shift,
                              kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
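The two branches documented above (natural base when base == -1, otherwise a rescaled exponent) reduce to base ** (shift + scale * x) for base > 0. A NumPy reference with a hypothetical helper name, for illustration only:

import numpy as np

def exp_reference(x, base=-1.0, scale=1.0, shift=0.0):
    # base == -1: y = exp(shift + scale * x)
    # base  >  0: y = exp((shift + scale * x) * ln(base)) == base ** (shift + scale * x)
    t = shift + scale * x
    return np.exp(t) if base == -1.0 else np.exp(t * np.log(base))

x_np = np.linspace(-1.0, 1.0, 5, dtype=np.float32)
assert np.allclose(exp_reference(x_np, base=2.0), 2.0 ** x_np)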
Example #6
def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    calculate the backpropagation of leaky_relu operation
    y = gradients (where x > 0) or negative_slope * gradients (where x <= 0).
    support dtype: float16, float32

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    g_dtype = g.get("dtype").lower()
    x_dtype = x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(g_dtype, check_list, param_name="input_g")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_elewise_shape_range([g, x], support_broadcast=True)
    if g_dtype != x_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "g", "x", g_dtype, x_dtype)
    ins = classify([g, x], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (g, x) in ins:
        with te.op.compute():
            g_shape, x_shape = variable_shape([g, x], support_broadcast=True)
            g_shape, x_shape = refine_shapes_for_broadcast(g_shape, x_shape)
            tensor_g = tvm.placeholder(g_shape, g_dtype, "tensor_g")
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            res = leaky_relu_grad_compute(tensor_g, tensor_x, y,
                                          negative_slope, kernel_name)
            tensors.append((tensor_g, tensor_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
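The rule in the docstring passes the incoming gradient through where x > 0 and scales it by negative_slope elsewhere; a NumPy sketch with a hypothetical helper, for illustration:

import numpy as np

def leaky_relu_grad_reference(g, x, negative_slope=0.0):
    # d(leaky_relu)/dx is 1 for x > 0 and negative_slope for x <= 0
    return np.where(x > 0, g, negative_slope * g)

g_np = np.ones(4, dtype=np.float32)
x_np = np.array([-2.0, -0.5, 0.5, 2.0], dtype=np.float32)
print(leaky_relu_grad_reference(g_np, x_np, 0.1))  # [0.1 0.1 1.  1. ]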
Example #7
def relu(x, y, kernel_name="relu"):
    """
    Algorithm: relu(x) = max(x, 0)

    Parameters
    ----------
    x: dynamic input, include shape, dtype and range

    y: the dict of output

    kernel_name: kernel name, must be string, default value is "relu".

    Returns
    -------
    None
    """

    # check input tensor data_type
    dtype_x = x.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "int32")
    check_dtype(dtype_x, check_list, param_name="x")

    ins = classify([x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([x])

            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_x[0])

            input_data = tvm.placeholder(fuse_shape,
                                         name="input_data",
                                         dtype=dtype_x)
            res = relu_compute(input_data, y, kernel_name)

            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}

    te.lang.dynamic.build(schedules, config)
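Several elementwise examples in this collection (relu above, exp, sqrt, neg, ...) fold the dynamic shape into a single fused dimension with reduceIns(lambda x, y: x * y, ...). The intent is simply a product-of-dims flatten, since elementwise ops do not care about layout; a plain-Python sketch of that idea, with a hypothetical helper name, for illustration:

from functools import reduce

def fuse_elementwise_shape(shape):
    # Fold all dims into one: (N, H, W, C) -> (N*H*W*C,)
    return [reduce(lambda x, y: x * y, shape)]

print(fuse_elementwise_shape([2, 3, 4]))          # [24]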
Example #8
def real_div(x1, x2, y, kernel_name="real_div"):
    """
    algorithm: real_div
    calculating data's real_div, c = a / b

    Parameters
    ----------
    x1 : dict
        shape and dtype of first input, only support float16, float32, int32
    x2 : dict
        shape and dtype of second input, only support float16, float32, int32
    y: dict
        shape and dtype of output, should be broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is real_div

    Returns
    -------
    None
    """

    x_dtype = x1.get("dtype").lower()
    y_dtype = x2.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([x1, x2], support_broadcast=True)
    if x_dtype != y_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x1", "x2", x_dtype, y_dtype)
    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = real_div_compute(tensor_x, tensor_y, y, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #9
def sigmoid_grad(x, dx, out, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad

    sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad

    Parameters:
    ----------
    x : dictionary shape of sigmoid input

    dx : dictionary shape of grad

    out: dictionary output

    kernel_name : cce kernel name, default value is "sigmoid_grad_cce"

    Returns
    -------
    None
    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(dx_dtype, check_list, param_name="input_dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (sig, dx) in ins:
        with te.op.compute():
            shape_sig, shape_dx = variable_shape([sig, dx],
                                                 support_broadcast=False)
            shape_sig, shape_dx = refine_shapes_for_broadcast(
                shape_sig, shape_dx)
            tensor_sig = tvm.placeholder(shape_sig, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(shape_dx, dx_dtype, "tensor_dx")
            res = sigmoid_grad_compute(tensor_sig, tensor_dx, out, kernel_name)
            tensors.append([tensor_sig, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
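The docstring formula (sigmoid - sigmoid*sigmoid) * grad takes the already computed forward output as its first input, so no exp is needed in the backward pass; a NumPy check with a hypothetical helper, for illustration:

import numpy as np

def sigmoid_grad_reference(sig, grad):
    # sig is the forward sigmoid output, so d(sigmoid)/dz = sig * (1 - sig)
    return (sig - sig * sig) * grad

z = np.linspace(-3.0, 3.0, 7, dtype=np.float32)
sig = 1.0 / (1.0 + np.exp(-z))
print(sigmoid_grad_reference(sig, np.ones_like(sig)))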
Example #10
def sqrt(input_x, output_y, kernel_name="sqrt"):
    """
    algorithm: sqrt
    calculating data's sqrt, y = x**0.5; mini does not support vsqrt, so exp(0.5*log(x)) is used

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is sqrt

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # div_compute
            input_data = tvm.placeholder(fuseshape,
                                         name="input_data",
                                         dtype=x_dtype)
            res = sqrt_compute(input_data, output_y, kernel_name)

            tensors.append([input_data, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
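The fallback mentioned in the docstring relies on the identity sqrt(x) = exp(0.5 * log(x)) for x > 0; a NumPy check of that identity, for illustration only:

import numpy as np

# sqrt(x) == exp(0.5 * log(x)) for x > 0, the rewrite used when the target
# has no native vsqrt instruction.
x_np = np.array([0.25, 1.0, 2.0, 16.0], dtype=np.float32)
assert np.allclose(np.sqrt(x_np), np.exp(0.5 * np.log(x_np)))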
Example #11
def square(input_x, output, kernel_name="square"):
    """
    algorithm: square
    calculating data's square,y= x*x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            # shape
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            # square_compute
            data_x = tvm.placeholder(fuseshape, x_dtype, name="data_x")
            res = square_compute(data_x, output, kernel_name)

            tensors.append((data_x, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #12
def sqrt_grad(x, dx, out, kernel_name="sqrt_grad"):
    """
    algorithm: sqrt_grad_cce

    Parameters
    ----------
    x : dict
        dict of data

    dx : dict
        dict of data_grad

    out : dict
        dict of output

    kernel_name : str
        cce kernel name, default value is "sqrt_grad"

    Returns
    -------
    None

    """
    x_dtype = x.get("dtype").lower()
    dx_dtype = dx.get("dtype").lower()
    check_list = ("float16", "float32")
    check_dtype(x_dtype, check_list, param_name="x")
    check_dtype(dx_dtype, check_list, param_name="dx")
    check_elewise_shape_range([x, dx], support_broadcast=False)
    if x_dtype != dx_dtype:
        error_manager_vector.raise_err_inputs_dtype_not_equal(
            kernel_name, "x", "dx", x_dtype, dx_dtype)
    ins = classify([x, dx], Mode.ELEWISE)
    schedules, tensors = [], []
    for (x, dx) in ins:
        with te.op.compute():
            x_shape, dx_shape = variable_shape([x, dx],
                                               support_broadcast=False)
            x_shape, dx_shape = refine_shapes_for_broadcast(x_shape, dx_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_dx = tvm.placeholder(dx_shape, dx_dtype, "tensor_dx")
            res = sqrt_grad_compute(tensor_x, tensor_dx, out, kernel_name)
            tensors.append([tensor_x, tensor_dx, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #13
def fill(dims, value, y, kernel_name="fill"):
    """
    do  fill operation

    Parameters:
    ----------
    dims : the dict of input
    value :  the dict of input
    y:  the dict of output
    kernel_name : cce kernel name, default value is "fill"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape = value.get("shape")
    dtype = value.get("dtype").lower()
    dtype_dims = dims.get("dtype").lower()
    dims["shape"] = [-1]
    dims['range'] = [[1, None]]

    # check whether dtypes are right
    check_list = ("int32", "float16", "float32")
    check_dtype(dtype, check_list)

    schedules, tensors = [], []

    with te.op.compute():
        shape_dim = variable_shape([dims])
        x_input = tvm.placeholder(shape, name="x_input", dtype=dtype)
        dim_input = tvm.placeholder(shape_dim[0], name="dim_input", dtype=dtype_dims)

        res = fill_compute(shape_dim[0], x_input, y, kernel_name=kernel_name)
        tensors.append([dim_input, x_input, res])
    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}

    te.lang.dynamic.build(schedules, config)
    te.op.add_compile_info("_use_special_pattern", False)
Example #14
def log1p(input_x, output_y, kernel_name="log1p"):
    """
    algorithm: log1p
    calculating data's log1p, y = log(x + 1)

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        kernel name, default value is "log1p"

    Returns
    -------
    None
    """
    dtype = input_x.get("dtype")
    check_list = ("float16", "float32")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuseshape,
                                         dtype=input_dtype,
                                         name="data_input")
            res = log1p_compute(data_input, output_y, kernel_name)
            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {
        "name": kernel_name,
        "tensor_list": tensors,
        "bool_storage_as_1bit": False
    }
    te.lang.dynamic.build(schedules, config)
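A dedicated log1p exists because computing log(x + 1) directly loses precision when x is tiny and 1 + x rounds in the working dtype; a small NumPy comparison, for illustration:

import numpy as np

x_np = np.float32(1e-7)
naive = np.log(np.float32(1.0) + x_np)   # 1 + x rounds in float32, so the result drifts
better = np.log1p(x_np)                  # stays accurate for small x
print(naive, better)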
Example #15
def sub(input_x, input_y, output_z, kernel_name="sub"):
    """
    do element-wise sub operation between two input tensors

    Parameters:
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32,int32
    input_y : dict
        shape and dtype of input, only support float16, float32,int32
    output_z: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : kernel name, default value is "sub"

    Returns
    -------
    None
    """

    check_list = ["float16", "float32", "int32"]
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    if x_dtype not in check_list or y_dtype not in check_list:
        error_detail = "sub only support float16, float32, int32"
        error_manager_vector.raise_err_two_input_dtype_invalid(
            kernel_name, "input_x", "input_y", error_detail)

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([x1, x2], support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            data1 = tvm.placeholder(x_shape, x_dtype, "data1")
            data2 = tvm.placeholder(y_shape, y_dtype, "data2")
            res = sub_compute(data1, data2, output_z, kernel_name)
            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    # build
    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #16
def neg(input_x, output_y, kernel_name="neg"):
    """
    Computes numerical negative value element-wise, y = -x.

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32, int32, int8
    output_y: dict
        shape and dtype of output, should be same type as input
    kernel_name: str
        kernel name, default value is "neg"

    Returns
    -------
    None
    """
    dtype_input = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "int8")
    check_dtype(dtype_input, check_list, param_name="input_x")

    ins = classify([input_x], Mode.ELEWISE)
    schedules, tensors = [], []
    for (input_x, ) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])

            fuse_shape = [1]
            fuse_shape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data_input = tvm.placeholder(fuse_shape,
                                         name="data_input",
                                         dtype=dtype_input)
            res = neg_compute(data_input, output_y, kernel_name)

            tensors.append([data_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #17
def zeros_like(x, y, kernel_name="zeros_like"):
    """
    output a tensor of all zero, you can specify the output type

    Parameters
    ----------
    x: dict
        shape and dtype of input, only support float16, float32,
        int32,int8,uint8,bool
    y: dict
        shape and dtype of output data
    kernel_name: str
        cce kernel name, default value is "zeros_like"

    Returns
    ------
    None
    """
    dtype_x = x.get("dtype")
    check_list_src = ("float16", "float32", "int32", "int8", "uint8", "bool")
    src_dtype = dtype_x.lower()
    check_dtype(src_dtype, check_list_src, param_name="x")
    schedules, tensors = [], []
    ins = classify([x], Mode.ELEWISE)
    for (input_x, ) in ins:
        with te.op.compute():
            shape_x = variable_shape([input_x])
            shape_x = (functools_reduce(lambda x, y: x * y, shape_x[0]), )
            x_input = tvm.placeholder(shape_x, name="x_input", dtype=src_dtype)
            res = zeros_like_compute(x_input, y, kernel_name=kernel_name)
            tensors.append([x_input, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #18
def add_n(inputs, output, tensor_num, kernel_name="add_n"):
    """
    algorithm: add_n
    calculating data's adds, z = a + b + c...

    Parameters
    ----------
    inputs : list or tuple of dict
        A list of Tensor objects,
        each with same shape, range and dtype of first input,
        only support float16, float32, int32.
    output : dict
        shape, range and dtype of output,
        should be broadcast shape and type as input.
    tensor_num: int
        number of inputs
    kernel_name : string
        cce kernel name, default value is add_n

    Returns
    -------
    None
    """
    # check inputs num
    input_num = len(inputs)
    if input_num < 2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_012
        error_info['op_name'] = 'add_n'
        error_info['param_name'] = 'input_num'
        error_info['max_value'] = '8'
        error_info['min_value'] = '2'
        error_info['real_value'] = str(input_num)
        raise RuntimeError(
            error_info, "In op[%s], the num of dimensions of input[%s] "
            "should be in the range of [%s, %s], but actually "
            "is [%s]." % (error_info['op_name'], error_info['param_name'],
                          error_info['min_value'], error_info['max_value'],
                          error_info['real_value']))
    if input_num != tensor_num:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_017
        error_info['op_name'] = 'add_n'
        error_info['param_name1'] = 'input_num'
        error_info['param_name2'] = 'tensor_num'
        error_info['param1_shape'] = str(input_num)
        error_info['param2_shape'] = str(tensor_num)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] is not match with"
            "the parameter[%s][%s],it should be the same." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param1_shape'], error_info['param_name2'],
             error_info['param2_shape']))

    dtype_0 = inputs[0].get("dtype").lower()
    for index in range(0, tensor_num):
        shape_input = inputs[index].get("shape")
        check_shape(shape_input, param_name="inputs")
        dtype_input = inputs[index].get("dtype").lower()
        check_list = ("float16", "float32", "int32")
        check_dtype(dtype_input, check_list, param_name="inputs")
        if dtype_input != dtype_0:
            error_info = {}
            error_info['errCode'] = OP_ERROR_CODE_018
            error_info['op_name'] = 'add_n'
            error_info['param_name1'] = 'dtype_input'
            error_info['param_name2'] = 'dtype_0'
            error_info['param1_dtype'] = str(dtype_input)
            error_info['param2_dtype'] = str(dtype_0)
            raise RuntimeError(
                error_info, "In op[%s], the parameter"
                "[%s][%s] are not equal in "
                "dtype with dtype[%s][%s]." %
                (error_info['op_name'], error_info['param_name1'],
                 error_info['param_name2'], error_info['param1_dtype'],
                 error_info['param2_dtype']))

    ins = classify(inputs, Mode.ELEWISE)
    schedules, tensors = [], []
    for inputs in ins:
        with te.op.compute():
            shape_normalize = variable_shape(inputs)
            fuse_shape = [1]
            datas = []
            for (i, input_dict), shape_i in zip(enumerate(inputs),
                                                shape_normalize):
                fuse_shape[0] = reduceIns(lambda x, y: x * y, shape_i)
                datas.append(
                    tvm.placeholder(fuse_shape,
                                    name="data_%d" % i,
                                    dtype=dtype_0))

            # add_n_compute
            res = add_n_compute(datas, output, kernel_name)

            # include the output tensor so tensor_list covers inputs and result
            datas.append(res)
            tensors.append(datas)
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
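The variadic sum z = a + b + c ... has a direct host-side counterpart; a NumPy sketch with a hypothetical helper (the shape/dtype checks above are omitted), for illustration:

import functools
import numpy as np

def add_n_reference(tensor_list):
    # elementwise sum of all inputs, matching z = a + b + c ...
    return functools.reduce(np.add, tensor_list)

xs = [np.full(4, i, dtype=np.float32) for i in range(3)]
print(add_n_reference(xs))                        # [3. 3. 3. 3.]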
Example #19
def add(input_x, input_y, output_z, kernel_name="add"):
    """
    algorithm: add
    calculating data's add, c = a + b

    Parameters
    ----------
    input_x : dict
       including shape, dtype and range, only support float16, float32, int32
    input_y : dict
       including shape, dtype and range, only support float16, float32, int32
    output_z: dict
       shape should be broadcast shape of input, and type equals to input
    kernel_name : str
       cce kernel name, default value is add

    Returns
    -------
    None
    """

    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'add'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    # format_pattern = 1  Nz and vector
    # format_pattern = 2  vector and Nz
    # format_pattern = 0  Nz scalar  Nz Nz  ND ND
    format_pattern = _add_check_format(input_x, input_y)

    # infer shape for supporting add
    shape_x, shape_y = _infer_shape(format_pattern, input_x, input_y)
    shape_x = scalar2tensor_one(shape_x)
    shape_y = scalar2tensor_one(shape_y)

    # normalize shape
    input_x["shape"] = shape_x
    input_y["shape"] = shape_y

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            data_x = tvm.placeholder(shape_x, name="data_1", dtype=x_dtype)
            data_y = tvm.placeholder(shape_y, name="data_2", dtype=y_dtype)
            res = add_compute(data_x, data_y, output_z, kernel_name)

            tensors.append((data_x, data_y, res))
        with tvm.target.cce():
            schedule = generic.auto_schedule(res)
        schedules.append(schedule)

    config = {"print_ir": False, "name": kernel_name,
              "tensor_list": tensors}

    te.lang.dynamic.build(schedules, config)
Example #20
def reduce_sum(x, axes, y, keepdims=False, kernel_name="reduce_sum"):
    """reduce a tensor on a certain axes based on sum.

    Parameters:
    ----------
    x: dict
        the dict of input tensor.
    axes: dict
        the axes for reduce.
    y: dict
        the dict of output tensor.
    keepdims: bool or NONETYPE
        if true, retains reduced dimensions with length 1.
    kernel_name: str
        cce kernel name, default value is "reduce_sum".

    Returns
    -------
    None
    """

    dtype_x = x["dtype"]
    dtype_lower_x = dtype_x.lower()
    check_list_x = ("float16", "float32")
    check_dtype(dtype_lower_x, check_list_x, param_name="x")

    dtype_axes = axes["dtype"]
    dtype_lower_axes = dtype_axes.lower()
    check_list_axes = ("int32", "int64")
    check_dtype(dtype_lower_axes, check_list_axes, param_name="axes")
    input_shape = x.get("shape")

    if not _check_data_shape_const(input_shape):
        schedules = []
        ins = classify([x, axes], Mode.REDUCE)
        tensors = []
        shape_axes = variable_shape([axes])[0]
        data_input_axes = tvm.placeholder(shape_axes,
                                          name="data_input_axes",
                                          dtype=dtype_lower_axes)

        for (x, axes) in ins:
            with te.op.compute():
                shape_x = variable_shape([x])[0]
                data_input_x = tvm.placeholder(shape_x,
                                               name="data_input_x",
                                               dtype=dtype_lower_x)
                shape_len = len(shape_x)
                axes_d = cce_util.axis_check(shape_len, axes)
                res = reduce_sum_compute(data_input_x, axes_d, y, keepdims)

                tensors.append([data_input_x, data_input_axes, res])

            with tvm.target.cce():
                schedule = generic.auto_schedule(res)
            schedules.append(schedule)

        # build
        config = {"name": kernel_name, "tensor_list": tensors}
        te.lang.dynamic.build(schedules, config)
        add_compile_info("reduce_axis_unknown", 1)

    else:
        _reduce_sum_const(x, axes, keepdims, kernel_name)
Example #21
def maximum(x1, x2, y, kernel_name="maximum"):
    """
    do element-wise maximum operation between two input tensors

    Parameters:
    ----------
    x1 : dict
        first input dict, only support float16, float32, int32
    x2 : dict
        second input dict, only support float16, float32, int32
    y: dict
        output dict, should be the broadcast shape and type as input
    kernel_name : str
        cce kernel name, default value is maximum

    Returns
    -------
    None
    """

    # check input tensor data dtype
    check_list = ["float16", "float32", "int32"]
    dtype_x1 = x1.get("dtype").lower()
    dtype_x2 = x2.get("dtype").lower()
    check_dtype(dtype_x1, check_list, param_name="x1")
    check_dtype(dtype_x2, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)

    if dtype_x1 != dtype_x2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'maximum'
        error_info['param_name1'] = 'dtype_x1'
        error_info['param_name2'] = 'dtype_x2'
        error_info['param1_dtype'] = str(dtype_x1)
        error_info['param2_dtype'] = str(dtype_x2)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] are not equal in "
            "dtype with dtype[%s][%s]." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param_name2'], error_info['param1_dtype'],
             error_info['param2_dtype']))

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x1, shape_x2 = variable_shape([x1, x2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(
                shape_x1, shape_x2)
            data1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data1")
            data2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data2")
            res = maximum_compute(data1, data2, y, kernel_name)

            tensors.append([data1, data2, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"print_ir": False, "name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #22
def mul(input1, input2, output, kernel_name="mul"):
    """
    algorithm: mul
    calculating data's mul, c = a * b

    Parameters
    ----------
    input1 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    input2 : dict
        include ori_shape, shape, ori_format, format, dtype and range
        dtype only support float16, float32, int32
    output: dict
        include ori_shape, shape, ori_format, format, dtype and range
        shape must be broadcast shape of input
    kernel_name : str
        cce kernel name, default value is mul

    Returns
    -------
    None
    """

    # check dtype
    dtype_x1 = input1.get("dtype").lower()
    dtype_x2 = input2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x1, check_list, param_name="input1")
    check_dtype(dtype_x2, check_list, param_name="input2")
    check_elewise_shape_range([input1, input2], support_broadcast=True)
    if dtype_x1 != dtype_x2:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'mul'
        error_info['param_name1'] = 'dtype_x1'
        error_info['param_name2'] = 'dtype_x2'
        error_info['param1_dtype'] = str(dtype_x1)
        error_info['param2_dtype'] = str(dtype_x2)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([input1, input2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input1, input2) in ins:
        with te.op.compute():
            # shape
            shape_x1, shape_x2 = variable_shape([input1, input2],
                                                support_broadcast=True)
            shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1,
                                                             shape_x2)
            # mul_compute
            data_x1 = tvm.placeholder(shape_x1, dtype=dtype_x1, name="data_x1")
            data_x2 = tvm.placeholder(shape_x2, dtype=dtype_x2, name="data_x2")
            res = mul_compute(data_x1, data_x2, output, kernel_name)

            tensors.append((data_x1, data_x2, res))
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #23
def floor_mod(x1, x2, y, kernel_name="floor_mod"):
    """
    calculate the remainder of division, support fp16,fp32,int32
    res = x1 - floor(x1 / x2) * x2

    Parameters
    ----------
    x1: dict
        dict{"shape":tuple or list,"dtype":str, "range": tuple or list}
        shape of data
        the data type, src_dtype equals dst_dtype, support fp16,fp32,int32
    x2: dict
        dict{"shape":tuple or list,"dtype":str, "range": tuple or list}
        shape of data
        the data type, src_dtype equals dst_dtype, support fp16,fp32,int32
    y: dict, reserved field
        dict with keys(shape, dtype and range) of output
    kernel_name: str
        cce kernel name, default value is "floor_mod"

    Returns
    ------
    None
    """

    # check input tensor data_type
    dtype_x = x1.get("dtype").lower()
    dtype_y = x2.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype_x, check_list, param_name="x1")
    check_dtype(dtype_y, check_list, param_name="x2")
    check_elewise_shape_range([x1, x2], support_broadcast=True)

    if dtype_x != dtype_y:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'floor_mod'
        error_info['param_name1'] = 'dtype_x'
        error_info['param_name2'] = 'dtype_y'
        error_info['param1_dtype'] = str(dtype_x)
        error_info['param2_dtype'] = str(dtype_y)
        raise RuntimeError(error_info,
                           "In op[%s], the parameter[%s][%s] are not equal in "
                           "dtype with dtype[%s][%s]." % (
                               error_info['op_name'],
                               error_info['param_name1'],
                               error_info['param_name2'],
                               error_info['param1_dtype'],
                               error_info['param2_dtype']))

    ins = classify([x1, x2], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (x1, x2) in ins:
        with te.op.compute():
            shape_x, shape_y = variable_shape([x1, x2], support_broadcast=True)
            shape_x, shape_y = refine_shapes_for_broadcast(shape_x, shape_y)
            input_data_x = tvm.placeholder(shape_x, name="input_data_x",
                                           dtype=dtype_x)
            input_data_y = tvm.placeholder(shape_y, name="input_data_y",
                                           dtype=dtype_y)
            res = floor_mod_compute(input_data_x, input_data_y, y, kernel_name)

            tensors.append([input_data_x, input_data_y, res])
        with tvm.target.cce():
            auto_sch = generic.auto_schedule(res)
        schedules.append(auto_sch)

    config = {"name": kernel_name,
              "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
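The docstring formula res = x1 - floor(x1 / x2) * x2 is the floored modulo, whose result takes the sign of the divisor; a NumPy reference (illustration only):

import numpy as np

x1_np = np.array([5.0, -5.0, 5.0], dtype=np.float32)
x2_np = np.array([3.0, 3.0, -3.0], dtype=np.float32)
ref = x1_np - np.floor(x1_np / x2_np) * x2_np     # [ 2.,  1., -1.]
assert np.allclose(ref, np.mod(x1_np, x2_np))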
Example #24
def div(input_x, input_y, output_z, kernel_name="div"):
    """
    algorithm: div
    calculating data's div, res = x / y

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name: str
        kernel name, default value is "div"

    Returns
    -------
    None
    """

    # check dtype
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)

    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'div'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] are not equal in "
            "dtype with dtype[%s][%s]." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param_name2'], error_info['param1_dtype'],
             error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = div_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    # build
    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #25
def cast(input_x, output_y, dst_type, kernel_name="cast"):
    """
    cast a tensor/scaler with input shape form src data type to dst data
    type. restrictions of input algorithms are as follow
    only types' groups blow are support tensor process:
        float16->float32
        float16->int32
        float32->float16
        float32->int32
        int8->float32
        uint8->float32
        int8->float16
        uint8->float16
        int8->int32
        uint8->int32
        int32->uint8 // number out of [0,255] can get unexpected result
        int32->int8 // number out of [-128,127] can get unexpected result
        int32->float32 // For tans with fp16, only guarantees
                        number in [-1023,1023] get correct result
        int32->float16 // only guarantees
                        number in [-1023,1023] get correct result
    scalar convert support: (means only support shape [1,])
        int64->int32
        int64->float32

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape as input,
        and the dtype is the dst dtype need to cast
    kernel_name : str
        cce kernel name, default value is cast

    Returns
    -------
    None
    """

    src_type = input_x.get("dtype").lower()

    if src_type == "bool":
        src_type = "int8"

    schedules, tensors = [], []
    ins = classify([input_x], Mode.ELEWISE)
    for (input_x,) in ins:
        with te.op.compute():
            x_shape = variable_shape([input_x])
            dst_type = _cast_dsttype_conversion(dst_type)
            fuseshape = [1]
            fuseshape[0] = reduceIns(lambda x, y: x * y, x_shape[0])
            data = tvm.placeholder(fuseshape, name="data", dtype=src_type)
            if src_type == "int64":
                check_dtype(dst_type, ("float32", "int32"),
                            param_name="dst_type")
                res = tvm.extern(
                    [fuseshape], [data],
                    lambda ins, outs: _kernel_ir(outs, ins, dst_type, "int64"),
                    name="res",
                    dtype=dst_type)
                tensor_list = [data, res]
                schedule = tvm.create_schedule(res.op)
                with build_config:
                    tvm.build(schedule, tensor_list, "cce", name=kernel_name)
            else:
                res = cast_compute(data, output_y, dst_type, kernel_name)
                tensors.append([data, res])
        if src_type != "int64":
            with tvm.target.cce():
                sch = generic.auto_schedule(res)
            schedules.append(sch)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": tensors
    }
    te.lang.dynamic.build(schedules, config)
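The conversion table above warns that narrowing integer casts can give unexpected results outside the target range; NumPy's astype previews that behaviour on the host (illustration only, not the device path):

import numpy as np

# Out-of-range values do not saturate: int32 -> uint8 typically keeps only
# the low bits, which is the surprise the docstring warns about.
x_np = np.array([1, 255, 256, -1], dtype=np.int32)
print(x_np.astype(np.uint8))                      # [  1 255   0 255]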
Example #26
def less_equal(input_x, input_y, output_z, kernel_name="less_equal"):
    """
    Returns the truth value of (x <= y) element-wise

    Parameters
    ----------
    input_x: dict
        dict{"shape":tuple or list, "dtype":str, range: tuple or list},
        shape, range, and dtype of first input,
        support float16,float32,int32,int8,uint8
    input_y: dict
        dict{"shape":tuple or list, "dtype":str, range: tuple or list},
        shape, range, and dtype of second input,
        support float16,float32,int32,int8,uint8
    output_z: dict
        dict of output, should be broadcast shape and type as input
    kernel_name: str
        cce kernel name, default value is "less_equal"

    Returns
    -------
    None
    """
    # check input tensor data_type
    x_dtype = input_x.get("dtype").lower()
    y_dtype = input_y.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "uint8", "int8")
    check_dtype(x_dtype, check_list, param_name="input_x")
    check_dtype(y_dtype, check_list, param_name="input_y")
    check_elewise_shape_range([input_x, input_y], support_broadcast=True)
    if x_dtype != y_dtype:
        error_info = {}
        error_info['errCode'] = OP_ERROR_CODE_018
        error_info['op_name'] = 'less_equal'
        error_info['param_name1'] = 'x_dtype'
        error_info['param_name2'] = 'y_dtype'
        error_info['param1_dtype'] = str(x_dtype)
        error_info['param2_dtype'] = str(y_dtype)
        raise RuntimeError(
            error_info, "In op[%s], the parameter[%s][%s] are not equal in "
            "dtype with dtype[%s][%s]." %
            (error_info['op_name'], error_info['param_name1'],
             error_info['param_name2'], error_info['param1_dtype'],
             error_info['param2_dtype']))

    ins = classify([input_x, input_y], Mode.ELEWISE_WITH_BROADCAST)
    schedules, tensors = [], []
    for (input_x, input_y) in ins:
        with te.op.compute():
            # shape
            x_shape, y_shape = variable_shape([input_x, input_y],
                                              support_broadcast=True)
            x_shape, y_shape = refine_shapes_for_broadcast(x_shape, y_shape)

            # less_equal compute
            tensor_x = tvm.placeholder(x_shape, x_dtype, "tensor_x")
            tensor_y = tvm.placeholder(y_shape, y_dtype, "tensor_y")
            res = less_equal_compute(tensor_x, tensor_y, output_z, kernel_name)

            tensors.append([tensor_x, tensor_y, res])
        with tvm.target.cce():
            sch = generic.auto_schedule(res)
        schedules.append(sch)

    config = {"name": kernel_name, "tensor_list": tensors}
    te.lang.dynamic.build(schedules, config)
Example #27
def reduce_mean_d(input_x,
                  output_y,
                  axes,
                  keepdims=None,
                  kernel_name="reduce_mean_d",
                  impl_mode="high_performance"):
    """
    Reduce a tensor on certain axes based on mean.

    Parameters:
    ----------
    input_x : dict
        shape and dtype of input
    output_y: dict
        shape and dtype of output
    axes : int, list, tuple, NoneType
        The dimensions to reduce. If None (the default), reduces all dimensions.
        Must be in the range [-rank(input_tensor), rank(input_tensor)).
    keepdims : bool, NoneType
        if true, retains reduced dimensions with length 1,
        default value is None.
    kernel_name : str
        cce kernel name, default value is reduce_mean_d

    Returns
    -------
    None
    """
    dtype = input_x["dtype"]
    dtype_lower = dtype.lower()
    check_list = ("float16", "float32", "int8", "uint8")
    check_dtype(dtype_lower, check_list)

    with te.op.compute():
        shape = input_x["shape"]
        shape_range = input_x["range"]

        shape_len = len(shape)
        if not axes:
            axes = range(shape_len)
        if hasattr(axes, 'index'):
            axes = list(axes)
        # not support 5HD
        is_5hdc = False

        shape_new, shape_range_new, axes_new, fused_rel_dic = \
            fused_reduce_axis(shape, shape_range, axes)

        add_compile_info("fused_rel_dic", fused_rel_dic)
        input_x["shape"] = shape_new
        input_x["range"] = shape_range_new
        shape_var_new = variable_shape([input_x])[0]

        data_input = tvm.placeholder(shape_var_new,
                                     name="data_input",
                                     dtype=dtype_lower)
        res = reduce_mean_d_compute(data_input,
                                    output_y,
                                    axes_new,
                                    keepdims,
                                    impl_mode=impl_mode,
                                    is_5hdc=is_5hdc)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.dynamic.build(sch, config)
Example #28
def tile_d(input_x, output_x, multiples, kernel_name="tile_d"):
    """algorithm: tile.
    The tile in tensorflow can multiple the shape of the given tensor.
    For example, tiling [a b c d] by [2] produces [a b c d a b c d].
    The tile op in TBE is different from tf.tile: tile in TBE uses the broadcast
    api and only supports cases where at least one axis in shape is 1. The '1'
    axis is the one to be multiplied.
    For example, if shape = [51, 1] and multiples = [1, 77], after computation,
    the output shape will be [51, 77].
    Abnormal conditions:
    1. The length of shape is greater than the length of multiples.
    2. The type of kernel_name is not string.
    3. The shape is neither list nor tuple.
    4. The dtype is not float32, float16, or int32.
    5. All of the axes of the multiples are 1.

    Parameters
    ----------
    input_x : dict
        shape and dtype of input
    output_x: dict
        dict of output.
    multiples : list or tuple.
        Number of replicates for each axis.
    kernel_name : str.
        kernel name, default value is "tile_d".

    Returns
    -------
    None
    """

    dtype = input_x.get("dtype").lower()
    check_list = ("float16", "float32", "int32")
    check_dtype(dtype, check_list, param_name="input_x")
    unknown_shape = []
    shape = input_x.get("shape")
    for i, dim in enumerate(shape):
        if dim == -1:
            unknown_shape.append(i)

    with te.op.compute():
        shape = te.lang.dynamic.shape_to_list(variable_shape([input_x])[0])
        multiples = te.lang.dynamic.shape_to_list(multiples)
        origin_multiples = multiples

        input_format = input_x.get("format")
        output_format = output_x.get("format")
        if input_format in ("NCHW", "NHWC") and output_format in ("NC1HWC0", ):
            # branch: 4D tile to 5HD ((N, 1, 1, 1) to (N, C1, H, W, C0))
            # and output C is 16 align
            # change input shape from (N, 1, 1, 1) to (N, 1, 1, 1, 1)
            shape = shape + [1]
            if input_format == "NCHW":
                # change multiples from (1, C, H, W) to (1, C1, H, W, C0)
                multiples = [
                    multiples[0], multiples[1] // 16, multiples[2],
                    multiples[3], 16
                ]
            else:
                # change multiples from (1, H, W, C) to (1, C1, H, W, C0)
                multiples = [
                    multiples[0], multiples[3] // 16, multiples[1],
                    multiples[2], 16
                ]

        if len(shape) > len(multiples):
            error_info = {}
            error_info['errCode'] = OP_ERROR_CODE_012
            error_info['op_name'] = 'tile_d'
            error_info['param_name'] = 'shape'
            error_info['max_value'] = str(len(multiples))
            error_info['min_value'] = '1'
            error_info['real_value'] = str(len(shape))
            raise RuntimeError(
                error_info,
                "In op[%s], the num of dimensions of input[%s] should be in the range of "
                "[%s, %s], but actually is [%s]." %
                (error_info['op_name'], error_info['param_name'],
                 error_info['min_value'], error_info['max_value'],
                 error_info['real_value']))
        if len(shape) < len(multiples):
            len_error = len(multiples) - len(shape)
            shape = [1] * len_error + shape

        shape_adapt = []
        multiples_adapt = []
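        # Interleave adaptation (see the standalone sketch after this example):
        # whenever shape_i and multiples_i are both non-1, a fresh length-1 axis
        # is inserted in front of shape_i to carry the multiple, so every tiled
        # axis ends up with length 1 and fits the broadcast-based tile compute.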
        for i, shape_i in enumerate(shape):
            multiples_i = multiples[i]
            if multiples_i != 1 and shape_i != 1:
                shape_adapt.append(1)
                multiples_adapt.append(multiples_i)
                multiples_i = 1
            shape_adapt.append(shape_i)
            multiples_adapt.append(multiples_i)

        shape = shape_adapt
        multiples = multiples_adapt

        for shape_i, multiples_i in zip(shape, multiples):
            if not (shape_i == 1 or multiples_i == 1):
                error_info = {}
                error_info['errCode'] = OP_ERROR_CODE_009
                error_info['op_name'] = 'tile_d'
                error_info[
                    'rule_desc'] = "For each axis, either shape or multiples has to be 1"
                error_info['param_name1'] = 'shape_i'
                error_info['param_name2'] = 'multiples_i'
                error_info['param1_value'] = str(shape_i)
                error_info['param2_value'] = str(multiples_i)
                raise RuntimeError(
                    error_info,
                    "Op[%s] has rule: %s, but [%s] is [%s], [%s] is [%s]." %
                    (error_info['op_name'], error_info['rule_desc'],
                     error_info['param_name1'], error_info['param1_value'],
                     error_info['param_name2'], error_info['param2_value']))

        axis_not_multiple = 0
        for multiples_i in multiples:
            if multiples_i == 1:
                axis_not_multiple += 1
        if axis_not_multiple == len(multiples):
            error_info = {}
            error_info['errCode'] = OP_ERROR_CODE_005
            error_info['op_name'] = 'tile_d'
            error_info['param_name'] = 'axis_not_multiple'
            error_info['min_len'] = '1'
            error_info['max_len'] = str(len(multiples) - 1)
            error_info['length'] = str(axis_not_multiple)
            raise RuntimeError(
                error_info,
                "In op[%s], the length of parameter[%s] be in the range of [%s, %s], but "
                "actually is [%s]." %
                (error_info['op_name'], error_info['param_name'],
                 error_info['min_len'], error_info['max_len'],
                 error_info['length']))

        data = tvm.placeholder(shape, name="data", dtype=dtype)

        res = tile_d_compute(data, output_x, multiples, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }

    te.lang.dynamic.build(sch, config)
    te.op.add_compile_info("_unknown_shape", unkown_shape)
    te.op.add_compile_info("_origin_multiples", origin_multiples)
    te.op.add_compile_info("_multiples_adapt", multiples_adapt)
Beispiel #29
0
def bn_training_update_v3(x, sum, square_sum, scale, offset,
                          y, batch_mean, batch_variance, reserve_1, reserve_2,
                          epsilon, kernel_name="bn_training_update_v3"):
    """
    algorithm: fused_batch_norm_v2
    Batch normalization.

    Parameters
    ----------
    x: dict
        dict of input, A 5HD Tensor for input data.
    sum: dict
        dict of sum, A 5HD Tensor for sum.
        The output of batch_normalization_forward_training_reduce.
    square_sum: dict
        dict of square_sum, A 5HD Tensor for square_sum.
        The output of batch_normalization_forward_training_reduce.
    scale: dict
        dict of scale, A 5HD Tensor for the scale factor (gamma).
    offset: dict
        dict of offset, A 5HD Tensor for the offset (beta).
    y: dict
        dict of output, A `Tensor`. Has the same type as `x`.
    batch_mean: dict
        dict of batch_mean, A `Tensor`.
        One of the result which is called save mean.
    batch_variance: dict
        dict of batch_variance, A `Tensor`.
        Has the same type as `batch_mean`.
    reserve_1: dict
        dict of batch_mean, A `Tensor`.
        Has the same type as `batch_mean`.
    reserve_2: dict
        dict of batch_variance, A `Tensor`.
        Has the same type as `batch_variance`.
    epsilon: float
        A small float number added to the variance of x.
    kernel_name: str
        kernel name, default value is "bn_training_update_v3"

    Returns
    -------
    None
    """
    dtype_x = x.get("dtype").lower()
    dtype_sum = sum.get("dtype").lower()
    dtype_sqrsum = square_sum.get("dtype").lower()
    dtype_scale = scale.get("dtype").lower()
    dtype_offset = offset.get("dtype").lower()

    shape_x = x.get("shape")
    shape_sum = sum.get("shape")
    shape_sqrsum = square_sum.get("shape")
    shape_scale = scale.get("shape")
    shape_offset = offset.get("shape")

    data_format = x.get("format").upper()
    origin_format = x.get("ori_format").upper()

    # check dtype
    _check_dtype(dtype_x, dtype_sum, dtype_sqrsum,
                 dtype_scale, dtype_offset)

    # check format
    check_list = ("NC1HWC0", "NCHW")
    check_format(data_format, check_list, param_name="x")
    if data_format == "NCHW" and origin_format not in ("NCHW",):
        raise RuntimeError("The origin format only supports "
                           "NCHW when format is NCHW")

    # check shape
    if data_format == "NC1HWC0":
        _check_shape_5hd(shape_x, shape_sum, shape_sqrsum,
                         shape_scale, shape_offset)
        shape_list = [1, 1, 1, 1, 1]
        shape_list[1] = shape_x[1]
        shape_list[4] = shape_x[4]
        shape_sum = shape_list
    else:
        shape_list = [1, 1, 1, 1]
        shape_list[1] = shape_x[1]
        shape_sum = shape_list
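    # shape_sum now carries the per-channel broadcast shape:
    # (1, C1, 1, 1, C0) for NC1HWC0 input, (1, C, 1, 1) for NCHW input.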

    # get dynamic shape
    shape_x, shape_sum = variable_shape([x, sum])
    log.debug("input_x shape: " + str(shape_x))
    log.debug("input_sum shape: " + str(shape_sum))

    # compute
    with te.op.compute():
        in_x = tvm.placeholder(shape_x, name="x", dtype=dtype_x)
        in_sum = tvm.placeholder(shape_sum, name="sum", dtype=dtype_sum)
        in_sqrsum = tvm.placeholder(shape_sum, name="sqrsum", dtype=dtype_sum)
        in_scale = tvm.placeholder(shape_sum, name="scale", dtype=dtype_sum)
        in_offset = tvm.placeholder(shape_sum, name="offset", dtype=dtype_sum)
        res = bn_training_update_v3_compute(in_x, in_sum, in_sqrsum,
                                            in_scale, in_offset,
                                            y, batch_mean, batch_variance,
                                            reserve_1, reserve_2,
                                            epsilon, kernel_name=kernel_name)

    # schedule
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # build
    tensor_list = [in_x, in_sum, in_sqrsum, in_scale, in_offset] + list(res)
    config = {"name": kernel_name,
              "tensor_list": tensor_list}
    te.lang.dynamic.build(sch, config)
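For orientation, the statistics this update step is built around follow the standard batch-norm algebra on the reduced sums. The NumPy sketch below is a conceptual reference under that assumption, not the TBE compute graph; the function name and the element count num are hypothetical.

import numpy as np

def bn_update_reference(x, ch_sum, ch_square_sum, scale, offset, epsilon, num):
    """Per-channel mean/variance from sum and square_sum, then normalize x."""
    mean = ch_sum / num                           # E[x]
    variance = ch_square_sum / num - mean ** 2    # E[x^2] - E[x]^2
    y = scale * (x - mean) / np.sqrt(variance + epsilon) + offset
    return y, mean, variance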