Example #1
def inv_grad(input_y, input_dy, output_z, kernel_name="inv_grad"):
    """
    algorithm: inv_grad
    calculating the reciprocal's gradient, dx = -1 * dy * y * y, where `y = 1/x` and `dy`
    is the corresponding input gradient.

    Parameters
    ----------
    input_y: dict
        shape and dtype of input_y, only support float16, float32, int32, int8
    input_dy: dict
        shape and dtype of input_dy, should be same shape and type as input_y
    output_z: dict
        shape and dtype of output, should be same shape and type as input_y
    kernel_name: str
        kernel name, default value is "inv_grad"

    Returns
    -------
    None
    """
    shape_input_y = input_y.get("shape")
    shape_input_dy = input_dy.get("shape")
    dtype_input_y = input_y.get("dtype")
    dtype_input_dy = input_dy.get("dtype")

    check_shape(shape_input_y, param_name="input_y")
    check_shape(shape_input_dy, param_name="input_dy")

    shape_input_y = util.shape_refine(shape_input_y)
    shape_input_dy = util.shape_refine(shape_input_dy)

    if list(shape_input_y) != list(shape_input_dy):
        raise RuntimeError("the shape of input must be equal!")

    dtype_input_y = dtype_input_y.lower()
    dtype_input_dy = dtype_input_dy.lower()

    if dtype_input_dy != dtype_input_y:
        raise RuntimeError("the dtype of input must be equal!")

    check_list = ("float16", "float32", "int32", "int8")
    check_dtype(dtype_input_y, check_list, param_name="input_y")

    shape_input_dy, shape_input_y = refine_shapes_for_broadcast(shape_input_dy,
                                                                shape_input_y)
    data_dy = tvm.placeholder(shape_input_dy, name="data_dy",
                              dtype=dtype_input_dy)
    data_y = tvm.placeholder(shape_input_y, name="data_y", dtype=dtype_input_y)

    res = inv_grad_compute(data_y, data_dy, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_y, data_dy, res]}
    te.lang.cce.cce_build_code(sch, config)
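The `inv_grad_compute` helper referenced above is not shown in this snippet. A minimal sketch of what it could look like with the usual te.lang.cce elementwise primitives, following the formula dx = -1 * dy * y * y from the docstring (the real operator additionally casts int8/int32 inputs before multiplying, which this sketch omits):

def inv_grad_compute(input_y, input_dy, output_z, kernel_name="inv_grad"):
    # Hypothetical sketch only: dx = -1 * dy * y * y.
    y_square = te.lang.cce.vmul(input_y, input_y)      # y * y
    neg_y_square = te.lang.cce.vmuls(y_square, -1.0)   # -1 * y * y
    res = te.lang.cce.vmul(neg_y_square, input_dy)     # -1 * dy * y * y
    return res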
Example #2
def CusSquare(input_x, output_y, kernel_name="square"):
    """
    algorithm: square
    calculating data's square, y = x * x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype").lower()

    shape = util.shape_refine(shape)
    data = tvm.placeholder(shape, name="data", dtype=dtype.lower())

    with tvm.target.cce():
        res = square_compute(data, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }

    te.lang.cce.cce_build_code(sch, config)
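`square_compute` is defined elsewhere; a minimal sketch, assuming the usual te.lang.cce DSL, would simply multiply the tensor by itself:

def square_compute(input_data, output_y, kernel_name="square"):
    # Hypothetical sketch only: y = x * x, computed elementwise.
    res = te.lang.cce.vmul(input_data, input_data)
    return res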
Example #3
def caffe_reduction_layer_compute(placeholders,
                                  shape,
                                  dtype,
                                  axis,
                                  op,
                                  coeff,
                                  kernel_name="cce_reductionLayer",
                                  need_build=False,
                                  need_print=False):
    """
        Since the shape of the placeholder created by caffe_reduce is not the
        same as input_shape, fusion_op cannot process the fusion of two ops
        which have different shapes. So, the caffe_reduce op cannot be fused
        until tvm supports reshape in D.
    """
    data = placeholders[0]
    inp_dtype = dtype.lower()

    axis = util.axis_check(len(shape), axis)
    shape = list(shape)
    shape1 = shape[:axis] + [
        functools_reduce(lambda x, y: x * y, shape[axis:])
    ]
    shape1, axis = util.shape_refine(shape1, axis)
    if not axis:
        axis = [0]
        shape1 = [1] + shape1

    if op == "ASUM":
        data_tmp_input = te.lang.cce.vabs(data)
        cof = coeff
        tmp = te.lang.cce.vmuls(data_tmp_input, cof)
    elif op == "SUMSQ":
        data_tmp_input = te.lang.cce.vmul(data, data)
        cof = coeff
        tmp = te.lang.cce.vmuls(data_tmp_input, cof)
    elif op == "MEAN":
        size = shape1[-1]
        cof = float(coeff) * (size**(-1))
        if inp_dtype == "int8" \
                or inp_dtype == "uint8":
            data1 = te.lang.cce.vmuls(data, 1.0)
            data_cast = te.lang.cce.cast_to(data1, "float32")
            tmp = te.lang.cce.vmuls(data_cast, cof)
        else:
            tmp = te.lang.cce.vmuls(data, cof)
    elif op == "SUM":
        cof = coeff
        data_tmp_input = te.lang.cce.vmuls(data, cof)
        tmp = data_tmp_input

    res = te.lang.cce.sum(tmp, axis=axis)
    # Although the data type (int8/uint8) has changed,
    # the data values remain integer
    # during the calculation of other operators (SUM/ASUM/SUMSQ).
    if op != "MEAN":
        res = te.lang.cce.cast_to(res, inp_dtype, f1628IntegerFlag=True)
    return res
Example #4
def custom_sign(shape,
                dtype,
                kernel_name="cce_custom_sign",
                need_build=False,
                need_print=False):
    """
    algorithm: sign = round(x * 32768 / (2 ** (-15) + |x * 32768|))

    calculating data type is float16

    Parameters
    ----------
    shape : shape of data

    dtype : the data type, assume src_dtype equals dst_dtype,
            only support float16, float32, int32

    kernel_name : cce kernel name, default value is "cce_sign"

    need_build : if need to build CCEC kernel, default value is False

    need_print : if need to print the ir, default value is False

    Returns
    -------
    None

    """
    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)
    util.check_shape_size(shape, SHAPE_SIZE_LIMIT)

    check_list = ["float16", "float32", "int32"]
    if not dtype.lower() in check_list:
        raise RuntimeError(
            "custom_sign_cce only support %s while dtype is %s" %
            (",".join(check_list), dtype))

    shape = util.shape_refine(shape)
    inp_dtype = dtype.lower()
    data = tvm.placeholder(shape, name="data", dtype=inp_dtype)
    with tvm.target.cce():
        res = custom_sign_compute([data], shape, dtype, kernel_name,
                                  need_build, need_print)

        sch = generic.auto_schedule(res)

    config = {
        "print_ir": need_print,
        "need_build": need_build,
        "name": kernel_name,
        "tensor_list": [data, res]
    }
    te.lang.cce.cce_build_code(sch, config)
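`custom_sign_compute` is not included here; a sketch of the docstring formula sign = round(x * 32768 / (2 ** (-15) + |x * 32768|)) using te.lang.cce primitives (an assumption, not the original kernel; dtype casts around the rounding step are omitted):

def custom_sign_compute(placeholders, shape, dtype, kernel_name="cce_custom_sign",
                        need_build=False, need_print=False):
    # Hypothetical sketch only: sign = round(x * 32768 / (2 ** (-15) + |x * 32768|)).
    data = placeholders[0]
    scaled = te.lang.cce.vmuls(data, 32768.0)
    denom = te.lang.cce.vadds(te.lang.cce.vabs(scaled), 2.0 ** (-15))
    res = te.lang.cce.round(te.lang.cce.vdiv(scaled, denom))
    return res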
Example #5
def reduce_prod_d(x, y, axes, keep_dims=None, kernel_name="reduce_prod_d"):
    """
    Reduce a tensor on certain axes based on product.

    Parameters:
    ----------
    x : dict
        shape and dtype of input
    y: dict
        shape and dtype of output
    axes : int, list, tuple, NoneType
        The dimensions to reduce. If None (the default), reduces all dimensions.
        Must be in the range [-rank(input_tensor), rank(input_tensor)).
    keep_dims : bool, NoneType
        if true, retains reduced dimensions with length 1,
        default value is None.
    kernel_name : str
        cce kernel name, default value is reduce_prod_d
    Returns
    -------
    None
    """
    shape = x.get("shape")
    check_shape(shape, param_name="x")

    inp_dtype = x.get("dtype").lower()
    check_list = ["float16", "float32", "int8", "uint8"]
    check_dtype(inp_dtype, check_list, param_name="x")

    shape_len = len(shape)

    if not axes:
        axes = range(shape_len)

    if hasattr(axes, 'index'):
        axes = list(axes)

    axes = util.axis_check(shape_len, axes)
    util.check_reduce_shape_rule(shape)

    shape, axes = util.shape_refine(list(shape), axes)
    shape, axes = util.simplify_axis_shape(shape, axes)

    data_input = tvm.placeholder(shape, name="data_input", dtype=inp_dtype)
    with tvm.target.cce():
        res = reduce_prod_d_compute(data_input, y, axes, keep_dims,
                                    kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_input, res]
    }
    te.lang.cce.cce_build_code(sch, config)
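`reduce_prod_d_compute` is defined elsewhere; assuming the DSL exposes a product reduction (the te.lang.cce.reduce_prod call below is an assumption), a minimal sketch could be:

def reduce_prod_d_compute(data_input, y, axes, keep_dims, kernel_name="reduce_prod_d"):
    # Hypothetical sketch only: product reduction over the given axes.
    res = te.lang.cce.reduce_prod(data_input, axis=axes, keepdims=keep_dims)
    return res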
Example #6
def abs(x, y, kernel_name="abs"):
    """
    algorithm: abs

    calculating data's abs, y = |x|

    Parameters
    ----------
    x : dict
        shape and dtype of input, only support float16, float32, int32
    y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is abs

    Returns
    -------
    None
    """
    shape = x.get("shape")
    check_shape(shape, param_name="x")

    check_list = ["float16", "float32", "int32"]
    inp_dtype = x.get("dtype").lower()
    check_dtype(inp_dtype, check_list, param_name="x")

    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=inp_dtype)

    res = abs_compute(data, y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }

    te.lang.cce.cce_build_code(sch, config)
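`abs_compute` is not shown in this snippet; a minimal sketch with the DSL's vabs primitive (the real operator likely casts int32 input to a float type before vabs and back afterwards, which this sketch omits):

def abs_compute(input_data, y, kernel_name="abs"):
    # Hypothetical sketch only: y = |x| computed elementwise.
    res = te.lang.cce.vabs(input_data)
    return res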
Example #7
def square(input_x, output_y, kernel_name="square"):
    """
    algorithm: square
    calculating data's square, y = x * x

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32, int32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        kernel name, default value is "square"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype").lower()
    check_shape(shape, param_name="input_x")

    check_list = ["float16", "float32", "int32"]
    if not dtype in check_list:
        raise RuntimeError("square only support float16, float32, int32")

    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data = tvm.placeholder(fuseshape, name="data", dtype=dtype.lower())

    with tvm.target.cce():
        res = square_compute(data, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data, res]
    }

    te.lang.cce.cce_build_code(sch, config)
Example #8
def erf(input_x, output_y, kernel_name="erf"):
    """
    algorithm: erf
    Computes the Gauss error function of `x` element-wise

    Parameters
    ----------
    input_x: dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    kernel_name: str
        kernel name, default value is "erf"

    Returns
    -------
    None
    """
    shape_input = input_x.get("shape")
    dtype_input = input_x.get("dtype")

    check_shape(shape_input, param_name="input_x")

    dtype_input = dtype_input.lower()
    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_x")

    shape_input = util.shape_refine(shape_input)
    reshape_input = (functools_reduce(lambda x, y: x * y, shape_input[:]),)
    data_input = tvm.placeholder(reshape_input, name="data_input",
                                 dtype=dtype_input)

    erf_result = erf_compute(data_input, output_y, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(erf_result)

    config = {"name": kernel_name,
              "tensor_list": [data_input, erf_result]}

    te.lang.cce.cce_build_code(sch, config)
Example #9
def fills(x, y, value, kernel_name="fills"):
    """
    do  fill operation

    Parameters:
    ----------
    x : the dict of input
    y : the dict of output
    value: scalar value to fill with
    kernel_name : cce kernel name, default value is "fills"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape = x.get("shape")
    dtype = x.get("dtype").lower()

    # check whether dtypes are right
    check_list = ("int32", "float16", "float32")
    check_dtype(dtype, check_list)

    # fuse shapes
    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x * y, shape)
    data_x = tvm.placeholder(fuseshape, name="data_x", dtype=dtype)

    res = fills_compute(data_x, value, dtype)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (data_x, res),
        "print_ir": False
    }
    te.lang.cce.cce_build_code(sch, config)
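`fills_compute` is defined elsewhere; one minimal way to express a fill with elementwise DSL calls (an illustration only, not necessarily the original implementation) is to zero the input and add the scalar value:

def fills_compute(data_x, value, dtype):
    # Hypothetical sketch only: broadcast the scalar `value` over the input
    # shape by computing x * 0 + value elementwise.
    zeroed = te.lang.cce.vmuls(data_x, 0.0)
    res = te.lang.cce.vadds(zeroed, value)
    return res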
Example #10
def softsign(x, y, kernel_name="softsign"):
    """
    Computes for softsign.

    Parameters
    ----------
    x: dict
        data of input.
        source data type, support "float16", "float32".
    y: dict
        data of output.
    kernel_name : str
        kernel name, default value is "softsign".

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    check_shape(shape_input, param_name="x")

    check_list = ("float16", "float32")
    check_dtype(dtype_input.lower(), check_list, param_name="x")

    shape = util.shape_refine(shape_input)
    shape_x = (functools_reduce(lambda x, y: x*y, shape[:]),)
    input_dtype = dtype_input.lower()
    data = tvm.placeholder(shape_x, name="data", dtype=input_dtype)

    res = softsign_compute(data, y, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data, res]}

    te.lang.cce.cce_build_code(sch, config)
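`softsign_compute` is not shown; a minimal sketch of softsign(x) = x / (1 + |x|) with te.lang.cce primitives:

def softsign_compute(input_data, y, kernel_name="softsign"):
    # Hypothetical sketch only: softsign(x) = x / (1 + |x|).
    denominator = te.lang.cce.vadds(te.lang.cce.vabs(input_data), 1.0)
    res = te.lang.cce.vdiv(input_data, denominator)
    return res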
Example #11
def custom_Reduction(shape,
                     dtype,
                     axis,
                     op,
                     coeff,
                     kernel_name="cce_reductionLayer",
                     need_build=False,
                     need_print=False):
    """
    Reduce a tensor on a certain axis, and scale output with coeff

    Parameters
    ----------
    shape : shape of data

    dtype : source data type, only support float16, float32, int8, uint8

    axis : the first axis to reduce, may be negative to index from the end
           (e.g., -1 for the last axis).
           If axis == 0, the output Blob always has the empty shape (count 1),
           performing reduction across the entire input.

    op : can only be one of "SUM, ASUM (sum of abs), SUMSQ (sum of sqr), MEAN"

    coeff : scale for output

    kernel_name : cce kernel name, default value is "cce_reductionLayer"

    need_build : if need to build CCEC kernel, default value is False

    need_print : if need to print the ir, default value is False

    Returns
    -------
    None

    """
    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)

    check_list = ["float16", "float32", "int8", "uint8"]
    if not dtype.lower() in check_list:
        raise RuntimeError(
            "reductionLayer_cce only support %s while dtype is %s" %
            (",".join(check_list), dtype))

    reduction_op = ("SUM", "ASUM", "SUMSQ", "MEAN")

    if not isinstance(axis, int):
        raise RuntimeError("type of axis value should be int")
    if op not in reduction_op:
        raise RuntimeError("op can only be one of SUM, ASUM, SUMSQ , MEAN")
    if not isinstance(coeff, int) and not isinstance(coeff, float):
        raise RuntimeError("coeff must be a value")
    axis_origin = axis
    shape_origin = shape
    axis = util.axis_check(len(shape), axis)
    util.check_reduce_shape_rule(shape)
    shape = list(shape)
    shape1 = shape[:axis] + [
        functools_reduce(lambda x, y: x * y, shape[axis:])
    ]
    shape1, axis = util.shape_refine(shape1, axis)
    if not axis:
        axis = [0]
        shape1 = [1] + shape1
    inp_dtype = dtype.lower()
    data = tvm.placeholder(shape1, name="data_input", dtype=inp_dtype)
    with tvm.target.cce():
        res = caffe_reduction_layer_compute([data], shape_origin, dtype,
                                            axis_origin, op, coeff,
                                            kernel_name, need_build,
                                            need_print)

    if op == "MEAN" and (inp_dtype == "int8" or inp_dtype == "uint8"):
        util.check_shape_size(shape, SHAPE_SIZE_LIMIT)
        res = te.lang.cce.cast_to(res, inp_dtype)
        schedule = tvm.create_schedule(res.op)
        if need_print:
            with build_config:
                print(tvm.lower(schedule, [data, res], simple_mode=True))
        if need_build:
            with build_config:
                tvm.build(schedule, [data, res], "cce", name=kernel_name)
    else:
        with tvm.target.cce():
            sch = generic.auto_schedule(res)

        config = {
            "print_ir": need_print,
            "need_build": need_build,
            "name": kernel_name,
            "tensor_list": [data, res]
        }
        te.lang.cce.cce_build_code(sch, config)
Example #12
def clip_by_value(input_t,
                  clip_value_min,
                  clip_value_max,
                  output_t,
                  kernel_name="clip_by_value"):
    """
    algorithm: clip_by_value
    Clips tensor values to a specified min and max.
    Given a tensor t, this operation returns a tensor of
    the same type and shape as t
    with its values clipped to clip_value_min and clip_value_max.
    Any values less than clip_value_min are set to clip_value_min.
    Any values greater than clip_value_max are set to clip_value_max.

    Parameters
    ----------
    input_t: dict with keys(shape and dtype)
           input tensor
    clip_value_min: dict with keys(shape and dtype) or scalar
           The minimum value to clip by.
    clip_value_max: dict with keys(shape and dtype) or scalar
           The maximum value to clip by.
    output_t: dict
           info of output tensor with the same shape as input.
    kernel_name: str
           kernel name, default value is "clip_by_value"

    Returns
    -------
    None
    """
    shape_x = input_t.get("shape")
    dtype = input_t.get("dtype")
    shape_min = clip_value_min.get("shape")
    shape_max = clip_value_max.get("shape")
    input_dtype = dtype.lower()
    check_dtype(input_dtype, ("float16", "float32", "int32"),
                param_name="input_t")
    if (shape_min != 0 and shape_max != 0):
        if (len(shape_min) > 1 and list(shape_min) != list(shape_x)):
            for i in range(0, len(shape_x)):
                if shape_min[i] != shape_x[i] and shape_min[i] != 1:
                    raise RuntimeError("min/max: A 0-D (scalar) Tensor, "
                                       "or a Tensor with the same shape as t, "
                                       "or a Tensor broadcast to shape as t.")
        if (len(shape_max) > 1 and list(shape_max) != list(shape_x)):
            for i in range(0, len(shape_x)):
                if shape_max[i] != shape_x[i] and shape_max[i] != 1:
                    raise RuntimeError("min/max: A 0-D (scalar) Tensor, "
                                       "or a Tensor with the same shape as t, "
                                       "or a Tensor broadcast to shape as t.")
    check_shape(shape_x, param_name="input_t")
    shape_x = util.shape_refine(shape_x)
    data_x = tvm.placeholder(shape_x, name="data_x", dtype=input_dtype)

    data_value = {}
    check_shape(shape_min, param_name="clip_value_min")
    shape_min = util.shape_refine(shape_min)
    if len(shape_min) != len(shape_x) and len(shape_min) == 1:
        list_min = [1] * (len(shape_x) - 1)
        shape_min = shape_min + list_min
    data_value["min"] = tvm.placeholder(shape_min,
                                        name="data_min",
                                        dtype=input_dtype)

    check_shape(shape_max, param_name="clip_value_max")
    shape_max = util.shape_refine(shape_max)
    if len(shape_max) != len(shape_x) and len(shape_max) == 1:
        list_max = [1] * (len(shape_x) - 1)
        shape_max = shape_max + list_max
    data_value["max"] = tvm.placeholder(shape_max,
                                        name="data_max",
                                        dtype=input_dtype)

    res = clip_by_value_compute(data_x, data_value["min"], data_value["max"],
                                output_t, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    config = {
        "name": kernel_name,
        "tensor_list": [data_x, data_value["min"], data_value["max"], res]
    }
    te.lang.cce.cce_build_code(sch, config)
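`clip_by_value_compute` is defined elsewhere; a sketch that clamps with elementwise max/min after broadcasting min/max to the input shape (te.lang.cce.util.shape_to_list and the broadcast call are assumptions based on the usual DSL helpers):

def clip_by_value_compute(data_x, data_min, data_max, output_t,
                          kernel_name="clip_by_value"):
    # Hypothetical sketch only: clamp x into [min, max].
    shape_x = te.lang.cce.util.shape_to_list(data_x.shape)
    min_b = te.lang.cce.broadcast(data_min, shape_x)
    max_b = te.lang.cce.broadcast(data_max, shape_x)
    res = te.lang.cce.vmin(te.lang.cce.vmax(data_x, min_b), max_b)
    return res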
Example #13
def eltwise(x, y, mode=1, coeff=[], kernel_name="eltwise"):
    """
    Compute elementwise modes, such as 0:PRODUCT, 1:SUM and 2:MAX

    Parameters
    ----------
    x : the list of input data; each element is a dict: {"shape": [], "dtype": ""}

    y : the dict of output

    mode : 0:product, 1:sum, 2:max; default is 1:sum.

    coeff : list of scale coefficients; the number of inputs should be equal
            to the size of coeff.

    kernel_name : cce kernel name, default value is "eltwise"

    Returns
    -------
    None

    """
    tensor_num = len(x)
    shapes = [item.get("shape") for item in x]
    shape0 = shapes[0]
    for i in range(1, tensor_num):
        if shapes[i] != shape0:
            errorInfo = {}
            errorInfo['errCode'] = "E81003"
            errorInfo['op_name'] = 'eltwise'
            errorInfo['shapes_list'] = str(shapes)
            raise RuntimeError(errorInfo, "In op[%s], the shapes[%s] of inputs should"
                                          " be the same." %
                               (errorInfo['op_name'], errorInfo['shapes_list']))
    _eltwise_check_para(x, y, mode=mode,
                        coeff=coeff, kernel_name=kernel_name)
    shape = x[0].get("shape")
    dtype = x[0].get("dtype").lower()

    shape = util.shape_refine(shape)
    fuseshape = [1]
    fuseshape[0] = reduceIns(lambda x, y: x*y, shape)

    tlist = []
    is_l1_depth_fusion = False
    with tvm.target.cce():
        for i in range(0, tensor_num):
            datan_name = 'data%d' % i
            l1_fusion_type = x[i].get("L1_fusion_type", -1)
            if l1_fusion_type == 1:
                raise RuntimeError("eltwise does not support l1 width fusion")
            is_l1_depth_fusion = (l1_fusion_type == 0) or is_l1_depth_fusion
            addr_type = x[i].get("addr_type", 0)
            valid_shape = x[i].get("valid_shape", [])
            slice_offset = x[i].get("slice_offset", [])
            attr_x = {"addr_type": addr_type,
                      "valid_shape": valid_shape,
                      "slice_offset": slice_offset,
                      "L1_fusion_type": l1_fusion_type}
            datan_tmp = tvm.placeholder(fuseshape, name=datan_name,
                                        dtype=dtype, attrs=attr_x)
            tlist.append(datan_tmp)

        res = eltwise_compute(tlist, y, mode, coeff, kernel_name)
        sch = generic.auto_schedule(res)
    tlist.append(res)

    config = {"print_ir": False,
              "need_build": False,
              "name": kernel_name,
              "tensor_list": tlist,
              "l1_fusion_option": is_l1_depth_fusion}
    te.lang.cce.cce_build_code(sch, config)
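`eltwise_compute` is not included; a sketch of the three modes documented above (0: product, 1: optionally scaled sum, 2: max), assuming the usual elementwise DSL calls:

def eltwise_compute(tensors, y, mode=1, coeff=None, kernel_name="eltwise"):
    # Hypothetical sketch only: combine the input tensors according to `mode`.
    res = tensors[0]
    if mode == 1 and coeff:
        res = te.lang.cce.vmuls(res, coeff[0])
    for i, tensor in enumerate(tensors[1:], start=1):
        if mode == 0:
            res = te.lang.cce.vmul(res, tensor)
        elif mode == 1:
            addend = te.lang.cce.vmuls(tensor, coeff[i]) if coeff else tensor
            res = te.lang.cce.vadd(res, addend)
        else:  # mode == 2
            res = te.lang.cce.vmax(res, tensor)
    return res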
Example #14
def log_softmax_v2(input_x,
                   output_y,
                   axis=-1,
                   kernel_name="log_softmax_v2",
                   impl_mode="high_performance"):
    """
    algorithm: log_softmax
    calculating data's log_softmax, x - log(sum(exp(x)))

    Parameters
    ----------
    input_x : dict
        shape and dtype of input, only support float16, float32
    output_y: dict
        shape and dtype of output, should be same shape and type as input
    axis: int, list or tuple
        the data's axis, range is [-d, d-1]
    kernel_name : str
        cce kernel name, default value is log_softmax_v2

    Returns
    -------
    None
    """
    check_list = ("float16", "float32")
    shape = input_x.get("shape")
    input_dtype = input_x.get("dtype").lower()
    shape_len = len(shape)
    shape_list = list(shape)

    if not isinstance(axis, int):
        axis = list(axis)

    check_shape(shape, param_name="input_x")
    check_dtype(input_dtype, check_list, param_name="input_x")

    axis = util.axis_check(shape_len, axis)

    if not isinstance(axis, int):
        for i in axis:
            if shape_list[i] == 1:
                raise RuntimeError("Cannot reduce on an axis with dimension 1")
    else:
        if shape_list[axis] == 1:
            raise RuntimeError("Cannot reduce on an axis with dimension 1")

    shape, axis = util.shape_refine(list(shape), axis)
    shape, axis = util.simplify_axis_shape(shape, axis)

    data_input = tvm.placeholder(shape, name="data_input", dtype=input_dtype)
    result = log_softmax_v2_compute(data_input,
                                    output_y,
                                    axis=axis,
                                    kernel_name=kernel_name,
                                    impl_mode=impl_mode)

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_input, result]
    }

    te.lang.cce.cce_build_code(sch, config)
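`log_softmax_v2_compute` is defined elsewhere; a sketch of x - log(sum(exp(x))) along the reduction axis (a production kernel would normally subtract the row max first for numerical stability, which this sketch omits):

def log_softmax_v2_compute(data_input, output_y, axis=-1,
                           kernel_name="log_softmax_v2",
                           impl_mode="high_performance"):
    # Hypothetical sketch only: log_softmax(x) = x - log(sum(exp(x), axis)).
    shape = te.lang.cce.util.shape_to_list(data_input.shape)
    data_exp = te.lang.cce.vexp(data_input)
    data_sum = te.lang.cce.sum(data_exp, axis=axis, keepdims=True)
    data_log = te.lang.cce.vlog(data_sum)
    res = te.lang.cce.vsub(data_input, te.lang.cce.broadcast(data_log, shape))
    return res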
Example #15
def reduce_min_d(input_min, output_min, axis,
                 keep_dims=None, kernel_name="reduce_min_d"):
    """
    Reduce a tensor on a certain axis based on min

    Parameters:
    ----------
    input_min: dict
        dict of input, which contains shape and dtype
    output_min: dict
        dict of output, which contains shape and dtype
    axis: int or None
        The dimensions to reduce. If None (the default), reduces all dimensions.
        Must be in the range (-rank(input_tensor), rank(input_tensor))
    keep_dims: True or False
        if true, retains reduced dimensions with length 1,
        default value is None
    kernel_name: str
        cce kernel name, default value is "reduce_min_d"

    Returns
    -------
    None
    """
    shape_input = input_min.get("shape")
    dtype_input = input_min.get("dtype")
    check_shape(shape_input, param_name="input_min")

    check_list = ("float16", "float32", "int8", "uint8")
    check_dtype(dtype_input.lower(), check_list, param_name="input_min")

    shape_len = len(shape_input)

    if not axis:
        axis = range(shape_len)

    if hasattr(axis, 'index'):
        axis = list(axis)

    axis = util.axis_check(shape_len, axis)

    is_5hdc = util.check_and_init_5hdc_reduce_support(input_min, axis)
    if not is_5hdc:
        shape_input, axis = util.shape_refine(list(shape_input), axis)
        shape_input, axis = util.simplify_axis_shape(shape_input, axis)

    data_input = tvm.placeholder(shape_input, name="data_input_" + kernel_name,
                                 dtype=dtype_input.lower())
    shape_len = len(shape_input)
    if dtype_input.lower() in ("float32", "int32") and len(axis) == 1 \
            and ((axis[0] == (shape_len - 1)) or (axis[0] == -1)):
        input_min["shape"] = tuple(shape_input)
        reduce_min_d_tik.reduce_min_d_tik(input_min, output_min, -1,
                                          kernel_name)
    else:
        res = reduce_min_d_compute(data_input, output_min, axis, keep_dims,
                                   kernel_name)
        if is_5hdc:
            res.ori_shape = input_min["ori_shape"]
            res.ori_format = input_min["ori_format"]
        with tvm.target.cce():
            sch = generic.auto_schedule(res)

        config = {"name": kernel_name,
                  "tensor_list": [data_input, res]}
        te.lang.cce.cce_build_code(sch, config)
Example #16
def reduce_any_d(x, y, axes, keepdims=None, kernel_name="reduce_any_d"):
    """
    Reduce a tensor on certain axes based on max

    Parameters:
    ----------
    x : shape and dtype of input_data, only support int8

    y : shape and dtype of output_res, reserved parameter, not used now

    axes : the axes to reduce, may be negative to index from the end
           (e.g., -1 for the last axis).
           axes may be int or list (e.g. [1,2])

    keepdims : if true, retains reduced dimensions with length 1,
               default value is None

    kernel_name : cce kernel name, default value is "reduce_any_d"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")

    if dtype == "bool":
        dtype = "int8"
    check_list = ("int8", )
    check_dtype(dtype, check_list, param_name="x")

    shape_len = len(shape)
    if not axes:
        axes = range(shape_len)

    if hasattr(axes, 'index'):
        axes = list(axes)

    axes = util.axis_check(shape_len, axes)

    is_5hdc = util.check_and_init_5hdc_reduce_support(x, axes)
    if not is_5hdc:
        shape, axes = util.shape_refine(list(shape), axes)
        shape, axes = util.simplify_axis_shape(shape, axes)

    inp_dtype = dtype.lower()
    data_input = tvm.placeholder(shape,
                                 name="data_input_" + kernel_name,
                                 dtype=inp_dtype)
    res = reduce_any_d_compute(data_input, y, axes, keepdims, kernel_name)

    if is_5hdc:
        res.ori_shape = x["ori_shape"]
        res.ori_format = x["ori_format"]

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, res]}
    te.lang.cce.cce_build_code(sch, config)
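`reduce_any_d_compute` is not shown; since the int8 input encodes booleans, a logical "any" can be sketched as a max-reduction, with an assumed cast to float16 around the reduction:

def reduce_any_d_compute(data_input, y, axes, keepdims, kernel_name="reduce_any_d"):
    # Hypothetical sketch only: 'any' over 0/1 int8 data as a max reduction.
    data_fp16 = te.lang.cce.cast_to(data_input, "float16")
    res_fp16 = te.lang.cce.reduce_max(data_fp16, axis=axes, keepdims=keepdims)
    res = te.lang.cce.cast_to(res_fp16, data_input.dtype, f1628IntegerFlag=True)
    return res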
Example #17
def custom_Power(shape,
                 dtype,
                 gamma,
                 alpha,
                 beta,
                 kernel_name="cce_caffe_power",
                 need_build=False,
                 need_print=False):
    """
    calculate (alpha * data + beta) ** gamma, computed as exp(gamma * log(alpha * data + beta)).
    When alpha * data + beta < 0, the output is a meaningless value.
    Parameters
    ----------
    shape : shape of data

    dtype : the data type, assume src_dtype equals dst_dtype, only support float16, float32

    gamma : the data type must be same with dtype parameter
        args in (alpha * data + beta) ** gamma

    alpha : the data type must be same with dtype parameter
        args in (alpha * data + beta) ** gamma

    beta : the data type must be same with dtype parameter
        args in (alpha * data + beta) ** gamma

    kernel_name : string
        kernel name in the generated CCE kernel. Default value is "cce_caffe_power".


    need_build : bool
        if need to build CCEC kernel

    need_print : bool
        if need to print Halide IR

    Returns
    -------
    None
        
    """
    supported_dtypes = ["float16", "float32"]
    device_api = "cc_device_pow"

    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)
    util.check_shape_size(shape, SHAPE_SIZE_LIMIT)

    if not (dtype.lower() in supported_dtypes):
        raise RuntimeError("power_cce only support %s while dtype is %s" %
                           (",".join(supported_dtypes), dtype))

    inp_dtype = dtype.lower()
    shape = util.shape_refine(shape)
    data_input = tvm.placeholder(shape, name="data_input", dtype=inp_dtype)

    v_datatype = util.get_device_api_dtype(inp_dtype)
    v_ndim_x = len(shape)
    v_ndim_y = 0
    p_shape_y = 0
    p_input_y = "nullptr"
    block_num = "block_num"
    block_idx = "block_idx"
    padC0 = 0

    p_scale = util.create_param_ptr([alpha], inp_dtype, "p_scale")
    p_shift = util.create_param_ptr([beta], inp_dtype, "p_shift")
    p_power = util.create_param_ptr([gamma], inp_dtype, "p_power")
    p_shape_x = util.create_param_ptr(shape, "int32", "p_shape_x")

    # scale --> alpha, shitf --> beta, power --> gamma
    output = tvm.extern(
        shape,
        [data_input, p_scale, p_shift, p_power, p_shape_x],
        lambda ins, outs: tvm.call_extern(
            "int32_t",
            device_api,
            block_num,
            block_idx,
            v_datatype,
            ins[1].access_ptr("r"),  # scale
            ins[2].access_ptr("r"),  # shift
            ins[3].access_ptr("r"),  # power
            v_ndim_x,
            ins[4].access_ptr("r"),  # shape
            padC0,
            ins[0].access_ptr("r"),  # input x
            v_ndim_y,
            v_ndim_y,
            p_shape_y,
            padC0,
            p_input_y,
            outs[0].access_ptr("w")),
        name="output",
        dtype=inp_dtype)

    s = tvm.create_schedule(output.op)

    if need_print:
        with build_config:
            print(tvm.lower(s, [data_input, output], simple_mode=True))
    if need_build:
        with build_config:
            tvm.build(s, [data_input, output], "cce", name=kernel_name)
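The extern call above delegates the power computation to the cc_device_pow device API. For illustration only, the same formula from the docstring, exp(gamma * log(alpha * data + beta)), could be sketched with te.lang.cce elementwise calls (this is not the original kernel):

def power_compute_sketch(data_input, alpha, beta, gamma):
    # Hypothetical sketch only: (alpha * x + beta) ** gamma computed as
    # exp(gamma * log(alpha * x + beta)); meaningless when alpha * x + beta < 0.
    shifted = te.lang.cce.vadds(te.lang.cce.vmuls(data_input, alpha), beta)
    res = te.lang.cce.vexp(te.lang.cce.vmuls(te.lang.cce.vlog(shifted), gamma))
    return res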
Example #18
def reduce_all_d(input_data,
                 output_data,
                 axes,
                 keepdims=None,
                 kernel_name="reduce_all_d"):
    """
    Reduce a tensor on certain axes based on min

    Parameters:
    ----------
    input_data: dict
        shape and dtype of input_data, only support int8
    output_data: dict
        source data type, only support int8
    axes : int, list, tuple or None.
        the first axes to reduce, may be negative to index from the end
        (e.g., -1 for the last axes).
        axes may be int or list(e.g. [1,2])
    keepdims : bool or None .
        if true, retains reduced dimensions with length 1,
        default value is None
    kernel_name : str
        cce kernel name, default value is "cce_all"

    Returns
    -------
    None
    """
    input_shape = input_data.get("shape")
    input_dtype = input_data.get("dtype").lower()
    if input_dtype == "bool":
        input_dtype = "int8"
    check_shape(input_shape, param_name="input_data")
    check_dtype(input_dtype, ("int8"), param_name="input_data")

    shape_len = len(input_shape)
    if not axes:
        axes = range(shape_len)

    if hasattr(axes, 'index'):
        axes = list(axes)
    axes = util.axis_check(shape_len, axes)

    if not isinstance(axes, int):
        for i in axes:
            if i >= len(input_shape):
                raise RuntimeError("axes should be less than dimension")
    else:
        if axes >= len(input_shape):
            raise RuntimeError("axes should be less than dimension")

    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(input_data, axes)
    if not is_5hdc:
        input_shape, axes = util.shape_refine(list(input_shape), axes)
        input_shape, axes = util.simplify_axis_shape(input_shape, axes)

    data_input = tvm.placeholder(input_shape,
                                 name="data_input_" + kernel_name,
                                 dtype=input_dtype)
    result = reduce_all_d_compute(data_input, output_data, axes, keepdims,
                                  kernel_name)
    if is_5hdc:
        result.ori_shape = input_data["ori_shape"]
        result.ori_format = input_data["ori_format"]

    with tvm.target.cce():
        sch = generic.auto_schedule(result)

    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_input, result]
    }
    te.lang.cce.cce_build_code(sch, config)
Example #19
def custom_exp(shape,
               dtype,
               kernel_name="cce_tf_exp",
               need_build=False,
               need_print=False):
    """
    algorithm: exp

    calculating data's exp, y = e ** x; supported dtypes are float16 and float32

    Parameters
    ----------
    shape : shape of data

    dtype : the data type, assume src_dtype equals dst_dtype, only support float16, float32

    kernel_name : cce kernel name, default value is "cce_tf_exp"

    need_build : if need to build CCEC kernel, default value is False

    need_print : if need to print the ir, default value is False

    Returns
    -------
    None

    """
    supported_dtypes = ["float16", "float32"]
    device_api = "DeviceExp"

    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape)
    util.check_shape_size(shape, SHAPE_SIZE_LIMIT)

    if not (dtype.lower() in supported_dtypes):
        raise RuntimeError("tf_exp_cce only support %s while dtype is %s" %
                           (",".join(supported_dtypes), dtype))

    inp_dtype = dtype.lower()
    shape = util.shape_refine(shape)
    data_input = tvm.placeholder(shape, name="data_input", dtype=inp_dtype)

    v_datatype = util.get_device_api_dtype(inp_dtype)
    v_ndim = len(shape)
    block_num = "block_num"
    block_idx = "block_idx"
    padC0 = 0
    p_scale = util.create_param_ptr([1], inp_dtype, "p_scale")
    p_shift = util.create_param_ptr([0], inp_dtype, "p_shift")
    p_base = util.create_param_ptr([-1], inp_dtype, "p_base")
    p_shape = util.create_param_ptr(shape, "int32", "p_shape")

    output = tvm.extern(
        shape,
        [data_input, p_scale, p_shift, p_base, p_shape],
        lambda ins, outs: tvm.call_extern(
            "int32_t",
            device_api,
            block_num,
            block_idx,
            v_datatype,
            ins[1].access_ptr("r"),  # scale
            ins[2].access_ptr("r"),  # shift
            ins[3].access_ptr("r"),  # base
            v_ndim,
            ins[4].access_ptr("r"),  # shape
            padC0,
            ins[0].access_ptr("r"),  # input x
            outs[0].access_ptr("w")),
        name="output",
        dtype=inp_dtype)

    s = tvm.create_schedule(output.op)

    if need_print:
        with build_config:
            print(tvm.lower(s, [data_input, output], simple_mode=True))
    if need_build:
        with build_config:
            tvm.build(s, [data_input, output], "cce", name=kernel_name)
Example #20
def reduce_max_d(x, y, axis, keepdims=False, kernel_name="reduce_max_d"):
    """
    calculating data

    Parameters
    ----------
    x : dict
        shape and dtype of input
    y : dict
        shape and dtype of output, should be same shape and type as input
    axis: list
        the first axis to reduce, may be negative to index from the end
        (e.g., -1 for the last axis).
        axis may be int or list(e.g. [1,2])
    keepdims: bool
        if true, retains reduced dimensions with length 1,
        default value is False
    kernel_name : str
        kernel name, default value is "reduce_max_d"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")
    input_dtype = dtype.lower()

    check_shape(shape, param_name="x")

    check_list = ["float16", "float32", "int8", "uint8", "int32"]
    check_dtype(input_dtype, check_list, param_name="x")

    shape_len = len(shape)

    if not axis:
        axis = range(shape_len)

    if hasattr(axis, 'index'):
        axis = list(axis)

    axis = util.axis_check(shape_len, axis)

    # Shape should not be modified in 5HD mode
    # 5HD Special param for 5hd schedule
    is_5hdc = util.check_and_init_5hdc_reduce_support(x, axis)
    if not is_5hdc:
        shape, axis = util.shape_refine(list(shape), axis)
        shape, axis = util.simplify_axis_shape(shape, axis)
    shape_len = len(shape)
    x["shape"] = shape
    if input_dtype in ("float32", "int32") and len(axis) == 1 \
            and ((axis[0] == (shape_len - 1)) or (axis[0] == -1)):
        reduce_max_d_tik(x, y, axis[0], kernel_name)
    else:
        data_input = tvm.placeholder(shape,
                                     name="data_input_" + kernel_name,
                                     dtype=input_dtype)
        res = reduce_max_d_compute(data_input, y, axis, keepdims, kernel_name)

        if is_5hdc:
            res.ori_shape = x["ori_shape"]
            res.ori_format = x["ori_format"]
        with tvm.target.cce():
            sch = generic.auto_schedule(res)

        config = {"name": kernel_name, "tensor_list": [data_input, res]}

        te.lang.cce.cce_build_code(sch, config)