Code Example #1
File: square_sum_all.py Project: gekowa/ascend-opp
    def check_param(self):
        """
        Check parameter

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        op_utils.check_shape(self.input_x_shape, param_name="input_x")
        op_utils.check_shape(self.input_y_shape, param_name="input_y")
        op_utils.check_dtype(self.input_x_dtype, ("float32", ),
                             param_name="input_x")
        op_utils.check_dtype(self.input_y_dtype, ("float32", ),
                             param_name="input_y")

        add_support = tbe_platform.cce_conf.api_check_support(
            "tik.vadd", "float32")

        if self.input_x_dtype != self.input_y_dtype:
            raise RuntimeError(
                "input_x and input_y do not have the same dtype")

        if self.input_x_dtype == "float32" and not add_support:
            raise RuntimeError(
                "Input dtype is float32, but do not support on the platform")
Code Example #2
def check_supported(x,
                    segment_ids,
                    y,
                    num_segments,
                    kernel_name="unsorted_segment_max_d"):
    """
    fusion pass test if num_segments is int32
    """
    shape = x.get("shape")
    dtype = x.get("dtype").lower()
    segment_ids_shape = segment_ids.get("shape")
    segment_ids_dtype = segment_ids.get("dtype").lower()
    check_list = ("float16", "float32", "int32", "int16")
    op_utils.check_dtype(dtype, check_list, param_name="x")
    op_utils.check_shape(shape, param_name="x")
    check_list_ids = ("int32", )
    op_utils.check_dtype(segment_ids_dtype,
                         check_list_ids,
                         param_name="segment_ids")
    if num_segments <= 0:
        return False
    first_shape = int(shape[0])
    ids_length = int(segment_ids_shape[0])
    if first_shape != ids_length:
        return False
    total_ub_size = (num_segments + first_shape) * BLOCK_LENGTH + (
        (BLOCK_LENGTH // 2 - first_shape %
         (BLOCK_LENGTH // 4)) + first_shape) * (BLOCK_LENGTH // 8)
    if total_ub_size > UB_SIZE_MAX // 2:
        return False
    return True
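
These TBE entry points describe every tensor as a plain dict carrying "shape" and "dtype" (and sometimes "format"), which is what the code above reads through x.get("shape") and x.get("dtype"). Below is a minimal usage sketch of check_supported under that convention; the shapes, dtypes, and num_segments value are made-up illustrations, and it assumes the surrounding module (op_utils, BLOCK_LENGTH, UB_SIZE_MAX) is available as in the original file.

# Hypothetical tensor descriptors; all values are illustrative only.
x = {"shape": (128, 64), "dtype": "float32"}
segment_ids = {"shape": (128,), "dtype": "int32"}
y = {"shape": (16, 64), "dtype": "float32"}

# True only when the dtype/shape checks pass and the estimated UB usage
# fits within half of UB_SIZE_MAX.
if check_supported(x, segment_ids, y, num_segments=16):
    print("unsorted_segment_max_d fusion path is supported")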
Code Example #3
File: kl_div.py Project: gekowa/ascend-opp
def _check_parameter(input_x, input_target):
    """
    Parameters
    ----------
    input_x : dict
        shape and dtype of input_x
    input_target : dict
        shape and dtype of input_target. Shape and dtype must be the same as input_x
    Returns
    ------
    None
    """
    shape_x = input_x.get("shape")
    shape_target = input_target.get("shape")
    op_utils.check_shape(shape_x, param_name="input_x")
    if list(shape_x) != list(shape_target):
        raise RuntimeError("input_x and input_target must "
                           "have the same shape.")

    # check input tensor data_type
    dtype_x = input_x.get("dtype").lower()
    dtype_target = input_target.get("dtype").lower()
    check_list = ("float16", "float32")
    op_utils.check_dtype(dtype_x, check_list, param_name="input_x")
    if dtype_x != dtype_target:
        raise RuntimeError("input_x and input_target must "
                           "have the same dtype.")

    if dtype_x == "float32" and not tbe_platform.cce_conf.api_check_support(
            "te.lang.cce.vmul", "float32"):
        raise RuntimeError(
            "Instric only support float16 while input dtype is float32")
Code Example #4
File: ascend_quant.py Project: gekowa/ascend-opp
def _check_params(x, y, scale, offset, sqrt_mode, round_mode, kernel_name):
    """
    check the parameters including shape, dtype, kernel_name, attr.
    """
    shape = x.get("shape")
    x_format = x.get("format")
    dtype = x.get("dtype").lower()
    format_list = ["NC1HWC0", "FRACTAL_NZ"]
    if x_format not in format_list:
        raise RuntimeError("ascend quant only support [NC1HWC0, FRACTAL_NZ]")
    if x_format == "NC1HWC0":
        if len(shape) != 5:
            raise RuntimeError(
                "ascend quant only support the length of shape is 4 or 5")
    if x_format == "FRACTAL_NZ":
        if len(shape) < 4:
            raise RuntimeError(
                "ascend quant only support the length of shape is >= 4")
    check_shape(shape, param_name="x")
    if is_lhisi_version():
        # es
        check_list = ["float16"]
    else:
        check_list = ["float16", "float32"]

    if dtype not in check_list:
        raise RuntimeError("ascend quant only supports %s" %
                           (",".join(check_list)))
    round_mode_list = ["Round", "Ceil", "Floor", "Trunc"]
    if round_mode not in round_mode_list:
        raise RuntimeError("ascend quant only supports round_mode in [%s]" %
                           (",".join(round_mode_list)))
Code Example #5
def _check_para_and_getplaceholder(scalar_input, tensor_input, input_dict):
    check_list = ("float32", )
    var_shape = input_dict["var"].get("shape")
    var_dtype = input_dict["var"].get("dtype")
    list_placeholder = []
    for key, value in input_dict.items():
        shape = util.scalar2tensor_one(value.get("shape"))
        op_utils.check_shape(shape)
        if value in scalar_input:
            if not util.is_scalar(shape):
                raise RuntimeError("The shape of ", key, " must be scalar")
        if value in tensor_input:
            if shape != var_shape:
                raise RuntimeError("The shape of", key,
                                   "must be the same as the var")

        dtype = value.get("dtype").lower()
        op_utils.check_dtype(dtype, check_list, param_name="var")
        if dtype != var_dtype:
            raise RuntimeError("The dtype of", key,
                               "must be the same as the var")

        shape_refine = (functools_reduce(operator.mul, shape), )
        list_placeholder.append(
            tvm.placeholder(shape=shape_refine, name=key, dtype=dtype))
    return list_placeholder
Code Example #6
def log(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="log"):
    """
    calculating data

    Parameters
    ----------
    input_x : dict
        shape and dtype of input
    output_y : dict
        shape and dtype of output, should be same shape and type as input
    base : float
        logarithm base, must be strictly positive or -1.0 (default)
    scale : float
        scale attribute, default value is 1.0
    shift : float
        shift attribute, default value is 0.0
    kernel_name : str
        kernel name, default value is "log"

    Returns
    -------
    None
    """

    shape = input_x.get("shape")
    dtype = input_x.get("dtype")
    input_dtype = dtype.lower()

    # check the shape of input_x
    op_utils.check_shape(shape, param_name="input_x")

    # check the dtype of input_x; only float16 and float32 are supported
    check_list = ("float16", "float32")
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")

    if base <= 0 and (not isclose(base, -1.0)):
        error_info = {}
        error_info['errCode'] = 'E80000'
        error_info['param_name'] = 'base'
        error_info['op_name'] = 'log'
        error_info['expect_value'] = "strictly positive or -1"
        error_info['real_value'] = base
        raise RuntimeError("In op[%s], the parameter[%s] should be [%s], but actually is [%s]."
                           % (error_info['op_name'], error_info['param_name'], \
                              error_info['expect_value'], error_info['real_value']))

    fused_shape = [reduceIns(lambda x, y: x * y, shape[:])]
    data_input = tvm.placeholder(fused_shape,
                                 name="data_input",
                                 dtype=input_dtype)

    res = log_compute(data_input, output_y, base, scale, shift, kernel_name)

    # auto schedule
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # operator build
    config = {
        "name": kernel_name,
        "need_build": True,
        "tensor_list": (data_input, res)
    }

    te.lang.cce.cce_build_code(sch, config)
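
The fused_shape line above flattens the whole input into a single 1-D dimension before creating the placeholder, since log is purely element-wise. A standalone sketch of just that flattening step, using plain functools.reduce and a made-up shape:

from functools import reduce

shape = (2, 3, 4)  # illustrative input shape
fused_shape = [reduce(lambda x, y: x * y, shape[:])]
print(fused_shape)  # [24] -- one flat dimension for the element-wise schedule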
Code Example #7
File: atan_grad.py Project: gekowa/ascend-opp
def atan_grad(y, dy, z, kernel_name="atan_grad"):
    """
    Gradient calculation for atan(x)

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32
    dy : dict of dy, include shape and dtype, dtype support float16, float32
    z : dict of output, include shape and dtype
    kernel_name : cce kernel name, default value is atan_grad

    Algorithm :
    ----------
    forward :
        y = atan(x)
    backward gradient :
        de/dx = dy/dx*de/dy = 1/(1+x^2)*grad

    Returns
    ----------
    None
    """

    # get the shape and dtype
    shape = y.get("shape")
    shape_grad = dy.get("shape")
    dtype = y.get("dtype")
    dtype_grad = dy.get("dtype")

    # check whether kernel name is unique

    # check whether the shape is right
    check_shape(shape, param_name="y")
    check_shape(shape_grad, param_name="dy")
    if not operator.eq(shape, shape_grad):
        raise RuntimeError("all input shape must be the same")
    shape, _ = refine_shape_axes(shape, [])

    # check whether dtypes are fp16,fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype_grad, check_list, param_name="dy")
    dtype = dtype.lower()
    if dtype != dtype_grad.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input placeholders: data_input, grad
    data_input = tvm.placeholder(shape, name="input_data", dtype=dtype)
    grad = tvm.placeholder(shape, name="input_grad", dtype=dtype)

    # compute the backward gradient
    res = atan_grad_compute(data_input, grad, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_input, grad, res]}
    te.lang.cce.cce_build_code(sch, config)
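
The backward formula in the docstring, de/dx = 1/(1 + x^2) * grad, can be sanity-checked numerically against a central difference of atan. A self-contained sketch (sample points and tolerance are arbitrary):

import math

def atan_grad_reference(x, grad):
    # d/dx atan(x) = 1 / (1 + x^2), scaled by the incoming gradient
    return grad / (1.0 + x * x)

# finite-difference check of d/dx atan(x) at a few points
for x in (-1.0, 0.3, 2.0):
    h = 1e-6
    numeric = (math.atan(x + h) - math.atan(x - h)) / (2 * h)
    assert abs(numeric - atan_grad_reference(x, 1.0)) < 1e-6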
Code Example #8
File: logical_or.py Project: gekowa/ascend-opp
def logical_or(x1, x2, y, kernel_name="logical_or"):
    """
    algorithm : logical_or
    calculating the value of x1 OR x2 element-wise

    Parameters
    ----------
    x1 : the dict of x1,
         include shape and dtype,
         dtype support int8, the value only support 0, 1

    x2 : the dict of x2,
         include shape and dtype,
         dtype support int8, the value only support 0, 1

    y : the dict of y, include shape and dtype

    kernel_name : string, cce kernel name, default value is "logical_or"

    Returns
    -------
    None
    """

    shape_x1 = x1.get("shape")
    shape_x2 = x2.get("shape")
    dtype_x1 = x1.get("dtype")
    dtype_x2 = x2.get("dtype")
    if dtype_x1 == "bool" or dtype_x2 == "bool":
        dtype_x1 = "int8"
        dtype_x2 = "int8"

    check_shape(shape_x1, param_name="x1")
    check_shape(shape_x2, param_name="x2")

    check_tuple = ("int8", )
    check_dtype(dtype_x1, check_tuple, param_name="x1")
    check_dtype(dtype_x2, check_tuple, param_name="x2")

    shape_x1, shape_x2, shape_max = broadcast_shapes(shape_x1,
                                                     shape_x2,
                                                     param_name_input1="x1",
                                                     param_name_input2="x2")
    dtype = dtype_x1.lower()
    data_x1 = tvm.placeholder(shape_x1, name="data_x1", dtype=dtype)
    data_x2 = tvm.placeholder(shape_x2, name="data_x2", dtype=dtype)

    res = logical_or_compute(data_x1, data_x2, y, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "need_build": False,
        "name": kernel_name,
        "tensor_list": (data_x1, data_x2, res)
    }
    te.lang.cce.cce_build_code(schedule, config)
Code Example #9
File: mul.py Project: gekowa/ascend-opp
def mul(x, y, output, kernel_name="mul"):
    """
    do element-wise mul operation between two input tensors

    Parameters:
    ----------
    x : dict.
        shape, dtype of input x
    y : dict.
        shape, dtype of input y
    output : dict.
        shape, dtype of output
    kernel_name : str.
        cce kernel name, default value is "mul"

    Returns
    -------
    None
    """
    # format_pattern = 1  Nz and vector
    # format_pattern = 2  vector and Nz
    # format_pattern = 0  Nz scalar  Nz Nz  ND ND
    format_pattern = _mul_check_format(x, y)
    shape_x, shape_y = _infer_shape(format_pattern, x, y)

    shape_x = util.scalar2tensor_one(shape_x)
    dtype_x = x.get("dtype").lower()
    shape_y = util.scalar2tensor_one(shape_y)
    dtype_y = y.get("dtype").lower()

    op_utils.check_shape(shape_x, param_name="x")
    op_utils.check_shape(shape_y, param_name="y")

    if dtype_x != dtype_y:
        raise RuntimeError("dtype of inputs should be consistent")
    dtype = dtype_x
    check_list = ("int32", "float16", "float32", "int16")
    op_utils.check_dtype(dtype, check_list, param_name="x")

    vmul_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vmul", "float32")
    if dtype_x == "float32" and not vmul_support:
        raise RuntimeError(
            "Input dtype is float32, but do not support on the platform")

    shape_x, shape_y, shape_max = op_utils.broadcast_shapes(
        shape_x, shape_y, param_name_input1="x", param_name_input2="y")

    shape_x, shape_y = op_utils.refine_shapes_for_broadcast(shape_x, shape_y)
    input_x = tvm.placeholder(shape_x, dtype=dtype, name="x")
    input_y = tvm.placeholder(shape_y, dtype=dtype, name="y")

    res = _mul_compute(input_x, input_y, output, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (input_x, input_y, res)}
    te.lang.cce.cce_build_code(sch, config)
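
broadcast_shapes in the example above is assumed to follow the usual NumPy-style right-aligned broadcasting rule before the placeholders are built. A minimal, dependency-free sketch of that rule for two static shapes (an illustration of the rule, not of the op_utils implementation):

def broadcast_shape(shape_x, shape_y):
    """Right-align both shapes and take the larger of each dimension pair.

    Raises ValueError when a pair differs and neither side is 1.
    """
    padded_x = (1,) * (len(shape_y) - len(shape_x)) + tuple(shape_x)
    padded_y = (1,) * (len(shape_x) - len(shape_y)) + tuple(shape_y)
    out = []
    for dx, dy in zip(padded_x, padded_y):
        if dx != dy and dx != 1 and dy != 1:
            raise ValueError("shapes are not broadcast-compatible")
        out.append(max(dx, dy))
    return tuple(out)

print(broadcast_shape((16, 1, 32), (8, 32)))  # (16, 8, 32)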
Code Example #10
def data_format_dim_map(x,
                        y,
                        src_format="NHWC",
                        dst_format="NCHW",
                        kernel_name="data_format_dim_map"):
    """
    Returns the dimension index in the destination data format given the one in the source data format.

    Parameters
    ----------
    x : A Tensor with each element as a dimension index in source data format.
        Must be one of the following types: `int32`. Must be in the range [-4, 4).
    y : Shape and dtype of y, reserved parameter, not used now.
    src_format : An optional `string`. Defaults to `"NHWC"`. source data format.
    dst_format : An optional `string`. Defaults to `"NCHW"`. destination data format.
    kernel_name : CCE kernel name, default value is "data_format_dim_map" (optional).

    Returns
    -------
    None
    """

    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    # check kernel name, shape, size, dtype
    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])
    check_list = ("int32", )
    dtype_input = dtype_input.lower()
    check_dtype(dtype_input, check_list, param_name="x")

    # check length of format
    if len(src_format) != 4:
        raise ValueError(
            "source format must of length 4, received src_format = %s" %
            src_format)

    if len(dst_format) != 4:
        raise ValueError(
            "destination format must of length 4, received dst_format = %s" %
            dst_format)
    # get data and compute
    data_input = tvm.placeholder(shape_input,
                                 dtype=dtype_input,
                                 name="data_input")
    res = _data_format_dim_map_compute(data_input, y, src_format, dst_format,
                                       kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)
    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (data_input, res),
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)
Code Example #11
File: assign_sub.py Project: gekowa/ascend-opp
def assign_sub(var, value, out, kernel_name='assign_sub'):
    """
    Update var by subtracting value from it.

    Parameters:
    ----------
    var : dict
        dict of input_var, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32

    value : dict
        dict of input_value, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32.
        Must have the same shape and dtype as input_var

    out : dict
        dict of out

    kernel_name : str
        cce kernel name, default value is "assign_sub"

    Returns
    -------
    None
    """

    # get the shape and dtype
    shape_var = var.get("shape")
    shape_value = value.get("shape")
    dtype_var = var.get("dtype")
    dtype_value = value.get("dtype")

    # kernel name check: should be unique

    # check whether the shape is right
    check_shape(shape_var, param_name="var")
    check_shape(shape_value, param_name="value")
    if not operator.eq(shape_var, shape_value):
        raise RuntimeError("all input shape must be the equal")

    # check whether dtypes are fp16, fp32, int8, uint8, int32
    # and whether they are the same
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_var, check_list, param_name="var")
    check_dtype(dtype_value, check_list, param_name="value")
    dtype_var = dtype_var.lower()
    dtype_value = dtype_value.lower()
    if dtype_var != dtype_value:
        raise RuntimeError("all input dtype must be same")

    shape, _ = refine_shape_axes(shape_var, [])
    data_var = tvm.placeholder(shape, dtype=dtype_var, name='data_var')
    data_value = tvm.placeholder(shape, dtype=dtype_value, name='data_value')
    sch, res = _assign_sub_compute(data_var, data_value, out, kernel_name)

    with set_bool_storage_config():
        tvm.build(sch, [data_var, data_value, res], "cce", name=kernel_name)
Code Example #12
File: upsample.py Project: gekowa/ascend-opp
def check_shape_dtype_format(input_shape, input_dtype, input_format, stride_h,
                             stride_w):
    """
    input_shape: shape of the input
    input_dtype: input dtype
    input_format: input format, NC1HWC0
    The common check rule for tensor shape, only for 5HD
    """
    op_utils.check_shape(input_shape)
    if len(input_shape) != DIM_5HD:
        error_info = {}
        error_info['errCode'] = 'E80012'
        error_info['opname'] = 'upsample'
        error_info['expect_value'] = '5'
        error_info['real_value'] = str(len(input_shape))
        raise RuntimeError(
            error_info,
            "In op[%s], the num of dimensions of input[%s] should be [%s], but actually is [%s]."
            % (error_info['opname'], 'x', error_info['expect_value'],
               error_info['real_value']))
    n, c1, h, w, c0 = input_shape

    op_utils.check_shape([n, c1, h * stride_h, w * stride_w, c0])
    product = tbe_platform.cce_conf.get_soc_spec("SOC_VERSION")
    product_list = ["Hi3796CV300ES", "Hi3796CV300CS"]
    if product in product_list:
        check_list = ["float16"]
    else:
        check_list = ["float16", "float32"]
    if input_dtype not in check_list:
        error_info = {}
        error_info['errCode'] = 'E80006'
        error_info['opname'] = 'upsample'
        error_info['tensor_name'] = 'x'
        error_info['excepted_dtype_list'] = str(check_list)
        error_info['dtype'] = str(input_dtype)
        raise RuntimeError(
            error_info,
            "In op[%s], the input[%s]'s dtype should be one of [%s], but actually is [%s]."
            % (error_info['opname'], 'x', str(check_list), str(input_dtype)))
    shape_c0 = C0
    if input_shape[DIM_5HD - 1] != shape_c0:
        raise RuntimeError("The value of C0 must be 16")

    if input_format != "NC1HWC0":
        error_info = {}
        error_info['errCode'] = 'E80015'
        error_info['opname'] = 'upsample'
        error_info['tensor_name'] = 'x'
        error_info['excepted_dtype_list'] = "NC1HWC0"
        error_info['format'] = str(input_format)
        raise RuntimeError(
            error_info,
            "In op[%s], the input[%s]'s dtype should be [%s], but actually is [%s]."
            % (error_info['opname'], 'x', "NC1HWC0", str(input_format)))
Code Example #13
def relu6_grad(input_grad, input_x, output_y, kernel_name="relu6_grad"):
    """
    Parameters
    ----------
    input_grad : dict
        shape and dtype of input_grad
    input_x : dict
        shape and dtype of input_x
    output_y : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is "relu6_grad"

    Returns
    ------
    None
    """
    # check input shape
    shape_x = input_x.get("shape")
    shape_grad = input_grad.get("shape")
    op_utils.check_shape(shape_x, param_name="input_x")
    op_utils.check_shape(shape_grad, param_name="input_grad")
    if list(shape_x) != list(shape_grad):
        raise RuntimeError("input_grad and input_x must have the same shape.")

    # check input tensor data_type and kernel_name
    check_list = ("float16", "float32")
    input_dtype = input_x.get("dtype").lower()
    grad_dtype = input_grad.get("dtype").lower()
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")
    op_utils.check_dtype(grad_dtype, check_list, param_name="input_grad")
    if input_dtype == "float32" and not tbe_platform.cce_conf.api_check_support(
            "te.lang.cce.vmuls", "float32"):
        raise RuntimeError(
            "Input dtype only support float16 while input dtype is float32")

    shape_x = [reduce_ins(lambda x, y: x * y, shape_x[:])]
    input_data_orginal = tvm.placeholder(shape_x,
                                         name="input_data",
                                         dtype=input_dtype)
    input_grad = tvm.placeholder(shape_x, name="input_grad", dtype=grad_dtype)

    final_res = relu6_grad_compute(input_grad,
                                   input_data_orginal,
                                   output_y,
                                   kernel_name="relu6_grad")
    with tvm.target.cce():
        auto_sch = generic.auto_schedule(final_res)

    config = {
        "name": kernel_name,
        "tensor_list": (input_grad, input_data_orginal, final_res)
    }

    te.lang.cce.cce_build_code(auto_sch, config)
Code Example #14
def elu_grad(grads, activations, y, kernel_name="elu_grad"):
    """
    do element-wise elu_grad operation

    Parameters:
    ----------
    grads: the dict of gradient input, only support float16, float32

    activations: the dict of activation input, only support float16, float32

    y : the dict of output

    kernel_name : cce kernel name, default value is "cce_elu_grad"

    Returns
    -------
    None
    """

    shape_gradient = grads.get("shape")
    shape_activation = activations.get("shape")
    dtype_gradient = grads.get("dtype")
    dtype_activation = activations.get("dtype")

    check_shape(shape_gradient, param_name="grads")
    check_shape(shape_activation, param_name="activations")
    if not operator.eq(shape_gradient, shape_activation):
        raise RuntimeError("all input shape must be equal")
    shape_gradient, _ = refine_shape_axes(shape_gradient, [])
    shape_activation, _ = refine_shape_axes(shape_activation, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_gradient, check_list, param_name="grads")
    check_dtype(dtype_activation, check_list, param_name="activations")
    if dtype_gradient.lower() != dtype_activation.lower():
        raise RuntimeError("all input dtype must be same")

    dtype = dtype_gradient.lower()
    data_gradient = tvm.placeholder(shape_gradient,
                                    dtype=dtype,
                                    name="data_gradient")
    data_activation = tvm.placeholder(shape_activation,
                                      dtype=dtype,
                                      name="data_activation")
    res = elu_grad_compute(data_gradient, data_activation, y, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_gradient, data_activation, res]
    }
    te.lang.cce.cce_build_code(auto_sch, config)
Code Example #15
def lp_loss(predict, label, y, p, reduction="mean", kernel_name="lp_loss"):
    """
    :param predict: dict
        shape and dtype of input
    :param label: dict
        shape and dtype of label, should be same shape and type as predict
    :param y: dict
        shape and dtype of y, should be same shape and type as predict
    :param p: int
        decides which loss to compute; currently only p=1 (L1 loss) is supported
    :param reduction: str
        reduce mode, can be 'mean', 'sum' or 'none'
    :param kernel_name: kernel name, default value is "lp_loss"
    :return:
        None
    """
    predict_shape = predict.get("shape")
    predict_dtype = predict.get("dtype").lower()
    label_shape = label.get("shape")
    label_dtype = label.get("dtype").lower()

    dtype_list = ["float16", "float32"]
    reduction_list = ["none", "mean", "sum"]

    op_utils.check_dtype(predict_dtype, dtype_list)
    op_utils.check_dtype(label_dtype, dtype_list)
    op_utils.check_shape(predict_shape)
    op_utils.check_shape(label_shape)

    util.compare_tensor_dict_key(predict, label, "shape")
    util.compare_tensor_dict_key(predict, label, "dtype")

    if p != 1:
        raise RuntimeError("lp_loss only supports l1_loss")

    if reduction not in reduction_list:
        raise RuntimeError("reduction should be one of ['none','mean','sum']")

    predict_data = tvm.placeholder(predict_shape,
                                   dtype=predict_dtype,
                                   name="predict_data")
    label_data = tvm.placeholder(label_shape,
                                 dtype=label_dtype,
                                 name="label_data")

    res = lp_loss_compute(predict_data, label_data, p, reduction, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [predict_data, label_data, res]
    }
    te.lang.cce.cce_build_code(schedule, config)
Code Example #16
def acos_grad(y, dy, z, kernel_name="acos_grad"):
    """
    do element-wise acos_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of z, include shape and dtype, dtype support float16, float32

    kernel_name : cce kernel name, default value is "acos_grad"
    -------
    """

    # get the shape and dtype for input_1,input_2
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype = y.get("dtype")
    dtype1 = dy.get("dtype")

    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # raise runtimeerror if the input paras are invalid
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype1, check_list, param_name="dy")
    dtype = dtype.lower()
    dtype1 = dtype1.lower()
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError(
            "acos_grad only supports inputs whose shapes are equal")
    if dtype != dtype1:
        raise RuntimeError(
            "acos_grad only supports inputs whose dtypes are equal")

    data_y = tvm.placeholder(shape_y, dtype=dtype, name="data1")
    data_dy = tvm.placeholder(shape_dy, dtype=dtype, name="data2")

    res = acos_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_y, data_dy, res)}
    te.lang.cce.cce_build_code(sch, config)
Code Example #17
File: atan2.py Project: gekowa/ascend-opp
def atan2(x1, x2, y, kernel_name="atan2"):
    """
    Algorithm: arctan2
        arctan2(y, x) = arctan(y/x)
    ----------------------------------
    Parameters:

        x1: the dict of input data x1, only support float16, float32.

        x2: the dict of input data x2, only support float16, float32.

        y: the dict of output

        kernel_name: default value is "atan2".
    ----------------------------------
    Returns:
        None
    """

    y_shape = x1.get("shape")
    x_shape = x2.get("shape")

    y_dtype = x1.get("dtype")
    x_dtype = x2.get("dtype")

    check_shape(y_shape, param_name="x1")
    check_shape(x_shape, param_name="x2")

    shape_y, shape_x, shape_max = broadcast_shapes(
        y_shape, x_shape, param_name_input1="x1", param_name_input2="x2")

    check_list = ("float16", "float32")
    check_dtype(y_dtype, check_list, param_name="x1")
    check_dtype(x_dtype, check_list, param_name="x2")
    if y_dtype.lower() != x_dtype.lower():
        raise RuntimeError("The input tensor must have identical dtype!")
    shape_y, shape_x = refine_shapes_for_broadcast(shape_y, shape_x)
    input_y = tvm.placeholder(shape_y, dtype=y_dtype.lower(), name="input_y")
    input_x = tvm.placeholder(shape_x, dtype=x_dtype.lower(), name="input_x")

    res = atan2_compute(input_y, input_x, y, kernel_name)
    res = te.lang.cce.cast_to(res, x_dtype.lower())
    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (input_y, input_x, res),
        "print_ir": False,
        "bool_storage_as_1bit": False
    }

    te.lang.cce.cce_build_code(auto_sch, config)
Code Example #18
def asin_grad(y, dy, z, kernel_name="asin_grad"):
    """
    do element-wise asin_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of output

    kernel_name : cce kernel name, default value is "asin_grad"

    Returns
    -------
    None
    """

    # get the shape and dtype
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype_y = y.get("dtype")
    dtype_dy = dy.get("dtype")

    # kernel name check: should be unique

    # check whether the shape is right
    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError("all input shape must be the same")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # check whether dtypes are fp16,fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype_y, check_list, param_name="y")
    check_dtype(dtype_dy, check_list, param_name="dy")
    dtype_y = dtype_y.lower()
    if dtype_y != dtype_dy.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input tensors: data_y, data_dy
    data_y = tvm.placeholder(shape_y, name="data_y", dtype=dtype_y)
    data_dy = tvm.placeholder(shape_y, name="data_dy", dtype=dtype_y)
    res = asin_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_y, data_dy, res]}
    te.lang.cce.cce_build_code(sch, config)
Code Example #19
File: swap_ci.py Project: gekowa/ascend-opp
    def input_param_check(self, profile):
        """
        check if the inputs are valid

        Parameters
        ----------
        profile: Dprofile, ai_core profile explanation

        Returns
        -------
        None
        """
        product_name = tbe_platform.cce_conf.get_soc_spec("SOC_VERSION")
        if product_name in ("Ascend310", "Ascend910", "Hi3796CV300ES",
                            "Hi3796CV300CS"):
            op_utils.check_dtype(self.dtype.lower(), ["float16"],
                                 param_name="input_x")
            op_utils.check_dtype(self.y_dtype.lower(), ["float16"],
                                 param_name="input_y")
        else:
            op_utils.check_dtype(self.dtype.lower(), ["float16", "float32"],
                                 param_name="input_x")
            op_utils.check_dtype(self.y_dtype.lower(), ["float16", "float32"],
                                 param_name="input_y")

        if self.dtype != self.y_dtype:
            raise RuntimeError("dtype in x and y must be equal")
        op_utils.check_shape(self.x_shape, param_name="input_x")
        op_utils.check_shape(self.y_shape, param_name="input_y")

        # x must be 4D, NCHW
        if len(self.x_shape) != DIGIT_4:
            raise RuntimeError("input params check error,"
                               " x shape must be 4D: NCHW")
        if len(self.y_shape) != DIGIT_5:
            raise RuntimeError("input params check error, y shape must be 5HD")

        if self.group_size >= DIGIT_128:
            raise RuntimeError("input params check error,"
                               " group_size must be less than 128")

        calc_c = self.output_dim * self.group_size * self.group_size
        if self.x_shape[1] != calc_c and \
                self.x_shape[1] != align_value(calc_c, C0):
            raise RuntimeError(
                "input_param_check, input fm channel number"
                " does not match layer parameters,", calc_c)
        if self.x_shape[0] != self.y_shape[0] or \
                self.x_shape[2] != self.y_shape[2] or \
                self.x_shape[3] != self.y_shape[3] or self.y_shape[1] != \
                ceil_value(self.output_dim, C0)*self.group_size*self.group_size:
            raise RuntimeError("input params check error,"
                               " x shape and y shape is not match")
Code Example #20
def depthwise_weight_4d_2_6d(x,
                             y,
                             src_format,
                             dst_format,
                             kernel_name="depthwise_weight_4d_2_6d"):
    """Operation and Schedule for depthwise_weight_4d_2_6d.

    Parameters
    ----------
    x: shape and dtype of input, the dtype support float16,
    float32, int32, uint16.

    y: the shape and dtype of outputs, the dtype same as input.

    src_format: the source data_format

    dst_format: the target data_format

    kernel_name : cce kernel name, default value is "depthwise_weight_4d_2_6d"

    Returns
    -------
        convert HWCN to C1HWNCoC0
    """
    if src_format.lower() != "hwcn":
        raise RuntimeError("dst_format must be HWCN!")

    if dst_format.lower() != "c1hwncoc0":
        raise RuntimeError("src_format must be C1HWNCoC0 !")

    input_shape = x.get("shape")
    dtype = x.get("dtype")
    op_utils.check_shape(input_shape, param_name="x")
    check_list = ("float16", "float32", "int32", "uint16")
    dtype = dtype.lower()
    op_utils.check_dtype(dtype, check_list, param_name="x")

    input_data = tvm.placeholder(input_shape, name="input_data", dtype=dtype)
    four2six = _Four2SixParam(input_shape)

    res = tvm.extern(
        [four2six.get_out_shape()], [input_data],
        lambda ins, outs: _intrin_factor(four2six, dtype, ins, outs),
        name="res",
        dtype=dtype)

    sch = tvm.create_schedule(res.op)
    build_list = [input_data, res]

    with build_config:
        tvm.build(sch, build_list, "cce", name=kernel_name)
Code Example #21
def depthwise_weight_6d_2_4d(x,
                             y,
                             src_format,
                             dst_format,
                             kernel_name="depthwise_weight_6d_2_4d"):
    """Operation and Schedule for depthwise_weight_6d_2_4d.

    Parameters
    ----------
    x: shape and dtype of input, the dtype support float16, float32,
    int32, uint16.

    y: the shape and dtype of outputs, the dtype same as input.

    src_format: the source data_format

    dst_format: the target data_format

    kernel_name : cce kernel name, default value is "depthwise_weight_6d_2_4d"

    Returns
    -------
        convert C1HWNCoC0 to HWCN
    """
    _check_parameters(x, y, src_format, dst_format)
    output_shape = y.get("shape")
    channel_size = output_shape[2]
    input_shape = x.get("shape")
    dtype = x.get("dtype")
    channel_4d = channel_size
    op_utils.check_shape(input_shape, param_name="x")

    check_list = ("float16", "float32", "int32", "uint16")
    dtype = dtype.lower()
    op_utils.check_dtype(dtype, check_list, param_name="x")

    input_data = tvm.placeholder(input_shape, name="input_data", dtype=dtype)

    six2four = _Six2FourParam(input_shape, channel_4d)

    res = tvm.extern(
        [six2four.get_out_shape()], [input_data],
        lambda ins, outs: _intrin_factor(six2four, dtype, ins, outs),
        name="res",
        dtype=dtype)

    sch = tvm.create_schedule(res.op)
    build_list = [input_data, res]

    with build_config:
        tvm.build(sch, build_list, "cce", name=kernel_name)
Code Example #22
File: sigmoid_grad.py Project: gekowa/ascend-opp
def sigmoid_grad(x, y, z, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad

    sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad

    Parameters:
    ----------
    x : dictionary shape of sigmoid input

    y : dictionary shape of grad

    z: dictionary output

    kernel_name : cce kernel name, default value is "sigmoid_grad_cce"

    Returns
    -------
    None
    """
    shape_sig = x.get("shape")
    shape_d = y.get("shape")
    dtype = x.get("dtype")
    dtype_y = y.get("dtype")
    if dtype != dtype_y:
        raise RuntimeError("Input dtype must be equal")
    if not operator.eq(list(shape_sig), list(shape_d)):
        raise RuntimeError("Input shapes must be equal")
    op_utils.check_shape(shape_sig, param_name="x")
    input_dtype = dtype.lower()
    op_utils.check_dtype(input_dtype, ("float16", "float32"), param_name="x")

    shape_sig = [reduce_ins(lambda x, y: x * y, shape_sig[:])]
    input_sigmoid = tvm.placeholder(shape_sig,
                                    name="input_sigmoid",
                                    dtype=input_dtype)
    input_grad = tvm.placeholder(shape_sig,
                                 name="input_grad",
                                 dtype=input_dtype)

    with tvm.target.cce():
        res = sigmoid_grad_compute(input_sigmoid, input_grad, z, kernel_name)
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [input_sigmoid, input_grad, res]
    }

    te.lang.cce.cce_build_code(auto_sch, config)
Code Example #23
File: fill_d.py Project: gekowa/ascend-opp
def fill_d(value, y, dims, kernel_name="fill_d"):
    """
    do fill operation

    Parameters:
    ----------
    value:   the dict of input value, include shape and dtype,
             dtype support int8, uint8, int32, float16, float32

    y :  the dict of output

    dims :  the output shape, type support int32

    kernel_name : cce kernel name, default value is "fill_d"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape_value = value.get("shape")
    dtype_value = value.get("dtype")

    # check whether the shape is right
    check_shape(dims, param_name="dims")
    check_shape(shape_value, param_name="value")

    # check whether dtypes are right
    check_list_value = ("int8", "uint8", "int32", "float16", "float32")
    check_dtype(dtype_value, check_list_value, param_name="value")

    # get 2 input tensors: data_dims, data_value
    compatible_shape_in = _check_shape_compatibility(shape_value, dims)

    dtype_value = dtype_value.lower()
    data_value = tvm.placeholder(compatible_shape_in,
                                 dtype=dtype_value,
                                 name="data_value")
    res = _fill_compute(data_value, y, dims, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (data_value, res),
        "print_ir": False
    }
    te.lang.cce.cce_build_code(sch, config)
Code Example #24
def asinh(input_x, output_y, kernel_name="asinh"):
    """
    algorithm: asinh(x) = log(x + sqrt(x^2 + 1))

    Parameters
    ----------
    input_x: the dict of input_x, only support float16, float32

    output_y : the dict of output_y

    kernel_name : cce kernel name, default value is "asinh"

    Returns
    -------
    None

    """

    shape_input = input_x.get("shape")
    dtype_input = input_x.get("dtype")

    check_shape(shape_input, param_name="input_x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_x")

    inp_dtype = dtype_input.lower()
    shape_input = (functool_reduce(lambda x, y: x * y, shape_input), )
    data_input = tvm.placeholder(shape_input,
                                 dtype=inp_dtype,
                                 name="data_input")

    with tvm.target.cce():
        if tbe_platform.cce_conf.api_check_support("te.lang.cce.vlog",
                                                   "float32") or not \
                tbe_platform.cce_conf.api_check_support("te.lang.cce.vrec",
                                                        "float32"):
            res = asinh_compute_cloud(data_input, output_y, kernel_name)
        else:
            res = asinh_compute_mini(data_input, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)
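
The identity in the docstring, asinh(x) = log(x + sqrt(x^2 + 1)), can be checked against Python's math module; a quick standalone sanity check, unrelated to the operator build itself:

import math

for x in (-2.0, 0.0, 0.5, 3.0):
    via_log = math.log(x + math.sqrt(x * x + 1.0))
    assert math.isclose(via_log, math.asinh(x), rel_tol=1e-12)
print("log(x + sqrt(x^2 + 1)) matches math.asinh")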
Code Example #25
def check_grad_param(grad_dic):
    """
    check that the grad parameter is valid

    Parameters
    ----------
    grad_dic: dict, shape and datatype; datatype supports float32
    Returns
    -------
    None
    """
    grad_dtype = grad_dic.get("dtype").lower()
    grad_shape = grad_dic.get("shape")
    op_utils.check_shape(grad_shape)
    op_utils.check_dtype(grad_dtype, ["float32"])
Code Example #26
def check_indices_param(indices_dic):
    """
    check that the indices parameter is valid

    Parameters
    ----------
    indices_dic: dict, shape and datatype; datatype supports int32
    Returns
    -------
    None
    """
    indices_dtype = indices_dic.get("dtype").lower()
    indices_shape = indices_dic.get("shape")
    op_utils.check_shape(indices_shape)
    op_utils.check_dtype(indices_dtype, ["int32"])
Code Example #27
File: relu6_d.py Project: gekowa/ascend-opp
def relu6_d(input_x, output_y, scale=1.0, kernel_name="relu6_d"):
    """
       f(x) = 6   (x >= 6)
       f(x) = 0   (x <= 0)
       f(x) = x   (0 < x < 6)

    Parameters
    ----------
    input_x : dict
        shape and dtype of input_x
    output_y : dict
        shape and dtype of output_y, should be same shape and type as input

    scale : float
        optional scale attribute, default value is 1.0
    kernel_name : str
        cce kernel name, default value is "relu6_d"

    Returns
    ------
    None
    """
    input_shape = util.scalar2tensor_one(input_x.get("shape"))
    input_dtype = input_x.get("dtype").lower()
    op_utils.check_shape(input_shape, param_name="input_x")

    vmaxs_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vmaxs", "float32")
    if input_dtype == "float32" and not vmaxs_support:
        raise RuntimeError(
            "Input dtype is float32, but do not support on the platform")

    # check input tensor data_type
    check_list = ("int32", "float16", "float32")
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")

    input_shape = [reduce_ins(lambda x, y: x * y, input_shape[:])]
    input_data = tvm.placeholder(input_shape,
                                 name="input_data",
                                 dtype=input_dtype)
    final_res = relu6_d_compute(input_data,
                                output_y,
                                scale,
                                kernel_name=kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(final_res)

    config = {"name": kernel_name, "tensor_list": (input_data, final_res)}
    te.lang.cce.cce_build_code(auto_sch, config)
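
The piecewise definition in the docstring is just a clamp of the input to [0, 6]. A tiny element-wise reference sketch in plain Python (it ignores the optional scale attribute, whose semantics are not shown in this excerpt):

def relu6_reference(x):
    """min(max(x, 0), 6): 0 for x <= 0, x for 0 < x < 6, 6 for x >= 6."""
    return min(max(x, 0.0), 6.0)

assert relu6_reference(-1.5) == 0.0
assert relu6_reference(3.0) == 3.0
assert relu6_reference(7.2) == 6.0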
Code Example #28
File: relu_v2.py Project: gekowa/ascend-opp
def relu_v2(x, y, mask, kernel_name="relu_v2"):
    """
    Algrithm: relu_v2(x) = x and 1 when x > 0 , else 0, 0

    Parameters
    ----------
    Algorithm: relu_v2

    Parameters:

    x: the dict of input data, support float16, float32, int8, int32, uint8

    y: the dict of output

    mask: the dict of mask_output

    kernel_name: cce kernel name, default value is "relu_v2".

    Returns
    -------
    None
    """

    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")

    if shape[-1] % 8 != 0:
        raise RuntimeError("the last axis if shape must be dive by 8")

    check_list = ("float16", "float32", "int8", "int32", "uint8")
    check_dtype(dtype, check_list, param_name="x")

    dtype = dtype.lower()
    input_data = tvm.placeholder(shape, dtype, "input_data")

    with tvm.target.cce():
        res, res_mask = relu_v2_compute(input_data, y, mask, kernel_name)
        sch = generic.auto_schedule([res, res_mask])

    config = {
        "name": kernel_name,
        "tensor_list": [input_data, res, res_mask],
        "print_ir": False
    }

    te.lang.cce.cce_build_code(sch, config)
Code Example #29
File: normalize_scale.py Project: gekowa/ascend-opp
def check_shape_1(shape_1):
    """
    check the shape for x1

    Parameters
    ----------
    shape_1 : list or tuple
        shape for x1

    Returns
    -------
    None
    """

    op_utils.check_shape(shape_1, param_name="x1")
    op_utils.check_shape(shape_1, min_rank=4, max_rank=4, param_name="x1")
Code Example #30
def softplus_v2(x, y, beta=1.0, threshold=20.0, kernel_name="softplus_v2"):
    """
    Computes softplus operation with attribute beta and threshold.
    The output: log(1+exp(beta*x))/beta if x/beta <= threshold else x.

    Parameters
    ----------
    x: dict
        The input_features passed as input to the corresponding softplus operation.
        source data type support "float16", "float32".
    y: dict
        data of output.
    beta: float16/float32, optional, default: 1.0
    threshold: float16/float32, optional, default: 20.0

    kernel_name: str
        kernel name, default value is "softplus_v2".
    Returns
    -------
    None
    """
    shape_feature = x.get("shape")
    dtype_feature = x.get("dtype")
    dtype_output = y.get("dtype")
    # check dtype and shape
    check_list = ("float16", "float32")
    check_dtype(dtype_feature, check_list, param_name="x")
    check_dtype(dtype_output, check_list, param_name="y")
    check_shape(shape_feature, param_name="x")

    if beta == 0.0:
        raise ZeroDivisionError("the value of beta must be non-zero")

    data_features = tvm.placeholder(shape_feature,
                                    dtype=dtype_feature,
                                    name="data_features")

    res = softplus_v2_compute(data_features, beta, threshold, kernel_name)

    # auto schedule
    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    # operator build
    config = {"name": kernel_name, "tensor_list": [data_features, res]}
    te.lang.cce.cce_build_code(schedule, config)
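
Following the docstring above literally, the element-wise rule is log(1 + exp(beta * x)) / beta, reverting to x once the threshold condition is met. A small reference sketch of that rule; the threshold test mirrors the docstring's wording and the sample value is made up:

import math

def softplus_v2_reference(x, beta=1.0, threshold=20.0):
    # Revert to the identity past the threshold, as described in the docstring.
    if x / beta > threshold:
        return x
    return math.log(1.0 + math.exp(beta * x)) / beta

print(softplus_v2_reference(0.5))  # ~0.974 for beta=1.0, threshold=20.0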