Example #1
def mean_v2(data, axis=None, keepdims=False, target=utils.CCE):
    """
    Simple implementation of mean.

    Supported Platforms:
        'Ascend'
    """
    # Check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = [x.value for x in data.shape]
    utils.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    dtype = data.dtype
    count = 1
    for i in axis:
        count *= shape[i]

    count_rec = 1 / count
    output = sum_v2(data, axis, keepdims, target=target)
    res = output * akg.tvm.const(count_rec, dtype)
    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs
Example #2
def mean(data, axis=None, keepdims=False, target=utils.CCE):
    """
    Computes the mean of the values of a Tensor over the whole dataset.

    Note:
        If the tuple's elements are unsorted, this function will call preprocess_axis first to sort them.
        If the tuple is empty, this function will compute the sum over all elements.
        If the data type is float16 and the whole reduced dim is not less than 65536, this function will compute
        the mean by dividing by 65535 first, to avoid the whole dim becoming too large.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        axis (Union[list, tuple, int, None]): Axes along which to reduce. An empty tuple is treated as None.
        keepdims (bool): If keepdims is True, the result has the same number of dimensions as the input.

    Returns:
        tvm.tensor.Tensor, has the same type as data. If keepdims is True, all reduced dimensions are
        retained with length 1; otherwise the reduced axes are eliminated.

    Supported Platforms:
        'Ascend'
    """
    # Check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = ft_util.get_shape(data)
    utils.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    count = 1
    for i in axis:
        count *= shape[i]
    output = sum(data, axis, keepdims, target=target)

    if shape_is_dynamic(data):
        res = akg.tvm.compute(
            output.shape,
            lambda *i: akg.lang.ascend.divide_var(output(*i), count),
            name="res")
    else:
        res = akg.topi.divide(output, count)

    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs
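
For reference, a minimal NumPy sketch of the same reduction semantics: sum over the chosen axes, then divide by the product of their extents, mirroring the count computed above. The helper name mean_reference and the NumPy usage are illustrative only, not part of akg.

import numpy as np

def mean_reference(data, axis=None, keepdims=False):
    """Plain NumPy reference: sum over the reduced axes, then divide by the
    product of their extents."""
    axes = tuple(range(data.ndim)) if axis is None else (
        (axis,) if isinstance(axis, int) else tuple(axis))
    count = 1
    for i in axes:
        count *= data.shape[i]
    return data.sum(axis=axes, keepdims=keepdims) / count

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
assert np.allclose(mean_reference(x, axis=(1, 2), keepdims=True),
                   x.mean(axis=(1, 2), keepdims=True))
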
Example #3
def Softmax(data, axis, target=utils.CCE):
    """
    Map all elements of data into (0, 1) so that they sum to 1 along the given axis.

    Args:
        data (tvm.tensor.Tensor): input.
        axis (int): axis along which the normalization is applied.

    Returns:
        tvm.tensor.Tensor, output.
    
    Supported Platforms:
        'Ascend'
    """
    utils.check_shape(data.shape)
    shape = data.shape

    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    if isinstance(axis, (list, tuple)):
        if len(axis) != 1:
            raise RuntimeError(
                "Reduce axis for softmax op must be 1-dimensional, while the current one is %d-dimensional"
                % (len(axis)))
        axis = axis[0]
    output = softmax_op(data, axis, shape)
    attr_map = {}
    if ds.shape_is_dynamic(data):
        # For shifted loops, should have:
        #     dynamic_shape_bound mod tile_size_prime == 2
        # This aims to ensure that the shift constant is a multiple of tile_size_prime.
        # So the generated IR will not have complicated head and tail for shifted blocks.
        attr_map = {
            "pragma_modshift":
            1,
            "pragma_outerband_need_split":
            1,
            "enable_post_poly_loop_partition":
            False,
            "pragma_disable_whole_component":
            False,
            "dynamic_shape":
            ds.set_dynamic_shape_limit_for_tensor(output, 2048, axis) +
            ds.set_poly_upper_bound_for_tensor(output, 2048, axis),
            "custom_tiling":
            ct.create_constraint_on_tensor(
                tensor=output,
                values=[1 for i, _ in enumerate(shape) if i != axis],
                constraints=ct.TileConstraint.FACTOR,
                tensor_pos=[i for i, _ in enumerate(shape) if i != axis])
        }
    return output, attr_map
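
As a standalone illustration of the mapping described in the docstring, the following NumPy sketch normalizes along one axis so the results lie in (0, 1) and sum to 1. The name softmax_reference is hypothetical, and the max subtraction is the usual numerical-stability trick, not necessarily what softmax_op does internally.

import numpy as np

def softmax_reference(data, axis=-1):
    """Exponentiate after subtracting the per-axis max, then normalize."""
    shifted = data - data.max(axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)

x = np.random.rand(4, 8).astype(np.float32)
assert np.allclose(softmax_reference(x, axis=1).sum(axis=1), 1.0)
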
Example #4
def get_attrs(tensor):
    """get attrs config"""
    attrs_map = {
        "pragma_checkcoincident": 0,
        "pragma_modshift": 1,
        "disable_cse": 1,
        "enable_bisect_optimize": 0,
        "enable_remove_broadcast_copy": True,
    }
    if shape_is_dynamic(tensor):
        attrs_map["pragma_analyze_reuse_buffer"] = True
    return attrs_map
Example #5
def five2four_tiling_strategy(tensor, c_value=None, expansion=None):
    """Custom tiling strategy for five2four op."""
    strategy = list()
    if c_value is None:
        strategy = ct_util.create_template(
            tensor=tensor, template=ct_util.TileTemplate.NC1HWC0)
    elif not shape_is_dynamic(tensor):
        c_value = 16 if c_value < 16 else c_value
        node_n = ct_util.create_constraint_on_tensor(
            tensor=tensor,
            values=1,
            constraints=ct_util.TileConstraint.FACTOR,
            tensor_pos=0)
        node_c1 = ct_util.create_constraint_on_tensor(
            tensor=tensor,
            values="FULL",
            constraints=ct_util.TileConstraint.MAX,
            tensor_pos=1)
        node_c0 = ct_util.create_constraint_on_tensor(
            tensor=tensor,
            values=c_value,
            constraints=ct_util.TileConstraint.FACTOR,
            tensor_pos=4)
        strategy = node_n + node_c1 + node_c0
    if expansion:
        strategy.append(
            ct_util.create_constraint_on_tensor(
                tensor=tensor,
                values=expansion,
                constraints=ct_util.TileConstraint.SET_EXPANSION)[0])
    if shape_is_dynamic(tensor):
        # axis should be fully tiled due to the cast operator
        strategy.append(
            ct_util.modify_common_constraints(
                value=0.85, constraint=ct_util.TileConstraint.SET_MEM_RATIO))
    return strategy
Example #6
def _reshape_ascend(data, out_shape):
    """
    Rearranges input tensor data to new shape out_shape.

    Args:
        data (tvm.tensor.Tensor): The tensor to be reshaped.
        out_shape (list, tuple): The new shape applied on the input tensor data,
                                should be compatible with the original shape of data.

    Returns:
        The reshaped akg.tvm.tensor of same type as input tensor data.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check(
        data.dtype, utils.DtypeForDavinci.INT32.value +
        utils.DtypeForDavinci.ALL_FLOAT.value)

    data_shape = data.shape
    utils.check_shape(data_shape)

    in_shape = get_shape(data)
    out_shape = list(out_shape)
    is_dynamic = ds.shape_is_dynamic(data)

    if -1 in out_shape:
        out_shape = get_out_shape(in_shape, out_shape)
    else:
        if not is_dynamic:
            if reduce(lambda x, y: x * y, in_shape) != reduce(
                    lambda x, y: x * y, out_shape):
                raise ValueError(
                    "the total length of out_shape is not equal to the in_shape"
                )

    inputs = akg.tvm.compute(in_shape,
                             lambda *indice: data(*indice),
                             name="inputs")
    res = akg.topi.reshape(inputs, out_shape)
    output = akg.tvm.compute(out_shape,
                             lambda *indice: res(*indice),
                             name="reshape")
    return output
Example #7
def reshape(data, out_shape):
    """
    Rearranges input tensor data to new shape out_shape.

    Args:
        data (tvm.tensor.Tensor): The tensor to be reshaped.
        out_shape (list, tuple): The new shape applied on the input tensor data,
                                should be compatible with the original shape of data.

    Returns:
        The reshaped akg.tvm.tensor of same type as input tensor data.
    """
    ops_dtype_check(data.dtype, DtypeForDavinci.INT32.value + DtypeForDavinci.ALL_FLOAT.value)

    data_shape = data.shape
    check_shape(data_shape)

    in_shape = get_shape(data)
    out_shape = list(out_shape)
    is_dynamic = ds.shape_is_dynamic(data)

    if -1 in out_shape:
        access_size = 1
        for i, o_shape in enumerate(out_shape):
            if -1 != o_shape:
                access_size *= o_shape
            else:
                hit_idx = i
        ori_size = reduce(lambda x, y: x * y, in_shape)
        if ori_size % access_size != 0:
            raise ValueError(("Invalid out_shape ({})".format(out_shape)))

        out_shape[hit_idx] = int(ori_size / access_size)
    else:
        if not is_dynamic:
            if reduce(lambda x, y: x * y, in_shape) != reduce(lambda x, y: x * y, out_shape):
                raise ValueError("the total length of out_shape is not equal to the in_shape")

    inputs = akg.tvm.compute(in_shape, lambda *indice: data(*indice), name="inputs")
    res = akg.topi.reshape(inputs, out_shape)
    output = akg.tvm.compute(out_shape, lambda *indice: res(*indice), name="reshape")
    attr_map = {}
    return output, attr_map
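
The -1 inference above can be read in isolation: the product of the known output dims must divide the total element count, and the single -1 entry is replaced by the remaining factor. A minimal standalone sketch of that logic (infer_out_shape is an illustrative helper name, not an akg API):

from functools import reduce

def infer_out_shape(in_shape, out_shape):
    """Replace a single -1 in out_shape so the total sizes match."""
    out_shape = list(out_shape)
    known = 1
    hit_idx = None
    for i, s in enumerate(out_shape):
        if s == -1:
            hit_idx = i
        else:
            known *= s
    total = reduce(lambda x, y: x * y, in_shape)
    if hit_idx is None:
        if total != known:
            raise ValueError("the total length of out_shape is not equal to the in_shape")
        return out_shape
    if total % known != 0:
        raise ValueError("Invalid out_shape ({})".format(out_shape))
    out_shape[hit_idx] = total // known
    return out_shape

assert infer_out_shape([2, 3, 4], [4, -1]) == [4, 6]
assert infer_out_shape([2, 3, 4], [2, 12]) == [2, 12]
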
Example #8
File: mean.py Project: zhuyawen/akg
def mean_v2(data, axis=None, keepdims=False):
    """Simple implementation of mean."""
    # Check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = [x.value for x in data.shape]
    vc_util.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    dtype = data.dtype
    count = 1
    for i in axis:
        count *= shape[i]

    count_rec = 1 / count
    output, _ = sum.sum_v2(data, axis, keepdims)
    res = output * akg.tvm.const(count_rec, dtype)
    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs
Example #9
def four2five(data, format_, dst_dtype='float16', need_custom_tiling=True):
    """
    Convert 4-dim "data" to 5 dims; the format of "data" is defined in "format_".

    Args:
        data (tvm.tensor.Tensor): 4-dim tensor of type float16 or float32
        format_ (str): a str that defines the format of "data"
        dst_dtype (str): a str that defines the output type, either float16 or float32

    Returns:
        5-dim tvm.tensor.Tensor whose type is defined by dst_dtype and
        whose shape is [N, ceil(C / 16), H, W, 16], plus attrs about tiling args

    Raises:
        ValueError: If the type of format_ is invalid.

    """
    # Check dtype
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    # Check shape
    shape = get_shape(data)
    vc_util.davinci_format_check(shape, format_, dim=4)

    # Check format
    if format_ not in ['NCHW', 'NHWC']:
        raise ValueError(
            "{} format is not supported, four2five only supports NCHW and NHWC format input"
            .format(format_))
    last_channel = 16
    if format_ == "NCHW":
        bs, c, h, w = get_shape(data)
    else:
        bs, h, w, c = get_shape(data)
    pad_c = c
    if c % last_channel != 0:
        pad_c = (c + 15) // last_channel * last_channel
    c1 = pad_c // last_channel
    c0 = last_channel
    is_dynamic = ds.shape_is_dynamic(data)
    if not is_dynamic:
        attrs = get_attrs()
    else:
        attrs = get_dynamic_attrs()
    # Check size c when casting happens
    if data.dtype != dst_dtype and c0 * c1 >= C_LIMIT_FOR_CAST:
        raise ValueError(
            "When input and output data type is not matched, shape of 'c' axis should not exceed {}, "
            "while currently set is {}".format(C_LIMIT_FOR_CAST, c0 * c1))

    @script(capture=locals())
    def nchw_to_nc1hwc0_step(inputs, bs, c1, h, w, c0):
        output = allocate((bs, c1, h, c0, w), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output[n_i, c_i, h_i, c_i0,
                                   w_i] = inputs[n_i,
                                                 c_i * last_channel + c_i0,
                                                 h_i, w_i]
        output1 = allocate((bs, c1, h, w, c0), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output1[n_i, c_i, h_i, w_i,
                                    c_i0] = output[n_i, c_i, h_i, c_i0, w_i]
        return output1

    @script(capture=locals())
    def nchw_to_nc1hwc0(inputs, bs, c1, h, w, c0):
        output = allocate((bs, c1, h, w, c0), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output[n_i, c_i, h_i, w_i,
                                   c_i0] = inputs[n_i,
                                                  c_i * last_channel + c_i0,
                                                  h_i, w_i]
        return output

    @script(capture=locals())
    def nhwc_to_nc1hwc0(inputs, zero, bs, c1, h, w, c0):
        output = allocate((bs, c1, h, w, c0), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            if c_i * last_channel + c_i0 < c:
                                output[n_i, c_i, h_i, w_i,
                                       c_i0] = inputs[n_i, h_i, w_i,
                                                      c_i * last_channel +
                                                      c_i0]
                            else:
                                output[n_i, c_i, h_i, w_i, c_i0] = zero

        return output

    cast_data = data
    need_cast = data.dtype == 'float32' and dst_dtype == 'float16'
    if c % last_channel != 0 or need_cast:
        expansion = int(ct_util.BLOCK_SIZE / get_bytes(data.dtype))
    else:
        expansion = None
    # float32 -> float16, need to cast before transform
    if need_cast:
        cast_data = akg.lang.cce.cast_to(data, dst_dtype)

    zero_ = akg.tvm.const(0.0, cast_data.dtype)
    if format_ == "NCHW":
        if c % last_channel != 0:
            pad_shape = [bs, pad_c, h, w]
            if h == 1 and w == 1:
                # if h and w are both 1, it is the pad-last-dim case
                output_shape = [bs, pad_c // last_channel, h, w, last_channel]

                output = akg.tvm.compute(
                    output_shape,
                    lambda i, c1, k, l, c0: akg.tvm.expr.Select(
                        c0 < c - c1 * last_channel, cast_data[
                            i, c1 * last_channel + c0, k, l],
                        akg.tvm.const(0, cast_data.dtype)),
                    name="output")
            else:
                # if the c dim needs padding, separate the transpose into two steps:
                # first nchw -> nc1hc0w, then nc1hc0w -> nc1hwc0
                pad_data = akg.tvm.compute(
                    pad_shape,
                    lambda i, j, k, l: akg.tvm.expr.Select(
                        j < c, cast_data[i, j, k, l], zero_),
                    name="pad_data")
                output = nchw_to_nc1hwc0_step(pad_data, to_tvm_const(bs),
                                              to_tvm_const(c1),
                                              to_tvm_const(h), to_tvm_const(w),
                                              to_tvm_const(c0))

        else:
            if not is_dynamic and data.dtype == "float16" and h * w % last_channel == 0 and h * w < 3600:
                output_shape = [bs, c1, h, w, c0]
                output = akg.tvm.compute(
                    output_shape,
                    lambda n, c1, h, w, c0: akg.lang.cce.four2five_nchw(
                        cast_data[n, c1 * last_channel + c0, h, w]),
                    name="output")

            else:
                output = nchw_to_nc1hwc0(cast_data, to_tvm_const(bs),
                                         to_tvm_const(c1), to_tvm_const(h),
                                         to_tvm_const(w), to_tvm_const(c0))

    else:
        if not is_dynamic and c < last_channel:
            rank = 5  # (n, c1, h, w, c0)
            pad_before = []
            pad_after = []
            for _ in range(rank):
                pad_before.append(0)
                pad_after.append(0)
            pad_after[-1] = last_channel - c
            # As c < last_channel, c1 is 1
            output = akg.tvm.compute(
                (bs, c1, h, w, c),
                lambda bs_i, _, h_i, w_i, c_i: cast_data[bs_i, h_i, w_i, c_i],
                name="output")
            output = tvm_pad(output,
                             pad_before,
                             pad_after=pad_after,
                             name='pad_output')
        else:
            output = nhwc_to_nc1hwc0(cast_data, zero_, to_tvm_const(bs),
                                     to_tvm_const(c1), to_tvm_const(h),
                                     to_tvm_const(w), to_tvm_const(c0))

    # float16 -> float32, need to cast after transform
    if data.dtype == 'float16' and dst_dtype == 'float32':
        output = akg.lang.cce.cast_to(output, dst_dtype)

    vc_util.davinci_format_check(output.shape, "NC1HWC0", dim=5)

    if not is_dynamic:
        dim_info, _ = four2five_set_dim_func(data, format_, dst_dtype)
        if dim_info != "":
            attrs["dim"] = dim_info
        if need_custom_tiling:
            attrs["custom_tiling"] = four2five_tiling_strategy(
                output, format_, expansion)
    elif need_custom_tiling:
        attrs["custom_tiling"] = four2five_tiling_strategy_dynamic(
            output, format_)

    if is_dynamic:
        attrs["enable_feature_library_pre_poly"] = True
    return output, attrs
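
The NCHW branch above builds the NC1HWC0 layout with TVM expressions; the underlying index math can be checked with a small NumPy sketch (nchw_to_nc1hwc0_reference is an illustrative name): pad C up to a multiple of c0 = 16, split it into (C1, c0), and move c0 to the last axis.

import numpy as np

def nchw_to_nc1hwc0_reference(x, c0=16):
    """Pad C to a multiple of c0, then reshape (N, C1*c0, H, W) ->
    (N, C1, c0, H, W) and transpose to (N, C1, H, W, c0)."""
    n, c, h, w = x.shape
    pad_c = (c + c0 - 1) // c0 * c0   # same rounding as (c + 15) // 16 * 16
    c1 = pad_c // c0
    padded = np.zeros((n, pad_c, h, w), dtype=x.dtype)
    padded[:, :c, :, :] = x
    return padded.reshape(n, c1, c0, h, w).transpose(0, 1, 3, 4, 2)

x = np.random.rand(1, 20, 2, 2).astype(np.float16)
y = nchw_to_nc1hwc0_reference(x)
assert y.shape == (1, 2, 2, 2, 16)
# channels 16..19 land in c1 = 1, c0 = 0..3; the rest of that block is zero padding
assert np.array_equal(y[0, 1, :, :, :4], x[0, 16:20].transpose(1, 2, 0))
assert np.all(y[0, 1, :, :, 4:] == 0)
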
Example #10
File: add.py Project: zhuyawen/akg
def add(first_input, second_input, scale=1.0, polyhedral=True, attrs=None):
    """
    Computes first_input + second_input * scale elementwise.

    Args:
        first_input (tvm.tensor.Tensor): Tensor of type float16, float32, int32.
        second_input (tvm.tensor.Tensor): Tensor with same type as first_input.
                                      Broadcast will happen if shapes of input tensors are different.
        scale (float): scale factor applied on second_input, default value is 1.0.
        polyhedral (bool): If True, use auto-schedule, else use manual-schedule, default value is True.
        attrs (dict): Specifies parameters used in manual-schedule.

    Returns:
        tvm.tensor.Tensor of the same type as the inputs, whose shape is the broadcast shape of the input tensors.
    """
    vc_util.check_shape(first_input.shape)
    vc_util.check_shape(second_input.shape)
    attr_map = {}

    first_input_shape = get_shape(first_input)
    second_input_shape = get_shape(second_input)

    if shape_is_dynamic([first_input, second_input]):
        if first_input_shape != second_input_shape:
            raise RuntimeError(
                "Input tensors have different shapes, broadcast is not supported for dynamic."
            )
        first_broadcast = first_input
        second_broadcast = second_input
    else:
        if first_input_shape != second_input_shape:
            _, _, out_shape = produce_shapes(first_input_shape,
                                             second_input_shape)
        else:
            out_shape = first_input_shape
        first_broadcast = akg.topi.broadcast_to(first_input, out_shape)
        second_broadcast = akg.topi.broadcast_to(second_input, out_shape)

    first_input_type = first_input.dtype
    second_input_type = second_input.dtype
    if first_input_type != second_input_type:
        raise TypeError("Input tensors have different data types.")
    vc_util.ops_dtype_check(first_input_type,
                            vc_util.DtypeForDavinci.ALL_TYPES)

    temp = vmuls(second_broadcast, scale)
    res = vadd(first_broadcast, temp)
    res_cast = res.astype(first_input_type)
    if polyhedral:
        return res_cast, attr_map

    def comp_func(s):
        first_ub = s.cache_read(first_input, "local.UB", [first_broadcast])
        second_ub = s.cache_read(second_input, "local.UB", [second_broadcast])
        res_cast_ub = s.cache_write(res_cast, "local.UB")

        s[first_broadcast].set_scope("local.UB")
        s[second_broadcast].set_scope("local.UB")
        s[temp].set_scope("local.UB")
        s[res].set_scope("local.UB")

        split_axis = []
        for i in range(len(attrs["tile"])):
            outer, inner = s[res_cast].split(res_cast.op.axis[i],
                                             attrs["tile"][i])
            axis_dict = {"outer": outer, "inner": inner}
            split_axis.append(axis_dict)

        s[first_ub].compute_at(s[res], res.op.axis[0])
        s[second_ub].compute_at(s[res], res.op.axis[0])

        s[first_broadcast].compute_at(s[res], res.op.axis[0])
        s[second_broadcast].compute_at(s[res], res.op.axis[0])

        s[temp].compute_at(s[res], res.op.axis[0])
        s[res].compute_at(s[res_cast_ub], res_cast_ub.op.axis[0])

        s[res_cast_ub].compute_at(s[res_cast], split_axis[-1]['outer'])

        # no scaling needed
        if scale == 1:
            s[temp].compute_inline()

        # no broadcast needed
        if first_input_shape == second_input_shape:
            s[first_broadcast].compute_inline()
            s[second_broadcast].compute_inline()

    return res_cast, comp_func, attr_map
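
Functionally, the elementwise result is first_input + second_input * scale, with ordinary shape broadcasting (which the code above only allows for static shapes). A plain NumPy sketch of that semantics (add_reference is an illustrative name):

import numpy as np

def add_reference(first_input, second_input, scale=1.0):
    """first_input + second_input * scale, with NumPy broadcasting."""
    if first_input.dtype != second_input.dtype:
        raise TypeError("Input tensors have different data types.")
    return (first_input + second_input * scale).astype(first_input.dtype)

a = np.ones((2, 3), dtype=np.float32)
b = np.arange(3, dtype=np.float32)   # broadcast along the first axis
assert np.array_equal(add_reference(a, b, scale=2.0), a + 2.0 * b)
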
Example #11
File: mean.py Project: zhuyawen/akg
def get_attrs(tensor):
    """generate default attrs."""
    if shape_is_dynamic(tensor):
        return {"enable_double_buffer": 0, "enable_divide_var": 1}

    return {}
Example #12
def fused_batch_norm(inputs, attrs):
    r"""
    Batch normalization.

    See Source:
    <a href="https://arxiv.org/abs/1502.03167">
        Batch Normalization: Accelerating Deep Network Training by Reducing
        Internal Covariate Shift; S. Ioffe, C. Szegedy.
    </a>

    .. math::
        \begin{array}{ll} \\
            \mu = \frac{1}{m} \sum^m_{i=1}{x_i} \\
            \sigma^2 = \frac{1}{m} \sum^m_{i=1}{(x_i-\mu)^2} \\
            \hat{x_i} = \frac{x_i - \mu}{ \sqrt{\sigma^2 + \epsilon} } \\
            y_i = \gamma \hat{x_i} + \beta \equiv BN_{\gamma, \beta}(x_i)
        \end{array}

    This momentum argument is different from the one used in optimizer classes
    and from the conventional notion of momentum. Mathematically, the update
    rule for the running statistics here is

    .. math::
        \hat{z_{new}} = momentum \cdot \hat{z} + (1-momentum) \cdot z_t

    where :math:`\hat{z}` is the estimated statistic and :math:`z_t` is the
    new observed value.

    Note:
        When data_format is \"NC1HWC0\", the `gamma`, `beta`, `moving_mean`
        and `moving_variance` should be 5D tensors of shape
        `(1, C1, 1, 1, C0)`, otherwise, they should be 1D tensors
        of shape `(C,)`.

    Args:
        inputs:
            data (tvm.tensor.Tensor): Tensor of type float16, float32. (:math:`x_i`)
            gamma (tvm.tensor.Tensor): Tensor for scaling (:math:`\gamma`).
            beta (tvm.tensor.Tensor): Tensor for bias (:math:`\beta`).
            moving_mean (tvm.tensor.Tensor): Tensor for population mean used for
                                            inference.
            moving_variance (tvm.tensor.Tensor): Tensor for population variance used
                                             for inference.
        attrs:
            momentum (float): A float number used for the moving_mean and
                            moving_variance computation.
            eps (float): A small float added to variance to avoid dividing by zero.
            is_training (bool): A bool value to specify if the operation is used for
                                training or inference.
            data_format (str): Supported formats: \"DefaultFormat\", \"NCHW\", \"NHWC\"
                            or \"NC1HWC0\".
            axis (Union[int, list, tuple]): Integer to specify the channel axis when
                                            data_format is \"DefaultFormat\". List
                                            or tuple for \"NC1HWC0\". When format is
                                            \"NCHW\" or \"NHWC\", it's not work.
                                            Must be in the range
                                            [-rank(data), rank(data)).
            single_sum (bool): whether use "mul_axis_sum".

    Returns:
        outs (tvm.tensor.Tensor): Tensor for normalized, scaled, shifted data.
        new_moving_mean (tvm.tensor.Tensor): Tensor of same type and shape as
                                             `moving_mean`. The `moving_mean`
                                             updated by data. Only returns when
                                             `is_training` is True.
        new_moving_variance (tvm.tensor.Tensor): Tensor of same type and shape as
                                                 `moving_variance`. The
                                                 `moving_variance` updated by
                                                 data. Only returns when
                                                 `is_training` is True.
        sample_mean (tvm.tensor.Tensor): Tensor of same type and shape as
                                         `moving_mean`. The mean of `data`. Only
                                         returns when `is_training` is True.
        sample_var (tvm.tensor.Tensor): Tensor of same type and shape as
                                        `moving_variance`. The variance of `data`.
                                        Only returns when `is_training` is True.
    """
    if len(inputs) != 5:
        raise ValueError(
            "Input tensors number should be 5, but get %s." % len(inputs))
    data_format = attrs.get("data_format", "DefaultFormat")
    params = check_inputs(inputs, data_format, attrs.get("axis", 1))

    data = inputs[0]
    gamma = inputs[1]
    beta = inputs[2]
    moving_mean = inputs[3]
    moving_variance = inputs[4]
    ori_dtype = data.dtype
    shape = get_shape(data)
    axes = params.get("axes", (0,))
    keepdims = params.get("is_special5d", False)
    mid_shape = params.get("mid_shape", [1, ])
    data = akg.tvm.compute(data.shape, lambda *i: data(*i),
                           "batchnorm_" + data_format)
    ori_moving_mean = moving_mean
    ori_moving_variance = moving_variance
    if ori_dtype != DTYPE_FLOAT32:
        data = akg.topi.cast(data, DTYPE_FLOAT32)
        gamma = akg.topi.cast(gamma, DTYPE_FLOAT32)
        beta = akg.topi.cast(beta, DTYPE_FLOAT32)
        moving_mean = akg.topi.cast(moving_mean, DTYPE_FLOAT32)
        moving_variance = akg.topi.cast(moving_variance, DTYPE_FLOAT32)

    ######## following is dsl ########
    is_training = attrs.get("is_training", True)
    if is_training:
        value_num = 1
        for index in axes:
            value_num *= shape[index]

        avg_num = round(float(1) / float(value_num), 12)

        data_square = akg.tvm.compute(data.shape,
                                      lambda *i: data(*i) * data(*i),
                                      name="data_square")
        # cal mean
        data_mean = akg.lang.ascend.vmuls(
            sum_data(data, axes, keepdims, attrs.get("single_sum", False)), avg_num)
        data_square_mean = akg.lang.ascend.vmuls(sum_data(data_square, axes, keepdims, attrs.get("single_sum", False)),
                                                 avg_num)
        data_mean_square = akg.tvm.compute(data_mean.shape,
                                           lambda *i: data_mean(*i) *
                                           data_mean(*i),
                                           name="data_mean_square")

        data_variance = akg.tvm.compute(data_mean.shape,
                                        lambda *i:
                                        data_square_mean(
                                            *i) - data_mean_square(*i),
                                        name="data_variance")

        mean_new = update_by_moving_average(
            moving_mean, data_mean, attrs.get("momentum", 0.99))
        variance_new = update_by_moving_average(moving_variance,
                                                data_variance, attrs.get("momentum", 0.99))
    else:
        # no_bc version
        data_variance = moving_variance
        data_mean = moving_mean

    rsveps = akg.lang.ascend.vadds(data_variance, akg.tvm.const(
        attrs.get("eps", 1e-3), dtype=DTYPE_FLOAT32))
    rsveps = rsqrt(rsveps, utils.CCE)
    rsveps = akg.lang.ascend.broadcast(rsveps, shape)

    mean2 = akg.lang.ascend.vmuls(data_mean, akg.tvm.const(-1, data.dtype))
    mean2 = akg.lang.ascend.broadcast(mean2, shape)

    dmean = akg.tvm.compute(
        shape, lambda *i: data(*i) + mean2(*i), name="dmean")
    dmsve = akg.tvm.compute(shape, lambda *i: dmean(*i)
                            * rsveps(*i), name="dmsve")

    if not keepdims:
        gamma = akg.topi.reshape(gamma, mid_shape)
        beta = akg.topi.reshape(beta, mid_shape)
    gamma_bc = akg.lang.ascend.broadcast(gamma, shape)
    beta_bc = akg.lang.ascend.broadcast(beta, shape)
    dmsveg = akg.tvm.compute(shape, lambda *i: dmsve(*i) * gamma_bc(*i),
                             name="dmsveg")
    outs = akg.tvm.compute(shape, lambda *i: dmsveg(*i) + beta_bc(*i),
                           name="output")
    out_attrs = get_attrs(outs)

    if is_training:
        if ori_dtype != DTYPE_FLOAT32:
            outs = akg.topi.cast(outs, ori_dtype)
            mean_new = akg.topi.cast(mean_new, ori_dtype)
            variance_new = akg.topi.cast(variance_new, ori_dtype)
            data_mean = akg.topi.cast(data_mean, ori_dtype)
            data_variance = akg.topi.cast(data_variance, ori_dtype)

        mean_new, binds_info_mean = TensorUtils.inplace_set(
            ori_moving_mean, mean_new, buffer_name="mean_buf")
        variance_new, binds_info_var = TensorUtils.inplace_set(
            ori_moving_variance, variance_new, buffer_name="var_buf")
        binds_info_all = binds_info_mean
        binds_info_all.update(binds_info_var)
        out_attrs[BINDS] = binds_info_all

        # the new moving_mean and moving_var are updated in place in the
        # inputs (moving_mean and moving_var), but MindSpore needs
        # these two fake outputs even though it never uses them
        fake_moving_mean = akg.tvm.compute(mean_new.shape,
                                           lambda *indices: mean_new(*indices),
                                           "fake_moving_mean")
        fake_moving_var = akg.tvm.compute(mean_new.shape,
                                          lambda *indices: variance_new(
                                              *indices),
                                          "fake_moving_var")
        out_tensors = (outs, fake_moving_mean, fake_moving_var, data_mean,
                       data_variance, mean_new, variance_new,)
    else:
        if ori_dtype != DTYPE_FLOAT32:
            outs = akg.topi.cast(outs, ori_dtype)
        out_tensors = (outs,)
    out_tensors = list(out_tensors) if isinstance(
        out_tensors, tuple) else out_tensors
    if shape_is_dynamic(out_tensors):
        out_attrs["custom_tiling"] = batch_norm_tiling_strategy_dynamic(outs)
    else:
        out_attrs["custom_tiling"] = batch_norm_tiling_strategy(
            outs, data_format)
    out_tensors.append(out_attrs)

    return out_tensors
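
The training-mode math above follows the docstring formulas: the variance is computed as E[x^2] - E[x]^2, and the running statistics use the stated momentum rule. A compact NumPy sketch for NCHW input (batch_norm_reference and moving_average_update are illustrative names; the real sum_data/update_by_moving_average helpers are not shown here):

import numpy as np

def batch_norm_reference(x, gamma, beta, eps=1e-3, axes=(0, 2, 3)):
    """Normalize with the batch mean/variance, then scale and shift (NCHW)."""
    mean = x.mean(axis=axes, keepdims=True)
    var = (x * x).mean(axis=axes, keepdims=True) - mean * mean   # E[x^2] - E[x]^2
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma.reshape(1, -1, 1, 1) * x_hat + beta.reshape(1, -1, 1, 1)

def moving_average_update(moving_stat, batch_stat, momentum=0.99):
    """Update rule from the docstring: new = momentum * old + (1 - momentum) * batch."""
    return momentum * moving_stat + (1.0 - momentum) * batch_stat

x = np.random.rand(4, 3, 2, 2).astype(np.float32)
y = batch_norm_reference(x, np.ones(3, np.float32), np.zeros(3, np.float32))
assert np.allclose(y.mean(axis=(0, 2, 3)), 0.0, atol=1e-3)
assert abs(moving_average_update(1.0, 0.0, momentum=0.9) - 0.9) < 1e-12
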
Example #13
def five2four(data, shape4d, dst_type, format_):
    """
    Convert 5-dim "data" to 4 dims; the format of "data" is defined in "format_".

    Args:
        data (tvm.tensor.Tensor): 5-dim tensor of type float16 or float32
        shape4d (Union[list, tuple]): a list of 4 numbers, the shape of the output Tensor
        dst_type (str): data type of the output Tensor
        format_ (str): a str that defines the format of the result; supports NCHW and NHWC

    Returns:
        4-dim tvm.tensor.Tensor.

    """
    vc_util.ops_dtype_check([data.dtype, dst_type],
                            vc_util.DtypeForDavinci.ALL_FLOAT)
    shape5d = get_shape(data)
    if not shape_is_dynamic(data):
        if len(shape5d) != 5 or shape5d[-1] != 16:
            raise ValueError(
                "five2four_cce only supports 5-dim data, and the last dim should be 16"
            )

    bs, c1, h, w, c0 = shape5d
    if not shape_is_dynamic(data):
        vc_util.davinci_format_check(shape5d, "NC1HWC0", dim=5)
    # Check format
    if format_ not in ['NCHW', 'NHWC']:
        raise ValueError(
            "{} format is not supported, five2four only supports NCHW and NHWC format input"
            .format(format_))
    if format_ == "NCHW":
        if shape_is_dynamic(data):
            shape4d = [bs, c1 * c0, h, w]
        _, c, h_4d, w_4d = shape4d
    else:
        if shape_is_dynamic(data):
            shape4d = [bs, h, w, c1 * c0]
        _, h_4d, w_4d, c = shape4d
    vc_util.davinci_format_check(shape4d, format_, dim=4)

    # Check is shape4d and shape5d match
    if False not in [
            isinstance(s, (int, akg.tvm.expr.IntImm)) for s in shape5d
    ]:
        if h_4d != h or w_4d != w:
            raise ValueError(
                "five2four_cce's shape4d h and w should equal to data shape's h and w"
            )
        if c > c1 * c0 or c <= (c1 - 1) * c0:
            raise ValueError(
                "five2four_cce's shape4d c should in set ((c1 - 1) * c0, c1 * c0]"
            )

    # Check size c when casting happens
    if not shape_is_dynamic(data):
        if data.dtype != dst_type and c >= C_LIMIT_FOR_CAST:
            raise ValueError(
                "When input and output data type is not matched, shape of 'c' axis should not exceed {}, "
                "while currently set is {}".format(C_LIMIT_FOR_CAST, c))

    @script(capture=locals())
    def nc1hwc0_to_nhwc(inputs, bs, h, w, c, c1, c0):
        output = allocate((bs, h, w, c), inputs.dtype, "local")
        for n_i in range(bs):
            for h_i in range(h):
                for w_i in range(w):
                    for c_i in range(c1):
                        for c_i0 in range(c0):
                            output[n_i, h_i, w_i,
                                   c_i * c0 + c_i0] = inputs[n_i, c_i, h_i,
                                                             w_i, c_i0]
        return output

    @script(capture=locals())
    def nc1hwc0_to_nchw(inputs, bs, h, w, c, c1, c0):
        output = allocate((bs, c, h, w), inputs.dtype, "local")
        for n_i in range(bs):
            for c_i in range(c1):
                for h_i in range(h):
                    for w_i in range(w):
                        for c_i0 in range(c0):
                            output[n_i, c_i * c0 + c_i0, h_i,
                                   w_i] = inputs[n_i, c_i, h_i, w_i, c_i0]
        return output

    # if c % 16 == 0 and h and w are both 1, five2four is a reshape operation
    if shape_is_dynamic(data):
        call_reshape = isinstance(h, int) and isinstance(
            w, int) and h == 1 and w == 1
    else:
        call_reshape = h == 1 and w == 1 and c % 16 == 0
    c_value = None
    expansion = None
    if format_ == "NHWC":
        if call_reshape:
            output = akg.topi.reshape(data, (bs, h, w, c))
            if shape_is_dynamic(data):
                output = akg.tvm.compute((bs, h, w, c),
                                         lambda *indice: output(*indice),
                                         name="reshape")
        elif c < c0:
            reshape_output = akg.topi.reshape(data, (bs, h, w, c0))
            output = akg.tvm.compute((bs, h, w, c),
                                     lambda *i: reshape_output(*i),
                                     name='slice_output')
        else:
            output = nc1hwc0_to_nhwc(data, to_tvm_const(bs), to_tvm_const(h),
                                     to_tvm_const(w), to_tvm_const(c),
                                     to_tvm_const(c1), to_tvm_const(c0))

    else:
        if call_reshape:
            output = akg.topi.reshape(data, (bs, c, h, w))
            if shape_is_dynamic(data):
                output = akg.tvm.compute((bs, c, h, w),
                                         lambda *indice: output(*indice),
                                         name="reshape")
        else:
            output = nc1hwc0_to_nchw(data, to_tvm_const(bs), to_tvm_const(h),
                                     to_tvm_const(w), to_tvm_const(c),
                                     to_tvm_const(c1), to_tvm_const(c0))

    # two special cases for tiling strategy
    if not shape_is_dynamic(data):
        if c < c0 or output.dtype != dst_type:
            c_value = c
        if c % c0 != 0 and output.dtype != dst_type:
            expansion = int(ct_util.BLOCK_SIZE / get_bytes(data.dtype))
    attrs = get_attrs()
    if not call_reshape:
        attrs["custom_tiling"] = five2four_tiling_strategy(
            data, c_value, expansion)

    if output.dtype != dst_type:
        output = akg.topi.cast(output, dst_type)
    return output, attrs
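
The inverse mapping can be sketched in NumPy as well (nc1hwc0_to_nchw_reference is an illustrative name): move c0 back next to c1, merge them into a single channel axis, and slice off the padded channels so only the first c remain.

import numpy as np

def nc1hwc0_to_nchw_reference(y, c):
    """(N, C1, H, W, c0) -> (N, C1*c0, H, W), then keep the first c channels."""
    n, c1, h, w, c0 = y.shape
    x = y.transpose(0, 1, 4, 2, 3).reshape(n, c1 * c0, h, w)
    return x[:, :c, :, :]

y = np.arange(1 * 2 * 2 * 2 * 16, dtype=np.float32).reshape(1, 2, 2, 2, 16)
x = nc1hwc0_to_nchw_reference(y, c=20)
assert x.shape == (1, 20, 2, 2)
assert x[0, 17, 1, 0] == y[0, 1, 1, 0, 1]   # channel 17 comes from c1=1, c0=1
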
Example #14
def common(data, axis, method="min"):
    """
    Returns the index with the max or min value across axes of a tensor.

    Note:
        method can be "max" or "min" to get argmax or argmin.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32.
        axis (int): The axis of the input tensor along which to reduce.
        method (str): Can be "max" or "min".

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    shape = get_shape(data)
    dtype = data.dtype

    utils.ops_dtype_check(
        data.dtype,
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.ALL_INT])
    utils.reduce_axis_check(shape, axis)
    real_axis = refine_reduce_axis(shape, axis)[0]
    out_shape = get_reduce_out_shape(shape, axis=axis)
    attr_map = {}
    if shape_is_dynamic(data):
        attr_map["dynamic_shape"] = set_dynamic_shape_limit_for_tensor(
            data, 4096, real_axis)
    if dtype != "float16":
        data = akg.topi.cast(data, "float16")
    k = akg.tvm.reduce_axis((0, data.shape[real_axis]), "k")
    if axis in (len(shape) - 1, -1):
        if method == "min":
            reducer = akg.tvm.comm_reducer(lambda x, y: dav.fargmin(x, y),
                                           lambda t: akg.tvm.max_value(t))
        elif method == "max":
            reducer = akg.tvm.comm_reducer(lambda x, y: dav.fargmax(x, y),
                                           lambda t: akg.tvm.min_value(t))
        else:
            raise ValueError("not support {}".format(method))

        if len(data.shape) == 1:
            res = akg.tvm.compute((1, ), lambda i: reducer(data[k], axis=k))
        else:
            res = akg.tvm.compute(
                out_shape, lambda *indice: reducer(data(*indice, k), axis=k))

        res = akg.tvm.compute(out_shape,
                              lambda *indice: res(*indice).astype("int32"),
                              "argred_output")
    elif axis in (0, -len(shape)):
        tmp_idx = akg.tvm.compute(
            shape[1:],
            lambda *indice: akg.tvm.const(0.0, "float16"),
            name='tmp_index')
        local_data = akg.tvm.compute(shape[1:],
                                     lambda *indice: data(0, *indice),
                                     name="tmp_data")
        for idx in range(shape[axis] - 1):
            if method == 'min':
                tmp_idx = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) > data(ite_idx + 1, *indice),
                        akg.tvm.const(ite_idx + 1, "float16"), tmp_idx(*indice)
                    ))
                local_data = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) > data(ite_idx + 1, *indice),
                        data(ite_idx + 1, *indice), local_data(*indice)))
            elif method == "max":
                tmp_idx = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) < data(ite_idx + 1, *indice),
                        akg.tvm.const(ite_idx + 1, "float16"), tmp_idx(*indice)
                    ))
                local_data = akg.tvm.compute(
                    shape[1:],
                    lambda *indice, ite_idx=idx: akg.tvm.expr.Select(
                        local_data(*indice) < data(ite_idx + 1, *indice),
                        data(ite_idx + 1, *indice), local_data(*indice)))
            else:
                raise ValueError("not support " + method)

        res = akg.tvm.compute(out_shape,
                              lambda *indice: tmp_idx(*indice).astype("int32"),
                              "cast1")
    else:
        raise ValueError(
            "argmax/argmin only supports the first axis and the last axis for now!")

    larger = out_shape if len(out_shape) > len(shape) else shape
    strategy = argminmax_tiling_strategy(larger, real_axis)
    if strategy:
        attr_map["custom_tiling"] = strategy
    return res, attr_map
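
For the axis-0 branch, the Select-based scan above keeps a running best value and a running index tensor, updating both elementwise at every step. A plain NumPy equivalent (argred_axis0_reference is an illustrative name):

import numpy as np

def argred_axis0_reference(data, method="min"):
    """Scan along axis 0, updating the best value/index with an elementwise select."""
    better = np.less if method == "min" else np.greater
    best_val = data[0].copy()
    best_idx = np.zeros(data.shape[1:], dtype=np.int32)
    for i in range(1, data.shape[0]):
        take = better(data[i], best_val)          # elementwise Select condition
        best_idx = np.where(take, np.int32(i), best_idx)
        best_val = np.where(take, data[i], best_val)
    return best_idx

x = np.random.rand(5, 3, 4).astype(np.float16)
assert np.array_equal(argred_axis0_reference(x, "max"), x.argmax(axis=0))
assert np.array_equal(argred_axis0_reference(x, "min"), x.argmin(axis=0))
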