Example #1
def apply_proximal_adagrad(var, accum, lr, l1, l2, grad, target=utils.CCE):
    """
    The FOBOS optimization algorithm with Adagrad learning rate.

    Note:
        accum_new = accum + grad * grad
        ada_lr = lr * rsqrt(accum_new)
        prox_var = var - ada_lr * grad
        if l1 > 0:
            var_new = Sign(prox_var)/(1+ada_lr*l2) * max{|prox_var|-ada_lr*l1,0}
        else:
            var_new = prox_var/(1+ada_lr*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        accum (tvm.tensor.Tensor): A tensor of same shape and type as var. Each entry in it must be
                                   greater than or equal to zero.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated accum.
    """

    # check_shape
    utils.check_shape(var)
    shape = get_shape(var)
    for tensor in (accum, grad):
        utils.elemwise_shape_check(shape, tensor.shape)
    scalar_shape = (1, )
    for scalar in (lr, l1, l2):
        utils.elemwise_shape_check(scalar.shape, scalar_shape)

    # check dtype
    dtype = var.dtype
    utils.ops_dtype_check(
        dtype, [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.FLOAT32])
    for tensor in (var, accum, lr, l1, l2, grad):
        utils.elemwise_dtype_check(tensor.dtype, dtype)

    var_new, accum_new = _apply_proximal_adagrad_compute(
        var, accum, lr, l1, l2, grad)
    (var_new, accum_new), binds_info = TensorUtils.inplace_set_tensors(
        [var, accum], [var_new, accum_new])
    attrs = {utils.BINDS: binds_info}
    return var_new, accum_new, attrs
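
The update rule in the Note block can be sanity-checked with plain NumPy. This is an illustrative sketch of the math only (the helper name proximal_adagrad_reference is hypothetical), not the Davinci kernel produced above:

import numpy as np

def proximal_adagrad_reference(var, accum, lr, l1, l2, grad):
    """NumPy sketch of the docstring update rule; lr, l1, l2 are Python floats here."""
    accum_new = accum + grad * grad
    ada_lr = lr / np.sqrt(accum_new)  # lr * rsqrt(accum_new)
    prox_var = var - ada_lr * grad
    if l1 > 0:
        var_new = (np.sign(prox_var) / (1 + ada_lr * l2)
                   * np.maximum(np.abs(prox_var) - ada_lr * l1, 0))
    else:
        var_new = prox_var / (1 + ada_lr * l2)
    return var_new, accum_new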
Example #2
def avgpool_ad_no_custom_diff_manual_schedule(head, data, kernel, stride, pad):
    """automatic differentiate of avgpool with manual schedule."""
    attrs = {
        "enable_post_poly_loop_partition": False,
        "enable_pre_poly_loop_partition": False
    }
    avgpool_fwd, _ = avgpool.avgpool(data, kernel, stride, pad)
    [dl_ddata] = akg.differentiate(avgpool_fwd, [data], head)
    # schedule for the differentiation operation
    s = akg.tvm.create_schedule([dl_ddata.op])

    kh, kw = kernel
    shape = get_shape(data)
    ib, ic1, ih, iw, ic0 = shape

    if kh == ih and kw == iw:
        pad2d_input_2_grad = dl_ddata
        res_value_res_grad = pad2d_input_2_grad.op.input_tensors[0]
        head = res_value_res_grad.op.input_tensors[0]

        def comp_func(s):
            head_ub = s.cache_read(head, "local.UB", [res_value_res_grad])
            result_ub = s.cache_write(pad2d_input_2_grad, "local.UB")

            s[res_value_res_grad].set_scope("local.UB")

            b, c1, h, w, c0 = pad2d_input_2_grad.op.axis
            s[head_ub].compute_at(s[pad2d_input_2_grad], b)
            s[res_value_res_grad].compute_at(s[pad2d_input_2_grad], b)
            s[result_ub].compute_at(s[pad2d_input_2_grad], b)
    else:
        pad2d_input_2_grad = dl_ddata
        Broadcast_jac = pad2d_input_2_grad.op.input_tensors[0]
        res_value_res_grad = Broadcast_jac.op.input_tensors[0]
        head = res_value_res_grad.op.input_tensors[0]

        def comp_func(s):
            head_ub = s.cache_read(head, "local.UB", [res_value_res_grad])
            result_ub = s.cache_write(pad2d_input_2_grad, "local.UB")

            s[Broadcast_jac].set_scope("local.UB")
            s[res_value_res_grad].set_scope("local.UB")

            b, c1, h, w, c0 = result_ub.op.axis
            s[result_ub].reorder(*result_ub.op.reduce_axis, b, c1, h, w, c0)

            s[Broadcast_jac].compute_at(s[result_ub], b)

    return dl_ddata, comp_func, attrs
Example #3
def apply_centered_rms_prop(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Update `var` according to the centered RMSProp algorithm.

    out_mean_grad = rho * mg + (1 - rho) * grad
    out_mean_square = rho * ms + (1 - rho) * grad * grad
    out_mom = momentum * mom + lr * grad / sqrt(out_mean_square - out_mean_grad^2 + epsilon)
    out_var = var - out_mom

    Args:
        var (tvm.tensor.Tensor): Input data of type float16 or float32.
        mg (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        ms (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        mom (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        grad (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        momentum (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        rho (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        epsilon (float): A scalar value, must be greater than zero.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated mean_grad.
        tvm.tensor.Tensor, updated mean_square.
        tvm.tensor.Tensor, updated mom.
    """

    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (mg, ms, mom, lr, rho, momentum, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (mg, ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than 0.")

    out_var, out_mg, out_ms, out_mom = _apply_centered_rms_prop_compute(
        var, mg, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_mg, binds_info2 = TensorUtils.inplace_set(mg, out_mg, "mg_buf")
    out_ms, binds_info3 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info4 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    binds_info.update(binds_info4)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_mg, out_ms, out_mom, attrs
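
For reference, the four output formulas above map directly onto NumPy. The sketch below (with the hypothetical name centered_rms_prop_reference, and scalars passed as Python floats) only illustrates the math, not the generated kernel:

import numpy as np

def centered_rms_prop_reference(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """NumPy sketch of the centered RMSProp update from the docstring."""
    out_mg = rho * mg + (1 - rho) * grad
    out_ms = rho * ms + (1 - rho) * grad * grad
    out_mom = momentum * mom + lr * grad / np.sqrt(out_ms - out_mg * out_mg + epsilon)
    out_var = var - out_mom
    return out_var, out_mg, out_ms, out_mom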
Example #4
def fake_quant_with_min_max_args_gradient(input_gradients,
                                          input_data,
                                          min=-6,
                                          max=6,
                                          num_bits=8,
                                          narrow_range=False):
    """
    Computes gradients of the fake-quantize operation on the 'input_data' tensor.

    output_backprops = input_gradients * (1 if nudged_min <= input_data <= nudged_max else 0)

    Args:
        input_gradients (tvm.tensor.Tensor): input gradients from the previous operation.
        input_data (tvm.tensor.Tensor): input of fake-quantize, only supports "float32".
        min ([float, int]): scalar, defaults to -6.
        max ([float, int]): scalar, defaults to 6. [min; max] define the
                            clamping range for the input_data.
        num_bits ([float, int]): defaults to 8. num_bits is the bitwidth
                                 of the quantization, between 2 and 16.
        narrow_range ([bool]):
            True, quantized into the quantization range [1; 2^num_bits - 1].
            False, quantized into the quantization range [0; 2^num_bits - 1].

    Returns:
        tvm.tensor.Tensor
    """
    shape = get_shape(input_data)
    utils.check_shape(shape)
    utils.elemwise_shape_check(input_gradients.shape, input_data.shape)

    utils.ops_dtype_check(input_data.dtype, utils.DtypeForDavinci.FLOAT32)
    utils.ops_dtype_check(input_gradients.dtype, utils.DtypeForDavinci.FLOAT32)

    nudged_min, nudged_max, scale = nudge_min_max(min, max, num_bits,
                                                  narrow_range)

    zero_tensor = tvm.compute(input_data.shape,
                              lambda *i: tvm.const(0, dtype="float32"),
                              name="zero_tensor")
    nudged_max_tensor = topi.add(zero_tensor, nudged_max)
    nudged_min_tensor = topi.add(zero_tensor, nudged_min)

    # where((input_data <= nudged_max) & (input_data >= nudged_min), 1, 0): convert the input to a 0/1 tensor
    between_nudged_min_max = _cmpare_value(input_data, nudged_min_tensor,
                                           nudged_max_tensor)

    res = topi.multiply(input_gradients, between_nudged_min_max)

    return res
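
Conceptually, the gradient is just the incoming gradient masked to the nudged clamping range. A NumPy sketch of that masking step, assuming nudged_min and nudged_max have already been computed by nudge_min_max (the helper name fake_quant_grad_reference is hypothetical):

import numpy as np

def fake_quant_grad_reference(input_gradients, input_data, nudged_min, nudged_max):
    """Pass the gradient through only where the input lies inside [nudged_min, nudged_max]."""
    mask = ((input_data >= nudged_min) & (input_data <= nudged_max)).astype(input_gradients.dtype)
    return input_gradients * mask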
Example #5
def check_shape(tensor_format, tensor, shape_nc1hwc0, name):
    shape = get_shape(tensor)
    if tensor_format == "C1C0":
        if not is_all_1_but_axis_equal(shape, shape_nc1hwc0, (1, 4)):
            raise AssertionError("{} shape {} did not match data_shape {}"
                                 "".format(name, shape, shape_nc1hwc0))
    elif tensor_format == "NC1C0":
        if not is_all_1_but_axis_equal(shape, shape_nc1hwc0, (0, 1, 4)):
            raise AssertionError("{} shape {} did not match data_shape {}"
                                 "".format(name, shape, shape_nc1hwc0))
    elif tensor_format == "NC1HWC0":
        if not is_all_1_but_axis_equal(shape, shape_nc1hwc0,
                                       (0, 1, 2, 3, 4)):
            raise AssertionError("{} shape {} did not match data_shape {}"
                                 "".format(name, shape, shape_nc1hwc0))
Example #6
File: avgpool.py  Project: zhuyawen/akg
def avgpool_set_dim_func(a_value, kernel, stride, pad):
    """set dim info to attr with avgpool_set_dim_map"""
    key = []
    key.append(tuple(get_shape(a_value)))
    key.append(kernel)
    key.append(stride)
    if isinstance(pad, list):
        pad = tuple(pad)
    key.append(pad)
    key.append(a_value.dtype)
    hash_key = str(tuple(key))

    if hash_key in avgpool_set_dim_map.keys():
        return ct_util.set_dims(avgpool_set_dim_map[hash_key]), hash_key
    return "", hash_key
Example #7
def eltwise(data, mode=1, coeff=()):
    """
    Compute elementwise modes, such as 0:PRODUCT, 1:SUM and 2:MAX.

    Args:
        data (list of tvm.tensor.Tensor): a list of tensors; each tensor supports float16 and float32.
        mode (int): 0:product, 1:sum, 2:max.
        coeff (tuple): must be empty or have the same length as data; only
                      used by sum mode, elements support int and float.
    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    vc_util.check_shape(data[0].shape)
    shape_data = get_shape(data[0])

    if mode not in [0, 1, 2]:
        raise RuntimeError("mode only supports 0, 1, or 2")

    if len(coeff) != 0 and len(data) != len(coeff):
        raise RuntimeError("coeff should be empty or have the same length as data")

    tensor_num = len(data)
    # tensor_num must be in range [1, 120]
    if tensor_num < 1 or tensor_num > 120:
        raise RuntimeError("tensor_num must be in range [1, 120].")

    if mode == 1 and len(coeff) == 0:
        return addn.addn(data)

    if len(coeff) != 0:
        if not isinstance(coeff[0], (int, float)):
            raise RuntimeError("elements of coeff must be numbers.")

    for i in range(1, len(data)):
        vc_util.elemwise_dtype_check(data[0].dtype, data[i].dtype)
        vc_util.elemwise_shape_check(data[0].shape, data[i].shape)

    if mode == 1 and len(coeff) > 0:
        return _addn(data, coeff)

    if mode == 0:
        return _product(data)

    if mode == 2:
        return _max(data)
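
The three modes behave like the following NumPy sketch (illustrative only; eltwise_reference is a hypothetical name, and data is a list of equally shaped arrays):

import numpy as np

def eltwise_reference(data, mode=1, coeff=()):
    """NumPy sketch of eltwise: 0 = product, 1 = (weighted) sum, 2 = max."""
    if mode == 0:
        out = data[0].copy()
        for t in data[1:]:
            out = out * t
        return out
    if mode == 1:
        weights = coeff if len(coeff) else [1] * len(data)
        return sum(w * t for w, t in zip(weights, data))
    return np.maximum.reduce(data)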
Example #8
def cosh_call(x):
    """Compute cosh by the call method."""
    dtype = x.dtype
    shape = get_shape(x)
    # in order to get a precise calculation result
    if utils.product_is_mini() and dtype == "float32":
        x = akg.lang.cce.cast_to(x, "float16")

    res = akg.tvm.compute(shape,
                          lambda *indice: akg.lang.cce.cosh(x(*indice)),
                          name="res")

    if utils.product_is_mini() and dtype == "float32":
        res = akg.lang.cce.cast_to(res, "float32")

    return res, get_attrs()
Example #9
def _check_inputs(var, grad_accum, grad_squared_accum, grad, lr, l1, l2,
                  global_step):
    """Check op inputs"""
    # check dtype
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (grad_accum, grad_squared_accum, grad, lr, l1, l2):
        utils.elemwise_dtype_check(var.dtype, i.dtype)
    utils.ops_dtype_check(global_step.dtype, utils.DtypeForDavinci.INT32)

    # check shape
    for i in (grad_accum, grad_squared_accum, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, l1, l2, global_step):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, l1, l2 and global_step only support scalar tensor.")
Example #10
def focal_loss_2_classification_bwd(labels, logits, grad, alpha=0.5, gamma=2):
    """focalloss for 2 classification"""
    batch_size = get_shape(labels)[0]
    pred = akg.topi.sigmoid(logits)
    log_p = akg.topi.log(pred)
    neg_log_p = akg.topi.log(1 - pred)
    pred_pow = akg.topi.power(pred, gamma)
    neg_pred_pow = akg.topi.power(1 - pred, gamma)
    d_labels = akg.tvm.compute((batch_size,),
        lambda i: (-alpha * neg_pred_pow[i] * log_p[i] + (1 - alpha) * pred_pow[i] * neg_log_p[i]) * grad[i])
    d_logits = akg.tvm.compute((batch_size,),
        lambda i: (-labels[i] * alpha *
            (-log_p[i] * gamma * neg_pred_pow[i] * pred[i] + neg_pred_pow[i] * (1 - pred[i])) +
            (labels[i] - 1) * (1 - alpha) *
            (gamma * pred_pow[i] * (1 - pred[i]) * neg_log_p[i] - pred_pow[i] * pred[i])) * grad[i])
    return d_labels, d_logits
Example #11
def fused_layer_norm_grad_set_dim_func(x, _dy, _variance, _mean, _gamma,
                                       begin_norm_axis, begin_params_axis):
    """dim function"""
    shape = get_shape(x)
    if begin_norm_axis < 0:
        begin_norm_axis = begin_norm_axis + len(shape)
    if begin_params_axis < 0:
        begin_params_axis = begin_params_axis + len(shape)
    hash_key = str((shape, begin_norm_axis, begin_params_axis, x.dtype))
    attr_map = dict()
    attr_map["pragma_checkcoincident"] = 0
    if hash_key in fused_layer_norm_grad_set_attr_map.keys():
        for attr in fused_layer_norm_grad_set_attr_map[hash_key]:
            attr_map[attr] = 1

    return ct_util.set_dims_by_key(hash_key, fused_layer_norm_grad_set_dim_map), hash_key, attr_map
Example #12
File: mean.py  Project: mindspore-ai/akg
def mean(data, axis=None, keepdims=False, target=utils.CCE):
    """
    Computes the mean of the values of a Tensor over the whole dataset.

    Note:
        If the tuple's elements are unsorted, this function will call preprocess_axis first to sort them.
        If the tuple is empty, this function will compute the sum over all elements.
        If the data type is float16 and the whole reduced dimension is not less than 65536, this function will
        compute the mean by dividing by 65535 first, to avoid the whole dimension being too large.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        axis (Union[list, tuple, int, None]): If the tuple is empty, axis is treated as None.
        keepdims (bool): If keepdims is True, the result has the same number of dimensions as the input.

    Returns:
            tvm.tensor.Tensor, has the same type as data. If keepdims is True, all reduced dimensions are
            retained with length 1; otherwise the reduced axes are eliminated.

    Supported Platforms:
        'Ascend'
    """
    # Check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = ft_util.get_shape(data)
    utils.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    count = 1
    for i in axis:
        count *= shape[i]
    output = sum(data, axis, keepdims, target=target)

    if shape_is_dynamic(data):
        res = akg.tvm.compute(
            output.shape,
            lambda *i: akg.lang.ascend.divide_var(output(*i), count),
            name="res")
    else:
        res = akg.topi.divide(output, count)

    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs
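
Numerically, the operator amounts to a reduce-sum divided by the number of reduced elements, as in this NumPy sketch (mean_reference is a hypothetical name; the dynamic-shape and float16 special cases above are not modeled):

import numpy as np

def mean_reference(data, axis=None, keepdims=False):
    """NumPy sketch: sum over the reduced axes, then divide by their total size."""
    axes = tuple(range(data.ndim)) if axis is None else tuple(np.atleast_1d(axis))
    count = 1
    for i in axes:
        count *= data.shape[i]
    return np.sum(data, axis=axes, keepdims=keepdims) / count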
Example #13
def fused_bn_grad2(dgamma_red_hw, dbeta_red_hw, var, gamma, eps, data_shape):
    """Second part of fused_bn_grad, reduce axis N, calculate the result of dgamma and dbeta."""
    check_inputs(2, dgamma_red_hw, dbeta_red_hw, var, gamma, eps, data_shape)
    attrs = copy_attrs(attrs_bng2_)
    dim_info = set_dim_func_bng2_(data_shape)[0]
    m = data_shape[0] * data_shape[2] * data_shape[3]
    neg_m_rec = akg.tvm.const((-1.0 / m), dtype=var.dtype)
    eps = akg.tvm.const(eps, var.dtype)
    shape = get_shape(var)

    dbeta = akg.topi.sum(dbeta_red_hw, 0, keepdims=True)

    v = akg.tvm.compute(shape, lambda *i: var(*i) + eps, name="var_plus_eps")
    if product_is_mini():
        v = akg.topi.cast(v, "float16")
    rsqvar = akg.tvm.compute(
        shape,
        lambda *i: akg.tvm.exp(
            akg.tvm.log(v(*i)) * akg.tvm.const(-0.5, v.dtype)),
        name="rsqvar",
        attrs={'no_inline': 1})
    if product_is_mini():
        rsqvar = akg.topi.cast(rsqvar, "float32")

    dgamma_red_n = akg.topi.sum(dgamma_red_hw, 0, keepdims=True)
    dgamma = akg.tvm.compute(shape,
                             lambda *i: dgamma_red_n(*i) * rsqvar(*i),
                             name="dgamma")

    rs = akg.tvm.compute(shape,
                         lambda *i: gamma(*i) * rsqvar(*i),
                         name="rs",
                         attrs={'no_inline': 1})
    rs_div_m = akg.tvm.compute(shape,
                               lambda *i: rs(*i) * neg_m_rec,
                               name="rs_div_m",
                               attrs={'no_inline': 1})
    dgamma_dx = akg.tvm.compute(
        shape,
        lambda *i: rs_div_m(*i) * rsqvar(*i) * dgamma(*i),
        name="dgamma_dx")
    dbeta_dx = akg.tvm.compute(shape,
                               lambda *i: rs_div_m(*i) * dbeta(*i),
                               name="dbeta_dx")
    if dim_info != "":
        attrs["dim"] = dim_info
    return dgamma, dbeta, rs, dgamma_dx, dbeta_dx, attrs
Example #14
def reduction_layer(data, axis, op, coeff):
    """
    Reduce data on axis and scale by coeff.

    Args:
        data (tvm.tensor.Tensor): tensor with type float16, float32, int8 or uint8.
        axis (int): the first axis to reduce; -1 means the last axis. If 0, reduce to a scalar.
        op (str): one of "SUM", "ASUM" (abs and sum), "SUMSQ" (square and sum), "MEAN".
        coeff ([int, float]): scale factor.
    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(data.dtype, [vc_util.DtypeForDavinci.ALL_FLOAT, 
                                         vc_util.DtypeForDavinci.INT8,
                                         vc_util.DtypeForDavinci.UINT8])

    vc_util.check_shape(data.shape)

    if op not in ["SUM", "ASUM", "SUMSQ", "MEAN"]:
        raise RuntimeError("op can only be one of SUM, ASUM, SUMSQ, MEAN")
    
    shape = get_shape(data)
    
    vc_util.reduce_axis_check(shape, axis)
    axis = _get_axis_list(axis, shape)
    
    if dtype in ["int8", "uint8"]:
        data = topi.cast(data, "float16")
    data = topi.cast(data, "float32")
    cof = tvm.const(coeff, "float32")
   
    if op == "ASUM":
        tmp = _asum(data, axis, cof) 
    elif op == "SUMSQ":
        tmp = _sumsq(data, axis, cof)
    elif op == "MEAN":
        tmp = _mean(data, axis, cof, shape)
    elif op == "SUM":
        tmp = _sum(data, axis, cof)
    
    if dtype in ["int8", "uint8"]:
        tmp = topi.cast(tmp, "float16")    
    res = topi.cast(tmp, dtype)
    
    return res
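
Assuming Caffe-style semantics where every axis from `axis` to the last one is reduced (which is what _get_axis_list appears to produce), the four modes can be described with this NumPy sketch (reduction_layer_reference is a hypothetical name):

import numpy as np

def reduction_layer_reference(data, axis, op, coeff):
    """NumPy sketch of SUM / ASUM / SUMSQ / MEAN scaled by coeff."""
    start = axis if axis >= 0 else data.ndim + axis
    axes = tuple(range(start, data.ndim))
    if op == "SUM":
        return coeff * np.sum(data, axis=axes)
    if op == "ASUM":
        return coeff * np.sum(np.abs(data), axis=axes)
    if op == "SUMSQ":
        return coeff * np.sum(data * data, axis=axes)
    return coeff * np.mean(data, axis=axes)  # "MEAN"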
Example #15
def _check_inputs(var, accum, accum_update, grad, lr, rho, epsilon):
    """Check op inputs"""
    # check dtype
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (accum, accum_update, grad, lr, rho):
        utils.elemwise_dtype_check(var.dtype, i.dtype)

    # check shape
    for i in (accum, accum_update, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr and rho only support scalar tensor.")

    # check value
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
Example #16
def apply_add_sign(var,
                   m,
                   grad,
                   lr,
                   alpha,
                   sign_decay,
                   beta,
                   target=utils.CCE):
    """
    Update 'var' according to the AddSign update.

    m_out = m * beta + grad * (1 - beta)
    var_out = var - lr * (alpha + sign_decay * Sign(grad) * Sign(m)) * grad

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32
        m (tvm.tensor.Tensor): A tensor of type float16 or float32
        grad (tvm.tensor.Tensor): A tensor of type float16 or float32
        lr (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        alpha (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        sign_decay (tvm.tensor.Tensor): A scalar tensor of type float16 or float32
        beta (tvm.tensor.Tensor): A scalar tensor of type float16 or float32

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """

    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (m, lr, alpha, sign_decay, beta, grad):
        utils.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (m, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, alpha, sign_decay, beta):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, alpha, sign_decay and beta only support scalar.")

    out_var, out_m = _apply_add_sign_compute(var, m, grad, lr, alpha,
                                             sign_decay, beta)

    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
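
The AddSign rule itself is short enough to restate in NumPy. The sketch follows the docstring literally (Sign(m) rather than Sign(m_out)); the actual operator may use the updated m, and add_sign_reference is a hypothetical name:

import numpy as np

def add_sign_reference(var, m, grad, lr, alpha, sign_decay, beta):
    """NumPy sketch of the AddSign update as written in the docstring."""
    m_out = m * beta + grad * (1 - beta)
    var_out = var - lr * (alpha + sign_decay * np.sign(grad) * np.sign(m)) * grad
    return var_out, m_out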
Example #17
def check_shape(tensor_format, tensor, shape_nc1hwc0, name):
    def is_all_1_but_axis_equal(shape1, shape2, axis):
        return len(shape1) == len(shape2) and \
            all([shape1[i] == shape2[i] if i in axis else shape1[i] == 1 for i in range(len(shape2))])
    shape = get_shape(tensor)
    if tensor_format == "C1C0":
        if not is_all_1_but_axis_equal(shape, shape_nc1hwc0, (1, 4)):
            raise AssertionError("{} shape {} did not match data_shape {}"
                                 "".format(name, shape, shape_nc1hwc0))
    elif tensor_format == "NC1C0":
        if not is_all_1_but_axis_equal(shape, shape_nc1hwc0, (0, 1, 4)):
            raise AssertionError("{} shape {} did not match data_shape {}"
                                 "".format(name, shape, shape_nc1hwc0))
    elif tensor_format == "NC1HWC0":
        if not is_all_1_but_axis_equal(shape, shape_nc1hwc0, (0, 1, 2, 3, 4)):
            raise AssertionError("{} shape {} did not match data_shape {}"
                                 "".format(name, shape, shape_nc1hwc0))
Example #18
def _check_inputs(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon):
    """Check op inputs"""
    # check dtype
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, v, grad, beta1_power, lr, beta1, beta2):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shape
    for i in (m, v, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (beta1_power, lr, beta1, beta2):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("beta1_power, lr, beta1 and beta2 only support scalar tensor.")

    # check value
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than zero.")
Example #19
File: elu.py  Project: mindspore-ai/akg
def elu(data):
    """
    do the element-wise ELU operation

    f(x) = max(min(e^x - 1, 0), x), in the cloud scene, for all inputs
    f(x) = max(min(e^x - 1, 0), x), in the mini scene, for x <= TAYLOR_THRESHOLD or x >= 0
    f(x) = fifth-order Taylor expansion, in the mini scene, for TAYLOR_THRESHOLD < x < 0

    Args:
        data (tvm.tensor.Tensor): tensor with type float16 or float32.

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(data.shape)

    compute_dtype = dtype
    if dtype == "float16" and not product_is_mini():
        data = akg.lang.ascend.cast_to(data, "float32")
        compute_dtype = "float32"

    if dtype == "float32" and product_is_mini():
        data = akg.lang.ascend.cast_to(data, "float16")
        compute_dtype = "float16"

    input_border = tvm.const(0.0, compute_dtype)
    shape = get_shape(data.shape)
    tensor_input_border = tvm.compute(shape, lambda *i: input_border)

    exp_res = akg.topi.exp(data)
    exp_res = akg.lang.ascend.vadds(exp_res, -1)

    if product_is_mini():
        exp_res = _elu_mini_compute(exp_res, data, shape)

    negative_part = akg.lang.ascend.vmin(exp_res, tensor_input_border)
    res = akg.lang.ascend.vmax(negative_part, data)

    if dtype == "float16" and not product_is_mini():
        res = akg.lang.ascend.cast_to(res, "float16")
    if dtype == "float32" and product_is_mini():
        res = akg.lang.ascend.cast_to(res, "float32")
    return res
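
In the cloud scene the formula above is just the standard ELU with alpha = 1, which a one-line NumPy sketch makes explicit (elu_reference is a hypothetical name):

import numpy as np

def elu_reference(x):
    """NumPy sketch of f(x) = max(min(e^x - 1, 0), x), i.e. ELU with alpha = 1."""
    return np.maximum(np.minimum(np.exp(x) - 1, 0), x)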
Example #20
def squeeze(data, axis, target="cce"):
    """
    Remove the dimensions which have shape size 1.

    Args:
        data: Tensor, input whose shape is to be squeezed.
        axis: Integer, specifies which size-1 dimension is to be removed.

    Returns:
        Tensor, has the same type and elements as data, but some size-1 dimensions are removed.
    """
    shape = get_shape(data)
    if len(shape) == 1:
        raise RuntimeError("invalid input shape")
    utils.check_shape(shape)
    utils.ops_dtype_check(
        data.dtype,
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    new_shape = []
    shape_to_squeeze = []
    if axis is None:
        axis = [i for i, sh in enumerate(shape) if sh == 1]
    if not isinstance(axis, (list, tuple)):
        axis = [axis]
    for i, sh in enumerate(shape):
        if not isinstance(sh, int) or i not in axis:
            new_shape.append(sh)
            shape_to_squeeze.append(True)
        else:
            shape_to_squeeze.append(False)

    def get_old_indices(indices):
        old_indices = []
        new_index = 0
        for i, sh in enumerate(shape_to_squeeze):
            if sh:
                old_indices.append(indices[new_index])
                new_index += 1
            else:
                old_indices.append(0)
        return old_indices

    B = akg.tvm.compute(new_shape,
                        lambda *indices: data(*get_old_indices(indices)))
    return B
Example #21
def maxpool_with_argmax_tiling_strategy(data, kernel, stride, pad):
    """Custom tiling for maxpool with argmax version."""
    batch, c1, fm_h, fm_w, c0 = data.shape
    _, [out_h, _] = \
        cal_pad_shapes_by_strategy(get_shape(data), kernel, stride, pad)
    strategy = list()
    if data.ndim == 5 and c0.value == 16:
        h_cut = out_h
        if isinstance(fm_h, akg.tvm.expr.Var) or (fm_h.value >= 50
                                                  and fm_w.value >= 50):
            h_cut = 3
        dim_ind = 0
        if isinstance(batch, akg.tvm.expr.Var) or batch.value > 1:
            strategy += ct_util.create_constraint_on_axis(
                values=1,
                constraints=ct_util.TileConstraint.FACTOR,
                axis=dim_ind)
            dim_ind = dim_ind + 1
        if isinstance(c1, akg.tvm.expr.Var) or c1.value > 1:
            strategy += ct_util.create_constraint_on_axis(
                values=1,
                constraints=ct_util.TileConstraint.FACTOR,
                axis=dim_ind)
            dim_ind = dim_ind + 1
        strategy += ct_util.create_constraint_on_axis(
            values=h_cut,
            constraints=ct_util.TileConstraint.FACTOR,
            axis=dim_ind)
        strategy += ct_util.create_constraint_on_axis(
            values="H",
            constraints=ct_util.TileConstraint.SET_AXIS_INFO,
            axis=dim_ind)
        strategy += ct_util.create_constraint_on_axis(
            values="FULL",
            constraints=ct_util.TileConstraint.MAX,
            axis=dim_ind + 1)
        strategy += ct_util.create_constraint_on_axis(
            values=5,
            constraints=ct_util.TileConstraint.FACTOR,
            axis=dim_ind + 2)
        strategy += ct_util.create_constraint_on_axis(
            values=16,
            constraints=ct_util.TileConstraint.FACTOR,
            axis=dim_ind + 3)
    return strategy
Example #22
File: rsqrt.py  Project: mindspore-ai/akg
def _rsqrt_ascend(data):
    """
    Computes reciprocal of square root of x element-wise.

    :math:`y = \\frac{1}{\\sqrt{x}} = x^{-\\frac{1}{2}}`

    Note:
        In order to prevent loss of precision, the function rewrites the computation with exp and log:
        :math:`y = [e^{\\ln x}]^{-\\frac{1}{2}}`

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32

    Returns:
        tvm.tensor.Tensor, has same type and shape as data.
    """

    dtype = data.dtype

    shape = get_shape(data)
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(shape)

    if not product_is_mini():
        return akg.topi.rsqrt(data)

    is_needed_conv = (dtype == 'float32')

    data_ = data.astype('float16') if is_needed_conv else data
    power_num = akg.tvm.const(-0.5,
                              dtype=('float16' if is_needed_conv else dtype))

    vlog_t = akg.tvm.compute(shape,
                             lambda *indice: akg.tvm.log(data_(*indice)),
                             name="vlog_t")
    vmuls_t = akg.tvm.compute(shape,
                              lambda *indice: vlog_t(*indice) * power_num,
                              name="vmuls_t")
    res = akg.tvm.compute(shape,
                          lambda *indice: akg.tvm.exp(vmuls_t(*indice)),
                          name="res")

    res = res.astype('float32') if is_needed_conv else res

    return res
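
The exp/log rewrite used on the mini product can be checked against a direct rsqrt in NumPy (illustration only):

import numpy as np

x = np.array([0.25, 1.0, 4.0, 9.0], dtype=np.float32)
direct = 1.0 / np.sqrt(x)
via_exp_log = np.exp(np.log(x) * -0.5)  # the rewrite used by _rsqrt_ascend above
assert np.allclose(direct, via_exp_log)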
Example #23
def cos(input_x):
    """Compute cosine value of a tensor."""
    dtype = input_x.dtype
    shape = get_shape(input_x)
    vc_util.ops_dtype_check(input_x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(input_x.shape)
    if dtype == "float16":
        input_x = akg.lang.cce.cast_to(input_x, "float32")

    res = akg.tvm.compute(shape,
                          lambda *indice: akg.lang.cce.cos(input_x(*indice)),
                          name="res")

    # cast the dtype to float16
    if dtype == "float16":
        res = akg.lang.cce.cast_to(res, "float16")

    return res, get_attrs()
Example #24
def set_dim_func_(dy,
                  data,
                  mean,
                  var,
                  gamma,
                  eps=1e-3,
                  data_format="DefaultFormat",
                  axis=1):
    """set dim info"""
    shape = get_shape(data)
    params = check_inputs(dy, data, mean, var, gamma, data_format, axis)
    axis = params["axis"]
    is_special5d = params["is_special5d"]
    hash_key1 = str((tuple(shape), data.dtype))
    if hash_key1 in set_dim_map_:
        return ct_util.set_dims_by_key(hash_key1, set_dim_map_), hash_key1
    hash_key = str((len(shape), data.dtype, axis, is_special5d))
    return ct_util.set_dims_by_key(hash_key, set_dim_map_), hash_key
Example #25
def fake_quant_with_min_max_vars_per_channel_gradient_compute(input_gradients, inputs_data,
                                                              min_broadcast, max_broadcast,
                                                              num_bits=8,
                                                              narrow_range=False):
    """Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation."""
    shape = get_shape(inputs_data)
    sum_axis = [x for x in range(0, len(shape) - 1)]
    dtype = inputs_data.dtype

    nudged_min, nudged_max, _ = nudged_min_max_compute(min_broadcast, max_broadcast, num_bits, narrow_range)
    # both zero yields zero
    bool_both_zero_value = bool_both_zero_compute(min_broadcast, max_broadcast)
    bool_both_zero_negate = _bool_negate(bool_both_zero_value)

    bool_less_equal_nudged_max = _less_equal_compare_float32(inputs_data, nudged_max)
    bool_more_equal_nudged_min = _less_equal_compare_float32(nudged_min, inputs_data)
    bool_between_nudged_min_max = topi.multiply(bool_less_equal_nudged_max, bool_more_equal_nudged_min)
    # gradient is 1 if input in [min, max] else 0
    backprops_input_tmp = topi.multiply(bool_between_nudged_min_max, input_gradients)
    backprops_bool_both_zero = topi.multiply(backprops_input_tmp, bool_both_zero_value)
    # if min and max are both zero, the gradient is input_gradients
    input_gradients_both_zero = topi.multiply(input_gradients, bool_both_zero_negate)
    backprops_input = topi.add(backprops_bool_both_zero, input_gradients_both_zero)

    # gradients for min is input_gradients if inputs_data < nudged_min else 0
    bool_less_nudged_min = _bool_negate(bool_more_equal_nudged_min)
    output_backprop_min_tmp = topi.multiply(bool_less_nudged_min, input_gradients)
    # gradients for min is 0 if min and max are both 0
    output_backprop_min_bool = topi.multiply(output_backprop_min_tmp, bool_both_zero_value)
    if sum_axis == []:
        output_backprop_min = output_backprop_min_bool
    else:
        output_backprop_min = topi.sum(output_backprop_min_bool, sum_axis)

    # gradients for max is input_gradients if inputs_data > nudged_max else 0
    bool_more_nudged_max = _bool_negate(bool_less_equal_nudged_max)
    output_backprop_max_tmp = topi.multiply(bool_more_nudged_max, input_gradients)
    # gradients for max is 0 if min and max are both 0
    output_backprop_max_bool = topi.multiply(output_backprop_max_tmp, bool_both_zero_value)
    if sum_axis == []:
        output_backprop_max = output_backprop_max_bool
    else:
        output_backprop_max = topi.sum(output_backprop_max_bool, sum_axis)
    return backprops_input, output_backprop_min, output_backprop_max
Example #26
def pooling(x,
            window,
            stride,
            mode=0,
            pad_mode=5,
            pad=(0, 0, 0, 0),
            global_pooling=False,
            ceil_mode=0):
    """
    Pooling operation, including MaxPool and AvgPool.

    Args:
        x (tvm.tensor.Tensor): Input tensor, only support float16
                               dtype, and NC1HWC0 format.
        window (Union[list, tuple]): Pooling window, only support pooling
                                     in H or W.
        stride (Union[list, tuple]): Pooling stride, only support pooling
                                     in H or W.
        mode (int): Mode of pooling, support MaxPool and AvgPool. 0 for MaxPool,
                    1 for AvgPool.
        pad_mode (int): Mode of padding, 5 for VALID, 6 for SAME.
        pad (Union[list, tuple]): Implicit padding size to up/down/left/right.
        global_pooling (bool): Global pooling flag; not supported yet, should be False.
        ceil_mode (int): Rounding mode parameter; not supported yet, should be 0.

    Returns:
        A tvm.tensor.Tensor with same dtype as input.
    """
    utils.check_shape(get_shape(x))
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.FLOAT16)

    if len(window) != 2:
        raise RuntimeError(
            "Invalid shape params, window shape must be 2 dims, "
            "including window_h and window_w.")
    if len(stride) != 2:
        raise RuntimeError(
            "Invalid shape params, stride shape must be 2 dims, "
            "including stride_h and stride_w.")

    if global_pooling or ceil_mode != 0:
        raise RuntimeError("Not support global_pooling and ceil_mode for now.")

    return _pooling_compute(x, window, stride, mode, pad_mode, pad)
Example #27
def _cast_ascend(data, dst_type):
    ori_type = data.dtype
    shape = get_shape(data)
    # dtype check
    dst_check_list = ["int8", "float32", "float16", "uint8", "int32"]

    if dst_type not in dst_check_list:
        raise RuntimeError("cast only support cast to %s while dtype is %s" %
                           (",".join(dst_check_list), dst_type))

    if product_is_mini():
        # the mini product has no conversion instruction between float32 and int32.
        if ori_type == "float32" and dst_type == "int32":
            tmp = akg.topi.cast(data, "float16")
            return akg.topi.cast(tmp, dst_type)
        if ori_type == "int32" and dst_type == "float32":
            tmp = akg.topi.cast(data, "float16")
            return akg.topi.cast(tmp, dst_type)

    dtype_pair = (ori_type, dst_type)
    support_dtype = (('float32', 'float16'), ('float16', 'float32'), ('float16', 'int8'), ('float16', 'uint8'),
                     ('int32', 'float16'), ('int32', 'float32'), ('float16', 'int32'), ('float32', 'int32'),
                     ('uint8', 'float16'), ('int8', 'float16'))
    tmp_trans_dtype = (('int8', 'float32'), ('float32', 'int8'), ('bool', 'float32'), ('uint8', 'float32'),
                       ('uint8', 'int32'), ('bool', 'int32'), ('float32', 'uint8'))
    if dtype_pair not in support_dtype and dtype_pair not in tmp_trans_dtype and ori_type != dst_type:
        raise RuntimeError("Don't support cast from ", ori_type, " to ", dst_type)
    need_tmp_transfer = dtype_pair in tmp_trans_dtype

    if need_tmp_transfer:
        if data.dtype == 'float32' and dst_type == 'int8' and not product_is_mini():
            tmp = akg.tvm.compute(shape, lambda *indice: akg.tvm.trunc(data(*indice)).astype('int32'))
            tmp = akg.topi.cast(tmp, 'float16')
            out = akg.tvm.compute(shape, lambda *indice: akg.tvm.trunc(tmp(*indice)).astype(dst_type))
        else:
            tmp = akg.topi.cast(data, 'float16')
            out = akg.topi.cast(tmp, dst_type)
    else:
        if data.dtype in ('float16', 'float32') and dst_type in ('int8', 'int32') and not product_is_mini():
            out = akg.tvm.compute(shape, lambda *indice: akg.tvm.trunc(data(*indice)).astype(dst_type))
        else:
            out = akg.topi.cast(data, dst_type)

    return out
Example #28
File: reshape.py  Project: mindspore-ai/akg
def _reshape_ascend(data, out_shape):
    """
    Rearranges input tensor data to new shape out_shape.

    Args:
        data (tvm.tensor.Tensor): The tensor to be reshaped.
        out_shape (list, tuple): The new shape applied on the input tensor data,
                                should be compatible with the original shape of data.

    Returns:
        The reshaped akg.tvm.tensor of same type as input tensor data.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check(
        data.dtype, utils.DtypeForDavinci.INT32.value +
        utils.DtypeForDavinci.ALL_FLOAT.value)

    data_shape = data.shape
    utils.check_shape(data_shape)

    in_shape = get_shape(data)
    out_shape = list(out_shape)
    is_dynamic = ds.shape_is_dynamic(data)

    if -1 in out_shape:
        out_shape = get_out_shape(in_shape, out_shape)
    else:
        if not is_dynamic:
            if reduce(lambda x, y: x * y, in_shape) != reduce(
                    lambda x, y: x * y, out_shape):
                raise ValueError(
                    "the total length of out_shape is not equal to the in_shape"
                )

    inputs = akg.tvm.compute(in_shape,
                             lambda *indice: data(*indice),
                             name="inputs")
    res = akg.topi.reshape(inputs, out_shape)
    output = akg.tvm.compute(out_shape,
                             lambda *indice: res(*indice),
                             name="reshape")
    return output
Example #29
def less_compare_float32(data_x, data_y):
    """if x is less than y, then return 1, else return 0"""
    shape_inputs = get_shape(data_x)
    # minimum positive normal float32 number is 2**(-126)
    data_min = akg.lang.ascend.broadcast(tvm.const(2**(-126), dtype="float32"),
                                         shape_inputs, "float32")
    data_zero = akg.lang.ascend.broadcast(dc.zero_const("float32"),
                                          shape_inputs, "float32")
    res_sub = topi.subtract(data_y, data_x)
    res_min = topi.minimum(res_sub, data_min)
    res_max = topi.maximum(res_min, data_zero)
    # a factor of 2**126 is needed to scale 2**(-126) back to 1.0,
    # but cce can only support 2**62, so use 2**62 * 2**62 * 2**2 to make up 2**126
    res_mul_first = topi.multiply(res_max, tvm.const(2**62, dtype="float32"))
    res_mul_second = topi.multiply(res_mul_first,
                                   tvm.const(2**62, dtype="float32"))
    res = topi.multiply(res_mul_second, tvm.const(2**2, dtype="float32"))

    return res
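
The three multiplications work because the exponents add up: 2**(-126) * 2**62 * 2**62 * 2**2 = 2**0 = 1, so any positive clipped difference becomes exactly 1.0 while 0 stays 0. A quick NumPy check of that scaling chain:

import numpy as np

x, y = np.float32(1.0), np.float32(2.0)
diff = np.minimum(np.maximum(y - x, np.float32(0.0)), np.float32(2.0 ** -126))
res = diff * np.float32(2.0 ** 62) * np.float32(2.0 ** 62) * np.float32(2.0 ** 2)
assert res == np.float32(1.0)  # x < y gives 1.0; otherwise the chain stays 0.0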
Example #30
def apply_power_sign(var, m, grad, lr, logbase, sign_decay, beta):
    """
    Update 'var' according to the PowerSign update.

    m_out = beta * m + (1 - beta) * grad
    var_out = var - lr_t * (exp(logbase * sign_decay * sign(grad) * sign(m_out)) * grad)

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        logbase (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of the same type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (m, grad, lr, logbase, sign_decay, beta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    for i in (m, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, logbase, sign_decay, beta):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, logbase, sign_decay and beta only support scalar tensor.")

    # compute
    out_var, out_m = _apply_power_sign_compute(var, m, grad, lr, logbase,
                                               sign_decay, beta)

    # reuse var, m
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
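
As with the other optimizer kernels above, the PowerSign rule can be restated in NumPy for reference (power_sign_reference is a hypothetical name; scalars are passed as Python floats):

import numpy as np

def power_sign_reference(var, m, grad, lr, logbase, sign_decay, beta):
    """NumPy sketch of the PowerSign update from the docstring."""
    m_out = beta * m + (1 - beta) * grad
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    var_out = var - lr * update
    return var_out, m_out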