Example #1
def apply_rms_prop_mixed_precision(var, ms, mom, grad, lr, momentum, rho,
                                   epsilon):
    """
    Mixed precision version for apply_rms_prop.

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        ms (tvm.tensor.Tensor): Mean square, a tensor of same shape and type as var.
        mom (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (float): Coefficient for calculating the new mom, 0.0 <= momentum <= 1.0.
        rho (float): Coefficient for calculating the new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var of type float32.
        tvm.tensor.Tensor, Updated var of type float16.
        tvm.tensor.Tensor, Updated ms.
        tvm.tensor.Tensor, Updated mom.
    """

    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.FLOAT32)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    out_var, out_var_fp16, out_ms, out_mom = _apply_rms_prop_mixed_precision_compute(
        var, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ms, binds_info2 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info3 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_var_fp16, out_ms, out_mom, attrs
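Compared with apply_rms_prop (Example #6 below), this mixed-precision variant performs the same update but additionally returns the updated var cast to float16 (out_var_fp16); var, ms and mom are still reused in place through the binds info.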
Example #2
def sgd(parameters, gradient, accum, stat, learning_rate, momentum, dampening=0.0, weight_decay=0.0, nesterov=False):
    """
    Update parameters, accum and stat according to the SGD algorithm.

    accum = accum * momentum + grad
    if nesterov is True:
        parameters -= grad * lr + accum * momentum * lr
    else:
        parameters -= accum * lr

    Args:
        parameters (tvm.tensor.Tensor): parameters tensor of float32, float16, to be updated.
        gradient (tvm.tensor.Tensor): gradient tensor of float32, float16.
        accum (tvm.tensor.Tensor): accum tensor of float32, float16, to be updated.
        stat (tvm.tensor.Tensor): stat tensor of float32, float16, to be updated.
        momentum (tvm.tensor.Tensor): momentum tensor of float32, float16, shape must be equal to (1,).
        learning_rate (tvm.tensor.Tensor): learning_rate tensor of float32, float16, shape must be equal to (1,).
        dampening (float): Default value is 0.0.
        weight_decay (float): Default value is 0.0.
        nesterov (bool): Default is False.

    Returns:
        accum_t (tvm.tensor.Tensor): updated accum with same type and shape as accum.
        stat_t (tvm.tensor.Tensor): updated stat with same type and shape as stat.
        parameters_t (tvm.tensor.Tensor): updated parameters with same type and shape as parameters.

    """
    if nesterov and dampening != 0:
        raise ValueError("Nesterov requires zero dampening!")
    if weight_decay < 0:
        raise ValueError("weight_decay must > 0.")

    # shape check
    utils.elemwise_shape_check(parameters.shape, gradient.shape)
    utils.elemwise_shape_check(parameters.shape, accum.shape)
    utils.elemwise_shape_check(parameters.shape, stat.shape)

    # dtype check
    utils.ops_dtype_check([parameters.dtype, gradient.dtype, accum.dtype, stat.dtype],
                            utils.DtypeForDavinci.ALL_FLOAT)

    parameters_t, accum_t, stat_t = sgd_compute(parameters, gradient, learning_rate, accum, momentum, stat, dampening,
                                                weight_decay, nesterov)
    parameters_t, binds_info = TensorUtils.inplace_set(parameters, parameters_t, "parameters_buf")
    accum_t, binds_info2 = TensorUtils.inplace_set(accum, accum_t, "accum_buf")
    stat_t, binds_info3 = TensorUtils.inplace_set(stat, stat_t, "stat_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}


    return parameters_t, accum_t, stat_t, attrs
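For reference, the documented update rule can be written as a small NumPy sketch (illustrative only, not the akg kernel; dampening and weight_decay are omitted, exactly as in the formula above):

import numpy as np

def sgd_reference(parameters, gradient, accum, lr, momentum, nesterov=False):
    # accum = accum * momentum + grad
    accum = accum * momentum + gradient
    if nesterov:
        # parameters -= grad * lr + accum * momentum * lr
        parameters = parameters - (gradient * lr + accum * momentum * lr)
    else:
        # parameters -= accum * lr
        parameters = parameters - accum * lr
    return parameters, accum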
Example #3
def apply_adagrad_da(var,
                     grad_accum,
                     grad_squared_accum,
                     grad,
                     lr,
                     l1,
                     l2,
                     global_step,
                     target=utils.CCE):
    """
    Update var according to the Adagrad Dual Averaging algorithm.

    grad_accum += grad
    grad_squared_accum += grad * grad
    tmp_val = Sign(grad_accum) * max(|grad_accum|-l1*global_step, 0) if l1 > 0 else grad_accum
    x_value = -1 * lr * tmp_val
    y_value = l2 * global_step * lr + sqrt(grad_squared_accum)
    var = x_value / y_value

    Args:
        var (tvm.tensor.Tensor): Input var to be updated of type float16, float32.
        grad_accum (tvm.tensor.Tensor): Accumulation of the gradients of same shape and type as var.
        grad_squared_accum (tvm.tensor.Tensor): Accumulation of the squared gradients of same shape and type as var.
        grad (tvm.tensor.Tensor): Input grad of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        l1 (tvm.tensor.Tensor): L1 regularization, a scalar tensor of same type as var.
        l2 (tvm.tensor.Tensor): L2 regularization, a scalar tensor of same type as var.
        global_step (tvm.tensor.Tensor): Training step number, a scalar tensor of type int32.

    Returns:
        tvm.tensor.Tensor, the updated var.
        tvm.tensor.Tensor, the updated grad_accum.
        tvm.tensor.Tensor, the updated grad_squared_accum.
    """

    _check_inputs(var, grad_accum, grad_squared_accum, grad, lr, l1, l2,
                  global_step)

    out_var, out_ga, out_gsa = _apply_adagrad_da_compute(
        var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step)

    # reuse var, grad_accum and grad_squared_accum
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ga, binds_info2 = TensorUtils.inplace_set(grad_accum, out_ga,
                                                  "grad_accum_buf")
    out_gsa, binds_info3 = TensorUtils.inplace_set(grad_squared_accum, out_gsa,
                                                   "grad_squared_accum_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_ga, out_gsa, attrs
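The documented Adagrad Dual Averaging step, restated as a plain NumPy sketch (illustrative only; scalars lr, l1, l2 and global_step are treated as Python numbers here):

import numpy as np

def adagrad_da_reference(var, grad_accum, grad_squared_accum, grad, lr, l1, l2, global_step):
    grad_accum = grad_accum + grad
    grad_squared_accum = grad_squared_accum + grad * grad
    if l1 > 0:
        tmp_val = np.sign(grad_accum) * np.maximum(np.abs(grad_accum) - l1 * global_step, 0)
    else:
        tmp_val = grad_accum
    x_value = -1 * lr * tmp_val
    y_value = l2 * global_step * lr + np.sqrt(grad_squared_accum)
    var = x_value / y_value
    return var, grad_accum, grad_squared_accum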
Example #4
def apply_power_sign(var,
                     m,
                     grad,
                     lr,
                     logbase,
                     sign_decay,
                     beta,
                     target=utils.CCE):
    """
    Update 'var' according to the PowerSign update.

    m_out = beta * m + (1 - beta) * grad
    var_out = var - lr * (exp(logbase * sign_decay * Sign(grad) * Sign(m_out)) * grad)

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        logbase (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of the same type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """
    # check dtypes
    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (m, grad, lr, logbase, sign_decay, beta):
        utils.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    for i in (m, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, logbase, sign_decay, beta):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, logbase, sign_decay and beta only support scalar tensor.")

    # compute
    out_var, out_m = _apply_power_sign_compute(var, m, grad, lr, logbase,
                                               sign_decay, beta)

    # reuse var, m
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
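The PowerSign rule above, restated as a minimal NumPy sketch (illustrative only; the scalar tensors are treated as Python floats):

import numpy as np

def power_sign_reference(var, m, grad, lr, logbase, sign_decay, beta):
    m_out = beta * m + (1 - beta) * grad
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    var_out = var - lr * update
    return var_out, m_out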
Example #5
def apply_centered_rms_prop(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    """
    Update `var` according to the centered RMSProp algorithm.

    out_mean_grad = rho * mg + (1 - rho) * grad
    out_mean_square = rho * ms + (1 - rho) * grad * grad
    out_mom = momentum * mom + lr * grad / sqrt(out_mean_square - out_mean_grad^2 + epsilon)
    out_var = var - out_mom

    Args:
        var (tvm.tensor.Tensor): Input data of type float16 or float32.
        mg (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        ms (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        mom (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        grad (tvm.tensor.Tensor): A tensor of the same type and shape as `var`.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        momentum (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        rho (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        epsilon (float): A small positive value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated mean_grad.
        tvm.tensor.Tensor, updated mean_square.
        tvm.tensor.Tensor, updated mom.
    """

    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (mg, ms, mom, lr, rho, momentum, grad):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (mg, ms, mom, grad):
        vc_util.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, rho, momentum):
        if tuple(get_shape(i)) != (1,):
            raise RuntimeError("lr, rho and momentum only support scalar tensor.")
    if epsilon <= 0:
        raise ValueError("epsilon should be greater than 0.")

    out_var, out_mg, out_ms, out_mom = _apply_centered_rms_prop_compute(
        var, mg, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_mg, binds_info2 = TensorUtils.inplace_set(mg, out_mg, "mg_buf")
    out_ms, binds_info3 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info4 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    binds_info.update(binds_info4)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_mg, out_ms, out_mom, attrs
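The centered RMSProp step as a NumPy sketch (illustrative only; rho plays the role of the decay coefficient):

import numpy as np

def centered_rms_prop_reference(var, mg, ms, mom, grad, lr, momentum, rho, epsilon):
    mg = rho * mg + (1 - rho) * grad
    ms = rho * ms + (1 - rho) * grad * grad
    mom = momentum * mom + lr * grad / np.sqrt(ms - mg * mg + epsilon)
    var = var - mom
    return var, mg, ms, mom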
Example #6
def apply_rms_prop(var,
                   ms,
                   mom,
                   grad,
                   lr,
                   momentum,
                   rho,
                   epsilon,
                   target=utils.CCE):
    """
    Updates var using the RMSProp algorithm.

    .. math::
        \\begin{array}{ll} \\\\
            \\hat{ms} &= rho \\cdot ms + (1 - rho) \\cdot grad^2 \\\\
            \\hat{mom} &= momentum \\cdot mom +
                \\frac{lr \\cdot grad}{\\sqrt{\\hat{ms} + epsilon}} \\\\
            var &= var - \\hat{mom}
        \\end{array}

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        ms (tvm.tensor.Tensor): Mean square, a tensor of same shape and type as var.
        mom (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (float): Coefficient for calculating the new mom, 0.0 <= momentum <= 1.0.
        rho (float): Coefficient for calculating the new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var.
        tvm.tensor.Tensor, Updated ms.
        tvm.tensor.Tensor, Updated mom.
    """

    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    out_var, out_ms, out_mom = _apply_rms_prop_compute(var, ms, mom, grad, lr,
                                                       momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ms, binds_info2 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info3 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_ms, out_mom, attrs
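The (non-centered) RMSProp step above in NumPy form (illustrative only):

import numpy as np

def rms_prop_reference(var, ms, mom, grad, lr, momentum, rho, epsilon):
    ms = rho * ms + (1 - rho) * grad * grad
    mom = momentum * mom + lr * grad / np.sqrt(ms + epsilon)
    var = var - mom
    return var, ms, mom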
Example #7
def hpl_cholesky(a):
    """In-place Cholesky factorization of `a`: the upper-triangular factor (a = R^T * R) overwrites `a`; entries below the diagonal are zeroed."""
    attrs = {"RewriteVarTensorIdx": True}

    @script
    def func(a):
        w = a.shape[0]
        tmp = allocate((a.shape[0], ), a.dtype, "local")
        tmp_0 = allocate((a.shape[0], ), a.dtype, "local")
        tmp_1 = allocate((a.shape[0], ), a.dtype, "local")
        out_0 = allocate(a.shape, a.dtype, "local")
        out_1 = allocate(a.shape, a.dtype, "local")
        for i in range(w):
            for j in range(w):
                tmp_0[j] = a[i, i]
                tmp_1[j] = sqrt(tmp_0[j])
                tmp[j] = a[i, j] / tmp_1[j]
            for j in range(w):
                if j >= i:
                    a[i, j] = tmp[j]
                else:
                    a[i, j] = float16(0.0)
            for k in range(a.shape[0]):
                for l in range(a.shape[1]):
                    if k > i and l > i:
                        out_0[k, l] = a[i, k]
                        out_1[k, l] = out_0[k, l] * a[i, l]
                        a[k, l] = a[k, l] - out_1[k, l]
        return a

    out = func(a)
    out, binds_info = TensorUtils.inplace_set(a, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs
Example #8
def apply_gradient_descent(var, alpha, delta):
    """
    Update var by subtracting alpha * delta from it.

    .. math::
        var_{t} = var_{t-1} - \\alpha \\delta

    Args:
        var (tvm.tensor.Tensor): Input var of dtype float16, float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of same type as input var.
        delta (tvm.tensor.Tensor): A tensor of same shape and dtype as input var.

    Returns:
        tvm.tensor.Tensor, Updated var.
    """
    # check dtypes
    vc_util.ops_dtype_check(var.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    for i in (alpha, delta):
        vc_util.elemwise_dtype_check(var.dtype, i.dtype)

    # check shapes
    vc_util.elemwise_shape_check(var.shape, delta.shape)
    if tuple(get_shape(alpha)) != (1, ):
        raise RuntimeError("input alpha only support scalar tensor.")

    # compute
    out_var = _apply_gradient_descent_compute(var, alpha, delta)

    # reuse var
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return out_var, attrs
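The update itself is a single elementwise step; as an illustrative one-liner:

def gradient_descent_reference(var, alpha, delta):
    # var_t = var_{t-1} - alpha * delta
    return var - alpha * delta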
Example #9
def apply_ada_max(var,
                  m,
                  v,
                  grad,
                  lr,
                  beta1,
                  beta1_power,
                  beta2,
                  epsilon,
                  target=utils.CCE):
    """
    Update var according to the AdaMax algorithm.

    m_t <- beta1 * m_{t-1} + (1 - beta1) * g
    v_t <- max(beta2 * v_{t-1}, abs(g))
    variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        m (tvm.tensor.Tensor): A tensor of same shape and type as var.
        v (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        beta1 (tvm.tensor.Tensor): A scalar tensor of same type as var, 0.0 <= beta1 <= 1.0.
        beta1_power (tvm.tensor.Tensor): The value of :math:`beta1^t`, a scalar tensor of same type as var.
        beta2 (tvm.tensor.Tensor): A scalar tensor of same type as var, 0.0 <= beta2 <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var.
        tvm.tensor.Tensor, Updated m.
        tvm.tensor.Tensor, Updated v.
    """

    _check_inputs(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon)

    out_var, out_m, out_v = _apply_ada_max_compute(var, m, v, grad, lr, beta1,
                                                   beta1_power, beta2, epsilon)

    # reuse var, m and v
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    out_v, binds_info3 = TensorUtils.inplace_set(v, out_v, "v_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, out_v, attrs
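The AdaMax step restated as a NumPy sketch (illustrative only; beta1_power stands for beta1**t):

import numpy as np

def ada_max_reference(var, m, v, grad, lr, beta1, beta1_power, beta2, epsilon):
    m = beta1 * m + (1 - beta1) * grad
    v = np.maximum(beta2 * v, np.abs(grad))
    var = var - lr / (1 - beta1_power) * m / (v + epsilon)
    return var, m, v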
Example #10
def apply_add_sign(var,
                   m,
                   grad,
                   lr,
                   alpha,
                   sign_decay,
                   beta,
                   target=utils.CCE):
    """
    Update 'var' according to the AddSign update.

    m_out = m * beta + grad * (1 - beta)
    var_out = var - lr * (alpha + sign_decay * Sign(grad) * Sign(m)) * grad

    Args:
        var (tvm.tensor.Tensor): A tensor of type float16 or float32.
        m (tvm.tensor.Tensor): A tensor of the same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of the same shape and type as var.
        lr (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        alpha (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        sign_decay (tvm.tensor.Tensor): A scalar tensor of the same type as var.
        beta (tvm.tensor.Tensor): A scalar tensor of the same type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
        tvm.tensor.Tensor, updated m.
    """

    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    for i in (m, lr, alpha, sign_decay, beta, grad):
        utils.elemwise_dtype_check(var.dtype, i.dtype)
    for i in (m, grad):
        utils.elemwise_shape_check(var.shape, i.shape)
    for i in (lr, alpha, sign_decay, beta):
        if tuple(get_shape(i)) != (1, ):
            raise RuntimeError(
                "lr, alpha, sign_decay and beta only support scalar.")

    out_var, out_m = _apply_add_sign_compute(var, m, grad, lr, alpha,
                                             sign_decay, beta)

    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_m, binds_info2 = TensorUtils.inplace_set(m, out_m, "m_buf")
    binds_info.update(binds_info2)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_m, attrs
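The AddSign step as a NumPy sketch (illustrative only; as in the PowerSign example, the sign of the updated moving average m_out is assumed in the var update):

import numpy as np

def add_sign_reference(var, m, grad, lr, alpha, sign_decay, beta):
    m_out = m * beta + grad * (1 - beta)
    var_out = var - lr * (alpha + sign_decay * np.sign(grad) * np.sign(m_out)) * grad
    return var_out, m_out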
Example #11
def apply_adadelta(var, accum, accum_update, grad, lr, rho, epsilon, target=utils.CCE):
    """
    Update var according to the adadelta scheme.

    accum = rho * accum + (1 - rho) * grad^2
    update = sqrt(accum_update + epsilon) / sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1 - rho) * update^2
    var -= update * lr

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        accum (tvm.tensor.Tensor): The accumulate gradient, a tensor of same shape and type as var.
        accum_update (tvm.tensor.Tensor): The accumulate updates, tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        rho (tvm.tensor.Tensor): Coefficient for calculate new accum, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var.
        tvm.tensor.Tensor, Updated accum.
        tvm.tensor.Tensor, Updated accum_update.
    """

    _check_inputs(var, accum, accum_update, grad, lr, rho, epsilon)

    out_var, out_accum, out_accum_update = _apply_adadelta_compute(var, accum, accum_update, grad, lr, rho, epsilon)

    # reuse var, accum and accum_update
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_accum, binds_info2 = TensorUtils.inplace_set(accum, out_accum, "accum_buf")
    out_accum_update, binds_info3 = TensorUtils.inplace_set(accum_update, out_accum_update, "accum_update_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_accum, out_accum_update, attrs
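The Adadelta step as a NumPy sketch (illustrative only):

import numpy as np

def adadelta_reference(var, accum, accum_update, grad, lr, rho, epsilon):
    accum = rho * accum + (1 - rho) * grad * grad
    update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1 - rho) * update * update
    var = var - update * lr
    return var, accum, accum_update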
Example #12
def inplace_operate_bind(in_tensors, out_tensors, inplace_binds):
    """
    Some tensors need to be computed in place.

    Args:
        in_tensors (Union[list, tuple]): Origin input tensors.
        out_tensors (Union[list, tuple]): Origin output tensors.
        inplace_binds (tuple): Should be a tuple of tuples; each element is
                               (in_id, out_id), where in_id is an input tensor
                               index and out_id is an output tensor index,
                               meaning that output tensor out_id is updated
                               in place into input tensor in_id.
    Returns:
        Two elements tuple, one for output tensors, the other for tensor bind relations.
    """

    for in_id, out_id in inplace_binds:
        if in_id >= len(in_tensors) or out_id >= len(out_tensors):
            raise RuntimeError("Inplace binds is invalid, while there are {} "
                               "input tensors and {} output tensors, but get "
                               "bind {}.".format(len(in_tensors),
                                                 len(out_tensors),
                                                 inplace_binds))

    out_tensors = list(out_tensors)
    tensor_binds = {}
    inplaced_tensors = []

    for i, bind in enumerate(inplace_binds):
        in_tensor = in_tensors[bind[0]]
        out_tensor = out_tensors[bind[1]]
        out_tensor, binds_info = TensorUtils.inplace_set(
            in_tensor, out_tensor, buffer_name="inp_buf_{}".format(i))
        tensor_binds.update(binds_info)
        # The calculation is updated in place in the input tensor, but MindSpore
        # needs a related fake tensor (never used) in the output list.
        out_tensor_shape = out_tensor.shape

        fake_tensor = akg.tvm.compute(
            out_tensor_shape,
            lambda *index, o_tensor=out_tensor: o_tensor(*index),
            name="fake_tensor_{}".format(i))

        out_tensors[bind[1]] = fake_tensor
        inplaced_tensors.append(out_tensor)

    return (tuple(out_tensors + inplaced_tensors), tensor_binds)
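For example (tensor names illustrative): with in_tensors = (a, b), out_tensors = (x, y) and inplace_binds = ((0, 1),), output tensor y is computed in place into input tensor a; the call returns (x, fake_tensor_0, y_inplaced) together with the buffer bind dictionary, where fake_tensor_0 is the pass-through placeholder that MindSpore expects in the output list.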
Example #13
def ClearZero(data, target=utils.CCE):
    """
    Sets all elements in tensor to zero.

    Args:
         data (tvm.tensor.Tensor): Tensor needs to be cleared to zero.

    Returns:
         out: tvm.tensor.Tensor with all elements set to zero.
         attrs: dict.
    """

    shape = [x for x in data.shape]

    zero = akg.tvm.const(0, data.dtype)
    out = akg.tvm.compute(shape, lambda *i: zero, "out")
    out, binds_info = TensorUtils.inplace_set(data, out)
    attrs = {utils.BINDS: binds_info}
    return out, attrs
Example #14
def Assign(ref, val, target=utils.CUDA):
    """
    Assign val to ref.

    Args:
        ref: Tensor, which is mutable.
        val: Tensor, which will be assigned to ref.

    Returns:
        fake_output: Tensor with the same shape as ref, needed by the ME framework.
        ref_val: Tensor, ref assigned with val.
        attrs: Dictionary, indicates that ref and ref_val share the same buf.
    
    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    dtype = val.dtype
    utils.ops_dtype_check(dtype, [
        utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT8,
        utils.DtypeForDavinci.INT16, utils.DtypeForDavinci.INT32,
        utils.DtypeForDavinci.INT64, utils.DtypeForDavinci.UINT8,
        utils.DtypeForDavinci.UINT16, utils.DtypeForDavinci.UINT32,
        utils.DtypeForDavinci.UINT64
    ])
    shape1 = [x.value for x in ref.shape]
    shape2 = [x.value for x in val.shape]
    if shape1 != shape2:
        raise RuntimeError("assign operations need input shape equal!")
    utils.check_shape(shape2)
    ref_val = akg.tvm.compute(shape2,
                              lambda *indice: val(*indice),
                              name="ref_val")
    ref_val, binds_info = TensorUtils.inplace_set(ref, ref_val)
    attrs = {utils.BINDS: binds_info}
    fake_output = akg.tvm.compute(ref.shape,
                                  lambda *indice: ref_val(*indice),
                                  name="fake_output")

    return fake_output, ref_val, attrs
Example #15
def apply_proximal_gradient_descent(var, alpha, l1, l2, delta, target=utils.CCE):
    """
    The FOBOS algorithm with fixed learning rate.

    Note:
        prox_var = var - alpha * delta
        if l1 > 0:
            var_new = Sign(prox_var)/(1+alpha*l2) * max{|prox_var|-alpha*l1,0}
        else:
            var_new = prox_var/(1+alpha*l2)

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float16 or float32.
        alpha (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l1 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        l2 (tvm.tensor.Tensor): A scalar tensor of the same type as `var`.
        delta (tvm.tensor.Tensor): A tensor of same shape and type as var.

    Returns:
        tvm.tensor.Tensor, updated var.
    """

    # check_shape
    utils.check_shape(var)
    shape = get_shape(var)
    utils.elemwise_shape_check(shape, delta.shape)
    scalar_shape = (1,)
    for scalar in (alpha, l1, l2):
        utils.elemwise_shape_check(scalar.shape, scalar_shape)

    # check dtype
    dtype = var.dtype
    utils.ops_dtype_check(dtype, [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.FLOAT32])
    for tensor in (var, alpha, l1, l2, delta):
        utils.elemwise_dtype_check(tensor.dtype, dtype)

    var_new = apply_proximal_gradient_descent_impl(var, alpha, l1, l2, delta)
    var_new, binds_info = TensorUtils.inplace_set(var, var_new, "var_buf")
    attrs = {utils.BINDS: binds_info}
    return var_new, attrs
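The FOBOS step described in the note above, as a NumPy sketch (illustrative only; the scalar tensors are treated as Python floats):

import numpy as np

def proximal_gd_reference(var, alpha, l1, l2, delta):
    prox_var = var - alpha * delta
    if l1 > 0:
        return np.sign(prox_var) / (1 + alpha * l2) * np.maximum(np.abs(prox_var) - alpha * l1, 0)
    return prox_var / (1 + alpha * l2)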
Example #16
def hpl_trsm(a, b):
    """In-place triangular solve: forward substitution with lower-triangular `a`, overwriting `b` with the solution of a * x = b, processed in 16-column blocks."""
    attrs = {"RewriteVarTensorIdx": True}

    @script
    def func(a, b):
        inverse_0 = allocate(b.shape, b.dtype, "local")
        row = b.shape[0]
        col = b.shape[1]
        for l in range(col // 16):
            for i in serial(row):
                for j in serial(i):
                    for k in range(16):
                        inverse_0[i, l*16+k] = a[i, j] * b[j, l*16+k]
                        b[i, l*16+k] = b[i, l*16+k] - inverse_0[i, l*16+k]
                for k in range(16):
                    b[i, l*16+k] = b[i, l*16+k] / a[i, i]
        return b

    out = func(a, b)
    out, binds_info = TensorUtils.inplace_set(b, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs
Example #17
File: hpl_lu.py Project: mindspore-ai/akg
def hpl_lu(a):
    """In-place LU factorization: the multipliers of L are stored below the diagonal of `a` and U is stored on and above it."""
    attrs = {"RewriteVarTensorIdx": True}

    @script
    def func(a):
        out_0 = allocate(a.shape, a.dtype, "local")
        out_1 = allocate(a.shape, a.dtype, "local")
        for i in range(a.shape[0]):
            for j in range(a.shape[1]):
                if j > i:
                    a[j, i] = a[j, i] / a[i, i]
            for k in range(a.shape[0]):
                for l in range(a.shape[1]):
                    if k > i and l > i:
                        out_0[k, l] = a[k, i]
                        out_1[k, l] = out_0[k, l] * a[i, l]
                        a[k, l] = a[k, l] - out_1[k, l]
        return a

    out = func(a)
    out, binds_info = TensorUtils.inplace_set(a, out)
    attrs[utils.BINDS] = binds_info
    return out, attrs
Example #18
File: assign_add.py Project: zhuyawen/akg
def assign_add(data, value):
    """
    Computes data + value elementwise.

    Note:
        Only supports broadcast on input tensor value.

    Args:
        data (tvm.tensor.Tensor): Data tensor.
        value (tvm.tensor.Tensor): Value tensor, broadcast is allowed.

    Returns:
        res: assign add result, tvm.tensor.Tensor, with same type and shape as input tensor data.
        attrs: dict.
    """
    input_shape = [x.value for x in data.shape]
    value_shape = [x.value for x in value.shape]

    if len(input_shape) < len(value_shape):
        raise RuntimeError("Do not support broadcast on input tensor data!")

    for i in range(len(value_shape)):
        if input_shape[len(input_shape) - i -
                       1] < value_shape[len(value_shape) - i - 1]:
            raise RuntimeError("Only support on input tensor value!")

    # broadcast adds extra compute and stage, avoid by checking the shapes before hand
    if len(value_shape) < len(input_shape) or value_shape != input_shape:
        broadcasted_value = akg.topi.broadcast_to(value, input_shape)
        res = akg.lang.cce.vadd(data, broadcasted_value)
    else:
        res = akg.lang.cce.vadd(data, value)
    res, binds_info = TensorUtils.inplace_set(data, res)
    attrs = {utils.BINDS: binds_info}
    return res, attrs
Example #19
def ApplyMomentum(weight,
                  grad,
                  accum,
                  lr_mat,
                  momt_mat,
                  use_nesterov=False,
                  grad_scale=1.0,
                  target=utils.CCE):
    """
    Apply momentum operator.

    Note:
        apply momentum is an op with in-place computation, and binds are used.

    Args:
        weight (tvm.tensor.Tensor): weight tensor to be updated.
        grad (tvm.tensor.Tensor): gradient tensor.
        accum (tvm.tensor.Tensor): accum tensor to be updated.
        lr_mat (tvm.tensor.Tensor): tensor with shape (1,).
        momt_mat (tvm.tensor.Tensor): momt_mat tensor with shape (1,).
        use_nesterov (bool): Default value is False.
        grad_scale (float): Default value is 1.0

    Returns:
        fake_output: Invalid value, just to suit the framework.
        accum_inplace: tvm.tensor.Tensor, updated accum.
        weight_inplace: tvm.tensor.Tensor, updated weight.
        attrs: dict.
    """
    shape = [x.value for x in weight.shape]
    # shape check
    utils.elemwise_shape_check(weight.shape, grad.shape)
    utils.elemwise_shape_check(weight.shape, accum.shape)
    # dtype check
    utils.ops_dtype_check([weight.dtype, grad.dtype, accum.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)

    grad = akg.tvm.compute(
        shape,
        lambda *indice: grad(*indice) * akg.tvm.const(grad_scale, grad.dtype),
        name="grad")
    momt_accum = akg.tvm.compute(shape,
                                 lambda *indice: accum(*indice) * momt_mat[0],
                                 name="momt_accum")
    accum_inplace = akg.tvm.compute(
        shape,
        lambda *indice: momt_accum(*indice) + grad(*indice),
        name="accum_inplace")

    if not use_nesterov:
        sum_grad = akg.tvm.compute(
            shape,
            lambda *indice: accum_inplace(*indice) * lr_mat[0],
            name="nesterov_lr")
        weight_inplace = akg.tvm.compute(
            shape,
            lambda *indice: weight(*indice) - sum_grad(*indice),
            name="weight_inplace")
    else:
        weight_inplace = akg.tvm.compute(
            shape,
            lambda *indice: weight(*indice) - grad(*indice) * lr_mat[
                0] - accum_inplace(*indice) * momt_mat[0] * lr_mat[0],
            name="weight_inplace")
    weight_inplace, weight_binds_info = TensorUtils.inplace_set(
        weight, weight_inplace, "data_buf")
    accum_inplace, accum_binds_info = TensorUtils.inplace_set(
        accum, accum_inplace, "accum_buf")
    binds_info_all = weight_binds_info
    binds_info_all.update(accum_binds_info)
    attrs = {utils.BINDS: binds_info_all}
    fake_output = akg.tvm.compute(shape,
                                  lambda *indice: momt_accum(*indice),
                                  name="fake_output")
    # The variable fake_output is an invalid value, just to suit the ME framework.
    # The variable weight_inplace is the updated value of weight.
    # The variable accum_inplace is the updated value of accum.
    return fake_output, accum_inplace, weight_inplace, attrs
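Stripped of the buffer binding and the fake output, the computation above amounts to the following sketch (illustrative only; lr and momt stand for the single elements of lr_mat and momt_mat):

def apply_momentum_reference(weight, grad, accum, lr, momt, use_nesterov=False, grad_scale=1.0):
    grad = grad * grad_scale
    accum = accum * momt + grad
    if use_nesterov:
        weight = weight - (grad * lr + accum * momt * lr)
    else:
        weight = weight - accum * lr
    return weight, accum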
Example #20
def scatter_add(ref, indices, updates):
    """
    Add ref with updates based on sparse index: indices.

    Note:
        updates.shape needs to equal indices.shape + ref.shape[1:].

    Args:
        ref (tvm.tensor.Tensor): Tensor of type float16, float32, int8, int32 and uint8.
        indices (tvm.tensor.Tensor): Tensor of type int32.
        updates (tvm.tensor.Tensor): Tensor has the same type as ref.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as ref.

    """
    shape_ref = get_shape(ref)
    shape_indices = get_shape(indices)
    shape_updates = get_shape(updates)

    utils.check_shape(shape_ref)
    utils.check_shape(shape_indices)
    utils.check_shape(shape_updates)
    utils.ops_dtype_check(
        [ref.dtype, updates.dtype],
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])

    utils.ops_dtype_check(indices.dtype, utils.DtypeForDavinci.INT32)
    new_shape_indices = (reduce(lambda x, y: x * y, shape_indices), )
    if len(shape_ref) > 1:
        new_shape_ref = (shape_ref[0], reduce(lambda x, y: x * y,
                                              shape_ref[1:]))
        new_indices = topi.reshape(indices, new_shape_indices)
        new_updates_shape = (tuple(new_indices.shape) +
                             tuple(new_shape_ref[1:]))
        new_updates = topi.reshape(updates, new_updates_shape)
        new_ref = topi.reshape(ref, new_shape_ref)
    else:
        new_indices = topi.reshape(indices, new_shape_indices)
        new_updates_shape = (tuple(new_indices.shape) + tuple(shape_ref[1:]))
        new_updates = topi.reshape(updates, new_updates_shape)
        new_ref = ref

    # 1D case hybrid
    @script
    def scatter_add_1d(input, input_indices, input_updates):
        n, = input.shape
        idx_len = input_indices.shape[0]
        for i in range(n):
            for idx in range(idx_len):
                if i == input_indices[idx]:
                    input[input_indices[idx]] += input_updates[idx]
        return input

    # ND case reshape to 2D's hybrid, now 2D -- 5D are OK
    @script
    def scatter_add(input, input_indices, input_updates):
        n, h = input.shape
        idx_len = input_indices.shape[0]
        for i in range(n):
            for idx in range(idx_len):
                if i == input_indices[idx]:
                    for j in range(h):
                        input[input_indices[idx], j] += input_updates[idx, j]
        return input

    attrs = {}
    if len(shape_ref) == 1:
        out = scatter_add_1d(new_ref, new_indices, new_updates)
    else:
        out = scatter_add(new_ref, new_indices, new_updates)
        out = topi.reshape(out, shape_ref)
        attrs["enable_feature_library"] = True
    out, binds_info = TensorUtils.inplace_set(ref, out)
    attrs[utils.BINDS] = binds_info

    return out, attrs
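The semantics match NumPy's unbuffered in-place scatter-add, which can serve as a quick reference check (values illustrative):

import numpy as np

ref = np.zeros((4, 3), dtype=np.float32)
indices = np.array([0, 2, 0], dtype=np.int32)
updates = np.ones((3, 3), dtype=np.float32)
np.add.at(ref, indices, updates)  # row 0 accumulates two update rows, row 2 accumulates one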