Example #1
def assign_sub(var, value, out, kernel_name='assign_sub'):
    """
    Update var by subtracting value from it.

    Parameters:
    ----------
    var : dict
        dict of the input var, includes shape and dtype;
        supported dtypes: int8, uint8, int32, float16, float32

    value : dict
        dict of the input value, includes shape and dtype;
        supported dtypes: int8, uint8, int32, float16, float32.
        Must have the same shape and dtype as var

    out : dict
        dict of the output

    kernel_name : str
        cce kernel name, default value is "assign_sub"

    Returns
    -------
    None
    """

    # get the shape and dtype
    shape_var = var.get("shape")
    shape_value = value.get("shape")
    dtype_var = var.get("dtype")
    dtype_value = value.get("dtype")

    # check whether the shape is right
    check_shape(shape_var, param_name="var")
    check_shape(shape_value, param_name="value")
    if not operator.eq(shape_var, shape_value):
        raise RuntimeError("all input shape must be the equal")

    # check whether dtypes are fp16, fp32, int8, uint8, int32
    # and whether they are the same
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_var, check_list, param_name="var")
    check_dtype(dtype_value, check_list, param_name="value")
    dtype_var = dtype_var.lower()
    dtype_value = dtype_value.lower()
    if dtype_var != dtype_value:
        raise RuntimeError("all input dtype must be same")

    shape, _ = refine_shape_axes(shape_var, [])
    data_var = tvm.placeholder(shape, dtype=dtype_var, name='data_var')
    data_value = tvm.placeholder(shape, dtype=dtype_value, name='data_value')
    sch, res = _assign_sub_compute(data_var, data_value, out, kernel_name)

    with set_bool_storage_config():
        tvm.build(sch, [data_var, data_value, res], "cce", name=kernel_name)
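
For reference, the update this kernel performs is plain element-wise in-place subtraction. Below is a minimal host-side NumPy sketch of the same semantics; the values are illustrative and not taken from the kernel above.

import numpy as np

# var is updated in place by subtracting value element-wise,
# mirroring the assign_sub semantics described in the docstring.
var = np.array([5.0, 3.0, 1.0], dtype=np.float32)
value = np.array([1.0, 0.5, 2.0], dtype=np.float32)
np.subtract(var, value, out=var)   # var is now [4.0, 2.5, -1.0]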
Example #2
def apply_proximal_gradient_descent(
        var,
        alpha,
        l1,
        l2,
        delta,
        out,
        kernel_name="apply_proximal_gradient_descent"):
    """
    Update '*var' using the FOBOS algorithm with a fixed learning rate.

    prox_v = var - alpha * delta
    var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

    Parameters:
    ----------
    var: the dict of var, only supports float16, float32
    alpha: the dict of alpha, only supports float16, float32
    l1: the dict of l1, only supports float16, float32
    l2: the dict of l2, only supports float16, float32
    delta: the dict of delta, only supports float16, float32
    out: the dict of output, only supports float16, float32

    kernel_name : cce kernel name, default value is
        "apply_proximal_gradient_descent"

    Returns
    -------
    None
    """

    check_list = ('float16', 'float32')
    dtype = var.get('dtype')
    check_dtype(dtype, check_list, param_name="var")
    dtype = dtype.lower()

    input_dict = (var, alpha, l1, l2, delta)

    args = ApplyOpConfig.TensorArgs(input_dict,
                                    apply_proximal_gradient_descent_compute,
                                    out, 5 if dtype == 'float32' else 10)
    name = ApplyOpConfig.TensorName(all=('var', 'alpha', 'l1', 'l2', 'delta'),
                                    scalar=('alpha', 'l1', 'l2'),
                                    reuse=('var', ))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())

    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
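
A minimal host-side NumPy sketch of the FOBOS update quoted in the docstring; the tensor and scalar values below are illustrative.

import numpy as np

# prox_v = var - alpha * delta
# var = sign(prox_v) / (1 + alpha * l2) * max(|prox_v| - alpha * l1, 0)
var = np.array([0.8, -0.3, 0.05], dtype=np.float32)
delta = np.array([0.2, -0.1, 0.4], dtype=np.float32)
alpha, l1, l2 = 0.1, 0.01, 0.001

prox_v = var - alpha * delta
var = (np.sign(prox_v) / (1.0 + alpha * l2)
       * np.maximum(np.abs(prox_v) - alpha * l1, 0.0))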
Example #3
def apply_add_sign_d(var,
                     m,
                     lr,
                     alpha,
                     sign_decay,
                     beta,
                     grad,
                     var_out,
                     m_out,
                     kernel_name="apply_add_sign_d"):
    """
    Update '*var' according to the AddSign update.

    m_t <- beta1 * m_{t-1} + (1 - beta1) * g
    update <- (alpha + sign_decay * sign(g) * sign(m)) * g
    variable <- variable - lr_t * update

    Parameters:
    ----------
    var: the dict of var, supports float16, float32
    m: the dict of m, supports float16, float32
    lr: the dict of lr, supports float16, float32
    alpha: the dict of alpha, supports float16, float32
    sign_decay: the dict of sign_decay, supports float16, float32
    beta: the dict of beta, supports float16, float32
    grad: the dict of grad, supports float16, float32
    var_out: the dict of var output data
    m_out: the dict of m output data
    kernel_name : cce kernel name, default value is "apply_add_sign_d"

    Returns
    -------
    None
    """

    input_dict = (var, m, lr, alpha, sign_decay, beta, grad)
    out = [var_out, m_out]
    args = ApplyOpConfig.TensorArgs(input_dict, apply_add_sign_d_compute, out,
                                    10)
    name = ApplyOpConfig.TensorName(all=('var', 'm', 'lr', 'alpha',
                                         'sign_decay', 'beta', 'grad'),
                                    scalar=('lr', 'alpha', 'sign_decay',
                                            'beta'),
                                    reuse=('var', 'm'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())
    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
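
A host-side NumPy sketch of the AddSign update from the docstring; all values below are illustrative.

import numpy as np

var = np.array([0.5, -0.2], dtype=np.float32)
m = np.zeros_like(var)
grad = np.array([0.1, -0.3], dtype=np.float32)
lr, alpha, sign_decay, beta = 0.01, 1.0, 0.5, 0.9

# m_t <- beta * m_{t-1} + (1 - beta) * g
m = beta * m + (1.0 - beta) * grad
# update <- (alpha + sign_decay * sign(g) * sign(m)) * g
update = (alpha + sign_decay * np.sign(grad) * np.sign(m)) * grad
# variable <- variable - lr * update
var = var - lr * update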
Example #4
def apply_ftrl_d(var,
                 accum,
                 linear,
                 grad,
                 lr,
                 l1,
                 l2,
                 lr_power,
                 var_out,
                 accum_out,
                 linear_out,
                 kernel_name="apply_ftrl_d"):
    """
    Update '*var' according to the Ftrl-proximal algorithm.
    accum_new = accum + grad * grad
    linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
    quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
    var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
    accum = accum_new

    Parameters:
    ----------
    var : the dict of mutable tensor var, only supports float16, float32

    accum : the dict of mutable tensor accum.
        Must have the same data type as `var`.

    linear : the dict of mutable tensor linear.
        Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    l1 : the dict of scalar l1. Must have the same data type as `var`.

    l2 : the dict of scalar l2. Must have the same data type as `var`.

    lr_power : the dict of scalar lr_power.
        Must have the same data type as `var`.

    var_out: the dict of var output data.

    accum_out: the dict of accum output data.

    linear_out: the dict of linear output data

    kernel_name : cce kernel name, default value is "apply_ftrl_d".

    Returns
    -------
    None
    """

    input_dict = (var, accum, linear, grad, lr, l1, l2, lr_power)
    out = [var_out, accum_out, linear_out]
    args = ApplyOpConfig.TensorArgs(input_dict, apply_ftrl_d_compute, out, 15)
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'linear', 'grad',
                                         'lr', 'l1', 'l2', 'lr_power'),
                                    scalar=('lr', 'l1', 'l2', 'lr_power'),
                                    reuse=('var', 'accum', 'linear'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())

    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
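
A host-side NumPy sketch of the FTRL-proximal update from the docstring; the values, including lr_power = -0.5, are illustrative.

import numpy as np

var = np.array([0.4, -0.6], dtype=np.float32)
accum = np.array([0.1, 0.1], dtype=np.float32)
linear = np.zeros_like(var)
grad = np.array([0.2, -0.1], dtype=np.float32)
lr, l1, l2, lr_power = 0.05, 0.01, 0.001, -0.5

accum_new = accum + grad * grad
linear += grad - (accum_new ** (-lr_power) - accum ** (-lr_power)) / lr * var
quadratic = 1.0 / (accum_new ** lr_power * lr) + 2.0 * l2
var = np.where(np.abs(linear) > l1,
               (np.sign(linear) * l1 - linear) / quadratic,
               0.0)
accum = accum_new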
Example #5
def apply_ftrl_v2_d(var,
                    accum,
                    linear,
                    grad,
                    lr,
                    l1,
                    l2,
                    l2_shrinkage,
                    lr_power,
                    var_out,
                    accum_out,
                    linear_out,
                    use_locking=False,
                    kernel_name="apply_ftrl_v2_d"):
    """
    Update '*var' according to the Ftrl-proximal algorithm.

    grad_with_shrinkage = grad + 2 * l2_shrinkage * var
    accum_new = accum + grad * grad
    linear += grad_with_shrinkage -
        (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
    x = l1 * linear.sign - linear
    y = accum_new^(-lr_power) / lr + 2 * l2
    var = x / y if |linear| > l1 else 0.0
    accum = accum_new

    Parameters:
    ----------
    var : the dict of mutable tensor var, only supports float16, float32

    accum : the dict of mutable tensor accum.
        Must have the same data type as `var`.

    linear : the dict of mutable tensor linear.
        Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    l1 : the dict of scalar l1. Must have the same data type as `var`.

    l2 : the dict of scalar l2. Must have the same data type as `var`.

    l2_shrinkage: the dict of scalar l2_shrinkage.
        Must have the same data type as `var`.

    lr_power : the dict of scalar lr_power.
        Must have the same data type as `var`.

    var_out : the dict of output var.

    accum_out : the dict of output accum.

    linear_out : the dict of output linear.

    use_locking : optional attr, default value is False.

    kernel_name : cce kernel name, default value is "apply_ftrl_v2_d".

    Returns
    -------
    None
    """
    input_dict = (var, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power)

    args = ApplyOpConfig.TensorArgs(input_dict, apply_ftrl_v2_d_compute,
                                    [var_out, accum_out, linear_out], 15)
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'linear', 'grad',
                                         'lr', 'l1', 'l2', 'l2_shrinkage',
                                         'lr_power'),
                                    scalar=('lr', 'l1', 'l2', 'l2_shrinkage',
                                            'lr_power'),
                                    reuse=('var', 'accum', 'linear'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())
    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
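
A host-side NumPy sketch of the shrinkage variant from the docstring; compared with apply_ftrl_d, the gradient is first augmented with the 2 * l2_shrinkage * var term. The values below are illustrative.

import numpy as np

var = np.array([0.4, -0.6], dtype=np.float32)
accum = np.array([0.1, 0.1], dtype=np.float32)
linear = np.zeros_like(var)
grad = np.array([0.2, -0.1], dtype=np.float32)
lr, l1, l2, l2_shrinkage, lr_power = 0.05, 0.01, 0.001, 0.01, -0.5

grad_with_shrinkage = grad + 2.0 * l2_shrinkage * var
accum_new = accum + grad * grad
linear += grad_with_shrinkage - (accum_new ** (-lr_power)
                                 - accum ** (-lr_power)) / lr * var
x = l1 * np.sign(linear) - linear
y = accum_new ** (-lr_power) / lr + 2.0 * l2
var = np.where(np.abs(linear) > l1, x / y, 0.0)
accum = accum_new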
Example #6
def apply_adagrad_da_d(var,
                       gradient_accumulator,
                       gradient_squared_accumulator,
                       grad,
                       lr,
                       l1,
                       l2,
                       global_step,
                       var_out,
                       gradient_accumulator_out,
                       gradient_squared_accumulator_out,
                       use_locking=False,
                       kernel_name='apply_adagrad_da_d'):
    """
    Update '*var' according to the proximal Adagrad (Adagrad-DA) scheme.

    grad_accum += grad
    grad_squared_accum += grad * grad
    tmp_val = sign(grad_accum) * max{|grad_accum| - l1 * global_step, 0}
        if l1 > 0 else grad_accum
    x_value = -1 * lr * tmp_val
    y_value = l2 * global_step * lr + sqrt(grad_squared_accum)
    var = x_value / y_value

    Parameters:
    ----------
    var : the dict of mutable tensor var, only supports float16, float32

    gradient_accumulator:
        the dict of mutable tensor gradient_accumulator,
        Must have the same data type as `var`.

    gradient_squared_accumulator :
        the dict of mutable tensor gradient_squared_accumulator,
        Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    l1 : the dict of scalar l1. Must have the same data type as `var`.

    l2 : the dict of scalar l2. Must have the same data type as `var`.

    global_step : the dict of scalar global_step, only supports int32.

    var_out : the dict of output.

    gradient_accumulator_out : the dict of output.

    gradient_squared_accumulator_out : the dict of output.

    use_locking : optional attr, default value is False.

    kernel_name : cce kernel name, default value is "apply_adagrad_da".

    Returns:
    -------
    None
    """
    # check dtype same
    stype_dict = (var, gradient_accumulator, gradient_squared_accumulator,
                  grad, lr, l1, l2)
    normalized_dtype_list = [None] * len(stype_dict)
    for i, d in enumerate(stype_dict):
        dtype = d.get('dtype')
        normalized_dtype_list[i] = dtype.lower()
    if any(elem != normalized_dtype_list[0] for elem in normalized_dtype_list):
        raise RuntimeError("All input data types must be the same")

    # check global_step dtype
    dtype = global_step.get("dtype").lower()
    check_dtype(dtype, ("int32", ), param_name="global_step")

    input_dict = (var, gradient_accumulator, gradient_squared_accumulator,
                  grad, lr, l1, l2, global_step)
    args = ApplyOpConfig.TensorArgs(
        input_dict, apply_adagrad_da_d_compute,
        [var_out, gradient_accumulator_out, gradient_squared_accumulator_out],
        15)
    name = ApplyOpConfig.TensorName(
        all=('var', 'gradient_accumulator', 'gradient_squared_accumulator',
             'grad', 'lr', 'l1', 'l2', 'global_step'),
        scalar=('lr', 'l1', 'l2', 'global_step'),
        reuse=('var', 'gradient_accumulator', 'gradient_squared_accumulator'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config(),
                                          dtype=('float16', 'float32',
                                                 'int32'))
    common_apply_op_process(ApplyOpConfig(args, name, options),
                            kernel_name,
                            same_flag=False)
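
A host-side NumPy sketch of the Adagrad-DA update from the docstring; the values below are illustrative.

import numpy as np

var = np.array([0.3, -0.7], dtype=np.float32)
grad_accum = np.array([0.5, -0.2], dtype=np.float32)
grad_squared_accum = np.array([0.25, 0.04], dtype=np.float32)
grad = np.array([0.1, -0.3], dtype=np.float32)
lr, l1, l2 = 0.01, 0.001, 0.001
global_step = 10

grad_accum += grad
grad_squared_accum += grad * grad
if l1 > 0:
    tmp_val = np.sign(grad_accum) * np.maximum(
        np.abs(grad_accum) - l1 * global_step, 0.0)
else:
    tmp_val = grad_accum
x_value = -lr * tmp_val
y_value = l2 * global_step * lr + np.sqrt(grad_squared_accum)
var = x_value / y_value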