Example #1
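All snippets below are fragments of MindSpore's optimizer sources and assume that module's usual imports. A plausible header for reading them in context (an assumption, chosen to match the aliases P, C, F, mstype and validator used throughout) would be:

# Assumed imports for the fragments below (MindSpore 1.x layout).
import mindspore.common.dtype as mstype
from mindspore import Tensor, Parameter
from mindspore.common.initializer import initializer
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore._checkparam import Validator as validator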
    def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False,
                 use_nesterov=False, weight_decay=0.0, loss_scale=1.0):
        """Initialize LazyAdam: validate hyper-parameters and build the moment slots and the dense/sparse Adam primitives."""
        super(LazyAdam, self).__init__(learning_rate, params, weight_decay, loss_scale)
        _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
        validator.check_value_type("use_locking", use_locking, [bool], self.cls_name)
        validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name)

        self.beta1 = Tensor(beta1, mstype.float32)
        self.beta2 = Tensor(beta2, mstype.float32)
        self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power")
        self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power")
        self.eps = Tensor(eps, mstype.float32)
        self.use_nesterov = use_nesterov
        self.use_locking = use_locking
        self._is_device = True
        self.moment1 = self.parameters.clone(prefix="moment1", init='zeros')
        self.moment2 = self.parameters.clone(prefix="moment2", init='zeros')

        self.hyper_map = C.HyperMap()
        self.opt = P.Adam(use_locking, use_nesterov)
        self.sparse_opt = P.FusedSparseLazyAdam(use_locking, use_nesterov)
        self.sparse_opt.add_prim_attr("primitive_target", "CPU")
        self._ps_pull = P.Pull()
        self._ps_push = P.Push("Adam", [0, 1, 2])
        self._ps_push.add_prim_attr("use_nesterov", use_nesterov)
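For context, a minimal usage sketch of this optimizer, assuming it is exposed as mindspore.nn.LazyAdam and that a network cell net already exists elsewhere:

from mindspore import nn

# Minimal sketch: optimize all trainable parameters of a hypothetical network `net`.
optimizer = nn.LazyAdam(net.trainable_params(), learning_rate=1e-3,
                        beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=0.0)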
Example #2
    def __init__(self, params, initial_accum=0.1, learning_rate=0.001, lr_power=-0.5, l1=0.0, l2=0.0,
                 use_locking=False, loss_scale=1.0, weight_decay=0.0):
        """Initialize FTRL: validate hyper-parameters and build the accumulator/linear slots and the dense/sparse FTRL primitives."""
        super(FTRL, self).__init__(learning_rate, params, weight_decay, loss_scale=loss_scale)
        if self.dynamic_lr or self.is_group_lr:
            raise ValueError('Dynamic learning rate or group learning rate is currently not supported.')
        _check_param(initial_accum, lr_power, l1, l2, use_locking, self.cls_name)
        self.moments = self.parameters.clone(prefix="moments", init=initial_accum)
        self.linear = self.parameters.clone(prefix="linear", init='zeros')
        self.l1 = l1
        self.l2 = l2
        self.lr = learning_rate
        self.lr_power = lr_power
        if not self.is_group:
            self.decay_flags = tuple((lambda: True)() for x in self.parameters)
        self.hyper_map = C.HyperMap()
        self.opt = P.ApplyFtrl(use_locking=use_locking)
        self.use_locking = use_locking
        self.sparse_opt = P.SparseApplyFtrl(learning_rate, l1, l2, lr_power, use_locking=use_locking)
        self._ps_pull = P.Pull()
        self._ps_push = P.Push("Ftrl", [0, 1, 2])
        self._ps_push.add_prim_attr("init_accum", initial_accum)
        self._ps_push.add_prim_attr("lr", learning_rate)
        self._ps_push.add_prim_attr("l1", l1)
        self._ps_push.add_prim_attr("l2", l2)
        self._ps_push.add_prim_attr("lr_power", lr_power)
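A matching usage sketch, assuming the class is exposed as mindspore.nn.FTRL and that a network cell net is defined elsewhere; since the constructor above rejects dynamic and group learning rates, only a fixed scalar rate is shown:

from mindspore import nn

# Minimal sketch for a hypothetical network `net`; FTRL only accepts a fixed learning rate here.
optimizer = nn.FTRL(net.trainable_params(), initial_accum=0.1, learning_rate=0.001,
                    lr_power=-0.5, l1=0.0, l2=0.0)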
Example #3
def _run_opt_with_sparse(opt, sparse_opt, beta1_power, beta2_power, beta1,
                         beta2, eps, lr, gradient, params, moment1, moment2,
                         ps_parameter):
    """Apply sparse adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    indices = gradient.indices()
    values = gradient.values()
    if ps_parameter:
        op_shape = P.Shape()
        _ps_pull = P.Pull()
        _ps_push = P.Push("Adam", [0, 1, 2])
        shapes = (op_shape(params), op_shape(moment1), op_shape(moment2),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr),
                  op_shape(beta1), op_shape(beta2), op_shape(eps),
                  op_shape(values), op_shape(indices))
        success = F.depend(
            success,
            _ps_pull(
                _ps_push((beta1_power, beta2_power, lr, beta1, beta2, eps,
                          values, indices), shapes), params))
    else:
        success = F.depend(
            success,
            sparse_opt(params, moment1, moment2, beta1_power, beta2_power, lr,
                       beta1, beta2, eps, values, indices))
    return success
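Dispatch functions like this are typically registered on a C.MultitypeFuncGraph and mapped over every parameter inside the optimizer's construct. A rough sketch of that call site, where _adam_opt, gradients and self.ps_parameters are assumed names rather than code taken from this page:

# Rough sketch of the call site, not the exact library code.
# _adam_opt is assumed to be the C.MultitypeFuncGraph on which the function above is
# registered; HyperMap applies it element-wise across the per-parameter tuples.
success = self.hyper_map(
    F.partial(_adam_opt, self.opt, self.sparse_opt, self.beta1_power, self.beta2_power,
              self.beta1, self.beta2, self.eps, lr),
    gradients, self.parameters, self.moment1, self.moment2, self.ps_parameters)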
Example #4
    def __init__(self,
                 params,
                 initial_accum=0.1,
                 learning_rate=0.001,
                 lr_power=-0.5,
                 l1=0.0,
                 l2=0.0,
                 use_locking=False,
                 loss_scale=1.0,
                 weight_decay=0.0):
        """Initialize PSFTRL: validate hyper-parameters and build the accumulator/linear slots plus CPU-pinned Push/Pull primitives."""
        super(PSFTRL, self).__init__(learning_rate,
                                     params,
                                     loss_scale=loss_scale)
        if self.is_group:
            raise RuntimeError(
                f"The {self.cls_name} optimizer cannot support group setting.")
        _check_param(initial_accum, lr_power, l1, l2, use_locking,
                     self.cls_name)
        self.moments = self.parameters.clone(prefix="moments",
                                             init=initial_accum)
        self.linear = self.parameters.clone(prefix="linear", init='zeros')
        self.l1 = l1
        self.l2 = l2
        self.lr_power = lr_power
        self.weight_decay = weight_decay
        self.decay_tf = tuple((lambda: True)() for x in self.parameters)

        self.hyper_map = C.HyperMap()
        self.push = P.Push("Ftrl", [0, 1, 2])
        self.push.add_prim_attr("primitive_target", "CPU")
        self.pull = P.Pull()
        self.pull.add_prim_attr("primitive_target", "CPU")
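The Push/Pull primitives are pinned to the CPU because the actual update runs on the parameter-server side. A rough sketch of turning that mode on before building the optimizer; set_ps_context and set_param_ps are assumptions based on MindSpore 1.x, and net is a hypothetical cell:

from mindspore import context

# Rough sketch: enable parameter-server training and mark the network's parameters
# as server-hosted before constructing PSFTRL.
context.set_ps_context(enable_ps=True)
net.set_param_ps()
optimizer = PSFTRL(net.trainable_params(), initial_accum=0.1, learning_rate=0.001)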
Example #5
def _tensor_run_opt_ext(opt, momentum, learning_rate, gradient, weight, moment, ps_parameter):
    """Apply momentum optimizer to the weight parameter using Tensor."""
    success = True
    if ps_parameter:
        op_shape = P.Shape()
        _ps_pull = P.Pull()
        _ps_push = P.Push("ApplyMomentum", [])
        shapes = (op_shape(learning_rate), op_shape(gradient), op_shape(momentum))
        success = F.depend(success, _ps_pull(_ps_push((learning_rate, gradient, momentum), shapes), weight))
    else:
        success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
    return success
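This dispatch function backs the stock Momentum optimizer; a minimal instantiation sketch, assuming the public wrapper is mindspore.nn.Momentum and a network cell net exists:

from mindspore import nn

# Minimal sketch: P.ApplyMomentum runs locally unless the parameter is marked as a
# parameter-server parameter, in which case the Push/Pull branch above is taken.
optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)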
Example #6
def _run_opt_with_one_number(opt, sparse_opt, beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params,
                             moment1, moment2, ps_parameter):
    """Apply adam optimizer to the weight parameter using Tensor."""
    success = True
    if ps_parameter:
        op_shape = P.Shape()
        _ps_pull = P.Pull()
        _ps_push = P.Push("Adam", [0, 1, 2])
        success = F.depend(success, _ps_pull(_ps_push((beta1_power, beta2_power, lr, beta1, beta2, eps, gradient),
                                                      (op_shape(params), op_shape(moment1), op_shape(moment2))),
                                             params))
    else:
        success = F.depend(success, opt(params, moment1, moment2, beta1_power, beta2_power, lr, beta1, beta2,
                                        eps, gradient))
    return success
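The corresponding public wrapper is the ordinary Adam optimizer; a minimal sketch, assuming mindspore.nn.Adam and an existing network cell net:

from mindspore import nn

# Minimal sketch: dense gradients take the P.Adam branch above; parameter-server
# parameters go through the Push/Pull branch instead.
optimizer = nn.Adam(net.trainable_params(), learning_rate=1e-3,
                    beta1=0.9, beta2=0.999, eps=1e-8)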
Example #7
def _tensor_run_opt_with_sparse(opt, spars_opt, l1, l2, lr_power,
                                learning_rate, linear, gradient, weight,
                                moment, ps_parameter):
    """Apply sparse ftrl optimizer to the weight parameter when the gradient is sparse."""
    success = True
    indices = gradient.indices()
    values = gradient.values()
    if ps_parameter:
        op_shape = P.Shape()
        _ps_pull = P.Pull()
        _ps_push = P.Push("Ftrl", [0, 1, 2])
        shapes = (op_shape(weight), op_shape(moment), op_shape(linear),
                  op_shape(values), op_shape(indices))
        success = F.depend(
            success, _ps_pull(_ps_push((values, indices), shapes), weight))
    else:
        success = F.depend(success,
                           spars_opt(weight, moment, linear, values, indices))
    return success
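The values/indices pair above comes from a row-tensor (sparse) gradient, such as the one produced by a sparse embedding lookup. A rough sketch of a setup that yields such gradients; the layer configuration is illustrative, not taken from this page:

from mindspore import nn

# Rough sketch: a sparse EmbeddingLookup produces row-tensor gradients, which are
# routed into the sparse FTRL branch shown above.
embedding = nn.EmbeddingLookup(vocab_size=10000, embedding_size=64,
                               target='CPU', sparse=True)
optimizer = nn.FTRL(embedding.trainable_params(), learning_rate=0.001)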
Example #8
def _tensor_run_opt(opt, spars_opt, l1, l2, lr_power, learning_rate, linear,
                    gradient, weight, moment, ps_parameter):
    """Apply ftrl optimizer to the weight parameter."""
    success = True
    if ps_parameter:
        op_shape = P.Shape()
        _ps_pull = P.Pull()
        _ps_push = P.Push("Ftrl", [0, 1, 2])
        success = F.depend(
            success,
            _ps_pull(
                _ps_push(
                    (gradient, learning_rate, l1, l2, lr_power),
                    (op_shape(weight), op_shape(moment), op_shape(linear))),
                weight))
    else:
        success = F.depend(
            success,
            opt(weight, moment, linear, gradient, learning_rate, l1, l2,
                lr_power))
    return success
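As with the Adam variant in Example #3, this function is meant to be mapped over all parameters from the optimizer's construct. A rough sketch of that call, where _ftrl_opt, grads and self.ps_parameters are assumed names:

# Rough sketch of the call site inside FTRL's construct, not the exact library code.
success = self.hyper_map(
    F.partial(_ftrl_opt, self.opt, self.sparse_opt, self.l1, self.l2, self.lr_power, lr),
    self.linear, grads, self.parameters, self.moments, self.ps_parameters)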