def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False, use_nesterov=False, weight_decay=0.0, loss_scale=1.0): super(Adam, self).__init__(learning_rate, params, weight_decay, loss_scale) _check_param_value(beta1, beta2, eps, self.cls_name) validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name) self.beta1 = Tensor(beta1, mstype.float32) self.beta2 = Tensor(beta2, mstype.float32) self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power") self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power") self.eps = Tensor(eps, mstype.float32) self.use_nesterov = use_nesterov self.use_locking = use_locking self.moment1 = self.parameters.clone(prefix="moment1", init='zeros') self.moment2 = self.parameters.clone(prefix="moment2", init='zeros') self._is_device = True self.hyper_map = C.HyperMap() self.opt = P.Adam(use_locking, use_nesterov) self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov) self.sparse_opt.add_prim_attr("primitive_target", "CPU") self._ps_pull = P.Pull() self._ps_push = P.Push("Adam", [0, 1, 2]) self._ps_push.add_prim_attr("use_nesterov", use_nesterov)
def __init__(self): super(Net, self).__init__() self.fused_sparse_adam = P.FusedSparseAdam() self.var = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="var") self.m = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="m") self.v = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="v")
def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False, use_nesterov=False, weight_decay=0.0, loss_scale=1.0): super(Adam, self).__init__(learning_rate, params, weight_decay, loss_scale) _check_param_value(beta1, beta2, eps, self.cls_name) validator.check_value_type("use_locking", use_locking, [bool], self.cls_name) validator.check_value_type("use_nesterov", use_nesterov, [bool], self.cls_name) self.beta1 = Tensor(beta1, mstype.float32) self.beta2 = Tensor(beta2, mstype.float32) self.beta1_power = Parameter(initializer(1, [1], mstype.float32), name="beta1_power") self.beta2_power = Parameter(initializer(1, [1], mstype.float32), name="beta2_power") self.eps = eps self.moment1 = self.parameters.clone(prefix="moment1", init='zeros') self.moment2 = self.parameters.clone(prefix="moment2", init='zeros') self.hyper_map = C.HyperMap() self.opt = P.Adam(use_locking, use_nesterov) self.sparse_opt = P.FusedSparseAdam(use_locking, use_nesterov)
def __init__(self, var, m, v): super(FusedSparseAdamNet, self).__init__() self.fused_sparse_adam = P.FusedSparseAdam() self.var = Parameter(var, name="var") self.m = Parameter(m, name="m") self.v = Parameter(v, name="v")