def __init__(self, model, step_size=0.1, prior_std=1.):
    """
    Places a Gaussian prior log N(theta | 0, prior_std^2) on the weights,
    which enters the update as L2 weight decay with coefficient
    1 / prior_std^2.

    :param model: torch.nn.Module whose parameters are sampled
    :param step_size: discretization step size of the sampler
    :param prior_std: standard deviation of the Gaussian prior;
        0 disables the prior entirely
    """
    # prior_std == 0 disables the prior instead of dividing by zero.
    weight_decay = 1 / (prior_std ** 2) if prior_std != 0 else 0

    if weight_decay < 0.0:
        raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
    if step_size < 0.0:
        raise ValueError("Invalid learning rate: {}".format(step_size))

    # Total number of trainable parameters, recorded in the optimizer defaults.
    self.num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    self.A = 1.

    defaults = dict(
        step_size=step_size,
        weight_decay=weight_decay,
        traj_step=0,
        num_params=self.num_params,
        A=self.A,
    )

    self.model = model
    params = self.model.parameters()
    Optimizer.__init__(self, params=params, defaults=defaults)
    MCMC_Optim.__init__(self)
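# A quick, self-contained check of the prior-to-weight-decay mapping used
# above (standalone sketch; it does not require the optimizer class itself):
def prior_std_to_weight_decay(prior_std: float) -> float:
    # log N(theta | 0, prior_std^2) contributes -||theta||^2 / (2 * prior_std^2)
    # to the log-posterior, i.e. an L2 penalty with coefficient 1 / prior_std^2.
    return 1 / (prior_std ** 2) if prior_std != 0 else 0

assert prior_std_to_weight_decay(1.0) == 1.0
assert prior_std_to_weight_decay(2.0) == 0.25
assert prior_std_to_weight_decay(0.0) == 0  # 0 disables the prior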
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weight_decay=0):
    if not 0.0 <= lr:
        raise ValueError("Invalid learning rate: {}".format(lr))
    if not 0.0 <= eps:
        raise ValueError("Invalid epsilon value: {}".format(eps))
    if not 0.0 <= betas[0] < 1.0:
        raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
    if not 0.0 <= betas[1] < 1.0:
        raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
    PT_Optimizer.__init__(
        self,
        params,
        {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay},
    )
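# These are the same hyperparameter guards torch.optim.Adam applies to an
# identical signature; a runnable demonstration of the rejection behaviour,
# using torch.optim.Adam as a stand-in since the class above is not named
# in the source:
import torch

try:
    torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], betas=(1.5, 0.999))
except ValueError as err:
    print(err)  # Invalid beta parameter at index 0: 1.5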
def __init__(
    self,
    params,
    optimizer_class: Type[Optimizer],
    process_group: Optional[Any] = None,
    parameters_as_bucket_view: bool = False,
    **defaults: Any,
):
    # Perform type and assumption checks on the input parameters
    self._verify_and_init_params(params)
    self._verify_same_dense_param_type()

    # NOTE: The parent constructor uses `add_param_group()`, which is
    # partially overloaded in ZeroRedundancyOptimizer, so we use the
    # `initialized` flag to dissociate the behaviour of `add_param_group()`
    # between the parent and child.
    self.initialized = False

    Optimizer.__init__(self, self._all_params, defaults)
    _Joinable.__init__(self)
    # Now, all parameters are held in both `self._all_params` and
    # `self.param_groups`

    # Partition information (evaluated lazily)
    self._param_to_rank_cache: Dict[torch.Tensor, int] = {}
    self._param_to_index_cache: Dict[torch.Tensor, int] = {}
    self._partition_parameters_cache: List[List[Dict]] = []
    self._index_to_param_cache: List[torch.Tensor] = []
    self._device_to_per_rank_params_cache: Dict[torch.device, List[List[torch.Tensor]]] = {}

    # Default device for collective communication and buckets
    self._default_device = self._all_params[0].device

    self.process_group = process_group if process_group is not None else dist.group.WORLD
    self.world_size = dist.get_world_size(self.process_group)
    self.rank = dist.get_rank(self.process_group)
    self.global_rank = _get_global_rank(self.process_group, self.rank)

    self._optim_defaults = defaults
    self._optim_constructor = optimizer_class
    self._init_local_optimizer()

    self.parameters_as_bucket_view = parameters_as_bucket_view
    self._is_trainable_mask = self._get_is_trainable_mask()
    self._buckets: List[List[torch.Tensor]] = []
    self._build_param_buckets()

    # Optional consolidated optimizer state, only populated if this rank
    # is the target in `consolidate_state_dict()`
    self._all_state_dicts: List[Dict[str, Any]] = []

    self.initialized = True
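# Minimal usage sketch of ZeroRedundancyOptimizer following the documented
# PyTorch API; it needs an initialized process group, so assume a launch via
# torchrun (which sets the rendezvous environment variables):
import torch
import torch.distributed as dist
from torch.distributed.optim import ZeroRedundancyOptimizer

dist.init_process_group("gloo")
model = torch.nn.Linear(10, 10)
opt = ZeroRedundancyOptimizer(
    model.parameters(),
    optimizer_class=torch.optim.SGD,  # each rank shards this optimizer's state
    lr=0.01,
)
model(torch.randn(2, 10)).sum().backward()
opt.step()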
def __init__(self, model, step_length):
    if step_length < 0.0:
        raise ValueError("Invalid learning rate: {}".format(step_length))
    defaults = dict(lr=step_length)
    params = model.parameters()
    # Keep a handle on the full model, not just its parameters, so the
    # sampler can access it during the update.
    self.model = model
    Optimizer.__init__(self, params=params, defaults=defaults)
    MCMC_Optim.__init__(self)
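# Self-contained sketch of the same construction pattern: an Optimizer
# subclass that validates its step length and keeps a model reference
# (the class name and the plain-SGD step() body are illustrative, not
# from the source):
import torch
from torch.optim import Optimizer

class ModelAwareOptim(Optimizer):
    def __init__(self, model, step_length):
        if step_length < 0.0:
            raise ValueError("Invalid learning rate: {}".format(step_length))
        self.model = model
        super().__init__(model.parameters(), dict(lr=step_length))

    @torch.no_grad()
    def step(self, closure=None):
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is not None:
                    p.add_(p.grad, alpha=-group["lr"])

opt = ModelAwareOptim(torch.nn.Linear(3, 1), step_length=0.1)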
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
    defaults = {
        "lr": lr,
        "betas": betas,
        "eps": eps,
        "weight_decay": weight_decay,
    }
    # Ten-slot cache of three-element entries, presumably indexed by
    # `step % 10` inside step() to memoize per-step quantities (the layout
    # used by the reference RAdam implementation).
    self.buffer = [[None, None, None] for _ in range(10)]
    PT_Optimizer.__init__(self, params, defaults)
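# Sketch of how such a ten-slot buffer is typically consumed (this follows
# the reference RAdam implementation's memoization of the rectification
# term; the function below is illustrative, not taken from the source):
buffer = [[None, None, None] for _ in range(10)]

def rectification_terms(step, beta2):
    slot = buffer[step % 10]
    if step == slot[0]:  # cache hit: every parameter at this step reuses it
        return slot[1], slot[2]
    beta2_t = beta2 ** step
    n_sma_max = 2 / (1 - beta2) - 1
    n_sma = n_sma_max - 2 * step * beta2_t / (1 - beta2_t)
    slot[0], slot[1], slot[2] = step, n_sma, beta2_t
    return n_sma, beta2_t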
def __init__(self, model, step_size=0.1, prior_std=1., addnoise=True):
    """
    Places a Gaussian prior log N(theta | 0, prior_std^2) on the weights,
    which enters the update as L2 weight decay with coefficient
    1 / prior_std^2.

    :param model: torch.nn.Module whose parameters are sampled
    :param step_size: discretization step size of the sampler
    :param prior_std: standard deviation of the Gaussian prior;
        0 disables the prior entirely
    :param addnoise: if True, inject Gaussian noise into each update
        (the Langevin term); if False, the update reduces to plain SGD
    """
    # prior_std == 0 disables the prior instead of dividing by zero.
    weight_decay = 1 / (prior_std ** 2) if prior_std != 0 else 0

    if weight_decay < 0.0:
        raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
    if step_size < 0.0:
        raise ValueError("Invalid learning rate: {}".format(step_size))

    defaults = dict(step_size=step_size, weight_decay=weight_decay, addnoise=addnoise)

    self.model = model
    params = self.model.parameters()
    Optimizer.__init__(self, params=params, defaults=defaults)
    MCMC_Optim.__init__(self)
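# For context, the SGLD-style update such a constructor typically feeds
# (illustrative, not from the source): with addnoise=True the parameter
# update is theta <- theta - step_size/2 * (grad + weight_decay * theta)
# + N(0, step_size); with addnoise=False the noise term is dropped and
# the update reduces to plain SGD.
import torch

def sgld_update(p, step_size, weight_decay, addnoise=True):
    d_p = p.grad + weight_decay * p
    p.data.add_(d_p, alpha=-step_size / 2)
    if addnoise:
        # Langevin noise with variance step_size, i.e. std sqrt(step_size).
        p.data.add_(torch.randn_like(p), alpha=step_size ** 0.5)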