Example #1
	def __init__(self, model, step_size=0.1, prior_std=1.):
		'''
		Zero-mean Gaussian prior: log N(θ | 0, σ²) = -θ²/(2σ²) + const,
		so the prior acts as L2 regularization with coefficient 1/σ².
		:param model: module whose parameters are optimized
		:param step_size: step size (learning rate) of the MCMC updates
		:param prior_std: standard deviation σ of the Gaussian prior; weight_decay = 1/σ²
		'''

		weight_decay = 1 / (prior_std ** 2) if prior_std != 0 else 0
		if weight_decay < 0.0:
			raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
		if step_size < 0.0:
			raise ValueError("Invalid learning rate: {}".format(step_size))

		self.num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
		self.A = 1.

		defaults = dict(step_size=step_size,
				weight_decay=weight_decay,
				traj_step=0,
				num_params=self.num_params,
				A=self.A)

		self.model = model
		params = self.model.parameters()

		Optimizer.__init__(self, params=params, defaults=defaults)
		MCMC_Optim.__init__(self)
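A note on the prior/weight-decay relation used above: a zero-mean Gaussian prior N(0, σ²) contributes a log-density term -θ²/(2σ²), whose gradient is -θ/σ², i.e. an L2 penalty with coefficient 1/σ². A quick standalone sanity check (plain Python, independent of the class above):

prior_std = 2.0
weight_decay = 1 / (prior_std ** 2) if prior_std != 0 else 0
assert weight_decay == 0.25  # L2 coefficient implied by an N(0, 2.0**2) prior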
Example #2
 def __init__(self,
              params,
              lr=1e-3,
              betas=(0.9, 0.999),
              eps=1e-6,
              weight_decay=0):
     if not 0.0 <= lr:
         raise ValueError("Invalid learning rate: {}".format(lr))
     if not 0.0 <= eps:
         raise ValueError("Invalid epsilon value: {}".format(eps))
     if not 0.0 <= betas[0] < 1.0:
         raise ValueError("Invalid beta parameter at index 0: {}".format(
             betas[0]))
     if not 0.0 <= betas[1] < 1.0:
         raise ValueError("Invalid beta parameter at index 1: {}".format(
             betas[1]))
     PT_Optimizer.__init__(
         self,
         params,
         {
             "lr": lr,
             "betas": betas,
             "eps": eps,
             "weight_decay": weight_decay
         },
     )
Example #3
    def __init__(
        self,
        params,
        optimizer_class: Type[Optimizer],
        process_group: Optional[Any] = None,
        parameters_as_bucket_view: bool = False,
        **defaults: Any,
    ):
        # Perform type and assumption checks on the input parameters
        self._verify_and_init_params(params)
        self._verify_same_dense_param_type()

        # NOTE: The parent constructor uses `add_param_group()` which is
        # partially overloaded in ZeroRedundancyOptimizer, so we use the
        # `initialized` flag to dissociate the behaviour of `add_param_group()`
        # between the parent and child.
        self.initialized = False

        Optimizer.__init__(self, self._all_params, defaults)
        _Joinable.__init__(self)
        # Now, all parameters are held in both `self._all_params` and
        # `self.param_groups`

        # Partition information (evaluated lazily)
        self._param_to_rank_cache: Dict[torch.Tensor, int] = {}
        self._param_to_index_cache: Dict[torch.Tensor, int] = {}
        self._partition_parameters_cache: List[List[Dict]] = []
        self._index_to_param_cache: List[torch.Tensor] = []
        self._device_to_per_rank_params_cache: Dict[
            torch.device, List[List[torch.Tensor]]] = {}

        # Default device for collective communication and buckets
        self._default_device = self._all_params[0].device

        self.process_group = process_group if process_group is not None else dist.group.WORLD
        self.world_size = dist.get_world_size(self.process_group)
        self.rank = dist.get_rank(self.process_group)
        self.global_rank = _get_global_rank(self.process_group, self.rank)

        self._optim_defaults = defaults
        self._optim_constructor = optimizer_class
        self._init_local_optimizer()

        self.parameters_as_bucket_view = parameters_as_bucket_view
        self._is_trainable_mask = self._get_is_trainable_mask()
        self._buckets: List[List[torch.Tensor]] = []
        self._build_param_buckets()

        # Optional consolidated optimizer state, only populated if this rank
        # is the target in `consolidate_state_dict()`
        self._all_state_dicts: List[Dict[str, Any]] = []

        self.initialized = True
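Example #3 is the constructor of PyTorch's torch.distributed.optim.ZeroRedundancyOptimizer. A minimal usage sketch, assuming torch.distributed has already been initialized (e.g. via init_process_group) and the model lives on the current rank:

import torch
from torch.distributed.optim import ZeroRedundancyOptimizer

model = torch.nn.Linear(16, 4)
optimizer = ZeroRedundancyOptimizer(
    model.parameters(),
    optimizer_class=torch.optim.Adam,  # local optimizer whose state is sharded across ranks
    lr=1e-3,                           # forwarded to Adam via **defaults
)

loss = model(torch.randn(8, 16)).sum()
loss.backward()
optimizer.step()  # each rank updates its own parameter shard, then the results are synced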
Example #4
	def __init__(self, model, step_length):

		if step_length < 0.0:
			raise ValueError("Invalid learning rate: {}".format(step_length))

		defaults = dict(lr=step_length)

		params = model.parameters()
		self.model = model

		Optimizer.__init__(self, params=params, defaults=defaults)
		MCMC_Optim.__init__(self)
Example #5
 def __init__(self,
              params,
              lr=1e-3,
              betas=(0.9, 0.999),
              eps=1e-8,
              weight_decay=0):
     defaults = {
         "lr": lr,
         "betas": betas,
         "eps": eps,
         "weight_decay": weight_decay
     }
     self.buffer = [[None, None, None] for _ in range(10)]
     PT_Optimizer.__init__(self, params, defaults)
Example #6
	def __init__(self, model, step_size=0.1, prior_std=1., addnoise=True):
		'''
		Zero-mean Gaussian prior: log N(θ | 0, σ²) = -θ²/(2σ²) + const,
		so the prior acts as L2 regularization with coefficient 1/σ².
		:param model: module whose parameters are optimized
		:param step_size: step size (learning rate) of the MCMC updates
		:param prior_std: standard deviation σ of the Gaussian prior; weight_decay = 1/σ²
		:param addnoise: whether to add noise to the parameter updates
		'''

		weight_decay = 1 / (prior_std ** 2) if prior_std != 0 else 0
		if weight_decay < 0.0:
			raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
		if step_size < 0.0:
			raise ValueError("Invalid learning rate: {}".format(step_size))

		defaults = dict(step_size=step_size, weight_decay=weight_decay, addnoise=addnoise)

		self.model = model
		params = self.model.parameters()

		Optimizer.__init__(self, params=params, defaults=defaults)
		MCMC_Optim.__init__(self)
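For completeness, a hedged sketch of how an optimizer with the constructor from Example #6 might be driven in a training loop; the class name SGLD_Optim and the exact behavior of step() are assumptions, since the listings above only show __init__:

optim = SGLD_Optim(model, step_size=0.01, prior_std=1.0, addnoise=True)  # hypothetical class name

for x, y in data_loader:            # data_loader and loss_fn are assumed to exist
    optim.zero_grad()
    loss = loss_fn(model(x), y)
    loss.backward()
    optim.step()                    # presumably injects Gaussian noise when addnoise=True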