def get_optimizer(self, stage: str, model: nn.Module) -> _Optimizer:
    fp16 = isinstance(model, Fp16Wrap)
    params = utils.prepare_optimizable_params(model.parameters(), fp16)
    optimizer_params = self.stages_config[stage].get("optimizer_params", {})
    optimizer = OPTIMIZERS.get_from_params(**optimizer_params, params=params)
    return optimizer
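For context, `OPTIMIZERS.get_from_params` resolves an optimizer by name from a registry and instantiates it with the remaining config keys plus the model parameters. A minimal sketch of that pattern; `MiniRegistry`, its `"optimizer"` name key, and the sample config are illustrative assumptions, not Catalyst's actual registry implementation:

import torch
import torch.nn as nn

class MiniRegistry:
    """Toy stand-in for a name -> factory registry (assumption)."""
    _factories = {"Adam": torch.optim.Adam, "SGD": torch.optim.SGD}

    def get_from_params(self, *, optimizer: str, params, **kwargs):
        # pop the name key, pass everything else to the factory
        return self._factories[optimizer](params, **kwargs)

model = nn.Linear(10, 2)
# mirrors optimizer_params pulled from stages_config[stage]
optimizer_params = {"optimizer": "Adam", "lr": 1e-3}
opt = MiniRegistry().get_from_params(**optimizer_params, params=model.parameters())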
def __init__(
    self,
    params,
    optimizer_params: dict,
    swa_start=None,
    swa_freq=None,
    swa_lr=None,
):
    optimizer_params["params"] = params
    optimizer = OPTIMIZERS.get_from_params(**optimizer_params)
    super().__init__(optimizer, swa_start, swa_freq, swa_lr)
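The wrapper above follows the `torchcontrib.optim.SWA` constructor signature. A hedged usage sketch, assuming `torchcontrib` is installed and using illustrative hyperparameters:

import torch
import torch.nn as nn
from torchcontrib.optim import SWA  # assumed dependency

model = nn.Linear(10, 2)
base_optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# start averaging at step 10, snapshot every 5 steps at lr 0.05
optimizer = SWA(base_optimizer, swa_start=10, swa_freq=5, swa_lr=0.05)
# ... train, calling optimizer.step() as usual ...
# optimizer.swap_swa_sgd()  # copy the averaged weights into the model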
@classmethod
def get_from_params(
    cls,
    params: Dict,
    base_optimizer_params: Dict = None,
    **kwargs,
) -> "Lookahead":
    """@TODO: Docs. Contribution is welcome."""
    from catalyst.dl.registry import OPTIMIZERS

    # guard against the default: ``**None`` would raise a TypeError
    base_optimizer_params = base_optimizer_params or {}
    base_optimizer = OPTIMIZERS.get_from_params(
        params=params, **base_optimizer_params
    )
    optimizer = cls(optimizer=base_optimizer, **kwargs)
    return optimizer
@classmethod
def get_from_params(
    cls,
    params: Dict,
    base_optimizer_params: Dict = None,
    **kwargs,
) -> "Lookahead":
    from catalyst.dl.registry import OPTIMIZERS

    # guard against the default: ``**None`` would raise a TypeError
    base_optimizer_params = base_optimizer_params or {}
    base_optimizer = OPTIMIZERS.get_from_params(
        params=params, **base_optimizer_params
    )
    optimizer = cls(optimizer=base_optimizer, **kwargs)
    return optimizer
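To illustrate both variants above: `base_optimizer_params` configures the inner optimizer, while the remaining kwargs go to the `Lookahead` wrapper itself. A hypothetical config fragment; the `k`/`alpha` kwargs and the `"optimizer"` name key are assumptions based on the common Lookahead interface:

# hypothetical config consumed by Lookahead.get_from_params
optimizer_config = {
    "base_optimizer_params": {"optimizer": "Adam", "lr": 1e-3},
    "k": 5,        # sync slow weights every k steps (assumed kwarg)
    "alpha": 0.5,  # slow-weights interpolation factor (assumed kwarg)
}
# Lookahead.get_from_params(params=model.parameters(), **optimizer_config)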
@staticmethod
def _get_optimizer(*, model_params, **params):
    key_value_flag = params.pop("_key_value", False)

    if key_value_flag:
        # build one optimizer per key from its own sub-config
        optimizer = {}
        for key, params_ in params.items():
            optimizer[key] = ConfigExperiment._get_optimizer(
                model_params=model_params, **params_
            )
    else:
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    return optimizer
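For the `_key_value` branch, a sketch of the kind of config that yields a dict of optimizers, one per key; the generator/discriminator names and the `"optimizer"` key are illustrative:

# illustrative config: with _key_value set, each sub-dict
# builds its own optimizer over the same model_params
optimizer_params = {
    "_key_value": True,
    "generator": {"optimizer": "Adam", "lr": 2e-4},
    "discriminator": {"optimizer": "Adam", "lr": 1e-4},
}
# ConfigExperiment._get_optimizer(model_params=..., **optimizer_params)
# -> {"generator": <Adam>, "discriminator": <Adam>}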
def _get_optimizer(self, *, model_params, **params):
    load_from_previous_stage = params.pop("load_from_previous_stage", False)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage:
        checkpoint_path = f"{self.logdir}/checkpoints/best.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)
        utils.unpack_checkpoint(checkpoint, optimizer=optimizer)

        # re-apply the current stage's hyperparameters
        # on top of the restored optimizer state
        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
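In plain PyTorch terms, the `load_from_previous_stage` branch amounts to restoring the optimizer's state dict and then overriding stage-level hyperparameters; a minimal sketch, where the checkpoint path and key layout are assumptions:

import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# assumed checkpoint layout: {"optimizer_state_dict": ...}
checkpoint = torch.load("logs/checkpoints/best.pth", map_location="cpu")
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

# re-apply the current stage's hyperparameters on top of the
# restored state, mirroring the param_groups loop above
for pg in optimizer.param_groups:
    pg["lr"] = 1e-4  # illustrative new lr for this stage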
def _get_optimizer(self, *, model_params, **params):
    key_value_flag = params.pop("_key_value", False)

    if key_value_flag:
        # build one optimizer per key from its own sub-config
        optimizer = {}
        for key, params_ in params.items():
            optimizer[key] = self._get_optimizer(
                model_params=model_params, **params_
            )
    else:
        load_from_previous_stage = \
            params.pop("load_from_previous_stage", False)
        optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

        if load_from_previous_stage:
            checkpoint_path = f"{self.logdir}/checkpoints/best.pth"
            checkpoint = UtilsFactory.load_checkpoint(checkpoint_path)
            UtilsFactory.unpack_checkpoint(checkpoint, optimizer=optimizer)

            # re-apply the current stage's hyperparameters
            # on top of the restored optimizer state
            for key, value in params.items():
                for pg in optimizer.param_groups:
                    pg[key] = value

    return optimizer
def _get_optimizer(self, *, model_params, **params):
    load_from_previous_stage = params.pop("load_from_previous_stage", False)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage:
        checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)
        utils.unpack_checkpoint(checkpoint, optimizer=optimizer)

        # move optimizer to device
        device = get_device()
        for param in model_params:
            param = param["params"][0]
            state = optimizer.state[param]
            for key, value in state.items():
                state[key] = any2device(value, device)

        # update optimizer params
        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
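The device-move loop above exists because a checkpoint loaded on CPU leaves optimizer state tensors (e.g., Adam's `exp_avg` buffers) on CPU while the model trains on GPU. The same idea in self-contained form, assuming a generic target device:

import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Linear(10, 2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# after optimizer.load_state_dict(...) the state tensors may live
# on CPU; move every tensor in optimizer.state onto the device
for state in optimizer.state.values():
    for key, value in state.items():
        if torch.is_tensor(value):
            state[key] = value.to(device)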
def _get_optimizer(
    self, stage: str, model: Union[Model, Dict[str, Model]], **params
) -> Optimizer:
    # @TODO 1: refactoring; this method is too long
    # @TODO 2: load state dicts for schedulers & criterion
    layerwise_params = params.pop("layerwise_params", OrderedDict())
    no_bias_weight_decay = params.pop("no_bias_weight_decay", True)

    # linear scaling rule from https://arxiv.org/pdf/1706.02677.pdf
    lr_scaling_params = params.pop("lr_linear_scaling", None)
    if lr_scaling_params:
        data_params = dict(self.stages_config[stage]["data_params"])
        batch_size = data_params.get("batch_size")
        per_gpu_scaling = data_params.get("per_gpu_scaling", False)
        distributed_rank = utils.get_rank()
        distributed = distributed_rank > -1
        if per_gpu_scaling and not distributed:
            num_gpus = max(1, torch.cuda.device_count())
            batch_size *= num_gpus

        base_lr = lr_scaling_params.get("lr")
        base_batch_size = lr_scaling_params.get("base_batch_size", 256)
        lr_scaling = batch_size / base_batch_size
        params["lr"] = base_lr * lr_scaling  # scale default lr
    else:
        lr_scaling = 1.0

    # getting model parameters
    model_key = params.pop("_model", None)
    if model_key is None:
        assert isinstance(model, nn.Module), \
            "model is key-value, but optimizer has no specified model"
        model_params = utils.process_model_params(
            model, layerwise_params, no_bias_weight_decay, lr_scaling
        )
    elif isinstance(model_key, str):
        model_params = utils.process_model_params(
            model[model_key],
            layerwise_params,
            no_bias_weight_decay,
            lr_scaling,
        )
    elif isinstance(model_key, (list, tuple)):
        model_params = []
        for model_key_ in model_key:
            model_params_ = utils.process_model_params(
                model[model_key_],
                layerwise_params,
                no_bias_weight_decay,
                lr_scaling,
            )
            model_params.extend(model_params_)
    else:
        raise ValueError("unknown type of model_params")

    load_from_previous_stage = params.pop("load_from_previous_stage", False)
    optimizer_key = params.pop("optimizer_key", None)
    optimizer = OPTIMIZERS.get_from_params(**params, params=model_params)

    if load_from_previous_stage and self.stages.index(stage) != 0:
        checkpoint_path = f"{self.logdir}/checkpoints/best_full.pth"
        checkpoint = utils.load_checkpoint(checkpoint_path)

        dict2load = optimizer
        if optimizer_key is not None:
            dict2load = {optimizer_key: optimizer}
        utils.unpack_checkpoint(checkpoint, optimizer=dict2load)

        # move optimizer to device
        device = utils.get_device()
        for param in model_params:
            param = param["params"][0]
            state = optimizer.state[param]
            for key, value in state.items():
                state[key] = utils.any2device(value, device)

        # update optimizer params
        for key, value in params.items():
            for pg in optimizer.param_groups:
                pg[key] = value

    return optimizer
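A small worked example of the linear scaling rule applied above (Goyal et al., https://arxiv.org/pdf/1706.02677.pdf): the learning rate grows proportionally with the effective batch size; the numbers here are illustrative.

# lr = base_lr * (batch_size / base_batch_size)
base_lr = 0.1
base_batch_size = 256
batch_size = 256 * 4  # e.g., per_gpu_scaling with 4 GPUs
lr = base_lr * (batch_size / base_batch_size)
assert lr == 0.4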