def __init__(self, optimizer, params: dict = {}):
        # Suggested weight_decay: 1e-4 for L2 regularization (sgd, adam) and
        #                         1e-1 for decoupled weight decay (sgdw, adamw, radam, ralamb, adamod, etc.).
        default_params = {
            "name": "warmR",
            "1cycle.learn_rate": 0.001,
            "warmR.T_max": 10,
            "warmR.T_mult": 1,
            "warmR.factor": 1.0,
            "warmR.eta_min": 4e-8,
            "warmR.log_decay": False,
            "warmR.lr_decay_step": 1,
            "reduceP.metric": 'valid_acc',
            "reduceP.check_interval": 0,
            "reduceP.factor": 0.1,
            "reduceP.patience": 10,
            "reduceP.threshold": 0.0001,
            "reduceP.cooldown": 0,
            "reduceP.min_lr": 0
        }

        used_params = utils.assign_params_dict(default_params, params, force_check=False, support_unknow=True)
        split_params = utils.split_params(used_params)

        # Lookahead wraps an inner optimizer; LR schedulers should adjust that
        # inner optimizer's param_groups, so unwrap it here.
        if isinstance(optimizer, Lookahead):
            base_optimizer = optimizer.optimizer
        else:
            base_optimizer = optimizer

        self.name = split_params["public"]["name"]
        if self.name == "1cycle":
            # OneCycleLR expects max_lr, so map the configured learn_rate onto it.
            max_lr = split_params["1cycle"].pop("learn_rate")
            self.lr_scheduler = optim.lr_scheduler.OneCycleLR(base_optimizer, max_lr, **split_params["1cycle"])
        elif self.name == "warmR":
            T_max = split_params["warmR"].pop("T_max")
            self.lr_decay_step = split_params["warmR"].pop("lr_decay_step")
            self.lr_scheduler = CosineAnnealingWarmRestarts(base_optimizer, T_max, **split_params["warmR"])
        elif self.name == "reduceP":
            self.check_interval = split_params["reduceP"].pop("check_interval")
            self.metric = split_params["reduceP"].pop("metric")
            if self.metric == "valid_acc":
                mode = "max"
            elif self.metric == "valid_loss":
                mode = "min"
            else:
                raise ValueError("Do not support {} metric for ReduceLROnPlateau strategy.".format(self.metric))
            self.lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(base_optimizer, mode=mode, **split_params["reduceP"])
            self.init = False
            if utils.use_horovod():
                raise TypeError("Do not support ReduceLROnPlateau for multi-gpu of Horovod now.")
        else:
            raise ValueError("Do not support {0} lr_scheduler now.".format(name))
    def __init__(self, optimizer, params: dict = {}):
        # Suggested weight_decay: 1e-4 for L2 regularization (sgd, adam) and
        #                         1e-1 for decoupled weight decay (sgdw, adamw, radam, ralamb, adamod, etc.).
        default_params = {
            "name": "warmR",
            "1cycle.learn_rate": 0.001,
            "warmR.T_max": 10,
            "warmR.T_mult": 1,
            "warmR.factor": 1.0,
            "warmR.eta_min": 4e-8,
            "warmR.log_decay": False,
            "warmR.lr_decay_step": 1
        }

        used_params = utils.assign_params_dict(default_params,
                                               params,
                                               force_check=False,
                                               support_unknow=True)
        split_params = utils.split_params(used_params)

        if isinstance(optimizer, Lookahead):
            base_optimizer = optimizer.optimizer
        else:
            base_optimizer = optimizer

        self.name = split_params["public"]["name"]
        if self.name == "1cycle":
            # OneCycleLR expects max_lr, so map the configured learn_rate onto it.
            max_lr = split_params["1cycle"].pop("learn_rate")
            self.lr_scheduler = optim.lr_scheduler.OneCycleLR(
                base_optimizer, max_lr, **split_params["1cycle"])
        elif self.name == "warmR":
            T_max = split_params["warmR"].pop("T_max")
            self.lr_decay_step = split_params["warmR"].pop("lr_decay_step")
            self.lr_scheduler = CosineAnnealingWarmRestarts(
                base_optimizer, T_max, **split_params["warmR"])
        else:
            raise ValueError(
                "lr_scheduler {0} is not supported yet.".format(self.name))
Example #3
    def __init__(self, optimizer, params: dict = {}):
        # Suggested weight_decay: 1e-4 for L2 regularization (sgd, adam) and
        #                         1e-1 for decoupled weight decay (sgdw, adamw, radam, ralamb, adamod, etc.).
        default_params = {
            "name": "warmR",

            "cyclic.max_lr": 1e-3,
            "cyclic.base_lr": 1e-8,
            "cyclic.step_size_up": 2e4,
            "cyclic.step_size_down": None,
            "cyclic.mode": 'triangular2',
            "cyclic.gamma": 1.0,
            "cyclic.scale_fn": None,
            "cyclic.scale_mode": 'cycle',
            "cyclic.cycle_momentum": False,
            "cyclic.base_momentum": 0.8,
            "cyclic.max_momentum": 0.9,

            "1cycle.learn_rate": 0.001,
            "1cycle.total_steps": None,
            "1cycle.epochs": None,
            "1cycle.steps_per_epoch": None,
            "1cycle.pct_start": 0.3,
            "1cycle.anneal_strategy": 'linear',
            "1cycle.cycle_momentum": False,
            "1cycle.base_momentum": 0.85,
            "1cycle.max_momentum": 0.95,
            "1cycle.div_factor": 25.0,
            "1cycle.final_div_factor": 10000.0,

            "warmR.T_max": 10,
            "warmR.T_mult": 1,
            "warmR.factor": 1.0,
            "warmR.eta_min": 4e-8,
            "warmR.log_decay": False,
            "warmR.lr_decay_step": 1,

            "reduceP.metric": 'valid_acc',
            "reduceP.check_interval": 0,
            "reduceP.factor": 0.5,
            "reduceP.patience": 10,
            "reduceP.threshold": 0.0001,
            "reduceP.cooldown": 0,
            "reduceP.min_lr": 0.0
        }

        used_params = utils.assign_params_dict(default_params, params, force_check=False, support_unknow=True)
        split_params = utils.split_params(used_params)

        if isinstance(optimizer, Lookahead):
            base_optimizer = optimizer.optimizer
        else:
            base_optimizer = optimizer

        self.name = split_params["public"]["name"]
        if self.name == "cyclic":
            base_lr = split_params["cyclic"].pop("base_lr")
            max_lr = split_params["cyclic"].pop("max_lr")
            self.lr_scheduler = optim.lr_scheduler.CyclicLR(base_optimizer, base_lr, max_lr, **split_params["cyclic"])
        elif self.name == "1cycle":
            max_lr = split_params["1cycle"].pop("learn_rate")
            self.lr_scheduler = optim.lr_scheduler.OneCycleLR(base_optimizer, max_lr, **split_params["1cycle"])
        elif self.name == "warmR":
            T_max = split_params["warmR"].pop("T_max")
            self.lr_decay_step = split_params["warmR"].pop("lr_decay_step")
            self.lr_scheduler = CosineAnnealingWarmRestarts(base_optimizer, T_max, **split_params["warmR"])
        elif self.name == "reduceP":
            self.check_interval = split_params["reduceP"].pop("check_interval")
            self.metric = split_params["reduceP"].pop("metric")
            self.min_lr = split_params["reduceP"]["min_lr"]
            if self.metric == "valid_acc":
                mode = "max"
            elif self.metric == "valid_loss":
                mode = "min"
            else:
                raise ValueError("Do not support {} metric for ReduceLROnPlateau strategy.".format(self.metric))
            self.lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(base_optimizer, mode=mode, **split_params["reduceP"])
            self.init = False
            if utils.use_horovod():
                raise TypeError("Do not support ReduceLROnPlateau for multi-gpu of Horovod now.")
        else:
            raise ValueError("Do not support {0} lr_scheduler now.".format(name))