def test_closed_form_lwca_lr_with_nz_start_lr_nz_eta_min():
    seed_everything()

    warmup_start_lr = 0.009
    base_lr = 0.07
    eta_min = 0.003
    warmup_epochs = 15
    max_epochs = 115
    multiplier = 32

    test_lr_scheduler = TestLRScheduler(base_lr=base_lr, multiplier=multiplier)
    scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.optimizer,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    closed_form_scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.closed_form_opt,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    test_lr_scheduler._test_against_closed_form(
        scheduler, closed_form_scheduler, epochs=max_epochs
    )


def test_closed_form_lwca_lr_with_nz_start_lr(tmpdir):
    seed_everything()

    warmup_start_lr = 0.2
    base_lr = 0.8
    eta_min = 0.0
    warmup_epochs = 9
    max_epochs = 28
    multiplier = 10

    test_lr_scheduler = TestLRScheduler(base_lr=base_lr, multiplier=multiplier)
    scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.optimizer,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    closed_form_scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.closed_form_opt,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    test_lr_scheduler._test_against_closed_form(
        scheduler, closed_form_scheduler, epochs=max_epochs
    )


def test_closed_form_lwca_lr_with_nz_eta_min(tmpdir):
    reset_seed()

    warmup_start_lr = 0.0
    base_lr = 0.04
    eta_min = 0.0001
    warmup_epochs = 15
    max_epochs = 47
    multiplier = 17

    test_lr_scheduler = TestLRScheduler(base_lr=base_lr, multiplier=multiplier)
    scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.optimizer,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    closed_form_scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.closed_form_opt,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    test_lr_scheduler._test_against_closed_form(
        scheduler, closed_form_scheduler, epochs=max_epochs
    )
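
The three closed-form tests above step one scheduler epoch by epoch and compare it against a second instance evaluated from its closed-form expression. For reference, the per-epoch value being checked follows the same warm-up/cosine shape that test_lwca_lr below spells out explicitly; a minimal sketch for a single parameter group (the helper name is mine, not part of the test suite):

import math
import numpy as np

def expected_lr(t, base_lr, warmup_start_lr, eta_min, warmup_epochs, max_epochs):
    # Hypothetical reference: learning rate at epoch t for one parameter group,
    # mirroring the target schedule constructed in test_lwca_lr below.
    if t < warmup_epochs:
        # linear warm-up from warmup_start_lr to base_lr
        return np.linspace(warmup_start_lr, base_lr, warmup_epochs)[t]
    # cosine annealing from base_lr down to eta_min over the remaining epochs
    progress = (t - warmup_epochs) / (max_epochs - warmup_epochs)
    return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * progress))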
Example #4
    def configure_optimizers(self):
        parameters = self.exclude_from_wt_decay(
            self.named_parameters(),
            weight_decay=self.hparams.opt_weight_decay
        )

        optimizer = LARSWrapper(Adam(parameters, lr=self.hparams.lr))

        self.hparams.warmup_epochs = self.hparams.warmup_epochs * self.train_iters_per_epoch
        max_epochs = self.trainer.max_epochs * self.train_iters_per_epoch

        linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,
            max_epochs=max_epochs,
            warmup_start_lr=0,
            eta_min=0
        )

        scheduler = {
            'scheduler': linear_warmup_cosine_decay,
            'interval': 'step',
            'frequency': 1 
        }

        return [optimizer], [scheduler]
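
This and the following examples rely on a self.train_iters_per_epoch attribute to convert warm-up and maximum epochs into optimizer steps, since the scheduler dict uses interval='step'. How that attribute is computed is not shown on this page; a common sketch (the helper and sample numbers are assumptions):

def steps_per_epoch(num_train_samples: int, batch_size: int, num_devices: int = 1) -> int:
    # hypothetical helper: optimizer steps per epoch for a map-style dataset,
    # assuming an effective batch of batch_size * num_devices with incomplete batches dropped
    return num_train_samples // (batch_size * num_devices)

# e.g. 50_000 training samples with batch size 256 on one device -> 195 steps per epoch
train_iters_per_epoch = steps_per_epoch(50_000, 256)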
Example #5
    def configure_optimizers(self):
        # TRICK 1 (Use lars + filter weights)
        # exclude certain parameters
        parameters = self.exclude_from_wt_decay(
            self.named_parameters(),
            weight_decay=self.hparams.opt_weight_decay)

        optimizer = LARSWrapper(Adam(parameters,
                                     lr=self.hparams.learning_rate))

        # Trick 2 (after each step)
        self.hparams.warmup_epochs = self.hparams.warmup_epochs * self.train_iters_per_epoch
        max_epochs = self.trainer.max_epochs * self.train_iters_per_epoch

        linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,
            max_epochs=max_epochs,
            warmup_start_lr=0,
            eta_min=0)

        scheduler = {
            'scheduler': linear_warmup_cosine_decay,
            'interval': 'step',
            'frequency': 1
        }

        if self.perc == 0.01:
            return [optimizer], []
        else:
            return [optimizer], [scheduler]
Example #6
    def configure_optimizers(self) -> Any:
        """
        Configures the optimizer to use for training: Adam optimizer with Lars scheduling, excluding certain parameters
        (batch norm and bias of convolution) from weight decay. Apply Linear Cosine Annealing schedule of learning
        rate with warm-up.
        """
        # TRICK 1 (Use lars + filter weights)
        # exclude certain parameters
        parameters = self.exclude_from_wt_decay(self.online_network.named_parameters(),
                                                weight_decay=self.hparams.weight_decay)  # type: ignore
        optimizer = LARSWrapper(Adam(parameters, lr=self.hparams.learning_rate))  # type: ignore

        # Trick 2 (after each step)
        self.hparams.warmup_epochs = self.hparams.warmup_epochs * self.train_iters_per_epoch  # type: ignore
        max_epochs = self.trainer.max_epochs * self.train_iters_per_epoch

        linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,  # type: ignore
            max_epochs=max_epochs,
            warmup_start_lr=0,
            eta_min=self.min_learning_rate,
        )

        scheduler = {'scheduler': linear_warmup_cosine_decay, 'interval': 'step', 'frequency': 1}

        return [optimizer], [scheduler]
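
Several of these examples call an exclude_from_wt_decay helper that is not reproduced on this page. A minimal sketch of what such a helper usually does, splitting the parameters into a decayed and a non-decayed group (the skip list and structure here are assumptions):

def exclude_from_wt_decay(named_params, weight_decay, skip_list=('bias', 'bn')):
    # Regular weights get weight decay; biases and batch-norm parameters do not.
    params, excluded_params = [], []
    for name, param in named_params:
        if not param.requires_grad:
            continue
        if any(layer_name in name for layer_name in skip_list):
            excluded_params.append(param)
        else:
            params.append(param)
    return [
        {'params': params, 'weight_decay': weight_decay},
        {'params': excluded_params, 'weight_decay': 0.0},
    ]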
Example #7
    def configure_optimizers(self):
        optimizer = Adam(self.parameters(),
                         lr=self.hparams.learning_rate,
                         weight_decay=self.hparams.weight_decay)
        scheduler = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,
            max_epochs=self.hparams.max_epochs)
        return [optimizer], [scheduler]

    def configure_optimizers(self) -> Any:
        # exclude certain parameters
        parameters = self.exclude_from_wt_decay(
            self.online_network.named_parameters(),
            weight_decay=self.hparams.weight_decay)  # type: ignore
        optimizer = Adam(
            parameters,
            lr=self.hparams.learning_rate,  # type: ignore
            weight_decay=self.hparams.weight_decay)  # type: ignore
        scheduler = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,  # type: ignore
            max_epochs=self.hparams.max_epochs)  # type: ignore
        return [optimizer], [scheduler]

def test_lwca_lr(tmpdir):
    seed_everything()

    warmup_start_lr = 0.0
    base_lr = 0.4
    eta_min = 0.0
    warmup_epochs = 6
    max_epochs = 15
    multiplier = 10

    # define target schedule
    targets = []

    # param-group1
    warmup_lr_schedule = np.linspace(warmup_start_lr, base_lr, warmup_epochs)
    iters = np.arange(max_epochs - warmup_epochs)
    cosine_lr_schedule = np.array(
        [
            eta_min + 0.5 * (base_lr - eta_min) * (
                1 + math.cos(math.pi * t / (max_epochs - warmup_epochs))
            ) for t in iters
        ]
    )
    lr_schedule = np.concatenate((warmup_lr_schedule, cosine_lr_schedule))
    targets.append(list(lr_schedule))

    # param-group2
    base_lr2 = base_lr * multiplier
    warmup_lr_schedule = np.linspace(warmup_start_lr, base_lr2, warmup_epochs)
    cosine_lr_schedule = np.array(
        [
            eta_min + 0.5 * (base_lr2 - eta_min) * (
                1 + math.cos(math.pi * t / (max_epochs - warmup_epochs))
            ) for t in iters
        ]
    )
    lr_schedule = np.concatenate((warmup_lr_schedule, cosine_lr_schedule))
    targets.append(list(lr_schedule))

    test_lr_scheduler = TestLRScheduler(base_lr=base_lr, multiplier=multiplier)
    scheduler = LinearWarmupCosineAnnealingLR(
        optimizer=test_lr_scheduler.optimizer,
        warmup_epochs=warmup_epochs,
        max_epochs=max_epochs,
        warmup_start_lr=warmup_start_lr,
        eta_min=eta_min,
    )

    test_lr_scheduler._test_lr(scheduler, targets, epochs=max_epochs)
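
The two target schedules above correspond to the two parameter groups that the TestLRScheduler helper is assumed to create, one at base_lr and one at base_lr * multiplier. A rough sketch of that kind of optimizer (the tiny model and the group split are placeholders):

import torch
from torch.optim import SGD

base_lr, multiplier = 0.4, 10
net = torch.nn.Linear(4, 4)  # placeholder model
optimizer = SGD(
    [
        {'params': [net.weight], 'lr': base_lr},
        {'params': [net.bias], 'lr': base_lr * multiplier},
    ],
    lr=base_lr,
)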
Example #10
    def configure_optimizers(self):

        lr = (self.hparams.learning_rate * (self.effective_bsz / 256))

        params = list(self.encoder_online.parameters()) + \
            list(self.predictor_theta_online.parameters()) + \
            list(self.proj_head_online.parameters())

        if self.hparams.optimiser == 'lars':

            models = [
                self.encoder_online, self.predictor_theta_online,
                self.proj_head_online
            ]

            param_list = collect_params(models, exclude_bias_and_bn=True)

            # print(params)

            optimizer = LARSSGD(param_list,
                                lr=lr,
                                weight_decay=self.hparams.weight_decay,
                                eta=0.001,
                                nesterov=False)

        elif self.hparams.optimiser == 'adam':
            optimizer = Adam(params,
                             lr=lr,
                             weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimiser == 'sgd':
            optimizer = SGD(params,
                            lr=lr,
                            weight_decay=self.hparams.weight_decay,
                            momentum=0.9,
                            nesterov=True)
        else:
            raise NotImplementedError('{} not set up.'.format(
                self.hparams.optimiser))

        scheduler = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,
            max_epochs=self.hparams.max_epochs,
            warmup_start_lr=1e-3 * lr)
        return [optimizer], [scheduler]
Example #11
    def configure_optimizers(self) -> Tuple[list, list]:

        parameters = self.exclude_from_wt_decay(
            self.named_parameters(), weight_decay=self.config.opt_weight_decay)
        optimizer = torch.optim.Adam(
            parameters,
            lr=self.config.lr *
            math.sqrt(self.config.batch_size * self.config.num_of_mini_batch),
        )
        warmup_epochs = (self.config.warmup_epochs *
                         self.train_iters_per_epoch //
                         self.config.num_of_mini_batch)
        # update the max epochs for the learning rate scheduler so that fine-tuned and fully
        # supervised models can be compared fairly.
        if ("lr_max_epochs" in self.config.keys()
                and self.config["lr_max_epochs"] is not None):
            max_epochs = (self.config["lr_max_epochs"] *
                          self.train_iters_per_epoch //
                          self.config.num_of_mini_batch)
        else:
            max_epochs = (self.trainer.max_epochs *
                          self.train_iters_per_epoch //
                          self.config.num_of_mini_batch)

        if self.config.optimizer == "LARS":
            optimizer = LARSWrapper(optimizer)
            scheduler = LinearWarmupCosineAnnealingLR(
                optimizer,
                warmup_epochs=warmup_epochs,
                max_epochs=max_epochs,
                warmup_start_lr=0,
                eta_min=0,
            )
        else:
            scheduler = CosineAnnealingLR(optimizer, T_max=max_epochs)

        scheduler = {
            "scheduler": scheduler,
            "interval": "step",
            "frequency": 1
        }

        return [optimizer], [scheduler]
Example #12
    def configure_optimizers(self):
        # exclude certain parameters from weight decay:
        # walk through all parameters in the model (e.g. encoder, projection head) and skip
        # weight decay for biases and batch-norm parameters
        parameters = self.exclude_from_wt_decay(self.named_parameters(), weight_decay=self.hparams.opt_weight_decay)

        # the parameters do not include the bias or batch norm
        # TRICK 1 --> use LARS + filter weights
        optimizer = torch.optim.SGD(parameters, lr=self.hparams.lars_lr)
        optimizer_LARS = LARSWrapper(optimizer, eta=self.hparams.lars_eta)

        # TRICK 2 --> step the scheduler after each optimizer step
        # once the optimizer is defined, the scheduler is defined in terms of steps (iterations):
        # warmup_epochs * train_iters_per_epoch gives the total number of warm-up steps,
        # and the learning rate is updated every training step
        self.hparams.warmup_epochs = self.hparams.warmup_epochs * self.train_iters_per_epoch
        max_epochs = self.trainer.max_epochs * self.train_iters_per_epoch

        # the scheduler (from pl_bolts), stepped once per training step:
        # the learning rate ramps linearly from warmup_start_lr up to the optimizer's learning rate
        # over the warm-up steps, then follows a cosine decay down to eta_min for the remaining steps
        linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
            optimizer_LARS,
            warmup_epochs=self.hparams.warmup_epochs,
            max_epochs=max_epochs,
            warmup_start_lr=0,
            # final learning rate
            eta_min=0
        )

        # use a dictionary to tell PyTorch Lightning how to step the scheduler;
        # by default Lightning updates schedulers every epoch, which is overridden below
        scheduler = {
            'scheduler': linear_warmup_cosine_decay,
            'interval': 'step',
            # every 1 step
            # if the value were changed to 5, the scheduler would update every 5 steps
            'frequency': 1
        }

        # return lists because multiple optimizers and schedulers are supported
        return [optimizer_LARS], [scheduler]
Example #13
    def configure_optimizers(self):
        parameters = self.exclude_from_wt_decay(
            self.named_parameters(),
            weight_decay=self.hparams.opt_weight_decay)

        optimizer = Adam(parameters, lr=self.hparams.lr)
        optimizer = LARSWrapper(optimizer)

        # Trick 2 (after each step)
        linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
            optimizer,
            warmup_epochs=self.hparams.warmup_epochs,
            max_epochs=self.hparams.max_epochs,
            warmup_start_lr=0,
            eta_min=0,
        )

        scheduler = {
            "scheduler": linear_warmup_cosine_decay,
            "interval": "step",
            "frequency": 1,
        }

        return [optimizer], [scheduler]
Example #14
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=5e-4)
        scheduler = LinearWarmupCosineAnnealingLR(optimizer,
                                                  warmup_epochs=1,
                                                  max_epochs=40)
        return [optimizer], [scheduler]
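
For completeness, the same scheduler can also be driven manually outside Lightning; a minimal sketch, assuming the usual pl_bolts import path and stepping the schedule once per epoch:

import torch
from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR

model = torch.nn.Linear(8, 2)  # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=1, max_epochs=40)

for epoch in range(40):
    # ... run this epoch's batches, calling optimizer.step() per batch ...
    scheduler.step()  # advance the warm-up/cosine schedule once per epoch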