Example #1
    def __init__(self, config, pretrained=True):

        self.config = config
        self.model, self.vocab = build_model(config)

        self.device = config['device']
        self.num_iters = config['trainer']['iters']
        self.beamsearch = config['predictor']['beamsearch']

        self.data_root = config['dataset']['data_root']
        self.train_annotation = config['dataset']['train_annotation']
        self.valid_annotation = config['dataset']['valid_annotation']
        self.dataset_name = config['dataset']['name']

        self.batch_size = config['trainer']['batch_size']
        self.print_every = config['trainer']['print_every']
        self.valid_every = config['trainer']['valid_every']

        self.checkpoint = config['trainer']['checkpoint']
        self.export_weights = config['trainer']['export']
        self.metrics = config['trainer']['metrics']
        logger = config['trainer']['log']

        if logger:
            self.logger = Logger(logger)

        if pretrained:
            weight_file = download_weights(**config['pretrain'],
                                           quiet=config['quiet'])
            self.load_weights(weight_file)

        self.iter = 0

        self.optimizer = AdamW(self.model.parameters(),
                               betas=(0.9, 0.98),
                               eps=1e-09)
        self.scheduler = OneCycleLR(self.optimizer, **config['optimizer'])
        #        self.optimizer = ScheduledOptim(
        #            Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        #            #config['transformer']['d_model'],
        #            512,
        #            **config['optimizer'])

        self.criterion = LabelSmoothingLoss(len(self.vocab),
                                            padding_idx=self.vocab.pad,
                                            smoothing=0.1)

        transforms = ImgAugTransform()

        self.train_gen = self.data_gen('train_{}'.format(self.dataset_name),
                                       self.data_root,
                                       self.train_annotation,
                                       transform=transforms)
        if self.valid_annotation:
            self.valid_gen = self.data_gen(
                'valid_{}'.format(self.dataset_name), self.data_root,
                self.valid_annotation)

        self.train_losses = []
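
Example #1 builds its scheduler with OneCycleLR(self.optimizer, **config['optimizer']), so the schedule is driven entirely by the config file. Below is a minimal sketch of what such a section could look like; the keys inside 'optimizer' are simply OneCycleLR's own keyword arguments, and every value (and the tiny stand-in model) is a placeholder rather than the project's actual configuration.

# Sketch of a config-driven OneCycleLR; all values are placeholders.
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR

config = {
    'optimizer': {
        'max_lr': 3e-4,         # peak learning rate of the cycle
        'total_steps': 100000,  # must match the number of optimizer.step() calls
        'pct_start': 0.1,       # fraction of the cycle spent increasing the lr
    },
}

model = torch.nn.Linear(10, 10)  # stand-in for the real model
optimizer = AdamW(model.parameters(), betas=(0.9, 0.98), eps=1e-09)
scheduler = OneCycleLR(optimizer, **config['optimizer'])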
Example #2
 def build(self):
     self._sp_sch = OneCycleLR(self.sp_optim,
                               max_lr=self.max_lr,
                               total_steps=self.total_steps)
     self._disc_sch = OneCycleLR(self.disc_optim,
                                 max_lr=self.max_lr,
                                 total_steps=self.total_steps)
     return self
Example #3
    def __call__(
        self,
        net: nn.Module,
        train_iter: DataLoader,
        validation_iter: Optional[DataLoader] = None,
    ) -> None:
        wandb.watch(net, log="all", log_freq=self.num_batches_per_epoch)

        optimizer = Adam(net.parameters(),
                         lr=self.learning_rate,
                         weight_decay=self.weight_decay)

        lr_scheduler = OneCycleLR(
            optimizer,
            max_lr=self.maximum_learning_rate,
            steps_per_epoch=self.num_batches_per_epoch,
            epochs=self.epochs,
        )

        for epoch_no in range(self.epochs):
            # mark epoch start time
            tic = time.time()
            avg_epoch_loss = 0.0

            with tqdm(train_iter) as it:
                for batch_no, data_entry in enumerate(it, start=1):
                    optimizer.zero_grad()
                    inputs = [v.to(self.device) for v in data_entry.values()]

                    output = net(*inputs)
                    if isinstance(output, (list, tuple)):
                        loss = output[0]
                    else:
                        loss = output

                    avg_epoch_loss += loss.item()
                    it.set_postfix(
                        ordered_dict={
                            "avg_epoch_loss": avg_epoch_loss / batch_no,
                            "epoch": epoch_no,
                        },
                        refresh=False,
                    )
                    wandb.log({"loss": loss.item()})

                    loss.backward()
                    if self.clip_gradient is not None:
                        nn.utils.clip_grad_norm_(net.parameters(),
                                                 self.clip_gradient)

                    optimizer.step()
                    lr_scheduler.step()

                    if self.num_batches_per_epoch == batch_no:
                        break

            # mark epoch end time and log time cost of current epoch
            toc = time.time()
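
Nearly every example on this page follows the same pattern as Example #3: OneCycleLR is sized either with total_steps or with epochs and steps_per_epoch, and scheduler.step() is called once per batch, immediately after optimizer.step(). A self-contained sketch of that loop is shown below; the model, data and hyperparameters are placeholders, not taken from any example above.

# Minimal per-batch OneCycleLR loop; every name and value here is a placeholder.
import torch
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import OneCycleLR
from torch.utils.data import DataLoader, TensorDataset

loader = DataLoader(TensorDataset(torch.randn(256, 8), torch.randn(256, 1)),
                    batch_size=32)
model = nn.Linear(8, 1)
optimizer = Adam(model.parameters(), lr=1e-3)

epochs = 3
scheduler = OneCycleLR(optimizer,
                       max_lr=1e-2,
                       epochs=epochs,
                       steps_per_epoch=len(loader))  # epochs * steps_per_epoch total steps

for epoch in range(epochs):
    for x, y in loader:
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()
        scheduler.step()  # exactly one scheduler step per optimizer step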
Example #4
    def fit(self, learning_rate: Tuple[float, float]):
        # Capture learning errors
        self.train_val_error = {"train": [], "validation": [], "lr": []}
        self._init_model(
            model=self.model_, optimizer=self.optimizer_, criterion=self.criterion_
        )

        # Setup one cycle policy
        scheduler = OneCycleLR(
            optimizer=self.optimizer,
            max_lr=learning_rate,
            steps_per_epoch=len(self.train_loader),
            epochs=self.n_epochs,
            anneal_strategy="cos",
        )

        # Iterate over epochs
        for epoch in range(self.n_epochs):
            # Training set
            self.model.train()
            train_loss = 0
            for batch_num, samples in enumerate(self.train_loader, start=1):
                # Forward pass, get loss
                loss = self._forward_pass(samples=samples)
                train_loss += loss.item()

                # Zero gradients, perform a backward pass, and update the weights.
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Update scheduler
                self.train_val_error["lr"].append(scheduler.get_lr()[0])
                # One cycle scheduler must be called per batch
                # https://pytorch.org/docs/stable/optim.html#torch.optim.lr_scheduler.OneCycleLR
                scheduler.step()

            # Append train loss per current epoch
            train_err = train_loss / batch_num
            self.train_val_error["train"].append(train_err)

            # Validation set
            self.model.eval()
            validation_loss = 0
            for batch_num, samples in enumerate(self.valid_loader, start=1):
                # Forward pass, get loss
                loss = self._forward_pass(samples=samples)
                validation_loss += loss.item()
            # Append validation loss per current epoch
            val_err = validation_loss / batch_num
            self.train_val_error["validation"].append(val_err)

        return pd.DataFrame(data={
            'Train error' : self.train_val_error['train'],
            'Validation error': self.train_val_error['validation']
        })
Example #5
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    lambda_l1 = 0.01

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    scheduler = OneCycleLR(optimizer,
                           max_lr=0.020,
                           epochs=20,
                           steps_per_epoch=len(train_loader))

    for batch_idx, (data, target) in enumerate(pbar):
        # get samples
        data, target = data.to(device), target.to(device)

        # Init
        optimizer.zero_grad()
        # In PyTorch we need to set the gradients to zero before starting
        # backpropagation, because PyTorch accumulates gradients on subsequent
        # backward passes. Zeroing them at the start of each iteration ensures
        # the parameter update uses only the current batch's gradients.

        # Predict
        y_pred = model(data)

        # Calculate loss
        loss = F.nll_loss(y_pred, target)
        train_losses.append(loss.item())

        l1 = 0
        for p in model.parameters():
            l1 += p.abs().sum()

        #print("l1 at 1st epoch: ", l1)

        loss = loss + lambda_l1 * l1
        # Backpropagation
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Update pbar-tqdm

        pred = y_pred.argmax(
            dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(
            desc=
            f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}'
        )
        train_acc.append(100 * correct / processed)
    return train_losses, train_acc
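
Note that Example #5 rebuilds the SGD optimizer and the OneCycleLR scheduler inside train(), which is typically called once per epoch, so the optimizer passed in as an argument is discarded and the one-cycle schedule restarts every epoch. If a single cycle over the whole run is intended, the usual arrangement (sketched below with placeholder names, not the original repository's code) is to construct both once, outside the per-epoch function.

# Sketch: build the optimizer and scheduler once so the cycle spans all epochs.
from torch import nn, optim
from torch.optim.lr_scheduler import OneCycleLR

model = nn.Linear(4, 2)          # stand-in for the real network
train_loader = [None] * 100      # stand-in for a DataLoader with 100 batches
EPOCHS = 20

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = OneCycleLR(optimizer,
                       max_lr=0.020,
                       epochs=EPOCHS,
                       steps_per_epoch=len(train_loader))

# train(...) would then only call optimizer.step() and scheduler.step() per
# batch, instead of re-creating both objects on every call as above.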
Example #6
    def fit(self, epochs, print_each_img, use_cycle=False):
        torch.cuda.empty_cache()
        self.train_losses = []
        self.valid_losses = []
        self.train_scores = []
        self.valid_scores = []

        self.scheduler = OneCycleLR(self.tmp_optimizer,
                                    self.max_lr,
                                    epochs=epochs,
                                    steps_per_epoch=1,
                                    div_factor=25.0,
                                    final_div_factor=100)
        for epoch in range(epochs):
            self.scheduler.step()
            lr = self.tmp_optimizer.param_groups[0]['lr']
            self.lrs.append(lr)
        del self.tmp_optimizer, self.scheduler
        gc.collect()
        for epoch in range(epochs):
            self.model.train()
            total_loss = 0
            total_score = 0
            print('epoch: ' + str(epoch))
            if use_cycle:
                lr = self.lrs[epoch]
                self.optimizer.param_groups[0]['lr'] = lr
            else:
                lr = self.lr
            print(lr)
            for index, batch in tqdm(enumerate(self.train_loader),
                                     total=len(self.train_loader)):
                sample_img, sample_mask = batch
                sample_img = sample_img.to(self.device)
                sample_mask = sample_mask.to(self.device)
                predicted_mask = self.model(sample_img)
                loss = self.loss_function(predicted_mask, sample_mask)
                #                 score = self.metrics(predicted_mask,sample_mask)
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                total_loss += loss.item()
                #                 total_score += score.item()
                if print_each_img:
                    print('batch loss: ' + str(loss.item()))
                del batch, sample_img, sample_mask, predicted_mask, loss, scaled_loss
                gc.collect()
                torch.cuda.empty_cache()
            print('total_loss: ' + str(total_loss / len(self.train_loader)))
            self.train_losses.append(total_loss / len(self.train_loader))
            #             self.train_scores.append(total_score/len(self.train_set))
            val_score = self.val()
            self.save_checkpoint(self.name, epoch, val_score)
Example #7
 def __init__(self, cfg):
     self.device = cfg["device"]
     self.model = Models().get_model(cfg["network"]) # cfg.network
     self.model.to(self.device)
     params = [p for p in self.model.parameters() if p.requires_grad]
     self.optimizer = AdamW(params, lr=0.00001)
     self.lr_scheduler = OneCycleLR(self.optimizer,
                                    max_lr=1e-4,
                                    epochs=cfg["nepochs"],
                                    steps_per_epoch=169,  # len(dataloader)/accumulations
                                    div_factor=25,  # for initial lr, default: 25
                                    final_div_factor=1e3,  # for final lr, default: 1e4
                                    )
Example #8
    def start_train(self, epochs=10, device=device):
        optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
        # scheduler = StepLR(optimizer, step_size=6, gamma=0.1)
        scheduler = OneCycleLR(optimizer,
                               max_lr=0.1,
                               steps_per_epoch=len(self.train_loader),
                               epochs=epochs)

        for epoch in range(epochs):
            # Print Learning Rate
            print("EPOCH:", epoch + 1, 'LR:', scheduler.get_lr())
            self.train_epoch(optimizer, scheduler)
            self.test_epoch()
Example #9
    def create_scheduler(self, num_training_steps: int):
        """
        Setup the optimizer and the learning rate scheduler. This overrides super
        in a way that just customizes the lr scheduler while the optimizer remains the
        default.
        """

        # Unpack arguments from trainer_mixin_args
        mixin_args = self.args.trainer_mixin_args

        max_lr = mixin_args.get("max_lr", 1e-2)
        pct_start = mixin_args.get("pct_start", 0.3)
        anneal_strategy = mixin_args.get("anneal_strategy", "linear")
        cycle_momentum = mixin_args.get("cycle_momentum", True)
        base_momentum = mixin_args.get("base_momentum", 0.85)
        max_momentum = mixin_args.get("max_momentum", 0.95)
        div_factor = mixin_args.get("div_factor", 25)
        final_div_factor = mixin_args.get("final_div_factor", 1e4)
        last_epoch = mixin_args.get("last_epoch", -1)

        # Now define the lr scheduler, given the optimizer.
        self.lr_scheduler = OneCycleLR(
            self.optimizer,
            total_steps=num_training_steps,
            max_lr=max_lr,
            pct_start=pct_start,
            anneal_strategy=anneal_strategy,
            cycle_momentum=cycle_momentum,
            base_momentum=base_momentum,
            max_momentum=max_momentum,
            div_factor=div_factor,
            final_div_factor=final_div_factor,
            last_epoch=last_epoch,
        )
Example #10
    def configure_optimizers(self):
        optimizer = AdamW(
            [p for p in self.parameters() if p.requires_grad],
            lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon,
        )
        # scheduler = {
        #     'scheduler': ReduceLROnPlateau(optimizer),
        #     'monitor': 'val_loss',
        #     'interval': 'epoch',
        #     'frequency': 1
        # }

        # steps_per_epoch = math.ceil(5217 * 8 * 3 / (self.hparams.gpus * self.hparams.batch_size))
        # steps_per_epoch = math.ceil(10990 * 6 * 3 / (self.hparams.gpus * self.hparams.batch_size))
        steps_per_epoch = math.ceil(197822 / (self.hparams.gpus * self.hparams.batch_size))
        scheduler = {
            # 'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
            #                         epochs=self.hparams.max_epochs, steps_per_epoch=6956),
            # 'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
            #                         epochs=self.hparams.max_epochs, steps_per_epoch=8348),
            # 'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
            #                         epochs=self.hparams.max_epochs, steps_per_epoch=5217),
            'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
                                    epochs=self.hparams.max_epochs, steps_per_epoch=steps_per_epoch),
            'interval': 'step',
            'frequency': 1
        }

        return [optimizer], [scheduler]
Example #11
def one_cycle_lr(
    optimizer, max_lr, epochs, steps_per_epoch, pct_start=0.5, div_factor=10.0, final_div_factor=10000
):
    """Create One Cycle Policy for Learning Rate.

    Args:
        optimizer (torch.optim): Model optimizer.
        max_lr (float): Upper learning rate boundary in the cycle.
        epochs (int): The number of epochs to train for. This is used along with
            steps_per_epoch in order to infer the total number of steps in the cycle.
        steps_per_epoch (int): The number of steps per epoch to train for. This is
            used along with epochs in order to infer the total number of steps in the cycle.
        pct_start (:obj:`float`, optional): The percentage of the cycle (in number of steps)
            spent increasing the learning rate. (default: 0.5)
        div_factor (:obj:`float`, optional): Determines the initial learning rate via
            initial_lr = max_lr / div_factor. (default: 10.0)
        final_div_factor (:obj:`float`, optional): Determines the minimum learning rate via
            min_lr = initial_lr / final_div_factor. (default: 1e4)

    Returns:
        OneCycleLR instance.
    """

    return OneCycleLR(
        optimizer, max_lr, epochs=epochs, steps_per_epoch=steps_per_epoch,
        pct_start=pct_start, div_factor=div_factor, final_div_factor=final_div_factor
    )
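
As a concrete check of the docstring above (illustrative values only): with max_lr=0.1, div_factor=10.0 and final_div_factor=1e4, the cycle starts at initial_lr = 0.1 / 10 = 0.01 and anneals down to min_lr = 0.01 / 1e4 = 1e-6. The snippet below, using a placeholder optimizer, just confirms the starting value.

# Quick check of the div_factor arithmetic; optimizer and values are placeholders.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import OneCycleLR

params = [torch.nn.Parameter(torch.zeros(1))]
opt = SGD(params, lr=0.1)
sched = OneCycleLR(opt, max_lr=0.1, epochs=2, steps_per_epoch=5,
                   pct_start=0.5, div_factor=10.0, final_div_factor=1e4)

print(sched.get_last_lr())  # [0.01]  == max_lr / div_factor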
Example #12
    def configure_optimizers(self):
        opt_cfg = self.cfg['optimizer']
        lr = float(self.cfg['optimizer']['lr'])
        if opt_cfg['name'] == 'AdamW':
            optimizer = AdamW(self.model.parameters(), lr=lr, )
        elif opt_cfg['name'] == 'Adam_GCC':
            optimizer = Adam_GCC(self.model.parameters(), lr=lr)
        elif opt_cfg['name'] == 'AdamW_GCC2':
            optimizer = AdamW_GCC2(self.model.parameters(), lr=lr)
        else:
            raise Exception('optimizer {} not supported'.format(opt_cfg['name']))

        if self.cfg['scheduler']['type'] == 'none':
            sched = None
        elif self.cfg['scheduler']['type'] == 'CosineAnnealingWarmRestarts':
            T_mult = self.cfg['scheduler']['T_mult']
            T_0 = self.cfg['scheduler']['T_0']
            eta_min = float(self.cfg['scheduler']['eta_min'])
            sched = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=T_mult, eta_min=eta_min, last_epoch=-1)
        elif self.cfg['scheduler']['type'] == 'OneCycleLR':
            max_lr = float(self.cfg['scheduler']['max_lr'])
            steps_per_epoch = self.cfg['scheduler']['steps_per_epoch']
            epochs = self.cfg['scheduler']['epochs']
            sched = OneCycleLR(optimizer, max_lr=max_lr, steps_per_epoch=steps_per_epoch, epochs=epochs)
        else:
            raise Exception('scheduler {} not supported'.format(self.cfg['scheduler']['type']))
        if sched is not None:
            sched = {'scheduler': sched, 'name': self.cfg['scheduler']['type']}
            return [optimizer], [sched]
        return optimizer
Example #13
 def _reset_scheduler(self, lr, num_epochs, sched_type='onecycle'):
     if sched_type == 'onecycle':
         self.scheduler = OneCycleLR(self.optimizer, lr, num_epochs * len(self.train_loader))
     elif sched_type == 'cosine':
         self.scheduler = CosineAnnealingLR(self.optimizer, num_epochs * len(self.train_loader), eta_min=lr / 25e4)
     else:
         raise ValueError(f"The following scheduler type is not supported: {sched_type}")
Example #14
def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")

    
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()
    print("Initializing datasets and dataloaders")    
    train_path = "/content/t2/train"
    test_path="/content/t2/val"
    #model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'], num_classes=200)
    trainloader, testloader = dataloader.get_imagenet_loaders(train_path, test_path, transform_train=None, transform_test=None)
    model_new = basemodelclass.S11ResNet()

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults, project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")

    #trainloader, testloader = dataloader.get_train_test_dataloader_cifar10()
    optimizer = optim.SGD(model_new.parameters(), lr=config.lr, momentum=config.momentum,
                          weight_decay=config.weight_decay)
    
    #optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    # scheduler = None
    cycle_momentum = config.cycle_momentum == "True"
    print("Momentum cycling set to {}".format(cycle_momentum))
    if (config.lr_policy == "clr"):
        scheduler = CyclicLR(optimizer, 
                             base_lr=config.lr*0.01, 
                             max_lr=config.lr, mode='triangular', 
                             gamma=1., 
                             cycle_momentum=True,
                             step_size_up=256)#, scale_fn='triangular',step_size_up=200)
    else:
        scheduler = OneCycleLR(optimizer, 
                                config.ocp_max_lr, 
                                epochs=config.epochs, 
                                cycle_momentum=cycle_momentum, 
                                steps_per_epoch=len(trainloader), 
                                base_momentum=config.momentum,
                                max_momentum=0.95, 
                                pct_start=config.split_pct,
                                anneal_strategy=config.anneal_strategy,
                                div_factor=config.div_factor,
                                final_div_factor=config.final_div_factor
                            )
    
    final_model_path = traintest.execute_model(model_new, 
                hyperparams.hyperparameter_defaults, 
                trainloader, testloader, 
                device, dataloader.classes,
                wandb=wandb,
                optimizer_in=optimizer,
                scheduler=scheduler,
                prev_saved_model=saved_model_path,
                criterion=criterion,
                save_best=True,
                lars_mode=False,
                batch_step=True)
Example #15
def build_lr_scheduler(
        cfg, optimizer: torch.optim.Optimizer
) -> torch.optim.lr_scheduler._LRScheduler:
    """
    Build a LR scheduler from config.
    """
    name = cfg.NAME
    if name == "WarmupMultiStepLR":
        return WarmupMultiStepLR(
            optimizer,
            cfg.STEPS,
            cfg.GAMMA,
            warmup_factor=cfg.WARMUP_FACTOR,
            warmup_iters=cfg.WARMUP_ITERS,
            warmup_method=cfg.WARMUP_METHOD,
        )
    elif name == "WarmupCosineLR":
        return WarmupCosineLR(
            optimizer,
            cfg.MAX_ITER,
            warmup_factor=cfg.WARMUP_FACTOR,
            warmup_iters=cfg.WARMUP_ITERS,
            warmup_method=cfg.WARMUP_METHOD,
        )
    elif name == "OneCycleLR":
        return OneCycleLR(optimizer,
                          cfg.MAX_LR,
                          total_steps=cfg.MAX_ITER,
                          pct_start=cfg.PCT_START,
                          base_momentum=cfg.BASE_MOM,
                          max_momentum=cfg.MAX_MOM,
                          div_factor=cfg.DIV_FACTOR)
    else:
        raise ValueError("Unknown LR scheduler: {}".format(name))
Example #16
    def configure_optimizers(self):

        lr = float(self.cfg['train_params']['lr'])

        if self.cfg['optimizer'] == 'adam':
            optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        elif self.cfg['optimizer'] == 'adamw':
            optimizer = torch.optim.AdamW(self.model.parameters(), lr=lr)
        elif self.cfg['optimizer'] == 'adamw_gcc2':
            optimizer = AdamW_GCC2(self.model.parameters(), lr=lr)
        elif self.cfg['optimizer'] == 'radam':
            optimizer = RAdam(self.model.parameters(), lr=lr)
        else:
            raise Exception('optimizer {} not supported'.format(self.cfg['optimizer']))

        self.opt = optimizer

        if self.cfg['scheduler']['type'] == 'CosineAnnealingWarmRestarts':
            T_mult = self.cfg['scheduler']['T_mult']
            T_0 = self.cfg['scheduler']['T_0']
            eta_min = float(self.cfg['scheduler']['eta_min'])
            sched = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=T_mult, eta_min=eta_min, last_epoch=-1)
        elif self.cfg['scheduler']['type'] == 'OneCycleLR':
            max_lr = float(self.cfg['scheduler']['max_lr'])
            steps_per_epoch = self.cfg['scheduler']['steps_per_epoch']
            epochs = self.cfg['scheduler']['epochs']
            sched = OneCycleLR(optimizer, max_lr=max_lr, steps_per_epoch=steps_per_epoch, epochs=epochs)
        else:
            raise Exception('scheduler {} not supported'.format(self.cfg['scheduler']['type']))
        sched = {'scheduler': sched, 'name': 'adam+{}'.format(self.cfg['scheduler']['type'])}
        return [optimizer], [sched]
Example #17
    def fit(self, train_dl, valid_dl, epochs, lr, metrics=None, optimizer=None, scheduler=None):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(device)
        optimizer = optimizer or Adam(self.model.parameters(), lr)
        if scheduler is not False:
            scheduler = scheduler or OneCycleLR(optimizer, lr, epochs*len(train_dl))
        else:
            scheduler = None
        self.train_stats = TrainTracker(metrics, validate=(valid_dl is not None))
        bar = master_bar(range(epochs))
        bar.write(self.train_stats.metrics_names, table=True)

        for epoch in bar:
            self.model.train()
            for batch in progress_bar(train_dl, parent=bar):
                batch = batch_to_device(batch, device)
                loss = self._train_batch(batch, optimizer, scheduler)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                if scheduler:
                    scheduler.step()
                self.train_stats.update_train_loss(loss)

            valid_outputs = []
            if valid_dl:
                self.model.eval()
                for batch in progress_bar(valid_dl, parent=bar):
                    batch = batch_to_device(batch, device)
                    output = self._valid_batch(batch)
                    valid_outputs.append(output)

            self.train_stats.log_epoch_results(valid_outputs)
            bar.write(self.train_stats.get_metrics_values(), table=True)
Example #18
def experiments(train_loader, test_loader, norm_type, l1_factor, l2_factor,
                dropout, epochs):

    train_losses = []
    test_losses = []
    train_accuracy = []
    test_accuracy = []

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model = m.Net(norm_type, dropout).to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=0.015,
                          momentum=0.7,
                          weight_decay=l2_factor)
    scheduler = OneCycleLR(optimizer,
                           max_lr=0.015,
                           epochs=epochs,
                           steps_per_epoch=len(train_loader))

    for epoch in range(1, epochs + 1):
        print(f'Epoch {epoch}:')
        trn.train(model, device, train_loader, optimizer, epoch,
                  train_accuracy, train_losses, l1_factor, scheduler)
        tst.test(model, device, test_loader, test_accuracy, test_losses)

    return (train_accuracy, train_losses, test_accuracy, test_losses), model
Example #19
    def create_optimizer_and_scheduler(self, num_training_steps: int):
        """
        Setup the optimizer and the learning rate scheduler. This overrides super
        in a way that just customizes the lr scheduler while the optimizer remains the
        default.
        """

        # Set lr scheduler to dummy variable so it's not created in the call to super.
        self.lr_scheduler = ...

        # Create just the optimizer.
        super().create_optimizer_and_scheduler(num_training_steps)

        # Now define the lr scheduler, given the optimizer.
        self.lr_scheduler = OneCycleLR(
            self.optimizer,
            total_steps=num_training_steps,
            max_lr=self.max_lr,
            pct_start=self.pct_start,
            anneal_strategy=self.anneal_strategy,
            cycle_momentum=self.cycle_momentum,
            base_momentum=self.base_momentum,
            max_momentum=self.max_momentum,
            div_factor=self.div_factor,
            final_div_factor=self.final_div_factor,
            last_epoch=self.last_epoch,
        )
Example #20
 def configure_optimizers(
     self
 ) -> Optional[Union[Optimizer, Sequence[Optimizer], Dict, Sequence[Dict],
                     Tuple[List, List]]]:
     optimizer = AdamW(self.parameters(), lr=self.lr, weight_decay=self.wd)
     if self.scheduler is None:
         return optimizer
     elif self.scheduler == 'plateau':
         return {
             'optimizer':
             optimizer,
             'lr_scheduler':
             ReduceLROnPlateau(optimizer,
                               factor=0.1,
                               patience=25,
                               eps=1e-4,
                               cooldown=0,
                               min_lr=2e-7,
                               verbose=True),
             'monitor':
             'val_loss',
         }
     elif self.scheduler == '1cycle':
         return {
             'optimizer':
             optimizer,
             'lr_scheduler':
             OneCycleLR(optimizer,
                        max_lr=10**2 * self.lr,
                        total_steps=self.total_steps)
         }
Example #21
    def configure_optimizers(self):
        VGAE_optimizer = AdamW(self.module.VGAE.parameters(), lr=self.lr)
        VGAE_lr_scheduler = {
            'scheduler':
            OneCycleLR(
                VGAE_optimizer,
                max_lr=10 * self.lr,
                total_steps=self.numsteps,
                anneal_strategy="cos",
                final_div_factor=10,
            ),
            'name':
            'learning_rate',
            'interval':
            'step',
            'frequency':
            1
        }

        disc_optimizer = AdamW(self.module.discriminator.parameters(),
                               lr=self.lr)
        disc_lr_scheduler = {
            'scheduler':
            OneCycleLR(
                disc_optimizer,
                max_lr=10 * self.lr,
                total_steps=self.numsteps,
                anneal_strategy="cos",
                final_div_factor=10,
            ),
            'name':
            'learning_rate',
            'interval':
            'step',
            'frequency':
            1
        }

        return [{
            'optimizer': VGAE_optimizer,
            'frequency': 10,
            'lr_scheduler': VGAE_lr_scheduler
        }, {
            'optimizer': disc_optimizer,
            'frequency': 10,
            'lr_scheduler': disc_lr_scheduler
        }]
Example #22
def model_load(trainloader, n_f, n_r_f, n_fr_f, msg_dim, hidden, aggr, init_lr,
               args, data_params):
    n = data_params['n']
    dim = data_params['dim'] * 2
    sim = data_params['sim']
    sparsity_mode = args.connection_value
    sparsity_prior = args.sparsity_prior
    total_epochs = args.epochs

    ogn = OGN(n_f,
              n_r_f,
              n_fr_f,
              msg_dim,
              dim,
              hidden=hidden,
              edge_index=get_edge_index(n, sim),
              aggr=aggr,
              sparsity_mode=sparsity_mode,
              sparsity_prior=sparsity_prior,
              test=args.test).cuda()
    rogn = ROGN(n_f,
                n_r_f,
                n_fr_f,
                msg_dim,
                dim,
                sparsity_mode,
                hidden=hidden,
                edge_index=get_edge_index(n, sim),
                aggr=aggr).cuda()

    opt = torch.optim.Adam(ogn.parameters(), lr=init_lr, weight_decay=1e-8)
    ropt = torch.optim.Adam(rogn.parameters(), lr=init_lr, weight_decay=1e-8)

    batch_per_epoch = len(trainloader)
    sched = OneCycleLR(opt,
                       max_lr=init_lr,
                       steps_per_epoch=batch_per_epoch,
                       epochs=total_epochs,
                       final_div_factor=1e5)
    rsched = OneCycleLR(ropt,
                        max_lr=init_lr,
                        steps_per_epoch=batch_per_epoch,
                        epochs=total_epochs,
                        final_div_factor=1e5)
    return ogn, rogn, opt, ropt, sched, rsched, total_epochs, batch_per_epoch
Example #23
def load_optimizer(model, cfg, state, steps_per_epoch=None):
    resuming = cfg['resume'].get('path', False) is not False
    resetting_epoch = cfg['resume'].get('epoch', 0) == 1 and resuming
    resetting_optimizer = cfg['resume'].get('reset_optimizer',
                                            False) is not False

    # Create optimizer
    lr = cfg['training']['lr']
    wd = cfg['training']['weight_decay']
    opt = cfg['training']['optimizer']
    if opt == 'adam':
        optimizer = Adam((p for p in model.parameters() if p.requires_grad),
                         lr=lr,
                         weight_decay=wd)
    elif opt == 'adamw':
        optimizer = AdamW((p for p in model.parameters() if p.requires_grad),
                          lr=lr,
                          weight_decay=wd)
    else:
        raise ValueError(f"Unknown optimizer {opt}")

    # Load optimizer weights if in state dict
    opt_path = state.get('optimizer', None)
    if opt_path:
        if resetting_optimizer:
            print(f"Resetting optimizer state")
        else:
            optimizer.load_state_dict(opt_path)

    # Load scheduler if in state dict AND if we're not resetting the epoch or optimizer
    scheduler = state.get('scheduler', None)
    sched = cfg['training'].get('scheduler', None)
    if scheduler and not resetting_epoch and not resetting_optimizer:
        print(f"Loaded scheduler from state dict")
        return optimizer, scheduler

    # Otherwise create scheduler if needed

    elif sched:
        # If we are resuming but not resetting the epoch to 1, user should be warned we aren't continuing the scheduler

        if resuming and not resetting_epoch and not resetting_optimizer:
            print(
                "WARNING: Resuming training from a checkpoint without resetting the epochs "
                "or optimizer, and yet no scheduler found - creating new scheduler")

        if sched == 'one_cycle':
            assert steps_per_epoch
            scheduler = OneCycleLR(optimizer,
                                   max_lr=cfg['training']['lr'],
                                   steps_per_epoch=steps_per_epoch,
                                   epochs=cfg['training']['n_epochs'])
            print(f"Using one-cycle LR")

    else:
        scheduler = None
    return optimizer, scheduler
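
Example #23 stores the scheduler object itself in the resume state. A common alternative, shown here only as a sketch with placeholder names rather than this project's checkpoint format, is to save scheduler.state_dict() next to the optimizer state and restore it with load_state_dict(), which OneCycleLR supports like any other torch scheduler.

# Sketch of state_dict-based checkpointing for an optimizer + OneCycleLR pair.
import torch
from torch import nn
from torch.optim import SGD
from torch.optim.lr_scheduler import OneCycleLR

model = nn.Linear(4, 2)
optimizer = SGD(model.parameters(), lr=0.01)
scheduler = OneCycleLR(optimizer, max_lr=0.1, total_steps=1000)

torch.save({'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()}, 'checkpoint.pt')

# Later: rebuild the same three objects with identical settings, then restore.
state = torch.load('checkpoint.pt', map_location='cpu')
model.load_state_dict(state['model'])
optimizer.load_state_dict(state['optimizer'])
scheduler.load_state_dict(state['scheduler'])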
Example #24
def get_scheduler(opts, optimizer):
    return {
        'OneCycleLR': lambda: OneCycleLR(
            optimizer, max_lr=opts.lr, total_steps=opts.epochs, anneal_strategy='linear'),
        'CosineAnnealingLR': lambda: CosineAnnealingLR(
            optimizer, T_max=opts.epochs, eta_min=0, last_epoch=opts.last_epoch),
        'CosineAnnealingWarmRestarts': lambda: CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0, last_epoch=opts.last_epoch)
    }[opts.scheduler_name]()
Example #25
def get_scheduler(opts, optimizer):
    return {
        'OneCycleLR': lambda: LRScheduler(OneCycleLR(
            optimizer, max_lr=opts.lr, total_steps=opts.total_steps)),
        'CosineAnnealingLR': lambda: LRScheduler(CosineAnnealingLR(
            optimizer, T_max=opts.total_steps, eta_min=0)),
        'CosineAnnealingWarmRestarts': lambda: LRScheduler(CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0)),
    }[opts.scheduler_name]()
Example #26
def get_scheduler(scheduler_name: str,
                  optimizer,
                  lr,
                  num_epochs,
                  batches_in_epoch=None):
    if scheduler_name is None or scheduler_name.lower() == "none":
        return None

    if scheduler_name.lower() == "poly":
        return PolyLR(optimizer, num_epochs, gamma=0.9)

    if scheduler_name.lower() == "cos":
        return CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-5)

    if scheduler_name.lower() == "cosr":
        return CosineAnnealingWarmRestarts(optimizer,
                                           T_0=max(2, num_epochs // 4),
                                           eta_min=1e-5)

    if scheduler_name.lower() in {"1cycle", "one_cycle"}:
        return OneCycleLR(optimizer,
                          lr_range=(lr, 1e-6, 1e-5),
                          num_steps=batches_in_epoch,
                          warmup_fraction=0.05,
                          decay_fraction=0.1)

    if scheduler_name.lower() == "exp":
        return ExponentialLR(optimizer, gamma=0.95)

    if scheduler_name.lower() == "clr":
        return CyclicLR(
            optimizer,
            base_lr=1e-6,
            max_lr=lr,
            step_size_up=batches_in_epoch // 4,
            # mode='exp_range',
            gamma=0.99,
        )

    if scheduler_name.lower() == "multistep":
        return MultiStepLR(optimizer,
                           milestones=[
                               int(num_epochs * 0.5),
                               int(num_epochs * 0.7),
                               int(num_epochs * 0.9)
                           ],
                           gamma=0.3)

    if scheduler_name.lower() == "simple":
        return MultiStepLR(
            optimizer,
            milestones=[int(num_epochs * 0.4),
                        int(num_epochs * 0.7)],
            gamma=0.4)

    raise KeyError(scheduler_name)
Example #27
def get_one_cycle(optimizer, start_lr, max_lr, final_lr, num_epochs,
                  steps_per_epoch):
    start_div_factor = max_lr / start_lr
    final_div_factor = start_lr / final_lr
    return OneCycleLR(optimizer,
                      epochs=num_epochs,
                      steps_per_epoch=steps_per_epoch,
                      max_lr=max_lr,
                      div_factor=start_div_factor,
                      final_div_factor=final_div_factor)
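
This helper inverts the relations documented in Example #11 so the caller can specify explicit start and final learning rates. For instance (illustrative numbers only), start_lr=1e-3, max_lr=1e-2 and final_lr=1e-5 yield div_factor = 1e-2 / 1e-3 = 10 and final_div_factor = 1e-3 / 1e-5 = 100. A usage sketch, with a placeholder optimizer:

# Illustrative call; the optimizer and all rates are placeholders.
scheduler = get_one_cycle(optimizer,
                          start_lr=1e-3, max_lr=1e-2, final_lr=1e-5,
                          num_epochs=10, steps_per_epoch=100)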
Example #28
def get_lr_scheduler(optimizer, lr_config, **kwargs):

    scheduler_name = lr_config["name"]
    step_per_epoch = False

    if scheduler_name == '1cycle-yolo':

        def one_cycle(y1=0.0, y2=1.0, steps=100):
            # lambda function for sinusoidal ramp from y1 to y2
            return lambda x: (
                (1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

        lf = one_cycle(1, 0.158, kwargs['num_epochs'])  # cosine 1->hyp['lrf']
        scheduler = LambdaLR(optimizer, lr_lambda=lf)
        step_per_epoch = True

    elif scheduler_name == '1cycle':
        scheduler = OneCycleLR(optimizer,
                               max_lr=0.001,
                               epochs=kwargs['num_epochs'],
                               steps_per_epoch=int(
                                   len(kwargs["trainset"]) /
                                   kwargs["batch_size"]),
                               pct_start=0.1,
                               anneal_strategy='cos',
                               final_div_factor=10**5)
        step_per_epoch = False

    elif scheduler_name == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      mode='min',
                                      factor=0.5,
                                      patience=1,
                                      verbose=False,
                                      threshold=0.0001,
                                      threshold_mode='abs',
                                      cooldown=0,
                                      min_lr=1e-8,
                                      eps=1e-08)
        step_per_epoch = True

    elif scheduler_name == 'cosine':
        scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                T_0=kwargs['num_epochs'],
                                                T_mult=1,
                                                eta_min=0.0001,
                                                last_epoch=-1,
                                                verbose=False)
        step_per_epoch = False

    elif scheduler_name == 'cosine2':
        scheduler = CosineWithRestarts(optimizer, T_max=kwargs['train_len'])
        step_per_epoch = False
    else:
        raise ValueError('scheduler {} not supported'.format(scheduler_name))

    return scheduler, step_per_epoch
Example #29
 def __init__(self,
              optim_type,
              model,
              lr,
              momentum,
              max_lr,
              len_loader,
              weight_decay=0):
      self.optimizer = getattr(optim, optim_type)(
          model.parameters(),
          lr=lr,
          momentum=momentum,
          weight_decay=weight_decay)
     self.scheduler = OneCycleLR(self.optimizer,
                                 max_lr=max_lr,
                                 steps_per_epoch=len_loader,
                                 epochs=50,
                                 div_factor=10,
                                 final_div_factor=1,
                                 pct_start=10 / 50)
Example #30
    def init_train(self, con_weight: float = 1.0):

        test_img = self.get_test_image()
        meter = AverageMeter("Loss")
        self.writer.flush()
        lr_scheduler = OneCycleLR(self.optimizer_G,
                                  max_lr=0.9999,
                                  steps_per_epoch=len(self.dataloader),
                                  epochs=self.init_train_epoch)

        for g in self.optimizer_G.param_groups:
            g['lr'] = self.init_lr

        for epoch in tqdm(range(self.init_train_epoch)):

            meter.reset()

            for i, (style, smooth, train) in enumerate(self.dataloader, 0):
                # train = transform(test_img).unsqueeze(0)
                self.G.zero_grad(set_to_none=self.grad_set_to_none)
                train = train.to(self.device)

                generator_output = self.G(train)
                # content_loss = loss.reconstruction_loss(generator_output, train) * con_weight
                content_loss = self.loss.content_loss(generator_output,
                                                      train) * con_weight
                # content_loss = F.mse_loss(train, generator_output) * con_weight
                content_loss.backward()
                self.optimizer_G.step()
                lr_scheduler.step()

                meter.update(content_loss.detach())

            self.writer.add_scalar(f"Loss : {self.init_time}",
                                   meter.sum.item(), epoch)
            self.write_weights(epoch + 1, write_D=False)
            self.eval_image(epoch, f'{self.init_time} reconstructed img',
                            test_img)

        for g in self.optimizer_G.param_groups:
            g['lr'] = self.G_lr