def __init__(self, config, pretrained=True):
    self.config = config
    self.model, self.vocab = build_model(config)
    self.device = config['device']
    self.num_iters = config['trainer']['iters']
    self.beamsearch = config['predictor']['beamsearch']

    self.data_root = config['dataset']['data_root']
    self.train_annotation = config['dataset']['train_annotation']
    self.valid_annotation = config['dataset']['valid_annotation']
    self.dataset_name = config['dataset']['name']

    self.batch_size = config['trainer']['batch_size']
    self.print_every = config['trainer']['print_every']
    self.valid_every = config['trainer']['valid_every']

    self.checkpoint = config['trainer']['checkpoint']
    self.export_weights = config['trainer']['export']
    self.metrics = config['trainer']['metrics']
    logger = config['trainer']['log']

    if logger:
        self.logger = Logger(logger)

    if pretrained:
        weight_file = download_weights(**config['pretrain'], quiet=config['quiet'])
        self.load_weights(weight_file)

    self.iter = 0

    self.optimizer = AdamW(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09)
    self.scheduler = OneCycleLR(self.optimizer, **config['optimizer'])
    # self.optimizer = ScheduledOptim(
    #     Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
    #     # config['transformer']['d_model'],
    #     512,
    #     **config['optimizer'])

    self.criterion = LabelSmoothingLoss(
        len(self.vocab), padding_idx=self.vocab.pad, smoothing=0.1)

    transforms = ImgAugTransform()

    self.train_gen = self.data_gen(
        'train_{}'.format(self.dataset_name),
        self.data_root, self.train_annotation, transform=transforms)
    if self.valid_annotation:
        self.valid_gen = self.data_gen(
            'valid_{}'.format(self.dataset_name),
            self.data_root, self.valid_annotation)

    self.train_losses = []
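A minimal sketch of the nested config dict this constructor assumes. Every key below is read by the code above, but the values are illustrative placeholders, not the project's defaults.

# Illustrative config for the constructor above; values are placeholders.
config = {
    'device': 'cuda:0',
    'quiet': False,
    'pretrain': {},                      # kwargs forwarded to download_weights()
    'predictor': {'beamsearch': True},
    'dataset': {
        'name': 'demo',
        'data_root': './data',
        'train_annotation': 'train.txt',
        'valid_annotation': 'valid.txt',
    },
    'trainer': {
        'iters': 100000, 'batch_size': 32,
        'print_every': 200, 'valid_every': 4000,
        'checkpoint': './ckpt.pth', 'export': './weights.pth',
        'metrics': 10000, 'log': './train.log',
    },
    # Forwarded verbatim to OneCycleLR(optimizer, **config['optimizer']),
    # so it must carry max_lr plus total_steps (or epochs + steps_per_epoch).
    'optimizer': {'max_lr': 3e-4, 'total_steps': 100000, 'pct_start': 0.1},
}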
def build(self):
    self._sp_sch = OneCycleLR(self.sp_optim, max_lr=self.max_lr,
                              total_steps=self.total_steps)
    self._disc_sch = OneCycleLR(self.disc_optim, max_lr=self.max_lr,
                                total_steps=self.total_steps)
    return self
def __call__(
    self,
    net: nn.Module,
    train_iter: DataLoader,
    validation_iter: Optional[DataLoader] = None,
) -> None:
    wandb.watch(net, log="all", log_freq=self.num_batches_per_epoch)

    optimizer = Adam(net.parameters(), lr=self.learning_rate,
                     weight_decay=self.weight_decay)

    lr_scheduler = OneCycleLR(
        optimizer,
        max_lr=self.maximum_learning_rate,
        steps_per_epoch=self.num_batches_per_epoch,
        epochs=self.epochs,
    )

    for epoch_no in range(self.epochs):
        # mark epoch start time
        tic = time.time()
        avg_epoch_loss = 0.0

        with tqdm(train_iter) as it:
            for batch_no, data_entry in enumerate(it, start=1):
                optimizer.zero_grad()

                inputs = [v.to(self.device) for v in data_entry.values()]

                output = net(*inputs)
                if isinstance(output, (list, tuple)):
                    loss = output[0]
                else:
                    loss = output

                avg_epoch_loss += loss.item()
                it.set_postfix(
                    ordered_dict={
                        "avg_epoch_loss": avg_epoch_loss / batch_no,
                        "epoch": epoch_no,
                    },
                    refresh=False,
                )
                wandb.log({"loss": loss.item()})

                loss.backward()
                if self.clip_gradient is not None:
                    nn.utils.clip_grad_norm_(net.parameters(), self.clip_gradient)

                optimizer.step()
                lr_scheduler.step()

                if self.num_batches_per_epoch == batch_no:
                    break

        # mark epoch end time and log time cost of current epoch
        toc = time.time()
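A self-contained sketch of the sizing contract the loop above relies on: OneCycleLR is budgeted in optimizer steps, and stepping it past epochs * steps_per_epoch raises a ValueError, so the per-batch step() calls must add up exactly.

# Demonstrates that OneCycleLR enforces its step budget.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import OneCycleLR

params = [torch.nn.Parameter(torch.zeros(1))]
opt = SGD(params, lr=0.1)
sched = OneCycleLR(opt, max_lr=0.1, epochs=2, steps_per_epoch=5)

for _ in range(2 * 5):      # one step() per batch, as in the loop above
    opt.step()
    sched.step()

try:
    sched.step()            # one step too many
except ValueError as e:
    print(e)                # complains about exceeding the total step count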
def fit(self, learning_rate: Tuple[float, float]):
    # Capture learning errors
    self.train_val_error = {"train": [], "validation": [], "lr": []}

    self._init_model(
        model=self.model_, optimizer=self.optimizer_, criterion=self.criterion_
    )

    # Setup one cycle policy
    scheduler = OneCycleLR(
        optimizer=self.optimizer,
        max_lr=learning_rate,
        steps_per_epoch=len(self.train_loader),
        epochs=self.n_epochs,
        anneal_strategy="cos",
    )

    # Iterate over epochs
    for epoch in range(self.n_epochs):
        # Training set
        self.model.train()
        train_loss = 0
        for batch_num, samples in enumerate(self.train_loader):
            # Forward pass, get loss
            loss = self._forward_pass(samples=samples)
            train_loss += loss.item()

            # Zero gradients, perform a backward pass, and update the weights.
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # get_last_lr() replaces the deprecated get_lr(), which returns
            # misleading values when called mid-schedule.
            self.train_val_error["lr"].append(scheduler.get_last_lr()[0])

            # One cycle scheduler must be called per batch
            # https://pytorch.org/docs/stable/optim.html#torch.optim.lr_scheduler.OneCycleLR
            scheduler.step()

        # Mean train loss for the current epoch; divide by the number of
        # batches, not the last enumerate() index (which is off by one).
        train_err = train_loss / len(self.train_loader)
        self.train_val_error["train"].append(train_err)

        # Validation set
        self.model.eval()
        validation_loss = 0
        with torch.no_grad():  # no gradients needed for validation
            for batch_num, samples in enumerate(self.valid_loader):
                # Forward pass, get loss
                loss = self._forward_pass(samples=samples)
                validation_loss += loss.item()

        # Mean validation loss for the current epoch
        val_err = validation_loss / len(self.valid_loader)
        self.train_val_error["validation"].append(val_err)

    return pd.DataFrame(data={
        'Train error': self.train_val_error['train'],
        'Validation error': self.train_val_error['validation'],
    })
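A hypothetical follow-up helper (not part of the trainer; assumes matplotlib is available) to sanity-check the per-step learning rates the loop above records.

import matplotlib.pyplot as plt

def plot_lr_history(lr_history):
    """lr_history: the list stored in train_val_error['lr']."""
    plt.plot(lr_history)
    plt.xlabel('optimizer step')
    plt.ylabel('learning rate')
    plt.title('OneCycleLR schedule as recorded during training')
    plt.show()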
def train(model, device, train_loader, optimizer, scheduler, epoch):
    # The optimizer and OneCycleLR scheduler must be created once, outside this
    # function, and passed in. Recreating them here on every call (as the
    # original code did) restarts the LR cycle each epoch and discards the
    # optimizer's momentum buffers.
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    lambda_l1 = 0.01
    for batch_idx, (data, target) in enumerate(pbar):
        # get samples
        data, target = data.to(device), target.to(device)

        # PyTorch accumulates gradients across backward passes, so zero them
        # before each batch so the parameter update is computed correctly.
        optimizer.zero_grad()

        # Predict
        y_pred = model(data)

        # Calculate loss; store the scalar value, not the tensor, so the
        # computation graph is not kept alive.
        loss = F.nll_loss(y_pred, target)
        train_losses.append(loss.item())

        # L1 regularization
        l1 = 0
        for p in model.parameters():
            l1 += p.abs().sum()
        loss = loss + lambda_l1 * l1

        # Backpropagation
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Update pbar-tqdm
        # get the index of the max log-probability
        pred = y_pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)

        pbar.set_description(
            desc=f'Loss={loss.item()} Batch_id={batch_idx} '
                 f'Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100 * correct / processed)

    return train_losses, train_acc
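A driver sketch for the corrected signature, assuming model, device and train_loader already exist; the point is that the optimizer and scheduler are built once, so the single LR cycle spans all 20 epochs.

# Hypothetical driver for train() above (model/device/train_loader assumed).
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = OneCycleLR(optimizer, max_lr=0.020, epochs=20,
                       steps_per_epoch=len(train_loader))

for epoch in range(20):
    train(model, device, train_loader, optimizer, scheduler, epoch)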
def fit(self, epochs, print_each_img, use_cycle=False):
    torch.cuda.empty_cache()
    self.train_losses = []
    self.valid_losses = []
    self.train_scores = []
    self.valid_scores = []

    # Pre-compute one learning rate per epoch with a throwaway scheduler.
    # Record the lr *before* stepping, so the list starts at the initial lr
    # instead of skipping it.
    self.scheduler = OneCycleLR(self.tmp_optimizer, self.max_lr, epochs=epochs,
                                steps_per_epoch=1, div_factor=25.0,
                                final_div_factor=100)
    for epoch in range(epochs):
        lr = self.tmp_optimizer.param_groups[0]['lr']
        self.lrs.append(lr)
        self.scheduler.step()
    del self.tmp_optimizer, self.scheduler
    gc.collect()

    for epoch in range(epochs):
        self.model.train()
        total_loss = 0
        total_score = 0
        print('epoch: ' + str(epoch))
        if use_cycle:
            lr = self.lrs[epoch]
            self.optimizer.param_groups[0]['lr'] = lr
        else:
            lr = self.lr
        print(lr)
        for index, batch in tqdm(enumerate(self.train_loader),
                                 total=len(self.train_loader)):
            sample_img, sample_mask = batch
            sample_img = sample_img.to(self.device)
            sample_mask = sample_mask.to(self.device)
            predicted_mask = self.model(sample_img)
            loss = self.loss_function(predicted_mask, sample_mask)
            # score = self.metrics(predicted_mask, sample_mask)
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            total_loss += loss.item()
            # total_score += score.item()
            if print_each_img:
                print('batch loss: ' + str(loss.item()))
            del batch, sample_img, sample_mask, predicted_mask, loss, scaled_loss
            gc.collect()
            torch.cuda.empty_cache()
        print('total_loss: ' + str(total_loss / len(self.train_loader)))
        self.train_losses.append(total_loss / len(self.train_loader))
        # self.train_scores.append(total_score/len(self.train_set))
        val_score = self.val()
        self.save_checkpoint(self.name, epoch, val_score)
def __init__(self, cfg):
    self.device = cfg["device"]
    self.model = Models().get_model(cfg["network"])  # cfg.network
    self.model.to(self.device)
    params = [p for p in self.model.parameters() if p.requires_grad]
    self.optimizer = AdamW(params, lr=0.00001)
    self.lr_scheduler = OneCycleLR(
        self.optimizer,
        max_lr=1e-4,
        epochs=cfg["nepochs"],
        steps_per_epoch=169,     # len(dataloader)/accumulations
        div_factor=25,           # for initial lr, default: 25
        final_div_factor=1e3,    # for final lr, default: 1e4
    )
def start_train(self, epochs=10, device=device):
    optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.9)
    # scheduler = StepLR(optimizer, step_size=6, gamma=0.1)
    scheduler = OneCycleLR(optimizer, max_lr=0.1,
                           steps_per_epoch=len(self.train_loader), epochs=epochs)
    for epoch in range(epochs):
        # Print the current learning rate; get_last_lr() replaces the
        # deprecated get_lr(), which misbehaves when called mid-schedule.
        print("EPOCH:", epoch + 1, 'LR:', scheduler.get_last_lr())
        self.train_epoch(optimizer, scheduler)
        self.test_epoch()
def create_scheduler(self, num_training_steps: int):
    """
    Setup the optimizer and the learning rate scheduler.

    This overrides super in a way that just customizes the lr scheduler
    while the optimizer remains the default.
    """
    # Unpack arguments from trainer_mixin_args
    mixin_args = self.args.trainer_mixin_args

    max_lr = mixin_args.get("max_lr", 1e-2)
    pct_start = mixin_args.get("pct_start", 0.3)
    anneal_strategy = mixin_args.get("anneal_strategy", "linear")
    cycle_momentum = mixin_args.get("cycle_momentum", True)
    base_momentum = mixin_args.get("base_momentum", 0.85)
    max_momentum = mixin_args.get("max_momentum", 0.95)
    div_factor = mixin_args.get("div_factor", 25)
    final_div_factor = mixin_args.get("final_div_factor", 1e4)
    last_epoch = mixin_args.get("last_epoch", -1)

    # Now define the lr scheduler, given the optimizer.
    self.lr_scheduler = OneCycleLR(
        self.optimizer,
        total_steps=num_training_steps,
        max_lr=max_lr,
        pct_start=pct_start,
        anneal_strategy=anneal_strategy,
        cycle_momentum=cycle_momentum,
        base_momentum=base_momentum,
        max_momentum=max_momentum,
        div_factor=div_factor,
        final_div_factor=final_div_factor,
        last_epoch=last_epoch,
    )
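For reference, a hypothetical trainer_mixin_args covering every key the method reads; the values simply restate the defaults used above.

# Hypothetical trainer_mixin_args; values restate the defaults above.
trainer_mixin_args = {
    "max_lr": 1e-2,
    "pct_start": 0.3,
    "anneal_strategy": "linear",   # or "cos"
    "cycle_momentum": True,
    "base_momentum": 0.85,
    "max_momentum": 0.95,
    "div_factor": 25,              # initial_lr = max_lr / div_factor
    "final_div_factor": 1e4,       # min_lr = initial_lr / final_div_factor
    "last_epoch": -1,
}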
def configure_optimizers(self):
    optimizer = AdamW(
        [p for p in self.parameters() if p.requires_grad],
        lr=self.hparams.learning_rate,
        eps=self.hparams.adam_epsilon,
    )
    # scheduler = {
    #     'scheduler': ReduceLROnPlateau(optimizer),
    #     'monitor': 'val_loss',
    #     'interval': 'epoch',
    #     'frequency': 1
    # }
    # steps_per_epoch = math.ceil(5217 * 8 * 3 / (self.hparams.gpus * self.hparams.batch_size))
    # steps_per_epoch = math.ceil(10990 * 6 * 3 / (self.hparams.gpus * self.hparams.batch_size))
    steps_per_epoch = math.ceil(197822 / (self.hparams.gpus * self.hparams.batch_size))
    scheduler = {
        # 'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
        #                         epochs=self.hparams.max_epochs, steps_per_epoch=6956),
        # 'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
        #                         epochs=self.hparams.max_epochs, steps_per_epoch=8348),
        # 'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
        #                         epochs=self.hparams.max_epochs, steps_per_epoch=5217),
        'scheduler': OneCycleLR(optimizer, max_lr=self.hparams.learning_rate,
                                epochs=self.hparams.max_epochs,
                                steps_per_epoch=steps_per_epoch),
        'interval': 'step',
        'frequency': 1
    }
    return [optimizer], [scheduler]
def one_cycle_lr(
    optimizer, max_lr, epochs, steps_per_epoch,
    pct_start=0.5, div_factor=10.0, final_div_factor=10000
):
    """Create One Cycle Policy for Learning Rate.

    Args:
        optimizer (torch.optim): Model optimizer.
        max_lr (float): Upper learning rate boundary in the cycle.
        epochs (int): The number of epochs to train for. This is used along
            with steps_per_epoch in order to infer the total number of steps
            in the cycle.
        steps_per_epoch (int): The number of steps per epoch to train for.
            This is used along with epochs in order to infer the total number
            of steps in the cycle.
        pct_start (:obj:`float`, optional): The percentage of the cycle (in
            number of steps) spent increasing the learning rate.
            (default: 0.5)
        div_factor (:obj:`float`, optional): Determines the initial learning
            rate via initial_lr = max_lr / div_factor. (default: 10.0)
        final_div_factor (:obj:`float`, optional): Determines the minimum
            learning rate via min_lr = initial_lr / final_div_factor.
            (default: 1e4)

    Returns:
        OneCycleLR instance.
    """
    return OneCycleLR(
        optimizer, max_lr, epochs=epochs, steps_per_epoch=steps_per_epoch,
        pct_start=pct_start, div_factor=div_factor,
        final_div_factor=final_div_factor
    )
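A runnable usage sketch for the helper above; the model and optimizer are illustrative.

# Usage sketch: one cycle spanning 10 epochs of 100 batches each.
import torch
from torch.optim import SGD

model = torch.nn.Linear(10, 2)
optimizer = SGD(model.parameters(), lr=0.1)
scheduler = one_cycle_lr(optimizer, max_lr=0.1, epochs=10, steps_per_epoch=100)

# initial_lr = 0.1 / 10.0 = 0.01; min_lr = 0.01 / 10000 = 1e-6
print(scheduler.get_last_lr())  # [0.01] before any external step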
def configure_optimizers(self):
    opt_cfg = self.cfg['optimizer']
    lr = float(self.cfg['optimizer']['lr'])
    if opt_cfg['name'] == 'AdamW':
        optimizer = AdamW(self.model.parameters(), lr=lr)
    elif opt_cfg['name'] == 'Adam_GCC':
        optimizer = Adam_GCC(self.model.parameters(), lr=lr)
    elif opt_cfg['name'] == 'AdamW_GCC2':
        optimizer = AdamW_GCC2(self.model.parameters(), lr=lr)

    if self.cfg['scheduler']['type'] == 'none':
        sched = None
    elif self.cfg['scheduler']['type'] == 'CosineAnnealingWarmRestarts':
        T_mult = self.cfg['scheduler']['T_mult']
        T_0 = self.cfg['scheduler']['T_0']
        eta_min = float(self.cfg['scheduler']['eta_min'])
        sched = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=T_mult,
                                            eta_min=eta_min, last_epoch=-1)
    elif self.cfg['scheduler']['type'] == 'OneCycleLR':
        max_lr = float(self.cfg['scheduler']['max_lr'])
        # self.cfg, not the undefined bare name cfg
        steps_per_epoch = self.cfg['scheduler']['steps_per_epoch']
        epochs = self.cfg['scheduler']['epochs']
        sched = OneCycleLR(optimizer, max_lr=max_lr,
                           steps_per_epoch=steps_per_epoch, epochs=epochs)
    else:
        raise Exception('scheduler {} not supported'.format(self.cfg['scheduler']['type']))

    if sched is not None:
        sched = {'scheduler': sched, 'name': format(self.cfg['scheduler']['type'])}
        return [optimizer], [sched]
    return optimizer
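An illustrative self.cfg fragment that would exercise the OneCycleLR branch above; all values are placeholders, not project defaults.

# Placeholder cfg exercising the OneCycleLR branch; lr values are strings
# because the method calls float() on them.
cfg = {
    'optimizer': {'name': 'AdamW', 'lr': '3e-4'},
    'scheduler': {
        'type': 'OneCycleLR',
        'max_lr': '1e-3',
        'steps_per_epoch': 500,
        'epochs': 30,
    },
}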
def _reset_scheduler(self, lr, num_epochs, sched_type='onecycle'):
    if sched_type == 'onecycle':
        self.scheduler = OneCycleLR(self.optimizer, lr,
                                    num_epochs * len(self.train_loader))
    elif sched_type == 'cosine':
        self.scheduler = CosineAnnealingLR(self.optimizer,
                                           num_epochs * len(self.train_loader),
                                           eta_min=lr / 25e4)
    else:
        raise ValueError(f"The following scheduler type is not supported: {sched_type}")
def main():
    device = torch.device(
        "cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()
    print("Initializing datasets and dataloaders")
    train_path = "/content/t2/train"
    test_path = "/content/t2/val"
    # model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'], num_classes=200)
    trainloader, testloader = dataloader.get_imagenet_loaders(
        train_path, test_path, transform_train=None, transform_test=None)
    model_new = basemodelclass.S11ResNet()

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults,
                                project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")
    # trainloader, testloader = dataloader.get_train_test_dataloader_cifar10()

    optimizer = optim.SGD(model_new.parameters(), lr=config.lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay)
    # Instantiate the criterion; the original passed the bare class
    # nn.CrossEntropyLoss, which hands callers an uninstantiated loss.
    criterion = nn.CrossEntropyLoss()
    # scheduler = None
    cycle_momentum = True if config.cycle_momentum == "True" else False
    print("Momentum cycling set to {}".format(cycle_momentum))
    if config.lr_policy == "clr":
        # Use the computed cycle_momentum flag instead of hard-coding True.
        scheduler = CyclicLR(optimizer, base_lr=config.lr * 0.01,
                             max_lr=config.lr, mode='triangular', gamma=1.,
                             cycle_momentum=cycle_momentum,
                             step_size_up=256)  # , scale_fn='triangular', step_size_up=200
    else:
        scheduler = OneCycleLR(optimizer,
                               config.ocp_max_lr,
                               epochs=config.epochs,
                               cycle_momentum=cycle_momentum,
                               steps_per_epoch=len(trainloader),
                               base_momentum=config.momentum,
                               max_momentum=0.95,
                               pct_start=config.split_pct,
                               anneal_strategy=config.anneal_strategy,
                               div_factor=config.div_factor,
                               final_div_factor=config.final_div_factor)

    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, dataloader.classes, wandb=wandb,
        optimizer_in=optimizer, scheduler=scheduler,
        prev_saved_model=saved_model_path, criterion=criterion,
        save_best=True, lars_mode=False, batch_step=True)
def build_lr_scheduler(
    cfg, optimizer: torch.optim.Optimizer
) -> torch.optim.lr_scheduler._LRScheduler:
    """
    Build a LR scheduler from config.
    """
    name = cfg.NAME
    if name == "WarmupMultiStepLR":
        return WarmupMultiStepLR(
            optimizer,
            cfg.STEPS,
            cfg.GAMMA,
            warmup_factor=cfg.WARMUP_FACTOR,
            warmup_iters=cfg.WARMUP_ITERS,
            warmup_method=cfg.WARMUP_METHOD,
        )
    elif name == "WarmupCosineLR":
        return WarmupCosineLR(
            optimizer,
            cfg.MAX_ITER,
            warmup_factor=cfg.WARMUP_FACTOR,
            warmup_iters=cfg.WARMUP_ITERS,
            warmup_method=cfg.WARMUP_METHOD,
        )
    elif name == "OneCycleLR":
        return OneCycleLR(
            optimizer,
            cfg.MAX_LR,
            total_steps=cfg.MAX_ITER,
            pct_start=cfg.PCT_START,
            base_momentum=cfg.BASE_MOM,
            max_momentum=cfg.MAX_MOM,
            div_factor=cfg.DIV_FACTOR,
        )
    else:
        raise ValueError("Unknown LR scheduler: {}".format(name))
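An illustrative config for the OneCycleLR branch above, using a plain namespace in place of whatever config object the project actually passes; field values are placeholders.

# Placeholder config node for the OneCycleLR branch of build_lr_scheduler.
from types import SimpleNamespace

cfg = SimpleNamespace(
    NAME="OneCycleLR",
    MAX_LR=0.01,       # peak learning rate
    MAX_ITER=90000,    # total optimizer steps in the run
    PCT_START=0.3,     # fraction of steps spent ramping up
    BASE_MOM=0.85,
    MAX_MOM=0.95,
    DIV_FACTOR=25,     # initial_lr = MAX_LR / DIV_FACTOR
)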
def configure_optimizers(self):
    lr = float(self.cfg['train_params']['lr'])
    if self.cfg['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
    elif self.cfg['optimizer'] == 'adamw':
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=lr)
    elif self.cfg['optimizer'] == 'adamw_gcc2':
        optimizer = AdamW_GCC2(self.model.parameters(), lr=lr)
    elif self.cfg['optimizer'] == 'radam':
        optimizer = RAdam(self.model.parameters(), lr=lr)
    else:
        raise Exception('optimizer {} not supported'.format(self.cfg['optimizer']))
    self.opt = optimizer

    if self.cfg['scheduler']['type'] == 'CosineAnnealingWarmRestarts':
        T_mult = self.cfg['scheduler']['T_mult']
        T_0 = self.cfg['scheduler']['T_0']
        eta_min = float(self.cfg['scheduler']['eta_min'])
        sched = CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=T_mult,
                                            eta_min=eta_min, last_epoch=-1)
    elif self.cfg['scheduler']['type'] == 'OneCycleLR':
        max_lr = float(self.cfg['scheduler']['max_lr'])
        # self.cfg, not the undefined bare name cfg
        steps_per_epoch = self.cfg['scheduler']['steps_per_epoch']
        epochs = self.cfg['scheduler']['epochs']
        sched = OneCycleLR(optimizer, max_lr=max_lr,
                           steps_per_epoch=steps_per_epoch, epochs=epochs)
    else:
        raise Exception('scheduler {} not supported'.format(self.cfg['scheduler']['type']))

    sched = {'scheduler': sched, 'name': 'adam+{}'.format(self.cfg['scheduler']['type'])}
    return [optimizer], [sched]
def fit(self, train_dl, valid_dl, epochs, lr, metrics=None, optimizer=None,
        scheduler=None):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.model.to(device)
    optimizer = optimizer or Adam(self.model.parameters(), lr)
    # scheduler=False disables scheduling; scheduler=None selects the default.
    if scheduler is not False:
        scheduler = scheduler or OneCycleLR(optimizer, lr, epochs * len(train_dl))
    else:
        scheduler = None
    self.train_stats = TrainTracker(metrics, validate=(valid_dl is not None))
    bar = master_bar(range(epochs))
    bar.write(self.train_stats.metrics_names, table=True)
    for epoch in bar:
        self.model.train()
        for batch in progress_bar(train_dl, parent=bar):
            batch = batch_to_device(batch, device)
            loss = self._train_batch(batch, optimizer, scheduler)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if scheduler:
                scheduler.step()
            self.train_stats.update_train_loss(loss)
        valid_outputs = []
        if valid_dl:
            self.model.eval()
            for batch in progress_bar(valid_dl, parent=bar):
                batch = batch_to_device(batch, device)
                output = self._valid_batch(batch)
                valid_outputs.append(output)
        self.train_stats.log_epoch_results(valid_outputs)
        bar.write(self.train_stats.get_metrics_values(), table=True)
def experiments(train_loader, test_loader, norm_type, l1_factor, l2_factor,
                dropout, epochs):
    train_losses = []
    test_losses = []
    train_accuracy = []
    test_accuracy = []
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = m.Net(norm_type, dropout).to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.015, momentum=0.7,
                          weight_decay=l2_factor)
    scheduler = OneCycleLR(optimizer, max_lr=0.015, epochs=epochs,
                           steps_per_epoch=len(train_loader))
    for epoch in range(1, epochs + 1):
        print(f'Epoch {epoch}:')
        trn.train(model, device, train_loader, optimizer, epoch,
                  train_accuracy, train_losses, l1_factor, scheduler)
        tst.test(model, device, test_loader, test_accuracy, test_losses)
    return (train_accuracy, train_losses, test_accuracy, test_losses), model
def create_optimizer_and_scheduler(self, num_training_steps: int):
    """
    Setup the optimizer and the learning rate scheduler.

    This overrides super in a way that just customizes the lr scheduler
    while the optimizer remains the default.
    """
    # Set lr scheduler to dummy variable so it's not created in the call to super.
    self.lr_scheduler = ...

    # Create just the optimizer.
    super().create_optimizer_and_scheduler(num_training_steps)

    # Now define the lr scheduler, given the optimizer.
    self.lr_scheduler = OneCycleLR(
        self.optimizer,
        total_steps=num_training_steps,
        max_lr=self.max_lr,
        pct_start=self.pct_start,
        anneal_strategy=self.anneal_strategy,
        cycle_momentum=self.cycle_momentum,
        base_momentum=self.base_momentum,
        max_momentum=self.max_momentum,
        div_factor=self.div_factor,
        final_div_factor=self.final_div_factor,
        last_epoch=self.last_epoch,
    )
def configure_optimizers(
    self
) -> Optional[Union[Optimizer, Sequence[Optimizer], Dict, Sequence[Dict],
                    Tuple[List, List]]]:
    optimizer = AdamW(self.parameters(), lr=self.lr, weight_decay=self.wd)
    if self.scheduler is None:
        return optimizer
    elif self.scheduler == 'plateau':
        return {
            'optimizer': optimizer,
            'lr_scheduler': ReduceLROnPlateau(optimizer, factor=0.1, patience=25,
                                              eps=1e-4, cooldown=0, min_lr=2e-7,
                                              verbose=True),
            'monitor': 'val_loss',
        }
    elif self.scheduler == '1cycle':
        return {
            'optimizer': optimizer,
            'lr_scheduler': OneCycleLR(optimizer, max_lr=10**2 * self.lr,
                                       total_steps=self.total_steps)
        }
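Worked numbers for the '1cycle' branch above: OneCycleLR overrides the optimizer's lr, so with max_lr = 100 * self.lr and the PyTorch defaults div_factor=25 and final_div_factor=1e4, the cycle actually starts at 4 * self.lr rather than self.lr.

# Worked example with an illustrative base lr.
lr = 1e-4
max_lr = 10**2 * lr         # 1e-2, as in the '1cycle' branch
initial_lr = max_lr / 25    # 4e-4 = 4 * lr (default div_factor=25)
min_lr = initial_lr / 1e4   # 4e-8 (default final_div_factor=1e4)
print(initial_lr, min_lr)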
def configure_optimizers(self):
    VGAE_optimizer = AdamW(self.module.VGAE.parameters(), lr=self.lr)
    VGAE_lr_scheduler = {
        'scheduler': OneCycleLR(
            VGAE_optimizer,
            max_lr=10 * self.lr,
            total_steps=self.numsteps,
            anneal_strategy="cos",
            final_div_factor=10,
        ),
        'name': 'learning_rate',
        'interval': 'step',
        'frequency': 1
    }
    disc_optimizer = AdamW(self.module.discriminator.parameters(), lr=self.lr)
    disc_lr_scheduler = {
        'scheduler': OneCycleLR(
            disc_optimizer,
            max_lr=10 * self.lr,
            total_steps=self.numsteps,
            anneal_strategy="cos",
            final_div_factor=10,
        ),
        'name': 'learning_rate',
        'interval': 'step',
        'frequency': 1
    }
    return [{
        'optimizer': VGAE_optimizer,
        'frequency': 10,
        'lr_scheduler': VGAE_lr_scheduler
    }, {
        'optimizer': disc_optimizer,
        'frequency': 10,
        'lr_scheduler': disc_lr_scheduler
    }]
def model_load(trainloader, n_f, n_r_f, n_fr_f, msg_dim, hidden, aggr, init_lr,
               args, data_params):
    n = data_params['n']
    dim = data_params['dim'] * 2
    sim = data_params['sim']
    sparsity_mode = args.connection_value
    sparsity_prior = args.sparsity_prior
    total_epochs = args.epochs

    ogn = OGN(n_f, n_r_f, n_fr_f, msg_dim, dim, hidden=hidden,
              edge_index=get_edge_index(n, sim), aggr=aggr,
              sparsity_mode=sparsity_mode, sparsity_prior=sparsity_prior,
              test=args.test).cuda()
    rogn = ROGN(n_f, n_r_f, n_fr_f, msg_dim, dim, sparsity_mode, hidden=hidden,
                edge_index=get_edge_index(n, sim), aggr=aggr).cuda()

    opt = torch.optim.Adam(ogn.parameters(), lr=init_lr, weight_decay=1e-8)
    ropt = torch.optim.Adam(rogn.parameters(), lr=init_lr, weight_decay=1e-8)

    batch_per_epoch = len(trainloader)
    sched = OneCycleLR(opt, max_lr=init_lr, steps_per_epoch=batch_per_epoch,
                       epochs=total_epochs, final_div_factor=1e5)
    rsched = OneCycleLR(ropt, max_lr=init_lr, steps_per_epoch=batch_per_epoch,
                        epochs=total_epochs, final_div_factor=1e5)
    return ogn, rogn, opt, ropt, sched, rsched, total_epochs, batch_per_epoch
def load_optimizer(model, cfg, state, steps_per_epoch=None):
    resuming = cfg['resume'].get('path', False) is not False
    resetting_epoch = cfg['resume'].get('epoch', 0) == 1 and resuming
    resetting_optimizer = cfg['resume'].get('reset_optimizer', False) is not False

    # Create optimizer
    lr = cfg['training']['lr']
    wd = cfg['training']['weight_decay']
    opt = cfg['training']['optimizer']
    if opt == 'adam':
        optimizer = Adam((p for p in model.parameters() if p.requires_grad),
                         lr=lr, weight_decay=wd)
    elif opt == 'adamw':
        optimizer = AdamW((p for p in model.parameters() if p.requires_grad),
                          lr=lr, weight_decay=wd)
    else:
        raise ValueError(f"Unknown optimizer {opt}")

    # Load optimizer weights if in state dict
    opt_path = state.get('optimizer', None)
    if opt_path:
        if resetting_optimizer:
            print("Resetting optimizer state")
        else:
            optimizer.load_state_dict(opt_path)

    # Load scheduler if in state dict AND if we're not resetting the epoch or optimizer
    scheduler = state.get('scheduler', None)
    sched = cfg['training'].get('scheduler', None)
    if scheduler and not resetting_epoch and not resetting_optimizer:
        print("Loaded scheduler from state dict")
        return optimizer, scheduler
    # Otherwise create scheduler if needed
    elif sched:
        # If we are resuming but not resetting the epoch to 1, warn the user
        # that we aren't continuing the old scheduler.
        if resuming and not resetting_epoch and not resetting_optimizer:
            print(
                "WARNING: Resuming training from a checkpoint without resetting "
                "the epochs or optimizer, and yet no scheduler found - creating "
                "new scheduler")
        if sched == 'one_cycle':
            assert steps_per_epoch
            scheduler = OneCycleLR(optimizer, max_lr=cfg['training']['lr'],
                                   steps_per_epoch=steps_per_epoch,
                                   epochs=cfg['training']['n_epochs'])
            print("Using one-cycle LR")
        else:
            scheduler = None
    return optimizer, scheduler
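An illustrative cfg fragment for load_optimizer; every value is a placeholder, not a project default.

# Placeholder cfg covering the keys load_optimizer reads.
cfg = {
    'resume': {'path': False},         # or a checkpoint path when resuming
    'training': {
        'optimizer': 'adamw',
        'lr': 3e-4,
        'weight_decay': 1e-2,
        'scheduler': 'one_cycle',
        'n_epochs': 50,
    },
}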
def get_scheduler(opts, optimizer):
    return {
        # total_steps=opts.epochs implies one scheduler.step() per epoch,
        # not per batch.
        'OneCycleLR': lambda: OneCycleLR(
            optimizer, max_lr=opts.lr, total_steps=opts.epochs,
            anneal_strategy='linear'),
        'CosineAnnealingLR': lambda: CosineAnnealingLR(
            optimizer, T_max=opts.epochs, eta_min=0, last_epoch=opts.last_epoch),
        'CosineAnnealingWarmRestarts': lambda: CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0, last_epoch=opts.last_epoch)
    }[opts.scheduler_name]()
def get_scheduler(opts, optimizer):
    return {
        'OneCycleLR': lambda: LRScheduler(OneCycleLR(
            optimizer, max_lr=opts.lr, total_steps=opts.total_steps)),
        'CosineAnnealingLR': lambda: LRScheduler(CosineAnnealingLR(
            optimizer, T_max=opts.total_steps, eta_min=0)),
        'CosineAnnealingWarmRestarts': lambda: LRScheduler(CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0)),
    }[opts.scheduler_name]()
def get_scheduler(scheduler_name: str, optimizer, lr, num_epochs,
                  batches_in_epoch=None):
    if scheduler_name is None or scheduler_name.lower() == "none":
        return None

    if scheduler_name.lower() == "poly":
        return PolyLR(optimizer, num_epochs, gamma=0.9)

    if scheduler_name.lower() == "cos":
        return CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-5)

    if scheduler_name.lower() == "cosr":
        return CosineAnnealingWarmRestarts(optimizer,
                                           T_0=max(2, num_epochs // 4),
                                           eta_min=1e-5)

    if scheduler_name.lower() in {"1cycle", "one_cycle"}:
        # Note: lr_range/num_steps/warmup_fraction/decay_fraction is not the
        # torch.optim.lr_scheduler.OneCycleLR signature; the OneCycleLR
        # imported here must come from a library with that interface (it
        # matches Catalyst's OneCycleLRWithWarmup-style scheduler).
        return OneCycleLR(optimizer,
                          lr_range=(lr, 1e-6, 1e-5),
                          num_steps=batches_in_epoch,
                          warmup_fraction=0.05, decay_fraction=0.1)

    if scheduler_name.lower() == "exp":
        return ExponentialLR(optimizer, gamma=0.95)

    if scheduler_name.lower() == "clr":
        return CyclicLR(
            optimizer,
            base_lr=1e-6,
            max_lr=lr,
            step_size_up=batches_in_epoch // 4,
            # mode='exp_range',
            gamma=0.99,
        )

    if scheduler_name.lower() == "multistep":
        return MultiStepLR(optimizer,
                           milestones=[
                               int(num_epochs * 0.5),
                               int(num_epochs * 0.7),
                               int(num_epochs * 0.9)
                           ],
                           gamma=0.3)

    if scheduler_name.lower() == "simple":
        return MultiStepLR(
            optimizer,
            milestones=[int(num_epochs * 0.4), int(num_epochs * 0.7)],
            gamma=0.4)

    raise KeyError(scheduler_name)
def get_one_cycle(optimizer, start_lr, max_lr, final_lr, num_epochs,
                  steps_per_epoch):
    # OneCycleLR derives its boundaries from ratios:
    #   initial_lr = max_lr / div_factor
    #   min_lr     = initial_lr / final_div_factor
    # so invert those relations to honour explicit start/final lrs.
    start_div_factor = max_lr / start_lr
    final_div_factor = start_lr / final_lr
    return OneCycleLR(optimizer, epochs=num_epochs,
                      steps_per_epoch=steps_per_epoch, max_lr=max_lr,
                      div_factor=start_div_factor,
                      final_div_factor=final_div_factor)
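A worked usage sketch with illustrative numbers: requesting start_lr=1e-3, max_lr=1e-2, final_lr=1e-5 yields div_factor=10 and final_div_factor=100, which reproduce the requested boundaries exactly.

# Usage sketch for get_one_cycle; model and optimizer are illustrative.
import torch
from torch.optim import SGD

model = torch.nn.Linear(4, 1)
opt = SGD(model.parameters(), lr=1.0)  # lr is overridden by the scheduler
sched = get_one_cycle(opt, start_lr=1e-3, max_lr=1e-2, final_lr=1e-5,
                      num_epochs=5, steps_per_epoch=100)
print(sched.get_last_lr())  # [0.001], i.e. the requested start_lr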
def get_lr_scheduler(optimizer, lr_config, **kwargs):
    scheduler_name = lr_config["name"]
    step_per_epoch = False
    if scheduler_name == '1cycle-yolo':
        def one_cycle(y1=0.0, y2=1.0, steps=100):
            # lambda function for sinusoidal ramp from y1 to y2
            return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1

        lf = one_cycle(1, 0.158, kwargs['num_epochs'])  # cosine 1->hyp['lrf']
        scheduler = LambdaLR(optimizer, lr_lambda=lf)
        step_per_epoch = True
    elif scheduler_name == '1cycle':
        scheduler = OneCycleLR(optimizer,
                               max_lr=0.001,
                               epochs=kwargs['num_epochs'],
                               steps_per_epoch=int(len(kwargs["trainset"]) / kwargs["batch_size"]),
                               pct_start=0.1,
                               anneal_strategy='cos',
                               final_div_factor=10**5)
        step_per_epoch = False
    elif scheduler_name == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5,
                                      patience=1, verbose=False,
                                      threshold=0.0001, threshold_mode='abs',
                                      cooldown=0, min_lr=1e-8, eps=1e-08)
        step_per_epoch = True
    elif scheduler_name == 'cosine':
        scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                T_0=kwargs['num_epochs'],
                                                T_mult=1, eta_min=0.0001,
                                                last_epoch=-1, verbose=False)
        step_per_epoch = False
    elif scheduler_name == 'cosine2':
        scheduler = CosineWithRestarts(optimizer, T_max=kwargs['train_len'])
        step_per_epoch = False
    else:
        # Fail loudly instead of returning an unbound local for unknown names.
        raise ValueError('Unknown lr scheduler: {}'.format(scheduler_name))
    return scheduler, step_per_epoch
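A hypothetical caller sketch showing how the step_per_epoch flag is meant to be consumed; every name besides get_lr_scheduler is a placeholder.

# Sketch of a training loop honouring the step_per_epoch flag.
scheduler, step_per_epoch = get_lr_scheduler(
    optimizer, {"name": "1cycle"},
    num_epochs=num_epochs, trainset=trainset, batch_size=batch_size)

for epoch in range(num_epochs):
    for batch in loader:
        train_step(batch)
        if not step_per_epoch:
            scheduler.step()   # per-batch stepping, as OneCycleLR requires
    if step_per_epoch:
        scheduler.step()       # per-epoch stepping for the other schedulers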
def __init__(self, optim_type, model, lr, momentum, max_lr, len_loader,
             weight_decay=0):
    self.optimizer = getattr(optim, optim_type)(
        model.parameters(), lr=lr, momentum=momentum,
        weight_decay=weight_decay)
    self.scheduler = OneCycleLR(self.optimizer,
                                max_lr=max_lr,
                                steps_per_epoch=len_loader,
                                epochs=50,
                                div_factor=10,
                                final_div_factor=1,   # end at the initial lr
                                pct_start=10 / 50)    # 10 warm-up epochs of 50
def init_train(self, con_weight: float = 1.0):
    test_img = self.get_test_image()
    meter = AverageMeter("Loss")
    self.writer.flush()
    lr_scheduler = OneCycleLR(self.optimizer_G, max_lr=0.9999,
                              steps_per_epoch=len(self.dataloader),
                              epochs=self.init_train_epoch)
    # Note: OneCycleLR rewrites the param-group lr on every step(), so this
    # manual assignment only affects updates before the first scheduler step.
    for g in self.optimizer_G.param_groups:
        g['lr'] = self.init_lr

    for epoch in tqdm(range(self.init_train_epoch)):
        meter.reset()
        for i, (style, smooth, train) in enumerate(self.dataloader, 0):
            # train = transform(test_img).unsqueeze(0)
            self.G.zero_grad(set_to_none=self.grad_set_to_none)
            train = train.to(self.device)
            generator_output = self.G(train)
            # content_loss = loss.reconstruction_loss(generator_output, train) * con_weight
            content_loss = self.loss.content_loss(generator_output, train) * con_weight
            # content_loss = F.mse_loss(train, generator_output) * con_weight
            content_loss.backward()
            self.optimizer_G.step()
            lr_scheduler.step()
            meter.update(content_loss.detach())

        self.writer.add_scalar(f"Loss : {self.init_time}", meter.sum.item(), epoch)
        self.write_weights(epoch + 1, write_D=False)
        self.eval_image(epoch, f'{self.init_time} reconstructed img', test_img)

    for g in self.optimizer_G.param_groups:
        g['lr'] = self.G_lr