def __init__(self, model, data_tuple_dict, config):
    self.model = model
    self.criterion = nn.BCEWithLogitsLoss()
    base_optim = Lamb(params=self.model.parameters(), lr=1e-5,
                      weight_decay=1.2e-6, min_trust=0.25)
    self.optim = Lookahead(base_optimizer=base_optim, k=5, alpha=0.8)
    self.lr_scheduler = CyclicLR(self.optim, base_lr=1e-5, max_lr=5e-5,
                                 cycle_momentum=False)
    self.train_tuple = data_tuple_dict["train_tuple"]
    self.valid_tuple = data_tuple_dict["valid_tuple"]
    self.test_tuple = data_tuple_dict["test_tuple"]
    self.device = (torch.device("cuda") if torch.cuda.is_available()
                   else torch.device("cpu"))
    self.output = home + "/snap/"
    os.makedirs(self.output, exist_ok=True)
    self.model.to(self.device)
    self.adaptive = config["adaptive_enable"]
    self.measure_flops = config["measure_flops"]
    if self.measure_flops:
        from thop import clever_format, profile
    self.sparse = sparse = config["sparse_enable"]
    if config["load_model"] is None:
        load_lxmert_qa(load_lxmert_qa_path, self.model,
                       label2ans=self.train_tuple[0].label2ans)

def range_test(self, trainloader, testloader, start_lr, end_lr, epochs):
    # Step size for the LR cycle policy: one-cycle, triangular policy,
    # reaching the max LR at the end of the last epoch.
    step_size_up = epochs * len(trainloader)
    lr_scheduler = CyclicLR(self.optimizer, base_lr=start_lr, max_lr=end_lr,
                            step_size_up=step_size_up, last_epoch=-1)
    print("Running LR Range test")
    for epoch in range(1, epochs + 1):
        cur_lr1 = self.optimizer.state_dict()["param_groups"][0]["lr"]
        train_acc, train_loss = self.train(trainloader, lr_scheduler=lr_scheduler)
        test_acc, test_loss = self.test(testloader)
        cur_lr2 = self.optimizer.state_dict()["param_groups"][0]["lr"]
        # Store the epoch train and test results.
        self.addToHistory(train_acc, train_loss, test_acc, test_loss, cur_lr2)
        print("Epoch={} Accuracy={} lr={} ==> {}".format(epoch, test_acc, cur_lr1, cur_lr2))

def find_bounds_clr(model, loader, optimizer, criterion, device, dtype,
                    min_lr=8e-6, max_lr=8e-5, step_size=2000,
                    mode='triangular', save_path='.'):
    model.train()
    correct1, correct5 = 0, 0
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size_up=step_size, mode=mode)
    # Assumes step_size is a multiple of the number of batches per epoch.
    epoch_count = step_size // len(loader)
    accuracy = []
    for _ in trange(epoch_count):
        for batch_idx, (data, target) in enumerate(tqdm(loader)):
            if scheduler is not None:
                scheduler.step()
            data, target = data.to(device=device, dtype=dtype), target.to(device=device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            corr = correct(output, target)
            accuracy.append(corr[0] / data.shape[0])
    lrs = np.linspace(min_lr, max_lr, step_size)
    plt.plot(lrs, accuracy)
    # Save before showing: with some backends, show() clears the figure.
    plt.savefig(os.path.join(save_path, 'find_bounds_clr.pdf'))
    plt.show()
    np.save(os.path.join(save_path, 'acc.npy'), accuracy)
    return

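# --- Hedged sketch (not from the original sources): the core mechanic the
# LR range-test helpers above rely on. CyclicLR with step_size_up equal to
# the total number of batches sweeps the LR linearly from base_lr to max_lr,
# one scheduler.step() per batch, while the loss at each LR is recorded.
# The toy model, data, and LR bounds below are assumptions.
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CyclicLR


def lr_range_sweep(num_batches=100, base_lr=1e-6, max_lr=1e-1):
    model = nn.Linear(10, 1)
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=base_lr, momentum=0.9)
    scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                         step_size_up=num_batches)
    lrs, losses = [], []
    for _ in range(num_batches):
        x, y = torch.randn(32, 10), torch.randn(32, 1)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        lrs.append(optimizer.param_groups[0]['lr'])
        losses.append(loss.item())
        scheduler.step()  # advance the LR one linear step toward max_lr
    return lrs, losses
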
def configure_optimizers(self):
    for p in self.aligner.lm.parameters():
        p.requires_grad = False
    grad_params = list(filter(lambda p: p.requires_grad, self.aligner.parameters()))
    optimizer = torch.optim.AdamW(grad_params, lr=self.hparams.learning_rate)
    if self.hparams.scheduler == 'cosine_restarts':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)
    elif self.hparams.scheduler == 'cosine':
        scheduler = CosineAnnealingLR(optimizer, T_max=self.hparams.epochs)
    elif self.hparams.scheduler == 'triangular':
        base_lr = 1e-8
        steps = int(np.log2(self.hparams.learning_rate / base_lr))
        steps = self.hparams.epochs // steps
        scheduler = CyclicLR(optimizer, base_lr,
                             max_lr=self.hparams.learning_rate,
                             step_size_up=steps, mode='triangular2',
                             cycle_momentum=False)
    elif self.hparams.scheduler == 'steplr':
        m = 1e-6  # minimum learning rate
        steps = int(np.log2(self.hparams.learning_rate / m))
        steps = self.hparams.epochs // steps
        scheduler = StepLR(optimizer, step_size=steps, gamma=0.5)
    elif self.hparams.scheduler == 'none':
        return [optimizer]
    else:
        s = self.hparams.scheduler
        raise ValueError(f'`{s}` scheduler is not implemented.')
    return [optimizer], [scheduler]

def main():
    args = parse_args()
    train_loader, val_loader = get_loaders('catalogsearch_query.csv')
    model = LTRSimpleModel(num_features=len(num_fts))
    optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = CyclicLR(optimizer, base_lr=args.lr, max_lr=1e-2,
                         step_size_up=int(len(train_loader.dataset) / 2),
                         step_size_down=int(len(train_loader.dataset) / 2),
                         cycle_momentum=False)
    print(scheduler)
    train(model=model, train_loader=train_loader, val_loader=val_loader,
          optimizer=optimizer, epochs=args.epochs, scheduler=scheduler,
          problem_type='regression')

def cyclic_lr_base(parameters, mode="triangular"):
    # Fill in defaults for "min_lr", "max_lr", momentum and scaling options
    # if they are not present in parameters["scheduler"].
    if "min_lr" not in parameters["scheduler"]:
        parameters["scheduler"]["min_lr"] = parameters["learning_rate"] * 0.001
    if "max_lr" not in parameters["scheduler"]:
        parameters["scheduler"]["max_lr"] = parameters["learning_rate"]
    if "gamma" not in parameters["scheduler"]:
        parameters["scheduler"]["gamma"] = 0.1
    if "scale_mode" not in parameters["scheduler"]:
        parameters["scheduler"]["scale_mode"] = "cycle"
    if "cycle_momentum" not in parameters["scheduler"]:
        parameters["scheduler"]["cycle_momentum"] = False
    if "base_momentum" not in parameters["scheduler"]:
        parameters["scheduler"]["base_momentum"] = 0.8
    if "max_momentum" not in parameters["scheduler"]:
        parameters["scheduler"]["max_momentum"] = 0.9
    return CyclicLR(
        parameters["optimizer_object"],
        parameters["scheduler"]["min_lr"],
        parameters["scheduler"]["max_lr"],
        step_size_up=parameters["scheduler"]["step_size"],
        step_size_down=None,
        mode=mode,
        gamma=1.0,
        scale_fn=None,
        scale_mode=parameters["scheduler"]["scale_mode"],
        cycle_momentum=parameters["scheduler"]["cycle_momentum"],
        base_momentum=parameters["scheduler"]["base_momentum"],
        max_momentum=parameters["scheduler"]["max_momentum"],
    )

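# --- Hedged usage sketch for cyclic_lr_base above. The `parameters` dict is
# assumed to carry "learning_rate", an "optimizer_object", and a "scheduler"
# sub-dict that provides at least "step_size"; every other scheduler key
# falls back to the defaults the function fills in. Model and step size are
# placeholders.
import torch
from torch import nn

_model = nn.Linear(4, 2)
_parameters = {
    "learning_rate": 1e-3,
    "optimizer_object": torch.optim.SGD(_model.parameters(), lr=1e-3, momentum=0.9),
    "scheduler": {"step_size": 500},
}
_scheduler = cyclic_lr_base(_parameters, mode="triangular")
# _scheduler.step() is then called once per optimizer step.
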
def get_scheduler(self, optimizer) -> object:
    if "plateau" == self.hparams.scheduler:
        return ReduceLROnPlateau(optimizer)
    elif "plateau+warmup" == self.hparams.scheduler:
        plateau = ReduceLROnPlateau(optimizer)
        return GradualWarmupScheduler(
            optimizer,
            multiplier=self.hparams.warmup_factor,
            total_epoch=self.hparams.warmup_epochs,
            after_scheduler=plateau,
        )
    elif "cyclic" == self.hparams.scheduler:
        return CyclicLR(
            optimizer,
            base_lr=self.learning_rate / 100,
            max_lr=self.learning_rate,
            step_size_up=4000 / self.batch_size,
        )
    elif "cosine" == self.hparams.scheduler:
        return CosineAnnealingLR(optimizer, self.hparams.max_epochs)
    elif "cosine+warmup" == self.hparams.scheduler:
        cosine = CosineAnnealingLR(
            optimizer, self.hparams.max_epochs - self.hparams.warmup_epochs)
        return GradualWarmupScheduler(
            optimizer,
            multiplier=self.hparams.warmup_factor,
            total_epoch=self.hparams.warmup_epochs,
            after_scheduler=cosine,
        )
    else:
        raise NotImplementedError("Not a valid scheduler configuration.")

def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()
    print("Initializing datasets and dataloaders")
    train_path = "/content/t2/train"
    test_path = "/content/t2/val"
    # model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'], num_classes=200)
    trainloader, testloader = dataloader.get_imagenet_loaders(
        train_path, test_path, transform_train=None, transform_test=None)
    model_new = basemodelclass.S11ResNet()

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults,
                                project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")

    # trainloader, testloader = dataloader.get_train_test_dataloader_cifar10()
    optimizer = optim.SGD(model_new.parameters(), lr=config.lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay)
    criterion = nn.CrossEntropyLoss()

    cycle_momentum = True if config.cycle_momentum == "True" else False
    print("Momentum cycling set to {}".format(cycle_momentum))
    if config.lr_policy == "clr":
        scheduler = CyclicLR(optimizer, base_lr=config.lr * 0.01,
                             max_lr=config.lr, mode='triangular', gamma=1.,
                             cycle_momentum=True, step_size_up=256)
    else:
        scheduler = OneCycleLR(optimizer, config.ocp_max_lr,
                               epochs=config.epochs,
                               cycle_momentum=cycle_momentum,
                               steps_per_epoch=len(trainloader),
                               base_momentum=config.momentum,
                               max_momentum=0.95,
                               pct_start=config.split_pct,
                               anneal_strategy=config.anneal_strategy,
                               div_factor=config.div_factor,
                               final_div_factor=config.final_div_factor)

    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, dataloader.classes, wandb=wandb, optimizer_in=optimizer,
        scheduler=scheduler, prev_saved_model=saved_model_path,
        criterion=criterion, save_best=True, lars_mode=False, batch_step=True)

def make_scheduler_from_config(optimizer, config):
    if 'schedule' in config:
        if config['schedule'] is None:
            return None
        if config['schedule'] == 'reduce_lr_on_plateau':
            return ReduceLROnPlateau(optimizer, factor=0.1, patience=10)
        elif config['schedule'] == 'cosine_annealing_warm_restarts':
            T_0 = get_dict_value_or_default(dict_=config, key='T_0', default_value=4)
            return CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=T_0)
        elif config['schedule'] == 'cosine_annealing':
            return CosineAnnealingLR(optimizer, T_max=4)
        elif config['schedule'] == 'exponential':
            return ExponentialLR(optimizer, gamma=0.99)
        elif config['schedule'] == 'cyclic':
            max_lr = get_dict_value_or_default(config, 'max_lr', 1e-1)
            base_lr = get_dict_value_or_default(config, 'base_lr', 1e-4)
            step_size_down = get_dict_value_or_default(config, 'step_size_down', 2000)
            mode = get_dict_value_or_default(config, 'cycle_mode', 'triangular')
            return CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                            step_size_down=step_size_down, mode=mode)
        raise Exception('check your config, config not supported')
    else:
        return ReduceLROnPlateau(optimizer, factor=0.1, patience=5)

def configure_optimizers(self):
    if self.hparams['opt'] == 'sgd':
        opt = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                              momentum=0.9, weight_decay=5e-4)
    elif self.hparams['opt'] == 'adam':
        opt = torch.optim.Adam(self.parameters(), lr=self.hparams.lr,
                               weight_decay=5e-4)
    if self.hparams['sched'] == 'cyclic':
        scheduler = CyclicLR(optimizer=opt, base_lr=self.hparams.lr / 500,
                             max_lr=self.hparams.lr / 10)
    elif self.hparams['sched'] == 'cosine_annealing_warm_restarts':
        scheduler = CosineAnnealingWarmRestarts(
            optimizer=opt,
            T_0=2000,
            eta_min=self.hparams.lr / 1000.0,
            T_mult=1,
        )
    elif self.hparams['sched'] == 'exp':
        scheduler_steplr = ExponentialLR(opt, gamma=0.95)
        scheduler = GradualWarmupScheduler(
            opt, multiplier=1, total_epoch=5, after_scheduler=scheduler_steplr)
    self.sched = scheduler
    self.opt = opt
    return opt

def setup_model(args):
    """Returns a tuple of the model, criterion, optimizer and lr_scheduler."""
    print("Building model")
    model = C3D(inchannels=2, outchannels=64)
    criterion = nn.MSELoss(size_average=True)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay, nesterov=True)
    # See https://arxiv.org/abs/1608.03983
    if args.schedule.lower() == 'warm':
        lr_scheduler = CosineAnnealingLR(optimizer, T_max=(args.nEpochs // 10) + 1)
    if args.schedule.lower() == 'cyclical':
        # CyclicLR requires an explicit base_lr; args.lr / 10 is an assumed
        # lower bound, since the original call omitted the argument.
        lr_scheduler = CyclicLR(optimizer, base_lr=args.lr / 10,
                                max_lr=args.lr, mode='exp_range')
    if args.schedule.lower() == 'step':
        lr_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=args.lr_factor,
                                         patience=4, threshold=1e-3,
                                         threshold_mode='rel', verbose=True)
    return (model, criterion, optimizer, lr_scheduler)

def get_warmup_scheduler(optimizer, min_lr, max_lr, warmup_steps):
    if float(min_lr) == 0.0:
        min_lr = 1e-12
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size_up=warmup_steps)
    return scheduler

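# --- Hedged usage sketch for get_warmup_scheduler above: with step_size_up
# equal to the warmup length, CyclicLR ramps the LR linearly from base_lr to
# max_lr, so the first half-cycle doubles as a linear warmup; stepping past
# warmup_steps would start decaying the LR back toward base_lr. The dummy
# parameter, optimizer, and step counts are assumptions.
import torch
from torch import nn

_param = nn.Parameter(torch.zeros(1))
_opt = torch.optim.SGD([_param], lr=1e-4, momentum=0.9)
_warmup = get_warmup_scheduler(_opt, min_lr=0.0, max_lr=1e-4, warmup_steps=1000)
for _ in range(1000):
    _opt.step()
    _warmup.step()
assert abs(_opt.param_groups[0]['lr'] - 1e-4) < 1e-9  # warmup reached max_lr
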
def test_cycle_lr_triangular_mode_one_lr(self):
    target = [1, 2, 3, 4, 5, 4, 3, 2, 1, 2, 3]
    targets = [target, target]
    scheduler = CyclicLR(self.opt, base_lr=1, max_lr=5, step_size_up=4,
                         mode='triangular')
    self._test_cycle_lr(scheduler, targets, len(target))

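# --- Hedged sketch reproducing the expected triangular sequence above
# directly from torch.optim.lr_scheduler.CyclicLR (the dummy parameter and
# optimizer are assumptions; base_lr=1, max_lr=5, step_size_up=4 as in the
# test). Each step moves the LR by (max_lr - base_lr) / step_size_up = 1,
# up for four steps and back down for four steps.
import torch
from torch.optim.lr_scheduler import CyclicLR

_param = torch.nn.Parameter(torch.zeros(1))
_opt = torch.optim.SGD([_param], lr=1, momentum=0.9)
_sched = CyclicLR(_opt, base_lr=1, max_lr=5, step_size_up=4, mode='triangular')

_lrs = []
for _ in range(11):
    _lrs.append(_opt.param_groups[0]['lr'])
    _opt.step()
    _sched.step()
print(_lrs)  # [1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0, 2.0, 1.0, 2.0, 3.0]
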
def get_scheduler(scheduler_name: str, optimizer, lr, num_epochs, batches_in_epoch=None):
    if scheduler_name is None or scheduler_name.lower() == "none":
        return None
    if scheduler_name.lower() == "poly":
        return PolyLR(optimizer, num_epochs, gamma=0.9)
    if scheduler_name.lower() == "cos":
        return CosineAnnealingLR(optimizer, num_epochs, eta_min=1e-5)
    if scheduler_name.lower() == "cosr":
        return CosineAnnealingWarmRestarts(optimizer, T_0=max(2, num_epochs // 4), eta_min=1e-5)
    if scheduler_name.lower() in {"1cycle", "one_cycle"}:
        return OneCycleLR(optimizer,
                          lr_range=(lr, 1e-6, 1e-5),
                          num_steps=batches_in_epoch,
                          warmup_fraction=0.05,
                          decay_fraction=0.1)
    if scheduler_name.lower() == "exp":
        return ExponentialLR(optimizer, gamma=0.95)
    if scheduler_name.lower() == "clr":
        return CyclicLR(
            optimizer,
            base_lr=1e-6,
            max_lr=lr,
            step_size_up=batches_in_epoch // 4,
            # mode='exp_range', gamma=0.99,
        )
    if scheduler_name.lower() == "multistep":
        return MultiStepLR(optimizer,
                           milestones=[
                               int(num_epochs * 0.5),
                               int(num_epochs * 0.7),
                               int(num_epochs * 0.9)
                           ],
                           gamma=0.3)
    if scheduler_name.lower() == "simple":
        return MultiStepLR(
            optimizer,
            milestones=[int(num_epochs * 0.4), int(num_epochs * 0.7)],
            gamma=0.4)
    raise KeyError(scheduler_name)

def train_stage_two(dataset, best_model_file, model_file):
    bestaccuracy = 0.9
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    net = ResNet(BasicBlock, [3, 3, 4, 3]).to(device)  # [2,2,2,2]
    net.train()
    for parameter in net.parameters():
        if len(parameter.shape) > 1:
            torch.nn.init.xavier_uniform_(parameter)
    if isfile(best_model_file):
        net.load_state_dict(torch.load(best_model_file))
    train_loader = DataLoader(dataset, batch_size=64, shuffle=True)
    optimizer = AdamW(net.parameters(), lr=0.0001)
    scheduler = CyclicLR(optimizer, 0.000001, 0.0001, step_size_up=200,
                         mode='triangular2', cycle_momentum=False,
                         last_epoch=-1)
    L1 = torch.nn.L1Loss()
    BCE = torch.nn.BCEWithLogitsLoss()
    for epoch in range(50):
        running_accuracy = []
        for (images, targets) in tqdm(train_loader):
            images, targets = images.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = net(images)
            clsloss = BCE(outputs[:, 0], targets[:, 0])
            regloss = L1(outputs[:, 1:], targets[:, 1:])
            loss = clsloss + regloss
            cls_preds = np.greater(outputs[:, 0].cpu().detach().numpy(), 0)
            cls_truth = targets[:, 0].cpu().detach().numpy()
            correctness = np.equal(cls_preds, cls_truth).astype(int)
            accuracy = sum(correctness) / 64
            running_accuracy.append(accuracy)
            running_accuracy = running_accuracy[-10:]
            print(' clsloss ' + str(clsloss.cpu().detach().numpy())[:4] +
                  ' regloss ' + str(regloss.cpu().detach().numpy())[:4] +
                  ' accuracy ' + str(np.mean(running_accuracy)), end='\r')
            if np.mean(running_accuracy) > bestaccuracy:
                bestaccuracy = np.mean(running_accuracy)
                torch.save(net.state_dict(), best_model_file)
                # print('totalloss', str(loss.detach().numpy())[:4], 'saved!', end='\n')
            else:
                pass
                # print('totalloss', str(loss.detach().numpy())[:4] + ' ', end='\n')
            loss.backward()
            optimizer.step()
            scheduler.step()
            # if idx % 5 == 0:
            #     print('\n', outputs[0].cpu().detach().numpy(), targets[0].cpu().detach().numpy(), '\n')
            # idx += 1
        torch.save(net.state_dict(), model_file)
        print(epoch)

def test_cycle_lr_triangular_mode(self):
    target_1 = [1, 2, 3, 4, 5, 4, 3, 2, 1, 2, 3]
    target_2 = list(map(lambda x: x + 1, target_1))
    targets = [target_1, target_2]
    scheduler = CyclicLR(self.opt, base_lr=[1, 2], max_lr=[5, 6],
                         step_size_up=4, mode='triangular')
    self._test_cycle_lr(scheduler, targets, len(target_1))

def test_cycle_lr_triangular2_mode_one_lr(self):
    target = [1, 2, 3, 4, 5, 4, 3, 2, 1, 1.5, 2.0, 2.5, 3.0, 2.5, 2.0, 1.5, 1] + \
             [1.25, 1.50, 1.75, 2.00, 1.75]
    targets = [target, target]
    scheduler = CyclicLR(self.opt, base_lr=1, max_lr=5, step_size_up=4,
                         mode='triangular2')
    self._test_cycle_lr(scheduler, targets, len(target))

def train(self, train_loader, eval_loader, epoch):
    # Set up the optimizer.
    if self.args.swa:
        logger.info('SWA training')
        base_opt = torch.optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        optimizer = SWA(base_opt, swa_start=self.args.swa_start,
                        swa_freq=self.args.swa_freq, swa_lr=self.args.swa_lr)
        scheduler = CyclicLR(optimizer, base_lr=5e-5, max_lr=7e-5,
                             step_size_up=(self.args.epochs * len(train_loader)
                                           / self.args.batch_accumulation),
                             cycle_momentum=False)
    else:
        logger.info('Adam training')
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.args.warmup,
            num_training_steps=(self.args.epochs * len(train_loader)
                                / self.args.batch_accumulation))
    bar = tqdm(range(self.args.train_steps), total=self.args.train_steps)
    train_batches = cycle(train_loader)
    loss_sum = 0.0
    start = time.time()
    self.model.train()
    for step in bar:
        batch = next(train_batches)
        input_ids, input_mask, segment_ids, label_ids = [t.to(self.device) for t in batch]
        loss, _ = self.model(input_ids=input_ids, token_type_ids=segment_ids,
                             attention_mask=input_mask, labels=label_ids)
        if self.gpu_num > 1:
            loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        # optimizer.update_swa()
        loss_sum += loss.cpu().item()
        train_loss = loss_sum / (step + 1)
        bar.set_description("loss {}".format(train_loss))
        if (step + 1) % self.args.eval_steps == 0:
            logger.info("***** Training result *****")
            logger.info('  time %.2fs ', time.time() - start)
            logger.info("  %s = %s", 'global_step', str(step + 1))
            logger.info("  %s = %s", 'train loss', str(train_loss))
            # Evaluate every eval_steps steps.
            self.result = {'epoch': epoch, 'global_step': step + 1, 'loss': train_loss}
            if self.args.swa:
                optimizer.swap_swa_sgd()
            self.evaluate(eval_loader, epoch)
            if self.args.swa:
                optimizer.swap_swa_sgd()
    if self.args.swa:
        optimizer.swap_swa_sgd()
    logging.info('The training of epoch ' + str(epoch + 1) + ' has finished.')

def test_cycle_lr_triangular2_mode(self):
    target_1 = [1, 2, 3, 4, 5, 4, 3, 2, 1, 1.5, 2.0, 2.5, 3.0, 2.5, 2.0, 1.5, 1] + \
               [1.25, 1.50, 1.75, 2.00, 1.75]
    target_2 = list(map(lambda x: x + 2, target_1))
    targets = [target_1, target_2]
    scheduler = CyclicLR(self.opt, base_lr=[1, 3], max_lr=[5, 7],
                         step_size_up=4, mode='triangular2')
    self._test_cycle_lr(scheduler, targets, len(target_1))

def configure_optimizers(self):
    if self.optimiser == 'sgd':
        optim = SGD(self.parameters(), lr=self.hparams.lr,
                    momentum=self.hparams.momentum)
        sched = CyclicLR(optim, base_lr=1e-8, max_lr=self.hparams.lr)
        return [optim], [sched]
    elif self.optimiser == 'adam':
        return AdamW(self.parameters(), lr=self.lr)
    else:
        raise NameError('invalid string passed to optimiser argument')

def CyclicLR_(self, base_lr=0.00001, max_lr=0.003, step_size_up=20,
              step_size_down=20, mode='triangular'):
    scheduler = CyclicLR(self.optimizer, base_lr=base_lr, max_lr=max_lr,
                         step_size_up=step_size_up,
                         step_size_down=step_size_down, mode=mode)
    return scheduler

def test_triangular_mode_step_size_up_down(self):
    target = [
        1.0, 2.0, 3.0, 4.0, 5.0, 13.0 / 3, 11.0 / 3, 9.0 / 3, 7.0 / 3,
        5.0 / 3, 1.0
    ]
    targets = [target, target]
    scheduler = CyclicLR(self.opt, base_lr=1, max_lr=5, step_size_up=4,
                         step_size_down=6, mode='triangular')
    self._test_cycle_lr(scheduler, targets, len(target))

def _run_cyclic_scheduler(steps: int) -> float:
    params = Parameter(torch.tensor([0], dtype=torch.float))  # type: ignore
    optimizer = AdamW([params], lr=1e-4)
    scheduler = CyclicLR(optimizer, base_lr=1e-6, max_lr=1e-4,
                         step_size_up=100, cycle_momentum=False)
    for _ in range(steps):
        optimizer.step()
        scheduler.step()  # type: ignore
    return scheduler.get_last_lr()[0]  # type: ignore

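# --- Hedged check for _run_cyclic_scheduler above (assumed call site): after
# one full ramp of step_size_up=100 steps, the scheduler sits at the peak of
# the cycle, i.e. max_lr = 1e-4.
_peak_lr = _run_cyclic_scheduler(100)
assert abs(_peak_lr - 1e-4) < 1e-12
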
def configure_optimizers(self):
    scheduler = None
    params = [p for p in self.parameters() if p.requires_grad]
    optimizer = RangerLars(params)
    # noinspection PyUnresolvedReferences
    if self.hparams.Train.scheduler == 'OneCycleLR':
        scheduler = OneCycleLR(
            optimizer,
            max_lr=self.hparams.Train.lr,
            epochs=self.hparams.Train.epochs,
            steps_per_epoch=self.hparams.Train.steps_per_epoch,
            pct_start=self.hparams.Train.Schedulers.OneCycleLR.pct_start,
            anneal_strategy=self.hparams.Train.Schedulers.OneCycleLR.anneal_strategy,
            cycle_momentum=False,
            div_factor=self.hparams.Train.Schedulers.OneCycleLR.div_factor)
    elif self.hparams.Train.scheduler == 'NCycleLR':
        scheduler = NCycleLR(
            optimizer,
            max_lr=self.hparams.Train.lr,
            n=self.hparams.Train.Schedulers.NCycleLR.n,
            lr_factor=self.hparams.Train.Schedulers.NCycleLR.lr_factor,
            epochs=self.hparams.Train.epochs,
            steps_per_cycle=self.hparams.Train.Schedulers.NCycleLR.steps_per_cycle,
            pct_start=self.hparams.Train.Schedulers.NCycleLR.pct_start,
            anneal_strategy=self.hparams.Train.Schedulers.NCycleLR.anneal_strategy,
            cycle_momentum=False,
            div_factor=self.hparams.Train.Schedulers.NCycleLR.div_factor)
    elif self.hparams.Train.scheduler == 'CyclicLR':
        scheduler = CyclicLR(
            optimizer,
            base_lr=self.hparams.Train.lr / 1e5,
            max_lr=self.hparams.Train.lr,
            step_size_up=self.hparams.Train.steps_per_epoch,
            mode=self.hparams.Train.Schedulers.CyclicLR.mode,
            gamma=self.hparams.Train.Schedulers.CyclicLR.gamma,
            cycle_momentum=False)
    elif self.hparams.Train.scheduler == 'ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(
            optimizer,
            factor=self.hparams.Train.Schedulers.ReduceLROnPlateau.factor,
            patience=self.hparams.Train.Schedulers.ReduceLROnPlateau.patience,
            verbose=True)
    schedulers = [{
        'scheduler': scheduler,
        'interval': self.hparams.Train.Schedulers.interval
    }]
    return [optimizer], schedulers

def dispatch_lr_scheduler(optimizer, args):
    if args.lr_scheduler is None:
        return IdleScheduler()
    elif args.lr_scheduler == 'StepLR':
        return StepLR(optimizer, step_size=args.step_lr_step_size,
                      gamma=args.step_lr_gamma)
    elif args.lr_scheduler == 'MultiStepLR':
        return MultiStepLR(optimizer, milestones=args.multistep_lr_milestones,
                           gamma=args.multistep_lr_gamma)
    elif args.lr_scheduler == 'CyclicLR':
        # CyclicLR requires both base_lr and max_lr; a max_lr of 10x the base
        # learning rate is an assumed placeholder, since the original call
        # omitted it.
        return CyclicLR(optimizer, base_lr=args.learning_rate,
                        max_lr=10 * args.learning_rate,
                        gamma=args.cyclic_lr_gamma)
    elif args.lr_scheduler == 'OneCycleLR':
        # max_lr and a step budget are required; the values below are assumed
        # placeholders (the original call passed only the optimizer).
        return OneCycleLR(optimizer, max_lr=args.learning_rate, total_steps=1000)
    elif args.lr_scheduler == 'CosineAnnealingLR':
        # The optimizer and T_max are required; T_max=50 is an assumed placeholder.
        return CosineAnnealingLR(optimizer, T_max=50)
    elif args.lr_scheduler == 'CosineAnnealingWarmRestarts':
        # T_0 is required; T_0=10 is an assumed placeholder.
        return CosineAnnealingWarmRestarts(optimizer, T_0=10)

def build_step_scheduler(self):
    scheduler = None
    cfg = self.cfg
    if cfg.solver.one_cycle and cfg.solver.num_epochs > 1:
        total_steps = cfg.solver.num_epochs * self.steps_per_epoch
        step_size_up = (cfg.solver.num_epochs // 2) * self.steps_per_epoch
        step_size_down = total_steps - step_size_up
        scheduler = CyclicLR(self.opt,
                             base_lr=cfg.solver.lr / 10,
                             max_lr=cfg.solver.lr,
                             step_size_up=step_size_up,
                             step_size_down=step_size_down,
                             cycle_momentum=False)
        # Fast-forward the scheduler when resuming from a checkpoint.
        for _ in range(self.start_epoch * self.steps_per_epoch):
            scheduler.step()
    return scheduler

def test_cycle_lr_exp_range_mode_one_lr(self):
    base_lr, max_lr = 1, 5
    diff_lr = max_lr - base_lr
    gamma = 0.9
    xs = [0, 0.25, 0.5, 0.75, 1, 0.75, 0.50, 0.25, 0, 0.25, 0.5, 0.75, 1]
    target = list(map(lambda x: base_lr + x[1] * diff_lr * gamma**x[0], enumerate(xs)))
    targets = [target, target]
    scheduler = CyclicLR(self.opt, base_lr=base_lr, max_lr=max_lr,
                         step_size_up=4, mode='exp_range', gamma=gamma)
    self._test_cycle_lr(scheduler, targets, len(target))

def configure_optimizers(self):
    optimizer = Adam(self.classifier.parameters(), lr=self.hparams.lr,
                     weight_decay=self.hparams.weight_decay)
    if self.hparams.scheduler == 'CosineAnnealingLR':
        scheduler = CosineAnnealingLR(optimizer, T_max=self.hparams.t_max,
                                      eta_min=self.hparams.min_lr,
                                      last_epoch=-1)
        return {'optimizer': optimizer, 'lr_scheduler': scheduler}
    elif self.hparams.scheduler == 'CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                T_0=self.hparams.T_0,
                                                eta_min=self.hparams.min_lr,
                                                last_epoch=-1)
        return {'optimizer': optimizer, 'lr_scheduler': scheduler}
    elif self.hparams.scheduler == 'CyclicLR':
        scheduler = CyclicLR(optimizer,
                             base_lr=self.hparams.min_lr,
                             max_lr=self.hparams.lr,
                             step_size_up=100,
                             step_size_down=1000,
                             mode='triangular2',
                             cycle_momentum=False)
        return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]
    elif self.hparams.scheduler == 'CosineAnnealingWarmupRestarts':
        scheduler = CosineAnnealingWarmupRestarts(
            optimizer=optimizer,
            first_cycle_steps=self.hparams.first_cycle_steps,
            warmup_steps=self.hparams.warmup_steps,
            min_lr=self.hparams.min_lr,
            max_lr=self.hparams.lr,
            gamma=self.hparams.gamma)
        return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]
    elif self.hparams.scheduler == 'ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(optimizer=optimizer, mode='max',
                                      factor=self.hparams.factor,
                                      patience=self.hparams.patience,
                                      min_lr=self.hparams.min_lr)
        return [optimizer], [{'scheduler': scheduler, 'monitor': 'val_score'}]
    return {'optimizer': optimizer}

def build_scheduler(cfg, optimizer):
    name_scheduler = cfg.lr_scheduler.type
    scheduler = None
    if name_scheduler == 'StepLR':
        # >>> train(...)
        # >>> validate(...)
        # >>> scheduler.step()
        scheduler = StepLR(optimizer=optimizer,
                           step_size=cfg.lr_scheduler.step_size,
                           gamma=cfg.lr_scheduler.gamma)
    elif name_scheduler == 'CosineAnnealingLR':
        scheduler = CosineAnnealingLR(optimizer=optimizer,
                                      T_max=cfg.lr_scheduler.T_max)
    elif name_scheduler == 'ReduceLROnPlateau':
        # >>> train(...)
        # >>> validate(...)
        # >>> scheduler.step(val_loss)
        scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                      mode=cfg.lr_scheduler.mode)
    elif name_scheduler == 'LambdaLR':
        # >>> train(...)
        # >>> validate(...)
        # >>> scheduler.step()
        scheduler = LambdaLR(optimizer=optimizer,
                             lr_lambda=cfg.lr_scheduler.lr_lambda)
    elif name_scheduler == 'MultiStepLR':
        # >>> train(...)
        # >>> validate(...)
        # >>> scheduler.step()
        scheduler = MultiStepLR(optimizer=optimizer,
                                milestones=cfg.lr_scheduler.milestones,
                                gamma=cfg.lr_scheduler.gamma)
    elif name_scheduler == 'CyclicLR':
        # >>> for epoch in range(10):
        # >>>     for batch in data_loader:
        # >>>         train_batch(...)
        # >>>         scheduler.step()
        scheduler = CyclicLR(optimizer=optimizer,
                             base_lr=cfg.lr_scheduler.base_lr,
                             max_lr=cfg.lr_scheduler.max_lr)
    elif name_scheduler == 'ExponentialLR':
        scheduler = ExponentialLR(optimizer=optimizer,
                                  gamma=cfg.lr_scheduler.gamma)
    elif name_scheduler == 'CosineAnnealingWarmRestarts':
        # >>> scheduler = CosineAnnealingWarmRestarts(optimizer, T_0, T_mult)
        # >>> for epoch in range(20):
        # >>>     scheduler.step()
        # Calling scheduler.step(26) jumps the schedule to epoch 26, so the
        # next plain scheduler.step() continues from epoch 27.
        scheduler = CosineAnnealingWarmRestarts(optimizer=optimizer,
                                                T_0=cfg.lr_scheduler.T_0,
                                                T_mult=cfg.lr_scheduler.T_mult)
    if scheduler is None:
        raise Exception('scheduler is wrong')
    return scheduler

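# --- Hedged usage sketch for build_scheduler above: `cfg` is assumed to be an
# attribute-style config (e.g. an OmegaConf node or a SimpleNamespace); only
# the CyclicLR branch is exercised here, and the LR bounds are placeholders.
from types import SimpleNamespace
import torch

_cfg = SimpleNamespace(lr_scheduler=SimpleNamespace(
    type='CyclicLR', base_lr=1e-4, max_lr=1e-2))
_opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=1e-4, momentum=0.9)
_sched = build_scheduler(_cfg, _opt)
# As the inline comments above note, _sched.step() is then called once per batch.
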
def test_triangular2_mode_step_size_up_down(self):
    base_target = [
        1.0, 3.0, 5.0, 13.0 / 3, 11.0 / 3, 9.0 / 3, 7.0 / 3, 5.0 / 3, 1.0,
        2.0, 3.0, 8.0 / 3, 7.0 / 3, 6.0 / 3, 5.0 / 3, 4.0 / 3, 1.0,
        3.0 / 2, 2.0, 11.0 / 6, 10.0 / 6, 9.0 / 6, 8.0 / 6, 7.0 / 6
    ]
    deltas = [2 * i for i in range(0, 2)]
    base_lrs = [1 + delta for delta in deltas]
    max_lrs = [5 + delta for delta in deltas]
    targets = [[x + delta for x in base_target] for delta in deltas]
    scheduler = CyclicLR(self.opt, base_lr=base_lrs, max_lr=max_lrs,
                         step_size_up=2, step_size_down=6, mode='triangular2')
    self._test_cycle_lr(scheduler, targets, len(base_target))