def train(cfg, args):
    logger = logging.getLogger('SSD.trainer')
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)

    lr = cfg.SOLVER.LR * args.num_gpus  # scale by num gpus
    optimizer = make_optimizer(cfg, model, lr)

    milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS]
    if args.scheduler == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=cfg.SOLVER.MAX_ITER, eta_min=0.026666)
    elif args.scheduler == 'cosine_warmup':
        # T_0 must be a positive integer number of iterations.
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=cfg.SOLVER.MAX_ITER // 50, eta_min=0.026666)
    else:
        scheduler = make_lr_scheduler(cfg, optimizer, milestones)

    arguments = {"iteration": 0}
    save_to_disk = dist_util.get_rank() == 0
    checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus
    train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed,
                                    max_iter=max_iter, start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer, scheduler, checkpointer,
                     device, arguments, args)
    return model
def __init__(self, para, target):
    # create optimizer
    # trainable = filter(lambda x: x.requires_grad, target.parameters())
    trainable = target.parameters()
    optimizer_name = para.optimizer
    lr = para.lr
    module = import_module('torch.optim')
    self.optimizer = getattr(module, optimizer_name)(trainable, lr=lr)

    # create scheduler
    milestones = para.milestones
    gamma = para.decay_gamma
    if para.lr_scheduler == "multi_step":
        self.scheduler = lr_scheduler.MultiStepLR(
            self.optimizer, milestones=milestones, gamma=gamma)
    elif para.lr_scheduler == "cosine":
        self.scheduler = lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=para.end_epoch, eta_min=1e-8)
    elif para.lr_scheduler == "cosineW":
        self.scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=10, T_mult=2, eta_min=1e-8)
    else:
        raise NotImplementedError
def configure_optimizers(self):
    params = self.parameters()
    params = list(filter(lambda p: p.requires_grad, params))
    weight_decay = self.hparams.get('weight_decay', 0)
    if self.hparams.optimizer == 'adam':
        optimizer = torch.optim.Adam(
            params, lr=self.learning_rate, weight_decay=weight_decay)
    elif self.hparams.optimizer == 'sgd':
        optimizer = torch.optim.SGD(
            params, lr=self.learning_rate, momentum=0.9,
            weight_decay=self.hparams.weight_decay, nesterov=self.hparams.nesterov)
    else:
        raise ValueError('Invalid optimizer type!')

    if self.hparams.lr_scheduler is None:
        return optimizer

    if self.hparams.lr_scheduler == 'step':
        scheduler = lrs.MultiStepLR(
            optimizer, self.hparams.lr_decay_steps, gamma=self.hparams.lr_decay_rate)
    elif self.hparams.lr_scheduler == 'cosine':
        scheduler = lrs.CosineAnnealingLR(
            optimizer, T_max=self.hparams.max_epochs, eta_min=self.hparams.final_lr)
    elif self.hparams.lr_scheduler == 'cosineanneal':
        scheduler = lrs.CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=1, eta_min=1e-5, last_epoch=-1)
    else:
        raise ValueError('Invalid lr_scheduler type!')
    return [optimizer], [scheduler]
def make_scheduler(optimizer, cfg):
    if cfg.SOLVER.SCHEDULER == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=cfg.SOLVER.SCHEDULER_T0,
            T_mult=cfg.SOLVER.SCHEDULER_T_MUL,
            eta_min=cfg.SOLVER.MIN_LR,
            last_epoch=-1
        )
    elif cfg.SOLVER.SCHEDULER == 'ReduceLROnPlateau':
        return lr_scheduler.ReduceLROnPlateau(
            optimizer=optimizer,
            mode=cfg.SOLVER.SCHEDULER_MODE,
            factor=cfg.SOLVER.SCHEDULER_REDFACT,
            patience=cfg.SOLVER.SCHEDULER_PATIENCE,
            min_lr=cfg.SOLVER.MIN_LR
        )
    elif cfg.SOLVER.SCHEDULER == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=cfg.SOLVER.T_MAX,
            eta_min=cfg.SOLVER.MIN_LR,
            last_epoch=-1
        )
    else:
        raise ValueError('Scheduler name not recognized!')
def pytorch_cos():
    model = AlexNet(num_classes=2)
    optimizer = optim.SGD(params=model.parameters(), lr=0.0001)
    epoch = 100
    len_loader = 100
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=2, T_mult=2, eta_min=1e-6, last_epoch=-1)

    plt.figure()
    x = []
    y = []
    for e in range(epoch):
        for i in range(len_loader):
            step = e + i / len_loader
            scheduler.step(step)
            lr = scheduler.get_last_lr()[0]
            x.append(step)
            y.append(lr)
    plt.plot(x, y)
    plt.xticks(np.arange(0, epoch + 1, 4))
    plt.show()
def configure_lr_scheduler(optimizer, cfg):
    r"""Return the learning rate scheduler for the trainable parameters.

    Basically, it returns the learning rate scheduler defined by
    :attr:`cfg.TRAIN.LR_SCHEDULER.SCHEDULER`. Some parameters for the learning rate
    scheduler are also defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.

    Currently, 4 popular learning rate schedulers are supported: step, multi_step,
    exponential and sgdr.

    TODO: directly fetch the scheduler class by getattr(lr_scheduler, cfg.SCHEDULER)
    and pass the relative parameters as a dict.

    Args:
        optimizer: the optimizer in the given ssds model, check
            :meth:`configure_optimizer` for more details.
        cfg: the config dict, which is defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.
    """
    if cfg.SCHEDULER == "step":
        scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.STEPS[0], gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == "multi_step":
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=cfg.STEPS, gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == "exponential":
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == "inverted_exponential":
        scheduler = InvertedExponentialLR(optimizer, end_lr=cfg.LR_MIN)
    elif cfg.SCHEDULER == "sgdr":
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=2, T_mult=2, eta_min=cfg.LR_MIN)
    else:
        raise AssertionError("scheduler can not be recognized.")
    return scheduler
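# The TODO in configure_lr_scheduler above hints at a getattr-based dispatch.
# Below is a minimal sketch of that idea, assuming the per-scheduler keyword
# arguments are supplied as a plain dict rather than read from cfg; the names
# build_scheduler_by_name and the demo values are illustrative, not part of the
# original code.
from torch import nn
from torch.optim import SGD, lr_scheduler


def build_scheduler_by_name(optimizer, name, kwargs):
    # Fetch the scheduler class from torch.optim.lr_scheduler by name and
    # instantiate it with the given keyword arguments.
    scheduler_cls = getattr(lr_scheduler, name)
    return scheduler_cls(optimizer, **kwargs)


demo_model = nn.Linear(3, 1)
demo_opt = SGD(demo_model.parameters(), lr=0.1)
demo_sched = build_scheduler_by_name(
    demo_opt, "CosineAnnealingWarmRestarts", {"T_0": 2, "T_mult": 2, "eta_min": 1e-6})
print(type(demo_sched).__name__)  # -> CosineAnnealingWarmRestarts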
def test_last_epoch(self):
    """Tests the assumption that last_epoch is the internal epoch counter.

    Should just be range(start_epoch, num_epochs).
    """
    start_epoch = 0
    num_epochs = 20
    model = torch.nn.Linear(2, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.5)

    lr_scheduler_1 = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0.1)
    steps = []
    for i in range(start_epoch, num_epochs):
        steps.append(lr_scheduler_1.last_epoch)
        lr_scheduler_1.step()
    self.assertEqual(steps, list(range(start_epoch, num_epochs)))

    lr_scheduler_2 = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)
    steps = []
    for i in range(start_epoch, num_epochs):
        steps.append(lr_scheduler_2.last_epoch)
        lr_scheduler_2.step()
    self.assertEqual(steps, list(range(start_epoch, num_epochs)))
def main_with_centerloss(model):
    """
    train model with cross-entropy and center loss as supervision
    :param model:
    :return:
    """
    criterion_xent = nn.CrossEntropyLoss()
    criterion_cent = CenterLoss(num_classes=cfg['out_num'], feat_dim=1024)
    optimizer_model = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    optimizer_centloss = optim.SGD(criterion_cent.parameters(), lr=0.5)

    cosine_anneal_warmup_lr_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer_model, T_0=10, T_mult=10, eta_min=0, last_epoch=-1)

    print('start loading ImageDataset...')
    trainloader, valloader, testloader = data_loader.load_imagedataset_data()
    dataloaders = {'train': trainloader, 'val': valloader, 'test': testloader}

    train_model_for_centerloss(model=model, dataloaders=dataloaders,
                               criterion_xent=criterion_xent, criterion_cent=criterion_cent,
                               optimizer_model=optimizer_model,
                               optimizer_centloss=optimizer_centloss,
                               scheduler=cosine_anneal_warmup_lr_scheduler)
def main_with_asoftmaxloss(model):
    """
    train model with vanilla ASoftmaxLoss as supervision
    :param model:
    :return:
    """
    criterion_aloss = AngularLoss()
    optimizer = optim.SGD(model.parameters(), lr=cfg['init_lr'], momentum=0.9,
                          weight_decay=cfg['weight_decay'])
    cosine_anneal_warmup_lr_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=10, T_mult=10, eta_min=0, last_epoch=-1)

    print('start loading ImageDataset...')
    trainloader, valloader, testloader = data_loader.load_imagedataset_data()
    dataloaders = {'train': trainloader, 'val': valloader, 'test': testloader}

    train_model_with_modified_softmax_loss(
        model=model, dataloaders=dataloaders, criterion=criterion_aloss,
        optimizer=optimizer, scheduler=cosine_anneal_warmup_lr_scheduler)
def __init__(self,
             T_0: int,
             T_mult: int = 1,
             eta_min: float = 0,
             last_epoch: int = -1,
             step_on_batch: bool = True):  # noqa
    """Constructor for CosineAnnealingWarmRestarts.

    Args:
        T_0 (int): Number of epochs or iterations for the first restart.
        T_mult (int): T increase factor after a restart.
        eta_min (float, optional): Min learning rate. Defaults to 0.
        last_epoch (int): The index of last epoch. Default: -1.
        step_on_batch (bool): Step on each training iteration rather than
            each epoch. Defaults to True.
    """
    super().__init__(
        lambda opt: _schedulers.CosineAnnealingWarmRestarts(
            opt, T_0, T_mult=T_mult, eta_min=eta_min, last_epoch=last_epoch),
        step_on_batch=step_on_batch,
    )
def __init__(self,
             T_0: int,
             T_mult: int = 1,
             eta_min: float = 0,
             last_epoch: int = -1,
             step_on_iteration: bool = False):
    super().__init__(
        lambda opt: _scheduler.CosineAnnealingWarmRestarts(
            opt, T_0, T_mult=T_mult, eta_min=eta_min, last_epoch=last_epoch),
        step_on_iteration=step_on_iteration)
def init_optimizer(optimizer_name, model, lr, wd, lr_restart_step=1, lr_decay_gamma=0.9,
                   scheduler="step", nesterov=False, num_epochs=None, steps_per_epoch=None):
    if optimizer_name == "sgd":
        optimizer_ft = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                 weight_decay=wd, nesterov=nesterov)
    elif optimizer_name == "adam":
        optimizer_ft = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    elif optimizer_name == "adamp":
        from adamp import AdamP
        optimizer_ft = AdamP(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=wd)
    elif optimizer_name == "sgdp":
        from adamp import SGDP
        optimizer_ft = SGDP(model.parameters(), lr=lr, weight_decay=wd, momentum=0.9,
                            nesterov=nesterov)
    else:
        raise ValueError("unknown optimizer name: %s" % optimizer_name)

    if scheduler == "cosine":
        exp_lr_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer_ft, lr_restart_step)
        use_lr_schedule_steps = True
    elif scheduler == "cycle":
        exp_lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer_ft, max_lr=lr, steps_per_epoch=steps_per_epoch,
            epochs=num_epochs, pct_start=0.1)
        use_lr_schedule_steps = False
    elif scheduler == "step":
        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer_ft, step_size=lr_restart_step, gamma=lr_decay_gamma)
        use_lr_schedule_steps = False
    else:
        raise ValueError("unknown scheduler name: %s" % scheduler)

    return optimizer_ft, exp_lr_scheduler, use_lr_schedule_steps
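# A hypothetical consumer of init_optimizer above, illustrating one plausible
# reading of the returned use_lr_schedule_steps flag: fractional per-batch steps
# for the warm-restart "cosine" scheduler, one step per epoch otherwise. The
# model and the epoch/batch counts are assumptions for demonstration, not the
# repository's actual training loop.
import torch.nn as nn

demo_model = nn.Linear(8, 2)
optimizer, scheduler, use_lr_schedule_steps = init_optimizer(
    "sgd", demo_model, lr=0.1, wd=1e-4, lr_restart_step=5, scheduler="cosine")

num_epochs, batches_per_epoch = 3, 10
for epoch in range(num_epochs):
    for i in range(batches_per_epoch):
        optimizer.step()  # forward/backward omitted; only the schedule is exercised
        if use_lr_schedule_steps:
            # Advance the schedule within the epoch using a fractional epoch index.
            scheduler.step(epoch + i / batches_per_epoch)
    if not use_lr_schedule_steps:
        scheduler.step()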
def _build_model(self):
    if self.opt.network_name == 'riff_net_v1':
        self.generator = RiffG_v1(self.opt.pitch_range, self.opt.seed_size)
        self.discriminator = RiffD_v1(self.opt.pitch_range)
    elif self.opt.network_name == 'riff_net_v2':
        self.generator = RiffG_v2(self.opt.pitch_range, self.opt.seed_size)
        self.discriminator = RiffD_v2(self.opt.pitch_range)
    elif self.opt.network_name == 'riff_net_v3':
        self.generator = RiffG_v3(self.opt.pitch_range, self.opt.seed_size)
        self.discriminator = RiffD_v3(self.opt.pitch_range)
    elif self.opt.network_name == 'riff_net_v4':
        # Note: the 'riff_net_v4' option currently instantiates the v3 networks.
        self.generator = RiffG_v3(self.opt.pitch_range, self.opt.seed_size)
        self.discriminator = RiffD_v3(self.opt.pitch_range)
    else:
        assert self.opt.network_name == 'midi_net'
        self.generator = MidiNetG(self.opt.pitch_range)
        self.discriminator = MidiNetD(self.opt.pitch_range)

    init_weight_(self.generator)
    init_weight_(self.discriminator)

    if self.opt.gpu:
        self.generator.to(self.device)
        # summary(self.generator, input_size=self.opt.input_shape)
        self.discriminator.to(self.device)
        # summary(self.discriminator, input_size=self.opt.input_shape)

    self.G_optimizer = Adam(params=self.generator.parameters(), lr=self.opt.g_lr,
                            betas=(self.opt.beta1, self.opt.beta2))
    self.D_optimizer = Adam(params=self.discriminator.parameters(), lr=self.opt.d_lr,
                            betas=(self.opt.beta1, self.opt.beta2))
    self.G_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        self.G_optimizer, T_0=1, T_mult=2, eta_min=4e-08)
    self.D_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        self.D_optimizer, T_0=1, T_mult=2, eta_min=4e-08)
def test_lr():
    model = Discriminator()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2, eta_min=4e-08)
    lr_list = []
    for epoch in range(20):
        scheduler.step(epoch)
        lr_list.append(optimizer.state_dict()['param_groups'][0]['lr'])
    plt.plot(range(20), lr_list)
    plt.show()
def get_scheduler(optimizer, scheduler_type, **kwargs):
    """
    Return a learning rate scheduler. Three scheduler types are implemented.

    Parameters
    ----------
    optimizer: torch.optim
        optimizer picked for training
    scheduler_type: str
        defines the scheduler type
        'step' - decrease the learning rate by a factor of 10, step by step.
        'cos' - decrease the learning rate using a cosine annealing schedule.
        'warmup' - increase the learning rate from zero to the initial value.
    **kwargs : dict,
        learning_rate: float
            Initial learning rate.
        step_size: int
            Quantity of epochs between learning rate decays by a factor of 10.
            Use with the 'step' scheduler type only.
        cycle_len: int
            Quantity of epochs until the learning rate decays from the initial
            value to zero. Use with the 'cos' scheduler type only.
        batch_per_epoch: int
            Quantity of batches in the dataset.
        warmup_epoch: int
            Quantity of epochs to raise the learning rate from zero to the
            initial value.

    Returns
    -------
    scheduler: torch.optim.lr_scheduler

    See Also
    --------
    torch.optim.lr_scheduler.StepLR
    torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
    torch.optim.lr_scheduler.CyclicLR
    """
    if scheduler_type == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=kwargs['step_size'], gamma=0.1)
    elif scheduler_type == 'cos':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=kwargs['cycle_len'], eta_min=0)
    elif scheduler_type == 'warmup':
        scheduler = lr_scheduler.CyclicLR(
            optimizer,
            base_lr=kwargs['learning_rate'] / (kwargs['batch_per_epoch'] * kwargs['warmup_epoch']),
            max_lr=kwargs['learning_rate'],
            step_size_up=(kwargs['batch_per_epoch'] + 1) * kwargs['warmup_epoch'],
            step_size_down=0,
            cycle_momentum=False
        )
    else:
        raise ValueError("Unknown scheduler_type: %s" % scheduler_type)
    return scheduler
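# Usage sketch for get_scheduler above: the 'cos' branch only needs cycle_len,
# 'step' needs step_size, and 'warmup' needs learning_rate, batch_per_epoch and
# warmup_epoch. The model and the numbers here are assumptions for demonstration.
import torch.nn as nn
from torch import optim

demo_model = nn.Linear(4, 2)
demo_opt = optim.SGD(demo_model.parameters(), lr=0.01)
demo_sched = get_scheduler(demo_opt, 'cos', cycle_len=10)
for _ in range(10):
    demo_opt.step()   # gradients omitted; only the schedule is exercised here
    demo_sched.step()
print(demo_sched.get_last_lr())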
def __init__(
    self,
    optimizer: Optimizer,
    T_0: int,
    T_mult: int = 1,
    eta_min: float = 0,
    last_epoch: int = -1,
    step_duration: int = 1,
):
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0, T_mult, eta_min, last_epoch)
    super().__init__(scheduler, step_duration)
def __init__(self, T_0, T_mult=1, eta_min=0):
    try:
        # Availability check only; the scheduler is built through _scheduler below.
        from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts  # noqa: F401
    except ImportError:
        raise ImportError("Update torch>=1.1.0 to use 'CosineAnnealingWarmRestarts'")
    super().__init__(
        lambda opt: _scheduler.CosineAnnealingWarmRestarts(opt, T_0, T_mult=T_mult, eta_min=eta_min)
    )
def _build_model(self):
    self.classifier = NewClassifier()
    if self.opt.gpu:
        self.classifier.to(self.device)
        summary(self.classifier, input_size=self.opt.input_shape)

    self.classifier_optimizer = Adam(params=self.classifier.parameters(), lr=self.opt.lr,
                                     betas=(self.opt.beta1, self.opt.beta2),
                                     weight_decay=self.opt.weight_decay)
    self.classifier_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        self.classifier_optimizer, T_0=1, T_mult=2, eta_min=4e-08)
def init_tools(self, lr, wd, mom, period, optim_):
    if optim_ == 'adam':
        self.optim = optim.AdamW(self.net.parameters(), lr=lr, weight_decay=wd)
    elif optim_ == 'sgd':
        self.optim = optim.SGD(self.net.parameters(), lr=lr, momentum=mom,
                               nesterov=True, weight_decay=wd)

    # LR Schedule
    self.sched = schedule.CosineAnnealingWarmRestarts(self.optim, period, 2)
    self.batch_sched_step = lambda x: self.sched.step(x)
    self.epoch_sched_step = lambda: None
def train_loop():
    model = registry_model.get(config.model['name'])(num_classes=config.num_class)
    model = torch.nn.DataParallel(model)
    model = model.to(torch.device('cuda'))

    optimizer = Adam(model.parameters(), lr=config.lr)
    # step_scheduler = lr_scheduler.StepLR(optimizer, 3, gamma=0.7, last_epoch=-1)
    cos_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=1, T_mult=2, eta_min=1e-6, last_epoch=-1)
    scheduler = cos_scheduler
    criterion = registry_loss.get(config.criterion)()
    print(config.criterion)
    print(config.lr)

    train_tf = transforms.Compose([
        transforms.Resize(config.resize),
        transforms.RandomHorizontalFlip(0.5),
        # transforms.RandomVerticalFlip(0.5),
        transforms.ToTensor(),
    ])
    test_tf = transforms.Compose([
        transforms.Resize(config.resize),
        transforms.ToTensor(),
    ])

    GIC_train_dataset = ImageFolder(root='/home/youliang/datasets/GIC/train', transform=train_tf)
    GIC_train_loader = DataLoader(GIC_train_dataset, batch_size=config.batch_size,
                                  shuffle=True, pin_memory=True)
    GIC_val_dataset = ImageFolder(root='/home/youliang/datasets/GIC/val', transform=test_tf)
    GIC_val_loader = DataLoader(GIC_val_dataset, batch_size=config.test_batch_size, pin_memory=True)

    tqdm_length = math.ceil(len(GIC_train_dataset) / config.batch_size)
    trainer(model, optimizer, criterion, scheduler, GIC_train_loader, GIC_val_loader, tqdm_length)
def fit(self, data_loader, epochs, test_dataloader=None, verbose=False):
    """
    fits the classifier to the input data.

    Parameters
    ----------
    data_loader : torch-geometric dataloader
        the training dataset.
    epochs : int
        number of epochs.
    test_dataloader : torch-geometric dataloader, default=None
        the test dataset on which the model is evaluated in each epoch.
    verbose : boolean, default=False
        whether to print out the loss during training.
    """
    if self.logging:
        data = next(iter(data_loader))
        self.writer.add_graph(self.net, [data.x, data.edge_index])

    # self.scheduler = lr_scheduler.CyclicLR(self.optimizer, base_lr=self.lr, max_lr=0.01,
    #                                        step_size_up=5, mode="triangular2")
    self.scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
        self.optimizer, T_0=50, T_mult=1, eta_min=0.00005, last_epoch=-1)

    for epoch in range(epochs):
        self.net.train()
        self.net.to(self.device)
        total_loss = 0
        for batch in data_loader:
            x, edge_index, label = (batch.x.to(self.device),
                                    batch.edge_index.to(self.device),
                                    batch.y.to(self.device))
            self.optimizer.zero_grad()
            pred = self.net(x, edge_index)
            loss = self.criterion(pred, label)
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(data_loader.dataset)

        if verbose and epoch % max(1, epochs // 10) == 0:
            print('[%d] loss: %.3f' % (epoch + 1, total_loss))

        if self.logging:
            # Save the training loss, the training accuracy and the test accuracy
            # for tensorboard visualization.
            self.writer.add_scalar("Training Loss", total_loss, epoch)
            accuracy_train = self.eval(data_loader, verbose=False)[0]
            self.writer.add_scalar("Accuracy on Training Dataset", accuracy_train, epoch)
            if test_dataloader is not None:
                accuracy_test = self.eval(test_dataloader, verbose=False)[0]
                self.writer.add_scalar("Accuracy on Test Dataset", accuracy_test, epoch)
def __init__(
    self,
    T_0: int,
    T_mult: int = 1,
    eta_min: float = 0,
    last_epoch: int = -1,
    step_on_batch: bool = True,
):  # noqa
    """Constructor for CosineAnnealingWarmRestarts."""
    super().__init__(
        lambda opt: _schedulers.CosineAnnealingWarmRestarts(
            opt, T_0, T_mult=T_mult, eta_min=eta_min, last_epoch=last_epoch),
        step_on_batch=step_on_batch,
    )
def configure_lr_scheduler(self, optimizer, cfg):
    if cfg.SCHEDULER == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.STEPS[0], gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == 'multi_step':
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=cfg.STEPS, gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == 'exponential':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == 'SGDr':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=cfg.MAX_EPOCHS)
    elif cfg.SCHEDULER == 'SGDR':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=cfg.T_0, T_mult=cfg.T_MULT, eta_min=cfg.ETA_MIN)
    else:
        raise AssertionError('scheduler can not be recognized.')
    return scheduler
def get_scheduler(optimizer, opt):
    if opt.lr_policy == 'lambda':
        def lambda_rule(epoch):
            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif opt.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.5)
    elif opt.lr_policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.8,
                                                   threshold=0.01, patience=2, min_lr=opt.min_lr)
    elif opt.lr_policy == 'cyclic':
        scheduler = lr_scheduler.CyclicLR(optimizer, opt.min_lr, opt.lr, step_size_up=5,
                                          step_size_down=None, gamma=0.99, mode='exp_range',
                                          cycle_momentum=False)
    elif opt.lr_policy == 'cosine_restarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, opt.lr_decay_iters,
                                                             T_mult=1, eta_min=0)
    else:
        raise NotImplementedError('learning rate policy [%s] is not implemented' % opt.lr_policy)
    return scheduler
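# A small usage sketch for get_scheduler above with the 'cosine_restarts'
# policy. The SimpleNamespace below stands in for the real option parser; its
# field values (and the tiny network) are assumptions for demonstration.
from types import SimpleNamespace
import torch.nn as nn
from torch import optim

demo_opt_ns = SimpleNamespace(lr_policy='cosine_restarts', lr_decay_iters=10)
demo_net = nn.Linear(4, 2)
demo_optimizer = optim.Adam(demo_net.parameters(), lr=1e-3)
demo_scheduler = get_scheduler(demo_optimizer, demo_opt_ns)
for _ in range(5):
    demo_optimizer.step()  # gradients omitted; only the schedule is exercised
    demo_scheduler.step()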
def configure_optimizers(self):
    params = self.hparams
    if params.optimizer == 'sgd':
        optimizer = torch_optim.SGD(self.parameters(), lr=params.lr,
                                    weight_decay=params.weight_decay, momentum=0.9)
    elif params.optimizer == 'adam':
        optimizer = torch_optim.Adam(self.parameters(), lr=params.lr,
                                     weight_decay=params.weight_decay)
    elif params.optimizer == 'adabound':
        import adabound
        optimizer = adabound.AdaBound(self.parameters(), lr=params.lr,
                                      final_lr=params.lr * 10,
                                      weight_decay=params.weight_decay)
    else:
        raise NotImplementedError()

    if params.sched == 'plat':
        sched = lr_sched.ReduceLROnPlateau(optimizer, patience=0, factor=params.sched_factor,
                                           verbose=True, min_lr=0.0004)
        return [optimizer], [sched]
    elif params.sched == 'sgdr':
        sched = lr_sched.CosineAnnealingWarmRestarts(optimizer, params.sched_factor)
        return [optimizer], [sched]
    elif params.sched == 'step':
        sched = lr_sched.MultiStepLR(optimizer, milestones=[3, 6], gamma=0.3)
        return [optimizer], [sched]
    elif params.sched == 'none':
        return optimizer
    else:
        raise NotImplementedError()
def get_optimizer(policy, args):
    if args.optimizer == "adam":
        optimizer = optim.Adam(policy.parameters(), lr=args.lr)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(policy.parameters(), lr=args.lr)
    elif args.optimizer == "rmsprop":
        optimizer = optim.RMSprop(policy.parameters(), lr=args.lr)
    else:
        raise ValueError("unknown optimizer: %s" % args.optimizer)

    scheduler = args.opt_schedule
    if scheduler == "cyclic":
        scheduler = lr_scheduler.OneCycleLR(
            optimizer=optimizer, max_lr=args.div_factor * args.lr,
            total_steps=args.num_episodes_train)
    elif scheduler == "cyclic_multi":
        scheduler = lr_scheduler.CyclicLR(optimizer=optimizer, base_lr=args.lr,
                                          max_lr=args.div_factor * args.lr)
    elif scheduler == "WR":
        # Ensure at least one full restart cycle even for short runs.
        T_0 = max(1, int(args.num_episodes_train / 1000))
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=T_0)
    return optimizer, scheduler
def get_scheduler(optimizer, opts, cur_ep=-1):
    if opts.lr_policy == 'lambda':
        def lambda_rule(ep):
            lr_l = 1.0 - max(0, ep - opts.n_ep_decay) / float(opts.n_ep - opts.n_ep_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule, last_epoch=-1)
    elif opts.lr_policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opts.n_ep_decay, gamma=0.1,
                                        last_epoch=cur_ep)
    # SGDR: Stochastic Gradient Descent with Warm Restarts - CosineAnnealingWarmRestarts
    elif opts.lr_policy == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=1000, T_mult=1, eta_min=0.0000001, last_epoch=-1)  # T_0 = 6480 (dan)
    else:
        raise NotImplementedError('no such learn rate policy')
    return scheduler
def test_CosineAnnealingWarmRestarts(self, debug=True):
    """
    Usage:
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/pypi/torch1_7_0 -d /cache/pypi -t copytree
        for filename in /cache/pypi/*.whl; do
            pip install $filename
        done
        proj_root=moco-exp
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/codes/$proj_root -d /cache/$proj_root -t copytree -b /cache/$proj_root/code.zip
        cd /cache/$proj_root
        pip install -r requirements.txt

        export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
        export TIME_STR=1
        export PYTHONPATH=./exp:./stylegan2-pytorch:./
        python -c "from exp.tests.test_styleganv2 import Testing_stylegan2;\
          Testing_stylegan2().test_train_ffhq_128()"
    :return:
    """
    if 'CUDA_VISIBLE_DEVICES' not in os.environ:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    if 'TIME_STR' not in os.environ:
        os.environ['TIME_STR'] = '0' if utils.is_debugging() else '0'
    from template_lib.v2.config_cfgnode.argparser import \
        (get_command_and_outdir, setup_outdir_and_yaml, get_append_cmd_str, start_cmd_run)

    tl_opts = ' '.join(sys.argv[sys.argv.index('--tl_opts') + 1:]) if '--tl_opts' in sys.argv else ''
    print(f'tl_opts:\n {tl_opts}')

    command, outdir = get_command_and_outdir(
        self, func_name=sys._getframe().f_code.co_name, file=__file__)
    argv_str = f"""
                --tl_config_file none
                --tl_command none
                --tl_outdir {outdir}
                """
    args = setup_outdir_and_yaml(argv_str, return_cfg=True)

    import torch.nn as nn
    from torch.optim import lr_scheduler
    from matplotlib import pyplot as plt

    model = nn.Linear(3, 64)

    def create_optimizer():
        return SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

    def plot_lr(scheduler, title='', labels=['base'], nrof_epoch=100):
        lr_li = [[] for _ in range(len(labels))]
        epoch_li = list(range(nrof_epoch))
        for epoch in epoch_li:
            scheduler.step()  # compute and update the learning rate of the optimizer's param groups for the current epoch
            lr = scheduler.get_last_lr()  # get the learning rate for the current epoch
            for i in range(len(labels)):
                lr_li[i].append(lr[i])
        for lr, label in zip(lr_li, labels):
            plt.plot(epoch_li, lr, label=label)
        plt.grid()
        plt.xlabel('epoch')
        plt.ylabel('lr')
        plt.title(title)
        plt.legend()
        plt.show()

    optimizer = create_optimizer()
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    plot_lr(scheduler, title='CosineAnnealingWarmRestarts')
# Gather the parameters to be optimized/updated in this run. If we are
# finetuning we will be updating all parameters. However, if we are
# doing feature extract method, we will only update the parameters
# that we have just initialized, i.e. the parameters with requires_grad
# is True.
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t", name)
else:
    params_to_update = model_ft.parameters()
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print("\t", name)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
exp_lr_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer_ft, T_0=10, T_mult=2)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft,
                             exp_lr_scheduler, num_epochs)
def train_val(config):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    train_loader = get_dataloader(img_dir=config.train_img_dir, mask_dir=config.train_mask_dir,
                                  mode="train", batch_size=config.batch_size,
                                  num_workers=config.num_workers, smooth=config.smooth)
    val_loader = get_dataloader(img_dir=config.val_img_dir, mask_dir=config.val_mask_dir,
                                mode="val", batch_size=4, num_workers=config.num_workers)

    writer = SummaryWriter(
        comment="LR_%f_BS_%d_MODEL_%s_DATA_%s" %
                (config.lr, config.batch_size, config.model_type, config.data_type))

    if config.model_type == "UNet":
        model = UNet()
    elif config.model_type == "UNet++":
        model = UNetPP()
    elif config.model_type == "SEDANet":
        model = SEDANet()
    elif config.model_type == "RefineNet":
        model = rf101()
    elif config.model_type == "BASNet":
        model = BASNet(n_classes=8)
    elif config.model_type == "DANet":
        model = DANet(backbone='resnet101', nclass=config.output_ch, pretrained=True,
                      norm_layer=nn.BatchNorm2d)
    elif config.model_type == "Deeplabv3+":
        model = deeplabv3_plus.DeepLabv3_plus(in_channels=3, num_classes=8, backend='resnet101',
                                              os=16, pretrained=True, norm_layer=nn.BatchNorm2d)
    elif config.model_type == "HRNet_OCR":
        model = seg_hrnet_ocr.get_seg_model()
    elif config.model_type == "scSEUNet":
        model = scSEUNet(pretrained=True, norm_layer=nn.BatchNorm2d)
    else:
        model = UNet()

    if config.iscontinue:
        model = torch.load("./exp/24_Deeplabv3+_0.7825757691389714.pth").module

    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    labels = [100, 200, 300, 400, 500, 600, 700, 800]
    # Class names (in Chinese): water, transport structures, buildings, farmland,
    # grassland, woodland, bare soil, others.
    objects = ['水体', '交通建筑', '建筑', '耕地', '草地', '林地', '裸土', '其他']

    if config.optimizer == "sgd":
        optimizer = SGD(model.parameters(), lr=config.lr, weight_decay=1e-4, momentum=0.9)
    elif config.optimizer == "adamw":
        optimizer = adamw.AdamW(model.parameters(), lr=config.lr)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    # weight = torch.tensor([1, 1.5, 1, 2, 1.5, 2, 2, 1.2]).to(device)
    # criterion = nn.CrossEntropyLoss(weight=weight)
    criterion = BasLoss()

    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[25, 30, 35, 40], gamma=0.5)
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.1, patience=5, verbose=True)
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, eta_min=1e-4)

    global_step = 0
    max_fwiou = 0
    frequency = np.array([0.1051, 0.0607, 0.1842, 0.1715, 0.0869, 0.1572, 0.0512, 0.1832])
    for epoch in range(config.num_epochs):
        epoch_loss = 0.0
        cm = np.zeros([8, 8])
        print(optimizer.param_groups[0]['lr'])
        with tqdm(total=config.num_train,
                  desc="Epoch %d / %d" % (epoch + 1, config.num_epochs),
                  unit='img', ncols=100) as train_pbar:
            model.train()
            for image, mask in train_loader:
                image = image.to(device, dtype=torch.float32)
                mask = mask.to(device, dtype=torch.float16)

                pred = model(image)
                loss = criterion(pred, mask)
                epoch_loss += loss.item()

                writer.add_scalar('Loss/train', loss.item(), global_step)
                train_pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_pbar.update(image.shape[0])
                global_step += 1
                # if global_step > 10:
                #     break

            # scheduler.step()
            print("\ntraining epoch loss: " +
                  str(epoch_loss / (float(config.num_train) / (float(config.batch_size)))))
        torch.cuda.empty_cache()

        val_loss = 0
        with torch.no_grad():
            with tqdm(total=config.num_val,
                      desc="Epoch %d / %d validation round" % (epoch + 1, config.num_epochs),
                      unit='img', ncols=100) as val_pbar:
                model.eval()
                locker = 0
                for image, mask in val_loader:
                    image = image.to(device, dtype=torch.float32)
                    target = mask.to(device, dtype=torch.long).argmax(dim=1)
                    mask = mask.cpu().numpy()

                    pred, _, _, _, _, _, _, _ = model(image)
                    val_loss += F.cross_entropy(pred, target).item()
                    pred = pred.cpu().detach().numpy()
                    mask = semantic_to_mask(mask, labels)
                    pred = semantic_to_mask(pred, labels)
                    cm += get_confusion_matrix(mask, pred, labels)
                    val_pbar.update(image.shape[0])
                    if locker == 25:
                        writer.add_images('mask_a/true', mask[2, :, :], epoch + 1, dataformats='HW')
                        writer.add_images('mask_a/pred', pred[2, :, :], epoch + 1, dataformats='HW')
                        writer.add_images('mask_b/true', mask[3, :, :], epoch + 1, dataformats='HW')
                        writer.add_images('mask_b/pred', pred[3, :, :], epoch + 1, dataformats='HW')
                    locker += 1
                    # break

                miou = get_miou(cm)
                fw_miou = (miou * frequency).sum()
                scheduler.step()

                if fw_miou > max_fwiou:
                    if torch.__version__ == "1.6.0":
                        torch.save(model,
                                   config.result_path + "/%d_%s_%.4f.pth" % (epoch + 1, config.model_type, fw_miou),
                                   _use_new_zipfile_serialization=False)
                    else:
                        torch.save(model,
                                   config.result_path + "/%d_%s_%.4f.pth" % (epoch + 1, config.model_type, fw_miou))
                    max_fwiou = fw_miou

        print("\n")
        print(miou)
        print("testing epoch loss: " + str(val_loss), "FWmIoU = %.4f" % fw_miou)
        writer.add_scalar('mIoU/val', miou.mean(), epoch + 1)
        writer.add_scalar('FWIoU/val', fw_miou, epoch + 1)
        writer.add_scalar('loss/val', val_loss, epoch + 1)
        for idx, name in enumerate(objects):
            writer.add_scalar('iou/val' + name, miou[idx], epoch + 1)
        torch.cuda.empty_cache()

    writer.close()
    print("Training finished")