# Shared imports for the scheduler and training snippets below. The warmup
# schedulers (WarmupCosineAnnealingLR, WarmupMultiStepLR, WarmupCyclicLR,
# CosineAnnealingWithRestartsLR) and the model/data helpers (Resnet18,
# get_train_loader, evaluate) are project-local and assumed to be importable
# from the surrounding codebase.
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler


def build_scheduler(optimizer, config, steps_per_epoch, start_epoch):
    # FIXME:
    if config.train.sched.type == "cosine":
        return torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=config.train.epochs * steps_per_epoch,
            last_epoch=start_epoch * steps_per_epoch - 1,
        )
    elif config.train.sched.type == "warmup_cosine":
        return WarmupCosineAnnealingLR(
            optimizer,
            epoch_warmup=config.train.sched.epochs_warmup * steps_per_epoch,
            epoch_max=config.train.epochs * steps_per_epoch,
            last_epoch=start_epoch * steps_per_epoch - 1,
        )
    elif config.train.sched.type == "step":
        return torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=[e * steps_per_epoch for e in config.train.sched.steps],
            gamma=0.1,
            last_epoch=start_epoch * steps_per_epoch - 1,
        )
    else:
        raise AssertionError("invalid config.train.sched.type {}".format(
            config.train.sched.type))
def build_scheduler(optimizer, config, steps_per_epoch):
    if config.train.sched.type == "cosine":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, config.epochs * steps_per_epoch)
    elif config.train.sched.type == "warmup_cosine":
        scheduler = WarmupCosineAnnealingLR(
            optimizer,
            epoch_warmup=config.epochs_warmup * steps_per_epoch,
            epoch_max=config.epochs * steps_per_epoch,
        )
    elif config.train.sched.type == "step":
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=(config.epochs * steps_per_epoch) // 3,
            gamma=0.1)
    elif config.train.sched.type == "multistep":
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            [epoch * steps_per_epoch for epoch in config.train.sched.epochs],
            gamma=0.1)
    else:
        raise AssertionError("invalid scheduler {}".format(
            config.train.sched.type))
    return scheduler
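# Both build_scheduler variants above size the schedule in optimizer steps
# (T_max and milestones are multiplied by steps_per_epoch), so the returned
# scheduler is meant to be stepped once per batch rather than once per epoch.
# A minimal driver sketch under that assumption; model, loader and the
# cross-entropy loss below are placeholders, not part of the original code.
def fit(model, loader, config, steps_per_epoch):
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    scheduler = build_scheduler(optimizer, config, steps_per_epoch)
    for epoch in range(config.epochs):
        for images, targets in loader:
            loss = F.cross_entropy(model(images), targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()  # per-iteration step matches the steps_per_epoch scaling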
def set_optimizer(model):
    # lr0, momentum, wd, warmup_start_lr, n_warmup_epochs, warmup_method,
    # n_epochs and lr_eta are hyperparameters defined outside this function.
    optim = torch.optim.SGD(model.parameters(),
                            lr=lr0,
                            momentum=momentum,
                            weight_decay=wd)
    # lr_sheduler = WarmupMultiStepLR(
    #     optim,
    #     warmup_start_lr=warmup_start_lr,
    #     warmup_epochs=n_warmup_epochs,
    #     warmup=warmup_method,
    #     milestones=[60, 120, 160],
    #     gamma=0.2,
    # )
    lr_sheduler = WarmupCosineAnnealingLR(
        optim,
        warmup_start_lr=warmup_start_lr,
        warmup_epochs=n_warmup_epochs,
        warmup=warmup_method,
        max_epochs=n_epochs,
        cos_eta=lr_eta,
    )
    # lr_sheduler = WarmupCyclicLR(
    #     optim,
    #     warmup_start_lr=warmup_start_lr,
    #     warmup_epochs=n_warmup_epochs,
    #     warmup=warmup_method,
    #     max_epochs=n_epochs,
    #     cycle_len=cycle_len,
    #     cycle_mult=cycle_mult,
    #     lr_decay=lr_decay,
    #     cos_eta=lr_eta,
    # )
    return optim, lr_sheduler
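# The WarmupCosineAnnealingLR used in set_optimizer() and train() is
# project-local and its implementation is not shown here. The class below is
# only an assumed sketch of what such a scheduler might look like, matching the
# keyword interface used above (warmup_start_lr, warmup_epochs, warmup,
# max_epochs, cos_eta); the real class may differ. It subclasses the semi-private
# _LRScheduler base (public alias LRScheduler in newer PyTorch).
import math

class _WarmupCosineSketch(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, warmup_start_lr, warmup_epochs, warmup,
                 max_epochs, cos_eta, last_epoch=-1):
        self.warmup_start_lr = warmup_start_lr
        self.warmup_epochs = warmup_epochs
        self.warmup = warmup  # 'linear' ramp; anything else is treated as constant
        self.max_epochs = max_epochs
        self.cos_eta = cos_eta
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        e = self.last_epoch
        if e < self.warmup_epochs:
            if self.warmup == 'linear':
                ratio = e / max(1, self.warmup_epochs)
                return [self.warmup_start_lr + (lr - self.warmup_start_lr) * ratio
                        for lr in self.base_lrs]
            return [self.warmup_start_lr for _ in self.base_lrs]
        # cosine decay from the base lr down to cos_eta over the remaining epochs
        t = (e - self.warmup_epochs) / max(1, self.max_epochs - self.warmup_epochs)
        return [self.cos_eta + 0.5 * (lr - self.cos_eta) * (1 + math.cos(math.pi * t))
                for lr in self.base_lrs]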
def get_schedule(opt, optimizer, train_loader_len=None):
    if opt.scheduler == 'multistep':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[30, 60, 100, 130],
                                             gamma=0.1)
    elif opt.scheduler == 'cycle':
        step_size = train_loader_len * 4
        print(step_size)
        scheduler = lr_scheduler.CyclicLR(optimizer,
                                          step_size_up=step_size,
                                          base_lr=opt.lr / 100,
                                          max_lr=opt.lr,
                                          cycle_momentum=False)
    elif opt.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    elif opt.scheduler == 'warmup':
        step = train_loader_len
        scheduler = WarmupMultiStepLR(
            optimizer,
            milestones=[step * 30, step * 60, step * 100, step * 130],
            gamma=0.1)
    elif opt.scheduler == 'cos':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   train_loader_len * 5,
                                                   eta_min=1e-8)
    elif opt.scheduler == 'cosw':
        scheduler = WarmupCosineAnnealingLR(optimizer,
                                            train_loader_len * 5,
                                            eta_min=1e-8)
    elif opt.scheduler == 'sgdr':
        scheduler = CosineAnnealingWithRestartsLR(optimizer,
                                                  train_loader_len * 5,
                                                  eta_min=1e-10,
                                                  T_mult=1.1)
    elif opt.scheduler == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    elif opt.scheduler == 'exponential':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.97)
    else:
        scheduler = None
    return scheduler
def get_schedule(opt, optimizer, train_loader_len):
    if opt.scheduler == 'multistep':
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[25, 45, 70],
                                             gamma=0.1)
    elif opt.scheduler == 'cycle':
        step_size = train_loader_len * 6
        print(step_size)
        scheduler = lr_scheduler.CyclicLR(optimizer,
                                          step_size_up=step_size,
                                          base_lr=opt.lr / 100,
                                          max_lr=opt.lr)
    elif opt.scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
    elif opt.scheduler == 'warmup':
        step = train_loader_len
        scheduler = WarmupMultiStepLR(optimizer,
                                      milestones=[step * 25, step * 70, step * 90],
                                      gamma=0.1)
    elif opt.scheduler == 'cos':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                                   train_loader_len * 3,
                                                   eta_min=opt.lr / 1000)
    elif opt.scheduler == 'cosw':
        scheduler = WarmupCosineAnnealingLR(optimizer,
                                            train_loader_len * 4,
                                            eta_min=1e-8)
    else:
        scheduler = None
    return scheduler
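# Usage note for the get_schedule() variants above: the 'cycle', 'warmup',
# 'cos', 'cosw' and 'sgdr' branches appear to be sized in iterations (multiples
# of train_loader_len) and so would be stepped once per batch, while
# 'multistep', 'step', 'exponential' and 'plateau' are epoch-based, and
# ReduceLROnPlateau additionally needs the monitored metric. A hedged driver
# sketch; opt.epochs, model, train_loader and validate are placeholders, not
# part of the original code.
def run_training(opt, model, optimizer, train_loader, validate):
    scheduler = get_schedule(opt, optimizer, len(train_loader))
    per_iter = opt.scheduler in ('cycle', 'warmup', 'cos', 'cosw', 'sgdr')
    for epoch in range(opt.epochs):
        for images, targets in train_loader:
            loss = F.cross_entropy(model(images), targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if scheduler is not None and per_iter:
                scheduler.step()
        val_loss = validate(model)
        if scheduler is not None and not per_iter:
            if opt.scheduler == 'plateau':
                scheduler.step(val_loss)  # ReduceLROnPlateau monitors a metric
            else:
                scheduler.step()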
def train(gen_path, save_pth):
    # n_classes, pre_act, ds_name, mixup and mixup_alpha are module-level settings.
    model = Resnet18(n_classes=n_classes, pre_act=pre_act)
    model.train()
    model.cuda()
    criteria = nn.KLDivLoss(reduction='batchmean')

    # label generator: a pretrained network that provides soft targets
    generator = Resnet18(n_classes=10)
    state_dict = torch.load(gen_path)
    generator.load_state_dict(state_dict)
    generator.eval()  # inference only; keeps batch-norm running stats frozen
    generator.cuda()

    batchsize = 256
    n_workers = 8
    dltrain = get_train_loader(
        batch_size=batchsize,
        num_workers=n_workers,
        dataset=ds_name,
        pin_memory=True
    )

    lr0 = 2e-1
    lr_eta = 1e-5
    momentum = 0.9
    wd = 5e-4
    n_epochs = 50
    n_warmup_epochs = 10
    warmup_start_lr = 1e-5
    warmup_method = 'linear'
    optim = torch.optim.SGD(
        model.parameters(),
        lr=lr0,
        momentum=momentum,
        weight_decay=wd
    )
    lr_sheduler = WarmupCosineAnnealingLR(
        optim,
        warmup_start_lr=warmup_start_lr,
        warmup_epochs=n_warmup_epochs,
        warmup=warmup_method,
        max_epochs=n_epochs,
        cos_eta=lr_eta,
    )

    for e in range(n_epochs):
        tic = time.time()
        model.train()
        loss_epoch = []
        for _, (ims, _) in enumerate(dltrain):
            ims = ims.cuda()

            # generate soft labels with the frozen generator
            with torch.no_grad():
                lbs = generator(ims).clone()
                lbs = torch.softmax(lbs, dim=1)

            optim.zero_grad()
            if mixup:
                # mix the inputs and take the same convex combination of the
                # KL losses against the two sets of soft labels
                bs = ims.size(0)
                idx = torch.randperm(bs)
                lam = np.random.beta(mixup_alpha, mixup_alpha)
                ims_mix = lam * ims + (1. - lam) * ims[idx]
                logits = model(ims_mix)
                probs = F.log_softmax(logits, dim=1)
                loss1 = criteria(probs, lbs)
                loss2 = criteria(probs, lbs[idx])
                loss = lam * loss1 + (1. - lam) * loss2
            else:
                logits = model(ims)
                probs = F.log_softmax(logits, dim=1)
                loss = criteria(probs, lbs)
            loss.backward()
            loss_epoch.append(loss.item())
            optim.step()

        model.eval()
        acc = evaluate(model, verbose=False)
        toc = time.time()
        msg = 'epoch: {}, loss: {:.4f}, lr: {:.4f}, acc: {:.4f}, time: {:.2f}'.format(
            e, sum(loss_epoch) / len(loss_epoch),
            optim.param_groups[0]['lr'], acc, toc - tic
        )
        print(msg)
        # step the scheduler after the epoch's optimizer updates (PyTorch >= 1.1
        # convention), so the warmup start LR is actually used for the first epoch
        lr_sheduler.step()

    model.cpu()
    if hasattr(model, 'module'):
        state_dict = model.module.state_dict()
    else:
        state_dict = model.state_dict()
    torch.save(state_dict, save_pth)
    return model
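# The training loop above relies on the nn.KLDivLoss convention: the input must
# be log-probabilities (hence F.log_softmax on the student logits) while the
# target is plain probabilities (the softmax of the generator's logits). A tiny
# self-contained check of that convention; the tensors here are made up.
def _kl_convention_check():
    torch.manual_seed(0)
    student_logits = torch.randn(4, 10)
    teacher_logits = torch.randn(4, 10)
    criteria = nn.KLDivLoss(reduction='batchmean')
    loss = criteria(F.log_softmax(student_logits, dim=1),
                    torch.softmax(teacher_logits, dim=1))
    # batchmean: sum of p_teacher * (log p_teacher - log p_student), divided by batch size
    print(loss.item())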