def prepare_lr_finder(task, **kwargs):
    """Build an LRFinder for *task*, forwarding optional finder settings.

    Recognised kwargs: ``device``, ``memory_cache``, ``cache_dir`` —
    defaults mirror torch_lr_finder's own defaults.
    """
    settings = {
        "device": kwargs.get("device", None),
        "memory_cache": kwargs.get("memory_cache", True),
        "cache_dir": kwargs.get("cache_dir", None),
    }
    return LRFinder(task.model, task.optimizer, task.criterion, **settings)
def lf():
    """Run an exponential LR range test on the training split and plot it."""
    args = get_args()
    model = select_model(args)
    optimizer = select_optimizer(args, model)
    train_transforms = get_transforms(args)
    full_dataset = datasets.ImageFolder(args.root_path + '/' + 'train',
                                        train_transforms)
    # 48k/12k split; only the first part feeds the range test.
    train_subset, _ = torch.utils.data.random_split(full_dataset,
                                                    [48000, 12000])
    loader_kwargs = {
        'num_workers': 2,
        'batch_size': args.batch_size,
        'shuffle': True,
    }
    train_loader = DataLoader(train_subset, pin_memory=True, **loader_kwargs)
    criterion = nn.CrossEntropyLoss(reduction='mean')
    finder = LRFinder(model, optimizer, criterion, device="cuda")
    finder.range_test(train_loader, end_lr=10, num_iter=300, step_mode="exp")
    finder.plot()
def find_lr(self):
    """Run an LR range test (up to lr=10) and save the curve to LRvsLoss.png."""
    finder = LRFinder(self.model, self.optimizer, self.criterion, self.device)
    finder.range_test(self.data_loaders["train"], end_lr=10, num_iter=1000)
    finder.plot()
    out_path = self.save_folder + "/LRvsLoss.png"
    plt.savefig(out_path)
    plt.close()
def lr_range_test(self, val_loss=False):
    """Exponential LR range test on the training loader.

    When *val_loss* is True the loss is evaluated on the validation
    loader instead of the running training loss.  The (reset) finder is
    kept on ``self.latest_lr_finder_result`` for later inspection.
    """
    finder = LRFinder(self.model, self.optimizer, self.criterion,
                      device=self.device)
    if val_loss:
        eval_loader = self.dl_valid
    else:
        eval_loader = None
    finder.range_test(self.dl_train, val_loader=eval_loader, end_lr=100,
                      num_iter=100, step_mode="exp")
    finder.plot()
    finder.reset()
    self.latest_lr_finder_result = finder
def findLR(model, train_loader, test_loader, criterion, optimizer, num_iteration):
    """Range-test up to lr=0.5 and return the lr at the lowest recorded loss."""
    # For mixed precision, initialize AMP before constructing the finder:
    #model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    finder = LRFinder(model, optimizer, criterion, device="cuda")
    finder.range_test(train_loader, end_lr=0.5, num_iter=num_iteration)
    # fast ai method
    #lr_finder.range_test(train_loader, val_loader=test_loader, end_lr=10, num_iter = num_iteration, step_mode="linear")
    finder.plot(log_lr=False)
    finder.reset()  # model/optimizer restored; recorded history survives
    lrs = finder.history['lr']
    losses = finder.history['loss']
    return lrs[losses.index(finder.best_loss)]
def learningrate_finder(uper_bound, lower_bound, dataset_directory,
                        end_learning=100, num_iterations=100):
    """LR range test for a TrainingModule built from *dataset_directory*.

    uper_bound: max lr stored in the temporary hparams.
    lower_bound: the optimizer's starting lr for the sweep.
    end_learning / num_iterations: upper lr bound and step count of the sweep.
    """
    hparams_tmp = Namespace(
        train_path=dataset_directory + '/train.txt',
        val_path=dataset_directory + '/val.txt',
        test_path=dataset_directory + '/test.txt',
        batch_size=16,
        warmup_steps=100,
        epochs=1,
        lr=uper_bound,
        accumulate_grad_batches=1,
    )
    module = TrainingModule(hparams_tmp)
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(module.parameters(), lr=lower_bound)  ## lower bound LR
    # BUGFIX: torch device strings are "cuda"/"cpu" — "gpu" raises a
    # RuntimeError when the finder moves the model/criterion to the device.
    lr_finder = LRFinder(module, optimizer, criterion, device="cuda")
    lr_finder.range_test(module.train_dataloader(),
                         end_lr=end_learning,
                         num_iter=num_iterations,
                         accumulation_steps=hparams_tmp.accumulate_grad_batches)
    lr_finder.plot()
    #lr_finder.plot(show_lr=lr)  # show using learning rate
    lr_finder.reset()
def find_lr(model: torch.nn.Module, train_data: CircleDataset):
    """LR range test, saving the loss/lr curve to learning_rate.png.

    Procedure described in
    https://towardsdatascience.com/finding-good-learning-rate-and-the-one-cycle-policy-7159fe1db5d6
    """
    lr_image = 'learning_rate.png'
    loader = torch.utils.data.DataLoader(train_data,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         pin_memory=False)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-7, weight_decay=1e-2)
    finder = LRFinder(model, optimizer, criterion, device="cuda")
    logger.info("Running range test for learning rate")
    finder.range_test(loader, end_lr=100, num_iter=100)
    fig, ax = plt.subplots()
    finder.plot(ax=ax)  # inspect the loss vs learning-rate graph
    logger.info(f"Saving image with learning rate plot to {lr_image}")
    fig.savefig(lr_image, dpi=fig.dpi)
    finder.reset()  # restore model and optimizer to their initial state
def lr_find(self, device="cuda"):
    """
    This method is a pretraining method that plots the result of the
    learning rate finder to find an optimal learning rate.

    See also
    * https://github.com/davidtvs/pytorch-lr-finder
    *
    """
    # with torch.no_grad():
    finder = LRFinder(self.model, self.optimizer, self.criterion,
                      device=device)
    finder.range_test(self.train_dataloader(),
                      start_lr=0.0000001,
                      end_lr=10,
                      num_iter=100)
    finder.plot()   # inspect the loss-learning rate graph
    finder.reset()  # restore the model and optimizer to their initial state
def main(params):
    """Fit one single-gate model per candidate gate init and run an LR range
    test for each, writing every loss/lr curve to save_dir/lr_find.png.
    """
    start_time = time.time()
    # Eventually revisit this; left as-is to keep results comparable to
    # earlier runs.
    set_random_seeds(params)
    if not os.path.exists(params['save_dir']):
        os.makedirs(params['save_dir'])
    # Persist the run parameters next to the outputs for reproducibility.
    with open(os.path.join(params['save_dir'], 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)
    data_input = DataInput(params['data_params'])
    data_input.split_data()
    print('%d samples in the training data' %len(data_input.x_tr))
    # force identity for the first transform
    data_transformer = DataTransformerFactory(
        {'transform_type': 'identity'},
        params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(
        data_transformer,
        cells_to_subsample=params['transform_params']['cells_to_subsample'],
        num_cells_for_transformer=params['transform_params']['num_cells_for_transformer'],
        use_labels_to_transform_data=params['transform_params']['use_labels_to_transform_data']
    )
    data_input.normalize_data()
    # gates aren't plotted because we're in n dimensions
    unused_cluster_gate_inits = init_gates(data_input, params)
    # data_input.convert_all_data_to_tensors()
    figscale = 8
    # One subplot row per candidate gate.
    fig, axs = plt.subplots(nrows=len(unused_cluster_gate_inits),
                            figsize=(figscale,
                                     len(unused_cluster_gate_inits) * figscale))
    print("initializing model")
    # NOTE(review): with a single gate, plt.subplots returns a bare Axes
    # (not iterable), so zip(...) would fail — confirm len >= 2 is guaranteed.
    for gate, ax in zip(unused_cluster_gate_inits, axs):
        dataset = torch.utils.data.TensorDataset(
            torch.tensor(data_input.x_tr, dtype=torch.float),
            torch.tensor(data_input.y_tr, dtype=torch.float))
        trainloader = torch.utils.data.DataLoader(dataset, batch_size=32,
                                                  shuffle=True)
        criterion = torch.nn.BCEWithLogitsLoss()
        model = SingleGateModel(params, gate)
        # Tiny starting lr so the sweep spans many decades up to end_lr=1e4.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-7,
                                     weight_decay=1e-2)
        print("initializing LR finder")
        lr_finder = LRFinder(model, optimizer, criterion)
        lr_finder.range_test(trainloader, end_lr=1e4, num_iter=100)
        lr_finder.plot(ax=ax)
        print("LR History:", lr_finder.history)
    plt.savefig(os.path.join(params['save_dir'], 'lr_find.png'))
    print('Complete main loop took %.4f seconds' %(time.time() - start_time))
    return
def run():
    """LR range test for an ImageNet-pretrained InceptionV4 with a 2-class
    head on the 'retina' dataset; plots the curve and returns 0.
    """
    device = torch.device(GPU_ID if torch.cuda.is_available() else "cpu")
    print(f'Using device {device}')
    hyperparameter = {
        'learning_rate': [1e-2, 1e-3, 3e-4, 1e-4, 3e-5, 1e-7],  # 1e-4
        'weight_decay': [0, 1e-3, 5e-4, 1e-4, 1e-5],  # 1e-4
        'num_epochs': 70,  # 100
        'weights': [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],  # 0.6
        'optimizer': [optim.Adam, optim.SGD],  # Adam
        'image_size': 300,
        'crop_size': 299
    }
    loaders = prepare_dataset('retina', hyperparameter)
    #model: nn.Module = models.resnet50(pretrained=True)
    #num_ftrs = model.fc.in_features
    #model.fc = nn.Linear(num_ftrs, 2)
    model = ptm.inceptionv4(num_classes=1000, pretrained='imagenet')
    num_ft = model.last_linear.in_features
    model.last_linear = nn.Linear(num_ft, 2)
    # BUGFIX: children() returns a generator; the old len(list(children))
    # inside the loop condition exhausted it mid-iteration. Materialize once.
    children = list(model.features.children())
    # Fraction of layers to freeze is currently 0.0 (freeze nothing).
    freeze_below = 0.0 * len(children)
    for i, child in enumerate(children):
        if i < freeze_below:
            for param in child.parameters():
                # BUGFIX: the attribute is `requires_grad`; the old
                # `require_grad` silently created a new attribute and
                # froze nothing.
                param.requires_grad = False
    optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=1e-7,
                              weight_decay=0)
    criterion = nn.CrossEntropyLoss()
    lr_finder = LRFinder(model, optimizer_ft, criterion, device=device)
    lr_finder.range_test(loaders[0], end_lr=0.1, num_iter=100, step_mode='exp')
    lr_finder.plot()
    lr_finder.reset()
    return 0
def find_lr(self):
    """finding suitable learning rate

    Runs an LR range test from 1e-7 to 1 over 500 iterations with a
    summed L1 loss and plots the loss/lr curve.
    """
    model = self._model
    params = self.__set_lr()
    # FIX: size_average=False is deprecated (removed in modern PyTorch);
    # reduction='sum' is the exact equivalent.
    criterion = torch.nn.L1Loss(reduction='sum')
    optimizer = CaffeSGD(params,
                         lr=1e-8,
                         momentum=self.hparams.momentum,
                         weight_decay=self.hparams.wd)
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    trainloader = self.train_dataloader()
    lr_finder.range_test(trainloader, start_lr=1e-7, end_lr=1, num_iter=500)
    lr_finder.plot()
def lr_find(self, dl, optimizer=None, start_lr=1e-7, end_lr=1e-2, num_iter=200):
    """Range-test the model on *dl* and plot the loss/lr curve.

    Falls back to SGD(lr=1e-6, momentum=0.9) when no optimizer is given.
    """
    if optimizer is None:
        optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-6,
                                    momentum=0.9)
    finder = LRFinder(self.model, optimizer, self.loss_fn, device=self.device)
    finder.range_test(dl, start_lr=start_lr, end_lr=end_lr, num_iter=num_iter)
    finder.plot()
def main_worker(index, opt):
    """Per-process training entry point.

    Seeds RNGs, sets up (optionally distributed) model/criterion/loaders for
    worker *index*, optionally runs a standalone LR range test, otherwise
    trains/validates for opt.n_epochs and finally runs inference.
    """
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)
    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        # Split the per-node batch and worker threads across this node's GPUs.
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.dropout:
        # Fine-tuning replaces the head with n_finetune_classes outputs.
        n_classes = opt.n_classes
        if opt.pretrain_path is not None:
            n_classes = opt.n_finetune_classes
        model = replace_fc_layer(model=model,
                                 dropout_factor=opt.dropout_factor,
                                 n_classes=n_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    model = make_data_parallel(model, opt.distributed, opt.device)

    # When fine-tuning, only optimize parameters from ft_begin_module onward.
    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()
    if opt.is_master_node:
        print(model)

    if opt.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy().to(opt.device)
    else:
        criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            # Resuming: discard any events logged past the resume epoch.
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    if opt.lr_finder and not opt.no_train and not opt.no_val:
        print(
            "Performing Learning Rate Search\nWith Leslie Smith's approach...")
        lr_finder = LRFinder(model, optimizer, criterion, device=opt.device)
        lr_finder.range_test(train_loader,
                             val_loader=val_loader,
                             start_lr=opt.learning_rate,
                             end_lr=opt.lrf_end_lr,
                             num_iter=opt.lrf_num_it,
                             step_mode=opt.lrf_mode)
        lr_finder.plot(log_lr=False)
        # Dump the full lr/loss history for offline analysis.
        with (opt.result_path / 'lr_search.json').open('w') as results_file:
            json.dump(lr_finder.history, results_file, default=json_serial)
        lr_finder.reset()
        # LR search is a standalone mode: skip training entirely.
        return

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            #current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, train_logger, train_batch_logger,
                        scheduler, opt.lr_scheduler, tb_writer,
                        opt.distributed)
            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)
        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)
        # Epoch-level scheduler stepping; plateau needs the validation loss.
        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)
        elif not opt.no_train and opt.lr_scheduler == 'cosineannealing':
            scheduler.step()

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)
        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
def Interpol(N, neurons, iter, fun=0, a=1, b=1):
    """Train a small network to interpolate function *fun* on N samples,
    after picking a learning rate via an LR range test; plots progress
    every 5 epochs and returns the trained model.
    """
    datasamp = datagen(N, neurons, fun, a, b, legendre)
    val_inputs, val_labels = datasamp.get_val()
    train_inputs, train_labels = datasamp.get_train()
    # Initiate the data and labels
    train_loader = DataLoader(dataset=datasamp, num_workers=0)

    class LockedCybenko(torch.nn.Module):
        # Cybenko with inner weight=1 and bias=-x[i]
        def __init__(self):
            super(LockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc1.weight.data = torch.ones(neurons).reshape(-1, 1)
            self.fc1.bias.data = -torch.linspace(-1, 1, neurons).reshape(
                1, -1).float()
            # Inner layer is frozen; only fc2 is trainable.
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(False)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=False)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class SemilockedCybenko(torch.nn.Module):
        # Cybenko with inner weight=-1, one node less and free bias
        def __init__(self):
            super(SemilockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            # NOTE(review): the weight tensor is built with neurons-1 entries
            # but fc1 has `neurons` outputs — shapes look inconsistent;
            # confirm before using this class.
            self.fc1.weight.data = torch.ones(neurons - 1).reshape(-1, 1)
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(True)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=False)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class UnlockedCybenko(torch.nn.Module):
        # Cybenko with free inner weight or bias
        def __init__(self):
            super(UnlockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=True)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class Network(torch.nn.Module):
        # Arbitrary network
        def __init__(self):
            super(Network, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 2 * neurons, bias=True)
            self.fc3 = torch.nn.Linear(2 * neurons, 1, bias=True)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            return self.fc3(x)

    model = Network()
    criterion = torch.nn.MSELoss(reduction="sum")
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005)
    lr_finder = LRFinder(model, optimizer, criterion)
    lr_finder.range_test(train_loader, start_lr=0.001, end_lr=1.5,
                         num_iter=1000)
    lr_finder.reset()  # to reset the model and optimizer to their initial state
    # lr recorded at the minimum loss of the sweep.
    learning = lr_finder.history.get('lr')[np.argmin(
        lr_finder.history.get('loss'))]
    # NOTE(review): `learning` is computed but the optimizer below uses a
    # hard-coded lr=0.1 — confirm this is intentional.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    EL2Val = []    # validation loss per epoch
    EL2train = []  # training loss per epoch
    ELinf = []     # max abs error on the validation points
    EL2 = []  # L2 integral between f and u_teta
    for epoch in range(iter):
        x = []
        ytrue = []
        ypred = []
        for i, (inputs, labels) in enumerate(train_loader):
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            x.append(inputs.data.numpy())
            ytrue.append(labels.data.numpy())
            ypred.append(y_pred.data.numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Model evaluated on a numpy grid, returned as a (1, n) array.
        def modelonx(x):
            return model(
                torch.tensor(x.reshape(-1, 1).tolist(),
                             requires_grad=False)).data.numpy().reshape(1, -1)

        # Squared pointwise error against the true function, for quadrature.
        def L2error(x):
            return (modelonx(x) - np.array(truef(x, fun)).reshape(1, -1))**2

        ELinf.append(max(abs(val_labels - model(val_inputs))))
        EL2.append(quadrature(L2error, -1, 1)[0][0])
        EL2Val.append(criterion(val_labels, model(val_inputs)))
        EL2train.append((criterion(train_labels, model(train_inputs))))
        print(
            f'Epoch: {epoch} L2 Error on training : {EL2train[-1]:.6e} | L2 Error on validation : {EL2Val[-1]:.6e} | L2 on [-1,1] : {EL2[-1]:.6e}'
        )
        if epoch % 5 == 0:
            fig, ax = pl.subplots(nrows=1, ncols=2)
            plotrange = np.linspace(a - 0.1, b + 0.1, 100)
            """ Function and Model Plot"""
            ax[0].scatter(val_inputs.data.numpy(),
                          val_labels.data.numpy(),
                          c='red',
                          s=15)
            ax[0].scatter(train_inputs, train_labels, s=15)
            ax[0].plot(
                plotrange,
                model(torch.linspace(a - 0.1, b + 0.1,
                                     100).reshape(-1, 1)).data.numpy(), 'r')
            """ # Code qui permet d'afficher la fonction linéaire par morceau alpha = model.fc2.weight.data.numpy()[0] X = -model.fc1.bias.data.numpy()[0] ReLU = lambda t : np.where(t<=0,0,t) ax[0].plot(xx,alpha[0]*ReLU(xx-X[0])+alpha[1]*ReLU(xx-X[1])+alpha[2]*ReLU(xx-X[2])+alpha[3]*ReLU(xx-X[3])+alpha[4]*ReLU(xx-X[4])+alpha[5]*ReLU(xx-X[5])) """
            ax[0].plot(plotrange, truef(plotrange, fun), c='blue')
            #ax[0].plot(np.linspace(a-0.1,b+0.1,100),np.polyval(np.polyfit(train_inputs.data.numpy().reshape(1,-1)[0],train_labels.data.numpy().reshape(1,-1)[0],10),np.linspace(a-0.1,b+0.1,100)),c='green')
            if fun == 7:
                ax[0].plot(plotrange, maclaurin(plotrange, 50), c='green')
            ax[0].set_ylim(-0.1, 1.1)
            """ Error Plot """
            ax[1].semilogy(range(epoch + 1), EL2Val, color='red')
            ax[1].semilogy(range(epoch + 1), EL2train, color='blue')
            #ax[1].semilogy(range(epoch+1),EL2,color='magenta')
            #ax[1].semilogy(range(epoch+1),ELinf,color='black')
            pl.show()
    return model
def train(model, device, train_loader, test_loader, EPOCH, FACTOR, PATIENCE,
          MOMENTUM, LEARNING_RATE):
    """Train *model* for EPOCH epochs with SGD + OneCycleLR, test after each
    epoch, then run an LR range test on the trained model.

    Returns (train_losses, train_acc, model, img, true_wrong, pred_wrong,
    test_acc, test_losses, lr_finder).
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=0.001,
                          momentum=0.9,
                          nesterov=True,
                          weight_decay=0.0001)
    # BUGFIX: steps_per_epoch referenced the undefined name `trainloader`
    # (NameError at runtime); the parameter is `train_loader`.
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                              max_lr=0.008,
                                              pct_start=5 / 24,
                                              epochs=24,
                                              steps_per_epoch=len(train_loader))
    train_losses = []
    train_acc = []
    test_losses = []
    test_acc = []
    for epoch in range(EPOCH):
        correct = 0
        processed = 0
        pbar = tqdm(train_loader)
        model.train()
        for batch_idx, (data, target) in enumerate(pbar):
            # get samples
            data, target = data.to(device), target.to(device)
            # Zero the gradients first: PyTorch accumulates them across
            # backward passes, so stale gradients would corrupt the update.
            optimizer.zero_grad()
            # Predict
            y_pred = model(data)
            # Calculate loss
            # regularization_loss = 0
            # for param in model.parameters():
            #     regularization_loss += torch.sum(abs(param))
            # classify_loss = criterion(y_pred,target)
            loss = F.nll_loss(y_pred, target)
            #loss = classify_loss + LAMDA * regularization_loss
            # Backpropagation
            loss.backward()
            optimizer.step()
            scheduler.step()  # OneCycleLR steps per batch, not per epoch
            # Update pbar-tqdm
            pred = y_pred.argmax(
                dim=1, keepdim=True)  # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            processed += len(data)
            pbar.set_description(
                desc=
                f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}'
            )
        # Record the last batch loss and running accuracy for this epoch.
        train_losses.append(loss.item())
        train_acc.append(100 * correct / processed)
        img, true_wrong, pred_wrong, tst_acc, tst_loss = test(
            model, device, test_loader)
        test_losses.append(tst_loss)
        test_acc.append(tst_acc)
    lr_finder = LRFinder(model, optimizer, criterion, device)
    lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
    lr_finder.plot()  # to inspect the loss-learning rate graph
    # lr_finder.reset()
    return (train_losses, train_acc, model, img, true_wrong, pred_wrong,
            test_acc, test_losses, lr_finder)
def train_fully_supervised(model, n_epochs, train_loader, val_loader,
                           criterion, optimizer, scheduler, auto_lr,
                           save_folder, model_name, benchmark=False,
                           save_all_ep=True, save_best=False, device='cpu',
                           num_classes=21):
    """
    A complete training loop for a fully supervised model.
    save_folder : path where the model, loss curves and metrics are saved
    benchmark : enable or disable torch.backends.cudnn benchmarking
    save_all_ep : if True, the model is saved at each epoch in save_folder
    scheduler : if True, apply a polynomial LambdaLR schedule during training
    auto_lr : if True, run an automatic LR range test before training
    """
    torch.backends.cudnn.benchmark = benchmark
    if auto_lr:
        print('Auto finder for the Learning rate')
        lr_finder = LRFinder(model,
                             optimizer,
                             criterion,
                             memory_cache=False,
                             cache_dir='/tmp',
                             device=device)
        lr_finder.range_test(train_loader, start_lr=10e-5, end_lr=10,
                             num_iter=100)
        # NOTE(review): lr_finder.reset() is never called, so training below
        # starts from the post-sweep model/optimizer state — confirm intended.
    if scheduler:
        # Polynomial decay (power 0.9) over the total number of iterations.
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lambda x: (1 - x / (len(train_loader) * n_epochs))**0.9)
    loss_test = []
    loss_train = []
    iou_train = []
    iou_test = []
    accuracy_train = []
    accuracy_test = []
    model.to(device)
    for ep in range(n_epochs):
        print("EPOCH", ep)
        model.train()
        state = step_train_supervised(model,
                                      train_loader=train_loader,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      device=device,
                                      num_classes=num_classes)
        iou = state.metrics['mean IoU']
        acc = state.metrics['accuracy']
        loss = state.metrics['CE Loss']
        loss_train.append(loss)
        iou_train.append(iou)
        accuracy_train.append(acc)
        print('TRAIN - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE',
              loss)
        if scheduler:
            lr_scheduler.step()
        #Eval model
        model.eval()
        with torch.no_grad():
            state = eval_model(model, val_loader, device=device,
                               num_classes=num_classes)
            iou = state.metrics['mean IoU']
            acc = state.metrics['accuracy']
            loss = state.metrics['CE Loss']
            loss_test.append(loss)
            iou_test.append(iou)
            accuracy_test.append(acc)
            print('TEST - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE',
                  loss)
        ## Save model
        U.save_model(model, save_all_ep, save_best, save_folder, model_name,
                     ep=ep, iou=iou, iou_test=iou_test)
        U.save_curves(path=save_folder,
                      loss_train=loss_train,
                      iou_train=iou_train,
                      accuracy_train=accuracy_train,
                      loss_test=loss_test,
                      iou_test=iou_test,
                      accuracy_test=accuracy_test)
# Second-stage training setup: data, model, optimizer/criterion, then an LR
# range test whose curve is written under lr_finder_plots/.
transforms = utils.build_transforms(second_stage=True)
loaders = utils.build_loaders(data_dir, transforms, batch_sizes, num_workers,
                              second_stage=True)
model = utils.build_model(backbone,
                          second_stage=True,
                          num_classes=num_classes,
                          ckpt_pretrained=ckpt_pretrained).cuda()
# NOTE(review): local name `optim` shadows any `torch.optim` alias imported
# at module level.
optim = utils.build_optim(model, optimizer_params, scheduler_params,
                          criterion_params)
criterion, optimizer, scheduler = (
    optim["criterion"],
    optim["optimizer"],
    optim["scheduler"],
)
lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
lr_finder.range_test(loaders["train_features_loader"], end_lr=1, num_iter=300)
fig, ax = plt.subplots()
lr_finder.plot(ax=ax)
# File name encodes optimizer, dataset, batch size and stage.
fig.savefig(
    "lr_finder_plots/supcon_{}_{}_bs_{}_stage_{}_lr_finder.png".format(
        optimizer_params["name"], data_dir.split("/")[-1],
        batch_sizes["train_batch_size"], 'second'))
# Net net = myutils.get_network(args.net_dir, params.network) optimizer = myutils.get_optimizer(params.optimizer, net, params.learning_rate, params.momentum, params.weight_decay) # Loss function criterion = myutils.get_loss_fn(args.net_dir, params.network, weight) criterion = criterion.to(device) logging_process.info( f'Model: {args.model_dir}\tFile: train{fold}.csv\tWeight: {weight}' ) # FIND LR lr_finder = LRFinder(net, optimizer, criterion, device=device) trainloader = dataloaders['train'] valloader = dataloaders['val'] class CustomTrainIter(TrainDataLoaderIter): # My dataloader returns index, X, y def inputs_labels_from_batch(self, batch_data): return batch_data[1], batch_data[2] class CustomValIter(ValDataLoaderIter): # My dataloader returns index, X, y def inputs_labels_from_batch(self, batch_data): return batch_data[1], batch_data[2] custom_train_iter = CustomTrainIter(trainloader) custom_val_iter = CustomValIter(valloader)
class Shrink:
    '''Shrinks the code and gets the output'''

    def __init__(self, in_config):
        # in_config: dict-like configuration; keys used here and elsewhere
        # include 'modelpath', '*_data_loader', 'epochs', 'best_lr',
        # 'range_test'.
        self.config = in_config
        self.class_names = ('plane', 'car', 'bird', 'cat', 'deer', 'dog',
                            'frog', 'horse', 'ship', 'truck')
        # CIFAR-10 channel statistics used for (de)normalization.
        self.mean = (0.491, 0.482, 0.446)
        self.std = (0.247, 0.243, 0.261)
        # NOTE(review): torch.cuda.is_available is not CALLED (missing
        # parentheses) — the bound method is always truthy, so this always
        # selects "cuda"; confirm and add ().
        self.device = "cuda" if torch.cuda.is_available else "cpu"
        self.model_path = self.config['modelpath']['args']
        plt.style.use("dark_background")

    def seed_everything(self, seed: int) -> None:
        '''Seeds the Code so that we get predictable outputs'''
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    def load_data(self, train_transforms, test_transforms, in_dir='./data'):
        '''Downloads the dataset and returns train and testloaders after
        applying the Transformations'''
        trainset = datasets.CIFAR10(in_dir,
                                    train=True,
                                    download=True,
                                    transform=train_transforms())
        testset = datasets.CIFAR10(in_dir,
                                   train=False,
                                   download=True,
                                   transform=test_transforms())
        self.trainloader = torch.utils.data.DataLoader(
            trainset, **self.config['train_data_loader']['args'])
        self.testloader = torch.utils.data.DataLoader(
            testset, **self.config['test_data_loader']['args'])
        return self.trainloader, self.testloader

    def load_imagenet_data(self, train_transforms, test_transforms):
        '''Loads the imagenet dataset'''
        self.trainloader, self.testloader = get_imagenet_loader(
            train_transforms, test_transforms,
            self.config['train_data_loader']['args'],
            self.config['test_data_loader']['args'])

    def mean_std_dev(self):
        # Placeholder — not implemented.
        pass

    def show_data(self, mode='train', n=25):
        '''Plots the images on a gridplot to show the images passed via
        dataloader'''
        figure = plt.figure(figsize=(20, 20))
        images = None
        labels = None
        if mode.lower() == 'train':
            images, labels = next(iter(self.trainloader))
            labels = np.array(labels)
        elif mode.lower() == 'test':
            images, labels = next(iter(self.testloader))
            labels = np.array(labels)
        images = self.denormalize(images)
        # images = self.denormalize(images)
        for index in range(1, n + 1):
            plt.subplot(5, 5, index)
            plt.axis('off')
            # Gets the first n images of the dataset
            plt.imshow(np.transpose(images[index], (1, 2, 0)))  # Plots the dataset
            # plt.title(self.class_names[labels[index]])

    def get_batched_data(self, in_data):
        '''Takes in the list data and outputs data, targets and preds'''
        in_imgs = []
        in_preds = []
        in_targets = []
        for index, i in enumerate(in_data):
            in_imgs.append(i[0])
            in_preds.append(i[1])
            in_targets.append(i[2])
        return torch.stack(in_imgs), torch.stack(in_preds), torch.stack(in_targets)

    def plot_gradcam(self, target_layers, images, pred, target, nimgs):
        '''Plot GradCam - '''
        index = 0
        in_data = None
        # model.load_state_dict(torch.load(self.model_path))
        images = images[index:nimgs].to(self.device)
        target = target[index:nimgs]
        pred = pred[index:nimgs]
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            images, target, self.model, self.device, target_layers)
        # get the denomarlization function
        unorm = UnNormalize(mean=self.mean, std=self.std)
        plt_gradcam(gcam_layers=gcam_layers,
                    images=images,
                    target_labels=target,
                    predicted_labels=predicted_classes,
                    class_labels=self.class_names,
                    denormalize=unorm)

    def get_gradoutput(self, misclassified=False):
        '''Outputs a gradcam output when Inputting an image'''
        if misclassified:
            in_data = self.misclassified
        else:
            in_data = self.correct_classified
        target_layers = ["layer1", "layer2", "layer3", "layer4"]
        imgs, preds, targets = self.get_batched_data(in_data)
        self.plot_gradcam(target_layers, imgs, preds, targets, 25)

    def denormalize(self, tensor):
        '''Denormalize the data'''
        if not tensor.ndimension() == 4:
            raise TypeError('tensor should be 4D')
        mean = torch.FloatTensor(self.mean).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        std = torch.FloatTensor(self.std).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        return tensor.mul(std).add(mean)

    def get_model(self, train=True):
        '''Builds the model from config; when train is True runs the
        one-cycle training loop, otherwise returns the model.'''
        self.model = get_attributes(model_arch, 'model',
                                    self.config).to(self.device)
        self.epochs = self.config['epochs']
        if train:
            '''Trains the model and sends the output'''
            criterion = nn.CrossEntropyLoss(reduction='mean')
            optimizer = optim.SGD(self.model.parameters(), lr=0.01,
                                  momentum=0.9)  # **self.config['optimizer']['args'])
            # Fraction of the schedule spent ramping up: peak LR at epoch 5.
            max_at_epoch = 5
            self.best_lr = self.config['best_lr']
            pct_start_val = (max_at_epoch * len(self.trainloader)) / (
                self.epochs * len(self.trainloader))
            scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=self.best_lr,
                total_steps=len(self.trainloader) * self.epochs,
                steps_per_epoch=len(self.trainloader),
                epochs=self.epochs,
                pct_start=pct_start_val,
                anneal_strategy='cos',
                div_factor=10,
                final_div_factor=10
            )
            self.train_acc = []
            self.train_losses = []
            self.test_acc = []
            self.test_losses = []
            self.lr_metric = []
            EPOCHS = self.epochs
            print(f'Starting Training for {EPOCHS} Epochs')
            for i in range(EPOCHS):
                # Track the lr actually applied this epoch (first param group).
                lr_value = [group['lr'] for group in optimizer.param_groups][0]
                self.lr_metric.append(lr_value)
                print(f'EPOCHS : {i} Learning Rate: {lr_value}')
                model_training(self.model, self.device, self.trainloader,
                               optimizer, scheduler, self.train_acc,
                               self.train_losses, criterion, l1_loss=False)
                torch.save(self.model.state_dict(), self.model_path)
                self.misclassified, self.correct_classified = model_testing(
                    self.model, self.device, self.testloader, self.test_acc,
                    self.test_losses, criterion)
        else:
            return self.model

    def test_model(self):
        '''Loads and saves the test model'''
        test_losses = []
        test_acc = []
        model_path = 'latest_model.h5'
        self.model.load_state_dict(torch.load(model_path))
        self.misclassified, self.correct_classified = model_testing(
            self.model, self.device, self.testloader, test_acc, test_losses)
        return self.misclassified, self.correct_classified

    def findbestlr(self):
        '''Runs an LR range test with config['range_test'] args and returns
        the finder.'''
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.95,
                              weight_decay=0.0005)
        self.lr_finder = LRFinder(self.model, optimizer, criterion,
                                  device=self.device)
        self.lr_finder.range_test(self.trainloader,
                                  **self.config['range_test']['args'])
        self.lr_finder.plot()  # to inspect the loss-learning rate graph
        self.lr_finder.reset()  # to reset the model and optimizer to their initial state
        return self.lr_finder

    def model_metrics(self):
        '''Plots train/test loss and accuracy curves on a 2x2 grid.'''
        fig, axs = plt.subplots(2, 2, figsize=(15, 10))
        axs[0, 0].plot(self.train_losses)
        axs[0, 0].set_title('Train_Losses')
        axs[0, 1].plot(self.train_acc)
        axs[0, 1].set_title('Training_Accuracy')
        axs[1, 0].plot(self.test_losses)
        axs[1, 0].set_title('Test_Losses')
        axs[1, 1].plot(self.test_acc)
        axs[1, 1].set_title('Test_Accuracy')

    def print_visualization(self, input_size):
        '''Prints a visualization graph for Torch models'''
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.device)
        out = self.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph
# grad_clip = 0.001 # weight_decay = 1e-4 opt_func = torch.optim.Adam criterion = F.cross_entropy train_dl, val_dl = get_data_loader(subject='S2', train_batch_size=train_batch_size, val_batch_size=val_batch_size) # model = WesadFeedForward(input_dim, output_dim) model = WesadLSTM(input_dim=input_dim, hidden_dim=input_dim, output_dim=output_dim, lstm_layers=lstm_layers) # optimizer = opt_func(model.parameters(), lr=max_lr, weight_decay=weight_decay) optimizer = opt_func(model.parameters(), lr=max_lr) lr_finder = LRFinder(model, optimizer, criterion, device="cuda") lr_finder.range_test(train_dl, end_lr=10000, num_iter=1000) lr_finder.plot() # to inspect the loss-learning rate graph # lr_finder.reset() # to reset the model and optimizer to their initial state # ## Running models - LOSO CV # In[18]: epochs = 20 lr = 1e-4 models = [] histories = [] val_histories = [] for subject in subjects:
def find_lr(self):
    """Find the best learning rate with torch_lr_finder and store it on
    self.best_lr; plots the sweep, then resets the model/optimizer.
    """
    from torch_lr_finder import LRFinder
    logger.info('finding the best learning rate')
    cfg = self.config
    # Architecture module is chosen by the tsai_mode flag.
    if self.tsai_mode:
        import sodium.tsai_model as module_arch
    else:
        import sodium.model.model as module_arch
    # create a model instance
    model = get_instance(module_arch, 'arch', cfg)
    # setup the model with the device
    model, device = setup_device(model, cfg['target_device'])
    param_groups = setup_param_groups(model, cfg['optimizer'])
    optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)
    criterion = getattr(module_loss, cfg['criterion'])()
    self.lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    lr_finder_epochs = cfg['lr_finder']['epochs']
    logger.info(f'Running LR-Test for {lr_finder_epochs} epochs')
    # my method
    # NOTE(review): num_iter is scaled by len(test_loader), not
    # len(train_loader) — confirm this is intended.
    self.lr_finder.range_test(self.trainer.train_loader,
                              start_lr=1e-3,
                              end_lr=1,
                              num_iter=len(self.trainer.test_loader) *
                              lr_finder_epochs,
                              step_mode='linear')
    # leslie smith method
    # self.lr_finder.range_test(self.trainer.train_loader, val_loader = self.trainer.test_loader,
    #                           end_lr=1, num_iter=len(self.trainer.train_loader), step_mode='linear')
    # fast ai method
    # self.lr_finder.range_test(
    #     self.trainer.train_loader, end_lr=100, num_iter=len(self.trainer.train_loader))
    # lr recorded at the lowest loss of the sweep.
    self.best_lr = self.lr_finder.history['lr'][
        self.lr_finder.history['loss'].index(self.lr_finder.best_loss)]
    # lrs ordered by ascending loss — the head of this list is informative.
    sorted_lrs = [
        x for _, x in sorted(
            zip(self.lr_finder.history['loss'], self.lr_finder.history['lr']))
    ]
    logger.info(f'sorted lrs : {sorted_lrs[:10]}')
    logger.info(f'found the best lr : {self.best_lr}')
    logger.info('plotting lr_finder')
    plt.style.use("dark_background")
    self.lr_finder.plot()
    # reset the model and the optimizer
    self.lr_finder.reset()
    plt.show()
    del model, optimizer, criterion
def lr_finder(model, optimizer, criterion, device):
    """Build and return a ``LRFinder`` wired to the given training components."""
    finder = LRFinder(model=model,
                      optimizer=optimizer,
                      criterion=criterion,
                      device=device)
    return finder
class Runner:
    """Orchestrates training: LR finding, training, plotting and diagnostics.

    Relies on project helpers (``get_instance``, ``setup_device``,
    ``setup_param_groups``, ``Trainer``, ``get_gradcam``, ...) and a config
    dict supplied at construction.
    """

    def __init__(self, config):
        # Raw configuration dict; read throughout via ``cfg = self.config``.
        self.config = config

    def find_lr(self):
        """Run an LR range test on a fresh model and store the best LR.

        Sets ``self.lr_finder`` and ``self.best_lr``; shows a plot.
        Requires ``setup_train`` to have been called (uses ``self.trainer``).
        """
        from torch_lr_finder import LRFinder
        logger.info('finding the best learning rate')
        cfg = self.config
        if self.tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch
        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)
        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])
        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)
        criterion = getattr(module_loss, cfg['criterion'])()
        self.lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
        lr_finder_epochs = cfg['lr_finder']['epochs']
        logger.info(f'Running LR-Test for {lr_finder_epochs} epochs')
        # my method
        # NOTE(review): num_iter is based on the *test* loader length — confirm.
        self.lr_finder.range_test(self.trainer.train_loader, start_lr=1e-3, end_lr=1,
                                  num_iter=len(self.trainer.test_loader) * lr_finder_epochs,
                                  step_mode='linear')
        # leslie smith method
        # self.lr_finder.range_test(self.trainer.train_loader, val_loader = self.trainer.test_loader,
        #     end_lr=1, num_iter=len(self.trainer.train_loader), step_mode='linear')
        # fast ai method
        # self.lr_finder.range_test(
        #     self.trainer.train_loader, end_lr=100, num_iter=len(self.trainer.train_loader))
        # Best LR is the one recorded at the minimum-loss position.
        self.best_lr = self.lr_finder.history['lr'][
            self.lr_finder.history['loss'].index(self.lr_finder.best_loss)]
        sorted_lrs = [
            x for _, x in sorted(
                zip(self.lr_finder.history['loss'], self.lr_finder.history['lr']))
        ]
        logger.info(f'sorted lrs : {sorted_lrs[:10]}')
        logger.info(f'found the best lr : {self.best_lr}')
        logger.info('plotting lr_finder')
        plt.style.use("dark_background")
        self.lr_finder.plot()
        # reset the model and the optimizer
        self.lr_finder.reset()
        plt.show()
        del model, optimizer, criterion

    def train(self, use_bestlr=False, lr_value=None):
        """Train via ``self.trainer``; optionally seed LRs from ``self.best_lr``.

        use_bestlr: push best_lr-derived values into every optimizer param group.
        lr_value: explicit LR override, used only when use_bestlr is False.
        """
        # if the best lr was found use that value instead
        if use_bestlr and self.best_lr is not None:
            logger.info(f'using max_lr : {self.best_lr}')
            logger.info(f'using min_lr : {self.best_lr/30}')
            logger.info(f'using initial_lr : {self.best_lr/20}')
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = self.best_lr / 10
                param_group['max_lr'] = self.best_lr
                param_group['min_lr'] = self.best_lr / 30
                # NOTE(review): key is misspelled — almost certainly meant
                # 'initial_lr' (the key OneCycle-style schedulers read).
                param_group['intial_lr'] = self.best_lr / 20
        if not use_bestlr and (lr_value is not None):
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = lr_value
        self.trainer.train()
        logger.info('Finished!')

    def setup_train(self, tsai_mode=False):
        """Build model, data loaders, criterion, scheduler and the Trainer."""
        cfg = self.config
        self.tsai_mode = tsai_mode
        if tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch
        logger.info('Training Config')
        # display the config
        for line in pprint.pformat(cfg).split('\n'):
            logger.info(line)
        # to get consistent results, seed everything
        seed_everything(cfg['seed'])
        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)
        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])
        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)
        self.transforms = get_instance(module_aug, 'augmentation', cfg)
        # get the train and test loaders
        self.data_loader = get_instance(module_data, 'data_loader', cfg, self.transforms)
        train_loader, test_loader = self.data_loader.get_loaders()
        logger.info('Getting loss function handle')
        criterion = getattr(module_loss, cfg['criterion'])()
        batch_scheduler = False
        if cfg['lr_scheduler']['type'] == 'OneCycleLR':
            logger.info('Building: torch.optim.lr_scheduler.OneCycleLR')
            max_at_epoch = cfg['lr_scheduler']['max_lr_at_epoch']
            # pct_start: fraction of the cycle before peak LR; falls back to
            # 0.8 when max_lr_at_epoch is falsy.
            pct_start = (max_at_epoch) / \
                cfg['training']['epochs'] if max_at_epoch else 0.8
            sch_cfg = cfg['lr_scheduler']['args']
            lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer, max_lr=sch_cfg['max_lr'],
                steps_per_epoch=len(train_loader),
                pct_start=pct_start,
                epochs=cfg['training']['epochs'])
            # OneCycleLR steps every batch, not every epoch.
            batch_scheduler = True
        else:
            lr_scheduler = get_instance(module_scheduler, 'lr_scheduler', cfg, optimizer)
        logger.info('Initializing trainer')
        self.trainer = Trainer(model, criterion, optimizer, cfg, device,
                               train_loader, test_loader,
                               lr_scheduler=lr_scheduler,
                               batch_scheduler=batch_scheduler)

    def plot_metrics(self):
        """Plot train/test metrics and the LR curve gathered by the trainer."""
        plt.style.use("dark_background")
        logger.info('Plotting Metrics...')
        plot.plot_metrics(self.trainer.train_metric, self.trainer.test_metric)
        plot.plot_lr_metric(self.trainer.lr_metric)

    def plot_gradcam(self, target_layers):
        """Render Grad-CAM overlays for the first 5 test images."""
        plt.style.use("dark_background")
        logger.info('Plotting Grad-CAM...')
        # use the test images
        data, target = next(iter(self.trainer.test_loader))
        data, target = data.to(self.trainer.device), target.to(
            self.trainer.device)
        # NOTE(review): braces are literal (not an f-string) — logs "{5}".
        logger.info('Taking {5} samples')
        # get 5 images
        data = data[:5]
        target = target[:5]
        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device, target_layers)
        # get the denomarlization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean,
                                       std=self.transforms.std)
        # Module-level plot_gradcam helper (not recursion on this method).
        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)

    def print_summary(self, input_size):
        """Print a layer-by-layer model summary for the given input size."""
        summary(self.trainer.model, input_size)

    def print_visualization(self, input_size):
        """Render and return a torchviz graph of one forward pass."""
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.trainer.device)
        out = self.trainer.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph

    def plot_misclassifications(self, target_layers):
        """Collect all misclassified test samples and Grad-CAM the first 25."""
        plt.style.use("dark_background")
        assert (self.trainer.model is not None)
        # get the data, target of only missclassified and do what you do for gradcam
        logger.info('getting misclassifications')
        misclassified = []
        misclassified_target = []
        misclassified_pred = []
        model, device = self.trainer.model, self.trainer.device
        # set the model to evaluation mode
        model.eval()
        # turn off gradients
        with torch.no_grad():
            for data, target in self.trainer.test_loader:
                # move them to respective device
                data, target = data.to(device), target.to(device)
                # do inferencing
                output = model(data)
                # get the predicted output
                pred = output.argmax(dim=1, keepdim=True)
                # get the current misclassified in this batch
                list_misclassified = (target.eq(pred.view_as(target)) == False)
                batch_misclassified = data[list_misclassified]
                batch_mis_pred = pred[list_misclassified]
                batch_mis_target = target[list_misclassified]
                # batch_misclassified =
                misclassified.append(batch_misclassified)
                misclassified_pred.append(batch_mis_pred)
                misclassified_target.append(batch_mis_target)
        # group all the batched together
        misclassified = torch.cat(misclassified)
        misclassified_pred = torch.cat(misclassified_pred)
        misclassified_target = torch.cat(misclassified_target)
        # NOTE(review): braces are literal (not an f-string) — logs "{25}".
        logger.info('Taking {25} samples')
        # get 5 images
        data = misclassified[:25]
        target = misclassified_target[:25]
        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device, target_layers)
        # get the denomarlization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean,
                                       std=self.transforms.std)
        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)
# --- LR range test script: sweep, then inspect the lowest-loss LRs ---
from torch_lr_finder import LRFinder

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(trainloader, end_lr=10, num_iter=1564, step_mode='exp')
lr_finder.plot()  # to inspect the loss-learning rate graph
lr_finder.reset()  # to reset the model and optimizer to their initial state


def take_lr(x):
    """Sort key: the second element of an (a, b) pair (here, the loss)."""
    # print(x)
    return x[1]


# FIX: take_lr was previously defined *after* its first use below, which
# raised NameError at runtime; the definition is now hoisted above the calls.
a = zip(lr_finder.history['lr'], lr_finder.history['loss'])
# 50 (lr, loss) pairs with the smallest loss.
best_lrloss = sorted(a, key=take_lr, reverse=False)[:50]

tup = zip(lr_finder.history['loss'], lr_finder.history['lr'])
# NOTE(review): result is discarded; here key=take_lr sorts by *lr* since the
# tuple order is (loss, lr).
sorted(tup, key=take_lr, reverse=False)[:50]


class shrink:
    """Placeholder pipeline stage holding a config."""

    def __init__(self, config):
        self.config = config

    def apply_augmentations(self):
        # Not implemented yet.
        pass
# --- Small MLP regressor: init, LR range test, then a cyclic LR schedule ---
net = nn.Sequential(
    nn.Linear(12, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
    #nn.ReLU(),
    #nn.Linear(5, 1)
)
net.apply(init_weights)
# net = net# .cuda()
opt = optim.Adam(net.parameters(), lr=10**-2.8)
criterion = nn.L1Loss()
from torch_lr_finder import LRFinder
# No device argument: LRFinder uses the model's current device.
lrf = LRFinder(net, opt, criterion)
lrf.range_test(train_loader=trainloader, start_lr=0.0001, end_lr=1)
lrf.plot()
lrf.reset()
#%%
train_losses = []
val_losses = []
# Cyclic LR between 1e-3 and 1e-2; one up-phase spans ~8 epochs of batches.
# NOTE(review): step_size_up is a float here — CyclicLR expects an int; confirm.
scheduler = torch.optim.lr_scheduler.CyclicLR(
    opt, 10**-3, 10**-2, mode='exp_range',
    step_size_up=(xtrain.size(0) / BATCHSIZE) * 8,
    cycle_momentum=False)
# PyTorch import torchvision from torchvision import transforms, datasets, models import torch from torch import optim, cuda from torch.utils.data import DataLoader, sampler import torch.nn as nn from torch_lr_finder import LRFinder from utils.model import get_model, get_dataloaders model = get_model() dataloaders = get_dataloaders() # we will be using negative log likelihood as the loss function criterion = nn.CrossEntropyLoss() # we will be using the SGD optimizer as our optimizer optimizer = optim.SGD(model.fc.parameters(), lr=1e-4) lr_finder = LRFinder(model, optimizer, criterion, device='cuda') lr_finder.range_test(dataloaders['train'], end_lr=1, num_iter=2500) lr_finder.plot() lr_finder.reset()
def lr_range_test(
    model,
    dataset,
    loss_func,
    optimizer="AdamW",
    batch_size=32,
    num_iter=None,
    skip_start=10,
    skip_end=10,
    plot=False,
    start_lr=1e-7,
    end_lr=10,
):
    """Pick a max learning rate for *model* via an LR range test.

    Subsamples *dataset* (first 70% for train, rest for validation), sweeps
    the LR from ``start_lr`` to ``end_lr`` with ``torch_lr_finder``, and
    returns the LR midway (in index space) between the steepest-descent
    point and the minimum-loss point. Falls back to 0.1 if that fails.

    Note: parameter order above matches the original signature — do not
    reorder; callers may pass positionally.
    """
    if num_iter is None:
        # Heuristic: more iterations for larger datasets (log-scaled).
        num_iter = 100 + int(np.log10(10 + len(dataset)) * 50)
    n_train = min(len(dataset), num_iter * batch_size)
    n_val = min(int(0.3 * len(dataset)), 2 * num_iter)
    log.debug("num_iter: {}, n_val: {}".format(num_iter, n_val))
    split_idx = int(0.7 * len(dataset))
    # NOTE(review): np.random.choice samples WITH replacement by default, so
    # both subsets may contain duplicates — confirm this is acceptable.
    idx_train = np.random.choice(split_idx, size=n_train)
    idx_val = np.random.choice(np.arange(split_idx, len(dataset)), size=n_val)
    train_data = Subset(dataset, idx_train)
    val_data = Subset(dataset, idx_val)
    lrtest_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    lrtest_loader_val = DataLoader(val_data, batch_size=1024, shuffle=True)
    lrtest_optimizer = create_optimizer(optimizer, model.parameters(), start_lr)
    # Silence torch_lr_finder's progress output.
    with utils.HiddenPrints():
        lr_finder = LRFinder(model, lrtest_optimizer, loss_func)
        lr_finder.range_test(
            lrtest_loader,
            val_loader=lrtest_loader_val,
            end_lr=end_lr,
            num_iter=num_iter,
            smooth_f=0.2,  # re-consider if lr-rate varies a lot
        )
    lrs = lr_finder.history["lr"]
    losses = lr_finder.history["loss"]
    # Trim the noisy ends of the sweep ([-0] would produce an empty slice,
    # hence the explicit skip_end == 0 branch).
    if skip_end == 0:
        lrs = lrs[skip_start:]
        losses = losses[skip_start:]
    else:
        lrs = lrs[skip_start:-skip_end]
        losses = losses[skip_start:-skip_end]
    if plot:
        with utils.HiddenPrints():
            # steepest_lr from the library plot is not used below.
            ax, steepest_lr = lr_finder.plot(
            )  # to inspect the loss-learning rate graph
    chosen_idx = None
    try:
        # Steepest descent = most negative gradient of the loss curve.
        steep_idx = (np.gradient(np.array(losses))).argmin()
        min_idx = (np.array(losses)).argmin()
        # Midpoint (in index space) between steepest point and loss minimum.
        chosen_idx = int((steep_idx + min_idx) / 2.0)
        # chosen_idx = min_idx
        log.debug("lr-range-test results: steep: {:.2E}, min: {:.2E}".format(
            lrs[steep_idx], lrs[min_idx]))
    except ValueError:
        log.error(
            "Failed to compute the gradients, there might not be enough points."
        )
    if chosen_idx is not None:
        max_lr = lrs[chosen_idx]
        log.info("learning rate range test selected lr: {:.2E}".format(max_lr))
    else:
        max_lr = 0.1
        log.error("lr range test failed. defaulting to lr: {}".format(max_lr))
    with utils.HiddenPrints():
        lr_finder.reset(
        )  # to reset the model and optimizer to their initial state
    return max_lr
def run_lr_finder(
    args,
    model,
    train_loader,
    optimizer,
    criterion,
    val_loader=None,
    verbose=True,
    show=True,
    figpth=None,
    device=None,
    recommender="logmean14",
    fieldnames=None,
    outfile_path=None,
    hparams=None,
):
    """Run an LR range test and return a recommended learning rate.

    Sweeps the LR, locates the loss minimum and the preceding maximum, then
    derives candidate LRs at fractions of the climb between them; the
    ``recommender`` string selects which combination is returned. Optionally
    logs the history to CSV, plots annotated results, and saves the figure.

    NOTE(review): an unrecognized ``recommender`` value leaves
    ``lr_recomend`` unassigned and raises NameError at the print/return
    below; also the "div10" branch computes the same value as "logmean12" —
    both look unintended but the intended fix is ambiguous, so flagged only.
    """
    if verbose:
        print("Running learning rate finder")
    # Optional NVIDIA apex mixed-precision wrapping.
    if args.mix_pre_apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")
    lr_finder = LRFinder(model, optimizer, criterion, device=device)
    min_lr = 1e-7 if args.model == 'mlp' else 1e-10
    lr_finder.range_test(
        train_loader,
        val_loader=val_loader,
        start_lr=min_lr,
        end_lr=10,
        num_iter=200,
        diverge_th=3,
    )
    # Loss minimum, and the loss maximum occurring *before* the minimum.
    min_index = np.argmin(lr_finder.history["loss"])
    lr_at_min = lr_finder.history["lr"][min_index]
    min_loss = lr_finder.history["loss"][min_index]
    max_index = np.argmax(lr_finder.history["loss"][:min_index])
    lr_at_max = lr_finder.history["lr"][max_index]
    max_loss = lr_finder.history["loss"][max_index]
    # Outputting data to CSV at end of epoch
    if fieldnames and outfile_path:
        with open(outfile_path, mode='a') as out_file:
            writer = csv.DictWriter(out_file, fieldnames=fieldnames,
                                    lineterminator='\n')
            writer.writerow({
                'hp_idx': args.hp_idx,
                'hyperparam_set': hparams,
                'seed': args.seed,
                'lr': lr_finder.history["lr"],
                'loss': lr_finder.history["loss"]
            })
    if not show and not figpth:
        # No plotting requested, so the library's steepest-LR is unavailable.
        lr_steepest = None
    else:
        if verbose:
            print("Plotting learning rate finder results")
        hf = plt.figure(figsize=(15, 9))
        ax = plt.axes()
        _, lr_steepest = lr_finder.plot(skip_start=0, skip_end=3, log_lr=True,
                                        ax=ax)
        # Pad y-limits by 10% of the loss span.
        ylim = np.array([min_loss, max_loss])
        ylim += 0.1 * np.diff(ylim) * np.array([-1, 1])
        plt.ylim(ylim)
        plt.tick_params(reset=True, color=(0.2, 0.2, 0.2))
        plt.tick_params(labelsize=14)
        ax.minorticks_on()
        ax.tick_params(direction="out")
    init_loss = lr_finder.history["loss"][0]
    # Candidate LRs where the loss is 1/2, 1/3, 2/3 and 1/4 of the way up
    # from the minimum toward the preceding maximum (searched on the
    # descending segment max_index:min_index).
    loss_12 = min_loss + 0.5 * (max_loss - min_loss)
    index_12 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_12))
    lr_12 = lr_finder.history["lr"][index_12]
    loss_13 = min_loss + 1 / 3 * (max_loss - min_loss)
    index_13 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_13))
    lr_13 = lr_finder.history["lr"][index_13]
    loss_23 = min_loss + 2 / 3 * (max_loss - min_loss)
    index_23 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_23))
    lr_23 = lr_finder.history["lr"][index_23]
    loss_14 = min_loss + 1 / 4 * (max_loss - min_loss)
    index_14 = max_index + np.argmin(
        np.abs(
            np.array(lr_finder.history["loss"][max_index:min_index]) -
            loss_14))
    lr_14 = lr_finder.history["lr"][index_14]
    if recommender == "div10":
        # NOTE(review): identical to the "logmean12" branch — see docstring.
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_12)]))
    elif recommender == "min12":
        lr_recomend = np.min([lr_at_min / 10, lr_12])
    elif recommender == "min13":
        lr_recomend = np.min([lr_at_min / 10, lr_13])
    elif recommender == "min14":
        lr_recomend = np.min([lr_at_min / 10, lr_14])
    elif recommender == "logmean12":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_12)]))
    elif recommender == "logmean13":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_13)]))
    elif recommender == "logmean14":
        lr_recomend = np.exp(np.mean([np.log(lr_at_min / 10), np.log(lr_14)]))
    if verbose:
        if lr_steepest is not None:
            print("LR at steepest grad: {:.3e} (red)".format(lr_steepest))
        print("LR at minimum loss : {:.3e}".format(lr_at_min))
        print("LR a tenth of min : {:.3e} (orange)".format(lr_at_min / 10))
        print("LR when 1/4 up : {:.3e} (yellow)".format(lr_14))
        print("LR when 1/3 up : {:.3e} (blue)".format(lr_13))
        print("LR when 1/2 up : {:.3e} (cyan)".format(lr_12))
        print("LR when 2/3 up : {:.3e} (green)".format(lr_23))
        print("LR recommended : {:.3e} (black)".format(lr_recomend))
    if show or figpth:
        # Annotate the plot with all candidate LRs.
        ax.axvline(x=lr_steepest, color="red")
        ax.axvline(x=lr_at_min / 10, color="orange")
        ax.axvline(x=lr_14, color="yellow")
        ax.axvline(x=lr_13, color="blue")
        ax.axvline(x=lr_12, color="cyan")
        ax.axvline(x=lr_23, color="green")
        ax.axvline(x=lr_recomend, color="black", ls=":")
    if figpth:
        # Save figure
        os.makedirs(os.path.dirname(figpth), exist_ok=True)
        plt.savefig(figpth)
        if verbose:
            print("LR Finder results saved to {}".format(figpth))
    if show:
        plt.show()
    return lr_recomend
nn.Linear(in_features=30, out_features=1),
    nn.Sigmoid())  # (continuation: the `net = nn.Sequential(` opener precedes this chunk)
# NOTE: this rebinds the name `optim` from the torch module to an optimizer
# instance; later scheduler calls use the full torch.optim path, so it works.
optim = torch.optim.Adam(net.parameters())
criterion = nn.BCELoss()

def init_weights(m):
    # Xavier-initialize only Linear layers.
    # NOTE(review): prefer isinstance(m, nn.Linear) and the in-place
    # nn.init.xavier_uniform_ (the non-underscore form is deprecated).
    if type(m) == nn.Linear:
        nn.init.xavier_uniform(m.weight)

net.apply(init_weights)
#%%
from torch_lr_finder import LRFinder
lrf = LRFinder(net, optim, criterion)
lrf.range_test(trainloader, start_lr=10**-5, end_lr=1)
lrf.plot()
lrf.reset()
#%%
n_epochs = 20
# Cyclic LR between 1e-3 and 1e-2 (momentum cycling off for Adam).
scheduler = torch.optim.lr_scheduler.CyclicLR(optim, 10**-3, 10**-2,
                                              cycle_momentum=False)
history = {'train': [], 'val': []}
# Training loop (body continues beyond this chunk).
for epoch in range(n_epochs):
    for x, y in trainloader:
        yhat = net(x)
# --- LR-finder sweep over weight-decay / dropout combos for a BERT tagger ---
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
legend = []
fig = None  # holds the shared Axes returned by lr_finder.plot(ax=...)
for wd in [0, .1, 1e-2, 1e-3, 1e-4]:
    for dp in [.1, 0.2, .3]:
        # Fresh model per combo so each range test starts from scratch.
        nerbert = BertForTokenClassificationCustom.from_pretrained(
            pretrained_model_name_or_path=MODEL_NAME,
            num_labels=len(labels2ind),
            hidden_dropout_prob=dp,
            attention_probs_dropout_prob=dp)
        # Prepare optimizer and schedule (linear warmup and decay)
        optimizer = get_optimizer_with_weight_decay(model=nerbert,
                                                    optimizer=OPTIMIZER,
                                                    learning_rate=LEARNING_RATE,
                                                    weight_decay=wd)
        lr_finder = LRFinder(nerbert, optimizer, nn.CrossEntropyLoss(),
                             device='cuda')
        lr_finder.range_test(train_loader=dataloader_tr, end_lr=1, num_iter=100)
        # All curves are drawn onto the same axes.
        fig = lr_finder.plot(ax=fig)
        # NOTE(review): label omits dp, so the three dp runs per wd share
        # identical legend entries — consider including dp in the label.
        legend.append(f"wd: {wd}")
fig.figure.legend(legend, loc='best')
fig.figure.tight_layout()
fig.figure.show()
fig.figure.savefig('lr_finder.png')