def findLR(model, criterion, optimizer, trainloader):
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    lr_finder.range_test(trainloader, end_lr=100, num_iter=100)
    lr_finder.plot()   # to inspect the loss-learning rate graph
    lr_finder.reset()  # to reset the model and optimizer to their initial state
def find(model_name: str, pretrained: bool, train_only_last_layer: bool,
         train_data_loader: torch.utils.data.DataLoader,
         min_learning_rate: float, max_learning_rate: float,
         num_iter: int, step_mode: str):
    """
    Find a learning rate based on Leslie Smith's approach and the
    https://github.com/davidtvs/pytorch-lr-finder implementation.

    Arguments
    ---------
    model_name : str
        Model to train
    pretrained : bool
        True if the model should be pretrained, False otherwise
    train_only_last_layer : bool
        Indicates which part of the model is trained (the filename will
        contain this information)
    train_data_loader : torch.utils.data.DataLoader
        Data loader used for training
    min_learning_rate : float
        Minimum learning rate used for the search
    max_learning_rate : float
        Maximum learning rate used for the search
    num_iter : int
        Number of iterations over which the test is performed
    step_mode : str
        Mode used to perform the search
    """
    model = get_model(model_name, train_only_last_layer, pretrained)
    criterion, optimizer = get_loss_and_optimizer(model, min_learning_rate)
    lr_finder = LRFinder(model, optimizer, criterion)
    lr_finder.range_test(train_loader=train_data_loader,
                         end_lr=max_learning_rate,
                         num_iter=num_iter,
                         step_mode=step_mode)
    lr_finder.plot()
    lr_finder.reset()
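# A hypothetical invocation of find(); the model name, dataset (train_dataset),
# and search bounds below are illustrative assumptions, not values taken from
# the surrounding code:
loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
find(model_name="resnet18", pretrained=True, train_only_last_layer=False,
     train_data_loader=loader, min_learning_rate=1e-7, max_learning_rate=10,
     num_iter=100, step_mode="exp")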
def plot_lr_(model, train_loader, test_loader, optimizer, criterion,
             device='cpu', step_mode="linear"):
    lr_finder = LRFinder(model, optimizer, criterion, device=device)
    lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
    # lr_finder.range_test(train_loader, val_loader=test_loader, end_lr=1,
    #                      num_iter=100, step_mode=step_mode)
    lr_finder.plot(log_lr=False)
    lr_finder.reset()
def find_lr(self, start_lr=1e-6, end_lr=1e2, accum_steps=1, opt='AdamW', wd=0):
    self.set_optimizer(opt=opt, lr=start_lr, wd=wd)
    dl = self.LRDataloader(self.data['train'])
    lr_finder = LRFinder(self.model, self.opt, self.loss_func,
                         device="cuda" if torch.cuda.is_available() else "cpu")
    lr_finder.range_test(dl, end_lr=end_lr, num_iter=100,
                         accumulation_steps=accum_steps)
    lr_finder.plot()
    lr_finder.reset()
def lrfinder(net, optimizer, criterion, trainloader, valloader):
    lr_finder = LRFinder(net, optimizer, criterion, device="cuda")
    lr_finder.range_test(trainloader, val_loader=valloader, end_lr=10,
                         num_iter=100, step_mode="exp")
    lr_finder.plot()
    lr_finder.reset()
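# Note on step_mode, per the torch-lr-finder README: "exp" increases the
# learning rate exponentially and evaluates the training loss (the fastai-style
# test), while "linear" increases it linearly and is usually paired with a
# val_loader, as in Leslie Smith's original range test. A sketch of the linear
# variant, reusing the argument names of lrfinder() above:
lr_finder = LRFinder(net, optimizer, criterion, device="cuda")
lr_finder.range_test(trainloader, val_loader=valloader, end_lr=1,
                     num_iter=100, step_mode="linear")
lr_finder.plot()
lr_finder.reset()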
def find_lr(model, opt, loss_func, device, dataLoader):
    lr_finder = LRFinder(model=model, optimizer=opt, criterion=loss_func,
                         device=device)
    lr_finder.range_test(dataLoader, end_lr=100, num_iter=200)
    lr_finder.plot()
    # reset model & opt to their original weights
    lr_finder.reset()
def lr_finder(self, end_lr=100, num_iter=100):
    lr_finder = LRFinder(self.model, self.opt_fn, self.loss_fn,
                         device=self.device)
    lr_finder.range_test(self.data.train_dl, end_lr=end_lr, num_iter=num_iter)
    lr_finder.plot()
    lr_finder.reset()
def findLR(model, train_loader, test_loader, criterion, optimizer, num_iteration):
    # Add this line before running `LRFinder`
    # model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    # fast ai method
    lr_finder.range_test(train_loader, end_lr=0.5, num_iter=num_iteration)
    # lr_finder.range_test(train_loader, val_loader=test_loader, end_lr=10,
    #                      num_iter=num_iteration, step_mode="linear")
    lr_finder.plot(log_lr=False)
    lr_finder.reset()
    best_lr = lr_finder.history['lr'][lr_finder.history['loss'].index(
        lr_finder.best_loss)]
    return best_lr
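# A sketch of how the returned value might be consumed; the objects passed in
# are assumed to exist in the caller's scope, and dividing the best-loss lr by
# 10 is a common rule of thumb, not something this snippet prescribes:
best_lr = findLR(model, train_loader, test_loader, criterion, optimizer,
                 num_iteration=100)
optimizer = torch.optim.SGD(model.parameters(), lr=best_lr / 10, momentum=0.9)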
def lr_range_test(self, val_loss=False):
    lr_finder = LRFinder(self.model, self.optimizer, self.criterion,
                         device=self.device)
    val_loader = self.dl_valid if val_loss else None
    lr_finder.range_test(self.dl_train, val_loader=val_loader, end_lr=100,
                         num_iter=100, step_mode="exp")
    lr_finder.plot()
    lr_finder.reset()
    self.latest_lr_finder_result = lr_finder
def find_lr(model: torch.nn.Module, train_data: CircleDataset):
    # Range test for finding the learning rate, as described in
    # https://towardsdatascience.com/finding-good-learning-rate-and-the-one-cycle-policy-7159fe1db5d6
    lr_image = 'learning_rate.png'
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                               shuffle=True, pin_memory=False)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-7, weight_decay=1e-2)
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    logger.info("Running range test for learning rate")
    lr_finder.range_test(train_loader, end_lr=100, num_iter=100)
    fig, ax = plt.subplots()
    lr_finder.plot(ax=ax)  # to inspect the loss-learning rate graph
    logger.info(f"Saving image with learning rate plot to {lr_image}")
    fig.savefig(lr_image, dpi=fig.dpi)
    lr_finder.reset()  # to reset the model and optimizer to their initial state
def learningrate_finder(upper_bound, lower_bound, dataset_directory,
                        end_learning=100, num_iterations=100):
    hparams_tmp = Namespace(
        train_path=dataset_directory + '/train.txt',
        val_path=dataset_directory + '/val.txt',
        test_path=dataset_directory + '/test.txt',
        batch_size=16,
        warmup_steps=100,
        epochs=1,
        lr=upper_bound,
        accumulate_grad_batches=1,
    )
    module = TrainingModule(hparams_tmp)
    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(module.parameters(), lr=lower_bound)  # lower-bound LR
    lr_finder = LRFinder(module, optimizer, criterion, device="cuda")
    lr_finder.range_test(module.train_dataloader(),
                         end_lr=end_learning,
                         num_iter=num_iterations,
                         accumulation_steps=hparams_tmp.accumulate_grad_batches)
    lr_finder.plot()
    # lr_finder.plot(show_lr=lr)  # show a specific learning rate on the plot
    lr_finder.reset()
def lr_find(self, device="cuda"): """ This method is a pretraining method that plots the result of the learning rate finder to find an optimal learning rate. See also * https://github.com/davidtvs/pytorch-lr-finder * """ # with torch.no_grad(): lr_finder = LRFinder(self.model, self.optimizer, self.criterion, device=device) lr_finder.range_test(self.train_dataloader(), start_lr=0.0000001, end_lr=10, num_iter=100) lr_finder.plot() # to inspect the loss-learning rate graph lr_finder.reset( ) # to reset the model and optimizer to their initial state
def run():
    device = torch.device(GPU_ID if torch.cuda.is_available() else "cpu")
    print(f'Using device {device}')
    hyperparameter = {
        'learning_rate': [1e-2, 1e-3, 3e-4, 1e-4, 3e-5, 1e-7],  # 1e-4
        'weight_decay': [0, 1e-3, 5e-4, 1e-4, 1e-5],  # 1e-4
        'num_epochs': 70,  # 100
        'weights': [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],  # 0.6
        'optimizer': [optim.Adam, optim.SGD],  # Adam
        'image_size': 300,
        'crop_size': 299
    }
    loaders = prepare_dataset('retina', hyperparameter)
    # model: nn.Module = models.resnet50(pretrained=True)
    # num_ftrs = model.fc.in_features
    # model.fc = nn.Linear(num_ftrs, 2)
    model = ptm.inceptionv4(num_classes=1000, pretrained='imagenet')
    num_ft = model.last_linear.in_features
    model.last_linear = nn.Linear(num_ft, 2)

    # freeze the first fraction of the feature extractor (0.0 = freeze nothing)
    children = list(model.features.children())
    for i, child in enumerate(children):
        if i < 0.0 * len(children):
            for param in child.parameters():
                param.requires_grad = False

    optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                              lr=1e-7, weight_decay=0)
    criterion = nn.CrossEntropyLoss()
    lr_finder = LRFinder(model, optimizer_ft, criterion, device=device)
    lr_finder.range_test(loaders[0], end_lr=0.1, num_iter=100, step_mode='exp')
    lr_finder.plot()
    lr_finder.reset()
    return 0
from torch_lr_finder import LRFinder

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

lr_finder = LRFinder(model, optimizer, criterion, device=device)
lr_finder.range_test(trainloader, end_lr=10, num_iter=1564, step_mode='exp')
lr_finder.plot()   # to inspect the loss-learning rate graph
lr_finder.reset()  # to reset the model and optimizer to their initial state


def take_lr(x):
    # sort key: the second element of each pair
    return x[1]


# the 50 (lr, loss) pairs with the lowest loss
a = zip(lr_finder.history['lr'], lr_finder.history['loss'])
best_lrloss = sorted(a, key=take_lr, reverse=False)[:50]

# the same pairs ordered as (loss, lr), sorted by the second element
tup = zip(lr_finder.history['loss'], lr_finder.history['lr'])
sorted(tup, key=take_lr, reverse=False)[:50]


class shrink:
    def __init__(self, config):
        self.config = config

    def apply_augmentations(self):
        pass
def lr_range_test(
    model,
    dataset,
    loss_func,
    optimizer="AdamW",
    batch_size=32,
    num_iter=None,
    skip_start=10,
    skip_end=10,
    start_lr=1e-7,
    end_lr=10,
    plot=False,
):
    if num_iter is None:
        num_iter = 100 + int(np.log10(10 + len(dataset)) * 50)
    n_train = min(len(dataset), num_iter * batch_size)
    n_val = min(int(0.3 * len(dataset)), 2 * num_iter)
    log.debug("num_iter: {}, n_val: {}".format(num_iter, n_val))
    split_idx = int(0.7 * len(dataset))
    idx_train = np.random.choice(split_idx, size=n_train)
    idx_val = np.random.choice(np.arange(split_idx, len(dataset)), size=n_val)
    train_data = Subset(dataset, idx_train)
    val_data = Subset(dataset, idx_val)
    lrtest_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    lrtest_loader_val = DataLoader(val_data, batch_size=1024, shuffle=True)
    lrtest_optimizer = create_optimizer(optimizer, model.parameters(), start_lr)
    with utils.HiddenPrints():
        lr_finder = LRFinder(model, lrtest_optimizer, loss_func)
        lr_finder.range_test(
            lrtest_loader,
            val_loader=lrtest_loader_val,
            end_lr=end_lr,
            num_iter=num_iter,
            smooth_f=0.2,  # re-consider if lr-rate varies a lot
        )
    lrs = lr_finder.history["lr"]
    losses = lr_finder.history["loss"]
    if skip_end == 0:
        lrs = lrs[skip_start:]
        losses = losses[skip_start:]
    else:
        lrs = lrs[skip_start:-skip_end]
        losses = losses[skip_start:-skip_end]
    if plot:
        with utils.HiddenPrints():
            ax, steepest_lr = lr_finder.plot()  # to inspect the loss-learning rate graph
    chosen_idx = None
    try:
        # pick the midpoint between the steepest-descent lr and the min-loss lr
        steep_idx = (np.gradient(np.array(losses))).argmin()
        min_idx = (np.array(losses)).argmin()
        chosen_idx = int((steep_idx + min_idx) / 2.0)
        # chosen_idx = min_idx
        log.debug("lr-range-test results: steep: {:.2E}, min: {:.2E}".format(
            lrs[steep_idx], lrs[min_idx]))
    except ValueError:
        log.error("Failed to compute the gradients, there might not be enough points.")
    if chosen_idx is not None:
        max_lr = lrs[chosen_idx]
        log.info("learning rate range test selected lr: {:.2E}".format(max_lr))
    else:
        max_lr = 0.1
        log.error("lr range test failed. defaulting to lr: {}".format(max_lr))
    with utils.HiddenPrints():
        lr_finder.reset()  # to reset the model and optimizer to their initial state
    return max_lr
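# Hypothetical usage of lr_range_test(); `model` and `dataset` are assumed to
# be defined by the caller, and the optimizer mirrors the function's default:
max_lr = lr_range_test(model, dataset, loss_func=torch.nn.MSELoss(), plot=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=max_lr)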
criterion = nn.BCELoss()


def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)


net.apply(init_weights)

#%%
from torch_lr_finder import LRFinder

lrf = LRFinder(net, optim, criterion)
lrf.range_test(trainloader, start_lr=10**-5, end_lr=1)
lrf.plot()
lrf.reset()

#%%
n_epochs = 20
scheduler = torch.optim.lr_scheduler.CyclicLR(optim, 10**-3, 10**-2,
                                              cycle_momentum=False)
history = {'train': [], 'val': []}
for epoch in range(n_epochs):
    for x, y in trainloader:
        yhat = net(x)
        loss = criterion(yhat, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
        scheduler.step()  # CyclicLR advances once per batch
class Shrink:
    '''Shrinks the code and gets the output'''

    def __init__(self, in_config):
        self.config = in_config
        self.class_names = ('plane', 'car', 'bird', 'cat', 'deer',
                            'dog', 'frog', 'horse', 'ship', 'truck')
        self.mean = (0.491, 0.482, 0.446)
        self.std = (0.247, 0.243, 0.261)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_path = self.config['modelpath']['args']
        plt.style.use("dark_background")

    def seed_everything(self, seed: int) -> None:
        '''Seeds the code so that we get predictable outputs'''
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    def load_data(self, train_transforms, test_transforms, in_dir='./data'):
        '''Downloads the dataset and returns train and test loaders after
        applying the transformations'''
        trainset = datasets.CIFAR10(in_dir, train=True, download=True,
                                    transform=train_transforms())
        testset = datasets.CIFAR10(in_dir, train=False, download=True,
                                   transform=test_transforms())
        self.trainloader = torch.utils.data.DataLoader(
            trainset, **self.config['train_data_loader']['args'])
        self.testloader = torch.utils.data.DataLoader(
            testset, **self.config['test_data_loader']['args'])
        return self.trainloader, self.testloader

    def load_imagenet_data(self, train_transforms, test_transforms):
        '''Loads the ImageNet dataset'''
        self.trainloader, self.testloader = get_imagenet_loader(
            train_transforms, test_transforms,
            self.config['train_data_loader']['args'],
            self.config['test_data_loader']['args'])

    def mean_std_dev(self):
        pass

    def show_data(self, mode='train', n=25):
        '''Plots a grid of images drawn from the dataloader'''
        figure = plt.figure(figsize=(20, 20))
        images = None
        labels = None
        if mode.lower() == 'train':
            images, labels = next(iter(self.trainloader))
            labels = np.array(labels)
        elif mode.lower() == 'test':
            images, labels = next(iter(self.testloader))
            labels = np.array(labels)
        images = self.denormalize(images)
        for index in range(1, n + 1):
            plt.subplot(5, 5, index)
            plt.axis('off')
            # Gets the first n images of the dataset
            plt.imshow(np.transpose(images[index], (1, 2, 0)))
            # plt.title(self.class_names[labels[index]])

    def get_batched_data(self, in_data):
        '''Takes in the list data and outputs data, targets and preds'''
        in_imgs = []
        in_preds = []
        in_targets = []
        for index, i in enumerate(in_data):
            in_imgs.append(i[0])
            in_preds.append(i[1])
            in_targets.append(i[2])
        return torch.stack(in_imgs), torch.stack(in_preds), torch.stack(in_targets)

    def plot_gradcam(self, target_layers, images, pred, target, nimgs):
        '''Plots Grad-CAM for the given images and target layers'''
        index = 0
        # model.load_state_dict(torch.load(self.model_path))
        images = images[index:nimgs].to(self.device)
        target = target[index:nimgs]
        pred = pred[index:nimgs]
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            images, target, self.model, self.device, target_layers)
        # get the denormalization function
        unorm = UnNormalize(mean=self.mean, std=self.std)
        plt_gradcam(gcam_layers=gcam_layers, images=images, target_labels=target,
                    predicted_labels=predicted_classes,
                    class_labels=self.class_names, denormalize=unorm)

    def get_gradoutput(self, misclassified=False):
        '''Outputs a Grad-CAM plot for a batch of images'''
        if misclassified:
            in_data = self.misclassified
        else:
            in_data = self.correct_classified
        target_layers = ["layer1", "layer2", "layer3", "layer4"]
        imgs, preds, targets = self.get_batched_data(in_data)
        self.plot_gradcam(target_layers, imgs, preds, targets, 25)

    def denormalize(self, tensor):
        '''Denormalizes the data'''
        if not tensor.ndimension() == 4:
            raise TypeError('tensor should be 4D')
        mean = torch.FloatTensor(self.mean).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        std = torch.FloatTensor(self.std).view(1, 3, 1, 1).expand_as(tensor).to(tensor.device)
        return tensor.mul(std).add(mean)

    def get_model(self, train=True):
        self.model = get_attributes(model_arch, 'model', self.config).to(self.device)
        self.epochs = self.config['epochs']
        if train:
            # Trains the model and sends the output
            criterion = nn.CrossEntropyLoss(reduction='mean')
            optimizer = optim.SGD(self.model.parameters(), lr=0.01,
                                  momentum=0.9)  # **self.config['optimizer']['args'])
            max_at_epoch = 5
            self.best_lr = self.config['best_lr']
            pct_start_val = (max_at_epoch * len(self.trainloader)) / (self.epochs * len(self.trainloader))
            scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=self.best_lr,
                total_steps=len(self.trainloader) * self.epochs,
                steps_per_epoch=len(self.trainloader),
                epochs=self.epochs,
                pct_start=pct_start_val,
                anneal_strategy='cos',
                div_factor=10,
                final_div_factor=10)
            self.train_acc = []
            self.train_losses = []
            self.test_acc = []
            self.test_losses = []
            self.lr_metric = []
            EPOCHS = self.epochs
            print(f'Starting Training for {EPOCHS} Epochs')
            for i in range(EPOCHS):
                lr_value = [group['lr'] for group in optimizer.param_groups][0]
                self.lr_metric.append(lr_value)
                print(f'EPOCHS : {i} Learning Rate: {lr_value}')
                model_training(self.model, self.device, self.trainloader,
                               optimizer, scheduler, self.train_acc,
                               self.train_losses, criterion, l1_loss=False)
                torch.save(self.model.state_dict(), self.model_path)
                self.misclassified, self.correct_classified = model_testing(
                    self.model, self.device, self.testloader,
                    self.test_acc, self.test_losses, criterion)
        else:
            return self.model

    def test_model(self):
        '''Loads the saved model and tests it'''
        test_losses = []
        test_acc = []
        model_path = 'latest_model.h5'
        self.model.load_state_dict(torch.load(model_path))
        self.misclassified, self.correct_classified = model_testing(
            self.model, self.device, self.testloader, test_acc, test_losses)
        return self.misclassified, self.correct_classified

    def findbestlr(self):
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.95,
                              weight_decay=0.0005)
        self.lr_finder = LRFinder(self.model, optimizer, criterion, device=self.device)
        self.lr_finder.range_test(self.trainloader, **self.config['range_test']['args'])
        self.lr_finder.plot()   # to inspect the loss-learning rate graph
        self.lr_finder.reset()  # to reset the model and optimizer to their initial state
        return self.lr_finder

    def model_metrics(self):
        fig, axs = plt.subplots(2, 2, figsize=(15, 10))
        axs[0, 0].plot(self.train_losses)
        axs[0, 0].set_title('Train_Losses')
        axs[0, 1].plot(self.train_acc)
        axs[0, 1].set_title('Training_Accuracy')
        axs[1, 0].plot(self.test_losses)
        axs[1, 0].set_title('Test_Losses')
        axs[1, 1].plot(self.test_acc)
        axs[1, 1].set_title('Test_Accuracy')

    def print_visualization(self, input_size):
        '''Prints a visualization graph for Torch models'''
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.device)
        out = self.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph
def main_worker(index, opt):
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.dropout:
        n_classes = opt.n_classes
        if opt.pretrain_path is not None:
            n_classes = opt.n_finetune_classes
        model = replace_fc_layer(model=model,
                                 dropout_factor=opt.dropout_factor,
                                 n_classes=n_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    model = make_data_parallel(model, opt.distributed, opt.device)

    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()
    if opt.is_master_node:
        print(model)

    if opt.labelsmoothing:
        criterion = LabelSmoothingCrossEntropy().to(opt.device)
    else:
        criterion = CrossEntropyLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    if opt.lr_finder and not opt.no_train and not opt.no_val:
        print("Performing Learning Rate Search\nWith Leslie Smith's approach...")
        lr_finder = LRFinder(model, optimizer, criterion, device=opt.device)
        lr_finder.range_test(train_loader,
                             val_loader=val_loader,
                             start_lr=opt.learning_rate,
                             end_lr=opt.lrf_end_lr,
                             num_iter=opt.lrf_num_it,
                             step_mode=opt.lrf_mode)
        lr_finder.plot(log_lr=False)

        with (opt.result_path / 'lr_search.json').open('w') as results_file:
            json.dump(lr_finder.history, results_file, default=json_serial)

        lr_finder.reset()
        return

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                train_sampler.set_epoch(i)
            # current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, train_logger, train_batch_logger,
                        scheduler, opt.lr_scheduler, tb_writer, opt.distributed)
            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)
        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed)
        if not opt.no_train and opt.lr_scheduler == 'multistep':
            scheduler.step()
        elif not opt.no_train and opt.lr_scheduler == 'plateau':
            scheduler.step(prev_val_loss)
        elif not opt.no_train and opt.lr_scheduler == 'cosineannealing':
            scheduler.step()

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)
        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)
class Runner:
    def __init__(self, config):
        self.config = config

    def find_lr(self):
        from torch_lr_finder import LRFinder
        logger.info('finding the best learning rate')

        cfg = self.config
        if self.tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch

        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)

        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])

        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)
        criterion = getattr(module_loss, cfg['criterion'])()

        self.lr_finder = LRFinder(model, optimizer, criterion, device="cuda")

        lr_finder_epochs = cfg['lr_finder']['epochs']
        logger.info(f'Running LR-Test for {lr_finder_epochs} epochs')

        # my method
        self.lr_finder.range_test(self.trainer.train_loader,
                                  start_lr=1e-3,
                                  end_lr=1,
                                  num_iter=len(self.trainer.test_loader) * lr_finder_epochs,
                                  step_mode='linear')

        # leslie smith method
        # self.lr_finder.range_test(self.trainer.train_loader,
        #                           val_loader=self.trainer.test_loader,
        #                           end_lr=1,
        #                           num_iter=len(self.trainer.train_loader),
        #                           step_mode='linear')

        # fast ai method
        # self.lr_finder.range_test(
        #     self.trainer.train_loader, end_lr=100,
        #     num_iter=len(self.trainer.train_loader))

        self.best_lr = self.lr_finder.history['lr'][
            self.lr_finder.history['loss'].index(self.lr_finder.best_loss)]

        sorted_lrs = [
            x for _, x in sorted(
                zip(self.lr_finder.history['loss'], self.lr_finder.history['lr']))
        ]
        logger.info(f'sorted lrs : {sorted_lrs[:10]}')
        logger.info(f'found the best lr : {self.best_lr}')

        logger.info('plotting lr_finder')
        plt.style.use("dark_background")
        self.lr_finder.plot()

        # reset the model and the optimizer
        self.lr_finder.reset()
        plt.show()

        del model, optimizer, criterion

    def train(self, use_bestlr=False, lr_value=None):
        # if the best lr was found, use that value instead
        if use_bestlr and self.best_lr is not None:
            logger.info(f'using max_lr : {self.best_lr}')
            logger.info(f'using min_lr : {self.best_lr/30}')
            logger.info(f'using initial_lr : {self.best_lr/20}')
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = self.best_lr / 10
                param_group['max_lr'] = self.best_lr
                param_group['min_lr'] = self.best_lr / 30
                param_group['initial_lr'] = self.best_lr / 20

        if not use_bestlr and (lr_value is not None):
            for param_group in self.trainer.optimizer.param_groups:
                param_group['lr'] = lr_value

        self.trainer.train()
        logger.info('Finished!')

    def setup_train(self, tsai_mode=False):
        cfg = self.config
        self.tsai_mode = tsai_mode
        if tsai_mode:
            import sodium.tsai_model as module_arch
        else:
            import sodium.model.model as module_arch

        logger.info('Training Config')
        # display the config
        for line in pprint.pformat(cfg).split('\n'):
            logger.info(line)

        # to get consistent results, seed everything
        seed_everything(cfg['seed'])

        # create a model instance
        model = get_instance(module_arch, 'arch', cfg)

        # setup the model with the device
        model, device = setup_device(model, cfg['target_device'])

        param_groups = setup_param_groups(model, cfg['optimizer'])
        optimizer = get_instance(module_optimizer, 'optimizer', cfg, param_groups)

        self.transforms = get_instance(module_aug, 'augmentation', cfg)

        # get the train and test loaders
        self.data_loader = get_instance(module_data, 'data_loader', cfg,
                                        self.transforms)
        train_loader, test_loader = self.data_loader.get_loaders()

        logger.info('Getting loss function handle')
        criterion = getattr(module_loss, cfg['criterion'])()

        batch_scheduler = False
        if cfg['lr_scheduler']['type'] == 'OneCycleLR':
            logger.info('Building: torch.optim.lr_scheduler.OneCycleLR')
            max_at_epoch = cfg['lr_scheduler']['max_lr_at_epoch']
            pct_start = (max_at_epoch) / cfg['training']['epochs'] if max_at_epoch else 0.8
            sch_cfg = cfg['lr_scheduler']['args']
            lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=sch_cfg['max_lr'],
                steps_per_epoch=len(train_loader),
                pct_start=pct_start,
                epochs=cfg['training']['epochs'])
            batch_scheduler = True
        else:
            lr_scheduler = get_instance(module_scheduler, 'lr_scheduler', cfg,
                                        optimizer)

        logger.info('Initializing trainer')
        self.trainer = Trainer(model,
                               criterion,
                               optimizer,
                               cfg,
                               device,
                               train_loader,
                               test_loader,
                               lr_scheduler=lr_scheduler,
                               batch_scheduler=batch_scheduler)

    def plot_metrics(self):
        plt.style.use("dark_background")
        logger.info('Plotting Metrics...')
        plot.plot_metrics(self.trainer.train_metric, self.trainer.test_metric)
        plot.plot_lr_metric(self.trainer.lr_metric)

    def plot_gradcam(self, target_layers):
        plt.style.use("dark_background")
        logger.info('Plotting Grad-CAM...')

        # use the test images
        data, target = next(iter(self.trainer.test_loader))
        data, target = data.to(self.trainer.device), target.to(self.trainer.device)

        logger.info('Taking 5 samples')
        # get 5 images
        data = data[:5]
        target = target[:5]

        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device, target_layers)

        # get the denormalization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean,
                                       std=self.transforms.std)

        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)

    def print_summary(self, input_size):
        summary(self.trainer.model, input_size)

    def print_visualization(self, input_size):
        C, H, W = input_size
        x = torch.zeros(1, C, H, W, dtype=torch.float, requires_grad=False)
        x = x.to(self.trainer.device)
        out = self.trainer.model(x)
        # plot graph of variable, not of a nn.Module
        dot_graph = torchviz.make_dot(out)
        dot_graph.view()
        return dot_graph

    def plot_misclassifications(self, target_layers):
        plt.style.use("dark_background")
        assert (self.trainer.model is not None)

        # get the data and target of only the misclassified samples,
        # then proceed as for plot_gradcam
        logger.info('getting misclassifications')
        misclassified = []
        misclassified_target = []
        misclassified_pred = []

        model, device = self.trainer.model, self.trainer.device

        # set the model to evaluation mode
        model.eval()

        # turn off gradients
        with torch.no_grad():
            for data, target in self.trainer.test_loader:
                # move them to the respective device
                data, target = data.to(device), target.to(device)

                # do inferencing
                output = model(data)

                # get the predicted output
                pred = output.argmax(dim=1, keepdim=True)

                # get the misclassified samples in this batch
                list_misclassified = (target.eq(pred.view_as(target)) == False)
                batch_misclassified = data[list_misclassified]
                batch_mis_pred = pred[list_misclassified]
                batch_mis_target = target[list_misclassified]

                misclassified.append(batch_misclassified)
                misclassified_pred.append(batch_mis_pred)
                misclassified_target.append(batch_mis_target)

        # group all the batches together
        misclassified = torch.cat(misclassified)
        misclassified_pred = torch.cat(misclassified_pred)
        misclassified_target = torch.cat(misclassified_target)

        logger.info('Taking 25 samples')
        # get 25 images
        data = misclassified[:25]
        target = misclassified_target[:25]

        # get the generated grad cam
        gcam_layers, predicted_probs, predicted_classes = get_gradcam(
            data, target, self.trainer.model, self.trainer.device, target_layers)

        # get the denormalization function
        unorm = module_aug.UnNormalize(mean=self.transforms.mean,
                                       std=self.transforms.std)

        plot_gradcam(gcam_layers, data, target, predicted_classes,
                     self.data_loader.class_names, unorm)
def Interpol(N, neurons, iter, fun=0, a=1, b=1):
    datasamp = datagen(N, neurons, fun, a, b, legendre)
    val_inputs, val_labels = datasamp.get_val()
    train_inputs, train_labels = datasamp.get_train()
    # Initiate the data and labels
    train_loader = DataLoader(dataset=datasamp, num_workers=0)

    class LockedCybenko(torch.nn.Module):
        # Cybenko with inner weight=1 and bias=-x[i]
        def __init__(self):
            super(LockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc1.weight.data = torch.ones(neurons).reshape(-1, 1)
            self.fc1.bias.data = -torch.linspace(-1, 1, neurons).reshape(1, -1).float()
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(False)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=False)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class SemilockedCybenko(torch.nn.Module):
        # Cybenko with fixed inner weights, one node less and a free bias
        # (layer sizes use neurons - 1 so the fixed weights match the layer shape)
        def __init__(self):
            super(SemilockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons - 1, bias=True)
            self.fc1.weight.data = torch.ones(neurons - 1).reshape(-1, 1)
            self.fc1.weight.requires_grad_(False)
            self.fc1.bias.requires_grad_(True)
            self.fc2 = torch.nn.Linear(neurons - 1, 1, bias=False)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class UnlockedCybenko(torch.nn.Module):
        # Cybenko with free inner weight and bias
        def __init__(self):
            super(UnlockedCybenko, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 1, bias=True)
            self.relu = torch.nn.Sigmoid()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            return self.fc2(x)

    class Network(torch.nn.Module):
        # Arbitrary network
        def __init__(self):
            super(Network, self).__init__()
            self.fc1 = torch.nn.Linear(1, neurons, bias=True)
            self.fc2 = torch.nn.Linear(neurons, 2 * neurons, bias=True)
            self.fc3 = torch.nn.Linear(2 * neurons, 1, bias=True)
            self.relu = torch.nn.ReLU()

        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.relu(self.fc2(x))
            return self.fc3(x)

    model = Network()
    criterion = torch.nn.MSELoss(reduction="sum")
    optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

    lr_finder = LRFinder(model, optimizer, criterion)
    lr_finder.range_test(train_loader, start_lr=0.001, end_lr=1.5, num_iter=1000)
    lr_finder.reset()  # to reset the model and optimizer to their initial state
    learning = lr_finder.history.get('lr')[np.argmin(lr_finder.history.get('loss'))]

    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    EL2Val = []
    EL2train = []
    ELinf = []
    EL2 = []  # L2 integral between f and u_teta
    for epoch in range(iter):
        x = []
        ytrue = []
        ypred = []
        for i, (inputs, labels) in enumerate(train_loader):
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            x.append(inputs.data.numpy())
            ytrue.append(labels.data.numpy())
            ypred.append(y_pred.data.numpy())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        def modelonx(x):
            return model(torch.tensor(x.reshape(-1, 1).tolist(),
                                      requires_grad=False)).data.numpy().reshape(1, -1)

        def L2error(x):
            return (modelonx(x) - np.array(truef(x, fun)).reshape(1, -1))**2

        ELinf.append(max(abs(val_labels - model(val_inputs))))
        EL2.append(quadrature(L2error, -1, 1)[0][0])
        EL2Val.append(criterion(val_labels, model(val_inputs)))
        EL2train.append(criterion(train_labels, model(train_inputs)))
        print(f'Epoch: {epoch} L2 Error on training : {EL2train[-1]:.6e} | '
              f'L2 Error on validation : {EL2Val[-1]:.6e} | L2 on [-1,1] : {EL2[-1]:.6e}')

        if epoch % 5 == 0:
            fig, ax = pl.subplots(nrows=1, ncols=2)
            plotrange = np.linspace(a - 0.1, b + 0.1, 100)

            # Function and model plot
            ax[0].scatter(val_inputs.data.numpy(), val_labels.data.numpy(),
                          c='red', s=15)
            ax[0].scatter(train_inputs, train_labels, s=15)
            ax[0].plot(plotrange,
                       model(torch.linspace(a - 0.1, b + 0.1,
                                            100).reshape(-1, 1)).data.numpy(),
                       'r')
            # Code to plot the piecewise-linear function:
            # alpha = model.fc2.weight.data.numpy()[0]
            # X = -model.fc1.bias.data.numpy()[0]
            # ReLU = lambda t: np.where(t <= 0, 0, t)
            # ax[0].plot(xx, alpha[0]*ReLU(xx-X[0]) + alpha[1]*ReLU(xx-X[1])
            #            + alpha[2]*ReLU(xx-X[2]) + alpha[3]*ReLU(xx-X[3])
            #            + alpha[4]*ReLU(xx-X[4]) + alpha[5]*ReLU(xx-X[5]))
            ax[0].plot(plotrange, truef(plotrange, fun), c='blue')
            # ax[0].plot(np.linspace(a-0.1, b+0.1, 100),
            #            np.polyval(np.polyfit(train_inputs.data.numpy().reshape(1, -1)[0],
            #                                  train_labels.data.numpy().reshape(1, -1)[0], 10),
            #                       np.linspace(a-0.1, b+0.1, 100)), c='green')
            if fun == 7:
                ax[0].plot(plotrange, maclaurin(plotrange, 50), c='green')
            ax[0].set_ylim(-0.1, 1.1)

            # Error plot
            ax[1].semilogy(range(epoch + 1), EL2Val, color='red')
            ax[1].semilogy(range(epoch + 1), EL2train, color='blue')
            # ax[1].semilogy(range(epoch+1), EL2, color='magenta')
            # ax[1].semilogy(range(epoch+1), ELinf, color='black')
            pl.show()
    return model
# PyTorch
import torchvision
from torchvision import transforms, datasets, models
import torch
from torch import optim, cuda
from torch.utils.data import DataLoader, sampler
import torch.nn as nn

from torch_lr_finder import LRFinder
from utils.model import get_model, get_dataloaders

model = get_model()
dataloaders = get_dataloaders()

# cross-entropy (log-softmax + negative log likelihood) as the loss function
criterion = nn.CrossEntropyLoss()
# SGD as the optimizer, training only the final layer
optimizer = optim.SGD(model.fc.parameters(), lr=1e-4)

lr_finder = LRFinder(model, optimizer, criterion, device='cuda')
lr_finder.range_test(dataloaders['train'], end_lr=1, num_iter=2500)
lr_finder.plot()   # to inspect the loss-learning rate graph
lr_finder.reset()  # to reset the model and optimizer to their initial state
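# A minimal follow-up sketch: feed a learning rate read off the plot above into
# a OneCycleLR schedule, as several of the snippets above do. The values of
# suggested_lr and n_epochs are placeholder assumptions, to be replaced after
# inspecting the plot.
suggested_lr = 1e-2
n_epochs = 10
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=suggested_lr,
    steps_per_epoch=len(dataloaders['train']), epochs=n_epochs)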