def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()

    print("Initializing datasets and dataloaders")
    train_path = "/content/t2/train"
    test_path = "/content/t2/val"
    trainloader, testloader = dataloader.get_imagenet_loaders(
        train_path, test_path, transform_train=None, transform_test=None)

    #model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'], num_classes=200)
    model_new = basemodelclass.S11ResNet()

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults,
                                project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")

    optimizer = optim.SGD(model_new.parameters(), lr=config.lr,
                          momentum=config.momentum, weight_decay=config.weight_decay)
    # Passed as a class on purpose: execute_model() instantiates it via criterion().
    criterion = nn.CrossEntropyLoss

    cycle_momentum = config.cycle_momentum == "True"
    print("Momentum cycling set to {}".format(cycle_momentum))
    if config.lr_policy == "clr":
        scheduler = CyclicLR(optimizer, base_lr=config.lr * 0.01, max_lr=config.lr,
                             mode='triangular', gamma=1., cycle_momentum=True,
                             step_size_up=256)
    else:
        scheduler = OneCycleLR(optimizer, config.ocp_max_lr,
                               epochs=config.epochs,
                               cycle_momentum=cycle_momentum,
                               steps_per_epoch=len(trainloader),
                               base_momentum=config.momentum,
                               max_momentum=0.95,
                               pct_start=config.split_pct,
                               anneal_strategy=config.anneal_strategy,
                               div_factor=config.div_factor,
                               final_div_factor=config.final_div_factor)

    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, dataloader.classes, wandb=wandb, optimizer_in=optimizer,
        scheduler=scheduler, prev_saved_model=saved_model_path,
        criterion=criterion, save_best=True, lars_mode=False, batch_step=True)
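# The main() variants in this file all read their settings from
# hyperparams.hyperparameter_defaults. That dict is defined elsewhere in the repo and is
# not shown here; the following is only an illustrative sketch of the keys these scripts
# actually access, with placeholder values (not the repo's real defaults).
hyperparameter_defaults_example = dict(
    project='eva4-experiments',   # placeholder wandb project name
    run_name='',                  # filled in by fileutils.rand_run_name()
    no_cuda=False,
    seed=42,
    batch_size=128,
    epochs=24,
    dropout=0.1,
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
    lr_policy='ocp',              # 'clr' selects CyclicLR, anything else OneCycleLR
    cycle_momentum='True',        # stored as a string and compared against "True"
    ocp_max_lr=0.1,
    split_pct=0.3,                # pct_start handed to OneCycleLR
    anneal_strategy='linear',
    div_factor=10,
    final_div_factor=100,
    start_lr=5,                   # epoch after which epoch-stepped schedulers kick in
    unfreeze_layer=14,            # used by the MobileNetV2 transfer-learning main()
)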
def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()

    print("Initializing datasets and dataloaders")
    trainloader, testloader = dataloader.get_train_test_dataloader_cifar10()

    model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'])

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults,
                                project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")

    optimizer = optim.SGD(model_new.parameters(), lr=config.lr,
                          momentum=config.momentum, weight_decay=config.weight_decay)
    # Passed as a class; execute_model() instantiates it via criterion().
    criterion = nn.CrossEntropyLoss

    # Alternative schedulers (CyclicLR, StepLR, MultiStepLR, stock ReduceLROnPlateau)
    # were also tried here; this variant steps a custom plateau scheduler on the test
    # loss once per epoch, so batch_step is False.
    scheduler = traintest.MyOwnReduceLROnPlateau(optimizer, mode='min', factor=0.2,
                                                 patience=2, verbose=True, threshold=0.0001)

    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, dataloader.classes, wandb=wandb, optimizer_in=optimizer,
        scheduler=scheduler, prev_saved_model=saved_model_path,
        criterion=criterion, save_best=True, lars_mode=False, batch_step=False)
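# traintest.MyOwnReduceLROnPlateau is defined elsewhere in the repo. execute_model()
# below calls scheduler.get_last_lr(), which torch.optim.lr_scheduler.ReduceLROnPlateau
# does not expose, so the custom class is assumed to be a thin subclass along these
# lines (illustrative sketch only, not the repo's actual implementation):
from torch.optim.lr_scheduler import ReduceLROnPlateau

class _MyOwnReduceLROnPlateauSketch(ReduceLROnPlateau):
    def get_last_lr(self):
        # Mirror the _LRScheduler API by reporting the current LR of each param group.
        return [group['lr'] for group in self.optimizer.param_groups]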
def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()

    # Albumentations training pipeline: pad to 40x40, random 32x32 crop, horizontal
    # flip, one 8x8 cutout filled with the CIFAR-10 channel means, then normalize.
    transform_train = Compose([
        PadIfNeeded(min_height=40, min_width=40, always_apply=True, p=1.0),
        RandomCrop(height=32, width=32, p=1),
        IAAFliplr(p=0.5),
        Cutout(num_holes=1, max_h_size=8, max_w_size=8, always_apply=True, p=1,
               fill_value=[0.4914 * 255, 0.4826 * 255, 0.44653 * 255]),
        Normalize(mean=[0.4914, 0.4826, 0.44653],
                  std=[0.24703, 0.24349, 0.26519]),
        ToTensor()
    ])
    trainloader, testloader = dataloader.get_train_test_dataloader_cifar10(
        transform_train=transform_train)

    print("Initializing datasets and dataloaders")
    #model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'])
    model_new = basemodelclass.S11ResNet()

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults,
                                project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")

    optimizer = optim.SGD(model_new.parameters(), lr=config.lr,
                          momentum=config.momentum, weight_decay=config.weight_decay)
    # Passed as a class; execute_model() instantiates it via criterion().
    criterion = nn.CrossEntropyLoss

    cycle_momentum = config.cycle_momentum == "True"
    print("Momentum cycling set to {}".format(cycle_momentum))
    # Alternative schedulers (CyclicLR, StepLR, MultiStepLR, ReduceLROnPlateau) were
    # also tried here before settling on OneCycleLR stepped once per batch.
    scheduler = OneCycleLR(optimizer, config.ocp_max_lr,
                           epochs=config.epochs,
                           cycle_momentum=cycle_momentum,
                           steps_per_epoch=len(trainloader),
                           base_momentum=config.momentum,
                           max_momentum=0.95,
                           pct_start=0.208,
                           anneal_strategy=config.anneal_strategy,
                           div_factor=config.div_factor,
                           final_div_factor=config.final_div_factor)

    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, dataloader.classes, wandb=wandb, optimizer_in=optimizer,
        scheduler=scheduler, prev_saved_model=saved_model_path,
        criterion=criterion, save_best=True, lars_mode=False, batch_step=True)
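# Because OneCycleLR above is sized as epochs * steps_per_epoch, it has to be advanced
# once per batch, which is why execute_model() is called with batch_step=True. The
# repo's actual train() loop lives in traintest and is not shown in this file; the
# following is only a minimal sketch (hypothetical name and signature) of the assumed
# per-batch stepping pattern:
def _train_one_epoch_sketch(model, device, train_loader, optimizer, scheduler, criterion):
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()  # advance the one-cycle LR/momentum schedule every batch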
def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    hyperparams.hyperparameter_defaults['run_name'] = fileutils.rand_run_name()

    print("Initializing datasets and dataloaders")
    train_csv_file = "/content/drive/My Drive/EVA4/S2_Train.csv"
    test_csv_file = "/content/drive/My Drive/EVA4/S2_Test.csv"

    transform_train = resize_bg_train_rrs(224, 224, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform_test = resize_bg(224, 224, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    # Transfer learning: start from the ImageNet-pretrained MobileNetV2 and rebuild the
    # head for the 4 local classes; only the unfrozen parameters are optimized.
    default_model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
    model_new = basemodelclass.MobileNetV2New(default_model, 4)
    #model_new = basemodelclass.MobileNetV24C(default_model, 4)
    updatable_params = model_new.unfreeze_core_layer(
        hyperparams.hyperparameter_defaults['unfreeze_layer'])

    trainset = dataloader.QDFDataSet(train_csv_file, transform=transform_train)
    trainloader = dataloader.get_dataloader(
        trainset, hyperparams.hyperparameter_defaults['batch_size'],
        shuffle=True, num_workers=4)
    testset = dataloader.QDFDataSet(test_csv_file, transform=transform_test)
    testloader = dataloader.get_dataloader(
        testset, hyperparams.hyperparameter_defaults['batch_size'],
        shuffle=False, num_workers=4)

    wandb_run_init = wandb.init(config=hyperparams.hyperparameter_defaults,
                                project=hyperparams.hyperparameter_defaults['project'])
    wandb.watch_called = False
    config = wandb.config
    print(config)
    wandb.watch(model_new, log="all")

    optimizer = optim.SGD(updatable_params, lr=config.lr,
                          momentum=config.momentum, weight_decay=config.weight_decay)
    # Passed as a class; execute_model() instantiates it via criterion().
    #criterion = nn.CrossEntropyLoss
    criterion = nn.NLLLoss

    scheduler = None
    cycle_momentum = config.cycle_momentum == "True"
    print("Momentum cycling set to {}".format(cycle_momentum))
    if config.lr_policy == "clr":
        scheduler = CyclicLR(optimizer, base_lr=config.lr * 0.01, max_lr=config.lr,
                             mode='triangular', gamma=1., cycle_momentum=True,
                             step_size_up=256)
    else:
        scheduler = OneCycleLR(optimizer, config.ocp_max_lr,
                               epochs=config.epochs,
                               cycle_momentum=cycle_momentum,
                               steps_per_epoch=len(trainloader),
                               base_momentum=config.momentum,
                               max_momentum=0.95,
                               pct_start=config.split_pct,
                               anneal_strategy=config.anneal_strategy,
                               div_factor=config.div_factor,
                               final_div_factor=config.final_div_factor)

    local_classes = ['Large QuadCopters', 'Flying Birds', 'Winged Drones', 'Small QuadCopters']
    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, local_classes, wandb=wandb, optimizer_in=optimizer,
        scheduler=scheduler, prev_saved_model=saved_model_path,
        criterion=criterion, save_best=True, lars_mode=False, batch_step=True)
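# basemodelclass.MobileNetV2New and its unfreeze_core_layer() are defined elsewhere in
# this repo. Purely as an illustration of the assumed freeze/replace-head pattern on
# torchvision's mobilenet_v2 (hypothetical function, not the repo's implementation;
# the repo's wrapper presumably also applies log_softmax in forward(), since main()
# above trains with nn.NLLLoss):
def _mobilenet_transfer_sketch(pretrained_model, num_classes, unfreeze_from=14):
    import torch.nn as nn
    # Freeze everything, then unfreeze the deeper feature blocks.
    for param in pretrained_model.parameters():
        param.requires_grad = False
    for block in pretrained_model.features[unfreeze_from:]:
        for param in block.parameters():
            param.requires_grad = True
    # Replace the 1000-way ImageNet head with a num_classes head (trainable by default).
    in_features = pretrained_model.classifier[1].in_features
    pretrained_model.classifier[1] = nn.Linear(in_features, num_classes)
    # Return only the parameters the optimizer should update.
    return [p for p in pretrained_model.parameters() if p.requires_grad]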
def execute_model(model_class, hyperparams, train_loader, test_loader, device, classes,
                  optimizer_in=optim.SGD, wandb=None, criterion=nn.CrossEntropyLoss,
                  scheduler=None, prev_saved_model=None, save_best=False,
                  batch_step=False, lars_mode=False, **kwargs):
    if wandb is None:
        # Fall back to the module-level wandb client when the caller did not pass one in.
        import wandb
        hyperparams['run_name'] = fileutils.rand_run_name()
        wandb.init(config=hyperparams, project=hyperparams['project'])

    config = wandb.config
    model_path = fileutils.generate_model_save_path(rand_string=config.run_name)
    print("Model saved to: ", model_path)

    use_cuda = not config.no_cuda and torch.cuda.is_available()
    best_acc = 0.0
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    # Set random seed and deterministic cudnn for reproducibility.
    torch.manual_seed(config.seed)
    torch.backends.cudnn.deterministic = True

    # Either resume from a previous checkpoint or move the freshly built model to the device.
    if prev_saved_model is not None:
        model, best_acc = model_builder2(model_class,
                                         weights_path=prev_saved_model,
                                         local_device=device)
        print("Model loaded from ", prev_saved_model, " with previous accuracy:", best_acc)
    else:
        model = model_class.to(device)
    summary(model.to(device), input_size=(3, 32, 32))

    # The caller passes an already-instantiated optimizer; LARS wrapping (lars_mode) and
    # the alternative CyclicLR/StepLR/MultiStepLR schedulers are left disabled here, since
    # the callers now construct the scheduler themselves and pass it in.
    optimizer = optimizer_in

    for epoch in range(1, config.epochs + 1):
        epoch_train_acc, epoch_train_loss = train(config, model, device, train_loader,
                                                  optimizer, scheduler, criterion(), epoch,
                                                  batch_step=batch_step)
        epoch_test_acc, epoch_test_loss = test(config, model, device, test_loader,
                                               criterion(reduction='sum'), classes, epoch)
        # Guard against the no-scheduler case so logging still works.
        last_lr = scheduler.get_last_lr()[0] if scheduler is not None else optimizer.param_groups[0]['lr']
        print('\nEpoch: {:.0f} Train set: Average loss: {:.4f}, Accuracy: {:.3f}%, lr:{}'.format(
            epoch, epoch_train_loss, epoch_train_acc, last_lr))
        print('Epoch: {:.0f} Test set: Average loss: {:.4f}, Accuracy: {:.3f}%'.format(
            epoch, epoch_test_loss, epoch_test_acc))

        wandb.log({
            "Train Accuracy": epoch_train_acc,
            "Train Loss": epoch_train_loss,
            "Test Accuracy": epoch_test_acc,
            "Test Loss": epoch_test_loss,
            "Learning Rate": last_lr
        })

        if save_best and epoch_test_acc > best_acc:
            print("Model saved as Test Accuracy increased from ", best_acc, " to ", epoch_test_acc)
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'test_acc': epoch_test_acc,
                'epoch': epoch
            }, model_path)
            best_acc = epoch_test_acc

        # Plateau-style schedulers are stepped once per epoch on the test loss;
        # batch-stepped schedulers (CyclicLR/OneCycleLR) are advanced inside train().
        if scheduler is not None and epoch > config.start_lr and batch_step is False:
            print("Non CyclicLR Case")
            scheduler.step(epoch_test_loss)

    print("Final model save path:", model_path, " best Accuracy:", best_acc)
    wandb.save('model.h5')
    return model_path
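# model_builder2() is defined elsewhere in the repo. Given the checkpoint dictionary
# that execute_model() saves above, it is assumed to do roughly the following
# (hypothetical sketch, shown only to document the expected checkpoint format):
def _model_builder2_sketch(model_class, weights_path, local_device):
    checkpoint = torch.load(weights_path, map_location=local_device)
    model = model_class.to(local_device)
    model.load_state_dict(checkpoint['model_state_dict'])
    # Return the model together with the test accuracy recorded at save time,
    # so execute_model() keeps saving only on improvement.
    return model, checkpoint.get('test_acc', 0.0)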
def execute_model_tpu(model_class, hyperparams, train_loader, test_loader, device,
                      criterion=rmse_loss, optimizer_in=optim.SGD, wandb=None,
                      scheduler=None, prev_saved_model=None, save_best=False,
                      batch_step=False, lars_mode=False, **kwargs):
    if wandb is None:
        # Fall back to the module-level wandb client when the caller did not pass one in.
        import wandb
        hyperparams['run_name'] = fileutils.rand_run_name()
        wandb.init(config=hyperparams, project=hyperparams['project'])

    config = wandb.config
    model_path = fileutils.generate_model_save_path(rand_string=config.run_name)
    print("Model saved to: ", model_path)

    best_loss = 10000.0
    # Set random seed for reproducibility (cudnn determinism does not apply on TPU).
    torch.manual_seed(config.seed)

    if prev_saved_model is not None:
        print("Not Implemented for TPUs")
    else:
        model = model_class.to(device)

    # Make the non-master TPU processes wait until the master has built the model.
    if not xm.is_master_ordinal():
        xm.rendezvous('load_model')
    if xm.is_master_ordinal():
        xm.rendezvous('load_model')
    xm.master_print('model_loaded, start train')
    #summary(model.to(device), input_size=(3, 64, 64))

    # The caller passes an already-instantiated optimizer.
    optimizer = optimizer_in

    for epoch in range(1, config.epochs + 1):
        para_loader = pl.ParallelLoader(train_loader, [device])
        epoch_train_loss, epoch_train_mask_loss, epoch_train_depth_loss = train_batch_tpu(
            model, device, para_loader.per_device_loader(device), optimizer,
            criterion, scheduler=None, batch_step=batch_step, stop_train=200000)

        # The per-epoch test_batch_tpu() pass and the best-loss checkpointing that the
        # GPU path performs are currently commented out in this TPU variant.
        last_lr = 0  # scheduler._last_lr
        xm.master_print(
            '\nEpoch: {:.0f} Train set: Average loss: {:.6f}, Mask loss:{:.6f}, Depth loss:{:.6f}, lr:{} Time={}'
            .format(epoch, epoch_train_loss, epoch_train_mask_loss,
                    epoch_train_depth_loss, last_lr, time.asctime()))

        # Epoch-stepped schedulers step on the training loss here, since no test loss is computed.
        if scheduler is not None and epoch > config.start_lr and batch_step is False:
            print("Non CyclicLR Case")
            scheduler.step(epoch_train_loss)

    print("Final model save path:", model_path, " best loss:", best_loss)
    #wandb.save('model.h5')
    return model_path
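# execute_model_tpu() is written for torch_xla's multiprocessing model, but the launcher
# is not part of this file. The sketch below (hypothetical helper, not the repo's code)
# shows the standard torch_xla pattern for spawning it across 8 TPU cores:
def _launch_on_tpu_sketch(model, hyperparams_dict, train_loader, test_loader, optimizer):
    import torch_xla.core.xla_model as xm
    import torch_xla.distributed.xla_multiprocessing as xmp

    def _mp_fn(rank, args):
        # Each spawned process gets its own XLA device and runs the same training loop.
        device = xm.xla_device()
        execute_model_tpu(args['model'], args['hyperparams'],
                          args['train_loader'], args['test_loader'], device,
                          optimizer_in=args['optimizer'])

    args = dict(model=model, hyperparams=hyperparams_dict,
                train_loader=train_loader, test_loader=test_loader,
                optimizer=optimizer)
    xmp.spawn(_mp_fn, args=(args,), nprocs=8, start_method='fork')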
def main():
    device = torch.device("cuda" if not hyperparams.hyperparameter_defaults['no_cuda'] else "cpu")
    fileutils.rand_run_name()

    # Debug helpers occasionally used here:
    # fileutils.get_image_samples(trainloader, classes)
    # hyperparams.print_hyperparams()

    print("Initializing datasets and dataloaders")
    torch.manual_seed(hyperparams.hyperparameter_defaults['seed'])

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform_train)
    trainloader = dataloader.get_dataloader(
        trainset, hyperparams.hyperparameter_defaults['batch_size'],
        shuffle=True, num_workers=2)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = dataloader.get_dataloader(
        testset, hyperparams.hyperparameter_defaults['batch_size'],
        shuffle=False, num_workers=2)

    # Older variant: the optimizer and criterion are handed to execute_model() as
    # classes rather than instances, unlike the newer main() variants above.
    optimizer = optim.SGD
    criterion = nn.CrossEntropyLoss
    #model_new = basemodelclass.CIFARModelDepthDilate
    model_new = basemodelclass.ResNet18(hyperparams.hyperparameter_defaults['dropout'])

    final_model_path = traintest.execute_model(
        model_new, hyperparams.hyperparameter_defaults, trainloader, testloader,
        device, dataloader.classes, optimizer_in=optimizer,
        prev_saved_model=saved_model_path, criterion=criterion, save_best=True)