def restore_checkpoint(self, opt):
    """Restore training state according to the resume options on ``opt``.

    The options are checked in priority order:

    1. ``opt.from_epoch != 0`` -- restore via
       ``util.restore_checkpoint_from_epoch``.
    2. ``opt.load is not None`` -- restore from ``opt.load`` via
       ``util.restore_checkpoint``.
    3. ``opt.imagenet_enc`` is truthy or ``opt.pretrained_dec`` is set --
       nothing is restored here.
    4. Otherwise a "training from scratch" notice is printed.

    In the two restoring cases the "network", "optim" and "sched" entries
    are requested.
    """
    resume_keys = ["network", "optim", "sched"]

    if opt.from_epoch != 0:
        util.restore_checkpoint_from_epoch(opt, self, resume_keys)
        return

    if opt.load is not None:
        util.restore_checkpoint(opt, self, opt.load, resume_keys)
        return

    # Pretrained encoder/decoder configured: no checkpoint is loaded here.
    has_pretrained_parts = opt.imagenet_enc or opt.pretrained_dec is not None
    if not has_pretrained_parts:
        print(util.magenta("training from scratch..."))
def main(device=torch.device('cuda:0')):
    """Evaluate the checkpointed ``Net`` model on the test split.

    Parses ``--datasize``/``-d`` from the command line, builds the data
    loaders for ``data/nyu.zip``, restores the model from the location
    configured under ``unet.checkpoint`` and prints the test accuracy and
    loss returned by ``util.evaluate_model``.

    Args:
        device: Device handed to ``util.evaluate_model``. Defaults to
            ``cuda:0``.
    """
    # CLI arguments
    parser = arg.ArgumentParser(
        description='We all know what we are doing. Fighting!')
    parser.add_argument("--datasize", "-d", default="small", type=str,
                        help="data size you want to use, small, medium, total")
    args = parser.parse_args()

    # Data loaders: only the test split is used by this script.
    # Configuration is read through ``util.config`` everywhere in this
    # function so that it does not depend on a separately imported ``config``.
    pathname = "data/nyu.zip"
    _, _, te_loader = getTrainingValidationTestingData(
        args.datasize, pathname, batch_size=util.config("unet.batch_size"))

    # Model
    model = Net()

    # Attempts to restore the latest checkpoint if exists; the epoch and
    # stats that come back with it are not needed for evaluation.
    print("Loading unet...")
    model, _, _ = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))

    acc, loss = util.evaluate_model(model, te_loader, device)

    print(f'Test Accuracy:{acc}')
    print(f'Test Loss:{loss}')
def main(device=torch.device('cuda:0')):
    """Report test accuracy and loss for the checkpointed model.

    Builds the loaders for ``data/nyu.zip``, restores ``Net`` from the
    location configured under ``unet.checkpoint`` and prints the metrics
    that ``util.evaluate_model`` returns for the test loader.

    Args:
        device: Device handed to ``util.evaluate_model``.
    """
    # Data loaders
    archive_path = "data/nyu.zip"
    loaders = getTrainingValidationTestingData(
        archive_path,
        batch_size=util.config("unet.batch_size"),
    )
    _train_loader, _val_loader, test_loader = loaders

    # Model
    net = Net()

    # Attempts to restore the latest checkpoint if exists
    print("Loading unet...")
    restored = util.restore_checkpoint(net, util.config("unet.checkpoint"))
    net, _start_epoch, _stats = restored

    acc, loss = util.evaluate_model(net, test_loader, device)

    print(f'Test Accuracy:{acc}')
    print(f'Test Loss:{loss}')
def restore_checkpoint(self, opt):
    """Restore only the "network" entry from the checkpoint at ``opt.load``.

    Delegates to ``util.restore_checkpoint``; nothing is returned.
    """
    restored_keys = ["network"]
    util.restore_checkpoint(opt, self, opt.load, restored_keys)
def main(device=torch.device('cuda:0')):
    """Train the ``Net`` model and plot its loss/accuracy history.

    Parses ``--datasize``/``-d`` from the command line, builds the data
    loaders for ``data/nyu.zip``, restores the latest checkpoint if one
    exists, then trains up to a fixed number of epochs. After every epoch
    the model is evaluated on the training and validation loaders and a
    checkpoint is saved. The collected metrics are passed to
    ``util.make_plot`` at the end.

    Args:
        device: Device handed to ``util.train_epoch`` and
            ``util.evaluate_model``. Defaults to ``cuda:0``.
    """
    # CLI arguments
    parser = arg.ArgumentParser(
        description='We all know what we are doing. Fighting!')
    parser.add_argument("--datasize", "-d", default="small", type=str,
                        help="data size you want to use, small, medium, total")
    args = parser.parse_args()

    # Data loaders (the test split is not used during training).
    # Configuration is read through ``util.config`` everywhere in this
    # function so that it does not depend on a separately imported ``config``.
    pathname = "data/nyu.zip"
    tr_loader, va_loader, _ = getTrainingValidationTestingData(
        args.datasize, pathname, batch_size=util.config("unet.batch_size"))

    # Model, loss function and optimizer
    model = Net()
    learning_rate = util.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10

    # Attempts to restore the latest checkpoint if exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))

    # Baseline metrics of the restored (or freshly initialised) model, so
    # the curves start from the state before any training in this run.
    tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
    va_acc, va_loss = util.evaluate_model(model, va_loader, device)
    running_tr_acc = [tr_acc]
    running_tr_loss = [tr_loss]
    running_va_acc = [va_acc]
    running_va_loss = [va_loss]

    # TODO: early stopping (patience on the validation loss) is not
    # implemented; training always runs up to ``number_of_epoches``.
    for epoch in range(start_epoch, number_of_epoches):
        # Train model
        util.train_epoch(tr_loader, model, criterion, optimizer, device)

        tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = util.evaluate_model(model, va_loader, device)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)

        # Save model parameters.
        # NOTE(review): ``stats`` is passed through exactly as restored; it
        # is never updated in this function -- confirm that is intended.
        util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"), stats)

    print("Finished Training")

    util.make_plot(running_tr_loss, running_tr_acc, running_va_loss, running_va_acc)
def main(device=torch.device('cuda:0')):
    """Train the ``Net`` model and plot its loss/accuracy history.

    Builds the data loaders for ``data/nyu_small.zip``, restores the latest
    checkpoint if one exists, then trains up to a fixed number of epochs.
    After every epoch the model is evaluated on the training and validation
    loaders and a checkpoint is saved. The collected metrics are passed to
    ``util.make_plot`` at the end.

    Args:
        device: Device handed to ``util.evaluate_model``. Defaults to
            ``cuda:0``.
    """
    # Data loaders (the test split is not used during training).
    # pathname = "data/nyu_depth.zip"
    pathname = "data/nyu_small.zip"
    tr_loader, va_loader, _ = getTrainingValidationTestingData(
        pathname, batch_size=util.config("unet.batch_size"))

    # Model, loss function and optimizer
    model = Net()
    learning_rate = util.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10

    # Attempts to restore the latest checkpoint if exists
    print("Loading unet...")
    model, start_epoch, stats = util.restore_checkpoint(
        model, util.config("unet.checkpoint"))

    # Baseline metrics of the restored (or freshly initialised) model, so
    # the curves start from the state before any training in this run.
    tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
    va_acc, va_loss = util.evaluate_model(model, va_loader, device)
    running_tr_acc = [tr_acc]
    running_tr_loss = [tr_loss]
    running_va_acc = [va_acc]
    running_va_loss = [va_loss]

    # TODO: early stopping (patience on the validation loss) is not
    # implemented; training always runs up to ``number_of_epoches``.
    for epoch in range(start_epoch, number_of_epoches):
        # Train model
        util.train_epoch(tr_loader, model, criterion, optimizer)

        tr_acc, tr_loss = util.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = util.evaluate_model(model, va_loader, device)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)

        # Save model parameters.
        # NOTE(review): ``stats`` is passed through exactly as restored; it
        # is never updated in this function -- confirm that is intended.
        util.save_checkpoint(model, epoch + 1, util.config("unet.checkpoint"), stats)

    print("Finished Training")

    util.make_plot(running_tr_loss, running_tr_acc, running_va_loss, running_va_acc)
resumable = args.resume and util.is_resumable(args.exp_dir) os.makedirs(args.exp_dir, exist_ok=True) if not resumable: util.save_args(args, args.exp_dir) # Seed random = np.random.RandomState(args.seed) dataloaders, pos_prop = wrappers.load_data(args, random_state=random, use_random_transpose=True) model, optimizer, loss = wrappers.build_mvae(args, pos_prop=pos_prop) # If resume, load metrics; otherwise init metrics if resumable: util.restore_checkpoint(model, optimizer, args.exp_dir) metrics = util.load_metrics(args.exp_dir) start_epoch = metrics['current_epoch'] + 1 print("Resuming from epoch {}".format(metrics['current_epoch'])) else: metrics = init_metrics() start_epoch = 1 if start_epoch > args.epochs: raise RuntimeError("start_epoch {} > total epochs {}".format( start_epoch, args.epochs)) # Enumerate subsampled modality combinations m_combos = enumerate_combinations(args.n_tracks)