def main(cfg):

    # Select the GPU first: CUDA_VISIBLE_DEVICES must be set before any CUDA
    # call (including torch.cuda.is_available()) for it to take effect.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.mkdir(cfg.OUT_DIR)

    # Create "DATASET" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME)
    if not os.path.exists(dataset_out_dir):
        os.mkdir(dataset_out_dir)

    # Creating the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. "
              "Reusing it may lead to loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)
    test_data, test_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                               isTrain=False,
                                               isDownload=True)
    print("\nDataset {} Loaded Successfully.\nTotal Train Size: {} and Total Test Size: {}\n"
          .format(cfg.DATASET.NAME, train_size, test_size))
    logger.info("Dataset {} Loaded Successfully. Total Train Size: {} and Total Test Size: {}\n"
                .format(cfg.DATASET.NAME, train_size, test_size))

    # Split the train set into a training partition and a validation partition
    trainSet_path, valSet_path = data_obj.makeTVSets(
        train_split_ratio=cfg.ACTIVE_LEARNING.INIT_RATIO,
        val_split_ratio=cfg.DATASET.VAL_RATIO,
        data=train_data,
        seed_id=cfg.RNG_SEED,
        save_dir=cfg.EXP_DIR)

    trainSet, valSet = data_obj.loadTVPartitions(trainSetPath=trainSet_path,
                                                 valSetPath=valSet_path)

    print("Data Partitioning Complete. \nTrain Set: {}, Validation Set: {}\n"
          .format(len(trainSet), len(valSet)))
    logger.info("\nTrain Set: {}, Validation Set: {}\n".format(len(trainSet), len(valSet)))

    # Preparing dataloaders for initial training
    trainSet_loader = data_obj.getIndexesDataLoader(indexes=trainSet,
                                                    batch_size=cfg.TRAIN.BATCH_SIZE,
                                                    data=train_data)
    valSet_loader = data_obj.getIndexesDataLoader(indexes=valSet,
                                                  batch_size=cfg.TRAIN.BATCH_SIZE,
                                                  data=train_data)
    test_loader = data_obj.getTestLoader(data=test_data,
                                         test_batch_size=cfg.TRAIN.BATCH_SIZE,
                                         seed_id=cfg.RNG_SEED)

    # Initialize the models
    num_ensembles = cfg.ENSEMBLE.NUM_MODELS
    models = []
    for i in range(num_ensembles):
        models.append(model_builder.build_model(cfg))
    print("{} ensemble models of type: {}\n".format(cfg.ENSEMBLE.NUM_MODELS,
                                                    cfg.ENSEMBLE.MODEL_TYPE))
    logger.info("{} ensemble models of type: {}\n".format(cfg.ENSEMBLE.NUM_MODELS,
                                                          cfg.ENSEMBLE.MODEL_TYPE))

    # This is to seamlessly use the code originally written for AL episodes
    cfg.EPISODE_DIR = cfg.EXP_DIR

    # Train models
    print("======== ENSEMBLE TRAINING ========")
    logger.info("======== ENSEMBLE TRAINING ========")
    best_model_paths = []
    test_accs = []
    for i in range(num_ensembles):
        print("=== Training ensemble [{}/{}] ===".format(i + 1, num_ensembles))

        # Construct the optimizer
        optimizer = optim.construct_optimizer(cfg, models[i])
        print("optimizer: {}\n".format(optimizer))
        logger.info("optimizer: {}\n".format(optimizer))

        # Each ensemble member gets its own output directory
        cfg.EPISODE_DIR = os.path.join(cfg.EPISODE_DIR, 'model_{}'.format(i + 1))

        # Train the model
        best_val_acc, best_val_epoch, checkpoint_file = ensemble_train_model(
            trainSet_loader, valSet_loader, models[i], optimizer, cfg)
        best_model_paths.append(checkpoint_file)
        print("Best Validation Accuracy by Model {}: {}\nBest Epoch: {}\n"
              .format(i + 1, round(best_val_acc, 4), best_val_epoch))
        logger.info("Best Validation Accuracy by Model {}: {}\tBest Epoch: {}\n"
                    .format(i + 1, round(best_val_acc, 4), best_val_epoch))

        # Test the model
        print("=== Testing ensemble [{}/{}] ===".format(i + 1, num_ensembles))
        test_acc = ensemble_test_model(test_loader, checkpoint_file, cfg, cur_episode=0)
        test_accs.append(test_acc)
        print("Test Accuracy by Model {}: {}.\n".format(i + 1, round(test_acc, 4)))
        logger.info("Test Accuracy by Model {}: {}.\n".format(i + 1, test_acc))

        # Reset EPISODE_DIR
        cfg.EPISODE_DIR = cfg.EXP_DIR

    # Test each best model checkpoint and report the average
    print("======== ENSEMBLE TESTING ========\n")
    logger.info("======== ENSEMBLE TESTING ========\n")
    mean_test_acc = np.mean(test_accs)
    print("Average Ensemble Test Accuracy: {}.\n".format(round(mean_test_acc, 4)))
    logger.info("Average Ensemble Test Accuracy: {}.\n".format(mean_test_acc))

    print("================================\n\n")
    logger.info("================================\n\n")
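For orientation, a minimal sketch of how a driver like this is typically invoked follows. The flag names and the yacs-style `cfg.merge_from_file` call are assumptions for illustration, not necessarily this repo's actual CLI.

# Hypothetical entry point (sketch only; assumes a yacs-style global `cfg`
# is importable and that main(cfg) above is defined in this module).
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Ensemble training driver")
    parser.add_argument("--cfg", dest="cfg_file", required=True,
                        help="Path to the YAML config file")
    parser.add_argument("--exp-name", default="auto",
                        help="Experiment directory name ('auto' uses a timestamp)")
    args = parser.parse_args()

    cfg.merge_from_file(args.cfg_file)  # assumption: yacs CfgNode config
    cfg.EXP_NAME = args.exp_name
    main(cfg)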
def main(cfg):

    # Select the GPU first: CUDA_VISIBLE_DEVICES must be set before any CUDA
    # call (including torch.cuda.is_available()) for it to take effect.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.mkdir(cfg.OUT_DIR)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
    if not os.path.exists(dataset_out_dir):
        os.makedirs(dataset_out_dir)

    # Creating the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. "
              "Reusing it may lead to loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)
    test_data, test_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                               isTrain=False,
                                               isDownload=True)
    print("\nDataset {} Loaded Successfully.\nTotal Train Size: {} and Total Test Size: {}\n"
          .format(cfg.DATASET.NAME, train_size, test_size))
    logger.info("Dataset {} Loaded Successfully. Total Train Size: {} and Total Test Size: {}\n"
                .format(cfg.DATASET.NAME, train_size, test_size))

    # Sample the initial labeled pool, either uniformly at random or with the
    # configured initial-pool sampling function
    print("\nSampling Initial Pool using {}.".format(str.upper(cfg.INIT_POOL.SAMPLING_FN)))
    logger.info("\nSampling Initial Pool using {}.".format(str.upper(cfg.INIT_POOL.SAMPLING_FN)))
    if cfg.INIT_POOL.SAMPLING_FN == 'random':
        lSet_path, uSet_path, valSet_path = data_obj.makeLUVSets(
            train_split_ratio=cfg.INIT_POOL.INIT_RATIO,
            val_split_ratio=cfg.DATASET.VAL_RATIO,
            data=train_data,
            seed_id=cfg.RNG_SEED,
            save_dir=cfg.EXP_DIR)
    else:
        lSet, uSet = InitialPool(cfg).sample_from_uSet(train_data)
        lSet_path = f'{cfg.EXP_DIR}/lSet.npy'
        np.save(lSet_path, lSet)
        np.save(f'{cfg.EXP_DIR}/lSet_initial.npy', lSet)
        uSet_path, valSet_path = data_obj.makeUVSets(
            val_split_ratio=cfg.DATASET.VAL_RATIO,
            data=uSet,
            seed_id=cfg.RNG_SEED,
            save_dir=cfg.EXP_DIR)

    cfg.ACTIVE_LEARNING.LSET_PATH = lSet_path
    cfg.ACTIVE_LEARNING.USET_PATH = uSet_path
    cfg.ACTIVE_LEARNING.VALSET_PATH = valSet_path

    lSet, uSet, valSet = data_obj.loadPartitions(
        lSetPath=cfg.ACTIVE_LEARNING.LSET_PATH,
        uSetPath=cfg.ACTIVE_LEARNING.USET_PATH,
        valSetPath=cfg.ACTIVE_LEARNING.VALSET_PATH)

    print("Data Partitioning Complete. \nLabeled Set: {}, Unlabeled Set: {}, Validation Set: {}\n"
          .format(len(lSet), len(uSet), len(valSet)))
    logger.info("Labeled Set: {}, Unlabeled Set: {}, Validation Set: {}\n"
                .format(len(lSet), len(uSet), len(valSet)))

    # Preparing dataloaders for initial training
    lSet_loader = data_obj.getIndexesDataLoader(indexes=lSet,
                                                batch_size=cfg.TRAIN.BATCH_SIZE,
                                                data=train_data)
    valSet_loader = data_obj.getIndexesDataLoader(indexes=valSet,
                                                  batch_size=cfg.TRAIN.BATCH_SIZE,
                                                  data=train_data)
    uSet_loader = data_obj.getIndexesDataLoader(indexes=uSet,
                                                batch_size=cfg.TRAIN.BATCH_SIZE,
                                                data=train_data)
    test_loader = data_obj.getTestLoader(data=test_data,
                                         test_batch_size=cfg.TRAIN.BATCH_SIZE,
                                         seed_id=cfg.RNG_SEED)

    # Initialize the models
    num_ensembles = cfg.ENSEMBLE.NUM_MODELS
    models = []
    for i in range(num_ensembles):
        models.append(model_builder.build_model(cfg))
    print("{} ensemble models of type: {}\n".format(cfg.ENSEMBLE.NUM_MODELS,
                                                    cfg.ENSEMBLE.MODEL_TYPE))
    logger.info("{} ensemble models of type: {}\n".format(cfg.ENSEMBLE.NUM_MODELS,
                                                          cfg.ENSEMBLE.MODEL_TYPE))
    print("Max AL Episodes: {}\n".format(cfg.ACTIVE_LEARNING.MAX_ITER))
    logger.info("Max AL Episodes: {}\n".format(cfg.ACTIVE_LEARNING.MAX_ITER))

    for cur_episode in range(0, cfg.ACTIVE_LEARNING.MAX_ITER + 1):
        wandb.log({"Episode": cur_episode})
        print("======== EPISODE {} BEGINS ========\n".format(cur_episode))
        logger.info("======== EPISODE {} BEGINS ========\n".format(cur_episode))

        # Creating output directory for the episode
        episode_dir = os.path.join(cfg.EXP_DIR, f'episode_{cur_episode}')
        if not os.path.exists(episode_dir):
            os.mkdir(episode_dir)
        cfg.EPISODE_DIR = episode_dir

        # Train models
        print("======== ENSEMBLE TRAINING ========")
        logger.info("======== ENSEMBLE TRAINING ========")
        best_model_paths = []
        test_accs = []
        for i in range(num_ensembles):
            print("=== Training ensemble [{}/{}] ===".format(i + 1, num_ensembles))

            # Construct the optimizer
            optimizer = optim.construct_optimizer(cfg, models[i])
            print("optimizer: {}\n".format(optimizer))
            logger.info("optimizer: {}\n".format(optimizer))

            # Each ensemble member gets its own output directory
            cfg.EPISODE_DIR = os.path.join(cfg.EPISODE_DIR, 'model_{}'.format(i + 1))

            # Train the model
            best_val_acc, best_val_epoch, checkpoint_file = ensemble_train_model(
                lSet_loader, valSet_loader, models[i], optimizer, cfg)
            best_model_paths.append(checkpoint_file)
            print("Best Validation Accuracy by Model {}: {}\nBest Epoch: {}\n"
                  .format(i + 1, round(best_val_acc, 4), best_val_epoch))
            logger.info("EPISODE {} Best Validation Accuracy by Model {}: {}\tBest Epoch: {}\n"
                        .format(cur_episode, i + 1, round(best_val_acc, 4), best_val_epoch))

            # Test the model
            print("=== Testing ensemble [{}/{}] ===".format(i + 1, num_ensembles))
            test_acc = ensemble_test_model(test_loader, checkpoint_file, cfg, cur_episode)
            test_accs.append(test_acc)
            print("Test Accuracy by Model {}: {}.\n".format(i + 1, round(test_acc, 4)))
            logger.info("EPISODE {} Test Accuracy by Model {}: {}.\n"
                        .format(cur_episode, i + 1, test_acc))

            # Reset EPISODE_DIR
            cfg.EPISODE_DIR = episode_dir

        # Test each best model checkpoint and report the average
        print("======== ENSEMBLE TESTING ========\n")
        logger.info("======== ENSEMBLE TESTING ========\n")
        mean_test_acc = np.mean(test_accs)
        print("Average Ensemble Test Accuracy: {}.\n".format(round(mean_test_acc, 4)))
        logger.info("EPISODE {} Average Ensemble Test Accuracy: {}.\n"
                    .format(cur_episode, mean_test_acc))
        wandb.log({"Test Accuracy": mean_test_acc})

        # Track and plot test accuracy across episodes
        global plot_episode_xvalues
        global plot_episode_yvalues
        global plot_epoch_xvalues
        global plot_epoch_yvalues
        global plot_it_x_values
        global plot_it_y_values

        plot_episode_xvalues.append(cur_episode)
        plot_episode_yvalues.append(mean_test_acc)
        plot_arrays(x_vals=plot_episode_xvalues,
                    y_vals=plot_episode_yvalues,
                    x_name="Episodes",
                    y_name="Test Accuracy",
                    dataset_name=cfg.DATASET.NAME,
                    out_dir=cfg.EXP_DIR)
        save_plot_values([plot_episode_xvalues, plot_episode_yvalues],
                         ["plot_episode_xvalues", "plot_episode_yvalues"],
                         out_dir=cfg.EXP_DIR,
                         saveInTextFormat=True)

        # No need to perform active sampling in the last episode iteration
        if cur_episode == cfg.ACTIVE_LEARNING.MAX_ITER:
            break

        # Active Sample
        print("======== ENSEMBLE ACTIVE SAMPLING ========\n")
        logger.info("======== ENSEMBLE ACTIVE SAMPLING ========\n")
        al_obj = ActiveLearning(data_obj, cfg)
        clf_models = []
        for i in range(num_ensembles):
            temp = model_builder.build_model(cfg)
            clf_models.append(cu.load_checkpoint(best_model_paths[i], temp))
        activeSet, new_uSet = al_obj.sample_from_uSet(
            None, lSet, uSet, train_data, supportingModels=clf_models)

        # Save current lSet, new_uSet and activeSet in the episode directory
        data_obj.saveSets(lSet, uSet, activeSet, cfg.EPISODE_DIR)

        # Add activeSet to lSet, save new_uSet as uSet and update dataloaders
        # for the next episode
        lSet = np.append(lSet, activeSet)
        uSet = new_uSet
        lSet_loader = data_obj.getIndexesDataLoader(indexes=lSet,
                                                    batch_size=cfg.TRAIN.BATCH_SIZE,
                                                    data=train_data)
        valSet_loader = data_obj.getIndexesDataLoader(indexes=valSet,
                                                      batch_size=cfg.TRAIN.BATCH_SIZE,
                                                      data=train_data)
        uSet_loader = data_obj.getSequentialDataLoader(indexes=uSet,
                                                       batch_size=cfg.TRAIN.BATCH_SIZE,
                                                       data=train_data)
        print("Ensemble Active Sampling Complete. After Episode {}:\n"
              "New Labeled Set: {}, New Unlabeled Set: {}, Active Set: {}\n"
              .format(cur_episode, len(lSet), len(uSet), len(activeSet)))
        logger.info("Ensemble Active Sampling Complete. After Episode {}:\n"
                    "New Labeled Set: {}, New Unlabeled Set: {}, Active Set: {}\n"
                    .format(cur_episode, len(lSet), len(uSet), len(activeSet)))
        print("================================\n\n")
        logger.info("================================\n\n")
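The episode loop above delegates the query step to `ActiveLearning.sample_from_uSet` with the trained ensemble passed as `supportingModels`. As a hedged illustration of what an ensemble query criterion can look like (not necessarily what this repo implements), here is a minimal sampler that ranks unlabeled points by the entropy of the ensemble-averaged softmax:

# Illustrative ensemble-entropy query. Assumptions: uSet_loader iterates over
# uSet sequentially (non-shuffled) and yields (inputs, labels) batches.
import numpy as np
import torch
import torch.nn.functional as F

@torch.no_grad()
def ensemble_entropy_query(models, uSet_loader, uSet, budget, device="cuda"):
    probs = []
    for model in models:
        model.eval().to(device)
        batch_probs = []
        for x, _ in uSet_loader:
            batch_probs.append(F.softmax(model(x.to(device)), dim=1).cpu())
        probs.append(torch.cat(batch_probs))
    # Average the softmax outputs over ensemble members: [N, C]
    mean_probs = torch.stack(probs).mean(dim=0)
    # Predictive entropy of the averaged distribution, one value per point
    entropy = -(mean_probs * torch.log(mean_probs + 1e-12)).sum(dim=1)
    ranked = torch.argsort(entropy, descending=True).numpy()
    activeSet = np.asarray(uSet)[ranked[:budget]]   # most uncertain points
    new_uSet = np.asarray(uSet)[ranked[budget:]]    # remaining unlabeled pool
    return activeSet, new_uSet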
def main(cfg):

    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Using specific GPU
    # os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.mkdir(cfg.OUT_DIR)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
    if not os.path.exists(dataset_out_dir):
        os.makedirs(dataset_out_dir)

    # Creating the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. "
              "Reusing it may lead to loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Save the config file in EXP_DIR
    dump_cfg(cfg)

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING TEST DATA ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    test_data, test_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                               isTrain=False,
                                               isDownload=True)
    print("\nDataset {} Loaded Successfully. Total Test Size: {}\n"
          .format(cfg.DATASET.NAME, test_size))
    logger.info("Dataset {} Loaded Successfully. Total Test Size: {}\n"
                .format(cfg.DATASET.NAME, test_size))

    # Preparing dataloaders for testing
    test_loader = data_obj.getTestLoader(data=test_data,
                                         test_batch_size=cfg.TRAIN.BATCH_SIZE,
                                         seed_id=cfg.RNG_SEED)

    print("======== TESTING ========\n")
    logger.info("======== TESTING ========\n")
    test_acc = test_model(test_loader,
                          os.path.join(os.path.abspath('..'), cfg.TEST.MODEL_PATH),
                          cfg)
    print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
    logger.info("Test Accuracy {}.\n".format(test_acc))
    print('Check the test accuracy inside {}/stdout.log'.format(cfg.EXP_DIR))

    print("================================\n\n")
    logger.info("================================\n\n")
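`test_model` above is assumed to load the checkpoint at `cfg.TEST.MODEL_PATH` and return top-1 accuracy. A minimal sketch of such an evaluation loop, under those assumptions (`evaluate_checkpoint` is a hypothetical helper; the real `test_model` signature differs):

# Sketch of a checkpoint evaluation loop in the spirit of test_model.
# Assumes the checkpoint is either a raw state dict or a dict with a
# "model_state" entry; the repo's actual checkpoint format may differ.
import torch

@torch.no_grad()
def evaluate_checkpoint(test_loader, checkpoint_path, model, device="cuda"):
    state = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(state.get("model_state", state))
    model.eval().to(device)
    correct, total = 0, 0
    for x, y in test_loader:
        preds = model(x.to(device)).argmax(dim=1)
        correct += (preds == y.to(device)).sum().item()
        total += y.size(0)
    return 100.0 * correct / total  # top-1 accuracy in percent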
def main(cfg):

    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Auto assign an RNG_SEED when no value is supplied
    if cfg.RNG_SEED is None:
        cfg.RNG_SEED = np.random.randint(100)

    # Using specific GPU
    # os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.mkdir(cfg.OUT_DIR)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
    if not os.path.exists(dataset_out_dir):
        os.makedirs(dataset_out_dir)

    # Creating the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. "
              "Reusing it may lead to loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Save the config file in EXP_DIR
    dump_cfg(cfg)

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)
    test_data, test_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                               isTrain=False,
                                               isDownload=True)
    print("\nDataset {} Loaded Successfully.\nTotal Train Size: {} and Total Test Size: {}\n"
          .format(cfg.DATASET.NAME, train_size, test_size))
    logger.info("Dataset {} Loaded Successfully. Total Train Size: {} and Total Test Size: {}\n"
                .format(cfg.DATASET.NAME, train_size, test_size))

    # Split the train set into an initial labeled set, an unlabeled pool,
    # and a validation set
    lSet_path, uSet_path, valSet_path = data_obj.makeLUVSets(
        train_split_ratio=cfg.ACTIVE_LEARNING.INIT_L_RATIO,
        val_split_ratio=cfg.DATASET.VAL_RATIO,
        data=train_data,
        seed_id=cfg.RNG_SEED,
        save_dir=cfg.EXP_DIR)

    cfg.ACTIVE_LEARNING.LSET_PATH = lSet_path
    cfg.ACTIVE_LEARNING.USET_PATH = uSet_path
    cfg.ACTIVE_LEARNING.VALSET_PATH = valSet_path

    lSet, uSet, valSet = data_obj.loadPartitions(
        lSetPath=cfg.ACTIVE_LEARNING.LSET_PATH,
        uSetPath=cfg.ACTIVE_LEARNING.USET_PATH,
        valSetPath=cfg.ACTIVE_LEARNING.VALSET_PATH)

    print("Data Partitioning Complete. \nLabeled Set: {}, Unlabeled Set: {}, Validation Set: {}\n"
          .format(len(lSet), len(uSet), len(valSet)))
    logger.info("Labeled Set: {}, Unlabeled Set: {}, Validation Set: {}\n"
                .format(len(lSet), len(uSet), len(valSet)))

    # Preparing dataloaders for initial training
    lSet_loader = data_obj.getIndexesDataLoader(indexes=lSet,
                                                batch_size=cfg.TRAIN.BATCH_SIZE,
                                                data=train_data)
    valSet_loader = data_obj.getIndexesDataLoader(indexes=valSet,
                                                  batch_size=cfg.TRAIN.BATCH_SIZE,
                                                  data=train_data)
    test_loader = data_obj.getTestLoader(data=test_data,
                                         test_batch_size=cfg.TRAIN.BATCH_SIZE,
                                         seed_id=cfg.RNG_SEED)

    # Initialize the model
    model = model_builder.build_model(cfg)
    print("model: {}\n".format(cfg.MODEL.TYPE))
    logger.info("model: {}\n".format(cfg.MODEL.TYPE))

    # Construct the optimizer
    optimizer = optim.construct_optimizer(cfg, model)
    print("optimizer: {}\n".format(optimizer))
    logger.info("optimizer: {}\n".format(optimizer))

    print("AL Query Method: {}\nMax AL Episodes: {}\n".format(
        cfg.ACTIVE_LEARNING.SAMPLING_FN, cfg.ACTIVE_LEARNING.MAX_ITER))
    logger.info("AL Query Method: {}\nMax AL Episodes: {}\n".format(
        cfg.ACTIVE_LEARNING.SAMPLING_FN, cfg.ACTIVE_LEARNING.MAX_ITER))

    for cur_episode in range(0, cfg.ACTIVE_LEARNING.MAX_ITER + 1):
        print("======== EPISODE {} BEGINS ========\n".format(cur_episode))
        logger.info("======== EPISODE {} BEGINS ========\n".format(cur_episode))

        # Creating output directory for the episode
        episode_dir = os.path.join(cfg.EXP_DIR, f'episode_{cur_episode}')
        if not os.path.exists(episode_dir):
            os.mkdir(episode_dir)
        cfg.EPISODE_DIR = episode_dir

        # Train model
        print("======== TRAINING ========")
        logger.info("======== TRAINING ========")
        best_val_acc, best_val_epoch, checkpoint_file = train_model(
            lSet_loader, valSet_loader, model, optimizer, cfg)
        print("Best Validation Accuracy: {}\nBest Epoch: {}\n".format(
            round(best_val_acc, 4), best_val_epoch))
        logger.info("EPISODE {} Best Validation Accuracy: {}\tBest Epoch: {}\n"
                    .format(cur_episode, round(best_val_acc, 4), best_val_epoch))

        # Test best model checkpoint
        print("======== TESTING ========\n")
        logger.info("======== TESTING ========\n")
        test_acc = test_model(test_loader, checkpoint_file, cfg, cur_episode)
        print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
        logger.info("EPISODE {} Test Accuracy {}.\n".format(cur_episode, test_acc))

        # No need to perform active sampling in the last episode iteration
        if cur_episode == cfg.ACTIVE_LEARNING.MAX_ITER:
            # Save current lSet, uSet in the final episode directory
            data_obj.saveSet(lSet, 'lSet', cfg.EPISODE_DIR)
            data_obj.saveSet(uSet, 'uSet', cfg.EPISODE_DIR)
            break

        # Active Sample
        print("======== ACTIVE SAMPLING ========\n")
        logger.info("======== ACTIVE SAMPLING ========\n")
        al_obj = ActiveLearning(data_obj, cfg)
        clf_model = model_builder.build_model(cfg)
        clf_model = cu.load_checkpoint(checkpoint_file, clf_model)
        activeSet, new_uSet = al_obj.sample_from_uSet(clf_model, lSet, uSet, train_data)

        # Save current lSet, new_uSet and activeSet in the episode directory
        data_obj.saveSets(lSet, uSet, activeSet, cfg.EPISODE_DIR)

        # Add activeSet to lSet, save new_uSet as uSet and update dataloaders
        # for the next episode
        lSet = np.append(lSet, activeSet)
        uSet = new_uSet
        lSet_loader = data_obj.getIndexesDataLoader(indexes=lSet,
                                                    batch_size=cfg.TRAIN.BATCH_SIZE,
                                                    data=train_data)
        valSet_loader = data_obj.getIndexesDataLoader(indexes=valSet,
                                                      batch_size=cfg.TRAIN.BATCH_SIZE,
                                                      data=train_data)
        uSet_loader = data_obj.getSequentialDataLoader(indexes=uSet,
                                                       batch_size=cfg.TRAIN.BATCH_SIZE,
                                                       data=train_data)
        print("Active Sampling Complete. After Episode {}:\n"
              "New Labeled Set: {}, New Unlabeled Set: {}, Active Set: {}\n"
              .format(cur_episode, len(lSet), len(uSet), len(activeSet)))
        logger.info("Active Sampling Complete. After Episode {}:\n"
                    "New Labeled Set: {}, New Unlabeled Set: {}, Active Set: {}\n"
                    .format(cur_episode, len(lSet), len(uSet), len(activeSet)))
        print("================================\n\n")
        logger.info("================================\n\n")
def main(cfg):

    # Select the GPU first: these env vars must be set before any CUDA call
    # (including torch.cuda.is_available()) for them to take effect.
    os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.makedirs(cfg.OUT_DIR)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
    if not os.path.exists(dataset_out_dir):
        os.makedirs(dataset_out_dir)

    # Creating the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. "
              "Reusing it may lead to loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)

    # Wrap the base dataset so each image carries a rotation-prediction target
    train_data = RotNetDataset(cfg.DATASET.NAME, train_data)
    train_size = len(train_data)
    print("\nRotation Dataset {} Loaded Successfully.\nTotal Train Size: {}\n"
          .format(cfg.DATASET.NAME, train_size))
    logger.info("Rotation Dataset {} Loaded Successfully. Total Train Size: {}\n"
                .format(cfg.DATASET.NAME, train_size))

    trainSet_path, valSet_path = data_obj.makeTVSets(
        val_split_ratio=cfg.DATASET.VAL_RATIO,
        data=train_data,
        seed_id=cfg.RNG_SEED,
        save_dir=cfg.EXP_DIR)
    cfg.INIT_POOL.TRAINSET_PATH = trainSet_path
    cfg.INIT_POOL.VALSET_PATH = valSet_path
    trainSet, valSet = data_obj.loadTVPartitions(
        trainSetPath=cfg.INIT_POOL.TRAINSET_PATH,
        valSetPath=cfg.INIT_POOL.VALSET_PATH)

    print("Data Partitioning Complete. \nTrain Set: {}, Validation Set: {}\n"
          .format(len(trainSet), len(valSet)))
    logger.info("Train Set: {}, Validation Set: {}\n".format(len(trainSet), len(valSet)))

    # Preparing dataloaders for initial training
    trainSet_loader = data_obj.getSequentialDataLoader(indexes=trainSet,
                                                       batch_size=cfg.TRAIN.BATCH_SIZE,
                                                       data=train_data)
    valSet_loader = data_obj.getSequentialDataLoader(indexes=valSet,
                                                     batch_size=cfg.TRAIN.BATCH_SIZE,
                                                     data=train_data)

    # Initialize the model
    model = model_builder.build_model(cfg)
    print("model: {}\n".format(cfg.MODEL.TYPE))
    logger.info("model: {}\n".format(cfg.MODEL.TYPE))

    # Construct the optimizer
    optimizer = optim.construct_optimizer(cfg, model)
    print("optimizer: {}\n".format(optimizer))
    logger.info("optimizer: {}\n".format(optimizer))

    # This is to seamlessly use the code originally written for AL episodes
    cfg.EPISODE_DIR = cfg.EXP_DIR

    # Train model
    print("======== ROTATION TRAINING ========")
    logger.info("======== ROTATION TRAINING ========")
    best_val_acc, best_val_epoch, checkpoint_file = train_model(
        trainSet_loader, valSet_loader, model, optimizer, cfg)
    print("Best Validation Accuracy: {}\nBest Epoch: {}\n".format(
        round(best_val_acc, 4), best_val_epoch))
    logger.info("Best Validation Accuracy: {}\tBest Epoch: {}\n".format(
        round(best_val_acc, 4), best_val_epoch))

    # Test best model checkpoint (note: evaluated on the rotation training set)
    print("======== ROTATION TESTING ========\n")
    logger.info("======== ROTATION TESTING ========\n")
    test_acc = test_model(trainSet_loader, checkpoint_file, cfg, cur_episode=1)
    print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
    logger.info("Test Accuracy {}.\n".format(test_acc))

    print("================================\n\n")
    logger.info("================================\n\n")
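`RotNetDataset` wraps the base dataset so that the training target is the image's rotation rather than its class. A minimal sketch of the idea, assuming the base dataset returns CHW tensors; the repo's implementation may instead emit all four rotations per image:

# RotNet-style wrapper sketch: each base image is served with a random
# rotation in {0, 90, 180, 270} degrees, and the rotation index (0-3) is
# the classification target. Illustrative; names and behavior are assumptions.
import torch
from torch.utils.data import Dataset

class RotationDataset(Dataset):
    def __init__(self, base_dataset):
        self.base = base_dataset

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        img, _ = self.base[idx]                  # discard the original label
        k = torch.randint(0, 4, (1,)).item()     # number of 90-degree turns
        img = torch.rot90(img, k, dims=(1, 2))   # rotate the HxW plane of CHW
        return img, k                            # rotation index is the target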
def main(cfg):

    # Login to wandb
    wandb.login()
    # Initialize a new wandb run
    wandb.init(project="rotation-pred", name=cfg.EXP_NAME)

    # Select the GPU first: these env vars must be set before any CUDA call
    # (including torch.cuda.is_available()) for them to take effect.
    os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.makedirs(cfg.OUT_DIR)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
    if not os.path.exists(dataset_out_dir):
        os.makedirs(dataset_out_dir)

    # Creating the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. "
              "Reusing it may lead to loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND SSL EVALUATION MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)
    trainSet = [i for i in range(train_size)]
    print("\nRotation Dataset {} Loaded Successfully.\nTotal Train Size: {}\n"
          .format(cfg.DATASET.NAME, train_size))
    logger.info("Rotation Dataset {} Loaded Successfully. Total Train Size: {}\n"
                .format(cfg.DATASET.NAME, train_size))

    trainSet_path = data_obj.saveSet(setArray=trainSet,
                                     setName='trainSet',
                                     save_dir=cfg.EXP_DIR)
    trainSet = data_obj.loadPartition(setPath=trainSet_path)

    # Preparing dataloaders for initial training
    trainSet_loader = data_obj.getSequentialDataLoader(indexes=trainSet,
                                                       batch_size=cfg.TRAIN.BATCH_SIZE,
                                                       data=train_data)

    # Initialize the evaluation model: a linear probe when cfg.MODEL.TYPE is
    # 'linear', otherwise an MLP head with one hidden layer
    if cfg.MODEL.TYPE == 'linear':
        model = SSLEvaluator(n_input=cfg.MODEL.NUM_INPUT,
                             n_classes=cfg.MODEL.NUM_OUTPUT,
                             n_hidden=None)
    else:
        model = SSLEvaluator(n_input=cfg.MODEL.NUM_INPUT,
                             n_classes=cfg.MODEL.NUM_OUTPUT,
                             n_hidden=cfg.MODEL.NUM_HIDDEN)
    print("Evaluation model: {}\n".format(cfg.MODEL.EVAL))
    logger.info("Evaluation model: {}\n".format(cfg.MODEL.EVAL))

    # Initialize the SSL model and load its pretrained checkpoint
    ssl_model = model_builder.build_model(cfg)
    ssl_checkpoint_file = os.path.join(os.path.abspath('..'), cfg.TEST.MODEL_PATH)
    ssl_model = cu.load_checkpoint(ssl_checkpoint_file, ssl_model)

    # Construct the optimizer (only the evaluation head is optimized)
    optimizer = optim.construct_optimizer(cfg, model)
    print("optimizer: {}\n".format(optimizer))
    logger.info("optimizer: {}\n".format(optimizer))

    # This is to seamlessly use the code originally written for AL episodes
    cfg.EPISODE_DIR = cfg.EXP_DIR

    # Train model
    print("======== EVALUATOR TRAINING ========")
    logger.info("======== EVALUATOR TRAINING ========")
    _, _, eval_checkpoint_file = train_model(trainSet_loader, None, model,
                                             ssl_model, optimizer, cfg)
    # eval_checkpoint_file = os.path.join(os.path.abspath('..'), '')

    # Test best model checkpoint
    print("======== EVALUATOR TESTING ========\n")
    logger.info("======== EVALUATOR TESTING ========\n")
    test_acc = test_model(trainSet_loader, eval_checkpoint_file,
                          ssl_checkpoint_file, cfg, cur_episode=1)
    print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
    logger.info("Test Accuracy {}.\n".format(test_acc))

    print("================================\n\n")
    logger.info("================================\n\n")
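`SSLEvaluator` above is a small probe head trained on frozen SSL features. Below is a sketch consistent with the `n_input`/`n_classes`/`n_hidden` arguments used here, assuming `n_hidden=None` means a purely linear probe; the real class may differ (dropout, additional layers, etc.):

# Probe-head sketch matching the SSLEvaluator(...) calls above: a linear
# classifier when n_hidden is None, else a one-hidden-layer MLP. Hypothetical
# reconstruction, not the repo's actual class.
import torch.nn as nn

class ProbeHead(nn.Module):
    def __init__(self, n_input, n_classes, n_hidden=None):
        super().__init__()
        if n_hidden is None:
            self.net = nn.Linear(n_input, n_classes)
        else:
            self.net = nn.Sequential(
                nn.Linear(n_input, n_hidden),
                nn.ReLU(inplace=True),
                nn.Linear(n_hidden, n_classes),
            )

    def forward(self, feats):
        # feats: frozen SSL features of shape [batch, n_input]
        return self.net(feats)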