# ---------------------------------------------------------------------------
# Ensemble training entry point: trains cfg.ENSEMBLE.NUM_MODELS models
# independently, tests each best checkpoint, and reports the mean accuracy.
# ---------------------------------------------------------------------------
import os
from datetime import datetime

import numpy as np
import torch

# Project-specific helpers (Data, model_builder, optim, lu, logger,
# ensemble_train_model, ensemble_test_model) are imported from elsewhere
# in the repo.


def main(cfg):
    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Using specific GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.mkdir(cfg.OUT_DIR)

    # Create "DATASET"-specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME)
    if not os.path.exists(dataset_out_dir):
        os.mkdir(dataset_out_dir)

    # Create the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. Reusing it may lead to "
              "loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup logger
    lu.setup_logging(cfg)

    # Dataset preparation steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True, isDownload=True)
    test_data, test_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                               isTrain=False, isDownload=True)
    print("\nDataset {} Loaded Successfully.\nTotal Train Size: {} and Total Test Size: {}\n"
          .format(cfg.DATASET.NAME, train_size, test_size))
    logger.info("Dataset {} Loaded Successfully. Total Train Size: {} and Total Test Size: {}\n"
                .format(cfg.DATASET.NAME, train_size, test_size))

    trainSet_path, valSet_path = data_obj.makeTVSets(
        train_split_ratio=cfg.ACTIVE_LEARNING.INIT_RATIO,
        val_split_ratio=cfg.DATASET.VAL_RATIO,
        data=train_data,
        seed_id=cfg.RNG_SEED,
        save_dir=cfg.EXP_DIR)
    trainSet, valSet = data_obj.loadTVPartitions(trainSetPath=trainSet_path,
                                                 valSetPath=valSet_path)

    print("Data Partitioning Complete.\nTrain Set: {}, Validation Set: {}\n"
          .format(len(trainSet), len(valSet)))
    logger.info("\nTrain Set: {}, Validation Set: {}\n".format(len(trainSet), len(valSet)))

    # Preparing dataloaders for initial training
    trainSet_loader = data_obj.getIndexesDataLoader(
        indexes=trainSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)
    valSet_loader = data_obj.getIndexesDataLoader(
        indexes=valSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)
    test_loader = data_obj.getTestLoader(
        data=test_data, test_batch_size=cfg.TRAIN.BATCH_SIZE, seed_id=cfg.RNG_SEED)

    # Initialize the models
    num_ensembles = cfg.ENSEMBLE.NUM_MODELS
    models = []
    for i in range(num_ensembles):
        models.append(model_builder.build_model(cfg))
    print("{} ensemble models of type: {}\n".format(cfg.ENSEMBLE.NUM_MODELS,
                                                    cfg.ENSEMBLE.MODEL_TYPE))
    logger.info("{} ensemble models of type: {}\n".format(cfg.ENSEMBLE.NUM_MODELS,
                                                          cfg.ENSEMBLE.MODEL_TYPE))

    # This is to seamlessly reuse the code originally written for AL episodes
    cfg.EPISODE_DIR = cfg.EXP_DIR

    # Train models
    print("======== ENSEMBLE TRAINING ========")
    logger.info("======== ENSEMBLE TRAINING ========")

    best_model_paths = []
    test_accs = []
    for i in range(num_ensembles):
        print("=== Training ensemble [{}/{}] ===".format(i + 1, num_ensembles))

        # Construct the optimizer
        optimizer = optim.construct_optimizer(cfg, models[i])
        print("optimizer: {}\n".format(optimizer))
        logger.info("optimizer: {}\n".format(optimizer))

        # Each ensemble member gets its own output directory
        cfg.EPISODE_DIR = os.path.join(cfg.EPISODE_DIR, 'model_{}'.format(i + 1))

        # Train the model
        best_val_acc, best_val_epoch, checkpoint_file = ensemble_train_model(
            trainSet_loader, valSet_loader, models[i], optimizer, cfg)
        best_model_paths.append(checkpoint_file)
        print("Best Validation Accuracy by Model {}: {}\nBest Epoch: {}\n"
              .format(i + 1, round(best_val_acc, 4), best_val_epoch))
        logger.info("Best Validation Accuracy by Model {}: {}\tBest Epoch: {}\n"
                    .format(i + 1, round(best_val_acc, 4), best_val_epoch))

        # Test the model
        print("=== Testing ensemble [{}/{}] ===".format(i + 1, num_ensembles))
        test_acc = ensemble_test_model(test_loader, checkpoint_file, cfg,
                                       cur_episode=0)
        test_accs.append(test_acc)
        print("Test Accuracy by Model {}: {}.\n".format(i + 1, round(test_acc, 4)))
        logger.info("Test Accuracy by Model {}: {}.\n".format(i + 1, test_acc))

        # Reset EPISODE_DIR for the next member
        cfg.EPISODE_DIR = cfg.EXP_DIR

    # Each best checkpoint has been tested; report the average
    print("======== ENSEMBLE TESTING ========\n")
    logger.info("======== ENSEMBLE TESTING ========\n")
    mean_test_acc = np.mean(test_accs)
    print("Average Ensemble Test Accuracy: {}.\n".format(round(mean_test_acc, 4)))
    logger.info("Average Ensemble Test Accuracy: {}.\n".format(mean_test_acc))

    print("================================\n\n")
    logger.info("================================\n\n")
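# ---------------------------------------------------------------------------
# For reference: `ensemble_test_model` is defined elsewhere in the repo. A
# minimal sketch of what it plausibly does for a single member -- restore the
# best checkpoint and compute top-1 test accuracy -- is below. The name
# `eval_checkpoint` and the 'model_state' checkpoint key are illustrative
# assumptions, not the repo's actual implementation.
# ---------------------------------------------------------------------------
import torch


@torch.no_grad()
def eval_checkpoint(checkpoint_file, model, test_loader, device):
    """Hypothetical sketch: load a saved checkpoint and report top-1 accuracy."""
    state = torch.load(checkpoint_file, map_location=device)
    # Assumption: weights live under 'model_state'; fall back to a raw state dict
    model.load_state_dict(state.get('model_state', state))
    model.to(device)
    model.eval()
    correct, total = 0, 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        preds = model(images).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return 100.0 * correct / total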
# ---------------------------------------------------------------------------
# Rotation (RotNet) pretraining entry point -- a separate script's main().
# Standard imports are the same as in the ensemble script above; RotNetDataset,
# train_model, and test_model come from elsewhere in the repo.
# ---------------------------------------------------------------------------
def main(cfg):
    # Setting up GPU args
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Using specific GPU
    os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Getting the output directory ready (default is "/output")
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    if not os.path.exists(cfg.OUT_DIR):
        os.makedirs(cfg.OUT_DIR)

    # Create "DATASET"- and model-type-specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME, cfg.MODEL.TYPE)
    if not os.path.exists(dataset_out_dir):
        os.makedirs(dataset_out_dir)

    # Create the experiment directory inside the dataset-specific directory.
    # All logs and the labeled, unlabeled, and validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        exp_dir = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_dir = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
        print("Experiment Directory is {}.\n".format(exp_dir))
    else:
        print("Experiment Directory Already Exists: {}. Reusing it may lead to "
              "loss of old logs in the directory.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup logger
    lu.setup_logging(cfg)

    # Dataset preparation steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'), cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True, isDownload=True)

    # Wrap the base dataset so every image is served with rotation targets
    train_data = RotNetDataset(cfg.DATASET.NAME, train_data)
    train_size = len(train_data)
    print("\nRotation Dataset {} Loaded Successfully.\nTotal Train Size: {}\n"
          .format(cfg.DATASET.NAME, train_size))
    logger.info("Rotation Dataset {} Loaded Successfully. Total Train Size: {}\n"
                .format(cfg.DATASET.NAME, train_size))

    trainSet_path, valSet_path = data_obj.makeTVSets(
        val_split_ratio=cfg.DATASET.VAL_RATIO,
        data=train_data,
        seed_id=cfg.RNG_SEED,
        save_dir=cfg.EXP_DIR)
    cfg.INIT_POOL.TRAINSET_PATH = trainSet_path
    cfg.INIT_POOL.VALSET_PATH = valSet_path
    trainSet, valSet = data_obj.loadTVPartitions(
        trainSetPath=cfg.INIT_POOL.TRAINSET_PATH,
        valSetPath=cfg.INIT_POOL.VALSET_PATH)

    print("Data Partitioning Complete.\nTrain Set: {}, Validation Set: {}\n"
          .format(len(trainSet), len(valSet)))
    logger.info("Train Set: {}, Validation Set: {}\n".format(len(trainSet), len(valSet)))

    # Preparing dataloaders for initial training
    trainSet_loader = data_obj.getSequentialDataLoader(
        indexes=trainSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)
    valSet_loader = data_obj.getSequentialDataLoader(
        indexes=valSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)

    # Initialize the model
    model = model_builder.build_model(cfg)
    print("model: {}\n".format(cfg.MODEL.TYPE))
    logger.info("model: {}\n".format(cfg.MODEL.TYPE))

    # Construct the optimizer
    optimizer = optim.construct_optimizer(cfg, model)
    print("optimizer: {}\n".format(optimizer))
    logger.info("optimizer: {}\n".format(optimizer))

    # This is to seamlessly reuse the code originally written for AL episodes
    cfg.EPISODE_DIR = cfg.EXP_DIR

    # Train model
    print("======== ROTATION TRAINING ========")
    logger.info("======== ROTATION TRAINING ========")
    best_val_acc, best_val_epoch, checkpoint_file = train_model(
        trainSet_loader, valSet_loader, model, optimizer, cfg)
    print("Best Validation Accuracy: {}\nBest Epoch: {}\n"
          .format(round(best_val_acc, 4), best_val_epoch))
    logger.info("Best Validation Accuracy: {}\tBest Epoch: {}\n"
                .format(round(best_val_acc, 4), best_val_epoch))

    # Test best model checkpoint (on the rotation train loader; this script
    # has no separate test split)
    print("======== ROTATION TESTING ========\n")
    logger.info("======== ROTATION TESTING ========\n")
    test_acc = test_model(trainSet_loader, checkpoint_file, cfg, cur_episode=1)
    print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
    logger.info("Test Accuracy: {}.\n".format(test_acc))

    print("================================\n\n")
    logger.info("================================\n\n")
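# ---------------------------------------------------------------------------
# For reference: `RotNetDataset` is defined elsewhere in the repo. It follows
# the standard RotNet self-supervision setup (Gidaris et al., 2018): each base
# image is served in four orientations and the classification target is the
# rotation index, which is why train_size changes (typically quadruples) after
# wrapping. The sketch below is illustrative; the class name
# `RotNetDatasetSketch` and the CHW-tensor input assumption are mine, not the
# repo's actual code.
# ---------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset


class RotNetDatasetSketch(Dataset):
    """Hypothetical sketch of a RotNet wrapper over a labeled dataset."""

    def __init__(self, base_dataset):
        self.base = base_dataset

    def __len__(self):
        # Four rotated copies per underlying image
        return 4 * len(self.base)

    def __getitem__(self, idx):
        img, _ = self.base[idx // 4]             # discard the class label
        k = idx % 4                              # number of 90-degree turns
        img = torch.rot90(img, k, dims=(1, 2))   # assumes a CHW tensor input
        return img, k                            # rotation index is the target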