Ejemplo n.º 1
0
def main(cfg):
    """Run the complete active-learning experiment described by ``cfg``.

    The function prepares the output / experiment directories, dumps the
    config, sets up logging, loads the train and test datasets, builds the
    initial labeled / unlabeled / validation index partitions, and then runs
    ``cfg.ACTIVE_LEARNING.MAX_ITER + 1`` episodes. Every episode trains on
    the current labeled set and tests the best checkpoint; every episode
    except the last one additionally queries an active set from the
    unlabeled set and moves it into the labeled set.

    Args:
        cfg: Experiment configuration object. It is mutated in place:
            ``RNG_SEED`` (only when it is ``None``), ``OUT_DIR``,
            ``EXP_DIR``, ``EPISODE_DIR``, ``DATASET.ROOT_DIR`` and
            ``ACTIVE_LEARNING.{LSET,USET,VALSET}_PATH`` are overwritten.
    """
    # Auto assign a RNG_SEED when not supplied a value
    if cfg.RNG_SEED is None:
        cfg.RNG_SEED = np.random.randint(100)

    # Disabled: pinning the run to a specific GPU
    # os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Getting the output directory ready (default is "/output").
    # makedirs(exist_ok=True) also creates missing parent directories and
    # does not fail if the directory appears between a check and the create.
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    os.makedirs(cfg.OUT_DIR, exist_ok=True)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME,
                                   cfg.MODEL.TYPE)
    os.makedirs(dataset_out_dir, exist_ok=True)

    # Creating the experiment directory inside the dataset specific directory;
    # all logs, labeled, unlabeled, validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        # NOTE(review): the time fields are not zero-padded, so distinct
        # times can map to the same name (e.g. 1:11:01 and 11:01:01).
        exp_name = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_name = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_name)
    try:
        os.mkdir(exp_dir)
    except FileExistsError:
        print(
            "Experiment Directory Already Exists: {}. Reusing it may lead to loss of old logs in the directory.\n"
            .format(exp_dir))
    else:
        print("Experiment Directory is {}.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Save the config file in EXP_DIR
    dump_cfg(cfg)

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'),
                                        cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)
    test_data, test_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                               isTrain=False,
                                               isDownload=True)

    print(
        "\nDataset {} Loaded Sucessfully.\nTotal Train Size: {} and Total Test Size: {}\n"
        .format(cfg.DATASET.NAME, train_size, test_size))
    logger.info(
        "Dataset {} Loaded Sucessfully. Total Train Size: {} and Total Test Size: {}\n"
        .format(cfg.DATASET.NAME, train_size, test_size))

    # Split the training data into labeled / unlabeled / validation index
    # sets; the index files are written under EXP_DIR and their paths are
    # recorded in cfg.
    lSet_path, uSet_path, valSet_path = data_obj.makeLUVSets(
        train_split_ratio=cfg.ACTIVE_LEARNING.INIT_L_RATIO,
        val_split_ratio=cfg.DATASET.VAL_RATIO,
        data=train_data,
        seed_id=cfg.RNG_SEED,
        save_dir=cfg.EXP_DIR)

    cfg.ACTIVE_LEARNING.LSET_PATH = lSet_path
    cfg.ACTIVE_LEARNING.USET_PATH = uSet_path
    cfg.ACTIVE_LEARNING.VALSET_PATH = valSet_path

    lSet, uSet, valSet = data_obj.loadPartitions(
        lSetPath=cfg.ACTIVE_LEARNING.LSET_PATH,
        uSetPath=cfg.ACTIVE_LEARNING.USET_PATH,
        valSetPath=cfg.ACTIVE_LEARNING.VALSET_PATH)

    print(
        "Data Partitioning Complete. \nLabeled Set: {}, Unlabeled Set: {}, Validation Set: {}\n"
        .format(len(lSet), len(uSet), len(valSet)))
    logger.info(
        "Labeled Set: {}, Unlabeled Set: {}, Validation Set: {}\n".format(
            len(lSet), len(uSet), len(valSet)))

    # Preparing dataloaders for initial training
    lSet_loader = data_obj.getIndexesDataLoader(
        indexes=lSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)
    valSet_loader = data_obj.getIndexesDataLoader(
        indexes=valSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)
    test_loader = data_obj.getTestLoader(data=test_data,
                                         test_batch_size=cfg.TRAIN.BATCH_SIZE,
                                         seed_id=cfg.RNG_SEED)

    # Initialize the model.
    model = model_builder.build_model(cfg)
    model_msg = "model: {}\n".format(cfg.MODEL.TYPE)
    print(model_msg)
    logger.info(model_msg)

    # Construct the optimizer
    optimizer = optim.construct_optimizer(cfg, model)
    optimizer_msg = "optimizer: {}\n".format(optimizer)
    print(optimizer_msg)
    logger.info(optimizer_msg)

    al_msg = "AL Query Method: {}\nMax AL Episodes: {}\n".format(
        cfg.ACTIVE_LEARNING.SAMPLING_FN, cfg.ACTIVE_LEARNING.MAX_ITER)
    print(al_msg)
    logger.info(al_msg)

    # Episode 0 trains on the initial labeled set; each later episode trains
    # on the labeled set grown by the previous episode's active sample.
    # NOTE(review): the same model and optimizer objects are passed to
    # train_model in every episode — confirm this is the intended behavior.
    for cur_episode in range(0, cfg.ACTIVE_LEARNING.MAX_ITER + 1):

        episode_msg = "======== EPISODE {} BEGINS ========\n".format(
            cur_episode)
        print(episode_msg)
        logger.info(episode_msg)

        # Creating output directory for the episode
        episode_dir = os.path.join(cfg.EXP_DIR, f'episode_{cur_episode}')
        os.makedirs(episode_dir, exist_ok=True)
        cfg.EPISODE_DIR = episode_dir

        # Train model
        print("======== TRAINING ========")
        logger.info("======== TRAINING ========")

        best_val_acc, best_val_epoch, checkpoint_file = train_model(
            lSet_loader, valSet_loader, model, optimizer, cfg)

        print("Best Validation Accuracy: {}\nBest Epoch: {}\n".format(
            round(best_val_acc, 4), best_val_epoch))
        logger.info(
            "EPISODE {} Best Validation Accuracy: {}\tBest Epoch: {}\n".format(
                cur_episode, round(best_val_acc, 4), best_val_epoch))

        # Test best model checkpoint
        print("======== TESTING ========\n")
        logger.info("======== TESTING ========\n")
        test_acc = test_model(test_loader, checkpoint_file, cfg, cur_episode)
        print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
        logger.info("EPISODE {} Test Accuracy {}.\n".format(
            cur_episode, test_acc))

        # No need to perform active sampling in the last episode iteration
        if cur_episode == cfg.ACTIVE_LEARNING.MAX_ITER:
            # Save current lSet, uSet in the final episode directory
            data_obj.saveSet(lSet, 'lSet', cfg.EPISODE_DIR)
            data_obj.saveSet(uSet, 'uSet', cfg.EPISODE_DIR)
            break

        # Active Sample: query with a fresh model loaded from this episode's
        # best checkpoint.
        print("======== ACTIVE SAMPLING ========\n")
        logger.info("======== ACTIVE SAMPLING ========\n")
        al_obj = ActiveLearning(data_obj, cfg)
        clf_model = model_builder.build_model(cfg)
        clf_model = cu.load_checkpoint(checkpoint_file, clf_model)
        activeSet, new_uSet = al_obj.sample_from_uSet(clf_model, lSet, uSet,
                                                      train_data)

        # Save the pre-update lSet and uSet together with activeSet in the
        # episode directory
        data_obj.saveSets(lSet, uSet, activeSet, cfg.EPISODE_DIR)

        # Add activeSet to lSet, save new_uSet as uSet and update dataloaders
        # for the next episode
        lSet = np.append(lSet, activeSet)
        uSet = new_uSet

        lSet_loader = data_obj.getIndexesDataLoader(
            indexes=lSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)
        valSet_loader = data_obj.getIndexesDataLoader(
            indexes=valSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)

        sampling_msg = (
            "Active Sampling Complete. After Episode {}:\nNew Labeled Set: {}, New Unlabeled Set: {}, Active Set: {}\n"
            .format(cur_episode, len(lSet), len(uSet), len(activeSet)))
        print(sampling_msg)
        logger.info(sampling_msg)
        print("================================\n\n")
        logger.info("================================\n\n")
Ejemplo n.º 2
0
def main(cfg):
    """Train and test an evaluator head on top of a pretrained SSL model.

    The function starts a wandb run, prepares the output / experiment
    directories, sets up logging, loads the training dataset, builds an
    ``SSLEvaluator`` and loads the SSL model checkpoint from
    ``cfg.TEST.MODEL_PATH``, trains the evaluator with ``train_model`` and
    finally reports the accuracy returned by ``test_model``.

    Args:
        cfg: Experiment configuration object. It is mutated in place:
            ``OUT_DIR``, ``EXP_DIR``, ``EPISODE_DIR`` and
            ``DATASET.ROOT_DIR`` are overwritten.
    """
    # Login to wandb
    wandb.login()

    # Initialize a new wandb run
    wandb.init(project="rotation-pred", name=cfg.EXP_NAME)

    # Setting up GPU args
    # NOTE(review): use_cuda, device and kwargs are not read again in this
    # function. They are kept because the CUDA availability query runs
    # before CUDA_VISIBLE_DEVICES is set below, and removing it could change
    # which devices end up visible — confirm before deleting.
    use_cuda = (cfg.NUM_GPUS > 0) and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {
        'num_workers': cfg.DATA_LOADER.NUM_WORKERS,
        'pin_memory': cfg.DATA_LOADER.PIN_MEMORY
    } if use_cuda else {}

    # Using specific GPU
    # NOTE(review): CUDA_VISIBLE_DEVICES is hard-coded to '0' while the
    # message reports cfg.GPU_ID — confirm this is intended.
    os.environ['NVIDIA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    print("Using GPU : {}.\n".format(cfg.GPU_ID))

    # Getting the output directory ready (default is "/output").
    # makedirs(exist_ok=True) avoids the check-then-create race.
    cfg.OUT_DIR = os.path.join(os.path.abspath('..'), cfg.OUT_DIR)
    os.makedirs(cfg.OUT_DIR, exist_ok=True)

    # Create "DATASET/MODEL TYPE" specific directory
    dataset_out_dir = os.path.join(cfg.OUT_DIR, cfg.DATASET.NAME,
                                   cfg.MODEL.TYPE)
    os.makedirs(dataset_out_dir, exist_ok=True)

    # Creating the experiment directory inside the dataset specific directory;
    # all logs, labeled, unlabeled, validation sets are stored here.
    # E.g., output/CIFAR10/resnet18/{timestamp or cfg.EXP_NAME based on arguments passed}
    if cfg.EXP_NAME == 'auto':
        now = datetime.now()
        # NOTE(review): the time fields are not zero-padded, so distinct
        # times can map to the same name (e.g. 1:11:01 and 11:01:01).
        exp_name = f'{now.year}_{now.month}_{now.day}_{now.hour}{now.minute}{now.second}'
    else:
        exp_name = cfg.EXP_NAME

    exp_dir = os.path.join(dataset_out_dir, exp_name)
    try:
        os.mkdir(exp_dir)
    except FileExistsError:
        print(
            "Experiment Directory Already Exists: {}. Reusing it may lead to loss of old logs in the directory.\n"
            .format(exp_dir))
    else:
        print("Experiment Directory is {}.\n".format(exp_dir))
    cfg.EXP_DIR = exp_dir

    # Setup Logger
    lu.setup_logging(cfg)

    # Dataset preparing steps
    print("\n======== PREPARING DATA AND SSL EVALUATION MODEL ========\n")
    cfg.DATASET.ROOT_DIR = os.path.join(os.path.abspath('..'),
                                        cfg.DATASET.ROOT_DIR)
    data_obj = Data(cfg)
    train_data, train_size = data_obj.getDataset(save_dir=cfg.DATASET.ROOT_DIR,
                                                 isTrain=True,
                                                 isDownload=True)
    # Every training index is used; there is no labeled/unlabeled split here.
    trainSet = list(range(train_size))

    print("\n Rotation Dataset {} Loaded Sucessfully.\nTotal Train Size: {}\n".
          format(cfg.DATASET.NAME, train_size))
    logger.info(
        "Rotation Dataset {} Loaded Sucessfully. Total Train Size: {}\n".
        format(cfg.DATASET.NAME, train_size))

    # Persist the index set under EXP_DIR and read it back from disk.
    trainSet_path = data_obj.saveSet(setArray=trainSet,
                                     setName='trainSet',
                                     save_dir=cfg.EXP_DIR)
    trainSet = data_obj.loadPartition(setPath=trainSet_path)

    # Preparing dataloaders for initial training
    trainSet_loader = data_obj.getSequentialDataLoader(
        indexes=trainSet, batch_size=cfg.TRAIN.BATCH_SIZE, data=train_data)

    # Initialize the evaluation model; the 'linear' type gets no hidden
    # layer size, every other type uses cfg.MODEL.NUM_HIDDEN.
    n_hidden = None if cfg.MODEL.TYPE == 'linear' else cfg.MODEL.NUM_HIDDEN
    model = SSLEvaluator(n_input=cfg.MODEL.NUM_INPUT,
                         n_classes=cfg.MODEL.NUM_OUTPUT,
                         n_hidden=n_hidden)
    print("Evaluation model: {}\n".format(cfg.MODEL.EVAL))
    logger.info("Evalution model: {}\n".format(cfg.MODEL.EVAL))

    # Initialize the SSL model and load its checkpoint
    ssl_model = model_builder.build_model(cfg)
    ssl_checkpoint_file = os.path.join(os.path.abspath('..'),
                                       cfg.TEST.MODEL_PATH)
    ssl_model = cu.load_checkpoint(ssl_checkpoint_file, ssl_model)

    # Construct the optimizer for the evaluator model (not the SSL model)
    optimizer = optim.construct_optimizer(cfg, model)
    optimizer_msg = "optimizer: {}\n".format(optimizer)
    print(optimizer_msg)
    logger.info(optimizer_msg)

    # This is to seamlessly use the code originally written for AL episodes
    cfg.EPISODE_DIR = cfg.EXP_DIR

    # Train model
    print("======== EVALUATOR TRAINING ========")
    logger.info("======== EVALUATOR TRAINING ========")

    # No validation loader is passed (None); only the checkpoint path from
    # the returned triple is used.
    _, _, eval_checkpoint_file = train_model(trainSet_loader, None, model,
                                             ssl_model, optimizer, cfg)

    # eval_checkpoint_file = os.path.join(os.path.abspath('..'), '')

    # Test best model checkpoint
    # NOTE(review): the test step runs on trainSet_loader, i.e. the same
    # data the evaluator was trained on — confirm this is intended.
    print("======== EVALUATOR TESTING ========\n")
    logger.info("======== EVALUATOR TESTING ========\n")

    test_acc = test_model(trainSet_loader,
                          eval_checkpoint_file,
                          ssl_checkpoint_file,
                          cfg,
                          cur_episode=1)
    print("Test Accuracy: {}.\n".format(round(test_acc, 4)))
    logger.info("Test Accuracy {}.\n".format(test_acc))

    print("================================\n\n")
    logger.info("================================\n\n")