import torch
import torch.utils.data as data
from torch.utils.data.distributed import DistributedSampler

# get_training_dataset, get_testing_dataset and detection_collate are
# project-specific helpers assumed to be available in the surrounding module.


def create_dataloaders(config):
    print('Loading Dataset...')
    train_dataset = get_training_dataset(config.dataset, config.train_anno, config.train_imgs, config)
    print("Loaded {} training images".format(len(train_dataset)))
    if config.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset,
                                                                        num_replicas=config.ngpus_per_node,
                                                                        rank=config.rank)
    else:
        train_sampler = None
    train_data_loader = data.DataLoader(
        train_dataset, config.batch_size,
        num_workers=config.workers,
        shuffle=(train_sampler is None),
        collate_fn=detection_collate,
        pin_memory=True,
        sampler=train_sampler
    )
    test_dataset = get_testing_dataset(config.dataset, config.test_anno, config.test_imgs, config)
    print("Loaded {} testing images".format(len(test_dataset)))
    if config.distributed:
        test_sampler = DistributedSampler(test_dataset,
                                          num_replicas=config.world_size,
                                          rank=config.rank)
    else:
        test_sampler = None
    test_data_loader = data.DataLoader(
        test_dataset, config.batch_size,
        num_workers=config.workers,
        shuffle=False,
        collate_fn=detection_collate,
        pin_memory=True,
        drop_last=False,
        sampler=test_sampler
    )
    return test_data_loader, train_data_loader
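
A minimal, hypothetical usage sketch for the function above, assuming a flat config namespace that exposes the attributes the function reads; the dataset identifier and paths are placeholders, and get_training_dataset / detection_collate still come from the surrounding project:

from argparse import Namespace

config = Namespace(
    dataset='voc',                         # hypothetical dataset identifier
    train_anno='train.json', train_imgs='images/train',
    test_anno='test.json', test_imgs='images/test',
    batch_size=32, workers=4,
    distributed=False, ngpus_per_node=1, rank=0, world_size=1,
)

test_loader, train_loader = create_dataloaders(config)
for images, targets in train_loader:  # batch layout is whatever detection_collate returns
    pass  # training step goes here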
Example #2

from copy import deepcopy
import logging

# torch, data and DistributedSampler imports as in Example #1.
logger = logging.getLogger(__name__)


def create_dataloaders(config):
    logger.info('Loading Dataset...')
    train_dataset = get_training_dataset(config.dataset, config.train_anno,
                                         config.train_imgs, config)
    logger.info("Loaded {} training images".format(len(train_dataset)))
    if config.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=config.ngpus_per_node,
            rank=config.rank)
    else:
        train_sampler = None

    def create_train_data_loader(batch_size):
        return data.DataLoader(train_dataset,
                               batch_size,
                               num_workers=config.workers,
                               shuffle=(train_sampler is None),
                               collate_fn=detection_collate,
                               pin_memory=True,
                               sampler=train_sampler)

    train_data_loader = create_train_data_loader(config.batch_size)
    if config.batch_size_init:
        init_data_loader = create_train_data_loader(config.batch_size_init)
    else:
        init_data_loader = deepcopy(train_data_loader)
    if config.distributed:
        init_data_loader.num_workers = 0  # workaround for a PyTorch multiprocessing DataLoader issue in distributed mode

    test_dataset = get_testing_dataset(config.dataset, config.test_anno,
                                       config.test_imgs, config)
    logger.info("Loaded {} testing images".format(len(test_dataset)))
    if config.distributed:
        test_sampler = DistributedSampler(test_dataset,
                                          num_replicas=config.world_size,
                                          rank=config.rank)
    else:
        test_sampler = torch.utils.data.SequentialSampler(test_dataset)
    test_data_loader = data.DataLoader(test_dataset,
                                       config.batch_size,
                                       num_workers=config.workers,
                                       shuffle=False,
                                       collate_fn=detection_collate,
                                       pin_memory=True,
                                       drop_last=False,
                                       sampler=test_sampler)
    return test_data_loader, train_data_loader, init_data_loader
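
Neither example defines detection_collate. Object-detection datasets usually need a custom collate function because each image carries a different number of ground-truth boxes, so the default batching would fail. A minimal sketch under that assumption (not necessarily the project's actual implementation):

import torch


def detection_collate(batch):
    # Stack images into a single (N, C, H, W) tensor; keep the per-image target
    # tensors in a plain list because their first dimension (number of boxes) varies.
    images, targets = [], []
    for image, target in batch:
        images.append(image)
        targets.append(torch.as_tensor(target, dtype=torch.float32))
    return torch.stack(images, dim=0), targets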