Ejemplo n.º 1
0
def build_data_loader(cfg, split="train", is_distributed=False, start_iter=0):
    """Build a DataLoader for the VG/COCO benchmark datasets.

    Args:
        cfg: config node; reads ``DATASET.*`` and ``SOLVER.MAX_ITER``.
        split: "train" or "test"; selects transforms, sampling and batching.
        is_distributed: whether to wrap the dataset in a distributed sampler.
        start_iter: iteration to resume from (train split only).

    Returns:
        A ``torch.utils.data.DataLoader`` over the requested split.

    Raises:
        NotImplementedError: for unsupported ``DATASET.NAME`` / ``MODE`` combos.
    """
    num_gpus = get_world_size()
    is_train = split == "train"  # compute once instead of repeated comparisons
    num_im = cfg.DATASET.NUM_TRAIN_IMAGES if is_train else cfg.DATASET.NUM_TEST_IMAGES
    if cfg.DATASET.NAME in ("vg", "coco") and cfg.DATASET.MODE == "benchmark":
        transforms = build_transforms(cfg, is_train=is_train)
        dataset = vg_hdf5(cfg, split=split, transforms=transforms, num_im=num_im)
        sampler = make_data_sampler(dataset, is_train, is_distributed)
        images_per_batch = (cfg.DATASET.TRAIN_BATCH_SIZE
                            if is_train else cfg.DATASET.TEST_BATCH_SIZE)
        if get_rank() == 0:
            print("images_per_batch: {}, num_gpus: {}".format(images_per_batch, num_gpus))
        # Train: the global batch is split evenly across GPUs.
        # Test: each process uses the full test batch size.
        images_per_gpu = images_per_batch // num_gpus if is_train else images_per_batch
        start_iter = start_iter if is_train else 0
        num_iters = cfg.SOLVER.MAX_ITER if is_train else None
        aspect_grouping = [1] if cfg.DATASET.ASPECT_RATIO_GROUPING else []
        batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, images_per_gpu, num_iters, start_iter
        )
        collator = BatchCollator(cfg.DATASET.SIZE_DIVISIBILITY)
        # NOTE(review): num_workers is tied to the global batch size, matching
        # the original code — confirm this is intended rather than a fixed
        # worker count.
        dataloader = data.DataLoader(
            dataset,
            num_workers=images_per_batch,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        return dataloader
    else:
        raise NotImplementedError("Unsupported dataset {}.".format(cfg.DATASET.NAME))
Ejemplo n.º 2
0
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        # Single process: nothing to reduce.
        return loss_dict
    with torch.no_grad():
        # Sort keys so every rank stacks the losses in the same order.
        names = sorted(loss_dict.keys())
        stacked = torch.stack([loss_dict[name] for name in names], dim=0)
        dist.reduce(stacked, dst=0)
        if dist.get_rank() == 0:
            # Only rank 0 receives the accumulated sum, so only it averages.
            stacked /= world_size
        reduced_losses = dict(zip(names, stacked))
    return reduced_losses
Ejemplo n.º 3
0
def build_data_loader(cfg,
                      split="train",
                      num_im=-1,
                      is_distributed=False,
                      start_iter=0):
    """Build a DataLoader for the VG benchmark or VCR evaluation datasets.

    Args:
        cfg: config node; reads ``DATASET.*`` and ``SOLVER.MAX_ITER``.
        split: "train" or "test"; selects transforms, sampling and batching.
        num_im: number of images to load (-1 for all); forwarded to the dataset.
        is_distributed: whether to use a distributed sampler (vg branch only).
        start_iter: iteration to resume from (train split only).

    Returns:
        A ``torch.utils.data.DataLoader`` over the requested split.

    Raises:
        NotImplementedError: for unsupported ``DATASET.NAME`` / ``MODE`` combos.
    """
    num_gpus = get_world_size()
    is_train = split == "train"  # compute once instead of repeated comparisons
    if cfg.DATASET.NAME == "vg" and cfg.DATASET.MODE == "benchmark":
        transforms = build_transforms(cfg, is_train=is_train)
        dataset = vg_hdf5(cfg,
                          split=split,
                          transforms=transforms,
                          num_im=num_im)
        sampler = make_data_sampler(dataset, is_train, is_distributed)
        images_per_batch = (cfg.DATASET.TRAIN_BATCH_SIZE
                            if is_train else cfg.DATASET.TEST_BATCH_SIZE)
        if get_rank() == 0:
            print("images_per_batch: {}, num_gpus: {}".format(
                images_per_batch, num_gpus))
        # Train: split the global batch across GPUs; test: full batch/process.
        images_per_gpu = images_per_batch // num_gpus if is_train else images_per_batch
        start_iter = start_iter if is_train else 0
        num_iters = cfg.SOLVER.MAX_ITER if is_train else None
        aspect_grouping = [1] if cfg.DATASET.ASPECT_RATIO_GROUPING else []
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BatchCollator(cfg.DATASET.SIZE_DIVISIBILITY)
        # NOTE(review): num_workers is tied to the global batch size, matching
        # the original code — confirm this is intended.
        dataloader = data.DataLoader(
            dataset,
            num_workers=images_per_batch,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        return dataloader
    elif cfg.DATASET.NAME == "vcr" and cfg.DATASET.MODE == "benchmark":
        # VCR is evaluation-only here: eval transforms, sequential
        # non-distributed sampling, no aspect-ratio grouping, no custom
        # collator, and in-process loading (num_workers=0).
        transforms = build_transforms(cfg, is_train=False)
        dataset = vcr_hdf5(cfg, split=split, transforms=transforms)
        sampler = make_data_sampler(dataset, False, False)
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                [],  # no aspect grouping
                                                cfg.DATASET.TEST_BATCH_SIZE,
                                                None,  # no fixed iter count
                                                0)
        dataloader = data.DataLoader(dataset,
                                     num_workers=0,
                                     batch_sampler=batch_sampler)
        return dataloader
    else:
        raise NotImplementedError("Unsupported dataset {}.".format(
            cfg.DATASET.NAME))
Ejemplo n.º 4
0
def build_data_loader(cfg,
                      split="train",
                      num_im=-1,
                      is_distributed=False,
                      start_iter=0):
    """Build a DataLoader for the VG benchmark, RefCOCO, or mini datasets.

    Args:
        cfg: config node; reads ``DATASET.*`` and ``SOLVER.MAX_ITER``.
        split: "train" or "test"; selects transforms, sampling and batching.
        num_im: number of images to load (-1 for all); vg branch only.
        is_distributed: whether to use a distributed sampler.
        start_iter: iteration to resume from (vg train split only).

    Returns:
        A ``torch.utils.data.DataLoader`` over the requested split.

    Raises:
        NotImplementedError: for unsupported ``DATASET.NAME`` / ``MODE`` combos.
    """
    num_gpus = get_world_size()
    is_train = split == "train"  # compute once instead of repeated comparisons
    if cfg.DATASET.NAME == "vg" and cfg.DATASET.MODE == "benchmark":
        transforms = build_transforms(cfg, is_train=is_train)
        dataset = vg_hdf5(cfg,
                          split=split,
                          transforms=transforms,
                          num_im=num_im)
        sampler = make_data_sampler(dataset, is_train, is_distributed)
        images_per_batch = (cfg.DATASET.TRAIN_BATCH_SIZE
                            if is_train else cfg.DATASET.TEST_BATCH_SIZE)
        if get_rank() == 0:
            print("images_per_batch: {}, num_gpus: {}".format(
                images_per_batch, num_gpus))
        # Train: split the global batch across GPUs; test: full batch/process.
        images_per_gpu = images_per_batch // num_gpus if is_train else images_per_batch
        start_iter = start_iter if is_train else 0
        num_iters = cfg.SOLVER.MAX_ITER if is_train else None
        aspect_grouping = [1] if cfg.DATASET.ASPECT_RATIO_GROUPING else []
        batch_sampler = make_batch_data_sampler(dataset, sampler,
                                                aspect_grouping,
                                                images_per_gpu, num_iters,
                                                start_iter)
        collator = BatchCollator(cfg.DATASET.SIZE_DIVISIBILITY)
        # NOTE(review): num_workers is tied to the global batch size, matching
        # the original code — confirm this is intended.
        dataloader = data.DataLoader(
            dataset,
            num_workers=images_per_batch,
            batch_sampler=batch_sampler,
            collate_fn=collator,
        )
        return dataloader
    elif cfg.DATASET.NAME == "refcoco":
        transforms = m_build_transforms(cfg, is_train=is_train)
        # TODO: replace this with a dedicated refCOCO dataset class
        dataset = RefCOCO(split=split, transforms=transforms)
        # NOTE(review): this sampler is built but never used — the DataLoader
        # below iterates sequentially (shuffle=False, no batch_sampler). The
        # call is kept to preserve the original behavior.
        sampler = make_data_sampler(dataset, is_train, is_distributed)
        images_per_batch = (cfg.DATASET.TRAIN_BATCH_SIZE
                            if is_train else cfg.DATASET.TEST_BATCH_SIZE)
        if get_rank() == 0:
            print("images_per_batch: {}, num_gpus: {}".format(
                images_per_batch, num_gpus))
        collator = BatchCollator(cfg.DATASET.SIZE_DIVISIBILITY)
        dataloader = data.DataLoader(
            dataset,
            num_workers=images_per_batch,
            shuffle=False,
            collate_fn=collator,
        )
        return dataloader
    elif cfg.DATASET.NAME == "mini":
        transforms = m_build_transforms(cfg, is_train=is_train)
        dataset = MiniDataset(transform=transforms)
        # NOTE(review): sampler built but unused here too (see refcoco branch).
        sampler = make_data_sampler(dataset, is_train, is_distributed)
        images_per_batch = (cfg.DATASET.TRAIN_BATCH_SIZE
                            if is_train else cfg.DATASET.TEST_BATCH_SIZE)
        if get_rank() == 0:
            print("images_per_batch: {}, num_gpus: {}".format(
                images_per_batch, num_gpus))
        collator = BatchCollator(cfg.DATASET.SIZE_DIVISIBILITY)
        dataloader = data.DataLoader(
            dataset,
            num_workers=images_per_batch,
            shuffle=False,
            collate_fn=collator,
        )
        return dataloader
    else:
        raise NotImplementedError("Unsupported dataset {}.".format(
            cfg.DATASET.NAME))