Example no. 1
 def build_validation_data_loader(self) -> det_torch.DataLoader:
     eval_dataset = self.tokenized_datasets["validation"]
     return det_torch.DataLoader(
         eval_dataset,
         batch_size=self.context.get_per_slot_batch_size(),
         collate_fn=self.collator,
     )
    def build_validation_data_loader(self) -> torch.utils.data.DataLoader:
        if self.hparams["dataloader_type"] == "determined":
            return pytorch.DataLoader(
                OnesDataset(),
                batch_size=self.context.get_per_slot_batch_size())
        elif self.hparams["dataloader_type"] == "torch":
            dataset = OnesDataset()

            num_workers = self.context.distributed.get_size()
            rank = self.context.distributed.get_rank()
            batch_size = self.context.get_per_slot_batch_size()

            sampler = torch.utils.data.SequentialSampler(dataset)
            sampler = samplers.DistributedSampler(sampler,
                                                  num_workers=num_workers,
                                                  rank=rank)
            batch_sampler = torch.utils.data.BatchSampler(sampler,
                                                          batch_size,
                                                          drop_last=False)

            return torch.utils.data.DataLoader(dataset,
                                               batch_sampler=batch_sampler)
        else:
            raise ValueError(
                f"unknown dataloader_type: {self.hparams['dataloader_type']}")
    def build_training_data_loader(self) -> torch.utils.data.DataLoader:
        if self.hparams["dataloader_type"] == "determined":
            return pytorch.DataLoader(
                OnesDataset(),
                batch_size=self.context.get_per_slot_batch_size())
        elif self.hparams["dataloader_type"] == "torch":
            dataset = OnesDataset()

            seed = self.context.get_trial_seed()
            num_workers = self.context.distributed.get_size()
            rank = self.context.distributed.get_rank()
            batch_size = self.context.get_per_slot_batch_size()
            skip_batches = self.context.get_initial_batch()

            sampler = torch.utils.data.SequentialSampler(dataset)
            sampler = samplers.ReproducibleShuffleSampler(sampler, seed)
            sampler = samplers.RepeatSampler(sampler)
            sampler = samplers.DistributedSampler(sampler,
                                                  num_workers=num_workers,
                                                  rank=rank)
            batch_sampler = torch.utils.data.BatchSampler(sampler,
                                                          batch_size,
                                                          drop_last=False)
            batch_sampler = samplers.SkipBatchSampler(batch_sampler,
                                                      skip_batches)

            return torch.utils.data.DataLoader(dataset,
                                               batch_sampler=batch_sampler)
        else:
            raise ValueError(
                f"unknown dataloader_type: {self.hparams['dataloader_type']}")
 def build_validation_data_loader(self) -> det_torch.DataLoader:
     return det_torch.DataLoader(
         self.tokenized_datasets["validation"],
         batch_size=self.context.get_per_slot_batch_size(),
         collate_fn=transformers.DataCollatorForTokenClassification(
             self.tokenizer),
     )
Example no. 5
def xor_data_loader(batch_size: int) -> pytorch.DataLoader:
    training_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]],
                             dtype=np.float32)
    training_data = torch.Tensor(training_data)
    training_labels = np.array([0, 1, 1, 0], dtype=np.float32)
    training_labels = torch.Tensor(training_labels)
    training = TensorDataset(training_data, training_labels)
    return pytorch.DataLoader(training, batch_size=batch_size)
Example no. 6
 def build_validation_data_loader(self) -> det_torch.DataLoader:
     # Determined's distributed batch sampler interleaves shards on each GPU slot so
     # sample i goes to worker with rank i % world_size.  Therefore, we need to re-sort
     # all the samples once we gather the predictions before computing the validation metric.
     return det_torch.DataLoader(
         qa_utils.DatasetWithIndex(self.tokenized_datasets["validation"]),
         batch_size=self.context.get_per_slot_batch_size(),
         collate_fn=self.collator,
     )
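Example no. 6 relies on a qa_utils.DatasetWithIndex wrapper whose definition is not shown here. Below is a minimal sketch of what such a wrapper might look like, assuming the underlying dataset yields dict-like samples; the sample_index field name is a placeholder for illustration, not necessarily what qa_utils actually uses.

import torch.utils.data


class DatasetWithIndex(torch.utils.data.Dataset):
    """Wrap a dataset so every sample carries its original position.

    Determined's distributed batch sampler sends sample i to the worker with
    rank i % world_size, so gathered predictions arrive interleaved; keeping
    the index lets them be re-sorted before the validation metric is computed.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample = dict(self.dataset[idx])  # assumes dict-like samples
        sample["sample_index"] = idx  # placeholder field name
        return sample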
Example no. 7
 def build_validation_data_loader(self) -> det_torch.DataLoader:
     eval_dataset = self.tokenized_datasets[
         "validation_matched" if self.hparams.finetuning_task == "mnli"
         else "validation"]
     return det_torch.DataLoader(
         eval_dataset,
         batch_size=self.context.get_per_slot_batch_size(),
         collate_fn=self.collator,
     )
Example no. 8
 def build_validation_data_loader(
         self) -> Union[pytorch.DataLoader, torch.utils.data.DataLoader]:
     dataset = LinearDataset(1, 1,
                             self.ds_config.train_micro_batch_size_per_gpu)
     dataloader = pytorch.DataLoader(
         dataset, batch_size=self.ds_config.train_micro_batch_size_per_gpu)
     if self.hparams.test_manual_dataloader or self.hparams.test_fail_dataset_repro_check:
         return dataloader.get_data_loader(repeat=True)
     return dataloader
Example no. 9
 def build_training_data_loader(
         self) -> Union[pytorch.DataLoader, torch.utils.data.DataLoader]:
     dataset = LinearDataset(1, 1, self.ds_config.train_batch_size * 2)
     dataloader = pytorch.DataLoader(
         dataset, batch_size=self.ds_config.train_micro_batch_size_per_gpu)
     if self.hparams.test_manual_dataloader or self.hparams.test_fail_dataset_repro_check:
         return ds_dataloader.RepeatingLoader(
             torch.utils.data.DataLoader(
                 dataset,
                 batch_size=self.ds_config.train_micro_batch_size_per_gpu))
     return dataloader
Example no. 10
def build_dataloader(
    cfg: mmcv.Config,
    split: str,
    context: det_torch.PyTorchTrialContext,
    shuffle: bool,
) -> Tuple[torch_data.Dataset, det_torch.DataLoader]:
    """
    Build the dataset and dataloader according to cfg and sampler parameters.

    Arguments:
        cfg: mmcv.Config with dataset specifications.
        split: one of train, val, or test. If val or test, annotations are not loaded.
        context: PyTorchTrialContext with seed info used to seed the dataloader workers.
        shuffle: whether to shuffle indices for data loading.
    Returns:
        dataset and dataloader
    """
    assert split in ["train", "val", "test"], (
        "argument split must be one of train, val, or test.")
    num_samples_per_gpu = context.get_per_slot_batch_size()
    num_replicas = context.distributed.get_size()
    num_workers = cfg.workers_per_gpu
    test_mode = split != "train"

    cfg = eval(f"cfg.{split}")
    maybe_download_ann_file(cfg)

    dataset = mmdet.datasets.build_dataset(cfg, {"test_mode": test_mode})
    if test_mode:
        dataset = DatasetWithIndex(dataset)
    sampler = GroupSampler(dataset, num_samples_per_gpu,
                           num_replicas) if shuffle else None

    return dataset, det_torch.DataLoader(
        dataset,
        batch_size=num_samples_per_gpu,
        num_workers=num_workers,
        sampler=sampler,
        collate_fn=functools.partial(mmcv.parallel.collate,
                                     samples_per_gpu=num_samples_per_gpu),
        pin_memory=False,
        worker_init_fn=functools.partial(
            mmdet.datasets.builder.worker_init_fn,
            seed=context.get_trial_seed(),
            rank=context.distributed.get_rank(),
            num_workers=num_workers,
        ),
    )
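As context for Example no. 10, here is a hedged usage sketch (not taken from the original source) of how a PyTorchTrial might call build_dataloader, assuming the full mmdet config is stored as self.cfg (so the per-split dataset specs live under self.cfg.data) and the trial context as self.context.

 def build_training_data_loader(self) -> det_torch.DataLoader:
     # self.cfg.data and self.context are assumed attribute names.
     self.train_dataset, train_loader = build_dataloader(
         self.cfg.data, "train", self.context, shuffle=True)
     return train_loader

 def build_validation_data_loader(self) -> det_torch.DataLoader:
     self.val_dataset, val_loader = build_dataloader(
         self.cfg.data, "val", self.context, shuffle=False)
     return val_loader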
Example no. 11
 def build_training_data_loader(self) -> pytorch.DataLoader:
     return pytorch.DataLoader(
         OnesDataset(), batch_size=self.context.get_per_slot_batch_size())
Example no. 12
 def build_training_data_loader(self) -> det_torch.DataLoader:
     return det_torch.DataLoader(
         self.tokenized_datasets["train"],
         batch_size=self.context.get_per_slot_batch_size(),
         collate_fn=self.collator,
     )
Example no. 13
 def build_validation_data_loader(self) -> pytorch.DataLoader:
     return pytorch.DataLoader(
         IdentityDataset(20),
         batch_size=self.context.get_per_slot_batch_size())
Example no. 14
 def build_validation_data_loader(self) -> det_torch.DataLoader:
     return det_torch.DataLoader(
         self.tokenized_datasets["validation"],
         batch_size=self.context.get_per_slot_batch_size(),
         collate_fn=lambda x: self.collator(x).data,
     )
 def build_validation_data_loader(self) -> pytorch.DataLoader:
     return pytorch.DataLoader(
         self.dm.val_dataloader().dataset,
         batch_size=self.context.get_per_slot_batch_size())
Example no. 16
 def build_validation_data_loader(self):
     return pytorch.DataLoader(
         OnesDataset(), batch_size=self.context.get_per_slot_batch_size())
 def build_validation_data_loader(self):
     # TODO: use Determined's Sampler API + pytorch_geometric's DataLoader
     return pytorch.DataLoader(
         OnesDataset(), batch_size=self.context.get_per_slot_batch_size())