def build_validation_data_loader(self) -> det_torch.DataLoader:
    eval_dataset = self.tokenized_datasets["validation"]
    return det_torch.DataLoader(
        eval_dataset,
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=self.collator,
    )
def build_validation_data_loader(self) -> torch.utils.data.DataLoader:
    if self.hparams["dataloader_type"] == "determined":
        return pytorch.DataLoader(
            OnesDataset(), batch_size=self.context.get_per_slot_batch_size())
    elif self.hparams["dataloader_type"] == "torch":
        dataset = OnesDataset()
        num_workers = self.context.distributed.get_size()
        rank = self.context.distributed.get_rank()
        batch_size = self.context.get_per_slot_batch_size()
        sampler = torch.utils.data.SequentialSampler(dataset)
        sampler = samplers.DistributedSampler(sampler, num_workers=num_workers, rank=rank)
        batch_sampler = torch.utils.data.BatchSampler(sampler, batch_size, drop_last=False)
        return torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler)
    else:
        raise ValueError(
            f"unknown dataloader_type: {self.hparams['dataloader_type']}")
def build_training_data_loader(self) -> torch.utils.data.DataLoader:
    if self.hparams["dataloader_type"] == "determined":
        return pytorch.DataLoader(
            OnesDataset(), batch_size=self.context.get_per_slot_batch_size())
    elif self.hparams["dataloader_type"] == "torch":
        dataset = OnesDataset()
        seed = self.context.get_trial_seed()
        num_workers = self.context.distributed.get_size()
        rank = self.context.distributed.get_rank()
        batch_size = self.context.get_per_slot_batch_size()
        skip_batches = self.context.get_initial_batch()
        # Compose samplers: shuffle reproducibly, repeat across epochs, shard
        # across workers, batch, then skip batches already trained on resume.
        sampler = torch.utils.data.SequentialSampler(dataset)
        sampler = samplers.ReproducibleShuffleSampler(sampler, seed)
        sampler = samplers.RepeatSampler(sampler)
        sampler = samplers.DistributedSampler(sampler, num_workers=num_workers, rank=rank)
        batch_sampler = torch.utils.data.BatchSampler(sampler, batch_size, drop_last=False)
        batch_sampler = samplers.SkipBatchSampler(batch_sampler, skip_batches)
        return torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler)
    else:
        raise ValueError(
            f"unknown dataloader_type: {self.hparams['dataloader_type']}")
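# A minimal standalone sketch of the same sampler chain on a toy dataset, using
# only the samplers calls shown above. The seed, worker count, rank, and number
# of skipped batches are illustrative values, not taken from any trial config.
import itertools

import torch
from determined.pytorch import samplers

toy_dataset = list(range(16))  # any sized sequence works as a map-style dataset

sampler = torch.utils.data.SequentialSampler(toy_dataset)
sampler = samplers.ReproducibleShuffleSampler(sampler, 42)  # deterministic shuffle
sampler = samplers.RepeatSampler(sampler)                   # stream indices across epochs
sampler = samplers.DistributedSampler(sampler, num_workers=2, rank=0)  # this shard only
batch_sampler = torch.utils.data.BatchSampler(sampler, 4, drop_last=False)
batch_sampler = samplers.SkipBatchSampler(batch_sampler, 1)  # resume past one trained batch

loader = torch.utils.data.DataLoader(toy_dataset, batch_sampler=batch_sampler)
# RepeatSampler makes the stream infinite, so take a fixed number of batches:
for batch in itertools.islice(loader, 3):
    print(batch)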
def build_validation_data_loader(self) -> det_torch.DataLoader:
    return det_torch.DataLoader(
        self.tokenized_datasets["validation"],
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=transformers.DataCollatorForTokenClassification(self.tokenizer),
    )
def xor_data_loader(batch_size: int) -> pytorch.DataLoader:
    training_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    training_data = torch.Tensor(training_data)
    training_labels = np.array([0, 1, 1, 0], dtype=np.float32)
    training_labels = torch.Tensor(training_labels)
    training = TensorDataset(training_data, training_labels)
    return pytorch.DataLoader(training, batch_size=batch_size)
def build_validation_data_loader(self) -> det_torch.DataLoader:
    # Determined's distributed batch sampler interleaves shards on each GPU slot,
    # so sample i goes to the worker with rank i % world_size. Therefore, we need
    # to re-sort all the samples once we gather the predictions, before computing
    # the validation metric.
    return det_torch.DataLoader(
        qa_utils.DatasetWithIndex(self.tokenized_datasets["validation"]),
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=self.collator,
    )
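# A minimal sketch of the re-sort described in the comment above, assuming the
# per-slot predictions have been gathered into a flat list of (index, prediction)
# pairs via the indices that DatasetWithIndex attaches. The helper name
# restore_dataset_order is hypothetical, not part of the trial above.
from typing import Any, List, Tuple

def restore_dataset_order(gathered: List[Tuple[int, Any]]) -> List[Any]:
    # Undo the rank-interleaved sharding (sample i -> rank i % world_size) by
    # sorting the gathered (index, prediction) pairs back into dataset order.
    return [pred for _, pred in sorted(gathered, key=lambda pair: pair[0])]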
def build_validation_data_loader(self) -> det_torch.DataLoader:
    # MNLI ships separate matched/mismatched validation splits, so the usual
    # "validation" key does not exist for that task.
    eval_dataset = self.tokenized_datasets[
        "validation_matched" if self.hparams.finetuning_task == "mnli" else "validation"]
    return det_torch.DataLoader(
        eval_dataset,
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=self.collator,
    )
def build_validation_data_loader(
    self,
) -> Union[pytorch.DataLoader, torch.utils.data.DataLoader]:
    dataset = LinearDataset(1, 1, self.ds_config.train_micro_batch_size_per_gpu)
    dataloader = pytorch.DataLoader(
        dataset, batch_size=self.ds_config.train_micro_batch_size_per_gpu)
    if self.hparams.test_manual_dataloader or self.hparams.test_fail_dataset_repro_check:
        return dataloader.get_data_loader(repeat=True)
    return dataloader
def build_training_data_loader(
    self,
) -> Union[pytorch.DataLoader, torch.utils.data.DataLoader]:
    dataset = LinearDataset(1, 1, self.ds_config.train_batch_size * 2)
    dataloader = pytorch.DataLoader(
        dataset, batch_size=self.ds_config.train_micro_batch_size_per_gpu)
    if self.hparams.test_manual_dataloader or self.hparams.test_fail_dataset_repro_check:
        return ds_dataloader.RepeatingLoader(
            torch.utils.data.DataLoader(
                dataset, batch_size=self.ds_config.train_micro_batch_size_per_gpu))
    return dataloader
def build_dataloader(
    cfg: mmcv.Config,
    split: str,
    context: det_torch.PyTorchTrialContext,
    shuffle: bool,
) -> Tuple[torch_data.Dataset, det_torch.DataLoader]:
    """
    Build the dataset and dataloader according to cfg and sampler parameters.

    Arguments:
        cfg: mmcv.Config with dataset specifications.
        split: one of train, val, or test. If val or test, annotations are not loaded.
        context: PyTorchTrialContext with seed info used to seed the dataloader workers.
        shuffle: whether to shuffle indices for data loading.
    Returns:
        dataset and dataloader
    """
    assert split in ["train", "val", "test"], "argument split must be one of train, val, or test."
    num_samples_per_gpu = context.get_per_slot_batch_size()
    num_replicas = context.distributed.get_size()
    num_workers = cfg.workers_per_gpu
    test_mode = False if split == "train" else True
    cfg = eval(f"cfg.{split}")  # select the cfg.train / cfg.val / cfg.test sub-config
    maybe_download_ann_file(cfg)

    dataset = mmdet.datasets.build_dataset(cfg, {"test_mode": test_mode})
    if test_mode:
        dataset = DatasetWithIndex(dataset)
    sampler = GroupSampler(dataset, num_samples_per_gpu, num_replicas) if shuffle else None
    return dataset, det_torch.DataLoader(
        dataset,
        batch_size=num_samples_per_gpu,
        num_workers=num_workers,
        sampler=sampler,
        collate_fn=functools.partial(mmcv.parallel.collate, samples_per_gpu=num_samples_per_gpu),
        pin_memory=False,
        worker_init_fn=functools.partial(
            mmdet.datasets.builder.worker_init_fn,
            seed=context.get_trial_seed(),
            rank=context.distributed.get_rank(),
            num_workers=num_workers,
        ),
    )
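# A hedged sketch of how this helper might be invoked from a trial; the
# attribute names on self (cfg, train_dataset) are illustrative assumptions,
# though cfg.data holding train/val/test sub-configs plus workers_per_gpu
# matches the usual mmdetection config layout.
def build_training_data_loader(self) -> det_torch.DataLoader:
    dataset, dataloader = build_dataloader(
        self.cfg.data,  # assumed: mmcv.Config with .train/.val/.test sub-configs
        "train",        # annotations are loaded; test_mode stays False
        self.context,   # supplies per-slot batch size, seed, and rank
        shuffle=True,   # enables the GroupSampler
    )
    self.train_dataset = dataset  # keep a handle on the built dataset
    return dataloader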
def build_training_data_loader(self) -> pytorch.DataLoader:
    return pytorch.DataLoader(
        OnesDataset(), batch_size=self.context.get_per_slot_batch_size())
def build_training_data_loader(self) -> det_torch.DataLoader:
    return det_torch.DataLoader(
        self.tokenized_datasets["train"],
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=self.collator,
    )
def build_validation_data_loader(self) -> pytorch.DataLoader:
    return pytorch.DataLoader(
        IdentityDataset(20), batch_size=self.context.get_per_slot_batch_size())
def build_validation_data_loader(self) -> det_torch.DataLoader:
    return det_torch.DataLoader(
        self.tokenized_datasets["validation"],
        batch_size=self.context.get_per_slot_batch_size(),
        # Unwrap the collated BatchEncoding to its underlying dict of tensors.
        collate_fn=lambda x: self.collator(x).data,
    )
def build_validation_data_loader(self) -> pytorch.DataLoader:
    return pytorch.DataLoader(
        self.dm.val_dataloader().dataset,
        batch_size=self.context.get_per_slot_batch_size())
def build_validation_data_loader(self):
    return pytorch.DataLoader(
        OnesDataset(), batch_size=self.context.get_per_slot_batch_size())
def build_validation_data_loader(self):
    # TODO: use Determined's Sampler API + pytorch_geometric's DataLoader
    return pytorch.DataLoader(
        OnesDataset(), batch_size=self.context.get_per_slot_batch_size())