def build_validation_data_loader(self):
    if self.context.get_hparam('dataset') == 'imagenet':
        valdir = os.path.join(self.download_directory, 'val')
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])
        # Use the validation directory and deterministic eval transforms
        # (the original mistakenly reused the training directory and
        # random training augmentations here).
        val_dataset = datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                self.normalize,
            ]))
        return DataLoader(
            val_dataset,
            batch_size=self.context.get_per_slot_batch_size(),
            shuffle=False,
            num_workers=self.context.get_hparam("workers"),
            pin_memory=True)
    else:
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        val_dataset = datasets.CIFAR10(root=self.download_directory, train=False,
                                       download=True, transform=transform)
        return DataLoader(
            val_dataset,
            batch_size=self.context.get_per_slot_batch_size())

def build_validation_data_loader(self) -> DataLoader:
    if not self.data_downloaded:
        data.download_data(self.download_directory)
        self.data_downloaded = True
    corpus = data_util.Corpus(self.download_directory)
    test_dataset = data.PTBData(
        corpus.valid,
        self.context.get_hparam("seq_len"),
        self.context.get_hparam("eval_batch_size"),
        self.context.get_hparam("bptt"),
        self.context.get_hparam("max_seq_length_delta"),
    )
    return DataLoader(
        test_dataset,
        batch_sampler=data.BatchSamp(
            test_dataset,
            self.context.get_hparam("bptt"),
            self.context.get_hparam("max_seq_length_delta"),
            valid=True,
        ),
        collate_fn=data.PadSequence(),
    )

def build_validation_data_loader(self) -> DataLoader:
    if self._should_evaluate_classifier():
        # The BYOL paper performs validation in two steps:
        # - A subset of the training data (CLS_VALIDATION) is used to select an optimal LR for the classifier.
        # - Final results are reported on the test / validation data (TEST).
        # We combine these two datasets, and then use a custom reducer to calculate the final result.
        cls_val_dataset = build_dataset(
            self.data_config,
            DatasetSplit.CLS_VALIDATION,
        )
        test_dataset = build_dataset(self.data_config, DatasetSplit.TEST)
        dataset: Dataset = JointDataset(
            [cls_val_dataset, test_dataset], ["lr_val", "test"]
        )
    else:
        # When only reporting self-supervised loss, we just use CLS_VALIDATION.
        dataset = build_dataset(
            self.data_config,
            DatasetSplit.CLS_VALIDATION,
        )
    return DataLoader(
        dataset,
        batch_size=self.context.get_per_slot_batch_size(),
        pin_memory=True,
        num_workers=self.data_config.num_workers,
    )

def build_training_data_loader(self) -> DataLoader:
    train_dataset = data.WikiTextDataset(
        self.corpus,
        batch_size=self.context.get_per_slot_batch_size(),
    )
    batch_samp = data.BatchSamp(train_dataset, self.bptt)
    return DataLoader(train_dataset, batch_sampler=batch_samp)

def build_validation_data_loader(self) -> DataLoader:
    bucket_name = self.data_config["bucket_name"]
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    valid_data = ImageNetDataset(
        "validation",
        bucket_name,
        streaming=self.data_config["streaming"],
        data_download_dir=self.data_config["data_download_dir"],
        transform=transforms.Compose(
            [
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]
        ),
    )
    valid_queue = DataLoader(
        valid_data,
        batch_size=self.context.get_per_slot_batch_size(),
        shuffle=False,
        pin_memory=True,
        num_workers=self.data_config["num_workers_val"],
    )
    return valid_queue

def xor_data_loader(batch_size: int) -> DataLoader:
    training_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    training_data = torch.Tensor(training_data)
    training_labels = np.array([0, 1, 1, 0], dtype=np.float32)
    training_labels = torch.Tensor(training_labels)
    training = TensorDataset(training_data, training_labels)
    return DataLoader(training, batch_size=batch_size)

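# A minimal usage sketch for xor_data_loader above (not part of the original source;
# the helper name demo_xor_data_loader is hypothetical). It assumes numpy, torch, and
# torch.utils.data.{TensorDataset, DataLoader} are imported as the function requires,
# and simply iterates the four XOR examples in a single batch.
def demo_xor_data_loader() -> None:
    loader = xor_data_loader(batch_size=4)
    for features, labels in loader:
        # Expected shapes: torch.Size([4, 2]) and torch.Size([4])
        print(features.shape, labels.shape)
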
def build_training_data_loader(self) -> Any:
    dataset = data.get_dataset(self.data_config)
    return DataLoader(
        dataset,
        batch_size=self.context.train_micro_batch_size_per_gpu,
        shuffle=True,
        num_workers=int(self.hparams.data_workers),
    )

def build_validation_data_loader(self):
    return DataLoader(
        self.valid_data,
        batch_size=self.neox_args.train_micro_batch_size_per_gpu,
        num_workers=self.neox_args.num_workers,
        drop_last=True,
        pin_memory=False,
    )

def build_training_data_loader(self) -> Any:
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )
    trainset = torchvision.datasets.CIFAR10(
        root=self.download_directory, train=True, download=True, transform=transform
    )
    return DataLoader(trainset, batch_size=self.context.get_per_slot_batch_size())

def build_training_data_loader(self) -> DataLoader:
    dataset_train = build_dataset(image_set="train", args=self.hparams)
    return DataLoader(
        dataset_train,
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=unwrap_collate_fn,
        num_workers=self.hparams.num_workers,
        shuffle=True,
    )

def build_validation_data_loader(self) -> DataLoader:
    val_dataset = data.WikiTextDataset(
        self.corpus,
        batch_size=self.eval_batch_size,
        valid=True,
    )
    self.val_data_len = len(val_dataset) - 1
    batch_samp = data.BatchSamp(val_dataset, self.bptt)
    return DataLoader(val_dataset, batch_sampler=batch_samp)

def _create_loader(
    self,
    dataset,
    input_size,
    batch_size,
    is_training=False,
    use_prefetcher=True,
    re_prob=0.,
    re_mode='pixel',
    re_count=1,
    interpolation='bilinear',
    fill_color='mean',
    mean=IMAGENET_DEFAULT_MEAN,
    std=IMAGENET_DEFAULT_STD,
    num_workers=4,
    distributed=False,
    pin_mem=False,
    anchor_labeler=None,
):
    if isinstance(input_size, tuple):
        img_size = input_size[-2:]
    else:
        img_size = input_size

    if is_training:
        transform = transforms_coco_train(
            img_size,
            interpolation=interpolation,
            use_prefetcher=use_prefetcher,
            fill_color=fill_color,
            mean=mean,
            std=std)
    else:
        transform = transforms_coco_eval(
            img_size,
            interpolation=interpolation,
            use_prefetcher=use_prefetcher,
            fill_color=fill_color,
            mean=mean,
            std=std)
    dataset.transform = transform

    sampler = None
    collate_fn = DetectionFastCollate(anchor_labeler=anchor_labeler)
    loader = DataLoader(
        dataset,
        batch_size=self.context.get_per_slot_batch_size(),
        shuffle=False,  # sampler is None and is_training
        num_workers=num_workers,
        sampler=sampler,
        pin_memory=pin_mem,
        collate_fn=collate_fn,
    )
    return loader

def build_validation_data_loader(self) -> DataLoader:
    uni_prot = UniProtData(self.tokenizer, self.context.get_hparam("sequence_length"))
    val_data = uni_prot.from_file(self.data_config["val_data"])
    data_loader = DataLoader(
        val_data,
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=uni_prot.prepare_sample,
        num_workers=self.data_config["worker"],
    )
    return data_loader

def build_training_data_loader(self):
    train_dataset, _, _ = data.load_and_cache_examples(
        tokenizer=self.tokenizer,
        task=self.context.get_data_config().get("task"),
        max_seq_length=self.context.get_hparam("max_seq_length"),
        doc_stride=self.context.get_hparam("doc_stride"),
        max_query_length=self.context.get_hparam("max_query_length"),
        evaluate=False,
    )
    return DataLoader(train_dataset, batch_size=self.context.get_per_slot_batch_size())

def build_training_data_loader(self) -> DataLoader:
    if not self.data_downloaded:
        self.download_directory = data.download_dataset(
            download_directory=self.download_directory,
            data_config=self.context.get_data_config(),
        )
        self.data_downloaded = True
    dataset = data.get_dataset(self.download_directory, train=True)
    return DataLoader(dataset, batch_size=self.context.get_per_slot_batch_size())

def build_validation_data_loader(self) -> DataLoader:
    if not self.data_downloaded:
        self.download_directory = data.download_dataset(
            download_directory=self.download_directory,
            data_config=self.context.get_data_config(),
        )
        self.data_downloaded = True
    validation_data = data.get_dataset(self.download_directory, train=False)
    return DataLoader(validation_data, batch_size=self.context.get_per_slot_batch_size())

def build_training_data_loader(self) -> DataLoader:
    uni_prot = UniProtData(self.tokenizer, self.context.get_hparam("sequence_length"))
    train_data = uni_prot.from_file(
        self.data_config["train_data"],
        reduce_to_binary=self.context.get_hparam("reduce_to_binary_problem"),
    )
    data_loader = DataLoader(
        train_data,
        batch_size=self.context.get_per_slot_batch_size(),
        sampler=RandomSampler(train_data),
        collate_fn=uni_prot.prepare_sample,
        num_workers=self.data_config["worker"],
    )
    return data_loader

def build_validation_data_loader(self) -> DataLoader:
    dataset = build_detection_test_loader(
        self.cfg,
        self.cfg.DATASETS.TEST[0],
        batch_size=self.context.get_per_slot_batch_size(),
        num_workers=None,
    )
    return DataLoader(
        dataset,
        batch_sampler=AspectRatioGroupedDatasetBatchSamp(
            dataset, self.context.get_per_slot_batch_size()
        ),
        collate_fn=PadSequence(),
    )

def build_validation_data_loader(self) -> DataLoader:
    dataset_val = build_dataset(image_set="val", args=self.hparams)
    # Set up evaluator
    self.base_ds = get_coco_api_from_dataset(dataset_val)
    return DataLoader(
        dataset_val,
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=unwrap_collate_fn,
        num_workers=self.hparams.num_workers,
        shuffle=False,
    )

def build_training_data_loader(self) -> DataLoader:
    if not self.data_downloaded:
        self.download_dataset()
    train_dataset = data.load_and_cache_examples(
        base_data_dir=self.download_directory,
        config=self.context.get_data_config(),
        model_type=self.context.get_hparam("model_type"),
        max_seq_length=self.context.get_hparam("max_seq_length"),
        evaluate=False,
    )
    return DataLoader(train_dataset, batch_size=self.context.get_per_slot_batch_size())

def build_training_data_loader(self) -> Any:
    # IGNORE: Dummy training data loader that must be specified but is unused
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainset = torchvision.datasets.CIFAR10(
        root=self.download_directory, train=True, download=True, transform=transform
    )
    return DataLoader(trainset, batch_size=self.context.get_per_slot_batch_size())

def build_training_data_loader(self) -> DataLoader:
    dataset = build_detection_train_loader(
        self.cfg,
        batch_size=self.context.get_per_slot_batch_size(),
        context=self.context,
        num_workers=None,
    )
    return DataLoader(
        dataset,
        batch_sampler=AspectRatioGroupedDatasetBatchSamp(
            dataset, self.context.get_per_slot_batch_size()
        ),
        collate_fn=PadSequence(),
        num_workers=4,
    )

def build_training_data_loader(self) -> DataLoader:
    ds = torchvision.datasets.MNIST(
        self.download_directory,
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            # These are the precomputed mean and standard deviation of the
            # MNIST data; this normalizes the data to have zero mean and unit
            # standard deviation.
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
        download=True,
    )
    return DataLoader(ds, batch_size=1)

def build_validation_data_loader(self) -> DataLoader:
    _, valid_transform = data_transforms_cifar10()
    valid_data = dset.CIFAR10(
        root=self.data_dir, train=False, download=True, transform=valid_transform
    )
    valid_queue = DataLoader(
        valid_data,
        batch_size=self.context.get_per_slot_batch_size(),
        shuffle=False,
        num_workers=2,
    )
    return valid_queue

def build_validation_data_loader(self) -> Any:
    dataset = data.get_dataset(self.data_config)
    # Since we're not doing validation, limit to a single batch.
    dataset = torch.utils.data.Subset(
        dataset,
        list(
            range(
                self.context.train_micro_batch_size_per_gpu
                * self.context.distributed.get_size()
            )
        ),
    )
    return DataLoader(dataset, batch_size=self.context.train_micro_batch_size_per_gpu)

def build_dataloader(cfg, num_samples_per_gpu, num_replicas, num_workers, shuffle):
    dataset = build_dataset(cfg)
    sampler = (
        MyGroupSampler(dataset, num_samples_per_gpu, num_replicas) if shuffle else None
    )
    # May need to look into collate_fn for distributed data and init_fn for seeding.
    return dataset, DataLoader(
        dataset,
        batch_size=num_samples_per_gpu,
        num_workers=num_workers,
        sampler=sampler,
        collate_fn=partial(collate, samples_per_gpu=num_samples_per_gpu),
        pin_memory=False,
    )

def build_training_data_loader(self) -> DataLoader:
    train_dataset = data.PTBData(
        self.corpus.train,
        self.context.get_per_slot_batch_size(),
        self.hparams.bptt,
        self.hparams.max_seq_length_delta,
    )
    return DataLoader(
        train_dataset,
        batch_sampler=data.BatchSamp(
            train_dataset,
            self.hparams.bptt,
            self.hparams.max_seq_length_delta,
        ),
        collate_fn=data.PadSequence(),
    )

def build_validation_data_loader(self):
    self.validation_dataset, self.validation_examples, self.validation_features = data.load_and_cache_examples(
        data_dir=self.download_directory,
        tokenizer=self.tokenizer,
        task=self.context.get_data_config().get("task"),
        max_seq_length=self.context.get_hparam("max_seq_length"),
        doc_stride=self.context.get_hparam("doc_stride"),
        max_query_length=self.context.get_hparam("max_query_length"),
        evaluate=True,
    )
    return DataLoader(
        self.validation_dataset,
        batch_size=self.context.get_per_slot_batch_size(),
    )

def build_training_data_loader(self) -> DataLoader:
    if not self.data_downloaded:
        self.download_directory = data.download_dataset(
            download_directory=self.download_directory,
            data_config=self.context.get_data_config(),
        )
        self.data_downloaded = True
    train_data = data.get_multi_dataset(self.download_directory, train=True)
    return DataLoader(
        train_data,
        batch_size=self.context.get_per_slot_batch_size(),
        collate_fn=data.collate_fn,
    )

def build_validation_data_loader(self) -> DataLoader:
    dataset = OmniglotTasks(
        self.data_config["data_path"],
        self.data_config["tasks_per_epoch_val"],
        self.val_class_idxs,
        self.context.get_hparam("img_resize_dim"),
        self.num_classes["val"],
        self.num_support["val"],
        self.num_query["val"],
    )
    return DataLoader(
        dataset,
        self.context.get_hparam("val_batch_size"),
        num_workers=self.data_config["val_workers"],
        collate_fn=dataset.get_collate_fn(),
    )