class FiDataLoader(object):

    def __init__(self, opt, dataset):
        super(FiDataLoader, self).__init__()
        if opt.shuffle:
            data_sampler = RandomSampler(dataset)
        else:
            data_sampler = None
        if opt.mode == 'train':
            self.data_loader = DataLoader(
                dataset,
                batch_size=opt.batch_size,
                shuffle=(data_sampler is None),
                num_workers=opt.workers,
                pin_memory=True,
                sampler=data_sampler)
        else:
            self.data_loader = DataLoader(
                dataset,
                batch_size=opt.batch_size,
                shuffle=False,
                num_workers=opt.workers,
                pin_memory=True,
                sampler=data_sampler)
        self.dataset = dataset
        self.data_iter = iter(self.data_loader)

    def next_batch(self):
        try:
            batch = next(self.data_iter)
        except StopIteration:
            # Epoch exhausted: restart the iterator and fetch the first batch
            self.data_iter = iter(self.data_loader)
            batch = next(self.data_iter)
        return batch
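# Hedged usage sketch for FiDataLoader above. The `opt` field names (mode,
# shuffle, batch_size, workers) follow the attributes the class reads; the
# toy TensorDataset is an assumption for the sake of a runnable example.
from argparse import Namespace

import torch
from torch.utils.data import TensorDataset

toy = TensorDataset(torch.randn(256, 3), torch.randint(0, 2, (256,)))
opt = Namespace(mode='train', shuffle=True, batch_size=32, workers=0)
loader = FiDataLoader(opt, toy)
for step in range(100):
    x, y = loader.next_batch()  # restarts transparently at epoch boundaries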
def getEpisodeData(self, support_sampler, query_sampler):
    k, qk, n, N = self.readParams()

    support_loader = DataLoader(
        self.Dataset,
        batch_size=k * n,
        sampler=support_sampler,
        collate_fn=batchSequenceWithoutPad)  # getBatchSequenceFunc())
    # Python 3 iterators have no .next() method; use the next() builtin
    supports, support_labels, support_lens = next(iter(support_loader))
    self.SupSeqLenCache = support_lens

    if query_sampler:
        query_loader = DataLoader(
            self.Dataset,
            batch_size=qk * n,
            sampler=query_sampler,
            collate_fn=batchSequenceWithoutPad)  # getBatchSequenceFunc())
        queries, query_labels, query_lens = next(iter(query_loader))
        self.QueSeqLenCache = query_lens
        return supports, support_labels, queries, query_labels
    else:
        return supports, support_labels
def setup(self, stage=None):
    if stage == 'fit' or stage is None:
        # Load the full training split in a single oversized batch
        dset = DataLoader(
            datasets.MNIST(self.root, train=True, transform=self.transforms),
            100000)
        x_train, y_train = next(iter(dset))
        x_train, x_val, y_train, y_val = train_test_split(
            x_train,
            y_train,
            train_size=int(50000 * self.train_fraction),
            test_size=10000)

        mnist_train = ContDataset(x_train, y_train, transforms=None)
        mnist_train.set_task(self.task_list)
        self.train = mnist_train

        mnist_val = ContDataset(x_val, y_val, transforms=None)
        mnist_val.set_task(self.task_list)
        self.val = mnist_val

    if stage == 'test' or stage is None:
        dset = DataLoader(
            datasets.MNIST(self.root, train=False, transform=self.transforms),
            100000)
        x_test, y_test = next(iter(dset))
        self.test = ContDataset(x_test, y_test, transforms=None)
class InfiniteDataloader():

    def __init__(self, dataset, batch_size, collate_fn, num_workers,
                 num_steps, weights=None, batch_sampler=None):
        self.dataset = dataset
        self.num_steps = num_steps
        if batch_sampler is not None:
            self.dataloader = DataLoader(dataset,
                                         batch_sampler=batch_sampler,
                                         collate_fn=collate_fn,
                                         num_workers=num_workers)
        else:
            if weights is None:
                self.dataloader = DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             drop_last=True,
                                             collate_fn=collate_fn,
                                             num_workers=num_workers)
            else:
                print("USING PROVIDED SAMPLE WEIGHTS")
                print("WEIGHTS SHAPE: ", weights.shape)
                sampler = torch.utils.data.sampler.WeightedRandomSampler(
                    weights, len(weights))
                self.dataloader = DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler,
                                             drop_last=True,
                                             collate_fn=collate_fn,
                                             num_workers=num_workers)
        self.iter = iter(self.dataloader)

    def __iter__(self):
        self.count = 0
        return self

    def __len__(self):
        return self.num_steps

    def __next__(self):
        self.count += 1
        # Stop after exactly num_steps batches, consistent with __len__
        if self.count > self.num_steps:
            raise StopIteration
        try:
            data = next(self.iter)
        except StopIteration:
            # Underlying loader exhausted: reopen it and keep going
            self.iter = iter(self.dataloader)
            data = next(self.iter)
        return data
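# Hedged usage sketch for InfiniteDataloader above; the toy dataset and the
# default collate_fn (None) are assumptions, everything else follows the
# constructor signature.
import torch
from torch.utils.data import TensorDataset

toy = TensorDataset(torch.randn(256, 3))
loader = InfiniteDataloader(toy, batch_size=32, collate_fn=None,
                            num_workers=0, num_steps=1000)
for batch in loader:  # yields exactly num_steps batches, re-opening the
    pass              # underlying DataLoader whenever an epoch runs out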
def getEpisodeData(self, support_sampler, query_sampler):
    k, qk, n, N = self.readParams()

    support_loader = DataLoader(self.Dataset,
                                batch_size=k * n,
                                sampler=support_sampler)
    query_loader = DataLoader(self.Dataset,
                              batch_size=qk * n,
                              sampler=query_sampler)

    # Each loader yields a single episode batch; Python 3 iterators have no
    # .next() method, so use the next() builtin
    supports, support_labels = next(iter(support_loader))
    queries, query_labels = next(iter(query_loader))
    return supports, support_labels, queries, query_labels
def init_models(self, train_dataset):
    # Dry-run one small batch through a CPU forward pass
    run_iter = DataLoader(train_dataset,
                          batch_size=4,
                          shuffle=False,
                          pin_memory=False)
    init_batch = next(iter(run_iter))
    self.forward(init_batch[0], init_batch[1], torch.device('cpu'))
class Generator(object):
    '''
    DATA_DIR: Path to a folder containing per-class folders of images,
        "/root/folder/img.jpg" - see:
        https://pytorch.org/vision/stable/datasets.html#imagefolder
    BATCH_SIZE: Size of batches yielded by the iterator
    dim: Desired single integer square dimension of images
    '''

    def __init__(self, DATA_DIR: str, BATCH_SIZE: int, dim: int = 256):
        self.dir = DATA_DIR
        self.batch_size = BATCH_SIZE
        self.dim = dim
        data_transformer = Compose(
            [Resize((dim, dim)),
             ToTensor(),
             Lambda(lambda x: x.mul(dim))])
        self.train = ImageFolder(DATA_DIR, data_transformer)
        self.train_loader = DataLoader(self.train, batch_size=BATCH_SIZE)

    def __iter__(self):
        return iter(self.train_loader)
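# Hedged usage sketch for Generator above; the dataset path is a placeholder
# that must point at an ImageFolder-style directory (class subfolders).
gen = Generator('/path/to/imagefolder', BATCH_SIZE=16, dim=256)
for images, labels in gen:
    print(images.shape)  # torch.Size([16, 3, 256, 256]) until the last batch
    break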
class RandomIdSamplerTest(unittest.TestCase):

    def setUp(self):
        self.batch_id = 4
        self.batch_image = 16
        self.data_source = Market1501(root + '/bounding_box_train',
                                      transform=ToTensor())
        self.sampler = RandomIdSampler(self.data_source,
                                       batch_image=self.batch_image)
        self.data_loader = DataLoader(self.data_source,
                                      sampler=self.sampler,
                                      batch_size=self.batch_id *
                                      self.batch_image)

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_sampler(self):
        imgs = [img for img in self.sampler]
        # In Python 3, range objects neither compare equal to lists nor
        # support `+`, so materialize them as lists
        self.assertEqual(list(range(16)), imgs[:16])
        self.assertEqual(
            list(range(46, 53)) + list(range(46, 53)) + list(range(46, 48)),
            imgs[16:32])

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_data_loader(self):
        it = iter(self.data_loader)
        _, target = next(it)
        self.assertEqual([0] * 16 + [1] * 16 + [2] * 16 + [3] * 16,
                         target.numpy().tolist())
        _, target = next(it)
        self.assertEqual([4] * 16 + [5] * 16 + [6] * 16 + [7] * 16,
                         target.numpy().tolist())
def test(self):
    """Test."""

    class Dset(Dataset):

        def __len__(self):
            return 100

        def __getitem__(self, i):
            return i

    data = Dset()
    sampler = StatefulSampler(data, shuffle=True)
    dl = DataLoader(data, sampler=sampler, batch_size=2, num_workers=2)

    # Consume part of an epoch, then checkpoint the sampler state
    used_inds = []
    diter = iter(dl)
    for _ in range(10):
        batch = next(diter)
        used_inds.extend(batch.tolist())
    state = sampler.state_dict(diter)

    # Restore the state into a fresh sampler/loader and finish the epoch
    sampler = StatefulSampler(data, shuffle=True)
    sampler.load_state_dict(state)
    dl = DataLoader(data, sampler=sampler, batch_size=2, num_workers=2)
    for batch in dl:
        used_inds.extend(batch.tolist())

    # Every index should be visited exactly once across the two runs
    assert len(used_inds) == 100
    assert len(set(used_inds)) == 100
def test(config):
    """Test point cloud data loader."""
    from torch.utils.data import DataLoader
    from lib.utils import Timer

    timer = Timer()
    DatasetClass = StanfordVoxelization2cmDataset
    transformations = [
        t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS,
                               DatasetClass.IS_TEMPORAL),
        t.ChromaticAutoContrast(),
        t.ChromaticTranslation(config.data_aug_color_trans_ratio),
        t.ChromaticJitter(config.data_aug_color_jitter_std),
        t.HueSaturationTranslation(config.data_aug_hue_max,
                                   config.data_aug_saturation_max),
    ]
    dataset = DatasetClass(config,
                           input_transform=t.Compose(transformations),
                           augment_data=True,
                           cache=True,
                           elastic_distortion=True)
    data_loader = DataLoader(
        dataset=dataset,
        collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False),
        batch_size=4,
        shuffle=True)

    # Time 100 batches; avoid shadowing the builtin `iter` and use the
    # next() builtin instead of the removed .next() method
    data_iter = iter(data_loader)
    for i in range(100):
        timer.tic()
        data = next(data_iter)
        print(timer.toc())
class CelebAAugmentLoader(object):
    """Loader for the CelebA dataset.

    40: 218-30, 15: 178-15
    """

    def __init__(self, file_path, batch_size, valid_size, crop, shuffle,
                 use_cuda):
        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

        transform_list = []
        if crop:
            transform_list.append(transforms.CenterCrop(128))
        transform_list.append(transforms.Resize((64, 64)))
        transform_list.append(transforms.ToTensor())
        transform_list.append(
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))
        transform = transforms.Compose(transform_list)

        train_dataset, test_dataset = self.get_dataset(file_path, transform)

        # Set the samplers: split the indices into train/validation subsets
        num_train = len(train_dataset)
        indices = list(range(num_train))
        split = int(np.floor(valid_size * num_train))
        if shuffle:
            np.random.shuffle(indices)
        train_idx, valid_idx = indices[split:], indices[:split]
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)

        # Set the loaders
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       sampler=train_sampler,
                                       **kwargs)
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      sampler=valid_sampler,
                                      **kwargs)

        # Infer the image shape from one sample batch
        tmp_batch = next(iter(self.train_loader))[0]
        self.img_shape = list(tmp_batch.size())[1:]

    @staticmethod
    def get_dataset(file_path, transform):
        augment_transforms = get_augment_transforms()
        train_dataset = ImageFolderPair(augment_transforms, file_path,
                                        transform)
        test_dataset = ImageFolderPair(augment_transforms, file_path,
                                       transform)
        return train_dataset, test_dataset
def calculate_sum_of_exemplar(self, exemplar):
    # Load the whole exemplar set as a single batch
    dl = DataLoader(exemplar,
                    shuffle=False,
                    batch_size=exemplar.data.shape[0])
    data = next(iter(dl))[1]
    data = data.cuda(self.device_num) if self.use_gpu else data
    with torch.no_grad():
        soe = self.discriminator(data, classify=True).sum(dim=0)
    return soe
def _iter():
    # Wrap the original iterator into an interruptible one
    for batch in DataLoader.__iter__(self):
        if self.interrupted:
            # If flagged as interrupted, return immediately, which would end
            # the BackgroundGenerator
            return
        # Otherwise, yield the data batch as normal
        yield batch
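# `_iter` above closes over `self`, so it is presumably defined inside a
# method of a DataLoader subclass. A minimal hedged sketch of such a wrapper;
# only the `interrupted` flag and the iteration logic are taken from `_iter`
# itself, the class and method names are assumptions.
class InterruptibleDataLoader(DataLoader):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.interrupted = False

    def interrupt(self):
        # Ask the running iterator to stop at the next batch boundary
        self.interrupted = True

    def __iter__(self):
        def _iter():
            for batch in DataLoader.__iter__(self):
                if self.interrupted:
                    return
                yield batch

        return _iter()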
def test_handle_series_id(self):
    """Tests the handle_series_id method."""
    mse1 = MSELoss()
    d1 = DataLoader(self.data_loader, batch_size=2)
    d = DecoderTransformer(3, 8, 4, 128, 20, 0.2, 1, {},
                           seq_num1=3,
                           forecast_length=1)
    x, y = next(iter(d1))
    l1 = handle_csv_id_output(x, y, d, mse1)
    self.assertGreater(l1, 0)
class Loader(object):

    def __init__(self, dataset_ident, file_path, download, shuffle,
                 batch_size, data_transform, target_transform, use_cuda):
        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

        # Set the dataset
        # NOTE: will need a refactor once we load more different datasets
        # that require custom classes
        loader_map = {
            'mnist': datasets.MNIST,
            'MNIST': datasets.MNIST,
            'FashionMNIST': datasets.FashionMNIST,
            'fashion': datasets.FashionMNIST
        }
        num_class = {
            'mnist': 10,
            'MNIST': 10,
            'fashion': 10,
            'FashionMNIST': 10
        }

        # Get the datasets
        train_dataset, test_dataset = self.get_dataset(
            loader_map[dataset_ident], file_path, download, data_transform,
            target_transform)

        # Set the loaders
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=shuffle,
                                       **kwargs)
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      **kwargs)

        # Infer and set the size, idea from:
        # https://github.com/jramapuram/helpers/
        tmp_batch, _ = next(iter(self.train_loader))
        self.img_shape = list(tmp_batch.size())[1:]
        self.num_class = num_class[dataset_ident]
        self.batch_size = batch_size

    @staticmethod
    def get_dataset(dataset, file_path, download, data_transform,
                    target_transform):
        # data_transform may be None, a single transform, or a list:
        # None -> default to [transforms.ToTensor()]
        # single item -> wrap in a one-element list
        if not data_transform:
            data_transform = [transforms.ToTensor()]
        elif not isinstance(data_transform, list):
            data_transform = [data_transform]

        # Training and validation datasets
        train_dataset = dataset(file_path,
                                train=True,
                                download=download,
                                transform=transforms.Compose(data_transform),
                                target_transform=target_transform)
        test_dataset = dataset(file_path,
                               train=False,
                               download=download,
                               transform=transforms.Compose(data_transform),
                               target_transform=target_transform)
        return train_dataset, test_dataset
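# Hedged usage sketch for Loader above; everything beyond the constructor
# signature and the well-known MNIST shape is an assumption.
loader = Loader('mnist',
                file_path='./data',
                download=True,
                shuffle=True,
                batch_size=128,
                data_transform=None,
                target_transform=None,
                use_cuda=torch.cuda.is_available())
print(loader.img_shape)  # [1, 28, 28] for MNIST
print(loader.num_class)  # 10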
def build(self):
    # print("Building DataIterator...")
    dataloader = DataLoader(self.dataset,
                            batch_size=self.batch_size,
                            shuffle=True,
                            num_workers=0,
                            drop_last=True)
    self.dataiter = iter(dataloader)
class NodeDataLoader:
    """PyTorch dataloader for batch-iterating over a set of nodes, generating
    the list of blocks as computation dependency of the said minibatch.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    nids : Tensor or dict[ntype, Tensor]
        The node set to compute outputs.
    block_sampler : dgl.dataloading.BlockSampler
        The neighborhood sampler.
    kwargs : dict
        Arguments being passed to :py:class:`torch.utils.data.DataLoader`.

    Examples
    --------
    To train a 3-layer GNN for node classification on a set of nodes
    ``train_nid`` on a homogeneous graph where each node takes messages from
    all neighbors (assume the backend is PyTorch):

    >>> sampler = dgl.dataloading.NeighborSampler([None, None, None])
    >>> dataloader = dgl.dataloading.NodeDataLoader(
    ...     g, train_nid, sampler,
    ...     batch_size=1024, shuffle=True, drop_last=False, num_workers=4)
    >>> for input_nodes, output_nodes, blocks in dataloader:
    ...     train_on(input_nodes, output_nodes, blocks)
    """
    collator_arglist = inspect.getfullargspec(NodeCollator).args

    def __init__(self, g, nids, block_sampler, **kwargs):
        # Split kwargs between the collator and the underlying DataLoader
        collator_kwargs = {}
        dataloader_kwargs = {}
        for k, v in kwargs.items():
            if k in self.collator_arglist:
                collator_kwargs[k] = v
            else:
                dataloader_kwargs[k] = v
        self.collator = NodeCollator(g, nids, block_sampler,
                                     **collator_kwargs)

        if isinstance(g, DistGraph):
            _remove_kwargs_dist(dataloader_kwargs)
            self.dataloader = DistDataLoader(self.collator.dataset,
                                             collate_fn=self.collator.collate,
                                             **dataloader_kwargs)
        else:
            self.dataloader = DataLoader(self.collator.dataset,
                                         collate_fn=self.collator.collate,
                                         **dataloader_kwargs)

    def __iter__(self):
        # The wrapper is iterable, not an iterator: delegate iteration to the
        # underlying loader (DataLoader itself defines no __next__)
        return iter(self.dataloader)
def load_and_display(num_epochs=10, learning_rate=0.001, dropout_p=0.0):
    """
    Loads the saved model, invokes the dataloader, randomly evaluates on 5
    samples of the test set and displays the images with the true and
    predicted labels.

    Parameters
    ----------
    num_epochs : int, optional (default: ``10``)
        Number of epochs to run
    learning_rate : float, optional (default: ``0.001``)
        Learning rate for the optimizer
    dropout_p : float, optional (default: ``0.0``)
        Dropout probability for the network layers

    Returns
    -------
    None
    """
    model_filename = f'ne{num_epochs}lr{learning_rate}dp{dropout_p}'
    model = CNNModel().cuda()
    model.load_state_dict(
        torch.load(os.path.join(SAVED_MODEL_DIR, f'{model_filename}.pth')))
    model.eval()

    test_set = datasets.FashionMNIST(root=DATASET_ROOT,
                                     train=False,
                                     transform=transforms.ToTensor())
    sampler = RandomSampler(data_source=test_set)
    test_loader = DataLoader(dataset=test_set, batch_size=5, sampler=sampler)
    images, labels = next(iter(test_loader))
    images = images.cuda()
    labels = labels.cuda()

    logits = model(images)
    probs = F.softmax(logits.data, dim=1)
    _, predicted = torch.max(probs, 1)

    # Invert the dictionary class_to_idx to idx_to_class
    idx_to_class = {v: k for k, v in test_set.class_to_idx.items()}

    fig, axes = plt.subplots(1, 5, figsize=(25, 25))
    zip_gen = axes.ravel(), predicted, labels, images.cpu().numpy().squeeze()
    for ax, predicted_class, label_class, img in zip(*zip_gen):
        ax.imshow(img,
                  cmap='gray' if predicted_class == label_class else 'autumn')
        ax.axis('off')
        ax.set_title('Predicted: {} | True: {}'.format(
            idx_to_class[predicted_class.item()],
            idx_to_class[label_class.item()]))
class MNISTDataloader(object):
    """Dataloader for MNIST."""

    def __init__(self, dataset, opt):
        super().__init__()
        kwargs = {'num_workers': opt.num_workers}
        self.data_loader = DataLoader(dataset,
                                      batch_size=opt.batch_size,
                                      shuffle=True,
                                      **kwargs)
        self.data_iter = iter(self.data_loader)
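# Hedged usage sketch for MNISTDataloader above; the `opt` fields come from
# the constructor, and the torchvision dataset/transform are assumptions.
from argparse import Namespace

mnist = datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.ToTensor())
dl = MNISTDataloader(mnist, Namespace(num_workers=0, batch_size=64))
images, labels = next(dl.data_iter)  # one shuffled batch of 64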
def dataloader_prefetch_batches(
    dataloader: DataLoader,
    device: Union[None, str, torch.device] = torch.cuda.current_device()
) -> DataLoader:
    """
    Monkey-patch the `__iter__` method of a given `torch.utils.data.DataLoader`
    in order to prefetch the next data batch to device memory while
    computing/training on the previous batch.
    NOTE: In order for data batches to be prefetched (to GPU memory), set
    `pin_memory` to `True` when instantiating the DataLoader and provide a
    `device` which is neither 'cpu' nor `None` (e.g. 'cuda').
    NOTE: You won't need to move tensor batches from the given dataloader to
    GPU device memory anymore; i.e., no need to call `x.to(device)` before
    computing/training ops.
    NOTE: Python looks up special methods on the type, not the instance, so
    the patched methods only take effect when called explicitly as
    `dataloader.__iter__()` and `iterator.__next__()` (plain `for` loops and
    `next()` bypass instance-level patches).

    Args:
        - dataloader: DataLoader to be patched for batch prefetching
          (`dataloader.__iter__().__next__` will be monkey-patched)
        - device: Torch device to which data batches are prefetched. If `None`
          or 'cpu', then batch prefetching is disabled and no changes are made
          to `dataloader`

    Returns the given `dataloader`, patched to prefetch batches if `device`
    is neither `None` nor 'cpu' and `dataloader.pin_memory` is `True`
    (otherwise the given DataLoader is returned without any modifications).
    """
    if not dataloader.pin_memory:
        logging.warning(
            f'Warning: DataLoader wont prefetch data batches: set `pin_memory=True` when instantiating `{type(dataloader).__name__}`'
        )
    elif device is None or device == 'cpu' or (
            isinstance(device, torch.device) and device.type == 'cpu'):
        # TODO: condition this on 'cuda' instead?
        logging.warning(
            f'Warning: DataLoader wont prefetch data batches as given `device` argument is `{device}` when prefetching is aimed at GPU(s).'
        )
    else:
        original_iter = dataloader.__iter__

        @functools.wraps(original_iter)
        def __iter__patch(*args, **kwargs):
            iterator = original_iter(*args, **kwargs)
            original_next = iterator.__next__
            # Prefetch the first batch right away (or remember an immediate
            # `StopIteration` if the loader is empty)
            try:
                iterator._prefetched_batch = original_next().to(
                    device=device, non_blocking=True)
            except StopIteration as e:
                iterator._prefetched_batch = e

            @functools.wraps(original_next)
            def __next__patch() -> Any:
                if isinstance(iterator._prefetched_batch, StopIteration):
                    raise iterator._prefetched_batch
                batch = iterator._prefetched_batch
                try:
                    iterator._prefetched_batch = original_next().to(
                        device=device, non_blocking=True)
                except StopIteration as e:
                    # Catch `StopIteration` to raise it later (during the
                    # following call to `__next__`)
                    iterator._prefetched_batch = e
                return batch

            iterator.__next__ = __next__patch
            return iterator

        dataloader.__iter__ = __iter__patch
    return dataloader
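# Hedged usage sketch for dataloader_prefetch_batches above. The patch lives
# on the instance, so batches must be pulled through explicit dunder calls;
# the toy tensor dataset is an assumption (each batch collates to a single
# tensor, which is what the `.to(device)` call inside the patch requires).
import torch

toy = torch.randn(256, 3)  # dataset of tensor rows; batches stack to one tensor
loader = dataloader_prefetch_batches(
    DataLoader(toy, batch_size=64, pin_memory=True), device='cuda')
it = loader.__iter__()
while True:
    try:
        batch = it.__next__()  # already transferred to the target device
    except StopIteration:
        break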
class DiscriminatorOverfitMonitor(Listener):

    def __init__(self, train_dataset, validation_dataset, n_images, args):
        super().__init__()
        self.n_images = n_images
        self.cuda = args.cuda
        self.train_dataset = train_dataset
        self.validation_dataset = validation_dataset
        # Shuffled loaders used once each to grab a fixed probe batch;
        # the train loader must draw from the training set
        self.tloader = DataLoader(train_dataset, self.n_images, True)
        self.vloader = DataLoader(validation_dataset, self.n_images, True)

    def initialize(self):
        self.x_train = next(iter(self.tloader))[0]
        if self.cuda:
            self.x_train = self.x_train.cuda()
        # Remove the loader since we got our test images
        del self.tloader

        self.x_valid = next(iter(self.vloader))[0]
        if self.cuda:
            self.x_valid = self.x_valid.cuda()
        # Remove the loader since we got our test images
        del self.vloader

    def report(self, state_dict):
        if "D" in state_dict["networks"]:
            D = state_dict["networks"]["D"]
        else:
            raise ValueError(
                "Could not find a discriminator network in the state dict!")
        d_train = D(self.x_train).mean().detach().cpu().item()
        d_valid = D(self.x_valid).mean().detach().cpu().item()
        print("D(train) mean: ", d_train)
        print("D(valid) mean: ", d_valid)
        print()
def main():
    # Hyper-parameters
    NUM_TRAIN = 51200
    NUM_VAL = 5120
    NOISE_DIM = 96
    batch_size = 128

    # Load the MNIST dataset if available, otherwise download it
    mnist_train = dset.MNIST('./data',
                             train=True,
                             download=True,
                             transform=T.ToTensor())
    loader_train = DataLoader(mnist_train,
                              batch_size=batch_size,
                              sampler=ChunkSampler(NUM_TRAIN, 0))
    mnist_val = dset.MNIST('./data',
                           train=True,
                           download=True,
                           transform=T.ToTensor())
    loader_val = DataLoader(mnist_val,
                            batch_size=batch_size,
                            sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))

    # Visualize training images for reference
    imgs = next(iter(loader_train))[0].view(batch_size, 784).numpy().squeeze()
    show_images(imgs)

    # Build the DCGAN network
    dtype = torch.cuda.FloatTensor if torch.cuda.is_available() \
        else torch.FloatTensor
    D_DC = build_discriminator(batch_size).type(dtype)
    D_DC.apply(initialize_weights)
    G_DC = build_generator(batch_size, NOISE_DIM).type(dtype)
    G_DC.apply(initialize_weights)
    D_DC_solver = get_optimizer(D_DC)
    G_DC_solver = get_optimizer(G_DC)

    # Train the network
    run_a_gan(D_DC, G_DC, D_DC_solver, G_DC_solver, discriminator_loss,
              generator_loss, loader_train, dtype, num_epochs=8)
def main():
    NUM_TRAIN = 50000
    NUM_VAL = 5000
    NOISE_DIM = 96
    batch_size = 128

    mnist_train = dset.MNIST('./data',
                             train=True,
                             download=True,
                             transform=T.ToTensor())
    loader_train = DataLoader(mnist_train,
                              batch_size=batch_size,
                              sampler=ChunkSampler(NUM_TRAIN, 0))
    mnist_val = dset.MNIST('./data',
                           train=True,
                           download=True,
                           transform=T.ToTensor())
    loader_val = DataLoader(mnist_val,
                            batch_size=batch_size,
                            sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))

    imgs = next(iter(loader_train))[0].view(batch_size, 784).numpy().squeeze()
    show_images(imgs)
    # plt.show()
    plt.close()

    # Pick the CUDA tensor type when a GPU is available
    dtype = torch.cuda.FloatTensor if torch.cuda.is_available() \
        else torch.FloatTensor

    D_DC = build_discriminator(batch_size).type(dtype)
    D_DC.apply(initialize_weights)
    G_DC = build_generator(batch_size, NOISE_DIM).type(dtype)
    G_DC.apply(initialize_weights)
    D_DC_solver = get_optimizer(D_DC)
    G_DC_solver = get_optimizer(G_DC)

    run_a_gan(D_DC, G_DC, D_DC_solver, G_DC_solver, discriminator_loss,
              generator_loss, loader_train, dtype, num_epochs=5)
def _extract_data(self, dataset, sampler):
    # The dataloader takes one (sub)set of the dataset as a single batch
    data_loader = DataLoader(dataset,
                             batch_size=len(dataset),
                             sampler=sampler)
    assert len(data_loader) == 1, 'data loader should have size 1'
    sample = next(iter(data_loader))
    (ratings, user_ids, item_ids, item_metadata,
     item_text) = self.dataloader_extract(sample)
    assert ratings.size() != torch.Size([]), 'ratings size empty'
    assert len(ratings) >= self.batch_size, \
        'not enough ratings compared to batch size'
    return ratings, user_ids, item_ids, item_metadata, item_text
class data_gen:

    def __init__(self, batch_size, dataset, label_path, vocab_path,
                 dict_path, train_percent=0.7, num_workers=8):
        train_data = dataset(label_path, vocab_path, dict_path, True,
                             train_percent)
        self.train_loader = DataLoader(train_data,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)
        valid_data = dataset(label_path, vocab_path, dict_path, False,
                             train_percent)
        self.valid_loader = DataLoader(valid_data,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=1)

    def train(self):
        # Draw one training batch and move it to the target device
        images, encoded_captions, caption_lengths = next(
            iter(self.train_loader))
        images = images.to(device).float()
        encoded_captions = encoded_captions.to(device).long()
        caption_lengths = caption_lengths.to(device).long()
        return images, encoded_captions, caption_lengths

    def valid(self):
        # Draw one validation batch and move it to the target device
        images, encoded_captions, caption_lengths = next(
            iter(self.valid_loader))
        images = images.to(device).float()
        encoded_captions = encoded_captions.to(device).long()
        caption_lengths = caption_lengths.to(device).long()
        return images, encoded_captions, caption_lengths
class LoaderCIFAR(object):

    def __init__(self, file_path, download, batch_size, mu, use_cuda):
        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
        self.mu = mu

        # Get the datasets
        train_labeled_dataset, train_unlabeled_dataset, test_dataset = \
            self.get_dataset(file_path, download)

        # Set the samplers: the unlabeled stream draws mu times more samples
        num_samples = len(train_labeled_dataset)
        sampler_labeled = RandomSampler(train_labeled_dataset,
                                        replacement=True,
                                        num_samples=num_samples)
        sampler_unlabeled = RandomSampler(train_unlabeled_dataset,
                                          replacement=True,
                                          num_samples=self.mu * num_samples)
        batch_sampler_labeled = BatchSampler(sampler_labeled,
                                             batch_size=batch_size,
                                             drop_last=False)
        batch_sampler_unlabeled = BatchSampler(sampler_unlabeled,
                                               batch_size=self.mu * batch_size,
                                               drop_last=False)

        # Set the loaders
        self.train_labeled = DataLoader(train_labeled_dataset,
                                        batch_sampler=batch_sampler_labeled,
                                        **kwargs)
        self.train_unlabeled = DataLoader(
            train_unlabeled_dataset,
            batch_sampler=batch_sampler_unlabeled,
            **kwargs)
        self.test = DataLoader(test_dataset,
                               batch_size=batch_size,
                               shuffle=False,
                               **kwargs)

        # Infer the image shape from one test batch
        tmp_batch = next(iter(self.test))[0]
        self.img_shape = list(tmp_batch.size())[1:]

    @staticmethod
    def get_dataset(file_path, download):
        # Transforms
        weak_transform = cifar_weak_transforms()
        strong_transform = cifar_strong_transforms()
        test_transform = cifar_test_transforms()

        # Training and validation datasets
        train_labeled_dataset = CIFAR10(root=file_path,
                                        train=True,
                                        download=download,
                                        transform=weak_transform,
                                        target_transform=None)
        train_unlabeled_dataset = CIFAR10C(weak_transform=weak_transform,
                                           strong_transform=strong_transform,
                                           root=file_path,
                                           train=True,
                                           download=download,
                                           transform=None,
                                           target_transform=None)
        test_dataset = CIFAR10(root=file_path,
                               train=False,
                               download=download,
                               transform=test_transform,
                               target_transform=None)
        return train_labeled_dataset, train_unlabeled_dataset, test_dataset
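# Hedged usage sketch for LoaderCIFAR above: the labeled and unlabeled
# streams are typically consumed in lockstep; the exact structure of an
# unlabeled batch depends on what CIFAR10C yields, so it is left opaque here.
loaders = LoaderCIFAR('./data',
                      download=True,
                      batch_size=64,
                      mu=7,
                      use_cuda=torch.cuda.is_available())
x_labeled, y_labeled = next(iter(loaders.train_labeled))  # 64 samples
unlabeled_batch = next(iter(loaders.train_unlabeled))     # 7 * 64 samples
print(loaders.img_shape)  # [3, 32, 32] for CIFAR-10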
def test(config, intensity=False):
    """Test point cloud data loader."""
    from torch.utils.data import DataLoader
    from lib.utils import Timer
    import open3d as o3d

    def make_pcd(coords, feats):
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(coords[:, :3].float().numpy())
        pcd.colors = o3d.utility.Vector3dVector(feats[:, :3].numpy() / 255)
        if intensity:
            # Intensity channels follow the RGB columns
            pcd.intensities = o3d.utility.Vector3dVector(feats[:, 3:].numpy())
        return pcd

    timer = Timer()
    DatasetClass = FacilityArea5Dataset
    transformations = [
        t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS,
                               DatasetClass.IS_TEMPORAL),
        t.ChromaticAutoContrast(),
        t.ChromaticTranslation(config.data_aug_color_trans_ratio),
        t.ChromaticJitter(config.data_aug_color_jitter_std),
    ]
    dataset = DatasetClass(config,
                           prevoxel_transform=t.ElasticDistortion(
                               DatasetClass.ELASTIC_DISTORT_PARAMS),
                           input_transform=t.Compose(transformations),
                           augment_data=True,
                           cache=True,
                           elastic_distortion=True)
    data_loader = DataLoader(
        dataset=dataset,
        collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False),
        batch_size=1,
        shuffle=True)

    # Visualize and time 100 batches; avoid shadowing the builtin `iter` and
    # use the next() builtin instead of the removed .next() method
    data_iter = iter(data_loader)
    for i in range(100):
        timer.tic()
        coords, feats, labels = next(data_iter)
        pcd = make_pcd(coords, feats)
        o3d.visualization.draw_geometries([pcd])
        print(timer.toc())
def test() -> None:
    dataset = YoloDataset(table, anchors, shuffle=True)
    train_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    myloss = YoloLoss()

    itr = iter(train_loader)
    image, targets = next(itr)
    b_, c_, h_, w_ = image.shape

    # Build fake predictions from the targets: one-hot encode the class
    # index so each target becomes a matching prediction
    fakes = targets.copy()
    for idx, f in enumerate(fakes):
        b, c, h, w, m = f.shape
        index = f.reshape(-1, 6)[..., 5:].type(torch.int64)
        y = torch.zeros((b * c * h * w, 3)).scatter_(-1, index, 1)
        y = y.reshape((b, c, h, w, -1))
        pred = torch.cat([f[..., :5], y], dim=-1)
        pred[..., 3:4] /= w_
        pred[..., 4:5] /= h_
        loss = myloss.forward(pred, targets[idx], anchors[idx])
def test3():
    root_dir = '../../data/classifier_car/train'
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_data_set = CustomClassifierDataset(root_dir, transform=transform)
    data_loader = DataLoader(train_data_set,
                             batch_size=128,
                             num_workers=8,
                             drop_last=True)

    inputs, targets, cache_dicts = next(iter(data_loader))
    print(targets)
    print(inputs.shape)
class Loader(object):

    def __init__(self, dataset_ident, file_path='', download=False,
                 batch_size=128,
                 train_transform=digit_five_train_transforms(),
                 test_transform=digit_five_test_transforms(),
                 target_transform=None, use_cuda=False):
        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

        loader_map = {
            # 'MNIST': MNIST,
            'MNISTM': MNISTM,
            # 'SVHN': SVHN,
            # 'SYN': SYN,
            # 'USPS': USPS,
            # 'MNISTC': MNISTC,
        }
        num_class = {
            # 'MNIST': 10,
            'MNISTM': 10,
            # 'SVHN': 10,
            # 'SYN': 10,
            # 'USPS': 10,
            # 'MNISTC': 10,
        }

        # Get the datasets
        self.train_dataset, self.test_dataset = self.get_dataset(
            loader_map[dataset_ident], file_path, download, train_transform,
            test_transform, target_transform)

        # Set the loaders
        self.train_loader = DataLoader(self.train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       **kwargs)
        self.test_loader = DataLoader(self.test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      **kwargs)

        # Infer the image shape from one training batch
        tmp_batch = next(iter(self.train_loader))[0]
        self.img_shape = list(tmp_batch.size())[1:]
        self.num_class = num_class[dataset_ident]

    @staticmethod
    def get_dataset(dataset, file_path, download, train_transform,
                    test_transform, target_transform):
        # Training and validation datasets
        train_dataset = dataset(file_path,
                                train=True,
                                download=download,
                                transform=train_transform,
                                target_transform=target_transform)
        test_dataset = dataset(file_path,
                               train=False,
                               download=download,
                               transform=test_transform,
                               target_transform=target_transform)
        return train_dataset, test_dataset