Example #1
class FiDataLoader(object):
	def __init__(self, opt, dataset):
		super(FiDataLoader, self).__init__()

		if opt.shuffle:
			data_sampler = RandomSampler(dataset)
		else:
			data_sampler = None

		if opt.mode == 'train':
			self.data_loader = DataLoader(
					dataset, batch_size=opt.batch_size, shuffle=(data_sampler is None),
					num_workers=opt.workers, pin_memory=True, sampler=data_sampler)
		else:
			self.data_loader = DataLoader(
					dataset, batch_size=opt.batch_size, shuffle=False,
					num_workers=opt.workers, pin_memory=True, sampler=data_sampler)
		self.dataset = dataset
		self.data_iter = self.data_loader.__iter__()

	def next_batch(self):
		try:
			batch = self.data_iter.__next__()
		except StopIteration:
			self.data_iter = self.data_loader.__iter__()
			batch = self.data_iter.__next__()

		return batch
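
A minimal usage sketch for the wrapper above; the `opt` fields and the toy dataset are assumptions, not part of the original snippet:

# Hypothetical usage sketch: `opt` carries the fields FiDataLoader reads
# (mode, shuffle, batch_size, workers); the dataset is a stand-in.
import torch
from types import SimpleNamespace
from torch.utils.data import TensorDataset

opt = SimpleNamespace(mode='train', shuffle=True, batch_size=4, workers=0)
dataset = TensorDataset(torch.arange(10).float())
loader = FiDataLoader(opt, dataset)
batch = loader.next_batch()  # restarts the underlying iterator automatically at epoch end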
Example #2
    def getEpisodeData(self, support_sampler, query_sampler):
        k, qk, n, N = self.readParams()

        support_loader = DataLoader(
            self.Dataset,
            batch_size=k * n,
            sampler=support_sampler,
            collate_fn=batchSequenceWithoutPad)  # getBatchSequenceFunc())

        supports, support_labels, support_lens = next(support_loader.__iter__())

        self.SupSeqLenCache = support_lens

        if query_sampler:
            query_loader = DataLoader(
                self.Dataset,
                batch_size=qk * n,
                sampler=query_sampler,
                collate_fn=batchSequenceWithoutPad)  # getBatchSequenceFunc())

            queries, query_labels, query_lens = next(query_loader.__iter__())

            self.QueSeqLenCache = query_lens

            return supports, support_labels, queries, query_labels

        else:
            return supports, support_labels
Example #3
    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            dset = DataLoader(
                datasets.MNIST(self.root,
                               train=True,
                               transform=self.transforms), 100000)
            x_train, y_train = next(dset.__iter__())
            x_train, x_val, y_train, y_val = train_test_split(
                x_train,
                y_train,
                train_size=int(50000 * self.train_fraction),
                test_size=10000)

            mnist_train = ContDataset(x_train, y_train, transforms=None)
            mnist_train.set_task(self.task_list)
            self.train = mnist_train

            mnist_val = ContDataset(x_val, y_val, transforms=None)
            mnist_val.set_task(self.task_list)
            self.val = mnist_val
        if stage == 'test' or stage is None:
            dset = DataLoader(
                datasets.MNIST(self.root,
                               train=False,
                               transform=self.transforms), 100000)
            x_test, y_test = next(dset.__iter__())
            self.test = ContDataset(x_test, y_test, transforms=None)
Example #4
class InfiniteDataloader():
    def __init__(self,
                 dataset,
                 batch_size,
                 collate_fn,
                 num_workers,
                 num_steps,
                 weights=None,
                 batch_sampler=None):
        self.dataset = dataset
        self.num_steps = num_steps
        if batch_sampler is not None:
            self.dataloader = DataLoader(dataset,
                                         batch_sampler=batch_sampler,
                                         collate_fn=collate_fn,
                                         num_workers=num_workers)
        else:
            if weights is None:
                self.dataloader = DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             drop_last=True,
                                             collate_fn=collate_fn,
                                             num_workers=num_workers)
            else:
                print("USING PROVIDED SAMPLE WEIGHTS")
                print("WEIGHTS SHAPE: ", weights.shape)
                sampler = torch.utils.data.sampler.WeightedRandomSampler(
                    weights, len(weights))
                self.dataloader = DataLoader(dataset,
                                             batch_size=batch_size,
                                             sampler=sampler,
                                             drop_last=True,
                                             collate_fn=collate_fn,
                                             num_workers=num_workers)
        self.iter = self.dataloader.__iter__()

    def __iter__(self):
        self.count = 0
        return self

    def __len__(self):
        return self.num_steps

    def __next__(self):
        self.count += 1
        if self.count == self.num_steps:
            raise StopIteration
        else:
            try:
                data = next(self.iter)
            except StopIteration:
                self.iter = self.dataloader.__iter__()
                data = next(self.iter)
            return data
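
A minimal usage sketch for the infinite loader above; the toy dataset and collate function are assumptions:

# Hypothetical usage sketch: iterate a fixed number of steps per "epoch",
# independent of the underlying dataset length.
import torch
from torch.utils.data import TensorDataset
from torch.utils.data.dataloader import default_collate

dataset = TensorDataset(torch.arange(100).float())
loader = InfiniteDataloader(dataset, batch_size=8, collate_fn=default_collate,
                            num_workers=0, num_steps=50)
for step, batch in enumerate(loader):
    pass  # yields num_steps - 1 batches, restarting the inner iterator as needed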
Example #5
    def getEpisodeData(self, support_sampler, query_sampler):
        k, qk, n, N = self.readParams()

        support_loader = DataLoader(self.Dataset,
                                    batch_size=k * n,
                                    sampler=support_sampler)
        query_loader = DataLoader(self.Dataset,
                                  batch_size=qk * n,
                                  sampler=query_sampler)

        supports, support_labels = next(support_loader.__iter__())
        queries, query_labels = next(query_loader.__iter__())

        return supports, support_labels, queries, query_labels
Example #6
 def init_models(self, train_dataset):
     run_iter = DataLoader(train_dataset,
                           batch_size=4,
                           shuffle=False,
                           pin_memory=False)
     init_batch = next(run_iter.__iter__())
     self.forward(init_batch[0], init_batch[1], torch.device('cpu'))
Example #7
class Generator(object):
    ''' 
        DATA_DIR: Path to folder containing folder of images "/root/folder/img.jpg"
        
            - See: https://pytorch.org/vision/stable/datasets.html#imagefolder
        
        BATCH_SIZE: Size of batches yielded by iterator
        
        dim: Desired single integer square dimensions of images
        
    '''
    def __init__(self, DATA_DIR: str, BATCH_SIZE: int, dim: int = 256):
        self.dir = DATA_DIR
        self.batch_size = BATCH_SIZE
        self.dim = dim
        data_transformer = Compose(
            [Resize((dim, dim)),
             ToTensor(),
             Lambda(lambda x: x.mul(dim))])

        self.train = ImageFolder(DATA_DIR, data_transformer)
        self.train_loader = DataLoader(self.train, batch_size=BATCH_SIZE)

    def __iter__(self):
        return self.train_loader.__iter__()
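
A minimal usage sketch for the Generator wrapper above; the directory path is a placeholder for an ImageFolder-style layout (one subfolder per class):

# Hypothetical usage sketch; '/path/to/images' is a placeholder directory.
gen = Generator('/path/to/images', BATCH_SIZE=16, dim=256)
for images, labels in gen:  # __iter__ delegates to the underlying DataLoader
    print(images.shape)     # e.g. torch.Size([16, 3, 256, 256]) for full batches
    break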
Example #8
class RandomIdSamplerTest(unittest.TestCase):
    def setUp(self):
        self.batch_id = 4
        self.batch_image = 16
        self.data_source = Market1501(root + '/bounding_box_train',
                                      transform=ToTensor())
        self.sampler = RandomIdSampler(self.data_source,
                                       batch_image=self.batch_image)
        self.data_loader = DataLoader(self.data_source,
                                      sampler=self.sampler,
                                      batch_size=self.batch_id *
                                      self.batch_image)

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_sampler(self):
        imgs = [img for img in self.sampler]
        self.assertEqual(list(range(16)), imgs[:16])
        self.assertEqual(
            list(range(46, 53)) + list(range(46, 53)) + list(range(46, 48)),
            imgs[16:32])

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_data_loader(self):
        it = self.data_loader.__iter__()

        _, target = next(it)
        self.assertEqual([0] * 16 + [1] * 16 + [2] * 16 + [3] * 16,
                         target.numpy().tolist())

        _, target = next(it)
        self.assertEqual([4] * 16 + [5] * 16 + [6] * 16 + [7] * 16,
                         target.numpy().tolist())
Example #9
        def test(self):
            """Test."""
            class Dset(Dataset):
                def __len__(self):
                    return 100

                def __getitem__(self, i):
                    return i

            data = Dset()
            sampler = StatefulSampler(data, shuffle=True)
            dl = DataLoader(data, sampler=sampler, batch_size=2, num_workers=2)
            used_inds = []
            diter = dl.__iter__()
            for _ in range(10):
                batch = diter.__next__()
                used_inds.extend(batch.tolist())
            state = sampler.state_dict(diter)

            sampler = StatefulSampler(data, shuffle=True)
            sampler.load_state_dict(state)
            dl = DataLoader(data, sampler=sampler, batch_size=2, num_workers=2)
            for batch in dl:
                used_inds.extend(batch.tolist())
            assert len(used_inds) == 100
            assert len(set(used_inds)) == 100
Example #10
def test(config):
    """Test point cloud data loader.
  """
    from torch.utils.data import DataLoader
    from lib.utils import Timer
    timer = Timer()
    DatasetClass = StanfordVoxelization2cmDataset
    transformations = [
        t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS,
                               DatasetClass.IS_TEMPORAL),
        t.ChromaticAutoContrast(),
        t.ChromaticTranslation(config.data_aug_color_trans_ratio),
        t.ChromaticJitter(config.data_aug_color_jitter_std),
        t.HueSaturationTranslation(config.data_aug_hue_max,
                                   config.data_aug_saturation_max),
    ]

    dataset = DatasetClass(config,
                           input_transform=t.Compose(transformations),
                           augment_data=True,
                           cache=True,
                           elastic_distortion=True)

    data_loader = DataLoader(
        dataset=dataset,
        collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False),
        batch_size=4,
        shuffle=True)

    # Start from index 1
    data_iter = data_loader.__iter__()
    for i in range(100):
        timer.tic()
        data = next(data_iter)
        print(timer.toc())
Example #11
class RandomIdSamplerTest(unittest.TestCase):
    def setUp(self):
        self.batch_id = 4
        self.batch_image = 16
        self.data_source = Market1501(root + '/bounding_box_train', transform=ToTensor())
        self.sampler = RandomIdSampler(self.data_source, batch_image=self.batch_image)
        self.data_loader = DataLoader(self.data_source,
                                      sampler=self.sampler, batch_size=self.batch_id * self.batch_image)

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_sampler(self):
        imgs = [img for img in self.sampler]
        self.assertEqual(list(range(16)), imgs[:16])
        self.assertEqual(list(range(46, 53)) + list(range(46, 53)) + list(range(46, 48)), imgs[16:32])

    @patch('random.shuffle', lambda x: x)
    @patch('random.sample', lambda population, k: population[:k])
    def test_data_loader(self):
        it = self.data_loader.__iter__()

        _, target = next(it)
        self.assertEqual([0] * 16 + [1] * 16 + [2] * 16 + [3] * 16, target.numpy().tolist())

        _, target = next(it)
        self.assertEqual([4] * 16 + [5] * 16 + [6] * 16 + [7] * 16, target.numpy().tolist())
Example #12
class CelebAAugmentLoader(object):
    """
    loader for the CELEB-A dataset 40: 218-30, 15:178-15
    """
    def __init__(self, file_path, batch_size, valid_size, crop, shuffle,
                 use_cuda):

        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

        transform_list = []
        if crop:
            transform_list.append(transforms.CenterCrop(128))

        transform_list.append(transforms.Resize((64, 64)))
        transform_list.append(transforms.ToTensor())
        transform_list.append(
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))

        transform = transforms.Compose(transform_list)

        train_dataset, test_dataset = self.get_dataset(file_path, transform)

        # Set the samplers
        num_train = len(train_dataset)
        indices = list(range(num_train))
        split = int(np.floor(valid_size * num_train))

        if shuffle:
            np.random.shuffle(indices)

        train_idx, valid_idx = indices[split:], indices[:split]
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)

        # Set the loaders
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       sampler=train_sampler,
                                       **kwargs)
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      sampler=valid_sampler,
                                      **kwargs)

        tmp_batch = self.train_loader.__iter__().__next__()[0]
        self.img_shape = list(tmp_batch.size())[1:]

    @staticmethod
    def get_dataset(file_path, transform):

        augment_transforms = get_augment_transforms()

        train_dataset = ImageFolderPair(augment_transforms, file_path,
                                        transform)
        test_dataset = ImageFolderPair(augment_transforms, file_path,
                                       transform)

        return train_dataset, test_dataset
Example #13
 def calculate_sum_of_exemplar(self, exemplar):
     dl = DataLoader(exemplar,
                     shuffle=False,
                     batch_size=exemplar.data.shape[0])
     data = dl.__iter__().__next__()[1]
     data = data.cuda(self.device_num) if self.use_gpu else data
     with torch.no_grad():
         soe = self.discriminator(data, classify=True).sum(dim=0)
     return soe
Example #14
 def _iter():
     # wrap original iterator into an interruptible one
     for batch in DataLoader.__iter__(self):
         if self.interrupted:
             # if flagged as interrupted, return immediately, which would end the
             # BackgroundGenerator
             return
         # otherwise, yield data batch as normal
         yield batch
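One way the wrapper above might be attached, as a sketch of an interruptible DataLoader subclass; the class name and the `interrupted` flag are assumptions:

# Hypothetical sketch: a DataLoader subclass whose iteration can be stopped
# between batches by flipping an `interrupted` flag (e.g. from another thread).
from torch.utils.data import DataLoader

class InterruptibleDataLoader(DataLoader):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.interrupted = False

    def interrupt(self):
        self.interrupted = True

    def __iter__(self):
        # wrap the parent iterator so it can be ended early
        for batch in super().__iter__():
            if self.interrupted:
                return
            yield batch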
Example #15
 def test_handle_series_id(self):
     """Tests the handle_series_id method
     """
     mse1 = MSELoss()
     d1 = DataLoader(self.data_loader, batch_size=2)
     d = DecoderTransformer(3, 8, 4, 128, 20, 0.2, 1, {}, seq_num1=3, forecast_length=1)
     x, y = d1.__iter__().__next__()
     l1 = handle_csv_id_output(x, y, d, mse1)
     self.assertGreater(l1, 0)
Example #16
class Loader(object):
    def __init__(self, dataset_ident, file_path, download, shuffle, batch_size, data_transform, target_transform, use_cuda):

        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

        # set the dataset
        # NOTE: will need a refactor once we load more/different datasets that require custom classes
        loader_map = {
            'mnist': datasets.MNIST,
            'MNIST': datasets.MNIST,
            'FashionMNIST': datasets.FashionMNIST,
            'fashion': datasets.FashionMNIST
        }

        num_class = {
            'mnist': 10,
            'MNIST': 10,
            'fashion': 10,
            'FashionMNIST': 10
        }

        # Get the datasets
        train_dataset, test_dataset = self.get_dataset(loader_map[dataset_ident], file_path, download,
                                                       data_transform, target_transform)
        # Set the loaders
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle, **kwargs)
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

        # infer and set size, idea from:
        # https://github.com/jramapuram/helpers/
        tmp_batch, _ = self.train_loader.__iter__().__next__()
        self.img_shape = list(tmp_batch.size())[1:]
        self.num_class = num_class[dataset_ident]
        self.batch_size = batch_size

    @staticmethod
    def get_dataset(dataset, file_path, download, data_transform, target_transform):

        # Check for transform to be None, a single item, or a list
        # None -> default to transform_list = [transforms.ToTensor()]
        # single item -> list
        if not data_transform:
            data_transform = [transforms.ToTensor()]
        elif not isinstance(data_transform, list):
            data_transform = [data_transform]

        # Training and Validation datasets
        train_dataset = dataset(file_path, train=True, download=download,
                                transform=transforms.Compose(data_transform),
                                target_transform=target_transform)

        test_dataset = dataset(file_path, train=False, download=download,
                               transform=transforms.Compose(data_transform),
                               target_transform=target_transform)

        return train_dataset, test_dataset
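
A minimal usage sketch for the Loader class above; the data path is a placeholder:

# Hypothetical usage sketch: build MNIST loaders and read the inferred image shape.
loader = Loader('mnist', file_path='./data', download=True, shuffle=True,
                batch_size=64, data_transform=None, target_transform=None,
                use_cuda=False)
print(loader.img_shape)  # [1, 28, 28] for MNIST
print(loader.num_class)  # 10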
Example #17
    def build(self):

        # print("Building DataIterator...")

        dataloader = DataLoader(self.dataset,
                                batch_size=self.batch_size,
                                shuffle=True,
                                num_workers=0,
                                drop_last=True)
        self.dataiter = dataloader.__iter__()
Example #18
class NodeDataLoader:
    """PyTorch dataloader for batch-iterating over a set of nodes, generating the list
    of blocks as computation dependency of the said minibatch.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    nids : Tensor or dict[ntype, Tensor]
        The node set to compute outputs.
    block_sampler : dgl.dataloading.BlockSampler
        The neighborhood sampler.
    kwargs : dict
        Arguments being passed to :py:class:`torch.utils.data.DataLoader`.

    Examples
    --------
    To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on
    a homogeneous graph where each node takes messages from all neighbors (assume
    the backend is PyTorch):

    >>> sampler = dgl.dataloading.NeighborSampler([None, None, None])
    >>> dataloader = dgl.dataloading.NodeDataLoader(
    ...     g, train_nid, sampler,
    ...     batch_size=1024, shuffle=True, drop_last=False, num_workers=4)
    >>> for input_nodes, output_nodes, blocks in dataloader:
    ...     train_on(input_nodes, output_nodes, blocks)
    """
    collator_arglist = inspect.getfullargspec(NodeCollator).args

    def __init__(self, g, nids, block_sampler, **kwargs):
        collator_kwargs = {}
        dataloader_kwargs = {}
        for k, v in kwargs.items():
            if k in self.collator_arglist:
                collator_kwargs[k] = v
            else:
                dataloader_kwargs[k] = v
        self.collator = NodeCollator(g, nids, block_sampler, **collator_kwargs)
        if isinstance(g, DistGraph):
            _remove_kwargs_dist(dataloader_kwargs)
            self.dataloader = DistDataLoader(self.collator.dataset,
                                             collate_fn=self.collator.collate,
                                             **dataloader_kwargs)
        else:
            self.dataloader = DataLoader(self.collator.dataset,
                                         collate_fn=self.collator.collate,
                                         **dataloader_kwargs)

    def __next__(self):
        return self.dataloader.__next__()

    def __iter__(self):
        return self.dataloader.__iter__()
Example #19
def load_and_display(num_epochs=10, learning_rate=0.001, dropout_p=0.0):
    """
    Loads the saved model, invokes the dataloader, randomly evaluates 5 samples
    of the test set, and displays the images with the true and predicted labels.

    Parameters
    ----------
        num_epochs : int, optional (default: ``10``)
            Number of epochs to run
        learning_rate : float, optional (default: ``0.001``)
            learning rate for the optimizer
        dropout_p : float, optional (default: ``0.0``)
            Dropout probability for the network layers

    Returns
    -------
    None
    """

    model_filename = f'ne{num_epochs}lr{learning_rate}dp{dropout_p}'
    model = CNNModel().cuda()
    model.load_state_dict(
        torch.load(os.path.join(SAVED_MODEL_DIR, f'{model_filename}.pth')))
    model.eval()

    test_set = datasets.FashionMNIST(root=DATASET_ROOT,
                                     train=False,
                                     transform=transforms.ToTensor())

    sampler = RandomSampler(data_source=test_set)
    test_loader = DataLoader(dataset=test_set, batch_size=5, sampler=sampler)

    images, labels = next(test_loader.__iter__())
    images = images.cuda()
    labels = labels.cuda()

    logits = model(images)
    probs = F.softmax(logits.data, dim=1)
    _, predicted = torch.max(probs, 1)

    # Invert the dictionary class_to_idx to idx_to_class
    idx_to_class = {v: k for k, v in test_set.class_to_idx.items()}

    fig, axes = plt.subplots(1, 5, figsize=(25, 25))

    zip_gen = axes.ravel(), predicted, labels, images.cpu().numpy().squeeze()
    for ax, predicted_class, label_class, img in zip(*zip_gen):
        ax.imshow(img,
                  cmap='gray' if predicted_class == label_class else 'autumn')
        ax.axis('off')
        ax.set_title('Predicted: {} | True: {}'.format(
            idx_to_class[predicted_class.item()],
            idx_to_class[label_class.item()]))
Example #20
class MNISTDataloader(object):
    """ dataloader for MNIST """
    def __init__(self, dataset, opt):
        super().__init__()
        kwargs = {'num_workers': opt.num_workers}

        self.data_loader = DataLoader(dataset,
                                      batch_size=opt.batch_size,
                                      shuffle=True,
                                      **kwargs)

        self.data_iter = self.data_loader.__iter__()
Example #21
def dataloader_prefetch_batches(
    dataloader: DataLoader,
    device: Union[None, str, torch.device] = torch.cuda.current_device()
) -> DataLoader:
    """ Monkey-patch `__iter__` method of given `torch.utils.data.DataLoader` in order to prefetch next data batch to device memory during computing/training on previous batch.
    NOTE: In order to data batches being prefetched (to GPU memory), set `pin_memory` to `True` when instanciating DataLoader and provide a `device` which is not 'cpu' nor `None` (e.g. 'gpu').
    NOTE: You won't need to move tensors batches from given dataloader to GPU device memory anymore; i.e., wont need to call `x.to(device)` before computing/training ops.
    Args:
        - dataloader: DataLoader which will be patched in order to prefetch batches (`dataloader.__iter__().__next__` will be monkey-patched)
        - device: Torch device to which data batches are prefetched. If `None` or 'cpu', then batch prefetching is disabled and no changes are made to `dataloader`
    Returns given `dataloader` which will be patched in order to prefetch batches if `device` isn't `None` nor 'cpu' and if `dataloader.pin_memory` is `True` (otherwise returns given DataLoader without any modifications)
    """
    if not dataloader.pin_memory:
        logging.warning(
            f"Warning: DataLoader won't prefetch data batches: set `pin_memory=True` when instantiating `{type(dataloader).__name__}`"
        )
    elif device is None or device == 'cpu' or (
            isinstance(device, torch.device) and device.type
            == 'cpu'):  # TODO: condition this on 'cuda' instead?
        logging.warning(
            f"Warning: DataLoader won't prefetch data batches as the given `device` argument is `{device}` when prefetching is aimed at GPU(s)."
        )
    else:

        @functools.wraps(dataloader.__iter__)
        def __iter__patch(self: DataLoader, *args, **kwargs):
            nonlocal device
            iterator = self.__iter__(*args, **kwargs)
            iterator._prefetched_batch = iterator.__next__().to(
                device=device, non_blocking=True)

            @functools.wraps(iterator.__next__)
            def __next__patch(iterator_self: Iterable) -> Any:
                nonlocal device
                if isinstance(iterator_self._prefetched_batch, StopIteration):
                    raise iterator_self._prefetched_batch
                else:
                    batch = iterator_self._prefetched_batch
                    try:
                        iterator_self._prefetched_batch = iterator_self.__next__(
                        ).to(device=device, non_blocking=True)
                    except StopIteration as e:
                        # Catch `StopIteration` to raise it later (during following call to `__next__`)
                        iterator_self._prefetched_batch = e
                    return batch

            iterator.__next__ = __next__patch
            return iterator

        dataloader.__iter__ = __iter__patch
        dataloader.__iter__._prefetched_batch = None
    return dataloader
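The same prefetching idea can also be expressed without monkey-patching; a simplified sketch of an alternative wrapper (not the author's implementation, and assuming each batch is a single tensor):

# Hypothetical sketch: keep exactly one batch ahead on the target device.
import torch
from torch.utils.data import DataLoader

class PrefetchingLoader:
    def __init__(self, dataloader: DataLoader, device):
        self.dataloader = dataloader
        self.device = device

    def __iter__(self):
        it = self.dataloader.__iter__()
        try:
            prefetched = next(it).to(self.device, non_blocking=True)
        except StopIteration:
            return
        while True:
            batch = prefetched
            try:
                prefetched = next(it).to(self.device, non_blocking=True)
            except StopIteration:
                yield batch
                return
            yield batch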
Example #22
class DiscriminatorOverfitMonitor(Listener):
    def __init__(self, train_dataset, validation_dataset, n_images, args):
        super().__init__()
        self.n_images = n_images
        self.cuda = args.cuda
        self.train_dataset = train_dataset
        self.validation_dataset = validation_dataset
        self.tloader = DataLoader(train_dataset, self.n_images, True)
        self.vloader = DataLoader(validation_dataset, self.n_images, True)

    def initialize(self):
        self.x_train = self.tloader.__iter__().__next__()[0]
        if self.cuda:
            self.x_train = self.x_train.cuda()

        # Remove the loader since we got our test images
        del self.tloader

        self.x_valid = self.vloader.__iter__().__next__()[0]
        if self.cuda:
            self.x_valid = self.x_valid.cuda()

        # Remove the loader since we got our test images
        del self.vloader

    def report(self, state_dict):
        if "D" in state_dict["networks"]:
            D = state_dict["networks"]["D"]
        else:
            raise ValueError(
                "Could not find a discriminator network in the state dict!")

        d_train = D(self.x_train).mean().detach().cpu().item()
        d_valid = D(self.x_valid).mean().detach().cpu().item()

        print("D(train) mean: ", d_train)
        print("D(valid) mean: ", d_valid)
        print()
Example #23
def main():
    # Hyper-parameters
    NUM_TRAIN = 51200
    NUM_VAL = 5120
    NOISE_DIM = 96
    batch_size = 128

    # Load MNIST dataset if available, otherwise download the dataset
    mnist_train = dset.MNIST('./data',
                             train=True,
                             download=True,
                             transform=T.ToTensor())
    loader_train = DataLoader(mnist_train,
                              batch_size=batch_size,
                              sampler=ChunkSampler(NUM_TRAIN, 0))
    mnist_val = dset.MNIST('./data',
                           train=True,
                           download=True,
                           transform=T.ToTensor())
    loader_val = DataLoader(mnist_val,
                            batch_size=batch_size,
                            sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))

    # Visualize training images for the reference
    imgs = next(loader_train.__iter__())[0].view(batch_size, 784).numpy().squeeze()
    show_images(imgs)

    # Build the DCGAN network
    dtype = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.FloatTensor

    D_DC = build_discriminator(batch_size).type(dtype)
    D_DC.apply(initialize_weights)
    G_DC = build_generator(batch_size, NOISE_DIM).type(dtype)
    G_DC.apply(initialize_weights)

    D_DC_solver = get_optimizer(D_DC)
    G_DC_solver = get_optimizer(G_DC)

    # Train the network
    run_a_gan(D_DC,
              G_DC,
              D_DC_solver,
              G_DC_solver,
              discriminator_loss,
              generator_loss,
              loader_train,
              dtype,
              num_epochs=8)
Example #24
def main():

    NUM_TRAIN = 50000
    NUM_VAL = 5000

    NOISE_DIM = 96
    batch_size = 128

    mnist_train = dset.MNIST('./data',
                             train=True,
                             download=True,
                             transform=T.ToTensor())
    loader_train = DataLoader(mnist_train,
                              batch_size=batch_size,
                              sampler=ChunkSampler(NUM_TRAIN, 0))

    mnist_val = dset.MNIST('./data',
                           train=True,
                           download=True,
                           transform=T.ToTensor())
    loader_val = DataLoader(mnist_val,
                            batch_size=batch_size,
                            sampler=ChunkSampler(NUM_VAL, NUM_TRAIN))

    imgs = next(loader_train.__iter__())[0].view(batch_size, 784).numpy().squeeze()
    show_images(imgs)
    #plt.show()
    plt.close()

    dtype = torch.FloatTensor
    dtype = torch.cuda.FloatTensor  ## UNCOMMENT THIS LINE IF YOU'RE ON A GPU!

    D_DC = build_discriminator(batch_size).type(dtype)
    D_DC.apply(initialize_weights)
    G_DC = build_generator(batch_size, NOISE_DIM).type(dtype)
    G_DC.apply(initialize_weights)

    D_DC_solver = get_optimizer(D_DC)
    G_DC_solver = get_optimizer(G_DC)

    run_a_gan(D_DC,
              G_DC,
              D_DC_solver,
              G_DC_solver,
              discriminator_loss,
              generator_loss,
              loader_train,
              dtype,
              num_epochs=5)
Example #25
 def _extract_data(self, dataset, sampler):
     data_loader = DataLoader(dataset,
                              batch_size=len(dataset),
                              sampler=sampler)
     assert len(data_loader) == 1, 'data loader should have size 1'
     sample = next(data_loader.__iter__())  # dataloader takes one (sub)set of the dataset
     ratings, user_ids, item_ids, item_metadata, item_text = self.dataloader_extract(
         sample)
     assert ratings.size() != torch.Size([]), 'ratings size empty'
     assert len(
         ratings
     ) >= self.batch_size, 'not enough ratings compared to batch size'
     return ratings, user_ids, item_ids, item_metadata, item_text
Example #26
class data_gen:
    def __init__(self,
                 batch_size,
                 dataset,
                 label_path,
                 vocab_path,
                 dict_path,
                 train_percent=0.7,
                 num_workers=8):
        train_data = dataset(label_path, vocab_path, dict_path, True,
                             train_percent)
        self.train_loader = DataLoader(train_data,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)
        valid_data = dataset(label_path, vocab_path, dict_path, False,
                             train_percent)
        self.valid_loader = DataLoader(valid_data,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=1)

    def train(self):
        images, encoded_captions, caption_lengths = self.train_loader.__iter__(
        ).__next__()
        images = images.to(device).float()
        encoded_captions = encoded_captions.to(device).long()
        caption_lengths = caption_lengths.to(device).long()
        return images, encoded_captions, caption_lengths

    def valid(self):
        images, encoded_captions, caption_lengths = self.valid_loader.__iter__(
        ).__next__()
        images = images.to(device).float()
        encoded_captions = encoded_captions.to(device).long()
        caption_lengths = caption_lengths.to(device).long()
        return images, encoded_captions, caption_lengths
Example #27
class LoaderCIFAR(object):
    def __init__(self, file_path, download, batch_size, mu, use_cuda):

        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
        self.mu = mu

        # Get the datasets
        train_labeled_dataset, train_unlabeled_dataset, test_dataset = self.get_dataset(file_path, download)

        # Set the samplers
        num_samples = len(train_labeled_dataset)
        sampler_labeled = RandomSampler(train_labeled_dataset, replacement=True, num_samples=num_samples)
        sampler_unlabeled = RandomSampler(train_unlabeled_dataset, replacement=True, num_samples=self.mu * num_samples)

        batch_sampler_labeled = BatchSampler(sampler_labeled, batch_size=batch_size, drop_last=False)
        batch_sampler_unlabeled = BatchSampler(sampler_unlabeled, batch_size=self.mu * batch_size, drop_last=False)

        # Set the loaders
        self.train_labeled = DataLoader(train_labeled_dataset, batch_sampler=batch_sampler_labeled, **kwargs)
        self.train_unlabeled = DataLoader(train_unlabeled_dataset, batch_sampler=batch_sampler_unlabeled, **kwargs)

        self.test = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

        tmp_batch = self.test.__iter__().__next__()[0]
        self.img_shape = list(tmp_batch.size())[1:]

    @staticmethod
    def get_dataset(file_path, download):

        # transforms
        weak_transform = cifar_weak_transforms()
        strong_transform = cifar_strong_transforms()
        test_transform = cifar_test_transforms()

        # Training and Validation datasets
        train_labeled_dataset = CIFAR10(root=file_path, train=True, download=download,
                                        transform=weak_transform,
                                        target_transform=None)
        train_unlabeled_dataset = CIFAR10C(weak_transform=weak_transform, strong_transform=strong_transform,
                                           root=file_path, train=True, download=download,
                                           transform=None,
                                           target_transform=None)

        test_dataset = CIFAR10(root=file_path, train=False, download=download,
                               transform=test_transform,
                               target_transform=None)

        return train_labeled_dataset, train_unlabeled_dataset, test_dataset
Example #28
def test(config, intensity=False):
    """Test point cloud data loader.
  """
    from torch.utils.data import DataLoader
    from lib.utils import Timer
    import open3d as o3d

    def make_pcd(coords, feats):
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(coords[:, :3].float().numpy())
        pcd.colors = o3d.utility.Vector3dVector(feats[:, :3].numpy() / 255)
        if intensity:
            pcd.intensities = o3d.utility.Vector3dVector(feats[:, 3:3].numpy())
        return pcd

    timer = Timer()
    DatasetClass = FacilityArea5Dataset
    transformations = [
        t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS,
                               DatasetClass.IS_TEMPORAL),
        t.ChromaticAutoContrast(),
        t.ChromaticTranslation(config.data_aug_color_trans_ratio),
        t.ChromaticJitter(config.data_aug_color_jitter_std),
    ]

    dataset = DatasetClass(config,
                           prevoxel_transform=t.ElasticDistortion(
                               DatasetClass.ELASTIC_DISTORT_PARAMS),
                           input_transform=t.Compose(transformations),
                           augment_data=True,
                           cache=True,
                           elastic_distortion=True)

    data_loader = DataLoader(
        dataset=dataset,
        collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False),
        batch_size=1,
        shuffle=True)

    # Start from index 1
    data_iter = data_loader.__iter__()
    for i in range(100):
        timer.tic()
        coords, feats, labels = next(data_iter)
        pcd = make_pcd(coords, feats)
        o3d.visualization.draw_geometries([pcd])
        print(timer.toc())
Example #29
def test() -> None:
    dataset = YoloDataset(table, anchors, shuffle=True)
    train_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    myloss = YoloLoss()
    itr = train_loader.__iter__()
    image, targets = next(itr)
    b_, c_, h_, w_ = image.shape

    fakes = targets.copy()
    for idx, f in enumerate(fakes):
        b, c, h, w, m = f.shape
        index = f.reshape(-1, 6)[..., 5:].type(torch.int64)
        y = torch.zeros((b * c * h * w, 3)).scatter_(-1, index, 1)
        y = y.reshape((b, c, h, w, -1))
        pred = torch.cat([f[..., :5], y], dim=-1)
        pred[..., 3:4] /= w_
        pred[..., 4:5] /= h_
        loss = myloss.forward(pred, targets[idx], anchors[idx])
Example #30
def test3():
    root_dir = '../../data/classifier_car/train'
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_data_set = CustomClassifierDataset(root_dir, transform=transform)
    data_loader = DataLoader(train_data_set,
                             batch_size=128,
                             num_workers=8,
                             drop_last=True)

    inputs, targets, cache_dicts = next(data_loader.__iter__())
    print(targets)
    print(inputs.shape)
Example #31
class Loader(object):
    def __init__(self, dataset_ident, file_path='', download=False, batch_size=128, train_transform=digit_five_train_transforms(), test_transform=digit_five_test_transforms(), target_transform=None, use_cuda=False):

        kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

        loader_map = {
            # 'MNIST': MNIST,
            'MNISTM': MNISTM,
            # 'SVHN': SVHN,
            # 'SYN': SYN,
            # 'USPS': USPS,
            # 'MNISTC': MNISTC,
        }

        num_class = {
            # 'MNIST': 10,
            'MNISTM': 10,
            # 'SVHN': 10,
            # 'SYN': 10,
            # 'USPS': 10,
            # 'MNISTC': 10,
        }

        # Get the datasets
        self.train_dataset, self.test_dataset = self.get_dataset(loader_map[dataset_ident], file_path, download,
                                                       train_transform, test_transform, target_transform)
        # Set the loaders
        self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
        self.test_loader = DataLoader(self.test_dataset, batch_size=batch_size, shuffle=False, **kwargs)
        tmp_batch = self.train_loader.__iter__().__next__()[0]
        self.img_shape = list(tmp_batch.size())[1:]
        self.num_class = num_class[dataset_ident]

    @staticmethod
    def get_dataset(dataset, file_path, download, train_transform, test_transform, target_transform):
        # Training and Validation datasets
        train_dataset = dataset(file_path, train=True, download=download,
                                transform=train_transform,
                                target_transform=target_transform)
        test_dataset = dataset(file_path, train=False, download=download,
                               transform=test_transform,
                               target_transform=target_transform)
        return train_dataset, test_dataset