def generate_data_iterator(dataset, seed, num_workers=1, epoch=1., buffer_size=0):
    """Build the iterator object that matches the kind of ``dataset``.

    A ``list`` is treated as a multi-dataset and handed to
    ``MultiDataIterator``. Anything else is handed to ``DataIterator``
    together with the dataset's own ``collater`` and ``batches`` attributes.

    Args:
        dataset: Either a list (multi-dataset) or a single dataset object
            exposing ``collater`` and ``batches``.
        seed: Forwarded unchanged as ``seed``.
        num_workers: Forwarded unchanged as ``num_workers``.
        epoch: Forwarded unchanged as ``epoch``.
        buffer_size: Forwarded unchanged as ``buffer_size``.

    Returns:
        The newly constructed ``MultiDataIterator`` or ``DataIterator``.
    """
    # NOTE(review): another function with this same name (taking extra
    # rank / world_size / fill_value parameters) is defined later in this
    # file. If both live in the same module scope, the later definition
    # rebinds the name and this one is unreachable -- confirm which is meant.

    # Both iterator classes receive exactly the same keyword options.
    shared_options = dict(
        seed=seed,
        num_workers=num_workers,
        epoch=epoch,
        buffer_size=buffer_size,
    )

    # Multi-dataset case: the list itself is the only positional argument.
    if isinstance(dataset, list):
        return MultiDataIterator(dataset, **shared_options)

    # Single-dataset case: collater and batches come from the dataset.
    return DataIterator(
        dataset,
        dataset.collater,
        dataset.batches,
        **shared_options,
    )
def generate_data_iterator(dataset, rank, world_size, seed, num_workers=1, epoch=1., buffer_size=0, fill_value=True):
    """Build the sharded iterator object that matches the kind of ``dataset``.

    A ``list`` is treated as a multi-dataset and handed to
    ``MultiDataIterator``. Anything else is handed to ``DataIterator``
    together with the dataset's own ``collater`` and ``batches`` attributes.
    In both cases ``world_size`` is passed as ``num_shards`` and ``rank`` as
    ``shard_id``.

    Args:
        dataset: Either a list (multi-dataset) or a single dataset object
            exposing ``collater`` and ``batches``.
        rank: Forwarded as ``shard_id`` (presumably the index of the current
            process in a distributed run -- confirm against callers).
        world_size: Forwarded as ``num_shards`` (presumably the total number
            of processes -- confirm against callers).
        seed: Forwarded unchanged as ``seed``.
        num_workers: Forwarded unchanged as ``num_workers``.
        epoch: Forwarded unchanged as ``epoch``.
        buffer_size: Forwarded unchanged as ``buffer_size``.
        fill_value: Forwarded unchanged as ``fill_value``; its meaning is
            defined by the iterator classes, not here.

    Returns:
        The newly constructed ``MultiDataIterator`` or ``DataIterator``.
    """
    # Both iterator classes receive exactly the same keyword options.
    shared_options = dict(
        seed=seed,
        num_workers=num_workers,
        epoch=epoch,
        buffer_size=buffer_size,
        num_shards=world_size,
        shard_id=rank,
        fill_value=fill_value,
    )

    # Multi-dataset case: the list itself is the only positional argument.
    if isinstance(dataset, list):
        return MultiDataIterator(dataset, **shared_options)

    # Single-dataset case: collater and batches come from the dataset.
    return DataIterator(
        dataset,
        dataset.collater,
        dataset.batches,
        **shared_options,
    )