Esempio n. 1
0
    def __init__(
        self,
        iterable: Iterable,
        batch_size: Optional[int] = None,
        is_shuffle: bool = True,
        transforms_dict: Optional[Dict[str, List[Transform]]] = None,
        batcher=None,
        collate_fn=None,
        chunk_size: int = 1000,
        is_cycle: bool = False,
        length: Optional[int] = None,
        rank: int = 0,
        num_workers: int = 1,
    ):
        """Store the loader configuration and wrap ``iterable`` for chunked reads.

        Args:
            iterable: Source of items. Wrapped in ``itertools.cycle`` when
                ``is_cycle`` is true.
            batch_size: Batch size. When falsy, ``batcher.batch_size`` is used.
            is_shuffle: Stored as ``self.is_shuffle``; not otherwise used here.
            transforms_dict: Stored as ``self.transforms_dict``; a falsy value
                becomes an empty dict.
            batcher: Batcher to use. When falsy, ``Batcher(batch_size)`` is
                created.
            collate_fn: Stored as ``self.collate_fun``; falls back to
                ``default_collate_fn`` when falsy.
            chunk_size: Number of batches per chunk.
            is_cycle: Whether to cycle over ``iterable``.
            length: Forwarded to ``ChunkIterator`` as its third argument.
            rank: Passed to ``shard`` together with ``num_workers``.
            num_workers: When greater than 1, the iterable is passed through
                ``shard(iterable, rank, num_workers)``.

        Raises:
            ValueError: If neither a batch size nor a batcher is supplied.
        """
        # Fail fast with a clear message; previously this case surfaced as an
        # AttributeError from evaluating ``None.batch_size``.
        if not batch_size and batcher is None:
            raise ValueError(
                "either a non-zero batch_size or a batcher must be provided"
            )

        self.iterable = itertools.cycle(iterable) if is_cycle else iterable
        if num_workers > 1:
            self.iterable = shard(self.iterable, rank, num_workers)
        self.batch_size = batch_size or batcher.batch_size
        self.batcher = batcher or Batcher(self.batch_size)
        self.is_shuffle = is_shuffle
        self.transforms_dict = transforms_dict or {}
        self.collate_fun = collate_fn or default_collate_fn

        self.chunk_size = chunk_size  # num of batches per chunk
        self.is_cycle = is_cycle
        self.length = length

        # The chunk size handed to ChunkIterator is chunk_size * batch_size,
        # i.e. chunk_size is scaled from batches by the batch size.
        self.iterable = ChunkIterator(
            self.iterable, self.chunk_size * self.batch_size, self.length
        )
Esempio n. 2
0
    def __init__(
        self,
        iterable: Iterable,
        batch_size: int = 1,
        is_shuffle: bool = True,
        transform: Optional[Union[nn.Module, Callable]] = None,
        custom_batcher: Optional[Batcher] = None,
        collate_fn: Optional[Callable] = None,
        chunk_size: Optional[int] = 1000,
        is_cycle: bool = False,
        length: Optional[int] = None,
        rank: int = 0,
        world_size: int = 1,
    ):
        """Record the loader settings and prepare the underlying iterable.

        Args:
            iterable: Source of items; cycled via ``itertools.cycle`` when
                ``is_cycle`` is true.
            batch_size: Handed to ``self.batch`` together with
                ``custom_batcher``.
            is_shuffle: Kept as ``self.is_shuffle``.
            transform: Optional transform; when falsy an ``IdentityTransform``
                is used. Either way it is wrapped in
                ``RowsToColumnarTransform``.
            custom_batcher: Handed to ``self.batch``.
            collate_fn: Kept as ``self.collate_fn``.
            chunk_size: Number of batches per chunk; a falsy value skips the
                ``ChunkIterator`` wrapping.
            is_cycle: Whether to cycle over ``iterable``.
            length: Forwarded to ``ChunkIterator``.
            rank: Passed to ``shard`` along with ``world_size``.
            world_size: When greater than 1, the iterable is sharded.
        """
        if is_cycle:
            self.iterable = itertools.cycle(iterable)
        else:
            self.iterable = iterable

        if world_size > 1:
            logger.error(
                f"data sharding for rank: {rank}, world_size: {world_size}")
            self.iterable = shard(self.iterable, rank, world_size)

        # NOTE(review): self.batch is defined outside this view; the check
        # below reads self.batch_size, so it is presumably set there.
        self.batch(batch_size, custom_batcher)
        self.is_shuffle = is_shuffle

        inner_transform = transform if transform else IdentityTransform()
        self.transform = RowsToColumnarTransform(inner_transform)
        self.collate_fn = collate_fn

        # chunk_size counts batches per chunk.
        self.chunk_size = chunk_size
        self.is_cycle = is_cycle
        self.length = length

        if self.chunk_size:
            if self.batch_size:
                self.iterable = ChunkIterator(
                    self.iterable,
                    self.chunk_size * self.batch_size,
                    self.length,
                )
Esempio n. 3
0
    def __init__(
        self,
        iterable: Iterable,
        batch_size: Optional[int] = None,
        is_shuffle: bool = True,
        transform: Optional[Transform] = None,
        custom_batcher: Optional[Batcher] = None,
        collate_fn=None,
        chunk_size: Optional[int] = 1000,
        is_cycle: bool = False,
        length: Optional[int] = None,
        rank: int = 0,
        world_size: int = 1,
    ):
        """Store the loader configuration and prepare the underlying iterable.

        Args:
            iterable: Source of items. Wrapped in ``itertools.cycle`` when
                ``is_cycle`` is true.
            batch_size: Passed to ``self.batch`` together with
                ``custom_batcher``.
            is_shuffle: Stored as ``self.is_shuffle``.
            transform: Optional transform. When falsy, ``IdentityTransform()``
                is substituted before wrapping in ``RowsToColumnarTransform``.
            custom_batcher: Passed to ``self.batch``.
            collate_fn: Stored as ``self.collate_fn``.
            chunk_size: Number of batches per chunk; a falsy value disables the
                ``ChunkIterator`` wrapping.
            is_cycle: Whether to cycle over ``iterable``.
            length: Forwarded to ``ChunkIterator`` as its third argument.
            rank: Passed to ``shard`` together with ``world_size``.
            world_size: When greater than 1, the iterable is passed through
                ``shard(iterable, rank, world_size)``.
        """
        self.iterable = itertools.cycle(iterable) if is_cycle else iterable
        if world_size > 1:
            self.iterable = shard(self.iterable, rank, world_size)
        # NOTE(review): self.batch is defined outside this view; the check at
        # the end reads self.batch_size, so it is presumably set there.
        self.batch(batch_size, custom_batcher)
        self.is_shuffle = is_shuffle
        # The fallback must be chosen BEFORE wrapping: applying `or` to the
        # constructed wrapper tests the wrapper's truthiness, so a missing
        # transform would be wrapped as None and the identity never used.
        self.transform = RowsToColumnarTransform(
            transform or IdentityTransform())
        self.collate_fn = collate_fn

        self.chunk_size = chunk_size  # num of batches per chunk
        self.is_cycle = is_cycle
        self.length = length

        # Only chunk when both sizes are set and non-zero.
        if self.chunk_size and self.batch_size:
            self.iterable = ChunkIterator(self.iterable,
                                          self.chunk_size * self.batch_size,
                                          self.length)