def __init__(
    self,
    datapipe: MapDataPipe,
    fn: Callable = default_fn,
) -> None:
    """Store the source datapipe and the callable to apply to it.

    Args:
        datapipe: Upstream ``MapDataPipe``; kept on ``self.datapipe``.
        fn: Callable kept on ``self.fn``. It is handed to
            ``check_lambda_fn`` before being stored. Defaults to
            ``default_fn``.
    """
    super().__init__()

    self.datapipe = datapipe

    # Vet the callable first, then remember it.
    check_lambda_fn(fn)
    self.fn = fn  # type: ignore[assignment]
def __new__(
    cls,
    datapipe: IterDataPipe,
    num_instances: int,
    classifier_fn: Callable[[T_co], Optional[int]],
    drop_none: bool = False,
    buffer_size: int = 1000,
):
    """Create the child datapipes that share one demultiplexer container.

    Args:
        datapipe: Source ``IterDataPipe`` forwarded to the container.
        num_instances: Number of child datapipes to create; must be >= 1.
        classifier_fn: Callable forwarded to the container after being
            passed through ``check_lambda_fn``.
        drop_none: Forwarded to the container unchanged.
        buffer_size: Forwarded to the container unchanged.

    Returns:
        A list of ``num_instances`` ``_ChildDataPipe`` objects, one per
        instance index, all wrapping the same
        ``_DemultiplexerIterDataPipe``.

    Raises:
        ValueError: If ``num_instances`` is smaller than 1.
    """
    if num_instances < 1:
        raise ValueError(f"Expected `num_instances` larger than 0, but {num_instances} is found")

    check_lambda_fn(classifier_fn)

    # When num_instances == 1, demux can be replaced by filter,
    # but keep it as Demultiplexer for the sake of consistency,
    # like throwing an Error when the classification result is out of range.
    container = _DemultiplexerIterDataPipe(datapipe, num_instances, classifier_fn, drop_none, buffer_size)
    return [_ChildDataPipe(container, i) for i in range(num_instances)]
def __init__(
    self,
    datapipe: IterDataPipe,
    filter_fn: Callable,
    drop_empty_batches: bool = True,
) -> None:
    """Record the source datapipe, the filter callable and the batch flag.

    Args:
        datapipe: Upstream ``IterDataPipe``; kept on ``self.datapipe``.
        filter_fn: Callable kept on ``self.filter_fn``. It is handed to
            ``check_lambda_fn`` before being stored.
        drop_empty_batches: Flag kept on ``self.drop_empty_batches``.
    """
    super().__init__()

    self.datapipe = datapipe

    # Vet the callable first, then remember it.
    check_lambda_fn(filter_fn)
    self.filter_fn = filter_fn  # type: ignore[assignment]

    self.drop_empty_batches = drop_empty_batches
def __init__(
    self,
    datapipe: IterDataPipe,
    fn: Callable,
    input_col=None,
    output_col=None,
) -> None:
    """Store the source datapipe, the callable and the column selectors.

    Args:
        datapipe: Upstream ``IterDataPipe``; kept on ``self.datapipe``.
        fn: Callable kept on ``self.fn``. It is handed to
            ``check_lambda_fn`` before being stored.
        input_col: Kept on ``self.input_col`` unchanged.
        output_col: Must be ``None`` when ``input_col`` is ``None``. A
            list or tuple must hold exactly one element, which is
            unwrapped before being kept on ``self.output_col``.

    Raises:
        ValueError: If ``output_col`` is given without ``input_col``, or
            if ``output_col`` is a list/tuple that does not contain
            exactly one element.
    """
    super().__init__()
    self.datapipe = datapipe

    check_lambda_fn(fn)
    self.fn = fn  # type: ignore[assignment]

    self.input_col = input_col
    if input_col is None and output_col is not None:
        raise ValueError("`output_col` must be None when `input_col` is None.")
    if isinstance(output_col, (list, tuple)):
        # Reject empty sequences too: indexing [0] on them would otherwise
        # surface as an unrelated IndexError instead of a clear message.
        if len(output_col) != 1:
            raise ValueError("`output_col` must be a single-element list or tuple")
        output_col = output_col[0]
    self.output_col = output_col
def __init__(
    self,
    datapipe: IterDataPipe[T_co],
    group_key_fn: Callable,
    *,
    buffer_size: int = 10000,
    group_size: Optional[int] = None,
    guaranteed_group_size: Optional[int] = None,
    drop_remaining: bool = False,
):
    """Store the grouping configuration and validate the size arguments.

    Args:
        datapipe: Source ``IterDataPipe``; kept on ``self.datapipe``.
        group_key_fn: Callable kept on ``self.group_key_fn``. It is handed
            to ``check_lambda_fn`` first.
        buffer_size: Kept on ``self.buffer_size``.
        group_size: Kept on ``self.group_size``. When both it and
            ``buffer_size`` are not ``None``, it must satisfy
            ``0 < group_size <= buffer_size`` and becomes the default for
            ``self.guaranteed_group_size``.
        guaranteed_group_size: When given, ``group_size`` must also be
            given and ``0 < guaranteed_group_size <= group_size`` must
            hold; it then overrides ``self.guaranteed_group_size``.
        drop_remaining: Kept on ``self.drop_remaining``.

    Raises:
        AssertionError: If the size constraints above are violated. The
            checks are raised explicitly (rather than via ``assert``) so
            they still run under ``python -O``.
    """
    check_lambda_fn(group_key_fn)
    self.datapipe = datapipe
    self.group_key_fn = group_key_fn

    self.buffer_size = buffer_size
    self.group_size = group_size
    self.guaranteed_group_size = None

    if group_size is not None and buffer_size is not None:
        if not 0 < group_size <= buffer_size:
            raise AssertionError(
                f"`group_size` must satisfy 0 < group_size <= buffer_size, "
                f"but got group_size={group_size} and buffer_size={buffer_size}"
            )
        # Without an explicit guarantee, a full group is the guarantee.
        self.guaranteed_group_size = group_size

    if guaranteed_group_size is not None:
        if group_size is None or not 0 < guaranteed_group_size <= group_size:
            raise AssertionError(
                f"`guaranteed_group_size` requires `group_size` and must satisfy "
                f"0 < guaranteed_group_size <= group_size, but got "
                f"guaranteed_group_size={guaranteed_group_size} and group_size={group_size}"
            )
        self.guaranteed_group_size = guaranteed_group_size

    self.drop_remaining = drop_remaining
    self.wrapper_class = DataChunk