Esempio n. 1
0
 def __init__(self, datapipe: MapDataPipe, fn: Callable = default_fn) -> None:
     """Store the wrapped datapipe and the callable associated with it.

     Args:
         datapipe: Map-style datapipe kept as ``self.datapipe``.
         fn: Callable kept as ``self.fn`` (``default_fn`` when omitted).
             It is handed to ``check_lambda_fn`` before being stored.
     """
     super().__init__()
     self.datapipe = datapipe

     # Run the project's lambda check on the callable before keeping it.
     check_lambda_fn(fn)
     self.fn = fn  # type: ignore[assignment]
Esempio n. 2
0
    def __new__(
        cls,
        datapipe: IterDataPipe,
        num_instances: int,
        classifier_fn: Callable[[T_co], Optional[int]],
        drop_none: bool = False,
        buffer_size: int = 1000,
    ):
        """Create the child datapipes that share one demultiplexer container.

        Note that this does not return an instance of ``cls``: it returns a
        list of ``_ChildDataPipe`` objects, one per instance index.

        Args:
            datapipe: Source datapipe forwarded to the container.
            num_instances: Number of child datapipes to create; must be >= 1.
            classifier_fn: Callable forwarded to the container after being
                passed through ``check_lambda_fn``.
            drop_none: Forwarded unchanged to ``_DemultiplexerIterDataPipe``.
            buffer_size: Forwarded unchanged to ``_DemultiplexerIterDataPipe``.

        Returns:
            A list of ``num_instances`` ``_ChildDataPipe`` objects, all bound
            to the same ``_DemultiplexerIterDataPipe`` container.

        Raises:
            ValueError: If ``num_instances`` is smaller than 1.
        """
        if num_instances < 1:
            raise ValueError(f"Expected `num_instances` larger than 0, but {num_instances} is found")

        check_lambda_fn(classifier_fn)

        # When num_instances == 1, demux can be replaced by filter,
        # but keep it as Demultiplexer for the sake of consistency,
        # like throwing an Error when the classification result is out of range.
        container = _DemultiplexerIterDataPipe(datapipe, num_instances, classifier_fn, drop_none, buffer_size)
        return [_ChildDataPipe(container, i) for i in range(num_instances)]
Esempio n. 3
0
    def __init__(self, datapipe: IterDataPipe, filter_fn: Callable, drop_empty_batches: bool = True) -> None:
        """Store the source datapipe, the filter callable and the batch flag.

        Args:
            datapipe: Iterable-style datapipe kept as ``self.datapipe``.
            filter_fn: Callable kept as ``self.filter_fn``; it is handed to
                ``check_lambda_fn`` before being stored.
            drop_empty_batches: Flag kept as ``self.drop_empty_batches``.
        """
        super().__init__()
        self.datapipe = datapipe

        # Run the project's lambda check on the callable before keeping it.
        check_lambda_fn(filter_fn)
        self.filter_fn = filter_fn  # type: ignore[assignment]

        self.drop_empty_batches = drop_empty_batches
Esempio n. 4
0
    def __init__(
        self,
        datapipe: IterDataPipe,
        fn: Callable,
        input_col=None,
        output_col=None,
    ) -> None:
        """Store the source datapipe, the callable and the column selectors.

        Args:
            datapipe: Iterable-style datapipe kept as ``self.datapipe``.
            fn: Callable kept as ``self.fn``; it is handed to
                ``check_lambda_fn`` before being stored.
            input_col: Value kept unchanged as ``self.input_col``.
            output_col: Output column selector. A list or tuple must contain
                exactly one element, which is unwrapped before being stored
                as ``self.output_col``; any other value is stored as given.

        Raises:
            ValueError: If ``output_col`` is given while ``input_col`` is
                None, or if ``output_col`` is a list/tuple whose length is
                not exactly 1.
        """
        super().__init__()
        self.datapipe = datapipe

        check_lambda_fn(fn)
        self.fn = fn  # type: ignore[assignment]

        self.input_col = input_col
        if input_col is None and output_col is not None:
            raise ValueError("`output_col` must be None when `input_col` is None.")
        if isinstance(output_col, (list, tuple)):
            # Reject empty sequences as well as multi-element ones: an empty
            # list/tuple would otherwise fail with an unhelpful IndexError on
            # the [0] lookup below.
            if len(output_col) != 1:
                raise ValueError("`output_col` must be a single-element list or tuple")
            output_col = output_col[0]
        self.output_col = output_col
Esempio n. 5
0
 def __init__(
     self,
     datapipe: IterDataPipe[T_co],
     group_key_fn: Callable,
     *,
     buffer_size: int = 10000,
     group_size: Optional[int] = None,
     guaranteed_group_size: Optional[int] = None,
     drop_remaining: bool = False,
 ):
     """Store the grouping configuration and derive the guaranteed group size.

     Args:
         datapipe: Source datapipe kept as ``self.datapipe``.
         group_key_fn: Callable kept as ``self.group_key_fn``; it is handed
             to ``check_lambda_fn`` first.
         buffer_size: Kept as ``self.buffer_size``.
         group_size: Kept as ``self.group_size``. When both it and
             ``buffer_size`` are not None it must satisfy
             ``0 < group_size <= buffer_size`` and it becomes the default
             for ``self.guaranteed_group_size``.
         guaranteed_group_size: When given, requires ``group_size`` to be
             set and ``0 < guaranteed_group_size <= group_size``; it then
             overrides ``self.guaranteed_group_size``.
         drop_remaining: Kept as ``self.drop_remaining``.

     Raises:
         AssertionError: If one of the size constraints above is violated.
             The checks are ``assert`` statements, so they are skipped when
             Python runs with optimizations enabled (``-O``).
     """
     check_lambda_fn(group_key_fn)

     self.datapipe = datapipe
     self.group_key_fn = group_key_fn
     self.buffer_size = buffer_size
     self.group_size = group_size

     # Start with "no guarantee"; the two branches below may tighten it.
     self.guaranteed_group_size = None

     if not (group_size is None or buffer_size is None):
         assert group_size > 0 and group_size <= buffer_size
         self.guaranteed_group_size = group_size

     if guaranteed_group_size is not None:
         # An explicit guarantee only makes sense relative to a group size.
         assert group_size is not None and guaranteed_group_size > 0 and guaranteed_group_size <= group_size
         self.guaranteed_group_size = guaranteed_group_size

     self.drop_remaining = drop_remaining
     self.wrapper_class = DataChunk