Ejemplo n.º 1
0
    def __init__(self,
                 cuts: CutSet,
                 max_frames: Optional[int] = None,
                 max_samples: Optional[int] = None,
                 max_duration: Optional[Seconds] = None,
                 max_cuts: Optional[int] = None,
                 **kwargs):
        """
        SingleCutSampler's constructor.

        At least one of ``max_frames``, ``max_samples``, ``max_duration`` or ``max_cuts``
        has to be specified.

        :param cuts: the ``CutSet`` to sample data from.
        :param max_frames: The maximum total number of feature frames from ``cuts``.
        :param max_samples: The maximum total number of audio samples from ``cuts``.
        :param max_duration: The maximum total recording duration from ``cuts``.
        :param max_cuts: The maximum number of cuts sampled to form a mini-batch.
            By default, this constraint is off.
        :param kwargs: Arguments to be passed into ``CutSampler``.
        :raises AssertionError: when neither the time constraint is active nor ``max_cuts``
            is set, or when ``max_cuts`` fails the ``is_none_or_gt(max_cuts, 0)`` check.
        """
        super().__init__(cuts.ids, provide_len=not cuts.is_lazy, **kwargs)
        self.cuts = cuts
        self.time_constraint = TimeConstraint(max_duration=max_duration,
                                              max_frames=max_frames,
                                              max_samples=max_samples)
        self.max_cuts = max_cuts
        # Constraints
        # Require at least one limit: either the time constraint is active or
        # ``max_cuts`` is given. (An expression of the form ``A or not (A and B)``
        # would be true for every input and therefore validate nothing.)
        assert self.time_constraint.is_active() or self.max_cuts is not None, \
            "At least one of max_frames, max_samples, max_duration or max_cuts has to be set."
        assert is_none_or_gt(self.max_cuts, 0)
Ejemplo n.º 2
0
    def __init__(
        self,
        cuts: CutSet,
        max_frames: Optional[int] = None,
        max_samples: Optional[int] = None,
        max_duration: Optional[Seconds] = None,
        max_cuts: Optional[int] = None,
        shuffle: bool = False,
        drop_last: bool = False,
        world_size: Optional[int] = None,
        rank: Optional[int] = None,
        seed: int = 0,
    ):
        """
        SingleCutSampler's constructor.

        At least one of ``max_frames``, ``max_samples``, ``max_duration`` or ``max_cuts``
        has to be specified.

        :param cuts: the ``CutSet`` to sample data from.
        :param max_frames: The maximum total number of feature frames from ``cuts``.
        :param max_samples: The maximum total number of audio samples from ``cuts``.
        :param max_duration: The maximum total recording duration from ``cuts``.
        :param max_cuts: The maximum number of cuts sampled to form a mini-batch.
            By default, this constraint is off.
        :param shuffle: When ``True``, the cuts will be shuffled at the start of iteration.
            Convenient when mini-batch loop is inside an outer epoch-level loop, e.g.:
            `for epoch in range(10): for batch in dataset: ...` as every epoch will see a
            different cuts order.
        :param drop_last: When ``True``, the last batch is dropped if it's incomplete.
        :param world_size: Total number of distributed nodes. We will try to infer it by default.
        :param rank: Index of distributed node. We will try to infer it by default.
        :param seed: Random seed used to consistently shuffle the dataset across different processes.
        :raises AssertionError: when neither the time constraint is active nor ``max_cuts``
            is set, or when ``max_cuts`` fails the ``is_none_or_gt(max_cuts, 0)`` check.
        """
        super().__init__(
            shuffle=shuffle,
            world_size=world_size,
            rank=rank,
            seed=seed,
        )
        self.data_source = DataSource(cuts)
        self.time_constraint = TimeConstraint(max_duration=max_duration,
                                              max_frames=max_frames,
                                              max_samples=max_samples)
        self.drop_last = drop_last
        self.max_cuts = max_cuts
        # Constraints
        # Require at least one limit: either the time constraint is active or
        # ``max_cuts`` is given. (An expression of the form ``A or not (A and B)``
        # would be true for every input and therefore validate nothing.)
        assert self.time_constraint.is_active() or self.max_cuts is not None, (
            "At least one of max_frames, max_samples, max_duration or max_cuts has to be set."
        )
        assert is_none_or_gt(self.max_cuts, 0)
Ejemplo n.º 3
0
 def __post_init__(self) -> None:
     """
     Run sanity checks on the values exposed by ``self._constraints``.

     Two conditions are asserted:

     * ``exactly_one_not_null`` accepts the constraints, or every one of them is ``None``;
     * each constraint passes ``is_none_or_gt(value, 0)``.

     :raises AssertionError: when either condition does not hold.
     """
     assert (
         exactly_one_not_null(*self._constraints)
         or all(value is None for value in self._constraints)
     )
     assert all(is_none_or_gt(value, 0) for value in self._constraints)