def __init__(
    self,
    cuts: CutSet,
    return_cuts: bool = False,
    cut_transforms: List[Callable[[CutSet], CutSet]] = None,
    input_transforms: List[Callable[[torch.Tensor], torch.Tensor]] = None,
    input_strategy: InputStrategy = PrecomputedFeatures(),
):
    """
    Set up the K2 ASR IterableDataset.

    :param cuts: the ``CutSet`` that data is sampled from.
    :param return_cuts: if ``True``, every batch additionally carries a "cut"
        field holding the Cut objects that the batch was created from.
    :param cut_transforms: transforms applied to each sampled batch while it
        is still a set of cuts, i.e. before conversion to an input
        representation (audio/features). Examples: cut concatenation,
        noise cuts mixing, etc.
    :param input_transforms: transforms applied to each sampled batch once
        the cuts have been converted to audio/features.
        Examples: normalization, SpecAugment, etc.
    :param input_strategy: turns cuts into a collated batch of
        audio/features. The default reads pre-computed features from disk.
    """
    super().__init__()

    # ``ifnone`` is given ``[]`` as the fallback value (presumably used when
    # the caller left the argument as ``None``).
    batch_level_cut_transforms = ifnone(cut_transforms, [])
    batch_level_input_transforms = ifnone(input_transforms, [])

    # Populate the instance fields.
    self.cuts = cuts
    self.return_cuts = return_cuts
    self.cut_transforms = batch_level_cut_transforms
    self.input_transforms = batch_level_input_transforms
    self.input_strategy = input_strategy

    # Runs last, after all of the fields above have been assigned.
    self._validate()
def __init__(
    self,
    return_cuts: bool = False,
    cut_transforms: List[Callable[[CutSet], CutSet]] = None,
    input_transforms: List[Callable[[torch.Tensor], torch.Tensor]] = None,
    input_strategy: BatchIO = PrecomputedFeatures(),
):
    """
    Set up the k2 ASR IterableDataset.

    :param return_cuts: if ``True``, every batch additionally carries a "cut"
        field holding the Cut objects that the batch was created from.
    :param cut_transforms: transforms applied to each sampled batch while it
        is still a set of cuts, i.e. before conversion to an input
        representation (audio/features). Examples: cut concatenation,
        noise cuts mixing, etc.
    :param input_transforms: transforms applied to each sampled batch once
        the cuts have been converted to audio/features.
        Examples: normalization, SpecAugment, etc.
    :param input_strategy: turns cuts into a collated batch of
        audio/features. The default reads pre-computed features from disk.
    """
    super().__init__()

    # ``ifnone`` is given ``[]`` as the fallback value (presumably used when
    # the caller left the argument as ``None``).
    batch_level_cut_transforms = ifnone(cut_transforms, [])
    batch_level_input_transforms = ifnone(input_transforms, [])

    # Populate the instance fields.
    self.return_cuts = return_cuts
    self.cut_transforms = batch_level_cut_transforms
    self.input_transforms = batch_level_input_transforms
    self.input_strategy = input_strategy

    # Workaround for HDF5 memory usage that keeps growing over the course of
    # an epoch: open file handles are closed at regular intervals so that
    # the internal HDF5 caches get reset.
    self.hdf5_fix = Hdf5MemoryIssueFix(reset_interval=100)
def __init__(
    self,
    cuts: CutSet,
    cut_transforms: List[Callable[[CutSet], CutSet]] = None,
    feature_input_strategy: BatchIO = PrecomputedFeatures(),
    feature_transforms: Union[Sequence[Callable], Callable] = None,
    add_eos: bool = True,
    add_bos: bool = True,
) -> None:
    """
    Store the cuts and the cut/feature processing configuration.

    :param cuts: the ``CutSet`` kept on the instance; it is also handed to
        the ``TokenCollater`` created here.
    :param cut_transforms: callables that take a ``CutSet`` and return a
        ``CutSet``. ``ifnone`` is given ``[]`` as the fallback value
        (presumably used when the argument is ``None``).
    :param feature_input_strategy: stored unchanged. Note that the default
        ``PrecomputedFeatures()`` instance is created once, when the
        function is defined, and is shared by every call that relies on it.
    :param feature_transforms: a single callable or a sequence of callables.
        ``None`` becomes an empty list and a lone (non-sequence) value is
        wrapped in a one-element list; a sequence is stored as passed.
    :param add_eos: forwarded to ``TokenCollater``.
    :param add_bos: forwarded to ``TokenCollater``.
    :raises AssertionError: if any feature transform is not a ``Callable``.
    """
    super().__init__()

    self.cuts = cuts
    self.token_collater = TokenCollater(cuts, add_eos=add_eos, add_bos=add_bos)
    self.cut_transforms = ifnone(cut_transforms, [])
    self.feature_input_strategy = feature_input_strategy

    # Normalize ``feature_transforms`` so that a sequence is always stored.
    if feature_transforms is None:
        feature_transforms = []
    elif not isinstance(feature_transforms, Sequence):
        feature_transforms = [feature_transforms]

    # Raised explicitly rather than with an ``assert`` statement so that the
    # check is not stripped when Python runs with ``-O``; the exception type
    # and message are the same as before.
    if not all(isinstance(transform, Callable) for transform in feature_transforms):
        raise AssertionError("Feature transforms must be Callable")

    self.feature_transforms = feature_transforms
def __init__(
    self,
    cuts: CutSet,
    cut_transforms: List[Callable[[CutSet], CutSet]] = None,
    feature_input_strategy: InputStrategy = PrecomputedFeatures(),
    feature_transforms: Union[Sequence[Callable], Callable] = None,
    add_eos: bool = True,
    add_bos: bool = True,
) -> None:
    """
    Check the cuts, then store them with the processing configuration.

    :param cuts: the ``CutSet`` kept on the instance. It is passed to
        ``validate`` first, every cut must hold exactly one supervision,
        and it is also handed to the ``TokenCollater`` created here.
    :param cut_transforms: callables that take a ``CutSet`` and return a
        ``CutSet``. ``ifnone`` is given ``[]`` as the fallback value
        (presumably used when the argument is ``None``).
    :param feature_input_strategy: stored unchanged. Note that the default
        ``PrecomputedFeatures()`` instance is created once, when the
        function is defined, and is shared by every call that relies on it.
    :param feature_transforms: a single callable or a sequence of callables.
        ``None`` becomes an empty list and a lone (non-sequence) value is
        wrapped in a one-element list; a sequence is stored as passed.
    :param add_eos: forwarded to ``TokenCollater``.
    :param add_bos: forwarded to ``TokenCollater``.
    :raises AssertionError: if a cut does not have exactly one supervision,
        or if any feature transform is not a ``Callable``.
    """
    super().__init__()

    validate(cuts)

    # Both checks below are raised explicitly rather than with ``assert``
    # statements so that they are not stripped when Python runs with ``-O``;
    # the exception type and messages are the same as before.
    for cut in cuts:
        if len(cut.supervisions) != 1:
            raise AssertionError(
                "Only the Cuts with single supervision are supported."
            )

    self.cuts = cuts
    self.token_collater = TokenCollater(cuts, add_eos=add_eos, add_bos=add_bos)
    self.cut_transforms = ifnone(cut_transforms, [])
    self.feature_input_strategy = feature_input_strategy

    # Normalize ``feature_transforms`` so that a sequence is always stored.
    if feature_transforms is None:
        feature_transforms = []
    elif not isinstance(feature_transforms, Sequence):
        feature_transforms = [feature_transforms]

    if not all(isinstance(transform, Callable) for transform in feature_transforms):
        raise AssertionError("Feature transforms must be Callable")

    self.feature_transforms = feature_transforms
def __init__(
    self,
    input_strategy: BatchIO = PrecomputedFeatures(),
    cut_transforms: Sequence[Callable[[CutSet], CutSet]] = None,
    input_transforms: Sequence[Callable[[torch.Tensor], torch.Tensor]] = None,
) -> None:
    """
    Store the input strategy and the two transform collections.

    :param input_strategy: stored unchanged as ``self.input_strategy``.
    :param cut_transforms: callables that take a ``CutSet`` and return a
        ``CutSet``.
    :param input_transforms: callables that take a ``torch.Tensor`` and
        return a ``torch.Tensor``.
    """
    super().__init__()

    # ``ifnone`` is given ``[]`` as the fallback value (presumably used when
    # the caller left the argument as ``None``).
    transforms_on_cuts = ifnone(cut_transforms, [])
    transforms_on_inputs = ifnone(input_transforms, [])

    self.input_strategy = input_strategy
    self.cut_transforms = transforms_on_cuts
    self.input_transforms = transforms_on_inputs
def __init__(
    self,
    cuts: CutSet,
    input_strategy: InputStrategy = PrecomputedFeatures(),
    cut_transforms: Sequence[Callable[[CutSet], CutSet]] = None,
    input_transforms: Sequence[Callable[[torch.Tensor], torch.Tensor]] = None,
) -> None:
    """
    Check the cuts, then store them with the input strategy and transforms.

    :param cuts: the ``CutSet`` kept on the instance; it is passed to
        ``validate`` before anything is stored.
    :param input_strategy: stored unchanged as ``self.input_strategy``.
    :param cut_transforms: callables that take a ``CutSet`` and return a
        ``CutSet``.
    :param input_transforms: callables that take a ``torch.Tensor`` and
        return a ``torch.Tensor``.
    """
    super().__init__()

    # Checked up front, before any field is assigned.
    validate(cuts)

    # ``ifnone`` is given ``[]`` as the fallback value (presumably used when
    # the caller left the argument as ``None``).
    transforms_on_cuts = ifnone(cut_transforms, [])
    transforms_on_inputs = ifnone(input_transforms, [])

    self.cuts = cuts
    self.input_strategy = input_strategy
    self.cut_transforms = transforms_on_cuts
    self.input_transforms = transforms_on_inputs