Exemple #1
0
    def __init__(
        self,
        manifest_filepath,
        labels,
        featurizer,
        max_duration=None,
        min_duration=None,
        max_utts=0,
        blank_index=-1,
        unk_index=-1,
        normalize=True,
        trim=False,
        bos_id=None,
        eos_id=None,
        load_audio=True,
    ):
        self.collection = collections.ASRAudioText(
            manifests_files=manifest_filepath.split(','),
            parser=parsers.ENCharParser(
                labels=labels,
                unk_id=unk_index,
                blank_id=blank_index,
                do_normalize=normalize,
            ),
            min_duration=min_duration,
            max_duration=max_duration,
            max_number=max_utts,
        )

        self.featurizer = featurizer
        self.trim = trim
        self.eos_id = eos_id
        self.bos_id = bos_id
        self.load_audio = load_audio
Exemple #2
0
    def __init__(self, punct=True, spaces=False):
        labels = []
        self.space, labels = len(labels), labels + [' ']  # Space
        labels.extend(string.ascii_lowercase + "'")  # Apostrophe for saving "don't" and "Joe's"

        if punct:
            labels.extend(self.PUNCT)

        super().__init__(labels)

        self.punct = punct
        self.spaces = spaces

        self._parser = parsers.ENCharParser(labels)
Exemple #3
0
    def __init__(
        self, punct=True, spaces=False, apostrophe=True, add_blank_at="last_but_one",
    ):
        labels = []
        self.space, labels = len(labels), labels + [' ']  # Space
        labels.extend(string.ascii_lowercase)
        if apostrophe:
            labels.append("'")  # Apostrophe for saving "don't" and "Joe's"

        if punct:
            labels.extend(self.PUNCT)

        super().__init__(labels, add_blank_at=add_blank_at)

        self.punct = punct
        self.spaces = spaces

        self._parser = parsers.ENCharParser(labels)