예제 #1
0
 def test_text_to_ids_phonemes_with_eos_bos(self):
     """Round-trip a sentence through the phoneme tokenizer with BOS/EOS enabled.

     The expected string is the BOS symbol, followed by the phonemized
     sentence (empty separator), followed by the EOS symbol.
     """
     sentence = "Bu bir Örnek."
     self.tokenizer_ph.use_eos_bos = True

     # Build the reference string piece by piece, in the same order the
     # symbols appear in the final sequence.
     bos_symbol = IPAPhonemes().bos
     phonemized = self.ph.phonemize(sentence, separator="")
     eos_symbol = IPAPhonemes().eos
     expected = bos_symbol + phonemized + eos_symbol

     token_ids = self.tokenizer_ph.text_to_ids(sentence)
     decoded = self.tokenizer_ph.ids_to_text(token_ids)
     self.assertEqual(expected, decoded)
예제 #2
0
 def setUp(self):
     """Create the fixtures: an IPA phoneme set, a vocabulary mirroring it, and an empty vocabulary."""
     phonemes = IPAPhonemes()
     self.phonemes = phonemes

     # Mirror the phoneme set's vocabulary and special symbols one-to-one.
     vocab_kwargs = {"vocab": phonemes._vocab}
     vocab_kwargs.update(
         (field, getattr(phonemes, field))
         for field in ("pad", "blank", "bos", "eos"))
     self.base_vocab = BaseVocabulary(**vocab_kwargs)

     self.empty_vocab = BaseVocabulary({})
예제 #3
0
class BaseVocabularyTest(unittest.TestCase):
    def setUp(self):
        self.phonemes = IPAPhonemes()
        self.base_vocab = BaseVocabulary(
            vocab=self.phonemes._vocab,
            pad=self.phonemes.pad,
            blank=self.phonemes.blank,
            bos=self.phonemes.bos,
            eos=self.phonemes.eos,
        )
        self.empty_vocab = BaseVocabulary({})

    def test_pad_id(self):
        self.assertEqual(self.empty_vocab.pad_id, 0)
        self.assertEqual(self.base_vocab.pad_id, self.phonemes.pad_id)

    def test_blank_id(self):
        self.assertEqual(self.empty_vocab.blank_id, 0)
        self.assertEqual(self.base_vocab.blank_id, self.phonemes.blank_id)

    def test_vocab(self):
        self.assertEqual(self.empty_vocab.vocab, {})
        self.assertEqual(self.base_vocab.vocab, self.phonemes._vocab)

    # def test_init_from_config(self):
    #     ...

    def test_num_chars(self):
        self.assertEqual(self.empty_vocab.num_chars, 0)
        self.assertEqual(self.base_vocab.num_chars, self.phonemes.num_chars)

    def test_char_to_id(self):
        try:
            self.empty_vocab.char_to_id("a")
            raise Exception("Should have raised KeyError")
        except:
            pass
        for k in self.phonemes.vocab:
            self.assertEqual(self.base_vocab.char_to_id(k), self.phonemes.char_to_id(k))

    def test_id_to_char(self):
        try:
            self.empty_vocab.id_to_char(0)
            raise Exception("Should have raised KeyError")
        except:
            pass
        for k in self.phonemes.vocab:
            v = self.phonemes.char_to_id(k)
            self.assertEqual(self.base_vocab.id_to_char(v), self.phonemes.id_to_char(v))
예제 #4
0
    def setUp(self):
        """Create a grapheme tokenizer and a phoneme tokenizer backed by an ESpeak("tr") phonemizer."""
        # Character-level tokenizer: no phonemizer involved.
        grapheme_set = Graphemes()
        self.tokenizer = TTSTokenizer(use_phonemes=False, characters=grapheme_set)

        # Phoneme-level tokenizer sharing the phonemizer stored on the fixture.
        espeak_tr = ESpeak("tr", backend="espeak")
        self.ph = espeak_tr
        phoneme_set = IPAPhonemes()
        self.tokenizer_ph = TTSTokenizer(
            use_phonemes=True,
            characters=phoneme_set,
            phonemizer=espeak_tr,
        )
예제 #5
0
 def test_not_found_characters(self):
     """Check that symbols missing from the character set are recorded.

     A local phoneme tokenizer using an ``ESpeak("en-us")`` phonemizer
     encodes and decodes an English sentence. Afterwards its
     ``not_found_characters`` list must contain exactly the one symbol
     that is expected to be absent from ``IPAPhonemes``.
     """
     # The phonemizer is stored on the instance, as before, and replaces
     # whatever ``self.ph`` held for the remainder of this test.
     self.ph = ESpeak("en-us")
     tokenizer_local = TTSTokenizer(use_phonemes=True,
                                    characters=IPAPhonemes(),
                                    phonemizer=self.ph)
     # Precondition on the tokenizer under test, not the shared grapheme
     # fixture: nothing has been reported missing before any text is
     # processed.
     self.assertEqual(len(tokenizer_local.not_found_characters), 0)
     text = "Yolk of one egg beaten light"
     ids = tokenizer_local.text_to_ids(text)
     text_hat = tokenizer_local.ids_to_text(ids)
     self.assertEqual(tokenizer_local.not_found_characters, ["̩"])
     self.assertEqual(text_hat, "jˈoʊk ʌv wˈʌn ˈɛɡ bˈiːʔn lˈaɪt")
예제 #6
0
    def init_from_config(config: "Coqpit",
                         characters: "BaseCharacters" = None):
        """Build a tokenizer, together with the updated config, from a model config.

        Args:
            config (Coqpit): Model config. The fields read here are ``text_cleaner``,
                ``characters``, ``use_phonemes``, ``phoneme_language``, ``phonemizer``,
                ``add_blank`` and ``enable_eos_bos_chars``.
            characters (BaseCharacters): Character set to use. When None, the set is
                chosen from ``config.characters.characters_class`` if that is defined,
                otherwise ``IPAPhonemes`` or ``Graphemes`` depending on
                ``config.use_phonemes``. Defaults to None.

        Returns:
            tuple: ``(tokenizer, new_config)`` where ``new_config`` is the config
                returned by the character set's ``init_from_config``, with
                ``characters.characters_class`` (and ``phonemizer``, when the
                language default was used) filled in.

        Raises:
            ValueError: If phonemes are enabled, no phonemizer is named in the config
                and a ``KeyError`` occurs while resolving the language's default one.
        """
        # Look the cleaner up by name in the ``cleaners`` module.
        # NOTE(review): a list-valued ``text_cleaner`` passes this check, but
        # ``getattr`` needs a string name - confirm how lists should be handled.
        if isinstance(config.text_cleaner, (str, list)):
            text_cleaner = getattr(cleaners, config.text_cleaner)
        else:
            text_cleaner = None

        # Pick the character set: an explicit argument wins, then the class named
        # in the config, then the phoneme / grapheme default.
        if characters is not None:
            characters, new_config = characters.init_from_config(config)
        elif config.characters and config.characters.characters_class:
            characters_cls = import_class(config.characters.characters_class)
            characters, new_config = characters_cls.init_from_config(config)
        elif config.use_phonemes:
            characters, new_config = IPAPhonemes().init_from_config(config)
        else:
            characters, new_config = Graphemes().init_from_config(config)

        # Record which character class ended up being used.
        new_config.characters.characters_class = get_import_path(characters)

        # A phonemizer is only needed when the tokenizer works on phonemes.
        phonemizer = None
        if config.use_phonemes:
            lang_kwargs = {"language": config.phoneme_language}
            has_explicit_phonemizer = "phonemizer" in config and config.phonemizer

            if has_explicit_phonemizer:
                phonemizer = get_phonemizer_by_name(config.phonemizer,
                                                    **lang_kwargs)
            else:
                # Fall back to the language's default phonemizer and write its
                # name back into the returned config.
                try:
                    default_name = DEF_LANG_TO_PHONEMIZER[config.phoneme_language]
                    phonemizer = get_phonemizer_by_name(default_name,
                                                        **lang_kwargs)
                    new_config.phonemizer = phonemizer.name()
                except KeyError as e:
                    raise ValueError(
                        f"""No phonemizer found for language {config.phoneme_language}.
                        You may need to install a third party library for this language."""
                    ) from e

        tokenizer = TTSTokenizer(config.use_phonemes, text_cleaner, characters,
                                 phonemizer, config.add_blank,
                                 config.enable_eos_bos_chars)
        return tokenizer, new_config
예제 #7
0
 def test_default_character_sets(self):  # pylint: disable=no-self-use
     """Smoke test: both default character sets can be constructed without arguments."""
     for charset_cls in (IPAPhonemes, Graphemes):
         charset_cls()