def test_text_to_ids_phonemes_with_eos_bos(self):
    """Round-trip a sentence through the phoneme tokenizer with ``use_eos_bos`` enabled.

    The decoded text must equal the phonemized sentence wrapped in the
    ``IPAPhonemes`` BOS and EOS symbols.
    """
    sentence = "Bu bir Örnek."
    self.tokenizer_ph.use_eos_bos = True

    # Expected output: <bos> + phonemes (no separator) + <eos>.
    symbols = IPAPhonemes()
    phonemized = self.ph.phonemize(sentence, separator="")
    expected = symbols.bos + phonemized + symbols.eos

    token_ids = self.tokenizer_ph.text_to_ids(sentence)
    decoded = self.tokenizer_ph.ids_to_text(token_ids)
    self.assertEqual(expected, decoded)
def setUp(self):
    """Create the fixtures: a vocabulary built from ``IPAPhonemes`` and an empty one."""
    phonemes = IPAPhonemes()
    self.phonemes = phonemes

    # Vocabulary entries and special symbols are all taken from the phoneme set.
    vocab_kwargs = {
        "vocab": phonemes._vocab,
        "pad": phonemes.pad,
        "blank": phonemes.blank,
        "bos": phonemes.bos,
        "eos": phonemes.eos,
    }
    self.base_vocab = BaseVocabulary(**vocab_kwargs)
    self.empty_vocab = BaseVocabulary({})
class BaseVocabularyTest(unittest.TestCase):
    """Compare ``BaseVocabulary`` with the ``IPAPhonemes`` set it is built from.

    Every test also checks an empty vocabulary (``BaseVocabulary({})``).
    """

    def setUp(self):
        """Create a vocabulary built from ``IPAPhonemes`` and an empty one."""
        self.phonemes = IPAPhonemes()
        self.base_vocab = BaseVocabulary(
            vocab=self.phonemes._vocab,
            pad=self.phonemes.pad,
            blank=self.phonemes.blank,
            bos=self.phonemes.bos,
            eos=self.phonemes.eos,
        )
        self.empty_vocab = BaseVocabulary({})

    def test_pad_id(self):
        """The pad id is 0 for the empty vocabulary and matches the phoneme set otherwise."""
        self.assertEqual(self.empty_vocab.pad_id, 0)
        self.assertEqual(self.base_vocab.pad_id, self.phonemes.pad_id)

    def test_blank_id(self):
        """The blank id is 0 for the empty vocabulary and matches the phoneme set otherwise."""
        self.assertEqual(self.empty_vocab.blank_id, 0)
        self.assertEqual(self.base_vocab.blank_id, self.phonemes.blank_id)

    def test_vocab(self):
        """``vocab`` returns the mapping the vocabulary was constructed with."""
        self.assertEqual(self.empty_vocab.vocab, {})
        self.assertEqual(self.base_vocab.vocab, self.phonemes._vocab)

    # TODO: add a test for ``init_from_config``.

    def test_num_chars(self):
        """``num_chars`` is 0 for the empty vocabulary and matches the phoneme set otherwise."""
        self.assertEqual(self.empty_vocab.num_chars, 0)
        self.assertEqual(self.base_vocab.num_chars, self.phonemes.num_chars)

    def test_char_to_id(self):
        """Unknown characters raise ``KeyError``; known ones map like the phoneme set."""
        # assertRaises fails the test when no KeyError is raised. A sentinel
        # ``raise`` inside a try block with a catch-all handler could never fail.
        with self.assertRaises(KeyError):
            self.empty_vocab.char_to_id("a")
        for char in self.phonemes.vocab:
            self.assertEqual(self.base_vocab.char_to_id(char), self.phonemes.char_to_id(char))

    def test_id_to_char(self):
        """Unknown ids raise ``KeyError``; known ones map like the phoneme set."""
        # Same reasoning as in test_char_to_id: the negative check must be able to fail.
        with self.assertRaises(KeyError):
            self.empty_vocab.id_to_char(0)
        for char in self.phonemes.vocab:
            idx = self.phonemes.char_to_id(char)
            self.assertEqual(self.base_vocab.id_to_char(idx), self.phonemes.id_to_char(idx))
def setUp(self):
    """Create the fixtures: a grapheme tokenizer and a phoneme tokenizer using ``ESpeak("tr")``."""
    # Character-level tokenizer, no phonemization.
    graphemes = Graphemes()
    self.tokenizer = TTSTokenizer(use_phonemes=False, characters=graphemes)

    # Phoneme-level tokenizer; the phonemizer is kept on ``self.ph`` so tests can call it directly.
    self.ph = ESpeak("tr", backend="espeak")
    self.tokenizer_ph = TTSTokenizer(
        use_phonemes=True,
        characters=IPAPhonemes(),
        phonemizer=self.ph,
    )
def test_not_found_characters(self):
    """Tokenize a sentence with an ``en-us`` phonemizer and check the reported not-found characters."""
    phonemizer = ESpeak("en-us")
    self.ph = phonemizer
    en_tokenizer = TTSTokenizer(
        use_phonemes=True,
        characters=IPAPhonemes(),
        phonemizer=phonemizer,
    )

    # The fixture tokenizer has not recorded any missing character at this point.
    self.assertEqual(len(self.tokenizer.not_found_characters), 0)

    sentence = "Yolk of one egg beaten light"
    token_ids = en_tokenizer.text_to_ids(sentence)
    decoded = en_tokenizer.ids_to_text(token_ids)

    expected_missing = ["̩"]
    expected_text = "jˈoʊk ʌv wˈʌn ˈɛɡ bˈiːʔn lˈaɪt"
    self.assertEqual(en_tokenizer.not_found_characters, expected_missing)
    self.assertEqual(decoded, expected_text)
def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
    """Build a tokenizer, and the config that goes with it, from a model config.

    Args:
        config (Coqpit): Coqpit model config.
        characters (BaseCharacters): Character set to use. When ``None``, the class named
            by ``config.characters.characters_class`` is used if it is set; otherwise
            ``IPAPhonemes`` or ``Graphemes`` is picked according to ``config.use_phonemes``.
            Defaults to None.

    Returns:
        tuple: ``(tokenizer, new_config)``. ``new_config`` is the config returned by the
        character set's ``init_from_config``, with ``characters.characters_class`` set and,
        when a default phonemizer was selected, ``phonemizer`` set to its name.

    Raises:
        ValueError: If phonemes are enabled, no phonemizer is configured, and a ``KeyError``
            occurs while selecting the default phonemizer for ``config.phoneme_language``.
    """
    # Text cleaner: resolved by name on the ``cleaners`` module.
    text_cleaner = (
        getattr(cleaners, config.text_cleaner)
        if isinstance(config.text_cleaner, (str, list))
        else None
    )

    # Pick the object whose ``init_from_config`` produces the character set:
    # the caller's instance, the class named in the config, or a default set.
    if characters is not None:
        characters_source = characters
    elif config.characters and config.characters.characters_class:
        characters_source = import_class(config.characters.characters_class)
    elif config.use_phonemes:
        characters_source = IPAPhonemes()
    else:
        characters_source = Graphemes()
    characters, new_config = characters_source.init_from_config(config)

    # Record which character class was actually used.
    new_config.characters.characters_class = get_import_path(characters)

    # Phonemizer: only needed when the model works on phonemes.
    phonemizer = None
    if config.use_phonemes:
        phonemizer_kwargs = {"language": config.phoneme_language}
        explicit_phonemizer = "phonemizer" in config and config.phonemizer
        if not explicit_phonemizer:
            # Fall back to the default phonemizer registered for the language
            # and store its name in the returned config.
            try:
                default_name = DEF_LANG_TO_PHONEMIZER[config.phoneme_language]
                phonemizer = get_phonemizer_by_name(default_name, **phonemizer_kwargs)
                new_config.phonemizer = phonemizer.name()
            except KeyError as e:
                raise ValueError(
                    f"""No phonemizer found for language {config.phoneme_language}. You may need to install a third party library for this language."""
                ) from e
        else:
            phonemizer = get_phonemizer_by_name(config.phonemizer, **phonemizer_kwargs)

    tokenizer = TTSTokenizer(
        config.use_phonemes,
        text_cleaner,
        characters,
        phonemizer,
        config.add_blank,
        config.enable_eos_bos_chars,
    )
    return tokenizer, new_config
def test_default_character_sets(self):  # pylint: disable=no-self-use
    """Smoke test: both default character sets can be constructed without arguments."""
    for character_set_class in (IPAPhonemes, Graphemes):
        character_set_class()