Example #1
0
    def test_sentence_phonemes(self):
        """Check the "sentence_phonemes" return format.

        The expected result is one flat list of phonemes per input sentence,
        each list ending with the major-break symbol.
        """
        # fmt: off
        first_sentence = [
            "ˈaɪ", "w", "ˈaʊ", "n", "d", "ð", "ə", "w", "ˈu", "n", "d", "‖",
        ]
        second_sentence = [
            "ˈaɪ", "ɹ", "ɪ", "f", "j", "ˈu", "z",
            "ð", "ə", "ɹ", "ˈɛ", "f", "j", "ˌu", "z", "‖",
        ]
        # fmt: on

        result = text_to_phonemes(
            "I wound the wound. I refuse the refuse.",
            return_format="sentence_phonemes",
        )

        self.assertEqual([first_sentence, second_sentence], result)
Example #2
0
def text2phone(text, language, use_espeak_phonemes=False):
    """Turn written text into a phoneme string.

    Parameters:
            text (str): text to phonemize
            language (str): language code of the text
            use_espeak_phonemes (bool): only used on the gruut path; when
                    True, gruut is asked for its "espeak" model prefix
                    (kept for compatibility with TTS<=v0.0.13 pre-trained
                    models)
    Returns:
            ph (str): phoneme string. On the gruut path the phonemes of a
                    word are joined with "|" and consecutive words with
                    "| ". "zh-CN" and "ja-jp" return whatever their
                    dedicated phonemizers produce.
    Raises:
            ValueError: if no phonemizer handles ``language``
    """
    # Languages with a dedicated phonemizer never reach gruut.
    if language == "zh-CN":
        return chinese_text_to_phonemes(text)

    if language == "ja-jp":
        return japanese_text_to_phonemes(text)

    if not gruut.is_language_supported(language):
        raise ValueError(f" [!] Language {language} is not supported for phonemization.")

    gruut_options = {
        "remove_stress": True,
        # Keep commas/semi-colons as they are instead of IPA |
        "ipa_minor_breaks": False,
        # Keep periods as they are instead of IPA ‖
        "ipa_major_breaks": False,
    }
    if use_espeak_phonemes:
        # Select the lexicon/g2p model trained on eSpeak IPA rather than
        # gruut IPA.
        gruut_options["model_prefix"] = "espeak"

    words = gruut.text_to_phonemes(
        text,
        lang=language,
        return_format="word_phonemes",
        phonemizer_args=gruut_options,
    )

    # "|" between the phonemes of a word, "| " between words.
    joined = "| ".join("|".join(phonemes) for phonemes in words)

    # Remap a few phonemes through the module's translation table.
    return joined.translate(GRUUT_TRANS_TABLE)
Example #3
0
    def test_word_phonemes(self):
        """Check the "word_phonemes" return format.

        The expected result is a single list with one phoneme list per word,
        covering both sentences; each major break is its own entry.
        """
        first_sentence_words = [
            ["ˈaɪ"],
            ["w", "ˈaʊ", "n", "d"],
            ["ð", "ə"],
            ["w", "ˈu", "n", "d"],
            ["‖"],
        ]
        second_sentence_words = [
            ["ˈaɪ"],
            ["ɹ", "ɪ", "f", "j", "ˈu", "z"],
            ["ð", "ə"],
            ["ɹ", "ˈɛ", "f", "j", "ˌu", "z"],
            ["‖"],
        ]
        # Words of both sentences are expected back to back in one list.
        expected = first_sentence_words + second_sentence_words

        result = text_to_phonemes(
            "I wound the wound. I refuse the refuse.",
            return_format="word_phonemes",
        )

        self.assertEqual(expected, result)
Example #4
0
    def test_sentences(self):
        """Check the "sentences" return format.

        Two Sentence objects are expected, each exposing its per-word
        phoneme lists through the ``phonemes`` attribute.
        """
        expected_first = [
            ["ˈaɪ"],
            ["w", "ˈaʊ", "n", "d"],
            ["ð", "ə"],
            ["w", "ˈu", "n", "d"],
            ["‖"],
        ]
        expected_second = [
            ["ˈaɪ"],
            ["ɹ", "ɪ", "f", "j", "ˈu", "z"],
            ["ð", "ə"],
            ["ɹ", "ˈɛ", "f", "j", "ˌu", "z"],
            ["‖"],
        ]

        result = text_to_phonemes(
            "I wound the wound. I refuse the refuse.",
            return_format="sentences",
        )

        # Exactly one Sentence object per input sentence.
        self.assertEqual(len(result), 2)
        first, second = result
        self.assertIsInstance(first, Sentence)
        self.assertIsInstance(second, Sentence)

        self.assertEqual(expected_first, first.phonemes)
        self.assertEqual(expected_second, second.phonemes)
Example #5
0
from gruut import text_to_phonemes

# Sample sentence in which "wound" and "read" each appear twice.
text = 'He wound it around the wound, saying "I read it was $10 to read."'

# Every item unpacks into three values; only the word and its phonemes are
# printed, on one line per word.
for _sentence_index, token, token_phonemes in text_to_phonemes(text, lang="en-us"):
    print(token, *token_phonemes)
Example #6
0
 def test_last_token(self):
     """Ensure liaison handling does not drop the last token.

     Phonemizing the French text 'Est-ce-que' must give a non-empty result.
     """
     result = text_to_phonemes("Est-ce-que", lang="fr")
     result_size = len(result)
     self.assertGreater(result_size, 0)