Esempio n. 1
0
    def test_split_diacritics(self):
        """Check Phonemes.split when a replacement would match inside a diacritic form."""
        phonemes = Phonemes.from_language("fr-fr")
        split_result = phonemes.split("/ɑɑ̃/", keep_stress=False)

        # Only the first ɑ becomes a; the second one (with its diacritic)
        # must come through unchanged.
        expected = ["a", "ɑ̃"]
        actual = [phoneme.text for phoneme in split_result]
        self.assertEqual(actual, expected)
Esempio n. 2
0
    def test_split_dipthong(self):
        """Check Phonemes.split on a pronunciation containing diphthongs."""
        phonemes = Phonemes.from_language("cs-cz")
        split_result = phonemes.split("/neu̯rt͡ʃɪtou̯/", keep_stress=False)

        # eu̯ must come out as a single phoneme, still spelled eu̯
        expected = ["n", "eu̯", "r", "t͡ʃ", "ɪ", "t", "ou̯"]
        actual = [phoneme.text for phoneme in split_result]
        self.assertEqual(actual, expected)
Esempio n. 3
0
    def test_split_substring(self):
        """Check Phonemes.split when a replacement matches a substring of a phoneme."""
        phonemes = Phonemes.from_language("cs-cz")
        split_result = phonemes.split("/viːtɛt͡ʃnaː/", keep_stress=False)

        # iː must stay iː and not be rewritten to ɪː
        expected = ["v", "iː", "t", "ɛ", "t͡ʃ", "n", "aː"]
        actual = [phoneme.text for phoneme in split_result]
        self.assertEqual(actual, expected)
Esempio n. 4
0
    def test_tones(self):
        """Check that Phonemes.split keeps tone markers in the phoneme text."""
        phonemes = Phonemes.from_language("vi-n")

        # Pronunciation of "á khôi"
        split_result = phonemes.split("/a˨˦xoj˧˧/")

        # Tone markers must still be present in the resulting phoneme texts
        expected = ["a˨˦", "x", "oj˧˧"]
        actual = [phoneme.text for phoneme in split_result]
        self.assertEqual(actual, expected)
Esempio n. 5
0
    def test_dipthong(self):
        """Check Phonemes.split on a pronunciation ending in a diphthong."""
        phonemes = Phonemes.from_language("pt")

        # Pronunciation of "ampliam"
        split_result = phonemes.split("/ɐ̃pliɐ̃w̃/")

        # Both "ɐ̃" and "ɐ̃w̃" must be kept as they are
        expected = ["ɐ̃", "p", "l", "i", "ɐ̃w̃"]
        actual = [phoneme.text for phoneme in split_result]
        self.assertEqual(actual, expected)
Esempio n. 6
0
    def test_split(self):
        """Check Phonemes.split on a multi-word pronunciation with stress kept."""
        phonemes = Phonemes.from_language("en-us")

        # Pronunciation of "Just a cow."
        split_result = phonemes.split("/dʒʌst ə kˈaʊ/", keep_stress=True)

        # "d ʒ" must be joined into "d͡ʒ" and "a ʊ" into "aʊ"
        expected = ["d͡ʒ", "ʌ", "s", "t", "ə", "k", "ˈaʊ"]
        actual = [phoneme.text for phoneme in split_result]
        self.assertEqual(actual, expected)
Esempio n. 7
0
    def __init__(
        self,
        config,
        language: typing.Optional[str] = None,
        preload_lexicon: bool = False,
        custom_tokenize: typing.Optional[TokenizeFunc] = None,
        custom_post_tokenize: typing.Optional[PostTokenizeFunc] = None,
    ):
        """Create the tokenizer, phonemizer and phoneme set for one language.

        Args:
            config: Language configuration. Read through ``pydash.get`` with
                dotted paths (``language.code``, ``language.keep_stress``,
                ``language.keep_accents``, ``language.tones``) and through
                ``.get("accents", {})``.
            language: Language code. When ``None``, ``language.code`` from
                ``config`` is used instead.
            preload_lexicon: Passed through to ``Phonemizer``.
            custom_tokenize: Passed through to ``Tokenizer``.
            custom_post_tokenize: Passed through to ``Tokenizer``.
        """
        # An explicit language wins over the code stored in the config
        self.language = (
            pydash.get(config, "language.code") if language is None else language
        )
        self.config = config

        self.tokenizer = Tokenizer(
            config,
            custom_tokenize=custom_tokenize,
            custom_post_tokenize=custom_post_tokenize,
        )

        # The phonemizer reuses the tokenizer's word test
        self.phonemizer = Phonemizer(config, preload_lexicon=preload_lexicon)
        self.phonemizer.is_word = self.tokenizer.is_word  # type: ignore

        self.phonemes = Phonemes.from_language(self.language)

        # Whether primary/secondary stress is kept during phonemization
        stress_setting = pydash.get(self.config, "language.keep_stress", False)
        self.keep_stress = bool(stress_setting)

        # Whether acute/grave accents are kept during phonemization
        accents_setting = pydash.get(self.config, "language.keep_accents", False)
        self.keep_accents = bool(accents_setting)

        # Tones allowed in the language
        self.tones: typing.List[str] = pydash.get(
            self.config, "language.tones", []
        )

        # Language-specific "accents" (not the acute/grave kind): for every
        # accent language, map each source phoneme to a list of target
        # phonemes, wrapping a lone string target in a list.
        self.accents: typing.Dict[str, typing.Dict[str, typing.List[str]]] = {}
        for accent_lang, accent_map in self.config.get("accents", {}).items():
            self.accents[accent_lang] = {
                source: [targets] if isinstance(targets, str) else targets
                for source, targets in accent_map.items()
            }
Esempio n. 8
0
    def __init__(self, config, language: typing.Optional[str] = None):
        """Create the tokenizer, phonemizer and phoneme set for one language.

        Args:
            config: Language configuration. Read through ``pydash.get`` with
                dotted paths (``language.code``, ``language.keep_stress``,
                ``language.keep_accents``, ``language.tones``) and through
                ``.get("accents", {})``.
            language: Language code. When ``None``, ``language.code`` from
                ``config`` is used instead.
        """
        if language is None:
            self.language = pydash.get(config, "language.code")
        else:
            self.language = language

        self.config = config

        # Language-specific loading.
        # Compare against the resolved self.language rather than the raw
        # argument, so a language code that comes from the config (argument
        # left as None) also gets its custom tokenizer.
        custom_tokenize: typing.Optional[TOKENIZE_FUNC] = None
        if self.language == "fa":
            custom_tokenize = Language.make_fa_tokenize()

        self.tokenizer = Tokenizer(config, custom_tokenize=custom_tokenize)
        self.phonemizer = Phonemizer(config)
        self.phonemes = Phonemes.from_language(self.language)
        self.accents: typing.Dict[str, typing.Dict[str, typing.List[str]]] = {}

        # If True, primary/secondary stress should be kept during phonemization
        self.keep_stress = bool(
            pydash.get(self.config, "language.keep_stress", False))

        # If True, acute/grave accents should be kept during phonemization
        self.keep_accents = bool(
            pydash.get(self.config, "language.keep_accents", False))

        # Allowable tones in the language
        self.tones: typing.List[str] = pydash.get(self.config,
                                                  "language.tones", [])

        # Load language-specific "accents" (different than acute/grave).
        # Each source phoneme maps to a list of target phonemes; a lone
        # string target is wrapped in a list.
        accents = self.config.get("accents", {})
        for accent_lang, accent_map in accents.items():
            final_map = {}
            for from_phoneme, to_phonemes in accent_map.items():
                if isinstance(to_phonemes, str):
                    to_phonemes = [to_phonemes]

                final_map[from_phoneme] = to_phonemes

            self.accents[accent_lang] = final_map
Esempio n. 9
0
 def setUpClass(cls):
     """Store the result of ``Phonemes.from_language("de-de")`` on the class as ``de_phonemes``."""
     # NOTE(review): no @classmethod decorator is visible in this excerpt —
     # confirm it is present in the full file.
     cls.de_phonemes = Phonemes.from_language("de-de")