from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.
# NOTE(review): `icu` is used below but no `import icu` is visible in this
# part of the script -- confirm it is imported above.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'si-fonipa-t-si'

# Sinhala-script characters plus format characters (general category Cf).
GRAPHEMES = icu.UnicodeSet('[[:Sinh:] [:Cf:]]')

# TODO: ᵑɡ or ⁿɡ ?
# TODO: No t͡ʃ d͡ʒ ?
# TODO: No əː ?
# NOTE(review): `ou̯` is listed twice in the inventory -- confirm whether the
# second entry was meant to be a different diphthong.
PHONEMES = makePhonemeSet(""" m n ɲ ŋ p b ᵐb ⁿd d t ʈ ɖ ⁿɖ k ɡ ⁿɡ s ʃ c ɟ f h r l j w i iː u uː e eː ə əː o oː æː æ a aː ei̯ ou̯ ou̯ æi̯ ai̯ au̯ . """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ θ s sʰ z ʃ h w̥ w j l̥ l í ì ḭ ú ù ṵ ʊ ʊ́ ʊ̀ ʊ̰ ɪ ɪ́ ɪ̀ ɪ̰ e é è ḛ ó ò o̰ ə ɛ ɛ́ ɛ̀ ɛ̰ ɔ́ ɔ̀ ɔ̰ æ a á à a̰ eɪ̯ éɪ̯ èɪ̯ ḛɪ̯ oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯ əʊ̯ aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯ aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯ . """) check('my-my_FONIPA.txt', GRAPHEMES, PHONEMES) regtest('my-my_FONIPA', GRAPHEMES, PHONEMES)
import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'eo-fonipa-t-eo'

# Latin-script characters plus punctuation (general category P).
GRAPHEMES = icu.UnicodeSet('[[:Latn:] [:P:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols.
PHONEMES = makePhonemeSet(""" m n p b d t k ɡ s z ʃ ʒ t͡s d͡z t͡ʃ d͡ʒ f x h r v l j i u e o a ui̯ ei̯ eu̯ oi̯ ai̯ au̯ """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'ia-fonipa-t-ia'

# Latin-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the final entry is a literal ".".
PHONEMES = makePhonemeSet(""" m n ŋ p b t d k ɡ f v s z ʃ ʒ h t͡s d͡ʒ ɾ l j w i u e o a ei̯ eu̯ oi̯ ai̯ au̯ . """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Ol Chiki-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Olck:]]')

# TODO: This phoneme set seems a little large.
# Collect a large corpus, and see which ones actually occur.
# TODO: Is /ɽː/ physiologically possible?
PHONEMES = makePhonemeSet(""" m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ g ʔ s sː h d͡ʒ ɽ r l lː w wː w̃ w̃ː i iː ĩ ĩː u uː ũ ũː e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː a aː ã ãː """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('sat-sat_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('sat-sat_FONIPA', GRAPHEMES, PHONEMES)
m̥ m n̥ n ɲ̥ ɲ ŋ̊ ŋ ɴ p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ θ s sʰ z ʃ h w̥ w j l̥ l í ì ḭ ú ù ṵ ʊ ʊ́ ʊ̀ ʊ̰ ɪ ɪ́ ɪ̀ ɪ̰ e é è ḛ ó ò o̰ ə ɛ ɛ́ ɛ̀ ɛ̰ ɔ́ ɔ̀ ɔ̰ æ a á à a̰ eɪ̯ éɪ̯ èɪ̯ ḛɪ̯ oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯ əʊ̯ aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯ aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯ . """) check('my-my_FONIPA.txt', GRAPHEMES, PHONEMES) regtest('my-my_FONIPA', GRAPHEMES, PHONEMES)
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.
# NOTE(review): `icu` is used below but no `import icu` is visible in this
# part of the script -- confirm it is imported above.

# Ol Chiki-script characters only.
GRAPHEMES = icu.UnicodeSet("[[:Olck:]]")

# TODO: This phoneme set seems a little large.
# Collect a large corpus, and see which ones actually occur.
# TODO: Is /ɽː/ physiologically possible?
PHONEMES = makePhonemeSet(""" m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ g ʔ s sː h d͡ʒ ɽ r l lː w wː w̃ w̃ː i iː ĩ ĩː u uː ũ ũː e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː a aː ã ãː """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check("sat-sat_FONIPA.txt", GRAPHEMES, PHONEMES)
regtest("sat-sat_FONIPA", GRAPHEMES, PHONEMES)
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.
# NOTE(review): `icu` is used below but no `import icu` is visible in this
# part of the script -- confirm it is imported above.

# Cyrillic-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Cyrl:]]')

# TODO(sascha): Verify whether /lʲ/ is really phonemic in Kyrgyz;
# is there really a minimal pair with /l/ versus /lʲ/?
#
# TODO(sascha): No gemination for /p b g q z ʃ f v r/? No long /ɯː/?
PHONEMES = makePhonemeSet(""" m mː n nː ŋ p b t tː d dː k kː ɡ q t͡s t͡ʃ d͡ʒ s sː z ʃ f v j χ ʁ r l lː lʲ i iː y yː ɯ u uː e eː o oː ø øː ɑ ɑː . """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('ky-ky_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('ky-ky_FONIPA', GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'ta-fonipa-t-ta'

# Tamil-script characters plus punctuation (general category P).
GRAPHEMES = icu.UnicodeSet('[[:Taml:] [:P:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols.
PHONEMES = makePhonemeSet(""" m n ɲ ɳ ŋ p b tʳ t̪ d̪ ʈ ɖ k ɡ f s ʂ sʼ ʃ h x ʋ r ɻ l ɭ j t͡ʃ d͡ʒ i iː u uː e eː o oː a aː aɪ̯ aʊ̯ """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Ethiopic-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Ethi:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the final entry is a literal ".".
PHONEMES = makePhonemeSet(""" m n ɲ ŋ p pʼ b t tʼ d k kʼ ɡ ʔ f v s sʼ z ʃ ʒ ʕ h t͡ʃ t͡ʃʼ d͡ʒ r j l w i ɨ u e ə o a . """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('am-am_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('am-am_FONIPA', GRAPHEMES, PHONEMES)
p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ θ s sʰ z ʃ h w̥ w j l̥ l í ì ḭ ú ù ṵ ɪ ɪ́ ɪ̀ ɪ̰ e é è ḛ ó ò o̰ ə ɛ́ ɛ̰ ɔ́ ɔ̀ ɔ̰ æ a á à a̰ eɪ̯ éɪ̯ èɪ̯ ḛɪ̯ oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯ əʊ̯ aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯ aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯ . """ ) check("my-my_FONIPA.txt", GRAPHEMES, PHONEMES) regtest("my-my_FONIPA", GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'am-fonipa-t-am'

# Ethiopic-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Ethi:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the final entry is a literal ".".
PHONEMES = makePhonemeSet(""" m n ɲ ŋ p pʼ b t tʼ d k kʼ ɡ ʔ f v s sʼ z ʃ ʒ ʕ h t͡ʃ t͡ʃʼ d͡ʒ r j l w i ɨ u e ə o a . """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Latin-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols.
# NOTE(review): `u` is listed twice (once among the plain vowels, once among
# the diphthongs) -- confirm whether the second entry is intentional.
PHONEMES = makePhonemeSet(""" m n ɲ p b t d c ɟ k g f v s z ʃ ʒ h t͡ʃ t͡s r l j ʎ w i u e ʊ ɛ ɔ a ɪa̯ ɪa̯ʊ̯ ɪʊ̯ ɪɛ̯ u ʊa̯ ʊa̯ʊ̯ ʊɛ̯ ʊɛ̯ɪ̯ ʊɔ̯ ɛɪ̯ ɛʊ̯ aɪ̯ aʊ̯ """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('rm_SURSILV-rm_FONIPA_SURSILV.txt', GRAPHEMES, PHONEMES)
regtest('rm_SURSILV-rm_FONIPA_SURSILV', GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'fa-fonipa-t-fa'

# Arabic-script characters, a handful of explicitly listed marks, and the
# two code points U+200C and U+200D.
GRAPHEMES = icu.UnicodeSet('[[:Arab:] َ ٰ ْ ِ ُ ٓ ّ ٔ ً \u200c \u200d]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the length mark "ː" is listed as an entry of its own.
PHONEMES = makePhonemeSet(""" m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ t͡ʃ d͡ʒ l ɾ j w i u e o æ ɒ ː """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the helpers check() and regtest().
# NOTE(review): no imports are visible in this part of the script -- `icu`,
# makePhonemeSet, check and regtest are presumably imported above; confirm.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'blt-fonipa-t-blt'

# Tai Viet-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Tavt:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the last six entries are tone letters.
PHONEMES = makePhonemeSet(""" p pʰ b t tʰ d k kʰ ɡ ʔ m n ɲ ŋ f v s h x w j l t͡ɕ t͡ɕʷ t͡ɕʰ t͡ɕʰʷ pʷ pʰʷ tʷ dʷ kʰʷ kʷ ɡʷ mʷ nʷ ɲʷ ŋʷ fʷ sʷ hʷ xʷ i ɨ u ɛ e ə ɔ o a aː iə̯ ɨə̯ uə̯ ai̯ ˨ ˧˥ ˨˩ ˥ ˦ ˧˩ """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.
# NOTE(review): `icu` is used below but no `import icu` is visible in this
# part of the script -- confirm it is imported above.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'ky-fonipa-t-ky'

# Cyrillic-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Cyrl:]]')

# TODO(sascha): Verify whether /lʲ/ is really phonemic in Kyrgyz;
# is there really a minimal pair with /l/ versus /lʲ/?
#
# TODO(sascha): No gemination for /p b g q z ʃ f v r/? No long /ɯː/?
PHONEMES = makePhonemeSet(""" m mː n nː ŋ p b t tː d dː k kː ɡ q t͡s t͡ʃ d͡ʒ s sː z ʃ f v j χ ʁ r l lː lʲ i iː y yː ɯ u uː e eː o oː ø øː ɑ ɑː . """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
m̥ m n̥ n ɲ̥ ɲ ŋ̊ ŋ ɴ p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ θ s sʰ z ʃ h w̥ w j l̥ l í ì ḭ ú ù ṵ ʊ ʊ́ ʊ̀ ʊ̰ ɪ ɪ́ ɪ̀ ɪ̰ e é è ḛ ó ò o̰ ə ɛ ɛ́ ɛ̀ ɛ̰ ɔ́ ɔ̀ ɔ̰ æ a á à a̰ eɪ̯ éɪ̯ èɪ̯ ḛɪ̯ oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯ əʊ̯ aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯ aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯ . """) check('my-fonipa-t-my', GRAPHEMES, PHONEMES) regtest('my-fonipa-t-my', GRAPHEMES, PHONEMES)
import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Latin-script characters plus punctuation (general category P).
GRAPHEMES = icu.UnicodeSet('[[:Latn:] [:P:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols.
PHONEMES = makePhonemeSet(""" m n p b d t k ɡ s z ʃ ʒ t͡s d͡z t͡ʃ d͡ʒ f x h r v l j i u e o a ui̯ ei̯ eu̯ oi̯ ai̯ au̯ """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('eo-eo_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('eo-eo_FONIPA', GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Latin-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the final entry is a literal ".".
PHONEMES = makePhonemeSet(""" m n ŋ p b t d k ɡ f v s z ʃ ʒ h t͡s d͡ʒ ɾ l j w i u e o a ei̯ eu̯ oi̯ ai̯ au̯ . """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('ia-ia_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('ia-ia_FONIPA', GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'rm-fonipa-sursilv-t-rm-sursilv'

# Latin-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols.
# NOTE(review): `u` is listed twice (once among the plain vowels, once among
# the diphthongs) -- confirm whether the second entry is intentional.
PHONEMES = makePhonemeSet(""" m n ɲ p b t d c ɟ k g f v s z ʃ ʒ h t͡ʃ t͡s r l j ʎ w i u e ʊ ɛ ɔ a ɪa̯ ɪa̯ʊ̯ ɪʊ̯ ɪɛ̯ u ʊa̯ ʊa̯ʊ̯ ʊɛ̯ ʊɛ̯ɪ̯ ʊɔ̯ ɛɪ̯ ɛʊ̯ aɪ̯ aʊ̯ """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Sinhala-script characters plus format characters (general category Cf).
GRAPHEMES = icu.UnicodeSet('[[:Sinh:] [:Cf:]]')

# Phoneme inventory, written as one string of space-separated IPA symbols;
# the final entry is a literal ".".
PHONEMES = makePhonemeSet(""" m n ɲ ŋ p b ᵐb ⁿd ʈ ɖ ⁿɖ k g ᵑg s ʃ t͡ʃ d͡ʒ f h r ʋ l j w i iː u uː e eː ə o oː æː æ a aː . """)

# check() receives the name with a ".txt" suffix, regtest() without it.
check('si-si_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('si-si_FONIPA', GRAPHEMES, PHONEMES)
import codecs
import icu
from cldr_util import makePhonemeSet, match, check, regtest

# Script-level check of one rule set: a grapheme set and a phoneme set are
# built and handed to the cldr_util helpers. What check() and regtest() do
# with them is defined in cldr_util, not here.

# Identifier passed unchanged to both helpers.
_TRANSFORM_ID = 'sat-fonipa-t-sat'

# Ol Chiki-script characters only.
GRAPHEMES = icu.UnicodeSet('[[:Olck:]]')

# TODO: This phoneme set seems a little large.
# Collect a large corpus, and see which ones actually occur.
# TODO: Is /ɽː/ physiologically possible?
PHONEMES = makePhonemeSet(""" m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ g ʔ s sː h d͡ʒ ɽ r l lː w wː w̃ w̃ː i iː ĩ ĩː u uː ũ ũː e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː a aː ã ãː """)

check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)