Example #1
0
from cldr_util import makePhonemeSet, match, check, regtest

# Sinhala ('si') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Sinhala-script code points plus format characters (Cf).
GRAPHEMES = icu.UnicodeSet()
GRAPHEMES.applyPattern('[[:Sinh:] [:Cf:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# NOTE(review): the first diphthong row previously listed 'ou̯' twice in a
# row; the redundant copy has been dropped. If a different diphthong was
# meant in that slot, add it here -- confirm against the transform rules.
# TODO: ᵑɡ or ⁿɡ ?
# TODO: No  t͡ʃ  d͡ʒ ?
# TODO: No əː ?
PHONEMES = makePhonemeSet("""

    m n ɲ ŋ
    p b ᵐb ⁿd d t ʈ ɖ ⁿɖ k ɡ ⁿɡ
    s ʃ
    c ɟ
    f h
    r
    l j
    w

    i iː      u uː
    e eː ə əː o oː
    æː æ      a aː
    ei̯ ou̯
    æi̯ ai̯ au̯
    .

""")

# Both cldr_util entry points get the same arguments; what exactly they
# verify is defined in cldr_util (presumably that the named transform maps
# GRAPHEMES input onto PHONEMES output -- confirm there).
check('si-fonipa-t-si', GRAPHEMES, PHONEMES)
regtest('si-fonipa-t-si', GRAPHEMES, PHONEMES)
    p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ
    θ s sʰ z ʃ h
    w̥ w j
    l̥ l

    í ì ḭ
    ú ù ṵ
    ʊ ʊ́ ʊ̀ ʊ̰
    ɪ ɪ́ ɪ̀ ɪ̰
    e é è ḛ
    ó ò o̰
    ə
    ɛ ɛ́ ɛ̀ ɛ̰
    ɔ́ ɔ̀ ɔ̰
    æ
    a á à a̰

    eɪ̯ éɪ̯ èɪ̯ ḛɪ̯
    oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯
    əʊ̯
    aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯
    aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯

    .

""")


check('my-my_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('my-my_FONIPA', GRAPHEMES, PHONEMES)
Example #3
0
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Esperanto ('eo') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'eo-fonipa-t-eo'

# Input side: Latin-script code points plus punctuation.
GRAPHEMES = icu.UnicodeSet('[[:Latn:] [:P:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n
    p b d t k ɡ
    s z ʃ ʒ
    t͡s d͡z  t͡ʃ  d͡ʒ
    f x h
    r
    v l j

    i u
    e o
    a

    ui̯
    ei̯ eu̯ oi̯
    ai̯ au̯

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
Example #4
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Interlingua ('ia') -> IPA transform: declare the permitted input and
# output alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'ia-fonipa-t-ia'

# Input side: Latin-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n ŋ
    p b t d k ɡ
    f v s z ʃ ʒ h
    t͡s d͡ʒ
    ɾ l j w

    i u
    e o
    a

    ei̯ eu̯ oi̯
    ai̯ au̯

    .

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Santali ('sat') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Ol Chiki script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Olck:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# TODO: This phoneme set seems a little large.
# Collect a large corpus, and see which ones actually occur.
# TODO: Is /ɽː/ physiologically possible?
PHONEMES = makePhonemeSet("""

    m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
    p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ g ʔ
    s sː h
     d͡ʒ
    ɽ r
    l lː
    w wː w̃ w̃ː

    i iː ĩ ĩː u uː ũ ũː
    e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː
    ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː
    a aː ã ãː

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('sat-sat_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('sat-sat_FONIPA', GRAPHEMES, PHONEMES)
    m̥ m n̥ n ɲ̥ ɲ ŋ̊  ŋ ɴ
    p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ
    θ s sʰ z ʃ h
    w̥ w j
    l̥ l

    í ì ḭ
    ú ù ṵ
    ʊ ʊ́ ʊ̀ ʊ̰
    ɪ ɪ́ ɪ̀ ɪ̰
    e é è ḛ
    ó ò o̰
    ə
    ɛ ɛ́ ɛ̀ ɛ̰
    ɔ́ ɔ̀ ɔ̰
    æ
    a á à a̰

    eɪ̯ éɪ̯ èɪ̯ ḛɪ̯
    oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯
    əʊ̯
    aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯
    aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯

    .

""")

check('my-my_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('my-my_FONIPA', GRAPHEMES, PHONEMES)
from cldr_util import makePhonemeSet, match, check, regtest

# Santali ('sat') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Ol Chiki script code points only.
GRAPHEMES = icu.UnicodeSet("[[:Olck:]]")

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# TODO: This phoneme set seems a little large.
# Collect a large corpus, and see which ones actually occur.
# TODO: Is /ɽː/ physiologically possible?
PHONEMES = makePhonemeSet("""

    m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
    p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ g ʔ
    s sː h
     d͡ʒ
    ɽ r
    l lː
    w wː w̃ w̃ː

    i iː ĩ ĩː u uː ũ ũː
    e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː
    ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː
    a aː ã ãː

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check("sat-sat_FONIPA.txt", GRAPHEMES, PHONEMES)
regtest("sat-sat_FONIPA", GRAPHEMES, PHONEMES)
Example #8
0
from cldr_util import makePhonemeSet, match, check, regtest

# Kyrgyz ('ky') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Cyrillic-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Cyrl:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# TODO(sascha): Verify whether /lʲ/ is really phonemic in Kyrgyz;
# is there really a minimal pair with /l/ versus /lʲ/?
#
# TODO(sascha): No gemination for /p b g q z ʃ f v r/? No long /ɯː/?
PHONEMES = makePhonemeSet("""

    m mː n nː ŋ
    p b t tː d dː k kː ɡ q
    t͡s t͡ʃ d͡ʒ
    s sː z ʃ
    f v j χ ʁ
    r l lː lʲ

    i iː y yː ɯ u uː
    e eː o oː
    ø øː
    ɑ ɑː

    .

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('ky-ky_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('ky-ky_FONIPA', GRAPHEMES, PHONEMES)
Example #9
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Tamil ('ta') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'ta-fonipa-t-ta'

# Input side: Tamil-script code points plus punctuation.
GRAPHEMES = icu.UnicodeSet('[[:Taml:] [:P:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n ɲ ɳ ŋ
    p b tʳ t̪ d̪ ʈ ɖ k ɡ
    f s ʂ sʼ ʃ h x
    ʋ r ɻ l ɭ j

    t͡ʃ d͡ʒ

    i iː u uː
    e eː o oː
    a aː

    aɪ̯ aʊ̯


""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
Example #10
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Amharic ('am') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Ethiopic-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Ethi:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n ɲ ŋ
    p pʼ b t tʼ d k kʼ ɡ ʔ
    f v s sʼ z ʃ ʒ ʕ h
    t͡ʃ t͡ʃʼ d͡ʒ
    r j l
    w

    i ɨ u
    e ə o
    a

    .

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('am-am_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('am-am_FONIPA', GRAPHEMES, PHONEMES)
Example #11
0
    p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ
    θ s sʰ z ʃ h
    w̥ w j
    l̥ l

    í ì ḭ
    ú ù ṵ
    ɪ ɪ́ ɪ̀ ɪ̰
    e é è ḛ
    ó ò o̰
    ə
    ɛ́ ɛ̰
    ɔ́ ɔ̀ ɔ̰
    æ
    a á à a̰

    eɪ̯ éɪ̯ èɪ̯ ḛɪ̯
    oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯
    əʊ̯
    aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯
    aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯

    .

"""
)


check("my-my_FONIPA.txt", GRAPHEMES, PHONEMES)
regtest("my-my_FONIPA", GRAPHEMES, PHONEMES)
Example #12
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Amharic ('am') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'am-fonipa-t-am'

# Input side: Ethiopic-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Ethi:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n ɲ ŋ
    p pʼ b t tʼ d k kʼ ɡ ʔ
    f v s sʼ z ʃ ʒ ʕ h
    t͡ʃ t͡ʃʼ d͡ʒ
    r j l
    w

    i ɨ u
    e ə o
    a

    .

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Romansh, Sursilvan variant -> IPA transform: declare the permitted input
# and output alphabets and pass them to the cldr_util checks.

# Input side: Latin-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# NOTE(review): the diphthong row contains a bare 'u', which is already
# listed in the vowel row above it -- possibly a leftover or a typo for a
# diphthong; confirm before changing.
PHONEMES = makePhonemeSet("""

    m n ɲ
    p b t d c ɟ k g
    f v s z ʃ ʒ h
    t͡ʃ t͡s
    r l j ʎ
    w

    i u e ʊ ɛ ɔ a
    ɪa̯ ɪa̯ʊ̯ ɪʊ̯ ɪɛ̯ u ʊa̯ ʊa̯ʊ̯ ʊɛ̯ ʊɛ̯ɪ̯ ʊɔ̯
    ɛɪ̯ ɛʊ̯ aɪ̯ aʊ̯

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('rm_SURSILV-rm_FONIPA_SURSILV.txt', GRAPHEMES, PHONEMES)
regtest('rm_SURSILV-rm_FONIPA_SURSILV', GRAPHEMES, PHONEMES)
Example #14
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Persian ('fa') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'fa-fonipa-t-fa'

# Input side: Arabic-script code points, a handful of explicitly listed
# combining marks, and U+200C / U+200D.
GRAPHEMES = icu.UnicodeSet('[[:Arab:] َ ٰ ْ ِ ُ ٓ ّ ٔ ً \u200c \u200d]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# The length mark 'ː' is listed as a symbol of its own here.
PHONEMES = makePhonemeSet("""

    m n
    p b t d k ɡ ʔ
    f v s z ʃ ʒ ʁ ɢ h χ
    t͡ʃ d͡ʒ
    l ɾ j w
    i u e o æ ɒ
    ː

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
Example #15
0
# Tai Dam ('blt') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'blt-fonipa-t-blt'

# Input side: Tai Viet script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Tavt:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols;
# the last row holds tone letters.
PHONEMES = makePhonemeSet("""

p pʰ b t tʰ d k kʰ ɡ ʔ
m n ɲ ŋ
f v s h x
w j l

t͡ɕ t͡ɕʷ t͡ɕʰ t͡ɕʰʷ

pʷ pʰʷ tʷ dʷ kʰʷ kʷ ɡʷ
mʷ nʷ ɲʷ ŋʷ
fʷ sʷ hʷ xʷ

i ɨ u
ɛ e ə ɔ o
a aː

iə̯ ɨə̯ uə̯
ai̯

˨ ˧˥ ˨˩ ˥ ˦ ˧˩

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
Example #16
0
from cldr_util import makePhonemeSet, match, check, regtest

# Kyrgyz ('ky') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'ky-fonipa-t-ky'

# Input side: Cyrillic-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Cyrl:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# TODO(sascha): Verify whether /lʲ/ is really phonemic in Kyrgyz;
# is there really a minimal pair with /l/ versus /lʲ/?
#
# TODO(sascha): No gemination for /p b g q z ʃ f v r/? No long /ɯː/?
PHONEMES = makePhonemeSet("""

    m mː n nː ŋ
    p b t tː d dː k kː ɡ q
    t͡s t͡ʃ d͡ʒ
    s sː z ʃ
    f v j χ ʁ
    r l lː lʲ

    i iː y yː ɯ u uː
    e eː o oː
    ø øː
    ɑ ɑː

    .

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
Example #17
0
    m̥ m n̥ n ɲ̥ ɲ ŋ̊  ŋ ɴ
    p pʰ b t tʰ d t͡ɕ t͡ɕʰ d͡ʑ k kʰ g ʔ
    θ s sʰ z ʃ h
    w̥ w j
    l̥ l

    í ì ḭ
    ú ù ṵ
    ʊ ʊ́ ʊ̀ ʊ̰
    ɪ ɪ́ ɪ̀ ɪ̰
    e é è ḛ
    ó ò o̰
    ə
    ɛ ɛ́ ɛ̀ ɛ̰
    ɔ́ ɔ̀ ɔ̰
    æ
    a á à a̰

    eɪ̯ éɪ̯ èɪ̯ ḛɪ̯
    oʊ̯ óʊ̯ òʊ̯ o̰ʊ̯
    əʊ̯
    aɪ̯ áɪ̯ àɪ̯ a̰ɪ̯
    aʊ̯ áʊ̯ àʊ̯ a̰ʊ̯

    .

""")

check('my-fonipa-t-my', GRAPHEMES, PHONEMES)
regtest('my-fonipa-t-my', GRAPHEMES, PHONEMES)
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Esperanto ('eo') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Latin-script code points plus punctuation.
GRAPHEMES = icu.UnicodeSet('[[:Latn:] [:P:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n
    p b d t k ɡ
    s z ʃ ʒ
    t͡s d͡z  t͡ʃ  d͡ʒ
    f x h
    r
    v l j

    i u
    e o
    a

    ui̯
    ei̯ eu̯ oi̯
    ai̯ au̯

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('eo-eo_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('eo-eo_FONIPA', GRAPHEMES, PHONEMES)
Example #19
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Interlingua ('ia') -> IPA transform: declare the permitted input and
# output alphabets and pass them to the cldr_util checks.

# Input side: Latin-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n ŋ
    p b t d k ɡ
    f v s z ʃ ʒ h
    t͡s d͡ʒ
    ɾ l j w

    i u
    e o
    a

    ei̯ eu̯ oi̯
    ai̯ au̯

    .

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('ia-ia_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('ia-ia_FONIPA', GRAPHEMES, PHONEMES)
Example #20
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Romansh, Sursilvan variant -> IPA transform: declare the permitted input
# and output alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'rm-fonipa-sursilv-t-rm-sursilv'

# Input side: Latin-script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Latn:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# NOTE(review): the diphthong row contains a bare 'u', which is already
# listed in the vowel row above it -- possibly a leftover or a typo for a
# diphthong; confirm before changing.
PHONEMES = makePhonemeSet("""

    m n ɲ
    p b t d c ɟ k g
    f v s z ʃ ʒ h
    t͡ʃ t͡s
    r l j ʎ
    w

    i u e ʊ ɛ ɔ a
    ɪa̯ ɪa̯ʊ̯ ɪʊ̯ ɪɛ̯ u ʊa̯ ʊa̯ʊ̯ ʊɛ̯ ʊɛ̯ɪ̯ ʊɔ̯
    ɛɪ̯ ɛʊ̯ aɪ̯ aʊ̯

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
Example #21
0
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Sinhala ('si') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.

# Input side: Sinhala-script code points plus format characters (Cf).
GRAPHEMES = icu.UnicodeSet('[[:Sinh:] [:Cf:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
PHONEMES = makePhonemeSet("""

    m n ɲ ŋ
    p b ᵐb ⁿd ʈ ɖ ⁿɖ k g ᵑg
    s ʃ
    t͡ʃ  d͡ʒ
    f h
    r
    ʋ l j
    w

    i iː   u uː
    e eː ə o oː
    æː æ   a aː
    .

""")

# check() is given a '.txt' name while regtest() is given the bare name;
# what each one does with it is defined in cldr_util.
check('si-si_FONIPA.txt', GRAPHEMES, PHONEMES)
regtest('si-si_FONIPA', GRAPHEMES, PHONEMES)
Example #22
0
import codecs
import icu

from cldr_util import makePhonemeSet, match, check, regtest

# Santali ('sat') -> IPA transform: declare the permitted input and output
# alphabets and pass them to the cldr_util checks.
_TRANSFORM_ID = 'sat-fonipa-t-sat'

# Input side: Ol Chiki script code points only.
GRAPHEMES = icu.UnicodeSet('[[:Olck:]]')

# Output side: the IPA inventory, listed as whitespace-separated symbols.
# TODO: This phoneme set seems a little large.
# Collect a large corpus, and see which ones actually occur.
# TODO: Is /ɽː/ physiologically possible?
PHONEMES = makePhonemeSet("""

    m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
    p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ g ʔ
    s sː h
     d͡ʒ
    ɽ r
    l lː
    w wː w̃ w̃ː

    i iː ĩ ĩː u uː ũ ũː
    e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː
    ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː
    a aː ã ãː

""")

# What the two entry points verify is defined in cldr_util (presumably that
# the transform only yields symbols from PHONEMES -- confirm there).
check(_TRANSFORM_ID, GRAPHEMES, PHONEMES)
regtest(_TRANSFORM_ID, GRAPHEMES, PHONEMES)