Beispiel #1
0
 def test_denasalise_nasal_assimilation(self):
     """Check `transcribe` on words containing the nasal-vowel letters ą / ę."""
     # (orthographic input, expected transcription), checked in this order.
     cases = (
         ('mąka', 'mɔŋka'),
         ('gęsty', 'ɡɛ̃w̃stɨ'),
         ('rąbać', 'rɔmbatɕ'),
         ('podjęli', 'pɔdʲjɛlʲi'),
         ('stanął', 'stanɔw'),
         ('pieniądz', 'pʲjɛɲɔɳts'),
         ('pędzić', 'pɛɲdʑitɕ'),
         ('pójdą', 'pujdɔ̃w̃'),
         ('kręty', 'krɛntɨ'),
         ('pójdę', 'pujdɛ'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)
def transcribe_text_Cracow(text):
    """Transcribe running text word by word, then apply cross-word voicing.

    The text is lowercased and split into alternating runs of letters from
    ``a-z`` plus ``ąćęłńóśźż`` ("words") and runs of any other characters
    ("separators"). Every word is passed to ``transcribe``. Afterwards the
    end of each word except the last is rewritten through ``voi_dict_rev``
    when the next word begins with a segment found in ``vocs``,
    ``voicing_dict`` or ``ipa_vowels`` (pre-vocalic and
    pre-voiced-obstruent voicing). Separators are put back unchanged in
    their original positions.

    Args:
        text: The string to transcribe.

    Returns:
        The transcribed string. Empty input yields an empty string, and
        input without any word is returned lowercased but otherwise as is.
    """
    # lowercase all letters
    text = text.lower()

    # Splitting on a *captured* pattern keeps the matches in the result:
    # odd indices hold the words, even indices hold the separators (which
    # may be empty strings at either end). This keeps words and separators
    # in their true order no matter what character the text starts with.
    pieces = re.split(r'([a-ząćęłńóśźż]+)', text)

    # phonemic transcription of words taken individually
    # (regional processes not applied)
    ph_words = [transcribe(word) for word in pieces[1::2]]

    # pre-vocalic voicing and pre-voiced-obstruent voicing
    for i in range(len(ph_words) - 1):
        current = ph_words[i]
        following = ph_words[i + 1]
        if not current or not following:
            # Nothing to inspect if a transcription came back empty.
            continue

        onset = following[0]
        if not (onset in vocs or onset in voicing_dict
                or onset in ipa_vowels):
            continue

        # Prefer a two-character ending over a one-character ending.
        for size in (2, 1):
            ending = current[-size:]
            if ending in voi_dict_rev:
                ph_words[i] = current[:-size] + voi_dict_rev[ending]
                break

    # put the transcriptions back into the slots the words came from
    pieces[1::2] = ph_words

    return ''.join(pieces)
Beispiel #3
0
 def test_final_devoicing(self):
     """Check `transcribe` on words spelled with a final d / ż / g."""
     # (orthographic input, expected transcription), checked in this order.
     cases = (
         ('standard', 'standart'),
         ('wąż', 'vɔ̃w̃ʂ'),
         ('bug', 'buk'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)
Beispiel #4
0
 def surface_palatalisation(self):
     """Check that `transcribe` marks consonants before i with ʲ."""
     # NOTE(review): unlike the sibling methods, this name lacks the
     # `test` prefix, so default unittest discovery would presumably skip
     # it -- confirm whether that is intentional.
     cases = (
         ('kibitki', 'kʲibʲitkʲi'),
         ('kwiatki', 'kfʲjatkʲi'),
         ('kolie', 'kɔlʲjɛ'),
         ('robi', 'rɔbʲi'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)
Beispiel #5
0
 def test_regressive_devoicing(self):
     """Check `transcribe` on b / z spelled before a voiceless consonant."""
     # (orthographic input, expected transcription), checked in this order.
     cases = (
         ('babka', 'bapka'),
         ('rozpacz', 'rɔspatʂ'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)
Beispiel #6
0
 def test_progressive_voicing(self):
     """Check `transcribe` on cz / ś spelled before a voiced consonant."""
     # (orthographic input, expected transcription), checked in this order.
     cases = (
         ('poczdam', 'pɔdʐdam'),
         ('prośba', 'prɔʑba'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)
Beispiel #7
0
 def test_progressive_devoicing(self):
     """Check `transcribe` on w / rz spelled after a voiceless consonant."""
     # (orthographic input, expected transcription), checked in this order.
     cases = (
         ('świat', 'ɕfʲjat'),
         ('przetrzyma', 'pʂɛtʂɨma'),
         ('wycwanić', 'vɨtsfaɲitɕ'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)
Beispiel #8
0
 def test_progressive_voicing_morphological(self):
     """Check `transcribe` on words where k / ś is followed by `że`."""
     # (orthographic input, expected transcription), checked in this order.
     cases = (
         ('jakże', 'jaɡʐɛ'),
         ('wszakżeż', 'fʂaɡʐɛʂ'),
         ('paśże', 'paʑʐɛ'),
     )
     for spelling, expected in cases:
         self.assertEqual(transcribe(spelling), expected)