def test_denasalise_nasal_assimilation(self):
    """Check the transcription of words spelled with the letters ą and ę."""
    # (spelling, expected transcription) pairs, checked in this order.
    expectations = (
        ('mąka', 'mɔŋka'),
        ('gęsty', 'ɡɛ̃w̃stɨ'),
        ('rąbać', 'rɔmbatɕ'),
        ('podjęli', 'pɔdʲjɛlʲi'),
        ('stanął', 'stanɔw'),
        ('pieniądz', 'pʲjɛɲɔɳts'),
        ('pędzić', 'pɛɲdʑitɕ'),
        ('pójdą', 'pujdɔ̃w̃'),
        ('kręty', 'krɛntɨ'),
        ('pójdę', 'pujdɛ'),
    )
    for spelling, ipa in expectations:
        self.assertEqual(transcribe(spelling), ipa)
def transcribe_text_Cracow(text):
    """Transcribe running text, applying voicing across word boundaries.

    The text is lowercased and split into alternating runs of alphabetic
    characters (``a-z`` plus ``ąćęłńóśźż``) and of everything else.  Each
    alphabetic run is passed to ``transcribe`` on its own.  Afterwards, for
    every pair of consecutive words (whatever separates them), a word-final
    segment listed in ``voi_dict_rev`` is replaced by its mapped value when
    the following word starts with a symbol found in ``vocs``,
    ``voicing_dict`` or ``ipa_vowels`` (pre-vocalic voicing and
    pre-voiced-obstruent voicing).  Non-alphabetic runs are copied through,
    lowercased, at their original positions.

    Args:
        text: The string to transcribe.

    Returns:
        The transcribed string.  Empty input yields ``''``.
    """
    text = text.lower()

    # Tokenise in a single pass so the runs are kept in document order.
    # Reassembling from two separate lists required guessing which kind of
    # run came first; that guess failed on empty text and on text starting
    # with a letter outside the alphabet class above (e.g. 'ü').
    pieces = []
    word_slots = []  # indices into `pieces` that hold transcribed words
    for run in re.finditer(r'([a-ząćęłńóśźż]+)|[^a-ząćęłńóśźż]+', text):
        if run.group(1) is not None:
            # phonemic transcription of the word taken individually
            # (regional processes not applied)
            word_slots.append(len(pieces))
            pieces.append(transcribe(run.group(1)))
        else:
            pieces.append(run.group())

    # pre-vocalic voicing and pre-voiced-obstruent voicing
    for slot, next_slot in zip(word_slots, word_slots[1:]):
        word = pieces[slot]
        following = pieces[next_slot]
        if not word or not following:
            # An empty transcription has no final/initial symbol to inspect.
            continue
        onset = following[0]
        if not (onset in vocs or onset in voicing_dict
                or onset in ipa_vowels):
            continue
        # Two-symbol endings take precedence over single-symbol endings.
        if word[-2:] in voi_dict_rev:
            pieces[slot] = word[:-2] + voi_dict_rev[word[-2:]]
        elif word[-1] in voi_dict_rev:
            pieces[slot] = word[:-1] + voi_dict_rev[word[-1]]

    return ''.join(pieces)
def test_final_devoicing(self):
    """Check the transcription of 'standard', 'wąż' and 'bug'."""
    # Maps each spelling to the transcription it must produce.
    expected = {
        'standard': 'standart',
        'wąż': 'vɔ̃w̃ʂ',
        'bug': 'buk',
    }
    for spelling, ipa in expected.items():
        self.assertEqual(transcribe(spelling), ipa)
def surface_palatalisation(self):
    """Check the transcription of four words spelled with the letter i."""
    # NOTE(review): unlike its siblings this name has no `test` prefix, so
    # the default unittest loader presumably does not collect it -- confirm
    # whether that is intentional before renaming.
    cases = (
        ('kibitki', 'kʲibʲitkʲi'),
        ('kwiatki', 'kfʲjatkʲi'),
        ('kolie', 'kɔlʲjɛ'),
        ('robi', 'rɔbʲi'),
    )
    for spelling, ipa in cases:
        self.assertEqual(transcribe(spelling), ipa)
def test_regressive_devoicing(self):
    """Check the transcription of 'babka' and 'rozpacz'."""
    cases = (
        ('babka', 'bapka'),
        ('rozpacz', 'rɔspatʂ'),
    )
    for spelling, ipa in cases:
        self.assertEqual(transcribe(spelling), ipa)
def test_progressive_voicing(self):
    """Check the transcription of 'poczdam' and 'prośba'."""
    expected = {
        'poczdam': 'pɔdʐdam',
        'prośba': 'prɔʑba',
    }
    for spelling, ipa in expected.items():
        self.assertEqual(transcribe(spelling), ipa)
def test_progressive_devoicing(self):
    """Check the transcription of 'świat', 'przetrzyma' and 'wycwanić'."""
    cases = (
        ('świat', 'ɕfʲjat'),
        ('przetrzyma', 'pʂɛtʂɨma'),
        ('wycwanić', 'vɨtsfaɲitɕ'),
    )
    for spelling, ipa in cases:
        self.assertEqual(transcribe(spelling), ipa)
def test_progressive_voicing_morphological(self):
    """Check the transcription of 'jakże', 'wszakżeż' and 'paśże'."""
    expected = {
        'jakże': 'jaɡʐɛ',
        'wszakżeż': 'fʂaɡʐɛʂ',
        'paśże': 'paʑʐɛ',
    }
    for spelling, ipa in expected.items():
        self.assertEqual(transcribe(spelling), ipa)