def test_context(self):
    """
        class TestLops.test_context

        Exercise LOPhoneticSegments.context() with malformed patterns
        (PhoSegError expected) and with well-formed ones (a boolean is
        expected).
    """
    # malformed patterns, checked against an empty list of segments :
    #   "VC"    : missing asterisk
    #   "C*VC*" : several asterisks
    #   "*VC"   : asterisk in position #0
    for malformed_pattern in ("VC", "C*VC*", "*VC"):
        lops = LOPhoneticSegments([])
        with self.assertRaises(PhoSegError):
            lops.context(10, malformed_pattern)

    # empty list of segments but a well-formed pattern : no exception,
    # the expected answer is False.
    lops = LOPhoneticSegments([])
    self.assertEqual(lops.context(10, "V*C"), False)

    # nothing's special, everything's ok : every (index, pattern) pair
    # below is expected to be accepted for the phonemes of "tapa".
    for index, pattern in ((1, "CV*C"), (0, "C*VC"), (3, "CVCV*")):
        phonemes = [ps[0] for ps in PhoSegObject(ipa="tapa").get_phonemes()]
        lops = LOPhoneticSegments(phonemes)
        self.assertEqual(lops.context(index, pattern), True)
def LATsyllabication(list_of_phonemes):
    """
        Return a list of Syllable objects.

        This function uses the SSP principle : the sonority of a consonant
        is compared to the sonority of the phoneme that precedes it.

        list_of_phonemes : sequence of phoneme objects; each of them is
                           queried through .is_a_vowel(), .is_a_semivowel()
                           and .get_sonority().

        basic Latin syllabication :
        * (rule #1) V - CV
        * (rule #2) VC - CV

        An empty <list_of_phonemes> gives an empty list; otherwise the
        result contains at least one Syllable.
    """
    if len(list_of_phonemes) == 0:
        return []

    segments = LOPhoneticSegments(list_of_phonemes)

    # the result always starts with one (still empty) syllable :
    syllables = [Syllable()]
    previous_sonority = -1
    state = "onset"     # part of the syllable being filled : onset/nucleus/coda

    for idx, phoneme in enumerate(list_of_phonemes):

        if phoneme.is_a_vowel():
            # state == onset            : (t*,,)  -> (t,i,)
            # state == nucleus or coda  : (,a*,)  -> (,a,)(,i,)
            #                             (,a,p*) -> (,a,p)(,i,)
            if state != "onset":
                syllables.append(Syllable())
            syllables[-1].nucleus.append(phoneme)
            state = "nucleus"

        elif state == "onset":
            # consonant or semi-vowel while filling an onset :
            # (f*,,) -> (fr,,)     : sonority is increasing
            # (f*,,) -> (f,,)(t,,) : sonority is decreasing or equal
            if not phoneme.get_sonority() > previous_sonority:
                syllables.append(Syllable())
            syllables[-1].onset.append(phoneme)

        elif state == "nucleus":
            # consonant or semi-vowel just after a nucleus :
            # (,a*,) -> (,a,)(p,i,) : semi-vowel, or rule #1 (VCV -> V_CV)
            # (,a*,) -> (,a,p)      : otherwise
            if phoneme.is_a_semivowel() or segments.context(idx, "VC*V"):
                syllables.append(Syllable())
                syllables[-1].onset.append(phoneme)
                state = "onset"
            else:
                syllables[-1].coda.append(phoneme)
                state = "coda"

        else:
            # consonant or semi-vowel while filling a coda :
            # (d,i,s*) -> (d,i,s)(p,e,) : rule #2
            # (,a,f*)  -> (,a,f)(r,,)   : sonority is increasing
            # (,a,f*)  -> (,a,ft)       : sonority is decreasing or equal
            opens_a_syllable = (segments.context(idx, "VCC*V") or
                                segments.context(idx, "VCCC*V") or
                                phoneme.get_sonority() > previous_sonority)
            if opens_a_syllable:
                syllables.append(Syllable())
                syllables[-1].onset.append(phoneme)
                state = "onset"
            else:
                syllables[-1].coda.append(phoneme)

        previous_sonority = phoneme.get_sonority()

    return syllables
def GRCsyllabication(list_of_phonemes):
    """
        Return a list of Syllable objects.

        This function uses the SSP principle.

        list_of_phonemes : sequence of phoneme objects (not a plain string);
                           each of them is queried through .is_a_vowel(),
                           .is_a_semivowel(), .is_the_h_sound(),
                           .is_the_j_sound(), .is_the_w_sound(),
                           .is_the_s_sound() and .get_sonority().

        Ancient Greek (without correptio attica) syllabication :
        * (rule #1) V - CV
        * (rule #2) VC - CV
        * (rule #3) VC - CCV
                    ἔσφιγξα = ˈespʰiŋ|ksa, not ˈespʰiŋk|sa

        Returned value : the list of the non-empty syllables; an empty
        <list_of_phonemes> gives an empty list.
    """
    if len(list_of_phonemes) == 0:
        return []

    lops = LOPhoneticSegments(list_of_phonemes)

    # we add at least one syllable :
    res = [Syllable()]
    last_sonority = -1
    position = "onset"  # = onset/nucleus/coda

    for index_p, phon in enumerate(list_of_phonemes):

        # "h", "j" and "w" are special sounds : they are never stored in a
        # nucleus nor in a coda.
        phon_is_the_h_sound = phon.is_the_h_sound()
        phon_is_the_j_sound = phon.is_the_j_sound()
        phon_is_the_w_sound = phon.is_the_w_sound()
        phon_is_h_j_or_w = (phon_is_the_h_sound or
                            phon_is_the_j_sound or
                            phon_is_the_w_sound)

        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        # <phon> is a vowel or a semivowel : (e.g. 'i')
        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        if (phon.is_a_vowel() or phon.is_a_semivowel()) and \
           not phon_is_h_j_or_w:
            # current position : onset
            #       (t*,,)  -> (t,i,)
            # current position : nucleus or coda
            #       (,a*,)  -> (,a,)(,i,)
            #       (,a,p*) -> (,a,p)(,i,)
            if position != "onset":
                res.append(Syllable())
            res[-1].nucleus.append(phon)
            position = "nucleus"

        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        # <phon> is a consonant or the "h" sound or the "j" sound or
        # the "w" sound :
        #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
        elif position == "onset":
            # current position : onset; the onset simply grows.
            res[-1].onset.append(phon)

        elif position == "nucleus":
            # current position : nucleus
            #       (,a*,) -> (,a,)(p,i,)   rule #1 : VCV -> V_CV
            #       (,a*,) -> (,a,)(h,,)    [h], [j], [w] can't be in the coda
            #       (,a*,) -> (,a,p)        otherwise
            if lops.context(index_p, "VC*V") or phon_is_h_j_or_w:
                res.append(Syllable())
                res[-1].onset.append(phon)
                position = "onset"
            else:
                res[-1].coda.append(phon)
                position = "coda"

        else:
            # current position : coda
            #       (,i,ŋ*)  -> (,i,ŋ)(ks,a)   : rule #3 VC - CCV
            #       (d,i,s*) -> (d,i,s)(p,e,)  : rule #2 VC - CV
            #       (,a,f*)  -> (,a,f)(r,,)    : sonority is increasing
            #       (,a,f*)  -> (,a,ft)        : sonority is decreasing or equal
            if lops.context(index_p, "VCC*CV") or \
               lops.context(index_p, "VCC*V") or \
               phon.get_sonority() > last_sonority:
                res.append(Syllable())
                res[-1].onset.append(phon)
                position = "onset"
            else:
                res[-1].coda.append(phon)

        last_sonority = phon.get_sonority()

    #...........................................................................
    #
    # Moving quasi empty syllables having just a 's' sound in the onset to the
    # coda of the preceding syllable.
    #
    # E.g. : (b,a)(s,,)(t,i,,) -> (b,a,s)(t,i,)
    #
    # The first syllable has no preceding syllable : it is never relocated.
    # (Indexing with [index-1] from index 0 would silently wrap around to the
    # LAST syllable of the list.)
    #
    #...........................................................................
    for preceding, syllable in zip(res, res[1:]):
        if len(syllable.onset) == 1 and \
           syllable.onset[0].is_the_s_sound() and \
           len(syllable.nucleus) == 0 and \
           len(syllable.coda) == 0:
            preceding.coda.append(syllable.onset[0])
            del syllable.onset[0]

    #...........................................................................
    #
    # adding the non-empty syllables to the result.
    #
    # E.g. (b,a,)(,,)(t,i) -> (b,a,)(t,i)
    #
    #...........................................................................
    return [syllable for syllable in res
            if len(syllable.onset) != 0 or
            len(syllable.coda) != 0 or
            len(syllable.nucleus) != 0]