def test_zajtso(self):
    # 'zajtšo' must come back as 'zaj' + 'tšo', with the flat phonotype
    # list regrouped to match those two syllables.
    given_phonotypes = [CONS, VOWEL, SONOR, SUBUNIT, CONS, VOWEL]
    expected_syllables = ['zaj', 'tšo']
    expected_phonotypes = [[CONS, VOWEL, SONOR], [SUBUNIT, CONS, VOWEL]]

    result = run_through_module(TextPhonotypes('zajtšo', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_sahac(self):
    # 'sahać' must come back as 'sa' + 'hać', with the flat phonotype
    # list regrouped to match those two syllables.
    given_phonotypes = [CONS, VOWEL, CONS, VOWEL, CONS]
    expected_syllables = ['sa', 'hać']
    expected_phonotypes = [[CONS, VOWEL], [CONS, VOWEL, CONS]]

    result = run_through_module(TextPhonotypes('sahać', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_neurcity(self):
    # 'neurčitý' must come back as four syllables; note that the 'e' and
    # 'u' (both VOWEL here) end up in different syllables.
    given_phonotypes = [
        SONOR, VOWEL, VOWEL, SONOR, CONS, VOWEL, CONS, VOWEL,
    ]
    expected_syllables = ['ne', 'ur', 'či', 'tý']
    expected_phonotypes = [
        [SONOR, VOWEL],
        [VOWEL, SONOR],
        [CONS, VOWEL],
        [CONS, VOWEL],
    ]

    result = run_through_module(TextPhonotypes('neurčitý', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_агульным(self):
    # 'агульным' must come back as 'а' + 'гу' + 'льным'; the SPEC sign
    # stays inside the last syllable.
    given_phonotypes = [
        VOWEL, CONS, VOWEL, SONOR, SPEC, SONOR, VOWEL, SONOR,
    ]
    expected_syllables = ['а', 'гу', 'льным']
    expected_phonotypes = [
        [VOWEL],
        [CONS, VOWEL],
        [SONOR, SPEC, SONOR, VOWEL, SONOR],
    ]

    result = run_through_module(TextPhonotypes('агульным', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_колькасць(self):
    # 'колькасць' must come back as 'коль' + 'касць'; each SPEC sign
    # closes the syllable it belongs to.
    given_phonotypes = [
        CONS, VOWEL, SONOR, SPEC, CONS, VOWEL, CONS, CONS, SPEC,
    ]
    expected_syllables = ['коль', 'касць']
    expected_phonotypes = [
        [CONS, VOWEL, SONOR, SPEC],
        [CONS, VOWEL, CONS, CONS, SPEC],
    ]

    result = run_through_module(TextPhonotypes('колькасць', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_srozpadem(self):
    # 'srozpadem' must come back as 'sro' + 'zpa' + 'dem'.
    given_phonotypes = [
        CONS, SONOR, VOWEL, CONS, CONS, VOWEL, CONS, VOWEL, SONOR,
    ]
    expected_syllables = ['sro', 'zpa', 'dem']
    expected_phonotypes = [
        [CONS, SONOR, VOWEL],
        [CONS, CONS, VOWEL],
        [CONS, VOWEL, SONOR],
    ]

    result = run_through_module(TextPhonotypes('srozpadem', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_automaticky(self):
    # 'automatický' must come back as five syllables; the leading
    # SUBUNIT + VOWEL pair ('au') stays together as the first one.
    given_phonotypes = [
        SUBUNIT, VOWEL, CONS, VOWEL, SONOR, VOWEL,
        CONS, VOWEL, CONS, CONS, VOWEL,
    ]
    expected_syllables = ['au', 'to', 'ma', 'ti', 'cký']
    expected_phonotypes = [
        [SUBUNIT, VOWEL],
        [CONS, VOWEL],
        [SONOR, VOWEL],
        [CONS, VOWEL],
        [CONS, CONS, VOWEL],
    ]

    result = run_through_module(
        TextPhonotypes('automatický', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_bijologije(self):
    # 'bijologije' must come back as five two-letter syllables.
    given_phonotypes = [
        CONS, VOWEL, SONOR, VOWEL, SONOR, VOWEL,
        CONS, VOWEL, SONOR, VOWEL,
    ]
    expected_syllables = ['bi', 'jo', 'lo', 'gi', 'je']
    expected_phonotypes = [
        [CONS, VOWEL],
        [SONOR, VOWEL],
        [SONOR, VOWEL],
        [CONS, VOWEL],
        [SONOR, VOWEL],
    ]

    result = run_through_module(
        TextPhonotypes('bijologije', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_зяўляецца(self):
    # 'з’яўляецца' (with an apostrophe typed as SPEC) must come back as
    # four syllables, the third being the lone vowel 'е'.
    given_phonotypes = [
        CONS, SPEC, VOWEL, SONOR, SONOR, VOWEL,
        VOWEL, CONS, CONS, VOWEL,
    ]
    expected_syllables = ['з’я', 'ўля', 'е', 'цца']
    expected_phonotypes = [
        [CONS, SPEC, VOWEL],
        [SONOR, SONOR, VOWEL],
        [VOWEL],
        [CONS, CONS, VOWEL],
    ]

    result = run_through_module(
        TextPhonotypes('з’яўляецца', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def set_phonotypes(self, word) -> TextPhonotypes:
    """Build the phonotype list for ``word`` and return it with the text.

    The word's text is first passed through ``self.text_changes_func``.
    Each letter of the result is then visited in order:

    * a letter present in ``self.cluster_letters`` is checked first
      against the following two letters and then against the following
      single letter; every match is delegated to ``self.clusters_func``;
    * otherwise a letter present in ``self.phono_changes`` is delegated
      to ``self.phonotype_changes_func``;
    * when none of the helpers reports an exception, the letter's default
      phonotype from ``self.signs`` is appended to ``self.phonotypes``.

    ``self.phonotypes`` is reset to an empty list before returning.

    Returns:
        A ``TextPhonotypes`` built from the changed text and the
        phonotypes accumulated in ``self.phonotypes``.
    """
    cluster_letters = self.cluster_letters
    phono_changes = self.phono_changes
    signs = self.signs
    text = self.text_changes_func(word.get_text())  # TEXT CHANGES
    size = len(text)

    # Set by a cluster match: the following letter is not processed.
    skip_following = False
    # Set by a three-letter match: one more letter is skipped as well
    # (original note: Croatian "ije" -> [SUBUNIT, SUBUNIT, VOWEL]).
    skip_extra = False

    for pos, letter in enumerate(text):
        if skip_following:
            skip_following = False
            continue
        if skip_extra:
            skip_extra = False
            continue

        before = text[pos - 1] if pos > 0 else ""
        after = text[pos + 1] if pos + 1 < size else ""
        after_next = text[pos + 2] if pos + 2 < size else ""

        # Second value returned by clusters_func; when truthy the default
        # phonotype is not appended (presumably because clusters_func has
        # already recorded one -- confirm against its implementation).
        cluster_handled = False
        change_handled = False

        if letter in cluster_letters:  # CLUSTERS
            if (after_next != ""
                    and after + after_next in cluster_letters[letter]):
                skip_following, cluster_handled = self.clusters_func(
                    text, letter, after, after_next)
                skip_extra = True
            # NOTE(review): this is a separate `if`, not an `elif`, so the
            # two-letter check also runs after a three-letter match.
            if after in cluster_letters[letter]:
                skip_following, cluster_handled = self.clusters_func(
                    text, letter, after, "")
        elif letter in phono_changes:  # PHONOTYPE CHANGES
            change_handled = self.phonotype_changes_func(
                text, before, letter, after)

        if not (cluster_handled or skip_following or change_handled):
            self.phonotypes.append(signs[letter])

    collected = self.phonotypes
    self.phonotypes = []
    return TextPhonotypes(text, collected)
def test_українськогож(self):
    # 'українськогож' must come back as five syllables; the SPEC sign
    # stays inside the fourth one ('сько').
    given_phonotypes = [
        VOWEL, CONS, SONOR, VOWEL, VOWEL, SONOR, CONS,
        SPEC, CONS, VOWEL, CONS, VOWEL, CONS,
    ]
    expected_syllables = ['у', 'кра', 'їн', 'сько', 'гож']
    expected_phonotypes = [
        [VOWEL],
        [CONS, SONOR, VOWEL],
        [VOWEL, SONOR],
        [CONS, SPEC, CONS, VOWEL],
        [CONS, VOWEL, CONS],
    ]

    result = run_through_module(
        TextPhonotypes('українськогож', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_farmaceuticky(self):
    # 'farmaceutický' must come back as five syllables; the SUBUNIT +
    # VOWEL pair ('eu') stays together inside 'ceu'.
    given_phonotypes = [
        CONS, VOWEL, SONOR, SONOR, VOWEL, CONS, SUBUNIT,
        VOWEL, CONS, VOWEL, CONS, CONS, VOWEL,
    ]
    expected_syllables = ['fa', 'rma', 'ceu', 'ti', 'cký']
    expected_phonotypes = [
        [CONS, VOWEL],
        [SONOR, SONOR, VOWEL],
        [CONS, SUBUNIT, VOWEL],
        [CONS, VOWEL],
        [CONS, CONS, VOWEL],
    ]

    result = run_through_module(
        TextPhonotypes('farmaceutický', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def set_phonotypes(self, word) -> TextPhonotypes:
    """Build the phonotype list for ``word`` and return it with the text.

    The word's text is first passed through ``self.text_changes_func``.
    Each sign of the result is then visited in order:

    * a sign that is a key of ``self.cluster_letters`` and is followed by
      one of its listed partners is delegated to ``self.clusters_func``;
    * otherwise a sign present in ``self.phono_changes`` is delegated to
      ``self.phonotype_changes_func``;
    * when neither helper reports an exception, the sign's default
      phonotype from ``self.signs`` is appended to ``self.phonotypes``.

    ``self.phonotypes`` is reset to an empty list before returning.

    Returns:
        A ``TextPhonotypes`` built from the changed text and the
        phonotypes accumulated in ``self.phonotypes``.
    """
    cluster_letters = self.cluster_letters
    phono_changes = self.phono_changes
    signs = self.signs
    text = self.text_changes_func(word.get_text())  # TEXT CHANGES
    size = len(text)

    # Set by a cluster match: the following sign is not processed.
    skip_following = False

    for pos, sign in enumerate(text):
        if skip_following:
            skip_following = False
            continue

        before = text[pos - 1] if pos > 0 else None
        after = text[pos + 1] if pos + 1 < size else None

        # Second value returned by clusters_func; when truthy the default
        # phonotype is not appended (presumably because clusters_func has
        # already recorded one -- confirm against its implementation).
        cluster_handled = False
        change_handled = False

        if sign in cluster_letters and after in cluster_letters[sign]:
            # CLUSTERS
            skip_following, cluster_handled = self.clusters_func(
                text, sign, after)
        elif sign in phono_changes:
            # PHONOTYPE CHANGES
            change_handled = self.phonotype_changes_func(
                text, before, sign, after)

        if not (cluster_handled or skip_following or change_handled):
            self.phonotypes.append(signs[sign])

    collected = self.phonotypes
    self.phonotypes = []
    return TextPhonotypes(text, collected)
def test_sneh(self):
    # 'sněh' has a single VOWEL and must stay one syllable.
    given_phonotypes = [CONS, SONOR, VOWEL, CONS]
    expected_syllables = ['sněh']
    expected_phonotypes = [[CONS, SONOR, VOWEL, CONS]]

    result = run_through_module(TextPhonotypes('sněh', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_osm(self):
    # 'osm' with the final 'm' typed as VOWEL must split into 'o' + 'sm'.
    given_phonotypes = [VOWEL, CONS, VOWEL]
    expected_syllables = ['o', 'sm']
    expected_phonotypes = [[VOWEL], [CONS, VOWEL]]

    result = run_through_module(TextPhonotypes('osm', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_rz(self):
    # 'rž' with 'r' typed as VOWEL must stay a single syllable.
    given_phonotypes = [VOWEL, CONS]
    expected_syllables = ['rž']
    expected_phonotypes = [[VOWEL, CONS]]

    result = run_through_module(TextPhonotypes('rž', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_maria(self):
    # 'maria' must come back as 'ma' + 'ria'; the SUBUNIT 'i' stays with
    # the VOWEL that follows it.
    given_phonotypes = [SONOR, VOWEL, SONOR, SUBUNIT, VOWEL]
    expected_syllables = ['ma', 'ria']
    expected_phonotypes = [[SONOR, VOWEL], [SONOR, SUBUNIT, VOWEL]]

    result = run_through_module(TextPhonotypes('maria', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_vlna(self):
    # 'vlna' with 'l' typed as VOWEL must split into 'vl' + 'na'.
    given_phonotypes = [CONS, VOWEL, SONOR, VOWEL]
    expected_syllables = ['vl', 'na']
    expected_phonotypes = [[CONS, VOWEL], [SONOR, VOWEL]]

    result = run_through_module(TextPhonotypes('vlna', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_późniejszy(self):
    # 'późniejszy' must come back as 'pó' + 'źniej' + 'szy'.
    # Phonotypes are given as raw integer codes in this test rather than
    # through the named constants used elsewhere.
    given_phonotypes = [0, 2, 0, 1, 4, 2, 1, 4, 0, 2]
    expected_syllables = ['pó', 'źniej', 'szy']
    expected_phonotypes = [[0, 2], [0, 1, 4, 2, 1], [4, 0, 2]]

    result = run_through_module(
        TextPhonotypes('późniejszy', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_pouzil(self):
    # 'použil' with 'o' and 'u' both typed as VOWEL must split into
    # 'po' + 'u' + 'žil'.
    given_phonotypes = [CONS, VOWEL, VOWEL, CONS, VOWEL, SONOR]
    expected_syllables = ['po', 'u', 'žil']
    expected_phonotypes = [[CONS, VOWEL], [VOWEL], [CONS, VOWEL, SONOR]]

    result = run_through_module(TextPhonotypes('použil', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def test_pouze(self):
    # 'pouze' with 'o' typed as SUBUNIT must keep 'ou' together and split
    # into 'pou' + 'ze'.
    given_phonotypes = [CONS, SUBUNIT, VOWEL, CONS, VOWEL]
    expected_syllables = ['pou', 'ze']
    expected_phonotypes = [[CONS, SUBUNIT, VOWEL], [CONS, VOWEL]]

    result = run_through_module(TextPhonotypes('pouze', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
from config_data import ConfigData
from pipe import Pipe
from queue import Queue
from threading import Condition
from count_module import CountModule
from syllabify_module import SyllabifyModule
from word import TextPhonotypes, SyllablesLengths
from constants import SONOR, CONS, VOWEL, SPEC, SUBUNIT
from end import End
import unittest
import time

# Fixture input: words paired with one phonotype per letter.
words_to_do = [
    TextPhonotypes('farmaceutický', [CONS, VOWEL, SONOR, SONOR, VOWEL, CONS, SUBUNIT, VOWEL, CONS, VOWEL, CONS, CONS, VOWEL]),
    TextPhonotypes('pouze', [CONS, SUBUNIT, VOWEL, CONS, VOWEL]),
    TextPhonotypes('použil', [CONS, VOWEL, VOWEL, CONS, VOWEL, SONOR]),
    TextPhonotypes('neurčitý', [SONOR, VOWEL, VOWEL, SONOR, CONS, VOWEL, CONS, VOWEL]),
    TextPhonotypes('automatický', [SUBUNIT, VOWEL, CONS, VOWEL, SONOR, VOWEL, CONS, VOWEL, CONS, CONS, VOWEL]),
    TextPhonotypes('vlna', [CONS, VOWEL, SONOR, VOWEL]),
    TextPhonotypes('osm', [VOWEL, CONS, VOWEL]),
    TextPhonotypes('bijologije', [CONS, VOWEL, SONOR, VOWEL, SONOR, VOWEL, CONS, VOWEL, SONOR, VOWEL]),
    TextPhonotypes('srozpadem', [CONS, SONOR, VOWEL, CONS, CONS, VOWEL, CONS, VOWEL, SONOR]),
    TextPhonotypes('vědomí', [CONS, VOWEL, CONS, VOWEL, SONOR, VOWEL]),
    TextPhonotypes('sex', [CONS, VOWEL, CONS])
]

# Syllable splits listed in the same order as `words_to_do` (entry i
# belongs to word i); presumably the expected output of the syllabifier --
# confirm against the code that consumes both lists.
syllables = [['fa', 'rma', 'ceu', 'ti', 'cký'], ['pou', 'ze'], ['po', 'u', 'žil'], ['ne', 'ur', 'či', 'tý'],
             ['au', 'to', 'ma', 'ti', 'cký'], ['vl', 'na'], ['o', 'sm'], ['bi', 'jo', 'lo', 'gi', 'je'],
             ['sro', 'zpa', 'dem'], ['vě', 'do', 'mí'], ['sex']]
def test_wrobl(self):
    # 'wrobl' with the final 'l' typed as VOWEL must split into
    # 'wro' + 'bl'.
    given_phonotypes = [SONOR, SONOR, VOWEL, CONS, VOWEL]
    expected_syllables = ['wro', 'bl']
    expected_phonotypes = [[SONOR, SONOR, VOWEL], [CONS, VOWEL]]

    result = run_through_module(TextPhonotypes('wrobl', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)
def text_zastojnstwa(self):
    # 'zastojnstwa' is expected to come back as 'za' + 'stojn' + 'stwa'.
    #
    # The input phonotypes used to be a copy of the 'późniejszy' list:
    # ten codes for an eleven-letter word, and not the concatenation of
    # the expected per-syllable phonotypes below. The input is now that
    # concatenation, one code per letter.
    #
    # NOTE(review): the method name starts with `text_`, not `test_`, so
    # unittest's default discovery does not collect it. Renaming it would
    # enable the check; confirm that it passes before doing so.
    given_phonotypes = [0, 2, 0, 0, 2, 1, 1, 0, 0, 1, 2]
    expected_syllables = ['za', 'stojn', 'stwa']
    expected_phonotypes = [[0, 2], [0, 0, 2, 1, 1], [0, 0, 1, 2]]

    word = run_through_module(
        TextPhonotypes('zastojnstwa', given_phonotypes))

    self.assertEqual(word.get_syllables(), expected_syllables)
    self.assertEqual(word.get_phonotypes(), expected_phonotypes)
def test_vedomi(self):
    # 'vědomí' must come back as three two-letter syllables.
    given_phonotypes = [CONS, VOWEL, CONS, VOWEL, SONOR, VOWEL]
    expected_syllables = ['vě', 'do', 'mí']
    expected_phonotypes = [[CONS, VOWEL], [CONS, VOWEL], [SONOR, VOWEL]]

    result = run_through_module(TextPhonotypes('vědomí', given_phonotypes))

    self.assertEqual(result.get_syllables(), expected_syllables)
    self.assertEqual(result.get_phonotypes(), expected_phonotypes)