def run(self):
    """Assemble the processing pipeline and drive it to completion.

    Six modules are built in order: read, clean, phonotype, syllabify,
    count and write.  Neighbouring modules are handed the same pipe object
    (``pipe_read_clean``, ``pipe_clean_sound``, ``pipe_sound_syll``,
    ``pipe_syll_count``, ``pipe_count_txt``), which is how they are chained.

    Every module except the reader is launched with ``start()``; the reader's
    ``run()`` is then called directly, so it executes in the calling thread.
    Finally the phonotype, syllabify, count and write modules are joined.
    """
    config = self.data

    # Each module receives its own freshly built list of pipes.
    reader = ReadModule(
        [self.pipe_read_clean],
        self.file_path + self.file_name,
        self.encoding,
        config,
    )
    cleaner = CleanModule(
        [self.pipe_read_clean, self.pipe_clean_sound],
        config,
    )
    phonotyper = PhonotypeModule(
        [self.pipe_clean_sound, self.pipe_sound_syll],
        config,
    )
    syllabifier = SyllabifyModule(
        [self.pipe_sound_syll, self.pipe_syll_count],
    )
    counter = CountModule(
        [self.pipe_syll_count, self.pipe_count_txt],
        config,
        self.file_path,
    )
    writer = WriteModule(
        [self.pipe_count_txt],
        self.file_path,
    )

    # NOTE(review): the clean module is started but never joined; only the
    # stages after it are awaited.  Confirm whether that is deliberate.
    awaited_stages = (phonotyper, syllabifier, counter, writer)

    # Consumers must be running before the reader begins producing.
    for stage in (cleaner, *awaited_stages):
        stage.start()

    reader.run()

    for stage in awaited_stages:
        stage.join()
def get_from_module(pipe_out):
    """Run the reader, then drain ``pipe_out`` up to and including ``End``.

    A ``ReadModule`` is built from the module-level ``read_clean_pipe``,
    ``file_path``, ``encoding`` and ``data`` and executed synchronously via
    ``run()``.  Items are then taken from ``pipe_out`` one at a time until an
    ``End`` instance is received.

    NOTE(review): the reader is given ``read_clean_pipe`` while this function
    reads from ``pipe_out``; presumably a module started elsewhere connects
    the two -- confirm against the caller.

    Args:
        pipe_out: Pipe-like object exposing ``acquire``, ``release``,
            ``wait``, ``empty`` and ``get``.

    Returns:
        list: Every item taken from ``pipe_out`` in arrival order; the final
        element is the ``End`` instance that terminated the loop.
    """
    reader = ReadModule([read_clean_pipe], file_path, encoding, data)
    reader.run()

    collected = []
    while True:
        pipe_out.acquire()
        try:
            # Re-test the predicate after every wake-up: a condition wait may
            # return although the pipe is still empty, and calling get() then
            # would happen on an empty pipe while the lock is held.
            while pipe_out.empty():
                pipe_out.wait()
            item = pipe_out.get()
            collected.append(item)
        finally:
            # Always hand the lock back, even if wait()/get() raised, so the
            # producer side is never left blocked.
            pipe_out.release()

        if isinstance(item, End):
            break
    return collected
import unittest import constants from config_data import ConfigData from end import End from pipe import * from read_module import ReadModule from word import TextPunctuation file_path = '../test_files/belarusian/test_belarusian.txt' encoding = 'utf-8-sig' data = ConfigData('../../../py_scripts/configs/conf_be_cyr.json') pipe_out = Pipe(queue.Queue(), threading.Condition()) module = ReadModule([pipe_out], file_path, encoding, data) expected_result = [TextPunctuation('У', [None]), TextPunctuation('беларускай', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('мове', [None, None, None, None]), TextPunctuation('зычныя', [None, None, None, None, None, None]), TextPunctuation('могуць', [None, None, None, None, None, None]), TextPunctuation('адрознівацца', [None, None, None, None, None, None, None, None, None, None, None, None]), TextPunctuation('даўжынёй', [None, None, None, None, None, None, None, None]), TextPunctuation('гучання,', [None, None, None, None, None, None, None, constants.PUNCT]), TextPunctuation('якая', [None, None, None, None]), TextPunctuation('пака-звае', [None, None, None, None, constants.HYPHEN, None, None, None, None]), TextPunctuation('на', [None, None]), TextPunctuation('стык', [None, None, None, None]), TextPunctuation('марфем...', [None, None, None, None, None, None, constants.PUNCT, constants.PUNCT, constants.PUNCT]), TextPunctuation('Пераважная', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('‚колькасць‘', [constants.PUNCT, None, None, None, None, None, None, None, None, None, constants.PUNCT]), TextPunctuation('гукаў', [None, None, None, None, None]), TextPunctuation('утвараюцца', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('ў', [None]), TextPunctuation('цэнтры', [None, None, None, None, None, None]), TextPunctuation('ротавай', [None, None, None, None, None, None, None]), TextPunctuation('поласці', [None, None, None, None, None, None, None]), 
TextPunctuation('пры', [None, None, None]), TextPunctuation('высокім', [None, None, None, None, None, None, None]), TextPunctuation('агульным', [None, None, None, None, None, None, None, None]), TextPunctuation('пад’ёме', [None, None, None, None, None, None, None]), TextPunctuation('языка.', [None, None, None, None, None, constants.PUNCT]), TextPunctuation('Вялікае', [None, None, None, None, None, None, None]), TextPunctuation('Ducatus', [None, None, None, None, None, None, None]), TextPunctuation('Lithuaniae', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('знаходзілася', [None, None, None, None, None, None, None, None, None, None, None, None]), TextPunctuation('ў', [None]), TextPunctuation('дынастычнай', [None, None, None, None, None, None, None, None, None, None, None]), TextPunctuation('уніі', [None, None, None, None]), TextPunctuation('—', [constants.PUNCT]), TextPunctuation('з', [None]), TextPunctuation('Польскім', [None, None, None, None, None, None, None, None]), TextPunctuation('кара-леўствам!', [None, None, None, None, constants.HYPHEN, None, None, None, None, None, None, None, None, constants.PUNCT]), End()] def get_from_module(): module.run() result = [] while True: pipe_out.acquire() if pipe_out.empty(): pipe_out.wait() cleaned_word = pipe_out.get() result.append(cleaned_word)