Exemple #1
0
    def run(self):
        """Build the processing stages, run them, and wait for all of them.

        Six stage objects are created and chained through the pipes stored
        on ``self``; each pair of neighbouring stages shares one pipe.
        Judging by the pipe names, data presumably flows as::

            read -> clean -> phonotype -> syllabify -> count -> write

        Every stage except the reader is launched with ``start()``.  The
        reader is then executed directly through ``run()`` in the caller's
        own flow of control, and afterwards every launched stage is awaited
        with ``join()``.

        NOTE(review): the ``start()``/``join()`` calls suggest the stages are
        ``threading.Thread``-like objects -- confirm against the module
        classes.
        """
        data = self.data

        # Stages are constructed in pipeline order; the pipe list passed to
        # each constructor names the pipe(s) that stage is attached to.
        read_module = ReadModule(
            [self.pipe_read_clean],
            self.file_path + self.file_name,
            self.encoding,
            data,
        )
        clean_module = CleanModule(
            [self.pipe_read_clean, self.pipe_clean_sound], data)
        phonotype_module = PhonotypeModule(
            [self.pipe_clean_sound, self.pipe_sound_syll], data)
        syllabify_module = SyllabifyModule(
            [self.pipe_sound_syll, self.pipe_syll_count])
        count_module = CountModule(
            [self.pipe_syll_count, self.pipe_count_txt], data, self.file_path)
        write_module = WriteModule([self.pipe_count_txt], self.file_path)

        started_modules = (
            clean_module,
            phonotype_module,
            syllabify_module,
            count_module,
            write_module,
        )

        # Launch the downstream stages first so they are already running
        # when the reader begins.
        for stage in started_modules:
            stage.start()

        # The reader is deliberately invoked via run(), not start(): it
        # executes here, in the caller, rather than being launched.
        read_module.run()

        # Await every stage that was started -- clean_module included -- so
        # that nothing launched above is left unawaited when run() returns.
        for stage in started_modules:
            stage.join()
Exemple #2
0
def get_from_module(pipe_out):
    """Run a ``ReadModule`` and collect items from *pipe_out* up to ``End``.

    A ``ReadModule`` is built from the module-level ``read_clean_pipe``,
    ``file_path``, ``encoding`` and ``data`` and run to completion before
    any item is taken from *pipe_out*.

    NOTE(review): the reader is attached to ``read_clean_pipe`` while items
    are taken from *pipe_out* -- confirm that these are meant to differ.

    Args:
        pipe_out: Pipe-like object providing ``acquire``, ``release``,
            ``empty``, ``wait`` and ``get``.

    Returns:
        A list of the items taken from *pipe_out*, in retrieval order.  The
        final element is the ``End`` instance that stopped the loop.
    """
    read_module = ReadModule([read_clean_pipe], file_path, encoding, data)
    read_module.run()

    result = []
    while True:
        pipe_out.acquire()
        try:
            # Re-check emptiness after every wake-up: a single ``if`` would
            # fall through to get() on a still-empty pipe if wait() returned
            # without an item being available.
            while pipe_out.empty():
                pipe_out.wait()
            cleaned_word = pipe_out.get()
            result.append(cleaned_word)
        finally:
            # Always hand the pipe back, even if wait()/get() raised.
            pipe_out.release()

        if isinstance(cleaned_word, End):
            break

    return result
Exemple #3
0
import unittest

import constants
from config_data import ConfigData
from end import End
from pipe import *
from read_module import ReadModule
from word import TextPunctuation


# Shared, module-level fixtures for the ReadModule test.

# Input text handed to ReadModule.  The path is relative, so it is resolved
# against the current working directory at run time.
file_path = '../test_files/belarusian/test_belarusian.txt'
# 'utf-8-sig' is Python's UTF-8 codec that skips a leading BOM when decoding.
# NOTE(review): how ReadModule applies this value is not visible here.
encoding = 'utf-8-sig'
# Configuration object built from a JSON file (presumably the Belarusian
# Cyrillic settings, judging by the file name -- confirm).
data = ConfigData('../../../py_scripts/configs/conf_be_cyr.json')

# `queue` and `threading` are not imported explicitly by this file's visible
# imports; presumably they are re-exported through `from pipe import *`.
pipe_out = Pipe(queue.Queue(), threading.Condition())
# ReadModule instance under test, given pipe_out as its only pipe.
module = ReadModule([pipe_out], file_path, encoding, data)

# Expected sequence of items: one TextPunctuation per whitespace-separated
# token of the input text, terminated by an End instance.  The second
# argument of each TextPunctuation is a list with one entry per character of
# the token: None for most characters, constants.PUNCT or constants.HYPHEN at
# the positions of the marked punctuation / hyphen characters.  Note that the
# apostrophe in 'пад’ёме' is listed as None, not as constants.PUNCT.
# NOTE(review): presumably compared with what get_from_module() collects.
expected_result = [TextPunctuation('У', [None]), TextPunctuation('беларускай', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('мове', [None, None, None, None]), TextPunctuation('зычныя', [None, None, None, None, None, None]), TextPunctuation('могуць', [None, None, None, None, None, None]), TextPunctuation('адрознівацца', [None, None, None, None, None, None, None, None, None, None, None, None]), TextPunctuation('даўжынёй', [None, None, None, None, None, None, None, None]), TextPunctuation('гучання,', [None, None, None, None, None, None, None, constants.PUNCT]), TextPunctuation('якая', [None, None, None, None]), TextPunctuation('пака-звае', [None, None, None, None, constants.HYPHEN, None, None, None, None]), TextPunctuation('на', [None, None]), TextPunctuation('стык', [None, None, None, None]), TextPunctuation('марфем...', [None, None, None, None, None, None, constants.PUNCT, constants.PUNCT, constants.PUNCT]), TextPunctuation('Пераважная', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('‚колькасць‘', [constants.PUNCT, None, None, None, None, None, None, None, None, None, constants.PUNCT]), TextPunctuation('гукаў', [None, None, None, None, None]), TextPunctuation('утвараюцца', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('ў', [None]), TextPunctuation('цэнтры', [None, None, None, None, None, None]), TextPunctuation('ротавай', [None, None, None, None, None, None, None]), TextPunctuation('поласці', [None, None, None, None, None, None, None]), TextPunctuation('пры', [None, None, None]), TextPunctuation('высокім', [None, None, None, None, None, None, None]), TextPunctuation('агульным', [None, None, None, None, None, None, None, None]), TextPunctuation('пад’ёме', [None, None, None, None, None, None, None]), TextPunctuation('языка.', [None, None, None, None, None, constants.PUNCT]), TextPunctuation('Вялікае', [None, None, None, None, None, None, None]), 
TextPunctuation('Ducatus', [None, None, None, None, None, None, None]), TextPunctuation('Lithuaniae', [None, None, None, None, None, None, None, None, None, None]), TextPunctuation('знаходзілася', [None, None, None, None, None, None, None, None, None, None, None, None]), TextPunctuation('ў', [None]), TextPunctuation('дынастычнай', [None, None, None, None, None, None, None, None, None, None, None]), TextPunctuation('уніі', [None, None, None, None]), TextPunctuation('—', [constants.PUNCT]), TextPunctuation('з', [None]), TextPunctuation('Польскім', [None, None, None, None, None, None, None, None]), TextPunctuation('кара-леўствам!', [None, None, None, None, constants.HYPHEN, None, None, None, None, None, None, None, None, constants.PUNCT]), End()]


def get_from_module():

    module.run()
    result = []

    while True:
        pipe_out.acquire()
        if pipe_out.empty():
            pipe_out.wait()
        cleaned_word = pipe_out.get()
        result.append(cleaned_word)