コード例 #1
0
ファイル: clean_module.py プロジェクト: toma-s/slabiky
 def get_attachment(self, word: TextPunctuation) -> dict:
     """Bundle a word's text and signs with its ``zero_syll_words`` entry.

     The word's text is used as the key into the ``zero_syll_words``
     mapping exposed by the object that ``self.get_data()`` returns.

     Args:
         word: Object providing ``get_text()`` and ``get_punctuation()``.

     Returns:
         A dict with three keys, in this order: ``'attachment'`` (the
         value stored in ``zero_syll_words`` for the word's text),
         ``'text'`` (the word's text) and ``'sign'`` (whatever
         ``word.get_punctuation()`` returned).

     Raises:
         KeyError: Presumably, when the text has no entry in
             ``zero_syll_words`` (assumes a plain dict -- TODO confirm).
     """
     word_text = word.get_text()
     word_signs = word.get_punctuation()
     return {
         'attachment': self.get_data().zero_syll_words[word_text],
         'text': word_text,
         'sign': word_signs,
     }
コード例 #2
0
ファイル: clean_module.py プロジェクト: toma-s/slabiky
    def clean(self, words: list) -> list:
        """Normalise the current word and merge it with a one-letter neighbour.

        Args:
            words: Sequence whose first two items are the current word and
                the item following it. The current word must provide
                ``get_text()`` and ``get_punctuation()``; the per-character
                signs are compared against ``constants.PUNCT`` and
                ``constants.HYPHEN``.

        Returns:
            A two-element list ``[cleaned, following]``.

            ``cleaned`` is ``None`` when the current word is rejected or has
            been merged into the following word; otherwise it is a new
            ``TextPunctuation`` holding the lower-cased accepted characters
            and their signs. ``following`` is the original following item,
            or ``None`` when it was merged into ``cleaned``.

        A word is rejected when:
            * it has more than one character and is entirely upper-case;
            * it contains ``'.'`` while the following item is a
              ``TextPunctuation`` whose text is not title-cased;
            * it starts with a ``PUNCT`` character listed in ``dashes``;
            * its last character is marked ``HYPHEN``;
            * a ``PUNCT`` character outside ``hyphen_dashes`` is followed by
              any character that is not ``PUNCT``;
            * a character's lower-case form is not in
              ``self.get_data().letters``.
        """
        curr, foll = words[0], words[1]
        text = curr.get_text()
        buffer_text, buffer_signs = [], []

        # Multi-character, fully upper-case tokens are dropped outright.
        if len(text) != 1 and text.isupper():
            return [None, foll]

        signs = curr.get_punctuation()
        last_index = len(text) - 1

        for i, sym in enumerate(text):
            sign = signs[i]

            if sym == '.' and isinstance(
                    foll, TextPunctuation) and not foll.get_text().istitle():
                return [None, foll]

            # Leading punctuation: erasable marks are skipped, a dash
            # rejects the whole word.
            if not buffer_text and sign == constants.PUNCT:
                if sym in punctuation_to_erase:
                    continue
                if sym in dashes:
                    return [None, foll]

            # Hyphen-marked characters are dropped, unless the word ends
            # on one, in which case the word is rejected.
            if sign == constants.HYPHEN:
                if i == last_index:
                    return [None, foll]
                continue

            # Any other punctuation ends the word; it is only accepted when
            # everything after it is punctuation as well.
            if sign == constants.PUNCT and sym not in hyphen_dashes:
                if any(signs[j] != constants.PUNCT
                       for j in range(i + 1, len(text))):
                    return [None, foll]
                return [
                    TextPunctuation(''.join(buffer_text), buffer_signs), foll
                ]

            sym_low = sym.lower()
            if sym_low not in self.get_data().letters:
                return [None, foll]

            buffer_text.append(sym_low)
            buffer_signs.append(sign)

        curr = TextPunctuation(''.join(buffer_text), buffer_signs)

        if isinstance(foll, TextPunctuation) and len(foll.get_text()) == 1:
            foll_low = TextPunctuation(foll.get_text().lower(),
                                       foll.get_punctuation())
            if self.is_zero_syll(foll_low.get_text()):
                buffer_attach = self.get_attachment(foll_low)
                if buffer_attach['attachment'] == 'to_preceding':
                    buffer_text.append(foll_low.get_text())
                    # get_punctuation() yields a per-character sequence, so
                    # extend (not append) keeps buffer_signs flat and
                    # aligned one-to-one with buffer_text.
                    buffer_signs.extend(foll_low.get_punctuation())
                    foll = None
        elif isinstance(foll, TextPunctuation) and len(
                curr.get_text()) == 1 and self.is_zero_syll(curr.get_text()):
            buffer_attach = self.get_attachment(curr)
            if buffer_attach['attachment'] == 'to_following':
                # The current one-letter word is prepended to the following
                # word, which is updated in place.
                foll.set_text(curr.get_text() + foll.get_text())
                foll.set_punctuation(curr.get_punctuation() +
                                     foll.get_punctuation())
                return [None, foll]

        return [TextPunctuation(''.join(buffer_text), buffer_signs), foll]