예제 #1
0
    def feed(self, character: str) -> None:
        """Consume one character and update the per-word statistics.

        Alphabetic characters are appended to the current word buffer (and
        counted when accentuated). A whitespace, punctuation or separator
        character closes the buffered word: the word and character counters
        are updated, and the word is tallied as "bad" when it was already
        flagged or when it is at least 4 characters long with an accent
        ratio of 0.3 or more. Any other non-digit symbol, except
        ``<``, ``>``, ``-`` and ``=``, flags the current word as bad and is
        appended to the buffer.

        Nothing happens for a non-alphabetic character while the buffer is
        empty.
        """
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            return

        # Outside of a word there is nothing to close or to flag.
        if not self._buffer:
            return

        closes_word = (
            character.isspace()
            or is_punctuation(character)
            or is_separator(character)
        )

        if closes_word:
            word_length = len(self._buffer)

            self._word_count += 1
            self._character_count += word_length

            # The ratio is only evaluated for words of 4+ characters.
            heavily_accentuated = (
                word_length >= 4
                and self._buffer_accent_count / word_length >= 0.3
            )

            if heavily_accentuated or self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += word_length
                self._is_current_word_bad = False

            # Start over with a fresh word.
            self._buffer = ""
            self._buffer_accent_count = 0
            return

        if character in {"<", ">", "-", "="} or character.isdigit():
            return

        if is_symbol(character):
            self._is_current_word_bad = True
            self._buffer += character
예제 #2
0
    def feed(self, character: str) -> None:
        """Consume one character and update punctuation/symbol tallies.

        Every call increments the character counter and records the
        character as the last one seen. A character is only classified when
        it differs from the previously fed character and is not one of the
        ignored markup-like characters listed below; punctuation adds 1 to
        the punctuation counter, a non-digit symbol adds 2 to the symbol
        counter.
        """
        ignored_characters = (
            "<", ">", "=", ":", "/", "&", ";", "{", "}", "[", "]",
        )

        self._character_count += 1

        is_repeated = character == self._last_printable_char

        if not (is_repeated or character in ignored_characters):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif not character.isdigit() and is_symbol(character):
                # Symbols weigh twice as much as punctuation.
                self._symbol_count += 2

        self._last_printable_char = character
예제 #3
0
    def feed(self, character: str) -> None:
        """Consume one character and track suspicious unicode-range changes.

        Every call increments the character counter. When no previous
        character is remembered, the given one is simply stored. A
        whitespace or punctuation character clears the remembered character.
        Otherwise the unicode ranges of the remembered and the given
        character are compared; a suspicious succession increments the
        dedicated counter, and the given character becomes the remembered
        one.
        """
        self._character_count += 1

        previous = self._last_printable_seen

        if previous is None:
            self._last_printable_seen = character
        elif character.isspace() or is_punctuation(character):
            # Forget the previous character so the next one starts a new pair.
            self._last_printable_seen = None
        else:
            if is_suspiciously_successive_range(
                unicode_range(previous), unicode_range(character)
            ):
                self._suspicious_successive_range_count += 1

            self._last_printable_seen = character