def feed(self, character: str) -> None:
    """Consume one character and update the per-word statistics.

    Alphabetic characters are accumulated in ``self._buffer``. When a
    whitespace, punctuation or separator character arrives and the buffer
    is non-empty, the buffered word is closed: the word and character
    counters are updated, the word is counted as bad if it was flagged,
    and the buffer is reset.

    :param character: The single character to process.
    """
    if character.isalpha():
        self._buffer += character
        if is_accentuated(character):
            self._buffer_accent_count += 1
        return

    # Nothing buffered: a non-alphabetic character cannot close or taint a word.
    if not self._buffer:
        return

    if character.isspace() or is_punctuation(character) or is_separator(character):
        # End of word: account for it, then reset the per-word state.
        self._word_count += 1
        buffer_length = len(self._buffer)  # type: int
        self._character_count += buffer_length

        # Words of 4+ characters with at least 30% accentuated letters are bad.
        if buffer_length >= 4 and self._buffer_accent_count / buffer_length >= 0.3:
            self._is_current_word_bad = True

        if self._is_current_word_bad:
            self._bad_word_count += 1
            self._bad_character_count += buffer_length
            self._is_current_word_bad = False

        self._buffer = ""
        self._buffer_accent_count = 0
    elif (
        character not in {"<", ">", "-", "="}
        and not character.isdigit()
        and is_symbol(character)
    ):
        # A symbol inside a word (other than the exempted ones) flags the
        # word as bad; the symbol is kept so it counts toward the length.
        self._is_current_word_bad = True
        self._buffer += character
def feed(self, character: str) -> None:
    """Consume one character and update punctuation/symbol counters.

    Every character increments ``self._character_count``. A character is
    classified only if it differs from the previously fed character and
    is not one of the exempted markup-like characters. Punctuation adds 1
    to ``self._punctuation_count``; a non-digit symbol adds 2 to
    ``self._symbol_count``.

    :param character: The single character to process.
    """
    self._character_count += 1

    # Immediate repeats and the exempted characters are not classified.
    if character != self._last_printable_char and character not in {
        "<",
        ">",
        "=",
        ":",
        "/",
        "&",
        ";",
        "{",
        "}",
        "[",
        "]",
    }:
        if is_punctuation(character):
            self._punctuation_count += 1
        elif not character.isdigit() and is_symbol(character):
            # Symbols weigh twice as much as punctuation.
            self._symbol_count += 2

    # Updated for every character, including skipped ones.
    self._last_printable_char = character
def feed(self, character: str) -> None:
    """Consume one character and track suspicious unicode range changes.

    Every character increments ``self._character_count``. Whitespace and
    punctuation reset the reference character. Otherwise the unicode
    range of the character is compared with that of the previous
    reference character, and ``self._suspicious_successive_range_count``
    is incremented when ``is_suspiciously_successive_range`` reports the
    pair as suspicious.

    :param character: The single character to process.
    """
    self._character_count += 1

    # Separators must be handled before the "no reference yet" case.
    # Otherwise a separator arriving while the reference is None would be
    # stored as the reference, and the next character would be compared
    # against the separator's range.
    if character.isspace() or is_punctuation(character):
        self._last_printable_seen = None
        return

    # First character after a reset: nothing to compare against yet.
    if self._last_printable_seen is None:
        self._last_printable_seen = character
        return

    unicode_range_a = unicode_range(self._last_printable_seen)  # type: Optional[str]
    unicode_range_b = unicode_range(character)  # type: Optional[str]

    if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
        self._suspicious_successive_range_count += 1

    self._last_printable_seen = character