def init(self):
    """Configure the spell checker from the optional 'spell' config section.

    Keys read from the section (both optional):
        aggro: stored as ``self.aggro`` when truthy; otherwise False.
        lang:  language code passed to ``Speller``; defaults to 'en'.
    """
    # This method takes no ``xmpp`` parameter, so the configuration has to be
    # reached through the instance.
    # NOTE(review): assumes the bot object is already available as self.xmpp
    # by the time init() runs - confirm against XMPPModule.
    spell = self.xmpp.config.get('spell') or {}
    self.aggro = spell.get('aggro') or False
    self.lang = spell.get('lang') or 'en'
    # Matches every character that is neither an ASCII letter nor an apostrophe.
    self.rgx = re.compile(r"[^a-zA-Z']")
    self.speller = Speller('lang', self.lang)
class Spell(XMPPModule): def __init__(self, xmpp): XMPPModule.__init__(self, xmpp) spell = xmpp.config.get('spell') self.aggro = (spell and spell.get('aggro')) or True self.lang = (spell and spell.get('lang')) or 'en' self.rgx = re.compile(r"[^a-zA-Z']") self.speller = Speller('lang', self.lang) def ignore(self, msg, words): if len(words) > 1: for m in words: for w in self.rgx.split(m): self.speller.addtoSession(w) self.xmpp.reply(msg, 'These words are now a component of the descriptivist English language: ' + ', '.join(words[1:])) else: self.xmpp.reply(msg, 'Ah, good, I see you are a prescriptivist as well.') def correct(self, msg, words): for m in words: for w in self.rgx.split(m): if w.isalpha() and not self.speller.check(w): if (len(self.speller.suggest(w)) > 0): self.xmpp.reply(msg, "'" + w + "'? Did you mean '" + self.speller.suggest(w)[0] + "'?") def handleMessage(self, msg): words = msg['body'].split(' ') if words[0] == '!spellignore': self.ignore(msg, words[1:]) elif words[0] == '!spellcheck': self.correct(msg, words[1:]) elif self.aggro: self.correct(msg, words) def help(self, feature): if feature in ['spellignore', '!spellignore', 'ignore']: return ''' Tells the spell checker to ignore the given words as spelling errors. usage: !spellignore [word1] [word2] ... ''' if feature in ['spellcheck', '!spellcheck', 'check']: return ''' Spell checks the given phrase. usage: !spellcheck [word1] [word2] ... ''' if feature == 'lang': return '\nCurrent language in use: ' + self.lang + '\n' if self.aggro: s = ''' When a charlatan spels something incorrectly, correct them.''' else: s = ''' Corrects the spelling of those who request it.''' return s + '''
def __init__(self, xmpp):
    """Set up the module and configure the spell checker.

    Reads the optional 'spell' section of ``xmpp.config``:
        aggro: stored as ``self.aggro``; defaults to True when not set.
        lang:  language code passed to ``Speller``; defaults to 'en'.
    """
    XMPPModule.__init__(self, xmpp)
    spell = xmpp.config.get('spell') or {}

    # Apply the True default only when the option is absent. The previous
    # ``value or True`` expression evaluated to a truthy value for every
    # input, so an explicit ``aggro: False`` could never take effect.
    aggro = spell.get('aggro')
    self.aggro = True if aggro is None else aggro

    self.lang = spell.get('lang') or 'en'
    # Matches every character that is neither an ASCII letter nor an apostrophe.
    self.rgx = re.compile(r"[^a-zA-Z']")
    self.speller = Speller('lang', self.lang)
def __init__(self, server_list, nick="OrthoNazi", langs=("fr_FR", "en_US"),
             channels=("#test",), whitelist_path=None, delay=300,
             trump_delay=10, victim="un débile profond", **params):
    """Initialise the bot state.

    Args:
        server_list: forwarded unchanged to the parent constructor.
        nick: forwarded to the parent constructor; its lower-cased form is
            also the initial whitelist entry.
        langs: iterable of language codes; one ``Speller`` is built per code.
        channels: iterable of channel names, stored as ``self.nazi_channels``.
        whitelist_path: stored as ``self.whitelist_path``.
        delay: argument for the ``RateLimiter`` stored as ``self.rl``.
        trump_delay: argument for the ``RateLimiter`` stored as
            ``self.trump_rl``.
        victim: stored as ``self.victim``.
        **params: forwarded unchanged to the parent constructor.
    """
    super().__init__(server_list, nick, "OrthoNazi", **params)
    # Keep a private copy: storing the argument itself would make every
    # instance built with the default share (and possibly mutate) one list.
    self.nazi_channels = list(channels)
    self.spellers = [Speller("lang", lang) for lang in langs]
    self.rl = RateLimiter(delay)
    self.trump_rl = RateLimiter(trump_delay)
    self.whitelist_path = whitelist_path
    self.whitelist = {nick.lower(): True}
    self.victim = victim
    # NOTE(review): load() is defined outside this block; presumably it reads
    # the whitelist from whitelist_path - confirm.
    self.load()
def get_speller(lang=settings.LANGUAGE_CODE,
                encoding=settings.DEFAULT_CHARSET,
                home_dir=getattr(settings, 'ASPELL_HOME_DIR', None)):
    """
    Build and return a Speller object ready for checking words.

    The configuration is a sequence of (key, value) pairs: 'encoding' and
    'lang' are always present, and 'home-dir' is added when ``home_dir`` is
    not None.

    Note: aspell and pyaspell seem to expect the config keys in a different
    shape in the constructor. When ``c_aspell`` is truthy the pairs are
    passed as separate positional arguments; otherwise they are passed as
    one single list.
    """
    config = [('encoding', encoding), ('lang', lang)]
    if home_dir is not None:
        config += [('home-dir', home_dir)]

    if c_aspell:
        return Speller(*config)
    return Speller(config)
def __init__(self, server_list, nick="OrthoNazi", langs=("fr_FR", "en_US"),
             channels=("#test",), whitelist_path=None, delay=300, **params):
    """Initialise the bot state and load the pickled whitelist, if any.

    Args:
        server_list: forwarded unchanged to the parent constructor.
        nick: forwarded to the parent constructor; its lower-cased form is
            always present in the whitelist.
        langs: iterable of language codes; one ``Speller`` is built per code.
        channels: iterable of channel names, stored as ``self.nazi_channels``.
        whitelist_path: path of a pickled dict, or None to skip loading.
        delay: argument for the ``RateLimiter`` stored as ``self.rl``.
        **params: forwarded unchanged to the parent constructor.

    Loading the whitelist is best effort: any failure is logged and the bot
    starts with a whitelist that contains only its own nick.
    """
    super().__init__(server_list, nick, "OrthoNazi", **params)
    # Keep a private copy so instances never share one channel list.
    self.nazi_channels = list(channels)
    self.spellers = [Speller("lang", lang) for lang in langs]
    self.rl = RateLimiter(delay)
    self.whitelist_path = whitelist_path

    self.whitelist = {}
    if whitelist_path is not None:
        try:
            # pickle needs a binary stream; a text-mode handle makes
            # pickle.load() fail on every call under Python 3.
            with open(whitelist_path, "rb") as f:
                # SECURITY: unpickling can execute arbitrary code - only
                # point whitelist_path at a file this bot wrote itself.
                loaded = pickle.load(f)
        except Exception as exc:
            # Unpickling may raise many exception types; log the failure
            # instead of hiding it, and carry on with an empty whitelist.
            logging.warning("Could not load whitelist from %s: %s",
                            whitelist_path, exc)
        else:
            if isinstance(loaded, dict):
                self.whitelist = loaded
                logging.info("Whitelist loaded with {0} words".format(
                    len(self.whitelist)))
            else:
                logging.warning("Ignoring whitelist at %s: expected a dict",
                                whitelist_path)

    # Add the bot's own nick without discarding the entries just loaded.
    self.whitelist[nick.lower()] = True
import pickle from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer from multiprocessing import Pool from logging import getLogger logger = getLogger(__name__) wnl = WordNetLemmatizer() from nltk.corpus import stopwords from nltk.corpus import wordnet as wn stops = set(stopwords.words("english")) | set( ['?', ',', '.', ';', ':', '"', "'"]) from aspell import Speller asp = Speller('lang', 'en') import spacy nlp = spacy.load('en') def get(w): if w in asp: return w try: return asp.suggest(w)[0] except IndexError: return w def split_into_words(text): return [
from pattern.en import suggest
from aspell import Speller

# The algorithm implemented by hand in spelling_correction_manual.py is also
# available out of the box in the pattern library:
#   https://www.clips.uantwerpen.be/pattern
# (Compatible with Python 2.7 only.)
#
# "Pattern is a web mining module for the Python programming language.
# It has tools for data mining (Google, Twitter and Wikipedia API, a web
# crawler, a HTML DOM parser), natural language processing (part-of-speech
# taggers, n-gram search, sentiment analysis, WordNet), machine learning
# (vector space model, clustering, SVM), network analysis and <canvas>
# visualization."
#
# Other libraries are:
#   PyEnchant: http://pythonhosted.org/pyenchant/
#   AspellPython, a wrapper around GNU Aspell (requires libaspell-dev):
#     https://github.com/WojciechMula/aspell-python
#       sudo apt install libaspell-dev
#       pip install aspell-python-py3

# Suggestions from pattern for two misspelled words.
for _misspelled in ("fianlly", "flaot"):
    print(suggest(_misspelled))

# Suggestions from aspell for the first of those words.
sp = Speller()
print(sp.suggest("fianlly"))
def score(self, data): scores = [] txt = self.tokenizer(self.text) for d in self.make_docs(data): score = max([self.sim(txt, doc) for doc in d]) scores.append(score) return scores def _create(self, data): return Series(self.score(data), index=data.index, name=data.name) chkr = None try: from aspell import Speller chkr = Speller() except ImportError: pass def count_spell_errors(toks, exemptions): if not toks: return 0 return sum([not chkr.check(t) for t in toks if t not in exemptions]) class SpellingErrorCount(Feature): def __init__(self, feature, exemptions=None): if not chkr: raise NameError("You need to install the python aspell library") super(SpellingErrorCount, self).__init__(feature)
class Spell(XMPPModule): priority = 40 def init(self): spell = xmpp.config.get('spell') self.aggro = (spell and spell.get('aggro')) or False self.lang = (spell and spell.get('lang')) or 'en' self.rgx = re.compile(r"[^a-zA-Z']") self.speller = Speller('lang', self.lang) def ignore(self, msg, words): if len(words) > 1: for m in words: for w in self.rgx.split(m): self.speller.addtoSession(w) self.xmpp.reply( msg, 'These words are now a component of the descriptivist English language: ' + ', '.join(words[1:])) else: self.xmpp.reply( msg, 'Ah, good, I see you are a prescriptivist as well.') def correct(self, msg, words): for m in words: for w in self.rgx.split(m): if w.isalpha() and not self.speller.check(w): if (len(self.speller.suggest(w)) > 0): self.xmpp.reply( msg, "'" + w + "'? Did you mean '" + self.speller.suggest(w)[0] + "'?") def handleMessage(self, msg): words = msg['body'].split(' ') if words[0] == '!spellignore': self.ignore(msg, words[1:]) elif words[0] == '!spellcheck': self.correct(msg, words[1:]) elif self.aggro: self.correct(msg, words) def help(self, feature): if feature in ['spellignore', '!spellignore', 'ignore']: return ''' Tells the spell checker to ignore the given words as spelling errors. usage: !spellignore [word1] [word2] ... ''' if feature in ['spellcheck', '!spellcheck', 'check']: return ''' Spell checks the given phrase. usage: !spellcheck [word1] [word2] ... ''' if feature == 'lang': return '\nCurrent language in use: ' + self.lang + '\n' if self.aggro: s = ''' When a charlatan spels something incorrectly, correct them.''' else: s = ''' Corrects the spelling of those who request it.''' return s + '''