def __init__(self, s):
    """Normalize the input string and train the stop-word classifier.

    The raw string *s* is first spell-corrected, then the lexicons are
    loaded and the classifier is fitted (order matters: _read_lexique
    must run before _learn_arret).
    """
    checker = SpellChecker()
    self.s = checker.corrections(s)
    self.Lex_learn = {}
    self.Lex_arret = []
    self._read_lexique()
    self.clf = self._learn_arret()
def spell_check(filename):
    """Spell-check the contents of an uploaded file.

    Reads *filename* from the app's upload folder, runs it through the
    spell checker, and returns the result as JSON.  Returns a 400
    response when the file does not exist.
    """
    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    try:
        # Context manager guarantees the handle is closed; the original
        # called open(...).read() and leaked the file object.
        with open(path) as upload:
            text = upload.read()
    except FileNotFoundError:
        return jsonify('File not found'), 400
    # Only the open/read can raise FileNotFoundError, so the checker
    # call sits outside the try block.
    result = SpellChecker().multiple_check(text)
    return jsonify(result)
def main(dictionary, intractive):
    """Fuzz dictionary words and optionally spell-check them interactively.

    NOTE(review): Python 2 syntax (print statements).  The parameter name
    'intractive' looks like a typo for 'interactive'; left unchanged
    because callers may pass it by keyword.
    """
    fuzzy = FuzzyWord()
    if not intractive:
        # Batch mode: emit a fuzzed variant of every dictionary word.
        with open(dictionary) as f:
            for line in f:
                # line[:-1] strips the trailing newline.
                print fuzzy.generate(line[:-1])
    else:
        from spell_checker import SpellChecker
        spchecker = SpellChecker()
        spchecker.load(dictionary)
        # Interactive REPL: fuzz each word typed on stdin, then ask the
        # checker to recover the original spelling.  Trailing commas on
        # the prints keep prompt, fuzzed word, and suggestion on one line.
        while True:
            print '> ',
            fuzzy_word = fuzzy.generate(sys.stdin.readline()[:-1])
            print fuzzy_word,
            word = spchecker.check(fuzzy_word)
            print word if word else 'NO SUGGESTION'
class TransformService(object):
    """Facade over the available message transformations."""

    def __init__(self):
        # Construction order kept as-is: spell checker first, then the
        # lolcat translator.
        self.spellChecker = SpellChecker()
        self.lolCatTranslator = LolCatTranslator()

    def spell_check(self, message):
        """Return *message* run through the spell checker."""
        corrected = self.spellChecker.spell_check_message(message)
        return corrected

    def lol_cat(self, message):
        """Return *message* translated into lolcat speak."""
        translated = self.lolCatTranslator.translate_message(message)
        return translated
def build_word_dict(tokenized_corpus_filenames, pronouncing_word_count_filename):
    """Build word/dictionary-word index mappings from tokenized corpora.

    Words found in the checker's vocabulary map to themselves; longer
    unknown words (> 5 chars) are mapped to their spelling correction
    when one exists; everything else is treated as slang.

    Returns a dict with: word_to_dword, dword_to_int, word_to_int,
    word_counts, slang_word_counts.
    """
    word_to_dword = {}
    dword_to_int = {}
    word_to_int = {}
    word_counts = defaultdict(int)
    slang_word_counts = defaultdict(int)

    def add_word(orig, dword=None):
        # Slang path: no dictionary word available for *orig*.
        if not dword:
            # NOTE(review): this assigns the *next* dword index, which a
            # future dictionary word may also receive — preserved as-is.
            word_to_int[orig] = len(dword_to_int)
            slang_word_counts[orig] += 1
            return
        if dword not in dword_to_int:
            dword_to_int[dword] = len(dword_to_int)
        # Only the first mapping for *orig* sticks.
        if orig not in word_to_dword:
            word_to_dword[orig] = dword
            word_to_int[orig] = dword_to_int[dword]
        word_counts[dword] += 1

    spell_checker = SpellChecker(pronouncing_word_count_filename)
    for corpus_name in tokenized_corpus_filenames:
        with open(corpus_name, 'r') as corpus_file:
            tokens = corpus_file.read().split()
            for token in tokens:
                if token in spell_checker.word_counter:
                    add_word(token, token)
                    continue
                # Only attempt correction on longer words; short unknown
                # tokens go straight to the slang bucket.
                correction = (spell_checker.correction(token)
                              if len(token) > 5 else None)
                if correction:
                    add_word(token, correction)
                else:
                    add_word(token)
    return dict(word_to_dword=word_to_dword,
                dword_to_int=dword_to_int,
                word_to_int=word_to_int,
                word_counts=word_counts,
                slang_word_counts=slang_word_counts)
from spell_checker import SpellChecker

if __name__ == '__main__':
    # Simple REPL: read a word, normalize it, print the suggestion.
    print('\nThis is a simple deterministic spell checker. It corrects lower/upper case and mistyped vowels.')
    print('At the prompt, enter the word you want to spell check.\n')
    checker = SpellChecker('../words.txt')
    while True:
        query = input('> ').strip().lower()
        suggestion = checker.spell_check(query)
        if suggestion == 'No suggestion':
            print(suggestion)
        else:
            print(f"Did you mean '{suggestion}' ?")
from text_preprocessing import clean_text
from vocabulary import build_vocabulary, known_words, WORDS_NLTK
from spell_checker import SpellChecker

# Load training text.
# NOTE(review): filepath is intentionally left empty — the script crashes
# until the user fills it in, per the comment below.
filepath = ''  # complete with the filepath where your data are
with open(filepath) as f:
    text = f.read()

# Clean text before vocabulary extraction.
text = clean_text(text)

# Build vocabulary: WORDS maps words to (presumably) counts — verify
# against build_vocabulary; KNOWN_WORDS is derived from its keys.
WORDS = build_vocabulary(text)
KNOWN_WORDS = known_words(list(WORDS.keys()))

# Instantiate SpellChecker and run a smoke-test correction.
spellchecker = SpellChecker(words=WORDS, known_words=KNOWN_WORDS)
spellchecker.spell_checking('helo')
def setUp(self):
    """Create a fresh checker preloaded with the two test words."""
    self.spchecker = SpellChecker()
    for known in ('job', 'jaob'):
        self.spchecker.register(known)
class TestStringMethods(unittest.TestCase):
    """Exercise SpellChecker against a two-word dictionary (job, jaob)."""

    def setUp(self):
        self.spchecker = SpellChecker()
        self.spchecker.register('job')
        self.spchecker.register('jaob')

    def test_no_suggesstion(self):
        # Inputs that must yield no suggestion: empty/None, unknown
        # words, non-letter characters, and non-string types.
        for query in (None, '', 'joe', 'jobs', 'job1', 'spell_checker',
                      'spell checker', 1, ['job']):
            self.assertIsNone(self.spchecker.check(query))

    def test_case_error(self):
        for query, expected in (('JOB', 'job'),
                                ('JoB', 'job'),
                                ('JaoB', 'jaob')):
            self.assertEqual(self.spchecker.check(query), expected)

    def test_vowel_error(self):
        for query, expected in (('jab', 'job'), ('jaAb', 'jaob')):
            self.assertEqual(self.spchecker.check(query), expected)

    def test_duplicate_error(self):
        for query, expected in (('jooooobbbb', 'job'),
                                ('jjjjjaaaaaab', 'jaob')):
            self.assertEqual(self.spchecker.check(query), expected)

    def test_combination_error(self):
        # Case + vowel + duplicate errors combined.
        for query, expected in (('jJoOoAEIbBbB', 'job'),
                                ('jJJaAeIObBB', 'jaob')):
            self.assertEqual(self.spchecker.check(query), expected)
from spell_checker import SpellChecker NWORDS = file('../../data/CombinedProductText.dat').read() spell_checker = SpellChecker(NWORDS) print spell_checker.correct('Cosmopolitan')
def create_spell_checker(populate_dict=True):
    """Build a SpellChecker, optionally preloaded with the default dictionary."""
    spell_checker = SpellChecker()
    if populate_dict:
        entries = read_dictionary(dictionary_file)
        spell_checker.add_dictionary(entries)
    return spell_checker
# NOTE(review): this chunk appears truncated — _preprocess_bunch visibly
# continues past the end of the visible source; code left byte-identical.
from tensorflow.contrib.training import HParams
from gensim.models import KeyedVectors
import nltk.tokenize as tokenizers
from spell_checker import SpellChecker
import numpy as np
import argparse
import codecs
import yaml
from nltk.stem import WordNetLemmatizer

# Module-level singletons shared by the preprocessing helpers.
spell_checker = SpellChecker()
lemmatizer = WordNetLemmatizer()

# Dispatch table: tokenizer name -> tokenizer callable; 'none' wraps the
# whole text in a single-element list.
t_functions = {
    'word': tokenizers.word_tokenize,
    'wordpunct': tokenizers.wordpunct_tokenize,
    'none': lambda t: [t]
}


def pad_sequence(sequence, max_len=60, padding_value='<PAD>'):
    """Truncate *sequence* to *max_len* items, right-padding with
    *padding_value* when it is shorter."""
    padding_size = max(max_len - len(sequence), 0)
    return sequence[:max_len] + [padding_value] * padding_size


def _preprocess_bunch(text, max_len=None, padding_value='<PAD>',
                      wordlevel=True, check_spelling=True, sep='<SEP>',
                      bunch_size=None):
    """Lowercase *text*, split it on *sep*, and (optionally) tokenize each
    piece at word level.  NOTE(review): body truncated in this view —
    max_len, padding_value, check_spelling and bunch_size are consumed
    further down, outside the visible source."""
    processed_texts = text
    # Decode bytes input leniently; str passes through unchanged.
    if not isinstance(processed_texts, str):
        processed_texts = processed_texts.decode('utf-8', errors='ignore')
    processed_texts = processed_texts.lower().split(sep)
    if wordlevel:
        processed_texts = map(tokenizers.word_tokenize, processed_texts)
#!/usr/bin/env python import re import sys from spell_checker import SpellChecker nonword_pattern = re.compile(r"[^a-zA-Z']") sc = SpellChecker() def get_word_freq(text): word_freq = {} words = text.split() for word in words: word = word.lower() if (sc.check(word)): if word in word_freq: word_freq[word] += 1 else: word_freq[word] = 1 return word_freq review = {} for line in sys.stdin: line = line.strip() try: key, value = line.split(': ', 1)
# This script exercises a plugin spelling checker. # A filename is given at the command line, the file is then # searched for errors and the results printed in stdout. from spell_checker import SpellChecker print 'This file uses a dictionary of known words to check a text file ' + \ 'for spelling. Supply paths to these files or press enter for defaults.\n' known_words = raw_input('Filename or path to dictionary of known words: ') print 'Loading trie from dictionary file...' if known_words is None or known_words == '': known_words = 'words.txt' speller = SpellChecker(known_words) filename = raw_input('File to check: ') if filename is None or filename == '': filename = 'test_doc.txt' print 'Using default test file...\n' for mistake in speller.iter_spelling_on_file(filename): print mistake print '\nDone!'
def __init__(self):
    """Create the delegate translators this service forwards to."""
    checker = SpellChecker()
    translator = LolCatTranslator()
    self.spellChecker = checker
    self.lolCatTranslator = translator
def spell_check_post():
    """Flask handler: spell-check the 'data' field of a JSON POST body.

    Returns the original text alongside the checker result as JSON.
    A missing/invalid JSON body (or a body without 'data') now yields a
    400 instead of the AttributeError/500 the original produced when
    request.get_json() returned None.
    """
    payload = request.get_json(silent=True) or {}
    data = payload.get('data')
    if data is None:
        return jsonify({'error': "missing 'data' field"}), 400
    spell_checker = SpellChecker()
    result = spell_checker.multiple_check(data)
    return jsonify({'original_text': data, 'spell_check': result})
def main():
    """Demo: correct a hard-coded misspelled sentence and print the result."""
    sample = 'Hello ther i em Philip'
    checker = SpellChecker()
    print(checker.correct_sentence(sample))