Ejemplo n.º 1
0
 def __init__(self, s):
     """Spell-correct the input and set up the lexicon-backed classifier.

     Stores the result of ``SpellChecker.corrections(s)`` on ``self.s``,
     creates the empty ``Lex_learn`` dict and ``Lex_arret`` list, calls
     ``_read_lexique()`` and finally keeps whatever ``_learn_arret()``
     returns as ``self.clf``.

     Args:
         s: Raw input handed to ``SpellChecker.corrections``.
     """
     checker = SpellChecker()
     self.s = checker.corrections(s)
     # Containers must exist before _read_lexique() runs
     # (presumably it fills them -- confirm against its definition).
     self.Lex_learn = {}
     self.Lex_arret = []
     self._read_lexique()
     self.clf = self._learn_arret()
Ejemplo n.º 2
0
def spell_check(filename):
    """Spell-check a file from the upload folder and return the result as JSON.

    Args:
        filename: Name of a file located in ``app.config['UPLOAD_FOLDER']``.

    Returns:
        The JSON response wrapping ``SpellChecker.multiple_check`` applied to
        the file's text, or the pair ``(jsonify('File not found'), 400)`` when
        a ``FileNotFoundError`` is raised.
    """
    try:
        spell_checker = SpellChecker()
        # NOTE(review): filename is joined unsanitised; if it originates from
        # the request, confirm it cannot escape the upload folder.
        path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        # Read inside a context manager so the handle is closed right away
        # instead of being left open until garbage collection.
        with open(path) as uploaded:
            text = uploaded.read()
        result = spell_checker.multiple_check(text)
        return jsonify(result)
    except FileNotFoundError:
        return jsonify('File not found'), 400
Ejemplo n.º 3
0
def main(dictionary, intractive):
    fuzzy = FuzzyWord()
    if not intractive:
        with open(dictionary) as f:
            for line in f:
                print fuzzy.generate(line[:-1])
    else:
        from spell_checker import SpellChecker
        spchecker = SpellChecker()
        spchecker.load(dictionary)
        while True:
            print '> ',
            fuzzy_word = fuzzy.generate(sys.stdin.readline()[:-1])
            print fuzzy_word,
            word = spchecker.check(fuzzy_word)
            print word if word else 'NO SUGGESTION'
Ejemplo n.º 4
0
class TransformService(object):
    """Bundles a SpellChecker and a LolCatTranslator behind two methods."""

    def __init__(self):
        """Create and store one instance of each collaborator."""
        self.spellChecker = SpellChecker()
        self.lolCatTranslator = LolCatTranslator()

    def spell_check(self, message):
        """Return ``message`` as processed by ``spell_check_message``."""
        checker = self.spellChecker
        return checker.spell_check_message(message)

    def lol_cat(self, message):
        """Return ``message`` as processed by ``translate_message``."""
        translator = self.lolCatTranslator
        return translator.translate_message(message)
Ejemplo n.º 5
0
def build_word_dict(tokenized_corpus_filenames,
                    pronouncing_word_count_filename):
    """Index every token of the given corpora against a spell checker.

    Each whitespace-separated token is mapped to a "dictionary word"
    (dword): the token itself when it is in ``spell_checker.word_counter``,
    otherwise the checker's correction when the token is longer than five
    characters and a correction exists. Tokens with no dword are counted
    as slang.

    Args:
        tokenized_corpus_filenames: Iterable of paths to text files.
        pronouncing_word_count_filename: Argument passed to ``SpellChecker``.

    Returns:
        A dict with the keys ``word_to_dword``, ``dword_to_int``,
        ``word_to_int``, ``word_counts`` and ``slang_word_counts``.
    """
    word_to_dword = {}
    dword_to_int = {}
    word_to_int = {}
    word_counts = defaultdict(int)
    slang_word_counts = defaultdict(int)

    def add_word(orig, dword=None):
        if dword:
            # First sighting of a dword gets the next free index.
            dword_to_int.setdefault(dword, len(dword_to_int))
            if orig not in word_to_dword:
                word_to_dword[orig] = dword
                word_to_int[orig] = dword_to_int[dword]
            word_counts[dword] += 1
            return

        # NOTE(review): the slang index is the current size of dword_to_int,
        # so it can coincide with an index later handed to a dictionary
        # word -- confirm this is intended.
        word_to_int[orig] = len(dword_to_int)
        slang_word_counts[orig] += 1

    checker = SpellChecker(pronouncing_word_count_filename)
    for path in tokenized_corpus_filenames:
        with open(path, 'r') as corpus:
            tokens = corpus.read().split()
        for token in tokens:
            if token in checker.word_counter:
                canonical = token
            elif len(token) > 5:
                # Only longer tokens are worth a correction attempt.
                canonical = checker.correction(token)
            else:
                canonical = None
            add_word(token, canonical)

    return {
        'word_to_dword': word_to_dword,
        'dword_to_int': dword_to_int,
        'word_to_int': word_to_int,
        'word_counts': word_counts,
        'slang_word_counts': slang_word_counts,
    }
Ejemplo n.º 6
0
def build_word_dict(tokenized_corpus_filenames, pronouncing_word_count_filename):
    """Build word/index/count tables for the tokens found in the corpora.

    A token maps to a "dictionary word" (dword) when it is present in
    ``spell_checker.word_counter`` (the dword is the token itself) or when
    it is longer than five characters and the checker returns a truthy
    correction (the dword is that correction). Every other token is
    tallied as slang.

    Args:
        tokenized_corpus_filenames: Iterable of paths to text files whose
            contents are split on whitespace.
        pronouncing_word_count_filename: Argument passed to ``SpellChecker``.

    Returns:
        A dict holding ``word_to_dword``, ``dword_to_int``, ``word_to_int``,
        ``word_counts`` and ``slang_word_counts``.
    """
    word_to_dword = {}
    dword_to_int = {}
    word_to_int = {}
    word_counts = defaultdict(int)
    slang_word_counts = defaultdict(int)

    def add_word(orig, dword=None):
        if dword:
            # A dword receives the next free index the first time it is seen.
            dword_to_int.setdefault(dword, len(dword_to_int))
            if orig not in word_to_dword:
                word_to_dword[orig] = dword
                word_to_int[orig] = dword_to_int[dword]
            word_counts[dword] += 1
            return

        # NOTE(review): slang tokens take the current size of dword_to_int
        # as their index, which may later be assigned to a dictionary word
        # as well -- confirm this is intended.
        word_to_int[orig] = len(dword_to_int)
        slang_word_counts[orig] += 1

    checker = SpellChecker(pronouncing_word_count_filename)
    for path in tokenized_corpus_filenames:
        with open(path, 'r') as corpus:
            tokens = corpus.read().split()
        for token in tokens:
            if token in checker.word_counter:
                canonical = token
            elif len(token) > 5:
                # Short unknown tokens skip the correction attempt entirely.
                canonical = checker.correction(token)
            else:
                canonical = None
            add_word(token, canonical)

    return {
        'word_to_dword': word_to_dword,
        'dword_to_int': dword_to_int,
        'word_to_int': word_to_int,
        'word_counts': word_counts,
        'slang_word_counts': slang_word_counts,
    }
Ejemplo n.º 7
0
from spell_checker import SpellChecker

if __name__ == '__main__':
    # Interactive prompt: read a word, spell check it, print the outcome.

    print('\nThis is a simple deterministic spell checker. It corrects lower/upper case and mistyped vowels.')
    print('At the prompt, enter the word you want to spell check.\n')

    spell_checker = SpellChecker('../words.txt')

    while True:
        try:
            word = input('> ')
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session cleanly instead of with a
            # traceback; the bare print() terminates the prompt line.
            print()
            break
        # The checker is handed a trimmed, lower-cased word.
        result = spell_checker.spell_check(word.strip().lower())
        if result == 'No suggestion':
            print(result)
        else:
            print('Did you mean \'' + result + '\' ?')
Ejemplo n.º 8
0
from text_preprocessing import clean_text
from vocabulary import build_vocabulary, known_words, WORDS_NLTK
from spell_checker import SpellChecker


# Load the training text from disk.
# NOTE: `filepath` is a placeholder and must be filled in before running;
# opening the empty path below fails.
filepath = ''  # complete with the filepath where your data are
with open(filepath) as f:
    text = f.read()

# Clean the raw text with the clean_text helper.
text = clean_text(text)

# Build the vocabulary from the cleaned text, then derive KNOWN_WORDS from
# the vocabulary's keys.
WORDS = build_vocabulary(text)
KNOWN_WORDS = known_words(list(WORDS.keys()))

# Instantiate SpellChecker and run it on a sample misspelling.
# NOTE(review): the return value of spell_checking() is discarded here;
# presumably it should be printed or assigned -- confirm intent.
spellchecker = SpellChecker(words=WORDS, known_words=KNOWN_WORDS)
spellchecker.spell_checking('helo')
Ejemplo n.º 9
0
 def setUp(self):
     """Give each test a fresh checker with 'job' and 'jaob' registered."""
     self.spchecker = SpellChecker()
     for known_word in ('job', 'jaob'):
         self.spchecker.register(known_word)
Ejemplo n.º 10
0
class TestStringMethods(unittest.TestCase):
    """Exercises SpellChecker.check with only 'job' and 'jaob' registered."""

    def setUp(self):
        """Give each test a fresh checker holding the two known words."""
        self.spchecker = SpellChecker()
        for known_word in ('job', 'jaob'):
            self.spchecker.register(known_word)

    def _assert_corrections(self, cases):
        """Assert that check(given) equals expected for each (given, expected)."""
        for given, expected in cases:
            self.assertEqual(self.spchecker.check(given), expected)

    def test_no_suggesstion(self):
        """Unmatched words and non-string inputs must yield None."""
        rejected = (
            None,
            '',
            'joe',
            'jobs',
            'job1',
            'spell_checker',
            'spell checker',
            1,
            ['job'],
        )
        for candidate in rejected:
            self.assertIsNone(self.spchecker.check(candidate))

    def test_case_error(self):
        """Wrong letter case is corrected to the registered word."""
        self._assert_corrections((
            ('JOB', 'job'),
            ('JoB', 'job'),
            ('JaoB', 'jaob'),
        ))

    def test_vowel_error(self):
        """A mistyped vowel is corrected to the registered word."""
        self._assert_corrections((
            ('jab', 'job'),
            ('jaAb', 'jaob'),
        ))

    def test_duplicate_error(self):
        """Runs of repeated letters are corrected to the registered word."""
        self._assert_corrections((
            ('jooooobbbb', 'job'),
            ('jjjjjaaaaaab', 'jaob'),
        ))

    def test_combination_error(self):
        """Case, vowel and repetition errors combined are still corrected."""
        self._assert_corrections((
            ('jJoOoAEIbBbB', 'job'),
            ('jJJaAeIObBB', 'jaob'),
        ))
Ejemplo n.º 11
0
from spell_checker import SpellChecker

NWORDS = file('../../data/CombinedProductText.dat').read()

spell_checker = SpellChecker(NWORDS)

print spell_checker.correct('Cosmopolitan')
Ejemplo n.º 12
0
def create_spell_checker(populate_dict=True):
    """Create a SpellChecker, optionally loading the default dictionary.

    Args:
        populate_dict: When truthy, the words returned by
            ``read_dictionary(dictionary_file)`` are added to the checker.

    Returns:
        The new ``SpellChecker`` instance.
    """
    checker = SpellChecker()
    if not populate_dict:
        return checker

    words = read_dictionary(dictionary_file)
    checker.add_dictionary(words)
    return checker
Ejemplo n.º 13
0
from tensorflow.contrib.training import HParams
from gensim.models import KeyedVectors
import nltk.tokenize as tokenizers
from spell_checker import SpellChecker
import numpy as np
import argparse
import codecs
import yaml
from nltk.stem import WordNetLemmatizer

# Module-level instances, created once when this module is imported.
spell_checker = SpellChecker()
lemmatizer = WordNetLemmatizer()

# Tokenizer lookup by name. 'word' and 'wordpunct' map to the NLTK
# tokenizers of the same name; 'none' performs no splitting and wraps the
# input in a single-element list.
t_functions = {
    'word': tokenizers.word_tokenize,
    'wordpunct': tokenizers.wordpunct_tokenize,
    'none': lambda t: [t]
}


def pad_sequence(sequence, max_len=60, padding_value='<PAD>'):
    """Return ``sequence`` cut or right-padded to ``max_len`` items.

    Args:
        sequence: List of items to normalise.
        max_len: Target length; longer inputs are truncated to it.
        padding_value: Item appended as often as needed to reach ``max_len``.

    Returns:
        A new list: the first ``max_len`` items of ``sequence`` followed by
        any padding required.
    """
    shortfall = max_len - len(sequence)
    # No padding when the input is already long enough.
    filler = [padding_value] * (shortfall if shortfall > 0 else 0)
    return sequence[:max_len] + filler


def _preprocess_bunch(text, max_len=None, padding_value='<PAD>', wordlevel=True, check_spelling=True, sep='<SEP>', bunch_size=None):
    processed_texts = text
    if not isinstance(processed_texts, str): processed_texts = processed_texts.decode('utf-8', errors='ignore')
    processed_texts = processed_texts.lower().split(sep)
    if wordlevel:
        processed_texts = map(tokenizers.word_tokenize, processed_texts)
#!/usr/bin/env python

import re
import sys
from spell_checker import SpellChecker

# Matches any single character that is neither an ASCII letter nor an apostrophe.
nonword_pattern = re.compile(r"[^a-zA-Z']")
# Module-level checker instance, created once at import time.
sc = SpellChecker()


def get_word_freq(text):
    """Count the lower-cased tokens of ``text`` that ``sc.check`` accepts.

    Args:
        text: String that is split on whitespace into tokens.

    Returns:
        A dict mapping each accepted lower-cased token to the number of
        times it occurs.
    """
    counts = {}
    for raw_token in text.split():
        token = raw_token.lower()
        # Tokens for which the checker returns a falsy value are not counted.
        if not sc.check(token):
            continue
        counts[token] = counts.get(token, 0) + 1
    return counts

review = {}

for line in sys.stdin:

    line = line.strip()
    try:
        key, value = line.split(': ', 1)
Ejemplo n.º 15
0
# This script exercises a plugin spelling checker.
# A filename is given at the command line, the file is then
# searched for errors and the results printed in stdout.
from spell_checker import SpellChecker

print 'This file uses a dictionary of known words to check a text file ' + \
    'for spelling. Supply paths to these files or press enter for defaults.\n'

known_words = raw_input('Filename or path to dictionary of known words: ')
print 'Loading trie from dictionary file...'

if known_words is None or known_words == '':
    known_words = 'words.txt'
speller = SpellChecker(known_words)
filename = raw_input('File to check: ')
if filename is None or filename == '':
    filename = 'test_doc.txt'
    print 'Using default test file...\n'

for mistake in speller.iter_spelling_on_file(filename):
    print mistake

print '\nDone!'
Ejemplo n.º 16
0
 def __init__(self):
     """Create and store a SpellChecker and a LolCatTranslator on the instance."""
     # Both objects are constructed with no arguments.
     self.spellChecker = SpellChecker()
     self.lolCatTranslator = LolCatTranslator()
Ejemplo n.º 17
0
def spell_check_post():
    """Spell-check the ``data`` field of the JSON request body.

    Returns:
        A JSON response with ``original_text`` (the submitted ``data``
        value) and ``spell_check`` (the result of
        ``SpellChecker.multiple_check`` on that value).
    """
    payload = request.get_json()
    original_text = payload.get('data')
    checked = SpellChecker().multiple_check(original_text)
    return jsonify({'original_text': original_text, 'spell_check': checked})
Ejemplo n.º 18
0
def main():
    """Correct a hard-coded sample sentence and print the result."""
    sample = 'Hello ther i em Philip'
    print(SpellChecker().correct_sentence(sample))