Ejemplo n.º 1
0
def suggestions(word):
    """Return spelling suggestions for a single word.

    Words that are entirely upper case or that contain a digit are skipped
    and produce an empty dict.

    :param word: the word to check.
    :return: ``{word: {"correction": best, "suggestion": candidates}}``, or
        ``{}`` when the word is skipped.
    """
    errorList = {}
    if word.isupper() or any(char.isdigit() for char in word):
        return errorList

    # Build the checker only once we know it is needed.
    spell = SpellChecker()
    # candidates() is computed a single time and reused. Recent pyspellchecker
    # releases return None when nothing is found (older ones return {word});
    # normalise to the older shape so max() never receives None.
    candidateWords = spell.candidates(word) or {word}
    errorList[word] = {
        "correction": max(candidateWords, key=spell.word_probability),
        "suggestion": candidateWords,
    }
    return errorList
Ejemplo n.º 2
0
 def test_candidates(self):
     ''' candidates() yields the expected set for a typo, a real word and a dash '''
     checker = SpellChecker()
     expected_for_ths = {
         'tes', 'tps', 'th', 'thi', 'tvs', 'tds', 'tbs', 'bhs', 'thf',
         'chs', 'tis', 'thes', 'tls', 'tho', 'thu', 'thr', 'dhs',
         "th'", 'thus', 'ts', 'ehs', 'tas', 'ahs', 'thos', 'thy',
         'tcs', 'nhs', 'the', 'tss', 'hs', 'lhs', 'vhs', "t's", 'tha',
         'whs', 'ghs', 'rhs', 'this',
     }
     # (input word, expected candidate set), checked in this order
     cases = (
         ('ths', expected_for_ths),
         ('the', {'the'}),
         ('-', {'-'}),
     )
     for word, expected in cases:
         self.assertEqual(checker.candidates(word), expected)
Ejemplo n.º 3
0
 def test_candidates(self):
     ''' candidates() yields the expected set for a typo, a real word, a dash and a nonsense word '''
     checker = SpellChecker()
     expected_for_ths = {
         'tes', 'thas', 'tis', 'thse', 'thes', 'thus', 'ohs', 'thu',
         'thy', 'thi', 'tas', 'tus', 'thos', 'ahs', 'tho', 'tha',
         'thsi', 'tos', 'the', 'this', 'iths',
     }
     # (input word, expected candidate set), checked in this order; the last
     # entry cannot exist as a word, so only the input itself is expected back
     cases = (
         ('ths', expected_for_ths),
         ('the', {'the'}),
         ('-', {'-'}),
         ('manasaeds', {'manasaeds'}),
     )
     for word, expected in cases:
         self.assertEqual(checker.candidates(word), expected)
Ejemplo n.º 4
0
def spellCorrect(word):
    """Return the candidate corrections for ``word``.

    Words shorter than 8 characters use a default-constructed checker; longer
    words use a checker created with ``distance=2``. When the word's
    dictionary frequency is 0 (it is treated as misspelled) the candidates
    are also printed.

    :param word: the word to look up.
    :return: whatever ``SpellChecker.candidates`` returns for ``word``.
    """
    spell = SpellChecker() if len(word) < 8 else SpellChecker(distance=2)

    # candidates() is the expensive call, so run it once and reuse the result
    # for both the optional print and the return value.
    candidates = spell.candidates(word)

    # if the word is misspelled (frequency 0), show the likely options
    if spell[word] == 0:
        print(candidates)

    return candidates
 def test_candidates(self):
     ''' candidates() yields the expected set for a typo, a real word, a dash and a nonsense word '''
     checker = SpellChecker()
     expected_for_ths = {
         'tes', 'tps', 'th', 'thi', 'tvs', 'tds', 'tbs', 'bhs', 'thf',
         'chs', 'tis', 'thes', 'tls', 'tho', 'thu', 'thr', 'dhs',
         "th'", 'thus', 'ts', 'ehs', 'tas', 'ahs', 'thos', 'thy',
         'tcs', 'nhs', 'the', 'tss', 'hs', 'lhs', 'vhs', "t's", 'tha',
         'whs', 'ghs', 'rhs', 'this',
     }
     # (input word, expected candidate set), checked in this order; the last
     # entry cannot exist as a word, so only the input itself is expected back
     cases = (
         ('ths', expected_for_ths),
         ('the', {'the'}),
         ('-', {'-'}),
         ('manasaeds', {'manasaeds'}),
     )
     for word, expected in cases:
         self.assertEqual(checker.candidates(word), expected)
class SpellChecking():
    """Sentence-level spell correction ranked by a language-model scorer.

    Misspelled tokens are located with ``SpellChecker``. For each of them
    every candidate replacement is substituted into the sentence and the
    variant that ``lm_scorer`` ranks highest is kept.
    """

    def __init__(self, stc, lm_scorer):
        """Store the sentence and the scorer and create the spell checker.

        :param stc: the sentence that needs to be checked (only stored here;
            the methods below receive the sentence as an argument).
        :param lm_scorer: object with a ``score(sentences)`` method. Its
            result is used as ``max(result, key=result.get)`` to index the
            sentence list -- presumably a dict of index -> score; confirm
            against the scorer implementation.
        """
        self.stc = stc
        self.lm_scorer = lm_scorer
        self.checker = SpellChecker()

    def puncRemove(self, sentence):
        """Return ``sentence`` with every ``string.punctuation`` char removed."""
        return sentence.translate(str.maketrans('', '', string.punctuation))

    def errorFind(self, sentence):
        """Locate the misspelled tokens of a space-separated sentence.

        :return: ``(posList, numList)`` -- the token positions whose
            correction differs from the token, and the number of candidates
            available for each of those tokens.
        """
        posList = []  # positions of the wrong words
        numList = []  # number of candidates for each wrong word
        for tokenIndex, token in enumerate(sentence.split(' ')):
            if self.checker.correction(token) == token:
                continue  # this word is right
            candidates = self.checker.candidates(token)
            # Recent pyspellchecker releases return None when there is no
            # candidate; a token with nothing to substitute is not reported.
            if not candidates:
                continue
            posList.append(tokenIndex)
            numList.append(len(candidates))
        return (posList, numList)

    def suggest(self, sentence):
        """Return ``sentence`` with each misspelled token replaced.

        Tokens are processed in order; for each one the candidate sentence
        with the highest language-model score becomes the new working
        sentence.
        """
        (posList, numList) = self.errorFind(sentence)
        lenInt = len(sentence.split(' '))
        for index in range(len(posList)):
            tokenList = sentence.split(' ')
            # If an earlier replacement changed the token count, shift the
            # recorded positions by the same amount.
            lenGapInt = len(tokenList) - lenInt
            if lenGapInt != 0:
                lenInt = len(tokenList)
                posList = [pos + lenGapInt for pos in posList]
            pos = posList[index]
            # Fetch the candidates once per token instead of once per
            # candidate; this also guarantees a single, consistent ordering.
            candidates = list(self.checker.candidates(tokenList[pos]) or ())
            candiList = []
            for candidate in candidates:
                newTokens = list(tokenList)
                newTokens[pos] = candidate
                candiList.append(' '.join(newTokens))  # store candidates
            if not candiList:
                continue  # nothing to choose from; keep the sentence as is
            scoreDict = self.lm_scorer.score(candiList)  # score sentences
            maxInt = max(scoreDict, key=scoreDict.get)  # best-scoring index
            sentence = candiList[maxInt]
        return sentence
Ejemplo n.º 7
0
 def spell_check(self, query):
     """
     Check the query for spelling errors and replace any misspelt word,
     using the python library 'pyspellchecker'.

     A misspelt word is replaced by the first of its candidates found in
     ``docFreq``; when none is found there, the checker's most likely
     correction is used. The list is modified in place.

     :param query: list of words which may contain misspelt words.
     :return: the same list, holding the correct words of the query as well
              as the replacements for the misspelt ones.
     """
     spell = SpellChecker()
     misspelled = spell.unknown(query)  # set of misspelled words in query
     for i, word in enumerate(query):
         if word not in misspelled:
             continue
         # candidates() may return None when nothing is found (recent
         # pyspellchecker releases); treat that as "no candidates".
         candidates = spell.candidates(word) or ()
         in_corpus = next((c for c in candidates if c in docFreq), None)
         if in_corpus is not None:
             query[i] = in_corpus
         else:
             # Fall back to the one `most likely` answer; keep the original
             # word if the checker has no correction, never store None.
             query[i] = spell.correction(word) or word
     return query
def check_spelling(word_list):
    """Print and return the possibly misspelled words with their suggestions.

    If ``spellchecker`` cannot be imported the user is asked whether it
    should be installed with pip; answering "n" exits the program.

    :param word_list: iterable of words to check.
    :return: dict mapping each unknown word to its candidates.
    """
    try:
        from spellchecker import SpellChecker
    except ModuleNotFoundError as err:
        while True:
            choice = input("Fixing '{}' for you by installing SpellChecker.\n \
                   do you agree? [Y/N]\n".format(err))
            answer = choice.lower()
            if answer == "n":
                sys.exit(0)
            elif answer == "y":
                # An argument list works without a shell on every platform
                # (a single command string only works on Windows), and
                # "sys.executable -m pip" installs into the running
                # interpreter's environment.
                subprocess.call(
                    [sys.executable, "-m", "pip", "install", "pyspellchecker"])
                from spellchecker import SpellChecker
                break
            else:
                print("Not a valid choice.")
    spell = SpellChecker(distance=1)
    misspelled_dict = {
        word: spell.candidates(word) for word in spell.unknown(word_list)
    }
    print("\nWord :: Suggestions\n")
    for key, value in misspelled_dict.items():
        print(f"{key} :: {value}")
    print()
    # The original docstring promised this dictionary but nothing was returned.
    return misspelled_dict
Ejemplo n.º 9
0
def spellcorrect(document):
    """Collect a spelling violation record for every unknown token.

    Registers the ``spelling_correction`` and ``spelling_candidates`` token
    extensions, then walks ``document``. Tokens whose text starts with one of
    the ignored patterns (quotes, whitespace, clitics such as ``'s`` and
    ``n't``) or that the checker knows are skipped.

    :param document: iterable of tokens exposing ``text``, ``idx`` and ``_``
        (presumably a spaCy ``Doc``; confirm against the caller).
    :return: list of dicts, one per misspelled token.
    """
    Token.set_extension('spelling_correction', default=None, force=True)
    Token.set_extension('spelling_candidates', default=None, force=True)

    checker = SpellChecker()
    skip_pattern = r"(“|”|’|\n|\t|\\n|\\t| |'s|’s|n't|n’t)"
    findings = []
    for token in document:
        text = token.text
        skipped = re.match(skip_pattern, text) is not None
        unknown = len(checker.unknown([text])) > 0
        if skipped or not unknown:
            continue

        token._.spelling_correction = checker.correction(text)
        token._.spelling_candidates = checker.candidates(text)
        findings.append({
            'rule_id': 'spellCorrect',
            'word': text,
            'positions': [[token.idx, token.idx + len(text)]],
            'correction': token._.spelling_correction,
            'candidates': list(token._.spelling_candidates),
        })
    return findings
Ejemplo n.º 10
0
class SpellCheckerML:
    """Spell correction that ranks dictionary candidates with autocomplete.

    Candidates come from ``SpellChecker``; they are ranked by the predictions
    the ``autocomplete`` module returns for the previous word.
    """

    def __init__(self):
        self.spell_checker = SpellChecker()
        self.autocomplete = autocomplete
        self.autocomplete.load()

    def train(self, text, model_name=''):
        """Train the autocomplete models on ``text`` and reload them.

        An empty ``model_name`` is forwarded as ``False``.
        """
        self.autocomplete.models.train_models(
            text, model_name=False if model_name == '' else model_name)
        self.autocomplete.load()

    def correction(self, previous_word, word):
        """Return the best correction for ``word`` given ``previous_word``.

        Known words are returned unchanged. Otherwise the spell-checker
        candidate with the highest autocomplete score wins; if no candidate
        appears among the predictions a random candidate is returned.

        :param previous_word: the word preceding ``word``.
        :param word: the word to correct.
        :return: the chosen word; ``word`` itself when it is empty or has no
            candidates.
        """
        if not word or self.spell_checker.known([word]):
            return word

        spell_checker_candidates = self.spell_checker.candidates(word)
        # candidates() may return None (recent pyspellchecker releases).
        if not spell_checker_candidates:
            return word

        autocomplete_predictions = self.autocomplete.predict(previous_word, word[0])
        # Map each predicted word to its first prediction entry; this replaces
        # list.index() guarded by a bare "except:" that hid every error.
        first_prediction = {}
        for prediction in autocomplete_predictions:
            first_prediction.setdefault(prediction[0], prediction)
        best_choices = [
            first_prediction[candidate]
            for candidate in spell_checker_candidates
            if candidate in first_prediction
        ]
        if best_choices:
            # Stable sort + last element keeps the original tie-breaking.
            return sorted(best_choices, key=lambda t: t[1])[-1][0]
        return random.choice(list(spell_checker_candidates))
Ejemplo n.º 11
0
def pyspell_dict(input_dict, name):
    """
    Adds suggested corrections to provided dictionary

    The target words that the checker does not know are loaded into its
    word-frequency table before the misspellings are corrected.

    :param input_dict: input dictionary template, keyed by misspelling. The
        copy made here is shallow, so the per-misspelling detail dicts of
        ``input_dict`` are updated as well.
    :param name: name of dataset for use in file output write
    :return: Updated dictionary with suggestions and candidates, name of file written to
    """
    file_name = name + "_pyspell_dict.txt"
    spell = SpellChecker()
    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # while the target-words file comes from a trusted source.
    # The context manager closes the handle, which the original leaked.
    with open('../spelling_mistakes/target_words_all.txt', 'rb') as target_file:
        target_words = pickle.load(target_file)
    misspelled = spell.unknown(target_words)
    spell.word_frequency.load_words(misspelled)

    working_dict = dict(input_dict)
    for counter, (misspelling, details) in enumerate(working_dict.items()):
        if counter % 100 == 0:
            print(counter)  # progress indicator
        details['suggested'] = spell.correction(misspelling)
        # candidates() may return None (recent pyspellchecker releases).
        candidates = spell.candidates(misspelling)
        details['candidates'] = list(candidates) if candidates else []

    return working_dict, file_name
Ejemplo n.º 12
0
def correct_instructions(instruction):
    """Spell-correct a dosage instruction and return its upper-cased tokens.

    Tokens that are close to one of the known keywords are replaced by that
    keyword; every alphabetic token is then run through the general spell
    correction. For example "TAGE 1 TAGLET" becomes "TAKE 1 TABLET".

    :param instruction: the instruction text (split on whitespace).
    :return: list of corrected, upper-cased tokens.
    """
    spell = SpellChecker()
    # NOTE(review): pyspellchecker appears to accept only distances 1 and 2
    # and to fall back to 2 otherwise -- confirm whether 3 has any effect.
    spell.distance = 3
    keywords = [
        'every', 'once', 'twice', 'thrice', 'daily', 'hours', 'hour', 'day',
        'days', 'weeks', 'morning', 'afternoon', 'night', 'one', 'tablet'
    ]
    # Tokenize the directive
    token_instruction = instruction.split()
    # Candidate words for every token; candidates() may return None when
    # nothing is found, which is treated as "no candidates".
    close_words = [spell.candidates(token) or () for token in token_instruction]

    # Replace a token by a keyword that is among its candidates. Scanning the
    # keyword list in order makes the choice deterministic; the previous loop
    # kept whichever matching keyword came last in set iteration order.
    for counter, candidates in enumerate(close_words):
        match = next((keyword for keyword in keywords if keyword in candidates), None)
        if match is not None:
            token_instruction[counter] = match

    # General spell correction; keep the token when there is no correction
    for counter, token in enumerate(token_instruction):
        if token.isalpha():
            token_instruction[counter] = spell.correction(token) or token

    # Make the tokens uppercase
    return [token.upper() for token in token_instruction]
Ejemplo n.º 13
0
class SC:
    """Thin wrapper that reports whether a single word is spelled correctly."""

    def __init__(self):
        self.spell = SpellChecker()

    def check(self, word):
        """Check one word and report candidates when it is unknown.

        :param word: the word to check.
        :return: ``(True, {})`` when the word is known, otherwise
            ``(False, res)`` where ``res`` holds an ``"error"`` message and
            the ``"candi"`` list of candidates. The candidates are also
            printed.
        """
        res = {}
        misspelled = self.spell.unknown([word])
        if not misspelled:
            return True, res

        # unknown() was given one word, so at most one entry comes back.
        unknown_word = next(iter(misspelled))
        # Look the candidates up once (None means "no candidates") so the
        # printed and the returned lists are the same.
        candidates = list(self.spell.candidates(unknown_word) or ())
        print("Oops...You may spell the wrong word. Here're possible candidates:")
        print("\t" + ", ".join(candidates))
        res["error"] = "Oops...You may spell the wrong word."
        res["candi"] = candidates
        return False, res
Ejemplo n.º 14
0
def locationSpellCheck(location):
    """Return the spell-corrected words of ``location`` joined by spaces.

    :param location: iterable of the words making up the location.
    :return: the corrected words, each followed by a space, or ``-1`` when a
        word is unknown and the checker has no correction for it.
    """
    finalStr = ""
    if not location:
        return finalStr

    # One checker for the whole call; it used to be rebuilt for every word.
    spell = SpellChecker()

    # Each word in location
    for sep in location:
        misspelled = spell.unknown([sep])

        # If word is correctly spelled
        if not misspelled:
            finalStr = finalStr + sep + " "
            continue

        for word in misspelled:
            # Get the most likely answer
            candidateCorrection = spell.correction(word)

            # No usable correction means an invalid word. The old test
            # compared the whole ``location`` with the correction, so this
            # branch could never fire for a list of words.
            if candidateCorrection is None or candidateCorrection == word:
                return -1
            finalStr = finalStr + candidateCorrection + " "
    return finalStr
Ejemplo n.º 15
0
class SpellcheckFilter(AbstractFilter):
    """Filter that turns unknown words into PySpellChecker candidate sets."""

    NAME = "spellcheck"
    spellchecker = None

    def __init__(self):
        self.spellchecker = SpellChecker()

    def info(self):
        """Return the human-readable description of this filter."""
        description = """Spellcheck Filter

        Takes a candidate line and feeds it into PySpellChecker (https://pypi.org/project/pyspellchecker/)
        in an attempt to generate potential word candidates based upon Levenshtein distance.
        This is similar to wordlist expansion done by iphelix's PACK.
        """
        return description

    def filter_line(self, line):
        """Return the candidates of every unknown word found in ``line``.

        ``line`` is handed to ``SpellChecker.unknown`` unchanged; the result
        is a list with one ``candidates()`` value per unknown word.
        """
        checker = self.spellchecker
        return [checker.candidates(unknown) for unknown in checker.unknown(line)]
Ejemplo n.º 16
0
 def test_edit_distance_one(self):
     ''' with distance=1 only the dictionary word one edit away is offered '''
     resource_dir = os.path.dirname(__file__)
     dictionary_path = '{}/resources/small_dictionary.json'.format(resource_dir)
     # language=None: only the small local dictionary is loaded
     checker = SpellChecker(language=None,
                            local_dictionary=dictionary_path,
                            distance=1)
     found = checker.candidates('hike')
     self.assertEqual(found, {'bike'})
Ejemplo n.º 17
0
 def non_pronounceable_id(self, identifier):
     """Print the parts of ``identifier`` that the spell checker does not know.

     :param identifier: object exposing a ``parts`` iterable of words.
     :return: None; the findings and their candidates are printed.
     """
     spell = SpellChecker()
     misspelled_parts = spell.unknown(identifier.parts)
     if misspelled_parts is not None:
         print('{0} misspelled part found in the name'.format(len(misspelled_parts)))
         for misspelled_word in misspelled_parts:
             # Report the identifier being inspected; the old code printed
             # the builtin ``id`` function here.
             print(str(identifier), 'misspelled part in name:', misspelled_word)
             print('\tcandidates', spell.candidates(misspelled_word))
Ejemplo n.º 18
0
def read_sentence():
    """Prompt for a command line and return it in three forms.

    :return: ``(command, raw_command, spell_command)`` -- the lower-cased
        words stripped of non-word characters, the raw space-separated
        words, and the flat list of spelling candidates of every cleaned
        word.
    """
    print("@ ", end="")
    raw_command = input().strip().split(" ")
    command = []
    for piece in raw_command:
        command.append(re.sub(r'\W+', '', piece).lower())
    checker = SpellChecker()
    spell_command = [
        option for token in command for option in checker.candidates(token)
    ]
    return (command, raw_command, spell_command)
class SpellingSuggestor(object):
    """Suggest spellings for a word, including splits into two known words."""

    def __init__(self, word):
        self.word = word
        self.spell = SpellChecker()

    def pre_process(self):
        """Replace runs of punctuation, underscores or dashes by one space."""
        return re.sub(r'([^\s\w]|_|-)+', ' ', self.word)

    def reduce_lengthening(self):
        """Shorten any character repeated three or more times to two."""
        return re.sub(r"(.)\1{2,}", r"\1\1", self.word)

    def spell_checker_result(self):
        """Clean ``self.word`` and return spelling suggestions for it.

        The word is cleaned (``pre_process``, ``reduce_lengthening``,
        lower-casing) and stored back on the instance.

        :return: ``[word]`` when the cleaned word is known; otherwise a set
            built from every split of the word into a head and a tail whose
            candidates are both known: the head words, the tail words longer
            than two characters, and their "head tail" combinations.
        """
        self.word = self.pre_process()
        self.word = self.reduce_lengthening().lower()
        print("word after cleaning ", self.word)
        if not self.spell.unknown([self.word]):
            return [self.word]

        result = set()
        for i in range(1, len(self.word)):
            head = self.word[:i].strip()
            # The tail is the remainder of the word from position i; the old
            # code used self.word[i], i.e. a single character, so a second
            # word could practically never be found.
            tail = self.word[i:].strip()
            # candidates() may return None when nothing is found.
            r1 = self.spell.known(self.spell.candidates(head) or [])
            r2 = self.spell.known(self.spell.candidates(tail) or [])
            if not r1 or not r2:
                continue
            for v1 in r1:
                result.add(v1)
                for v2 in r2:
                    if len(v2) > 2:
                        result.add(v2)
                        result.add(v1 + " " + v2)
        return result
Ejemplo n.º 20
0
def run(targTrainFile, targetOutputFile=""):
    """Write spelling-correction instructions for the words of a CSV file.

    The first comma-separated field of every input line is taken as a word.
    For each word the checker does not know, one line is written:

    * ``del <word>`` when the checker offers no replacement;
    * ``<word>:<replacement>`` when the most likely correction, or failing
      that the first candidate, is itself a word of the input file.

    Words made only of digits and dots are skipped.

    :param targTrainFile: path of the input file.
    :param targetOutputFile: path of the output file; defaults to
        ``../data/spellCorrections.txt`` relative to this module.
    """
    if targetOutputFile == "":
        targetOutputFile = os.path.dirname(os.path.abspath(__file__)) +  \
            "/../data/spellCorrections.txt"

    spell = SpellChecker()

    with open(targTrainFile, mode='r') as source:
        allText = source.read()

    # word -> full line. A leftover debug "print(line); exit(0)" used to end
    # the program on the very first line, so nothing below ever ran.
    allData = {}
    for line in allText.split("\n"):
        if line.startswith(","):
            continue  # empty first field
        word = line.split(",")[0]
        if len(word) == 0:
            continue
        allData[word] = line

    outputLines = []
    wordsSeen = 0

    for key in allData:
        wordsSeen += 1
        if re.search(r'^[\d\.]+$', key):
            continue
        for word in spell.unknown([key]):
            trans = spell.correction(word)
            if trans is None or word == trans:
                # Drop from allData, it's incorrect without replacement
                # (recent pyspellchecker releases signal that with None).
                outputLines.append("del " + word + "\n")
            elif trans in allData:
                outputLines.append(key + ":" + trans + "\n")
            else:
                # Several options: take the first one present in the file.
                for option in spell.candidates(word) or ():
                    if option in allData:
                        outputLines.append(key + ":" + option + "\n")
                        break
        if wordsSeen % 10000 == 0:
            print(wordsSeen)  # progress indicator

    with open(targetOutputFile, mode="w") as output:
        output.write("".join(outputLines))
Ejemplo n.º 21
0
def demo_spellchecker():
    """Demonstrate how to use the spellchecker library.
    Official documentation, pyspellchecker · PyPI: https://pypi.org/project/pyspellchecker/
    Monday 2019-09-09 15:58, from Chen Kunze
    """
    # 1 Create the object
    # The language, case sensitivity and the maximum spell-check distance can be configured
    #   Defaults: 'en' (English), case-insensitive
    spell = SpellChecker()
    # For English, SpellChecker automatically loads the language pack site-packages\spellchecker\resources\en.json.gz, roughly 120,000 words, including word-frequency weights
    d = spell.word_frequency  # d is a WordFrequency object; underneath it uses the Counter class to store its data
    dprint(d.unique_words, d.total_words)  # number of words, sum of the weights

    # 2 Modify the word-frequency table spell.word_frequency
    dprint(d['ckz'])  # a word that does not exist simply yields 0
    d.add('ckz')  # adds one occurrence of the word 'ckz'
    d.load_words(['ckz', 'ckz', 'lyb'])  # words can also be added in bulk
    dprint(d['ckz'], d['lyb'])  # d['ckz']=3  d['lyb']=1
    d.load_words(['ckz'] * 100 + ['lyb'] * 500)  # this trick can be used to add a large weight
    dprint(d['ckz'], d['lyb'])  # d['ckz']=103  d['lyb']=501

    # Likewise, removal has two methods: remove and remove_words
    d.remove('ckz')
    # d.remove_words(['ckz', 'lyb'])  # note that a key that no longer exists ('ckz') cannot be removed, otherwise a KeyError is raised
    dprint(d['ckz'], d['lyb'])  # d['ckz']=0  d['lyb']=501
    # remove deletes the word entirely; to only lower its weight, operate on the underlying _dictionary object
    d._dictionary['lyb'] -= 100  # though accessing underscore-prefixed members directly is not really recommended
    dprint(d['lyb'])  # ['lyb']=401

    # Words whose frequency does not exceed a given threshold can also be removed
    d.remove_by_threshold(5)

    # 3 Basic features of spell
    # (1) unknown finds the words that may be misspelled; correction then gives the best suggestion
    misspelled = spell.unknown(['something', 'is', 'hapenning', 'here'])
    dprint(misspelled)  # misspelled<set>={'hapenning'}

    for word in misspelled:
        # Get the one `most likely` answer
        dprint(spell.correction(word))  # <str>='happening'
        # Get a list of `likely` options
        dprint(spell.candidates(
            word))  # <set>={'henning', 'happening', 'penning'}

    # Note that by default spell is case-insensitive: if the dictionary holds 'ckz' 100 times,
    #   checking any upper/lower-case variant of 'CKZ' returns the input unchanged
    #   e.g. spell.correction('ckZ') => 'ckZ'

    # (2) Modifying spell.word_frequency can influence the result computed by correction
    dprint(d['henning'], d['happening'], d['penning'])
    # d['henning']<int>=53    d['happening']<int>=4538    d['penning']<int>=23
    d._dictionary['henning'] += 10000
    dprint(spell.correction('hapenning'))  # <str>='henning'

    # (3) The weight a word carries within the whole dictionary
    dprint(spell.word_probability('henning'))  # <float>=0.0001040741914298211
Ejemplo n.º 22
0
def word_check2(s):
    """Print a report for every word of ``s`` that looks misspelled.

    A word is reported when the checker's most likely correction differs
    from the word itself.

    :param s: iterable of words.
    """
    spell_corrector = SpellChecker()
    for word in s:
        # Get the one `most likely` answer once and reuse it below.
        best = spell_corrector.correction(word)
        if word != best:
            print("Word incorrectly spelt:" + word)
            # str() keeps this line from raising when the checker has no
            # correction and returns None.
            print("Most likely: " + str(best))
            # Get a list of `likely` options
            print("Other possibilities: " +
                  str(spell_corrector.candidates(word)))
Ejemplo n.º 23
0
def spell_check(extracted):
    """Map every misspelled word of ``extracted`` to its candidates.

    :param extracted: text that is split on whitespace.
    :return: dict of word -> ``candidates()`` result, for each distinct word
        whose most likely correction differs from the word.
    """
    checker = SpellChecker()
    alternatives = {}
    for token in extracted.split():
        if token in alternatives:
            continue  # already handled
        if checker.correction(token) == token:
            continue  # spelled correctly
        alternatives[token] = checker.candidates(token)
    return alternatives
Ejemplo n.º 24
0
def get_correct(word):
    """Return the candidates for ``word``, or ``None`` if nothing to fix.

    ``None`` is returned for falsy input, for booleans, and for words whose
    most likely correction is the word itself.
    """
    if isinstance(word, bool) or not word:
        return None

    checker = SpellChecker()
    options = checker.candidates(word)
    return None if checker.correction(word) == word else options
Ejemplo n.º 25
0
def index():
    """Handle the upload form: spell-check an uploaded text file.

    On POST the uploaded file is validated (present, named, allowed
    extension), saved, read back, corrected with TextBlob and scanned for
    unknown words with SpellChecker. The results are rendered with
    ``index.html``; for any other request the template is rendered with
    empty values.

    :return: a redirect when no file was supplied, ``abort(400)`` for a
        disallowed extension, otherwise the rendered template.
    """
    correct = ""
    a = ""
    list1 = []
    list2 = []
    misspelled = []
    if request.method == "POST":
        print("FORM DATA RECEIVED")

        if "file" not in request.files:
            flash('No file part')
            return redirect(request.url)

        file = request.files["file"]
        if file.filename == "":
            flash('No selected file')
            return redirect(request.url)

        file_ext = os.path.splitext(file.filename)[1]
        if file_ext not in app.config['UPLOAD_EXTENSIONS']:
            print("Please upload a .txt file type only")
            return abort(400)

        if file:
            # NOTE(review): file.filename is supplied by the client and is
            # used in paths unsanitised (path traversal risk) -- consider
            # sanitising it before use.
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
            # NOTE(review): the file is read back from the desktop path, not
            # from UPLOAD_FOLDER -- presumably both are the same directory.
            uploaded_path = 'C:/Users/' + username + '/Desktop/' + file.filename
            print(uploaded_path)
            # The context manager closes the handle, which was leaked before.
            with open(uploaded_path, "r+") as f:
                filecontent = f.read()
            a = str(filecontent)
            b = TextBlob(a)
            correct = str(b.correct())

            # remove all punctuations before finding possible misspelled words
            s = re.sub(r'[^\w\s]', '', filecontent)
            wordlist = s.split()
            spell = SpellChecker()
            # find those words that may be misspelled
            misspelled = list(spell.unknown(wordlist))
            for word in misspelled:
                # Get the one `most likely` answer
                list1.append(spell.correction(word))
                # Get a list of `likely` options
                list2.append(spell.candidates(word))

    return render_template('index.html',
                           a=a,
                           correct=correct,
                           misspelled=misspelled,
                           list1=list1,
                           list2=list2,
                           len=len(misspelled),
                           len1=len(list1))
Ejemplo n.º 26
0
def checkspelling(wordList):
    """Print every unknown word of ``wordList`` with its candidates.

    The checker is created with ``distance=1``. Nothing is returned.
    """
    checker = SpellChecker(distance=1)
    suggestions_by_word = {
        unknown: checker.candidates(unknown)
        for unknown in checker.unknown(wordList)
    }
    print("Word : Suggestions")
    for unknown, options in suggestions_by_word.items():
        print(f"{unknown} :: {options}")
Ejemplo n.º 27
0
    def test_capitalization_when_case_sensitive_true(self):
        ''' with case_sensitive=True only the exact casing 'Bob' is known '''
        checker = SpellChecker(language=None, case_sensitive=True)
        checker.word_frequency.add('Bob')

        # membership: only the casing that was added is found
        membership = (('Bob', True), ('BOb', False), ('BOB', False), ('bob', False))
        for variant, expected in membership:
            self.assertEqual(variant in checker, expected)

        self.assertEqual(checker.unknown(['Bb', 'bb', 'BB']), {'Bb', 'bb', 'BB'})
        self.assertEqual(checker.known(['BOB', 'bOb']), set())

        # both typos resolve to the stored casing
        for typo in ('Bb', 'bob'):
            self.assertEqual(checker.candidates(typo), {'Bob'})
        for typo in ('Bb', 'bob'):
            self.assertEqual(checker.correction(typo), 'Bob')
        self.assertEqual(checker.unknown(['bob']), {'bob'})
def check(word):
    """Return the first candidate of ``word`` found in ``spellings_to_check``.

    The word is lower-cased first; when none of its candidates is in
    ``spellings_to_check`` the lower-cased word itself is returned.
    """
    global spellings_to_check

    checker = SpellChecker()
    lowered = word.lower()
    matches = (
        option for option in checker.candidates(lowered)
        if option in spellings_to_check
    )
    return next(matches, lowered)
def spell_correct(query_str):
    """Build a corrected version of ``query_str`` and print a suggestion.

    Every word returned by ``get_list(query_str)`` is replaced by its most
    likely correction; the candidates of each word are printed along the way.

    :return: the corrected words, each preceded by a space.
    """
    checker = SpellChecker()
    pieces = []
    for token in get_list(query_str):
        # the one `most likely` answer, prefixed by a space
        pieces.append(str(" " + checker.correction(token)))
        # the list of `likely` options
        print(token + ' ->', checker.candidates(token))
    new_str = "".join(pieces)
    print("Did you mean " + "'" + new_str + "'" + "?")
    return new_str
Ejemplo n.º 30
0
class Spell_Searcher:
    """Query expansion that replaces misspelled terms using the index."""

    def __init__(self, indexer):
        self._indexer = indexer
        self.spell = None  # created on first use by query_expansion

    def query_expansion(self, query):
        """
        This function finds a misspelled word and finds its closest similarity.
        First all of its candidates are collected; the candidate with the most
        appearances in the inverted index replaces the misspelled term.

        :param query: object whose ``query_dict`` maps terms to values; the
            dictionary is updated in place.
        :return: query dictionary with replaced correct words. It is returned
            unchanged when the spell checker cannot be created.
        """
        query_dict = query.query_dict
        if self.spell is None:
            try:
                self.spell = SpellChecker(local_dictionary='spell_dict.json', distance=1)
            except Exception:
                # Best effort: without a checker there is nothing to expand.
                # The old bare "except: pass" left self.spell as None and
                # failed later with an AttributeError.
                return query_dict

        inverted_idx = self._indexer.inverted_idx
        # Iterate over a snapshot of the keys: the loop renames keys, and
        # changing a dict while iterating it raises RuntimeError.
        for term in list(query_dict):
            if term.lower() in inverted_idx or term.upper() in inverted_idx:
                continue
            if not self.spell.unknown([term]):
                continue

            candidates = list(self.spell.edit_distance_1(term))
            # candidates() may return None when nothing is found.
            candidates.extend(self.spell.candidates(term) or ())

            max_freq_in_corpus = 0
            max_freq_name = ''
            for candidate in candidates:
                if candidate in inverted_idx:
                    curr_freq = inverted_idx[candidate]
                elif candidate.upper() in inverted_idx:
                    curr_freq = inverted_idx[candidate.upper()]
                else:
                    continue
                if curr_freq > max_freq_in_corpus:
                    max_freq_in_corpus = curr_freq
                    max_freq_name = candidate

            if max_freq_name != '':
                print(max_freq_name)
                query_dict[max_freq_name] = query_dict.pop(term)
        return query_dict
Ejemplo n.º 31
0
#!/usr/bin/env python

'''
pip install pyspellchecker
'''

from spellchecker import SpellChecker
spell = SpellChecker()

# Collect the words the dictionary does not recognise.
misspelled = spell.unknown(['let', 'us', 'wlak', 'on', 'the', 'groun'])

for word in misspelled:
    # Show the single `most likely` answer first ...
    best_guess = spell.correction(word)
    print(best_guess)

    # ... then the whole set of `likely` options.
    options = spell.candidates(word)
    print(options)