コード例 #1
0
def init_dictionary(corpus_path):
    """Create a SymSpell instance seeded from two sources.

    The English frequency list packaged inside ``symspellpy`` is loaded
    first (term in column 0, count in column 1); afterwards
    ``create_dictionary`` is invoked on ``corpus_path``.

    Args:
        corpus_path: Corpus handed unchanged to ``SymSpell.create_dictionary``.

    Returns:
        The ``SymSpell`` instance after both loading steps have run.
    """
    bundled_dictionary = pkg_resources.resource_filename(
        "symspellpy", "frequency_dictionary_en_82_765.txt"
    )

    speller = SymSpell()
    speller.load_dictionary(bundled_dictionary, term_index=0, count_index=1)
    speller.create_dictionary(corpus_path)
    return speller
コード例 #2
0
ファイル: spell.py プロジェクト: Almazishe/kaz-spell-api
def create_dictionary(file) -> SymSpellPy:
    """Return a fresh ``SymSpellPy`` fed from the text file at ``file``.

    Args:
        file: Path of the corpus file; it is opened with ``encoding="utf8"``.

    Returns:
        The ``SymSpellPy`` instance after its ``create_dictionary`` method
        has been called with the open file handle.
    """
    speller = SymSpellPy()
    with open(file, encoding="utf8") as corpus_handle:
        # The handle (not the path) is passed on, so the file only needs to
        # stay open for the duration of this call.
        speller.create_dictionary(corpus_handle)
    return speller
コード例 #3
0
    def test_create_dictionary(self):
        """Check a corpus-built dictionary against a reference word list.

        A dictionary is created from ``big_modified.txt`` and every
        ``"<word> <count>"`` line of ``big_words.txt`` is compared with the
        count stored in ``sym_spell.words``. Finally the number of reference
        lines must equal ``sym_spell.word_count``.
        """
        corpus_path = os.path.join(self.fortests_path, "big_modified.txt")
        big_words_path = os.path.join(self.fortests_path, "big_words.txt")

        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.create_dictionary(corpus_path, encoding="utf-8")

        num_lines = 0
        # Decode the reference file explicitly as UTF-8, matching the corpus
        # above; without it the result depends on the platform's locale
        # encoding.
        with open(big_words_path, "r", encoding="utf-8") as infile:
            for line in infile:
                key, count = line.rstrip().split(" ")
                self.assertEqual(int(count), sym_spell.words[key])
                num_lines += 1
        self.assertEqual(num_lines, sym_spell.word_count)
コード例 #4
0
ファイル: test_symspellpy.py プロジェクト: youikim/symspellpy
 def test_loading_dictionary_from_fileobject(self):
     """Check that ``create_dictionary`` returns ``True`` for an open file.

     ``big_words.txt`` from the test fixtures directory is opened as UTF-8
     text and the file object itself is passed to ``create_dictionary``.
     """
     words_file = os.path.join(self.fortests_path, "big_words.txt")
     # Positional arguments: max edit distance 2, prefix length 7.
     speller = SymSpell(2, 7)
     with open(words_file, 'r', encoding='utf8') as handle:
         loaded = speller.create_dictionary(handle)
         self.assertEqual(True, loaded)
コード例 #5
0
 def useSymspell(self):
     """Correct the error text word by word with SymSpell, then score it.

     The original and error texts come from ``FP().prepareFiles()`` and are
     split into sentences. A SymSpell dictionary is built from the corpus
     path, each word of each error sentence is looked up, and the first
     suggestion returned (if any) is collected. The collected words are
     passed to ``self.useWordsMetrics`` together with the original text.
     Sentence and word counts are printed along the way.
     """
     self.originalText, self.errorText = FP().prepareFiles()
     sentencePair = EC().textToSentences(self.originalText, self.errorText)
     originalSentencesList, errorSentencesList = sentencePair
     print(len(originalSentencesList), len(errorSentencesList))

     speller = SymSpell()
     speller.create_dictionary(FP().definePathToCoprus(), encoding='utf-8')

     processedWordsList = []
     for sentence in errorSentencesList:
         for word in EC().sentencesToWords(sentence):
             suggestions = speller.lookup(
                 word,
                 Verbosity.CLOSEST,
                 max_edit_distance=2,
                 include_unknown=True,
             )
             # Keep only the first suggestion; nothing is added when the
             # lookup yields no suggestions at all.
             bestMatch = next(iter(suggestions), None)
             if bestMatch is not None:
                 processedWordsList.append(bestMatch.term)

     print(len(processedWordsList))
     self.useWordsMetrics(self.originalText, processedWordsList)
コード例 #6
0
 def test_create_dictionary_invalid_path(self):
     """Check that ``create_dictionary`` returns ``False`` for a bad path."""
     # Positional arguments: max edit distance 2, prefix length 7.
     speller = SymSpell(2, 7)
     outcome = speller.create_dictionary("invalid/dictionary/path.txt")
     self.assertEqual(False, outcome)
コード例 #7
0
    try:
        suggestion = peterNorvigSimString.correction(origIngredient)
    except:
        suggestion = origIngredient
    return suggestion


#################
# SymSpell Code #
#################

from symspellpy import SymSpell, Verbosity
import pkg_resources

# Module-level speller used by getSymSpell(). Its dictionary is built once,
# when this module is executed, from the file 'AdditivesDict.txt' (path is
# relative to the current working directory).
# NOTE(review): the return value of create_dictionary() is not checked here.
sym_spell = SymSpell()
sym_spell.create_dictionary('AdditivesDict.txt')


def getSymSpell(mispell):
    """Return the upper-cased SymSpell compound suggestion for ``mispell``.

    Args:
        mispell: Text to correct; passed to ``sym_spell.lookup_compound``
            with ``max_edit_distance=2``.

    Returns:
        The ``term`` of the first suggestion, converted to upper case.
    """
    candidates = sym_spell.lookup_compound(mispell, max_edit_distance=2)
    best = candidates[0]
    return best.term.upper()


#########################
# Database Editing Code #
#########################


def performReplace(result, ingredients, i, method, output):
コード例 #8
0
from symspellpy import SymSpell

# Corpus file handed to create_dictionary() below.
corpus_path = "word.txt"

sym_spell = SymSpell()
sym_spell.create_dictionary(corpus_path)

# Show the words collection the speller holds after the call above.
print(sym_spell.words)
コード例 #9
0
ファイル: nextwd.py プロジェクト: asifatick/NameSuggestion
# Build a per-word frequency table from the "Name" column, save it as CSV,
# then feed the distinct words to SymSpell and look up one sample term.
# NOTE(review): `names` is defined earlier in the file (not visible here);
# presumably a pandas DataFrame with a "Name" column -- confirm.
nsorted = names.sort_values(by="Name")
# Split every name on single spaces, one column per part.
nameind = nsorted["Name"].str.split(' ', expand=True)
# Stack the first three parts into one Series. Only columns 0, 1 and 2 are
# used, so any further parts of a name are ignored.
new = pd.concat([nameind[0], nameind[1], nameind[2]], ignore_index=True)
# Drop the missing entries (presumably from names with fewer than three parts).
new = new.dropna()
# Count how often each name part occurs.
name_counts = Counter(new)
# Turn the counts into a two-column table ("Name", "Count") and save it.
name_df = pd.DataFrame.from_dict(name_counts, orient='index').reset_index()
name_df = name_df.rename(columns={'index': 'Name', 0: 'Count'})
name_df.to_csv("NameFrequency.csv", index=False)
print(name_df)

# From here on `new` holds only the distinct name parts.
new = pd.unique(new)
#print(new)

sym_spell = SymSpell()
# Despite its name, corpus_path is the collection of unique name parts built
# above, not a filesystem path.
# NOTE(review): confirm that create_dictionary() accepts an iterable of
# strings in the symspellpy version in use.
corpus_path = new
sym_spell.create_dictionary(corpus_path, encoding="utf-8")

#print(sym_spell.words)

# lookup suggestions for single-word input strings
input_term = "বক"  # sample query term in Bengali script
# max edit distance per lookup
# (max_edit_distance_lookup <= max_dictionary_edit_distance)
suggestions = sym_spell.lookup(input_term,
                               Verbosity.CLOSEST,
                               max_edit_distance=2)
# display suggestion term, term frequency, and edit distance
for suggestion in suggestions:
    print(suggestion)

#nameind =nameind.sort_values(by =["0","1"]])