def init_dictionary(corpus_path):
    """Return a SymSpell instance seeded from two sources.

    The English frequency list shipped inside the ``symspellpy`` package is
    loaded first; the words found in ``corpus_path`` are then added on top
    of it.

    Args:
        corpus_path: Corpus handed to ``SymSpell.create_dictionary``.

    Returns:
        The populated ``SymSpell`` object.
    """
    speller = SymSpell()
    bundled_dictionary = pkg_resources.resource_filename(
        "symspellpy", "frequency_dictionary_en_82_765.txt"
    )
    # Column 0 is read as the term and column 1 as its count.
    speller.load_dictionary(bundled_dictionary, term_index=0, count_index=1)
    speller.create_dictionary(corpus_path)
    return speller
def create_dictionary(file) -> SymSpellPy:
    """Build a fresh ``SymSpellPy`` dictionary from a text corpus file.

    Args:
        file: Path of the corpus; it is opened as UTF-8 text.

    Returns:
        A new ``SymSpellPy`` instance after ``create_dictionary`` has been
        called on the opened corpus.
    """
    checker = SymSpellPy()
    with open(file, encoding="utf8") as corpus_handle:
        # The open file object itself (not the path) is handed over.
        checker.create_dictionary(corpus_handle)
    return checker
def test_create_dictionary(self):
    """Check that a dictionary built from a raw corpus has the expected counts.

    The dictionary is created from ``big_modified.txt`` and compared entry by
    entry against ``big_words.txt``, whose lines are ``"<word> <count>"``.
    The number of reference lines must also equal ``sym_spell.word_count``.
    """
    corpus_path = os.path.join(self.fortests_path, "big_modified.txt")
    big_words_path = os.path.join(self.fortests_path, "big_words.txt")
    edit_distance_max = 2
    prefix_length = 7
    sym_spell = SymSpell(edit_distance_max, prefix_length)
    sym_spell.create_dictionary(corpus_path, encoding="utf-8")

    num_lines = 0
    # Read the reference file with an explicit encoding so the comparison
    # does not depend on the platform's default locale (the corpus above is
    # read as UTF-8 as well).
    with open(big_words_path, "r", encoding="utf-8") as infile:
        for line in infile:
            key, count = line.rstrip().split(" ")
            self.assertEqual(int(count), sym_spell.words[key])
            num_lines += 1
    self.assertEqual(num_lines, sym_spell.word_count)
def test_loading_dictionary_from_fileobject(self):
    """``create_dictionary`` must return ``True`` for an open file object."""
    words_file = os.path.join(self.fortests_path, "big_words.txt")
    # Positional arguments: max edit distance 2, prefix length 7.
    speller = SymSpell(2, 7)
    with open(words_file, 'r', encoding='utf8') as handle:
        loaded = speller.create_dictionary(handle)
        self.assertEqual(True, loaded)
def useSymspell(self):
    """Correct the error text word by word with SymSpell and score the result.

    Loads the original/error texts through ``FP``, splits them into sentences
    through ``EC``, builds a SymSpell dictionary from the corpus path that
    ``FP`` reports, keeps the first lookup suggestion for every word of the
    error sentences, and finally passes the corrected words to
    ``self.useWordsMetrics``. ``FP`` and ``EC`` are helpers defined outside
    this block.
    """
    self.originalText, self.errorText = FP().prepareFiles()
    sentencePair = EC().textToSentences(self.originalText, self.errorText)
    originalSentencesList, errorSentencesList = sentencePair
    print(len(originalSentencesList), len(errorSentencesList))

    speller = SymSpell()
    corpusPath = FP().definePathToCoprus()
    speller.create_dictionary(corpusPath, encoding='utf-8')

    correctedWords = []
    for errorSentence in errorSentencesList:
        for token in EC().sentencesToWords(errorSentence):
            candidates = speller.lookup(
                token,
                Verbosity.CLOSEST,
                max_edit_distance=2,
                include_unknown=True,
            )
            # Only the first returned candidate is kept for each word.
            for candidate in candidates:
                correctedWords.append(candidate.term)
                break

    print(len(correctedWords))
    self.useWordsMetrics(self.originalText, correctedWords)
def test_create_dictionary_invalid_path(self):
    """``create_dictionary`` must return ``False`` for a nonexistent path."""
    # Positional arguments: max edit distance 2, prefix length 7.
    speller = SymSpell(2, 7)
    outcome = speller.create_dictionary("invalid/dictionary/path.txt")
    self.assertEqual(False, outcome)
try: suggestion = peterNorvigSimString.correction(origIngredient) except: suggestion = origIngredient return suggestion ################# # SymSpell Code # ################# from symspellpy import SymSpell, Verbosity import pkg_resources sym_spell = SymSpell() sym_spell.create_dictionary('AdditivesDict.txt') def getSymSpell(mispell): """ Use SymSpell implementation to get suggestion """ suggestion = sym_spell.lookup_compound(mispell, max_edit_distance=2)[0].term return suggestion.upper() ######################### # Database Editing Code # ######################### def performReplace(result, ingredients, i, method, output):
# Build a SymSpell dictionary from a plain-text corpus and dump its word counts.
from symspellpy import SymSpell

sym_spell = SymSpell()
corpus_path = "word.txt"
# create_dictionary signals a missing corpus through a False return value
# instead of raising; stop here rather than printing an empty dictionary.
if not sym_spell.create_dictionary(corpus_path):
    raise SystemExit(f"could not build dictionary from corpus: {corpus_path}")
print(sym_spell.words)
# Build a name-part frequency table from the "Name" column, save it as CSV,
# then use the distinct name parts as a SymSpell dictionary for a lookup.
nsorted = names.sort_values(by="Name")
nameind = nsorted["Name"].str.split(' ', expand=True)
# Pool at most the first three name parts into one Series. Slicing the
# columns (instead of indexing 0, 1, 2 directly) avoids a KeyError when no
# name in the data has three parts.
new = pd.concat([nameind[col] for col in nameind.columns[:3]], ignore_index=True)
new = new.dropna()
name_counts = Counter(new)
name_df = pd.DataFrame.from_dict(name_counts, orient='index').reset_index()
name_df = name_df.rename(columns={'index': 'Name', 0: 'Count'})
name_df.to_csv("NameFrequency.csv", index=False)
print(name_df)
new = pd.unique(new)

sym_spell = SymSpell()
# NOTE(review): despite its name, corpus_path holds the in-memory array of
# unique name parts rather than a file path. Presumably create_dictionary
# iterates over it item by item; verify against the installed symspellpy
# version. Because the parts are unique, the counts in name_counts are not
# carried into the dictionary.
corpus_path = new
sym_spell.create_dictionary(corpus_path, encoding="utf-8")

# lookup suggestions for single-word input strings
input_term = "বক"  # sample query term to look up
# max edit distance per lookup
# (max_edit_distance_lookup <= max_dictionary_edit_distance)
suggestions = sym_spell.lookup(input_term, Verbosity.CLOSEST, max_edit_distance=2)
# display each suggestion returned by the lookup
for suggestion in suggestions:
    print(suggestion)