def create_dictionary(file) -> SymSpellPy:
    """Return a default-configured ``SymSpellPy`` built from the text in *file*.

    The file is opened with ``encoding="utf8"`` and the open stream is handed
    to ``SymSpellPy.create_dictionary``.
    """
    checker = SymSpellPy()
    with open(file, encoding="utf8") as corpus_stream:
        checker.create_dictionary(corpus_stream)
    return checker
def test_load_dict_symspellpy(benchmark):
    """Benchmark ``load_dictionary`` reading the file at ``dict_path``."""
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    benchmark(
        checker.load_dictionary,
        dict_path,
        term_index=0,
        count_index=1,
        separator=" ",
    )
def test_lookup_transfer_casing_symspellpy(benchmark):
    """Benchmark ``lookup`` with ``transfer_casing=True`` on a one-word dictionary.

    The dictionary holds only "steam"; the top suggestion for "StreaM" is
    expected to be "SteaM".
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.create_dictionary_entry("steam", 4)

    suggestions = benchmark(
        checker.lookup, "StreaM", VerbosityPy.TOP, 2, transfer_casing=True
    )

    best = suggestions[0]
    assert best.term == "SteaM"
def test_save_pickle_symspellpy(benchmark):
    """Benchmark ``save_pickle`` writing the loaded dictionary to disk.

    The dictionary at ``dict_path`` is loaded first, then pickled to
    ``temp_py/temp.pk`` (the directory is created if missing). The files are
    intentionally left in place; ``test_load_pickle_symspellpy`` reads the
    same path and removes it afterwards.
    """
    sym_spell = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path, term_index=0, count_index=1,
                              separator=" ")
    os.makedirs("temp_py", exist_ok=True)

    # The benchmark's return value is not needed, so it is not bound.
    benchmark(sym_spell.save_pickle, "temp_py/temp.pk")

    # Check that the loaded instance reports the expected maximum length.
    assert sym_spell._max_length == 28
def test_lookup_compound_term_symspellpy(benchmark):
    """Benchmark ``lookup_compound`` on a short phrase with misplaced spaces."""
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(
        dict_path, term_index=0, count_index=1, separator=" "
    )

    phrase = "whereis th elove"
    suggestions = benchmark(
        checker.lookup_compound, phrase, max_edit_distance=2
    )

    top = suggestions[0]
    assert top.term == "whereas the love"
def test_lookup_compund_transfer_casing_symspellpy(benchmark):
    """Benchmark ``lookup_compound`` with ``transfer_casing=True`` on a sentence.

    The mixed-case misspelled sentence must come back corrected with the
    casing shown in the expected string.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(dict_path, 0, 1)

    misspelled = ("Whereis th elove hehaD Dated forImuch of thepast who "
                  "couqdn'tread in sixthgrade AND ins pired him")
    expected = ("Whereas the love heaD Dated for much of the past "
                "who couldn't read in sixth grade AND inspired him")

    suggestions = benchmark(
        checker.lookup_compound, misspelled, 2, transfer_casing=True
    )

    top = suggestions[0]
    assert top.term == expected
def test_lookup_term_symspellpy(benchmark):
    """Benchmark a single-word ``lookup`` with ``VerbosityPy.CLOSEST``.

    The comparison is made on the lower-cased suggestion, so only the
    spelling of the result is checked, not its casing.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(
        dict_path, term_index=0, count_index=1, separator=" "
    )

    word = "mEmEbers"
    suggestions = benchmark(
        checker.lookup,
        word,
        VerbosityPy.CLOSEST,
        max_edit_distance=2,
        transfer_casing=True,
    )

    corrected = suggestions[0].term
    assert corrected.lower() == "members"
def test_word_segmentation_symspellpy(benchmark):
    """Benchmark ``word_segmentation`` on a pangram written without spaces."""
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(
        dict_path, term_index=0, count_index=1, separator=" "
    )

    run_together = "thequickbrownfoxjumpsoverthelazydog"
    segmentation = benchmark(
        checker.word_segmentation,
        run_together,
        max_edit_distance=0,
        max_segmentation_word_length=5,
    )

    expected = "the quick brown fox jumps over the lazy dog"
    assert segmentation.segmented_string == expected
def SymSpell() -> SymSpellPy:
    """Return a default-configured ``SymSpellPy`` loaded from the bundled file.

    The dictionary is read from ``data/dictionary.txt`` with term index 0 and
    count index 1, both passed positionally.
    """
    checker = SymSpellPy()
    checker.load_dictionary('data/dictionary.txt', 0, 1)
    return checker
def test_load_pickle_symspellpy(benchmark):
    """Benchmark ``load_pickle`` reading ``temp_py/temp.pk``, then clean up.

    The pickle file and its directory are removed before the assertion runs.
    NOTE(review): the file is presumably produced by
    ``test_save_pickle_symspellpy`` - confirm the tests run in that order.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    benchmark(checker.load_pickle, "temp_py/temp.pk")

    # Remove the temporary artefacts before checking the loaded data.
    os.remove("temp_py/temp.pk")
    os.rmdir("temp_py")

    suggestions = checker.lookup("tke", VerbosityPy.CLOSEST)
    assert suggestions[0].term == "the"