Ejemplo n.º 1
0
def create_dictionary(file) -> SymSpellPy:
    """Build a ``SymSpellPy`` instance from the corpus stored in *file*.

    The file is opened as UTF-8 text and the open handle is handed to
    ``SymSpellPy.create_dictionary``; the handle is closed again before
    this function returns.

    Args:
        file: Location of the corpus, in any form ``open`` accepts.

    Returns:
        The freshly created ``SymSpellPy`` instance.
    """
    spell_checker = SymSpellPy()
    with open(file, encoding="utf8") as corpus:
        spell_checker.create_dictionary(corpus)
    return spell_checker
Ejemplo n.º 2
0
def test_load_dict_symspellpy(benchmark):
    """Benchmark loading the frequency dictionary found at ``dict_path``.

    The dictionary is read with the term in column 0, the count in column 1
    and a single space as the column separator.

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    sym_spell = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    # benchmark() returns whatever the timed callable returned.
    # load_dictionary signals a missing file by returning False instead of
    # raising, so without this check a wrong dict_path would "pass" while
    # timing a no-op.
    loaded = benchmark(sym_spell.load_dictionary,
                       dict_path,
                       term_index=0,
                       count_index=1,
                       separator=" ")
    assert loaded
Ejemplo n.º 3
0
def test_lookup_transfer_casing_symspellpy(benchmark):
    """Benchmark ``lookup`` with ``transfer_casing=True`` on one entry.

    The dictionary holds only "steam" (count 4). Looking up "StreaM" with
    ``VerbosityPy.TOP`` and an edit distance of 2 must give "SteaM" as the
    first suggestion.

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.create_dictionary_entry("steam", 4)

    suggestions = benchmark(
        checker.lookup, "StreaM", VerbosityPy.TOP, 2, transfer_casing=True
    )

    best = suggestions[0]
    assert best.term == "SteaM"
Ejemplo n.º 4
0
def test_save_pickle_symspellpy(benchmark):
    """Benchmark ``save_pickle`` after loading the dictionary at ``dict_path``.

    The pickle is written to ``temp_py/temp.pk``; the directory is created
    first if it does not exist yet.

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    sym_spell = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path,
                              term_index=0,
                              count_index=1,
                              separator=" ")
    os.makedirs("temp_py", exist_ok=True)
    benchmark(sym_spell.save_pickle, "temp_py/temp.pk")
    assert sym_spell._max_length == 28
    # _max_length only reflects the dictionary load above; also check that
    # save_pickle actually produced the file it was asked to write.
    assert os.path.isfile("temp_py/temp.pk")
    # NOTE(review): the pickle is left on disk on purpose; it looks like
    # test_load_pickle_symspellpy consumes and deletes it. Confirm ordering.
Ejemplo n.º 5
0
def test_lookup_compound_term_symspellpy(benchmark):
    """Benchmark ``lookup_compound`` on a short phrase with misplaced spaces.

    After loading the dictionary at ``dict_path``, the phrase
    "whereis th elove" is corrected with an edit distance of 2 and the
    first suggestion must read "whereas the love".

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(
        dict_path, term_index=0, count_index=1, separator=" "
    )

    phrase = "whereis th elove"
    suggestions = benchmark(
        checker.lookup_compound, phrase, max_edit_distance=2
    )

    top_suggestion = suggestions[0]
    assert top_suggestion.term == "whereas the love"
Ejemplo n.º 6
0
def test_lookup_compund_transfer_casing_symspellpy(benchmark):
    """Benchmark ``lookup_compound`` with ``transfer_casing=True``.

    A mixed-case sentence with several spelling and spacing errors is
    corrected with an edit distance of 2. The first suggestion must equal
    the expected sentence, including its casing.

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(dict_path, 0, 1)

    misspelled = ("Whereis th elove hehaD Dated forImuch of thepast who "
                  "couqdn'tread in sixthgrade AND ins pired him")
    expected = ("Whereas the love heaD Dated for much of the past "
                "who couldn't read in sixth grade AND inspired him")

    suggestions = benchmark(
        checker.lookup_compound, misspelled, 2, transfer_casing=True
    )

    top_suggestion = suggestions[0]
    assert top_suggestion.term == expected
Ejemplo n.º 7
0
def test_lookup_term_symspellpy(benchmark):
    """Benchmark a single-word ``lookup`` against the loaded dictionary.

    "mEmEbers" is looked up with ``VerbosityPy.CLOSEST``, an edit distance
    of 2 and casing transfer enabled. The first suggestion, lower-cased,
    must be "members".

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(
        dict_path, term_index=0, count_index=1, separator=" "
    )

    word = "mEmEbers"
    suggestions = benchmark(
        checker.lookup,
        word,
        VerbosityPy.CLOSEST,
        max_edit_distance=2,
        transfer_casing=True,
    )

    best_term = suggestions[0].term
    assert best_term.lower() == "members"
Ejemplo n.º 8
0
def test_word_segmentation_symspellpy(benchmark):
    """Benchmark ``word_segmentation`` on a sentence with no spaces.

    The run-together pangram is segmented with no spelling correction
    (``max_edit_distance=0``) and a maximum segment length of 5. The
    resulting ``segmented_string`` must be the sentence with spaces restored.

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    checker.load_dictionary(
        dict_path, term_index=0, count_index=1, separator=" "
    )

    glued_text = "thequickbrownfoxjumpsoverthelazydog"
    composition = benchmark(
        checker.word_segmentation,
        glued_text,
        max_edit_distance=0,
        max_segmentation_word_length=5,
    )

    expected = "the quick brown fox jumps over the lazy dog"
    assert composition.segmented_string == expected
Ejemplo n.º 9
0
def SymSpell() -> SymSpellPy:
    """Return a ``SymSpellPy`` instance loaded from ``data/dictionary.txt``.

    The dictionary is loaded with index arguments ``0`` and ``1``.

    Returns:
        The ``SymSpellPy`` instance after the load call.
    """
    spell_checker = SymSpellPy()
    dictionary_path = 'data/dictionary.txt'
    # NOTE(review): the value returned by load_dictionary is not inspected
    # here; confirm whether a failed load should be reported to the caller.
    spell_checker.load_dictionary(dictionary_path, 0, 1)
    return spell_checker
Ejemplo n.º 10
0
def test_load_pickle_symspellpy(benchmark):
    """Benchmark ``load_pickle`` from ``temp_py/temp.pk`` and clean up.

    After the timed load, the pickle file and its ``temp_py`` directory are
    removed. A ``VerbosityPy.CLOSEST`` lookup of "tke" must then suggest
    "the" first.

    Args:
        benchmark: The pytest-benchmark fixture used to time the call.
    """
    checker = SymSpellPy(max_dictionary_edit_distance=2, prefix_length=7)
    pickle_file = "temp_py/temp.pk"
    benchmark(checker.load_pickle, pickle_file)

    # Temporary artefacts are removed before the assertion runs, so a failed
    # assertion does not leave them behind.
    os.remove(pickle_file)
    os.rmdir("temp_py")

    suggestions = checker.lookup("tke", VerbosityPy.CLOSEST)
    assert suggestions[0].term == "the"