Ejemplo n.º 1
0
def test_save_pickle_symspellcpppy(benchmark):
    sym_spell = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path,
                              term_index=0,
                              count_index=1,
                              separator=" ")
    os.makedirs("temp_cpppy", exist_ok=True)
    result = benchmark(sym_spell.save_pickle, "temp_cpppy/temp.bin")
    assert (sym_spell.max_length() == 28)
Ejemplo n.º 2
0
    def test_word_segmentation_apostrophe(self):
        edit_distance_max = 0
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        typo = "There'resomewords"
        correction = ("There' re some words")
        result = sym_spell.word_segmentation(typo)
        self.assertEqual(correction, result.corrected_string)
Ejemplo n.º 3
0
    def test_load_dictionary_encoding(self):
        dictionary_path = os.path.join(self.fortests_path, "non_en_dict.txt")

        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(dictionary_path, 0, 1)

        result = sym_spell.lookup("АБ", Verbosity.TOP, 2)
        self.assertEqual(1, len(result))
        self.assertEqual("АБИ", result[0].term)
Ejemplo n.º 4
0
def test_lookup_compound_term_symspellcpppy(benchmark):
    sym_spell = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path,
                              term_index=0,
                              count_index=1,
                              separator=" ")
    input_term = "whereis th elove"
    result = benchmark(sym_spell.lookup_compound,
                       input_term,
                       max_edit_distance=2)
    assert (result[0].term == "whereas the love")
Ejemplo n.º 5
0
def test_lookup_term_symspellcpppy(benchmark):
    sym_spell = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path,
                              term_index=0,
                              count_index=1,
                              separator=" ")
    input_term = "mEmEbers"
    result = benchmark(sym_spell.lookup,
                       input_term,
                       VerbosityCpp.CLOSEST,
                       max_edit_distance=2)
    assert (result[0].term == "members")
Ejemplo n.º 6
0
def test_lookup_compund_transfer_casing_symspellcpppy(benchmark):
    sym_spell = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path, 0, 1)
    typo = ("Whereis th elove hehaD Dated forImuch of thepast who "
            "couqdn'tread in sixthgrade AND ins pired him")
    correction = ("Whereas the love heaD Dated for much of the past "
                  "who couldn't read in sixth grade AND inspired him")
    results = benchmark(sym_spell.lookup_compound,
                        typo,
                        2,
                        transfer_casing=True)
    assert (results[0].term == correction)
Ejemplo n.º 7
0
def test_word_segmentation_symspellcpppy(benchmark):
    sym_spell = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    sym_spell.load_dictionary(dict_path,
                              term_index=0,
                              count_index=1,
                              separator=" ")
    input_term = "thequickbrownfoxjumpsoverthelazydog"
    result = benchmark(sym_spell.word_segmentation,
                       input_term,
                       max_edit_distance=0,
                       max_segmentation_word_length=5)
    assert (result.segmented_string ==
            "the quick brown fox jumps over the lazy dog")
Ejemplo n.º 8
0
    def test_lookup_compound_transfer_casing_no_bigram(self):
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        typo = ("Whereis th elove hehaD Dated forImuch of thepast who "
                "couqdn'tread in sixthgrade AND ins pired him")
        correction = ("Whereas the love heaD Dated for much of the past "
                      "who couldn't read in sixth grade AND inspired him")

        results = sym_spell.lookup_compound(typo,
                                            edit_distance_max,
                                            transfer_casing=True)
        self.assertEqual(correction, results[0].term)
Ejemplo n.º 9
0
    def test_pickle_compressed(self):
        pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        sym_spell.save_pickle(pickle_path)

        sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
        sym_spell_2.load_pickle(pickle_path)
        self.assertEqual(sym_spell.max_length(), sym_spell_2.max_length())
        self.assertEqual(
            sym_spell.lookup("flam", Verbosity.TOP, 0, True)[0].term,
            sym_spell_2.lookup("flam", Verbosity.TOP, 0, True)[0].term)
        os.remove(pickle_path)
Ejemplo n.º 10
0
 def test_load_dictionary_invalid_path(self):
     edit_distance_max = 2
     prefix_length = 7
     sym_spell = SymSpell(edit_distance_max, prefix_length)
     self.assertEqual(
         False,
         sym_spell.load_dictionary("invalid/dictionary/path.txt", 0, 1))
Ejemplo n.º 11
0
 def test_load_dictionary_bad_dictionary(self):
     dictionary_path = os.path.join(self.fortests_path, "bad_dict.txt")
     edit_distance_max = 2
     prefix_length = 7
     sym_spell = SymSpell(edit_distance_max, prefix_length)
     self.assertEqual(True,
                      sym_spell.load_dictionary(dictionary_path, 0, 1))
     self.assertEqual(7, sym_spell.word_count())
Ejemplo n.º 12
0
    def test_word_segmentation_capitalize(self):
        edit_distance_max = 0
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        typo = "Thequickbrownfoxjumpsoverthelazydog"
        correction = "The quick brown fox jumps over the lazy dog"
        result = sym_spell.word_segmentation(typo)
        self.assertEqual(correction, result.corrected_string)

        typo = "Itwasabrightcolddayinaprilandtheclockswerestrikingthirteen"
        correction = ("It was a bright cold day in april and the clocks "
                      "were striking thirteen")
        result = sym_spell.word_segmentation(typo)
        self.assertEqual(correction, result.segmented_string)

        typo = ("Itwasthebestoftimesitwastheworstoftimesitwastheageofwisdom"
                "itwastheageoffoolishness")
        correction = ("It was the best of times it was the worst of times "
                      "it was the age of wisdom it was the age of foolishness")
        result = sym_spell.word_segmentation(typo)
        self.assertEqual(correction, result.segmented_string)
Ejemplo n.º 13
0
    def test_lookup_compound_no_bigram(self):
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)

        typo = "whereis th elove"
        correction = "whereas the love"
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(2, results[0].distance)
        self.assertEqual(64, results[0].count)

        typo = "the bigjest playrs"
        correction = "the biggest players"
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(2, results[0].distance)
        self.assertEqual(34, results[0].count)

        typo = "can yu readthis"
        correction = "can you read this"
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(2, results[0].distance)
        self.assertEqual(3, results[0].count)

        typo = ("whereis th elove hehad dated forImuch of thepast who "
                "couqdn'tread in sixthgrade and ins pired him")
        correction = ("whereas the love head dated for much of the past who "
                      "couldn't read in sixth grade and inspired him")
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(9, results[0].distance)
        self.assertEqual(0, results[0].count)

        typo = "in te dhird qarter oflast jear he hadlearned ofca sekretplan"
        correction = ("in the third quarter of last year he had learned of "
                      "a secret plan")
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(9, results[0].distance)
        self.assertEqual(0, results[0].count)

        typo = ("the bigjest playrs in te strogsommer film slatew ith plety "
                "of funn")
        correction = ("the biggest players in the strong summer film slate "
                      "with plenty of fun")
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(9, results[0].distance)
        self.assertEqual(0, results[0].count)

        typo = ("can yu readthis messa ge despite thehorible sppelingmsitakes")
        correction = ("can you read this message despite the horrible "
                      "spelling mistakes")
        results = sym_spell.lookup_compound(typo, edit_distance_max)
        self.assertEqual(1, len(results))
        self.assertEqual(correction, results[0].term)
        self.assertEqual(9, results[0].distance)
        self.assertEqual(0, results[0].count)