예제 #1
0
    def test_words_with_shared_prefix_should_retain_counts(self):
        """Check that two words sharing a prefix each keep their own count.

        "pipe" (count 5) and "pips" (count 10) are registered, then three
        phrases are looked up with ``Verbosity.ALL``. Every lookup must
        return both words, in the order listed below, with their original
        counts.
        """
        sym_spell = SymSpell(1, 3)
        for term, count in (("pipe", 5), ("pips", 10)):
            sym_spell.create_dictionary_entry(term, count)

        # phrase -> suggestions expected, in result order, as (term, count)
        expectations = (
            ("pipe", (("pipe", 5), ("pips", 10))),
            ("pips", (("pips", 10), ("pipe", 5))),
            ("pip", (("pips", 10), ("pipe", 5))),
        )
        for phrase, expected in expectations:
            suggestions = sym_spell.lookup(phrase, Verbosity.ALL, 1)
            self.assertEqual(2, len(suggestions))
            for position, (term, count) in enumerate(expected):
                self.assertEqual(term, suggestions[position].term)
                self.assertEqual(count, suggestions[position].count)
예제 #2
0
    def test_delete_dictionary_entry(self):
        """Check that deleting entries updates lookup, max length and count.

        Three words are registered. After each deletion the test
        re-checks ``max_length()``, ``word_count()`` and the single TOP
        suggestion returned for "steama".
        """
        sym_spell = SymSpell()
        for count, term in enumerate(("stea", "steama", "steem"), start=1):
            sym_spell.create_dictionary_entry(term, count)

        def top_term():
            # Look up "steama" and insist on exactly one suggestion.
            suggestions = sym_spell.lookup("steama", Verbosity.TOP, 2)
            self.assertEqual(1, len(suggestions))
            return suggestions[0].term

        self.assertEqual("steama", top_term())
        self.assertEqual(len("steama"), sym_spell.max_length())
        self.assertEqual(3, sym_spell.word_count())

        # (word to delete, word count expected afterwards)
        for removed, remaining in (("steama", 2), ("stea", 1)):
            self.assertTrue(sym_spell.delete_dictionary_entry(removed))
            self.assertEqual(len("steem"), sym_spell.max_length())
            self.assertEqual(remaining, sym_spell.word_count())
            self.assertEqual("steem", top_term())
예제 #3
0
 def test_lookup_should_not_return_non_word_delete(self):
     """Check that fragments of "pawn" are not returned as suggestions.

     Only "pawn" is registered; looking up "paw" and "awn" with 0 as the
     third lookup argument must both return nothing.
     """
     sym_spell = SymSpell(2, 7, 10)
     sym_spell.create_dictionary_entry("pawn", 10)
     for fragment in ("paw", "awn"):
         suggestions = sym_spell.lookup(fragment, Verbosity.TOP, 0)
         self.assertEqual(0, len(suggestions))
예제 #4
0
    def test_lookup_transfer_casing(self):
        """Check that ``transfer_casing=True`` carries input casing over.

        Each case builds a fresh ``SymSpell`` with one lowercase entry,
        looks up a differently-cased phrase and compares the top
        suggestion's term against the expected casing.
        """
        # (dictionary entry, looked-up phrase, expected top suggestion)
        cases = (
            ("steam", "Stream", "Steam"),
            ("steam", "StreaM", "SteaM"),
            ("steam", "STREAM", "STEAM"),
            ("i", "I", "I"),
        )
        for entry, phrase, expected in cases:
            sym_spell = SymSpell()
            sym_spell.create_dictionary_entry(entry, 4)
            suggestions = sym_spell.lookup(
                phrase, Verbosity.TOP, 2, transfer_casing=True)
            self.assertEqual(expected, suggestions[0].term)
예제 #5
0
    def test_add_additional_counts_should_increase_count(self):
        """Check that re-adding an existing word accumulates its count.

        "hello" is added with count 11 and then again with count 3; the
        looked-up count must be 11 and then 14.
        """
        sym_spell = SymSpell()
        word = "hello"

        def sole_count():
            # Count of the only suggestion; 0 when the lookup does not
            # return exactly one result.
            found = sym_spell.lookup(word, Verbosity.ALL)
            if len(found) != 1:
                return 0
            return found[0].count

        sym_spell.create_dictionary_entry(word, 11)
        self.assertEqual(11, sole_count())

        sym_spell.create_dictionary_entry(word, 3)
        self.assertEqual(11 + 3, sole_count())
예제 #6
0
    def test_verbosity_should_control_lookup_results(self):
        """Check how many suggestions each verbosity level returns.

        With "steam", "steams" and "steem" registered, looking up
        "steems" must return 1 result for TOP, 2 for CLOSEST and 3 for
        ALL.
        """
        sym_spell = SymSpell()
        for count, term in enumerate(("steam", "steams", "steem"), start=1):
            sym_spell.create_dictionary_entry(term, count)

        expected_sizes = (
            (Verbosity.TOP, 1),
            (Verbosity.CLOSEST, 2),
            (Verbosity.ALL, 3),
        )
        for verbosity, size in expected_sizes:
            suggestions = sym_spell.lookup("steems", verbosity, 2)
            self.assertEqual(size, len(suggestions))
예제 #7
0
    def test_add_additional_counts_should_not_overflow(self):
        """Check that an accumulated count is capped at ``sys.maxsize``.

        "hello" starts at ``sys.maxsize - 10``; adding 11 more must yield
        exactly ``sys.maxsize`` rather than a larger value.
        """
        sym_spell = SymSpell()
        word = "hello"

        # (count added, total expected afterwards)
        steps = (
            (sys.maxsize - 10, sys.maxsize - 10),
            (11, sys.maxsize),
        )
        for added, expected_total in steps:
            sym_spell.create_dictionary_entry(word, added)
            found = sym_spell.lookup(word, Verbosity.ALL)
            # Treat anything other than exactly one suggestion as count 0.
            total = found[0].count if len(found) == 1 else 0
            self.assertEqual(expected_total, total)
예제 #8
0
    def test_pickle_compressed(self):
        """Round-trip a loaded dictionary through ``save_pickle``/``load_pickle``.

        The dictionary at ``self.dictionary_path`` is loaded and pickled
        to ``dictionary.pickle`` under ``self.fortests_path``. A second
        instance then loads that pickle. Both instances must report the
        same ``max_length()`` and the same top term for "flam".

        The pickle file is removed in a ``finally`` clause so that a
        failing assertion or lookup does not leave it behind in the
        fixtures directory.
        """
        pickle_path = os.path.join(self.fortests_path, "dictionary.pickle")
        edit_distance_max = 2
        prefix_length = 7
        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(self.dictionary_path, 0, 1)
        try:
            sym_spell.save_pickle(pickle_path)

            sym_spell_2 = SymSpell(edit_distance_max, prefix_length)
            sym_spell_2.load_pickle(pickle_path)
            self.assertEqual(sym_spell.max_length(), sym_spell_2.max_length())
            self.assertEqual(
                sym_spell.lookup("flam", Verbosity.TOP, 0, True)[0].term,
                sym_spell_2.lookup("flam", Verbosity.TOP, 0, True)[0].term)
        finally:
            # save_pickle may have failed before creating the file; only
            # remove what actually exists so the original error is not
            # masked by FileNotFoundError.
            if os.path.exists(pickle_path):
                os.remove(pickle_path)
예제 #9
0
 def test_lookup_include_unknown(self):
     """Check that "flam" is returned when the fourth lookup arg is True.

     "flame" (count 20) and "flam" (count 1) are registered on
     ``SymSpell(2, 7, 10)``. Looking up "flam" with ``True`` as the
     fourth positional argument (presumably ``include_unknown``, going
     by the test name) must return exactly one suggestion, "flam".
     """
     sym_spell = SymSpell(2, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         sym_spell.create_dictionary_entry(term, count)

     suggestions = sym_spell.lookup("flam", Verbosity.TOP, 0, True)
     self.assertEqual(1, len(suggestions))
     self.assertEqual("flam", suggestions[0].term)
예제 #10
0
 def test_lookup_should_not_return_low_count_word_that_are_also_delete_word(
     self,
 ):
     """Check that low-count "flam" is not returned for an exact lookup.

     "flame" (count 20) and "flam" (count 1) are registered on
     ``SymSpell(2, 7, 10)``. Looking up "flam" with 0 as the third
     lookup argument must return nothing.
     """
     sym_spell = SymSpell(2, 7, 10)
     for term, count in (("flame", 20), ("flam", 1)):
         sym_spell.create_dictionary_entry(term, count)

     suggestions = sym_spell.lookup("flam", Verbosity.TOP, 0)
     self.assertEqual(0, len(suggestions))
예제 #11
0
 def test_lookup_should_find_exact_match(self):
     """Check that "streama" resolves to "steama" as the single TOP hit.

     Three near-identical words are registered; only "steama" is
     expected back for the phrase "streama".
     """
     sym_spell = SymSpell()
     entries = (("steama", 4), ("steamb", 6), ("steamc", 2))
     for term, count in entries:
         sym_spell.create_dictionary_entry(term, count)

     suggestions = sym_spell.lookup("streama", Verbosity.TOP, 2)
     self.assertEqual(1, len(suggestions))
     best = suggestions[0]
     self.assertEqual("steama", best.term)
예제 #12
0
 def test_lookup_should_return_most_frequent(self):
     """Check that TOP returns the highest-count candidate for "stream".

     "steama", "steamb" and "steamc" are registered with counts 4, 6
     and 2; the single suggestion must be "steamb" with count 6.
     """
     sym_spell = SymSpell()
     entries = (("steama", 4), ("steamb", 6), ("steamc", 2))
     for term, count in entries:
         sym_spell.create_dictionary_entry(term, count)

     suggestions = sym_spell.lookup("stream", Verbosity.TOP, 2)
     self.assertEqual(1, len(suggestions))
     best = suggestions[0]
     self.assertEqual("steamb", best.term)
     self.assertEqual(6, best.count)
예제 #13
0
 def test_deletes(self):
     """Check the top suggestion for "stream" and that entries exist.

     After registering three words, the TOP lookup must return
     "steamb" (count 6), and ``entry_count()`` must be truthy.
     """
     sym_spell = SymSpell()
     entries = (("steama", 4), ("steamb", 6), ("steamc", 2))
     for term, count in entries:
         sym_spell.create_dictionary_entry(term, count)

     suggestions = sym_spell.lookup("stream", Verbosity.TOP, 2)
     self.assertEqual(1, len(suggestions))
     best = suggestions[0]
     self.assertEqual("steamb", best.term)
     self.assertEqual(6, best.count)

     entry_total = sym_spell.entry_count()
     self.assertTrue(entry_total)
예제 #14
0
    def test_load_dictionary_encoding(self):
        """Check that a non-English dictionary file loads and is searchable.

        Loads ``non_en_dict.txt`` from ``self.fortests_path`` and expects
        the Cyrillic phrase "АБ" to resolve to the single suggestion
        "АБИ".
        """
        dictionary_path = os.path.join(self.fortests_path, "non_en_dict.txt")
        edit_distance_max, prefix_length = 2, 7

        sym_spell = SymSpell(edit_distance_max, prefix_length)
        sym_spell.load_dictionary(dictionary_path, 0, 1)

        suggestions = sym_spell.lookup("АБ", Verbosity.TOP, 2)
        self.assertEqual(1, len(suggestions))
        self.assertEqual("АБИ", suggestions[0].term)
예제 #15
0
    def test_pickle_bytes(self):
        """Round-trip a dictionary through the in-memory pickle bytes API.

        Three words are registered and serialised with
        ``save_pickle_bytes()``; a second instance restores them with
        ``load_pickle_bytes()`` and must answer lookups for misspelled
        phrases.
        """
        edit_distance_max, prefix_length = 2, 7

        sym_spell = SymSpell(edit_distance_max, prefix_length)
        for term, count in (("test", 123), ("ball", 4), ("code", 56)):
            sym_spell.create_dictionary_entry(term, count)
        sym_bytes = sym_spell.save_pickle_bytes()

        sym_spell_ld = SymSpell(edit_distance_max, prefix_length)
        sym_spell_ld.load_pickle_bytes(sym_bytes)

        # Each lookup is followed immediately by its assertion.
        closest = sym_spell_ld.lookup("tst", Verbosity.CLOSEST)[0]
        self.assertEqual("test", closest.term)

        closest = sym_spell_ld.lookup("boll", Verbosity.CLOSEST)[0]
        self.assertEqual("ball", closest.term)

        closest = sym_spell_ld.lookup("c0d3", Verbosity.CLOSEST)[0]
        self.assertEqual(2, closest.distance)
예제 #16
0
 def test_save_load(self):
     """Check that a pickled ``self.symSpell`` reloads with equal results.

     Records the closest term for "tke" and ``max_length()``, saves the
     instance to ``temp/temp.bin``, loads it into a fresh ``SymSpell``
     and compares the same two values.

     The temporary file and directory are removed in a ``finally``
     clause so they do not leak when saving, loading or the lookup
     raises.
     """
     before_save = self.symSpell.lookup("tke", Verbosity.CLOSEST)[0].term
     before_max_length = self.symSpell.max_length()

     pickle_path = "temp/temp.bin"
     os.makedirs("temp", exist_ok=True)
     try:
         self.symSpell.save_pickle(pickle_path)
         load_sym_spell = SymSpell()
         load_sym_spell.load_pickle(pickle_path)
         after_load = load_sym_spell.lookup("tke", Verbosity.CLOSEST)[0].term
         after_max_length = load_sym_spell.max_length()
     finally:
         # save_pickle may have failed before creating the file; only
         # remove what actually exists.
         if os.path.exists(pickle_path):
             os.remove(pickle_path)
         os.rmdir("temp")

     assert before_save == after_load
     assert before_max_length == after_max_length
예제 #17
0
def test_load_pickle_symspellcpppy(benchmark):
    """Benchmark ``SymSpellCpp.load_pickle`` and sanity-check the result.

    Expects ``temp_cpppy/temp.bin`` to exist already. It is presumably
    written by a companion save test; verify test ordering. The file and
    its directory are removed after the benchmark, then the closest
    suggestion for "tke" must be "the".

    Args:
        benchmark: Callable invoked as ``benchmark(func, *args)``;
            presumably the pytest-benchmark fixture.
    """
    pickle_dir = "temp_cpppy"
    pickle_file = "temp_cpppy/temp.bin"

    sym_spell = SymSpellCpp(max_dictionary_edit_distance=2, prefix_length=7)
    benchmark(sym_spell.load_pickle, pickle_file)

    os.remove(pickle_file)
    os.rmdir(pickle_dir)

    closest_term = sym_spell.lookup("tke", VerbosityCpp.CLOSEST)[0].term
    assert closest_term == "the"