Ejemplo n.º 1
0
    def __init__(self):
        """Create an empty word set plus the bookkeeping for documents.

        ``index_key`` stores the value returned by
        ``populate_wordset(-1, [])``.  ``_doc_to_id`` is a ``defaultdict``
        whose factory yields consecutive integers starting at 1, so every
        previously unseen key receives the next integer.  ``docs`` starts as
        an empty list.
        """
        self.index_key = Levenshtein_search.populate_wordset(-1, [])

        # The counter's advance method is named ``next`` on Python 2 and
        # ``__next__`` on Python 3; use whichever one is present.
        id_counter = itertools.count(1)
        try:
            advance = id_counter.next
        except AttributeError:
            advance = id_counter.__next__
        self._doc_to_id = collections.defaultdict(advance)

        self.docs = []
Ejemplo n.º 2
0
 def test_remove_doc(self):
     """After removing 'overcoat', a lookup for it returns only other words."""
     wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
     Levenshtein_search.remove_string(wordset, 'overcoat')

     # The removed word itself must not appear among the expected rows.
     expected_rows = [
         ['went', 6, 0.024390243902439025],
         ['cold', 6, 0.024390243902439025],
         ['Versh', 6, 0.04878048780487805],
         ['overshoes', 4, 0.04878048780487805],
         ['not', 6, 0.024390243902439025],
     ]
     assert Levenshtein_search.lookup(wordset, 'overcoat', 6) == expected_rows
Ejemplo n.º 3
0
    def test_query_overcoat(self):
        """Check ``lookup(..., 'overcoat', 6)`` against both excerpts.

        The first excerpt is expected to return 'overcoat' itself as the
        leading row; the rows expected for the second excerpt do not
        include it.
        """
        # First excerpt.
        first_set = Levenshtein_search.populate_wordset(-1, self.excerpt1)
        first_expected = [
            ['overcoat', 0, 0.023809523809523808],
            ['went', 6, 0.023809523809523808],
            ['cold', 6, 0.023809523809523808],
            ['Versh', 6, 0.047619047619047616],
            ['overshoes', 4, 0.047619047619047616],
            ['not', 6, 0.023809523809523808],
        ]
        assert Levenshtein_search.lookup(first_set, 'overcoat', 6) == first_expected

        # Second excerpt, populated as a separate word set.
        second_set = Levenshtein_search.populate_wordset(-1, self.excerpt2)
        second_expected = [
            ['Versh', 6, 0.044444444444444446],
            ['overshoes', 4, 0.022222222222222223],
            ['coat', 4, 0.022222222222222223],
            ['out', 6, 0.044444444444444446],
            ['here', 6, 0.022222222222222223],
        ]
        assert Levenshtein_search.lookup(second_set, 'overcoat', 6) == second_expected
Ejemplo n.º 4
0
    def __init__(self):
        """Start with an empty word set, an id allocator and no documents.

        ``_doc_to_id`` maps each previously unseen key to the next integer
        from a counter that begins at 1.
        """
        self.index_key = Levenshtein_search.populate_wordset(-1, [])

        # Python 2 iterators expose ``next``; Python 3 renamed it to
        # ``__next__``.  Resolve the bound method once, then hand it to
        # the defaultdict as its factory.
        sequence = itertools.count(1)
        try:
            allocate_id = sequence.next
        except AttributeError:
            allocate_id = sequence.__next__
        self._doc_to_id = collections.defaultdict(allocate_id)

        self.docs = []
Ejemplo n.º 5
0
 def test_index_increment(self):
     """Two separately populated word sets must get different handles."""
     handle_one = Levenshtein_search.populate_wordset(-1, self.excerpt1)
     handle_two = Levenshtein_search.populate_wordset(-1, self.excerpt2)

     # Echo both handles so they show up in the captured test output.
     print(handle_one, handle_two)

     assert handle_one != handle_two
Ejemplo n.º 6
0
 def test_clear(self):
     """Populate a word set from the first excerpt, then clear it.

     There is no assertion: the test passes as long as neither call raises.
     """
     wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
     Levenshtein_search.clear_wordset(wordset)
Ejemplo n.º 7
0
 def __init__(self):
     """Create an empty word set and an ``Enumerator`` that starts at 1."""
     # ``-1`` with an empty list is the same call the rest of this code
     # uses to obtain a fresh handle; keep that handle for later calls.
     empty_wordset = Levenshtein_search.populate_wordset(-1, [])
     self.index_key = empty_wordset

     self._doc_to_id = Enumerator(start=1)
Ejemplo n.º 8
0
 def __init__(self):
     """Initialise the index with no words and ids that begin at 1."""
     # Populate with an empty list and remember the returned handle.
     fresh_handle = Levenshtein_search.populate_wordset(-1, [])
     self.index_key = fresh_handle

     # Document-to-id mapping (an ``Enumerator`` created with ``start=1``).
     self._doc_to_id = Enumerator(start=1)
Ejemplo n.º 9
0
 def unindex(self, doc):
     """Drop ``doc`` and rebuild the word set from what is left.

     ``doc`` is deleted from ``_doc_to_id`` first, so if that deletion
     raises, the existing word set is left untouched.
     """
     del self._doc_to_id[doc]

     # Individual words are not removed: the old word set is cleared and a
     # new one is populated from whatever iterating ``_doc_to_id`` yields.
     Levenshtein_search.clear_wordset(self.index_key)
     remaining = list(self._doc_to_id)
     self.index_key = Levenshtein_search.populate_wordset(-1, remaining)
import Levenshtein_search

# Benchmark: fuzzy-match one restaurant name with PostgreSQL's
# levenshtein_less_equal(), then with Levenshtein_search over the same names,
# printing the elapsed time and the matches for each approach.

# time.clock() was removed in Python 3.8 and measured CPU time on Unix, which
# does not include time spent waiting for the database.  Prefer the
# wall-clock perf_counter() and fall back to clock() only where
# perf_counter() does not exist (Python 2).
try:
    timer = time.perf_counter
except AttributeError:
    timer = time.clock

conn = psycopg2.connect("host='127.0.0.1' port='5432' dbname='benchmark' user='******' password=''")
try:
    cur = conn.cursor()
    cur.execute("set schema 'public';")
    query_word = "\"philippe the original\""
    max_dist = 2

    # Bind the values as query parameters instead of splicing them into the
    # SQL text, so a search term containing a quote cannot break the
    # statement.
    sqlquery = ("select name from restaurant_nophone_training "
                "where levenshtein_less_equal(name, %s, %s) <= %s;")
    sqlparams = (query_word, max_dist, max_dist)

    # mogrify() renders the statement with its parameters bound; it returns
    # bytes on Python 3, so decode before printing.
    rendered = cur.mogrify(sqlquery, sqlparams)
    if not isinstance(rendered, str):
        rendered = rendered.decode("utf-8", "replace")
    print(rendered)

    starttime = timer()
    cur.execute(sqlquery, sqlparams)
    results = cur.fetchall()
    print(str(timer() - starttime) + " sec")
    print(results)
    print(" ")

    print("Levenshtein_search algorithm:")
    cur.execute("select name from restaurant_nophone_training")
    names = cur.fetchall()
    # Each fetched row is a one-column sequence; keep just the name.
    namelist = [row[0] for row in names]

    idx = Levenshtein_search.populate_wordset(-1, namelist)
    try:
        # Only the lookup is timed, not the population of the word set.
        starttime = timer()
        results = Levenshtein_search.lookup(idx, query_word, max_dist)
        print(str(timer() - starttime) + " sec")
        print(results)
    finally:
        # Release the word set even if the lookup fails.
        Levenshtein_search.clear_wordset(idx)
finally:
    # Always close the connection, including on errors above.
    conn.close()