Esempio n. 1
0
 def search(self, doc, threshold=0):
     """Return the ids of indexed documents that match ``doc``.

     The Levenshtein index identified by ``self.index_key`` is queried with
     ``doc`` and ``threshold`` (forwarded unchanged; presumably the maximum
     edit distance -- confirm against the Levenshtein_search docs).  The
     first element of every result triple is translated through
     ``self._doc_to_id``.  A falsy lookup result yields an empty list.
     """
     hits = Levenshtein_search.lookup(self.index_key, doc, threshold)
     # Guard first so the id translation only runs when there is a hit.
     if not hits:
         return []
     return [self._doc_to_id[hit] for hit, _, _ in hits]
Esempio n. 2
0
 def test_remove_doc(self):
     """A string removed from the word set must no longer be returned.

     Builds a word set from ``self.excerpt1``, removes ``'overcoat'`` and
     checks that a lookup for that word with threshold 6 returns only the
     remaining entries.
     """
     # Each entry is the triple returned by lookup(): the matched string,
     # an integer and a float (presumably distance and relative
     # frequency -- confirm against the Levenshtein_search docs).
     expected = [
         ['went', 6, 0.024390243902439025],
         ['cold', 6, 0.024390243902439025],
         ['Versh', 6, 0.04878048780487805],
         ['overshoes', 4, 0.04878048780487805],
         ['not', 6, 0.024390243902439025],
     ]
     wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
     Levenshtein_search.remove_string(wordset, 'overcoat')
     found = Levenshtein_search.lookup(wordset, 'overcoat', 6)
     assert found == expected
Esempio n. 3
0
    def test_query_overcoat(self):
        """Look up 'overcoat' (threshold 6) in word sets built from two excerpts.

        The first excerpt contains the query word itself, so it is expected
        as the first result with a second element of 0; the second excerpt
        only yields near matches.
        """
        # Scenario 1: word set built from excerpt1.
        expected_first = [
            ['overcoat', 0, 0.023809523809523808],
            ['went', 6, 0.023809523809523808],
            ['cold', 6, 0.023809523809523808],
            ['Versh', 6, 0.047619047619047616],
            ['overshoes', 4, 0.047619047619047616],
            ['not', 6, 0.023809523809523808],
        ]
        wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
        found = Levenshtein_search.lookup(wordset, 'overcoat', 6)
        assert found == expected_first

        # Scenario 2: a fresh word set built from excerpt2.
        expected_second = [
            ['Versh', 6, 0.044444444444444446],
            ['overshoes', 4, 0.022222222222222223],
            ['coat', 4, 0.022222222222222223],
            ['out', 6, 0.044444444444444446],
            ['here', 6, 0.022222222222222223],
        ]
        wordset = Levenshtein_search.populate_wordset(-1, self.excerpt2)
        found = Levenshtein_search.lookup(wordset, 'overcoat', 6)
        assert found == expected_second
Esempio n. 4
0
 def search(self, doc, threshold=0):
     """Return the indexed strings that match ``doc``.

     Queries the Levenshtein index identified by ``self.index_key`` with
     ``doc`` and ``threshold`` and keeps only the first element (the matched
     string) of each result triple.
     """
     matched = []
     for word, _, _ in Levenshtein_search.lookup(self.index_key, doc,
                                                 threshold):
         matched.append(word)
     return matched
Esempio n. 5
0
    def search(self, doc, threshold=0):
        """Return the indexed strings that match ``doc``.

        The lookup against the index ``self.index_key`` yields result
        triples; only their first element, the matched string, is returned.
        ``threshold`` is forwarded to the lookup unchanged.
        """
        hits = Levenshtein_search.lookup(self.index_key, doc, threshold)
        # Use a distinct loop name so the ``doc`` argument is not shadowed.
        return [matched for matched, _, _ in hits]
Esempio n. 6
0
 def search(self, doc, threshold=0):
     """Return the ids of indexed documents that match ``doc``.

     Each matched string reported by the Levenshtein lookup on
     ``self.index_key`` is mapped to its id via ``self._doc_to_id``.
     An empty list is returned when the lookup result is falsy.
     """
     found = Levenshtein_search.lookup(self.index_key, doc, threshold)
     return (
         [self._doc_to_id[text] for text, _, _ in found]
         if found
         else []
     )
Esempio n. 7
0
 def search(self, doc, threshold=0):
     """Return the ids of indexed documents that match ``doc``.

     Runs a Levenshtein lookup on the index ``self.index_key`` and converts
     the first element of every result triple to an id with
     ``self._doc_to_id``.  A falsy lookup result produces an empty list.
     """
     candidates = Levenshtein_search.lookup(self.index_key, doc, threshold)
     ids = []
     # ``or ()`` makes a falsy result iterate zero times.
     for candidate, _, _ in candidates or ():
         ids.append(self._doc_to_id[candidate])
     return ids
import Levenshtein_search

# Benchmark: compare PostgreSQL's server-side levenshtein_less_equal() with
# an in-process Levenshtein_search lookup over the same restaurant names.
conn = psycopg2.connect("host='127.0.0.1' port='5432' dbname='benchmark' user='******' password=''")
cur = conn.cursor()
cur.execute("set schema 'public';")
query_word = "\"philippe the original\""
max_dist = 2

# The statement is assembled by concatenation so its exact text can be
# printed; both interpolated values are the constants defined above.
sqlquery = "select name from restaurant_nophone_training where levenshtein_less_equal(name, '" + query_word + "', " + str(max_dist) + ") <= " + str(max_dist) + ";"
print(sqlquery)
# time.clock() was removed in Python 3.8 and, on Unix, measured CPU time of
# this process rather than elapsed time; perf_counter() gives the wall-clock
# interval, which is what matters for a round-trip to the database.
starttime = time.perf_counter()
cur.execute(sqlquery)
results = cur.fetchall()
print(str(time.perf_counter() - starttime) + " sec")
print(results)
print(" ")

print("Levenshtein_search algorithm:")
cur.execute("select name from restaurant_nophone_training")
names = cur.fetchall()
# fetchall() returns one row per name; take the single selected column.
namelist = [row[0] for row in names]

# Building the word set is deliberately kept outside the timed region so only
# the lookup itself is measured.
idx = Levenshtein_search.populate_wordset(-1, namelist)
starttime = time.perf_counter()
results = Levenshtein_search.lookup(idx, query_word, max_dist)
print(str(time.perf_counter() - starttime) + " sec")
print(results)

Levenshtein_search.clear_wordset(idx)
conn.close()
Esempio n. 9
0
 def search(self, doc, threshold=0):
     """Return the ids of indexed documents that match ``doc``.

     ``doc`` and ``threshold`` are passed to the Levenshtein lookup for the
     index ``self.index_key``; every matched string is then resolved to its
     id through ``self._doc_to_id``.  When the lookup result is falsy an
     empty list is returned.
     """
     lookup_result = Levenshtein_search.lookup(self.index_key, doc,
                                               threshold)
     if not lookup_result:
         return []
     doc_ids = [self._doc_to_id[entry] for entry, _, _ in lookup_result]
     return doc_ids