def search(self, doc, threshold=0):
    """Return the ids of indexed documents that match ``doc``.

    The query is delegated to ``Levenshtein_search.lookup`` against the
    index identified by ``self.index_key``. Each hit is a 3-item record;
    only its first item (the matched document) is used, and it is
    translated to an id through ``self._doc_to_id``.

    Args:
        doc: The document to look up.
        threshold: Forwarded unchanged as the third argument of
            ``lookup`` (presumably the maximum edit distance -- confirm
            against the Levenshtein_search documentation).

    Returns:
        A list of ids, one per hit, or an empty list when the lookup
        yields a falsy result.
    """
    hits = Levenshtein_search.lookup(self.index_key, doc, threshold)
    if not hits:
        return []

    doc_ids = []
    for matched, _, _ in hits:
        doc_ids.append(self._doc_to_id[matched])
    return doc_ids
def test_remove_doc(self):
    """Check lookup results after a word has been removed from the set.

    A word set is built from ``self.excerpt1``, ``'overcoat'`` is removed
    from it, and a lookup for ``'overcoat'`` with threshold 6 is compared
    against the exact expected records.
    """
    # The third field of every expected record takes one of two values.
    low = 0.024390243902439025
    high = 0.04878048780487805
    expected = [
        ['went', 6, low],
        ['cold', 6, low],
        ['Versh', 6, high],
        ['overshoes', 4, high],
        ['not', 6, low],
    ]

    wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
    Levenshtein_search.remove_string(wordset, 'overcoat')
    found = Levenshtein_search.lookup(wordset, 'overcoat', 6)

    assert found == expected
def test_query_overcoat(self):
    """Check lookups for ``'overcoat'`` against two different word sets.

    For each of ``self.excerpt1`` and ``self.excerpt2`` a fresh word set is
    populated and queried for ``'overcoat'`` with threshold 6; the result is
    compared against the exact expected records.
    """

    def lookup_overcoat(words):
        # Build a new word set from ``words`` and query it.
        wordset = Levenshtein_search.populate_wordset(-1, words)
        return Levenshtein_search.lookup(wordset, 'overcoat', 6)

    # First excerpt: the query word itself is present (second field 0).
    low, high = 0.023809523809523808, 0.047619047619047616
    found = lookup_overcoat(self.excerpt1)
    assert found == [
        ['overcoat', 0, low],
        ['went', 6, low],
        ['cold', 6, low],
        ['Versh', 6, high],
        ['overshoes', 4, high],
        ['not', 6, low],
    ]

    # Second excerpt: no record for 'overcoat' itself is expected.
    low, high = 0.022222222222222223, 0.044444444444444446
    found = lookup_overcoat(self.excerpt2)
    assert found == [
        ['Versh', 6, high],
        ['overshoes', 4, low],
        ['coat', 4, low],
        ['out', 6, high],
        ['here', 6, low],
    ]
def search(self, doc, threshold=0):
    """Return the indexed documents that match ``doc``.

    The query is delegated to ``Levenshtein_search.lookup`` against the
    index identified by ``self.index_key``. Each hit is a 3-item record;
    only its first item (the matched document) is returned.

    Args:
        doc: The document to look up.
        threshold: Forwarded unchanged as the third argument of
            ``lookup`` (presumably the maximum edit distance -- confirm
            against the Levenshtein_search documentation).

    Returns:
        A list with the matched document of every hit, or an empty list
        when the lookup yields a falsy result (``None`` or no hits).
    """
    results = Levenshtein_search.lookup(self.index_key, doc, threshold)
    # Treat a falsy result as "no matches" instead of letting iteration over
    # ``None`` raise TypeError.
    if not results:
        return []
    # Use a distinct loop name so the ``doc`` parameter is not shadowed.
    return [match for match, _, _ in results]
def search(self, doc, threshold=0):
    """Find indexed documents matching ``doc`` and return their ids.

    Runs ``Levenshtein_search.lookup`` on the index named by
    ``self.index_key`` and maps the first item of every 3-item hit through
    ``self._doc_to_id``.

    Args:
        doc: The document to look up.
        threshold: Passed straight through as the third argument of
            ``lookup`` (presumably the maximum edit distance -- confirm
            against the Levenshtein_search documentation).

    Returns:
        A list of ids, one per hit; empty when the lookup result is falsy.
    """
    # A falsy lookup result is replaced by an empty tuple so the
    # comprehension below naturally produces [].
    hits = Levenshtein_search.lookup(self.index_key, doc, threshold) or ()
    return [self._doc_to_id[matched] for matched, _, _ in hits]
import Levenshtein_search

# Benchmark script: time a fuzzy name lookup done two ways against the
# ``restaurant_nophone_training`` table --
#   1. in PostgreSQL, via ``levenshtein_less_equal`` in a WHERE clause;
#   2. in-process, via a Levenshtein_search word set built from all names.
# Elapsed time and results are printed for each approach.

conn = psycopg2.connect("host='127.0.0.1' port='5432' dbname='benchmark' user='******' password=''")
try:
    cur = conn.cursor()
    cur.execute("set schema 'public';")

    query_word = "\"philippe the original\""
    max_dist = 2

    # NOTE: the statement is assembled from the fixed literals above and is
    # printed verbatim. If query_word ever comes from outside this script,
    # switch to a parameterized cur.execute() call instead.
    sqlquery = (
        "select name from restaurant_nophone_training "
        "where levenshtein_less_equal(name, '{0}', {1}) <= {1};"
    ).format(query_word, max_dist)
    print(sqlquery)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring elapsed intervals.
    starttime = time.perf_counter()
    cur.execute(sqlquery)
    results = cur.fetchall()
    print(str(time.perf_counter() - starttime) + " sec")
    print(results)

    print(" ")
    print("Levenshtein_search algorithm:")
    cur.execute("select name from restaurant_nophone_training")
    names = cur.fetchall()
    # Each fetched row is a 1-column record; keep only the name itself.
    namelist = [row[0] for row in names]

    idx = Levenshtein_search.populate_wordset(-1, namelist)
    try:
        # Only the lookup is timed; building the word set is excluded.
        starttime = time.perf_counter()
        results = Levenshtein_search.lookup(idx, query_word, max_dist)
        print(str(time.perf_counter() - starttime) + " sec")
        print(results)
    finally:
        # Release the word set even if the lookup or printing fails.
        Levenshtein_search.clear_wordset(idx)
finally:
    # Always close the database connection, including on errors.
    conn.close()