forked from agilescientific/fuzzylas
-
Notifications
You must be signed in to change notification settings - Fork 1
/
fuzzylas.py
146 lines (123 loc) · 3.6 KB
/
fuzzylas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#################################
# Import stuff
import levenshtein
import logging
import process
from google.appengine.api import memcache
from google.appengine.api import search
#################################
# Pre-populate the memcache
# Do 10 TLAs first
# All of them will be about 17000 items, which should take a while!
# TODO: how do we avoid locking up the app while it does this?
#################################
# Various guessing algorithms
# Levenshtein guess
def guess_simple(data, word, lim):
    """Return the entries of ``data`` whose keys are closest to ``word``.

    Closeness is the value returned by ``levenshtein.levenshtein(word, key)``;
    smaller is closer.

    Args:
        data: dict whose keys are the candidate words.
        word: the query compared against every key of ``data``.
        lim: maximum number of entries to return.

    Returns:
        A dict of at most ``lim`` ``{key: data[key]}`` entries, taken in
        order of increasing distance from ``word``. Keys with equal distance
        are taken in the iteration order of ``data``. The result is empty
        when ``data`` is empty or ``lim`` is not positive.
    """
    # Rank every key by distance with a real (stable) sort. The earlier
    # approach only front-inserted keys that tied or beat the running
    # minimum and appended everything else, so a key that was closer than
    # an earlier one -- but not a new minimum -- ended up at the tail and
    # could be left out of the first ``lim`` results.
    ranked = sorted(
        data.keys(),
        key=lambda candidate: levenshtein.levenshtein(word, candidate),
    )
    # Slice rather than index so that lim > len(data) returns everything
    # available instead of raising IndexError. Keys are used exactly as
    # stored (no str() coercion), so the lookup in ``data`` cannot miss.
    return {candidate: data[candidate] for candidate in ranked[:max(lim, 0)]}
# Try another way to step over dict
def guess_simple2(data, word, lim):
    """Return the entries of ``data`` whose keys are closest to ``word``.

    Variant of the Levenshtein guess that iterates ``data`` directly rather
    than through ``data.keys()``. Closeness is the value returned by
    ``levenshtein.levenshtein(word, key)``; smaller is closer.

    Args:
        data: dict whose keys are the candidate words.
        word: the query compared against every key of ``data``.
        lim: maximum number of entries to return.

    Returns:
        A dict of at most ``lim`` ``{key: data[key]}`` entries, taken in
        order of increasing distance from ``word``. Keys with equal distance
        are taken in the iteration order of ``data``. The result is empty
        when ``data`` is empty or ``lim`` is not positive.
    """
    # A stable sort by distance gives a correct ranking. Front-inserting
    # only on a new (or tied) minimum and appending otherwise leaves
    # non-minimum keys in arrival order, so the first ``lim`` entries were
    # not guaranteed to be the ``lim`` closest.
    ranked = sorted(
        data,
        key=lambda candidate: levenshtein.levenshtein(word, candidate),
    )
    # Slicing tolerates lim > len(data) (indexing raised IndexError), and
    # keys are kept as stored so ``data[candidate]`` always resolves.
    return {candidate: data[candidate] for candidate in ranked[:max(lim, 0)]}
# Try yet another way to step over dict
def guess_simple3(data, word, lim):
    """Return the closest entries of ``data`` to ``word`` and cache them.

    Closeness is the value returned by ``levenshtein.levenshtein2(word, key)``;
    smaller is closer. The result is stored in memcache under the key
    ``"<word>-simple-<lim>"`` before being returned.

    Args:
        data: dict whose keys are the candidate words.
        word: the query compared against every key of ``data``; it is also
            concatenated into the memcache key.
        lim: maximum number of entries to return.

    Returns:
        A dict of at most ``lim`` ``{key: data[key]}`` entries, taken in
        order of increasing distance from ``word``. Keys with equal distance
        are taken in the iteration order of ``data``. The result is empty
        when ``data`` is empty or ``lim`` is not positive.
    """
    # Rank with a stable sort by distance. The earlier front-insert /
    # append scheme only tracked the running minimum, so keys that were
    # closer than an earlier key without being a new minimum were pushed
    # to the tail and could be dropped from the first ``lim`` results.
    ranked = sorted(
        data,
        key=lambda candidate: levenshtein.levenshtein2(word, candidate),
    )
    # Slicing tolerates lim > len(data) (indexing raised IndexError), and
    # keys are kept as stored so ``data[candidate]`` always resolves.
    output = {candidate: data[candidate] for candidate in ranked[:max(lim, 0)]}

    # Cache under the same key layout that guess() looks up for "simple".
    key = word + '-' + 'simple' + '-' + str(lim)
    memcache.set(key, output)
    return output
# Fuzzywuzzy guess
def guess_fuzzy(data, word, lim):
    """Guess matches for ``word`` using ``process.extract`` and cache them.

    Args:
        data: dict whose keys are the candidate words.
        word: the query passed to ``process.extract``; it is also used to
            build the memcache key.
        lim: passed to ``process.extract`` as ``limit``.

    Returns:
        A dict with one entry per hit. Each entry is keyed by the tuple
        ``(hit[1], hit[0])`` and holds ``data[hit[0]]``. ``hit[0]`` is the
        matched key of ``data``; ``hit[1]`` is presumably the match score
        (confirm against the ``process`` module). The same dict is stored
        in memcache under ``"<word>-fuzzy-<lim>"``.
    """
    matches = process.extract(word, data.keys(), limit=lim)
    result = {(match[1], match[0]): data[match[0]] for match in matches}
    cache_key = '-'.join([word, 'fuzzy', str(lim)])
    memcache.set(cache_key, result)
    return result
# def guess_search(word,lim):
#
# index = search.Index(name='curves')
#
# query = "{0}".format(word)
# #query = "mnemonic = {0}".format(word)
#
# result = []
#
# try:
# results = index.search(search.Query(
# query_string=query,
# options=search.QueryOptions(
# limit=lim,
# returned_fields=['mnemonic', 'company', 'units']
# )
# ))
#
# return results
#
# except search.Error:
# logging.exception('Search failed; do something with this error')
#################################
# Main guess routine - calls one of the others
# curves dataset, input, method, guesses
def guess(data, word, method, lim):
    """Main guess routine: answer from cache or dispatch to a guesser.

    Args:
        data: the curves dataset, a dict keyed by candidate words.
        word: the input to look up.
        method: ``"exact"``, ``"simple"``, or anything else (treated as
            fuzzy).
        lim: number of guesses requested.

    Returns:
        * the memcache value stored under ``"<word>-<method>-<lim>"`` if
          there is one;
        * otherwise ``{word: data[word]}`` when ``word`` is a key of
          ``data``;
        * otherwise ``None`` for method ``"exact"``, the result of
          ``guess_simple3`` for ``"simple"``, and the result of
          ``guess_fuzzy`` for any other method.
    """
    cache_key = '-'.join([word, method, str(lim)])
    cached = memcache.get(cache_key)
    if cached is not None:
        return cached

    # An exact hit short-circuits every guessing method.
    if word in data:
        return {word: data[word]}

    if method == "exact":
        return None  # we only get this if there's no match
    if method == "simple":
        return guess_simple3(data, word, lim)
    # The "search" method (guess_search) is currently disabled.
    return guess_fuzzy(data, word, lim)