def guess_fuzzy(data, word, limit=1, maxdist=1):
    """
    Guess mnemonics close to ``word`` using ``process.extract`` candidates.

    Candidates are taken from the keys of ``data``; each one is re-checked
    with ``levenshtein.levenshtein2`` and dropped when its edit distance to
    ``word`` exceeds ``maxdist``. Every curve stored under a surviving
    candidate yields one result dict.

    The result list is also stored in ``memcache`` before being returned.

    Args:
        data (dict): maps a mnemonic to an iterable of curves.
        word (str): the word to look up.
        limit (int): the max number of candidates requested from
            ``process.extract``.
        maxdist (int): the max allowed edit distance.

    Returns:
        list: dicts with the keys ``distance``, ``score``, ``mnemonic``
        and ``curve``.
    """
    candidates = process.extract(word, data.keys(), limit=limit)

    matches = []
    for mnemonic, score in candidates:
        dist = levenshtein.levenshtein2(word, mnemonic)
        if dist <= maxdist:
            # One result per curve registered under this mnemonic.
            matches.extend(
                {"distance": dist, "score": score,
                 "mnemonic": mnemonic, "curve": curve}
                for curve in data[mnemonic]
            )

    # NOTE(review): limit and maxdist are concatenated without a separator,
    # so e.g. (1, 11) and (11, 1) share a key. Kept as-is because readers of
    # the cache presumably build the key the same way -- confirm.
    cache_key = '-'.join([word, 'fuzzy', str(limit) + str(maxdist)])
    memcache.set(cache_key, matches)
    return matches
def analyse_edit_distance(recognized_concept_file, annotated_concept_file):
    """
    Compute a per-line edit distance between two concept files.

    Both files are read line by line. Line ``i`` of the recognized file is
    split on single spaces into tokens and compared, via
    ``lev.levenshtein2``, with the tokens of line ``i`` of the annotated
    file.

    Args:
        recognized_concept_file (str): path of the file with recognized
            concepts, one space-separated sequence per line.
        annotated_concept_file (str): path of the file with annotated
            concepts, one space-separated sequence per line.

    Returns:
        list: one entry per line of the recognized file; the edit distance
        for that line, or ``-1`` when the recognized line is empty after
        stripping spaces and newlines.

    Raises:
        IndexError: if the annotated file has fewer lines than the
            recognized file (no length check is performed up front).
    """
    with open(recognized_concept_file) as f:
        recognized_concepts = f.readlines()
    with open(annotated_concept_file) as f:
        annotated_concepts = f.readlines()

    edit_distance = []
    for i, recognized_line in enumerate(recognized_concepts):
        a1 = recognized_line.strip(' \n').split(' ')
        a2 = annotated_concepts[i].strip().split(' ')
        if a1[0] != '':
            edit_distance.append(lev.levenshtein2(a1, a2))
        else:
            # Empty recognized line: nothing to compare, flag it with -1.
            # Parenthesised form prints the same text on Python 2 and 3.
            print('else running')
            edit_distance.append(-1)
    return edit_distance
def guess_simple(data, word, limit=-1, maxdist=1):
    """
    Guess mnemonics close to ``word`` by scanning every entry of ``data``.

    Each key of ``data`` is compared to ``word`` with
    ``levenshtein.levenshtein2``; keys within ``maxdist`` contribute one
    result per curve, skipping curves whose ``'mnemonic'`` entry is falsy.

    The result list is also stored in ``memcache`` before being returned.

    Args:
        data (dict): maps a mnemonic to an iterable of curves; each curve
            is indexed with ``'mnemonic'``.
        word (str): the word to look up.
        limit (int): not used for filtering; it only takes part in the
            cache key.
        maxdist (int): the max allowed edit distance.

    Returns:
        list: dicts with the keys ``distance``, ``mnemonic`` and ``curve``.
    """
    matches = []
    for mnemonic, curves in data.items():
        dist = levenshtein.levenshtein2(word, mnemonic)
        if dist > maxdist:
            continue
        matches.extend(
            {"distance": dist, "mnemonic": mnemonic, "curve": curve}
            for curve in curves
            if curve['mnemonic']
        )

    # NOTE(review): limit and maxdist are concatenated without a separator,
    # so distinct (limit, maxdist) pairs can share a key. Kept as-is because
    # readers of the cache presumably build the key the same way -- confirm.
    cache_key = '-'.join([word, 'simple', str(limit) + str(maxdist)])
    memcache.set(cache_key, matches)
    return matches
def guess_simple3(data, word, lim):
    """
    Return the ``lim`` entries of ``data`` whose keys are closest to ``word``.

    Keys are ranked by ``levenshtein.levenshtein2(word, key)``, smallest
    distance first. If ``data`` holds fewer than ``lim`` keys, all of them
    are returned; a ``lim`` of zero or less yields an empty dict.

    The result is also stored in ``memcache`` before being returned.

    NOTE(review): SOURCE contains a second definition of ``guess_simple3``
    after this one; the later definition shadows this one at import time.

    Args:
        data (dict): candidates, keyed by the word to compare against.
        word (str): the word to look up.
        lim (int): the max number of entries to return.

    Returns:
        dict: the selected keys of ``data`` mapped to their values.
    """
    # Rank all keys by distance. sorted() is stable, so keys at the same
    # distance keep the iteration order of ``data``. The original keys are
    # used for the lookup (no str() coercion), so non-str keys work too.
    ranked = sorted(data, key=lambda w: levenshtein.levenshtein2(word, w))
    closest = ranked[:max(lim, 0)]

    output = {w: data[w] for w in closest}

    key = word + '-' + 'simple' + '-' + str(lim)
    memcache.set(key, output)
    return output
def guess_simple3(data, word, lim):
    """
    Return the ``lim`` entries of ``data`` whose keys are closest to ``word``.

    Keys are ranked by ``levenshtein.levenshtein2(word, key)``, smallest
    distance first. If ``data`` holds fewer than ``lim`` keys, all of them
    are returned; a ``lim`` of zero or less yields an empty dict.

    The result is also stored in ``memcache`` before being returned.

    NOTE(review): SOURCE contains an earlier definition of
    ``guess_simple3`` as well; being the later one, this definition is the
    one that takes effect at import time.

    Args:
        data (dict): candidates, keyed by the word to compare against.
        word (str): the word to look up.
        lim (int): the max number of entries to return.

    Returns:
        dict: the selected keys of ``data`` mapped to their values.
    """
    # Rank all keys by distance. sorted() is stable, so keys at the same
    # distance keep the iteration order of ``data``. The original keys are
    # used for the lookup (no str() coercion), so non-str keys work too.
    ranked = sorted(data, key=lambda w: levenshtein.levenshtein2(word, w))
    closest = ranked[:max(lim, 0)]

    output = {w: data[w] for w in closest}

    key = word + '-' + 'simple' + '-' + str(lim)
    memcache.set(key, output)
    return output