Exemple #1
0
def guess_fuzzy(data, word, limit=1, maxdist=1):
    """
    Guess mnemonics for ``word`` using fuzzy matching plus an edit-distance cap.

    Candidates come from ``process.extract`` run over the keys of ``data``;
    each candidate is then kept only if its ``levenshtein2`` distance to
    ``word`` does not exceed ``maxdist``. The result is also stored in
    memcache before being returned.

    Args:
        data (dict): maps a mnemonic to an iterable of curves.
        word (str): the word to look up.
        limit (int): the max number of candidates requested from the
            fuzzy matcher.
        maxdist (int): the max allowed edit distance.

    Returns:
        list: one dict per curve of every accepted candidate, with the keys
        "distance", "score", "mnemonic" and "curve".
    """
    candidates = process.extract(word, data.keys(), limit=limit)

    output = []
    for mnemonic, score in candidates:
        distance = levenshtein.levenshtein2(word, mnemonic)
        if distance <= maxdist:
            # One result entry per curve registered under this mnemonic.
            output.extend(
                {"distance": distance,
                 "score": score,
                 "mnemonic": mnemonic,
                 "curve": curve}
                for curve in data[mnemonic]
            )

    # NOTE(review): limit and maxdist are concatenated without a separator,
    # so e.g. (1, 11) and (11, 1) share a key -- kept as-is because readers
    # of this key are not visible here.
    cache_key = word + '-' + 'fuzzy' + '-' + str(limit) + str(maxdist)
    memcache.set(cache_key, output)

    return output
def analyse_edit_distance(recognized_concept_file, annotated_concept_file):
    """
    Compute a per-line edit distance between two concept files.

    Line ``i`` of each file is split on single spaces into a token list and
    the two lists are handed to ``lev.levenshtein2``.

    Args:
        recognized_concept_file (str): path of the file holding the
            recognized concepts, one sequence per line.
        annotated_concept_file (str): path of the file holding the
            annotated concepts, one sequence per line.

    Returns:
        list: one entry per line of the recognized file -- the value
        returned by ``lev.levenshtein2``, or ``-1`` when the recognized
        line is empty.

    Raises:
        IndexError: if the annotated file has fewer lines than the
            recognized file and a missing line is reached.
    """
    with open(recognized_concept_file) as f:
        recognized_concepts = f.readlines()

    with open(annotated_concept_file) as f:
        annotated_concepts = f.readlines()

    # NOTE: the two files are expected to be line-aligned; extra lines in
    # the annotated file are ignored.
    edit_distance = []
    for i, recognized_line in enumerate(recognized_concepts):
        a1 = recognized_line.strip(' \n').split(' ')
        a2 = annotated_concepts[i].strip().split(' ')

        if a1[0] != '':
            edit_distance.append(lev.levenshtein2(a1, a2))
        else:
            # Nothing was recognized on this line: flag it with -1 instead
            # of computing a distance. The parenthesised form prints the
            # same text on Python 2 and is valid syntax on Python 3.
            print('else running')
            edit_distance.append(-1)

    return edit_distance
Exemple #3
0
def guess_simple(data, word, limit=-1, maxdist=1):
    """
    Guess mnemonics for ``word`` by scanning every entry of ``data``.

    Every key of ``data`` is compared to ``word`` with ``levenshtein2``;
    keys within ``maxdist`` contribute one result per curve whose
    'mnemonic' field is truthy. The result is also stored in memcache
    before being returned.

    Args:
        data (dict): maps a mnemonic to an iterable of curve dicts.
        word (str): the word to look up.
        limit (int): the max number of items to return. Not used for
            filtering here; it only takes part in the cache key.
        maxdist (int): the max allowed edit distance.

    Returns:
        list: dicts with the keys "distance", "mnemonic" and "curve".
    """
    output = []
    for mnemonic, curves in data.items():
        distance = levenshtein.levenshtein2(word, mnemonic)
        if distance <= maxdist:
            # Curves with an empty/missing-valued 'mnemonic' are skipped.
            output.extend(
                {"distance": distance, "mnemonic": mnemonic, "curve": curve}
                for curve in curves
                if curve['mnemonic']
            )

    cache_key = word + '-' + 'simple' + '-' + str(limit) + str(maxdist)
    memcache.set(cache_key, output)

    return output
Exemple #4
0
def guess_simple3(data, word, lim):
    """
    Return the ``lim`` entries of ``data`` whose keys are closest to ``word``.

    Closeness is the value returned by ``levenshtein.levenshtein2``. The
    result is also stored in memcache before being returned.

    Args:
        data (dict): maps candidate words to their associated values.
        word (str): the word to look up.
        lim (int): the max number of entries to return. Values larger than
            ``len(data)`` return everything; values <= 0 return nothing.

    Returns:
        dict: the selected keys of ``data`` mapped to their values.
    """
    # Rank every key by edit distance. The previous hand-rolled ordering
    # only moved running minima to the front, so the first ``lim`` words
    # were not guaranteed to be the closest ones.
    ranked = sorted(data, key=lambda w: levenshtein.levenshtein2(word, w))

    # Slicing (instead of indexing up to ``lim``) tolerates lim > len(data);
    # max() keeps a negative lim from slicing off the tail.
    output = {w: data[w] for w in ranked[:max(lim, 0)]}

    key = word + '-' + 'simple' + '-' + str(lim)
    memcache.set(key, output)

    return output
Exemple #5
0
def guess_simple3(data, word, lim):
    """
    Return the ``lim`` entries of ``data`` whose keys are closest to ``word``.

    Closeness is the value returned by ``levenshtein.levenshtein2``. The
    result is also stored in memcache before being returned.

    Args:
        data (dict): maps candidate words to their associated values.
        word (str): the word to look up.
        lim (int): the max number of entries to return. Values larger than
            ``len(data)`` return everything; values <= 0 return nothing.

    Returns:
        dict: the selected keys of ``data`` mapped to their values.
    """
    # Sort all keys by edit distance to ``word``. A real sort is needed:
    # pushing only running minima to the front leaves the remaining words
    # in encounter order, so the top ``lim`` would not be the nearest ones.
    ranked = sorted(data, key=lambda w: levenshtein.levenshtein2(word, w))

    # Clamp the count: slicing copes with lim > len(data), and max() stops
    # a negative lim from being read as "all but the last few".
    count = max(lim, 0)
    output = {w: data[w] for w in ranked[:count]}

    key = word + '-' + 'simple' + '-' + str(lim)
    memcache.set(key, output)

    return output