Exemple #1
0
def memoized_leven_dist(s1, s2):
    """Return ``leven_dist(s1, s2)``, reusing a stored result when present.

    Results are kept in the module-level ``lev_dist_cache`` and addressed
    as ``lev_dist_cache[s1][s2]``. A stored ``None`` means "not computed
    yet"; any other stored value is returned unchanged.

    NOTE(review): the slot ``lev_dist_cache[s1][s2]`` is read before
    anything is written, so the cache presumably already holds an entry
    (initially ``None``) for every pair that gets queried -- confirm
    where the cache is built.
    """
    cached = lev_dist_cache[s1][s2]
    if cached is None:
        # First request for this pair: compute once and remember it.
        cached = leven_dist(s1, s2)
        lev_dist_cache[s1][s2] = cached
    return cached
def find_station(stat_n):
    """Resolve a station name to its entry in ``stations_hs``.

    Lookup order:

    1. ``stat_n`` exactly as given;
    2. ``stat_n`` with leading/trailing whitespace stripped;
    3. ``stat_n`` translated through ``aliases``;
    4. otherwise a "couldn't find" message is printed and the value of
       ``stations_hs`` with the smallest ``leven_dist`` to ``stat_n`` is
       returned as a best guess.

    :param stat_n: station name to look up (must support ``.strip()``).
    :returns: a value taken from ``stations_hs``.

    NOTE(review): the fuzzy fallback passes the *values* of
    ``stations_hs`` to ``leven_dist`` -- presumably they are strings
    comparable with ``stat_n``; confirm against where the mapping is
    built. ``min`` raises ``ValueError`` if ``stations_hs`` has no values.
    """
    if stat_n in stations_hs:
        return stations_hs[stat_n]

    # Strip once and reuse the result for both the test and the lookup.
    trimmed = stat_n.strip()
    if trimmed in stations_hs:
        return stations_hs[trimmed]

    if stat_n in aliases:
        return stations_hs[aliases[stat_n]]

    # Single-argument call form: prints the same text under Python 2's
    # print statement and Python 3's print function, so the module no
    # longer fails to parse on Python 3.
    print('couldn\'t find "%s"' % stat_n)
    return min(stations_hs.values(), key=lambda x: leven_dist(x, stat_n))
Exemple #3
0
def find_station(stat_n):
    """Resolve a station name to its entry in ``stations_hs``.

    Lookup order:

    1. ``stat_n`` exactly as given;
    2. ``stat_n`` with leading/trailing whitespace stripped;
    3. ``stat_n`` translated through ``aliases``;
    4. otherwise a "couldn't find" message is printed and the value of
       ``stations_hs`` with the smallest ``leven_dist`` to ``stat_n`` is
       returned as a best guess.

    :param stat_n: station name to look up (must support ``.strip()``).
    :returns: a value taken from ``stations_hs``.

    NOTE(review): the fuzzy fallback passes the *values* of
    ``stations_hs`` to ``leven_dist`` -- presumably they are strings
    comparable with ``stat_n``; confirm against where the mapping is
    built. ``min`` raises ``ValueError`` if ``stations_hs`` has no values.
    """
    if stat_n in stations_hs:
        return stations_hs[stat_n]

    # Strip once and reuse the result for both the test and the lookup.
    trimmed = stat_n.strip()
    if trimmed in stations_hs:
        return stations_hs[trimmed]

    if stat_n in aliases:
        return stations_hs[aliases[stat_n]]

    # Single-argument call form: prints the same text under Python 2's
    # print statement and Python 3's print function, so the module no
    # longer fails to parse on Python 3.
    print('couldn\'t find "%s"' % stat_n)
    return min(stations_hs.values(), key=lambda x: leven_dist(x, stat_n))
Exemple #4
0
def levenshtein_multi_char_inserts(s_target, s_list):
    """Build a DP table for covering ``s_target`` with pieces from ``s_list``.

    The table has one entry per position of ``s_target``. Entry ``j``
    describes the cheapest way found to cover ``s_target[:j + 1]`` whose
    last segment ends at ``j``:

    * ``'cost'`` -- accumulated cost; each segment adds
      ``1 + leven_dist(piece, segment)``;
    * ``'from'`` -- index of the last position covered *before* that
      segment (``-1`` when the segment starts at the beginning of
      ``s_target``), or ``None`` if the entry was never written;
    * ``'via'``  -- the element of ``s_list`` matched against the segment.

    A segment matched against a piece ``s`` is at most
    ``int(len(s) * 1.5)`` characters long and never runs past the end of
    ``s_target``.

    :param s_target: the sequence to cover (sliced and measured with
        ``len``).
    :param s_list: iterable of candidate pieces; iterated once per
        position of ``s_target``, so it must be re-iterable.
    :returns: the list of per-position dicts described above. Entries
        that no segment reached keep ``'cost' == 0`` and
        ``'from' is None``.

    NOTE(review): ``leven_dist`` is defined elsewhere -- presumably the
    Levenshtein edit distance between its two arguments; confirm.
    """
    # Stores the action that resulted in the best score at each location.
    dp_memo = [{'cost': 0, 'from': None, 'via': None} for _ in s_target]
    target_len = len(s_target)

    for start, _ in enumerate(dp_memo):
        # Cost of everything before ``start``; nothing precedes index 0.
        base_cost = dp_memo[start - 1]['cost'] if start else 0
        remaining = target_len - start

        for piece in s_list:
            longest = int(min(len(piece) * 1.5, remaining))

            # ``range`` (not the Python-2-only ``xrange``) keeps this
            # runnable on both Python 2 and Python 3; the span is tiny.
            for width in range(1, longest + 1):
                segment = s_target[start:start + width]
                trial_cost = base_cost + 1 + leven_dist(piece, segment)

                slot = dp_memo[start + width - 1]
                if slot['from'] is None or slot['cost'] > trial_cost:
                    slot['cost'] = trial_cost
                    slot['from'] = start - 1
                    slot['via'] = piece

    return dp_memo
Exemple #5
0
def find_station_sub(stat_n):
    """Look up ``stat_n`` in ``subway_stats_name``, falling back to a
    closest match by ``leven_dist``.

    An exact hit returns ``subway_stats_name[stat_n]``. Otherwise the
    value of ``subway_stats`` with the smallest ``leven_dist`` to
    ``stat_n`` is returned.

    NOTE(review): the exact lookup reads ``subway_stats_name`` while the
    fallback scans ``subway_stats`` -- two different containers; confirm
    that both yield the same kind of value.
    """
    if stat_n not in subway_stats_name:
        def distance_to_query(candidate):
            return leven_dist(candidate, stat_n)

        return min(subway_stats.values(), key=distance_to_query)

    return subway_stats_name[stat_n]
Exemple #6
0
def get_second_structural_distance(p1, p2):
    """Return ``leven_dist`` between the string representations of two objects.

    Both arguments must provide ``get_str_representation()``; the two
    results are passed to ``leven_dist`` in the order ``(p1, p2)``.
    """
    # Evaluate p1 first, then p2, before handing both to leven_dist.
    first_repr, second_repr = [p.get_str_representation() for p in (p1, p2)]
    return leven_dist(first_repr, second_repr)
def find_station_sub(stat_n):
    """Look up ``stat_n`` in ``subway_stats_name``, falling back to a
    closest match by ``leven_dist``.

    An exact hit returns ``subway_stats_name[stat_n]``. Otherwise the
    value of ``subway_stats`` with the smallest ``leven_dist`` to
    ``stat_n`` is returned.

    NOTE(review): the exact lookup reads ``subway_stats_name`` while the
    fallback scans ``subway_stats`` -- two different containers; confirm
    that both yield the same kind of value.
    """
    if stat_n not in subway_stats_name:
        def distance_to_query(candidate):
            return leven_dist(candidate, stat_n)

        return min(subway_stats.values(), key=distance_to_query)

    return subway_stats_name[stat_n]