def memoized_leven_dist(s1, s2):
    """Return ``leven_dist(s1, s2)``, reusing a previously stored result.

    The module-level ``lev_dist_cache`` is indexed as
    ``lev_dist_cache[s1][s2]``.  A stored value of ``None`` is treated as
    "not computed yet": the distance is then computed with ``leven_dist``,
    written back to the cache and returned.

    NOTE(review): the lookup happens before anything is stored, so the cache
    presumably supplies ``None`` for unseen keys (e.g. a nested defaultdict)
    -- confirm against its definition.
    """
    cached = lev_dist_cache[s1][s2]
    if cached is not None:
        return cached

    # Cache miss: compute once and remember the result for later calls.
    distance = leven_dist(s1, s2)
    lev_dist_cache[s1][s2] = distance
    return distance
def find_station(stat_n):
    """Resolve ``stat_n`` to an entry of ``stations_hs``.

    Lookups are tried in this order:

    1. ``stat_n`` itself as a key of ``stations_hs``;
    2. ``stat_n.strip()`` as a key of ``stations_hs``;
    3. ``stat_n`` as a key of ``aliases``, whose value is then used as the
       key into ``stations_hs``.

    If none of these match, a message is printed and the value of
    ``stations_hs`` with the smallest ``leven_dist`` to ``stat_n`` is
    returned instead.
    """
    if stat_n in stations_hs:
        return stations_hs[stat_n]

    trimmed = stat_n.strip()
    if trimmed in stations_hs:
        return stations_hs[trimmed]

    if stat_n in aliases:
        return stations_hs[aliases[stat_n]]

    # No exact, trimmed or alias match: report it and fall back to the
    # closest value by edit distance.
    print('couldn\'t find "%s"' % stat_n)

    def distance_to_query(candidate):
        return leven_dist(candidate, stat_n)

    return min(stations_hs.values(), key=distance_to_query)
def levenshtein_multi_char_inserts(s_target, s_list):
    """Build a table of the cheapest segment ending at each index of ``s_target``.

    For every start index and every string ``s`` in ``s_list``, each slice of
    ``s_target`` beginning at that index, with length from 1 up to
    ``int(1.5 * len(s))`` (capped at the characters remaining), is scored as::

        cost at the previous index (0 at the start) + 1 + leven_dist(s, slice)

    The entry for the slice's last index is overwritten whenever it has not
    been set yet or the new score is strictly lower.

    Returns a list with one dict per element of ``s_target``:

    * ``'cost'`` -- best score found for a slice ending at that index;
    * ``'from'`` -- the index just before that slice started (``-1`` when
      the slice starts at index 0);
    * ``'via'``  -- the ``s_list`` string that produced the score.

    Entries that were never updated keep ``cost`` 0 and ``from``/``via``
    ``None``.
    """
    # Stores the action that resulted in the best score at each location.
    dp_memo = [{'cost': 0, 'from': None, 'via': None} for _ in s_target]

    for start in xrange(len(dp_memo)):
        # Cost accumulated up to (but not including) this start position.
        base_cost = dp_memo[start - 1]['cost'] if start else 0
        chars_left = len(s_target) - start

        for candidate in s_list:
            # Allow the matched slice to be up to 50% longer than candidate.
            longest = int(min(len(candidate) * 1.5, chars_left))

            for end in xrange(start, start + longest):
                piece = s_target[start:end + 1]
                total = base_cost + 1 + leven_dist(candidate, piece)
                cell = dp_memo[end]
                if cell['from'] is None or cell['cost'] > total:
                    cell['cost'] = total
                    cell['from'] = start - 1
                    cell['via'] = candidate

    return dp_memo
def find_station_sub(stat_n):
    """Resolve ``stat_n`` using the subway lookup tables.

    Returns ``subway_stats_name[stat_n]`` when ``stat_n`` is a key of that
    table.  Otherwise returns the value of ``subway_stats`` with the smallest
    ``leven_dist`` to ``stat_n``.
    """
    if stat_n in subway_stats_name:
        return subway_stats_name[stat_n]

    # No exact match: pick the closest value by edit distance.
    def distance_to_query(candidate):
        return leven_dist(candidate, stat_n)

    return min(subway_stats.values(), key=distance_to_query)
def get_second_structural_distance(p1, p2):
    """Return ``leven_dist`` between the string representations of two objects.

    Both arguments must provide ``get_str_representation()``; the two
    results are passed to ``leven_dist`` in the order (``p1``, ``p2``).
    """
    first_repr = p1.get_str_representation()
    second_repr = p2.get_str_representation()
    distance = leven_dist(first_repr, second_repr)
    return distance