def test_dl(self):
    """Check dam_lev, called without any cost tables, against known distances.

    Each row is (first string, second string, expected distance). The rows
    cover empty inputs, single-character edits, a swap of two adjacent
    characters, and longer mixed cases checked in both directions.
    """
    expected_distances = (
        ('', '', 0),
        ('', 'a', 1),
        ('a', '', 1),
        ('a', 'b', 1),
        ('a', 'ab', 1),
        ('ab', 'ba', 1),
        ('ab', 'bca', 2),
        ('bca', 'ab', 2),
        ('ab', 'bdca', 3),
        ('bdca', 'ab', 3),
    )
    for first, second, expected in expected_distances:
        self.assertEqual(dam_lev(first, second), expected)
def data_print(answer, recWord):
    """Print every candidate in ``answer`` with its distance from ``recWord``.

    Args:
        answer: Iterable of candidate words; each one is compared to
            ``recWord`` and printed on its own line.
        recWord: The word the candidates are compared against (passed as the
            first argument to ``dam_lev``).

    The distance returned by ``dam_lev`` is truncated with ``int()`` before it
    is printed. Nothing is returned.
    """
    # A 128x128 float64 matrix filled with ones, built once and reused for
    # every candidate.
    unit_substitute_costs = np.ones((128, 128), dtype=np.float64)
    template = "The word may be : {} the change in char is of : {} digits"

    for candidate in answer:
        distance = int(
            dam_lev(recWord, candidate, substitute_costs=unit_substitute_costs)
        )
        print(template.format(candidate, distance))
def weighted_ed_rel(self, cand, token, del_costs, ins_costs, sub_costs,
                    trans_costs):
    """Return the weighted edit distance from ``token`` to ``cand``, per token character.

    Args:
        cand: Candidate string (second argument to ``dam_lev``).
        token: Observed string (first argument to ``dam_lev``); its length is
            the normaliser.
        del_costs: Passed to ``dam_lev`` as ``delete_costs``.
        ins_costs: Passed to ``dam_lev`` as ``insert_costs``.
        sub_costs: Passed to ``dam_lev`` as ``substitute_costs``.
        trans_costs: Passed to ``dam_lev`` as ``transpose_costs``.

    Returns:
        The ``dam_lev`` result divided by ``len(token)``.

    NOTE(review): there is no guard for an empty ``token``; the division is
    then a division by zero.
    """
    cost_tables = {
        "delete_costs": del_costs,
        "insert_costs": ins_costs,
        "substitute_costs": sub_costs,
        "transpose_costs": trans_costs,
    }
    return dam_lev(token, cand, **cost_tables) / len(token)
def __score(self, wrong_word, candidate):
    """Score ``candidate`` as a replacement for ``wrong_word`` (higher is better).

    The score is the negated length-normalised edit distance plus
    ``self.lamda`` times a log prior for the candidate.

    Args:
        wrong_word: The string being corrected.
        candidate: The proposed replacement.

    Returns:
        ``-distance / max(len(wrong_word), len(candidate))
        + self.lamda * log_prior``.

    NOTE(review): when both strings are empty the normaliser is zero and the
    division is not guarded.
    """
    raw_distance = dam_lev(
        wrong_word,
        candidate,
        insert_costs=self.insert_costs,
        substitute_costs=self.substitute_costs,
        delete_costs=self.delete_costs,
        transpose_costs=self.transpose_costs,
    )
    longest = max(len(wrong_word), len(candidate))
    normalised = raw_distance / longest

    if candidate in self.priors:
        log_prior = self.priors[candidate]
    else:
        # Fallback for candidates without a stored prior: log(k / N).
        log_prior = math.log(float(self.k) / self.N)

    return -normalised + self.lamda * log_prior
def edit_distance(edp: EditDistanceParams, a: str, b: str, error: bool) -> np.float64:
    """Compute the Damerau-Levenshtein edit distance between two pixel strings.

    Args:
        edp: Source of the cost tables (``insert_costs``, ``delete_costs``
            and the two substitute-cost tables).
        a: First string.
        b: Second string.
        error: When true, ``edp.error_substitute_costs`` is used as the
            substitution table; otherwise ``edp.substitute_costs``.

    Returns:
        The value returned by ``weighted_levenshtein.dam_lev``.

    Raises:
        AssertionError: If the result is outside ``[0, 2**16)``; the result
            itself is used as the assertion message.
    """
    if error:
        chosen_substitute_costs = edp.error_substitute_costs
    else:
        chosen_substitute_costs = edp.substitute_costs

    distance = weighted_levenshtein.dam_lev(
        a,
        b,
        insert_costs=edp.insert_costs,
        delete_costs=edp.delete_costs,
        substitute_costs=chosen_substitute_costs,
    )

    # Make sure result can fit in a uint16
    assert 0 <= distance < 2**16, distance
    return distance
# For each misspelled entry, find the dictionary word(s) at the smallest
# non-zero dam_lev distance (using `higher_costs_1` as the delete-cost table)
# and print whether the known correction is among them.
count = 0
# Walk the misspelling list entry by entry; 4453 is the hard-coded entry count.
while count < 4453:
    string = f_mis_array[count].strip()
    bestv = 10000000  # initial "nothing found yet" distance
    bests = ""
    smallest_list = []
    # Only compare against dictionary words whose length is within two
    # characters of the misspelling; the lookup is keyed by length-as-string.
    for num in range(len(string) - 2, len(string) + 3):
        for word in f_len_dict_array[str(num)]:
            candidate = word.strip()
            thisv = dam_lev(string, candidate, delete_costs=higher_costs_1)
            if 0 < thisv < bestv:
                # Strictly better non-zero distance: restart the tie list.
                bestv = thisv
                bests = candidate
                smallest_list = [bests]
            elif thisv == bestv:
                # Tie with the current best: keep every equally close word.
                smallest_list.append(candidate)
    expected = f_cor_array[count].strip()
    print({
        "result_s": smallest_list,
        "distance": bestv,
        "match": expected in smallest_list,
        "original": expected
    })
    # Advance to the next entry; without this the loop condition never
    # changes and the same word is processed forever.
    count += 1
# RG3 to RG4 # RG3 to RG5 # RG4 to RG5 Root = ET.Element("xml") f = open('spineData-ascii.txt') f.readline() # read and ignore the first line for line in f: # iterate over the remaining lines v = line.split('\t') app = v[0] rg1_2 = v[1] + '::' + v[3] if '#fMS' in rg1_2: print('#fMS is here') transpose_costs = np.full((128, 128), 0.25, dtype=np.float64) dist1_2 = dam_lev(v[2], v[4], transpose_costs=transpose_costs) else: transpose_costs = np.ones((128, 128), dtype=np.float64) dist1_2 = dam_lev(v[2], v[4]) rg1_3 = v[1] + '::' + v[5] if '#fMS' in rg1_3: print('#fMS is here') transpose_costs = np.full((128, 128), 0.25, dtype=np.float64) dist1_3 = dam_lev(v[2], v[6], transpose_costs=transpose_costs) else: transpose_costs = np.ones((128, 128), dtype=np.float64) dist1_3 = dam_lev(v[2], v[6]) rg1_4 = v[1] + '::' + v[7] if '#fMS' in rg1_4:
# For each misspelled entry, find the dictionary word(s) at the smallest
# non-zero dam_lev distance (using `higher_costs_1` as the insert-cost table)
# and print whether the known correction is among them.
count = 0
# Walk the misspelling list entry by entry; 4453 is the hard-coded entry count.
while count < 4453:
    string = f_mis_array[count].strip()
    bestv = 10000000  # initial "nothing found yet" distance
    bests = ""
    smallest_list = []
    # Only compare against dictionary words whose length is within two
    # characters of the misspelling; the lookup is keyed by length-as-string.
    for num in range(len(string) - 2, len(string) + 3):
        for word in f_len_dict_array[str(num)]:
            candidate = word.strip()
            thisv = dam_lev(string, candidate, insert_costs=higher_costs_1)
            if 0 < thisv < bestv:
                # Strictly better non-zero distance: restart the tie list.
                bestv = thisv
                bests = candidate
                smallest_list = [bests]
            elif thisv == bestv:
                # Tie with the current best: keep every equally close word.
                smallest_list.append(candidate)
    expected = f_cor_array[count].strip()
    print({
        "result_s": smallest_list,
        "distance": bestv,
        "match": expected in smallest_list,
        "original": expected
    })
    # Advance to the next entry; without this the loop condition never
    # changes and the same word is processed forever.
    count += 1
def _dl(self, x, y):
    """Return ``dam_lev(x, y, ...)`` using this instance's four weight attributes.

    The weights are forwarded positionally in the order ``iw``, ``dw``,
    ``sw``, ``tw``.
    NOTE(review): presumably insert / delete / substitute / transpose cost
    tables, in that order — confirm against the ``dam_lev`` signature.
    """
    weights = (self.iw, self.dw, self.sw, self.tw)
    return dam_lev(x, y, *weights)
str_build_orders, open("data/build_orders/TvZ_build_orders_{}_{}.p".format(n, l), "wb")) pickle.dump(mmrs, open("data/mmr/TvZ_mmr_{}.p".format(n), "wb")) pickle.dump(player_ids, open("data/player_ids/TvZ_player_ids_{}.p".format(n), "wb")) i = 0 for y in range(n): print(i, "/", n) for x in range(n): str_build_orders[x] = str_build_orders[x] str_build_orders[y] = str_build_orders[y] if y == x: distance = 0 else: distance = dam_lev(str1=str_build_orders[x], str2=str_build_orders[y], insert_costs=insert_costs, delete_costs=delete_costs, substitute_costs=substitute_costs, transpose_costs=transpose_costs) distance = int(distance) D[y][x] = distance D[x][y] = distance #print(str_build_orders[x]) #print(str_build_orders[y]) #print(distance) i += 1 print("Storing distance matrix") pickle.dump(D, open("data/distance_matrix/TvZ_{}_{}.p".format(n, l), "wb"))
# For entries 2779..4452 of the misspelling list, find the dictionary word(s)
# at the smallest non-zero dam_lev distance (with `higher_costs_2` as the
# transpose-cost table and `higher_costs_1` as the insert-cost table), then
# print the outcome and record it in `res_dict` keyed by the misspelling.
count = 2779
while count < 4453:
    string = f_mis_array[count].strip()
    bestv = 10000000  # initial "nothing found yet" distance
    bests = ""
    smallest_list = []

    # Restrict the search to dictionary words whose length is within two
    # characters of the misspelling; the lookup is keyed by length-as-string.
    length = len(string)
    for num in range(length - 2, length + 3):
        for word in f_len_dict_array[str(num)]:
            candidate = word.strip()
            thisv = dam_lev(
                string,
                candidate,
                transpose_costs=higher_costs_2,
                insert_costs=higher_costs_1,
            )
            if 0 < thisv < bestv:
                # Strictly better non-zero distance: restart the tie list.
                bestv = thisv
                bests = candidate
                smallest_list = [bests]
            elif thisv == bestv:
                # Tie with the current best: keep every equally close word.
                smallest_list.append(candidate)

    expected = f_cor_array[count].strip()
    record = {
        "result_s": smallest_list,
        "distance": bestv,
        "match": expected in smallest_list,
        "original": expected,
    }
    print(record)
    # save to res dict
    res_dict[string] = record

    count += 1
    print("count", count)