def dedup(search_data):
    """Label near-duplicate records by edit distance of their first field.

    Every pair ``(i, j)`` with ``i < j`` is compared using
    ``Levenshtein.distance`` on ``search_data[i][0]`` and
    ``search_data[j][0]``.  Record ``j`` is treated as a duplicate of
    record ``i`` when the distance is less than half the length of the
    longer of the two keys.

    Args:
        search_data: Indexable sequence of records; element ``[0]`` of each
            record is the key that gets compared (presumably a string --
            confirm against the caller).

    Returns:
        A list of ints with the same length as ``search_data``.
        ``label[j]`` is the index ``i`` of the earliest record that ``j``
        was matched to, or ``0`` if no match was recorded.

    NOTE(review): because ``0`` doubles as the "unlabelled" value, a record
    matched to index 0 is indistinguishable from an unmatched one and can
    still be relabelled by a later reference record.  The encoding is kept
    as-is so the return contract does not change; confirm with callers
    before switching to a distinct sentinel.
    """
    count = len(search_data)
    label = [0] * count

    # The last record never needs to act as the reference: nothing follows it.
    for i in range(count - 1):
        key_i = search_data[i][0]
        # The candidate index must run through the final record (count - 1).
        for j in range(i + 1, count):
            if label[j] > 0:
                # Already assigned to an earlier reference record.
                continue
            key_j = search_data[j][0]
            dist = Levenshtein.distance(key_i, key_j)
            # Same as: dist < len(key_i) / 2 or dist < len(key_j) / 2
            if dist < max(len(key_i), len(key_j)) / 2:
                label[j] = i
    return label