import timeit from bktree import BKTree business_dictionary = [a.strip() for a in open('business-names.txt')] tree = BKTree(sanitize=True) tree.add(business_dictionary) setup = """ from bktree import BKTree business_dictionary = [a.strip() for a in open('business-names.txt')] tree = BKTree(sanitize=True) tree.add(business_dictionary) """ def test_word(word, radius): perf = timeit.timeit(f'tree.search("{word}", {radius})', number=100, setup=setup) print(f'Performance of tree.search("{word}", {radius}) = {perf}') print(tree.search(f"{word}", 1)) if __name__ == "__main__": for w, r in [ ('walmart', 1), ('walmartt', 1), ('walmarttt', 2), ('walllrt', 2),
# Tail of a Python 2 script (it uses the `print` statement).
#
# NOTE(review): the first two statements (`str1 += ...` and
# `features[i][str1] = ...`) belong to loops over i / j / vertex whose
# headers are not visible here; they extend the key `str1` with
# "v" + str(vertex) and store qualityEdges[i][j] under that key in
# features[i].
#
# The rest of the script:
#   1. Builds `values`, one build_by_features(features[i]) result per index
#      in range(l).
#   2. Builds `valuesRevDict`, mapping each distinct value to the list of
#      indices that produced it (values are used as dict keys, so they must
#      be hashable).
#   3. Inserts every value into a BKTree, one add() call per value.
#   4. For each index i, calls tree.find(values[i], MAX_DISTANCE) and unpacks
#      every result as a pair (a, b): `a` is rescaled to 1 - float(a) / F
#      (presumably a distance turned into a similarity -- confirm what F is),
#      and `b` is replaced by the list of indices sharing that value. It then
#      prints "<i> <final_pairs>".
#
# The commented-out block is an older all-pairs computeScore() print-out.
str1 += "v" + str(vertex) features[i][str1] = qualityEdges[i][j] values = [build_by_features(features[i]) for i in range(l)] valuesRevDict = {} for i in range(len(values)): if values[i] in valuesRevDict: valuesRevDict[values[i]].append(i) else: valuesRevDict[values[i]] = [i] tree = BKTree() for value in values: tree.add(value) # for i in range(l): # for j in range(i+1,l): # score = computeScore(values[i], values[j]) # print str(graphs[i]["label"]) + " " + str(graphs[j]["label"]) + " " + str(score) for i in range(l): closest_pairs = tree.find(values[i], MAX_DISTANCE) final_pairs = [] for pair in closest_pairs: a, b = pair a = 1 - float(a) / F b = valuesRevDict[b] final_pairs.append((a, b)) print str(i) + " " + str(final_pairs)