def top_k_test(type="annoy", *, num_queries=100000, dim=300, k=100):
    """Benchmark top-k nearest-neighbour query latency of a saved index.

    Loads a pre-built index from the ``checkpoint/`` directory, issues
    ``num_queries`` queries with random vectors drawn uniformly from
    ``[-1, 1)``, and logs one summary line with the maximum, minimum and
    average per-query latency (in seconds) plus the query count.

    Args:
        type: Which index to benchmark: ``"annoy"`` (loaded through
            ``AnnoyIndex.load`` from ``checkpoint/annoy.pk``), ``"kd_tree"``
            or ``"ball_tree"`` (loaded through ``joblib.load`` from
            ``checkpoint/<type>.pk``).  The name shadows the ``type``
            builtin; it is kept because it is part of the public signature.
        num_queries: Number of random queries to time.  Must be >= 1.
        dim: Dimensionality of the query vectors (and of the Annoy index).
            NOTE(review): presumably this must match the dimension the
            checkpoint was built with -- confirm against the build script.
        k: Number of neighbours requested per query.

    Raises:
        ValueError: If ``type`` is not one of the supported index names or
            ``num_queries`` is smaller than 1.
    """
    # Validate before touching any checkpoint: previously an unknown name
    # left `model` unbound and only failed later, inside the timing loop.
    if type not in ("annoy", "kd_tree", "ball_tree"):
        raise ValueError(
            "unknown index type %r; expected 'annoy', 'kd_tree' or 'ball_tree'"
            % (type,)
        )
    if num_queries < 1:
        raise ValueError("num_queries must be >= 1, got %r" % (num_queries,))

    logger = myutil.getLogger("try.log")

    if type == "annoy":
        model = AnnoyIndex(dim)
        model.load('checkpoint/annoy.pk')
    else:
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code.  Only load checkpoints from a trusted source.
        with open("checkpoint/%s.pk" % type, "rb") as checkpoint:
            model = joblib.load(checkpoint)

    slowest = 0.0
    fastest = float("inf")  # any real measurement is smaller
    total = 0.0
    count = 0

    for _ in range(num_queries):
        # The query vector is generated outside the timed region so that
        # only the index lookup itself is measured.
        vec = numpy.random.uniform(-1, 1, size=dim)

        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        if type == "annoy":
            # Annoy can query by stored item index (get_nns_by_item) or by
            # an arbitrary vector (get_nns_by_vector); we need the latter.
            model.get_nns_by_vector(vec, k, include_distances=True)
        else:
            # The tree models take a batch of vectors, hence the wrapping
            # list.
            model.query([vec], k=k)
        elapsed = time.perf_counter() - start

        # Update the running statistics.
        total += elapsed
        count += 1
        slowest = max(slowest, elapsed)
        fastest = min(fastest, elapsed)

    logger.info("%s, max: %f, min: %f, avg: %f, count: %f" % (type, slowest, fastest, (total / count), count))