Beispiel #1
0
def top_k_test(type="annoy", num_queries=100000, k=100):
    """Benchmark top-k nearest-neighbour query latency of a saved index.

    Loads the index selected by ``type`` from the ``checkpoint/`` directory,
    queries it ``num_queries`` times with random 300-dimensional vectors
    drawn uniformly between -1 and 1, and logs the maximum, minimum and
    average time of a single query to the ``try.log`` logger.

    Args:
        type: Index to benchmark: ``"annoy"``, ``"kd_tree"`` or
            ``"ball_tree"``. (The name shadows the builtin; it is kept so
            existing keyword callers keep working.)
        num_queries: Number of random queries to time. Must be >= 1.
        k: Number of neighbours requested per query.

    Raises:
        ValueError: If ``type`` is not a supported index name or
            ``num_queries`` is smaller than 1.
    """
    valid_types = ("annoy", "kd_tree", "ball_tree")
    # Validate before any I/O: previously an unknown type left `model`
    # unbound and only failed later, inside the timing loop.
    if type not in valid_types:
        raise ValueError("unknown index type %r; expected one of: %s" %
                         (type, ", ".join(valid_types)))
    if num_queries < 1:
        raise ValueError("num_queries must be >= 1, got %r" % (num_queries,))

    logger = myutil.getLogger("try.log")

    # Dimensionality shared by the Annoy index and the random query vectors.
    dim = 300
    checkpoint = "checkpoint/%s.pk" % type
    if type == "annoy":
        model = AnnoyIndex(dim)
        model.load(checkpoint)
    else:
        # kd_tree / ball_tree checkpoints are joblib dumps of an object that
        # exposes `.query(X, k=...)` (presumably scikit-learn trees -- confirm).
        with open(checkpoint, "rb") as file:
            model = joblib.load(file)

    max_num = 0.0
    min_num = float("inf")  # any real measurement is smaller than this
    sum_num = 0.0
    for _ in range(num_queries):
        # The query vector is generated outside the timed region.
        vec = numpy.random.uniform(-1, 1, size=dim)
        # perf_counter is a monotonic high-resolution clock, unlike time.time.
        start = time.perf_counter()
        if type == "annoy":
            # Query by vector (get_nns_by_item would query by stored item id).
            model.get_nns_by_vector(vec, k, include_distances=True)
        else:
            model.query([vec], k=k)
        stop = time.perf_counter()

        # Update the running statistics.
        run_time = stop - start
        sum_num += run_time
        max_num = max(max_num, run_time)
        min_num = min(min_num, run_time)

    logger.info("%s, max: %f, min: %f, avg: %f, count: %d" %
                (type, max_num, min_num, sum_num / num_queries, num_queries))