Ejemplo n.º 1
0
def main():
    """Print LSH probe distances next to the exact nearest-neighbor distance.

    Reads ``count``, ``num_queries``, ``dim`` and ``adj`` from the
    module-level ``args``. Builds a dataset and a query set with
    ``generate_data``, fills an ``nr_lsh.lsh_table`` with the dataset, and
    for every query prints one line with three distances (two decimals):
    the ``probe`` result, the ``probe_approx`` result, and the exact
    neighbor distance.

    A probe that returns nothing is reported as a distance of 0.
    """
    global args

    def distance_or_zero(maybe_neighbor, query):
        # The probes return an optional (vector, id) pair. When nothing was
        # found, report 0 instead of indexing into None.
        if maybe_neighbor:
            return np.linalg.norm(maybe_neighbor[0] - query)
        return 0

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(bits=17, dim=args.dim, num_buckets=args.count)
    t.fill(data, False)

    # "multiprobe" query the table.
    for query in queries:
        # find the nearest neighbor and its distance
        _nn, dist = exact_neighbor(query, data)

        # the probe returns an option of KV, so it is MAYBE the
        # neighbor and its id; but, it could be None.
        maybe_neighbor, _stats = t.probe(query, args.adj)
        approx_dist1 = distance_or_zero(maybe_neighbor, query)

        maybe_neighbor, _stats = t.probe_approx(query, 2.0, args.adj)
        approx_dist2 = distance_or_zero(maybe_neighbor, query)

        print('{0:.2f} {1:.2f} {2:.2f}'.format(approx_dist1, approx_dist2,
                                               dist))
def main():
    """Compare distances and per-query time of exact search and LSH probes.

    Reads ``count``, ``num_queries``, ``dim`` and ``adj`` from the
    module-level ``args``. Builds a dataset and a query set with
    ``generate_data``, fills a 20-table ``nr_lsh.lsh_table``, and for every
    query times ``exact_neighbor``, ``probe`` and ``probe_approx``.

    Prints one line per query with the ``probe`` distance, the
    ``probe_approx`` distance and the exact distance (two decimals), then
    the average time per query for each of the three searches, computed as
    total elapsed seconds divided by ``args.num_queries``.

    A probe that returns nothing is reported as a distance of 0.
    """
    global args

    def distance_or_zero(maybe_neighbor, query):
        # The probes return an optional (vector, id) pair. When nothing was
        # found, report 0 instead of indexing into None.
        if maybe_neighbor:
            return np.linalg.norm(maybe_neighbor[0] - query)
        return 0

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(num_tables=20,
                         bits=32,
                         dim=args.dim,
                         num_buckets=args.count)
    t.fill(data, False)

    # track the total time to perform all queries.
    exact_total_time = 0
    probe_total_time = 0
    probe_approx_total_time = 0

    # "multiprobe" query the table.
    for query in queries:
        # find the nearest neighbor and its distance.
        # perf_counter is monotonic and high resolution, so short intervals
        # are not distorted by system clock adjustments.
        start = time.perf_counter()
        _nn, dist = exact_neighbor(query, data)
        exact_total_time += time.perf_counter() - start

        # probe the table and record the time.
        # the probe returns an option of KV, so it is MAYBE the
        # neighbor and its id; but, it could be None.
        start = time.perf_counter()
        maybe_neighbor, _stats = t.probe(query, args.adj)
        probe_total_time += time.perf_counter() - start
        approx_dist1 = distance_or_zero(maybe_neighbor, query)

        start = time.perf_counter()
        maybe_neighbor, _stats = t.probe_approx(query, 2.0, args.adj)
        probe_approx_total_time += time.perf_counter() - start
        approx_dist2 = distance_or_zero(maybe_neighbor, query)

        print('{0:.2f} {1:.2f} {2:.2f}'.format(approx_dist1, approx_dist2,
                                               dist))
    print('Exact Average Time Per Query: ' +
          str(exact_total_time / args.num_queries))
    print('Probe Average Time Per Query: ' +
          str(probe_total_time / args.num_queries))
    print('Probe Approx Average Time Per Query: ' +
          str(probe_approx_total_time / args.num_queries))
Ejemplo n.º 3
0
def main():
    """Plot mean ``k_probe`` recall against mean query time per table count.

    Reads ``count``, ``num_queries``, ``dim``, ``k`` and ``adj`` from the
    module-level ``args``. For each ``num_tables`` in 5, 25, 45, 65, 85, 105
    a fresh ``nr_lsh.lsh_table`` is built and filled with the same dataset;
    every query is then run through ``k_probe`` and compared with the result
    of ``exact_k_neighbor`` via ``recall``. The mean recall and the mean
    elapsed time per query for each table count become one point of the
    plotted curve (x: mean time, y: mean recall).
    """
    global args

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # one entry per table count: the curve that gets plotted at the end.
    mean_recalls = []
    mean_times = []

    for num_tables in range(5, 106, 20):
        # create and fill the LSH table.
        #bits, num_tables = nr_lsh.sizes_from_probs(args.count, p1, p2)
        t = nr_lsh.lsh_table(num_tables=num_tables,
                             bits=32,
                             dim=args.dim,
                             num_buckets=args.count)
        t.fill(data, False)

        k_probe_recall = []
        time_per_query = []

        for query in queries:
            # find the true top-k nearest neighbors and their distances;
            # the indices of the true top-k are what recall is computed on.
            indices, _nns, _dists = exact_k_neighbor(args.k, query, data)

            # probe the table and record the time.
            # the probe returns an option of KV, so it is MAYBE the
            # neighbors and their ids; but, it could be None.
            # perf_counter is monotonic and high resolution, so short
            # intervals are not distorted by system clock adjustments.
            start = time.perf_counter()
            maybe_neighbors, _stats = t.k_probe(args.k, query, args.adj)
            time_per_query.append(time.perf_counter() - start)

            # compute recall; if no neighbors were found, recall is 0
            if maybe_neighbors is None:
                rec1 = 0
            else:
                # transpose the returned pairs and keep the second
                # component of each (presumably the item id).
                neighbors = list(list(zip(*maybe_neighbors))[1])
                rec1 = recall(neighbors, indices)
            k_probe_recall.append(rec1)

        mean_recalls.append(sum(k_probe_recall) / len(k_probe_recall))
        mean_times.append(sum(time_per_query) / len(time_per_query))

    plt.plot(mean_times, mean_recalls)
    plt.show()
Ejemplo n.º 4
0
def main():
    """Report the mean recall of ``k_probe`` and ``k_probe_approx``.

    Reads ``count``, ``num_queries``, ``dim``, ``bits``, ``k``, ``adj`` and
    ``v`` from the module-level ``args``. Builds a dataset and a query set
    with ``generate_data``, fills an ``nr_lsh.lsh_table`` that has
    ``2 ** args.bits`` buckets, and for each query scores both probe
    variants against the result of ``exact_k_neighbor`` using ``recall``.

    With ``args.v`` set, the two recalls are printed per query. The mean
    recall of each variant is always printed at the end. A probe that
    returns nothing scores a recall of 0.
    """
    global args

    def recall_or_zero(maybe_neighbors, true_indices):
        # The probes return an optional collection of (vector, id) pairs.
        # An absent or empty result cannot be transposed, so score it as 0.
        if not maybe_neighbors:
            return 0
        # transpose the pairs and keep the second component of each
        # (presumably the item id) for comparison with the true indices.
        neighbors = list(list(zip(*maybe_neighbors))[1])
        return recall(neighbors, true_indices)

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(bits=args.bits,
                         dim=args.dim,
                         num_buckets=2**args.bits)
    t.fill(data, False)

    # lists to track the recalls of each type of probe.
    k_probe_recall = []
    k_probe_approx_recall = []

    # "multiprobe" query the table.
    for query in queries:
        # find the true neighbors, indices in the dataset, and their distances
        indices, _nns, _dists = exact_k_neighbor(args.k, query, data)

        # the probe returns an option of KV, so it is MAYBE the
        # neighbors and their ids; but, it could be None.
        maybe_neighbors, _stats = t.k_probe(args.k, query, args.adj)
        rec1 = recall_or_zero(maybe_neighbors, indices)
        k_probe_recall.append(rec1)

        maybe_neighbors, _stats = t.k_probe_approx(args.k, query, 0.05,
                                                   args.adj)
        rec2 = recall_or_zero(maybe_neighbors, indices)
        k_probe_approx_recall.append(rec2)

        if args.v:
            print('{0:.2f} {1:.2f}'.format(rec1, rec2))

    print('mean recalls:')
    print(' * k_probe:        {0:.2f}'.format(
        sum(k_probe_recall) / len(k_probe_recall)))

    print(' * k_probe_approx: {0:.2f}'.format(
        sum(k_probe_approx_recall) / len(k_probe_approx_recall)))
def main():
    """Report per-query time and mean recall of exact and LSH top-k search.

    Reads ``count``, ``num_queries``, ``dim``, ``k`` and ``adj`` from the
    module-level ``args``. Builds a dataset and a query set with
    ``generate_data``, fills a 50-table ``nr_lsh.lsh_table``, and for every
    query times ``exact_k_neighbor``, ``k_probe`` and ``k_probe_approx``.
    Each probe result is scored against the exact result with ``recall``;
    a probe that returns None scores 0.

    Prints the average time per query for the three searches (total elapsed
    seconds divided by ``args.num_queries``) followed by the mean recall of
    the two probe variants.
    """
    global args

    def recall_or_zero(maybe_neighbors, true_indices):
        # compute the recall. if no neighbors were found, recall is 0.
        if maybe_neighbors is None:
            return 0
        # transpose the returned pairs and keep the second component of
        # each (presumably the item id) for comparison with the true indices.
        neighbors = list(list(zip(*maybe_neighbors))[1])
        return recall(neighbors, true_indices)

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(num_tables=50,
                         bits=128,
                         dim=args.dim,
                         num_buckets=args.count)
    t.fill(data, False)

    k_probe_recall = []
    k_probe_approx_recall = []

    # track the total time to perform all queries.
    exact_total_time = 0
    probe_total_time = 0
    probe_approx_total_time = 0

    # "multiprobe" query the table.
    for query in queries:
        # find the true top-k nearest neighbors and their distances;
        # the indices of the true top-k are what recall is computed on.
        # perf_counter is monotonic and high resolution, so short intervals
        # are not distorted by system clock adjustments.
        start = time.perf_counter()
        indices, _nns, _dists = exact_k_neighbor(args.k, query, data)
        exact_total_time += time.perf_counter() - start

        # probe the table and record the time.
        # the probe returns an option of KV, so it is MAYBE the
        # neighbors and their ids; but, it could be None.
        start = time.perf_counter()
        maybe_neighbors, _stats = t.k_probe(args.k, query, args.adj)
        probe_total_time += time.perf_counter() - start
        k_probe_recall.append(recall_or_zero(maybe_neighbors, indices))

        # find the first k items within distance c from the query
        start = time.perf_counter()
        maybe_neighbors, _stats = t.k_probe_approx(args.k, query, 2.0,
                                                   args.adj)
        probe_approx_total_time += time.perf_counter() - start
        k_probe_approx_recall.append(recall_or_zero(maybe_neighbors, indices))

    print('Exact Average Time Per Query: ' +
          str(exact_total_time / args.num_queries))
    print('Probe Average Time Per Query: ' +
          str(probe_total_time / args.num_queries))
    # this line reports the k_probe_approx timing, so label it as such
    # rather than repeating the k_probe label.
    print('Probe Approx Average Time Per Query: ' +
          str(probe_approx_total_time / args.num_queries))

    print('mean recalls:')
    print(' * k_probe:        {0:.2f}'.format(
        sum(k_probe_recall) / len(k_probe_recall)))
    print(' * k_probe_approx: {0:.2f}'.format(
        sum(k_probe_approx_recall) / len(k_probe_approx_recall)))