def main():
    """Print probe, approximate-probe, and exact neighbor distances per query.

    Reads ``count``, ``dim``, ``num_queries`` and ``adj`` from the global
    ``args``. A dataset and a query set are produced with ``generate_data``,
    the dataset is loaded into an ``nr_lsh.lsh_table``, and for every query
    one line is printed with three distances (two decimals each):

    1. distance to the result of ``t.probe``,
    2. distance to the result of ``t.probe_approx`` (called with 2.0),
    3. distance returned by ``exact_neighbor``.

    A probe that returns no neighbor is reported as distance 0.
    """
    global args

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(bits=17, dim=args.dim, num_buckets=args.count)
    t.fill(data, False)

    def distance_or_zero(maybe_neighbor, query):
        # The probes return an option of KV: it is MAYBE the neighbor and
        # its id, but it could be None. Both probe kinds go through the same
        # guard so a miss prints 0 instead of raising on `None[0]`.
        if maybe_neighbor:
            return np.linalg.norm(maybe_neighbor[0] - query)
        return 0

    # "multiprobe" query the table.
    for query in queries:
        # find the nearest neighbor and its distance
        _, dist = exact_neighbor(query, data)

        maybe_neighbor, _ = t.probe(query, args.adj)
        approx_dist1 = distance_or_zero(maybe_neighbor, query)

        maybe_neighbor, _ = t.probe_approx(query, 2.0, args.adj)
        approx_dist2 = distance_or_zero(maybe_neighbor, query)

        print('{0:.2f} {1:.2f} {2:.2f}'.format(approx_dist1, approx_dist2, dist))
def main():
    """Time exact, probe, and approximate-probe lookups and print distances.

    Reads ``count``, ``dim``, ``num_queries`` and ``adj`` from the global
    ``args``. A dataset and a query set are produced with ``generate_data``
    and the dataset is loaded into an ``nr_lsh.lsh_table``. For each query
    the three lookups (``exact_neighbor``, ``t.probe``, ``t.probe_approx``)
    are timed individually and one line with the three distances is printed.
    After the loop the average time per query of each lookup is printed.

    A probe that returns no neighbor is reported as distance 0.
    """
    global args

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(num_tables=20, bits=32, dim=args.dim,
                         num_buckets=args.count)
    t.fill(data, False)

    def distance_or_zero(maybe_neighbor, query):
        # The probes return an option of KV: it is MAYBE the neighbor and
        # its id, but it could be None. Guard both probe kinds the same way
        # so a miss prints 0 instead of raising on `None[0]`.
        if maybe_neighbor:
            return np.linalg.norm(maybe_neighbor[0] - query)
        return 0

    # track the total time to perform all queries.
    exact_total_time = 0
    probe_total_time = 0
    probe_approx_total_time = 0

    # "multiprobe" query the table.
    for query in queries:
        # find the nearest neighbor and its distance.
        # perf_counter is monotonic and high resolution, so short intervals
        # are not distorted by wall-clock adjustments.
        start = time.perf_counter()
        _, dist = exact_neighbor(query, data)
        exact_total_time += time.perf_counter() - start

        # probe the table and record the time (only the probe call itself
        # is inside the timed region).
        start = time.perf_counter()
        maybe_neighbor, _ = t.probe(query, args.adj)
        probe_total_time += time.perf_counter() - start
        approx_dist1 = distance_or_zero(maybe_neighbor, query)

        start = time.perf_counter()
        maybe_neighbor, _ = t.probe_approx(query, 2.0, args.adj)
        probe_approx_total_time += time.perf_counter() - start
        approx_dist2 = distance_or_zero(maybe_neighbor, query)

        print('{0:.2f} {1:.2f} {2:.2f}'.format(approx_dist1, approx_dist2, dist))

    print('Exact Average Time Per Query: ' +
          str(exact_total_time / args.num_queries))
    print('Probe Average Time Per Query: ' +
          str(probe_total_time / args.num_queries))
    print('Probe Approx Average Time Per Query: ' +
          str(probe_approx_total_time / args.num_queries))
def main():
    """Plot mean k_probe recall against mean query time per table count.

    Reads ``count``, ``dim``, ``num_queries``, ``k`` and ``adj`` from the
    global ``args``. For each ``num_tables`` in ``range(5, 106, 20)`` a new
    ``nr_lsh.lsh_table`` is built and filled, every query is run through
    ``k_probe`` while being timed, and the recall against the exact top-k
    indices is recorded. The per-table-count means are then plotted
    (time on x, recall on y) and shown.
    """
    global args

    # Random dataset plus the queries run against it.
    dataset = generate_data(args.count, args.dim)
    query_set = generate_data(args.num_queries, args.dim)

    mean_recalls = []
    mean_times = []

    for num_tables in range(5, 106, 20):
        #bits, num_tables = nr_lsh.sizes_from_probs(args.count, p1, p2)
        table = nr_lsh.lsh_table(num_tables=num_tables, bits=32,
                                 dim=args.dim, num_buckets=args.count)
        table.fill(dataset, False)

        recalls = []
        elapsed = []

        for q in query_set:
            # True top-k for this query; only the indices are needed to
            # score recall.
            true_indices, _, _ = exact_k_neighbor(args.k, q, dataset)

            # Time only the probe call. The probe yields an optional list
            # of KV pairs, so the result may be None.
            t0 = time.time()
            found, _ = table.k_probe(args.k, q, args.adj)
            t1 = time.time()
            elapsed.append(t1 - t0)

            # No neighbors found counts as a recall of 0.
            if maybe_found_none(found):
                score = 0
            else:
                columns = list(zip(*found))
                score = recall(list(columns[1]), true_indices)
            recalls.append(score)

        mean_recalls.append(sum(recalls) / len(recalls))
        mean_times.append(sum(elapsed) / len(elapsed))

    plt.plot(mean_times, mean_recalls)
    plt.show()


def maybe_found_none(result):
    """Return True when a probe result is None (no neighbors found)."""
    return result is None
def main():
    """Report the mean recall of ``k_probe`` and ``k_probe_approx``.

    Reads ``count``, ``dim``, ``num_queries``, ``bits``, ``k``, ``adj`` and
    ``v`` from the global ``args``. A dataset and a query set are produced
    with ``generate_data`` and the dataset is loaded into an
    ``nr_lsh.lsh_table`` with ``2**args.bits`` buckets. For each query the
    recall of both probe kinds is computed against the indices returned by
    ``exact_k_neighbor``; with ``args.v`` set, the per-query recalls are
    printed. The mean recalls are printed at the end.

    A probe that returns no neighbors counts as recall 0. If there are no
    queries at all, the means are printed as ``nan``.
    """
    global args

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(bits=args.bits, dim=args.dim,
                         num_buckets=2**args.bits)
    t.fill(data, False)

    def recall_or_zero(maybe_neighbors, indices):
        # The probes return an option of KVs, so the result may be None.
        # Both probe kinds use the same guard; a miss scores 0 instead of
        # raising when unpacking None.
        if not maybe_neighbors:
            return 0
        # Second column of the KV pairs is what gets compared to `indices`.
        neighbors = list(list(zip(*maybe_neighbors))[1])
        return recall(neighbors, indices)

    def mean(values):
        # The mean of no samples is undefined; report nan rather than
        # dividing by zero.
        return sum(values) / len(values) if values else float('nan')

    # lists to track the recalls of each type of probe.
    k_probe_recall = []
    k_probe_approx_recall = []

    # "multiprobe" query the table.
    for query in queries:
        # find the true neighbors' indices in the dataset.
        indices, _, _ = exact_k_neighbor(args.k, query, data)

        maybe_neighbors, _ = t.k_probe(args.k, query, args.adj)
        rec1 = recall_or_zero(maybe_neighbors, indices)
        k_probe_recall.append(rec1)

        maybe_neighbors, _ = t.k_probe_approx(args.k, query, 0.05, args.adj)
        rec2 = recall_or_zero(maybe_neighbors, indices)
        k_probe_approx_recall.append(rec2)

        if args.v:
            print('{0:.2f} {1:.2f}'.format(rec1, rec2))

    print('mean recalls:')
    print(' * k_probe: {0:.2f}'.format(mean(k_probe_recall)))
    print(' * k_probe_approx: {0:.2f}'.format(mean(k_probe_approx_recall)))
def main():
    """Time exact and LSH top-k lookups and report their mean recall.

    Reads ``count``, ``dim``, ``num_queries``, ``k`` and ``adj`` from the
    global ``args``. A dataset and a query set are produced with
    ``generate_data`` and the dataset is loaded into an
    ``nr_lsh.lsh_table``. For each query, ``exact_k_neighbor``,
    ``t.k_probe`` and ``t.k_probe_approx`` are timed individually and the
    recall of both probes is computed against the exact indices. The
    average time per query of each lookup and the mean recalls are printed
    at the end.

    A probe that returns None counts as recall 0. If there are no queries,
    the averages are printed as ``nan``.
    """
    global args

    # create the random dataset
    data = generate_data(args.count, args.dim)
    queries = generate_data(args.num_queries, args.dim)

    # create and fill the LSH table.
    t = nr_lsh.lsh_table(num_tables=50, bits=128, dim=args.dim,
                         num_buckets=args.count)
    t.fill(data, False)

    def recall_or_zero(maybe_neighbors, indices):
        # The probes return an option of KVs; if no neighbors were found
        # the recall is 0.
        if maybe_neighbors is None:
            return 0
        # Second column of the KV pairs is what gets compared to `indices`.
        neighbors = list(list(zip(*maybe_neighbors))[1])
        return recall(neighbors, indices)

    def average(total, count):
        # An average over zero samples is undefined; report nan rather than
        # dividing by zero.
        return total / count if count else float('nan')

    k_probe_recall = []
    k_probe_approx_recall = []

    # track the total time to perform all queries.
    exact_total_time = 0
    probe_total_time = 0
    probe_approx_total_time = 0

    # "multiprobe" query the table.
    for query in queries:
        # find the true top-k nearest neighbors; only their indices are
        # needed to compute recall.
        # perf_counter is monotonic and high resolution, so short intervals
        # are not distorted by wall-clock adjustments.
        start = time.perf_counter()
        indices, _, _ = exact_k_neighbor(args.k, query, data)
        exact_total_time += time.perf_counter() - start

        # probe the table and record the time (only the probe call itself
        # is inside the timed region).
        start = time.perf_counter()
        maybe_neighbors, _ = t.k_probe(args.k, query, args.adj)
        probe_total_time += time.perf_counter() - start
        k_probe_recall.append(recall_or_zero(maybe_neighbors, indices))

        # approximate probe, called with 2.0 as its third argument.
        start = time.perf_counter()
        maybe_neighbors, _ = t.k_probe_approx(args.k, query, 2.0, args.adj)
        probe_approx_total_time += time.perf_counter() - start
        k_probe_approx_recall.append(recall_or_zero(maybe_neighbors, indices))

    print('Exact Average Time Per Query: ' +
          str(average(exact_total_time, args.num_queries)))
    print('Probe Average Time Per Query: ' +
          str(average(probe_total_time, args.num_queries)))
    # This line previously reused the plain "Probe" label, making the two
    # timings indistinguishable in the output.
    print('Probe Approx Average Time Per Query: ' +
          str(average(probe_approx_total_time, args.num_queries)))

    print('mean recalls:')
    print(' * k_probe: {0:.2f}'.format(
        average(sum(k_probe_recall), len(k_probe_recall))))
    print(' * k_probe_approx: {0:.2f}'.format(
        average(sum(k_probe_approx_recall), len(k_probe_approx_recall))))