def main():
    """Benchmark leaf visits of ``RTree.knn_naive`` on the ``fv`` data files.

    For every even dimension from 2 up to ``max_dim`` the function loads
    ``../data/fv/fv<dim>d.txt``, keeps at most ``n`` rows, bulk-inserts them
    into a fresh ``RTree`` and runs ``reps`` k-NN queries, each using a
    randomly chosen inserted row as the query point.  For every query it
    records ``num_visited_leaves / total_leaves``; the per-dimension mean of
    that ratio is written to ``data/rtree_real.csv`` as ``dim,ratio`` rows.
    """
    n = 50000
    max_dim = 45
    dims = np.arange(2, max_dim + 1, step=2)
    reps = 50
    m = 0.4
    p = 0.3
    k = 10
    page_size = 8000
    num_bytes = 8

    dim_percent = {d: [] for d in dims}
    for dim in dims:
        print('testing dimension:', dim)
        # Node capacity derived from the page size; presumably one entry holds
        # 2 * dim coordinates plus one id, each of num_bytes bytes -- confirm
        # against the RTree node layout.
        M = np.floor(page_size / (2 * dim * num_bytes + num_bytes)).astype(int)
        tree = RTree(M, dim, m, p)

        data = np.loadtxt('../data/fv/fv' + str(dim) + 'd.txt')
        data = data[:n]
        # The file may hold fewer than n rows: size the ids and the query
        # index by what was actually loaded so they always match the data.
        num_points = len(data)
        tree.insert_batch(data, np.arange(num_points))

        # Counted once per tree instead of once per query.
        # NOTE(review): assumes knn_naive does not modify the tree -- confirm.
        total_leaves = len(tree.get_leaves())

        for _ in range(reps):
            q_idx = np.random.randint(0, num_points)
            q = data[q_idx, :]
            _, num_visited_leaves = tree.knn_naive(q, k)
            dim_percent[dim].append(num_visited_leaves / total_leaves)
        print('dim:', dim, '%:', np.mean(dim_percent[dim]) * 100)

    mean_percent = {d: np.mean(ratios) for d, ratios in dim_percent.items()}
    percents = [mean_percent[d] for d in dims]
    out = np.vstack([dims, percents]).T
    np.savetxt('data/rtree_real.csv', out, delimiter=',')
def main():
    """Benchmark leaf visits of ``RTree.knn_naive`` on uniform random data.

    For every even dimension from 2 up to ``max_dim`` a fresh ``RTree`` is
    filled with ``n`` points drawn by ``np.random.rand`` and queried ``reps``
    times with a randomly chosen inserted point.  The ratio
    ``num_visited_leaves / total_leaves`` is collected per query, and the
    per-dimension mean is saved to ``data/rtree_synthetic.csv``.
    """
    n = 50000
    max_dim = 30
    reps = 50
    m, p, k = 0.4, 0.3, 10
    page_size, num_bytes = 8000, 8
    dims = np.arange(2, max_dim + 1, step=2)

    visited_ratio = {d: [] for d in dims}
    for dim in dims:
        print('testing dimension:', dim)
        entry_size = 2 * dim * num_bytes + num_bytes
        M = np.floor(page_size / entry_size).astype(int)
        tree = RTree(M, dim, m, p)

        data = np.random.rand(n, dim)
        tree.insert_batch(data, np.arange(n))

        samples = visited_ratio[dim]
        for _ in range(reps):
            row = np.random.randint(0, n)
            q = data[row, :]
            _, num_visited_leaves = tree.knn_naive(q, k)
            total_leaves = len(tree.get_leaves())
            samples.append(num_visited_leaves / total_leaves)
        print('dim:', dim, '%:', np.mean(samples) * 100)

    percents = [np.mean(ratios) for ratios in visited_ratio.values()]
    out = np.vstack([dims, percents]).T
    np.savetxt('data/rtree_synthetic.csv', out, delimiter=',')
def main():
    """Measure leaf visits of ``RTree.knn_naive`` on one ``fv`` data file.

    ``dims`` currently evaluates to ``[45]``, so only
    ``../data/fv/fv45d.txt`` is loaded.  At most ``n`` rows are inserted into
    a fresh ``RTree``; ``reps`` k-NN queries are then issued, each using a
    randomly chosen inserted row, and ``num_visited_leaves / total_leaves``
    is recorded per query.  The ``dim, mean ratio`` table is printed.
    """
    n = 50000
    # np.arange(45, 48, 3) yields only 45; widen the range to sweep more
    # dimensions (the data files must exist for each one).
    dims = np.arange(45, 48, 3)
    reps = 20
    m = 0.4
    p = 0.3
    k = 10

    dim_percent = {d: [] for d in dims}
    for dim in dims:
        print('testing dimension:', dim)
        M = np.floor(8000 / (2 * dim * 8 + 8)).astype(int)
        tree = RTree(M, dim, m, p)

        data = np.loadtxt('../data/fv/fv' + str(dim) + 'd.txt')
        # Keep ids and rows in lockstep even if the file does not hold
        # exactly n rows.
        data = data[:n]
        num_points = len(data)
        tree.insert_batch(data, np.arange(num_points))

        # Counted once per tree instead of once per query.
        # NOTE(review): assumes knn_naive does not modify the tree -- confirm.
        total_leaves = len(tree.get_leaves())

        for _ in range(reps):
            # np.random.randint excludes the upper bound, so the index is
            # always valid.  The previous bare randint(0, 50000) would include
            # 50000 if it was the stdlib random.randint, which is out of range
            # for 50000 rows.
            rand_index = np.random.randint(0, num_points)
            q = data[rand_index]
            _, num_visited_leaves = tree.knn_naive(q, k)
            dim_percent[dim].append(num_visited_leaves / total_leaves)

    mean_percent = {d: np.mean(ratios) for d, ratios in dim_percent.items()}
    percents = [mean_percent[d] for d in dims]
    out = np.vstack([dims, percents]).T
    print(out)
import numpy as np
from rtree import RTree
from node import Node
import time
from bb_utils import dist, min_dist

# Script: insert n random points one by one into an RTree, report the
# construction time, then summarise the bounding boxes of the leaves.
start_time = time.time()
n = 50000
dim = 8
# Entries per node from an 8000-unit page; presumably each entry is 2 * dim
# four-byte coordinates plus a four-byte id -- TODO confirm against RTree.
per_page = 8000 // (dim * 2 * 4 + 4)
# per_page = 20
print(per_page, 'entries per page')
tree = RTree(per_page, dim)
for i in range(n):
    # Each point is drawn from np.random.rand(dim) and inserted with id i.
    tree.insert(np.random.rand(dim), i)
    # Progress report every n / 10 insertions.
    if (i % (n / 10)) == 0:
        print(int((i / n) * 100), '%')
print('construct time:', time.time() - start_time, 'seconds')
leaves = tree.get_leaves()
# Element-wise mean of mbb[0] and of mbb[1] over all leaves; presumably these
# are the lower and upper corners of each leaf's bounding box -- verify in Node.
lower_mean = np.mean([l.mbb[0] for l in leaves], axis=0)
upper_mean = np.mean([l.mbb[1] for l in leaves], axis=0)
print('lower mean:', lower_mean, 'upper mean:', upper_mean)
# Restart the timer for whatever is measured next.
start_time = time.time()
total_leaves = len(tree.get_leaves())
def __init__(self):
    """Create a new ``RTree`` and keep it on the instance as ``self.rt``."""
    self.rt = RTree()
def __init__(self, *args):
    """Initialise the instance by forwarding ``*args`` to ``RTree.__init__``."""
    # Explicit base-class call; all positional arguments pass through unchanged.
    RTree.__init__(self, *args)
def __init__(self):
    """Create a new ``RTree`` and keep it on the instance as ``self.tree``."""
    self.tree = RTree()
class TileLayer(object):
    """Layer that keeps its tiles in an ``RTree`` held as ``self.tree``."""

    def __init__(self):
        """Start with a freshly constructed ``RTree``."""
        self.tree = RTree()

    def add(self, tile, rect):
        """Forward ``tile`` and ``rect`` to ``self.tree.add``; returns ``None``."""
        index = self.tree
        index.add(tile, rect)
def __init__(self):
    """Create the loader, the R-tree, the sequential searcher and zeroed timers."""
    self.loader = DataLoader()
    self.r_tree = RTree()
    self.seq_search = SequentialSearch()
    # Elapsed times of the most recent all-queries runs.
    self.sequential_query_time = self.rtree_query_time = 0
class QueryHandler():
    """Load points and queries, build an R-tree index and run timed searches.

    The same queries can be answered by ``SequentialSearch`` or by the
    ``RTree`` index.  The elapsed time of the most recent all-queries run of
    each kind is kept in ``sequential_query_time`` and ``rtree_query_time``.
    """

    # Fixed query used by both search methods when called with ``single=True``.
    _SINGLE_QUERY = {'x1': 17840, 'x2': 18840, 'y1': 13971, 'y2': 14971}

    def __init__(self):
        self.loader = DataLoader()
        self.r_tree = RTree()
        self.seq_search = SequentialSearch()
        self.sequential_query_time = 0
        self.rtree_query_time = 0

    def datapoints_loader(self, datapoints_path):
        """Load the data points at ``datapoints_path`` and return them."""
        print("\n\nLoading data points...")
        points = self.loader.load_datapoints(datapoints_path)
        print("Data points loaded successfully!!!")
        return points

    def queries_loader(self, queries_path):
        """Load the queries at ``queries_path``, report the load time, return them."""
        print("\n\nLoading queries...")
        start_time = time.time()
        queries = self.loader.load_query(queries_path)
        end_time = time.time()
        print("Queries loaded successfully!!!")
        print("Time taken for loading queries: {} secs".format(end_time -
                                                               start_time))
        return queries

    def create_rtree_index(self, points):
        """Insert every row yielded by ``points.iterrows()`` into the R-tree."""
        print("\n\nCreating index for r-tree. Please wait for a while...")
        start_time = time.time()
        for _, point in points.iterrows():
            self.r_tree.insert(self.r_tree.root, point)
        end_time = time.time()
        print("Rtree index created successfully!!!")
        print("Time taken for building R-tree is : {} secs".format(
            end_time - start_time))

    def sequential_query(self, points, queries, single=False):
        """Answer queries with ``SequentialSearch`` and return the results.

        Args:
            points: Data points passed through to ``sequential_search``.
            queries: Object providing ``iterrows()`` and ``len()``; ignored
                when ``single`` is true.
            single: When true, run only the built-in fixed query.

        Returns:
            List with one ``sequential_search`` result per executed query.
        """
        print("\n\nSequential search:: Performing search. Please wait...")
        queries_result_sequential = []
        if single:
            print(
                "\n\nSequential search (only 1 query)::Performing search. Please wait..."
            )
            start_time = time.time()
            # Pass a copy so the shared class-level query cannot be mutated.
            q = dict(self._SINGLE_QUERY)
            n = self.seq_search.sequential_search(points, q)
            end_time = time.time()
            queries_result_sequential.append(n)
            print("Sequential search::Query completed successfully!!!")
            print("Time taken for searching single query:",
                  end_time - start_time)
            # The value printed is the query result, not a duration, so the
            # message no longer ends in "secs".
            print("query result for sequential search (for 1 query) is : {}"
                  .format(queries_result_sequential))
        else:
            start_time = time.time()
            for _, query in queries.iterrows():
                n = self.seq_search.sequential_search(points, query)
                queries_result_sequential.append(n)
            end_time = time.time()
            self.sequential_query_time = end_time - start_time
            print("Sequential search::Query completed successfully!!!")
            print("Time taken for sequential query: ",
                  self.sequential_query_time)
            num_queries = len(queries)
            # An empty query set has no average; skip it rather than divide
            # by zero.
            if num_queries:
                print("Average time taken for sequential query: ",
                      self.sequential_query_time / num_queries)
            print("Search result for Sequential search (all queries) is : {}"
                  .format(queries_result_sequential))
        return queries_result_sequential

    def rtree_search(self, queries, single=False):
        """Answer queries through the R-tree index and return the results.

        Args:
            queries: Object providing ``iterrows()`` and ``len()``; ignored
                when ``single`` is true.
            single: When true, run only the built-in fixed query.

        Returns:
            List with one ``r_tree.query`` result per executed query.
        """
        print("\n\nR-tree:: Performing search. Please wait...")
        queries_result_rtree = []
        if single:
            print("\n\nR-tree (1 query) :: Performing search. Please wait...")
            start_time = time.time()
            # Pass a copy so the shared class-level query cannot be mutated.
            q = dict(self._SINGLE_QUERY)
            n = self.r_tree.query(self.r_tree.root, q)
            end_time = time.time()
            rtree_query_time = end_time - start_time
            queries_result_rtree.append(n)
            print("Rtree::Query completed successfully!!!")
            print("Total time taken for R-tree query:", rtree_query_time)
            # The value printed is the query result, not a duration.
            print("query result for R-tree search (for 1 query) is : {}".
                  format(queries_result_rtree))
        else:
            start_time = time.time()
            for _, query in queries.iterrows():
                n = self.r_tree.query(self.r_tree.root, query)
                queries_result_rtree.append(n)
            end_time = time.time()
            self.rtree_query_time = end_time - start_time
            print("Rtree::Query completed successfully!!!")
            print("Time taken for R-Tree query:", self.rtree_query_time)
            num_queries = len(queries)
            # An empty query set has no average; skip it rather than divide
            # by zero.
            if num_queries:
                print("Average time taken for R-Tree query: ",
                      self.rtree_query_time / num_queries)
            print("query result for R-tree search (for all query) is : {}".
                  format(queries_result_rtree))
        return queries_result_rtree