Beispiel #1
0
def main():
    """Benchmark naive kNN leaf visits on the stored feature-vector files.

    For every even dimension from 2 up to ``max_dim`` an ``RTree`` is built
    from the first ``n`` rows of ``../data/fv/fv<dim>d.txt``.  ``reps`` rows
    are then drawn at random and used as kNN queries; for each query the
    second value returned by ``knn_naive`` is divided by the current number
    of leaves.  The per-dimension mean of that ratio is printed and finally
    written, together with the dimension, to ``data/rtree_real.csv``.
    """
    n = 50000
    max_dim = 45
    reps = 50
    k = 10
    m, p = 0.4, 0.3
    page_size, num_bytes = 8000, 8

    dims = np.arange(2, max_dim + 1, step=2)
    mean_ratio = {}

    for dim in dims:
        print('testing dimension:', dim)

        # Presumably the node capacity for one page: each entry is costed
        # as 2 * dim values plus one extra value of num_bytes each.
        entry_size = 2 * dim * num_bytes + num_bytes
        M = np.floor(page_size / entry_size).astype(int)

        tree = RTree(M, dim, m, p)
        data = np.loadtxt('../data/fv/fv' + str(dim) + 'd.txt')[:n]
        tree.insert_batch(data, np.arange(n))

        ratios = []
        for _ in range(reps):
            # Query with a randomly chosen row of the inserted data.
            query = data[np.random.randint(0, n), :]
            _neighbours, visited = tree.knn_naive(query, k)
            ratios.append(visited / len(tree.get_leaves()))

        mean_ratio[dim] = np.mean(ratios)
        print('dim:', dim, '%:', mean_ratio[dim] * 100)

    percents = [mean_ratio[d] for d in mean_ratio]

    # One row per dimension: (dimension, mean visited ratio).
    out = np.vstack([dims, percents]).T
    np.savetxt('data/rtree_real.csv', out, delimiter=',')
Beispiel #2
0
def main():
    """Benchmark naive kNN leaf visits on uniformly random data.

    For every even dimension from 2 up to ``max_dim`` an ``RTree`` is built
    from ``n`` points drawn with ``np.random.rand``.  ``reps`` of those
    points are then picked at random and used as kNN queries; for each query
    the second value returned by ``knn_naive`` is divided by the current
    number of leaves.  The per-dimension mean of that ratio is printed and
    finally written, together with the dimension, to
    ``data/rtree_synthetic.csv``.
    """
    n = 50000
    max_dim = 30
    reps = 50
    k = 10
    m, p = 0.4, 0.3
    page_size, num_bytes = 8000, 8

    dims = np.arange(2, max_dim + 1, step=2)
    mean_ratio = {}

    for dim in dims:
        print('testing dimension:', dim)

        # Presumably the node capacity for one page: each entry is costed
        # as 2 * dim values plus one extra value of num_bytes each.
        entry_size = 2 * dim * num_bytes + num_bytes
        M = np.floor(page_size / entry_size).astype(int)

        tree = RTree(M, dim, m, p)
        data = np.random.rand(n, dim)
        tree.insert_batch(data, np.arange(n))

        ratios = []
        for _ in range(reps):
            # Query with a randomly chosen row of the inserted data.
            query = data[np.random.randint(0, n), :]
            _neighbours, visited = tree.knn_naive(query, k)
            ratios.append(visited / len(tree.get_leaves()))

        mean_ratio[dim] = np.mean(ratios)
        print('dim:', dim, '%:', mean_ratio[dim] * 100)

    percents = [mean_ratio[d] for d in mean_ratio]

    # One row per dimension: (dimension, mean visited ratio).
    out = np.vstack([dims, percents]).T
    np.savetxt('data/rtree_synthetic.csv', out, delimiter=',')
Beispiel #3
0
def main():
    """Measure the share of leaves touched by naive kNN on one data set.

    An ``RTree`` is built from at most ``n`` rows of
    ``../data/fv/fv<dim>d.txt`` for every dimension in ``dims``.  ``reps``
    of the inserted rows are then drawn at random and used as kNN queries;
    for each query the second value returned by ``knn_naive`` is divided by
    the current number of leaves.  The resulting (dimension, mean ratio)
    table is printed.
    """
    n = 50000

    # arange(45, 48, 3) yields only [45]; widen the range to test more
    # dimensions.
    dims = np.arange(45, 48, 3)

    reps = 20
    m = 0.4
    p = 0.3
    k = 10

    dim_percent = {d: [] for d in dims}

    for dim in dims:
        print('testing dimension:', dim)
        M = np.floor(8000 / (2 * dim * 8 + 8)).astype(int)
        tree = RTree(M, dim, m, p)

        # Keep exactly the rows that get inserted so that record ids and
        # query indices always refer to data that is really in the tree.
        data = np.loadtxt('../data/fv/fv' + str(dim) + 'd.txt')[:n]
        num_points = len(data)
        tree.insert_batch(data, np.arange(num_points))

        for _ in range(reps):
            # np.random.randint excludes the upper bound, so the index is
            # always a valid row of the inserted data.
            rand_index = np.random.randint(0, num_points)
            q = data[rand_index]

            _, num_visited_leaves = tree.knn_naive(q, k)
            total_leaves = len(tree.get_leaves())
            dim_percent[dim].append(num_visited_leaves / total_leaves)

    dim_percent = {d: np.mean(v) for d, v in dim_percent.items()}

    percents = [dim_percent[d] for d in dim_percent]

    # One row per dimension: (dimension, mean visited ratio).
    out = np.vstack([dims, percents]).T
    print(out)
import numpy as np
from rtree import RTree
from node import Node
import time
from bb_utils import dist, min_dist

# Reference point for timing the tree construction below.
start_time = time.time()

n = 50000
dim = 8
# Entries per page for a page budget of 8000, with each entry costed as
# dim * 2 * 4 + 4 (presumably two 4-byte bounds per dimension plus a 4-byte
# id -- TODO confirm against the RTree node layout).
per_page = 8000 // (dim * 2 * 4 + 4)
# per_page = 20
print(per_page, 'entries per page')

tree = RTree(per_page, dim)

# Insert n uniformly random points one at a time, using the loop index as
# the record id, and report progress every n / 10 insertions.
for i in range(n):
    tree.insert(np.random.rand(dim), i)
    if (i % (n / 10)) == 0:
        print(int((i / n) * 100), '%')

print('construct time:', time.time() - start_time, 'seconds')

# Average the first and second element of every leaf's ``mbb`` across all
# leaves (presumably the lower and upper corner of the bounding box).
leaves = tree.get_leaves()
lower_mean = np.mean([l.mbb[0] for l in leaves], axis=0)
upper_mean = np.mean([l.mbb[1] for l in leaves], axis=0)
print('lower mean:', lower_mean, 'upper mean:', upper_mean)

# Restart the timer for whatever is measured next.
start_time = time.time()

total_leaves = len(tree.get_leaves())
Beispiel #5
0
 def __init__(self):
     """Attach a default-constructed ``RTree`` to the instance as ``rt``."""
     self.rt = RTree()
Beispiel #6
0
 def __init__(self, *args):
     """Forward all positional arguments unchanged to ``RTree.__init__``."""
     RTree.__init__(self, *args)
Beispiel #7
0
 def __init__(self):
     """Attach a default-constructed ``RTree`` to the instance as ``tree``."""
     self.tree = RTree()
Beispiel #8
0
class TileLayer(object):
    """A layer of tiles stored in an ``RTree``."""

    def __init__(self):
        """Start with an empty, default-constructed ``RTree``."""
        self.tree = RTree()

    def add(self, tile, rect):
        """Hand ``tile`` and ``rect`` to the underlying tree's ``add``."""
        index = self.tree
        index.add(tile, rect)
Beispiel #9
0
 def __init__(self):
     """Create the loader, R-tree and sequential searcher; zero both timers."""
     # Collaborators are built in the same order as before.
     self.loader = DataLoader()
     self.r_tree = RTree()
     self.seq_search = SequentialSearch()

     # Accumulated query times, both starting at zero.
     self.sequential_query_time = self.rtree_query_time = 0
Beispiel #10
0
class QueryHandler():
    """Load data, build an R-tree index and run timed range searches.

    The handler owns a ``DataLoader``, an ``RTree`` and a
    ``SequentialSearch`` and prints progress and timing information for
    every step.  ``sequential_query_time`` and ``rtree_query_time`` hold the
    total time of the most recent all-queries run of the respective search.
    """

    # Fixed query rectangle used by the ``single=True`` mode of both searches.
    SINGLE_QUERY = {'x1': 17840, 'x2': 18840, 'y1': 13971, 'y2': 14971}

    def __init__(self):
        """Create the collaborators and reset both query timers to zero."""
        self.loader = DataLoader()
        self.r_tree = RTree()
        self.seq_search = SequentialSearch()
        self.sequential_query_time = 0
        self.rtree_query_time = 0

    # Loading the given data points
    def datapoints_loader(self, datapoints_path):
        """Load the data points from ``datapoints_path`` and report the time.

        Returns whatever ``DataLoader.load_datapoints`` returns.
        """
        print("\n\nLoading data points...")
        start_time = time.time()
        points = self.loader.load_datapoints(datapoints_path)
        end_time = time.time()
        print("Data points loaded successfully!!!")
        # Report the measured time, as queries_loader does.
        print("Time taken for loading data points: {} secs".format(
            end_time - start_time))
        return points

    # Loading all the queries
    def queries_loader(self, queries_path):
        """Load the queries from ``queries_path`` and report the time.

        Returns whatever ``DataLoader.load_query`` returns.
        """
        print("\n\nLoading queries...")
        start_time = time.time()
        queries = self.loader.load_query(queries_path)
        end_time = time.time()
        print("Queries loaded successfully!!!")
        print("Time taken for loading queries: {} secs".format(end_time -
                                                               start_time))
        return queries

    # Creating a R-tree index
    def create_rtree_index(self, points):
        """Insert every row yielded by ``points.iterrows()`` into the R-tree."""
        print("\n\nCreating index for r-tree. Please wait for a while...")
        start_time = time.time()
        for _, point in points.iterrows():
            self.r_tree.insert(self.r_tree.root, point)
        end_time = time.time()
        print("Rtree index created successfully!!!")
        print("Time taken for building R-tree is : {} secs".format(end_time -
                                                                   start_time))

    # Sequential search
    def sequential_query(self, points, queries, single=False):
        """Run the sequential search and return the list of its results.

        With ``single=True`` only ``SINGLE_QUERY`` is executed and
        ``queries`` is ignored.  Otherwise every row yielded by
        ``queries.iterrows()`` is searched, the total time is stored in
        ``sequential_query_time`` and the average is printed when at least
        one query ran.
        """
        print("\n\nSequential search:: Performing search. Please wait...")
        queries_result_sequential = []
        if single:
            print(
                "\n\nSequential search (only 1 query)::Performing search. Please wait..."
            )
            start_time = time.time()
            # Pass a copy so the shared class constant can never be mutated.
            n = self.seq_search.sequential_search(points,
                                                  dict(self.SINGLE_QUERY))
            end_time = time.time()
            queries_result_sequential.append(n)
            print("Sequential search::Query completed successfully!!!")
            print("Time taken for searching single query:",
                  end_time - start_time)
            # The value printed is the search result, not a duration.
            print("query result for sequential search (for 1 query) is : {}"
                  .format(queries_result_sequential))

        else:
            start_time = time.time()
            for _, query in queries.iterrows():
                n = self.seq_search.sequential_search(points, query)
                queries_result_sequential.append(n)
            end_time = time.time()
            self.sequential_query_time = end_time - start_time
            print("Sequential search::Query completed successfully!!!")
            print("Time taken for sequential query: ",
                  self.sequential_query_time)
            # One result is recorded per query; skip the average when no
            # query ran instead of dividing by zero.
            query_count = len(queries_result_sequential)
            if query_count:
                print("Average time taken for sequential query: ",
                      self.sequential_query_time / query_count)
            print("Search result for Sequential search (all queries) is : {}"
                  .format(queries_result_sequential))

        return queries_result_sequential

    # Searching using R-trees
    def rtree_search(self, queries, single=False):
        """Run the R-tree search and return the list of its results.

        With ``single=True`` only ``SINGLE_QUERY`` is executed and
        ``queries`` is ignored.  Otherwise every row yielded by
        ``queries.iterrows()`` is searched, the total time is stored in
        ``rtree_query_time`` and the average is printed when at least one
        query ran.
        """
        print("\n\nR-tree:: Performing search. Please wait...")
        queries_result_rtree = []
        if single:
            print("\n\nR-tree (1 query) :: Performing search. Please wait...")
            start_time = time.time()
            # Pass a copy so the shared class constant can never be mutated.
            n = self.r_tree.query(self.r_tree.root, dict(self.SINGLE_QUERY))
            end_time = time.time()
            rtree_query_time = end_time - start_time
            queries_result_rtree.append(n)
            print("Rtree::Query completed successfully!!!")
            print("Total time taken for R-tree query:", rtree_query_time)
            # The value printed is the search result, not a duration.
            print("query result for R-tree search (for 1 query) is : {}".
                  format(queries_result_rtree))

        else:
            start_time = time.time()
            for _, query in queries.iterrows():
                n = self.r_tree.query(self.r_tree.root, query)
                queries_result_rtree.append(n)
            end_time = time.time()
            self.rtree_query_time = end_time - start_time
            print("Rtree::Query completed successfully!!!")
            print("Time taken for R-Tree query:", self.rtree_query_time)
            # One result is recorded per query; skip the average when no
            # query ran instead of dividing by zero.
            query_count = len(queries_result_rtree)
            if query_count:
                print("Average time taken for R-Tree query: ",
                      self.rtree_query_time / query_count)
            print("query result for R-tree search (for all query) is : {}".
                  format(queries_result_rtree))

        return queries_result_rtree