Beispiel #1
0
def main():
    """Measure how many R-tree leaves a naive k-NN query visits on file data.

    For every even dimensionality from 2 up to ``highest_dim`` (the upper
    bound itself is only reached when it is even), the first ``num_points``
    rows of ``../data/fv/fv<dim>d.txt`` are batch-inserted into a fresh
    ``RTree``.  ``num_queries`` queries are then issued, each using a randomly
    chosen loaded row as the query point.  The fraction of leaves visited per
    query is averaged per dimensionality, printed as a percentage, and the
    ``(dimension, mean fraction)`` rows are written to
    ``data/rtree_real.csv``.
    """
    num_points = 50000
    highest_dim = 45
    dimensions = np.arange(2, highest_dim + 1, step=2)

    num_queries = 50
    num_neighbors = 10
    # m and p are handed straight to the RTree constructor.
    m = 0.4
    p = 0.3

    page_bytes = 8000
    value_bytes = 8

    # Mean visited-leaf fraction per dimensionality, in iteration order.
    mean_fraction = {}

    for dim in dimensions:
        print('testing dimension:', dim)

        # NOTE(review): looks like the node capacity for one page, assuming
        # each entry stores 2 * dim coordinates plus one extra value of
        # value_bytes each -- confirm against RTree.
        entry_bytes = 2 * dim * value_bytes + value_bytes
        M = np.floor(page_bytes / entry_bytes).astype(int)

        tree = RTree(M, dim, m, p)
        path = '../data/fv/fv' + str(dim) + 'd.txt'
        data = np.loadtxt(path)[:num_points]
        tree.insert_batch(data, np.arange(num_points))

        fractions = []
        for _ in range(num_queries):
            # Query with one of the loaded points, picked at random.
            query = data[np.random.randint(0, num_points), :]
            _, visited = tree.knn_naive(query, num_neighbors)
            fractions.append(visited / len(tree.get_leaves()))

        mean_fraction[dim] = np.mean(fractions)
        print('dim:', dim, '%:', mean_fraction[dim] * 100)

    # One row per dimensionality: [dimension, mean visited fraction].
    table = np.vstack([dimensions, list(mean_fraction.values())]).T
    np.savetxt('data/rtree_real.csv', table, delimiter=',')
Beispiel #2
0
def main():
    """Measure visited-leaf fractions of naive k-NN on uniform random data.

    For each even dimensionality from 2 through ``top_dim``, ``sample_count``
    points drawn with ``np.random.rand`` are batch-inserted into a fresh
    ``RTree``; ``query_count`` queries follow, each using a randomly chosen
    inserted point.  The per-query ratio of visited leaves to total leaves is
    averaged per dimensionality, printed as a percentage, and the
    ``(dimension, mean ratio)`` rows are saved to
    ``data/rtree_synthetic.csv``.
    """
    sample_count = 50000
    top_dim = 30
    dims = np.arange(2, top_dim + 1, step=2)

    query_count = 50
    neighbor_count = 10
    # m and p are passed through to the RTree constructor unchanged.
    m = 0.4
    p = 0.3

    page_size = 8000
    num_bytes = 8

    averages = []  # mean visited ratio, aligned with ``dims``

    for dim in dims:
        print('testing dimension:', dim)

        # NOTE(review): presumably the per-page node capacity (2 * dim
        # coordinates plus one extra value per entry) -- confirm with RTree.
        bytes_per_entry = 2 * dim * num_bytes + num_bytes
        M = np.floor(page_size / bytes_per_entry).astype(int)

        tree = RTree(M, dim, m, p)
        points = np.random.rand(sample_count, dim)
        tree.insert_batch(points, np.arange(sample_count))

        ratios = []
        for _ in range(query_count):
            pick = np.random.randint(0, sample_count)
            _, leaves_seen = tree.knn_naive(points[pick, :], neighbor_count)
            leaf_total = len(tree.get_leaves())
            ratios.append(leaves_seen / leaf_total)

        averages.append(np.mean(ratios))
        print('dim:', dim, '%:', averages[-1] * 100)

    # Stack into rows of [dimension, mean visited ratio] and write as CSV.
    result = np.vstack([dims, averages]).T
    np.savetxt('data/rtree_synthetic.csv', result, delimiter=',')
Beispiel #3
0
def main():
    """Measure the fraction of R-tree leaves visited by naive k-NN queries.

    For each dimensionality in ``dims``, at most ``n`` rows of
    ``../data/fv/fv<dim>d.txt`` are batch-inserted into a fresh ``RTree``.
    ``reps`` queries are then run, each using a randomly chosen loaded row as
    the query point, and the ratio of visited leaves to total leaves is
    recorded.  The per-dimension mean ratios are printed as an array with one
    ``[dimension, mean ratio]`` row per dimensionality.
    """
    n = 50000

    # np.arange(45, 48, 3) yields only [45], so just the 45-dimensional
    # data set is evaluated here.
    dims = np.arange(45, 48, 3)

    reps = 20
    # m and p are passed straight through to the RTree constructor.
    m = 0.4
    p = 0.3
    k = 10

    dim_percent = {d: [] for d in dims}

    for dim in dims:
        print('testing dimension:', dim)
        # NOTE(review): looks like the node capacity for an 8000-byte page
        # with 8-byte values (2 * dim coordinates plus one extra value per
        # entry) -- confirm against RTree.
        M = np.floor(8000 / (2 * dim * 8 + 8)).astype(int)
        tree = RTree(M, dim, m, p)

        # Keep at most n rows so the points and the ids handed to
        # insert_batch always have the same length.
        data = np.loadtxt('../data/fv/fv' + str(dim) + 'd.txt')[:n]
        num_points = data.shape[0]
        tree.insert_batch(data, np.arange(num_points))

        for _ in range(reps):
            # np.random.randint excludes the upper bound, so the index is
            # always a valid row of ``data``.
            rand_index = np.random.randint(0, num_points)
            q = data[rand_index]

            _, num_visited_leaves = tree.knn_naive(q, k)
            total_leaves = len(tree.get_leaves())
            dim_percent[dim].append(num_visited_leaves / total_leaves)

    percents = [np.mean(ratios) for ratios in dim_percent.values()]

    # One row per dimensionality: [dimension, mean visited ratio].
    out = np.vstack([dims, percents]).T
    print(out)
# Build an R-tree from random points one insert at a time, report leaf
# bounding-box statistics, then time a single naive k-NN query.
n = 50000
dim = 8
# Entries per 8000-byte page, as reported by the print below.
# NOTE(review): the divisor looks like 2 * dim 4-byte coordinates plus one
# extra 4-byte value per entry -- confirm against RTree.
per_page = 8000 // (dim * 2 * 4 + 4)
print(per_page, 'entries per page')

# Start the construction timer here so 'construct time' below never reads an
# undefined (or stale) start_time.
start_time = time.time()

tree = RTree(per_page, dim)

# Report progress roughly every 10% of the inserts; max() guards against a
# zero step (and thus a modulo-by-zero) when n < 10.
progress_step = max(1, n // 10)
for i in range(n):
    tree.insert(np.random.rand(dim), i)
    if i % progress_step == 0:
        print(int((i / n) * 100), '%')

print('construct time:', time.time() - start_time, 'seconds')

# Fetch the leaves once; the tree is not modified after this point.
leaves = tree.get_leaves()
total_leaves = len(leaves)
# Element-wise means of mbb[0] and mbb[1] over all leaves.
lower_mean = np.mean([leaf.mbb[0] for leaf in leaves], axis=0)
upper_mean = np.mean([leaf.mbb[1] for leaf in leaves], axis=0)
print('lower mean:', lower_mean, 'upper mean:', upper_mean)

q = np.random.rand(dim)

# Time only the query itself, not the leaf traversal or query generation.
start_time = time.time()
neighbors, num_leaves = tree.knn_naive(q, 10)
print('search time:', time.time() - start_time, 'seconds')

print('leaves visited:', num_leaves, 'out of', total_leaves,
      (num_leaves / total_leaves) * 100, '%')
# Use a dedicated loop name so the point count ``n`` is not overwritten.
for neighbor in neighbors:
    print(neighbor, dist(neighbor, q))