Example #1
0
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Benchmark the local ``BallTree`` against ``skBallTree``.

    Both trees are built on the same random ``(N, D)`` array, then the
    array is queried against itself for ``k`` neighbors: once with the
    sklearn tree and once for each of the four ``dualtree`` /
    ``breadth_first`` combinations of the local tree.  Build times, query
    times, the local tree's call counter and result agreement are printed.

    Parameters
    ----------
    N : int
        Number of random points to generate.
    D : int
        Number of columns (dimensions) of the random data.
    k : int
        Number of neighbors requested from every query.
    leaf_size : int
        ``leaf_size`` forwarded to both tree constructors.

    Returns
    -------
    None
        Results are reported on stdout only.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print("  sklearn : %.2g sec" % (t1 - t0))
    print("  new     : %.2g sec" % (t2 - t1))

    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    # Parallel lists, one entry per benchmarked run; sklearn goes first.
    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # sklearn exposes no call counter here; -1 is a placeholder

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Use fresh names so the ``D`` parameter is not rebound.
            run_dist, run_ind = bt.query(X,
                                         k,
                                         dualtree=dualtree,
                                         breadth_first=breadth_first)
            t1 = time()
            dist.append(run_dist)
            ind.append(run_ind)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            labels.append(
                ('dual/' if dualtree else 'single/')
                + ('breadthfirst' if breadth_first else 'depthfirst'))

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print("  %s : %.2g sec (%i calls)" % (lab, t, c))
    # A bare ``print`` is a no-op expression on Python 3; print("") emits
    # the intended blank line on both Python 2 and Python 3.
    print("")

    # Each run is compared with the run before it; the first run is compared
    # with the last one, so the comparison wraps around the list.
    print(
        " distances match: %s" % ', '.join([
            '%s' % np.allclose(prev, cur)
            for prev, cur in zip(dist[-1:] + dist[:-1], dist)
        ]))
    print(
        " indices match: %s" % ', '.join([
            '%s' % np.allclose(prev, cur)
            for prev, cur in zip(ind[-1:] + ind[:-1], ind)
        ]))
Example #2
0
def test_tree(N=10000, D=20, K=1, LS=40):
    """Compare build and query timings of sklearn's ball tree and ``BallTree``.

    A random seed is drawn, printed and applied, then an ``(N, D)`` random
    array is generated.  Each tree is built with ``leaf_size=LS`` and
    queried with the data itself for ``K`` neighbors.  Agreement of the
    returned distances and indices, plus the four timings, is printed.
    """
    from time import time
    from sklearn.neighbors import BallTree as skBallTree

    seed = np.random.randint(10000)
    print("-------------------------------------------------------")
    print("{0} neighbors of {1} points in {2} dimensions".format(K, N, D))
    print("random seed = {0}".format(seed))
    np.random.seed(seed)
    data = np.random.random((N, D))

    # Timestamps are taken back to back so that consecutive differences
    # give: sklearn build, sklearn query, local build, local query.
    start = time()
    sk_tree = skBallTree(data, leaf_size=LS)
    sk_built = time()
    sk_dist, sk_ind = sk_tree.query(data, K)
    sk_queried = time()

    py_tree = BallTree(data, leaf_size=LS)
    py_built = time()
    py_dist, py_ind = py_tree.query(data, K)
    py_queried = time()

    same_dist = np.allclose(sk_dist, py_dist)
    same_ind = np.allclose(sk_ind, py_ind)
    print("results match: {0} {1}".format(same_dist, same_ind))

    sk_build_sec = sk_built - start
    py_build_sec = py_built - sk_queried
    sk_query_sec = sk_queried - sk_built
    py_query_sec = py_queried - py_built

    print("")
    print("sklearn build: {0:.2g} sec".format(sk_build_sec))
    print("python build  : {0:.2g} sec".format(py_build_sec))
    print("")
    print("sklearn query: {0:.2g} sec".format(sk_query_sec))
    print("python query  : {0:.2g} sec".format(py_query_sec))
Example #3
0
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Time tree construction and k-neighbors queries for two ball trees.

    Random ``(N, D)`` data is generated and used both to build the trees
    (``skBallTree`` and the local ``BallTree``) and as the query set.  The
    local tree is queried once per ``dualtree`` / ``breadth_first``
    combination.  Timings, the local tree's call counts and pairwise
    agreement of the results are printed to stdout.

    Parameters
    ----------
    N : int
        Number of points.
    D : int
        Number of dimensions per point.
    k : int
        Number of neighbors per query point.
    leaf_size : int
        Passed as ``leaf_size`` to both tree constructors.

    Returns
    -------
    None
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print("  sklearn : %.2g sec" % (t1 - t0))
    print("  new     : %.2g sec" % (t2 - t1))

    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    # One entry per run in each list; index 0 is the sklearn reference run.
    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for sklearn

    for dualtree in (False, True):
        traversal = 'dual/' if dualtree else 'single/'
        for breadth_first in (False, True):
            order = 'breadthfirst' if breadth_first else 'depthfirst'

            bt.reset_n_calls()
            t0 = time()
            # Named so the results do not overwrite the ``D`` parameter.
            D_new, I_new = bt.query(X, k, dualtree=dualtree,
                                    breadth_first=breadth_first)
            t1 = time()

            dist.append(D_new)
            ind.append(I_new)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())
            labels.append(traversal + order)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print("  %s : %.2g sec (%i calls)" % (lab, t, c))
    # The original bare ``print`` does nothing on Python 3; print("") gives
    # the blank separator line on Python 2 and Python 3 alike.
    print("")

    # Run i is checked against run i - 1, with run 0 checked against the
    # last run (wrap-around), hence the rotated copy of each list.
    print(" distances match: %s"
          % ', '.join(['%s' % np.allclose(before, after)
                       for before, after in zip(dist[-1:] + dist[:-1],
                                                dist)]))
    print(" indices match: %s"
          % ', '.join(['%s' % np.allclose(before, after)
                       for before, after in zip(ind[-1:] + ind[:-1],
                                                ind)]))
Example #4
0
def test_tree(K=2, LS=3):
    """Compare ``BallTree`` with sklearn's tree and a brute-force search.

    Points are read from ``../datasets/POINT/UK.geojson``, stacked into a
    float32 array and used both as tree data and as the query set.  The
    sklearn tree is built with ``metric=vincenty``.  Column 1 of each
    distance result is checked against ``brute_min(X)``, so ``K`` must be
    at least 2 for those checks to index successfully.  Agreement flags
    and timings are printed.
    """
    frame = gpd.read_file('../datasets/POINT/UK.geojson')
    points = np.stack(frame['geometry']).astype(np.float32)

    print("-------------------------------------------------------")
    print("{0} neighbors of {1} points".format(K, len(points)))

    # Untimed warm-up run (the original note: "pre-run to jit compile
    # the code").
    BallTree(points, leaf_size=LS).query(points, K)

    sk_start = time()
    sk_tree = skBallTree(points, leaf_size=LS, metric=vincenty)
    sk_built = time()
    sk_dist, sk_ind = sk_tree.query(points, K)
    sk_queried = time()

    nb_tree = BallTree(points, leaf_size=LS)
    nb_built = time()

    # Disabled debug aid: dump the tree nodes as circles to GeoJSON.
    #     geometry = gpd.GeoSeries(map(Point, bt2.node_centroids))
    #     df = gpd.GeoDataFrame(geometry=geometry, crs={'init': 'epsg:4326'})
    #     df['node_idx_start'] = bt2.node_idx[:, 0]
    #     df['node_idx_end'] = bt2.node_idx[:, 1]
    #     df['idx'] = np.arange(bt2.n_nodes)
    #     df['radius'] = bt2.node_radius
    #     df['geometry'] = df.apply(point_to_circle, axis=1)
    #     df.to_file('nodes4_.geojson', driver='GeoJSON')

    nb_dist, nb_ind = nb_tree.query(points, K)
    nb_queried = time()

    brute_start = time()
    reference = brute_min(points)
    brute_done = time()

    sk_vs_brute = np.allclose(sk_dist[:, 1], reference)
    print('Brute dist = sklearn:', sk_vs_brute)
    nb_vs_brute = np.allclose(nb_dist[:, 1], reference)
    print('Brute dist = my dist:', nb_vs_brute)
    print()

    dist_agree = np.allclose(sk_dist, nb_dist)
    print('My dist = sklearn:', dist_agree)
    ind_agree = np.allclose(sk_ind, nb_ind, rtol=0)
    print('My index = sklearn', ind_agree)
    print()

    sk_build_sec = sk_built - sk_start
    nb_build_sec = nb_built - sk_queried
    print("sklearn build: {0:.3g} sec".format(sk_build_sec))
    print("numba build  : {0:.3g} sec".format(nb_build_sec))
    print()

    sk_query_sec = sk_queried - sk_built
    nb_query_sec = nb_queried - nb_built
    brute_sec = brute_done - brute_start
    print("sklearn query: {0:.3g} sec".format(sk_query_sec))
    print("numba query  : {0:.3g} sec".format(nb_query_sec))
    print("brute query  : {0:.3g} sec".format(brute_sec))
def test_tree(N=1000, D=3, K=5, LS=40):
    """Time sklearn's ball tree against ``BallTree`` on random data.

    Prints the numba version, draws/prints/applies a random seed, builds an
    ``(N, D)`` random array and runs one untimed warm-up build and query of
    ``BallTree``.  Both trees are then built with ``leaf_size=LS`` and
    queried with the data itself for ``K`` neighbors; result agreement and
    the build/query timings are printed.
    """
    from time import time
    from sklearn.neighbors import BallTree as skBallTree

    separator = "-------------------------------------------------------"
    print(separator)
    print("Numba version: " + numba.__version__)

    seed = np.random.randint(10000)
    print(separator)
    print("{0} neighbors of {1} points in {2} dimensions".format(K, N, D))
    print("random seed = {0}".format(seed))
    np.random.seed(seed)
    samples = np.random.random((N, D))

    # Untimed warm-up (the original note: "pre-run to jit compile the code").
    BallTree(samples, leaf_size=LS).query(samples, K)

    # Consecutive timestamps bracket: sklearn build, sklearn query,
    # local build, local query.
    began = time()
    sk_tree = skBallTree(samples, leaf_size=LS)
    sk_built = time()
    sk_dist, sk_ind = sk_tree.query(samples, K)
    sk_queried = time()

    nb_tree = BallTree(samples, leaf_size=LS)
    nb_built = time()
    nb_dist, nb_ind = nb_tree.query(samples, K)
    nb_queried = time()

    dist_ok = np.allclose(sk_dist, nb_dist)
    ind_ok = np.allclose(sk_ind, nb_ind)
    print("results match: {0} {1}".format(dist_ok, ind_ok))

    print("")
    print("sklearn build: {0:.3g} sec".format(sk_built - began))
    print("numba build  : {0:.3g} sec".format(nb_built - sk_queried))
    print("")
    print("sklearn query: {0:.3g} sec".format(sk_queried - sk_built))
    print("numba query  : {0:.3g} sec".format(nb_queried - nb_built))
Example #6
0
from time import time
import numpy as np
from ball_tree import BallTree as pyBallTree
from sklearn.neighbors import BallTree as skBallTree

# Benchmark script: compares construction and k-neighbors query times of
# the local ``pyBallTree`` and scikit-learn's ``skBallTree`` on the same
# random data.  ``print`` is called with a single parenthesised argument so
# the script runs unchanged on Python 2 and Python 3 (the original print
# statements were a SyntaxError on Python 3).

# 10000 random points in 3 dimensions, used for both building and querying.
X = np.random.random((10000, 3))

# The second positional argument (30) is presumably the leaf size for both
# constructors -- TODO confirm against the ``ball_tree`` module.
t0 = time()
pyBT = pyBallTree(X, 30)
t1 = time()
print("py construction: %.2g sec" % (t1 - t0))

t0 = time()
skBT = skBallTree(X, 30)
t1 = time()
print("sk construction: %.2g sec" % (t1 - t0))

# Query the full data set against each tree for increasing neighbor counts.
for k in [1, 2, 4, 8]:
    print("query %i in [%i, %i]:" % (k, X.shape[0], X.shape[1]))

    t0 = time()
    pyBT.query(X, k, dualtree=False)
    t1 = time()
    print("   py: %.2g sec" % (t1 - t0))

    t0 = time()
    skBT.query(X, k)
    t1 = time()
    print("   sk: %.2g sec" % (t1 - t0))
    
for r in 0.1, 0.3, 0.5:
Example #7
0
from time import time
import numpy as np
from ball_tree import BallTree as pyBallTree
from sklearn.neighbors import BallTree as skBallTree

# Timing comparison between the local ``pyBallTree`` and scikit-learn's
# ``skBallTree``: tree construction first, then k-neighbors queries.
# Every ``print`` uses the single-argument call form, which gives the same
# output on Python 2 and is valid syntax on Python 3 (the original print
# statements were not).

# Shared random input: 10000 rows, 3 columns.
X = np.random.random((10000, 3))

# NOTE(review): 30 is passed positionally to both constructors; it looks
# like the leaf size -- confirm for ``pyBallTree``.
t0 = time()
pyBT = pyBallTree(X, 30)
t1 = time()
print("py construction: %.2g sec" % (t1 - t0))

t0 = time()
skBT = skBallTree(X, 30)
t1 = time()
print("sk construction: %.2g sec" % (t1 - t0))

# For each neighbor count, time a self-query on both trees.
for k in [1, 2, 4, 8]:
    print("query %i in [%i, %i]:" % (k, X.shape[0], X.shape[1]))

    t0 = time()
    pyBT.query(X, k, dualtree=False)
    t1 = time()
    print("   py: %.2g sec" % (t1 - t0))

    t0 = time()
    skBT.query(X, k)
    t1 = time()
    print("   sk: %.2g sec" % (t1 - t0))

for r in 0.1, 0.3, 0.5: