def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Benchmark the new ``BallTree`` against scikit-learn's on random data.

    Builds both trees over the same random point set, times the build and a
    ``k``-neighbor query of the data against itself, then repeats the query
    on the new tree for every combination of ``dualtree`` and
    ``breadth_first``.  Timings, the values reported by ``get_n_calls()``
    and result-agreement flags are printed to stdout; nothing is returned.

    Parameters
    ----------
    N : int
        Number of random points to generate.
    D : int
        Number of columns (dimensions) of the random data.
    k : int
        Number of neighbors requested from each query.
    leaf_size : int
        ``leaf_size`` passed to both tree constructors.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    # Time the two constructions back to back on identical data.
    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print(" sklearn : %.2g sec" % (t1 - t0))
    print(" new : %.2g sec" % (t2 - t1))

    # The sklearn query is the baseline and the first entry of every list.
    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for sklearn

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Named so the results do not shadow the ``D`` parameter.
            dist_new, ind_new = bt.query(X, k, dualtree=dualtree,
                                         breadth_first=breadth_first)
            t1 = time()

            dist.append(dist_new)
            ind.append(ind_new)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            label = 'dual/' if dualtree else 'single/'
            label += 'breadthfirst' if breadth_first else 'depthfirst'
            labels.append(label)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print(" %s : %.2g sec (%i calls)" % (lab, t, c))

    # Blank separator line.  A bare ``print`` is a no-op expression when
    # print is a function, so an explicit empty string is printed instead.
    print("")

    # Each result is compared with the previous one; at i == 0 the index
    # -1 wraps around, so the baseline is compared with the last variant.
    print(" distances match: %s"
          % ', '.join(['%s' % np.allclose(dist[i - 1], dist[i])
                       for i in range(len(dist))]))
    print(" indices match: %s"
          % ', '.join(['%s' % np.allclose(ind[i - 1], ind[i])
                       for i in range(len(ind))]))
def test_tree(N=10000, D=20, K=1, LS=40):
    """Compare scikit-learn's BallTree with the local ``BallTree``.

    Draws a fresh random seed, prints it so the run can be reproduced,
    generates ``N`` random points with ``D`` columns, and for each
    implementation times the tree build and a ``K``-neighbor query of the
    data against itself.  Prints whether distances and indices agree,
    followed by the build and query timings.  Returns nothing.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns (dimensions) per point.
    K : int
        Number of neighbors to query.
    LS : int
        ``leaf_size`` passed to both tree constructors.
    """
    from time import time
    from sklearn.neighbors import BallTree as skBallTree

    rseed = np.random.randint(10000)
    print("-------------------------------------------------------")
    print("{0} neighbors of {1} points in {2} dimensions".format(K, N, D))
    print("random seed = {0}".format(rseed))

    np.random.seed(rseed)
    X = np.random.random((N, D))

    # Timestamps are taken between consecutive stages, so each duration is
    # the difference of two neighbouring marks.
    start = time()
    sk_tree = skBallTree(X, leaf_size=LS)
    sk_built = time()
    sk_dist, sk_ind = sk_tree.query(X, K)
    sk_queried = time()
    py_tree = BallTree(X, leaf_size=LS)
    py_built = time()
    py_dist, py_ind = py_tree.query(X, K)
    py_queried = time()

    dist_ok = np.allclose(sk_dist, py_dist)
    ind_ok = np.allclose(sk_ind, py_ind)
    print("results match: {0} {1}".format(dist_ok, ind_ok))

    print("")
    print("sklearn build: {0:.2g} sec".format(sk_built - start))
    print("python build : {0:.2g} sec".format(py_built - sk_queried))

    print("")
    print("sklearn query: {0:.2g} sec".format(sk_queried - sk_built))
    print("python query : {0:.2g} sec".format(py_queried - py_built))
def bench_ball_tree(N=2000, D=3, k=15, leaf_size=30):
    """Time the new ``BallTree`` against scikit-learn's and check agreement.

    Both trees are built on the same random data and queried for the ``k``
    nearest neighbors of every point.  The new tree is queried four times,
    once per combination of the ``dualtree`` and ``breadth_first`` flags.
    Build times, query times, the values returned by ``get_n_calls()`` and
    the agreement of distances and indices are printed to stdout; nothing
    is returned.

    Parameters
    ----------
    N : int
        Number of random points to generate.
    D : int
        Number of columns (dimensions) of the random data.
    k : int
        Number of neighbors requested from each query.
    leaf_size : int
        ``leaf_size`` passed to both tree constructors.
    """
    print("Ball Tree")
    X = np.random.random((N, D)).astype(DTYPE)

    # Build timings: sklearn first, then the new implementation.
    t0 = time()
    btskl = skBallTree(X, leaf_size=leaf_size)
    t1 = time()
    bt = BallTree(X, leaf_size=leaf_size)
    t2 = time()

    print("Build:")
    print(" sklearn : %.2g sec" % (t1 - t0))
    print(" new : %.2g sec" % (t2 - t1))

    # Baseline query; its results seed each of the result lists below.
    t0 = time()
    Dskl, Iskl = btskl.query(X, k)
    t1 = time()

    dist = [Dskl]
    ind = [Iskl]
    times = [t1 - t0]
    labels = ['sklearn']
    counts = [-1]  # placeholder: no call count is collected for sklearn

    for dualtree in (False, True):
        for breadth_first in (False, True):
            bt.reset_n_calls()
            t0 = time()
            # Distinct names keep the ``D`` parameter from being clobbered.
            D_new, I_new = bt.query(X, k, dualtree=dualtree,
                                    breadth_first=breadth_first)
            t1 = time()

            dist.append(D_new)
            ind.append(I_new)
            times.append(t1 - t0)
            counts.append(bt.get_n_calls())

            traversal = 'breadthfirst' if breadth_first else 'depthfirst'
            labels.append(('dual/' if dualtree else 'single/') + traversal)

    print("Query:")
    for lab, t, c in zip(labels, times, counts):
        print(" %s : %.2g sec (%i calls)" % (lab, t, c))

    # Emit the blank separator explicitly: a bare ``print`` does nothing
    # once print is a function, while print("") works on Python 2 and 3.
    print("")

    # Pairwise comparison with the previous entry; index -1 at i == 0 wraps
    # to the last entry, closing the chain of comparisons into a cycle.
    print(" distances match: %s"
          % ', '.join(['%s' % np.allclose(dist[i - 1], dist[i])
                       for i in range(len(dist))]))
    print(" indices match: %s"
          % ', '.join(['%s' % np.allclose(ind[i - 1], ind[i])
                       for i in range(len(ind))]))
def test_tree(K=2, LS=3):
    """Check the local ``BallTree`` against sklearn and a brute-force search.

    Reads point geometries from ``../datasets/POINT/UK.geojson``, builds a
    scikit-learn BallTree (with the ``vincenty`` metric) and the local
    ``BallTree`` on them, and queries each for the ``K`` nearest neighbors
    of every point.  Column 1 of both distance arrays is compared with the
    output of ``brute_min``, the two trees are compared with each other,
    and the build/query timings are printed.  Returns nothing.

    Parameters
    ----------
    K : int
        Number of neighbors to query.  Column 1 of the distance arrays is
        read, so presumably K must be at least 2 (to be confirmed against
        the shape returned by ``query``).
    LS : int
        ``leaf_size`` passed to both tree constructors.
    """
    frame = gpd.read_file('../datasets/POINT/UK.geojson')
    X = np.stack(frame['geometry']).astype(np.float32)

    print("-------------------------------------------------------")
    print("{0} neighbors of {1} points".format(K, len(X)))

    # pre-run to jit compile the code
    BallTree(X, leaf_size=LS).query(X, K)

    start = time()
    sk_tree = skBallTree(X, leaf_size=LS, metric=vincenty)
    sk_built = time()
    sk_dist, sk_ind = sk_tree.query(X, K)
    sk_queried = time()
    nb_tree = BallTree(X, leaf_size=LS)
    nb_built = time()

    # Disabled debug export of the tree nodes as circles:
    # geometry = gpd.GeoSeries(map(Point, nb_tree.node_centroids))
    # df = gpd.GeoDataFrame(geometry=geometry, crs={'init': 'epsg:4326'})
    # df['node_idx_start'] = nb_tree.node_idx[:, 0]
    # df['node_idx_end'] = nb_tree.node_idx[:, 1]
    # df['idx'] = np.arange(nb_tree.n_nodes)
    # df['radius'] = nb_tree.node_radius
    # df['geometry'] = df.apply(point_to_circle, axis=1)
    # df.to_file('nodes4_.geojson', driver='GeoJSON')

    nb_dist, nb_ind = nb_tree.query(X, K)
    nb_queried = time()

    brute_start = time()
    brute_dist = brute_min(X)
    brute_done = time()

    # Only column 1 of each distance array is checked against brute force.
    print('Brute dist = sklearn:', np.allclose(sk_dist[:, 1], brute_dist))
    print('Brute dist = my dist:', np.allclose(nb_dist[:, 1], brute_dist))
    print()

    print('My dist = sklearn:', np.allclose(sk_dist, nb_dist))
    print('My index = sklearn', np.allclose(sk_ind, nb_ind, rtol=0))
    print()

    print("sklearn build: {0:.3g} sec".format(sk_built - start))
    print("numba build : {0:.3g} sec".format(nb_built - sk_queried))
    print()

    print("sklearn query: {0:.3g} sec".format(sk_queried - sk_built))
    print("numba query : {0:.3g} sec".format(nb_queried - nb_built))
    print("brute query : {0:.3g} sec".format(brute_done - brute_start))
def test_tree(N=1000, D=3, K=5, LS=40):
    """Compare scikit-learn's BallTree with the local ``BallTree``.

    Prints the installed numba version and a freshly drawn random seed,
    generates ``N`` random points with ``D`` columns, runs one untimed
    build-and-query of the local tree, then times build and ``K``-neighbor
    query for both implementations.  Prints whether the distances and
    indices agree, followed by the timings.  Returns nothing.

    Parameters
    ----------
    N : int
        Number of random points.
    D : int
        Number of columns (dimensions) per point.
    K : int
        Number of neighbors to query.
    LS : int
        ``leaf_size`` passed to the tree constructors.
    """
    from time import time
    from sklearn.neighbors import BallTree as skBallTree

    print("-------------------------------------------------------")
    print("Numba version: " + numba.__version__)

    rseed = np.random.randint(10000)
    print("-------------------------------------------------------")
    print("{0} neighbors of {1} points in {2} dimensions".format(K, N, D))
    print("random seed = {0}".format(rseed))

    np.random.seed(rseed)
    X = np.random.random((N, D))

    # pre-run to jit compile the code
    BallTree(X, leaf_size=LS).query(X, K)

    # Consecutive timestamps; each stage's duration is the gap between
    # its mark and the previous one.
    start = time()
    sk_tree = skBallTree(X, leaf_size=LS)
    sk_built = time()
    sk_dist, sk_ind = sk_tree.query(X, K)
    sk_queried = time()
    nb_tree = BallTree(X, leaf_size=LS)
    nb_built = time()
    nb_dist, nb_ind = nb_tree.query(X, K)
    nb_queried = time()

    dist_ok = np.allclose(sk_dist, nb_dist)
    ind_ok = np.allclose(sk_ind, nb_ind)
    print("results match: {0} {1}".format(dist_ok, ind_ok))

    print("")
    print("sklearn build: {0:.3g} sec".format(sk_built - start))
    print("numba build : {0:.3g} sec".format(nb_built - sk_queried))

    print("")
    print("sklearn query: {0:.3g} sec".format(sk_queried - sk_built))
    print("numba query : {0:.3g} sec".format(nb_queried - nb_built))
from time import time import numpy as np from ball_tree import BallTree as pyBallTree from sklearn.neighbors import BallTree as skBallTree X = np.random.random((10000, 3)) t0 = time() pyBT = pyBallTree(X, 30) t1 = time() print "py construction: %.2g sec" % (t1 - t0) t0 = time() skBT = skBallTree(X, 30) t1 = time() print "sk construction: %.2g sec" % (t1 - t0) for k in [1, 2, 4, 8]: print "query %i in [%i, %i]:" % (k, X.shape[0], X.shape[1]) t0 = time() pyBT.query(X, k, dualtree=False) t1 = time() print " py: %.2g sec" % (t1 - t0) t0 = time() skBT.query(X, k) t1 = time() print " sk: %.2g sec" % (t1 - t0) for r in 0.1, 0.3, 0.5: