def test_query_haversine():
    """Check haversine k-NN queries on a BallTree against brute force.

    Forty random 2-D points drawn from [0, 2*pi) are queried against
    themselves with k=5; both the distances and the neighbor indices
    must agree with ``brute_force_neighbors``.
    """
    random_state = check_random_state(0)
    points = 2 * np.pi * random_state.random_sample((40, 2))

    # leaf_size=1 makes the tree as deep as possible for this data set.
    tree = BallTree(points, leaf_size=1, metric='haversine')
    tree_dist, tree_ind = tree.query(points, k=5)

    ref_dist, ref_ind = brute_force_neighbors(points, points, k=5,
                                              metric='haversine')

    assert_array_almost_equal(tree_dist, ref_dist)
    assert_array_almost_equal(tree_ind, ref_ind)
def test_ball_tree_query_metrics(metric):
    """Check BallTree k-NN distances for boolean and discrete metrics.

    Parameters
    ----------
    metric : str
        Name of the metric under test. It must be a member of either
        ``BOOLEAN_METRICS`` or ``DISCRETE_METRICS``.

    Raises
    ------
    ValueError
        If ``metric`` belongs to neither group, since no suitable test
        data can be generated for it.
    """
    rng = check_random_state(0)
    if metric in BOOLEAN_METRICS:
        # Rounding uniform [0, 1) samples yields 0/1 valued features.
        X = rng.random_sample((40, 10)).round(0)
        Y = rng.random_sample((10, 10)).round(0)
    elif metric in DISCRETE_METRICS:
        # Scaling by 4 before rounding yields integer values in 0..4.
        X = (4 * rng.random_sample((40, 10))).round(0)
        Y = (4 * rng.random_sample((10, 10))).round(0)
    else:
        # Fail loudly here instead of hitting an unbound ``X`` below.
        raise ValueError(
            "metric %r is neither a boolean nor a discrete metric"
            % (metric,)
        )

    k = 5

    bt = BallTree(X, leaf_size=1, metric=metric)
    # Only the distances are compared; the indices are discarded.
    dist1, _ = bt.query(Y, k)
    dist2, _ = brute_force_neighbors(X, Y, k, metric)

    assert_array_almost_equal(dist1, dist2)
def test_gaussian_kde(n_samples=1000):
    """Compare BallTree Gaussian KDE results to scipy.stats.gaussian_kde.

    Parameters
    ----------
    n_samples : int, default=1000
        Number of standard-normal samples used to build both estimators.
    """
    from scipy.stats import gaussian_kde

    random_state = check_random_state(0)
    samples = random_state.normal(0, 1, n_samples)
    eval_grid = np.linspace(-5, 5, 30)

    for bandwidth in (0.01, 0.1, 1):
        tree = BallTree(samples[:, None])
        # The bandwidth handed to scipy is expressed relative to the
        # sample standard deviation.
        scipy_kde = gaussian_kde(samples,
                                 bw_method=bandwidth / np.std(samples))

        # The tree result is divided by n_samples before comparison.
        tree_density = (tree.kernel_density(eval_grid[:, None], bandwidth)
                        / n_samples)
        scipy_density = scipy_kde.evaluate(eval_grid)

        assert_array_almost_equal(tree_density, scipy_density, decimal=3)
def test_ball_tree_query_radius(n_samples=100, n_features=10):
    """Check that query_radius returns exactly the points within a radius.

    Parameters
    ----------
    n_samples : int, default=100
        Number of random points placed in the tree.
    n_features : int, default=10
        Dimensionality of the points.
    """
    random_state = check_random_state(0)
    points = 2 * random_state.random_sample(size=(n_samples, n_features)) - 1
    origin = np.zeros(n_features, dtype=float)

    tolerance = 1E-15  # roundoff error can cause test to fail

    tree = BallTree(points, leaf_size=5)
    # Euclidean distance from the origin to every point.
    dist_to_origin = np.sqrt(((points - origin) ** 2).sum(1))

    radii = np.linspace(dist_to_origin[0], dist_to_origin[-1], 100)
    for radius in radii:
        limit = radius + tolerance

        found = tree.query_radius([origin], limit)[0]
        expected = np.where(dist_to_origin <= limit)[0]

        # Sort both index arrays so the comparison ignores ordering.
        found.sort()
        expected.sort()

        assert_array_almost_equal(expected, found)
def test_ball_tree_query(metric, k, dualtree, breadth_first):
    """Check BallTree k-NN distances against the brute-force reference.

    Parameters
    ----------
    metric : str
        Key into ``METRICS``; the mapped value supplies extra keyword
        arguments for the metric.
    k : int
        Number of neighbors to request.
    dualtree : bool
        Forwarded to ``BallTree.query``.
    breadth_first : bool
        Forwarded to ``BallTree.query``.
    """
    random_state = check_random_state(0)
    train = random_state.random_sample((40, DIMENSION))
    queries = random_state.random_sample((10, DIMENSION))

    metric_params = METRICS[metric]

    tree = BallTree(train, leaf_size=1, metric=metric, **metric_params)
    tree_dist, tree_ind = tree.query(queries, k, dualtree=dualtree,
                                     breadth_first=breadth_first)

    ref_dist, ref_ind = brute_force_neighbors(train, queries, k, metric,
                                              **metric_params)

    # Indices are deliberately not compared: when several points share
    # the same distance their order may differ between the two methods.
    # The distances themselves are unaffected by such ties.
    assert_array_almost_equal(tree_dist, ref_dist)
def test_ball_tree_query_radius_distance(n_samples=100, n_features=10):
    """Check the distances returned by query_radius(return_distance=True).

    Parameters
    ----------
    n_samples : int, default=100
        Number of random points placed in the tree.
    n_features : int, default=10
        Dimensionality of the points.
    """
    random_state = check_random_state(0)
    points = 2 * random_state.random_sample(size=(n_samples, n_features)) - 1
    origin = np.zeros(n_features, dtype=float)

    tolerance = 1E-15  # roundoff error can cause test to fail

    tree = BallTree(points, leaf_size=5)
    dist_to_origin = np.sqrt(((points - origin) ** 2).sum(1))

    radii = np.linspace(dist_to_origin[0], dist_to_origin[-1], 100)
    for radius in radii:
        all_ind, all_dist = tree.query_radius([origin], radius + tolerance,
                                              return_distance=True)

        # A single query point was passed, so take the first result.
        neighbor_ind = all_ind[0]
        neighbor_dist = all_dist[0]

        # Recompute the distances directly from the returned indices.
        recomputed = np.sqrt(((origin - points[neighbor_ind]) ** 2).sum(1))

        assert_array_almost_equal(recomputed, neighbor_dist)
def test_ball_tree_kde(kernel, h, rtol, atol, breadth_first, n_samples=100,
                       n_features=3):
    """Check BallTree.kernel_density against ``compute_kernel_slow``.

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to both implementations.
    h : float
        Kernel bandwidth.
    rtol, atol : float
        Tolerances passed to ``kernel_density`` and reused for the
        comparison.
    breadth_first : bool
        Forwarded to ``kernel_density``.
    n_samples : int, default=100
        Number of training points and of query points.
    n_features : int, default=3
        Dimensionality of the points.
    """
    random_state = np.random.RandomState(0)
    train = random_state.random_sample((n_samples, n_features))
    queries = random_state.random_sample((n_samples, n_features))

    tree = BallTree(train, leaf_size=10)

    expected = compute_kernel_slow(queries, train, kernel, h)
    estimated = tree.kernel_density(queries, h, atol=atol, rtol=rtol,
                                    kernel=kernel,
                                    breadth_first=breadth_first)

    # Never compare with a relative tolerance tighter than 1e-7.
    comparison_rtol = max(rtol, 1e-7)
    assert_allclose(estimated, expected, atol=atol, rtol=comparison_rtol)
def test_ball_tree_pickle():
    """Check that BallTree survives a pickle round trip.

    Both a default tree and a tree built with the callable metric
    ``dist_func`` are pickled with protocols 0, 1 and 2; the restored
    trees must return the same query results as the originals.
    """
    random_state = check_random_state(0)
    points = random_state.random_sample((10, 3))

    tree = BallTree(points, leaf_size=1)
    # Test if BallTree with callable metric is picklable
    tree_pyfunc = BallTree(points, metric=dist_func, leaf_size=1, p=2)

    dist_before, ind_before = tree.query(points)
    dist_before_pyfunc, ind_before_pyfunc = tree_pyfunc.query(points)

    for protocol in (0, 1, 2):
        restored = pickle.loads(pickle.dumps(tree, protocol=protocol))
        restored_pyfunc = pickle.loads(
            pickle.dumps(tree_pyfunc, protocol=protocol))

        dist_after, ind_after = restored.query(points)
        dist_after_pyfunc, ind_after_pyfunc = restored_pyfunc.query(points)

        assert_array_almost_equal(dist_before, dist_after)
        assert_array_almost_equal(ind_before, ind_after)

        assert_array_almost_equal(dist_before_pyfunc, dist_after_pyfunc)
        assert_array_almost_equal(ind_before_pyfunc, ind_after_pyfunc)

        assert isinstance(restored, BallTree)
def test_ball_tree_two_point(n_samples=100, n_features=3):
    """Check two_point_correlation against counts from pairwise distances.

    Parameters
    ----------
    n_samples : int, default=100
        Number of points in the tree and in the query set.
    n_features : int, default=3
        Dimensionality of the points.
    """
    random_state = check_random_state(0)
    train = random_state.random_sample((n_samples, n_features))
    queries = random_state.random_sample((n_samples, n_features))
    radii = np.linspace(0, 1, 10)

    tree = BallTree(train, leaf_size=10)

    # Reference: for each radius, count the pairs whose Euclidean
    # distance does not exceed it.
    pairwise = DistanceMetric.get_metric("euclidean").pairwise(queries, train)
    expected_counts = [(pairwise <= radius).sum() for radius in radii]

    for dualtree in (True, False):
        counts = tree.two_point_correlation(queries, r=radii,
                                            dualtree=dualtree)
        assert_array_almost_equal(counts, expected_counts)