def wass_2Ddist_approx(ssData, ssFake):
    """
    Approximate the Wasserstein distance between two 2D samples.

    Each sample is ordered along a Hilbert space-filling curve, the two
    samples are paired rank by rank along that ordering, and the mean
    Euclidean distance between the paired points is returned.

    Parameters
    ----------
    ssData, ssFake : n*2 arrays, where n is the sample size
        Coordinates are truncated to integers (``astype(int)``) only to
        compute the Hilbert ordering; the returned distance is computed
        from the original, untruncated values.

    Returns
    -------
    scalar
        The approximate Wasserstein distance between the 2D samples
    """
    maxData = np.max([np.max(ssData), np.max(ssFake)])

    # Number of curve iterations (bits per dimension) needed to cover the
    # largest coordinate. HilbertCurve requires at least one iteration, so
    # clamp to 1; otherwise samples whose maximum is 0 would request p = 0.
    k = max(1, int(np.ceil(np.log(maxData + 1) / np.log(2))))
    hilbert_curve = HilbertCurve(k, 2)

    # Rank every point of each sample by its distance along the curve.
    permut = np.argsort(
        hilbert_curve.distances_from_points(ssData.astype(int))
    )
    permutFake = np.argsort(
        hilbert_curve.distances_from_points(ssFake.astype(int))
    )

    # Pair the i-th point of one ordering with the i-th point of the other
    # and average the Euclidean distances between the pairs.
    diff = ssData[permut, :] - ssFake[permutFake, :]
    sqrtSumSqrs = np.sqrt(np.sum(diff**2, axis=1))
    return np.mean(sqrtSumSqrs)
def test_base(self):
    """Assert that distances_from_points leaves its input unmodified."""
    n = 4
    p = 8
    hilbert_curve = HilbertCurve(p, n)
    x = [[1, 5, 3, 19]]
    # Copy the inner point lists too. A shallow ``list(x)`` would share them
    # with ``x``, so an in-place mutation would appear in both objects and
    # the comparison below could never fail.
    x_in = [list(point) for point in x]
    hilbert_curve.distances_from_points(x_in)
    self.assertEqual(x, x_in)
def test_reversibility(self):
    """Round-trip every distance on the curve through points and back.

    Converts each distance to coordinates with points_from_distances,
    converts those coordinates back with distances_from_points, and checks
    that every original distance is recovered.
    """
    dims = 3
    iterations = 5
    curve = HilbertCurve(iterations, dims)

    # Every distance on the curve: 0 .. 2**(dims * iterations) - 1.
    all_distances = list(range(2 ** (dims * iterations)))
    recovered = curve.distances_from_points(
        curve.points_from_distances(all_distances)
    )

    for original, round_tripped in zip(all_distances, recovered):
        self.assertEqual(original, round_tripped)
def test_distances_from_points_ndarray(self):
    """Check that ndarray input gives a result of the same type when match_type=True."""
    curve = HilbertCurve(3, 2)
    corner_points = np.array([[0, 0], [7, 7]])

    result = curve.distances_from_points(corner_points, match_type=True)

    self.assertTrue(isinstance(result, type(corner_points)))
def test_distances_from_points_tuple(self):
    """Check that tuple input gives a result of the same type when match_type=True."""
    curve = HilbertCurve(3, 2)
    corner_points = ((0, 0), (7, 7))

    result = curve.distances_from_points(corner_points, match_type=True)

    self.assertTrue(isinstance(result, type(corner_points)))
def hilbert_distance_dask(geoseries, level=16):
    """Check the dask ``hilbert_distance`` against a HilbertCurve reference.

    The bounds of ``geoseries`` are discretised with
    ``_continuous_to_discrete_coords`` and fed to ``HilbertCurve`` to get the
    expected distances. The same series is then wrapped with
    ``from_geopandas`` (one partition) and its computed ``hilbert_distance``
    must equal the expected values, be a ``pd.Series``, and keep the index of
    the dask frame.
    """
    # Reference values from the hilbertcurve package.
    xs, ys = _continuous_to_discrete_coords(
        geoseries.bounds.to_numpy(),
        level=level,
        total_bounds=geoseries.total_bounds,
    )
    reference_curve = HilbertCurve(p=level, n=2)
    expected = reference_curve.distances_from_points(
        np.stack((xs, ys), axis=1)
    )

    # Values produced by the implementation under test.
    ddf = from_geopandas(geoseries, npartitions=1)
    result = ddf.hilbert_distance(level=level).compute()

    assert list(result) == expected
    assert isinstance(result, pd.Series)
    assert_index_equal(ddf.index.compute(), result.index)
# With a single iteration (p=1) in 2 dimensions (n=2) the curve has only 4
# locations:
#
#   distance | coordinates
#   0        | [0, 0]
#   1        | [0, 1]
#   2        | [1, 1]
#   3        | [1, 0]

# Points -> distances along the Hilbert curve.
p = 1
n = 2
hilbert_curve = HilbertCurve(p, n)
points = [[0, 0], [0, 1], [1, 1], [1, 0]]
dists = hilbert_curve.distances_from_points(points)

print("simple distances from points")
print("=" * 80)
for point, dist in zip(points, dists):
    print(f'distance(x={point}, p={p}, n={n}) = {dist}')
print()

# Distances -> coordinates along the Hilbert curve.
p = 1
n = 2
hilbert_curve = HilbertCurve(p, n)
dists = [0, 1, 2, 3]
points = hilbert_curve.points_from_distances(dists)

print("simple points from distances")
import numpy as np
from hilbertcurve.hilbertcurve import HilbertCurve

# Small 2D curve with two iterations per dimension.
p = 2
n = 2
hilbert_curve = HilbertCurve(p, n)

# Random integer points; the upper bound passed to randint is exclusive,
# so coordinates range over 0 .. hilbert_curve.max_x.
num_points = 10_000
points = np.random.randint(
    low=0,
    high=hilbert_curve.max_x + 1,
    size=(num_points, hilbert_curve.n),
)

# Same conversion without and with match_type=True.
distances1 = hilbert_curve.distances_from_points(points)
distances2 = hilbert_curve.distances_from_points(points, match_type=True)

a = 1