def circumsphere(vertices):
    '''
    Compute the circumsphere of a simplex from its Cayley-Menger matrix.

    @param vertices: coordinates of the simplex vertices, one vertex per row
    @returns: radius of circumsphere, coordinates of circumsphere
    @rtype: tuple
    '''
    # Method described at
    # https://westy31.home.xs4all.nl/Circumsphere/ncircumsphere.htm and
    # https://codereview.stackexchange.com/questions/77593/calculating-the-volume-of-a-tetrahedron
    from scipy.spatial import distance

    # Pairwise squared distances in condensed form.
    pairwise_sq = distance.pdist(vertices, metric='sqeuclidean')
    # The count itself is not needed; the call is kept because it rejects
    # condensed vectors from which no observation count can be derived.
    distance.num_obs_y(pairwise_sq)

    # Border the square distance matrix with a leading column and row of
    # ones, then zero the diagonal: this is the Cayley-Menger matrix.
    sq_matrix = distance.squareform(pairwise_sq)
    with_column = np.insert(sq_matrix, 0, values=1, axis=1)
    cayley_menger = np.insert(with_column, 0, values=1, axis=0)
    np.fill_diagonal(cayley_menger, 0)

    # Element [0, 0] of the inverse equals -2 * r**2; the remainder of the
    # first column holds the barycentric coordinates of the centre.
    inverse = np.linalg.inv(cayley_menger)
    radius = math.sqrt(inverse[0][0] / -2)
    centre_weights = inverse[1:, 0]
    centre = bary2cart(vertices, centre_weights)
    return radius, centre
def wrapper(distances, *, tol=0.):
    """Apply ``verifier`` to the sorted edge lengths of every index triple.

    Parameters
    ----------
    distances : array_like
        Pairwise distances, either condensed (1-D) or as a square matrix.
        A matrix is converted to condensed form first.
    tol : float, keyword-only
        Non-negative tolerance forwarded to ``verifier``.

    Returns
    -------
    bool
        ``True`` when ``verifier`` holds for every triple.

    Raises
    ------
    ValueError
        If ``tol`` is negative.
    """
    if tol < 0:
        raise ValueError('tol must be non-negative.')

    distances = _np.asarray(distances, dtype=float)

    # force the array to be condensed
    if distances.ndim > 1:
        distances = _dist.squareform(distances)

    # the number of observations in the distances
    n = _dist.num_obs_y(distances)

    # presumably one (i, j, k) row per triple of observations -- the exact
    # enumeration is defined by _indexing.triple_indices.
    i, j, k = _indexing.triple_indices(n).T

    # One row per triple, NOT one row per observation: sizing the buffer with
    # `n` only matches the number of triples by coincidence and otherwise
    # makes the column assignments below fail with a shape mismatch.
    edgelengths = _np.empty((i.size, 3))

    edgelengths[:, 0] = distances[_indexing.condensed_indices(n, i, j)]
    edgelengths[:, 1] = distances[_indexing.condensed_indices(n, i, k)]
    edgelengths[:, 2] = distances[_indexing.condensed_indices(n, k, j)]

    # Sort each triple's edges ascending before handing them to the verifier.
    edgelengths.sort(axis=1)

    return _np.all(verifier(edgelengths, tol))
def are_coplanar(markers) -> bool:
    """Checks if given marker positions are co-planar.

    The test computes the squared volume of the simplex spanned by the
    markers through the Cayley-Menger determinant and reports co-planarity
    when that volume is not positive.

    :param markers: list of markers, each an array with single x,y,z coordinates.
    :type markers: list
    :return: Whether the markers are co-planar or not.
    :rtype: bool
    """
    # Fewer than 4 markers are always co-planar (if not superimposed, but
    # this is practically not possible with markers).  The guard must be
    # "< 4": with "> 4" three markers reached the determinant below, which
    # then yields their (positive) triangle area and a wrong "not co-planar".
    if len(markers) < 4:
        return True

    # NOTE(review): the determinant below is the tetrahedron volume only for
    # exactly 4 markers -- confirm callers never pass more.

    # Calculate the volume of the tetrahedron formed by the 4 markers.
    # If this volume is zero, then they must be coplanar.

    # β_ij = |v_i - v_j|²
    markers = np.asarray(markers, dtype=float)
    sq_distance = distance.pdist(markers, metric='sqeuclidean')

    # Add border: a leading run of ones becomes the first row/column of the
    # square matrix produced by squareform.
    n_vertices = distance.num_obs_y(sq_distance)
    bordered = np.concatenate((np.ones(n_vertices), sq_distance))

    # Make matrix and find volume.
    sq_distance_matrix = distance.squareform(bordered)
    coeff = -(-2)**(n_vertices - 1) * factorial(n_vertices - 1)**2
    volume_squared = np.linalg.det(sq_distance_matrix) / coeff

    if volume_squared <= 0:
        return True

    return False
def simplex_volume(*, vertices=None, sides=None) -> float:
    """
    Compute the volume of a simplex from its vertices or its side lengths.

    Exactly one of the two keyword arguments must be supplied:

    * ``vertices`` -- array of shape (N+1, N) with the position vectors of
      the N+1 vertices in N dimensions;
    * ``sides`` -- the condensed pairwise distance vector in the layout
      produced by scipy.spatial.distance.pdist.

    A ValueError is raised when both or neither argument is given, when the
    side vector is not a valid condensed distance vector, or when the
    computed squared volume is not positive (for example for coplanar,
    colinear or coincident vertices).

    Warning: this algorithm has not been tested for numerical stability.
    """
    # Cayley-Menger determinant, see
    # http://mathworld.wolfram.com/Cayley-MengerDeterminant.html
    have_vertices = vertices is not None
    have_sides = sides is not None
    if have_vertices == have_sides:
        raise ValueError("Exactly one of vertices and sides must be given")

    # β_ij = |v_i - v_j|²
    if have_vertices:
        points = np.asarray(vertices, dtype=float)
        squared = distance.pdist(points, metric='sqeuclidean')
    else:
        lengths = np.asarray(sides, dtype=float)
        if not distance.is_valid_y(lengths):
            raise ValueError("Invalid number or type of side lengths")
        squared = lengths**2

    # Prepend the border of ones while still in condensed form, so that
    # squareform yields the bordered matrix directly.
    count = distance.num_obs_y(squared)
    condensed_cm = np.hstack([np.ones(count), squared])
    cayley_menger = distance.squareform(condensed_cm)

    order = count - 1
    scale = -((-2) ** order) * factorial(order) ** 2
    squared_volume = np.linalg.det(cayley_menger) / scale

    if squared_volume <= 0:
        raise ValueError('Provided vertices do not form a tetrahedron')

    return np.sqrt(squared_volume)
def core_distance(Y, weight_fun=None, minWt=None, minPts=None):
    """Compute the core distance of every observation.

    For each point the distances to all points (itself included, at
    distance 0) are sorted, and the core distance is the entry at rank
    ``k``.  Without weights ``k`` is ``minPts``; with weights ``k`` is the
    number of leading cumulative neighbour weights strictly below ``minWt``,
    capped by ``minPts``.  In both cases ``k`` is capped at ``n - 1``.

    Parameters
    ----------
    Y : ndarray
        Condensed distance matrix containing distances for pairs of
        observations. See scipy's `squareform` function for details.
    weight_fun : callable, optional
        Called as ``weight_fun(i, others)`` to obtain the weights between
        point ``i`` and the other points.
    minWt : scalar or array_like, optional
        Cumulative neighbour weight limit, broadcast against the points.
    minPts : int or array_like, optional
        Neighbour rank, broadcast against the points.  Defaults to ``n - 1``.

    Returns
    -------
    core_distance : ndarray
        Core distances for data points.
    """
    (Y, _) = validate_y(Y, name="Y")
    n = sp_distance.num_obs_y(Y)
    core_dist = np.empty(n, dtype=Y.dtype)
    row = np.empty(n, dtype=Y.dtype)  # scratch: distances from one point
    if minPts is None:
        minPts = n - 1
    last = n - 1

    if weight_fun is None or minWt is None:
        # Plain k-th nearest neighbour distance.
        for (i, mp) in np.broadcast(np.arange(n), minPts):
            others = np.flatnonzero(np.arange(n) != i)
            row[others] = Y[condensed_index(n, i, others)]
            row[i] = 0
            row.sort()
            core_dist[i] = row[np.minimum(last, mp)]
        return core_dist

    weights = np.empty(n, dtype=np.double)  # scratch: weights from one point
    for (i, mp, mw) in np.broadcast(np.arange(n), minPts, minWt):
        others = np.flatnonzero(np.arange(n) != i)
        row[others] = Y[condensed_index(n, i, others)]
        row[i] = 0
        weights[others] = weight_fun(i, others)
        weights[i] = 0
        by_distance = row.argsort()
        # How many points (closest first) keep the running weight below mw.
        below_limit = int(np.sum(weights[by_distance].cumsum() < mw))
        rank = np.minimum(below_limit, mp)
        core_dist[i] = row[by_distance[np.minimum(last, rank)]]
    return core_dist
def linkage(y, method):
    '''An extended edition of scipy.cluster.hierarchy.linkage allowing for
    a custom set distance function.

    Parameters
    ----------
    y : ndarray
        Condensed distance matrix.
    method : str or callable
        A string is forwarded unchanged to scipy.cluster.hierarchy.linkage.
        A callable must have the form ``f(dm, set1, set2)`` and return the
        distance between the two sets of original observation indices,
        given the square distance matrix ``dm`` of the original
        observations.

    Returns
    -------
    Z : ndarray, shape (d - 1, 4)
        One row per merge: the two merged cluster ids, their distance and
        the size of the new cluster.
    '''
    if isinstance(method, str):
        # Local import so the delegate resolves regardless of how scipy was
        # imported at module level.
        from scipy.cluster import hierarchy
        return hierarchy.linkage(y, method=method)

    distance.is_valid_y(y, throw=True, name='y')
    d = distance.num_obs_y(y)
    Z = np.zeros((d - 1, 4))

    dm = distance.squareform(y)    # working matrix, updated after each merge
    dmo = distance.squareform(y)   # untouched original, handed to `method`
    # np.nan (lower case) exists in every NumPy release; np.NaN was removed
    # in NumPy 2.0.  NaN on the diagonal keeps self-pairs out of the argmin.
    np.fill_diagonal(dm, np.nan)

    idmap = {i: i for i in range(d)}   # matrix slot -> current cluster id
    active = list(range(d))            # matrix slots still in play
    nodes = {i: {i} for i in range(d)}  # cluster id -> member observations

    for i in range(d - 1):
        m = d - i  # number of active slots in this round
        mink = np.nanargmin(dm[np.ix_(active, active)])
        minh = active[mink // m]
        minw = active[mink % m]
        left = idmap[minh]
        right = idmap[minw]

        Z[i, 0] = left
        Z[i, 1] = right
        Z[i, 2] = dm[minh, minw]
        Z[i, 3] = len(nodes[left]) + len(nodes[right])

        # The merged cluster takes over slot `minh`; slot `minw` retires.
        nid = d + i
        idmap[minh] = nid
        nodes[nid] = nodes[left] | nodes[right]
        active.remove(minw)

        for j in active:
            if j == minh:
                continue
            dm[minh, j] = method(dmo, nodes[nid], nodes[idmap[j]])
            dm[j, minh] = dm[minh, j]
    return Z
def core_distance(Y, weight_fun=None, minWt=None, minPts=None):
    """Compute the core distance of every observation.

    For each point the distances to all points (itself included, at
    distance 0) are sorted, and the core distance is the entry at rank
    ``k``.  Without weights ``k`` is ``minPts``; with weights ``k`` is the
    number of leading cumulative neighbour weights strictly below ``minWt``,
    capped by ``minPts``.  In both cases ``k`` is capped at ``n - 1``.

    Parameters
    ----------
    Y : ndarray
        Condensed distance matrix containing distances for pairs of
        observations. See scipy's `squareform` function for details.
    weight_fun : callable, optional
        Called as ``weight_fun(i, others)`` to obtain the weights between
        point ``i`` and the other points.
    minWt : scalar or array_like, optional
        Cumulative neighbour weight limit, broadcast against the points.
    minPts : int or array_like, optional
        Neighbour rank, broadcast against the points.  Defaults to ``n - 1``.

    Returns
    -------
    core_distance : ndarray
        Core distances for data points.
    """
    (Y, _) = validate_y(Y, name="Y")
    n = sp_distance.num_obs_y(Y)
    core_dist = np.empty(n, dtype=Y.dtype)
    dists = np.empty(n, dtype=Y.dtype)  # scratch buffer reused for each point
    minPts = minPts if minPts is not None else n - 1

    def load_distances(point):
        """Fill `dists` with the distances from `point`; return the others."""
        neighbours = np.flatnonzero(np.arange(n) != point)
        dists[neighbours] = Y[condensed_index(n, point, neighbours)]
        dists[point] = 0
        return neighbours

    use_weights = not (weight_fun is None or minWt is None)
    if use_weights:
        wts = np.empty(n, dtype=np.double)  # scratch buffer of point weights
        for (i, mp, mw) in np.broadcast(np.arange(n), minPts, minWt):
            neighbours = load_distances(i)
            wts[neighbours] = weight_fun(i, neighbours)
            wts[i] = 0
            nearest_first = dists.argsort()
            running = wts[nearest_first].cumsum()
            # Number of points whose running weight stays below the limit.
            allowed = np.minimum(int(np.sum(running < mw)), mp)
            core_dist[i] = dists[nearest_first[np.minimum(n - 1, allowed)]]
    else:
        for (i, mp) in np.broadcast(np.arange(n), minPts):
            load_distances(i)
            dists.sort()
            core_dist[i] = dists[np.minimum(n - 1, mp)]
    return core_dist
def reachability_order(Y, core_dist=None):
    """Order observations for a reachability plot.

    Starting from observation 0, each step visits the unvisited observation
    with the smallest current traversal distance to the visited set.  When
    core distances are given, every link leaving a point is at least that
    point's core distance.

    Parameters
    ----------
    Y : ndarray
        Condensed distance matrix
    core_dist : ndarray, optional
        Core distances for original observations of Y.

    Returns
    -------
    o : ndarray
        1-D array of indices of original observations in traversal order.
    d : ndarray
        1-D array. `d[i]` is the `i`th traversal distance.
    """
    Y = np.asanyarray(Y)
    n = sp_distance.num_obs_y(Y)
    has_core = core_dist is not None
    if has_core:
        core_dist = np.asarray(core_dist)
        if core_dist.shape != (n,):
            raise ValueError(
                "core_dist is not a 1-D array with compatible size to Y.")

    order = np.empty(n, dtype=np.intp)
    pending = np.ones(n, dtype=bool)

    # Seed the traversal at observation 0.
    order[0] = 0
    pending[0] = False
    reach = np.empty(n, dtype=Y.dtype)
    reach[0] = 0
    reach[1:] = Y[condensed_index(n, 0, np.arange(1, n))]
    if has_core:
        reach = np.maximum(reach, core_dist[0])

    for step in range(1, n):
        candidates = np.flatnonzero(pending)
        current = candidates[reach[pending].argmin()]
        order[step] = current
        pending[current] = False
        # Distances from the newly visited point to what is still pending.
        links = Y[condensed_index(n, current, np.flatnonzero(pending))]
        if has_core:
            links = np.maximum(links, core_dist[current])
        reach[pending] = np.minimum(reach[pending], links)
    return (order, reach[order])
def reachability_order(Y, core_dist=None):
    """Order observations for a reachability plot.

    Starting from observation 0, each step visits the unvisited observation
    with the smallest current traversal distance to the visited set.  When
    core distances are given, every link leaving a point is at least that
    point's core distance.

    Parameters
    ----------
    Y : ndarray
        Condensed distance matrix
    core_dist : ndarray, optional
        Core distances for original observations of Y.

    Returns
    -------
    o : ndarray
        1-D array of indices of original observations in traversal order.
    d : ndarray
        1-D array. `d[i]` is the `i`th traversal distance.
    """
    Y = np.asanyarray(Y)
    n = sp_distance.num_obs_y(Y)
    if core_dist is not None:
        core_dist = np.asarray(core_dist)
        if core_dist.shape != (n,):
            raise ValueError("core_dist is not a 1-D array with compatible size to Y.")

    def clamp(values, source):
        """Raise `values` to the core distance of `source`, if available."""
        if core_dist is None:
            return values
        return np.maximum(values, core_dist[source])

    visit_order = np.empty(n, dtype=np.intp)
    unvisited = np.ones(n, dtype=bool)
    visit_order[0] = 0
    unvisited[0] = False

    best = np.empty(n, dtype=Y.dtype)
    best[0] = 0
    best[1:] = Y[condensed_index(n, 0, np.arange(1, n))]
    best = clamp(best, 0)

    for position in range(1, n):
        remaining = np.flatnonzero(unvisited)
        chosen = remaining[best[unvisited].argmin()]
        visit_order[position] = chosen
        unvisited[chosen] = False
        outgoing = Y[condensed_index(n, chosen, np.flatnonzero(unvisited))]
        outgoing = clamp(outgoing, chosen)
        best[unvisited] = np.minimum(best[unvisited], outgoing)
    return (visit_order, best[visit_order])
def cm_matrix_det_ns(vertices: np.ndarray) -> float:
    """Return the sign-normalised Cayley-Menger determinant of a simplex.

    The determinant is only sign-corrected, not divided by the usual
    normalising coefficient: per the original author that calculation tends
    to break floating point for n > 102.  The result can therefore be used
    to decide which of two n-dimensional simplices is bigger, but little
    else.

    :param vertices: vertex coordinates, one vertex per row.
    :return: the determinant, made positive for a valid simplex.
    :raises ValueError: if the sign-normalised determinant is not positive.
    """
    vertices = np.asarray(vertices, dtype=DTYPE)

    square_dists = distance.pdist(vertices, metric='sqeuclidean')
    number_of_vertices = distance.num_obs_y(square_dists)

    # Prepending the ones in condensed form yields the bordered matrix.
    bordered_values = np.concatenate(
        (np.ones(number_of_vertices), square_dists))
    distance_matrix = distance.squareform(bordered_values)

    det = np.linalg.det(distance_matrix)

    # The volume formula divides by -(-2)**(n-1) * ((n-1)!)**2 with n the
    # number of vertices; that coefficient is negative exactly when n is odd.
    # The parity of the vertex COUNT decides the flip -- `vertices.size` is
    # the total number of coordinates, which is N*(N+1) and hence always even
    # for a full N-simplex, so it would never flip (e.g. every triangle in
    # 2-D would be rejected as degenerate).
    if number_of_vertices % 2 == 1:
        det = -det

    if det <= 0:
        raise ValueError('Degenerate or invalid simplex')

    return det
def setup_weights(distances, weights, max_weight=2.0, min_weight=1e-4):
    """\
    Sets up condensed weights array.

    Parameters
    ----------
    distances : array (n_samples*(n_samples-1)/2,)
        Condensed distances.
    weights : None or str or callable or array
        * ``None`` -- no weights; ``None`` is returned.
        * ``'reciprocal'`` -- weights are ``1 / distances``.  Any other
          string terminates the program through ``sys.exit``.
        * callable -- applied to the whole ``distances`` array; if that
          call raises, it is applied to each distance individually.
        * array with one entry per sample -- expanded to pairwise weights
          ``w_i * w_j`` in condensed form.  Any other array is taken as
          already condensed and must match the shape of ``distances``.
    max_weight : float or None
        Upper clip applied to the result, skipped when ``None``.
    min_weight : float or None
        Lower clip applied to the result, skipped when ``None``.

    Returns
    -------
    Condensed weights clipped to ``[min_weight, max_weight]``, or ``None``.
    """
    if isinstance(weights, str):
        if weights == 'reciprocal':
            weights = 1.0 / distances
        else:
            sys.exit('weight type unknown')
    elif callable(weights):
        try:
            # Vectorised attempt.  The argument must be the `distances`
            # array; passing the `distance` module made this call fail (or
            # return garbage) and silently forced the slow fallback.
            weights = weights(distances)
        except Exception:
            # The callable only handles scalars: apply it per distance.
            weights = np.array([weights(dist) for dist in distances])
    elif isinstance(weights, np.ndarray):
        # NOTE: for 3 samples the per-sample and condensed lengths coincide;
        # such input is treated as per-sample weights.
        if len(weights) == distance.num_obs_y(distances):
            assert np.min(weights) >= 0
            weights = np.array(weights)
            if weights.ndim == 1:
                # `.T` is a no-op on 1-D arrays, so make it a column to get
                # the outer product w_i * w_j rather than elementwise w**2.
                weights = weights[:, np.newaxis]
            weights = weights.T * weights
            # checks=False: the diagonal of the product is not zero.
            weights = distance.squareform(weights, checks=False)
        assert distances.shape == weights.shape
    else:
        assert weights is None

    if weights is not None:
        if max_weight is not None:
            weights = np.minimum(weights, max_weight)
        if min_weight is not None:
            weights = np.maximum(weights, min_weight)
    return weights
def test_core_distance():
    """
    Y encodes distances for pairs:
    (0, 1) = 2.2
    (0, 2) = 7.2
    (0, 3) = 10.4
    (0, 4) = 6.7
    (1, 2) = 12.8
    (1, 3) = 8.6
    (1, 4) = 8.9
    (2, 3) = 12.7
    (2, 4) = 8.6
    (3, 4) = 2.2

    closest to furtherest distances
    0 = 2.2, 6.7, 7.2, 10.4
    1 = 2.2, 8.6, 8.9, 12.8
    2 = 7.2, 8.6, 12.7, 12.8
    3 = 2.2, 8.6, 10.4, 12.7
    4 = 2.2, 6.7, 8.6, 8.9
    """
    Y = np.array([2.2, 7.2, 10.4, 6.7, 12.8, 8.6, 8.9, 12.7, 8.6, 2.2])
    n = sp_distance.num_obs_y(Y)

    def unit_weight(_i, _j):
        return 1

    nearest = [2.2, 2.2, 7.2, 2.2, 2.2]
    fourth = [10.4, 12.8, 12.8, 12.7, 8.9]

    found = core_distance(Y, minPts=1)
    assert_true(
        equal_arrays(found, nearest),
        "returns nearest neighbour distance with minPts=1")

    found = core_distance(Y, weight_fun=unit_weight, minWt=[1] * n)
    assert_true(
        equal_arrays(found, nearest),
        "returns nearest neighbour distance with unit weights and minWts")

    found = core_distance(Y, minPts=2)
    assert_true(
        equal_arrays(found, [6.7, 8.6, 8.6, 8.6, 6.7]),
        "returns 2-nearest neighbour distance with minPts=2")

    found = core_distance(Y, minPts=4)
    assert_true(
        equal_arrays(found, fourth),
        "returns distance to 4-nearest neighbour with minPts=4")

    found = core_distance(Y, weight_fun=unit_weight, minWt=[4] * n)
    assert_true(
        equal_arrays(found, fourth),
        "returns distance to 4-nearest neighbour distance with unit "
        "weights and minWts=4")

    # Second scenario -- Y encodes weighted distances for pairs:
    #   (0, 1) = 17.7   (0, 2) = 70.0   (0, 3) = 97.1   (0, 4) = 50.8
    #   (1, 2) = 121.6  (1, 3) = 79.4   (1, 4) = 82.1
    #   (2, 3) = 120.9  (2, 4) = 77.3
    #   (3, 4) = 14.4
    #
    # w encodes pairwise weights:
    #   (0, 1) = 4   (0, 2) = 8   (0, 3) = 6   (0, 4) = 10
    #   (1, 2) = 6   (1, 3) = 6   (1, 4) = 10
    #   (2, 3) = 12  (2, 4) = 20
    #   (3, 4) = 15
    #
    # cumulative weights
    #   0 = 4, 14, 22, 28
    #   1 = 4, 10, 20, 26
    #   2 = 8, 28, 36, 42
    #   3 = 15, 21, 27, 39
    #   4 = 15, 25, 45, 55
    #
    # closest to furtherest distances
    #   0 = 17.7, 50.8, 70.0, 97.1
    #   1 = 17.7, 79.4, 82.1, 121.6
    #   2 = 70.0, 77.3, 120.9, 121.6
    #   3 = 14.4, 79.4, 97.1, 120.9
    #   4 = 14.4, 50.8, 77.3, 82.1
    Y = np.array([17.7, 70., 97.1, 50.8, 121.6, 79.4, 82.1, 120.9, 77.3, 14.4])
    w = np.array([4, 8, 6, 10, 6, 6, 10, 12, 20, 15])
    n = sp_distance.num_obs_y(Y)

    def pair_weight(i, j):
        return w[condensed_index(n, i, j)]

    # The second limit is only evaluated when the first one matched.
    outcome = equal_arrays(
        core_distance(Y, weight_fun=pair_weight, minWt=[20] * n),
        [70.0, 82.1, 77.3, 79.4, 50.8])
    if outcome:
        outcome = equal_arrays(
            core_distance(Y, weight_fun=pair_weight, minWt=[30] * n),
            [97.1, 121.6, 120.9, 120.9, 77.3])
    assert_true(outcome, "computes weighted core distances at various limits")
def assert_num_obs(n, y):
    """Ensure the condensed distances `y` describe exactly `n` observations.

    Raises SavedDistancesInvalidNumberException when the observation count
    derived from `y` differs from `n`; returns None otherwise.
    """
    observed = sp_distance.num_obs_y(y)
    if n != observed:
        message = "Saved distances for different number of observations"
        raise SavedDistancesInvalidNumberException(message)
def assert_num_obs(n, y):
    """Check that `y` holds condensed distances for `n` observations.

    The observation count is derived from the length of `y`; a mismatch with
    `n` raises SavedDistancesInvalidNumberException.
    """
    mismatch = n != sp_distance.num_obs_y(y)
    if not mismatch:
        return
    raise SavedDistancesInvalidNumberException(
        "Saved distances for different number of observations")
def linkage(D, method='single', metric='euclidean', preserve_input=True):
    '''Hierarchical (agglomerative) clustering on a dissimilarity matrix or on
    Euclidean data.

    The argument D is either a compressed distance matrix or a collection
    of m observation vectors in n dimensions as an (m×n) NumPy array. Apart
    from the argument preserve_input, the methods have the same input
    parameters and output format as the functions of the same name in the
    package scipy.cluster.hierarchy. Therefore, the documentation is not
    duplicated here. Please refer to the SciPy documentation for further
    details.

    The additional, optional argument preserve_input specifies whether the
    fastcluster package first copies the distance matrix or writes into the
    existing array. If the distance matrix is only generated for the
    clustering step and is not needed afterwards, half the memory can be
    saved by specifying preserve_input=False. Note that the input array D
    contains unspecified values after this procedure. It is therefore safer
    to write

        linkage(D, method="…", preserve_input=False)
        del D

    to make sure the matrix D is not accidentally used after it has been
    used as scratch memory.

    The single linkage algorithm does not write to the distance matrix or
    its copy anyway, so the preserve_input flag has no effect in this case.

    Raises ValueError if D is not a numpy.ndarray. Several further input
    requirements (double dtype, 1-D or 2-D shape) are enforced with assert
    statements.'''
    if not isinstance(D, ndarray):
        raise ValueError('The first argument must be of type numpy.ndarray.')
    if len(D.shape)==1:
        # 1-D input: D is treated as a condensed distance matrix.
        if method=='single':
            assert D.dtype==double
            # Only C-contiguity is requested here; per the docstring, single
            # linkage does not write to the distances.
            D_ = require(D, dtype=double, requirements=['C'])
            if D_ is not D:
                # `require` returned a different array object: tell the user.
                stderr.write('The condensed distance matrix had to be copied since it has the following flags:\n')
                stderr.write(str(D.flags) + '\n')
        elif preserve_input:
            # Work on a private copy so the caller's array is left intact.
            D_ = D.copy()
            assert D_.dtype == double
            assert D_.flags.c_contiguous
            assert D_.flags.owndata
            assert D_.flags.writeable
            assert D_.flags.aligned
        else:
            assert D.dtype==double
            # Use D itself as scratch memory when it is C-contiguous,
            # aligned, writeable and owns its data; otherwise `require`
            # hands back a copy.
            D_ = require(D, dtype=double, requirements=['C', 'A', 'W', 'O'])
            if D_ is not D:
                stderr.write('The condensed distance matrix had to be copied since it has the following flags:\n')
                stderr.write(str(D.flags) + '\n')

        is_valid_y(D_, throw=True)

        N = num_obs_y(D_)
        # Output buffer passed to linkage_wrap; presumably filled in place
        # by the extension -- verify against linkage_wrap.
        Z = empty((N-1,4))
        if N > 1:
            linkage_wrap(N, D_, Z, mthidx[method])
        return Z
    else:
        # 2-D input: D holds observation vectors; compute the condensed
        # distances first.
        assert len(D.shape)==2
        N = D.shape[0]
        Z = empty((N-1,4))
        D_ = pdist(D, metric)
        assert D_.dtype == double
        assert D_.flags.c_contiguous
        assert D_.flags.owndata
        assert D_.flags.writeable
        assert D_.flags.aligned
        if N > 1:
            linkage_wrap(N, D_, Z, mthidx[method])
        return Z
""" import functools import numpy as np from scipy.spatial.distance import squareform, num_obs_y def condensed_to_square_index(n, c): # converts an index in a condensed array to the # pair of observations it represents # modified from here: http://stackoverflow.com/questions/5323818/condensed-matrix-function-to-find-pairs ti = np.triu_indices(n, 1) return ti[0][c], ti[1][c] num_obs = lambda dist: dist.shape[0] if dist.ndim == 2 else num_obs_y(dist) args_min_dist = lambda dist: condensed_to_square_index(num_obs(dist), _condensed_dist(dist).argmin()) _square_dist = lambda dist: dist if dist.ndim == 2 else squareform(dist) _condensed_dist = lambda dist: dist if dist.ndim == 1 else squareform(dist) _get = lambda dist, i, j: dist[i][j] if dist.ndim == 2 else squareform(dist)[i][j] min_except_zero = lambda lst: functools.reduce(lambda res, x: res if x == 0 else min(res, x), lst, lst[0]) class _Edge: def __init__(self, u, v, weight): self.u = u