Example #1
def circumsphere(vertices):
    '''
    @param vertices: array of the simplex's vertex coordinates, one vertex per row
    @returns: radius of the circumsphere and Cartesian coordinates of its centre
    @rtype: tuple
    '''

    # based on https://westy31.home.xs4all.nl/Circumsphere/ncircumsphere.htm and
    # https://codereview.stackexchange.com/questions/77593/calculating-the-volume-of-a-tetrahedron

    import math

    import numpy as np
    from scipy.spatial import distance

    # squared pairwise distances between the vertices
    squared_dists = distance.pdist(vertices, metric='sqeuclidean')
    n = distance.num_obs_y(squared_dists)  # number of vertices

    # add ones and a zero to make a Cayley-Menger matrix.
    squared_dists_mat = distance.squareform(squared_dists)
    with_border = np.insert(np.insert(squared_dists_mat, 0, values=1, axis=1),
                            0,
                            values=1,
                            axis=0)
    np.fill_diagonal(with_border, 0)

    # The first diagonal element of the inverse holds -2*r*r, and the rest of its
    # first row / first column holds the barycentric coordinates of the centre.
    inv = np.linalg.inv(with_border)
    r = math.sqrt(inv[0][0] / -2)
    barycentric_coordinates = inv[1:, 0]
    return r, bary2cart(vertices, barycentric_coordinates)
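
# A minimal usage sketch (hypothetical, not from the snippet's source): circumsphere
# of the unit right tetrahedron. bary2cart is assumed to map barycentric coordinates
# back to Cartesian ones; a simple implementation is the weighted sum of the vertices.
import numpy as np

def bary2cart(vertices, bary):
    # hypothetical helper: barycentric coordinates -> Cartesian point
    return np.asarray(bary) @ np.asarray(vertices)

verts = np.array([[0., 0., 0.],
                  [1., 0., 0.],
                  [0., 1., 0.],
                  [0., 0., 1.]])
r, centre = circumsphere(verts)
print(r, centre)  # roughly 0.866 and (0.5, 0.5, 0.5)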
Example #2
    def wrapper(distances, *, tol=0.):
        if tol < 0:
            raise ValueError('tol must be non-negative.')

        distances = _np.asarray(distances, dtype=float)

        # force the array to be condensed
        if distances.ndim > 1:
            distances = _dist.squareform(distances)

        # the number of observations in the distances
        n = _dist.num_obs_y(distances)

        i, j, k = _indexing.triple_indices(n).T

        # pre-allocated memory
        edgelengths = _np.empty((n, 3))

        condensed_i_j = _indexing.condensed_indices(n, i, j)
        condensed_i_k = _indexing.condensed_indices(n, i, k)
        condensed_k_j = _indexing.condensed_indices(n, k, j)

        edgelengths[:,0] = distances[condensed_i_j]
        edgelengths[:,1] = distances[condensed_i_k]
        edgelengths[:,2] = distances[condensed_k_j]
        edgelengths.sort(axis=1)
        return _np.all(verifier(edgelengths, tol))
Example #3
def are_coplanar(markers) -> bool:
    """Checks if given marker positions are co-planar.
    
    :param markers: list of markers, each an array of x, y, z coordinates.
    :type markers: list
    :return: Whether the markers are co-planar or not.
    :rtype: bool
    """
    # Fewer than 4 markers are always co-planar (any 3 points lie on a common plane).
    if len(markers) < 4:
        return True

    # Calculate the volume of the tetrahedron formed by the 4 markers.
    # If this volume is zero, then they must be coplanar.

    # β_ij = |v_i - v_j|²
    markers = np.asarray(markers, dtype=float)
    sq_distance = distance.pdist(markers, metric='sqeuclidean')

    # Add border.
    n_vertices = distance.num_obs_y(sq_distance)
    bordered = np.concatenate((np.ones(n_vertices), sq_distance))

    # Make matrix and find volume.
    sq_distance_matrix = distance.squareform(bordered)

    coeff = -(-2)**(n_vertices - 1) * factorial(n_vertices - 1)**2
    volume_squared = np.linalg.det(sq_distance_matrix) / coeff

    if volume_squared <= 0:
        return True
    #print("Volume formed by markers:", np.sqrt(volume_squared))

    return False
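
# A minimal usage sketch (hypothetical, not from the snippet's source). It assumes the
# imports the function relies on (numpy as np, scipy.spatial.distance as distance,
# math.factorial).
square = [[0., 0., 0.], [1., 0., 0.], [1., 1., 0.], [0., 1., 0.]]  # all lie in z = 0
tetra = [[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]   # span a volume
print(are_coplanar(square))  # True: the degenerate tetrahedron has zero volume
print(are_coplanar(tetra))   # False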
Example #4
def simplex_volume(*, vertices=None, sides=None) -> float:
    """
    Return the volume of the simplex with given vertices or sides.

    If vertices are given they must be in a NumPy array with shape (N+1, N):
    the position vectors of the N+1 vertices in N dimensions. If the sides
    are given, they must be the compressed pairwise distance matrix as
    returned from scipy.spatial.distance.pdist.

    Raises a ValueError if the vertices do not form a simplex (for example,
    because they are coplanar, collinear or coincident).

    Warning: this algorithm has not been tested for numerical stability.
    """

    # Implements http://mathworld.wolfram.com/Cayley-MengerDeterminant.html

    if (vertices is None) == (sides is None):
        raise ValueError("Exactly one of vertices and sides must be given")

    # β_ij = |v_i - v_j|²
    if sides is None:
        vertices = np.asarray(vertices, dtype=float)
        sq_dists = distance.pdist(vertices, metric='sqeuclidean')

    else:
        sides = np.asarray(sides, dtype=float)
        if not distance.is_valid_y(sides):
            raise ValueError("Invalid number or type of side lengths")

        sq_dists = sides**2

    # Add border while compressed
    num_verts = distance.num_obs_y(sq_dists)
    bordered = np.concatenate((np.ones(num_verts), sq_dists))

    # Make matrix and find volume
    sq_dists_mat = distance.squareform(bordered)

    coeff = -(-2)**(num_verts - 1) * factorial(num_verts - 1)**2
    vol_square = np.linalg.det(sq_dists_mat) / coeff

    if vol_square <= 0:
        raise ValueError('Provided vertices do not form a simplex')

    return np.sqrt(vol_square)
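
# A minimal usage sketch (hypothetical, not from the snippet's source): the unit right
# tetrahedron has volume 1/6, whether given as vertices or as condensed side lengths.
import numpy as np
from scipy.spatial import distance

verts = np.array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
print(simplex_volume(vertices=verts))               # ~0.1667
print(simplex_volume(sides=distance.pdist(verts)))  # same result, up to rounding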
Example #5
def core_distance(Y, weight_fun=None, minWt=None, minPts=None):
    """Compute core distance for data points, defined as the distance to the furtherest
    neighbour where the cumulative weight of closer points is less than minWt.

    Parameters
    ----------
    Y : ndarray
        Condensed distance matrix containing distances for pairs of
        observations. See scipy's `squareform` function for details.
    weight_fun : callable
        Function to calculate pairwise weights for condensed distances.
    minWt : ndarray
        Total cumulative neighbour weight used to compute density distance for individual points.
    minPts : int
        Number of neighbours used to compute density distance.

    Returns
    -------
    core_distance : ndarray
        Core distances for data points.
    """
    (Y, _) = validate_y(Y, name="Y")
    n = sp_distance.num_obs_y(Y)
    core_dist = np.empty(n, dtype=Y.dtype)
    m = np.empty(n, dtype=Y.dtype)  # store row distances
    minPts = n - 1 if minPts is None else minPts
    if weight_fun is None or minWt is None:
        for (i, mp) in np.broadcast(np.arange(n), minPts):
            others = np.flatnonzero(np.arange(n) != i)
            m[others] = Y[condensed_index(n, i, others)]
            m[i] = 0
            m.sort()
            core_dist[i] = m[np.minimum(n - 1, mp)]
    else:
        w = np.empty(n, dtype=np.double)  # store row weights
        for (i, mp, mw) in np.broadcast(np.arange(n), minPts, minWt):
            others = np.flatnonzero(np.arange(n) != i)
            m[others] = Y[condensed_index(n, i, others)]
            m[i] = 0
            w[others] = weight_fun(i, others)
            w[i] = 0
            sorting_indices = m.argsort()
            # effective neighbour count: stop once the cumulative weight reaches mw
            k = np.minimum(int(np.sum(w[sorting_indices].cumsum() < mw)), mp)
            core_dist[i] = m[sorting_indices[np.minimum(n - 1, k)]]
    return core_dist
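
# The snippet above relies on validate_y and condensed_index helpers that are not
# shown. A minimal sketch of what condensed_index typically looks like (hypothetical,
# mirroring the pair ordering of scipy.spatial.distance.pdist):
import numpy as np

def condensed_index(n, i, j):
    # Map square-form pair indices (i, j), i != j, of n observations to indices
    # into the corresponding condensed distance array.
    i, j = np.asarray(i), np.asarray(j)
    lo, hi = np.minimum(i, j), np.maximum(i, j)
    return n * lo - lo * (lo + 1) // 2 + (hi - lo - 1)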
Example #6
def linkage(y, method):
    '''An extended edition of scipy.cluster.hierarchy.linkage that allows a custom
    set-distance function. method can be a string indicator, as in scipy's linkage,
    or a callable of the form f(dm, set1, set2) that computes the distance between
    two sets of observations given the square distance matrix dm.
    '''

    if isinstance(method, str):
        return scipy.cluster.hierarchy.linkage(y, method=method)

    distance.is_valid_y(y, throw=True, name='y')
    d = distance.num_obs_y(y)

    Z = np.zeros((d-1,4))
    dm = distance.squareform(y)
    dmo = distance.squareform(y)
    
    dm[np.diag_indices(d)] = np.nan
    #print(dm)
    idmap = {i:i for i in range(d)}
    active = [i for i in range(d)]
    nodes = {i:{i} for i in range(d)}
    for i in range(d-1):
        m = d - i
        mink = np.nanargmin(dm[np.ix_(active,active)])
        minh = active[mink//m]
        minw = active[mink % m]
        left = idmap[minh]
        right = idmap[minw]
        
        Z[i,0] = left
        Z[i,1] = right
        Z[i,2] = dm[minh,minw]
        Z[i,3] = len(nodes[left]) + len(nodes[right])
        nid = d+i
        
        idmap[minh] = nid
        nodes[nid] = nodes[left] | nodes[right]
        del active[active.index(minw)]
        
        for j in active:
            if j == minh:
                continue
            dm[minh,j] = method(dmo, nodes[nid], nodes[idmap[j]])
            dm[j,minh] = dm[minh,j]
    return Z
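
# A minimal usage sketch (hypothetical, not from the snippet's source): a "complete
# linkage" style set distance passed as the callable form of method.
import numpy as np
from scipy.spatial import distance

def complete_linkage(dm, set1, set2):
    # largest pairwise distance between the two sets of original observations
    return max(dm[i, j] for i in set1 for j in set2)

pts = np.random.rand(8, 2)
y = distance.pdist(pts)
Z = linkage(y, complete_linkage)
print(Z)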
Example #7
def reachability_order(Y, core_dist=None):
    """Traverse collection of nodes by choosing the closest unvisited node to
    a visited node at each step to produce a reachability plot.

    Parameters
    ----------
    Y : ndarray
        Condensed distance matrix
    core_dist : ndarray
        Core distances for original observations of Y.

    Returns
    -------
    o : ndarray
        1-D array of indices of original observations in traversal order.
    d : ndarray
        1-D array. `d[i]` is the `i`th traversal distance.
    """
    Y = np.asanyarray(Y)
    n = sp_distance.num_obs_y(Y)
    if core_dist is not None:
        core_dist = np.asarray(core_dist)
        if core_dist.shape != (n, ):
            raise ValueError(
                "core_dist is not a 1-D array with compatible size to Y.")
    o = np.empty(n, dtype=np.intp)
    to_visit = np.ones(n, dtype=bool)
    closest = 0
    o[0] = 0
    to_visit[0] = False
    d = np.empty(n, dtype=Y.dtype)
    d[0] = 0
    d[1:] = Y[condensed_index(n, 0, np.arange(1, n))]
    if core_dist is not None:
        d = np.maximum(d, core_dist[0])
    for i in range(1, n):
        closest = np.flatnonzero(to_visit)[d[to_visit].argmin()]
        o[i] = closest
        to_visit[closest] = False
        m = Y[condensed_index(n, closest, np.flatnonzero(to_visit))]
        if core_dist is not None:
            m = np.maximum(m, core_dist[closest])
        d[to_visit] = np.minimum(d[to_visit], m)
    return (o, d[o])
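
# A minimal usage sketch (hypothetical, not from the snippet's source). It assumes a
# condensed_index helper like the one sketched earlier; pass core_dist from
# core_distance() to get OPTICS-style reachability distances.
import numpy as np
from scipy.spatial import distance as sp_distance

pts = np.random.rand(30, 2)
Y = sp_distance.pdist(pts)
order, reach = reachability_order(Y)
# Plotting reach in this order gives the reachability plot; valleys suggest clusters.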
Example #8
def cm_matrix_det_ns(vertices: np.ndarray) -> float:
    """This computes the Cayley-Megner Matrix determinant, and then
    normalises its sign based on what the normal simplex volume
    calculation would do. It does not normalise the the values as
    this calculation tends to break floating points for n > 102.
    The resulting value can be used to determine which of two
    n-dimensional simplicies is bigger, but little else."""
    vertices = np.asarray(vertices, dtype=DTYPE)
    square_dists = distance.pdist(vertices, metric='sqeuclidean')
    number_of_vertices = distance.num_obs_y(square_dists)
    bordered_values = np.concatenate(
        (np.ones(number_of_vertices), square_dists))
    distance_matrix = distance.squareform(bordered_values)
    det = np.linalg.det(distance_matrix)
    # The sign of the Cayley-Menger determinant alternates with the number of vertices.
    if number_of_vertices % 2 == 1:
        det = -det
    if det <= 0:
        raise ValueError('Degenerate or invalid simplex')
    return det
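
# A minimal usage sketch (hypothetical, not from the snippet's source; DTYPE is
# assumed to be a float dtype such as np.float64): comparing two simplices by size.
import numpy as np

small = np.array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
big = 2.0 * small
print(cm_matrix_det_ns(big) > cm_matrix_det_ns(small))  # True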
Example #9
def setup_weights(distances, weights, max_weight=2.0, min_weight=1e-4):
    """\
    Sets up condensed weights array.

    Parameters
    ----------

    distances : array (n_samples*(n_samples-1)/2,)
    Condensed distances.

    weights : None or str or callable or array
    Weights.
    """
    if isinstance(weights, str):
        if weights == 'reciprocal':
            weights = 1.0 / distances
        else:
            sys.exit('weight type unknown')
    elif callable(weights):
        try:
            weights = weights(distances)
        except Exception:
            weights = np.array([weights(dist) for dist in distances])
    elif isinstance(weights, np.ndarray):
        if len(weights) == distance.num_obs_y(distances):
            # per-observation weights: form pairwise products and condense them
            assert np.min(weights) >= 0
            weights = np.outer(weights, weights)
            weights = distance.squareform(weights, checks=False)
        assert distances.shape == weights.shape
    else:
        assert weights is None

    if weights is not None:
        if max_weight is not None:
            weights = np.minimum(weights, max_weight)
        if min_weight is not None:
            weights = np.maximum(weights, min_weight)

    return weights
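
# A minimal usage sketch (hypothetical, not from the snippet's source; assumes the
# numpy/scipy imports the function uses): reciprocal weights, clipped to the default
# [1e-4, 2.0] range.
import numpy as np
from scipy.spatial import distance

d = distance.pdist(np.random.rand(10, 3))
w = setup_weights(d, 'reciprocal')
print(w.min() >= 1e-4 and w.max() <= 2.0)  # True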
Example #10
def test_core_distance():
    """
    Y encodes distances for pairs:
    (0, 1) =  2.2
    (0, 2) =  7.2
    (0, 3) = 10.4
    (0, 4) =  6.7
    (1, 2) = 12.8
    (1, 3) =  8.6
    (1, 4) =  8.9
    (2, 3) = 12.7
    (2, 4) =  8.6
    (3, 4) =  2.2
    
    closest to furthest distances
    0 = 2.2, 6.7,  7.2, 10.4
    1 = 2.2, 8.6,  8.9, 12.8
    2 = 7.2, 8.6, 12.7, 12.8
    3 = 2.2, 8.6, 10.4, 12.7
    4 = 2.2, 6.7,  8.6,  8.9
    """
    Y = np.array([2.2, 7.2, 10.4, 6.7, 12.8, 8.6, 8.9, 12.7, 8.6, 2.2])
    n = sp_distance.num_obs_y(Y)

    assert_true(
        equal_arrays(core_distance(Y, minPts=1), [2.2, 2.2, 7.2, 2.2, 2.2]),
        "returns nearest neighbour distance with minPts=1")
    assert_true(
        equal_arrays(
            core_distance(Y, weight_fun=lambda _i, _j: 1, minWt=[1] * n),
            [2.2, 2.2, 7.2, 2.2, 2.2]),
        "returns nearest neighbour distance with unit weights and minWts")

    assert_true(
        equal_arrays(core_distance(Y, minPts=2), [6.7, 8.6, 8.6, 8.6, 6.7]),
        "returns 2-nearest neighbour distance with minPts=2")

    assert_true(
        equal_arrays(core_distance(Y, minPts=4),
                     [10.4, 12.8, 12.8, 12.7, 8.9]),
        "returns distance to 4-nearest neighbour with minPts=4")
    assert_true(
        equal_arrays(
            core_distance(Y, weight_fun=lambda _i, _j: 1, minWt=[4] * n),
            [10.4, 12.8, 12.8, 12.7, 8.9]),
        "returns distance to 4-nearest neighbour distance with unit "
        "weights and minWts=4")
    """
    Y encodes weighted distances for pairs:
    (0, 1) =  17.7
    (0, 2) =  70.0
    (0, 3) =  97.1
    (0, 4) =  50.8
    (1, 2) = 121.6
    (1, 3) =  79.4
    (1, 4) =  82.1
    (2, 3) = 120.9
    (2, 4) =  77.3
    (3, 4) =  14.4
    
    w encodes pairwise weights:
    (0, 1) =  4
    (0, 2) =  8
    (0, 3) =  6
    (0, 4) = 10
    (1, 2) =  6
    (1, 3) =  6
    (1, 4) = 10
    (2, 3) = 12
    (2, 4) = 20
    (3, 4) = 15
    
    cumulative weights
    0 =  4, 14, 22, 28
    1 =  4, 10, 20, 26
    2 =  8, 28, 36, 42
    3 = 15, 21, 27, 39
    4 = 15, 25, 45, 55
    
    closest to furthest distances
    0 = 17.7, 50.8,  70.0,  97.1
    1 = 17.7, 79.4,  82.1, 121.6
    2 = 70.0, 77.3, 120.9, 121.6
    3 = 14.4, 79.4,  97.1, 120.9
    4 = 14.4, 50.8,  77.3,  82.1  
    """
    Y = np.array([17.7, 70., 97.1, 50.8, 121.6, 79.4, 82.1, 120.9, 77.3, 14.4])
    w = np.array([4, 8, 6, 10, 6, 6, 10, 12, 20, 15])
    n = sp_distance.num_obs_y(Y)

    assert_true(
        equal_arrays(
            core_distance(Y,
                          weight_fun=lambda i, j: w[condensed_index(n, i, j)],
                          minWt=[20] * n), [70.0, 82.1, 77.3, 79.4, 50.8])
        and equal_arrays(
            core_distance(Y,
                          weight_fun=lambda i, j: w[condensed_index(n, i, j)],
                          minWt=[30] * n), [97.1, 121.6, 120.9, 120.9, 77.3]),
        "computes weighted core distances at various limits")
Example #11
def assert_num_obs(n, y):
    if n != sp_distance.num_obs_y(y):
        raise SavedDistancesInvalidNumberException(
            "Saved distances for different number of observations")
Example #12
def linkage(D, method='single', metric='euclidean', preserve_input=True):
    '''Hierarchical (agglomerative) clustering on a dissimilarity matrix or on
Euclidean data.

The argument D is either a condensed distance matrix or a collection
of m observation vectors in n dimensions as an (m×n) NumPy array. Apart
from the argument preserve_input, the methods have the same input
parameters and output format as the functions of the same name in the
package scipy.cluster.hierarchy. Therefore, the documentation is not
duplicated here. Please refer to the SciPy documentation for further
details.

The additional, optional argument preserve_input specifies whether the
fastcluster package first copies the distance matrix or writes into
the existing array. If the distance matrix is only generated for the
clustering step and is not needed afterwards, half the memory can be
saved by specifying preserve_input=False.

Note that the input array D contains unspecified values after this
procedure. It is therefore safer to write

    linkage(D, method="…", preserve_input=False)
    del D

to make sure the matrix D is not accidentally used after it has been
used as scratch memory.

The single linkage algorithm does not write to the distance matrix or
its copy anyway, so the preserve_input flag has no effect in this
case.'''
    if not isinstance(D, ndarray):
        raise ValueError('The first argument must be of type numpy.ndarray.')
    if len(D.shape)==1:
        if method=='single':
            assert D.dtype==double
            D_ = require(D, dtype=double, requirements=['C'])
            if D_ is not D:
                stderr.write('The condensed distance matrix had to be copied since it has the following flags:\n')
                stderr.write(str(D.flags) + '\n')
        elif preserve_input:
            D_ = D.copy()
            assert D_.dtype == double
            assert D_.flags.c_contiguous
            assert D_.flags.owndata
            assert D_.flags.writeable
            assert D_.flags.aligned
        else:
            assert D.dtype==double
            D_ = require(D, dtype=double, requirements=['C', 'A', 'W', 'O'])
            if D_ is not D:
                stderr.write('The condensed distance matrix had to be copied since it has the following flags:\n')
                stderr.write(str(D.flags) + '\n')

        is_valid_y(D_, throw=True)

        N = num_obs_y(D_)
        Z = empty((N-1,4))
        if N > 1:
            linkage_wrap(N, D_, Z, mthidx[method])
        return Z
    else:
        assert len(D.shape)==2
        N = D.shape[0]
        Z = empty((N-1,4))
        D_ = pdist(D, metric)
        assert D_.dtype == double
        assert D_.flags.c_contiguous
        assert D_.flags.owndata
        assert D_.flags.writeable
        assert D_.flags.aligned
        if N > 1:
            linkage_wrap(N, D_, Z, mthidx[method])
        return Z
Example #13
"""

import functools
import numpy as np
from scipy.spatial.distance import squareform, num_obs_y


def condensed_to_square_index(n, c):
    # converts an index in a condensed array to the
    # pair of observations it represents
    # modified from here: http://stackoverflow.com/questions/5323818/condensed-matrix-function-to-find-pairs
    ti = np.triu_indices(n, 1)
    return ti[0][c], ti[1][c]


# number of observations for either a square or a condensed distance array
num_obs = lambda dist: dist.shape[0] if dist.ndim == 2 else num_obs_y(dist)

# (i, j) pair of observations with the smallest pairwise distance
args_min_dist = lambda dist: condensed_to_square_index(num_obs(dist), _condensed_dist(dist).argmin())

# coerce to square form / condensed form respectively
_square_dist = lambda dist: dist if dist.ndim == 2 else squareform(dist)

_condensed_dist = lambda dist: dist if dist.ndim == 1 else squareform(dist)

# distance between observations i and j, whatever form dist is in
_get = lambda dist, i, j: dist[i][j] if dist.ndim == 2 else squareform(dist)[i][j]

# smallest non-zero value in lst (assumes the first element is non-zero)
min_except_zero = lambda lst: functools.reduce(lambda res, x: res if x == 0 else min(res, x), lst, lst[0])


class _Edge:
    def __init__(self, u, v, weight):
        self.u = u
        self.v = v
        self.weight = weight