Ejemplo n.º 1
0
def nearest_neighbor_connections(distances: numpy.ndarray,
                                 k: int = 1,
                                 symmetric: bool = True) -> Connections:
    """
    connect each point to its k nearest neighbors

    individual points can be connected to more than k points because of ties and because a pair are not necessarily
    each other's nearest neighbors

    :param distances: an n x n matrix containing the distances among a set of points
    :param k: the number of nearest neighbors to connect; if k exceeds the number of other points, every other
              point is connected, and if k is less than 1 nothing is connected
    :param symmetric: should connections always be symmetric (default = True) or allow asymmetric connections because
                      the nearest neighbor of one point A does not necessarily have A as its nearest neighbor
    :return: returns a Connections object
    """
    n = check_for_square_matrix(distances)
    output = Connections(n, symmetric)
    for i in range(n):
        # (distance, index) pairs for every other point, closest first
        dists = sorted((distances[i, j], j) for j in range(n) if j != i)
        n_others = len(dists)
        # never ask for more neighbors than exist, and treat k < 1 as "no neighbors"
        c = max(0, min(k, n_others))
        # ties: keep extending while the next candidate is exactly as far away as the last neighbor
        # already selected (compare position c - 1 with c, and stop at the end of the list)
        while 0 < c < n_others and dists[c][0] == dists[c - 1][0]:
            c += 1
        for p in range(c):  # connect the c closest points to the ith point
            output.store(i, dists[p][1])
    return output
Ejemplo n.º 2
0
def minimum_spanning_tree(distances: numpy.ndarray) -> Connections:
    """
    calculate connections among points based on a minimum spanning tree

    the tree is grown from the first point: at every step the shortest link between a point already in the tree
    and a point not yet in the tree is stored, and that new point joins the tree. when several links share the
    shortest distance, the first one encountered is used

    the original author describes this as an independently devised algorithm which loosely follows the suggestion
    made in Kruskal, Joseph B., Jr. 1956. On the shortest spanning subtree of a graph and the traveling salesman
    problem.  Proceedings of the American Mathematical Society 7(1):48-50.

    :param distances: an n x n matrix containing distances among points
    :return: returns a Connections object
    """
    n = check_for_square_matrix(distances)
    tree = Connections(n)
    # order[:n_in_tree] holds the points already in the tree, order[n_in_tree:] the points still outside it
    order = list(range(n))
    for n_in_tree in range(1, n):
        # start from the first (inside, outside) candidate and look for anything strictly shorter
        best_inside, best_outside = 0, n_in_tree
        best_dist = distances[order[best_inside], order[best_outside]]
        for inside in range(n_in_tree):
            for outside in range(n_in_tree, n):
                candidate = distances[order[inside], order[outside]]
                if candidate < best_dist:
                    best_inside, best_outside = inside, outside
                    best_dist = candidate
        tree.store(order[best_inside], order[best_outside])
        # move the newly attached point into the "in tree" part of the list
        order[n_in_tree], order[best_outside] = order[best_outside], order[n_in_tree]
    return tree
Ejemplo n.º 3
0
def morans_i(y: numpy.ndarray,
             weights: Connections,
             alt_weights: Optional[numpy.ndarray] = None,
             variance: Optional[str] = "random",
             permutations: int = 0):
    """
    calculate Moran's I for a set of values and a set of connections, with optional analytical and permutation
    tests

    :param y: the observed values, one per point (assumed to be a one-dimensional array whose length matches the
              weights matrix -- TODO confirm against callers)
    :param weights: a Connections object; its as_binary() matrix is used as the weight matrix
    :param alt_weights: an optional matrix multiplied element-wise with the binary weights to create non-binary
                        weights
    :param variance: the assumption used for the analytical variance. "normal" uses the normality formula, None
                     skips the analytical test, and any other value accepted by check_variance_assumption()
                     (the default is "random") uses the randomization formula
    :param permutations: the number of permutations for the permutation test, counting the observed value as one
                         of them; 0 (default) skips the permutation test
    :return: a tuple containing weights.min_scale, weights.max_scale, weights.n_pairs(), the expected value of I,
             the observed I, its standard deviation, the z-score, the two-tailed probability, the permutation
             probability, and the list of permuted I values (the observed value is always the first entry). the
             standard deviation, z-score and probability are None when variance is None
    """
    check_variance_assumption(variance)
    n = len(y)
    mean_y = numpy.mean(y)
    dev_y = y - mean_y  # deviations from mean
    w = weights.as_binary()
    if alt_weights is not None:  # multiply to create non-binary weights, if necessary
        w = w * alt_weights
    sumy2 = numpy.sum(
        numpy.square(dev_y),
        dtype=numpy.float64)  # sum of squared deviations from mean
    sumw = numpy.sum(w, dtype=numpy.float64)  # sum of weight matrix
    sumw2 = sumw**2
    expected = -1 / (n - 1)
    denominator = sumw * sumy2  # shared by the observed and every permuted statistic
    sumyij = numpy.sum(numpy.outer(dev_y, dev_y) * w, dtype=numpy.float64)
    moran = n * sumyij / denominator

    # permutations
    permuted_i_list = [moran]
    perm_p = 1  # the observed value counts as one permutation
    if permutations > 0:
        rand_y = numpy.copy(dev_y)
        for _ in range(permutations - 1):
            numpy.random.shuffle(rand_y)
            perm_sumyij = numpy.sum(numpy.outer(rand_y, rand_y) * w,
                                    dtype=numpy.float64)
            perm_moran = n * perm_sumyij / denominator
            permuted_i_list.append(perm_moran)
            # two-tailed: count permuted values at least as far from expectation as the observed value
            if abs(perm_moran - expected) >= abs(moran - expected):
                perm_p += 1
        perm_p /= permutations

    if variance is None:
        sd, z, p = None, None, None
    else:
        s1 = numpy.sum(numpy.square(w + numpy.transpose(w)),
                       dtype=numpy.float64) / 2
        s2 = numpy.sum(
            numpy.square(numpy.sum(w, axis=0) + numpy.sum(w, axis=1)),
            dtype=numpy.float64)
        if variance == "normal":
            # E[I^2] under normality
            second_moment = ((n**2 * s1) - n * s2 + 3 * sumw2) / ((n**2 - 1) * sumw2)
        else:  # random
            b2 = n * numpy.sum(numpy.power(dev_y, 4),
                               dtype=numpy.float64) / (sumy2**2)
            # E[I^2] under randomization
            second_moment = ((n * ((n**2 - 3 * n + 3) * s1 - n * s2 + 3 * sumw2) - b2 *
                              ((n**2 - n) * s1 - 2 * n * s2 + 6 * sumw2)) /
                             ((n - 1) * (n - 2) * (n - 3) * sumw2))
        # Var[I] = E[I^2] - E[I]^2; the squared expectation must be removed under both assumptions
        v = second_moment - expected**2
        sd = sqrt(v)  # convert to standard dev
        z = abs(moran - expected) / sd
        p = scipy.stats.norm.sf(z) * 2  # two-tailed test

    return weights.min_scale, weights.max_scale, weights.n_pairs(
    ), expected, moran, sd, z, p, perm_p, permuted_i_list
Ejemplo n.º 4
0
def relative_neighborhood_network(distances: numpy.ndarray) -> Connections:
    """
    calculate connections among points based on a relative neighborhood network

    two points are connected unless some third point is closer to both of them than they are to each other

    :param distances: an n x n matrix containing distances among points
    :return: returns a Connections object
    """
    n = check_for_square_matrix(distances)
    network = Connections(n)
    for i in range(1, n):
        for j in range(i):
            span = distances[i, j]
            # a third point strictly closer to both i and j than they are to each other blocks the link
            blocked = any(
                (distances[other, j] < span) and (distances[other, i] < span)
                for other in range(n)
                if other != i and other != j)
            if not blocked:
                network.store(i, j)
    return network
Ejemplo n.º 5
0
def mantel_correl(y: numpy.ndarray,
                  weights: Connections,
                  alt_weights: Optional[numpy.ndarray] = None,
                  variance=None,
                  permutations: int = 0):
    """
    calculate a Mantel correlation between y and a weight matrix built from a set of connections

    in order to get the bearing version to work right, we have to use normal binary weights, then reverse the sign
    of the resulting Mantel correlation. if we use reverse binary weighting we end up multiplying the 'out of
    class' weights of 1 versus the angles, which is not what we want to test

    :param y: the matrix passed to pyssage.mantel.mantel() as its first argument
    :param weights: a Connections object; its as_binary() matrix is used as the second Mantel matrix
    :param alt_weights: an optional matrix multiplied element-wise with the binary weights to create non-binary
                        weights
    :param variance: not used; it is only a placeholder so the function fits with the Moran's I and Geary's c
                     requirements
    :param permutations: the number of permutations, passed through to pyssage.mantel.mantel()
    :return: a tuple containing weights.min_scale, weights.max_scale, weights.n_pairs(), 0 (the expected value),
             the sign-reversed correlation, the sign-reversed z-score, the probability, the permuted two-tailed
             probability, and the permuted correlations as returned by pyssage.mantel.mantel()
    """
    w = weights.as_binary()
    if alt_weights is not None:  # multiply to create non-binary weights, if necessary
        # build a new array rather than multiplying in place: this leaves the matrix returned by as_binary()
        # untouched and avoids a casting error if that matrix has an integer dtype and alt_weights is float
        w = w * alt_weights
    r, p_value, _, _, _, permuted_two_p, permuted_rs, z = pyssage.mantel.mantel(
        y, w, [], permutations=permutations)
    # NOTE(review): r and z are sign-reversed here but permuted_rs is returned as-is, and this tuple has nine
    # entries (there is no standard deviation slot) -- confirm this is what callers expect
    return weights.min_scale, weights.max_scale, weights.n_pairs(
    ), 0, -r, -z, p_value, permuted_two_p, permuted_rs
Ejemplo n.º 6
0
def gabriel_network(distances: numpy.ndarray) -> Connections:
    """
    calculate connections among points based on a Gabriel network

    two points are connected unless, for some third point, the squared distance between the pair exceeds the sum
    of the squared distances from the third point to each member of the pair

    :param distances: an n x n matrix containing distances among points
    :return: returns a Connections object
    """
    n = check_for_square_matrix(distances)
    network = Connections(n)
    squared = numpy.square(distances)
    for i in range(1, n):
        for j in range(i):
            pair_sq = squared[i, j]
            # any third point satisfying the inequality rules the link out
            blocked = any(
                pair_sq > squared[other, j] + squared[other, i]
                for other in range(n)
                if other != i and other != j)
            if not blocked:
                network.store(i, j)
    return network
Ejemplo n.º 7
0
def delaunay_connections(triangle_list: list, point_list: list) -> Connections:
    """
    build the connections implied by a pre-determined list of triangles

    every pair of vertices of every triangle is connected, provided both vertices are present in point_list;
    connections are stored using the positions of the vertices within point_list

    this is not meant to be used as an independent algorithm; it is a piece of delaunay_tessellation()

    :param triangle_list: a list containing the triangles identified by a previous function; each triangle must
                          expose its three vertices through a points attribute
    :param point_list: a list containing points assembled by a previous function
    :return: returns a Connections object
    """
    network = Connections(len(point_list))
    for triangle in triangle_list:
        for first in range(3):
            vertex_a = triangle.points[first]
            for second in range(first):
                vertex_b = triangle.points[second]
                # vertices that are not part of point_list cannot be connected
                if vertex_a not in point_list:
                    continue
                if vertex_b not in point_list:
                    continue
                network.store(point_list.index(vertex_a),
                              point_list.index(vertex_b))
    return network
Ejemplo n.º 8
0
def shortest_path_distances(
        distances: numpy.ndarray,
        connections: Connections) -> Tuple[numpy.ndarray, dict]:
    """
    create a shortest-path/geodesic distance matrix from a set of inter-point distances and a connection/network
    scheme

    This uses the Floyd-Warshall algorithm
    See Corman, T.H., Leiserson, C.E., and Rivest, R.L., 'Introduction to Algorithms', section 26.2, p. 558-562.

    trace_mat is a dictionary tracing the shortest path: trace_mat[i, j] is the next point to visit when travelling
    from i to j

    the algorithm will work on connection networks which are not fully spanning (i.e., there are no paths between
    some pairs of points), reporting infinity for the distance between such pairs

    :param distances: an n x n matrix containing distances among the n points
    :param connections: a Connections object containing connections or edges among the n points describing its
                        network
    :return: a named tuple (output_dists, trace_matrix) containing an n x n matrix with the
             shortest-path/geodesic distances and a dictionary containing trace data among the network for use
             in path reconstruction; pairs without any connecting path have no entry in the dictionary
    """
    n = len(distances)
    output = numpy.copy(distances)
    # empty[i, j] is True while no path from i to j is known yet
    empty = numpy.invert(connections.as_boolean())
    # for the purposes of this algorithm, points must be connected to themselves
    for i in range(n):
        empty[i, i] = False
    trace_mat = {(i, j): j for i in range(n) for j in range(n)}
    for k in range(n):
        for i in range(n):
            if empty[i, k]:
                continue  # no known path from i to k, so k cannot be an intermediate stop for i
            for j in range(n):
                # the second leg runs from k to j, so test that direction; the distinction matters when the
                # connections are asymmetric
                if empty[k, j]:
                    continue
                via_k = output[i, k] + output[k, j]
                # accept the route through k if it is the first path found or shorter than the known one
                if empty[i, j] or output[i, j] > via_k:
                    output[i, j] = via_k
                    empty[i, j] = False
                    trace_mat[i, j] = trace_mat[i, k]
    # the following removes "connections" among point pairs with no connected path
    for i in range(n):
        for j in range(n):
            if i != j:  # points cannot be unconnected from themselves
                if (trace_mat[i, j] == j) and not connections[i, j]:
                    trace_mat.pop((i, j))  # remove path from trace matrix
                    output[i, j] = float("inf")  # change distance to infinity
    output_tuple = namedtuple("output_tuple", ["output_dists", "trace_matrix"])
    return output_tuple(output, trace_mat)
Ejemplo n.º 9
0
def create_windrose_connections(distances: numpy.ndarray,
                                angles: numpy.ndarray, annulus: int,
                                sector: int, a: int, c: float, d: float,
                                e: float) -> Tuple[Connections, float, float]:
    """
    create the connections for a single sector of a single annulus of a windrose

    a pair of points is connected when its distance falls inside the annulus and its angle falls inside the
    sector; both ranges include their lower bound and exclude their upper bound

    :param distances: an n x n matrix containing distances among points
    :param angles: an n x n matrix containing the angles among points (presumably in radians between 0 and pi,
                   because the sectors of an annulus divide pi evenly -- TODO confirm)
    :param annulus: the index of the annulus
    :param sector: the index of the sector within the annulus
    :param a: passed to windrose_sectors_per_annulus() together with annulus to obtain the number of sectors
    :param c: quadratic coefficient of the annulus boundary, c * annulus**2 + d * annulus + e
    :param d: linear coefficient of the annulus boundary
    :param e: constant term of the annulus boundary
    :return: a tuple containing the Connections object (with min_scale and max_scale set to the annulus
             boundaries), the minimum angle of the sector, and the maximum angle of the sector
    """

    def ring_boundary(index):
        # distance at which the annulus with the given index begins
        return c * index**2 + d * index + e

    n = check_for_square_matrix(distances)
    output = Connections(n)
    output.min_scale = ring_boundary(annulus)
    output.max_scale = ring_boundary(annulus + 1)
    sector_breadth = pi / windrose_sectors_per_annulus(a, annulus)
    sector_min = sector * sector_breadth
    sector_max = (sector + 1) * sector_breadth
    for i in range(1, n):
        for j in range(i):
            if not (output.min_scale <= distances[i, j] < output.max_scale):
                continue  # outside the annulus
            if sector_min <= angles[i, j] < sector_max:
                output.store(i, j)
    return output, sector_min, sector_max
Ejemplo n.º 10
0
def connect_distance_range(distances: numpy.ndarray,
                           maxdist: float,
                           mindist: float = 0) -> Connections:
    """
    connect every pair of points whose distance lies within the range from mindist to maxdist

    points are not connected to themselves, even with a distance of zero

    :param distances: an n x n matrix containing distances among points
    :param maxdist: the maximum distance between points to connect. this distance is exclusive
    :param mindist: the minimum distance between points to connect (default = 0). this distance is inclusive
    :return: returns a Connections object, with its min_scale and max_scale set to mindist and maxdist
    """
    n = check_for_square_matrix(distances)
    network = Connections(n)
    network.min_scale = mindist
    network.max_scale = maxdist
    # only the lower triangle is visited, so the diagonal (self pairs) is never considered
    for i in range(1, n):
        for j in range(i):
            separation = distances[i, j]
            if separation < mindist:
                continue
            if separation < maxdist:
                network.store(i, j)
    return network
Ejemplo n.º 11
0
def _ldn_strictly_between(value, end1, end2) -> bool:
    """return True when value lies strictly between end1 and end2, given in either order"""
    return (end1 < value < end2) or (end1 > value > end2)


def _ldn_within(value, end1, end2) -> bool:
    """return True when value lies between end1 and end2 (inclusive), given in either order"""
    return (end1 <= value <= end2) or (end1 >= value >= end2)


def least_diagonal_network(x: numpy.ndarray, y: numpy.ndarray,
                           distances: numpy.ndarray) -> Connections:
    """
    calculate connections among points based on a least diagonal network

    pairs of points are examined from closest to farthest; a pair is connected unless the line segment between
    its points crosses or overlaps the segment of a previously connected pair. segments which share an end point
    are never tested against each other

    :param x: the x coordinates of n points
    :param y: the y coordinates of n points
    :param distances: an n x n matrix containing the distances among the points defined by x and y
    :return: returns a Connections object
    :raises ValueError: if the coordinate arrays and the distance matrix differ in length
    """
    n = check_for_square_matrix(distances)
    if (n != len(x)) or (n != len(y)):
        raise ValueError(
            "The coordinate arrays and the distance matrix must have the same length"
        )
    output = Connections(n)
    # flatten distances into one dimension (half matrix only), but also track position in matrix
    dists = [[distances[i, j], i, j] for i in range(n) for j in range(i)]
    dists.sort()
    good_pairs = []
    m1, m2 = 1, 1
    b1, b2 = 0, 0
    # work through all pairs from closest to farthest
    for _, i, j in dists:
        vertical1 = x[i] == x[j]
        if not vertical1:
            m1 = (y[i] - y[j]) / (x[i] - x[j])  # calculate slope
            b1 = y[i] - m1 * x[i]  # calculate intercept
        # compare to previously added links
        good = True
        for pair1, pair2 in good_pairs:
            if (i == pair1) or (i == pair2) or (j == pair1) or (j == pair2):
                continue  # segments sharing an end point are not tested
            vertical2 = x[pair1] == x[pair2]
            if not vertical2:
                m2 = (y[pair1] - y[pair2]) / (x[pair1] - x[pair2])  # calculate slope
                b2 = y[pair1] - m2 * x[pair1]  # calculate intercept
            if vertical1 and vertical2:
                # both segments are vertical; they can only overlap when they share the same x, in which case
                # they overlap if either point of the earlier pair is between both points of the new pair
                # (earlier pairs are never longer than the new pair, so this direction is sufficient)
                conflict = (x[i] == x[pair1]) and (
                    _ldn_strictly_between(y[pair1], y[i], y[j])
                    or _ldn_strictly_between(y[pair2], y[i], y[j]))
            elif (not vertical1) and (not vertical2) and (m1 == m2):
                # segments have identical slopes; they can only overlap if they have identical intercepts, in
                # which case either point of the earlier pair must fall between both points of the new pair.
                # x coordinates are compared because they always differ along a non-vertical segment, whereas
                # y coordinates are all equal when the segments are horizontal
                conflict = (b1 == b2) and (
                    _ldn_strictly_between(x[pair1], x[i], x[j])
                    or _ldn_strictly_between(x[pair2], x[i], x[j]))
            else:
                if vertical1:
                    # one segment is vertical; calculate the y of the other line at that x position
                    xc = x[i]
                    yc = m2 * xc + b2
                elif vertical2:
                    # one segment is vertical; calculate the y of the other line at that x position
                    xc = x[pair1]
                    yc = m1 * xc + b1
                else:
                    xc = (b2 - b1) / (m1 - m2)
                    yc = m1 * xc + b1
                # xc, yc is the projected crossing point of the two lines; the segments cross if this point
                # falls within both segments
                conflict = (_ldn_within(xc, x[i], x[j])
                            and _ldn_within(yc, y[i], y[j])
                            and _ldn_within(xc, x[pair1], x[pair2])
                            and _ldn_within(yc, y[pair1], y[pair2]))
            if conflict:
                good = False
                break  # one conflict is enough to reject the pair
        if good:
            good_pairs.append([i, j])
    for pair in good_pairs:
        output.store(pair[0], pair[1])
    return output
Ejemplo n.º 12
0
def gearys_c(y: numpy.ndarray,
             weights: Connections,
             alt_weights: Optional[numpy.ndarray] = None,
             variance: Optional[str] = "random",
             permutations: int = 0):
    """
    calculate Geary's c for a set of values and a set of connections, with optional analytical and permutation
    tests

    :param y: the observed values, one per point (assumed to be a one-dimensional array whose length matches the
              weights matrix -- TODO confirm against callers)
    :param weights: a Connections object; its as_binary() matrix is used as the weight matrix
    :param alt_weights: an optional matrix multiplied element-wise with the binary weights to create non-binary
                        weights
    :param variance: the assumption used for the analytical variance. "normal" uses the normality formula, None
                     skips the analytical test, and any other value accepted by check_variance_assumption()
                     (the default is "random") uses the randomization formula
    :param permutations: the number of permutations for the permutation test, counting the observed value as one
                         of them; 0 (default) skips the permutation test
    :return: a tuple containing weights.min_scale, weights.max_scale, weights.n_pairs(), the expected value of c
             (always 1), the observed c, its standard deviation, the z-score, the two-tailed probability, the
             permutation probability, and the list of permuted c values (the observed value is always the first
             entry). the standard deviation, z-score and probability are None when variance is None
    """
    check_variance_assumption(variance)
    n = len(y)
    mean_y = numpy.mean(y)
    dev_y = y - mean_y  # deviations from mean
    w = weights.as_binary()
    if alt_weights is not None:  # multiply to create non-binary weights, if necessary
        # build a new array rather than multiplying in place: this leaves the matrix returned by as_binary()
        # untouched and avoids a casting error if that matrix has an integer dtype and alt_weights is float
        w = w * alt_weights
    sumy2 = numpy.sum(
        numpy.square(dev_y),
        dtype=numpy.float64)  # sum of squared deviations from mean
    sumw = numpy.sum(w, dtype=numpy.float64)  # sum of weight matrix
    sumw2 = sumw**2
    denominator = 2 * sumw * sumy2  # shared by the observed and every permuted statistic
    # weighted sum of squared pairwise differences; only the differences are squared, not the weights, so that
    # non-binary weights are applied correctly (for binary weights the result is unchanged)
    sumdif2 = numpy.sum(w * numpy.square(dev_y[:, numpy.newaxis] - dev_y),
                        dtype=numpy.float64)
    geary = (n - 1) * sumdif2 / denominator

    # permutations
    permuted_c_list = [geary]
    perm_p = 1  # the observed value counts as one permutation
    if permutations > 0:
        rand_y = numpy.copy(dev_y)
        for _ in range(permutations - 1):
            numpy.random.shuffle(rand_y)
            perm_sumdif2 = numpy.sum(
                w * numpy.square(rand_y[:, numpy.newaxis] - rand_y),
                dtype=numpy.float64)
            perm_geary = (n - 1) * perm_sumdif2 / denominator
            permuted_c_list.append(perm_geary)
            # two-tailed: count permuted values at least as far from expectation as the observed value
            if abs(perm_geary - 1) >= abs(geary - 1):
                perm_p += 1
        perm_p /= permutations

    if variance is None:
        sd, z, p = None, None, None
    else:
        s1 = numpy.sum(numpy.square(w + numpy.transpose(w)),
                       dtype=numpy.float64) / 2
        s2 = numpy.sum(
            numpy.square(numpy.sum(w, axis=0) + numpy.sum(w, axis=1)),
            dtype=numpy.float64)
        if variance == "normal":
            v = ((2 * s1 + s2) * (n - 1) - 4 * sumw2) / (2 * (n + 1) * sumw2)
        else:  # random
            b2 = n * numpy.sum(numpy.power(dev_y, 4),
                               dtype=numpy.float64) / (sumy2**2)
            nn2n3 = n * (n - 2) * (n - 3)
            v = ((n - 1) * s1 * (n**2 - 3 * n + 3 - (n - 1) * b2) /
                 (nn2n3 * sumw2) - (n - 1) * s2 * (n**2 + 3 * n - 6 -
                                                   (n**2 - n + 2) * b2) /
                 (4 * nn2n3 * sumw2) + (n**2 - 3 - b2 * (n - 1)**2) / nn2n3)
        sd = sqrt(v)  # convert to standard dev
        z = abs(geary - 1) / sd
        p = scipy.stats.norm.sf(z) * 2  # two-tailed test

    return weights.min_scale, weights.max_scale, weights.n_pairs(
    ), 1, geary, sd, z, p, perm_p, permuted_c_list