Ejemplo n.º 1
0
def dice_pairwise_und(a1, a2):
    '''
    Calculates pairwise dice similarity for each vertex between two
    matrices. Treats the matrices as binary and undirected.

    Parameters
    ----------
    a1 : NxN np.ndarray
        Matrix 1
    a2 : NxN np.ndarray
        Matrix 2

    Returns
    -------
    d : Nx1 np.ndarray
        dice similarity vector. Entry i is twice the number of connections
        that column i has in common in both matrices, divided by the total
        number of connections in column i of the two matrices.

    Notes
    -----
    Both inputs are binarized and their diagonals are zeroed on copies; the
    caller's arrays are not modified.

    A vertex without any connection in either matrix produces 0/0, which
    NumPy evaluates to nan.
    '''
    a1 = binarize(a1, copy=True)
    a2 = binarize(a2, copy=True)  # ensure matrices are binary

    np.fill_diagonal(a1, 0)
    np.fill_diagonal(a2, 0)  # set diagonals to 0

    # common neighbors and total neighbors of every vertex (column-wise)
    common = np.sum(np.logical_and(a1, a2), axis=0)
    total = np.sum(a1, axis=0) + np.sum(a2, axis=0)

    # the float literal forces true division regardless of the dtype the
    # sums come back in (and regardless of the interpreter version)
    return 2.0 * common / total
Ejemplo n.º 2
0
def dice_pairwise_und(a1, a2):
    '''
    Calculates pairwise dice similarity for each vertex between two
    matrices. Treats the matrices as binary and undirected.

    Parameters
    ----------
    a1 : NxN np.ndarray
        Matrix 1
    a2 : NxN np.ndarray
        Matrix 2

    Returns
    -------
    d : Nx1 np.ndarray
        dice similarity vector; d[i] = 2 * c / (k1 + k2) where c is the
        number of connections shared by column i of both matrices and k1,
        k2 are the column sums of the binarized, zero-diagonal matrices.

    Notes
    -----
    The inputs are copied by binarize before the diagonals are cleared, so
    the caller's matrices are left untouched.

    If column i is empty in both matrices the result for that vertex is
    nan (0/0).
    '''
    a1 = binarize(a1, copy=True)
    a2 = binarize(a2, copy=True)  # ensure matrices are binary

    np.fill_diagonal(a1, 0)
    np.fill_diagonal(a2, 0)  # set diagonals to 0

    # shared connections per vertex, for all vertices at once
    shared = np.logical_and(a1, a2).sum(axis=0)
    degree_sum = a1.sum(axis=0) + a2.sum(axis=0)

    # multiply by a float first so the division is never integer division
    return (2.0 * shared) / degree_sum
Ejemplo n.º 3
0
def degrees_dir(CIJ):
    '''
    Compute the in-degree, out-degree and total degree of every node of a
    directed network.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        directed binary/weighted connection matrix

    Returns
    -------
    id : Nx1 np.ndarray
        node in-degree (column sums of the binarized matrix)
    od : Nx1 np.ndarray
        node out-degree (row sums of the binarized matrix)
    deg : Nx1 np.ndarray
        node degree (in-degree + out-degree)

    Notes
    -----
    Incoming connections of a node are read from its column of CIJ.
    The matrix is binarized on a copy first, so weights play no role.
    '''
    adjacency = binarize(CIJ, copy=True)  # drop the weights
    in_degree = np.sum(adjacency, axis=0)   # sum down each column
    out_degree = np.sum(adjacency, axis=1)  # sum along each row
    return in_degree, out_degree, in_degree + out_degree
Ejemplo n.º 4
0
def degrees_dir(CIJ):
    '''
    Return in-degree, out-degree and their sum for each node of a directed
    connection matrix.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        directed binary/weighted connection matrix

    Returns
    -------
    id : Nx1 np.ndarray
        node in-degree
    od : Nx1 np.ndarray
        node out-degree
    deg : Nx1 np.ndarray
        node degree (in-degree + out-degree)

    Notes
    -----
    In-degree is the column sum and out-degree the row sum of the
    binarized copy of CIJ; weight information is discarded.
    '''
    binary = binarize(CIJ, copy=True)  # work on a 0/1 copy of the input
    per_column = np.sum(binary, axis=0)  # links arriving at each node
    per_row = np.sum(binary, axis=1)     # links leaving each node
    total = per_column + per_row
    return per_column, per_row, total
Ejemplo n.º 5
0
def get_components(A, no_depend=False):
    '''
    Find the connected components of an undirected graph given by a
    symmetric adjacency matrix. Every node receives the (one-based) index
    of the component it belongs to, and the size of each component is
    reported alongside.

    Parameters
    ----------
    A : NxN np.ndarray
        binary undirected adjacency matrix
    no_depend : Any
        Does nothing, included for backwards compatibility

    Returns
    -------
    comps : Nx1 np.ndarray
        vector of component assignments for each node
    comp_sizes : Mx1 np.ndarray
        vector of component sizes

    Raises
    ------
    BCTParamError
        If A is not equal to its transpose.

    Notes
    -----
    Note: disconnected nodes will appear as components with a component
    size of 1

    Note: The identity of each component (i.e. its numerical value in the
    result) is not guaranteed to be identical the value returned in BCT,
    matlab code, although the component topology is.

    Many thanks to Nick Cullen for providing this implementation
    '''
    is_symmetric = np.all(A == A.T)
    if not is_symmetric:  # a directed matrix cannot be handled
        raise BCTParamError('get_components can only be computed for undirected'
                            ' matrices.  If your matrix is noisy, correct it with np.around')

    A = binarize(A, copy=True)
    n = len(A)
    # a self-connection on every node guarantees that isolated nodes end up
    # in a singleton group
    np.fill_diagonal(A, 1)

    groups = []
    for u in range(n):
        for v in range(n):
            if not (A[u, v] == 1):
                continue
            # fold every existing group touching this edge into one set and
            # keep the untouched groups in their current order
            merged = {u, v}
            untouched = []
            for group in groups:
                if group.isdisjoint(merged):
                    untouched.append(group)
                else:
                    merged = group.union(merged)
            untouched.append(merged)
            groups = untouched

    labels = [label
              for node in range(n)
              for label, group in enumerate(groups, 1)
              if node in group]
    sizes = [len(group) for group in groups]

    return np.array(labels), np.array(sizes)
Ejemplo n.º 6
0
def get_components(A, no_depend=False):
    '''
    Label the connected components of an undirected graph. Nodes in the
    same component share one index in comps, and comp_sizes holds the
    number of nodes in each component.

    Parameters
    ----------
    A : NxN np.ndarray
        binary undirected adjacency matrix
    no_depend : Any
        Does nothing, included for backwards compatibility

    Returns
    -------
    comps : Nx1 np.ndarray
        vector of component assignments for each node
    comp_sizes : Mx1 np.ndarray
        vector of component sizes

    Raises
    ------
    BCTParamError
        If A differs from its transpose.

    Notes
    -----
    Note: disconnected nodes will appear as components with a component
    size of 1

    Note: The identity of each component (i.e. its numerical value in the
    result) is not guaranteed to be identical the value returned in BCT,
    matlab code, although the component topology is.

    Many thanks to Nick Cullen for providing this implementation
    '''
    undirected = np.all(A == A.T)
    if not undirected:
        raise BCTParamError('get_components can only be computed for undirected'
                            ' matrices.  If your matrix is noisy, correct it with np.around')

    A = binarize(A, copy=True)
    n = len(A)
    np.fill_diagonal(A, 1)  # every node is connected to itself

    # node pairs joined by an edge, in row-major order
    pairs = []
    for u in range(n):
        for v in range(n):
            if A[u, v] == 1:
                pairs.append({u, v})

    components = []
    for pair in pairs:
        # sets that do not touch the current pair are carried over as-is;
        # the rest are absorbed into the pair
        carried = []
        for existing in components:
            if existing.isdisjoint(pair):
                carried.append(existing)
            else:
                pair = existing.union(pair)
        components = carried + [pair]

    assignment = []
    for node in range(n):
        for idx, members in enumerate(components):
            if node in members:
                assignment.append(idx + 1)  # component labels start at 1

    comps = np.array(assignment)
    comp_sizes = np.array([len(members) for members in components])
    return comps, comp_sizes
Ejemplo n.º 7
0
def motif3struct_bin(A):
    '''
    Structural motifs are patterns of local connectivity. Motif frequency
    is the frequency of occurrence of motifs around a node.

    Parameters
    ----------
    A : NxN np.ndarray
        binary directed connection matrix

    Returns
    -------
    f : 13x1 np.ndarray
        motif count vector for the whole graph
    F : 13xN np.ndarray
        motif frequency matrix (number of motifs of each class that every
        node takes part in)

    Notes
    -----
    The values are returned in the order (f, F).

    The motif lookup tables 'm3n' and 'id3' are read from the .mat file
    named by the module-level variable motiflib, located next to this
    module.
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    m3n = mot['m3n']
    id3 = mot['id3'].squeeze()

    n = len(A)  # number of vertices in A
    f = np.zeros((13, ))  # motif count for whole graph
    F = np.zeros((13, n))  # motif frequency

    A = binarize(A, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    # decimal weights that turn the six edge flags into a motif code
    weights = np.power(10, np.arange(5, -1, -1))

    for u in range(n - 2):
        # v1: neighbors of u (>u). The mask has one entry per node so that
        # np.where yields real node indices.
        V1 = np.zeros((n, ), dtype=bool)
        V1[u + 1:] = As[u, u + 1:]
        for v1 in np.where(V1)[0]:
            # v2: neighbors of v1 (>u)
            V2 = np.zeros((n, ), dtype=bool)
            V2[u + 1:] = As[v1, u + 1:]
            V2[V1] = False  # not already in V1 (boolean mask, not indices)
            # and all neighbors of u (>v1)
            V2[v1 + 1:] |= As[u, v1 + 1:]
            for v2 in np.where(V2)[0]:
                a = np.array((A[v1, u], A[v2, u], A[u, v1],
                              A[v2, v1], A[u, v2], A[v1, v2]))
                s = np.uint32(np.sum(weights * a))
                # motif class of this triple (table ids are one-based)
                ix = id3[np.squeeze(s == m3n)] - 1
                F[ix, u] += 1
                F[ix, v1] += 1
                F[ix, v2] += 1
                f[ix] += 1

    return f, F
Ejemplo n.º 8
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif3struct_bin(A):
    '''
    Structural motifs are patterns of local connectivity. Motif frequency
    is the frequency of occurrence of motifs around a node.

    Parameters
    ----------
    A : NxN np.ndarray
        binary directed connection matrix

    Returns
    -------
    f : 13x1 np.ndarray
        motif count vector for the whole graph
    F : 13xN np.ndarray
        motif frequency matrix (per-node count of each motif class)

    Notes
    -----
    The return order is (f, F).

    Lookup tables 'm3n' and 'id3' come from the .mat file whose name is
    stored in the module-level variable motiflib; the file is expected in
    the same directory as this module.
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    m3n = mot['m3n']
    id3 = mot['id3'].squeeze()

    n = len(A)  # number of vertices in A
    f = np.zeros((13,))  # motif count for whole graph
    F = np.zeros((13, n))  # motif frequency

    A = binarize(A, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    # positional weights used to encode the six possible edges as one number
    place_values = np.power(10, np.arange(5, -1, -1))

    for u in range(n - 2):
        # v1: neighbors of u (>u); full-length mask keeps indices aligned
        # with node numbers
        V1 = np.zeros((n,), dtype=bool)
        V1[u + 1:] = As[u, u + 1:]
        for v1 in np.where(V1)[0]:
            # v2: neighbors of v1 (>u)
            V2 = np.zeros((n,), dtype=bool)
            V2[u + 1:] = As[v1, u + 1:]
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2[v1 + 1:] |= As[u, v1 + 1:]
            for v2 in np.where(V2)[0]:
                a = np.array((A[v1, u], A[v2, u], A[u, v1],
                              A[v2, v1], A[u, v2], A[v1, v2]))
                s = np.uint32(np.sum(place_values * a))
                # look up the motif class; ids in the table start at 1
                ix = id3[np.squeeze(s == m3n)] - 1
                F[ix, u] += 1
                F[ix, v1] += 1
                F[ix, v2] += 1
                f[ix] += 1

    return f, F
Ejemplo n.º 9
0
    def binzrize(self):
        """Binarize every session slice of the thresholded matrix stack.

        Reads ``self.corr_matrix_thr`` (indexed as [:, :, session]), passes
        each session slice through ``bctu.binarize`` and stores the results
        in ``self.corr_matrix_thr_bin``, which is also returned. The output
        stack is allocated with the first dimension of the input used for
        both of its leading axes.
        """
        source = self.corr_matrix_thr
        side = source.shape[0]
        n_sessions = source.shape[2]

        # allocate the result on the instance before filling it
        self.corr_matrix_thr_bin = np.zeros((side, side, n_sessions))

        for idx in range(n_sessions):
            self.corr_matrix_thr_bin[:, :, idx] = bctu.binarize(
                self.corr_matrix_thr[:, :, idx])

        return self.corr_matrix_thr_bin
Ejemplo n.º 10
0
def jdegree(CIJ):
    '''
    This function returns the joint degree distribution matrix J, in which
    element J[u, v] is the number of nodes that have in-degree u and
    out-degree v.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        directed binary/weighted connection matrix

    Returns
    -------
    J : ZxZ np.ndarray
        joint degree distribution matrix, Z = (largest degree) + 1 so that
        row/column 0 holds the nodes with zero in-/out-degree
    J_od : number
        number of vertices with od>id (sum of the strict upper triangle)
    J_id : number
        number of vertices with id>od (sum of the strict lower triangle)
    J_bl : number
        number of vertices with id==od (sum of the main diagonal)

    Notes
    -----
    Weights are discarded.
    '''
    CIJ = binarize(CIJ, copy=True)  # ensure CIJ is binary

    # The degrees are used both as the size of J and as indices into it, so
    # they must be integers even when CIJ has a floating point dtype.
    in_deg = np.sum(CIJ, axis=0).astype(int)   # indegree = column sum
    out_deg = np.sum(CIJ, axis=1).astype(int)  # outdegree = row sum

    # one extra row/column to accommodate degree zero
    szJ = int(np.max((in_deg, out_deg))) + 1
    J = np.zeros((szJ, szJ))

    for node_in, node_out in zip(in_deg, out_deg):
        J[node_in, node_out] += 1

    J_od = np.sum(np.triu(J, 1))
    J_id = np.sum(np.tril(J, -1))
    J_bl = np.sum(np.diag(J))
    return J, J_od, J_id, J_bl
Ejemplo n.º 11
0
def jdegree(CIJ):
    '''
    Build the joint degree distribution of a directed network: J[u, v]
    counts the nodes whose in-degree is u and whose out-degree is v.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        directed binary/weighted connection matrix

    Returns
    -------
    J : ZxZ np.ndarray
        joint degree distribution matrix; Z is one more than the largest
        in- or out-degree, leaving index 0 for degree zero
    J_od : number
        number of vertices with od>id
    J_id : number
        number of vertices with id>od
    J_bl : number
        number of vertices with id==od

    Notes
    -----
    Weights are discarded.
    '''
    CIJ = binarize(CIJ, copy=True)  # ensure CIJ is binary

    # Cast to int: a float-valued CIJ gives float sums, which are valid
    # neither as an array shape nor as array indices.
    indegree = np.sum(CIJ, axis=0).astype(int)
    outdegree = np.sum(CIJ, axis=1).astype(int)

    # upper triangle of J: od>id, lower triangle: id>od, diagonal: id==od
    size = int(np.max((indegree, outdegree))) + 1
    J = np.zeros((size, size))

    for k_in, k_out in zip(indegree, outdegree):
        J[k_in, k_out] += 1

    J_od = np.sum(np.triu(J, 1))
    J_id = np.sum(np.tril(J, -1))
    J_bl = np.sum(np.diag(J))
    return J, J_od, J_id, J_bl
Ejemplo n.º 12
0
def degrees_und(CIJ):
    '''
    Compute the degree (number of links) of every node of an undirected
    network.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        undirected binary/weighted connection matrix

    Returns
    -------
    deg : Nx1 np.ndarray
        node degree (column sums of the binarized matrix)

    Notes
    -----
    The matrix is binarized on a copy, so weights do not contribute.
    '''
    adjacency = binarize(CIJ, copy=True)  # 0/1 copy of the input
    deg = np.sum(adjacency, axis=0)
    return deg
Ejemplo n.º 13
0
def degrees_und(CIJ):
    '''
    Return, for each node, the number of links attached to it.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        undirected binary/weighted connection matrix

    Returns
    -------
    deg : Nx1 np.ndarray
        node degree

    Notes
    -----
    Degrees are the column sums of a binarized copy of CIJ; weight
    information is discarded.
    '''
    binary = binarize(CIJ, copy=True)  # weights are irrelevant for degree
    column_totals = np.sum(binary, axis=0)
    return column_totals
Ejemplo n.º 14
0
def distance_bin(G):
    '''
    Compute the matrix of shortest path lengths between all pairs of
    nodes. Entry (u,v) is the length of the shortest path from node u to
    node v.

    Parameters
    ----------
    G : NxN np.ndarray
        binary directed/undirected connection matrix

    Returns
    -------
    D : NxN
        distance matrix

    Notes
    -----
    Lengths between disconnected nodes are set to Inf.
    Lengths on the main diagonal are set to 0.
    Algorithm: Algebraic shortest paths (repeated multiplication by the
    adjacency matrix; a pair gets its distance the first time a walk
    between its nodes appears).
    '''
    G = binarize(G, copy=True)
    dist = np.eye(len(G))  # nonzero diagonal keeps it out of the search
    walks = G.copy()       # walk counts for the current length
    fresh = walks != 0     # pairs first reached at the current length
    hops = 1

    while np.any(fresh):
        dist += hops * fresh
        hops += 1
        walks = np.dot(walks, G)
        # only pairs that have no distance yet can be newly reached
        fresh = (walks != 0) * (dist == 0)

    dist[dist == 0] = np.inf  # never reached: disconnected
    np.fill_diagonal(dist, 0)
    return dist
Ejemplo n.º 15
0
def distance_bin(G):
    '''
    Return the distance matrix of a binary network: entry (u,v) holds the
    length of the shortest path from node u to node v.

    Parameters
    ----------
    G : NxN np.ndarray
        binary directed/undirected connection matrix

    Returns
    -------
    D : NxN
        distance matrix

    Notes
    -----
    Lengths between disconnected nodes are set to Inf.
    Lengths on the main diagonal are set to 0.
    Algorithm: Algebraic shortest paths.
    '''
    G = binarize(G, copy=True)
    D = np.eye(len(G))
    path_counts = G.copy()         # number of walks of the current length
    newly_reached = path_counts != 0
    length = 1

    while np.any(newly_reached):
        # record the current length for every pair reached for the first time
        D += length * newly_reached
        length += 1
        path_counts = np.dot(path_counts, G)
        has_walk = path_counts != 0
        unassigned = D == 0
        newly_reached = has_walk * unassigned

    unreachable = D == 0
    D[unreachable] = np.inf  # disconnected nodes are assigned d=inf
    np.fill_diagonal(D, 0)
    return D
Ejemplo n.º 16
0
def findwalks(CIJ):
    '''
    Walks are sequences of linked nodes, that may visit a single node more
    than once. This function finds the number of walks of a given length,
    between any two nodes.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        binary directed/undirected connection matrix

    Returns
    -------
    Wq : NxNxQ np.ndarray
        Wq[i,j,q-1] is the number of walks from i to j of length q, for
        q = 1..N (zero-based slice q-1 holds the q-th power of CIJ)
    twalk : number
        total number of walks found
    wlq : Qx1 np.ndarray
        walk length distribution; wlq[q-1] is the number of walks of
        length q

    Notes
    -----
    Wq grows very quickly for larger N,K,q. Weights are discarded.
    '''
    CIJ = binarize(CIJ, copy=True)
    n = len(CIJ)
    Wq = np.zeros((n, n, n))

    # Start from the identity so that the first product is CIJ itself:
    # slice 0 then holds walks of length 1, slice 1 walks of length 2, ...
    CIJpwr = np.eye(n)
    for q in range(n):
        CIJpwr = np.dot(CIJpwr, CIJ)
        Wq[:, :, q] = CIJpwr

    twalk = np.sum(Wq)  # total number of walks
    wlq = np.sum(np.sum(Wq, axis=0), axis=0)
    return Wq, twalk, wlq
Ejemplo n.º 17
0
def findwalks(CIJ):
    '''
    Walks are sequences of linked nodes, that may visit a single node more
    than once. This function finds the number of walks of a given length,
    between any two nodes.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        binary directed/undirected connection matrix

    Returns
    -------
    Wq : NxNxQ np.ndarray
        stack of matrix powers of the binarized CIJ: slice Wq[:,:,q-1]
        holds the number of walks of length q between every pair of nodes,
        q = 1..N
    twalk : number
        total number of walks found
    wlq : Qx1 np.ndarray
        walk length distribution (wlq[q-1] counts the walks of length q)

    Notes
    -----
    Wq grows very quickly for larger N,K,q. Weights are discarded.
    '''
    CIJ = binarize(CIJ, copy=True)
    n = len(CIJ)
    Wq = np.zeros((n, n, n))

    # Seeding with the identity makes iteration q produce CIJ^(q+1), so
    # walks of length 1 land in slice 0 and nothing is overwritten.
    power = np.eye(n)
    for q in range(n):
        power = np.dot(power, CIJ)
        Wq[:, :, q] = power

    twalk = np.sum(Wq)  # total number of walks
    wlq = np.sum(np.sum(Wq, axis=0), axis=0)
    return Wq, twalk, wlq
Ejemplo n.º 18
0
def get_components_fast(A):
    '''
    Returns the components of an undirected graph specified by a binary
    and undirected adjacency matrix. Components and their constituent
    nodes are assigned the same index and stored in the vector comps. The
    vector comp_sizes contains the number of nodes belonging to each
    component.

    Parameters
    ----------
    A : NxN np.ndarray
        binary undirected adjacency matrix

    Returns
    -------
    comps : Nx1 np.ndarray
        vector of component assignments for each node
    comp_sizes : Mx1 np.ndarray
        vector of component sizes

    Raises
    ------
    BCTParamError
        If A is not equal to its transpose.

    Notes
    -----
    Note: disconnected nodes will appear as components with a component
    size of 1

    Note: The identity of each component (i.e. its numerical value in the
    result) is not guaranteed to be identical the value returned in BCT,
    although the component topology is.

    The components are found by merging sets of connected nodes edge by
    edge; no external graph library is used.

    From Nick Cullen (timings as reported by the original author, not
    re-measured):
        sparse sample: about 11 ms per call, versus 2 ms with networkx and
        42 ms for the dependency-free breadth-first search;
        dense sample: about 52.8 ms per call, versus 59 ms with networkx
        and 1.76 s for the dependency-free breadth-first search.
    '''
    symmetric = np.all(A == A.T)
    if not symmetric:  # only undirected matrices are supported
        raise BCTParamError('get_components can only be computed for undirected'
                            ' matrices.  If your matrix is noisy, correct it with np.around')

    A = binarize(A, copy=True)
    n = len(A)
    np.fill_diagonal(A, 1)  # self-connections make isolated nodes show up

    clusters = []
    for u in range(n):
        for v in range(n):
            if not (A[u, v] == 1):
                continue
            # absorb every cluster sharing a node with this edge; clusters
            # that do not touch it keep their relative order
            current = {u, v}
            remaining = []
            for cluster in clusters:
                if cluster.isdisjoint(current):
                    remaining.append(cluster)
                else:
                    current = cluster.union(current)
            remaining.append(current)
            clusters = remaining

    membership = [number
                  for node in range(n)
                  for number, cluster in enumerate(clusters, 1)
                  if node in cluster]
    comps = np.array(membership)
    comp_sizes = np.array([len(cluster) for cluster in clusters])

    return comps, comp_sizes
Ejemplo n.º 19
0
def gtom(adj, nr_steps):
    '''
    The m-th step generalized topological overlap measure (GTOM) quantifies
    the extent to which a pair of nodes have similar m-th step neighbors.
    Mth-step neighbors are nodes that are reachable by a path of at most
    length m.

    This function computes the N x N generalized topological overlap
    measure (GTOM) matrix for the given number of steps.

    Parameters
    ----------
    adj : NxN np.ndarray
        connection matrix
    nr_steps : int
        number of steps. Values larger than the number of nodes are
        reduced to the number of nodes (a warning is printed).

    Returns
    -------
    gt : NxN np.ndarray
        GTOM matrix. For nr_steps == 0 the binarized copy of adj is
        returned unchanged.

    Notes
    -----
    When nr_steps is equal to 1, GTOM is identical to the topological
    overlap measure (TOM). In that case the 'gt' matrix records, for each
    pair of nodes, the fraction of neighbors the two nodes share in
    common, where "neighbors" are one step removed. As nr_steps is
    increased, neighbors that are further out are considered. The 'gt'
    matrix can be converted from a similarity to a distance matrix by
    taking 1-gt.
    '''
    bm = binarize(adj, copy=True)
    bm_aux = bm.copy()
    nr_nodes = len(adj)

    if nr_steps > nr_nodes:
        # single-argument print call: valid on Python 2 and Python 3
        print("Warning: nr_steps exceeded nr_nodes. Setting nr_steps=nr_nodes")
        nr_steps = nr_nodes
    if nr_steps == 0:
        return bm

    # One expansion pass per step beyond the first; the upper bound is
    # inclusive so that nr_steps=2 performs exactly one expansion.
    # NOTE(review): rows of bm_aux are read while other rows are being
    # updated in the same pass, so a pass may reach further than one extra
    # step -- confirm against the reference implementation.
    for _ in range(2, nr_steps + 1):
        for i in range(nr_nodes):
            # neighbors of node i
            ng_col, = np.where(bm_aux[i, :] == 1)
            # neighbors of neighbors of node i
            _, nng_col = np.where(bm_aux[ng_col, :] == 1)
            new_ng = np.setdiff1d(nng_col, (i, ))

            # neighbors of neighbors of i become considered neighbors of i
            bm_aux[i, new_ng] = 1
            bm_aux[new_ng, i] = 1  # keep the matrix symmetric

    # numerator of GTOM formula
    numerator_mat = np.dot(bm_aux, bm_aux) + bm + np.eye(nr_nodes)

    # vector of node degrees
    bms = np.sum(bm_aux, axis=0)
    bms_r = np.tile(bms, (nr_nodes, 1))

    # NOTE(review): this takes the larger of the two node degrees; the TOM
    # definition is usually stated with the smaller one -- verify.
    denominator_mat = -bm + np.where(bms_r > bms_r.T, bms_r, bms_r.T) + 1
    return numerator_mat / denominator_mat
Ejemplo n.º 20
0
def reachdist(CIJ, ensure_binary=True):
    '''
    The binary reachability matrix describes reachability between all pairs
    of nodes. An entry (u,v)=1 means that there exists a path from node u
    to node v; alternatively (u,v)=0.

    The distance matrix contains lengths of shortest paths between all
    pairs of nodes. An entry (u,v) represents the length of shortest path
    from node u to  node v. The average shortest path length is the
    characteristic path length of the network.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        binary directed/undirected connection matrix
    ensure_binary : bool
        Binarizes input. Defaults to true. Disabling it is only meant for
        testing. NOTE(review): the previous text pointed to distance_wei
        for "unweighted" matrices; presumably "weighted" was meant.

    Returns
    -------
    R : NxN np.ndarray
        binary reachability matrix
    D : NxN np.ndarray
        distance matrix (floating point, np.inf where no path was found)

    Notes
    -----
    faster but more memory intensive than "breadthdist.m".
    '''
    if ensure_binary:
        CIJ = binarize(CIJ)

    n = len(CIJ)
    R = CIJ.copy()
    # D receives np.inf further down, so it must be a float array even when
    # CIJ has an integer dtype.
    D = np.array(CIJ, dtype=float)
    CIJpwr = CIJ.copy()
    powr = 2

    # check for vertices that have no incoming or outgoing connections
    # these are ignored by reachdist
    in_deg = np.sum(CIJ, axis=0)
    out_deg = np.sum(CIJ, axis=1)
    id0, = np.where(in_deg == 0)  # nothing goes in, so column(R) will be 0
    od0, = np.where(out_deg == 0)  # nothing comes out, so row(R) will be 0
    # use these columns and rows to check for reachability
    col = np.delete(np.arange(n), id0)
    row = np.delete(np.arange(n), od0)

    # Raise the adjacency matrix to successive powers. Every pass adds 1 to
    # D for each pair that is reachable so far, so pairs reached earlier
    # accumulate larger counts. A loop is used instead of recursion so that
    # large graphs cannot exhaust the interpreter's recursion limit.
    while True:
        CIJpwr = np.dot(CIJpwr, CIJ)
        R = np.logical_or(R, CIJpwr != 0)
        D += R
        if powr > n or not np.any(R[np.ix_(row, col)] == 0):
            break
        powr += 1

    # 'invert' the accumulated counts to get distances
    D = powr - D + 1

    # put inf if no path found
    D[D == n + 2] = np.inf
    D[:, id0] = np.inf
    D[od0, :] = np.inf

    return R, D
Ejemplo n.º 21
0
def findpaths(CIJ, qmax, sources, savepths=False):
    '''
    Paths are sequences of linked nodes, that never visit a single node
    more than once. This function finds all paths that start at a set of
    source nodes, up to a specified length. Warning: very memory-intensive.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        binary directed/undirected connection matrix
    qmax : int
        maximal path length
    sources : Nx1 np.ndarray
        source units from which paths are grown
    savepths : bool
        True if all paths are to be collected and returned. This functionality
        is currently not enabled: a NotImplementedError is raised as soon
        as paths longer than 1 are processed.

    Returns
    -------
    Pq : NxNxQ np.ndarray
        Path matrix with Pq[i,j,q-1] = number of paths from i to j with
        length q
    tpath : number
        total number of paths found
    plq : Qx1 np.ndarray
        path length distribution as a function of q
    qstop : int
        path length at which findpaths is stopped
    allpths : list or np.ndarray
        empty list unless savepths is set, in which case it is a copy of
        the paths of length 1 (one path per row)
    util : NxQ np.ndarray
        node use index; util[v,q-1] counts the occurrences of node v in the
        paths of length q (for q > 1 the start/end node of a cycle is
        counted once)

    Raises
    ------
    NotImplementedError
        If savepths is true and qmax is at least 2.

    Notes
    -----
    A path may return to its source node (a cycle); such a path is counted
    but is not extended any further.

    'qmax = N' can wreak havoc (due to memory problems).

    Note: Weights are discarded.
    '''
    CIJ = binarize(CIJ, copy=True)  # ensure CIJ is binary
    n = len(CIJ)
    Pq = np.zeros((n, n, qmax))
    util = np.zeros((n, qmax))

    # paths of length 1, seeded from the sources; every row of 'pths' is
    # one path, stored as a sequence of integer node indices
    q = 1
    seeds = [[i_s, j] for j in range(n) for i_s in sources
             if CIJ[i_s, j] == 1]
    pths = np.array(seeds, dtype=int).reshape(-1, 2)

    # use index per vertex (for paths of length 1): occurrences of each
    # node index among the seed paths
    util[:, q - 1] = np.bincount(pths.ravel(), minlength=n)
    # enter the paths of length 1 into the path matrix
    for src, dst in pths:
        Pq[src, dst, q - 1] += 1

    # begin saving allpths
    if savepths:
        allpths = pths.copy()
    else:
        allpths = []

    # big loop for all other pathlengths q
    for q in range(2, qmax + 1):
        # to keep track of time...
        print('current pathlength (q)=%i, number of paths so far '
              '(up to q-1)=%i' % (q, np.sum(Pq)))

        # old paths are in 'pths' (q columns); extended paths are collected
        # chunk by chunk in 'grown'
        last = pths[:, q - 1]
        grown = []

        for i in np.unique(last):  # set of endpoints of previous paths
            # all previous paths with 'i' as their endpoint
            pb, = np.where(last == i)
            # outgoing connections from i (breadth-first)
            for j in np.where(CIJ[i, :] == 1)[0]:
                # only legal extensions: j must not already occur after the
                # source node (returning to the source closes a cycle)
                legal = pb[np.sum(pths[pb, 1:q] == j, axis=1) == 0]
                if not legal.size:
                    continue
                tail = np.full((legal.size, 1), j, dtype=int)
                grown.append(np.hstack((pths[legal], tail)))
                # count the new paths per source node
                Pq[:, j, q - 1] += np.bincount(pths[legal, 0], minlength=n)

        # collecting all paths is not supported
        if savepths:
            raise NotImplementedError("Sorry allpaths is not yet implemented")

        # 'npths' contains all the paths of length q, one per row
        if grown:
            npths = np.vstack(grown)
        else:
            npths = np.zeros((0, q + 1), dtype=int)

        # use index per vertex (correct for cycles, count source/target
        # only once)
        util[:, q - 1] += (np.bincount(npths.ravel(), minlength=n) -
                           np.diag(Pq[:, :, q - 1]))

        # eliminate cycles from "making it" to the next level, so that
        # 'pths' contains all the paths that have a chance of being continued
        pths = npths[npths[:, 0] != npths[:, q]]

        # if there are no paths left to extend, end the search
        if not pths.size:
            break

    qstop = q
    tpath = np.sum(Pq)  # total number of paths
    plq = np.sum(np.sum(Pq, axis=0), axis=0)  # path length distribution

    return Pq, tpath, plq, qstop, allpths, util
Ejemplo n.º 22
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif3struct_wei(W):
    '''
    Structural motifs are patterns of local connectivity. Motif frequency
    is the frequency of occurrence of motifs around a node. Motif intensity
    and coherence are weighted generalizations of motif frequency.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted directed connection matrix (all weights between 0 and 1)

    Returns
    -------
    I : 13xN np.ndarray
        motif intensity matrix
    Q : 13xN np.ndarray
        motif coherence matrix
    F : 13xN np.ndarray
        motif frequency matrix

    Notes
    -----
    Average intensity and coherence are given by I./F and Q./F.

    Every connected node triple (u < v1, u < v2) is visited exactly once;
    the edge pattern of the triple is encoded as a 6-digit decimal code and
    looked up in the motif library.
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    # NOTE(review): library layout inferred from usage below -- m3 holds one
    # 6-edge pattern per row, m3n its decimal code, id3 the 1-based motif
    # class and n3 the number of edges in the pattern.
    m3 = mot['m3']
    m3n = mot['m3n']
    id3 = mot['id3'].squeeze()
    n3 = mot['n3'].squeeze()

    n = len(W)  # number of vertices in W
    I = np.zeros((13, n))  # intensity
    Q = np.zeros((13, n))  # coherence
    F = np.zeros((13, n))  # frequency

    A = binarize(W, copy=True)  # create binary adjmat
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    def above(node, floor):
        # boolean length-n mask of neighbors of `node` with index > floor
        mask = np.zeros((n,), dtype=bool)
        mask[floor + 1:] = As[node, floor + 1:]
        return mask

    # decimal place values used to encode the six edges as one integer
    place = np.power(10, np.arange(5, -1, -1))

    for u in range(n - 2):
        V1 = above(u, u)  # v1: neighbors of u (>u)
        for v1 in np.where(V1)[0]:
            V2 = above(v1, u)  # v2: neighbors of v1 (>u)
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2 = np.logical_or(above(u, v1), V2)
            for v2 in np.where(V2)[0]:
                a = np.array((A[v1, u], A[v2, u], A[u, v1],
                              A[v2, v1], A[u, v2], A[v1, v2]))
                s = np.uint32(np.sum(place * a))
                ix = np.squeeze(s == m3n)

                w = np.array((W[v1, u], W[v2, u], W[u, v1],
                              W[v2, v1], W[u, v2], W[v1, v2]))

                M = w * m3[ix, :]
                mid = id3[ix].astype(int) - 1  # 0-based motif row
                size = n3[ix]
                x = np.sum(M, axis=1) / size  # arithmetic mean
                M[M == 0] = 1  # enable geometric mean
                # 1.0 forces true division even for integer `size`
                i = np.prod(M, axis=1) ** (1.0 / size)  # intensity
                q = i / x  # coherence

                # add to cumulative counts for all three participants
                for node in (u, v1, v2):
                    I[mid, node] += i
                    Q[mid, node] += q
                    F[mid, node] += 1

    return I, Q, F
Ejemplo n.º 23
0
def efficiency_bin(G, local=False):
    '''
    Global efficiency is the mean inverse shortest path length over all
    ordered node pairs; it is inversely related to the characteristic path
    length.

    Local efficiency is that same quantity evaluated on the neighborhood of
    each node, and is related to the clustering coefficient.

    Parameters
    ----------
    G : NxN np.ndarray
        binary undirected connection matrix
    local : bool
        If True, computes local efficiency instead of global efficiency.
        Default value = False.

    Returns
    -------
    Eglob : float
        global efficiency, only if local=False
    Eloc : Nx1 np.ndarray
        local efficiency, only if local=True
    '''
    def distance_inv(g):
        # Inverse shortest-path lengths obtained from successive powers of
        # g: a pair first connected by a walk of `hops` edges gets that
        # distance.
        dist = np.eye(len(g))
        hops = 1
        walks = g.copy()
        fresh = (walks != 0)

        while np.any(fresh):
            dist += hops * fresh
            hops += 1
            walks = np.dot(walks, g)
            # only pairs without a recorded distance can still be "fresh"
            fresh = (walks != 0) * (dist == 0)

        dist[np.logical_not(dist)] = np.inf  # unreachable pairs
        dist = 1 / dist
        np.fill_diagonal(dist, 0)
        return dist

    G = binarize(G)
    n = len(G)  # number of nodes

    if not local:
        # global efficiency: average over the n*(n-1) off-diagonal pairs
        return np.sum(distance_inv(G)) / (n * n - n)

    E = np.zeros((n,))  # local efficiency
    for u in range(n):
        row = G[u, :]
        # neighbors of u
        V, = np.where(np.logical_or(row, row.T))
        # symmetrized inverse distances within the neighborhood subgraph
        e = distance_inv(G[np.ix_(V, V)])
        se = e + e.T

        # symmetrized adjacency vector between u and its neighbors
        sa = G[u, V] + G[V, u].T
        numer = np.sum(np.outer(sa.T, sa) * se) / 2
        if numer == 0:
            continue
        E[u] = numer / (np.sum(sa)**2 - np.sum(sa * sa))

    return E
Ejemplo n.º 24
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif4funct_wei(W):
    '''
    Functional motifs are subsets of connection patterns embedded within
    anatomical motifs. Motif frequency is the frequency of occurrence of
    motifs around a node. Motif intensity and coherence are weighted
    generalizations of motif frequency.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted directed connection matrix (all weights between 0 and 1)

    Returns
    -------
    I : 199xN np.ndarray
        motif intensity matrix
    Q : 199xN np.ndarray
        motif coherence matrix
    F : 199xN np.ndarray
        motif frequency matrix

    Notes
    -----
    Average intensity and coherence are given by I./F and Q./F.

    Every connected node quadruple is visited exactly once; all library
    patterns fully contained in its edge set contribute to the totals.
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    # NOTE(review): library layout inferred from usage below -- m4 holds one
    # 12-edge pattern per row, id4 the 1-based motif class and n4 the number
    # of edges in the pattern.
    m4 = mot['m4']
    id4 = mot['id4'].squeeze()
    n4 = mot['n4'].squeeze()

    n = len(W)
    I = np.zeros((199, n))  # intensity
    Q = np.zeros((199, n))  # coherence
    F = np.zeros((199, n))  # frequency

    A = binarize(W, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    def above(node, floor):
        # boolean length-n mask of neighbors of `node` with index > floor
        mask = np.zeros((n,), dtype=bool)
        mask[floor + 1:] = As[node, floor + 1:]
        return mask

    for u in range(n - 3):
        V1 = above(u, u)  # v1: neighbors of u (>u)
        for v1 in np.where(V1)[0]:
            V2 = above(v1, u)  # v2: neighbors of v1 (>u)
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2 = np.logical_or(above(u, v1), V2)
            for v2 in np.where(V2)[0]:
                vz = max(v1, v2)  # vz: largest rank node
                V3 = above(v2, u)  # v3: all neighbors of v2 (>u)
                V3[V2] = False  # not already in V1 and V2
                # and all neighbors of v1 (>v2)
                V3 = np.logical_or(above(v1, v2), V3)
                V3[V1] = False  # not already in V1
                # and all neighbors of u (>vz)
                V3 = np.logical_or(above(u, vz), V3)
                for v3 in np.where(V3)[0]:
                    a = np.array((A[v1, u], A[v2, u], A[v3, u],
                                  A[u, v1], A[v2, v1], A[v3, v1],
                                  A[u, v2], A[v1, v2], A[v3, v2],
                                  A[u, v3], A[v1, v3], A[v2, v3]))
                    # a pattern is contained when all of its edges exist
                    ix = (np.dot(m4, a) == n4)
                    if not np.any(ix):
                        continue

                    w = np.array((W[v1, u], W[v2, u], W[v3, u],
                                  W[u, v1], W[v2, v1], W[v3, v1],
                                  W[u, v2], W[v1, v2], W[v3, v2],
                                  W[u, v3], W[v1, v3], W[v2, v3]))

                    M = m4[ix, :] * w  # weights of each contained pattern
                    mid = id4[ix].astype(int) - 1  # 0-based motif rows
                    size = n4[ix]
                    x = np.sum(M, axis=1) / size  # arithmetic mean
                    M[M == 0] = 1  # enable geometric mean
                    # 1.0 forces true division even for integer `size`
                    i = np.prod(M, axis=1) ** (1.0 / size)  # intensity
                    q = i / x  # coherence

                    # aggregate per unique motif class: several contained
                    # patterns may belong to the same class
                    idu, inv, f2 = np.unique(mid, return_inverse=True,
                                             return_counts=True)
                    inv = np.ravel(inv)
                    i2 = np.bincount(inv, weights=i, minlength=idu.size)
                    q2 = np.bincount(inv, weights=q, minlength=idu.size)

                    # then add to cumulative count for all participants
                    for node in (u, v1, v2, v3):
                        I[idu, node] += i2
                        Q[idu, node] += q2
                        F[idu, node] += f2

    return I, Q, F
Ejemplo n.º 25
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif4funct_bin(A):
    '''
    Functional motifs are subsets of connection patterns embedded within
    anatomical motifs. Motif frequency is the frequency of occurrence of
    motifs around a node.

    Parameters
    ----------
    A : NxN np.ndarray
        binary directed connection matrix

    Returns
    -------
    f : 199x1 np.ndarray
        motif count for the whole graph
    F : 199xN np.ndarray
        motif frequency matrix (per node)

    Notes
    -----
    The values are returned in the order (f, F).
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    # NOTE(review): library layout inferred from usage below -- m4 holds one
    # 12-edge pattern per row, id4 the 1-based motif class and n4 the number
    # of edges in the pattern.
    m4 = mot['m4']
    id4 = mot['id4'].squeeze()
    n4 = mot['n4'].squeeze()

    n = len(A)
    f = np.zeros((199,))
    F = np.zeros((199, n))  # frequency

    A = binarize(A, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    def above(node, floor):
        # boolean length-n mask of neighbors of `node` with index > floor
        mask = np.zeros((n,), dtype=bool)
        mask[floor + 1:] = As[node, floor + 1:]
        return mask

    for u in range(n - 3):
        V1 = above(u, u)  # v1: neighbors of u (>u)
        for v1 in np.where(V1)[0]:
            V2 = above(v1, u)  # v2: neighbors of v1 (>u)
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2 = np.logical_or(above(u, v1), V2)
            for v2 in np.where(V2)[0]:
                vz = max(v1, v2)  # vz: largest rank node
                V3 = above(v2, u)  # v3: all neighbors of v2 (>u)
                V3[V2] = False  # not already in V1 and V2
                # and all neighbors of v1 (>v2)
                V3 = np.logical_or(above(v1, v2), V3)
                V3[V1] = False  # not already in V1
                # and all neighbors of u (>vz)
                V3 = np.logical_or(above(u, vz), V3)
                for v3 in np.where(V3)[0]:
                    a = np.array((A[v1, u], A[v2, u], A[v3, u],
                                  A[u, v1], A[v2, v1], A[v3, v1],
                                  A[u, v2], A[v1, v2], A[v3, v2],
                                  A[u, v3], A[v1, v3], A[v2, v3]))

                    # a pattern is contained when all of its edges exist
                    ix = (np.dot(m4, a) == n4)
                    mid = id4[ix].astype(int) - 1  # 0-based motif rows

                    # number of contained patterns per unique motif class
                    idu, f2 = np.unique(mid, return_counts=True)

                    # add to cumulative count
                    f[idu] += f2
                    for node in (u, v1, v2, v3):
                        F[idu, node] += f2

    return f, F
Ejemplo n.º 26
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif4struct_wei(W):
    '''
    Structural motifs are patterns of local connectivity. Motif frequency
    is the frequency of occurrence of motifs around a node. Motif intensity
    and coherence are weighted generalizations of motif frequency.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted directed connection matrix (all weights between 0 and 1)

    Returns
    -------
    I : 199xN np.ndarray
        motif intensity matrix
    Q : 199xN np.ndarray
        motif coherence matrix
    F : 199xN np.ndarray
        motif frequency matrix

    Notes
    -----
    Average intensity and coherence are given by I./F and Q./F.

    Every connected node quadruple is visited exactly once; its edge
    pattern is encoded as a 12-digit decimal code and looked up in the
    motif library.
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    # NOTE(review): library layout inferred from usage below -- m4 holds one
    # 12-edge pattern per row, m4n its decimal code, id4 the 1-based motif
    # class and n4 the number of edges in the pattern.
    m4 = mot['m4']
    m4n = mot['m4n']
    id4 = mot['id4'].squeeze()
    n4 = mot['n4'].squeeze()

    n = len(W)
    I = np.zeros((199, n))  # intensity
    Q = np.zeros((199, n))  # coherence
    F = np.zeros((199, n))  # frequency

    A = binarize(W, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    def above(node, floor):
        # boolean length-n mask of neighbors of `node` with index > floor
        mask = np.zeros((n,), dtype=bool)
        mask[floor + 1:] = As[node, floor + 1:]
        return mask

    # decimal place values; int64 because 10**11 does not fit in 32 bits
    place = np.power(10, np.arange(11, -1, -1, dtype=np.int64))

    for u in range(n - 3):
        V1 = above(u, u)  # v1: neighbors of u (>u)
        for v1 in np.where(V1)[0]:
            V2 = above(v1, u)  # v2: neighbors of v1 (>u)
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2 = np.logical_or(above(u, v1), V2)
            for v2 in np.where(V2)[0]:
                vz = max(v1, v2)  # vz: largest rank node
                V3 = above(v2, u)  # v3: all neighbors of v2 (>u)
                V3[V2] = False  # not already in V1 and V2
                # and all neighbors of v1 (>v2)
                V3 = np.logical_or(above(v1, v2), V3)
                V3[V1] = False  # not already in V1
                # and all neighbors of u (>vz)
                V3 = np.logical_or(above(u, vz), V3)
                for v3 in np.where(V3)[0]:
                    a = np.array((A[v1, u], A[v2, u], A[v3, u],
                                  A[u, v1], A[v2, v1], A[v3, v1],
                                  A[u, v2], A[v1, v2], A[v3, v2],
                                  A[u, v3], A[v1, v3], A[v2, v3]))
                    s = np.uint64(np.sum(place * a))
                    ix = np.squeeze(s == m4n)

                    w = np.array((W[v1, u], W[v2, u], W[v3, u],
                                  W[u, v1], W[v2, v1], W[v3, v1],
                                  W[u, v2], W[v1, v2], W[v3, v2],
                                  W[u, v3], W[v1, v3], W[v2, v3]))

                    M = w * m4[ix, :]
                    mid = id4[ix].astype(int) - 1  # 0-based motif row
                    size = n4[ix]
                    x = np.sum(M, axis=1) / size  # arithmetic mean
                    M[M == 0] = 1  # enable geometric mean
                    # 1.0 forces true division even for integer `size`
                    i = np.prod(M, axis=1) ** (1.0 / size)  # intensity
                    q = i / x  # coherence

                    # then add to cumulative count for all participants
                    for node in (u, v1, v2, v3):
                        I[mid, node] += i
                        Q[mid, node] += q
                        F[mid, node] += 1

    return I, Q, F
Ejemplo n.º 27
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif4struct_bin(A):
    '''
    Structural motifs are patterns of local connectivity. Motif frequency
    is the frequency of occurrence of motifs around a node.

    Parameters
    ----------
    A : NxN np.ndarray
        binary directed connection matrix

    Returns
    -------
    f : 199x1 np.ndarray
        motif count for the whole graph
    F : 199xN np.ndarray
        motif frequency matrix (per node)

    Notes
    -----
    The values are returned in the order (f, F).
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    # NOTE(review): library layout inferred from usage below -- m4n holds
    # the decimal edge-pattern code of each library entry and id4 its
    # 1-based motif class.
    m4n = mot['m4n']
    id4 = mot['id4'].squeeze()

    n = len(A)
    f = np.zeros((199,))
    F = np.zeros((199, n))  # frequency

    A = binarize(A, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    def above(node, floor):
        # boolean length-n mask of neighbors of `node` with index > floor
        mask = np.zeros((n,), dtype=bool)
        mask[floor + 1:] = As[node, floor + 1:]
        return mask

    # decimal place values; int64 because 10**11 does not fit in 32 bits
    place = np.power(10, np.arange(11, -1, -1, dtype=np.int64))

    for u in range(n - 3):
        V1 = above(u, u)  # v1: neighbors of u (>u)
        for v1 in np.where(V1)[0]:
            V2 = above(v1, u)  # v2: neighbors of v1 (>u)
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2 = np.logical_or(above(u, v1), V2)
            for v2 in np.where(V2)[0]:
                vz = max(v1, v2)  # vz: largest rank node
                V3 = above(v2, u)  # v3: all neighbors of v2 (>u)
                V3[V2] = False  # not already in V1 and V2
                # and all neighbors of v1 (>v2)
                V3 = np.logical_or(above(v1, v2), V3)
                V3[V1] = False  # not already in V1
                # and all neighbors of u (>vz)
                V3 = np.logical_or(above(u, vz), V3)
                for v3 in np.where(V3)[0]:
                    a = np.array((A[v1, u], A[v2, u], A[v3, u],
                                  A[u, v1], A[v2, v1], A[v3, v1],
                                  A[u, v2], A[v1, v2], A[v3, v2],
                                  A[u, v3], A[v1, v3], A[v2, v3]))

                    s = np.uint64(np.sum(place * a))
                    # library ids are 1-based; F and f rows are 0-based
                    mid = id4[np.squeeze(s == m4n)].astype(int) - 1
                    for node in (u, v1, v2, v3):
                        F[mid, node] += 1
                    f[mid] += 1

    return f, F
Ejemplo n.º 28
0
Archivo: motifs.py Proyecto: YSA6/bctpy
def motif3funct_bin(A):
    '''
    Functional motifs are subsets of connection patterns embedded within
    anatomical motifs. Motif frequency is the frequency of occurrence of
    motifs around a node.

    Parameters
    ----------
    A : NxN np.ndarray
        binary directed connection matrix

    Returns
    -------
    f : 13x1 np.ndarray
        motif count for the whole graph
    F : 13xN np.ndarray
        motif frequency matrix (per node)

    Notes
    -----
    The values are returned in the order (f, F).
    '''
    from scipy import io
    import os
    fname = os.path.join(os.path.dirname(__file__), motiflib)
    mot = io.loadmat(fname)
    # NOTE(review): library layout inferred from usage below -- m3 holds one
    # 6-edge pattern per row, id3 the 1-based motif class and n3 the number
    # of edges in the pattern.
    m3 = mot['m3']
    id3 = mot['id3'].squeeze()
    n3 = mot['n3'].squeeze()

    n = len(A)  # number of vertices in A
    f = np.zeros((13,))  # motif count for whole graph
    F = np.zeros((13, n))  # motif frequency

    A = binarize(A, copy=True)  # ensure A is binary
    As = np.logical_or(A, A.T)  # symmetrized adjmat

    def above(node, floor):
        # boolean length-n mask of neighbors of `node` with index > floor
        mask = np.zeros((n,), dtype=bool)
        mask[floor + 1:] = As[node, floor + 1:]
        return mask

    for u in range(n - 2):
        V1 = above(u, u)  # v1: neighbors of u (>u)
        for v1 in np.where(V1)[0]:
            V2 = above(v1, u)  # v2: neighbors of v1 (>u)
            V2[V1] = False  # not already in V1
            # and all neighbors of u (>v1)
            V2 = np.logical_or(above(u, v1), V2)
            for v2 in np.where(V2)[0]:
                a = np.array((A[v1, u], A[v2, u], A[u, v1],
                              A[v2, v1], A[u, v2], A[v1, v2]))
                # find all contained isomorphs: a pattern is contained when
                # all of its edges exist
                ix = (np.dot(m3, a) == n3)
                mid = id3[ix].astype(int) - 1  # 0-based motif rows

                # number of contained patterns per unique motif class
                idu, f2 = np.unique(mid, return_counts=True)

                # then add to a cumulative count
                f[idu] += f2
                for node in (u, v1, v2):
                    F[idu, node] += f2

    return f, F
Ejemplo n.º 29
0
def reachdist(CIJ, ensure_binary=True):
    '''
    The binary reachability matrix describes reachability between all pairs
    of nodes. An entry (u,v)=1 means that there exists a path from node u
    to node v; alternatively (u,v)=0.

    The distance matrix contains lengths of shortest paths between all
    pairs of nodes. An entry (u,v) represents the length of shortest path
    from node u to  node v. The average shortest path length is the
    characteristic path length of the network.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        binary directed/undirected connection matrix
    ensure_binary : bool
        Binarizes input. Defaults to True. Pass False only when the input
        is already binary (e.g. for testing); for weighted matrices use
        distance_wei instead.

    Returns
    -------
    R : NxN np.ndarray
        binary reachability matrix
    D : NxN np.ndarray
        distance matrix (float; np.inf where no path was found)

    Notes
    -----
    faster but more memory intensive than "breadthdist.m".
    '''
    if ensure_binary:
        CIJ = binarize(CIJ)
    CIJ = np.asarray(CIJ)

    n = len(CIJ)
    R = CIJ.copy()
    D = CIJ.copy()
    CIJpwr = CIJ.copy()
    powr = 2

    # check for vertices that have no incoming or outgoing connections
    # these are ignored by reachdist
    in_deg = np.sum(CIJ, axis=0)
    out_deg = np.sum(CIJ, axis=1)
    id0, = np.where(in_deg == 0)  # nothing goes in, so column(R) will be 0
    od0, = np.where(out_deg == 0)  # nothing comes out, so row(R) will be 0
    # use these columns and rows to check for reachability
    col = np.delete(np.arange(n), id0)
    row = np.delete(np.arange(n), od0)

    # Raise CIJ to successive powers. D accumulates, for every pair, the
    # number of rounds in which it was already reachable; an explicit loop
    # is used so that large n cannot exhaust the recursion limit.
    while True:
        CIJpwr = np.dot(CIJpwr, CIJ)
        R = np.logical_or(R, CIJpwr != 0)
        D += R
        if powr > n or not np.any(R[np.ix_(row, col)] == 0):
            break
        powr += 1

    # 'invert' the accumulated counts to get distances; cast to float so
    # that np.inf can be stored even when CIJ has an integer dtype
    D = (powr - D + 1).astype(float)

    # put inf if no path found
    D[D == n + 2] = np.inf
    D[:, id0] = np.inf
    D[od0, :] = np.inf

    return R, D
Ejemplo n.º 30
0
def get_components_old(A, no_depend=False):
    '''
    Returns the components of an undirected graph specified by the binary and
    undirected adjacency matrix A. Components and their constituent nodes
    are assigned the same index and stored in the vector, comps. The vector,
    comp_sizes, contains the number of nodes belonging to each component.

    Parameters
    ----------
    A : NxN np.ndarray
        binary undirected adjacency matrix
    no_depend : bool
        If true, doesn't import networkx to do the calculation. Default value
        is false.

    Returns
    -------
    comps : Nx1 np.ndarray
        vector of component assignments for each node (labels start at 1)
    comp_sizes : Mx1 np.ndarray
        vector of component sizes

    Raises
    ------
    BCTParamError
        If A is not symmetric.

    Notes
    -----
    Note: disconnected nodes will appear as components with a component
    size of 1

    Note: The identity of each component (i.e. its numerical value in the
    result) is not guaranteed to be identical the value returned in BCT,
    although the component topology is.

    Note: networkx is used to do the computation efficiently. If networkx is
    not available a breadth-first search that does not depend on networkx is
    used instead, but this is less efficient. The corresponding BCT function
    does the computation by computing the Dulmage-Mendelsohn decomposition.
    '''
    # nonsquare matrices cannot be symmetric; no need to check

    if not np.all(A == A.T):  # ensure matrix is undirected
        raise BCTParamError('get_components can only be computed for undirected'
                            ' matrices.  If your matrix is noisy, correct it with np.around')

    A = binarize(A, copy=True)
    n = len(A)
    np.fill_diagonal(A, 1)

    nx = None
    if not no_depend:
        try:
            import networkx as nx
        except ImportError:
            nx = None  # fall back to the breadth-first search below

    cptvec = np.zeros((n,))

    if nx is not None:
        # from_numpy_matrix was removed in networkx 3.0; prefer the
        # replacement when it exists
        build_graph = getattr(nx, 'from_numpy_array', None)
        if build_graph is None:
            build_graph = nx.from_numpy_matrix
        cpts = list(nx.connected_components(build_graph(A)))

        cptsizes = np.zeros(len(cpts))
        for i, cpt in enumerate(cpts):
            cptsizes[i] = len(cpt)
            for node in cpt:
                cptvec[node] = i + 1
    else:
        # less efficient breadth first search
        r, _ = breadthdist(A)
        for node, reach in enumerate(r):
            if cptvec[node] > 0:
                continue  # already assigned to an earlier component
            cptvec[np.where(reach)] = np.max(cptvec) + 1

        # labels are stored as floats; sizes and indices must be integers
        ncpts = int(np.max(cptvec)) if n else 0
        cptsizes = np.zeros(ncpts)
        for i in range(ncpts):
            cptsizes[i] = np.size(np.where(cptvec == i + 1))

    return cptvec, cptsizes
Ejemplo n.º 31
0
def gtom(adj, nr_steps):
    '''
    The m-th step generalized topological overlap measure (GTOM) quantifies
    the extent to which a pair of nodes have similar m-th step neighbors.
    Mth-step neighbors are nodes that are reachable by a path of at most
    length m.

    This function computes the N x N generalized topological overlap
    measure (GTOM) matrix for the given number of steps.

    Parameters
    ----------
    adj : NxN np.ndarray
        connection matrix
    nr_steps : int
        number of steps; values larger than N are reduced to N

    Returns
    -------
    gt : NxN np.ndarray
        GTOM matrix (the binarized input itself when nr_steps is 0)

    Notes
    -----
    When nr_steps is equal to 1, GTOM is identical to the topological
    overlap measure (TOM). In that case the 'gt' matrix records, for each
    pair of nodes, the fraction of neighbors the two nodes share in common,
    where "neighbors" are one step removed. As nr_steps is increased,
    neighbors that are further out are considered. Elements of 'gt' are
    bounded between 0 and 1.  The 'gt' matrix can be converted from a
    similarity to a distance matrix by taking 1-gt.
    '''
    bm = binarize(adj, copy=True)
    nr_nodes = len(adj)

    if nr_steps > nr_nodes:
        print("Warning: nr_steps exceeded nr_nodes. Setting nr_steps=nr_nodes")
        nr_steps = nr_nodes
    if nr_steps == 0:
        return bm

    # reach[i, j] == 1 when j lies within the current number of steps of i
    reach = bm.copy()
    for _ in range(2, nr_steps + 1):
        # grow every neighborhood by exactly one step: read from a snapshot
        # so that updates made during this sweep are not reused within it
        prev = reach.copy()
        for i in range(nr_nodes):
            # neighbors of node i
            ng_col, = np.where(prev[i, :] == 1)
            # neighbors of neighbors of node i
            _, nng_col = np.where(prev[ng_col, :] == 1)
            new_ng = np.setdiff1d(nng_col, (i,))

            # neighbors of neighbors of i become considered neighbors of i
            reach[i, new_ng] = 1
            reach[new_ng, i] = 1  # keep the matrix symmetric

    # numerator of GTOM formula: shared m-step neighbors plus direct link
    numerator_mat = np.dot(reach, reach) + bm + np.eye(nr_nodes)

    # vector of (m-step) node degrees
    bms = np.sum(reach, axis=0)
    bms_r = np.tile(bms, (nr_nodes, 1))

    # topological overlap normalizes by the smaller of the two degrees
    denominator_mat = -bm + np.minimum(bms_r, bms_r.T) + 1
    return numerator_mat / denominator_mat
Ejemplo n.º 32
0
def findpaths(CIJ, qmax, sources, savepths=False):
    '''
    Paths are sequences of linked nodes, that never visit a single node
    more than once. This function finds all paths that start at a set of
    source nodes, up to a specified length. Warning: very memory-intensive.

    Parameters
    ----------
    CIJ : NxN np.ndarray
        binary directed/undirected connection matrix
    qmax : int
        maximal path length
    sources : array-like of int
        indices of the source units from which paths are grown
    savepths : bool
        True if all paths are to be collected and returned. This
        functionality is only available for qmax == 1; otherwise
        NotImplementedError is raised.

    Returns
    -------
    Pq : NxNxQ np.ndarray
        Path matrix with Pq[i,j,q-1] = number of paths from i to j with
        length q
    tpath : int
        total number of paths found
    plq : Qx1 np.ndarray
        path length distribution as a function of q
    qstop : int
        path length at which findpaths is stopped
    allpths : list or np.ndarray
        empty list unless savepths is True, in which case it holds the
        paths of length 1 (one path per row)
    util : NxQ np.ndarray
        node use index

    Raises
    ------
    NotImplementedError
        If savepths is True and qmax > 1.

    Notes
    -----
    Note that Pq(:,:,N) can only carry entries on the diagonal, as all
    "legal" paths of length N-1 must terminate.  Cycles of length N are
    possible, with all vertices visited exactly once (except for source and
    target). 'qmax = N' can wreak havoc (due to memory problems).

    Note: Weights are discarded.
    Note: Progress is reported with print once per path length.
    '''
    CIJ = binarize(CIJ, copy=True)  # ensure CIJ is binary
    n = len(CIJ)
    Pq = np.zeros((n, n, qmax))
    util = np.zeros((n, qmax))

    # collecting paths longer than one edge is not supported
    if savepths and qmax > 1:
        raise NotImplementedError("Sorry allpaths is not yet implemented")

    sources = np.asarray(sources, dtype=int).ravel()

    # paths of length 1 are seeded from the sources; one path per row
    seeds = [(s, j) for j in range(n) for s in sources if CIJ[s, j] == 1]
    pths = np.array(seeds, dtype=int).reshape(-1, 2)

    # use index per vertex and path counts for paths of length 1
    util[:, 0] = np.bincount(pths.ravel(), minlength=n)
    for src, dst in pths:
        Pq[src, dst, 0] += 1

    allpths = pths.copy() if savepths else []

    qstop = 1
    # big loop for all other pathlengths q
    for q in range(2, qmax + 1):
        # if there are no paths left to continue, end the search
        if not len(pths):
            break
        qstop = q

        # to keep track of time...
        print('current pathlength (q)=%i, number of paths so far '
              '(up to q-1)=%i' % (q, np.sum(Pq)))

        # old paths are in 'pths'; extensions are collected in 'grown'
        ends = pths[:, -1]
        grown = []
        for i in np.unique(ends):  # endpoints of previous paths
            # previous paths with 'i' as their endpoint
            pb, = np.where(ends == i)
            # outgoing connections from i (breadth-first)
            for j in np.where(CIJ[i, :] == 1)[0]:
                # only legal extensions: j must not have been visited,
                # except as the source (which closes a cycle)
                legal = pb[~np.any(pths[pb, 1:] == j, axis=1)]
                if not legal.size:
                    continue
                grown.append(np.column_stack(
                    (pths[legal], np.repeat(j, legal.size))))
                # count new paths by their source and add the number to Pq
                Pq[:, j, q - 1] += np.bincount(pths[legal, 0], minlength=n)

        # 'npths' contains all the paths of length q, one per row
        if grown:
            npths = np.vstack(grown)
        else:
            npths = np.zeros((0, q + 1), dtype=int)

        # use index per vertex (correct for cycles, count source/target
        # only once)
        util[:, q - 1] += (np.bincount(npths.ravel(), minlength=n) -
                           np.diag(Pq[:, :, q - 1]))

        # eliminate cycles from "making it" to the next level, so that
        # 'pths' contains all the paths that have a chance of being continued
        pths = npths[npths[:, 0] != npths[:, q]]

    tpath = np.sum(Pq)  # total number of paths
    plq = np.sum(np.sum(Pq, axis=0), axis=0)  # path length distribution

    return Pq, tpath, plq, qstop, allpths, util
Ejemplo n.º 33
0
def get_components_old(A, no_depend=False):
    '''
    Returns the components of an undirected graph specified by the binary and
    undirected adjacency matrix A. Components and their constituent nodes
    are assigned the same index and stored in the vector, comps. The vector,
    comp_sizes, contains the number of nodes belonging to each component.

    Parameters
    ----------
    A : NxN np.ndarray
        binary undirected adjacency matrix
    no_depend : bool
        If true, doesn't import networkx to do the calculation. Default value
        is false.

    Returns
    -------
    comps : Nx1 np.ndarray
        vector of component assignments for each node (labels start at 1)
    comp_sizes : Mx1 np.ndarray
        vector of component sizes

    Raises
    ------
    BCTParamError
        If A is not equal to its transpose.

    Notes
    -----
    Note: disconnected nodes will appear as components with a component
    size of 1

    Note: The identity of each component (i.e. its numerical value in the
    result) is not guaranteed to be identical the value returned in BCT,
    although the component topology is.

    Note: networkx is used to do the computation efficiently. If networkx is
    not available (or no_depend is set) a breadth-first search that does not
    depend on networkx is used instead, but this is less efficient. The
    corresponding BCT function does the computation by computing the
    Dulmage-Mendelsohn decomposition, for which there doesn't appear to be a
    python equivalent.
    '''
    # nonsquare matrices cannot be symmetric; no need to check

    if not np.all(A == A.T):  # ensure matrix is undirected
        raise BCTParamError('get_components can only be computed for undirected'
                            ' matrices.  If your matrix is noisy, correct it with np.around')

    A = binarize(A, copy=True)
    n = len(A)
    # every node is made adjacent to itself so isolated nodes still form a
    # component of size 1
    np.fill_diagonal(A, 1)

    # decide which backend to use without abusing exceptions as control flow
    nx = None
    if not no_depend:
        try:
            import networkx as nx
        except ImportError:
            nx = None

    if nx is not None:
        # from_numpy_matrix was removed in networkx 3.0; from_numpy_array is
        # its replacement, so prefer it and only fall back on old releases
        to_graph = getattr(nx, 'from_numpy_array', None)
        if to_graph is None:
            to_graph = nx.from_numpy_matrix
        net = to_graph(A)
        cpts = list(nx.connected_components(net))

        cptvec = np.zeros((n,))
        cptsizes = np.zeros(len(cpts))
        for i, cpt in enumerate(cpts):
            cptsizes[i] = len(cpt)
            for node in cpt:
                cptvec[node] = i + 1
    else:
        # less efficient breadth first search that needs no extra dependency
        cptvec = np.zeros((n,))
        r, _ = breadthdist(A)
        for node, reach in enumerate(r):
            if cptvec[node] > 0:
                # node was already labelled as part of an earlier component
                continue
            cptvec[np.where(reach)] = np.max(cptvec) + 1

        # labels are stored as floats in cptvec; convert the count to int
        # because array shapes and indices must be integers
        n_cpts = int(np.max(cptvec)) if n else 0
        cptsizes = np.zeros(n_cpts)
        for i in range(n_cpts):
            cptsizes[i] = np.size(np.where(cptvec == i + 1))

    return cptvec, cptsizes
Ejemplo n.º 34
0
def efficiency_bin(G, local=False):
    '''
    The global efficiency is the average of inverse shortest path length,
    and is inversely related to the characteristic path length.

    The local efficiency is the global efficiency computed on the
    neighborhood of the node, and is related to the clustering coefficient.

    Parameters
    ----------
    G : NxN np.ndarray
        binary undirected connection matrix
    local : bool
        If True, computes local efficiency instead of global efficiency.
        Default value = False.

    Returns
    -------
    Eglob : float
        global efficiency, only if local=False
    Eloc : Nx1 np.ndarray
        local efficiency, only if local=True

    Notes
    -----
    The neighborhood of a node is taken as the union of its row and column
    entries. For a symmetric (undirected) matrix these coincide.
    '''
    def distance_inv(g):
        # Build the matrix of inverse shortest path lengths of g by raising
        # the adjacency matrix to successive powers: a pair first becomes
        # nonzero in the n-th power when its shortest path has length n.
        D = np.eye(len(g))
        n = 1
        nPATH = g.copy()
        L = (nPATH != 0)

        while np.any(L):
            D += n * L
            n += 1
            nPATH = np.dot(nPATH, g)
            # only pairs that have no distance assigned yet
            L = (nPATH != 0) * (D == 0)
        # pairs never reached keep distance 0; make them infinitely far so
        # their inverse distance is 0
        D[np.logical_not(D)] = np.inf
        D = 1 / D
        np.fill_diagonal(D, 0)
        return D

    G = binarize(G)
    n = len(G)  # number of nodes
    if local:
        E = np.zeros((n,))  # local efficiency

        for u in range(n):
            # neighbors of u: anything linked from u (row) or to u (column)
            V, = np.where(np.logical_or(G[u, :], G[:, u]))
            # inverse distance matrix of the neighborhood subgraph
            e = distance_inv(G[np.ix_(V, V)])
            # symmetrized inverse distance matrix
            se = e + e.T

            # symmetrized adjacency vector
            sa = G[u, V] + G[V, u]
            numer = np.sum(np.outer(sa, sa) * se) / 2
            # numer == 0 also covers neighborhoods with fewer than two
            # nodes, where the denominator below would be zero
            if numer != 0:
                denom = np.sum(sa)**2 - np.sum(sa * sa)
                E[u] = numer / denom  # local efficiency

    else:
        e = distance_inv(G)
        E = np.sum(e) / (n * n - n)  # global efficiency
    return E