def clustering_coef_wd(W):
    '''
    The weighted clustering coefficient is the average "intensity" of
    triangles around a node.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted directed connection matrix

    Returns
    -------
    C : Nx1 np.ndarray
        clustering coefficient vector

    Notes
    -----
    Methodological note (also see clustering_coef_bd).
    The weighted modification is as follows:
    - numerator: the adjacency matrix is replaced by the weights matrix ^ 1/3
    - denominator: unchanged from the binary version

    This reduces to the symmetric and/or binary version of the clustering
    coefficient for the corresponding graph types.
    '''
    adj = (W != 0).astype(float)                  # binary adjacency matrix
    sym_w = cuberoot(W) + cuberoot(W.T)           # symmetrized weights ^ 1/3
    deg = (adj + adj.T).sum(axis=1)               # total degree (in + out)
    tri = np.diag(sym_w @ (sym_w @ sym_w)) / 2    # weighted 3-cycles per node
    deg[tri == 0] = np.inf                        # no 3-cycles -> force C = 0
    # all possible 3-cycles, discounting reciprocal ("false pair") edges
    possible = deg * (deg - 1) - 2 * np.diag(adj @ adj)
    return tri / possible
def transitivity_wd(W):
    '''
    Transitivity is the ratio of 'triangles to triplets' in the network.
    (A classical version of the clustering coefficient).

    Parameters
    ----------
    W : NxN np.ndarray
        weighted directed connection matrix

    Returns
    -------
    T : float
        transitivity scalar

    Notes
    -----
    (also see note for clustering_coef_bd)
    The weighted modification is as follows:
    - numerator: the adjacency matrix is replaced by the weights matrix ^ 1/3
    - denominator: unchanged from the binary version

    This reduces to the symmetric and/or binary version of the clustering
    coefficient for the corresponding graph types.
    '''
    adj = (W != 0).astype(float)                  # binary adjacency matrix
    sym_w = cuberoot(W) + cuberoot(W.T)           # symmetrized weights ^ 1/3
    deg = (adj + adj.T).sum(axis=1)               # total degree (in + out)
    tri = np.diag(sym_w @ (sym_w @ sym_w)) / 2    # weighted 3-cycles per node
    deg[tri == 0] = np.inf                        # no 3-cycles -> contribute 0
    # all possible 3-cycles, discounting reciprocal ("false pair") edges
    possible = deg * (deg - 1) - 2 * np.diag(adj @ adj)
    return np.sum(tri) / np.sum(possible)         # global triangle/triplet ratio
def transitivity_wu(W):
    '''
    Transitivity is the ratio of 'triangles to triplets' in the network.
    (A classical version of the clustering coefficient).

    Parameters
    ----------
    W : NxN np.ndarray
        weighted undirected connection matrix

    Returns
    -------
    T : float
        transitivity scalar
    '''
    deg = np.count_nonzero(W, axis=1)           # node degree
    w3 = cuberoot(W)                            # weights ^ 1/3
    tri = np.diag(w3 @ (w3 @ w3))               # weighted 3-cycles per node
    # ratio of total triangle intensity to total number of triplets
    return np.sum(tri) / np.sum(deg * (deg - 1))
def clustering_coef_wu(W):
    '''
    The weighted clustering coefficient is the average "intensity" of
    triangles around a node.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted undirected connection matrix

    Returns
    -------
    C : Nx1 np.ndarray
        clustering coefficient vector
    '''
    deg = np.count_nonzero(W, axis=1).astype(float)  # node degree
    w3 = cuberoot(W)                                 # weights ^ 1/3
    tri = np.diag(w3 @ (w3 @ w3))                    # weighted 3-cycles per node
    deg[tri == 0] = np.inf                           # no 3-cycles -> C = 0
    return tri / (deg * (deg - 1))
def clustering_coef_wu_sign(W, coef_type='default'):
    '''
    Returns the weighted clustering coefficient generalized or separated
    for positive and negative weights.

    Three algorithms are supported; hereafter referred to as default,
    zhang, and constantini.

    1. Default (Onnela et al.), as in the traditional clustering
       coefficient computation. Computed separately for positive and
       negative weights.
    2. Zhang & Horvath. Similar to the Onnela formula except weight
       information is incorporated in the denominator. Reduces sensitivity
       of the measure to weights directly connected to the node of
       interest. Computed separately for positive and negative weights.
    3. Constantini & Perugini generalization of the Zhang & Horvath
       formula. Takes both positive and negative weights into account
       simultaneously. Particularly sensitive to non-redundancy in path
       information based on sign. Returns only one value.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted undirected connection matrix
    coef_type : enum
        Allowed values are 'default', 'zhang', 'constantini'

    Returns
    -------
    Cpos : Nx1 np.ndarray
        Clustering coefficient vector for positive weights
    Cneg : Nx1 np.ndarray
        Clustering coefficient vector for negative weights, unless
        coef_type == 'constantini', in which case a single vector is
        returned.

    Notes
    -----
    The diagonal is zeroed on an internal copy; the caller's matrix is
    no longer modified in place.

    References: Onnela et al. (2005) Phys Rev E 71:065103
    Zhang & Horvath (2005) Stat Appl Genet Mol Biol 41:1544-6115
    Costantini & Perugini (2014) PLOS ONE 9:e88669
    '''
    n = len(W)
    # work on a copy so the caller's matrix is not mutated in place
    W = W.copy()
    np.fill_diagonal(W, 0)  # self-connections are excluded

    if coef_type == 'default':
        # Onnela formula, applied separately to positive / negative weights
        W_pos = W * (W > 0)
        K_pos = np.array(np.sum(np.logical_not(W_pos == 0), axis=1),
                         dtype=float)
        ws_pos = cuberoot(W_pos)
        cyc3_pos = np.diag(np.dot(ws_pos, np.dot(ws_pos, ws_pos)))
        K_pos[np.where(cyc3_pos == 0)] = np.inf  # no triangles -> C = 0
        C_pos = cyc3_pos / (K_pos * (K_pos - 1))

        W_neg = -W * (W < 0)
        K_neg = np.array(np.sum(np.logical_not(W_neg == 0), axis=1),
                         dtype=float)
        ws_neg = cuberoot(W_neg)
        cyc3_neg = np.diag(np.dot(ws_neg, np.dot(ws_neg, ws_neg)))
        K_neg[np.where(cyc3_neg == 0)] = np.inf  # no triangles -> C = 0
        C_neg = cyc3_neg / (K_neg * (K_neg - 1))
        return C_pos, C_neg

    elif coef_type in ('zhang', 'Zhang'):
        W_pos = W * (W > 0)
        cyc3_pos = np.zeros((n,))
        cyc2_pos = np.zeros((n,))

        W_neg = -W * (W < 0)
        cyc3_neg = np.zeros((n,))
        cyc2_neg = np.zeros((n,))

        for i in range(n):
            for j in range(n):
                for q in range(n):
                    # numerator: weighted triangles through node i
                    cyc3_pos[i] += W_pos[j, i] * W_pos[i, q] * W_pos[j, q]
                    cyc3_neg[i] += W_neg[j, i] * W_neg[i, q] * W_neg[j, q]
                    if j != q:
                        # denominator: weighted triplets centered on i
                        cyc2_pos[i] += W_pos[j, i] * W_pos[i, q]
                        cyc2_neg[i] += W_neg[j, i] * W_neg[i, q]

        cyc2_pos[np.where(cyc3_pos == 0)] = np.inf  # no triangles -> C = 0
        C_pos = cyc3_pos / cyc2_pos
        cyc2_neg[np.where(cyc3_neg == 0)] = np.inf
        C_neg = cyc3_neg / cyc2_neg
        return C_pos, C_neg

    elif coef_type in ('constantini', 'Constantini'):
        cyc3 = np.zeros((n,))
        cyc2 = np.zeros((n,))

        for i in range(n):
            for j in range(n):
                for q in range(n):
                    # signed weighted triangles through node i
                    cyc3[i] += W[j, i] * W[i, q] * W[j, q]
                    if j != q:
                        # denominator uses absolute weights so the
                        # coefficient stays bounded regardless of sign
                        cyc2[i] += np.abs(W[j, i] * W[i, q])

        cyc2[np.where(cyc3 == 0)] = np.inf  # no triangles -> C = 0
        C = cyc3 / cyc2
        return C
def efficiency_wei(Gw, local=False):
    '''
    The global efficiency is the average of inverse shortest path length,
    and is inversely related to the characteristic path length.

    The local efficiency is the global efficiency computed on the
    neighborhood of the node, and is related to the clustering coefficient.

    Parameters
    ----------
    W : NxN np.ndarray
        undirected weighted connection matrix
        (all weights in W must be between 0 and 1)
    local : bool
        If True, computes local efficiency instead of global efficiency.
        Default value = False.

    Returns
    -------
    Eglob : float
        global efficiency, only if local=False
    Eloc : Nx1 np.ndarray
        local efficiency, only if local=True

    Notes
    -----
    The efficiency is computed using an auxiliary connection-length
    matrix L, defined as L_ij = 1/W_ij for all nonzero L_ij; This has an
    intuitive interpretation, as higher connection weights intuitively
    correspond to shorter lengths.

    The weighted local efficiency broadly parallels the weighted
    clustering coefficient of Onnela et al. (2005) and distinguishes the
    influence of different paths based on connection weights of the
    corresponding neighbors to the node in question. In other words, a path
    between two neighbors with strong connections to the node in question
    contributes more to the local efficiency than a path between two weakly
    connected neighbors. Note that this weighted variant of the local
    efficiency is hence not a strict generalization of the binary variant.

    Algorithm: Dijkstra's algorithm
    '''
    def distance_inv_wei(G):
        # All-pairs inverse shortest-path lengths of length matrix G via
        # repeated Dijkstra sweeps; unreachable pairs map to 0.
        n = len(G)
        D = np.zeros((n, n))  # distance matrix
        D[np.logical_not(np.eye(n))] = np.inf  # off-diagonal starts unreachable
        for u in range(n):
            # distance permanence (true is temporary)
            S = np.ones((n,), dtype=bool)
            G1 = G.copy()
            V = [u]  # frontier: nodes whose distance was just finalized
            while True:
                S[V] = 0  # distance u->V is now permanent
                G1[:, V] = 0  # no in-edges as already shortest
                for v in V:
                    W, = np.where(G1[v, :])  # neighbors of smallest nodes
                    # relax: keep the shorter of current and via-v distances
                    td = np.array(
                        [D[u, W].flatten(), (D[u, v] + G1[v, W]).flatten()])
                    D[u, W] = np.min(td, axis=0)
                if D[u, S].size == 0:  # all nodes reached
                    break
                minD = np.min(D[u, S])
                if np.isinf(minD):  # some nodes cannot be reached
                    break
                # next frontier: all temporary nodes at the minimal distance
                V, = np.where(D[u, :] == minD)
        np.fill_diagonal(D, 1)  # avoid division by zero on the diagonal
        D = 1 / D  # invert distances; inf -> 0 for unreachable pairs
        np.fill_diagonal(D, 0)  # self-efficiency is defined as 0
        return D

    n = len(Gw)
    Gl = invert(Gw, copy=True)  # connection length matrix (L_ij = 1/W_ij)
    A = np.array((Gw != 0), dtype=int)  # binary adjacency
    if local:
        E = np.zeros((n,))  # local efficiency
        for u in range(n):
            # V,=np.where(Gw[u,:]) #neighbors
            # k=len(V) #degree
            # if k>=2: #degree must be at least 2
            #    e=(distance_inv_wei(Gl[V].T[V])*np.outer(Gw[V,u],Gw[u,V]))**1/3
            #    E[u]=np.sum(e)/(k*k-k)

            # find pairs of neighbors (in- or out-neighbors of u)
            V, = np.where(np.logical_or(Gw[u, :], Gw[:, u].T))
            # symmetrized vector of weights (cube-rooted, Onnela-style)
            sw = cuberoot(Gw[u, V]) + cuberoot(Gw[V, u].T)
            # inverse distance matrix within the neighborhood subgraph
            e = distance_inv_wei(Gl[np.ix_(V, V)])
            # symmetrized inverse distance matrix
            se = cuberoot(e) + cuberoot(e.T)

            numer = np.sum(np.outer(sw.T, sw) * se) / 2
            if numer != 0:
                # symmetrized adjacency vector
                sa = A[u, V] + A[V, u].T
                denom = np.sum(sa)**2 - np.sum(sa * sa)
                # print numer,denom
                E[u] = numer / denom  # local efficiency
    else:
        e = distance_inv_wei(Gl)
        E = np.sum(e) / (n * n - n)  # mean over off-diagonal pairs
    return E
def clustering_coef_wu_sign(W, coef_type='default'):
    '''
    Returns the weighted clustering coefficient generalized or separated
    for positive and negative weights.

    Three algorithms are supported; hereafter referred to as default,
    zhang, and constantini.

    1. Default (Onnela et al.), as in the traditional clustering
       coefficient computation. Computed separately for positive and
       negative weights.
    2. Zhang & Horvath. Similar to the Onnela formula except weight
       information is incorporated in the denominator. Reduces sensitivity
       of the measure to weights directly connected to the node of
       interest. Computed separately for positive and negative weights.
    3. Constantini & Perugini generalization of the Zhang & Horvath
       formula. Takes both positive and negative weights into account
       simultaneously. Particularly sensitive to non-redundancy in path
       information based on sign. Returns only one value.

    Parameters
    ----------
    W : NxN np.ndarray
        weighted undirected connection matrix
    coef_type : enum
        Allowed values are 'default', 'zhang', 'constantini'

    Returns
    -------
    Cpos : Nx1 np.ndarray
        Clustering coefficient vector for positive weights
    Cneg : Nx1 np.ndarray
        Clustering coefficient vector for negative weights, unless
        coef_type == 'constantini', in which case a single vector is
        returned.

    Notes
    -----
    The diagonal is zeroed on an internal copy; the caller's matrix is
    no longer modified in place. The constantini denominator uses
    absolute triplet weights, per Costantini & Perugini (2014).

    References: Onnela et al. (2005) Phys Rev E 71:065103
    Zhang & Horvath (2005) Stat Appl Genet Mol Biol 41:1544-6115
    Costantini & Perugini (2014) PLOS ONE 9:e88669
    '''
    n = len(W)
    # work on a copy so the caller's matrix is not mutated in place
    W = W.copy()
    np.fill_diagonal(W, 0)  # self-connections are excluded

    if coef_type == 'default':
        # Onnela formula, applied separately to positive / negative weights
        W_pos = W * (W > 0)
        K_pos = np.array(np.sum(np.logical_not(W_pos == 0), axis=1),
                         dtype=float)
        ws_pos = cuberoot(W_pos)
        cyc3_pos = np.diag(np.dot(ws_pos, np.dot(ws_pos, ws_pos)))
        K_pos[np.where(cyc3_pos == 0)] = np.inf  # no triangles -> C = 0
        C_pos = cyc3_pos / (K_pos * (K_pos - 1))

        W_neg = -W * (W < 0)
        K_neg = np.array(np.sum(np.logical_not(W_neg == 0), axis=1),
                         dtype=float)
        ws_neg = cuberoot(W_neg)
        cyc3_neg = np.diag(np.dot(ws_neg, np.dot(ws_neg, ws_neg)))
        K_neg[np.where(cyc3_neg == 0)] = np.inf  # no triangles -> C = 0
        C_neg = cyc3_neg / (K_neg * (K_neg - 1))
        return C_pos, C_neg

    elif coef_type in ('zhang', 'Zhang'):
        W_pos = W * (W > 0)
        cyc3_pos = np.zeros((n,))
        cyc2_pos = np.zeros((n,))

        W_neg = -W * (W < 0)
        cyc3_neg = np.zeros((n,))
        cyc2_neg = np.zeros((n,))

        for i in range(n):
            for j in range(n):
                for q in range(n):
                    # numerator: weighted triangles through node i
                    cyc3_pos[i] += W_pos[j, i] * W_pos[i, q] * W_pos[j, q]
                    cyc3_neg[i] += W_neg[j, i] * W_neg[i, q] * W_neg[j, q]
                    if j != q:
                        # denominator: weighted triplets centered on i
                        cyc2_pos[i] += W_pos[j, i] * W_pos[i, q]
                        cyc2_neg[i] += W_neg[j, i] * W_neg[i, q]

        cyc2_pos[np.where(cyc3_pos == 0)] = np.inf  # no triangles -> C = 0
        C_pos = cyc3_pos / cyc2_pos
        cyc2_neg[np.where(cyc3_neg == 0)] = np.inf
        C_neg = cyc3_neg / cyc2_neg
        return C_pos, C_neg

    elif coef_type in ('constantini', 'Constantini'):
        cyc3 = np.zeros((n,))
        cyc2 = np.zeros((n,))

        for i in range(n):
            for j in range(n):
                for q in range(n):
                    # signed weighted triangles through node i
                    cyc3[i] += W[j, i] * W[i, q] * W[j, q]
                    if j != q:
                        # FIX: the denominator must use the absolute value
                        # of the triplet weight (Costantini & Perugini,
                        # 2014); without it, negative triplets cancel and
                        # the coefficient is unbounded / sign-flipped.
                        cyc2[i] += np.abs(W[j, i] * W[i, q])

        cyc2[np.where(cyc3 == 0)] = np.inf  # no triangles -> C = 0
        C = cyc3 / cyc2
        return C
def efficiency_wei(Gw, local=False):
    '''
    The global efficiency is the average of inverse shortest path length,
    and is inversely related to the characteristic path length.

    The local efficiency is the global efficiency computed on the
    neighborhood of the node, and is related to the clustering coefficient.

    Parameters
    ----------
    W : NxN np.ndarray
        undirected weighted connection matrix
        (all weights in W must be between 0 and 1)
    local : bool
        If True, computes local efficiency instead of global efficiency.
        Default value = False.

    Returns
    -------
    Eglob : float
        global efficiency, only if local=False
    Eloc : Nx1 np.ndarray
        local efficiency, only if local=True

    Notes
    -----
    The efficiency is computed using an auxiliary connection-length
    matrix L, defined as L_ij = 1/W_ij for all nonzero L_ij; This has an
    intuitive interpretation, as higher connection weights intuitively
    correspond to shorter lengths.

    The weighted local efficiency broadly parallels the weighted
    clustering coefficient of Onnela et al. (2005) and distinguishes the
    influence of different paths based on connection weights of the
    corresponding neighbors to the node in question. In other words, a path
    between two neighbors with strong connections to the node in question
    contributes more to the local efficiency than a path between two weakly
    connected neighbors. Note that this weighted variant of the local
    efficiency is hence not a strict generalization of the binary variant.

    Algorithm: Dijkstra's algorithm
    '''
    def distance_inv_wei(G):
        # All-pairs inverse shortest-path lengths of length matrix G via
        # repeated Dijkstra sweeps; unreachable pairs map to 0.
        n = len(G)
        D = np.zeros((n, n))  # distance matrix
        D[np.logical_not(np.eye(n))] = np.inf  # off-diagonal starts unreachable
        # FIX: xrange was removed in Python 3 and raised NameError here;
        # use range (the file's other copy of this routine already does).
        for u in range(n):
            # distance permanence (true is temporary)
            S = np.ones((n,), dtype=bool)
            G1 = G.copy()
            V = [u]  # frontier: nodes whose distance was just finalized
            while True:
                S[V] = 0  # distance u->V is now permanent
                G1[:, V] = 0  # no in-edges as already shortest
                for v in V:
                    W, = np.where(G1[v, :])  # neighbors of smallest nodes
                    # relax: keep the shorter of current and via-v distances
                    td = np.array(
                        [D[u, W].flatten(), (D[u, v] + G1[v, W]).flatten()])
                    D[u, W] = np.min(td, axis=0)
                if D[u, S].size == 0:  # all nodes reached
                    break
                minD = np.min(D[u, S])
                if np.isinf(minD):  # some nodes cannot be reached
                    break
                # next frontier: all temporary nodes at the minimal distance
                V, = np.where(D[u, :] == minD)
        np.fill_diagonal(D, 1)  # avoid division by zero on the diagonal
        D = 1 / D  # invert distances; inf -> 0 for unreachable pairs
        np.fill_diagonal(D, 0)  # self-efficiency is defined as 0
        return D

    n = len(Gw)
    Gl = invert(Gw, copy=True)  # connection length matrix (L_ij = 1/W_ij)
    A = np.array((Gw != 0), dtype=int)  # binary adjacency
    if local:
        E = np.zeros((n,))  # local efficiency
        for u in range(n):
            # find pairs of neighbors (in- or out-neighbors of u)
            V, = np.where(np.logical_or(Gw[u, :], Gw[:, u].T))
            # symmetrized vector of weights (cube-rooted, Onnela-style)
            sw = cuberoot(Gw[u, V]) + cuberoot(Gw[V, u].T)
            # inverse distance matrix within the neighborhood subgraph
            e = distance_inv_wei(Gl[np.ix_(V, V)])
            # symmetrized inverse distance matrix
            se = cuberoot(e) + cuberoot(e.T)

            numer = np.sum(np.outer(sw.T, sw) * se) / 2
            if numer != 0:
                # symmetrized adjacency vector
                sa = A[u, V] + A[V, u].T
                denom = np.sum(sa)**2 - np.sum(sa * sa)
                E[u] = numer / denom  # local efficiency
    else:
        e = distance_inv_wei(Gl)
        E = np.sum(e) / (n * n - n)  # mean over off-diagonal pairs
    return E